Skip to content

Commit

Permalink
Clean up the HTML before searching for the <title>.
Browse files Browse the repository at this point in the history
Strips <script> and <style> elements, including their contents, from the page before the <title> search.
Fixes <title> detection on sites like Kotaku.
  • Loading branch information
eheikes committed Sep 14, 2017
1 parent e09e002 commit 871e40a
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion toreadapi.php
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,9 @@ function postEntry() {
}

// Save the <title>.
- if (preg_match("#<title[^>]*>(.*)</title>#iU", $result, $matches)
+ $cleanedHtml = preg_replace('#<script.*</script>#isU', '', $result);
+ $cleanedHtml = preg_replace('#<style.*</style>#isU', '', $cleanedHtml);
+ if (preg_match("#<title[^>]*>(.*)</title>#iU", $cleanedHtml, $matches)
and $matches[1] != "")
{
$title = mb_substr($matches[1], 0, $maxTitleLength, 'UTF-8');
Expand Down

0 comments on commit 871e40a

Please sign in to comment.