From cbc9239850c956c8b95ec915eee998735d959c87 Mon Sep 17 00:00:00 2001 From: Allen Kinzalow Date: Fri, 22 Sep 2023 15:51:50 -0500 Subject: [PATCH] [#53] Fix css selector for news post content container --- .changeset/metal-jokes-poke.md | 5 +++++ package.json | 2 +- src/scrapers/news/news.ts | 2 +- src/scrapers/news/transformers/breakTransformer.ts | 5 ++++- yarn.lock | 8 ++++---- 5 files changed, 15 insertions(+), 7 deletions(-) create mode 100644 .changeset/metal-jokes-poke.md diff --git a/.changeset/metal-jokes-poke.md b/.changeset/metal-jokes-poke.md new file mode 100644 index 0000000..06dfbff --- /dev/null +++ b/.changeset/metal-jokes-poke.md @@ -0,0 +1,5 @@ +--- +"osrs-web-scraper": patch +--- + +Fix css selector for news post content container diff --git a/package.json b/package.json index 65d8746..37052b6 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,7 @@ "date-fns": "^2.30.0", "dotenv": "^16.0.3", "image-size": "^1.0.2", - "node-html-parser": "^6.1.5", + "node-html-parser": "^6.1.10", "puppeteer": "^20.8.0", "rss-parser": "^3.13.0", "tsconfig-paths": "^4.2.0", diff --git a/src/scrapers/news/news.ts b/src/scrapers/news/news.ts index b0165e8..c41d035 100644 --- a/src/scrapers/news/news.ts +++ b/src/scrapers/news/news.ts @@ -20,7 +20,7 @@ const news: ScrapingService = { const $ = window.$; const title = $(".news-article-header__title").html(); const headerHtml = $(".news-article-header").html(); - const contentHtml = $(".news-article-content #article-top").html(); + const contentHtml = $(".news-article-content").html(); return { title, header: headerHtml, diff --git a/src/scrapers/news/transformers/breakTransformer.ts b/src/scrapers/news/transformers/breakTransformer.ts index ece10a3..87daf9c 100644 --- a/src/scrapers/news/transformers/breakTransformer.ts +++ b/src/scrapers/news/transformers/breakTransformer.ts @@ -22,7 +22,10 @@ class NewsBreakTransformer extends MediaWikiTransformer { after instanceof MediaWikiBreak ) { index++; - } else if (before instanceof MediaWikiHeader) { + } else if ( + before instanceof MediaWikiHeader && + after instanceof MediaWikiBreak + ) { continue; } else { transformedContent.push(current); diff --git a/yarn.lock b/yarn.lock index 45c410b..036582f 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4161,10 +4161,10 @@ node-fetch@^2.6.12: dependencies: whatwg-url "^5.0.0" -node-html-parser@^6.1.5: - version "6.1.5" - resolved "https://registry.yarnpkg.com/node-html-parser/-/node-html-parser-6.1.5.tgz#c819dceb13a10a7642ff92f94f870b4f77968097" - integrity sha512-fAaM511feX++/Chnhe475a0NHD8M7AxDInsqQpz6x63GRF7xYNdS8Vo5dKsIVPgsOvG7eioRRTZQnWBrhDHBSg== +node-html-parser@^6.1.10: + version "6.1.10" + resolved "https://registry.yarnpkg.com/node-html-parser/-/node-html-parser-6.1.10.tgz#5db11eac3ccbea6fc1b04a22c8a0e3a0774cfae6" + integrity sha512-6/uWdWxjQWQ7tMcFK2wWlrflsQUzh1HsEzlIf2j5+TtzfhT2yUvg3DwZYAmjEHeR3uX74ko7exjHW69J0tOzIg== dependencies: css-select "^5.1.0" he "1.2.0"