From 849cfaae8bd822fc50f893c8191916c8ab5e35af Mon Sep 17 00:00:00 2001 From: Allen Kinzalow Date: Thu, 14 Mar 2024 15:08:58 -0700 Subject: [PATCH] [#95] Fix table header parsing --- .changeset/afraid-gifts-act.md | 5 +++++ .changeset/five-numbers-repeat.md | 5 +++++ .../__snapshots__/table.test.ts.snap | 22 +++++++++++++++---- .../newsContent/nodes/__tests__/table.test.ts | 13 +++++++++-- .../news/sections/newsContent/nodes/table.ts | 8 +++++-- 5 files changed, 45 insertions(+), 8 deletions(-) create mode 100644 .changeset/afraid-gifts-act.md create mode 100644 .changeset/five-numbers-repeat.md diff --git a/.changeset/afraid-gifts-act.md b/.changeset/afraid-gifts-act.md new file mode 100644 index 0000000..745cd4f --- /dev/null +++ b/.changeset/afraid-gifts-act.md @@ -0,0 +1,5 @@ +--- +"osrs-web-scraper": patch +--- + +Re-order table option params diff --git a/.changeset/five-numbers-repeat.md b/.changeset/five-numbers-repeat.md new file mode 100644 index 0000000..dee53c3 --- /dev/null +++ b/.changeset/five-numbers-repeat.md @@ -0,0 +1,5 @@ +--- +"osrs-web-scraper": patch +--- + +Add support to table parser for parsing thead diff --git a/src/scrapers/news/sections/newsContent/nodes/__tests__/__snapshots__/table.test.ts.snap b/src/scrapers/news/sections/newsContent/nodes/__tests__/__snapshots__/table.test.ts.snap index c9f2f37..331a041 100644 --- a/src/scrapers/news/sections/newsContent/nodes/__tests__/__snapshots__/table.test.ts.snap +++ b/src/scrapers/news/sections/newsContent/nodes/__tests__/__snapshots__/table.test.ts.snap @@ -1,10 +1,24 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP -exports[`table node A basic table should render 1`] = ` -"{| style=\\"text-align: center;\\" class=\\"wikitable\\" +exports[`table node A table with no thead should render 1`] = ` +"{| class=\\"wikitable\\" style=\\"text-align: center;\\" |- -! test -! test +! header1 +! header2 +|- +| test +| test +|}" +`; + +exports[`table node A table with thead should render 1`] = ` +"{| class=\\"wikitable\\" style=\\"text-align: center;\\" +|- +! header1 +! header2 +|- +| test +| test |- | test | test diff --git a/src/scrapers/news/sections/newsContent/nodes/__tests__/table.test.ts b/src/scrapers/news/sections/newsContent/nodes/__tests__/table.test.ts index 777f755..3506b3e 100644 --- a/src/scrapers/news/sections/newsContent/nodes/__tests__/table.test.ts +++ b/src/scrapers/news/sections/newsContent/nodes/__tests__/table.test.ts @@ -4,9 +4,18 @@ import parse from "node-html-parser"; import tableParser from "../table"; describe("table node", () => { - test("A basic table should render", () => { + test("A table with no thead should render", () => { const root = parse( - "
testtest
testtest
" + "
header1header2
testtest
" + ); + const builder = new MediaWikiBuilder(); + builder.addContents([tableParser(root.firstChild)].flat()); + expect(builder.build()).toMatchSnapshot(); + }); + + test("A table with thead should render", () => { + const root = parse( + "
header1header2
testtest
testtest
" ); const builder = new MediaWikiBuilder(); builder.addContents([tableParser(root.firstChild)].flat()); diff --git a/src/scrapers/news/sections/newsContent/nodes/table.ts b/src/scrapers/news/sections/newsContent/nodes/table.ts index 5817ad1..5b0f058 100644 --- a/src/scrapers/news/sections/newsContent/nodes/table.ts +++ b/src/scrapers/news/sections/newsContent/nodes/table.ts @@ -12,9 +12,13 @@ import { ContentNodeParser } from "../types"; export const tableParser: ContentNodeParser = (node, options) => { if (node instanceof HTMLElement) { const table = node as HTMLElement; + const thead = table.querySelector("thead"); const tbody = table.querySelector("tbody"); const rowNodes = tbody.querySelectorAll("tr"); - const headerRowNodes = rowNodes.shift().querySelectorAll("td"); + const headerNodes = thead?.querySelectorAll("tr") ?? []; + const headerRowNodes = (headerNodes?.length > 0 ? headerNodes : rowNodes) + .shift() + .querySelectorAll("td"); const headers: MediaWikiTableCell[] = headerRowNodes.map((node) => ({ content: [new MediaWikiText(node.textContent.trim())], @@ -39,8 +43,8 @@ export const tableParser: ContentNodeParser = (node, options) => { return new MediaWikiTable({ options: { - style: "text-align: center;", class: "wikitable", + style: "text-align: center;", }, rows: [ {