Skip to content

Commit

Permalink
[#95] Fix table header parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
allenkinzalow committed Mar 14, 2024
1 parent c1c086d commit 849cfaa
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 8 deletions.
5 changes: 5 additions & 0 deletions .changeset/afraid-gifts-act.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"osrs-web-scraper": patch
---

Reorder table option params so that `class` is emitted before `style`
5 changes: 5 additions & 0 deletions .changeset/five-numbers-repeat.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"osrs-web-scraper": patch
---

Add support for parsing `<thead>` header rows in the table parser
Original file line number Diff line number Diff line change
@@ -1,10 +1,24 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`table node A basic table should render 1`] = `
"{| style=\\"text-align: center;\\" class=\\"wikitable\\"
exports[`table node A table with no thead should render 1`] = `
"{| class=\\"wikitable\\" style=\\"text-align: center;\\"
|-
! test
! test
! header1
! header2
|-
| test
| test
|}"
`;
exports[`table node A table with thead should render 1`] = `
"{| class=\\"wikitable\\" style=\\"text-align: center;\\"
|-
! header1
! header2
|-
| test
| test
|-
| test
| test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,18 @@ import parse from "node-html-parser";
import tableParser from "../table";

describe("table node", () => {
test("A basic table should render", () => {
test("A table with no thead should render", () => {
const root = parse(
"<table><tbody><tr><td>test</td><td>test</td></tr><tr><td>test</td><td>test</td></tr></tbody></table>"
"<table><tbody><tr><td>header1</td><td>header2</td></tr><tr><td>test</td><td>test</td></tr></tbody></table>"
);
const builder = new MediaWikiBuilder();
builder.addContents([tableParser(root.firstChild)].flat());
expect(builder.build()).toMatchSnapshot();
});

test("A table with thead should render", () => {
const root = parse(
"<table><thead><tr><td>header1</td><td>header2</td></tr></thead><tbody><tr><td>test</td><td>test</td></tr><tr><td>test</td><td>test</td></tr></tbody></table>"
);
const builder = new MediaWikiBuilder();
builder.addContents([tableParser(root.firstChild)].flat());
Expand Down
8 changes: 6 additions & 2 deletions src/scrapers/news/sections/newsContent/nodes/table.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@ import { ContentNodeParser } from "../types";
export const tableParser: ContentNodeParser = (node, options) => {
if (node instanceof HTMLElement) {
const table = node as HTMLElement;
const thead = table.querySelector("thead");
const tbody = table.querySelector("tbody");
const rowNodes = tbody.querySelectorAll("tr");
const headerRowNodes = rowNodes.shift().querySelectorAll("td");
const headerNodes = thead?.querySelectorAll("tr") ?? [];
const headerRowNodes = (headerNodes?.length > 0 ? headerNodes : rowNodes)
.shift()
.querySelectorAll("td");
const headers: MediaWikiTableCell[] =
headerRowNodes.map<MediaWikiTableCell>((node) => ({
content: [new MediaWikiText(node.textContent.trim())],
Expand All @@ -39,8 +43,8 @@ export const tableParser: ContentNodeParser = (node, options) => {

return new MediaWikiTable({
options: {
style: "text-align: center;",
class: "wikitable",
style: "text-align: center;",
},
rows: [
{
Expand Down

0 comments on commit 849cfaa

Please sign in to comment.