Skip to content

Commit

Permalink
[#35][#15] Fix various new lines and improper header-3 (#36)
Browse files Browse the repository at this point in the history
  • Loading branch information
allenkinzalow authored Aug 29, 2023
1 parent bde9130 commit ae249c0
Show file tree
Hide file tree
Showing 12 changed files with 177 additions and 3 deletions.
5 changes: 5 additions & 0 deletions .changeset/modern-countries-rescue.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"osrs-web-scraper": minor
---

Add transformer for converting single-line bold text to header-3
5 changes: 5 additions & 0 deletions .changeset/shy-boats-try.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"osrs-web-scraper": minor
---

Add transformer for combining consecutive MediaWikiBreaks
4 changes: 4 additions & 0 deletions src/scrapers/news/news.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ import fs from "fs";

import { newsContent, newsHeader } from "./sections";
import {
NewsBreakTransformer,
NewsFooterTransformer,
NewsHeaderTransformer,
NewsImageCaptionTransformer,
} from "./transformers";
import { formatFileName } from "../../utils/images";
Expand Down Expand Up @@ -34,7 +36,9 @@ const news: ScrapingService<MediaWikiBuilder> = {
.addContents(
await newsContent.format(results.content, page.url(), results.title)
)
.addTransformer(new NewsBreakTransformer())
.addTransformer(new NewsImageCaptionTransformer())
.addTransformer(new NewsHeaderTransformer())
.addTransformer(new NewsFooterTransformer());

console.info("Writing newspost results to file...");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`NewsHeaderTransformer should change bold text to header-3 1`] = `
"__TOC__
===You can also discuss this update on our===
''The Old School Team.''"
`;

exports[`NewsHeaderTransformer should not change bold text to header-3 1`] = `
"__TOC__
'''test'''
''The Old School Team.''"
`;
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`NewsImageCaptionTransformer should combine the adjacent MediaWikiFile, MediaWikiBreak and MediaWikiImage 1`] = `"[[File:image|''caption'']]"`;
exports[`NewsImageCaptionTransformer should combine the adjacent MediaWikiFile, MediaWikiBreak and MediaWikiImage 1`] = `
"[[File:image|''caption'']]
"
`;

exports[`NewsImageCaptionTransformer should combine the adjacent MediaWikiFile, MediaWikiBreak and MediaWikiImage with surrounding content 1`] = `
"__TOC__
[[test|test]]
[[File:image|''caption'']]
You can also discuss this update on our
"
`;
Expand Down
24 changes: 24 additions & 0 deletions src/scrapers/news/transformers/__tests__/breakTransformer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import {
MediaWikiBreak,
MediaWikiBuilder,
MediaWikiContent,
MediaWikiText,
MediaWikiTOC,
} from "../../../../utils/mediawiki";
import NewsBreakTransformer from "../breakTransformer";

describe("NewsBreakTransformer", () => {
  it("should combine three lines breaks to one", () => {
    // Fixture: a TOC, a run of three consecutive breaks, then italic text.
    const input: MediaWikiContent[] = [
      new MediaWikiTOC(),
      new MediaWikiBreak(),
      new MediaWikiBreak(),
      new MediaWikiBreak(),
      new MediaWikiText("The Old School Team.", { italics: true }),
    ];

    // Transform first, then render through the builder so the snapshot
    // captures the built output rather than the raw content array.
    const collapsed = new NewsBreakTransformer().transform(input);
    const rendered = new MediaWikiBuilder().addContents(collapsed).build();

    expect(rendered).toMatchSnapshot();
  });
});
39 changes: 39 additions & 0 deletions src/scrapers/news/transformers/__tests__/headerTransformer.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import {
MediaWikiBreak,
MediaWikiBuilder,
MediaWikiContent,
MediaWikiText,
MediaWikiTOC,
} from "../../../../utils/mediawiki";
import NewsHeaderTransformer from "../headerTransformer";

describe("NewsHeaderTransformer", () => {
  // Runs the transformer over the given nodes and renders the result through
  // MediaWikiBuilder, so each case snapshots the built output.
  const render = (nodes: MediaWikiContent[]) => {
    const transformed = new NewsHeaderTransformer().transform(nodes);
    return new MediaWikiBuilder().addContents(transformed).build();
  };

  it("should change bold text to header-3", () => {
    // Bold text with a break directly before and directly after it.
    const nodes: MediaWikiContent[] = [
      new MediaWikiTOC(),
      new MediaWikiBreak(),
      new MediaWikiText("You can also discuss this update on our", {
        bold: true,
      }),
      new MediaWikiBreak(),
      new MediaWikiText("The Old School Team.", { italics: true }),
    ];

    expect(render(nodes)).toMatchSnapshot();
  });

  it("should not change bold text to header-3", () => {
    // Bold text preceded by the TOC instead of a break, so it must stay bold.
    const nodes: MediaWikiContent[] = [
      new MediaWikiTOC(),
      new MediaWikiText("test", { bold: true }),
      new MediaWikiBreak(),
      new MediaWikiText("The Old School Team.", { italics: true }),
    ];

    expect(render(nodes)).toMatchSnapshot();
  });
});
35 changes: 35 additions & 0 deletions src/scrapers/news/transformers/breakTransformer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import {
MediaWikiBreak,
MediaWikiContent,
MediaWikiTransformer,
} from "../../../utils/mediawiki";

/**
 * Thins out runs of consecutive MediaWikiBreak elements.
 *
 * Whenever an interior break has a break on both sides in the input, that
 * break and the element immediately after it are left out of the result.
 * A run of exactly three breaks therefore comes out as a single break.
 *
 * NOTE(review): longer runs are not reduced uniformly — e.g. four breaks in
 * a row followed by a non-break leave two breaks behind. Confirm whether
 * that is intended.
 */
class NewsBreakTransformer extends MediaWikiTransformer {
  /**
   * @param content Elements to scan; the array is only read, never modified.
   * @returns A new array with the surplus breaks omitted.
   */
  transform(content: MediaWikiContent[]): MediaWikiContent[] {
    const kept = [];
    const lastPosition = content.length - 1;
    let position = 0;

    while (position < content.length) {
      const node = content[position];

      // The first and last elements are never dropped, so both neighbours
      // are guaranteed to exist when they are inspected here.
      const isSurroundedByBreaks =
        position > 0 &&
        position < lastPosition &&
        node instanceof MediaWikiBreak &&
        content[position - 1] instanceof MediaWikiBreak &&
        content[position + 1] instanceof MediaWikiBreak;

      if (isSurroundedByBreaks) {
        // Skip this break and the break that follows it.
        position += 2;
        continue;
      }

      kept.push(node);
      position += 1;
    }

    return kept;
  }
}

export default NewsBreakTransformer;
39 changes: 39 additions & 0 deletions src/scrapers/news/transformers/headerTransformer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import {
MediaWikiBreak,
MediaWikiContent,
MediaWikiHeader,
MediaWikiText,
MediaWikiTransformer,
} from "../../../utils/mediawiki";

/**
 * Promotes standalone bold text to a header-3.
 *
 * A MediaWikiText element is replaced by `new MediaWikiHeader(value, 3)` when
 * all of the following hold:
 *  - it is neither the first nor the last element,
 *  - its value is at most 70 characters long,
 *  - its styling marks it as bold,
 *  - the elements directly before and after it are MediaWikiBreak instances.
 *
 * Every other element is passed through unchanged.
 */
class NewsHeaderTransformer extends MediaWikiTransformer {
  /**
   * @param content Elements to scan; the array is only read, never modified.
   * @returns A new array of the same length with qualifying bold text
   *   swapped for headers.
   */
  transform(content: MediaWikiContent[]): MediaWikiContent[] {
    // Longest bold text that is still converted to a header.
    const maxHeaderLength = 70;
    const output = [];

    for (let position = 0; position < content.length; position += 1) {
      const node = content[position];
      const hasBothNeighbours = position > 0 && position < content.length - 1;

      if (
        hasBothNeighbours &&
        node instanceof MediaWikiText &&
        node.value.length <= maxHeaderLength &&
        node.styling?.bold &&
        content[position - 1] instanceof MediaWikiBreak &&
        content[position + 1] instanceof MediaWikiBreak
      ) {
        output.push(new MediaWikiHeader(node.value, 3));
        continue;
      }

      output.push(node);
    }

    return output;
  }
}

export default NewsHeaderTransformer;
1 change: 1 addition & 0 deletions src/scrapers/news/transformers/imageCaptionTransformer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class NewsImageCaptionTransformer extends MediaWikiTransformer {
caption: second,
})
);
transformedContent.push(new MediaWikiBreak());
index += 2;
} else {
transformedContent.push(current);
Expand Down
2 changes: 2 additions & 0 deletions src/scrapers/news/transformers/index.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
export { default as NewsBreakTransformer } from "./breakTransformer";
export { default as NewsFooterTransformer } from "./footerTransformer";
export { default as NewsHeaderTransformer } from "./headerTransformer";
export { default as NewsImageCaptionTransformer } from "./imageCaptionTransformer";
6 changes: 4 additions & 2 deletions src/utils/mediawiki/builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,16 @@ class MediaWikiBuilder {
}

addContent(content: MediaWikiContent): MediaWikiBuilder {
if (content !== null) {
if (content !== null && content !== undefined) {
this.content.push(content);
}
return this;
}

addContents(contents: MediaWikiContent[]): MediaWikiBuilder {
this.content = this.content.concat(contents);
this.content = this.content.concat(
contents.filter((content) => content !== undefined && content !== null)
);
return this;
}

Expand Down

0 comments on commit ae249c0

Please sign in to comment.