-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* [#125] Add support for scraping the world list * workflow node versions * README
- Loading branch information
1 parent
c3f94ed
commit bc845dd
Showing
18 changed files
with
201 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
"osrs-web-scraper": minor | ||
--- | ||
|
||
Update workflow dispatch to allow choosing which task to run |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
"osrs-web-scraper": patch | ||
--- | ||
|
||
Update all workflows to use Node 21 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
"osrs-web-scraper": minor | ||
--- | ||
|
||
Add commander for cli parsing |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
"osrs-web-scraper": minor | ||
--- | ||
|
||
Add world list scraping |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,38 @@ | ||
import config from "@config"; | ||
import { Command } from "commander"; | ||
|
||
import scraper from "./scraper"; | ||
import { news, polls } from "./scrapers"; | ||
import { news, polls, worlds } from "./scrapers"; | ||
import { WORLD_LIST_URL } from "./scrapers/worlds/worlds.utils"; | ||
import packageJson from "../package.json"; | ||
|
||
console.log(`Running ${config.environment}`); | ||
|
||
const newsLink = process.env.NEWS_LINK; | ||
const pollLink = process.env.POLL_LINK; | ||
const program = new Command(); | ||
|
||
if (newsLink) { | ||
scraper.scrape(newsLink, news); | ||
} | ||
program.name("OSRS Web Scraper").description("").version(packageJson.version); | ||
|
||
if (pollLink) { | ||
scraper.scrape(pollLink, polls); | ||
} | ||
// Register the `news` subcommand: takes one required positional argument
// (the link to the news post) and hands it to the news scraper.
program
  .command("news")
  .description("Scrape an OSRS news post.")
  .argument("<string>", "The news post to scrape.")
  .action((newsLink) => {
    scraper.scrape(newsLink, news);
  });
|
||
program | ||
.command("poll") | ||
.description("Scrape an OSRS poll.") | ||
.argument("<string>", "The poll to scrape.") | ||
.action((pollLink) => { | ||
scraper.scrape(pollLink, polls); | ||
}); | ||
|
||
program | ||
.command("worlds") | ||
.description("Scrape the OSRS world list.") | ||
.action(() => { | ||
scraper.scrape(WORLD_LIST_URL, worlds); | ||
}); | ||
|
||
program.parse(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
export { default as news } from "./news"; | ||
export { default as polls } from "./polls"; | ||
export { default as worlds } from "./worlds"; | ||
|
||
export * from "./types"; |
6 changes: 6 additions & 0 deletions
6
src/scrapers/worlds/__tests__/__snapshots__/worlds.utils.test.ts.snap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
// Jest Snapshot v1, https://goo.gl/fbAQLP | ||
|
||
exports[`world scraper utils getWorldLines 1`] = ` | ||
"{{WorldLine|101|United States|mems=deadman|111-126 Deadman}} | ||
" | ||
`; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import { MediaWikiBuilder } from "@osrs-wiki/mediawiki-builder"; | ||
import parse from "node-html-parser"; | ||
|
||
import { getWorldLines } from "../worlds.utils"; | ||
|
||
describe("world scraper utils", () => {
  // Snapshot test: a single world-list row is converted by getWorldLines
  // and rendered through MediaWikiBuilder.
  test("getWorldLines", () => {
    const rowHtml =
      '<tr><td>\n<a href="">OldSchool 101</a></td><td>0 players</td><td>United States</td><td>Members</td><td>111-126 Deadman</td></tr>';
    const parsedRows = parse(rowHtml);

    const wikiBuilder = new MediaWikiBuilder();
    wikiBuilder.addContents(getWorldLines(parsedRows));
    const rendered = wikiBuilder.build();

    expect(rendered).toMatchSnapshot();
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
import worlds from "./worlds"; | ||
|
||
export default worlds; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import { MediaWikiBuilder } from "@osrs-wiki/mediawiki-builder"; | ||
import fs from "fs"; | ||
import parse from "node-html-parser"; | ||
|
||
import { getWorldLines } from "./worlds.utils"; | ||
import { ScrapingService } from "../types"; | ||
|
||
/**
 * Scraping service for the world list page.
 *
 * Reads the inner HTML of the `.server-list__body` element from the loaded
 * page, converts its rows with `getWorldLines`, and writes the built output
 * to `out/worlds/worlds.txt`.
 */
const worlds: ScrapingService<MediaWikiBuilder> = {
  /**
   * @param page The loaded page to read the world list from.
   * @returns The builder holding the generated world lines. Errors are logged
   *   rather than rethrown; when scraping itself fails the promise resolves
   *   without a builder.
   */
  scrape: async (page): Promise<MediaWikiBuilder> => {
    try {
      const results = await page.evaluate(() => {
        // eslint-disable-next-line @typescript-eslint/ban-ts-comment
        // @ts-ignore Ignore window typing
        const $ = window.$;
        const worldRows = $(".server-list__body").html();

        return {
          worldRows,
        };
      });

      // Fail with a clear message instead of letting the parser choke on an
      // undefined value when the table body is not on the page.
      if (!results.worldRows) {
        throw new Error("World list table body (.server-list__body) not found");
      }

      const worldNodes = parse(results.worldRows);

      const builder = new MediaWikiBuilder();
      builder.addContents(getWorldLines(worldNodes));

      console.info("Writing world list results to file...");
      try {
        // A recursive mkdir is a no-op when the directory already exists, so
        // no separate existence check is needed. The promise-based API makes
        // the awaits meaningful (the original awaited a synchronous call).
        await fs.promises.mkdir("out/worlds", { recursive: true });
        await fs.promises.writeFile(`out/worlds/worlds.txt`, builder.build());
        console.info("Successfully created worlds file");
      } catch (err) {
        // A failed write is logged only; the builder is still returned.
        console.error(err);
      }

      return builder;
    } catch (error) {
      console.error(error);
    }
  },
};
|
||
export default worlds; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import { MediaWikiTemplate } from "@osrs-wiki/mediawiki-builder"; | ||
import { HTMLElement } from "node-html-parser"; | ||
|
||
export const WORLD_LIST_URL = "https://oldschool.runescape.com/a=13/slu"; | ||
|
||
/**
 * Converts the rows of the world list table into `WorldLine` templates.
 *
 * Each direct `<tr>` child of `worldRows` is read by cell position:
 * cell 0 holds the world name (its number is extracted), cell 2 the region,
 * cell 3 the membership type, and cell 4 the activity.
 *
 * @param worldRows Parsed element whose direct children are the `<tr>` rows.
 * @returns One collapsed `WorldLine` template per row that has at least five
 *   `<td>` cells; rows with fewer cells are skipped.
 */
export const getWorldLines = (worldRows: HTMLElement): MediaWikiTemplate[] => {
  // Type guard so the filtered arrays are typed as elements, not bare nodes.
  const isElementWithTag =
    (tagName: string) =>
    (node: unknown): node is HTMLElement =>
      node instanceof HTMLElement && node.tagName === tagName;

  return (
    worldRows.childNodes
      .filter(isElementWithTag("TR"))
      .map((row) => row.childNodes.filter(isElementWithTag("TD")))
      // Skip rows that lack the expected cells instead of throwing on them.
      .filter((cells) => cells.length >= 5)
      .map((cells) => {
        const [worldCell, , regionCell, typeCell, activityCell] = cells;

        // Prefer the link text; fall back to the whole cell so the result
        // does not depend on whitespace nodes surrounding the anchor.
        const worldName =
          worldCell.querySelector("a")?.textContent ?? worldCell.textContent;
        // Drop the leading non-digit prefix, e.g. "OldSchool 101" -> "101".
        const worldNumber = worldName.replace(/^\D+/, "").trim();

        const region = regionCell.textContent;
        const activity = activityCell.textContent;

        // Deadman activity takes precedence over the membership column.
        let members = "no";
        if (activity.includes("Deadman")) {
          members = "deadman";
        } else if (typeCell.textContent === "Members") {
          members = "yes";
        }

        const worldLine = new MediaWikiTemplate("WorldLine", {
          collapsed: true,
        });
        worldLine.add("", worldNumber);
        worldLine.add("", region);
        worldLine.add("mems", members);
        worldLine.add("", activity);
        return worldLine;
      })
  );
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters