-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrapKenNews.js
94 lines (84 loc) · 2.49 KB
/
scrapKenNews.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import puppeteer from "puppeteer";
import postToGhost from "./ghost.js";
/**
 * Scrape one article from a listing page and hand it to `postToGhost` as a
 * draft.
 *
 * Steps: open `url` in headless Chromium, collect the `href` of every element
 * matching `IdClassesTags`, navigate to the SECOND collected link, extract the
 * title / body / image from that page, and pass the first match of each to
 * `postToGhost` together with a fixed "Breaking" tag and status "draft".
 *
 * If the listing page yields fewer than two links, nothing is posted and the
 * function resolves after logging a warning.
 *
 * @param {string} url - Address of the listing page to open first.
 * @param {string} IdClassesTags - CSS selector (passed to
 *   `document.querySelectorAll`) matching the link elements on the listing page.
 * @param {string} articletitle - CSS selector for the title element(s) on the
 *   article page; `textContent` up to the first "|" is used.
 * @param {string} articlebody - CSS selector for the body element(s) on the
 *   article page; `innerHTML` up to the first "|" is used.
 * @param {string} articleimage - CSS selector for the image element(s) on the
 *   article page; the `src` attribute is used.
 * @returns {Promise<void>} Resolves once the post call has completed (or was
 *   skipped). Rejects if launching, navigating, extracting or posting fails.
 */
export default async function ScrapData(
  url,
  IdClassesTags,
  articletitle,
  articlebody,
  articleimage
) {
  // `timeout: 0` disables Puppeteer's navigation timeout entirely, so a page
  // that never fires "load" will keep this call pending.
  const navigationOptions = { waitUntil: "load", timeout: 0 };

  // Index of the listing link to follow. The original code always used the
  // second match. NOTE(review): presumably the first match is not an article
  // link on the target site — confirm against the caller's selectors.
  const articleLinkIndex = 1;

  const browser = await puppeteer.launch({
    headless: true,
    executablePath: "/usr/bin/chromium-browser",
    args: ["--no-sandbox"],
  });

  try {
    const page = await browser.newPage();
    await page.goto(url, navigationOptions);

    // Runs inside the page: collect the href of every matching element.
    const links = await page.evaluate(
      (selector) =>
        [...document.querySelectorAll(selector)].map(
          (anchor) => `${anchor.href}`
        ),
      IdClassesTags
    );

    // Guard against a listing page with too few matches; navigating to
    // `undefined` would otherwise throw.
    const articleUrl = links?.[articleLinkIndex];
    if (!articleUrl) {
      console.warn(
        `No article link found at index ${articleLinkIndex} for selector "${IdClassesTags}" on ${url}`
      );
      return;
    }

    await page.goto(articleUrl, navigationOptions);

    // Full article title
    const fullArticleTitle = await page.evaluate(
      (selector) =>
        [...document.querySelectorAll(selector)].map((element) =>
          element.textContent.split("|")[0].trim()
        ),
      articletitle
    );

    // Full article body
    const fullArticleBody = await page.evaluate(
      (selector) =>
        [...document.querySelectorAll(selector)].map((element) =>
          element.innerHTML.split("|")[0].trim()
        ),
      articlebody
    );

    // Full article image. The template literal keeps the original behaviour of
    // stringifying the attribute (a missing `src` becomes the string "null").
    const fullArticleImage = await page.evaluate(
      (selector) =>
        [...document.querySelectorAll(selector)].map(
          (element) => `${element.getAttribute("src")}`
        ),
      articleimage
    );

    const tags = [
      {
        created_at: null,
        description: null,
        feature_image: null,
        id: "5ddc9063c35e7700383b27e0",
        meta_description: null,
        meta_title: null,
        name: "Breaking",
        slug: "breaking-news",
        updated_at: null,
        url: "https://verixr.com/tag/breaking-news/",
        visibility: "public",
      },
    ];
    const status = "draft";

    // Awaited so that a failed post rejects this function instead of becoming
    // an unhandled rejection, and so the log line below only prints after the
    // call has finished. Awaiting is harmless if `postToGhost` is synchronous.
    await postToGhost(
      fullArticleImage[0],
      fullArticleTitle[0],
      fullArticleBody[0],
      status,
      tags
    );
    console.log(`${fullArticleTitle} has been published`);
  } finally {
    // Always release the Chromium process, including on early return or error.
    await browser.close();
  }
}