Skip to content

Commit

Permalink
linkcheck script
Browse files Browse the repository at this point in the history
  • Loading branch information
creinkin committed Feb 6, 2023
1 parent 3107e62 commit eccab68
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 1 deletion.
6 changes: 6 additions & 0 deletions .github/workflows/airtable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,17 @@ jobs:
env:
AIRTABLE_API_KEY: ${{ secrets.AIRTABLE_API_KEY }}
AIRTABLE_BASE_ID: ${{ secrets.AIRTABLE_BASE_ID }}
- name: Check Link Responses
run: |
OUTPUT_FILE_NAME=reports_`date +%s`.json
npm run --silent airtable | tee link_reports/$OUTPUT_FILE_NAME
ln -sf $OUTPUT_FILE_NAME link_reports/latest.json
- name: Push changes to repo
run: |
git config user.email "${{ github.actor }}@users.noreply.github.com"
git config user.name "${{ github.actor }}"
git add data/$OUTPUT_FILE_NAME data/latest.json
git add data/$OUTPUT_FILE_NAME link_reports/latest.json
git status
git commit -m "Triggered automated Airtable data fetch via release on $(date)"
git push
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,5 @@ dist
.vscode-test

# Local Node Environment
nenv
nenv
.DS_Store
45 changes: 45 additions & 0 deletions js/linkcheck.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
const axios = require("axios");
const DOCUMENTS = require("../data/latest.json");
const fs = require("fs");
const { stringify } = require("csv-stringify");

// Link checker: issues a GET request for every record in DOCUMENTS that has an
// "Internet URL of Document" value, collects the requests that fail, and
// prints a markdown table of the failures once every request has settled.

const records = DOCUMENTS.records;

// Milliseconds added between the start times of consecutive requests, so the
// requests are staggered instead of all being fired at once.
const delayIncrement = 500;
// Per-request timeout in milliseconds handed to axios.
const requestTimeout = 5000;

// Accumulates one entry per failed request: { documentId, url, error }.
const failed_links = [];

/**
 * Waits `delay` milliseconds, then requests `endpoint` with axios.
 *
 * A failed request is logged as a tab-separated line and appended to
 * `failed_links`; it never rejects the returned promise.
 *
 * @param {*} doc_id - Value of the record's "Document ID" field.
 * @param {string} endpoint - URL to request.
 * @param {number} delay - Milliseconds to wait before issuing the request.
 * @returns {Promise<void>} Settles only after the request has completed or failed.
 */
function checkLink(doc_id, endpoint, delay) {
  return new Promise((resolve) => {
    setTimeout(resolve, delay);
  })
    // Returning the axios promise is essential: without it the chain resolves
    // as soon as the timer fires and the final report is printed while
    // requests are still in flight.
    .then(() => axios.get(endpoint, { timeout: requestTimeout }))
    .then(() => {
      // Successful responses are intentionally not reported.
    })
    .catch((error) => {
      console.log([doc_id, endpoint, error.code].join('\t'));
      failed_links.push({ documentId: doc_id, url: endpoint, error: error.code });
    });
}

const endpoints = [];
let delay = 0;
for (const record of records) {
  const endpoint = record["Internet URL of Document"];
  // Records without a URL are skipped and do not consume a delay slot.
  if (!endpoint) {
    continue;
  }
  endpoints.push(checkLink(record["Document ID"], endpoint, delay));
  delay += delayIncrement;
}

Promise.all(endpoints).finally(() => {
  console.log("checked all sources.");
  // The template literal's line breaks are part of the emitted table header.
  let output = `| Document ID | Error | Link
| ----------- | ----------- | --------------------|
`;

  failed_links.forEach((val) => {
    output += `|${val.documentId} | ${val.error} | ${val.url} |\n`;
  });

  console.log(output);
});

5 changes: 5 additions & 0 deletions link_reports/latest.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
| Document ID | Error | Link
| ----------- | ----------- | --------------------|
|0000-BOURI-GLO | ERR_BAD_REQUEST | https://www.usaid.gov/sites/default/files/documents/USAID_Private_Capital_508.pdf |
|0000-CARBO-GLO | ERR_BAD_REQUEST | https://www.researchgate.net/publication/6696367_Privatisation_and_outsourcing_in_wartime_The_humanitarian_challenges |
|0000-EIRIS-IDN | ERR_BAD_RESPONSE | https://www.tlffindonesia.org/wp-content/uploads/2021/05/RLU-Factsheet.pdf |
3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"main": "js/airtable.js",
"scripts": {
"airtable": "node js/airtable.js",
"linkcheck": "node js/linkcheck.js",
"dev": "lite-server"
},
"repository": {
Expand All @@ -19,6 +20,8 @@
"homepage": "https://github.com/crcresearch/usaid-pse-egm#readme",
"dependencies": {
"airtable": "^0.10.1",
"axios": "^1.3.1",
"csv": "^6.2.6",
"csvtojson": "^2.0.10",
"lite-server": "^2.5.4"
}
Expand Down

0 comments on commit eccab68

Please sign in to comment.