diff --git a/.github/workflows/airtable.yml b/.github/workflows/airtable.yml
index 285f37f..b21f477 100644
--- a/.github/workflows/airtable.yml
+++ b/.github/workflows/airtable.yml
@@ -27,11 +27,19 @@ jobs:
         env:
           AIRTABLE_API_KEY: ${{ secrets.AIRTABLE_API_KEY }}
           AIRTABLE_BASE_ID: ${{ secrets.AIRTABLE_BASE_ID }}
+      - name: Check Link Responses
+        run: |
+          # stdout of the link checker is the Markdown report; progress goes to stderr.
+          OUTPUT_FILE_NAME=reports_`date +%s`.md
+          npm run --silent linkcheck | tee link_reports/$OUTPUT_FILE_NAME
+          ln -sf $OUTPUT_FILE_NAME link_reports/latest.md
       - name: Push changes to repo
         run: |
           git config user.email "${{ github.actor }}@users.noreply.github.com"
           git config user.name "${{ github.actor }}"
           git add data/$OUTPUT_FILE_NAME data/latest.json
+          # Shell variables do not carry over between steps, so stage the whole report directory.
+          git add link_reports
           git status
           git commit -m "Triggered automated Airtable data fetch via release on $(date)"
           git push
diff --git a/.gitignore b/.gitignore
index 8e8cb2b..f8e1ffe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -108,4 +108,5 @@ dist
 .vscode-test
 
 # Local Node Environment
-nenv
\ No newline at end of file
+nenv
+.DS_Store
diff --git a/js/linkcheck.js b/js/linkcheck.js
new file mode 100644
index 0000000..9dff4ed
--- /dev/null
+++ b/js/linkcheck.js
@@ -0,0 +1,51 @@
+const axios = require("axios");
+const DOCUMENTS = require("../data/latest.json");
+// NOTE(review): fs and stringify are not referenced below; kept as in the original.
+const fs = require("fs");
+const { stringify } = require("csv-stringify");
+
+// Link checker: requests every record's document URL and prints a Markdown
+// table of the links that failed. The table is the only thing written to
+// stdout; per-failure progress lines go to stderr.
+const records = DOCUMENTS.records;
+
+// Milliseconds between the start of consecutive requests.
+const delayIncrement = 500;
+const endpoints = [];
+const failed_links = [];
+let delay = 0;
+
+for (let i = 0; i < records.length; i++) {
+  const endpoint = records[i]["Internet URL of Document"];
+  if (!endpoint) {
+    continue;
+  }
+  const doc_id = records[i]["Document ID"];
+  endpoints.push(
+    new Promise((resolve) => {
+      setTimeout(resolve, delay);
+    }).then(() =>
+      // Return the request promise so Promise.all waits for the response
+      // itself, not only for the start delay.
+      axios
+        .get(endpoint, { timeout: 5000 })
+        .catch((error) => {
+          // Progress goes to stderr so stdout carries only the report table.
+          console.error([doc_id, endpoint, error.code].join("\t"));
+          failed_links.push({ documentId: doc_id, url: endpoint, error: error.code });
+        })
+    )
+  );
+  delay += delayIncrement;
+}
+
+Promise.all(endpoints).finally(() => {
+  console.error("checked all sources.");
+  let output = `| Document ID | Error | Link
+| ----------- | ----------- | --------------------|
+`;
+  failed_links.forEach((val) => {
+    output += `|${val.documentId} | ${val.error} | ${val.url} |\n`;
+  });
+  console.log(output);
+});
diff --git a/link_reports/latest.md b/link_reports/latest.md
new file mode 100644
index 0000000..f65b9a7
--- /dev/null
+++ b/link_reports/latest.md
@@ -0,0 +1,5 @@
+| Document ID | Error | Link
+| ----------- | ----------- | --------------------|
+|0000-BOURI-GLO | ERR_BAD_REQUEST | https://www.usaid.gov/sites/default/files/documents/USAID_Private_Capital_508.pdf |
+|0000-CARBO-GLO | ERR_BAD_REQUEST | https://www.researchgate.net/publication/6696367_Privatisation_and_outsourcing_in_wartime_The_humanitarian_challenges |
+|0000-EIRIS-IDN | ERR_BAD_RESPONSE | https://www.tlffindonesia.org/wp-content/uploads/2021/05/RLU-Factsheet.pdf |
diff --git a/package.json b/package.json
index 838a6fd..4193f00 100644
--- a/package.json
+++ b/package.json
@@ -5,6 +5,7 @@
   "main": "js/airtable.js",
   "scripts": {
     "airtable": "node js/airtable.js",
+    "linkcheck": "node js/linkcheck.js",
     "dev": "lite-server"
   },
   "repository": {
@@ -19,6 +20,8 @@
   "homepage": "https://github.com/crcresearch/usaid-pse-egm#readme",
   "dependencies": {
     "airtable": "^0.10.1",
+    "axios": "^1.3.1",
+    "csv": "^6.2.6",
     "csvtojson": "^2.0.10",
     "lite-server": "^2.5.4"
   }