diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b95a975..1085d1c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -25,7 +25,7 @@ jobs: strategy: matrix: - node-version: [18.x] + node-version: [20.x] steps: - name: Start MongoDB @@ -75,6 +75,7 @@ jobs: run: | cd openreview-api mkdir -p logs files/attachments files/pdfs files/temp + export PUPPETEER_SKIP_DOWNLOAD='true' npm ci - name: Run openreview-api run: | diff --git a/package-lock.json b/package-lock.json index 4bdd8b6..0ec419b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -982,15 +982,16 @@ } }, "node_modules/@puppeteer/browsers": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.0.0.tgz", - "integrity": "sha512-3PS82/5+tnpEaUWonjAFFvlf35QHF15xqyGd34GBa5oP5EPVfFXRsbSxIGYf1M+vZlqBZ3oxT1kRg9OYhtt8ng==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.1.0.tgz", + "integrity": "sha512-xloWvocjvryHdUjDam/ZuGMh7zn4Sn3ZAaV4Ah2e2EwEt90N3XphZlSsU3n0VDc1F7kggCjMuH0UuxfPQ5mD9w==", "dependencies": { "debug": "4.3.4", "extract-zip": "2.0.1", "progress": "2.0.3", - "proxy-agent": "6.3.1", - "tar-fs": "3.0.4", + "proxy-agent": "6.4.0", + "semver": "7.6.0", + "tar-fs": "3.0.5", "unbzip2-stream": "1.4.3", "yargs": "17.7.2" }, @@ -1280,7 +1281,8 @@ }, "node_modules/@tootallnate/quickjs-emscripten": { "version": "0.23.0", - "license": "MIT" + "resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz", + "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==" }, "node_modules/@tufjs/canonical-json": { "version": "1.0.0", @@ -1752,7 +1754,8 @@ }, "node_modules/ast-types": { "version": "0.13.4", - "license": "MIT", + "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz", + "integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==", "dependencies": { "tslib": "^2.0.1" }, @@ -1793,12 +1796,46 @@ }, "node_modules/b4a": { "version": "1.6.6", - "license": "Apache-2.0" + "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.6.tgz", + "integrity": "sha512-5Tk1HLk6b6ctmjIkAcU/Ujv/1WqiDl0F0JdRCR80VsOcUlHcu7pWeWRlOqQLHfDEsVx9YH/aif5AG4ehoCtTmg==" }, "node_modules/balanced-match": { "version": "1.0.2", "license": "MIT" }, + "node_modules/bare-events": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.2.0.tgz", + "integrity": "sha512-Yyyqff4PIFfSuthCZqLlPISTWHmnQxoPuAvkmgzsJEmG3CesdIv6Xweayl0JkCZJSB2yYIdJyEz97tpxNhgjbg==", + "optional": true + }, + "node_modules/bare-fs": { + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-2.1.5.tgz", + "integrity": "sha512-5t0nlecX+N2uJqdxe9d18A98cp2u9BETelbjKpiVgQqzzmVNFYWEAjQHqS+2Khgto1vcwhik9cXucaj5ve2WWA==", + "optional": true, + "dependencies": { + "bare-events": "^2.0.0", + "bare-os": "^2.0.0", + "bare-path": "^2.0.0", + "streamx": "^2.13.0" + } + }, + "node_modules/bare-os": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-2.2.0.tgz", + "integrity": "sha512-hD0rOPfYWOMpVirTACt4/nK8mC55La12K5fY1ij8HAdfQakD62M+H4o4tpfKzVGLgRDTuk3vjA4GqGXXCeFbag==", + "optional": true + }, + "node_modules/bare-path": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-2.1.0.tgz", + "integrity": "sha512-DIIg7ts8bdRKwJRJrUMy/PICEaQZaPGZ26lsSx9MJSwIhSrcdHn7/C8W+XmnG/rKi6BaRcz+JO00CjZteybDtw==", + "optional": true, + "dependencies": { + "bare-os": "^2.1.0" + } + }, "node_modules/base64-js": { "version": "1.5.1", "funding": [ @@ -1819,7 +1856,8 @@ }, "node_modules/basic-ftp": { "version": "5.0.4", - "license": "MIT", + "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.4.tgz", + "integrity": "sha512-8PzkB0arJFV4jJWSGOYR+OEic6aeKMu/osRhBULN6RY0ykby6LKhbmuQ5ublvaas5BOwboah5D87nrHyuh8PPA==", "engines": { "node": ">=10.0.0" } @@ -2108,8 +2146,9 @@ } }, "node_modules/chromium-bidi": { - "version": "0.5.8", - "license": "Apache-2.0", + "version": "0.5.9", + "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.5.9.tgz", + "integrity": "sha512-wOTX3m2zuHX0zRX4h7Ol1DAGz0cqHzo2IrAPvOqBxdd4ZR32vxg4FKNjmBihi1oP9b1QGSBBG5VNUUXUCsxDfg==", "dependencies": { "mitt": "3.0.1", "urlpattern-polyfill": "10.0.0" @@ -2524,8 +2563,9 @@ } }, "node_modules/data-uri-to-buffer": { - "version": "6.0.1", - "license": "MIT", + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", + "integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==", "engines": { "node": ">= 14" } @@ -2652,7 +2692,8 @@ }, "node_modules/degenerator": { "version": "5.0.1", - "license": "MIT", + "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz", + "integrity": "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==", "dependencies": { "ast-types": "^0.13.4", "escodegen": "^2.1.0", @@ -2689,8 +2730,9 @@ } }, "node_modules/devtools-protocol": { - "version": "0.0.1232444", - "license": "BSD-3-Clause" + "version": "0.0.1249869", + "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1249869.tgz", + "integrity": "sha512-Ctp4hInA0BEavlUoRy9mhGq0i+JSo/AwVyX2EFgZmV1kYB+Zq+EMBAn52QWu6FbRr10hRb6pBl420upbp4++vg==" }, "node_modules/diff": { "version": "5.0.0", @@ -2977,7 +3019,8 @@ }, "node_modules/escodegen": { "version": "2.1.0", - "license": "BSD-2-Clause", + "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz", + "integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==", "dependencies": { "esprima": "^4.0.1", "estraverse": "^5.2.0", @@ -3389,7 +3432,8 @@ }, "node_modules/fast-fifo": { "version": "1.3.2", - "license": "MIT" + "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz", + "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==" }, "node_modules/fast-glob": { "version": "3.3.2", @@ -3712,7 +3756,6 @@ }, "node_modules/fs-extra": { "version": "11.2.0", - "dev": true, "license": "MIT", "dependencies": { "graceful-fs": "^4.2.0", @@ -3933,44 +3976,19 @@ } }, "node_modules/get-uri": { - "version": "6.0.2", - "license": "MIT", + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.3.tgz", + "integrity": "sha512-BzUrJBS9EcUb4cFol8r4W3v1cPsSyajLSthNkz5BxbpDcHN5tIrM10E2eNvfnvBn3DaT3DUgx0OpsBKkaOpanw==", "dependencies": { "basic-ftp": "^5.0.2", - "data-uri-to-buffer": "^6.0.0", + "data-uri-to-buffer": "^6.0.2", "debug": "^4.3.4", - "fs-extra": "^8.1.0" + "fs-extra": "^11.2.0" }, "engines": { "node": ">= 14" } }, - "node_modules/get-uri/node_modules/fs-extra": { - "version": "8.1.0", - "license": "MIT", - "dependencies": { - "graceful-fs": "^4.2.0", - "jsonfile": "^4.0.0", - "universalify": "^0.1.0" - }, - "engines": { - "node": ">=6 <7 || >=8" - } - }, - "node_modules/get-uri/node_modules/jsonfile": { - "version": "4.0.0", - "license": "MIT", - "optionalDependencies": { - "graceful-fs": "^4.1.6" - } - }, - "node_modules/get-uri/node_modules/universalify": { - "version": "0.1.2", - "license": "MIT", - "engines": { - "node": ">= 4.0.0" - } - }, "node_modules/git-raw-commits": { "version": "3.0.0", "dev": true, @@ -4301,8 +4319,9 @@ "license": "BSD-2-Clause" }, "node_modules/http-proxy-agent": { - "version": "7.0.0", - "license": "MIT", + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", "dependencies": { "agent-base": "^7.1.0", "debug": "^4.3.4" @@ -4312,8 +4331,9 @@ } }, "node_modules/https-proxy-agent": { - "version": "7.0.2", - "license": "MIT", + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.4.tgz", + "integrity": "sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==", "dependencies": { "agent-base": "^7.0.2", "debug": "4" @@ -5441,7 +5461,6 @@ }, "node_modules/lru-cache": { "version": "6.0.0", - "dev": true, "license": "ISC", "dependencies": { "yallist": "^4.0.0" @@ -5982,7 +6001,8 @@ }, "node_modules/mitt": { "version": "3.0.1", - "license": "MIT" + "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz", + "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==" }, "node_modules/mixin-object": { "version": "2.0.1", @@ -6013,10 +6033,6 @@ "node": ">=10" } }, - "node_modules/mkdirp-classic": { - "version": "0.5.3", - "license": "MIT" - }, "node_modules/mocha": { "version": "10.3.0", "dev": true, @@ -6211,7 +6227,8 @@ }, "node_modules/netmask": { "version": "2.0.2", - "license": "MIT", + "resolved": "https://registry.npmjs.org/netmask/-/netmask-2.0.2.tgz", + "integrity": "sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==", "engines": { "node": ">= 0.4.0" } @@ -7224,7 +7241,8 @@ }, "node_modules/pac-proxy-agent": { "version": "7.0.1", - "license": "MIT", + "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.0.1.tgz", + "integrity": "sha512-ASV8yU4LLKBAjqIPMbrgtaKIvxQri/yh2OpI+S6hVa9JRkUI3Y3NPFbfngDtY7oFtSMD3w31Xns89mDa3Feo5A==", "dependencies": { "@tootallnate/quickjs-emscripten": "^0.23.0", "agent-base": "^7.0.2", @@ -7240,21 +7258,17 @@ } }, "node_modules/pac-resolver": { - "version": "7.0.0", - "license": "MIT", + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.1.tgz", + "integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==", "dependencies": { "degenerator": "^5.0.0", - "ip": "^1.1.8", "netmask": "^2.0.2" }, "engines": { "node": ">= 14" } }, - "node_modules/pac-resolver/node_modules/ip": { - "version": "1.1.8", - "license": "MIT" - }, "node_modules/pacote": { "version": "17.0.6", "dev": true, @@ -7818,13 +7832,14 @@ "license": "MIT" }, "node_modules/proxy-agent": { - "version": "6.3.1", - "license": "MIT", + "version": "6.4.0", + "resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.4.0.tgz", + "integrity": "sha512-u0piLU+nCOHMgGjRbimiXmA9kM/L9EHh3zL81xCdp7m+Y2pHIsnmbdDoEDoAz5geaonNR6q6+yOPQs6n4T6sBQ==", "dependencies": { "agent-base": "^7.0.2", "debug": "^4.3.4", - "http-proxy-agent": "^7.0.0", - "https-proxy-agent": "^7.0.2", + "http-proxy-agent": "^7.0.1", + "https-proxy-agent": "^7.0.3", "lru-cache": "^7.14.1", "pac-proxy-agent": "^7.0.1", "proxy-from-env": "^1.1.0", @@ -7836,7 +7851,8 @@ }, "node_modules/proxy-agent/node_modules/lru-cache": { "version": "7.18.3", - "license": "ISC", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", + "integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==", "engines": { "node": ">=12" } @@ -7858,14 +7874,14 @@ "license": "MIT" }, "node_modules/puppeteer": { - "version": "22.0.0", - "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-22.0.0.tgz", - "integrity": "sha512-zYVnjwJngnSB4dbkWp7DHFSIc3nqHvZzrdHyo9+ugV1nq1Lm8obOMcmCFaGfR3PJs0EmYNz+/skBeO45yvASCQ==", + "version": "22.2.0", + "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-22.2.0.tgz", + "integrity": "sha512-0Ax7zeqqbQL6Zcpo1WAvrqWQAnGsLB4tmQUUwsb5Cfo05XaQ78LWUUjaO4um7qaddKpZfk0vXlGcRVwtedpWfg==", "hasInstallScript": true, "dependencies": { - "@puppeteer/browsers": "2.0.0", + "@puppeteer/browsers": "2.1.0", "cosmiconfig": "9.0.0", - "puppeteer-core": "22.0.0" + "puppeteer-core": "22.2.0" }, "bin": { "puppeteer": "lib/esm/puppeteer/node/cli.js" @@ -7875,15 +7891,15 @@ } }, "node_modules/puppeteer-core": { - "version": "22.0.0", - "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-22.0.0.tgz", - "integrity": "sha512-S3s91rLde0A86PWVeNY82h+P0fdS7CTiNWAicCVH/bIspRP4nS2PnO5j+VTFqCah0ZJizGzpVPAmxVYbLxTc9w==", + "version": "22.2.0", + "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-22.2.0.tgz", + "integrity": "sha512-rxLM860FP05CxCPAn6dwY0KnVhbnogsXu4XORb+2hb/va69v7R1VdJWLMGHd7EE5wfpT8oFZ7Q6NN85OhOtV9Q==", "dependencies": { - "@puppeteer/browsers": "2.0.0", - "chromium-bidi": "0.5.8", + "@puppeteer/browsers": "2.1.0", + "chromium-bidi": "0.5.9", "cross-fetch": "4.0.0", "debug": "4.3.4", - "devtools-protocol": "0.0.1232444", + "devtools-protocol": "0.0.1249869", "ws": "8.16.0" }, "engines": { @@ -8137,7 +8153,8 @@ }, "node_modules/queue-tick": { "version": "1.0.1", - "license": "MIT" + "resolved": "https://registry.npmjs.org/queue-tick/-/queue-tick-1.0.1.tgz", + "integrity": "sha512-kJt5qhMxoszgU/62PLP1CJytzd2NKetjSRnyuj31fDd3Rlcz3fzlFdFLD1SItunPwyqEOkca6GbV612BWfaBag==" }, "node_modules/quick-lru": { "version": "4.0.1", @@ -8772,7 +8789,6 @@ }, "node_modules/semver": { "version": "7.6.0", - "dev": true, "license": "ISC", "dependencies": { "lru-cache": "^6.0.0" @@ -9218,11 +9234,15 @@ } }, "node_modules/streamx": { - "version": "2.15.7", - "license": "MIT", + "version": "2.16.1", + "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.16.1.tgz", + "integrity": "sha512-m9QYj6WygWyWa3H1YY69amr4nVgy61xfjys7xO7kviL5rfIEc2naf+ewFiOA+aEJD7y0JO3h2GoiUv4TDwEGzQ==", "dependencies": { "fast-fifo": "^1.1.0", "queue-tick": "^1.0.1" + }, + "optionalDependencies": { + "bare-events": "^2.2.0" } }, "node_modules/string_decoder": { @@ -9420,17 +9440,22 @@ } }, "node_modules/tar-fs": { - "version": "3.0.4", - "license": "MIT", + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.0.5.tgz", + "integrity": "sha512-JOgGAmZyMgbqpLwct7ZV8VzkEB6pxXFBVErLtb+XCOqzc6w1xiWKI9GVd6bwk68EX7eJ4DWmfXVmq8K2ziZTGg==", "dependencies": { - "mkdirp-classic": "^0.5.2", "pump": "^3.0.0", "tar-stream": "^3.1.5" + }, + "optionalDependencies": { + "bare-fs": "^2.1.1", + "bare-path": "^2.1.0" } }, "node_modules/tar-fs/node_modules/tar-stream": { "version": "3.1.7", - "license": "MIT", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz", + "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==", "dependencies": { "b4a": "^1.6.4", "fast-fifo": "^1.2.0", @@ -10053,7 +10078,8 @@ }, "node_modules/urlpattern-polyfill": { "version": "10.0.0", - "license": "MIT" + "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz", + "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg==" }, "node_modules/util-deprecate": { "version": "1.0.2", @@ -10370,7 +10396,6 @@ }, "node_modules/yallist": { "version": "4.0.0", - "dev": true, "license": "ISC" }, "node_modules/yargs": { @@ -10501,12 +10526,12 @@ }, "packages/meta-extraction": { "name": "@openreview/meta-extraction", - "version": "0.0.2", + "version": "0.0.3", "license": "MIT", "dependencies": { "htmltidy2": "^1.1.1", "lodash": "^4.17.21", - "puppeteer": "^22.0.0", + "puppeteer": "^22.2.0", "puppeteer-extra": "^3.3.6", "puppeteer-extra-plugin-anonymize-ua": "^2.4.6", "puppeteer-extra-plugin-stealth": "^2.11.2", diff --git a/packages/meta-extraction/package.json b/packages/meta-extraction/package.json index e8de5ba..36c7971 100644 --- a/packages/meta-extraction/package.json +++ b/packages/meta-extraction/package.json @@ -1,6 +1,6 @@ { "name": "@openreview/meta-extraction", - "version": "0.0.2", + "version": "0.0.3", "description": "Extract abstracts for DBLP papers", "main": "src/index.js", "type": "module", @@ -10,7 +10,7 @@ "dependencies": { "htmltidy2": "^1.1.1", "lodash": "^4.17.21", - "puppeteer": "^22.0.0", + "puppeteer": "^22.2.0", "puppeteer-extra": "^3.3.6", "puppeteer-extra-plugin-anonymize-ua": "^2.4.6", "puppeteer-extra-plugin-stealth": "^2.11.2", @@ -43,4 +43,4 @@ }, "homepage": "https://github.com/openreview/openreview-js#readme", "gitHead": "e83fe20886ca81f61c67c1884941b8df937c24c3" -} +} \ No newline at end of file diff --git a/packages/meta-extraction/src/abstractExtractionRules.js b/packages/meta-extraction/src/abstractExtractionRules.js index 2226539..059eea6 100644 --- a/packages/meta-extraction/src/abstractExtractionRules.js +++ b/packages/meta-extraction/src/abstractExtractionRules.js @@ -465,34 +465,8 @@ const aclanthologyRule = { } }; -const nipsCCRule = { - shouldApplyRule: (url) => /nips.cc/.test(url), - executeRule: async (html, page) => { - console.log(' run nips rule'); - - const highwirePressTags = await gatherHighwirePressTags(page); - const abstract = await selectElemTextEvidence(page, 'h4 + p'); - - const allEvidence = [ - ...highwirePressTags, - { type: 'abstract', value: abstract } - ]; - - const abstractEvidences = allEvidence.filter( - (p) => p?.type === 'abstract' && p.value - ); - - const longestAbstractEvidence = _.maxBy(abstractEvidences, 'value.length'); - return { - abstract:longestAbstractEvidence?.value, - pdf:allEvidence.find( - (p) => p?.type === 'pdf' && p.value - )?.value}; - } -}; - const neuripsCCRule = { - shouldApplyRule: (url) => /neurips.cc/.test(url), + shouldApplyRule: (url) => /nips.cc/.test(url) || /neurips.cc/.test(url), executeRule: async (html, page) => { console.log(' run neurips.cc rule'); const highwirePressTags = await gatherHighwirePressTags(page); @@ -691,7 +665,6 @@ const runAllRules = async (html, page, url) => { scienceDirectRule, aaaiOrgRule, aclanthologyRule, - nipsCCRule, neuripsCCRule, dlAcmOrgRule, ieeeXploreOrgRule, diff --git a/packages/meta-extraction/src/index.js b/packages/meta-extraction/src/index.js index 21e703d..6946b47 100644 --- a/packages/meta-extraction/src/index.js +++ b/packages/meta-extraction/src/index.js @@ -23,8 +23,8 @@ const extractAbstract = async (url, skipTidy = false) => { }); const page = await browserInstance.newPage(); - page.setDefaultNavigationTimeout(10_000); - page.setDefaultTimeout(10_000); + page.setDefaultNavigationTimeout(15_000); + page.setDefaultTimeout(15_000); page.setJavaScriptEnabled(enableJavaScript); await page.setRequestInterception(true); initRequestInterception(page, enableJavaScript, isRewritable);