From b4b7880b2ed7b60832e88ef82a29c0ec33aaa1d6 Mon Sep 17 00:00:00 2001 From: David de Boer Date: Thu, 23 Nov 2023 15:51:51 +0100 Subject: [PATCH] feat: Extend crawler logging --- jest.config.js | 4 ++-- src/crawler.ts | 9 ++++++++- src/graphdb.ts | 4 ++-- src/registration.ts | 2 +- test/mock.ts | 2 +- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/jest.config.js b/jest.config.js index 92aac4e3..abe8079b 100644 --- a/jest.config.js +++ b/jest.config.js @@ -10,8 +10,8 @@ export default { coverageReporters: ['json-summary', 'text'], coverageThreshold: { global: { - lines: 71.6, - statements: 71.63, + lines: 71.8, + statements: 71.83, branches: 63.3, functions: 69.72, }, diff --git a/src/crawler.ts b/src/crawler.ts index 4272430a..d3758af3 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -23,7 +23,7 @@ export class Crawler { const registrations = await this.registrationStore.findRegistrationsReadBefore(dateLastRead); for (const registration of registrations) { - this.logger.info(`Crawling registration URL ${registration.url}`); + this.logger.info(`Crawling registration URL ${registration.url}...`); let datasets: DatasetExt[] = []; let statusCode = 200; let isValid = false; @@ -33,6 +33,7 @@ export class Crawler { const validationResult = await this.validator.validate(data); isValid = validationResult.state === 'valid'; if (isValid) { + this.logger.info(`${registration.url} passes validation`); datasets = await fetch(registration.url); await this.datasetStore.store(datasets); datasets.map(async dataset => { @@ -40,14 +41,20 @@ export class Crawler { const rating = rate(dcatValidationResult as Valid); await this.ratingStore.store(extractIri(dataset), rating); }); + } else { + this.logger.info(`${registration.url} does not pass validation`); } } catch (e) { if (e instanceof HttpError) { statusCode = e.statusCode; + this.logger.info( + `${registration.url} returned HTTP error ${statusCode}` + ); } if (e instanceof NoDatasetFoundAtUrl) { // Request was 
successful, but no datasets exist any longer at the URL. + this.logger.info(`${registration.url} has no datasets`); } } diff --git a/src/graphdb.ts b/src/graphdb.ts index 2b39d9a3..42fe29b1 100644 --- a/src/graphdb.ts +++ b/src/graphdb.ts @@ -154,7 +154,7 @@ export class GraphDbRegistrationStore implements RegistrationStore { constructor(private client: GraphDbClient) {} - async store(registration: Registration) { + async store(registration: Registration): Promise<void> { const quads = [ this.registrationQuad( registration, @@ -268,7 +268,7 @@ export class GraphDbRegistrationStore implements RegistrationStore { url: '/statements', body: result, }); - resolve(null); + resolve(); } catch (e) { reject(e); } diff --git a/src/registration.ts b/src/registration.ts index 50aa8ba2..b90af27a 100644 --- a/src/registration.ts +++ b/src/registration.ts @@ -52,7 +52,7 @@ export interface RegistrationStore { /** * Store a {@see Registration}, replacing any Registrations with the same URL. */ - store(registration: Registration): void; + store(registration: Registration): Promise<void>; findRegistrationsReadBefore(date: Date): Promise<Registration[]>; } diff --git a/test/mock.ts b/test/mock.ts index 05c26ce2..d1d41215 100644 --- a/test/mock.ts +++ b/test/mock.ts @@ -31,7 +31,7 @@ export class MockRegistrationStore implements RegistrationStore { ); } - store(registration: Registration): void { + async store(registration: Registration): Promise<void> { this.registrations.set(registration.url, registration); } }