diff --git a/README.md b/README.md index 824578a..d6e7310 100644 --- a/README.md +++ b/README.md @@ -140,9 +140,11 @@ pipeline itself. ### Vocabularies +The vocabularies that the dataset’s predicates refer to: + ```ttl a void:Dataset; - void:vocabulary , , . + void:vocabulary , . ``` ### Distributions diff --git a/jest.config.js b/jest.config.js index a1dbeda..d1d314c 100644 --- a/jest.config.js +++ b/jest.config.js @@ -10,10 +10,10 @@ export default { coverageReporters: ['json-summary', 'text'], coverageThreshold: { global: { - lines: 20.19, - statements: 20.19, - branches: 15.23, - functions: 17.39, + lines: 18.89, + statements: 18.89, + branches: 14.54, + functions: 16.32, }, }, transform: { diff --git a/src/analyzer/vocabulary.ts b/src/analyzer/vocabulary.ts new file mode 100644 index 0000000..8765b82 --- /dev/null +++ b/src/analyzer/vocabulary.ts @@ -0,0 +1,55 @@ +import {Analyzer} from '../analyzer.js'; +import {Dataset} from '../dataset.js'; +import {DataFactory} from 'n3'; +import {Failure, NotSupported, Success} from '../pipeline.js'; +import namedNode = DataFactory.namedNode; + +const vocabularyPrefixes = new Map([ + ['http://schema.org/', 'http://schema.org'], + ['https://schema.org/', 'http://schema.org'], + [ + 'https://www.ica.org/standards/RiC/ontology#', + 'https://www.ica.org/standards/RiC/ontology', + ], + ['http://www.cidoc-crm.org/cidoc-crm/', 'http://www.cidoc-crm.org/cidoc-crm'], + ['http://purl.org/ontology/bibo/', 'http://purl.org/ontology/bibo/'], + ['http://purl.org/dc/elements/1.1/', 'http://purl.org/dc/elements/1.1/'], + ['http://purl.org/dc/terms/', 'http://purl.org/dc/terms/'], + ['http://purl.org/dc/dcmitype/', 'http://purl.org/dc/dcmitype/'], + [ + 'http://www.w3.org/2004/02/skos/core#', + 'http://www.w3.org/2004/02/skos/core#', + ], + ['http://xmlns.com/foaf/0.1/', 'http://xmlns.com/foaf/0.1/'], +]); + +export class VocabularyAnalyzer implements Analyzer { + constructor(private readonly decorated: Analyzer) {} + + async execute(dataset: Dataset): Promise { + const result = await this.decorated.execute(dataset); + if (result instanceof NotSupported || result instanceof Failure) { + return result; + } + + for (const quad of result.data) { + if ('http://rdfs.org/ns/void#property' === quad.predicate.value) { + const match = [...vocabularyPrefixes].find(([prefix]) => + quad.object.value.startsWith(prefix) + ); + if (match) { + const [, vocabulary] = match; + result.data.add( + DataFactory.quad( + namedNode(dataset.iri), + namedNode('http://rdfs.org/ns/void#vocabulary'), + namedNode(vocabulary) + ) + ); + } + } + } + + return new Success(result.data); + } +} diff --git a/src/main.ts b/src/main.ts index 373f18f..3bee02c 100644 --- a/src/main.ts +++ b/src/main.ts @@ -11,6 +11,7 @@ import {SparqlWriter} from './writer/sparql.js'; import {config} from './config.js'; import {RdfDumpImporter} from './importer.js'; import {GraphDBClient} from './graphdb.js'; +import {VocabularyAnalyzer} from './analyzer/vocabulary.js'; const queryEngine = new QueryEngine(); new Pipeline({ @@ -37,7 +38,6 @@ new Pipeline({ }) ), await SparqlQueryAnalyzer.fromFile(queryEngine, 'class-partition.rq'), - await SparqlQueryAnalyzer.fromFile(queryEngine, 'entity-properties.rq'), await SparqlQueryAnalyzer.fromFile(queryEngine, 'object-literals.rq'), await SparqlQueryAnalyzer.fromFile(queryEngine, 'object-uris.rq'), await SparqlQueryAnalyzer.fromFile(queryEngine, 'properties.rq'), @@ -47,6 +47,9 @@ new Pipeline({ new UriSpaceAnalyzer( await SparqlQueryAnalyzer.fromFile(queryEngine, 'object-uri-space.rq') ), + new VocabularyAnalyzer( + await SparqlQueryAnalyzer.fromFile(queryEngine, 'entity-properties.rq') + ), ], writers: [ new FileWriter(),