From 8f9a0d22b7dcc8c21a1a0edf177a7161044e1fb2 Mon Sep 17 00:00:00 2001 From: Philippe Renzen Date: Fri, 29 Dec 2023 16:57:07 +0100 Subject: [PATCH] Fixes Implement paralel generators for each Iterator #4 Adds parallel approach to LDWorkbench with base of feature/3/mightymax branch --- src/lib/Generator.class.ts | 20 ++-- src/lib/LDWorkbenchConfiguration.d.ts | 112 ++++++++++++------ src/lib/Stage.class.ts | 78 +++++++----- src/lib/tests/Generator.class.test.ts | 81 ++++++++++--- src/lib/tests/Iterator.class.test.ts | 24 ++-- src/lib/tests/Pipeline.class.test.ts | 84 +++++++------ src/lib/tests/PreviousStage.class.test.ts | 48 ++++---- src/lib/tests/Stage.class.test.ts | 53 +++++---- src/utils/getEndpoint.ts | 5 +- src/utils/tests/static/correct/conf1.yml | 2 +- src/utils/tests/static/correct/conf2.yml | 2 +- src/utils/tests/static/duplicate/conf1.yml | 2 +- src/utils/tests/static/duplicate/conf2.yml | 2 +- src/utils/tests/static/single/conf.yml | 6 +- src/utils/tests/utilities.test.ts | 96 ++++++++------- static/example/config.yml | 9 +- ...ator-stage-1.rq => generator-stage-1-1.rq} | 0 static/example/generator-stage-1-2.rq | 10 ++ static/ld-workbench.schema.json | 40 ++++--- 19 files changed, 428 insertions(+), 246 deletions(-) rename static/example/{generator-stage-1.rq => generator-stage-1-1.rq} (100%) create mode 100644 static/example/generator-stage-1-2.rq diff --git a/src/lib/Generator.class.ts b/src/lib/Generator.class.ts index 5765a18..bd6ea4b 100644 --- a/src/lib/Generator.class.ts +++ b/src/lib/Generator.class.ts @@ -15,11 +15,11 @@ const DEFAULT_BATCH_SIZE = 10 declare interface Generator { on(event: "data", listener: (statement: Quad) => void): this; - on(event: "end", listener: (iterations: number, statements: number) => void): this; + on(event: "end", listener: (iterations: number, statements: number, processed: number) => void): this; on(event: "error", listener: (e: Error) => void): this; emit(event: "data", statement: Quad): boolean; - emit(event: "end", iterations: number, statements: number): boolean; + emit(event: "end", iterations: number, statements: number, processed: number): boolean; emit(event: "error", e: Error): boolean; } class Generator extends EventEmitter { @@ -31,17 +31,19 @@ class Generator extends EventEmitter { private source: string = '' private readonly $thisList: NamedNode[] = [] private readonly endpoint: Endpoint; - public constructor(private readonly stage: Stage) { + public constructor(private readonly stage: Stage, private readonly index: number) { + if (stage.configuration.generator === undefined) throw new Error('Error in Generator: no generators were present in stage configuration') super() + this.index = index this.query = getSPARQLQuery( - stage.configuration.generator.query, + stage.configuration.generator[this.index].query, "construct" ); this.endpoint = - stage.configuration.generator.endpoint === undefined + stage.configuration.generator[this.index].endpoint === undefined ? stage.iterator.endpoint - : getEndpoint(stage, "generator"); + : getEndpoint(stage, "generator", this.index); this.engine = getEngine(this.endpoint) @@ -56,7 +58,7 @@ class Generator extends EventEmitter { } private get batchSize(): number { - return this.stage.configuration.generator.batchSize ?? DEFAULT_BATCH_SIZE + return this.stage.configuration.generator[this.index].batchSize ?? DEFAULT_BATCH_SIZE } private $thisToBind($this: NamedNode): BindPattern { @@ -72,7 +74,7 @@ class Generator extends EventEmitter { variable: DataFactory.variable('this') } - } + } public run($this?: NamedNode, batchSize?: number): void { @@ -102,7 +104,7 @@ class Generator extends EventEmitter { }) stream.on('end', () => { if (this.iterationsIncoming !== undefined && this.iterationsProcessed >= this.iterationsIncoming) { - this.emit('end', this.iterationsIncoming, this.statements) + this.emit('end', this.iterationsIncoming, this.statements, this.iterationsProcessed) } }) }).catch(e => { diff --git a/src/lib/LDWorkbenchConfiguration.d.ts b/src/lib/LDWorkbenchConfiguration.d.ts index a0623b9..ff295a2 100644 --- a/src/lib/LDWorkbenchConfiguration.d.ts +++ b/src/lib/LDWorkbenchConfiguration.d.ts @@ -50,23 +50,45 @@ export interface LDWorkbenchConfiguration { */ batchSize?: number; }; - generator: { - /** - * Path (prefixed with "file://") or SPARQL Query - * that makes the generator using SPARQL construct. - */ - query: string; - /** - * The SPARQL endpoint for the generator. - * If it starts with "file://", a local RDF file is queried. - * If ommmitted the endpoint of the Iterator is used. - */ - endpoint?: string; - /** - * Overrule the generator's behaviour of fetching results for 10 bindings of $this per request. - */ - batchSize?: number; - }; + /** + * @minItems 1 + */ + generator: [ + { + /** + * Path (prefixed with "file://") or SPARQL Query + * that makes the generator using SPARQL construct. + */ + query: string; + /** + * The SPARQL endpoint for the generator. + * If it starts with "file://", a local RDF file is queried. + * If ommmitted the endpoint of the Iterator is used. + */ + endpoint?: string; + /** + * Overrule the generator's behaviour of fetching results for 10 bindings of $this per request. + */ + batchSize?: number; + }, + ...{ + /** + * Path (prefixed with "file://") or SPARQL Query + * that makes the generator using SPARQL construct. + */ + query: string; + /** + * The SPARQL endpoint for the generator. + * If it starts with "file://", a local RDF file is queried. + * If ommmitted the endpoint of the Iterator is used. + */ + endpoint?: string; + /** + * Overrule the generator's behaviour of fetching results for 10 bindings of $this per request. + */ + batchSize?: number; + }[] + ]; /** * The file where the results are saved. * This is not a required property, @@ -96,23 +118,45 @@ export interface LDWorkbenchConfiguration { */ batchSize?: number; }; - generator: { - /** - * Path (prefixed with "file://") or SPARQL Query - * that makes the generator using SPARQL construct. - */ - query: string; - /** - * The SPARQL endpoint for the generator. - * If it starts with "file://", a local RDF file is queried. - * If ommmitted the endpoint of the Iterator is used. - */ - endpoint?: string; - /** - * Overrule the generator's behaviour of fetching results for 10 bindings of $this per request. - */ - batchSize?: number; - }; + /** + * @minItems 1 + */ + generator: [ + { + /** + * Path (prefixed with "file://") or SPARQL Query + * that makes the generator using SPARQL construct. + */ + query: string; + /** + * The SPARQL endpoint for the generator. + * If it starts with "file://", a local RDF file is queried. + * If ommmitted the endpoint of the Iterator is used. + */ + endpoint?: string; + /** + * Overrule the generator's behaviour of fetching results for 10 bindings of $this per request. + */ + batchSize?: number; + }, + ...{ + /** + * Path (prefixed with "file://") or SPARQL Query + * that makes the generator using SPARQL construct. + */ + query: string; + /** + * The SPARQL endpoint for the generator. + * If it starts with "file://", a local RDF file is queried. + * If ommmitted the endpoint of the Iterator is used. + */ + endpoint?: string; + /** + * Overrule the generator's behaviour of fetching results for 10 bindings of $this per request. + */ + batchSize?: number; + }[] + ]; /** * The file where the results are saved. * This is not a required property, diff --git a/src/lib/Stage.class.ts b/src/lib/Stage.class.ts index bba5490..21e7fa6 100644 --- a/src/lib/Stage.class.ts +++ b/src/lib/Stage.class.ts @@ -25,23 +25,29 @@ declare interface Stage { class Stage extends EventEmitter { public destination: () => WriteStream public iterator: Iterator - public generator: Generator + public generators: Generator[] = [] + private totalProcessed: number public constructor( public readonly pipeline: Pipeline, public readonly configuration: LDWorkbenchConfiguration['stages'][0] ) { super() + this.totalProcessed = 0 try { this.iterator = new Iterator(this) } catch(e) { throw new Error(`Error in the iterator of stage \`${configuration.name}\`: ${(e as Error).message}`) } - try { - this.generator = new Generator(this) - } catch(e) { - throw new Error(`Error in the generator of stage \`${configuration.name}\`: ${(e as Error).message}`) + // Handle both single generator and array of generators + for (let index = 0; index < this.configuration.generator.length; index++) { + const generatorConfig = this.configuration.generator[index]; + try { + this.generators.push(new Generator({...this, generators: [generatorConfig]}, index)) + } catch(e) { + throw new Error(`Error in the generator of stage \`${configuration.name}\`: ${(e as Error).message}`) + } } this.destination = () => new File(this.destinationPath).getStream() } @@ -55,30 +61,44 @@ class Stage extends EventEmitter { } public run(): void { - let quadCount = 0 - // let iteratorCount = 0 - const writer = new Writer(this.destination(), { end: false, format: 'N-Triples' }) - - this.generator.on('data', quad => { - writer.addQuad(quad) - quadCount ++ - this.emit('generatorResult', quadCount) - }) - - this.generator.on('error', e => { - this.emit('error', e) - }) - - this.iterator.on('data', $this => { - this.generator.run($this) - this.emit('iteratorResult', $this) - }) - - this.iterator.on('error', e => { - this.emit('error', e) - }) - this.iterator.run() - } + const writer = new Writer(this.destination(), { end: false, format: 'N-Triples' }); + let quadCount = 0; + + this.generators.forEach(generator => { + generator.on('data', (quad) => { + writer.addQuad(quad); + quadCount++; + this.emit('generatorResult', quadCount); + }); + + generator.on('end', (iterationsIncoming, statements, processed) => { + this.totalProcessed += processed + if (this.totalProcessed >= (iterationsIncoming * this.configuration.generator.length)){ + this.emit('end', iterationsIncoming, statements); + } + }); + + generator.on('error', e => { + this.emit('error', e) + }) + }); + + this.iterator.on('data', ($this) => { + this.generators.forEach(generator => { + generator.run($this); + }); + this.emit('iteratorResult', $this); + }); + + this.iterator.on('error', e => { + this.emit('error', e) + }) + + // Start the iterator + this.iterator.run(); + + +} } diff --git a/src/lib/tests/Generator.class.test.ts b/src/lib/tests/Generator.class.test.ts index 5919eb6..a9f5bea 100644 --- a/src/lib/tests/Generator.class.test.ts +++ b/src/lib/tests/Generator.class.test.ts @@ -24,26 +24,28 @@ describe('Generator Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(configuration, {silent: true}) const stageConfig = configuration.stages[0] const stage = new Stage(pipeline, stageConfig) - const generator = new Generator(stage) + const generator = new Generator(stage, 0) expect(generator).to.be.an.instanceOf(Generator); expect(generator).to.be.an.instanceOf(EventEmitter); expect(generator).to.have.property('query'); @@ -52,8 +54,52 @@ describe('Generator Class', () => { expect(generator).to.have.property('source'); }); }); - // BUG when both the generator and iterator tests are running, it seems the iterator will never terminate describe('run', () => { + it('Should work with multiple generators in parallel for one pipeline', async function (){ + const filePath = 'pipelines/data/example-pipelineParallel.nt'; + + const config: LDWorkbenchConfiguration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://'+ filePath, + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: 'file://static/tests/iris.nt' + }, + generator: + [ + {query: 'file://static/example/generator-stage-1-1.rq'}, + {query: 'file://static/example/generator-stage-1-2.rq'} + ] + }, + { + name: 'Stage 2', + iterator: { + query: 'file://static/example/iterator-stage-2.rq' + }, + generator: + [{ + query: 'file://static/example/generator-stage-2.rq', + endpoint: 'file://static/tests/wikidata.nt' + }] + } + ] + } + // read file after pipeline has finished + const pipelineParallelGenerators = new Pipeline(config, {silent: true}) + pipelineParallelGenerators.validate() + pipelineParallelGenerators.run().then(() => { + const file = fs.readFileSync(filePath, {encoding: 'utf-8'}) + const fileLines = file.split('\n').sort() + expect(fileLines.length).to.equal(873) + expect(fileLines[0]).to.equal('') + expect(fileLines[1]).to.equal(' .') + expect(fileLines[fileLines.length - 1]).to.equal(' "Instance 150 of the Iris Virginica"@en .') + }).catch( e => { throw e } ) + }) it('Should work in batchSize for pipeline\'s generator', async function () { const filePath = 'pipelines/data/example-pipelineBatch.nt'; @@ -69,12 +115,13 @@ describe('Generator Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq', + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq', // adjust batchsize for test here batchSize: 7 } - } +] } ] } const pipelineBatch = new Pipeline(batchConfiguration, {silent: true}) @@ -103,26 +150,28 @@ describe('Generator Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(configuration, {silent: true}) const stageConfig = configuration.stages[0] const stage = new Stage(pipeline, stageConfig) - const generator = new Generator(stage); + const generator = new Generator(stage, 0); const emittedEvents: any[] = []; const testNamedNode = new NamedNode('https://triplydb.com/triply/iris/id/floweringPlant/00106'); diff --git a/src/lib/tests/Iterator.class.test.ts b/src/lib/tests/Iterator.class.test.ts index 532af9d..0753403 100644 --- a/src/lib/tests/Iterator.class.test.ts +++ b/src/lib/tests/Iterator.class.test.ts @@ -22,20 +22,22 @@ describe('Iterator Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(configuration, {silent: true}) @@ -66,20 +68,22 @@ describe('Iterator Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(configuration, {silent: true}) diff --git a/src/lib/tests/Pipeline.class.test.ts b/src/lib/tests/Pipeline.class.test.ts index 24bc8a4..3d07dbf 100644 --- a/src/lib/tests/Pipeline.class.test.ts +++ b/src/lib/tests/Pipeline.class.test.ts @@ -21,20 +21,22 @@ describe('Pipeline Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(configuration, {silent: true}); @@ -61,20 +63,22 @@ describe('Pipeline Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(configuration, {silent: true}); @@ -99,20 +103,22 @@ describe('Pipeline Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { iterator: { query: 'file://static/example/iterator-stage-2.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } as unknown as LDWorkbenchConfiguration const pipeline = new Pipeline(configuration, {silent: true}); @@ -155,19 +161,21 @@ describe('Pipeline Class', () => { iterator: { query: 'file://static/example/iterator-stage-1.rq', }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', } - } +] } ] } as unknown as LDWorkbenchConfiguration const pipeline = new Pipeline(invalidConfiguration, {silent: true}); @@ -198,20 +206,22 @@ describe('Pipeline Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 1', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(configDuplicateStageName, {silent: true}); @@ -244,20 +254,22 @@ describe('Pipeline Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(configDuplicateStageName, {silent: true}); @@ -287,20 +299,22 @@ describe('Pipeline Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(configuration, {silent: true}) diff --git a/src/lib/tests/PreviousStage.class.test.ts b/src/lib/tests/PreviousStage.class.test.ts index d8263ff..18ea014 100644 --- a/src/lib/tests/PreviousStage.class.test.ts +++ b/src/lib/tests/PreviousStage.class.test.ts @@ -21,20 +21,22 @@ describe('PreviousStage Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(config, {silent: true}) @@ -61,20 +63,22 @@ describe('PreviousStage Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(config, {silent: true}) @@ -97,21 +101,23 @@ describe('PreviousStage Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(config, {silent: true}) @@ -138,21 +144,23 @@ describe('PreviousStage Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(config, {silent: true}) diff --git a/src/lib/tests/Stage.class.test.ts b/src/lib/tests/Stage.class.test.ts index 4196dcb..944e61f 100644 --- a/src/lib/tests/Stage.class.test.ts +++ b/src/lib/tests/Stage.class.test.ts @@ -26,20 +26,22 @@ describe('Stage Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(configuration, {silent: true}) @@ -48,9 +50,9 @@ describe('Stage Class', () => { expect(stage).to.be.an.instanceOf(Stage); expect(stage).to.have.property('destination'); expect(stage).to.have.property('iterator'); - expect(stage).to.have.property('generator'); + expect(stage).to.have.property('generators'); expect(stage.iterator).to.be.an.instanceOf(Iterator); - expect(stage.generator).to.be.an.instanceOf(Generator); + expect(stage.generators[0]).to.be.an.instanceOf(Generator); expect(stage.pipeline).to.be.an.instanceOf(Pipeline); expect(stage).to.have.property('pipeline', pipeline); expect(stage).to.have.property('configuration'); @@ -70,20 +72,22 @@ describe('Stage Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(configuration, {silent: true}) @@ -107,20 +111,22 @@ describe('Stage Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(configuration, {silent: true}) @@ -133,7 +139,6 @@ describe('Stage Class', () => { // BUG throws error when in combined test on stage's Iterator, when set to only it will pass. describe.skip('run', () => { it('should run the stage correctly', async function () { - this.timeout(5000) const configuration: LDWorkbenchConfiguration = { name: 'Example Pipeline', description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', @@ -145,20 +150,22 @@ describe('Stage Class', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(configuration, {silent: true}) diff --git a/src/utils/getEndpoint.ts b/src/utils/getEndpoint.ts index cefb792..6b469f9 100644 --- a/src/utils/getEndpoint.ts +++ b/src/utils/getEndpoint.ts @@ -7,10 +7,11 @@ import { isFilePathString } from './guards.js'; export default function getEndpoint( stage: Stage, - type: 'iterator' | 'generator' = 'iterator' + type: 'iterator' | 'generator' = 'iterator', + index?: number ): Endpoint { const t: keyof LDWorkbenchConfiguration['stages'][0] = type - const endpoint = stage.configuration[t].endpoint + const endpoint = t === "generator" ? stage.configuration[t]?.[index!]?.endpoint : stage.configuration[t]?.endpoint; if (isFilePathString(endpoint)) { return new File(endpoint); } else if (endpoint !== undefined) { diff --git a/src/utils/tests/static/correct/conf1.yml b/src/utils/tests/static/correct/conf1.yml index 9067715..69c6b9a 100644 --- a/src/utils/tests/static/correct/conf1.yml +++ b/src/utils/tests/static/correct/conf1.yml @@ -15,4 +15,4 @@ stages: query: file://static/example/iterator-stage-1.rq endpoint: file://static/tests/iris.nt generator: - query: file://static/example/generator-stage-1.rq \ No newline at end of file + - query: file://static/example/generator-stage-1-1.rq \ No newline at end of file diff --git a/src/utils/tests/static/correct/conf2.yml b/src/utils/tests/static/correct/conf2.yml index e6c4ca6..7fa287d 100644 --- a/src/utils/tests/static/correct/conf2.yml +++ b/src/utils/tests/static/correct/conf2.yml @@ -15,4 +15,4 @@ stages: query: file://static/example/iterator-stage-1.rq endpoint: file://static/tests/iris.nt generator: - query: file://static/example/generator-stage-1.rq \ No newline at end of file + - query: file://static/example/generator-stage-1-1.rq \ No newline at end of file diff --git a/src/utils/tests/static/duplicate/conf1.yml b/src/utils/tests/static/duplicate/conf1.yml index 6c521e3..e49fdc6 100644 --- a/src/utils/tests/static/duplicate/conf1.yml +++ b/src/utils/tests/static/duplicate/conf1.yml @@ -15,5 +15,5 @@ stages: query: file://static/example/iterator-stage-1.rq endpoint: file://static/tests/iris.nt generator: - query: file://static/example/generator-stage-1.rq + - query: file://static/example/generator-stage-1-1.rq diff --git a/src/utils/tests/static/duplicate/conf2.yml b/src/utils/tests/static/duplicate/conf2.yml index 00e9cce..f922b51 100644 --- a/src/utils/tests/static/duplicate/conf2.yml +++ b/src/utils/tests/static/duplicate/conf2.yml @@ -15,4 +15,4 @@ stages: query: file://static/example/iterator-stage-1.rq endpoint: file://static/tests/iris.nt generator: - query: file://static/example/generator-stage-1.rq \ No newline at end of file + - query: file://static/example/generator-stage-1-1.rq \ No newline at end of file diff --git a/src/utils/tests/static/single/conf.yml b/src/utils/tests/static/single/conf.yml index 34c5563..31a0e82 100644 --- a/src/utils/tests/static/single/conf.yml +++ b/src/utils/tests/static/single/conf.yml @@ -15,10 +15,10 @@ stages: query: file://static/example/iterator-stage-1.rq endpoint: file://static/tests/iris.nt generator: - query: file://static/example/generator-stage-1.rq + - query: file://static/example/generator-stage-1-1.rq - name: "Stage 2" iterator: query: file://static/example/iterator-stage-2.rq generator: - query: file://static/example/generator-stage-2.rq - endpoint: file://static/tests/wikidata.nt + - query: file://static/example/generator-stage-2.rq + endpoint: file://static/tests/wikidata.nt diff --git a/src/utils/tests/utilities.test.ts b/src/utils/tests/utilities.test.ts index d13df13..110bafb 100644 --- a/src/utils/tests/utilities.test.ts +++ b/src/utils/tests/utilities.test.ts @@ -88,20 +88,22 @@ describe('Utilities', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } expect(isConfiguration(configuration)).to.equal(true) @@ -208,8 +210,9 @@ describe('Utilities', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { query: 'file://static/example/generator-stage-1.rq' } - } + generator: [ +{ query: 'file://static/example/generator-stage-1-1.rq' } +] } ] } ], @@ -226,8 +229,9 @@ describe('Utilities', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { query: 'file://static/example/generator-stage-1.rq' } - } + generator: [ +{ query: 'file://static/example/generator-stage-1-1.rq' } +] } ] } ] @@ -250,18 +254,20 @@ describe('Utilities', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { query: 'file://static/example/generator-stage-1.rq' } - }, + generator: [ +{ query: 'file://static/example/generator-stage-1-1.rq' } +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } ]] @@ -292,10 +298,11 @@ describe('Utilities', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: filePath }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - } +] } ] } const pipeline = new Pipeline(config, {silent: true}) @@ -317,10 +324,11 @@ describe('Utilities', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: url }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - } +] } ] } const pipeline = new Pipeline(config, {silent: true}) @@ -342,10 +350,11 @@ describe('Utilities', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: url }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - } +] } ] } const pipeline = new Pipeline(config, {silent: true}) @@ -366,10 +375,11 @@ describe('Utilities', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - } +] } ] } const pipeline = new Pipeline(config, {silent: true}) @@ -389,20 +399,22 @@ describe('Utilities', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: url }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(config, {silent: true}) @@ -435,20 +447,22 @@ describe('Utilities', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(config, {silent: true}) @@ -481,20 +495,22 @@ describe('Utilities', () => { query: 'file://static/example/iterator-stage-1.rq', endpoint: 'file://static/tests/iris.nt' }, - generator: { - query: 'file://static/example/generator-stage-1.rq' + generator: [ +{ + query: 'file://static/example/generator-stage-1-1.rq' } - }, +] }, { name: 'Stage 2', iterator: { query: 'file://static/example/iterator-stage-2.rq', }, - generator: { + generator: [ +{ query: 'file://static/example/generator-stage-2.rq', endpoint: 'file://static/tests/wikidata.nt' } - } +] } ] } const pipeline = new Pipeline(config, {silent: true}) diff --git a/static/example/config.yml b/static/example/config.yml index e9d7fc0..098ebe5 100644 --- a/static/example/config.yml +++ b/static/example/config.yml @@ -15,10 +15,13 @@ stages: query: file://static/example/iterator-stage-1.rq endpoint: https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql generator: - query: file://static/example/generator-stage-1.rq + # First generator + - query: file://static/example/generator-stage-1-1.rq + # Second generator + - query: file://static/example/generator-stage-1-2.rq - name: "Stage 2" iterator: query: file://static/example/iterator-stage-2.rq generator: - query: file://static/example/generator-stage-2.rq - endpoint: https://query.wikidata.org/sparql + - query: file://static/example/generator-stage-2.rq + endpoint: https://query.wikidata.org/sparql diff --git a/static/example/generator-stage-1.rq b/static/example/generator-stage-1-1.rq similarity index 100% rename from static/example/generator-stage-1.rq rename to static/example/generator-stage-1-1.rq diff --git a/static/example/generator-stage-1-2.rq b/static/example/generator-stage-1-2.rq new file mode 100644 index 0000000..8095e02 --- /dev/null +++ b/static/example/generator-stage-1-2.rq @@ -0,0 +1,10 @@ +prefix sdo: +prefix dbo: +prefix rdf: +prefix rdfs: +construct { + ?this rdfs:comment "This was generated using LDWorkbench"; +} +where { + $this a/rdfs:subClassOf* dbo:Plant +} diff --git a/static/ld-workbench.schema.json b/static/ld-workbench.schema.json index 2a07c0e..166e0fe 100644 --- a/static/ld-workbench.schema.json +++ b/static/ld-workbench.schema.json @@ -51,25 +51,29 @@ } }, "generator": { - "type": "object", - "additionalProperties": false, - "required": ["query"], - "properties": { - "query": { - "type": "string", - "description": "Path (prefixed with \"file://\") or SPARQL Query \nthat makes the generator using SPARQL construct." - }, - "endpoint": { - "type": "string", - "description": "The SPARQL endpoint for the generator. \nIf it starts with \"file://\", a local RDF file is queried.\nIf ommmitted the endpoint of the Iterator is used." - }, - "batchSize": { - "type": "number", - "minimum": 1, - "description": "Overrule the generator's behaviour of fetching results for 10 bindings of $this per request." + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": ["query"], + "properties": { + "query": { + "type": "string", + "description": "Path (prefixed with \"file://\") or SPARQL Query \nthat makes the generator using SPARQL construct." + }, + "endpoint": { + "type": "string", + "description": "The SPARQL endpoint for the generator. \nIf it starts with \"file://\", a local RDF file is queried.\nIf ommmitted the endpoint of the Iterator is used." + }, + "batchSize": { + "type": "number", + "minimum": 1, + "description": "Overrule the generator's behaviour of fetching results for 10 bindings of $this per request." + } } - } - }, + } + }, "destination": { "type": "string", "description": "The file where the results are saved. \nThis is not a required property, \nif ommitted a temporary file will be created automatically."