Skip to content

Commit

Permalink
Data warehouse step
Browse files Browse the repository at this point in the history
  • Loading branch information
fmendezh committed Sep 24, 2024
1 parent c919b8a commit 4ca6a9a
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 13 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
</parent>

<artifactId>gbif-api</artifactId>
<version>1.17.0-SNAPSHOT</version>
<version>1.17.0-ICEBERG-SNAPSHOT</version>
<packaging>jar</packaging>

<name>GBIF Common :: API</name>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ private PipelinesWorkflow() {
OCCURRENCE_WF_GRAPH.addNode(VERBATIM_TO_INTERPRETED, INTERPRETED_TO_INDEX);
OCCURRENCE_WF_GRAPH.addNode(VERBATIM_TO_INTERPRETED, HDFS_VIEW);
OCCURRENCE_WF_GRAPH.addNode(VERBATIM_TO_INTERPRETED, FRAGMENTER);
// 4
OCCURRENCE_WF_GRAPH.addNode(HDFS_VIEW, DATA_WAREHOUSE);

// Pipelines event-occurrence workflow
// 1
Expand All @@ -66,13 +68,17 @@ private PipelinesWorkflow() {
// 4
EVENT_OCCURRENCE_WF_GRAPH.addNode(EVENTS_VERBATIM_TO_INTERPRETED, EVENTS_INTERPRETED_TO_INDEX);
EVENT_OCCURRENCE_WF_GRAPH.addNode(EVENTS_VERBATIM_TO_INTERPRETED, EVENTS_HDFS_VIEW);
// 5
EVENT_OCCURRENCE_WF_GRAPH.addNode(EVENTS_HDFS_VIEW, DATA_WAREHOUSE);

// Pipelines event only workflow
// 1
EVENT_WF_GRAPH.addNode(DWCA_TO_VERBATIM, EVENTS_VERBATIM_TO_INTERPRETED);
// 2
EVENT_WF_GRAPH.addNode(EVENTS_VERBATIM_TO_INTERPRETED, EVENTS_INTERPRETED_TO_INDEX);
EVENT_WF_GRAPH.addNode(EVENTS_VERBATIM_TO_INTERPRETED, EVENTS_HDFS_VIEW);
// 3
EVENT_WF_GRAPH.addNode(EVENTS_HDFS_VIEW, DATA_WAREHOUSE);

// Pipelines validator workflow
// 1
Expand Down
11 changes: 8 additions & 3 deletions src/main/java/org/gbif/api/model/pipelines/StepType.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,14 @@
*/
package org.gbif.api.model.pipelines;

import java.util.EnumSet;
import java.util.Set;

import lombok.AllArgsConstructor;
import lombok.Getter;

import org.gbif.api.annotation.Experimental;

/** Enum to represent the pipelines step names. */
@Getter
@AllArgsConstructor
Expand All @@ -37,6 +40,8 @@ public enum StepType {
VERBATIM_TO_INTERPRETED("verbatimToInterpreted"),
INTERPRETED_TO_INDEX("interpretedToIndex"),
HDFS_VIEW("hdfsView"),
@Experimental
DATA_WAREHOUSE("dataWarehouse"),

// Event interpretation
EVENTS_VERBATIM_TO_INTERPRETED("eventsVerbatimToInterpreted"),
Expand All @@ -57,13 +62,13 @@ public enum StepType {
private final String label;

private static final Set<StepType> VERBATIM_TYPE =
Set.of(TO_VERBATIM, DWCA_TO_VERBATIM, XML_TO_VERBATIM, ABCD_TO_VERBATIM, FRAGMENTER);
EnumSet.of(TO_VERBATIM, DWCA_TO_VERBATIM, XML_TO_VERBATIM, ABCD_TO_VERBATIM, FRAGMENTER);

private static final Set<StepType> OCCURRENCE_TYPE =
Set.of(VERBATIM_TO_IDENTIFIER, VERBATIM_TO_INTERPRETED, INTERPRETED_TO_INDEX, HDFS_VIEW);
EnumSet.of(VERBATIM_TO_IDENTIFIER, VERBATIM_TO_INTERPRETED, INTERPRETED_TO_INDEX, HDFS_VIEW);

private static final Set<StepType> EVENT_TYPE =
Set.of(EVENTS_VERBATIM_TO_INTERPRETED, EVENTS_INTERPRETED_TO_INDEX, EVENTS_HDFS_VIEW);
EnumSet.of(EVENTS_VERBATIM_TO_INTERPRETED, EVENTS_INTERPRETED_TO_INDEX, EVENTS_HDFS_VIEW);

public static boolean isEventType(StepType type) {
return EVENT_TYPE.contains(type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ public void findAllStepsToRunTest() {

Set<StepType> allNodesFor = PipelinesWorkflow.getOccurrenceWorkflow().getAllNodesFor(types);

Assertions.assertEquals(5, allNodesFor.size());
Assertions.assertEquals(6, allNodesFor.size());
Assertions.assertTrue(allNodesFor.containsAll(
Arrays.asList(VERBATIM_TO_IDENTIFIER, VERBATIM_TO_INTERPRETED, HDFS_VIEW, INTERPRETED_TO_INDEX, FRAGMENTER)));
Arrays.asList(VERBATIM_TO_IDENTIFIER, VERBATIM_TO_INTERPRETED, HDFS_VIEW, INTERPRETED_TO_INDEX, FRAGMENTER, DATA_WAREHOUSE)));
}

@Test
Expand All @@ -51,7 +51,7 @@ public void findAllStepsToRunSameLevelTest() {

Set<StepType> allNodesFor = PipelinesWorkflow.getEventOccurrenceWorkflow().getAllNodesFor(types);

Assertions.assertEquals(5, allNodesFor.size());
Assertions.assertEquals(6, allNodesFor.size());
Assertions.assertTrue(allNodesFor.containsAll(
Arrays.asList(HDFS_VIEW, INTERPRETED_TO_INDEX, EVENTS_VERBATIM_TO_INTERPRETED, EVENTS_HDFS_VIEW,
EVENTS_INTERPRETED_TO_INDEX)));
Expand Down Expand Up @@ -98,7 +98,7 @@ public void eventOccurrenceWorkflowTest() {

Graph<StepType> wf = PipelinesWorkflow.getEventOccurrenceWorkflow();

Assertions.assertEquals(9, wf.getNodesQuantity());
Assertions.assertEquals(10, wf.getNodesQuantity());

Assertions.assertEquals(1, wf.getLevel(DWCA_TO_VERBATIM));
Assertions.assertEquals(2, wf.getLevel(VERBATIM_TO_IDENTIFIER));
Expand Down Expand Up @@ -152,20 +152,21 @@ public void eventOccurrenceWorkflowTest() {

// EVENTS_HDFS_VIEW -> 0
List<Graph<StepType>.Edge> eventsHdfsViewEdges = wf.getNodeEdges(EVENTS_HDFS_VIEW);
Assertions.assertEquals(0, eventsHdfsViewEdges.size());
Assertions.assertEquals(1, eventsHdfsViewEdges.size());
}

@Test
public void eventOnlyWorkflowTest() {

Graph<StepType> wf = PipelinesWorkflow.getEventWorkflow();

Assertions.assertEquals(4, wf.getNodesQuantity());
Assertions.assertEquals(5, wf.getNodesQuantity());

Assertions.assertEquals(1, wf.getLevel(DWCA_TO_VERBATIM));
Assertions.assertEquals(2, wf.getLevel(EVENTS_VERBATIM_TO_INTERPRETED));
Assertions.assertEquals(3, wf.getLevel(EVENTS_INTERPRETED_TO_INDEX));
Assertions.assertEquals(3, wf.getLevel(EVENTS_HDFS_VIEW));
Assertions.assertEquals(4, wf.getLevel(DATA_WAREHOUSE));

// DWCA_TO_VERBATIM -> EVENTS_VERBATIM_TO_INTERPRETED
List<Graph<StepType>.Edge> dwcaToVerbatimEdges = wf.getNodeEdges(DWCA_TO_VERBATIM);
Expand All @@ -184,7 +185,7 @@ public void eventOnlyWorkflowTest() {

// EVENTS_HDFS_VIEW -> 0
List<Graph<StepType>.Edge> eventsHdfsViewEdges = wf.getNodeEdges(EVENTS_HDFS_VIEW);
Assertions.assertEquals(0, eventsHdfsViewEdges.size());
Assertions.assertEquals(1, eventsHdfsViewEdges.size());
}

@Test
Expand All @@ -200,8 +201,9 @@ public void occurrenceWorkflowTest() {
Assertions.assertEquals(4, wf.getLevel(INTERPRETED_TO_INDEX));
Assertions.assertEquals(4, wf.getLevel(HDFS_VIEW));
Assertions.assertEquals(4, wf.getLevel(FRAGMENTER));
Assertions.assertEquals(5, wf.getLevel(DATA_WAREHOUSE));

Assertions.assertEquals(8, wf.getNodesQuantity());
Assertions.assertEquals(9, wf.getNodesQuantity());

// DWCA_TO_VERBATIM -> VERBATIM_TO_IDENTIFIER
List<Graph<StepType>.Edge> dwcaToVerbatimEdges = wf.getNodeEdges(DWCA_TO_VERBATIM);
Expand Down Expand Up @@ -236,7 +238,7 @@ public void occurrenceWorkflowTest() {

// HDFS_VIEW -> 0
List<Graph<StepType>.Edge> hdfsViewEdges = wf.getNodeEdges(HDFS_VIEW);
Assertions.assertEquals(0, hdfsViewEdges.size());
Assertions.assertEquals(1, hdfsViewEdges.size());

// FRAGMENTER -> 0
List<Graph<StepType>.Edge> fragmenterEdges = wf.getNodeEdges(FRAGMENTER);
Expand Down

0 comments on commit 4ca6a9a

Please sign in to comment.