Skip to content

Commit

Permalink
test: add test that checks that we do not regress re/ non-standard column names
Browse files Browse the repository at this point in the history
  • Loading branch information
Taepper committed May 24, 2024
1 parent 340f8ab commit 9326f7a
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 1 deletion.
26 changes: 25 additions & 1 deletion src/silo/preprocessing/preprocessor.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,29 @@ const Scenario NDJSON_WITH_SQL_KEYWORD_AS_FIELD = {
)
};

// Scenario guarding against regressions with non-standard column names: the
// fixture in testBaseData/numericNames/ declares a string metadata column whose
// name is the numeric literal "2", and the query below groups and orders by it.
const Scenario NDJSON_WITH_NUMERIC_NAMES = {
.input_directory = "testBaseData/numericNames/",
// The fixture's input_file.ndjson holds two records.
.expected_sequence_count = 2,
// Aggregate over every record (filter "True"), grouped and ordered by column "2".
.query = R"(
{
"action": {
"type": "Aggregated",
"groupByFields": ["2"],
"orderByFields": ["2"]
},
"filterExpression": {
"type": "True"
}
}
)",
// One row per distinct value of "2": one fixture record has null there, the
// other has "google.com", so each group is expected to have a count of 1.
.expected_query_result = nlohmann::json::parse(
R"([
{"count": 1, "2": null},
{"count": 1, "2": "google.com"}
])"
)
};

const Scenario TSV_FILE_WITH_SQL_KEYWORD_AS_FIELD = {
.input_directory = "testBaseData/tsvWithSqlKeywordField/",
.expected_sequence_count = NDJSON_WITH_SQL_KEYWORD_AS_FIELD.expected_sequence_count,
Expand All @@ -99,7 +122,8 @@ INSTANTIATE_TEST_SUITE_P(
FASTA_FILES_WITH_MISSING_SEGMENTS_AND_GENES,
NDJSON_FILE_WITH_MISSING_SEGMENTS_AND_GENES,
NDJSON_WITH_SQL_KEYWORD_AS_FIELD,
TSV_FILE_WITH_SQL_KEYWORD_AS_FIELD
TSV_FILE_WITH_SQL_KEYWORD_AS_FIELD,
NDJSON_WITH_NUMERIC_NAMES
),
printTestName
);
Expand Down
31 changes: 31 additions & 0 deletions testBaseData/numericNames/database_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Database schema for the numericNames test fixture. It lists the metadata
# columns found in the fixture's input_file.ndjson, including one column whose
# name consists only of a digit.
schema:
instanceName: Test
metadata:
- name: date
type: date
- name: dateSubmitted
type: date
# Quoted so that YAML reads the column name as the string "2" rather than the
# integer 2; this is the numeric-named column the fixture exists to exercise.
- name: "2"
type: string
- name: age
type: int
- name: sex
type: string
- name: pangoLineage
type: pango_lineage
- name: qc
type: float
- name: accession
type: string
- name: version
type: int
- name: submissionId
type: string
- name: accessionVersion
type: string
- name: isRevocation
type: string
- name: versionStatus
type: string
primaryKey: accessionVersion
dateToSortBy: date
2 changes: 2 additions & 0 deletions testBaseData/numericNames/input_file.ndjson
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"metadata":{"qc":0.9,"age":42,"sex":null,"date":"2002-12-15","2":"google.com","pangoLineage":"XBB.1.5","dateSubmitted":null,"accession":"1","version":1,"submissionId":"custom0","accessionVersion":"1.1","isRevocation":"false","versionStatus":"REVOKED"},"unalignedNucleotideSequences":{"main":"NNACTGNN","3":null},"alignedNucleotideSequences":{"main":"ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCT","3":null},"nucleotideInsertions":{"main":["123:ACTG"],"3":[]},"alignedAminoAcidSequences":{"someLongGene":"ACDEFGHIKLMNPQRSTVWYBZX-*","someShortGene":"MADS"},"aminoAcidInsertions":{"someLongGene":["123:RNRNRN"],"someShortGene":["123:RN"]}}
{"metadata":{"qc":null,"age":null,"sex":null,"date":null,"2":null,"pangoLineage":null,"dateSubmitted":null,"accession":"1","version":3,"submissionId":"custom0","accessionVersion":"1.3","isRevocation":"true","versionStatus":"REVISED"},"unalignedNucleotideSequences":{"main":null,"3":null},"alignedNucleotideSequences":{"main":null,"3":null},"nucleotideInsertions":{"main":[],"3":[]},"alignedAminoAcidSequences":{"someLongGene":null,"someShortGene":null},"aminoAcidInsertions":{"someLongGene":[],"someShortGene":[]}}
4 changes: 4 additions & 0 deletions testBaseData/numericNames/preprocessing_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Preprocessing configuration for the numericNames test fixture: names the
# fixture directory and the NDJSON input and reference genome files inside it.
inputDirectory: "testBaseData/numericNames"
ndjsonInputFilename: "input_file.ndjson"
referenceGenomeFilename: "reference_genomes.json"
preprocessingDatabaseLocation: "debug.duckdb"
22 changes: 22 additions & 0 deletions testBaseData/numericNames/reference_genomes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"nucleotideSequences": [
{
"name": "main",
"sequence": "ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCT"
},
{
"name": "3",
"sequence": "AAAAAAAAAAAAAAAA"
}
],
"genes": [
{
"name": "someLongGene",
"sequence": "AAAAAAAAAAAAAAAAAAAAAAAAA"
},
{
"name": "someShortGene",
"sequence": "MADS"
}
]
}

0 comments on commit 9326f7a

Please sign in to comment.