diff --git a/include/silo/query_engine/query_parse_sequence_name.h b/include/silo/query_engine/query_parse_sequence_name.h new file mode 100644 index 000000000..ff8996411 --- /dev/null +++ b/include/silo/query_engine/query_parse_sequence_name.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include + +#include "silo/database.h" + +namespace silo { + +template +std::string validateSequenceName(std::string sequence_name, const silo::Database& database) { + CHECK_SILO_QUERY( + database.getSequenceStores().contains(sequence_name), + fmt::format( + "Database does not contain the {} Sequence with name: '{}'", + SymbolType::SYMBOL_NAME, + sequence_name + ) + ); + return sequence_name; +} + +template +std::string validateSequenceNameOrGetDefault( + std::optional sequence_name, + const silo::Database& database +) { + if (sequence_name.has_value()) { + return validateSequenceName(sequence_name.value(), database); + } + + CHECK_SILO_QUERY( + database.getDefaultSequenceName().has_value(), + "The database has no default " + std::string(SymbolType::SYMBOL_NAME_LOWER_CASE) + + " sequence name" + ); + + const auto default_sequence_name = database.getDefaultSequenceName().value(); + return validateSequenceName(default_sequence_name, database); +} + +} // namespace silo \ No newline at end of file diff --git a/include/silo/test/query_fixture.test.h b/include/silo/test/query_fixture.test.h index 20f9b809f..e918ffaf7 100644 --- a/include/silo/test/query_fixture.test.h +++ b/include/silo/test/query_fixture.test.h @@ -58,11 +58,20 @@ namespace silo::test { \ TEST_P(TEST_SUITE_NAME##FixtureAlias, testQuery) { \ const auto scenario = GetParam(); \ - const auto result = query_engine.executeQuery(nlohmann::to_string(scenario.query)); \ - const auto actual = nlohmann::json(result.query_result); \ - ASSERT_EQ(actual, scenario.expected_query_result); \ + if (!scenario.expected_error_message.empty()) { \ + try { \ + const auto result = 
query_engine.executeQuery(nlohmann::to_string(scenario.query)); \ + FAIL() << "Expected an error in test case, but nothing was thrown"; \ + } catch (const std::exception& e) { \ + EXPECT_EQ(std::string(e.what()), scenario.expected_error_message); \ + } \ + } else { \ + const auto result = query_engine.executeQuery(nlohmann::to_string(scenario.query)); \ + const auto actual = nlohmann::json(result.query_result); \ + ASSERT_EQ(actual, scenario.expected_query_result); \ + } \ } \ - } // namespace + } // namespace \ struct QueryTestData { const std::vector ndjson_input_data; @@ -75,6 +84,7 @@ struct QueryTestScenario { std::string name; nlohmann::json query; nlohmann::json expected_query_result; + std::string expected_error_message; }; std::string printScenarioName(const ::testing::TestParamInfo& scenario); diff --git a/src/silo/query_engine/filter_expressions/has_mutation.cpp b/src/silo/query_engine/filter_expressions/has_mutation.cpp index 44483febd..ba5559c74 100644 --- a/src/silo/query_engine/filter_expressions/has_mutation.cpp +++ b/src/silo/query_engine/filter_expressions/has_mutation.cpp @@ -15,6 +15,7 @@ #include "silo/query_engine/filter_expressions/symbol_equals.h" #include "silo/query_engine/operators/operator.h" #include "silo/query_engine/query_parse_exception.h" +#include "silo/query_engine/query_parse_sequence_name.h" namespace silo { class DatabasePartition; @@ -48,20 +49,12 @@ std::unique_ptr HasMutation::compile( "Database does not have a default sequence name for {} Sequences", SymbolType::SYMBOL_NAME ) ); - const std::string sequence_name_or_default = - sequence_name.has_value() ? 
sequence_name.value() - : database.getDefaultSequenceName().value(); - CHECK_SILO_QUERY( - database.getSequenceStores().contains(sequence_name_or_default), - fmt::format( - "Database does not contain the {} sequence with name: '{}'", - SymbolType::SYMBOL_NAME, - sequence_name_or_default - ) - ) + + const auto valid_sequence_name = + validateSequenceNameOrGetDefault(sequence_name, database); auto ref_symbol = database.getSequenceStores() - .at(sequence_name_or_default) + .at(valid_sequence_name) .reference_sequence.at(position_idx); std::vector symbols = @@ -82,7 +75,7 @@ std::unique_ptr HasMutation::compile( std::back_inserter(symbol_filters), [&](typename SymbolType::Symbol symbol) { return std::make_unique>( - sequence_name_or_default, position_idx, symbol + valid_sequence_name, position_idx, symbol ); } ); diff --git a/src/silo/query_engine/filter_expressions/insertion_contains.cpp b/src/silo/query_engine/filter_expressions/insertion_contains.cpp index 2ef09e92a..7077def2e 100644 --- a/src/silo/query_engine/filter_expressions/insertion_contains.cpp +++ b/src/silo/query_engine/filter_expressions/insertion_contains.cpp @@ -19,6 +19,7 @@ #include "silo/query_engine/operators/operator.h" #include "silo/query_engine/operators/union.h" #include "silo/query_engine/query_parse_exception.h" +#include "silo/query_engine/query_parse_sequence_name.h" #include "silo/storage/database_partition.h" #include "silo/storage/insertion_index.h" #include "silo/storage/sequence_store.h" @@ -55,28 +56,14 @@ std::unique_ptr InsertionContains(database_partition.sequence_count); } - std::string validated_sequence_name; - if (sequence_name.has_value()) { - validated_sequence_name = sequence_name.value(); - } else { - CHECK_SILO_QUERY( - database.getDefaultSequenceName().has_value(), - "The database has no default " + std::string(SymbolType::SYMBOL_NAME_LOWER_CASE) + - " sequence name" - ) - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) -- the previous statement checks it - 
validated_sequence_name = *database.getDefaultSequenceName(); - } + const auto valid_sequence_name = + validateSequenceNameOrGetDefault(sequence_name, database); + const std::map&>& sequence_stores = database_partition.getSequenceStores(); - CHECK_SILO_QUERY( - sequence_stores.contains(validated_sequence_name), - "The database has no default " + std::string(SymbolType::SYMBOL_NAME_LOWER_CASE) + - " sequence name" - ) const SequenceStorePartition& sequence_store = - sequence_stores.at(validated_sequence_name); + sequence_stores.at(valid_sequence_name); return std::make_unique( [&]() { auto search_result = sequence_store.insertion_index.search(position_idx, value); @@ -121,10 +108,6 @@ void from_json(const nlohmann::json& json, std::unique_ptr sequence_name; + std::optional sequence_name = std::nullopt; if (json.contains("sequenceName")) { sequence_name = json["sequenceName"].get(); } diff --git a/src/silo/query_engine/filter_expressions/symbol_equals.cpp b/src/silo/query_engine/filter_expressions/symbol_equals.cpp index 207ef2cc8..19c8c78cb 100644 --- a/src/silo/query_engine/filter_expressions/symbol_equals.cpp +++ b/src/silo/query_engine/filter_expressions/symbol_equals.cpp @@ -20,6 +20,7 @@ #include "silo/query_engine/operators/index_scan.h" #include "silo/query_engine/operators/operator.h" #include "silo/query_engine/query_parse_exception.h" +#include "silo/query_engine/query_parse_sequence_name.h" #include "silo/storage/database_partition.h" namespace silo::query_engine::filter_expressions { @@ -77,19 +78,12 @@ std::unique_ptr SymbolEquals().value(); - CHECK_SILO_QUERY( - database.getSequenceStores().contains(sequence_name_or_default), - fmt::format( - "Database does not contain the {} Sequence with name: '{}'", - SymbolType::SYMBOL_NAME, - sequence_name_or_default - ) - ) + + const auto valid_sequence_name = + validateSequenceNameOrGetDefault(sequence_name, database); + const auto& seq_store_partition = - 
database_partition.getSequenceStores().at(sequence_name_or_default); + database_partition.getSequenceStores().at(valid_sequence_name); if (position_idx >= seq_store_partition.reference_sequence.size()) { throw QueryParseException( "SymbolEquals position is out of bounds '" + std::to_string(position_idx + 1) + "' > '" + @@ -107,7 +101,7 @@ std::unique_ptr SymbolEquals>( - sequence_name_or_default, position_idx, symbol + valid_sequence_name, position_idx, symbol ); } ); @@ -120,7 +114,7 @@ std::unique_ptr SymbolEquals( - sequence_name_or_default, position_idx, SymbolType::SYMBOL_MISSING + valid_sequence_name, position_idx, SymbolType::SYMBOL_MISSING ); return std::make_unique( std::move(logical_equivalent), @@ -137,7 +131,7 @@ std::unique_ptr SymbolEquals( - std::make_unique(sequence_name_or_default, position_idx, symbol) + std::make_unique(valid_sequence_name, position_idx, symbol) ); return std::make_unique( std::make_unique( @@ -164,9 +158,9 @@ std::unique_ptr SymbolEquals(std::make_unique>( - sequence_name_or_default, position_idx, symbol - )); + return std::make_unique( + std::make_unique>(valid_sequence_name, position_idx, symbol) + ); } ); return And(std::move(symbol_filters)).compile(database, database_partition, NONE); @@ -175,7 +169,7 @@ std::unique_ptr SymbolEquals(sequence_name_or_default, position_idx, symbol); + std::make_unique(valid_sequence_name, position_idx, symbol); return std::make_unique( std::move(logical_equivalent), seq_store_partition.getBitmap(position_idx, symbol), diff --git a/src/silo/test/amino_acid_insertion_contains.test.cpp b/src/silo/test/amino_acid_insertion_contains.test.cpp new file mode 100644 index 000000000..8f7eb3716 --- /dev/null +++ b/src/silo/test/amino_acid_insertion_contains.test.cpp @@ -0,0 +1,102 @@ +#include + +#include + +#include "silo/test/query_fixture.test.h" + +using silo::ReferenceGenomes; +using silo::config::DatabaseConfig; +using silo::config::ValueType; +using silo::test::QueryTestData; +using 
silo::test::QueryTestScenario; + +nlohmann::json createDataWithAminoAcidInsertions( + const std::string& primaryKey, + const nlohmann::json& aminoAcidInsertions +) { + return { + {"metadata", {{"primaryKey", primaryKey}}}, + {"alignedNucleotideSequences", {{"segment1", nullptr}, {"segment2", nullptr}}}, + {"unalignedNucleotideSequences", {{"segment1", nullptr}, {"segment2", nullptr}}}, + {"alignedAminoAcidSequences", {{"gene1", nullptr}, {"gene2", nullptr}}}, + {"nucleotideInsertions", {{"segment1", {}}, {"segment2", {}}}}, + {"aminoAcidInsertions", aminoAcidInsertions} + }; +} + +const std::vector DATA = { + createDataWithAminoAcidInsertions("id_0", {{"gene1", {"123:A"}}, {"gene2", {}}}), + createDataWithAminoAcidInsertions("id_1", {{"gene1", {"123:A"}}, {"gene2", {}}}), + createDataWithAminoAcidInsertions("id_2", {{"gene1", {"234:BB"}}, {"gene2", {}}}), + createDataWithAminoAcidInsertions("id_3", {{"gene1", {"123:CCC"}}, {"gene2", {}}}), +}; + +const auto DATABASE_CONFIG = DatabaseConfig{ + .default_nucleotide_sequence = "segment1", + .schema = + {.instance_name = "dummy name", + .metadata = {{.name = "primaryKey", .type = ValueType::STRING}}, + .primary_key = "primaryKey"} +}; + +const auto REFERENCE_GENOMES = ReferenceGenomes{ + {{"segment1", "A"}, {"segment2", "T"}}, + {{"gene1", "*"}, {"gene2", "*"}}, +}; + +const QueryTestData TEST_DATA{ + .ndjson_input_data = {DATA}, + .database_config = DATABASE_CONFIG, + .reference_genomes = REFERENCE_GENOMES +}; + +nlohmann::json createAminoAcidInsertionContainsQuery( + const nlohmann::json& sequenceName, + int position, + const std::string& insertedSymbols +) { + return { + {"action", {{"type", "Details"}}}, + {"filterExpression", + {{"type", "AminoAcidInsertionContains"}, + {"position", position}, + {"value", insertedSymbols}, + {"sequenceName", sequenceName}}} + }; +} + +nlohmann::json createAminoAcidInsertionContainsQueryWithEmptySequenceName( + int position, + const std::string& insertedSymbols +) { + return { + 
{"action", {{"type", "Details"}}}, + {"filterExpression", + { + {"type", "AminoAcidInsertionContains"}, + {"position", position}, + {"value", insertedSymbols}, + }} + }; +} + +const QueryTestScenario AMINO_ACID_INSERTION_CONTAINS_SCENARIO = { + .name = "aminoAcidInsertionContains", + .query = createAminoAcidInsertionContainsQuery("gene1", 123, "A"), + .expected_query_result = nlohmann::json({{{"primaryKey", "id_0"}}, {{"primaryKey", "id_1"}}}) +}; + +const QueryTestScenario AMINO_ACID_INSERTION_CONTAINS_WITH_NULL_SEGMENT_SCENARIO = { + .name = "aminoAcidInsertionWithNullSegment", + .query = createAminoAcidInsertionContainsQueryWithEmptySequenceName(123, "A"), + .expected_error_message = "The database has no default amino acid sequence name", +}; + +QUERY_TEST( + AminoAcidInsertionContainsTest, + TEST_DATA, + ::testing::Values( + AMINO_ACID_INSERTION_CONTAINS_SCENARIO, + AMINO_ACID_INSERTION_CONTAINS_WITH_NULL_SEGMENT_SCENARIO + ) +); diff --git a/src/silo/test/insertion_contains.test.cpp b/src/silo/test/insertion_contains.test.cpp new file mode 100644 index 000000000..5e46ece8f --- /dev/null +++ b/src/silo/test/insertion_contains.test.cpp @@ -0,0 +1,110 @@ +#include + +#include + +#include "silo/test/query_fixture.test.h" + +using silo::ReferenceGenomes; +using silo::config::DatabaseConfig; +using silo::config::ValueType; +using silo::test::QueryTestData; +using silo::test::QueryTestScenario; + +nlohmann::json createDataWithNucleotideInsertions( + const std::string& primaryKey, + const nlohmann::json& nucleotideInsertions +) { + return { + {"metadata", {{"primaryKey", primaryKey}}}, + {"alignedNucleotideSequences", {{"segment1", nullptr}, {"segment2", nullptr}}}, + {"unalignedNucleotideSequences", {{"segment1", nullptr}, {"segment2", nullptr}}}, + {"alignedAminoAcidSequences", {{"gene1", nullptr}}}, + {"nucleotideInsertions", nucleotideInsertions}, + {"aminoAcidInsertions", {{"gene1", {}}}} + }; +} + +const std::vector DATA = { + 
createDataWithNucleotideInsertions("id_0", {{"segment1", {"123:A"}}, {"segment2", {}}}), + createDataWithNucleotideInsertions("id_1", {{"segment1", {"123:A"}}, {"segment2", {}}}), + createDataWithNucleotideInsertions("id_2", {{"segment1", {"234:TT"}}, {"segment2", {}}}), + createDataWithNucleotideInsertions("id_3", {{"segment1", {"123:CCC"}}, {"segment2", {}}}), +}; + +const auto DATABASE_CONFIG = DatabaseConfig{ + .default_nucleotide_sequence = "segment1", + .schema = + {.instance_name = "dummy name", + .metadata = {{.name = "primaryKey", .type = ValueType::STRING}}, + .primary_key = "primaryKey"} +}; + +const auto REFERENCE_GENOMES = ReferenceGenomes{ + {{"segment1", "A"}, {"segment2", "T"}}, + {{"gene1", "*"}}, +}; + +const QueryTestData TEST_DATA{ + .ndjson_input_data = {DATA}, + .database_config = DATABASE_CONFIG, + .reference_genomes = REFERENCE_GENOMES +}; + +nlohmann::json createInsertionContainsQuery( + const nlohmann::json& sequenceName, + int position, + const std::string& insertedSymbols +) { + return { + {"action", {{"type", "Details"}}}, + {"filterExpression", + {{"type", "InsertionContains"}, + {"position", position}, + {"value", insertedSymbols}, + {"sequenceName", sequenceName}}} + }; +} + +nlohmann::json createInsertionContainsQueryWithEmptySequenceName( + int position, + const std::string& insertedSymbols +) { + return { + {"action", {{"type", "Details"}}}, + {"filterExpression", + { + {"type", "InsertionContains"}, + {"position", position}, + {"value", insertedSymbols}, + }} + }; +} + +const QueryTestScenario INSERTION_CONTAINS_SCENARIO = { + .name = "insertionContains", + .query = createInsertionContainsQuery("segment1", 123, "A"), + .expected_query_result = nlohmann::json({{{"primaryKey", "id_0"}}, {{"primaryKey", "id_1"}}}) +}; + +const QueryTestScenario INSERTION_CONTAINS_WITH_EMPTY_SEGMENT_SCENARIO = { + .name = "insertionContainsWithNullSegmentDefaultsToDefaultSegment", + .query = createInsertionContainsQueryWithEmptySequenceName(123, 
"A"), + .expected_query_result = nlohmann::json({{{"primaryKey", "id_0"}}, {{"primaryKey", "id_1"}}}) +}; + +const QueryTestScenario INSERTION_CONTAINS_WITH_UNKNOWN_SEGMENT_SCENARIO = { + .name = "insertionContainsWithUnknownSegment", + .query = createInsertionContainsQuery("unknownSegmentName", 123, "A"), + .expected_error_message = + "Database does not contain the Nucleotide Sequence with name: 'unknownSegmentName'" +}; + +QUERY_TEST( + InsertionContainsTest, + TEST_DATA, + ::testing::Values( + INSERTION_CONTAINS_SCENARIO, + INSERTION_CONTAINS_WITH_EMPTY_SEGMENT_SCENARIO, + INSERTION_CONTAINS_WITH_UNKNOWN_SEGMENT_SCENARIO + ) +);