From 58db6596e9661095a5b3a72a6557f79f2c6622e3 Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Sat, 16 Jan 2021 00:37:22 +0100 Subject: [PATCH] cleaned up BOSSConstruct unit tests: faster compilation and execution --- .../graph/succinct/test_boss_construct.cpp | 387 ++++++++---------- 1 file changed, 163 insertions(+), 224 deletions(-) diff --git a/metagraph/tests/graph/succinct/test_boss_construct.cpp b/metagraph/tests/graph/succinct/test_boss_construct.cpp index 107d3a565f..15fec24a97 100644 --- a/metagraph/tests/graph/succinct/test_boss_construct.cpp +++ b/metagraph/tests/graph/succinct/test_boss_construct.cpp @@ -1,11 +1,11 @@ -#include +#include #include #include #include #include #include -#include "gtest/gtest.h" +#include #define protected public #define private public @@ -31,12 +31,8 @@ const std::string test_data_dir = TEST_DATA_DIR; const std::string test_fasta = test_data_dir + "/test_construct.fa"; const std::string test_dump_basename = test_data_dir + "/graph_dump_test"; +#define kMaxK ( 256 / KmerExtractorBOSS::bits_per_char ) -template -class BOSSConstruct : public ::testing::Test { }; - -template -class WeightedBOSSConstruct : public ::testing::Test { }; template class CollectKmers : public ::testing::Test { }; @@ -44,31 +40,6 @@ class CollectKmers : public ::testing::Test { }; template class CountKmers : public ::testing::Test { }; -template -class BOSSConfigurationType { - public: - typedef KMER Kmer; - static const bool kWeighted = Weighted; -}; -template -const bool BOSSConfigurationType::kWeighted; - -typedef ::testing::Types, - BOSSConfigurationType, - BOSSConfigurationType, - BOSSConfigurationType, - BOSSConfigurationType, - BOSSConfigurationType> KmerAndWeightedTypes; - -typedef ::testing::Types, - BOSSConfigurationType, - BOSSConfigurationType> KmerWeightedTypes; - -TYPED_TEST_SUITE(BOSSConstruct, KmerAndWeightedTypes); -TYPED_TEST_SUITE(WeightedBOSSConstruct, KmerWeightedTypes); - -#define kMaxK ( sizeof(typename TypeParam::Kmer) * 8 / KmerExtractorBOSS::bits_per_char ) - typedef ::testing::Types, kmer::KMerBOSS, kmer::KMerBOSS> KmerTypes; @@ -77,107 +48,113 @@ TYPED_TEST_SUITE(CollectKmers, KmerTypes); TYPED_TEST_SUITE(CountKmers, KmerTypes); -TYPED_TEST(BOSSConstruct, ConstructionEQAppendingSimplePath) { +TEST(BOSSConstruct, ConstructionEQAppendingSimplePath) { for (size_t k = 1; k < kMaxK; ++k) { - BOSSConstructor constructor(k, false, TypeParam::kWeighted ? 8 : 0); - constructor.add_sequences({ std::string(100, 'A') }); - BOSS constructed(&constructor); - BOSS appended(k); appended.add_sequence(std::string(100, 'A')); - EXPECT_EQ(constructed, appended); + for (bool weighted : { false, true }) { + BOSSConstructor constructor(k, false, weighted ? 8 : 0); + constructor.add_sequences({ std::string(100, 'A') }); + BOSS constructed(&constructor); + + EXPECT_EQ(constructed, appended); + } } } -TYPED_TEST(BOSSConstruct, ConstructionEQAppendingTwoPaths) { +TEST(BOSSConstruct, ConstructionEQAppendingTwoPaths) { for (size_t k = 1; k < kMaxK; ++k) { - BOSSConstructor constructor(k, false, TypeParam::kWeighted ? 8 : 0); - constructor.add_sequences({ std::string(100, 'A'), - std::string(50, 'B') }); - BOSS constructed(&constructor); - BOSS appended(k); appended.add_sequence(std::string(100, 'A')); appended.add_sequence(std::string(50, 'B')); - EXPECT_EQ(constructed, appended); + for (bool weighted : { false, true }) { + BOSSConstructor constructor(k, false, weighted ? 8 : 0); + constructor.add_sequences({ std::string(100, 'A'), + std::string(50, 'B') }); + BOSS constructed(&constructor); + + EXPECT_EQ(constructed, appended); + } } } -TYPED_TEST(BOSSConstruct, ConstructionLowerCase) { +TEST(BOSSConstruct, ConstructionLowerCase) { for (size_t k = 1; k < kMaxK; ++k) { - BOSSConstructor constructor_first(k, false, TypeParam::kWeighted ? 8 : 0); - constructor_first.add_sequences({ std::string(100, 'A'), - std::string(50, 'C') }); - BOSS first(&constructor_first); + for (bool weighted : { false, true }) { + BOSSConstructor constructor_first(k, false, weighted ? 8 : 0); + constructor_first.add_sequences({ std::string(100, 'A'), + std::string(50, 'C') }); + BOSS first(&constructor_first); - BOSSConstructor constructor_second(k, false, TypeParam::kWeighted ? 8 : 0); - constructor_second.add_sequences({ std::string(100, 'a'), - std::string(50, 'c') }); - BOSS second(&constructor_second); + BOSSConstructor constructor_second(k, false, weighted ? 8 : 0); + constructor_second.add_sequences({ std::string(100, 'a'), + std::string(50, 'c') }); + BOSS second(&constructor_second); #if _DNA_CASE_SENSITIVE_GRAPH - EXPECT_FALSE(first.equals_internally(second)); + EXPECT_FALSE(first.equals_internally(second)); #else - EXPECT_TRUE(first.equals_internally(second)); + EXPECT_TRUE(first.equals_internally(second)); #endif + } } } -TYPED_TEST(BOSSConstruct, ConstructionDummySentinel) { +TEST(BOSSConstruct, ConstructionDummySentinel) { for (size_t k = 1; k < kMaxK; ++k) { - BOSSConstructor constructor_first(k, false, TypeParam::kWeighted ? 8 : 0); - constructor_first.add_sequences({ std::string(100, 'N'), - std::string(50, '$') }); - BOSS first(&constructor_first); - - BOSSConstructor constructor_second(k, false, TypeParam::kWeighted ? 8 : 0); - constructor_second.add_sequences({ std::string(100, 'N'), - std::string(50, '.') }); - BOSS second(&constructor_second); - - EXPECT_TRUE(first.equals_internally(second)); + for (bool weighted : { false, true }) { + BOSSConstructor constructor_first(k, false, weighted ? 8 : 0); + constructor_first.add_sequences({ std::string(100, 'N'), + std::string(50, '$') }); + BOSS first(&constructor_first); + + BOSSConstructor constructor_second(k, false, weighted ? 8 : 0); + constructor_second.add_sequences({ std::string(100, 'N'), + std::string(50, '.') }); + BOSS second(&constructor_second); + + EXPECT_TRUE(first.equals_internally(second)); + } } } -TYPED_TEST(BOSSConstruct, ConstructionEQAppending) { - for (auto container : { kmer::ContainerType::VECTOR, kmer::ContainerType::VECTOR_DISK }) { - for (size_t k = 1; k < kMaxK; ++k) { - std::vector input_data = { - "ACAGCTAGCTAGCTAGCTAGCTG", - "ATATTATAAAAAATTTTAAAAAA", - "ATATATTCTCTCTCTCTCATA", - "GTGTGTGTGGGGGGCCCTTTTTTCATA", - }; - BOSSConstructor constructor(k, false, TypeParam::kWeighted ? 8 : 0, "", 1, - 20000, container); - constructor.add_sequences(std::vector(input_data)); - BOSS constructed(&constructor); - - BOSS appended(k); - for (const auto &sequence : input_data) { - appended.add_sequence(sequence); +TEST(BOSSConstruct, ConstructionEQAppending) { + std::vector input_data = { + "ACAGCTAGCTAGCTAGCTAGCTG", + "ATATTATAAAAAATTTTAAAAAA", + "ATATATTCTCTCTCTCTCATA", + "GTGTGTGTGGGGGGCCCTTTTTTCATA", + }; + for (size_t k = 1; k < kMaxK; ++k) { + BOSS appended(k); + for (const auto &sequence : input_data) { + appended.add_sequence(sequence); + } + for (bool weighted : { false, true }) { + for (auto container : { kmer::ContainerType::VECTOR, kmer::ContainerType::VECTOR_DISK }) { + BOSSConstructor constructor(k, false, weighted ? 8 : 0, "", 1, + 20000, container); + constructor.add_sequences(std::vector(input_data)); + BOSS constructed(&constructor); + + EXPECT_EQ(constructed, appended); } - - EXPECT_EQ(constructed, appended); } } } TYPED_TEST(WeightedBOSSConstruct, ConstructionDummyKmersZeroWeight) { - ASSERT_TRUE(TypeParam::kWeighted); + std::vector input_data = { + "ACAGCTAGCTAGCTAGCTAGCTG", + "ATATTATAAAAAATTTTAAAAAA", + "ATATATTCTCTCTCTCTCATA", + "GTGTGTGTGGGGGGCCCTTTTTTCATA", + }; for (auto container : { kmer::ContainerType::VECTOR, kmer::ContainerType::VECTOR_DISK }) { for (size_t k = 1; k < kMaxK; ++k) { - std::vector input_data = { - "ACAGCTAGCTAGCTAGCTAGCTG", - "ATATTATAAAAAATTTTAAAAAA", - "ATATATTCTCTCTCTCTCATA", - "GTGTGTGTGGGGGGCCCTTTTTTCATA", - }; - - BOSSConstructor constructor(k, false, TypeParam::kWeighted ? 8 : 0, "", 1, - 20000, container); + BOSSConstructor constructor(k, false, 8, "", 1, 20000, container); constructor.add_sequences(std::vector(input_data)); BOSS constructed; @@ -203,22 +180,18 @@ TYPED_TEST(WeightedBOSSConstruct, ConstructionDummyKmersZeroWeight) { } TYPED_TEST(WeightedBOSSConstruct, ConstructionDummyKmersZeroWeightChunks) { - ASSERT_TRUE(TypeParam::kWeighted); - + std::vector input_data = { + "ACAGCTAGCTAGCTAGCTAGCTG", + "ATATTATAAAAAATTTTAAAAAA", + "ATATATTCTCTCTCTCTCATA", + "GTGTGTGTGGGGGGCCCTTTTTTCATA", + }; for (auto container : { kmer::ContainerType::VECTOR, kmer::ContainerType::VECTOR_DISK }) { for (size_t k = 1; k < kMaxK; ++k) { - std::vector input_data = { - "ACAGCTAGCTAGCTAGCTAGCTG", - "ATATTATAAAAAATTTTAAAAAA", - "ATATATTCTCTCTCTCTCATA", - "GTGTGTGTGGGGGGCCCTTTTTTCATA", - }; - BOSS constructed(k); auto constructor - = IBOSSChunkConstructor::initialize(k, false, TypeParam::kWeighted ? 8 : 0, - "", 1, 20000, container); + = IBOSSChunkConstructor::initialize(k, false, 8, "", 1, 20000, container); for (auto &&sequence : input_data) { constructor->add_sequence(std::move(sequence)); @@ -247,149 +220,115 @@ TYPED_TEST(WeightedBOSSConstruct, ConstructionDummyKmersZeroWeightChunks) { } } -TYPED_TEST(BOSSConstruct, ConstructionEQAppendingCanonical) { - for (auto container : { kmer::ContainerType::VECTOR, kmer::ContainerType::VECTOR_DISK }) { - for (size_t k = 1; k < kMaxK; ++k) { - std::vector input_data = { - "ACAGCTAGCTAGCTAGCTAGCTG", - "ATATTATAAAAAATTTTAAAAAA", - "ATATATTCTCTCTCTCTCATA", - "GTGTGTGTGGGGGGCCCTTTTTTCATA", - }; - BOSSConstructor constructor(k, true, TypeParam::kWeighted ? 8 : 0, "", 1, - 20'000, container); - constructor.add_sequences(std::vector(input_data)); - BOSS constructed(&constructor); - - BOSS appended(k); - for (auto &sequence : input_data) { - appended.add_sequence(sequence); - reverse_complement(sequence.begin(), sequence.end()); - appended.add_sequence(sequence); - } - - EXPECT_EQ(constructed, appended); +TEST(BOSSConstruct, ConstructionEQAppendingCanonical) { + std::vector input_data = { + "ACAGCTAGCTAGCTAGCTAGCTG", + "ATATTATAAAAAATTTTAAAAAA", + "ATATATTCTCTCTCTCTCATA", + "GTGTGTGTGGGGGGCCCTTTTTTCATA", + }; + for (size_t k = 1; k < kMaxK; ++k) { + BOSS appended(k); + for (auto &sequence : input_data) { + appended.add_sequence(sequence); + reverse_complement(sequence.begin(), sequence.end()); + appended.add_sequence(sequence); } - } -} - -TYPED_TEST(BOSSConstruct, ConstructionLong) { - for (auto container : { kmer::ContainerType::VECTOR, kmer::ContainerType::VECTOR_DISK }) { - for (size_t k = 1; k < kMaxK; ++k) { - BOSSConstructor constructor(k, false, TypeParam::kWeighted ? 8 : 0, "", 1, - 20'000, container); - constructor.add_sequences({ std::string(k + 1, 'A') }); - BOSS constructed(&constructor); - - BOSS appended(k); - appended.add_sequence(std::string(k + 1, 'A')); - - EXPECT_EQ(constructed, appended); - ASSERT_TRUE(constructed.num_nodes() > 1u); + for (auto container : { kmer::ContainerType::VECTOR, kmer::ContainerType::VECTOR_DISK }) { + for (bool weighted : { false, true }) { + BOSSConstructor constructor(k, true, weighted ? 8 : 0, "", 1, + 20'000, container); + constructor.add_sequences(std::vector(input_data)); + BOSS constructed(&constructor); + + EXPECT_EQ(constructed, appended); + } } } } -TYPED_TEST(BOSSConstruct, ConstructionShort) { - for (auto container : { kmer::ContainerType::VECTOR, kmer::ContainerType::VECTOR_DISK }) { - for (size_t k = 1; k < kMaxK; ++k) { - BOSSConstructor constructor(k, false, TypeParam::kWeighted ? 8 : 0, "", 1, - 20'000, container); - constructor.add_sequences({ std::string(k, 'A') }); - BOSS constructed(&constructor); +TEST(BOSSConstruct, ConstructionLong) { + for (size_t k = 1; k < kMaxK; ++k) { + BOSS appended(k); + appended.add_sequence(std::string(k + 1, 'A')); - BOSS appended(k); - appended.add_sequence(std::string(k, 'A')); + for (auto container : { kmer::ContainerType::VECTOR, kmer::ContainerType::VECTOR_DISK }) { + for (bool weighted : { false, true }) { + BOSSConstructor constructor(k, false, weighted ? 8 : 0, "", 1, + 20'000, container); + constructor.add_sequences({ std::string(k + 1, 'A') }); + BOSS constructed(&constructor); - EXPECT_EQ(constructed, appended); - ASSERT_EQ(1u, constructed.num_nodes()); + EXPECT_EQ(constructed, appended); + ASSERT_TRUE(constructed.num_nodes() > 1u); + } } } } -TYPED_TEST(BOSSConstruct, ConstructionFromChunks) { - for (auto container : { kmer::ContainerType::VECTOR, kmer::ContainerType::VECTOR_DISK }) { - for (size_t k = 1; k < kMaxK; k += 6) { - BOSS boss_dynamic(k); - boss_dynamic.add_sequence(std::string(100, 'A')); - boss_dynamic.add_sequence(std::string(100, 'C')); - boss_dynamic.add_sequence(std::string(100, 'T') + "A" - + std::string(100, 'G')); - - for (size_t suffix_len = 0; suffix_len < k && suffix_len <= 3u; ++suffix_len) { - std::unique_ptr graph_data; - - for (const std::string &suffix : KmerExtractorBOSS::generate_suffixes(suffix_len)) { - std::unique_ptr constructor( - IBOSSChunkConstructor::initialize(k, false, TypeParam::kWeighted ? 8 : 0, - suffix, 1, 20000, container)); - - constructor->add_sequence(std::string(100, 'A')); - constructor->add_sequence(std::string(100, 'C')); - constructor->add_sequence(std::string(100, 'T') + "A" - + std::string(100, 'G')); - - auto next_block = constructor->build_chunk(); - if (graph_data) { - graph_data->extend(*next_block); - delete next_block; - } else { - graph_data.reset(next_block); - } - } +TEST(BOSSConstruct, ConstructionShort) { + for (size_t k = 1; k < kMaxK; ++k) { + BOSS appended(k); + appended.add_sequence(std::string(k, 'A')); - BOSS boss; - graph_data->initialize_boss(&boss); + for (auto container : { kmer::ContainerType::VECTOR, kmer::ContainerType::VECTOR_DISK }) { + for (bool weighted : { false, true }) { + BOSSConstructor constructor(k, false, weighted ? 8 : 0, "", 1, + 20'000, container); + constructor.add_sequences({ std::string(k, 'A') }); + BOSS constructed(&constructor); - EXPECT_EQ(boss_dynamic, boss); + EXPECT_EQ(constructed, appended); + ASSERT_EQ(1u, constructed.num_nodes()); } } } } -TYPED_TEST(BOSSConstruct, ConstructionFromChunksParallel) { - const uint64_t num_threads = 4; - - for (auto container : { kmer::ContainerType::VECTOR, kmer::ContainerType::VECTOR_DISK }) { - for (size_t k = 1; k < kMaxK; k += 6) { - BOSS boss_dynamic(k); - boss_dynamic.add_sequence(std::string(100, 'A')); - boss_dynamic.add_sequence(std::string(100, 'C')); - boss_dynamic.add_sequence(std::string(100, 'T') + "A" - + std::string(100, 'G')); +TEST(BOSSConstruct, ConstructionFromChunks) { + for (size_t k = 1; k < kMaxK; k += 6) { + BOSS boss_dynamic(k); + boss_dynamic.add_sequence(std::string(100, 'A')); + boss_dynamic.add_sequence(std::string(100, 'C')); + boss_dynamic.add_sequence(std::string(100, 'T') + "A" + + std::string(100, 'G')); + for (auto container : { kmer::ContainerType::VECTOR, kmer::ContainerType::VECTOR_DISK }) { for (size_t suffix_len = 0; suffix_len < k && suffix_len <= 3u; ++suffix_len) { - std::unique_ptr graph_data; - - for (const std::string &suffix : KmerExtractorBOSS::generate_suffixes(suffix_len)) { - std::unique_ptr constructor( - IBOSSChunkConstructor::initialize(k, false, TypeParam::kWeighted ? 8 : 0, - suffix, num_threads, 20000, container)); - - constructor->add_sequence(std::string(100, 'A')); - constructor->add_sequence(std::string(100, 'C')); - constructor->add_sequence(std::string(100, 'T') + "A" - + std::string(100, 'G')); - - auto next_block = constructor->build_chunk(); - if (graph_data) { - graph_data->extend(*next_block); - delete next_block; - } else { - graph_data.reset(next_block); + for (bool weighted : { false, true }) { + for (size_t num_threads : { 1, 4 }) { + std::unique_ptr graph_data; + + for (const std::string &suffix : KmerExtractorBOSS::generate_suffixes(suffix_len)) { + std::unique_ptr constructor( + IBOSSChunkConstructor::initialize(k, false, weighted ? 8 : 0, + suffix, num_threads, 20000, container)); + + constructor->add_sequence(std::string(100, 'A')); + constructor->add_sequence(std::string(100, 'C')); + constructor->add_sequence(std::string(100, 'T') + "A" + + std::string(100, 'G')); + + auto next_block = constructor->build_chunk(); + if (graph_data) { + graph_data->extend(*next_block); + delete next_block; + } else { + graph_data.reset(next_block); + } + } + + BOSS boss; + graph_data->initialize_boss(&boss); + + EXPECT_EQ(boss_dynamic, boss); } } - - BOSS boss; - graph_data->initialize_boss(&boss); - - EXPECT_EQ(boss_dynamic, boss); } } } } - // TODO: k is node length template void sequence_to_kmers_parallel_wrapper(std::vector *reads,