Skip to content

Commit

Permalink
fixup!
Browse files Browse the repository at this point in the history
  • Loading branch information
Taepper committed May 29, 2024
1 parent f1d67ca commit bb85541
Show file tree
Hide file tree
Showing 23 changed files with 213 additions and 1,340 deletions.
1 change: 1 addition & 0 deletions include/silo/common/aa_symbols.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class AminoAcid {
static constexpr std::string_view SYMBOL_NAME_LOWER_CASE = "amino acid";
static constexpr std::string_view SYMBOL_NAME_UPPER_CASE = "AMINO ACID";
static constexpr std::string_view SYMBOL_NAME_SHORT = "AA";
static constexpr std::string_view PREFIX = "aa_";

static constexpr std::array<Symbol, COUNT> SYMBOLS{
Symbol::GAP, Symbol::A, Symbol::C, Symbol::D, Symbol::E, Symbol::F, Symbol::G,
Expand Down
1 change: 1 addition & 0 deletions include/silo/common/nucleotide_symbols.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class Nucleotide {
static constexpr std::string_view SYMBOL_NAME_LOWER_CASE = "nucleotide";
static constexpr std::string_view SYMBOL_NAME_UPPER_CASE = "NUCLEOTIDE";
static constexpr std::string_view SYMBOL_NAME_SHORT = "NUC";
static constexpr std::string_view PREFIX = "nuc_";

static constexpr std::array<Symbol, COUNT> SYMBOLS{
Symbol::GAP,
Expand Down
11 changes: 4 additions & 7 deletions include/silo/common/string_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,14 @@ std::string removeSymbol(const std::string& value, char symbol);

std::vector<std::string> slice(const std::vector<std::string>& elements, size_t start, size_t end);

std::vector<std::string> prepend(
const std::string& prefix,
const std::vector<std::string>& elements
);
std::vector<std::string> prepend(std::string_view prefix, const std::vector<std::string>& elements);

std::vector<std::string> tie(
const std::string& prefix,
std::string_view prefix,
const std::vector<std::string>& elements1,
const std::string& delimiter,
std::string_view delimiter,
const std::vector<std::string>& elements2,
const std::string& suffix
std::string_view suffix
);

} // namespace silo
8 changes: 0 additions & 8 deletions include/silo/config/database_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,6 @@ class DatabaseConfigReader {

} // namespace silo::config

template <>
struct std::less<silo::config::DatabaseMetadata> {
bool operator()(
const silo::config::DatabaseMetadata& lhs,
const silo::config::DatabaseMetadata& rhs
) const;
};

template <>
struct [[maybe_unused]] fmt::formatter<silo::config::DatabaseConfig> : fmt::formatter<std::string> {
[[maybe_unused]] static auto format(
Expand Down
4 changes: 1 addition & 3 deletions include/silo/preprocessing/metadata_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,10 @@ class MetadataInfo {
const silo::config::DatabaseConfig& database_config
);

static std::vector<std::string> getMetadataTypes(
static std::vector<std::string> getMetadataSQLTypes(
const silo::config::DatabaseConfig& database_config
);

static std::string getMetadataStruct(const silo::config::DatabaseConfig& database_config);

static std::vector<std::string> getMetadataSelects(
const silo::config::DatabaseConfig& database_config
);
Expand Down
31 changes: 20 additions & 11 deletions include/silo/preprocessing/preprocessor.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include <optional>

#include "silo/config/database_config.h"
#include "silo/config/preprocessing_config.h"
#include "silo/preprocessing/preprocessing_database.h"
Expand All @@ -14,15 +16,21 @@ namespace preprocessing {
class SequenceInfo;

class Preprocessor {
std::vector<std::string> nuc_sequences;
std::vector<std::string> aa_sequences;
std::vector<std::string> order_by_fields;
config::PreprocessingConfig preprocessing_config;
config::DatabaseConfig database_config;
PreprocessingDatabase preprocessing_db;
ReferenceGenomes reference_genomes_;
PangoLineageAliasLookup alias_lookup_;

std::vector<std::string> nuc_sequences;
std::vector<std::string> aa_sequences;
std::vector<std::string> order_by_fields;
std::vector<std::string> prefixed_order_by_fields;
std::vector<std::string> prefixed_nuc_sequences;
std::vector<std::string> prefixed_aa_sequences;
std::vector<std::string> prefixed_nuc_insertions_fields;
std::vector<std::string> prefixed_aa_insertions_fields;

public:
Preprocessor(
config::PreprocessingConfig preprocessing_config,
Expand All @@ -34,6 +42,9 @@ class Preprocessor {
Database preprocess();

private:
static std::string makeNonNullKey(const std::string& field);
static std::string makeNonNullKeyOrConstant(const std::optional<std::string>& field);

void buildTablesFromNdjsonInput(const std::filesystem::path& file_name);
void buildMetadataTableFromFile(const std::filesystem::path& metadata_filename);

Expand All @@ -57,11 +68,12 @@ class Preprocessor {
);

void createPartitionedSequenceTablesFromSequenceFiles();

template <typename SymbolType>
void createPartitionedTableForSequence(
const std::string& sequence_name,
const std::string& reference_sequence,
const std::filesystem::path& filename,
const std::string& table_prefix
const std::filesystem::path& filename
);

Database buildDatabase(
Expand All @@ -74,12 +86,9 @@ class Preprocessor {
const preprocessing::Partitions& partition_descriptor,
const std::string& order_by_clause
);
void buildNucleotideSequenceStore(
Database& database,
const preprocessing::Partitions& partition_descriptor,
const std::string& order_by_clause
);
void buildAminoAcidSequenceStore(

template <typename SymbolType>
void buildSequenceStore(
Database& database,
const preprocessing::Partitions& partition_descriptor,
const std::string& order_by_clause
Expand Down
6 changes: 6 additions & 0 deletions include/silo/storage/reference_genomes.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ struct ReferenceGenomes {

static ReferenceGenomes readFromFile(const std::filesystem::path& reference_genomes_path);

template <typename SymbolType>
std::vector<std::string> getSequenceNames() const;

template <typename SymbolType>
std::map<std::string, std::string> getRawSequenceMap() const;

template <typename SymbolType>
static std::vector<typename SymbolType::Symbol> stringToVector(const std::string& string);

Expand Down
12 changes: 6 additions & 6 deletions src/silo/common/string_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,30 +45,30 @@ std::vector<std::string> slice(const std::vector<std::string>& elements, size_t
}

std::vector<std::string> prepend(
const std::string& prefix,
std::string_view prefix,
const std::vector<std::string>& elements
) {
std::vector<std::string> output;
output.reserve(elements.size());
for (const std::string& str : elements) {
output.emplace_back(prefix + str);
output.emplace_back(fmt::format("{}{}", prefix, str));
}
return output;
}

std::vector<std::string> tie(
const std::string& prefix,
std::string_view prefix,
const std::vector<std::string>& elements1,
const std::string& delimiter,
std::string_view delimiter,
const std::vector<std::string>& elements2,
const std::string& suffix
std::string_view suffix
) {
assert(elements1.size() == elements2.size());
std::vector<std::string> output;
output.reserve(elements1.size());
for (size_t i = 0; i < elements1.size(); ++i) {
output.emplace_back(
fmt::format("{}{}{}{}{}", prefix, elements1[i], delimiter, elements2[i], suffix)
fmt::format("{}{}{}{}{}", prefix, elements1.at(i), delimiter, elements2.at(i), suffix)
);
}
return output;
Expand Down
7 changes: 0 additions & 7 deletions src/silo/config/database_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,6 @@ std::string toString(ValueType type) {
}
} // namespace

bool std::less<silo::config::DatabaseMetadata>::operator()(
const silo::config::DatabaseMetadata& lhs,
const silo::config::DatabaseMetadata& rhs
) const {
return lhs.name < rhs.name;
}

namespace YAML {
template <>
struct convert<silo::config::DatabaseConfig> {
Expand Down
16 changes: 4 additions & 12 deletions src/silo/preprocessing/metadata_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,15 @@ void MetadataInfo::validateNdjsonFile(
));

if (result->HasError()) {
SPDLOG_WARN(
const std::string error_message = fmt::format(
"Preprocessing exception when retrieving the fields of the struct 'metadata' from the "
"metadata ndjson file '{}', "
"duckdb threw with error: {}",
ndjson_file.string(),
result->GetError()
);
return;
SPDLOG_ERROR(error_message);
throw PreprocessingException(error_message);
}

std::set<std::string> actual_fields;
Expand Down Expand Up @@ -149,7 +150,7 @@ std::vector<std::string> MetadataInfo::getMetadataFields(
return ret;
}

std::vector<std::string> MetadataInfo::getMetadataTypes(
std::vector<std::string> MetadataInfo::getMetadataSQLTypes(
const silo::config::DatabaseConfig& database_config
) {
std::vector<std::string> ret;
Expand All @@ -160,15 +161,6 @@ std::vector<std::string> MetadataInfo::getMetadataTypes(
return ret;
}

std::string MetadataInfo::getMetadataStruct(const silo::config::DatabaseConfig& database_config) {
std::vector<std::string> ret;
ret.reserve(database_config.schema.metadata.size());
for (const auto& field : database_config.schema.metadata) {
ret.push_back(fmt::format("{}: \'{}\'", field.name, toSQLType(field.type)));
}
return fmt::format("{{{}}}", boost::join(ret, ","));
}

std::vector<std::string> MetadataInfo::getMetadataSelects(
const silo::config::DatabaseConfig& database_config
) {
Expand Down
Loading

0 comments on commit bb85541

Please sign in to comment.