Skip to content

Commit

Permalink
delete tax test files
Browse files Browse the repository at this point in the history
Signed-off-by: Radu Muntean <[email protected]>
  • Loading branch information
heracle committed Aug 3, 2021
1 parent 4fafcb8 commit 76dcb32
Show file tree
Hide file tree
Showing 10 changed files with 52 additions and 1,125 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
*.fai
!metagraph/tests/data/*.fa
!metagraph/tests/data/*.fai
!metagraph/tests/data/taxonomic_data/*.fa
metagraph/tests/data/*dump_test*
projects/*/temp
visualization/geolocation/data/*
Expand Down
9 changes: 4 additions & 5 deletions metagraph/src/annotation/taxonomy/label_to_taxid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,11 @@ namespace annot {
using mtg::common::logger;

void TaxonomyBase::assign_label_type(const std::string &label, bool *require_accversion_to_taxid_map) {
if (utils::starts_with(label, ">gi|")) {
if (utils::starts_with(label, "gi|")) {
// e.g. >gi|1070643132|ref|NC_031224.1| Arthrobacter phage Mudcat, complete genome
label_type = GEN_BANK;
*require_accversion_to_taxid_map = true;
} else if (utils::starts_with(label, ">") &&
utils::starts_with(utils::split_string(label, ":")[1], "taxid|")) {
} else if (utils::starts_with(utils::split_string(label, ":")[1], "taxid|")) {
// e.g. >kraken:taxid|2016032|NC_047834.1 Alteromonas virus vB_AspP-H4/4, complete genome
label_type = TAXID;
*require_accversion_to_taxid_map = false;
Expand Down Expand Up @@ -71,8 +70,8 @@ void TaxonomyBase::read_accversion_to_taxid_map(const std::string &filepath,

tsl::hopscotch_set<std::string> input_accessions;
if (anno_matrix != NULL) {
for (const std::string &accversion : anno_matrix->get_annotation().get_all_labels()) {
input_accessions.insert(accversion);
for (const std::string &label : anno_matrix->get_annotation().get_all_labels()) {
input_accessions.insert(get_accession_version_from_label(label));
}
}

Expand Down
36 changes: 28 additions & 8 deletions metagraph/src/annotation/taxonomy/tax_classifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, ChildrenLi
continue;
}

if (considered_relevant_taxids.find(full_parents_list[taxid]) == considered_relevant_taxids.end()) {
if (not considered_relevant_taxids.count(full_parents_list[taxid])) {
relevant_taxids.push_back(full_parents_list[taxid]);
considered_relevant_taxids.insert(full_parents_list[taxid]);
}
Expand Down Expand Up @@ -137,11 +137,13 @@ void TaxonomyClsAnno::dfs_statistics(const TaxId node,
node_to_linearization_idx[node] = tree_linearization->size();
tree_linearization->push_back(node);
uint32_t depth = 0;
for (const TaxId &child : tree.at(node)) {
dfs_statistics(child, tree, tree_linearization);
tree_linearization->push_back(node);
if (node_depth[child] > depth) {
depth = node_depth[child];
if (tree.count(node)) {
for (const TaxId &child : tree.at(node)) {
dfs_statistics(child, tree, tree_linearization);
tree_linearization->push_back(node);
if (node_depth[child] > depth) {
depth = node_depth[child];
}
}
}
node_depth[node] = depth + 1;
Expand Down Expand Up @@ -177,8 +179,26 @@ void TaxonomyClsAnno::rmq_preprocessing(const std::vector<TaxId> &tree_lineariza
}
}

TaxId TaxonomyClsAnno::assign_class(const std::string &sequence) const {
throw std::runtime_error("Assign class not implemented. Received " + sequence);
/**
 * Placeholder for the per-kmer LCA lookup on TaxonomyClsAnno; not implemented yet.
 *
 * Writes a diagnostic line (including the value of `reversed`) to cerr and then
 * always throws.
 *
 * @param sequence the query sequence; only its size is reported in the error message
 * @param reversed orientation flag; only echoed in the diagnostic output
 * @throws std::runtime_error unconditionally
 */
std::vector<TaxId> TaxonomyClsAnno::get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const {
    cerr << "Assign class not implemented reversed = " << reversed << "\n";
    // The throw is the only exit path; the former trailing exit(0) was unreachable.
    throw std::runtime_error("get_lca_taxids_for_seq TaxonomyClsAnno not implemented. Received seq size "
                             + to_string(sequence.size()));
}

/**
 * Placeholder for the per-kmer LCA lookup on TaxonomyClsImportDB; not implemented yet.
 *
 * Writes a diagnostic line (including the value of `reversed`) to cerr and then
 * always throws.
 *
 * @param sequence the query sequence; only its size is reported in the error message
 * @param reversed orientation flag; only echoed in the diagnostic output
 * @throws std::runtime_error unconditionally
 */
std::vector<TaxId> TaxonomyClsImportDB::get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const {
    cerr << "Assign class not implemented reversed = " << reversed << "\n";
    // The throw is the only exit path; the former trailing exit(0) was unreachable.
    throw std::runtime_error("get_lca_taxids_for_seq TaxonomyClsImportDB not implemented. Received seq size "
                             + to_string(sequence.size()));
}

/**
 * Placeholder for the LCA computation on TaxonomyClsAnno; not implemented yet.
 *
 * @param taxids the taxids whose LCA is requested; only the count is reported
 *        in the error message
 * @throws std::runtime_error unconditionally
 */
TaxId TaxonomyClsAnno::find_lca(const std::vector<TaxId> &taxids) const {
    // The throw is the only exit path; the former trailing exit(0) was unreachable.
    throw std::runtime_error("find_lca TaxonomyClsAnno not implemented. Received taxids size "
                             + to_string(taxids.size()));
}

/**
 * Placeholder for the LCA computation on TaxonomyClsImportDB; not implemented yet.
 *
 * @param taxids the taxids whose LCA is requested; only the count is reported
 *        in the error message
 * @throws std::runtime_error unconditionally
 */
TaxId TaxonomyClsImportDB::find_lca(const std::vector<TaxId> &taxids) const {
    // The throw is the only exit path; the former trailing exit(0) was unreachable.
    throw std::runtime_error("find_lca TaxonomyClsImportDB not implemented. Received taxids size "
                             + to_string(taxids.size()));
}

} // namespace annot
Expand Down
27 changes: 18 additions & 9 deletions metagraph/src/annotation/taxonomy/tax_classifier.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,12 @@ class TaxonomyBase {

virtual ~TaxonomyBase() {};

// TODO implement
virtual TaxId assign_class(const std::string &sequence) const = 0;
TaxId assign_class(const std::string &sequence) const;

PROTECTED_TESTABLE:
void assign_label_type(const std::string &label, bool *require_accversion_to_taxid_map);

// TODO implement.
TaxId find_lca(const std::vector<TaxId> &taxids) const;
virtual TaxId find_lca(const std::vector<TaxId> &taxids) const = 0;

std::string get_accession_version_from_label(const std::string &label) const;

Expand All @@ -57,7 +55,6 @@ class TaxonomyBase {
*/
void read_accversion_to_taxid_map(const std::string &filepath, const graph::AnnotatedDBG *anno_matrix);

// TODO implement.
/**
* Update the current node_scores and best_lca by taking into account the weight of the start_node and all its ancestors.
*
Expand All @@ -75,7 +72,13 @@ class TaxonomyBase {
tsl::hopscotch_map<TaxId, uint64_t> *node_scores,
tsl::hopscotch_set<TaxId> *nodes_already_propagated,
TaxId *best_lca,
uint32_t *best_lca_dist_to_root);
uint32_t *best_lca_dist_to_root) const;

/**
* Get the list of LCA taxids for each kmer in a given sequence.
* The sequence can be given in forward or in reversed orientation.
*/
virtual std::vector<TaxId> get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const = 0;

LabelType label_type;

Expand Down Expand Up @@ -104,7 +107,10 @@ class TaxonomyClsImportDB : public TaxonomyBase {
TaxonomyClsImportDB(const std::string &taxdb_filepath,
const double lca_coverage_rate,
const double kmers_discovery_rate);
TaxId assign_class(const std::string &sequence) const;

PRIVATE_TESTABLE:
std::vector<TaxId> get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const;
TaxId find_lca(const std::vector<TaxId> &taxids) const;
};

class TaxonomyClsAnno : public TaxonomyBase {
Expand All @@ -128,8 +134,7 @@ class TaxonomyClsAnno : public TaxonomyBase {
// todo implement
void export_taxdb(const std::string &filepath) const;

// todo implement
TaxId assign_class(const std::string &sequence) const;
TaxId assign_class_toplabels(const std::string &sequence, const double label_fraction) const;

PRIVATE_TESTABLE:
/**
Expand Down Expand Up @@ -162,6 +167,10 @@ class TaxonomyClsAnno : public TaxonomyBase {
const ChildrenList &tree,
std::vector<TaxId> *tree_linearization);

TaxId find_lca(const std::vector<TaxId> &taxids) const;

std::vector<TaxId> get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const;

/**
* rmq_data[0] contains the taxonomic tree linearization
* (e.g. for root 1 and edges={1-2; 1-3}, the linearization is "1 2 1 3 1").
Expand Down
6 changes: 2 additions & 4 deletions metagraph/tests/annotation/taxonomy/test_taxonomy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,13 @@

#include <tsl/hopscotch_map.h>
#include <tsl/hopscotch_set.h>
#include <stdio.h>
#include <stdlib.h>

#include "annotation/taxonomy/tax_classifier.hpp"

namespace mtg {
namespace test {

TEST (TaxonomyTest, DfsStatistics) {
TEST (TaxonomyTest, ClsAnno_DfsStatistics) {
mtg::annot::TaxonomyClsAnno *tax = new mtg::annot::TaxonomyClsAnno();
tsl::hopscotch_map<uint32_t, std::vector<uint32_t>> tree {
{0, {1, 2, 3}}, // node 0 -> root
Expand Down Expand Up @@ -59,7 +57,7 @@ TEST (TaxonomyTest, DfsStatistics) {
EXPECT_EQ(expected_node_to_linearization_idx, tax->node_to_linearization_idx);
}

TEST (TaxonomyTest, RmqPreprocessing) {
TEST (TaxonomyTest, ClsAnno_RmqPreprocessing) {
mtg::annot::TaxonomyClsAnno *tax = new mtg::annot::TaxonomyClsAnno();

tax->node_depth = {
Expand Down
21 changes: 0 additions & 21 deletions metagraph/tests/data/taxonomic_data/dumb.accession2taxid

This file was deleted.

20 changes: 0 additions & 20 deletions metagraph/tests/data/taxonomic_data/dumb_nodes.dmp

This file was deleted.

161 changes: 0 additions & 161 deletions metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa

This file was deleted.

Loading

0 comments on commit 76dcb32

Please sign in to comment.