Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support outputting alignments in GAF format #384

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 53 additions & 12 deletions metagraph/src/cli/align.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,18 +287,7 @@ std::string format_alignment(std::string_view header,
const DeBruijnGraph &graph,
const Config &config) {
std::string sout;
if (!config.output_json) {
sout += fmt::format("{}\t{}", header, paths.get_query());
if (paths.empty()) {
sout += fmt::format("\t*\t*\t{}\t*\t*\t*", config.alignment_min_path_score);
} else {
for (const auto &path : paths.data()) {
sout += fmt::format("\t{}", path);
}
}

sout += "\n";
} else {
if (config.output_json) {
Json::StreamWriterBuilder builder;
builder["indentation"] = "";

Expand All @@ -319,6 +308,58 @@ std::string format_alignment(std::string_view header,

sout += fmt::format("{}\n", Json::writeString(builder, json_line));
}
} else if (config.output_gaf) {
std::string base = fmt::format("{}\t{}\t", header, paths.get_query().size());
for (size_t i = 0; i < paths.size(); ++i) {
const auto &path = paths[i];
Comment on lines +313 to +314
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for (size_t i = 0; i < paths.size(); ++i) {
const auto &path = paths[i];
for (const auto &path : paths) {

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had it set up this way because a bug in GCC-8 was causing the compilation to crash because of this kind of for-loop. Let's see if it happens this time.

const auto &cigar = path.get_cigar();
const auto &nodes = path.get_nodes();
sout += base;
if (path.empty()) {
sout += "*\t*\t*\t*\t*\t*\t*\t*\t*\t*\n";
continue;
}

size_t begin = 0;
size_t end = 0;

if (path.get_orientation()) {
begin = cigar.get_end_clipping();
end = paths.get_query().size() - cigar.get_clipping();
} else {
begin = cigar.get_clipping();
end = paths.get_query().size() - cigar.get_end_clipping();
}

sout += fmt::format(
"{}\t{}\t+\t{}\t{}\t{}\t{}\t{}\t{}\t255\tAS:i:{}\tcg:Z:{}\tMD:Z:{}",
begin, end,
path.get_orientation()
? fmt::format("<{}", fmt::join(nodes.rbegin(), nodes.rend(), ">"))
: fmt::format(">{}", fmt::join(nodes, ">")),
path.get_sequence().size() + path.get_offset(),
path.get_orientation() ? path.get_offset() : 0,
!path.get_orientation() ? path.get_offset() : 0,
cigar.get_num_matches(),
path.get_sequence().size(),
path.get_score(),
cigar.to_string(),
cigar.to_md_string(path.get_sequence())
);

sout += "\n";
}
} else {
sout += fmt::format("{}\t{}", header, paths.get_query());
if (paths.empty()) {
sout += fmt::format("\t*\t*\t{}\t*\t*\t*", config.alignment_min_path_score);
} else {
for (const auto &path : paths.data()) {
sout += fmt::format("\t{}", path);
}
}

sout += "\n";
}

return sout;
Expand Down
3 changes: 3 additions & 0 deletions metagraph/src/cli/config/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,8 @@ Config::Config(int argc, char *argv[]) {
output_compacted = true;
} else if (!strcmp(argv[i], "--json")) {
output_json = true;
} else if (!strcmp(argv[i], "--gaf")) {
output_gaf = true;
} else if (!strcmp(argv[i], "--unitigs")) {
to_fasta = true;
unitigs = true;
Expand Down Expand Up @@ -1005,6 +1007,7 @@ if (advanced) {
fprintf(stderr, "Available options for alignment:\n");
fprintf(stderr, "\t-o --outfile-base [STR]\t\t\t\tbasename of output file []\n");
fprintf(stderr, "\t --json \t\t\t\t\toutput alignment in JSON format [off]\n");
fprintf(stderr, "\t --gaf \t\t\t\t\toutput alignment in GAF format [off]\n");
if (advanced) {
fprintf(stderr, "\t --align-only-forwards \t\t\tdo not align backwards from a seed on basic-mode graphs [off]\n");
}
Expand Down
1 change: 1 addition & 0 deletions metagraph/src/cli/config/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class Config {
bool align_only_forwards = false;
bool filter_by_kmer = false;
bool output_json = false;
bool output_gaf = false;
bool aggregate_columns = false;
bool coordinates = false;
bool advanced = false;
Expand Down
40 changes: 40 additions & 0 deletions metagraph/src/graph/alignment/aligner_cigar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,46 @@ std::string Cigar::to_string() const {
return cigar_string;
}

std::string Cigar::to_md_string(std::string_view reference) const {
std::string md_string;
auto ref_it = reference.begin();

size_t match_count = 0;
for (const auto &[op, num] : cigar_) {
switch (op) {
case CLIPPED:
case INSERTION:
case NODE_INSERTION: {} break;
case MATCH: {
match_count += num;
ref_it += num;
} break;
case MISMATCH: {
if (match_count) {
md_string += std::to_string(match_count);
match_count = 0;
}
md_string += std::string(ref_it, ref_it + num);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

string_view would be enough. Why do you create a new string?

ref_it += num;
} break;
case DELETION: {
if (match_count) {
md_string += std::to_string(match_count);
match_count = 0;
}
md_string += "^" + std::string(ref_it, ref_it + num);
ref_it += num;
}
}
}

if (match_count)
md_string += std::to_string(match_count);

assert(ref_it == reference.end());
return md_string;
}

void Cigar::append(Operator op, LengthType num) {
if (!num)
return;
Expand Down
1 change: 1 addition & 0 deletions metagraph/src/graph/alignment/aligner_cigar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class Cigar {
bool empty() const { return cigar_.empty(); }

std::string to_string() const;
std::string to_md_string(std::string_view reference) const;

void append(Operator op, LengthType num = 1);
void append(Cigar&& other);
Expand Down