Skip to content

Commit

Permalink
Backmerge: #2539 - Export of unknown monomer to HELM doesn't work
Browse files Browse the repository at this point in the history
Backmerge to master
  • Loading branch information
AliaksandrDziarkach committed Jan 15, 2025
1 parent a83ae2a commit 6b95d54
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 6 deletions.
1 change: 1 addition & 0 deletions api/tests/integration/ref/formats/helm_to_ket.py.out
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
*** HELM to KET ***
helm_aminoacids_variants.ket:SUCCEED
helm_annotations.ket:SUCCEED
helm_any_chem.ket:SUCCEED
helm_chem_peptide.ket:SUCCEED
helm_chem_rna.ket:SUCCEED
helm_chem_rna_hydro.ket:SUCCEED
Expand Down
1 change: 1 addition & 0 deletions api/tests/integration/ref/formats/ket_to_helm.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
dna_variants.ket:SUCCEED
helm_aminoacids_variants.ket:SUCCEED
helm_annotations.ket:SUCCEED
helm_any_chem.ket:SUCCEED
helm_chem_peptide.ket:SUCCEED
helm_chem_rna.ket:SUCCEED
helm_chem_rna_hydro.ket:SUCCEED
Expand Down
1 change: 1 addition & 0 deletions api/tests/integration/tests/formats/helm_to_ket.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def find_diff(a, b):
"helm_chem_rna_hydro": "CHEM1{[MCC]}|RNA1{R(U)P}$CHEM1,RNA1,1:pair-3:pair$$$V2.0",
"helm_unsplit": "RNA1{[5Br-dU]}$$$$V2.0",
"helm_smiles_no_ap": "CHEM1{[P(O)(O)(=O)O]}$$$$V2.0",
"helm_any_chem": "CHEM1{*}|CHEM2{*}$$$$V2.0",
}

lib = indigo.loadMonomerLibraryFromFile(
Expand Down
9 changes: 7 additions & 2 deletions api/tests/integration/tests/formats/ket_to_helm.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,18 @@ def find_diff(a, b):
"helm_molecule_2418": "PEPTIDE1{A}|CHEM1{[C1C=CC=CC=1[*:1] |$;;;;;;_R1$|]}$PEPTIDE1,CHEM1,1:R1-1:R1$$$V2.0",
"helm_chem_rna_hydro": "CHEM1{[MCC]}|RNA1{R(U)P}$CHEM1,RNA1,1:pair-3:pair$$$V2.0",
"helm_monomer_molecule_direct": "PEPTIDE1{A}|CHEM1{[C(=C)N[*:1] |$;;;_R1$|]}$PEPTIDE1,CHEM1,1:R2-1:R1$$$V2.0",
"helm_unknown": "CHEM1{[C1([*:1])C([*:4])C([*:3])C1[*:2] |$;_R1;;_R4;;_R3;;_R2$|]}$$$$V2.0",
"helm_unknown": "CHEM1{*}$$$$V2.0",
"helm_different_id": "PEPTIDE1{A}|RNA1{R(A)P}$$$$V2.0",
"helm_any_chem": "CHEM1{*}|CHEM2{*}$$$$V2.0",
}

for filename in sorted(helm_data.keys()):
mol = indigo.loadKetDocumentFromFile(os.path.join(ref, filename + ".ket"))
helm = mol.helm(lib)
try:
helm = mol.helm(lib)
except IndigoException as e:
print("Test %s failed: %s" % (filename, getIndigoExceptionText(e)))
continue
helm_ref = helm_data[filename]
if helm_ref == helm:
print(filename + ".ket:SUCCEED")
Expand Down
102 changes: 102 additions & 0 deletions api/tests/integration/tests/formats/ref/helm_any_chem.ket
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
{
"root": {
"nodes": [
{
"$ref": "monomer0"
},
{
"$ref": "monomer1"
}
],
"templates": [
{
"$ref": "monomerTemplate-unknown_monomer_0"
},
{
"$ref": "monomerTemplate-unknown_monomer_1"
}
]
},
"monomer0": {
"type": "monomer",
"id": "0",
"seqid": 1,
"position": {
"x": 0.000000,
"y": -0.000000
},
"alias": "unknown_monomer_0",
"templateId": "unknown_monomer_0"
},
"monomer1": {
"type": "monomer",
"id": "1",
"seqid": 2,
"position": {
"x": 0.000000,
"y": -1.500000
},
"alias": "unknown_monomer_1",
"templateId": "unknown_monomer_1"
},
"monomerTemplate-unknown_monomer_0": {
"type": "monomerTemplate",
"id": "unknown_monomer_0",
"class": "CHEM",
"alias": "unknown_monomer_0",
"unresolved": true,
"idtAliases": {
"base": "unknown_monomer_0"
},
"attachmentPoints": [
{
"attachmentAtom": -1,
"label": "R1"
},
{
"attachmentAtom": -1,
"label": "R2"
},
{
"attachmentAtom": -1,
"label": "R3"
},
{
"attachmentAtom": -1,
"label": "R4"
}
],
"atoms": [],
"bonds": []
},
"monomerTemplate-unknown_monomer_1": {
"type": "monomerTemplate",
"id": "unknown_monomer_1",
"class": "CHEM",
"alias": "unknown_monomer_1",
"unresolved": true,
"idtAliases": {
"base": "unknown_monomer_1"
},
"attachmentPoints": [
{
"attachmentAtom": -1,
"label": "R1"
},
{
"attachmentAtom": -1,
"label": "R2"
},
{
"attachmentAtom": -1,
"label": "R3"
},
{
"attachmentAtom": -1,
"label": "R4"
}
],
"atoms": [],
"bonds": []
}
}
24 changes: 21 additions & 3 deletions core/indigo-core/molecule/src/sequence_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1248,15 +1248,12 @@ void SequenceLoader::loadIdt(KetDocument& document)
checkAddTemplate(document, monomer_template);
single_monomer = monomer_template_id;
single_monomer_alias = monomer_template.getStringProp("alias");
single_monomer_class = MonomerTemplates::classToStr(monomer_template.monomerClass());
}
else // IDT alias not found
{
unresolved = true;
single_monomer = "unknown_monomer_with_idt_alias_" + idt_alias;
single_monomer_alias = idt_alias;
auto monomer_class = MonomerClass::CHEM;
single_monomer_class = MonomerTemplates::classToStr(monomer_class);
// Unresolved monomer could be in any position
MonomerTemplate monomer_template(single_monomer, monomer_class, IdtAlias(idt_alias, idt_alias, idt_alias, idt_alias), true);
monomer_template.setStringProp("alias", idt_alias);
Expand Down Expand Up @@ -1302,6 +1299,14 @@ std::string SequenceLoader::readHelmMonomerAlias(KetDocument& document, MonomerC
std::string monomer_alias;
auto ch = _scanner.lookNext();

if (ch == '*')
{
if (monomer_class != MonomerClass::CHEM)
throw Error("'*' could be used only for CHEM monomers for now.");
_scanner.skip(1);
return "*";
}

if (ch == '[')
{
_scanner.skip(1);
Expand Down Expand Up @@ -1692,6 +1697,7 @@ void SequenceLoader::loadHELM(KetDocument& document)
std::string simple_polymer_type = "";
int monomer_idx = 0;
int prev_monomer_template_atom_idx = -1;
int unknown_count = 0;
_unknown_ambiguous_count = 0;
using polymer_map = std::map<std::string, std::map<int, size_t>>;
polymer_map used_polymer_nums;
Expand Down Expand Up @@ -1753,6 +1759,18 @@ void SequenceLoader::loadHELM(KetDocument& document)
ch = _scanner.lookNext();
if (ch != '}')
throw Error("Unexpected symbol. Expected '}' but found '%c'.", ch); // only one monomer in chem

auto& alias = std::get<0>(monomer_info);
if (alias == "*") // if monomer_alias == "*"
{
alias = "unknown_monomer_" + std::to_string(unknown_count++);
MonomerTemplate monomer_template(alias, MonomerClass::CHEM, IdtAlias(alias, alias, alias, alias), true);
monomer_template.setStringProp("alias", alias);
for (auto ap : {"R1", "R2", "R3", "R4"})
monomer_template.AddAttachmentPoint(ap, -1);
checkAddTemplate(document, monomer_template);
_added_templates.emplace(monomer_class, alias);
}
cur_polymer_map->second[monomer_idx] = addKetMonomer(document, monomer_info, monomer_class, pos);
}
else if (monomer_class == MonomerClass::AminoAcid)
Expand Down
7 changes: 6 additions & 1 deletion core/indigo-core/molecule/src/sequence_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1285,7 +1285,12 @@ std::string SequenceSaver::saveHELM(KetDocument& document, std::vector<std::dequ
if (monomer_class == MonomerClass::Base)
helm_string += '(';
if (monomer->monomerType() == KetBaseMonomer::MonomerType::Monomer)
add_monomer(document, monomer, helm_string);
{
if (templates.at(monomer->templateId()).unresolved())
helm_string += '*';
else
add_monomer(document, monomer, helm_string);
}
else if (monomer->monomerType() == KetBaseMonomer::MonomerType::AmbiguousMonomer)
{
const auto& templ = variant_templates.at(monomer->templateId());
Expand Down

0 comments on commit 6b95d54

Please sign in to comment.