From 241480e7ea787ce2bba970a91d6b36d33654a644 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Wed, 11 Dec 2024 15:40:30 +0300 Subject: [PATCH] Backmerge: #2681 - Molfile V3000 with SGROUP type DAT fails to load in Ketcher due to missing spaces in FIELDDISP (#2685) --- .../ref/basic/sgroups_basic.py.out | 134 ++++++++++++++++++ .../integration/tests/basic/sgroups_basic.py | 7 +- .../molecule/src/molfile_loader.cpp | 132 ++++++++++++----- .../basic/2681-mol3000-fielddisp.mol | 127 +++++++++++++++++ 4 files changed, 363 insertions(+), 37 deletions(-) create mode 100644 data/molecules/basic/2681-mol3000-fielddisp.mol diff --git a/api/tests/integration/ref/basic/sgroups_basic.py.out b/api/tests/integration/ref/basic/sgroups_basic.py.out index d430293ab0..bf76a7f3f0 100644 --- a/api/tests/integration/ref/basic/sgroups_basic.py.out +++ b/api/tests/integration/ref/basic/sgroups_basic.py.out @@ -3346,3 +3346,137 @@ k 38 C 39 C 40 C + +molecules/basic/2681-mol3000-fielddisp.mol + +1 A G meS A G + -INDIGO-01000000002D + + 0 0 0 0 0 0 0 0 0 0 0 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 5 4 2 0 0 +M V30 BEGIN ATOM +M V30 1 Ala 1.5563 -9.2328 0.0 0 CLASS=AA SEQID=1 ATTCHORD=(2 2 Br) +M V30 2 Gly 2.7137 -9.23 0.0 0 CLASS=AA SEQID=2 ATTCHORD=(4 3 Br 1 Al) +M V30 3 meS 3.8712 -9.23 0.0 0 CLASS=AA SEQID=3 ATTCHORD=(4 4 Br 2 Al) +M V30 4 Ala 5.4539 -9.23 0.0 0 CLASS=AA SEQID=4 ATTCHORD=(4 3 Al 5 Br) +M V30 5 Gly 6.6114 -9.23 0.0 0 CLASS=AA SEQID=5 ATTCHORD=(2 4 Al) +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 2 1 +M V30 2 1 3 2 +M V30 3 1 4 3 +M V30 4 1 5 4 +M V30 END BOND +M V30 BEGIN SGROUP +M V30 1 DAT 1 ATOMS=(1 1) FIELDNAME="SMMX:sequence position data" FIELDDISP=- +M V30 " 1.5563 -9.2328 ARU ALL 1 1 " +M V30 2 DAT 2 ATOMS=(1 2) FIELDNAME="SMMX:sequence pos data 2" FIELDDISP=" - +M V30 2.5563 -9.2328 AA ALL 1 1 " +M V30 END SGROUP +M V30 END CTAB +M V30 BEGIN TEMPLATE +M V30 TEMPLATE 1 AA/Ala/A/ +M V30 BEGIN CTAB +M V30 COUNTS 7 6 3 0 0 +M V30 BEGIN ATOM +M V30 1 O 6.6266 -2.0662 0.0 0 +M V30 2 H 5.0016 -2.0876 0.0 0 +M V30 3 N 5.1358 -2.0784 0.0 0 +M V30 4 C 5.7844 -1.5983 0.0 0 CFG=2 +M V30 5 C 6.4753 -2.0653 0.0 0 +M V30 6 O 6.4753 -2.8977 0.0 0 +M V30 7 C 5.7844 -0.7662 0.0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 3 4 +M V30 2 1 4 5 +M V30 3 2 5 6 +M V30 4 1 4 7 CFG=1 +M V30 5 1 3 2 +M V30 6 1 5 1 +M V30 END BOND +M V30 BEGIN COLLECTION +M V30 MDLV30/STEABS ATOMS=(1 4) +M V30 END COLLECTION +M V30 BEGIN SGROUP +M V30 1 SUP 1 ATOMS=(1 1) XBONDS=(1 6) BRKXYZ=(9 7.020000 -2.260000 0.000000- +M V30 7.020000 -1.850000 0.000000 0.000000 0.000000 0.000000) CSTATE=(4 6 -- +M V30 0.820000 -0.010000 0.000000) LABEL=OH CLASS=LGRP +M V30 2 SUP 2 ATOMS=(1 2) XBONDS=(1 5) BRKXYZ=(9 4.580000 -1.870000 0.000000- +M V30 4.600000 -2.280000 0.000000 0.000000 0.000000 0.000000) CSTATE=(4 5 0- +M V30 .800000 0.020000 0.000000) LABEL=H CLASS=LGRP +M V30 3 SUP 3 ATOMS=(5 3 4 5 6 7) XBONDS=(2 5 6) BRKXYZ=(9 3.950000 -3.33000- +M V30 0 0.000000 3.950000 -0.380000 0.000000 0.000000 0.000000 0.000000) CST- +M V30 ATE=(4 5 -0.800000 -0.020000 0.000000) CSTATE=(4 6 0.820000 0.010000 0- +M V30 .000000) LABEL=A CLASS=AA SAP=(3 3 2 Al) SAP=(3 5 1 Br) +M V30 END SGROUP +M V30 END CTAB +M V30 TEMPLATE 2 AA/Gly/G/ +M V30 BEGIN CTAB +M V30 COUNTS 6 5 3 0 0 +M V30 BEGIN ATOM +M V30 1 N 3.676 -12.5274 0.0 0 +M V30 2 C 4.2675 -12.095 0.0 0 +M V30 3 O 4.8932 -13.2691 0.0 0 +M V30 4 C 4.8904 -12.5161 0.0 0 +M V30 5 O 5.1042 -12.5167 0.0 0 +M V30 6 H 3.4542 -12.5125 0.0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 1 2 +M V30 2 1 2 4 +M V30 3 2 4 3 +M V30 4 1 4 5 +M V30 5 1 1 6 +M V30 END BOND +M V30 BEGIN SGROUP +M V30 1 SUP 1 ATOMS=(1 5) XBONDS=(1 4) CSTATE=(4 4 -0.820000 -0.010000 0.000- +M V30 000) LABEL=OH CLASS=LGRP +M V30 2 SUP 2 ATOMS=(4 1 2 3 4) XBONDS=(2 4 5) CSTATE=(4 4 0.820000 0.010000- +M V30 0.000000) CSTATE=(4 5 -0.830000 0.010000 0.000000) LABEL=G CLASS=AA S- +M V30 AP=(3 4 5 Br) SAP=(3 1 6 Al) +M V30 3 SUP 3 ATOMS=(1 6) XBONDS=(1 5) CSTATE=(4 5 0.830000 -0.010000 0.0000- +M V30 00) LABEL=H CLASS=LGRP +M V30 END SGROUP +M V30 END CTAB +M V30 TEMPLATE 3 AA/meS/meS/ NATREPLACE=AA/S +M V30 BEGIN CTAB +M V30 COUNTS 9 8 3 0 0 +M V30 BEGIN ATOM +M V30 1 C 9.9525 -5.6641 0.0 0 +M V30 2 O 9.9451 -6.8641 0.0 0 +M V30 3 N 7.3518 -5.6442 0.0 0 +M V30 4 C 8.6579 -4.9049 0.0 0 CFG=1 +M V30 5 C 8.6671 -3.4041 0.0 0 +M V30 6 O 7.6319 -2.7971 0.0 0 +M V30 7 C 6.3173 -5.0361 0.0 0 +M V30 8 O 10.8217 -5.1697 0.0 0 +M V30 9 H 7.3436 -6.6442 0.0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 2 2 1 +M V30 2 1 1 4 +M V30 3 1 4 3 +M V30 4 1 4 5 CFG=1 +M V30 5 1 5 6 +M V30 6 1 3 7 +M V30 7 1 1 8 +M V30 8 1 3 9 +M V30 END BOND +M V30 BEGIN COLLECTION +M V30 MDLV30/STEABS ATOMS=(1 4) +M V30 END COLLECTION +M V30 BEGIN SGROUP +M V30 1 SUP 1 ATOMS=(7 1 2 3 4 5 6 7) XBONDS=(2 7 8) CSTATE=(4 7 2.170000 0.- +M V30 480000 0.000000) CSTATE=(4 8 -1.310000 -0.990000 0.000000) LABEL=meS C- +M V30 LASS=AA SAP=(3 1 8 Br) SAP=(3 3 9 Al) NATREPLACE=AA/S +M V30 2 SUP 2 ATOMS=(1 8) XBONDS=(1 7) CSTATE=(4 7 -2.170000 -0.480000 0.000- +M V30 000) LABEL=OH CLASS=LGRP +M V30 3 SUP 3 ATOMS=(1 9) XBONDS=(1 8) CSTATE=(4 8 1.310000 0.990000 0.00000- +M V30 0) LABEL=H CLASS=LGRP +M V30 END SGROUP +M V30 END CTAB +M V30 END TEMPLATE +M END + diff --git a/api/tests/integration/tests/basic/sgroups_basic.py b/api/tests/integration/tests/basic/sgroups_basic.py index 3841aa61a4..9c9a828089 100644 --- a/api/tests/integration/tests/basic/sgroups_basic.py +++ b/api/tests/integration/tests/basic/sgroups_basic.py @@ -7,7 +7,7 @@ os.path.join(os.path.abspath(__file__), "..", "..", "..", "common") ) ) -from env_indigo import * # noqa +from env_indigo import Indigo, dataPath # noqa indigo = Indigo() indigo.setOption("molfile-saving-skip-date", True) @@ -151,3 +151,8 @@ print(g.getSGroupMultiplier()) for a in g.iterateAtoms(): print("{0} {1}".format(a.index(), a.symbol())) + +fname = "molecules/basic/2681-mol3000-fielddisp.mol" +m = indigo.loadMoleculeFromFile(dataPath(fname)) +print("\n%s\n" % fname) +print(m.molfile()) diff --git a/core/indigo-core/molecule/src/molfile_loader.cpp b/core/indigo-core/molecule/src/molfile_loader.cpp index 424b582672..a2c97cc0c0 100644 --- a/core/indigo-core/molecule/src/molfile_loader.cpp +++ b/core/indigo-core/molecule/src/molfile_loader.cpp @@ -3600,6 +3600,8 @@ void MolfileLoader::_readSGroup3000(const char* str) QS_DEF(Array, substr); substr.clear(); _readStringInQuotes(scanner, &substr); + if (substr.size() > 0) + substr.pop(); // remove trailing 0 if (dsg != 0) { BufferScanner subscan(substr); @@ -3903,46 +3905,104 @@ void MolfileLoader::_readTGroups3000() void MolfileLoader::_readSGroupDisplay(Scanner& scanner, DataSGroup& dsg) { - dsg.display_pos.x = scanner.readFloatFix(10); - dsg.display_pos.y = scanner.readFloatFix(10); - scanner.skip(4); - if (scanner.readChar() == 'A') // means "attached" - dsg.detached = false; - else - dsg.detached = true; - if (scanner.readChar() == 'R') - dsg.relative = true; - if (scanner.readChar() == 'U') - dsg.display_units = true; - - long long cur = scanner.tell(); - scanner.seek(0LL, SEEK_END); - long long end = scanner.tell(); - scanner.seek(cur, SEEK_SET); - - scanner.skip(3); - - char chars[4] = {0, 0, 0, 0}; - scanner.readCharsFix(3, chars); - if (strncmp(chars, "ALL", 3) == 0) - dsg.num_chars = 0; - else + try { - scanner.seek(cur + 3, SEEK_CUR); - dsg.num_chars = scanner.readInt1(); - } + int constexpr MIN_SDD_SIZE = 36; + bool well_formatted = scanner.length() >= MIN_SDD_SIZE; + dsg.display_pos.x = scanner.readFloatFix(10); + dsg.display_pos.y = scanner.readFloatFix(10); + int ch = ' '; + if (well_formatted) + { + scanner.skip(4); + ch = scanner.readChar(); + } + else + { + for (int i = 0; i < 5 && ch == ' '; i++) + ch = scanner.readChar(); + } + if (ch == 'A') // means "attached" + dsg.detached = false; + else if (ch == 'D') + dsg.detached = true; + else + throw Error("Expected 'A' or 'D' but got '%c'.", ch); + ch = scanner.readChar(); + if (ch == 'R') + dsg.relative = true; + else if (ch != 'A') + throw Error("Expected 'A' or 'R' but got '%c'.", ch); + ch = scanner.readChar(); + if (ch == 'U') + dsg.display_units = true; + else if (ch != ' ') + throw Error("Expected 'U' or ' ' but got '%c'.", ch); + + if (well_formatted) + { + scanner.skip(3); + } + else + { + for (int i = 0; i < 4; i++) + { + ch = scanner.lookNext(); + if (ch != ' ') + break; + scanner.skip(1); + } + } + + long long cur = scanner.tell(); + + char chars[4] = {0, 0, 0, 0}; + scanner.readCharsFix(3, chars); + if (strncmp(chars, "ALL", 3) == 0) + dsg.num_chars = 0; + else + { + scanner.seek(cur, SEEK_CUR); + dsg.num_chars = scanner.readInt1(); + } + + if (well_formatted) + { + scanner.skip(7); + dsg.tag = scanner.readChar(); + } + else + { + ch = ' '; + // read kkk: Number of lines to display (unused, always 1) + for (int i = 0; i < 3 && ch == ' '; i++) + ch = scanner.readChar(); + ch = ' '; + // read tag + for (int i = 0; i < 5 && ch == ' '; i++) + ch = scanner.readChar(); + if (ch != ' ') + dsg.tag = ch; + } - scanner.skip(7); - dsg.tag = scanner.readChar(); + cur = scanner.tell(); + scanner.seek(0LL, SEEK_END); + long long end = scanner.tell(); + scanner.seek(cur, SEEK_SET); - if (end - cur + 1 > 16) + if (end - cur + 1 > 2) + { + scanner.skip(2); + if (scanner.lookNext() == '\n' || scanner.lookNext() == '\r') + return; + int c = scanner.readChar(); + if (c >= '1' && c <= '9') + dsg.dasp_pos = c - '0'; + } + } + catch (Scanner::Error) { - scanner.skip(2); - if (scanner.lookNext() == '\n' || scanner.lookNext() == '\r') - return; - int c = scanner.readChar(); - if (c >= '1' && c <= '9') - dsg.dasp_pos = c - '0'; + // Ignore scanner error - just use default values. } } diff --git a/data/molecules/basic/2681-mol3000-fielddisp.mol b/data/molecules/basic/2681-mol3000-fielddisp.mol new file mode 100644 index 0000000000..3106f0a548 --- /dev/null +++ b/data/molecules/basic/2681-mol3000-fielddisp.mol @@ -0,0 +1,127 @@ +1 A G meS A G +ACCLDraw03281611052D + + 0 0 0 0 0 999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 5 4 2 0 1 +M V30 BEGIN ATOM +M V30 1 Ala 1.5563 -9.2328 0 0 CLASS=AA ATTCHORD=(2 2 Br) SEQID=1 +M V30 2 Gly 2.7137 -9.23 0 0 CLASS=AA ATTCHORD=(4 3 Br 1 Al) SEQID=2 +M V30 3 meS 3.8712 -9.23 0 0 CLASS=AA ATTCHORD=(4 4 Br 2 Al) SEQID=3 +M V30 4 Ala 5.4539 -9.23 0 0 CLASS=AA ATTCHORD=(4 3 Al 5 Br) SEQID=4 +M V30 5 Gly 6.6114 -9.23 0 0 CLASS=AA ATTCHORD=(2 4 Al) SEQID=5 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 2 1 +M V30 2 1 3 2 +M V30 3 1 4 3 +M V30 4 1 5 4 +M V30 END BOND +M V30 BEGIN SGROUP +M V30 1 DAT 1 ATOMS=(1 1) FIELDNAME="SMMX:sequence position data" - +M V30 FIELDDISP=" 1.5563 -9.2328 ARU ALL 1 " +M V30 2 DAT 2 ATOMS=(1 2) FIELDNAME="SMMX:sequence pos data 2" - +M V30 FIELDDISP=" 2.5563 -9.2328" +M V30 END SGROUP +M V30 END CTAB +M V30 BEGIN TEMPLATE +M V30 TEMPLATE 1 AA/Ala/A/ +M V30 BEGIN CTAB +M V30 COUNTS 7 6 3 0 1 +M V30 BEGIN ATOM +M V30 1 O 6.6266 -2.0662 0 0 +M V30 2 H 5.0016 -2.0876 0 0 +M V30 3 N 5.1358 -2.0784 0 0 CFG=3 +M V30 4 C 5.7844 -1.5983 0 0 CFG=2 +M V30 5 C 6.4753 -2.0653 0 0 +M V30 6 O 6.4753 -2.8977 0 0 +M V30 7 C 5.7844 -0.7662 0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 3 4 +M V30 2 1 4 5 +M V30 3 2 5 6 +M V30 4 1 4 7 CFG=1 +M V30 5 1 3 2 +M V30 6 1 5 1 +M V30 END BOND +M V30 BEGIN SGROUP +M V30 1 SUP 1 ATOMS=(1 1) XBONDS=(1 6) BRKXYZ=(9 7.02 -2.26 0 7.02 -1.85 0 - +M V30 0 0 0) CSTATE=(4 6 -0.82 -0.01 0) LABEL=OH CLASS=LGRP +M V30 2 SUP 2 ATOMS=(1 2) XBONDS=(1 5) BRKXYZ=(9 4.58 -1.87 0 4.6 -2.28 0 - +M V30 0 0 0) CSTATE=(4 5 0.8 0.02 0) LABEL=H CLASS=LGRP +M V30 3 SUP 3 ATOMS=(5 3 4 5 6 7) XBONDS=(2 5 6) BRKXYZ=(9 3.95 -3.33 0 3.95 - +M V30 -0.38 0 0 0 0) CSTATE=(4 5 -0.8 -0.02 0) CSTATE=(4 6 0.82 0.01 - +M V30 0) LABEL=A CLASS=AA SAP=(3 3 2 Al) SAP=(3 5 1 Br) +M V30 END SGROUP +M V30 BEGIN COLLECTION +M V30 MDLV30/STEABS ATOMS=(1 4) +M V30 END COLLECTION +M V30 END CTAB +M V30 TEMPLATE 2 AA/Gly/G/ +M V30 BEGIN CTAB +M V30 COUNTS 6 5 3 0 0 +M V30 BEGIN ATOM +M V30 1 N 3.676 -12.5274 0 0 CFG=3 +M V30 2 C 4.2675 -12.095 0 0 +M V30 3 O 4.8932 -13.2691 0 0 +M V30 4 C 4.8904 -12.5161 0 0 +M V30 5 O 5.1042 -12.5167 0 0 +M V30 6 H 3.4542 -12.5125 0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 1 2 +M V30 2 1 2 4 +M V30 3 2 4 3 +M V30 4 1 4 5 +M V30 5 1 1 6 +M V30 END BOND +M V30 BEGIN SGROUP +M V30 1 SUP 1 ATOMS=(1 5) XBONDS=(1 4) CSTATE=(4 4 -0.82 -0.01 0) LABEL=OH - +M V30 CLASS=LGRP +M V30 2 SUP 2 ATOMS=(4 1 2 3 4) XBONDS=(2 4 5) CSTATE=(4 4 0.82 0.01 0) - +M V30 CSTATE=(4 5 -0.83 0.01 0) LABEL=G CLASS=AA SAP=(3 4 5 Br) - +M V30 SAP=(3 1 6 Al) +M V30 3 SUP 3 ATOMS=(1 6) XBONDS=(1 5) CSTATE=(4 5 0.83 -0.01 0) LABEL=H - +M V30 CLASS=LGRP +M V30 END SGROUP +M V30 END CTAB +M V30 TEMPLATE 3 AA/meS/meS/ NATREPLACE=AA/S +M V30 BEGIN CTAB +M V30 COUNTS 9 8 3 0 1 +M V30 BEGIN ATOM +M V30 1 C 9.9525 -5.6641 0 0 +M V30 2 O 9.9451 -6.8641 0 0 +M V30 3 N 7.3518 -5.6442 0 0 CFG=3 +M V30 4 C 8.6579 -4.9049 0 0 CFG=1 +M V30 5 C 8.6671 -3.4041 0 0 +M V30 6 O 7.6319 -2.7971 0 0 +M V30 7 C 6.3173 -5.0361 0 0 +M V30 8 O 10.8217 -5.1697 0 0 +M V30 9 H 7.3436 -6.6442 0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 2 2 1 +M V30 2 1 1 4 +M V30 3 1 4 3 +M V30 4 1 4 5 CFG=1 +M V30 5 1 5 6 +M V30 6 1 3 7 +M V30 7 1 1 8 +M V30 8 1 3 9 +M V30 END BOND +M V30 BEGIN SGROUP +M V30 1 SUP 1 ATOMS=(7 1 2 3 4 5 6 7) XBONDS=(2 7 8) CSTATE=(4 7 2.17 0.48 - +M V30 0) CSTATE=(4 8 -1.31 -0.99 0) LABEL=meS CLASS=AA SAP=(3 1 8 Br) - +M V30 SAP=(3 3 9 Al) NATREPLACE=AA/S +M V30 2 SUP 2 ATOMS=(1 8) XBONDS=(1 7) CSTATE=(4 7 -2.17 -0.48 0) LABEL=OH - +M V30 CLASS=LGRP +M V30 3 SUP 3 ATOMS=(1 9) XBONDS=(1 8) CSTATE=(4 8 1.31 0.99 0) LABEL=H - +M V30 CLASS=LGRP +M V30 END SGROUP +M V30 BEGIN COLLECTION +M V30 MDLV30/STEABS ATOMS=(1 4) +M V30 END COLLECTION +M V30 END CTAB +M V30 END TEMPLATE +M END