diff --git a/README.md b/README.md index 55a21e2..b089a74 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,10 @@ This package performs in-silico MeOX + TMS derivatization (as described e.g. in https://doi.org/10.1021/acs.analchem.7b01010): -* Methoximation: ketone R(C=O)R' and aldehyde (-HC=O) karboxyl groups +* Methoximation: ketone R(C=O)R' and aldehyde (-HC=O) carbonyl groups are substituted with -C=NOCH3 * Trimethylsilylation: the acidic hydrogen in -OH, -SH, -COOH, -NH2, -NHR, =NH, the hydrogen is substituted with -Si(CH3)3 -The substitution needn't happen always, their probability currently hardcoded in the package. +The substitution doesn't always have to happen; its probability is currently hardcoded in the package. Typically, multiple substitution attempts are run on each input molecule, and all distinct results are gathered. Known limitation is methoximation on cycles which should be broken. This is not implemented yet. diff --git a/tests/test_derivatization.py b/tests/test_derivatization.py index 7090980..85f250d 100644 --- a/tests/test_derivatization.py +++ b/tests/test_derivatization.py @@ -1,11 +1,11 @@ +import random + import pytest from rdkit import Chem from gc_meox_tms import (add_derivatization_groups, is_derivatized, process_one_mol, remove_derivatization_groups) -FLAKY_RERUNS = 6 - @pytest.fixture(params=[ ("CC(=O)N([Si](C)(C)C)[Si](C)(C)C", True), @@ -27,7 +27,8 @@ ("C[N+]#[C-]", False) ]) def is_derivatized_data(request): - """Return a tuple of (smiles, boolean indicating if the molecule is MeOX or TMS derivatized).""" + """Return a tuple of (smiles, boolean indicating if the molecule + is MeOX or TMS derivatized).""" smiles, _is_derivatized = request.param return smiles, _is_derivatized @@ -35,7 +36,8 @@ def is_derivatized_data(request): @pytest.fixture(params=[ ("CC(=O)N([Si](C)(C)C)[Si](C)(C)C", "CC(=O)N[Si](C)(C)C", "CC(N)=O"), ("C[Si](C)(C)OC1=CC=CC=C1", None, "OC1=CC=CC=C1"), - ("C[Si](C)(C)OC1=CC=C(O[Si](C)(C)C)C=C1", 
"C[Si](C)(C)OC1=CC=C(O)C=C1", "OC1=CC=C(O)C=C1"), + ("C[Si](C)(C)OC1=CC=C(O[Si](C)(C)C)C=C1", "C[Si](C)(C)OC1=CC=C(O)C=C1", + "OC1=CC=C(O)C=C1"), ("CCO[Si](C)(C)C", None, "CCO"), ("CC(=O)O[Si](C)(C)C", None, "CC(=O)O"), ("CCCS[Si](C)(C)C", None, "CCCS"), @@ -43,8 +45,9 @@ def is_derivatized_data(request): ("CC=NOC", None, "CC=O") ]) def derivatization_groups_data(request): - """Return a tuple of (smiles of a derivatized molecule, smiles of this molecule with different degree of conversion, - smiles of the original non-derivatized molecule).""" + """Return a tuple of (smiles of a derivatized molecule, smiles of this + molecule with different degree of conversion, smiles of the original + non-derivatized molecule).""" derivatized, alternative, original = request.param return derivatized, alternative, original @@ -76,7 +79,8 @@ def test_remove_derivatization_groups_from_smiles(derivatization_groups_data): def test_remove_derivatization_groups_from_mol(derivatization_groups_data): - """Test if the remove_derivatization_groups function works with RDKit molecules.""" + """Test if the remove_derivatization_groups function works with RDKit + molecules.""" smiles, _, expected = derivatization_groups_data mol = Chem.MolFromSmiles(smiles) actual = remove_derivatization_groups(mol=mol) @@ -85,10 +89,11 @@ def test_remove_derivatization_groups_from_mol(derivatization_groups_data): assert actual_smiles == expected -@pytest.mark.flaky(reruns=FLAKY_RERUNS) def test_add_derivatization_groups_from_smiles(derivatization_groups_data): - """Test if the add_derivatization_groups function works with SMILES. The test will run FLAKY_RERUNS times or until - success due to non-deterministic nature of add_derivatization_groups.""" + """Test if the add_derivatization_groups function works with SMILES. 
The + random module is seeded to make the otherwise non-deterministic + add_derivatization_groups reproducible.""" + random.seed(3) expected, alternative, original = derivatization_groups_data derivatized = add_derivatization_groups(smiles=original) derivatized_smiles = Chem.MolToSmiles(derivatized, kekuleSmiles=True) @@ -96,10 +101,11 @@ def test_add_derivatization_groups_from_smiles(derivatization_groups_data): assert derivatized_smiles in [expected, alternative] -@pytest.mark.flaky(reruns=FLAKY_RERUNS) def test_add_derivatization_groups_from_mol(derivatization_groups_data): - """Test if the add_derivatization_groups function works with RDKit molecules. The test will run FLAKY_RERUNS times - or until success due to non-deterministic nature of add_derivatization_groups.""" + """Test if the add_derivatization_groups function works with RDKit + molecules. The random module is seeded to make the otherwise + non-deterministic add_derivatization_groups reproducible.""" + random.seed(3) expected, alternative, original = derivatization_groups_data mol = Chem.MolFromSmiles(original) derivatized = add_derivatization_groups(mol=mol) @@ -109,8 +115,12 @@ def test_add_derivatization_groups_from_mol(derivatization_groups_data): @pytest.mark.parametrize("smiles, expected", [ - ("CC(N)=O", {"CC(N)=O", "CC(=O)N([Si](C)(C)C)[Si](C)(C)C", "CC(=O)N[Si](C)(C)C"}), - ("C[Si](C)(C)OC1=CC=C(O)C=C1", {"OC1=CC=C(O)C=C1", "C[Si](C)(C)OC1=CC=C(O[Si](C)(C)C)C=C1", "C[Si](C)(C)OC1=CC=C(O)C=C1"}), + ("CC(N)=O", {"CC(N)=O", + "CC(=O)N([Si](C)(C)C)[Si](C)(C)C", + "CC(=O)N[Si](C)(C)C"}), + ("C[Si](C)(C)OC1=CC=C(O)C=C1", {"OC1=CC=C(O)C=C1", + "C[Si](C)(C)OC1=CC=C(O[Si](C)(C)C)C=C1", + "C[Si](C)(C)OC1=CC=C(O)C=C1"}), ("CCC(C)=O", {"CCC(C)=O", "CCC(C)=NOC"}), ("CC=NOC", {"CC=O", "CC=NOC"}) ]) @@ -118,6 +128,7 @@ def test_process_one_mol(smiles, expected): """Test processing one molecule.""" mol = (smiles, Chem.MolFromSmiles(smiles)) n = 40 + random.seed(3) actual = 
process_one_mol(mol, n) actual = {actual[0], actual[1], *actual[2]}