Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed test units failure #31

Merged
merged 3 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

This package performs in-silico MeOX + TMS derivatization (as described e.g. in https://doi.org/10.1021/acs.analchem.7b01010):

* Methoximation: ketone R(<font color='pink'>C</font>=O)R' and aldehyde (-H<font color='pink'>C</font>=O) karboxyl groups
* Methoximation: ketone R(<font color='pink'>C</font>=O)R' and aldehyde (-H<font color='pink'>C</font>=O) carbonyl groups
are substituted with -<font color='pink'>C</font>=NOCH<sub>3</sub>
* Trimethylsilylation: the acidic hydrogen in -OH, -SH, -COOH, -NH<sub>2</sub>, -NHR, =NH is substituted with -Si(CH<sub>3</sub>)<sub>3</sub>
The substitution needn't happen always, their probability currently hardcoded in the package.
The substitution doesn't always have to happen; its probability is currently hardcoded in the package.
Typically, multiple substitution attempts are run on each input molecule, and all distinct results are gathered.

A known limitation is methoximation on cycles, which should be broken. This is not implemented yet.
Expand Down
41 changes: 26 additions & 15 deletions tests/test_derivatization.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import random

import pytest
from rdkit import Chem

from gc_meox_tms import (add_derivatization_groups, is_derivatized,
process_one_mol, remove_derivatization_groups)

FLAKY_RERUNS = 6


@pytest.fixture(params=[
("CC(=O)N([Si](C)(C)C)[Si](C)(C)C", True),
Expand All @@ -27,24 +27,27 @@
("C[N+]#[C-]", False)
])
def is_derivatized_data(request):
"""Return a tuple of (smiles, boolean indicating if the molecule is MeOX or TMS derivatized)."""
"""Return a tuple of (smiles, boolean indicating if the molecule
is MeOX or TMS derivatized)."""
smiles, _is_derivatized = request.param
return smiles, _is_derivatized


@pytest.fixture(params=[
("CC(=O)N([Si](C)(C)C)[Si](C)(C)C", "CC(=O)N[Si](C)(C)C", "CC(N)=O"),
("C[Si](C)(C)OC1=CC=CC=C1", None, "OC1=CC=CC=C1"),
("C[Si](C)(C)OC1=CC=C(O[Si](C)(C)C)C=C1", "C[Si](C)(C)OC1=CC=C(O)C=C1", "OC1=CC=C(O)C=C1"),
("C[Si](C)(C)OC1=CC=C(O[Si](C)(C)C)C=C1", "C[Si](C)(C)OC1=CC=C(O)C=C1",
"OC1=CC=C(O)C=C1"),
("CCO[Si](C)(C)C", None, "CCO"),
("CC(=O)O[Si](C)(C)C", None, "CC(=O)O"),
("CCCS[Si](C)(C)C", None, "CCCS"),
("CCC(C)=NOC", None, "CCC(C)=O"),
("CC=NOC", None, "CC=O")
])
def derivatization_groups_data(request):
"""Return a tuple of (smiles of a derivatized molecule, smiles of this molecule with different degree of conversion,
smiles of the original non-derivatized molecule)."""
"""Return a tuple of (smiles of a derivatized molecule, smiles of this
molecule with different degree of conversion, smiles of the original
non-derivatized molecule)."""
derivatized, alternative, original = request.param
return derivatized, alternative, original

Expand Down Expand Up @@ -76,7 +79,8 @@ def test_remove_derivatization_groups_from_smiles(derivatization_groups_data):


def test_remove_derivatization_groups_from_mol(derivatization_groups_data):
"""Test if the remove_derivatization_groups function works with RDKit molecules."""
"""Test if the remove_derivatization_groups function works with RDKit
molecules."""
smiles, _, expected = derivatization_groups_data
mol = Chem.MolFromSmiles(smiles)
actual = remove_derivatization_groups(mol=mol)
Expand All @@ -85,21 +89,23 @@ def test_remove_derivatization_groups_from_mol(derivatization_groups_data):
assert actual_smiles == expected


@pytest.mark.flaky(reruns=FLAKY_RERUNS)
def test_add_derivatization_groups_from_smiles(derivatization_groups_data):
"""Test if the add_derivatization_groups function works with SMILES. The test will run FLAKY_RERUNS times or until
success due to non-deterministic nature of add_derivatization_groups."""
"""Test if the add_derivatization_groups function works with SMILES. The
test will run FLAKY_RERUNS times or until success due to
non-deterministic nature of add_derivatization_groups."""
random.seed(3)
expected, alternative, original = derivatization_groups_data
derivatized = add_derivatization_groups(smiles=original)
derivatized_smiles = Chem.MolToSmiles(derivatized, kekuleSmiles=True)

assert derivatized_smiles in [expected, alternative]


@pytest.mark.flaky(reruns=FLAKY_RERUNS)
def test_add_derivatization_groups_from_mol(derivatization_groups_data):
"""Test if the add_derivatization_groups function works with RDKit molecules. The test will run FLAKY_RERUNS times
or until success due to non-deterministic nature of add_derivatization_groups."""
"""Test if the add_derivatization_groups function works with RDKit
molecules. The test will run FLAKY_RERUNS times or until success
due to non-deterministic nature of add_derivatization_groups."""
random.seed(3)
expected, alternative, original = derivatization_groups_data
mol = Chem.MolFromSmiles(original)
derivatized = add_derivatization_groups(mol=mol)
Expand All @@ -109,15 +115,20 @@ def test_add_derivatization_groups_from_mol(derivatization_groups_data):


@pytest.mark.parametrize("smiles, expected", [
("CC(N)=O", {"CC(N)=O", "CC(=O)N([Si](C)(C)C)[Si](C)(C)C", "CC(=O)N[Si](C)(C)C"}),
("C[Si](C)(C)OC1=CC=C(O)C=C1", {"OC1=CC=C(O)C=C1", "C[Si](C)(C)OC1=CC=C(O[Si](C)(C)C)C=C1", "C[Si](C)(C)OC1=CC=C(O)C=C1"}),
("CC(N)=O", {"CC(N)=O",
"CC(=O)N([Si](C)(C)C)[Si](C)(C)C",
"CC(=O)N[Si](C)(C)C"}),
("C[Si](C)(C)OC1=CC=C(O)C=C1", {"OC1=CC=C(O)C=C1",
"C[Si](C)(C)OC1=CC=C(O[Si](C)(C)C)C=C1",
"C[Si](C)(C)OC1=CC=C(O)C=C1"}),
("CCC(C)=O", {"CCC(C)=O", "CCC(C)=NOC"}),
("CC=NOC", {"CC=O", "CC=NOC"})
])
def test_process_one_mol(smiles, expected):
"""Test processing one molecule."""
mol = (smiles, Chem.MolFromSmiles(smiles))
n = 40
random.seed(3)
actual = process_one_mol(mol, n)
actual = {actual[0], actual[1], *actual[2]}

Expand Down
Loading