From c7df17747250a2e50f98b21c32581b34f0721943 Mon Sep 17 00:00:00 2001 From: Fanwang Meng Date: Tue, 9 Nov 2021 12:28:15 -0500 Subject: [PATCH 1/8] Add support of SDF file --- padelpy/__init__.py | 2 +- padelpy/functions.py | 129 +++++++++++++++++++++++++++++++++---------- 2 files changed, 102 insertions(+), 29 deletions(-) diff --git a/padelpy/__init__.py b/padelpy/__init__.py index 79627f6..de5b956 100644 --- a/padelpy/__init__.py +++ b/padelpy/__init__.py @@ -1,3 +1,3 @@ from padelpy.wrapper import padeldescriptor -from padelpy.functions import from_mdl, from_smiles +from padelpy.functions import from_mdl, from_smiles, from_sdf __version__ = '0.1.10' diff --git a/padelpy/functions.py b/padelpy/functions.py index d4ea3ee..8ae1ace 100644 --- a/padelpy/functions.py +++ b/padelpy/functions.py @@ -23,7 +23,7 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True, fingerprints: bool = False, timeout: int = 60) -> OrderedDict: - ''' from_smiles: converts SMILES string to QSPR descriptors/fingerprints + """ from_smiles: converts SMILES string to QSPR descriptors/fingerprints Args: smiles (str, list): SMILES string for a given molecule, or a list of @@ -38,17 +38,17 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True, list of OrderedDicts, else single OrderedDict; each OrderedDict contains labels and values for each descriptor generated for each supplied molecule - ''' + """ - timestamp = datetime.now().strftime('%Y%m%d%H%M%S%f')[:-3] + timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")[:-3] - with open('{}.smi'.format(timestamp), 'w') as smi_file: + with open("{}.smi".format(timestamp), "w") as smi_file: if type(smiles) == str: smi_file.write(smiles) elif type(smiles) == list: - smi_file.write('\n'.join(smiles)) + smi_file.write("\n".join(smiles)) else: - raise RuntimeError('Unknown input format for `smiles`: {}'.format( + raise RuntimeError("Unknown input format for `smiles`: {}".format( type(smiles) )) smi_file.close() 
@@ -56,12 +56,12 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True, save_csv = True if output_csv is None: save_csv = False - output_csv = '{}.csv'.format(timestamp) + output_csv = "{}.csv".format(timestamp) for attempt in range(3): try: padeldescriptor( - mol_dir='{}.smi'.format(timestamp), + mol_dir="{}.smi".format(timestamp), d_file=output_csv, convert3d=True, retain3d=True, @@ -74,7 +74,7 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True, break except RuntimeError as exception: if attempt == 2: - remove('{}.smi'.format(timestamp)) + remove("{}.smi".format(timestamp)) if not save_csv: sleep(0.5) try: @@ -85,33 +85,33 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True, else: continue - with open(output_csv, 'r', encoding='utf-8') as desc_file: + with open(output_csv, "r", encoding="utf-8") as desc_file: reader = DictReader(desc_file) rows = [row for row in reader] desc_file.close() - remove('{}.smi'.format(timestamp)) + remove("{}.smi".format(timestamp)) if not save_csv: remove(output_csv) if type(smiles) == list and len(rows) != len(smiles): - raise RuntimeError('PaDEL-Descriptor failed on one or more mols.' + - ' Ensure the input structures are correct.') + raise RuntimeError("PaDEL-Descriptor failed on one or more mols." + + " Ensure the input structures are correct.") elif type(smiles) == str and len(rows) == 0: raise RuntimeError( - 'PaDEL-Descriptor failed on {}.'.format(smiles) + - ' Ensure input structure is correct.' + "PaDEL-Descriptor failed on {}.".format(smiles) + + " Ensure input structure is correct." ) for idx, r in enumerate(rows): if len(r) == 0: raise RuntimeError( - 'PaDEL-Descriptor failed on {}.'.format(smiles[idx]) + - ' Ensure input structure is correct.' + "PaDEL-Descriptor failed on {}.".format(smiles[idx]) + + " Ensure input structure is correct." 
) for idx in range(len(rows)): - del rows[idx]['Name'] + del rows[idx]["Name"] if type(smiles) == str: return rows[0] @@ -120,7 +120,7 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True, def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True, fingerprints: bool = False, timeout: int = 60) -> list: - ''' from_mdl: converts MDL file into QSPR descriptors/fingerprints; + """ from_mdl: converts MDL file into QSPR descriptors/fingerprints; multiple molecules may be represented in the MDL file Args: @@ -133,19 +133,19 @@ def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True, Returns: list: list of dicts, where each dict corresponds sequentially to a compound in the supplied MDL file - ''' + """ - is_mdl = compile(r'.*\.mdl$', IGNORECASE) + is_mdl = compile(r".*\.mdl$", IGNORECASE) if is_mdl.match(mdl_file) is None: - raise ValueError('MDL file must have a `.mdl` extension: {}'.format( + raise ValueError("MDL file must have a `.mdl` extension: {}".format( mdl_file )) save_csv = True if output_csv is None: save_csv = False - output_csv = '{}.csv'.format( - datetime.now().strftime('%Y%m%d%H%M%S%f')[:-3] + output_csv = "{}.csv".format( + datetime.now().strftime("%Y%m%d%H%M%S%f")[:-3] ) for attempt in range(3): @@ -174,15 +174,88 @@ def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True, else: continue - with open(output_csv, 'r', encoding='utf-8') as desc_file: + with open(output_csv, "r", encoding="utf-8") as desc_file: reader = DictReader(desc_file) rows = [row for row in reader] desc_file.close() if not save_csv: remove(output_csv) if len(rows) == 0: - raise RuntimeError('PaDEL-Descriptor returned no calculated values.' + - ' Ensure the input structure is correct.') + raise RuntimeError("PaDEL-Descriptor returned no calculated values." 
+ + " Ensure the input structure is correct.") for row in rows: - del row['Name'] + del row["Name"] + return rows + + +def from_sdf(sdf_file: str, + output_csv: str = None, + descriptors: bool = True, + fingerprints: bool = False, + timeout: int = 60) -> list: + """ Converts sdf file into QSPR descriptors/fingerprints. + Multiple molecules may be represented in the sdf file + + Args: + sdf_file (str): path to sdf file + output_csv (str): if supplied, saves descriptors/fingerprints here + descriptors (bool): if `True`, calculates descriptors + fingerprints (bool): if `True`, calculates fingerprints + timeout (int): maximum time, in seconds, for conversion + + Returns: + list: list of dicts, where each dict corresponds sequentially to a compound in the + supplied sdf file + """ + + is_sdf = compile(r".*\.sdf$", IGNORECASE) + if is_sdf.match(sdf_file) is None: + raise ValueError("sdf file must have a `.sdf` extension: {}".format( + sdf_file + )) + + save_csv = True + if output_csv is None: + save_csv = False + output_csv = "{}.csv".format( + datetime.now().strftime("%Y%m%d%H%M%S%f")[:-3] + ) + + for attempt in range(3): + try: + padeldescriptor( + mol_dir=sdf_file, + d_file=output_csv, + convert3d=True, + retain3d=True, + retainorder=True, + d_2d=descriptors, + d_3d=descriptors, + fingerprints=fingerprints, + sp_timeout=timeout + ) + break + except RuntimeError as exception: + if attempt == 2: + if not save_csv: + sleep(0.5) + try: + remove(output_csv) + except FileNotFoundError as e: + warnings.warn(e, RuntimeWarning) + raise RuntimeError(exception) + else: + continue + + with open(output_csv, "r", encoding="utf-8") as desc_file: + reader = DictReader(desc_file) + rows = [row for row in reader] + desc_file.close() + if not save_csv: + remove(output_csv) + if len(rows) == 0: + raise RuntimeError("PaDEL-Descriptor returned no calculated values." 
+ + " Ensure the input structure is correct.") + for row in rows: + del row["Name"] return rows From 76e0565c591426444236cf00a45157da4852bdf8 Mon Sep 17 00:00:00 2001 From: Fanwang Meng Date: Sat, 13 Nov 2021 13:51:14 -0500 Subject: [PATCH 2/8] Add .gitignore --- .gitignore | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..83d765a --- /dev/null +++ b/.gitignore @@ -0,0 +1,111 @@ +# Prerequisites +*.d + +# Compiled object files +*.slo +*.lo +*.o +*.obj + +# Precompiled headers +*.gch +*.pch + +# Compiled dynamic libraries +*.so +*.so.[0-9]* +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +.installed.cfg +MANIFEST +*.egg-info/ +*.egg +*.manifest +*.spec +pip-log.txt +pip-delete-this-directory.txt +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.pytest_cache/ + +# Documentation +doc/html/ +doc/latex/ +doc/man/ +doc/xml/ +doc/_build/ +doc/source +doc/modules + +# Environments +.env +.venv +env/ +venv/ +ENV/ + +# Editor junk +tags +[._]*.s[a-v][a-z] +[._]*.sw[a-p] +[._]s[a-v][a-z] +[._]sw[a-p] +*~ +\#*\# +.\#* +.ropeproject +.idea/ +.spyderproject +.spyproject +.vscode/ +# Mac .DS_Store +.DS_Store + +# jupyter notebook checkpoints +.ipynb_checkpoints + +# version file generated by rob +padelpy/_version.py + From 8a621008b6db9bfce6ccf07fafd6835c4805ad6b Mon Sep 17 00:00:00 2001 From: Fanwang Meng Date: Sat, 13 Nov 2021 13:51:44 -0500 Subject: [PATCH 3/8] Update import settings --- padelpy/__init__.py | 4 ++-- padelpy/functions.py | 8 
+++++++- padelpy/wrapper.py | 6 +++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/padelpy/__init__.py b/padelpy/__init__.py index de5b956..011cc1a 100644 --- a/padelpy/__init__.py +++ b/padelpy/__init__.py @@ -1,3 +1,3 @@ -from padelpy.wrapper import padeldescriptor -from padelpy.functions import from_mdl, from_smiles, from_sdf +from .wrapper import padeldescriptor +from .functions import from_mdl, from_smiles, from_sdf __version__ = '0.1.10' diff --git a/padelpy/functions.py b/padelpy/functions.py index 8ae1ace..d18b0e2 100644 --- a/padelpy/functions.py +++ b/padelpy/functions.py @@ -8,6 +8,7 @@ # Contains various functions commonly used with PaDEL-Descriptor # +import warnings # stdlib. imports from collections import OrderedDict from csv import DictReader @@ -15,11 +16,16 @@ from os import remove from re import compile, IGNORECASE from time import sleep -import warnings # PaDELPy imports from padelpy import padeldescriptor +__all__ = [ + "from_mdl", + "from_smiles", + "from_sdf", +] + def from_smiles(smiles, output_csv: str = None, descriptors: bool = True, fingerprints: bool = False, timeout: int = 60) -> OrderedDict: diff --git a/padelpy/wrapper.py b/padelpy/wrapper.py index 8dbb263..65e18ba 100644 --- a/padelpy/wrapper.py +++ b/padelpy/wrapper.py @@ -21,6 +21,10 @@ 'PaDEL-Descriptor.jar' ) +__all__ = [ + "padeldescriptor", +] + def _popen_timeout(command: str, timeout: int) -> tuple: ''' Calls PaDEL-Descriptor, with optional subprocess timeout @@ -154,4 +158,4 @@ def padeldescriptor(maxruntime: int = -1, waitingjobs: int = -1, err.decode('utf-8') )) return - \ No newline at end of file + From 30e7d2a95639eb0429de2372fbae5fad473e1274 Mon Sep 17 00:00:00 2001 From: Fanwang Meng Date: Wed, 17 Nov 2021 11:42:28 -0500 Subject: [PATCH 4/8] Update version information --- padelpy/version.py | 5 +++++ setup.py | 14 +++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 padelpy/version.py diff --git a/padelpy/version.py 
b/padelpy/version.py new file mode 100644 index 0000000..f919204 --- /dev/null +++ b/padelpy/version.py @@ -0,0 +1,5 @@ + +VERSION = (0, 0, 10, "") + +__version__ = ".".join(map(str, VERSION[:-1])) +__release__ = ".".join(map(str, VERSION)) diff --git a/setup.py b/setup.py index d906720..d9fbbed 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,21 @@ +import os from setuptools import setup + +def get_readme(): + """Load README.md for display on PyPI.""" + with open('README.md') as fhandle: + return fhandle.read() + + +VERSION = get_version_info() + setup( name='padelpy', - version='0.1.10', + version=VERSION, description='A Python wrapper for PaDEL-Descriptor', + long_description=get_readme(), + long_description_content_type='text/markdown', url='https://github.com/ecrl/padelpy', author='Travis Kessler', author_email='Travis_Kessler@student.uml.edu', From 7ac0738cb4d3a35e7417168d441d1fcdb37005cc Mon Sep 17 00:00:00 2001 From: Fanwang Meng Date: Wed, 17 Nov 2021 13:29:49 -0500 Subject: [PATCH 5/8] Clean up `from_sdf` function --- padelpy/functions.py | 62 ++++++++++++-------------------------------- setup.py | 10 +++++++ 2 files changed, 27 insertions(+), 45 deletions(-) diff --git a/padelpy/functions.py b/padelpy/functions.py index d18b0e2..f4675c8 100644 --- a/padelpy/functions.py +++ b/padelpy/functions.py @@ -147,50 +147,11 @@ def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True, mdl_file )) - save_csv = True - if output_csv is None: - save_csv = False - output_csv = "{}.csv".format( - datetime.now().strftime("%Y%m%d%H%M%S%f")[:-3] - ) - - for attempt in range(3): - try: - padeldescriptor( - mol_dir=mdl_file, - d_file=output_csv, - convert3d=True, - retain3d=True, - retainorder=True, - d_2d=descriptors, - d_3d=descriptors, - fingerprints=fingerprints, - sp_timeout=timeout - ) - break - except RuntimeError as exception: - if attempt == 2: - if not save_csv: - sleep(0.5) - try: - remove(output_csv) - except FileNotFoundError as e: - 
warnings.warn(e, RuntimeWarning) - raise RuntimeError(exception) - else: - continue - - with open(output_csv, "r", encoding="utf-8") as desc_file: - reader = DictReader(desc_file) - rows = [row for row in reader] - desc_file.close() - if not save_csv: - remove(output_csv) - if len(rows) == 0: - raise RuntimeError("PaDEL-Descriptor returned no calculated values." + - " Ensure the input structure is correct.") - for row in rows: - del row["Name"] + rows = _from_mdl_lower(mol_file=mdl_file, + output_csv=output_csv, + descriptors=descriptors, + fingerprints=fingerprints, + timeout=timeout) return rows @@ -220,6 +181,16 @@ def from_sdf(sdf_file: str, sdf_file )) + rows = _from_mdl_lower(mol_file=sdf_file, + output_csv=output_csv, + descriptors=descriptors, + fingerprints=fingerprints, + timeout=timeout) + return rows + + +def _from_mdl_lower(mol_file: str, output_csv: str = None, descriptors: bool = True, + fingerprints: bool = False, timeout: int = 60) -> list: save_csv = True if output_csv is None: save_csv = False @@ -230,7 +201,7 @@ def from_sdf(sdf_file: str, for attempt in range(3): try: padeldescriptor( - mol_dir=sdf_file, + mol_dir=mol_file, d_file=output_csv, convert3d=True, retain3d=True, @@ -264,4 +235,5 @@ def from_sdf(sdf_file: str, " Ensure the input structure is correct.") for row in rows: del row["Name"] + return rows diff --git a/setup.py b/setup.py index d9fbbed..a9f7cd1 100644 --- a/setup.py +++ b/setup.py @@ -8,6 +8,16 @@ def get_readme(): return fhandle.read() +def get_version_info(): + """Read __version__ from version.py, using exec, not import.""" + fn_version = os.path.join("padelpy", "version.py") + myglobals = {} + with open(fn_version, "r") as f: + # pylint: disable=exec-used + exec(f.read(), myglobals) + return myglobals["__version__"] + + VERSION = get_version_info() setup( From 4b05c62f1a8308c018f761fb53e5dc3e4c44e17e Mon Sep 17 00:00:00 2001 From: Fanwang Meng Date: Wed, 1 Dec 2021 20:04:29 -0500 Subject: [PATCH 6/8] Update version info: 
now 0.1.11 --- padelpy/version.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/padelpy/version.py b/padelpy/version.py index f919204..5ed587d 100644 --- a/padelpy/version.py +++ b/padelpy/version.py @@ -1,5 +1,5 @@ - -VERSION = (0, 0, 10, "") - -__version__ = ".".join(map(str, VERSION[:-1])) -__release__ = ".".join(map(str, VERSION)) + +VERSION = (0, 1, 11, "") + +__version__ = ".".join(map(str, VERSION[:-1])) +__release__ = ".".join(map(str, VERSION)) From 4c30bc4af0d3496e6ac9d9901c5b87b70a5a76dc Mon Sep 17 00:00:00 2001 From: Fanwang Meng Date: Wed, 1 Dec 2021 20:09:32 -0500 Subject: [PATCH 7/8] Add usage of SDF as inputs --- README.md | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 42f486d..b2b27d2 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,28 @@ fingerprints = from_mdl('mols.mdl', fingerprints=True, descriptors=False) _ = from_mdl('mols.mdl', output_csv='descriptors.csv') ``` +### SDF to Descriptors/Fingerprints + +The "from_sdf" function accepts a filepath as an argument, and returns a list. +Each list element is a dictionary with descriptors/fingerprints corresponding to each supplied +molecule (indexed as they appear in the SDF file). + +```python +from padelpy import from_sdf + +# calculate molecular descriptors for molecules in `mols.sdf` +descriptors = from_sdf('mols.sdf') + +# in addition to descriptors, calculate PubChem fingerprints +desc_fp = from_sdf('mols.sdf', fingerprints=True) + +# only calculate fingerprints +fingerprints = from_sdf('mols.sdf', fingerprints=True, descriptors=False) + +# save descriptors to a CSV file +_ = from_sdf('mols.sdf', output_csv='descriptors.csv') +``` + ### Command Line Wrapper Alternatively, you can have more control over PaDEL-Descriptor with the command-line wrapper function. Any combination of arguments supported by PaDEL-Descriptor can be accepted by the "padeldescriptor" function. 
@@ -84,9 +106,12 @@ from padelpy import padeldescriptor # to supply a configuration file padeldescriptor(config='\\path\\to\\config') -# to supply an input and output file +# to supply an input (MDL) and output file padeldescriptor(mol_dir='molecules.mdl', d_file='descriptors.csv') +# to supply an input (SDF) and output file +padeldescriptor(mol_dir='molecules.sdf', d_file='descriptors.csv') + # a SMILES file can be supplied padeldescriptor(mol_dir='molecules.smi', d_file='descriptors.csv') From 723cf5d212115f7c4c0eb90f443d754071d82ded Mon Sep 17 00:00:00 2001 From: Fanwang Meng Date: Wed, 1 Dec 2021 20:33:12 -0500 Subject: [PATCH 8/8] Add tests for `from_sdf` --- tests/aspirin_3d.sdf | 51 ++++++++++++++++++++++++++++++++++++++++++++ tests/test.py | 14 +++++++----- 2 files changed, 60 insertions(+), 5 deletions(-) create mode 100644 tests/aspirin_3d.sdf diff --git a/tests/aspirin_3d.sdf b/tests/aspirin_3d.sdf new file mode 100644 index 0000000..9442fd1 --- /dev/null +++ b/tests/aspirin_3d.sdf @@ -0,0 +1,51 @@ +2244 + -OEChem-12012120113D + + 21 21 0 0 0 0 0 0 0999 V2000 + 1.2333 0.5540 0.7792 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.6952 -2.7148 -0.7502 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7958 -2.1843 0.8685 O 0 0 0 0 0 0 0 0 0 0 0 0 + 1.7813 0.8105 -1.4821 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0857 0.6088 0.4403 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7927 -0.5515 0.1244 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7288 1.8464 0.4133 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.1426 -0.4741 -0.2184 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0787 1.9238 0.0706 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7855 0.7636 -0.2453 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1409 -1.8536 0.1477 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1094 0.6715 -0.3113 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5305 0.5996 0.1635 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1851 2.7545 0.6593 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7247 -1.3605 -0.4564 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.5797 2.8872 0.0506 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.8374 0.8238 -0.5090 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7290 1.4184 0.8593 H 0 0 0 0 0 0 0 0 0 0 0 0 + 
4.2045 0.6969 -0.6924 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7105 -0.3659 0.6426 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.2555 -3.5916 -0.7337 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> +2244 + +$$$$ diff --git a/tests/test.py b/tests/test.py index 357ec65..5dd7a74 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,33 +1,37 @@ import unittest from collections import OrderedDict -from padelpy import from_smiles, from_mdl +from padelpy import from_mdl, from_sdf, from_smiles class TestAll(unittest.TestCase): def test_from_smiles(self): - descriptors = from_smiles('CCC') self.assertEqual(len(descriptors), 1875) self.assertAlmostEqual(float(descriptors['MW']), 44.0626, 4) self.assertEqual(int(descriptors['nC']), 3) def test_multiple_smiles(self): - smiles = ['CCC', 'CCCC'] descriptors = from_smiles(smiles) self.assertEqual(len(descriptors), 2) self.assertEqual(len(descriptors[0]), 1875) def test_errors(self): - bad_smiles = 'SJLDFGSJ' self.assertRaises(RuntimeError, from_smiles, bad_smiles) bad_smiles = ['SJLDFGSJ', 'CCC'] self.assertRaises(RuntimeError, from_smiles, bad_smiles) + def test_from_sdf(self): + """Test SDF file input functionality.""" + descriptors = from_sdf("aspirin_3d.sdf")[0] + self.assertEqual(len(descriptors), 1875) + self.assertAlmostEqual(float(descriptors['MW']), 180.04225, 4) + self.assertAlmostEqual(float(descriptors['SsCH3']), 1.2209, 4) + self.assertEqual(int(descriptors['nC']), 9) -if __name__ == '__main__': +if __name__ == '__main__': unittest.main()