diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..f49cfb3a --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# Autoformat all files using `black` +8f061e7da99eb78353e9392d6929673da5b352a3 diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml new file mode 100644 index 00000000..0a8b8e92 --- /dev/null +++ b/.github/workflows/black.yaml @@ -0,0 +1,10 @@ +name: Lint + +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: psf/black@stable \ No newline at end of file diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 00000000..876fcd74 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,6 @@ +{ + "recommendations": [ + "ms-python.black-formatter", + "ms-python.python" + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..ea9d4d09 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "[python]": { + "editor.formatOnSave": true + } +} \ No newline at end of file diff --git a/AUTHORS b/AUTHORS index 0acf2990..19708124 100644 --- a/AUTHORS +++ b/AUTHORS @@ -39,3 +39,7 @@ their first commit. GitHub handle is optional. - Cade Duckworth (cadeduckworth) +2023 +---- + +- Alexander Moriarty (@a-ws-m) diff --git a/CHANGES b/CHANGES index c89fa9da..d9cc73f0 100644 --- a/CHANGES +++ b/CHANGES @@ -6,7 +6,7 @@ Add summary of changes for each release. Use ISO 8061 dates. Reference GitHub issues numbers and PR numbers. 2023-??-?? 0.9.0 -cadeduckworth, orbeckst, VOD555 +cadeduckworth, orbeckst, VOD555, a-ws-m Changes @@ -24,6 +24,7 @@ Changes * _prepare_universe and _conclude_universe removed from EnsembleAnalysis.run() method, no longer needed (per comments, #199) * internal log_banner() now uses logger as argument (PR #247) +* use `black` formatter for codebase (#271) Enhancements diff --git a/README.rst b/README.rst index de85b25b..7c4e37c1 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ README for MDPOW =================== -|build| |cov| |docs| |zenodo| +|build| |cov| |docs| |black| |zenodo| .. |P_ow| replace:: *P*\ :sub:`OW` .. |P_cw| replace:: *P*\ :sub:`CW` @@ -74,9 +74,14 @@ Source code *MDPOW* is open source and published under the `GNU General Public License v3`_. Source code is available at https://github.com/Becksteinlab/MDPOW . +We use `black`_ for uniform code formatting. + .. _`GNU General Public License v3`: http://www.gnu.org/licenses/gpl-3.0.html +.. _`black`: https://github.com/psf/black + + Footnotes --------- @@ -99,6 +104,8 @@ Footnotes :target: https://zenodo.org/badge/latestdoi/44999898 :alt: Zenodo - +.. |black| image:: https://img.shields.io/badge/code%20style-black-000000.svg + :target: https://github.com/psf/black + :alt: black .. _INSTALL: INSTALL.rst diff --git a/doc/examples/benzene/session.py b/doc/examples/benzene/session.py index b0ff5276..da1ac8d9 100644 --- a/doc/examples/benzene/session.py +++ b/doc/examples/benzene/session.py @@ -1,31 +1,39 @@ import mdpow.equil + S = mdpow.equil.WaterSimulation(molecule="BNZ") S.topology("benzene.itp") S.solvate(struct="benzene.pdb") S.energy_minimize() -S.MD_relaxed(runtime=5) # should be at least 1e3 ps for production not just 5 ps +S.MD_relaxed(runtime=5) # should be at least 1e3 ps for production not just 5 ps # run simulation externally or use MDrunner # (see docs for using mpi etc) import gromacs -r = gromacs.run.MDrunner(dirname=S.dirs['MD_relaxed'], deffnm="md", c="md.pdb", cpi=True, append=True, v=True) -r.run() # runs mdrun in the python shell + +r = gromacs.run.MDrunner( + dirname=S.dirs["MD_relaxed"], deffnm="md", c="md.pdb", cpi=True, append=True, v=True +) +r.run() # runs mdrun in the python shell -S.MD(runtime=10, qscript=['local.sh']) # should be at least 10e3 ps for production, not just 10 ps +S.MD( + runtime=10, qscript=["local.sh"] +) # should be at least 10e3 ps for production, not just 10 ps # run simulation -r = gromacs.run.MDrunner(dirname=S.dirs['MD_NPT'], deffnm="md", c="md.pdb", cpi=True, append=True, v=True) -r.run() # runs mdrun in the python shell +r = gromacs.run.MDrunner( + dirname=S.dirs["MD_NPT"], deffnm="md", c="md.pdb", cpi=True, append=True, v=True +) +r.run() # runs mdrun in the python shell import mdpow.fep + gwat = mdpow.fep.Ghyd(simulation=S, runtime=10) gwat.setup() # run multiple simulations on cluster - O = mdpow.equil.OctanolSimulation(molecule="BNZ") O.topology("benzene.itp") O.solvate(struct="benzene.pdb") diff --git a/doc/sphinx/source/conf.py b/doc/sphinx/source/conf.py index d32857d0..b55c6df5 100644 --- a/doc/sphinx/source/conf.py +++ b/doc/sphinx/source/conf.py @@ -14,6 +14,7 @@ import sys import os import datetime + # https://sphinx-rtd-theme.readthedocs.io/en/stable/ import sphinx_rtd_theme @@ -22,45 +23,51 @@ # is relative to the documentation root, use os.path.abspath to make it # absolute, like shown here. # make sure sphinx always uses the current branch -sys.path.insert(0, os.path.abspath('../../..')) +sys.path.insert(0, os.path.abspath("../../..")) # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. # 'sphinx.ext.pngmath', 'sphinx.ext.jsmath' -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', - 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', - 'sphinx_rtd_theme'] +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinx_rtd_theme", +] -mathjax_path = 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML' +mathjax_path = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML" # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.txt' +source_suffix = ".txt" # The encoding of source files. -#source_encoding = 'utf-8' +# source_encoding = 'utf-8' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'MDPOW' +project = "MDPOW" now = datetime.datetime.now() -copyright = u'2010–{}, Shujie Fan, Ian Kenney, Alia Lescoulie, Bogdan Iorga, and Oliver Beckstein'.format(now.year) +copyright = "2010–{}, Shujie Fan, Ian Kenney, Alia Lescoulie, Bogdan Iorga, and Oliver Beckstein".format( + now.year +) # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # Dynamically calculate the version -packageversion = __import__('mdpow').__version__ +packageversion = __import__("mdpow").__version__ # The short X.Y version. -version = '.'.join(packageversion.split('.')[:2]) +version = ".".join(packageversion.split(".")[:2]) # The full version, including alpha/beta/rc tags. ##release = packageversion @@ -73,7 +80,7 @@ except ValueError: ver, rc = packageversion, None -if not rc or rc.startswith('0'): +if not rc or rc.startswith("0"): release = ver else: release = ver + "+" + rc.replace(".dirty", "") @@ -81,73 +88,71 @@ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of documents that shouldn't be included in the build. -#unused_docs = [] +# unused_docs = [] # List of directories, relative to source directory, that shouldn't be searched # for source files. exclude_trees = [] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" html_theme_options = { - 'logo_only': False, - 'display_version': True, - 'prev_next_buttons_location': 'bottom', - 'style_external_links': False, - 'style_nav_header_background': 'white', + "logo_only": False, + "display_version": True, + "prev_next_buttons_location": "bottom", + "style_external_links": False, + "style_nav_header_background": "white", # Toc options - 'collapse_navigation': True, - 'sticky_navigation': True, - 'navigation_depth': 4, - 'includehidden': True, - 'titles_only': False, + "collapse_navigation": True, + "sticky_navigation": True, + "navigation_depth": 4, + "includehidden": True, + "titles_only": False, } # Add any paths that contain custom themes here, relative to this directory. -html_theme_path = [ - sphinx_rtd_theme.get_html_theme_path() -] +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. @@ -161,98 +166,102 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -html_last_updated_fmt = '%b %d, %Y' +html_last_updated_fmt = "%b %d, %Y" # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_use_modindex = True +# html_use_modindex = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = '' +# html_file_suffix = '' # Output file base name for HTML help builder. -htmlhelp_basename = 'MDPOWdoc' +htmlhelp_basename = "MDPOWdoc" # -- Options for LaTeX output -------------------------------------------------- # The paper size ('letter' or 'a4'). -#latex_paper_size = 'letter' +# latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' +# latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'MDpow.tex', u'MDpow Documentation', - u'Ian Kenney, Bogdan Iorga, and Oliver Beckstein', 'manual'), + ( + "index", + "MDpow.tex", + "MDpow Documentation", + "Ian Kenney, Bogdan Iorga, and Oliver Beckstein", + "manual", + ), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # Additional stuff for the LaTeX preamble. -#latex_preamble = '' +# latex_preamble = '' # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_use_modindex = True - +# latex_use_modindex = True # Options for ext.intersphinx # --------------------------- # intersphinx: reference standard lib and RecSQL # http://sphinx.pocoo.org/latest/ext/intersphinx.html -intersphinx_mapping = {'https://docs.python.org/': None, - 'https://numpy.org/doc/stable/': None, - 'https://docs.scipy.org/doc/scipy/reference/': None, - 'https://gromacswrapper.readthedocs.io/en/latest': None, - 'https://docs.mdanalysis.org/stable/': None, - 'https://www.rdkit.org/docs/': None, - 'https://pandas.pydata.org/docs/': None, - 'https://seaborn.pydata.org': None, - 'https://cairosvg.org/documentation/': None, - 'https://svgutils.readthedocs.io/en/latest/': None, - 'https://pypdf.readthedocs.io/en/stable/': None, - } - +intersphinx_mapping = { + "https://docs.python.org/": None, + "https://numpy.org/doc/stable/": None, + "https://docs.scipy.org/doc/scipy/reference/": None, + "https://gromacswrapper.readthedocs.io/en/latest": None, + "https://docs.mdanalysis.org/stable/": None, + "https://www.rdkit.org/docs/": None, + "https://pandas.pydata.org/docs/": None, + "https://seaborn.pydata.org": None, + "https://cairosvg.org/documentation/": None, + "https://svgutils.readthedocs.io/en/latest/": None, + "https://pypdf.readthedocs.io/en/stable/": None, +} # Options for ext.autodoc @@ -262,4 +271,4 @@ # This value selects what content will be inserted into the main body of an autoclass directive. # "class", "init", "both" autoclass_content = "both" -automodule_content = "both" \ No newline at end of file +automodule_content = "both" diff --git a/mdpow/__init__.py b/mdpow/__init__.py index 63e1a96f..371ec35f 100644 --- a/mdpow/__init__.py +++ b/mdpow/__init__.py @@ -5,10 +5,12 @@ from . import log from ._version import get_versions -__version__ = get_versions()['version'] + +__version__ = get_versions()["version"] del get_versions -__all__ = ['fep', 'equil'] +__all__ = ["fep", "equil"] + def create_logger(logfile="mdpow.log"): """Create the default logger. @@ -16,19 +18,25 @@ def create_logger(logfile="mdpow.log"): Channels the output from :mod:`mdpow`, :mod:`gromacs`, and :mod:`numkit` into the file *logfile*. """ - logger = log.create('mdpow', logfile) - log.create('numkit', logfile) # capture numkit messages to same file - log.create('gromacs', logfile) # and the GromacsWrapper messages + logger = log.create("mdpow", logfile) + log.create("numkit", logfile) # capture numkit messages to same file + log.create("gromacs", logfile) # and the GromacsWrapper messages return logger + def log_banner(logger): """Log program name and licence at INFO level.""" logger.info("MDPOW %s starting.", __version__) - logger.info("Copyright (c) 2010-2023 Shujie Fan, Ian Kenney, " - "Alia Lescoulie, Cade Duckworth, Bogdan Iorga, and " - "Oliver Beckstein") + logger.info( + "Copyright (c) 2010-2023 Shujie Fan, Ian Kenney, " + "Alia Lescoulie, Cade Duckworth, Bogdan Iorga, and " + "Oliver Beckstein" + ) logger.info("Released under the GNU Public Licence, version 3.") - logger.info("For bug reports and help: https://github.com/Becksteinlab/MDPOW/issues") + logger.info( + "For bug reports and help: https://github.com/Becksteinlab/MDPOW/issues" + ) + logger = create_logger() log_banner(logger) @@ -43,5 +51,4 @@ def log_banner(logger): #: Avogadro's constant |NA| in mol^-1 (`NA NIST value`_). N_AVOGADRO = 6.02214179e23 #: Boltzmann's constant |kB| in kJ mol^-1 (`kB NIST value`_). -kBOLTZ = 1.3806504e-23 *1e-3 * N_AVOGADRO - +kBOLTZ = 1.3806504e-23 * 1e-3 * N_AVOGADRO diff --git a/mdpow/_version.py b/mdpow/_version.py index 2484e7bd..e5328388 100644 --- a/mdpow/_version.py +++ b/mdpow/_version.py @@ -1,4 +1,3 @@ - # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -57,17 +56,18 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" + def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f + return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None @@ -75,10 +75,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) break except EnvironmentError: e = sys.exc_info()[1] @@ -115,16 +118,22 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -180,7 +189,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -189,7 +198,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = set([r for r in refs if re.search(r"\d", r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -197,19 +206,26 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } @register_vcs_handler("git", "pieces_from_vcs") @@ -224,8 +240,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -233,10 +248,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -259,17 +283,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag @@ -278,10 +301,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -292,13 +317,13 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces @@ -329,8 +354,7 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -444,11 +468,13 @@ def render_git_describe_long(pieces): def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } if not style or style == "default": style = "pep440" # the default @@ -468,9 +494,13 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style) - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } def get_versions(): @@ -484,8 +514,7 @@ def get_versions(): verbose = cfg.verbose try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass @@ -494,13 +523,16 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. - for i in cfg.versionfile_source.split('/'): + for i in cfg.versionfile_source.split("/"): root = os.path.dirname(root) except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) @@ -514,6 +546,10 @@ def get_versions(): except NotThisMethod: pass - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/mdpow/analysis/dihedral.py b/mdpow/analysis/dihedral.py index 41996f22..05ca6c53 100644 --- a/mdpow/analysis/dihedral.py +++ b/mdpow/analysis/dihedral.py @@ -14,7 +14,7 @@ import logging -logger = logging.getLogger('mdpow.analysis.dihedral') +logger = logging.getLogger("mdpow.analysis.dihedral") class DihedralAnalysis(EnsembleAnalysis): @@ -48,7 +48,9 @@ def __init__(self, dihedral_groups: List[EnsembleAtomGroup]): self.check_groups_from_common_ensemble(dihedral_groups) self.check_dihedral_inputs(dihedral_groups) super(DihedralAnalysis, self).__init__(dihedral_groups[0].ensemble) - self.g1, self.g2, self.g3, self.g4, self.names = self._reorg_groups(dihedral_groups) + self.g1, self.g2, self.g3, self.g4, self.names = self._reorg_groups( + dihedral_groups + ) @staticmethod def _reorg_groups(groups: List[EnsembleAtomGroup]): @@ -63,14 +65,30 @@ def _reorg_groups(groups: List[EnsembleAtomGroup]): ag2 += [mda.AtomGroup([ag[1]]) for ag in [group[k] for k in group.keys()]] ag3 += [mda.AtomGroup([ag[2]]) for ag in [group[k] for k in group.keys()]] ag4 += [mda.AtomGroup([ag[3]]) for ag in [group[k] for k in group.keys()]] - names.append('-'.join([ag1[-1].atoms[0].name, ag2[-1].atoms[0].name, - ag3[-1].atoms[0].name, ag4[-1].atoms[0].name])) + names.append( + "-".join( + [ + ag1[-1].atoms[0].name, + ag2[-1].atoms[0].name, + ag3[-1].atoms[0].name, + ag4[-1].atoms[0].name, + ] + ) + ) for k in group.keys(): ag_keys.append((names[-1], k[0], k[1], k[2])) - eag1 = EnsembleAtomGroup({ag_keys[i]: ag1[i] for i in range(len(ag_keys))}, groups[0].ensemble) - eag2 = EnsembleAtomGroup({ag_keys[i]: ag2[i] for i in range(len(ag_keys))}, groups[0].ensemble) - eag3 = EnsembleAtomGroup({ag_keys[i]: ag3[i] for i in range(len(ag_keys))}, groups[0].ensemble) - eag4 = EnsembleAtomGroup({ag_keys[i]: ag4[i] for i in range(len(ag_keys))}, groups[0].ensemble) + eag1 = EnsembleAtomGroup( + {ag_keys[i]: ag1[i] for i in range(len(ag_keys))}, groups[0].ensemble + ) + eag2 = EnsembleAtomGroup( + {ag_keys[i]: ag2[i] for i in range(len(ag_keys))}, groups[0].ensemble + ) + eag3 = EnsembleAtomGroup( + {ag_keys[i]: ag3[i] for i in range(len(ag_keys))}, groups[0].ensemble + ) + eag4 = EnsembleAtomGroup( + {ag_keys[i]: ag4[i] for i in range(len(ag_keys))}, groups[0].ensemble + ) return eag1, eag2, eag3, eag4, names @staticmethod @@ -78,14 +96,22 @@ def check_dihedral_inputs(selections): for group in selections: for k in group.keys(): if len(group[k]) != 4: - msg = ("Dihedral calculations require AtomGroups with " - f"only 4 atoms, {len(group)} selected") + msg = ( + "Dihedral calculations require AtomGroups with " + f"only 4 atoms, {len(group)} selected" + ) logger.error(msg) raise SelectionError(msg) def _prepare_ensemble(self): - self._col = ['selection', 'solvent', 'interaction', - 'lambda', 'time', 'dihedral'] + self._col = [ + "selection", + "solvent", + "interaction", + "lambda", + "time", + "dihedral", + ] self.results = pd.DataFrame(columns=self._col) self._res_dict = {key: [] for key in self._col} @@ -99,8 +125,9 @@ def _single_frame(self): cord2 = np.concatenate(tuple([cord_dict2[k] for k in key_list])) cord3 = np.concatenate(tuple([cord_dict3[k] for k in key_list])) cord4 = np.concatenate(tuple([cord_dict4[k] for k in key_list])) - angle = calc_dihedrals(cord1, cord2, cord3, cord4, - box=self.g1[key_list[0]].dimensions) + angle = calc_dihedrals( + cord1, cord2, cord3, cord4, box=self.g1[key_list[0]].dimensions + ) angle = np.rad2deg(angle) for i in range(len(self.names)): result = list(key_list[i]) + [self._ts.time, angle[i]] diff --git a/mdpow/analysis/ensemble.py b/mdpow/analysis/ensemble.py index d4ba40a6..308f5115 100644 --- a/mdpow/analysis/ensemble.py +++ b/mdpow/analysis/ensemble.py @@ -9,17 +9,22 @@ import MDAnalysis as mda from MDAnalysis.lib.log import ProgressBar -from MDAnalysis.exceptions import FileFormatWarning, NoDataError, MissingDataWarning, SelectionError +from MDAnalysis.exceptions import ( + FileFormatWarning, + NoDataError, + MissingDataWarning, + SelectionError, +) from gromacs.utilities import in_dir import logging -logger = logging.getLogger('mdpow._ensemble') +logger = logging.getLogger("mdpow._ensemble") class Ensemble(object): - """ Collection of related :class:`MDAnalysis.Universe ` + """Collection of related :class:`MDAnalysis.Universe ` objects. Stores systems produced by running mdpow-fep organized @@ -86,9 +91,14 @@ class Ensemble(object): .. versionadded:: 0.8.0 """ - def __init__(self, dirname=None, solvents=('octanol', 'water'), - topology_paths=None, interactions=('Coulomb', 'VDW'), - **universe_kwargs): + def __init__( + self, + dirname=None, + solvents=("octanol", "water"), + topology_paths=None, + interactions=("Coulomb", "VDW"), + **universe_kwargs, + ): self.top_dict = topology_paths self._num_systems = 0 self._ensemble = {} @@ -102,8 +112,7 @@ def __init__(self, dirname=None, solvents=('octanol', 'water'), if not os.path.exists(dirname): logger.error(f"Directory {dirname} does not exist") - raise FileNotFoundError(errno.ENOENT, 'Directory does not' - 'exist', dirname) + raise FileNotFoundError(errno.ENOENT, "Directory does not" "exist", dirname) self._ensemble_dir = dirname self._build_ensemble() @@ -119,7 +128,9 @@ def __getitem__(self, index): return self._ensemble[index] @staticmethod - def _load_universe_from_dir(solv_dir=None, **universe_kwargs) -> Optional[mda.Universe]: + def _load_universe_from_dir( + solv_dir=None, **universe_kwargs + ) -> Optional[mda.Universe]: """Loads system simulation files in directory into an :class:`MDAnalysis.Universe ` @@ -141,10 +152,12 @@ def _sort_trajectories(trajectories: list) -> list: def _sort_topologies(topologies: list) -> list: """sorts list of trajectory files with .tpr first""" tops = [] - logger.info('If more than one topology is present the tpr will be the one used') + logger.info( + "If more than one topology is present the tpr will be the one used" + ) for i in range(len(topologies)): f = topologies[i] - if f.endswith('.tpr'): + if f.endswith(".tpr"): topologies.pop(i) tops = [f] + topologies break @@ -159,16 +172,20 @@ def _sort_topologies(topologies: list) -> list: top = [solv_dir] for file in cur_dir: - if file.endswith('.xtc'): + if file.endswith(".xtc"): # Saving trajectory directories trj.append(file) - elif (file.endswith('gro') or file.endswith('.tpr') or file.endswith('gro.bz2') - or file.endswith('gro.gz')) and solv_dir is None: + elif ( + file.endswith("gro") + or file.endswith(".tpr") + or file.endswith("gro.bz2") + or file.endswith("gro.gz") + ) and solv_dir is None: # Saving topology directories top.append(file) if len(top) == 0 or len(trj) == 0: - logger.warning('No MD files detected in %s', os.curdir) + logger.warning("No MD files detected in %s", os.curdir) return trj = _sort_trajectories(trj) @@ -177,8 +194,14 @@ def _sort_topologies(topologies: list) -> list: try: return mda.Universe(os.path.abspath(top[0]), trj, **universe_kwargs) - except (ValueError, FileFormatWarning, NoDataError, MissingDataWarning, OSError) as err: - logger.error(f'{err} raised while loading {top[0]} {trj} in dir {cur_dir}') + except ( + ValueError, + FileFormatWarning, + NoDataError, + MissingDataWarning, + OSError, + ) as err: + logger.error(f"{err} raised while loading {top[0]} {trj} in dir {cur_dir}") raise NoDataError def keys(self): @@ -193,9 +216,11 @@ def _build_ensemble(self): Run if :code:`dirname` argument is given when initializing the class. First enters FEP directory, then traverses solvent and interaction directories to search lambda directories for system files.""" - fep_dir = os.path.join(self._ensemble_dir, 'FEP') + fep_dir = os.path.join(self._ensemble_dir, "FEP") solv_top_path = None - for solvent in self._solvents: # Ugly set of loops, may have to find way to clean up + for ( + solvent + ) in self._solvents: # Ugly set of loops, may have to find way to clean up if self.top_dict is not None: solv_top_path = self.top_dict[solvent] for dirs in self._interactions: # Attribute folder names @@ -206,10 +231,11 @@ def _build_ensemble(self): for file in sorted(files): # Traversing lambda directories if os.path.isdir(file): with in_dir(file, create=False): - u = self._load_universe_from_dir(solv_dir=solv_top_path, - **self.unv_kwargs) + u = self._load_universe_from_dir( + solv_dir=solv_top_path, **self.unv_kwargs + ) if u is None: - logger.warning(f'No system loaded in {file}') + logger.warning(f"No system loaded in {file}") else: self.add_system((solvent, dirs, file), u) @@ -236,20 +262,33 @@ def select_atoms(self, *args, **kwargs): Uses the same `selection commands `_ - as MDAnalysis, and has the same keys as the :class:`~mdpow.analysis.ensemble.Ensemble`""" + as MDAnalysis, and has the same keys as the :class:`~mdpow.analysis.ensemble.Ensemble` + """ selections = {} for key in self.keys(): try: ag = self[key].select_atoms(*args, **kwargs) except SelectionError as err: - logger.error("%r on system %r with selection settings %r %r", err, key, args, kwargs) + logger.error( + "%r on system %r with selection settings %r %r", + err, + key, + args, + kwargs, + ) raise else: selections[key] = ag return EnsembleAtomGroup(selections, ensemble=self) - def select_systems(self, keys=None, solvents=None, interactions=None, - lambdas=None, lambda_range=None): + def select_systems( + self, + keys=None, + solvents=None, + interactions=None, + lambdas=None, + lambda_range=None, + ): """ Select specific subset of systems and returns them in an Ensemble. @@ -318,10 +357,14 @@ def select_systems(self, keys=None, solvents=None, interactions=None, elif lambda_range is not None: # Selecting range of lambdas for k in self.keys(): - if lambda_range[0] <= int(k[2]) / 1000 <= lambda_range[1]: + if ( + lambda_range[0] + <= int(k[2]) / 1000 + <= lambda_range[1] + ): new_key.append((s, i, k[2])) for k in new_key: - logger.info('adding system %r to ensemble', k) + logger.info("adding system %r to ensemble", k) new_ens.add_system(k, universe=self[k]) new_ens._ensemble_dir = self._ensemble_dir return new_ens @@ -374,13 +417,20 @@ def select_atoms(self, *args, **kwargs): Uses the same `selection commands `_ - as MDAnalysis, and has the same keys as :class:`~mdpow.analysis.ensemble.EnsembleAtomGroup`""" + as MDAnalysis, and has the same keys as :class:`~mdpow.analysis.ensemble.EnsembleAtomGroup` + """ selections = {} for key in self.keys(): try: ag = self[key].select_atoms(*args, **kwargs) except SelectionError as err: - logger.error("%r on system %r with selection settings %r %r", err, key, args, kwargs) + logger.error( + "%r on system %r with selection settings %r %r", + err, + key, + args, + kwargs, + ) raise else: selections[key] = ag @@ -458,39 +508,41 @@ def _setup_system(self, key, start=None, stop=None, step=None): def _setup_frames(self, trajectory): self._trajectory = trajectory - start, stop, step = trajectory.check_slice_indices(self.start, self.stop, self.step) + start, stop, step = trajectory.check_slice_indices( + self.start, self.stop, self.step + ) self.n_frames = len(range(start, stop, step)) self.frames = np.zeros(self.n_frames, dtype=int) self.times = np.zeros(self.n_frames) def _single_universe(self): - """Calculations on a single :class:`MDAnalysis.Universe - ` object. - - Run on each :class:`MDAnalysis.Universe - ` - in the :class:`~mdpow.analysis.ensemble.Ensemble` - during when :meth:`run` in called. - :exc:`NotImplementedError` will detect whether - :meth:`~EnsembleAnalysis._single_universe` - or :meth:`~EnsembleAnalysis._single_frame` - should be implemented, based on which is defined - in the :class:`~mdpow.analysis.ensemble.EnsembleAnalysis`. + """Calculations on a single :class:`MDAnalysis.Universe + ` object. + + Run on each :class:`MDAnalysis.Universe + ` + in the :class:`~mdpow.analysis.ensemble.Ensemble` + during when :meth:`run` in called. + :exc:`NotImplementedError` will detect whether + :meth:`~EnsembleAnalysis._single_universe` + or :meth:`~EnsembleAnalysis._single_frame` + should be implemented, based on which is defined + in the :class:`~mdpow.analysis.ensemble.EnsembleAnalysis`. """ raise NotImplementedError def _single_frame(self): """Calculate data from a single frame of trajectory. - Called on each frame for each - :class:`MDAnalysis.Universe ` - in the :class:`~mdpow.analysis.ensemble.Ensemble`. - - :exc:`NotImplementedError` will detect whether - :meth:`~EnsembleAnalysis._single_universe` - or :meth:`~EnsembleAnalysis._single_frame` - should be implemented, based on which is defined - in the :class:`~mdpow.analysis.ensemble.EnsembleAnalysis`. + Called on each frame for each + :class:`MDAnalysis.Universe ` + in the :class:`~mdpow.analysis.ensemble.Ensemble`. + + :exc:`NotImplementedError` will detect whether + :meth:`~EnsembleAnalysis._single_universe` + or :meth:`~EnsembleAnalysis._single_frame` + should be implemented, based on which is defined + in the :class:`~mdpow.analysis.ensemble.EnsembleAnalysis`. """ raise NotImplementedError @@ -521,15 +573,15 @@ def _conclude_ensemble(self): pass # pragma: no cover def run(self, start=None, stop=None, step=None): - """Runs :meth:`~EnsembleAnalysis._single_universe` + """Runs :meth:`~EnsembleAnalysis._single_universe` on each system or :meth:`~EnsembleAnalysis._single_frame` on each frame in the system. - First iterates through keys of ensemble, then runs - :meth:`~EnsembleAnalysis._setup_system`which defines + First iterates through keys of ensemble, then runs + :meth:`~EnsembleAnalysis._setup_system`which defines the system and trajectory. Then iterates over each - system universe or trajectory frames of each universe - as defined by :meth:`~EnsembleAnalysis._single_universe` + system universe or trajectory frames of each universe + as defined by :meth:`~EnsembleAnalysis._single_universe` or :meth:`~EnsembleAnalysis._single_frame`. """ logger.info("Setting up systems") @@ -539,8 +591,13 @@ def run(self, start=None, stop=None, step=None): try: self._single_universe() except NotImplementedError: - for i, ts in enumerate(ProgressBar(self._trajectory[self.start:self.stop:self.step], verbose=True, - postfix=f'running system {self._key}')): + for i, ts in enumerate( + ProgressBar( + self._trajectory[self.start : self.stop : self.step], + verbose=True, + postfix=f"running system {self._key}", + ) + ): self._frame_index = i self._ts = ts self.frames[i] = ts.frame @@ -566,9 +623,11 @@ def check_groups_from_common_ensemble(groups: List[EnsembleAtomGroup]): for j in range(i + 1, len(groups)): # Checking if EnsembleAtomGroup.ensemble references same object in memory if groups[i].ensemble is not groups[j].ensemble: - msg = ('Dihedral selections from different Ensembles, ' - 'ensure that all EnsembleAtomGroups are created ' - 'from the same Ensemble. ' - f'mismatch: group[{i}].ensemble != group[{j}].ensemble') + msg = ( + "Dihedral selections from different Ensembles, " + "ensure that all EnsembleAtomGroups are created " + "from the same Ensemble. " + f"mismatch: group[{i}].ensemble != group[{j}].ensemble" + ) logger.error(msg) raise ValueError(msg) diff --git a/mdpow/analysis/solvation.py b/mdpow/analysis/solvation.py index 6ed6137a..47be995d 100644 --- a/mdpow/analysis/solvation.py +++ b/mdpow/analysis/solvation.py @@ -13,7 +13,7 @@ import logging -logger = logging.getLogger('mdpow.dihedral') +logger = logging.getLogger("mdpow.dihedral") class SolvationAnalysis(EnsembleAnalysis): @@ -48,7 +48,13 @@ class SolvationAnalysis(EnsembleAnalysis): solv_dist = SolvationAnalysis(solute, solvent, [1.2, 2.4]).run(stop=10) """ - def __init__(self, solute: EnsembleAtomGroup, solvent: EnsembleAtomGroup, distances: List[float]): + + def __init__( + self, + solute: EnsembleAtomGroup, + solvent: EnsembleAtomGroup, + distances: List[float], + ): self.check_groups_from_common_ensemble([solute, solvent]) super(SolvationAnalysis, self).__init__(solute.ensemble) self._solute = solute @@ -56,21 +62,37 @@ def __init__(self, solute: EnsembleAtomGroup, solvent: EnsembleAtomGroup, distan self._dists = distances def _prepare_ensemble(self): - self._col = ['distance', 'solvent', 'interaction', - 'lambda', 'time', 'N_solvent'] + self._col = [ + "distance", + "solvent", + "interaction", + "lambda", + "time", + "N_solvent", + ] self.results = pd.DataFrame(columns=self._col) self._res_dict = {key: [] for key in self._col} def _single_frame(self): solute = self._solute[self._key] solvent = self._solvent[self._key] - pairs, distances = capped_distance(solute.positions, solvent.positions, - max(self._dists), box=self._ts.dimensions) + pairs, distances = capped_distance( + solute.positions, + solvent.positions, + max(self._dists), + box=self._ts.dimensions, + ) solute_i, solvent_j = np.transpose(pairs) for d in self._dists: close_solv_atoms = solvent[solvent_j[distances < d]] - result = [d, self._key[0], self._key[1],self._key[2], - self._ts.time, close_solv_atoms.n_atoms] + result = [ + d, + self._key[0], + self._key[1], + self._key[2], + self._ts.time, + close_solv_atoms.n_atoms, + ] for i in range(len(self._col)): self._res_dict[self._col[i]].append(result[i]) diff --git a/mdpow/config.py b/mdpow/config.py index 3d2e2878..18f2890d 100644 --- a/mdpow/config.py +++ b/mdpow/config.py @@ -104,6 +104,7 @@ import warnings import logging + logger = logging.getLogger("mdpow.config") # Reading of configuration files @@ -112,7 +113,8 @@ #: Locations of default run input files and configurations. defaults = { "runinput": resource_filename(__name__, "templates/runinput.yml"), - } +} + class NoSectionError(ValueError): """Section entry is missing. @@ -120,13 +122,16 @@ class NoSectionError(ValueError): .. versionadded:: 0.8.0 """ + # not used at the moment # class NoOptionError(ValueError): # """Option entry is missing from section""" + class NoOptionWarning(UserWarning): """Warning that an option is missing.""" + def merge_dicts(user, default): """Merge two dictionaries recursively. @@ -186,7 +191,7 @@ def merge(self, fn): return self.conf def write(self, filename): - with open(filename, 'w') as f: + with open(filename, "w") as f: f.write(yaml.dump(self.conf)) def get(self, section, option): @@ -213,10 +218,12 @@ def get(self, section, option): value = value if value != "None" else None # still needed?? except KeyError: # Returning None has been standard behavior. - #raise NoOptionError(f"Config file section {section} contains " + # raise NoOptionError(f"Config file section {section} contains " # f"no option {option}.") - msg = (f"Config file section {section} contains " - f"no option {option}. Using 'None'.") + msg = ( + f"Config file section {section} contains " + f"no option {option}. Using 'None'." + ) warnings.warn(msg, category=NoOptionWarning) logger.warning(msg) value = None @@ -240,8 +247,9 @@ def getpath(self, section, option): instead of raising a :exc:`TypeError`. """ item = self.get(section, option) - return os.path.expanduser( - os.path.expandvars(item)) if item is not None else None + return ( + os.path.expanduser(os.path.expandvars(item)) if item is not None else None + ) def findfile(self, section, option): """Return location of a file ``option`` or ``None``. @@ -263,8 +271,7 @@ def getlist(self, section, option): instead of raising a :exc:`TypeError`. """ item = self.get(section, option) - return [x.strip() - for x in str(item).split(",")] if item is not None else [] + return [x.strip() for x in str(item).split(",")] if item is not None else [] def getarray(self, section, option): """Return option as a numpy array of floats or ``np.array([])``. @@ -277,6 +284,7 @@ def getarray(self, section, option): """ return np.asarray(self.getlist(section, option), dtype=float) + # def getintarray(self, section, option): # """Return option as a numpy array of integers. # @@ -285,6 +293,7 @@ def getarray(self, section, option): # """ # return np.asarray(self.getlist(section, option), dtype=int) + def get_configuration(filename=None): """Reads and parses a run input config file. @@ -294,47 +303,61 @@ def get_configuration(filename=None): cfg.readfp(open(defaults["runinput"])) logger.debug("Loaded runinput defaults from %r", defaults["runinput"]) if filename is not None: - cfg.merge(open(filename)) # override package defaults + cfg.merge(open(filename)) # override package defaults logger.debug("Loaded user runinput from %r (replacing defaults)", filename) else: - logger.warning("Running with package defaults for the run; you should supply a runinput file!") + logger.warning( + "Running with package defaults for the run; you should supply a runinput file!" + ) return cfg + def modify_gromacs_environment(name, value): from gromacs.environment import flags + if flags[name] != value: - logger.warning("Changing GromacsWrapper environment: flags[%(name)r] = %(value)r", vars()) + logger.warning( + "Changing GromacsWrapper environment: flags[%(name)r] = %(value)r", vars() + ) flags[name] = value + def set_gromacsoutput(cfg): # maybe allow setting this on a per protocol basis? - modify_gromacs_environment('capture_output', not cfg.getboolean('setup', 'gromacsoutput')) + modify_gromacs_environment( + "capture_output", not cfg.getboolean("setup", "gromacsoutput") + ) # Functions to locate template files # ---------------------------------- + def _generate_template_dict(dirname): """Generate a list of included top-level files *and* extract them to a temp space. Only lists files and directories at the *top level* of the *dirname*; however, all directories are extracted recursively and will be available. """ - return dict((resource_basename(fn), resource_filename(__name__, dirname+'/'+fn)) - for fn in resource_listdir(__name__, dirname) - if not fn.endswith('~')) + return dict( + (resource_basename(fn), resource_filename(__name__, dirname + "/" + fn)) + for fn in resource_listdir(__name__, dirname) + if not fn.endswith("~") + ) + def resource_basename(resource): - """Last component of a resource (which always uses '/' as sep).""" - if resource.endswith('/'): - resource = resource[:-1] - parts = resource.split('/') - return parts[-1] + """Last component of a resource (which always uses '/' as sep).""" + if resource.endswith("/"): + resource = resource[:-1] + parts = resource.split("/") + return parts[-1] # Functions to access configuration data # -------------------------------------- + def get_template(t): """Find template file *t* and return its real path. @@ -358,9 +381,10 @@ def get_template(t): # Not sure if this is the best way to get asiterables templates = [_get_template(s) for s in gromacs.utilities.asiterable(t)] if len(templates) == 1: - return templates[0] + return templates[0] return templates + def get_templates(t): """Find template file(s) *t* and return their real paths. @@ -381,26 +405,27 @@ def get_templates(t): """ return [_get_template(s) for s in gromacs.utilities.asiterable(t)] + def _get_template(t): """Return a single template *t*.""" - if os.path.exists(t): # 1) Is it an accessible file? + if os.path.exists(t): # 1) Is it an accessible file? pass - else: # 2) check the packaged template files + else: # 2) check the packaged template files _t = os.path.basename(t) _t_found = False for p in templates.values(): if _t == os.path.basename(p): t = p - _t_found = True # NOTE: in principle this could match multiple - break # times if more than one template dir existed. - if not _t_found: # 3) try it as a key into templates + _t_found = True # NOTE: in principle this could match multiple + break # times if more than one template dir existed. + if not _t_found: # 3) try it as a key into templates try: t = templates[t] except KeyError: pass else: _t_found = True - if not _t_found: # 4) nothing else to try... or introduce a PATH? + if not _t_found: # 4) nothing else to try... or introduce a PATH? raise ValueError("Failed to locate the template file %(t)r." % vars()) return os.path.realpath(t) @@ -408,19 +433,21 @@ def _get_template(t): # utility functions (from gromacs.utilities) # Copied so that config does not have a dependency on gromacs.utilities + def iterable(obj): """Returns ``True`` if *obj* can be iterated over and is *not* a string.""" if isinstance(obj, str): return False # avoid iterating over characters of a string - if hasattr(obj, 'next'): - return True # any iterator will do + if hasattr(obj, "next"): + return True # any iterator will do try: - len(obj) # anything else that might work + len(obj) # anything else that might work except TypeError: return False return True + def asiterable(obj): """Returns obj so that it can be iterated over; a string is *not* treated as iterable""" if not iterable(obj): @@ -429,9 +456,9 @@ def asiterable(obj): # Setting up configuration variables and paths -#--------------------------------------------- +# --------------------------------------------- -templates = _generate_template_dict('templates') +templates = _generate_template_dict("templates") """*POW* comes with a number of templates for run input files and queuing system scripts. They are provided as a convenience and examples but **WITHOUT ANY GUARANTEE FOR CORRECTNESS OR SUITABILITY FOR @@ -459,8 +486,8 @@ def asiterable(obj): #: List of all topology files that are included in the package. #: (includes force field files under ``top/oplsaa.ff``) -topfiles = _generate_template_dict('top') -topfiles.update(_generate_template_dict('top/oplsaa.ff')) # added manually! +topfiles = _generate_template_dict("top") +topfiles.update(_generate_template_dict("top/oplsaa.ff")) # added manually! # Find the top include dir by looking for an important file 'ffoplsaa.itp'. # Since Gromacs 4.5.x, force fields are ONLY found in @@ -471,21 +498,30 @@ def asiterable(obj): #: The package's include directory for :func:`gromacs.grompp`; the #: environment variable :envvar:`GMXLIB` is set to :data:`includedir` #: so that the bundled version of the force field is picked up. - includedir = os.path.dirname(topfiles['ffoplsaa.itp']) + includedir = os.path.dirname(topfiles["ffoplsaa.itp"]) except KeyError: errmsg = "Missing required data files (ffoplsaa.itp). Check your installation." logger.fatal(errmsg) raise ImportError(errmsg) -if not 'GMXLIB' in os.environ: +if not "GMXLIB" in os.environ: if not os.path.exists(includedir): - errmsg = "Likely installation problem: cannot access the package GMXLIB " \ + errmsg = ( + "Likely installation problem: cannot access the package GMXLIB " "directory (try re-installing): " + ) logger.fatal(errmsg + includedir) raise OSError(errno.ENOENT, errmsg, includedir) - os.environ['GMXLIB'] = includedir + os.environ["GMXLIB"] = includedir logger.info("Using the bundled force fields from GMXLIB=%(includedir)r.", vars()) - logger.info("If required, override this behaviour by setting the environment variable GMXLIB yourself.") + logger.info( + "If required, override this behaviour by setting the environment variable GMXLIB yourself." + ) else: - logger.warning("Using user-supplied environment variable GMXLIB=%r to find force fields", os.environ['GMXLIB']) - logger.info("(You can use the MDPOW default by executing 'unset GMXLIB' in your shell before running MDPOW.)") + logger.warning( + "Using user-supplied environment variable GMXLIB=%r to find force fields", + os.environ["GMXLIB"], + ) + logger.info( + "(You can use the MDPOW default by executing 'unset GMXLIB' in your shell before running MDPOW.)" + ) diff --git a/mdpow/equil.py b/mdpow/equil.py index 56d79f40..d1ac36ff 100644 --- a/mdpow/equil.py +++ b/mdpow/equil.py @@ -47,7 +47,8 @@ from .restart import Journalled import logging -logger = logging.getLogger('mdpow.equil') + +logger = logging.getLogger("mdpow.equil") # ITP <-- forcefields.get_solvent_model(id).itp # BOX <-- forcefields.get_solvent_model(id).coordinates @@ -56,7 +57,14 @@ # TODO: change to water distance 1.2 in the future (1.0 for # compatibility with our SAMPL5 runs) #: minimum distance between solute and box surface (in nm) -DIST = {'water': 1.0, 'octanol': 1.5, 'cyclohexane': 1.5, 'wetoctanol': 1.5, 'toluene': 1.5} +DIST = { + "water": 1.0, + "octanol": 1.5, + "cyclohexane": 1.5, + "wetoctanol": 1.5, + "toluene": 1.5, +} + class Simulation(Journalled): """Simple MD simulation of a single compound molecule in water. @@ -76,33 +84,60 @@ class Simulation(Journalled): """ #: Keyword arguments to pre-set some file names; they are keys in :attr:`Simulation.files`. - filekeys = ('topology', 'processed_topology', 'structure', 'solvated', 'ndx', - 'energy_minimized', 'MD_relaxed', 'MD_restrained', 'MD_NPT') + filekeys = ( + "topology", + "processed_topology", + "structure", + "solvated", + "ndx", + "energy_minimized", + "MD_relaxed", + "MD_restrained", + "MD_NPT", + ) topdir_default = "Equilibrium" dirname_default = os.path.curdir - solvent_default = 'water' + solvent_default = "water" #: Coordinate files of the full system in increasing order of advancement of #: the protocol; the later the better. The values are keys into :attr:`Simulation.files`. - coordinate_structures = ('solvated', 'energy_minimized', 'MD_relaxed', - 'MD_restrained', 'MD_NPT') - checkpoints = ('solvated','energy_minimized','MD_relaxed','MD_restrained','MD_NPT') - + coordinate_structures = ( + "solvated", + "energy_minimized", + "MD_relaxed", + "MD_restrained", + "MD_NPT", + ) + checkpoints = ( + "solvated", + "energy_minimized", + "MD_relaxed", + "MD_restrained", + "MD_NPT", + ) #: Check list of all methods that can be run as an independent protocol; see also #: :meth:`Simulation.get_protocol` and :class:`restart.Journal` - protocols = ("MD_NPT", "MD_NPT_run", # *_run as dummies for the ... - "MD_relaxed", "MD_relaxed_run", # ...checkpointing logic - "MD_restrained", "MD_restrained_run", - "energy_minimize", "solvate", "topology") + protocols = ( + "MD_NPT", + "MD_NPT_run", # *_run as dummies for the ... + "MD_relaxed", + "MD_relaxed_run", # ...checkpointing logic + "MD_restrained", + "MD_restrained_run", + "energy_minimize", + "solvate", + "topology", + ) #: Default Gromacs *MDP* run parameter files for the different stages. #: (All are part of the package and are found with :func:`mdpow.config.get_template`.) - mdp_defaults = {'MD_relaxed': 'NPT_opls.mdp', - 'MD_restrained': 'NPT_opls.mdp', - 'MD_NPT': 'NPT_opls.mdp', - 'energy_minimize': 'em_opls.mdp', - } + mdp_defaults = { + "MD_relaxed": "NPT_opls.mdp", + "MD_restrained": "NPT_opls.mdp", + "MD_NPT": "NPT_opls.mdp", + "energy_minimize": "em_opls.mdp", + } def __init__(self, molecule=None, **kwargs): """Set up Simulation instance. @@ -142,11 +177,11 @@ def __init__(self, molecule=None, **kwargs): """ self.__cache = {} - filename = kwargs.pop('filename', None) - dirname = kwargs.pop('dirname', self.dirname_default) + filename = kwargs.pop("filename", None) + dirname = kwargs.pop("dirname", self.dirname_default) - forcefield = kwargs.pop('forcefield', 'OPLS-AA') - solvent = kwargs.pop('solvent', self.solvent_default) + forcefield = kwargs.pop("forcefield", "OPLS-AA") + solvent = kwargs.pop("solvent", self.solvent_default) # mdp files --- should get values from default runinput.cfg # None values in the kwarg mdp dict are ignored # self.mdp: key = stage, value = path to MDP file @@ -154,25 +189,36 @@ def __init__(self, molecule=None, **kwargs): # 'water' will choose the default ('tip4p'), other choices are # 'tip3p', 'spc', 'spce', 'm24', for water; no choices # available for 'cyclohexane' and 'octanol' - solventmodel = kwargs.pop('solventmodel', None) - - mdp_kw = kwargs.pop('mdp', {}) - self.mdp = dict((stage, config.get_template(fn)) for stage,fn in self.mdp_defaults.items()) - self.mdp.update(dict((stage, config.get_template(fn)) for stage,fn in mdp_kw.items() if fn is not None)) + solventmodel = kwargs.pop("solventmodel", None) + + mdp_kw = kwargs.pop("mdp", {}) + self.mdp = dict( + (stage, config.get_template(fn)) for stage, fn in self.mdp_defaults.items() + ) + self.mdp.update( + dict( + (stage, config.get_template(fn)) + for stage, fn in mdp_kw.items() + if fn is not None + ) + ) if molecule is None and filename is not None: # load from pickle file self.load(filename) self.filename = filename - kwargs = {} # for super + kwargs = {} # for super else: - self.molecule = molecule or 'DRUG' + self.molecule = molecule or "DRUG" self.dirs = AttributeDict( - basedir=realpath(dirname), # .../Equilibrium/ - includes=list(asiterable(kwargs.pop('includes',[]))) + [config.includedir], - ) + basedir=realpath(dirname), # .../Equilibrium/ + includes=list(asiterable(kwargs.pop("includes", []))) + + [config.includedir], + ) # pre-set filenames: keyword == variable name - self.files = AttributeDict([(k, kwargs.pop(k, None)) for k in self.filekeys]) + self.files = AttributeDict( + [(k, kwargs.pop(k, None)) for k in self.filekeys] + ) self.deffnm = kwargs.pop("deffnm", "md") if self.files.topology: @@ -184,28 +230,31 @@ def __init__(self, molecule=None, **kwargs): self.forcefield = forcefield self.solvent_type = solvent self.solventmodel_identifier = forcefields.get_solvent_identifier( - solvent, - model=solventmodel, - forcefield=forcefield, - ) + solvent, + model=solventmodel, + forcefield=forcefield, + ) if self.solventmodel_identifier is None: msg = "No parameters for solvent {0} and solventmodel {1} available.".format( - solvent, solventmodel) + solvent, solventmodel + ) logger.error(msg) raise ValueError(msg) self.solventmodel = forcefields.get_solvent_model( self.solventmodel_identifier, forcefield=forcefield, - ) + ) - distance = kwargs.pop('distance', None) + distance = kwargs.pop("distance", None) distance = distance if distance is not None else DIST[solvent] - self.solvent = AttributeDict(itp=self.solventmodel.itp, - box=self.solventmodel.coordinates, - distance=distance) + self.solvent = AttributeDict( + itp=self.solventmodel.itp, + box=self.solventmodel.coordinates, + distance=distance, + ) - self.filename = filename or self.solvent_type+'.simulation' + self.filename = filename or self.solvent_type + ".simulation" super(Simulation, self).__init__(**kwargs) @@ -220,12 +269,14 @@ def save(self, filename=None): """ if filename is None: if self.filename is None: - self.filename = filename or self.solvent_type+'.simulation' - logger.warning("No filename known, saving instance under name %r", self.filename) + self.filename = filename or self.solvent_type + ".simulation" + logger.warning( + "No filename known, saving instance under name %r", self.filename + ) filename = self.filename else: self.filename = filename - with open(filename, 'wb') as f: + with open(filename, "wb") as f: pickle.dump(self, f) logger.debug("Instance pickled to %(filename)r" % vars()) @@ -233,10 +284,10 @@ def load(self, filename=None): """Re-instantiate class from pickled file.""" if filename is None: if self.filename is None: - self.filename = self.molecule.lower() + '.pickle' + self.filename = self.molecule.lower() + ".pickle" logger.warning("No filename known, trying name %r", self.filename) filename = self.filename - with open(filename, 'rb') as f: + with open(filename, "rb") as f: instance = pickle.load(f) self.__dict__.update(instance.__dict__) logger.debug("Instance loaded from %(filename)r" % vars()) @@ -248,6 +299,7 @@ def make_paths_relative(self, prefix=os.path.curdir): check :attr:`mdpow.equil.Simulation.dirs.includes` and adjust manually if necessary. """ + def assinglet(m): if len(m) == 1: return m[0] @@ -272,10 +324,13 @@ def assinglet(m): self.mdp[key] = fn.replace(basedir, prefix) except AttributeError: pass - logger.warning("make_paths_relative(): check/manually adjust %s.dirs.includes = %r !", - self.__class__.__name__, self.dirs.includes) + logger.warning( + "make_paths_relative(): check/manually adjust %s.dirs.includes = %r !", + self.__class__.__name__, + self.dirs.includes, + ) - def topology(self, itp='drug.itp', prm=None, **kwargs): + def topology(self, itp="drug.itp", prm=None, **kwargs): """Generate a topology for compound *molecule*. :Keywords: @@ -290,22 +345,22 @@ def topology(self, itp='drug.itp', prm=None, **kwargs): *kwargs* see source for *top_template*, *topol* """ - self.journal.start('topology') + self.journal.start("topology") - dirname = kwargs.pop('dirname', self.BASEDIR('top')) + dirname = kwargs.pop("dirname", self.BASEDIR("top")) self.dirs.topology = realpath(dirname) setting = forcefields.get_ff_paths(self.forcefield) template = forcefields.get_top_template(self.solvent_type) - top_template = config.get_template(kwargs.pop('top_template', template)) - topol = kwargs.pop('topol', os.path.basename(top_template)) + top_template = config.get_template(kwargs.pop("top_template", template)) + topol = kwargs.pop("topol", os.path.basename(top_template)) self.top_template = top_template itp = os.path.realpath(itp) _itp = os.path.basename(itp) if prm is None: - prm_kw = '' + prm_kw = "" else: prm = os.path.realpath(prm) _prm = os.path.basename(prm) @@ -315,45 +370,48 @@ def topology(self, itp='drug.itp', prm=None, **kwargs): shutil.copy(itp, _itp) if prm is not None: shutil.copy(prm, _prm) - gromacs.cbook.edit_txt(top_template, - [(r'#include +"oplsaa\.ff/forcefield\.itp"', - r'oplsaa\.ff/', - setting[0]), - (r'#include +"compound\.itp"', - r'compound\.itp', - _itp), - (r'#include +"oplsaa\.ff/tip4p\.itp"', - r'oplsaa\.ff/tip4p\.itp', - setting[0] + self.solvent.itp), - (r'#include +"oplsaa\.ff/ions_opls\.itp"', - r'oplsaa\.ff/ions_opls\.itp', - setting[1]), - (r'#include +"compound\.prm"', - r'#include +"compound\.prm"', - prm_kw), - (r'#include +"water\.itp"', - r'water\.itp', - setting[2]), - (r'Compound', - 'solvent', - self.solvent_type), - (r'Compound', - 'DRUG', - self.molecule), - (r'DRUG\s*1', - 'DRUG', - self.molecule), - ], - newname=topol) - logger.info('[%(dirname)s] Created topology %(topol)r that includes %(_itp)r', vars()) + gromacs.cbook.edit_txt( + top_template, + [ + ( + r'#include +"oplsaa\.ff/forcefield\.itp"', + r"oplsaa\.ff/", + setting[0], + ), + (r'#include +"compound\.itp"', r"compound\.itp", _itp), + ( + r'#include +"oplsaa\.ff/tip4p\.itp"', + r"oplsaa\.ff/tip4p\.itp", + setting[0] + self.solvent.itp, + ), + ( + r'#include +"oplsaa\.ff/ions_opls\.itp"', + r"oplsaa\.ff/ions_opls\.itp", + setting[1], + ), + ( + r'#include +"compound\.prm"', + r'#include +"compound\.prm"', + prm_kw, + ), + (r'#include +"water\.itp"', r"water\.itp", setting[2]), + (r"Compound", "solvent", self.solvent_type), + (r"Compound", "DRUG", self.molecule), + (r"DRUG\s*1", "DRUG", self.molecule), + ], + newname=topol, + ) + logger.info( + "[%(dirname)s] Created topology %(topol)r that includes %(_itp)r", vars() + ) # update known files and dirs self.files.topology = realpath(dirname, topol) if not self.dirs.topology in self.dirs.includes: self.dirs.includes.append(self.dirs.topology) - self.journal.completed('topology') - return {'dirname': dirname, 'topol': topol} + self.journal.completed("topology") + return {"dirname": dirname, "topol": topol} @staticmethod def _setup_solvate(**kwargs): @@ -385,43 +443,49 @@ def solvate(self, struct=None, **kwargs): All other arguments are passed on to :func:`gromacs.setup.solvate`, but set to sensible default values. *top* and *water* are always fixed. """ - self.journal.start('solvate') - - self.dirs.solvation = realpath(kwargs.setdefault('dirname', self.BASEDIR('solvation'))) - kwargs['struct'] = self._checknotempty(struct or self.files.structure, 'struct') - kwargs['top'] = self._checknotempty(self.files.topology, 'top') - kwargs['water'] = self.solvent.box - kwargs.setdefault('mainselection', '"%s"' % self.molecule) # quotes are needed for make_ndx - kwargs.setdefault('distance', self.solvent.distance) - - boxtype = kwargs.pop('bt', None) + self.journal.start("solvate") + + self.dirs.solvation = realpath( + kwargs.setdefault("dirname", self.BASEDIR("solvation")) + ) + kwargs["struct"] = self._checknotempty(struct or self.files.structure, "struct") + kwargs["top"] = self._checknotempty(self.files.topology, "top") + kwargs["water"] = self.solvent.box + kwargs.setdefault( + "mainselection", '"%s"' % self.molecule + ) # quotes are needed for make_ndx + kwargs.setdefault("distance", self.solvent.distance) + + boxtype = kwargs.pop("bt", None) boxtype = boxtype if boxtype is not None else "dodecahedron" if boxtype not in ("dodecahedron", "triclinic", "cubic", "octahedron"): - msg = "Invalid boxtype '{0}', not suitable for 'gmx editconf'.".format(boxtype) + msg = "Invalid boxtype '{0}', not suitable for 'gmx editconf'.".format( + boxtype + ) logger.error(msg) raise ValueError(msg) - kwargs['bt'] = boxtype + kwargs["bt"] = boxtype - kwargs['includes'] = asiterable(kwargs.pop('includes',[])) + self.dirs.includes + kwargs["includes"] = asiterable(kwargs.pop("includes", [])) + self.dirs.includes params = self._setup_solvate(**kwargs) - self.files.structure = kwargs['struct'] - self.files.solvated = params['struct'] - self.files.ndx = params['ndx'] + self.files.structure = kwargs["struct"] + self.files.solvated = params["struct"] + self.files.ndx = params["ndx"] # we can also make a processed topology right now self.processed_topology(**kwargs) - self.journal.completed('solvate') + self.journal.completed("solvate") return params def processed_topology(self, **kwargs): """Create a portable topology file from the topology and the solvated system.""" if self.files.solvated is None or not os.path.exists(self.files.solvated): self.solvate(**kwargs) - kwargs['topol'] = self.files.topology - kwargs['struct'] = self.files.solvated - kwargs['includes'] = self.dirs.includes + kwargs["topol"] = self.files.topology + kwargs["struct"] = self.files.solvated + kwargs["includes"] = self.dirs.includes self.files.processed_topology = gromacs.cbook.create_portable_topology(**kwargs) return self.files.processed_topology @@ -432,55 +496,71 @@ def energy_minimize(self, **kwargs): :meth:`~mdpow.equil.Simulation.solvate` step has been carried out previously all the defaults should just work. """ - self.journal.start('energy_minimize') + self.journal.start("energy_minimize") - self.dirs.energy_minimization = realpath(kwargs.setdefault('dirname', self.BASEDIR('em'))) - kwargs['top'] = self.files.topology - kwargs.setdefault('struct', self.files.solvated) - kwargs.setdefault('mdp', self.mdp['energy_minimize']) - kwargs['mainselection'] = None - kwargs['includes'] = asiterable(kwargs.pop('includes',[])) + self.dirs.includes + self.dirs.energy_minimization = realpath( + kwargs.setdefault("dirname", self.BASEDIR("em")) + ) + kwargs["top"] = self.files.topology + kwargs.setdefault("struct", self.files.solvated) + kwargs.setdefault("mdp", self.mdp["energy_minimize"]) + kwargs["mainselection"] = None + kwargs["includes"] = asiterable(kwargs.pop("includes", [])) + self.dirs.includes params = gromacs.setup.energy_minimize(**kwargs) - self.files.energy_minimized = params['struct'] + self.files.energy_minimized = params["struct"] - self.journal.completed('energy_minimize') + self.journal.completed("energy_minimize") return params def _MD(self, protocol, **kwargs): """Basic MD driver for this Simulation. Do not call directly.""" self.journal.start(protocol) - kwargs.setdefault('dirname', self.BASEDIR(protocol)) - kwargs.setdefault('deffnm', self.deffnm) - kwargs.setdefault('mdp', config.get_template('NPT_opls.mdp')) - self.dirs[protocol] = realpath(kwargs['dirname']) - setupMD = kwargs.pop('MDfunc', gromacs.setup.MD) - kwargs['top'] = self.files.topology - kwargs['includes'] = asiterable(kwargs.pop('includes',[])) + self.dirs.includes - kwargs['ndx'] = self.files.ndx - kwargs['mainselection'] = None # important for SD (use custom mdp and ndx!, gromacs.setup._MD) - self._checknotempty(kwargs['struct'], 'struct') - if not os.path.exists(kwargs['struct']): + kwargs.setdefault("dirname", self.BASEDIR(protocol)) + kwargs.setdefault("deffnm", self.deffnm) + kwargs.setdefault("mdp", config.get_template("NPT_opls.mdp")) + self.dirs[protocol] = realpath(kwargs["dirname"]) + setupMD = kwargs.pop("MDfunc", gromacs.setup.MD) + kwargs["top"] = self.files.topology + kwargs["includes"] = asiterable(kwargs.pop("includes", [])) + self.dirs.includes + kwargs["ndx"] = self.files.ndx + kwargs[ + "mainselection" + ] = None # important for SD (use custom mdp and ndx!, gromacs.setup._MD) + self._checknotempty(kwargs["struct"], "struct") + if not os.path.exists(kwargs["struct"]): # struct is not reliable as it depends on qscript so now we just try everything... - struct = gromacs.utilities.find_first(kwargs['struct'], suffices=['pdb', 'gro']) + struct = gromacs.utilities.find_first( + kwargs["struct"], suffices=["pdb", "gro"] + ) if struct is None: - logger.error("Starting structure %(struct)r does not exist (yet)" % kwargs) - raise IOError(errno.ENOENT, "Starting structure not found", kwargs['struct']) + logger.error( + "Starting structure %(struct)r does not exist (yet)" % kwargs + ) + raise IOError( + errno.ENOENT, "Starting structure not found", kwargs["struct"] + ) else: - logger.info("Found starting structure %r (instead of %r).", struct, kwargs['struct']) - kwargs['struct'] = struct + logger.info( + "Found starting structure %r (instead of %r).", + struct, + kwargs["struct"], + ) + kwargs["struct"] = struct # now setup the whole simulation (this is typically gromacs.setup.MD() ) - params = setupMD(**kwargs) + params = setupMD(**kwargs) # params['struct'] is md.gro but could also be md.pdb --- depends entirely on qscript - self.files[protocol] = params['struct'] + self.files[protocol] = params["struct"] # Gromacs 4.5.x 'mdrun -c PDB' fails if it cannot find 'residuetypes.dat' # so instead of fuffing with GMXLIB we just dump it into the directory try: - shutil.copy(config.topfiles['residuetypes.dat'], self.dirs[protocol]) + shutil.copy(config.topfiles["residuetypes.dat"], self.dirs[protocol]) except IOError: - logger.warning("Failed to copy 'residuetypes.dat': mdrun will likely fail to write a final structure") + logger.warning( + "Failed to copy 'residuetypes.dat': mdrun will likely fail to write a final structure" + ) self.journal.completed(protocol) return params @@ -513,11 +593,11 @@ def MD_relaxed(self, **kwargs): """ # user structure or restrained or solvated - kwargs.setdefault('struct', self.files.energy_minimized) - kwargs.setdefault('dt', 0.0001) # ps - kwargs.setdefault('runtime', 5) # ps - kwargs.setdefault('mdp', self.mdp['MD_relaxed']) - return self._MD('MD_relaxed', **kwargs) + kwargs.setdefault("struct", self.files.energy_minimized) + kwargs.setdefault("dt", 0.0001) # ps + kwargs.setdefault("runtime", 5) # ps + kwargs.setdefault("mdp", self.mdp["MD_relaxed"]) + return self._MD("MD_relaxed", **kwargs) def MD_restrained(self, **kwargs): """Short MD simulation with position restraints on compound. @@ -551,11 +631,13 @@ def MD_restrained(self, **kwargs): :class:`gromacs.manager.Manager` """ - kwargs.setdefault('struct', - self._lastnotempty([self.files.energy_minimized, self.files.MD_relaxed])) - kwargs.setdefault('mdp', self.mdp['MD_restrained']) - kwargs['MDfunc'] = gromacs.setup.MD_restrained - return self._MD('MD_restrained', **kwargs) + kwargs.setdefault( + "struct", + self._lastnotempty([self.files.energy_minimized, self.files.MD_relaxed]), + ) + kwargs.setdefault("mdp", self.mdp["MD_restrained"]) + kwargs["MDfunc"] = gromacs.setup.MD_restrained + return self._MD("MD_restrained", **kwargs) def MD_NPT(self, **kwargs): """Short NPT MD simulation. @@ -595,10 +677,12 @@ def MD_NPT(self, **kwargs): """ # user structure or relaxed or restrained or solvated - kwargs.setdefault('struct', self.get_last_structure()) - kwargs.setdefault('t',self.get_last_checkpoint()) # Pass checkpoint file from md_relaxed - kwargs.setdefault('mdp', self.mdp['MD_NPT']) - return self._MD('MD_NPT', **kwargs) + kwargs.setdefault("struct", self.get_last_structure()) + kwargs.setdefault( + "t", self.get_last_checkpoint() + ) # Pass checkpoint file from md_relaxed + kwargs.setdefault("mdp", self.mdp["MD_NPT"]) + return self._MD("MD_NPT", **kwargs) # for convenience and compatibility MD = MD_NPT @@ -617,49 +701,64 @@ def _lastnotempty(l): def get_last_structure(self): """Returns the coordinates of the most advanced step in the protocol.""" - return self._lastnotempty([self.files[name] for name in self.coordinate_structures]) + return self._lastnotempty( + [self.files[name] for name in self.coordinate_structures] + ) def get_last_checkpoint(self): """Returns the checkpoint of the most advanced step in the protocol. Relies on md.gro being present from previous simulation, assumes that checkpoint is then present. """ - return self._lastnotempty([self.files[name] for name in self.checkpoints]).replace('.gro','.cpt') + return self._lastnotempty( + [self.files[name] for name in self.checkpoints] + ).replace(".gro", ".cpt") + class WaterSimulation(Simulation): """Equilibrium MD of a solute in a box of water.""" - solvent_default = 'water' + + solvent_default = "water" dirname_default = os.path.join(Simulation.topdir_default, solvent_default) + class CyclohexaneSimulation(Simulation): """Equilibrium MD of a solute in a box of cyclohexane.""" - solvent_default = 'cyclohexane' + + solvent_default = "cyclohexane" dirname_default = os.path.join(Simulation.topdir_default, solvent_default) + class OctanolSimulation(Simulation): """Equilibrium MD of a solute in a box of octanol.""" - solvent_default = 'octanol' + + solvent_default = "octanol" dirname_default = os.path.join(Simulation.topdir_default, solvent_default) + class WetOctanolSimulation(Simulation): """Equilibrium MD of a solute in a box of wet octanol.""" - solvent_default = 'wetoctanol' + + solvent_default = "wetoctanol" dirname_default = os.path.join(Simulation.topdir_default, solvent_default) - def _setup_solvate(self, **kwargs): + def _setup_solvate(self, **kwargs): sol = gromacs.setup.solvate_sol(**kwargs) with in_dir(self.dirs.solvation, create=False): - u = mda.Universe('solvated.gro') - octanol = u.select_atoms('resname OcOH') + u = mda.Universe("solvated.gro") + octanol = u.select_atoms("resname OcOH") n = octanol.n_residues with in_dir(self.dirs.topology, create=False): - gromacs.cbook.edit_txt(self.files.topology, - [('OcOH 1', '1', n)]) + gromacs.cbook.edit_txt( + self.files.topology, [("OcOH 1", "1", n)] + ) ionkwargs = kwargs - ionkwargs['struct'] = sol['struct'] + ionkwargs["struct"] = sol["struct"] params = gromacs.setup.solvate_ion(**ionkwargs) return params + class TolueneSimulation(Simulation): """Equilibrium MD of a solute in a box of toluene.""" - solvent_default = 'toluene' - dirname_default = os.path.join(Simulation.topdir_default, solvent_default) \ No newline at end of file + + solvent_default = "toluene" + dirname_default = os.path.join(Simulation.topdir_default, solvent_default) diff --git a/mdpow/fep.py b/mdpow/fep.py index b5b7e406..4edd0362 100644 --- a/mdpow/fep.py +++ b/mdpow/fep.py @@ -163,6 +163,7 @@ import gromacs import gromacs.utilities + try: import gromacs.setup except (ImportError, OSError): @@ -170,16 +171,19 @@ from gromacs.utilities import asiterable, AttributeDict, in_dir, openany import logging -logger = logging.getLogger('mdpow.fep') + +logger = logging.getLogger("mdpow.fep") from . import config from .restart import Journalled from . import kBOLTZ, N_AVOGADRO + def molar_to_nm3(c): """Convert a concentration in Molar to nm|^-3|.""" return c * N_AVOGADRO * 1e-24 + def bar_to_kJmolnm3(p): """Convert pressure in bar to kJ mol|^-1| nm|^-3|. @@ -187,17 +191,20 @@ def bar_to_kJmolnm3(p): """ return p * N_AVOGADRO * 1e-25 + def kcal_to_kJ(x): """Convert a energy in kcal to kJ.""" return 4.184 * x + def kJ_to_kcal(x): """Convert a energy in kJ to kcal.""" return x / 4.184 + def kBT_to_kJ(x, T): """Convert a energy in kBT to kJ/mol.""" - return x * constants.N_A*constants.k*T*1e-3 + return x * constants.N_A * constants.k * T * 1e-3 class FEPschedule(AttributeDict): @@ -232,19 +239,23 @@ class FEPschedule(AttributeDict): lambdas = 0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1 """ - mdp_keywords = dict((('sc_alpha', float), - ('sc_power', int), - ('sc_sigma', float), - ('couple_lambda0', str), - ('couple_lambda1', str), - )) - meta_keywords = dict((('name', str), ('description', str), ('label', str))) - other_keywords = dict((('lambdas', list), )) + + mdp_keywords = dict( + ( + ("sc_alpha", float), + ("sc_power", int), + ("sc_sigma", float), + ("couple_lambda0", str), + ("couple_lambda1", str), + ) + ) + meta_keywords = dict((("name", str), ("description", str), ("label", str))) + other_keywords = dict((("lambdas", list),)) @property def mdp_dict(self): """Dict of key-values that can be set in a mdp file.""" - return dict(((k,v) for k,v in self.items() if k in self.mdp_keywords)) + return dict(((k, v) for k, v in self.items() if k in self.mdp_keywords)) @staticmethod def load(cfg, section): @@ -254,12 +265,14 @@ def load(cfg, section): keys.update(FEPschedule.meta_keywords) keys.update(FEPschedule.other_keywords) - cfg_get = {float: cfg.getfloat, - int: cfg.getint, - str: cfg.getstr, # literal strings, no conversion of None (which we need for the MDP!) - # CHECK: THIS IS LIKELY NOT GUARANTEED ANYMORE since getstr == get - list: cfg.getarray # numpy float array from list - } + cfg_get = { + float: cfg.getfloat, + int: cfg.getint, + str: cfg.getstr, # literal strings, no conversion of None (which we need for the MDP!) + # CHECK: THIS IS LIKELY NOT GUARANTEED ANYMORE since getstr == get + list: cfg.getarray, # numpy float array from list + } + def getter(type, section, key): value = cfg_get[type](section, key) try: @@ -269,9 +282,13 @@ def getter(type, section, key): except TypeError: pass return value + # skip any None values - return FEPschedule((key, getter(keytype, section, key)) for key,keytype in keys.items() - if getter(keytype, section, key) is not None) + return FEPschedule( + (key, getter(keytype, section, key)) + for key, keytype in keys.items() + if getter(keytype, section, key) is not None + ) def __deepcopy__(self, memo): x = FEPschedule() @@ -279,6 +296,7 @@ def __deepcopy__(self, memo): x[k] = copy.deepcopy(v) return x + class Gsolv(Journalled): r"""Simulations to calculate and analyze the solvation free energy. @@ -326,36 +344,58 @@ class Gsolv(Journalled): protocols = ["setup", "fep_run"] #: Estimators in alchemlyb - estimators = {'TI': {'extract': extract_dHdl, 'estimator': TI}, - 'BAR': {'extract': extract_u_nk, 'estimator': BAR}, - 'MBAR': {'extract': extract_u_nk, 'estimator': MBAR} - } + estimators = { + "TI": {"extract": extract_dHdl, "estimator": TI}, + "BAR": {"extract": extract_u_nk, "estimator": BAR}, + "MBAR": {"extract": extract_u_nk, "estimator": MBAR}, + } # TODO: initialize from default cfg - schedules_default = {'coulomb': - FEPschedule(name='coulomb', - description="dis-charging vdw+q --> vdw", - label='Coul', - couple_lambda0='vdw-q', couple_lambda1='vdw', - sc_alpha=0, # linear scaling for coulomb - lambdas=np.array([0.0, 0.25, 0.5, 0.75, 1.0]), # default values - ), - 'vdw': - FEPschedule(name='vdw', - description="decoupling vdw --> none", - label='VDW', - couple_lambda0='vdw', couple_lambda1='none', - sc_alpha=0.5, sc_power=1, sc_sigma=0.3, # recommended values - lambdas=np.array([0.0, 0.05, 0.1, 0.2, 0.3, - 0.4, 0.5, 0.6, 0.65, 0.7, 0.75, 0.8, - 0.85, 0.9, 0.95, 1]), # defaults - ), - } + schedules_default = { + "coulomb": FEPschedule( + name="coulomb", + description="dis-charging vdw+q --> vdw", + label="Coul", + couple_lambda0="vdw-q", + couple_lambda1="vdw", + sc_alpha=0, # linear scaling for coulomb + lambdas=np.array([0.0, 0.25, 0.5, 0.75, 1.0]), # default values + ), + "vdw": FEPschedule( + name="vdw", + description="decoupling vdw --> none", + label="VDW", + couple_lambda0="vdw", + couple_lambda1="none", + sc_alpha=0.5, + sc_power=1, + sc_sigma=0.3, # recommended values + lambdas=np.array( + [ + 0.0, + 0.05, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.65, + 0.7, + 0.75, + 0.8, + 0.85, + 0.9, + 0.95, + 1, + ] + ), # defaults + ), + } #: Default Gromacs *MDP* run parameter file for FEP. #: (The file is part of the package and is found with :func:`mdpow.config.get_template`.) - mdp_default = 'bar_opls.mdp' - + mdp_default = "bar_opls.mdp" def __init__(self, molecule=None, top=None, struct=None, method="BAR", **kwargs): """Set up Gsolv from input files or a equilibrium simulation. @@ -436,41 +476,49 @@ def __init__(self, molecule=None, top=None, struct=None, method="BAR", **kwargs) parameters. """ - required_args = ('molecule', 'top', 'struct') + required_args = ("molecule", "top", "struct") # should this be below somewhere? if not method in ("TI", "BAR", "MBAR"): raise ValueError("method can only be TI, BAR, or MBAR") self.method = method - filename = kwargs.pop('filename', None) - basedir = kwargs.pop('basedir', os.path.curdir) # all other dirs relative to basedir - simulation = kwargs.pop('simulation', None) - solvent = kwargs.pop('solvent', self.solvent_default) - if (None in (molecule, top, struct) and simulation is None) and filename is not None: + filename = kwargs.pop("filename", None) + basedir = kwargs.pop( + "basedir", os.path.curdir + ) # all other dirs relative to basedir + simulation = kwargs.pop("simulation", None) + solvent = kwargs.pop("solvent", self.solvent_default) + if ( + None in (molecule, top, struct) and simulation is None + ) and filename is not None: # load from pickle file self.load(filename) self.filename = filename - kwargs = {} # for super + kwargs = {} # for super else: if simulation is not None: # load data from Simulation instance self.molecule = simulation.molecule - self.top = simulation.files.processed_topology or simulation.files.topology + self.top = ( + simulation.files.processed_topology or simulation.files.topology + ) self.struct = simulation.files.MD_NPT self.ndx = simulation.files.ndx if simulation.solvent_type == solvent: self.solvent_type = simulation.solvent_type else: - errmsg = "Solvent mismatch: simulation was run for %s but Gsolv is set up for %s" % \ - (simulation.solvent_type, solvent) + errmsg = ( + "Solvent mismatch: simulation was run for %s but Gsolv is set up for %s" + % (simulation.solvent_type, solvent) + ) logger.error(errmsg) raise ValueError(errmsg) else: - self.molecule = molecule # should check that this is in top (?) + self.molecule = molecule # should check that this is in top (?) self.top = top self.struct = struct - self.ndx = kwargs.pop('ndx', None) + self.ndx = kwargs.pop("ndx", None) self.solvent_type = solvent for attr in required_args: @@ -480,54 +528,71 @@ def __init__(self, molecule=None, top=None, struct=None, method="BAR", **kwargs) # fix struct (issue with qscripts being independent from rest of code) if not os.path.exists(self.struct): # struct is not reliable as it depends on qscript so now we just try everything... - struct = gromacs.utilities.find_first(self.struct, suffices=['pdb', 'gro']) + struct = gromacs.utilities.find_first( + self.struct, suffices=["pdb", "gro"] + ) if struct is None: logger.error("Starting structure %r does not exist.", self.struct) - raise IOError(errno.ENOENT, "Starting structure not found", self.struct) + raise IOError( + errno.ENOENT, "Starting structure not found", self.struct + ) else: - logger.info("Found starting structure %r (instead of %r).", struct, self.struct) + logger.info( + "Found starting structure %r (instead of %r).", + struct, + self.struct, + ) self.struct = struct - self.Temperature = kwargs.pop('temperature', 300.0) - self.qscript = kwargs.pop('qscript', ['local.sh']) - self.deffnm = kwargs.pop('deffnm', 'md') + self.Temperature = kwargs.pop("temperature", 300.0) + self.qscript = kwargs.pop("qscript", ["local.sh"]) + self.deffnm = kwargs.pop("deffnm", "md") - self.mdp = kwargs.pop('mdp', config.get_template(self.mdp_default)) + self.mdp = kwargs.pop("mdp", config.get_template(self.mdp_default)) # schedules (deepcopy because we might modify) # For some reason 2.7 tests failed with deepcopy in 2.7 so used merge_dict instead self.schedules = config.merge_dicts(self.schedules_default, {}) - schedules = kwargs.pop('schedules', {}) + schedules = kwargs.pop("schedules", {}) self.schedules.update(schedules) self.lambdas = { - 'coulomb': kwargs.pop('lambda_coulomb', self.schedules['coulomb'].lambdas), - 'vdw': kwargs.pop('lambda_vdw', self.schedules['vdw'].lambdas), - } - self.runtime = kwargs.pop('runtime', 5000.0) # ps - self.dirname = kwargs.pop('dirname', self.dirname_default) - self.includes = list(asiterable(kwargs.pop('includes',[]))) + [config.includedir] - self.component_dirs = {'coulomb': os.path.join(self.dirname, 'Coulomb'), - 'vdw': os.path.join(self.dirname, 'VDW')} + "coulomb": kwargs.pop( + "lambda_coulomb", self.schedules["coulomb"].lambdas + ), + "vdw": kwargs.pop("lambda_vdw", self.schedules["vdw"].lambdas), + } + self.runtime = kwargs.pop("runtime", 5000.0) # ps + self.dirname = kwargs.pop("dirname", self.dirname_default) + self.includes = list(asiterable(kwargs.pop("includes", []))) + [ + config.includedir + ] + self.component_dirs = { + "coulomb": os.path.join(self.dirname, "Coulomb"), + "vdw": os.path.join(self.dirname, "VDW"), + } # for analysis - self.stride = kwargs.pop('stride', 1) - self.start = kwargs.pop('start', 0) - self.stop = kwargs.pop('stop', None) - self.SI = kwargs.pop('SI', True) + self.stride = kwargs.pop("stride", 1) + self.start = kwargs.pop("start", 0) + self.stop = kwargs.pop("stop", None) + self.SI = kwargs.pop("SI", True) # other variables #: Results from the analysis - self.results = AttributeDict(xvg=AttributeDict(), - dvdl=AttributeDict(), - DeltaA=AttributeDict(), # contains QuantityWithError - ) + self.results = AttributeDict( + xvg=AttributeDict(), + dvdl=AttributeDict(), + DeltaA=AttributeDict(), # contains QuantityWithError + ) #: Generated run scripts self.scripts = AttributeDict() # sanity checks if os.path.exists(self.dirname): - wmsg = "Directory %(dirname)r already exists --- will overwrite " \ - "existing files." % vars(self) + wmsg = ( + "Directory %(dirname)r already exists --- will overwrite " + "existing files." % vars(self) + ) warnings.warn(wmsg) logger.warning(wmsg) @@ -541,16 +606,23 @@ def __init__(self, molecule=None, top=None, struct=None, method="BAR", **kwargs) self.filename = os.path.abspath(self.filename) except (AttributeError, TypeError): # default filename if none was provided - self.filename = self.frombase(self.dirname, self.__class__.__name__+os.extsep+'fep') + self.filename = self.frombase( + self.dirname, self.__class__.__name__ + os.extsep + "fep" + ) # override pickle file for this dangerous option: must be set # on a case-by-case basis - self.permissive = kwargs.pop('permissive', False) + self.permissive = kwargs.pop("permissive", False) - logger.info("Solvation free energy calculation for molecule " - "%(molecule)s in solvent %(solvent_type)s.", vars(self)) + logger.info( + "Solvation free energy calculation for molecule " + "%(molecule)s in solvent %(solvent_type)s.", + vars(self), + ) logger.info("Base directory is %(basedir)r", vars(self)) - logger.info("Using setup directories under %(dirname)r: %(component_dirs)r", vars(self)) + logger.info( + "Using setup directories under %(dirname)r: %(component_dirs)r", vars(self) + ) logger.info("Default checkpoint file is %(filename)r", vars(self)) logger.debug("Coulomb lambdas = %(coulomb)r", self.lambdas) logger.debug("VDW lambdas = %(vdw)r", self.lambdas) @@ -585,11 +657,16 @@ def label(self, component): def tasklabel(self, component, lmbda): """Batch submission script name for a single task job.""" - return self.molecule[:3]+'_'+self.schedules[component].label+"%04d" % (1000 * lmbda) + return ( + self.molecule[:3] + + "_" + + self.schedules[component].label + + "%04d" % (1000 * lmbda) + ) def arraylabel(self, component): """Batch submission script name for a job array.""" - return self.molecule[:3]+'_'+self.schedules[component].label + return self.molecule[:3] + "_" + self.schedules[component].label def fep_dirs(self): """Generator for all simulation sub directories""" @@ -625,39 +702,46 @@ def setup(self, **kwargs): .. versionchanged:: 0.6.0 Gromacs now uses option ``-dhdl`` instead of ``-dgdl``. """ - self.journal.start('setup') + self.journal.start("setup") # -dgdl for FEP output (although that seems to have been changed to -dHdl in Gromacs 4.5.3) # NOW use -dhdl - kwargs['mdrun_opts'] = " ".join([kwargs.pop('mdrun_opts',''), '-dhdl']) - kwargs['includes'] = asiterable(kwargs.pop('includes',[])) + self.includes - kwargs['deffnm'] = self.deffnm - kwargs.setdefault('maxwarn', 1) + kwargs["mdrun_opts"] = " ".join([kwargs.pop("mdrun_opts", ""), "-dhdl"]) + kwargs["includes"] = asiterable(kwargs.pop("includes", [])) + self.includes + kwargs["deffnm"] = self.deffnm + kwargs.setdefault("maxwarn", 1) qsubargs = kwargs.copy() - qsubargs['dirname'] = self.frombase(self.dirname) + qsubargs["dirname"] = self.frombase(self.dirname) # handle templates separately (necessary for array jobs) - qscripts = qsubargs.pop('sge', None) or self.qscript - qscripts.extend(qsubargs.pop('qscript',[])) # also allow canonical 'templates' + qscripts = qsubargs.pop("sge", None) or self.qscript + qscripts.extend(qsubargs.pop("qscript", [])) # also allow canonical 'templates' # make sure that the individual batch scripts are also written - kwargs.setdefault('qscript', qscripts) + kwargs.setdefault("qscript", qscripts) for component, lambdas in self.lambdas.items(): for l in lambdas: - params = self._setup(component, l, - foreign_lambdas=lambdas, **kwargs) + params = self._setup(component, l, foreign_lambdas=lambdas, **kwargs) # generate queuing system script for array job directories = [self.wdir(component, l) for l in lambdas] - qsubargs['jobname'] = self.arraylabel(component) - qsubargs['prefix'] = self.label(component)+'_' - self.scripts[component] = gromacs.qsub.generate_submit_array(qscripts, directories, **qsubargs) - logger.info("[%s] Wrote array job scripts %r", component, self.scripts[component]) - - self.journal.completed('setup') + qsubargs["jobname"] = self.arraylabel(component) + qsubargs["prefix"] = self.label(component) + "_" + self.scripts[component] = gromacs.qsub.generate_submit_array( + qscripts, directories, **qsubargs + ) + logger.info( + "[%s] Wrote array job scripts %r", component, self.scripts[component] + ) + + self.journal.completed("setup") self.save(self.filename) - logger.info("Saved state information to %r; reload later with G = %r.", self.filename, self) + logger.info( + "Saved state information to %r; reload later with G = %r.", + self.filename, + self, + ) logger.info("Finished setting up all individual simulations. Now run them...") - params.pop('struct', None) # scrub window-specific params + params.pop("struct", None) # scrub window-specific params return params def _setup(self, component, lmbda, foreign_lambdas, **kwargs): @@ -668,36 +752,41 @@ def _setup(self, component, lmbda, foreign_lambdas, **kwargs): logger.info("Preparing %(component)s for lambda=%(lmbda)g" % vars()) wdir = self.wdir(component, lmbda) - kwargs.setdefault('couple-intramol', 'no') + kwargs.setdefault("couple-intramol", "no") ### XXX Issue 20: if an entry is None then the dict will not be updated: ### I *must* keep "none" as a legal string value - kwargs.update(self.schedules[component].mdp_dict) # sets soft core & lambda0/1 state + kwargs.update( + self.schedules[component].mdp_dict + ) # sets soft core & lambda0/1 state - if kwargs.pop('edr', True): - logger.info('Setting dhdl file to edr format') - kwargs.setdefault('separate-dhdl-file', 'no') + if kwargs.pop("edr", True): + logger.info("Setting dhdl file to edr format") + kwargs.setdefault("separate-dhdl-file", "no") else: - logger.info('Setting dhdl file to xvg format') - kwargs.setdefault('separate-dhdl-file', 'yes') + logger.info("Setting dhdl file to xvg format") + kwargs.setdefault("separate-dhdl-file", "yes") foreign_lambdas = np.asarray(foreign_lambdas) lambda_index = np.where(foreign_lambdas == lmbda)[0][0] - kwargs.update(dirname=wdir, struct=self.struct, top=self.top, - mdp=self.mdp, - ndx=self.ndx, - mainselection=None, - runtime=self.runtime, - ref_t=self.Temperature, # TODO: maybe not working yet, check _setup() - gen_temp=self.Temperature, # needed until gromacs.setup() is smarter - qname=self.tasklabel(component,lmbda), - free_energy='yes', - couple_moltype=self.molecule, - init_lambda_state=lambda_index, - fep_lambdas=foreign_lambdas, - calc_lambda_neighbors=-1, - ) + kwargs.update( + dirname=wdir, + struct=self.struct, + top=self.top, + mdp=self.mdp, + ndx=self.ndx, + mainselection=None, + runtime=self.runtime, + ref_t=self.Temperature, # TODO: maybe not working yet, check _setup() + gen_temp=self.Temperature, # needed until gromacs.setup() is smarter + qname=self.tasklabel(component, lmbda), + free_energy="yes", + couple_moltype=self.molecule, + init_lambda_state=lambda_index, + fep_lambdas=foreign_lambdas, + calc_lambda_neighbors=-1, + ) return gromacs.setup.MD(**kwargs) @@ -718,9 +807,9 @@ def dgdl_xvg(self, *args): :Raises: :exc:`IOError` with error code ENOENT if no file could be found - """ - EXTENSIONS = ('', os.path.extsep+'bz2', os.path.extsep+'gz') - root = os.path.join(*args + (self.deffnm + '.xvg',)) + """ + EXTENSIONS = ("", os.path.extsep + "bz2", os.path.extsep + "gz") + root = os.path.join(*args + (self.deffnm + ".xvg",)) for ext in EXTENSIONS: fn = root + ext if os.path.exists(fn): @@ -741,8 +830,8 @@ def dgdl_edr(self, *args): :Raises: :exc:`IOError` with error code ENOENT if no file could be found - """ - pattern = os.path.join(*args + (self.deffnm + '*.edr',)) + """ + pattern = os.path.join(*args + (self.deffnm + "*.edr",)) edrs = glob(pattern) if not edrs: logger.error("Missing dgdl.edr file %(pattern)r.", vars()) @@ -762,8 +851,8 @@ def dgdl_tpr(self, *args): :Raises: :exc:`IOError` with error code ENOENT if no file could be found - """ - fn = os.path.join(*args + (self.deffnm + '.tpr',)) + """ + fn = os.path.join(*args + (self.deffnm + ".tpr",)) if not os.path.exists(fn): logger.error("Missing TPR file %(fn)r.", vars()) raise IOError(errno.ENOENT, "Missing TPR file", fn) @@ -802,7 +891,7 @@ def convert_edr(self): total_edr = edr[0] else: total_edr = self.dgdl_total_edr(dirct) - logger.info(" {0} --> {1}".format('edrs', total_edr)) + logger.info(" {0} --> {1}".format("edrs", total_edr)) gromacs.eneconv(f=edr, o=total_edr) xvgfile = os.path.join(dirct, self.deffnm + ".xvg") # hack logger.info(" {0} --> {1}".format(total_edr, xvgfile)) @@ -829,14 +918,20 @@ def collect(self, stride=None, autosave=True, autocompress=True): # must be done before adding to results.xvg or we will not find the file later self.compress_dgdl_xvg() - logger.info("[%(dirname)s] Finding dgdl xvg files, reading with " - "stride=%(stride)d permissive=%(permissive)r." % vars(self)) + logger.info( + "[%(dirname)s] Finding dgdl xvg files, reading with " + "stride=%(stride)d permissive=%(permissive)r." % vars(self) + ) for component, lambdas in self.lambdas.items(): xvg_files = [self.dgdl_xvg(self.wdir(component, l)) for l in lambdas] - self.results.xvg[component] = (np.array(lambdas), - [XVG(xvg, permissive=self.permissive, stride=self.stride) - for xvg in xvg_files]) + self.results.xvg[component] = ( + np.array(lambdas), + [ + XVG(xvg, permissive=self.permissive, stride=self.stride) + for xvg in xvg_files + ], + ) if autosave: self.save() @@ -852,23 +947,30 @@ def compress_dgdl_xvg(self): for component, lambdas in self.lambdas.items(): xvg_files = [self.dgdl_xvg(self.wdir(component, l)) for l in lambdas] for xvg in xvg_files: - root,ext = os.path.splitext(xvg) - if ext == os.path.extsep+"xvg": + root, ext = os.path.splitext(xvg) + if ext == os.path.extsep + "xvg": fnbz2 = xvg + os.path.extsep + "bz2" - logger.info("[%s] Compressing dgdl file %r with bzip2", self.dirname, xvg) + logger.info( + "[%s] Compressing dgdl file %r with bzip2", self.dirname, xvg + ) # speed is similar to 'bzip2 -9 FILE' (using a 1 Mio buffer) # (Since GW 0.8, openany() does not take kwargs anymore so the write buffer cannot be # set anymore (buffering=1048576) so the performance might be lower in MDPOW >= 0.7.0) - with open(xvg, 'rb', buffering=1048576) as source: - with openany(fnbz2, 'wb') as target: + with open(xvg, "rb", buffering=1048576) as source: + with openany(fnbz2, "wb") as target: target.writelines(source) if os.path.exists(fnbz2) and os.path.exists(xvg): os.unlink(xvg) if not os.path.exists(fnbz2): - logger.error("[%s] Failed to compress %r --- mysterious!", self.dirname, fnbz2) + logger.error( + "[%s] Failed to compress %r --- mysterious!", + self.dirname, + fnbz2, + ) else: - logger.info("[%s] Compression complete: %r", self.dirname, fnbz2) - + logger.info( + "[%s] Compression complete: %r", self.dirname, fnbz2 + ) def contains_corrupted_xvgs(self): """Check if any of the source datafiles had reported corrupted lines. @@ -880,18 +982,22 @@ def contains_corrupted_xvgs(self): :attr:`Gsolv._corrupted` as dicts of dicts with the component as primary and the lambda as secondary key. """ + def _lencorrupted(xvg): try: return len(xvg.corrupted_lineno) except AttributeError: # backwards compatible (pre gw 0.1.10 are always ok) return 0 - except TypeError: # len(None): XVG.parse() has not been run yet - return 0 # ... so we cannot conclude that it does contain bad ones + except TypeError: # len(None): XVG.parse() has not been run yet + return 0 # ... so we cannot conclude that it does contain bad ones + corrupted = {} - self._corrupted = {} # debugging ... + self._corrupted = {} # debugging ... for component, (lambdas, xvgs) in self.results.xvg.items(): corrupted[component] = np.any([(_lencorrupted(xvg) > 0) for xvg in xvgs]) - self._corrupted[component] = dict(((l, _lencorrupted(xvg)) for l,xvg in zip(lambdas, xvgs))) + self._corrupted[component] = dict( + ((l, _lencorrupted(xvg)) for l, xvg in zip(lambdas, xvgs)) + ) return np.any([x for x in corrupted.values()]) def analyze(self, force=False, stride=None, autosave=True, ncorrel=25000): @@ -977,7 +1083,7 @@ def analyze(self, force=False, stride=None, autosave=True, ncorrel=25000): .. _p526: http://books.google.co.uk/books?id=XmyO2oRUg0cC&pg=PA526 """ stride = stride or self.stride - logger.info("Analysis stride is %s.",stride) + logger.info("Analysis stride is %s.", stride) if force or not self.has_dVdl(): try: @@ -993,36 +1099,52 @@ def analyze(self, force=False, stride=None, autosave=True, ncorrel=25000): logger.info("Analyzing stored data.") # total free energy difference at const P (all simulations are done in NPT) - GibbsFreeEnergy = QuantityWithError(0,0) + GibbsFreeEnergy = QuantityWithError(0, 0) for component, (lambdas, xvgs) in self.results.xvg.items(): - logger.info("[%s %s] Computing averages and errors for %d lambda values.", - self.molecule, component, len(lambdas)) + logger.info( + "[%s %s] Computing averages and errors for %d lambda values.", + self.molecule, + component, + len(lambdas), + ) # for TI just get the average dv/dl value (in array column 1; col 0 is the time) # (This can take a while if the XVG is now reading the array from disk first time) # Use XVG class properties: first data in column 0! Y = np.array([x.mean[0] for x in xvgs]) - stdY = np.array([x.std[0] for x in xvgs]) + stdY = np.array([x.std[0] for x in xvgs]) # compute auto correlation time and error estimate for independent samples # (this can take a while). x.array[0] == time, x.array[1] == dHdl # nstep is calculated to give ncorrel samples (or all samples if less than ncorrel are # available) - tc_data = [numkit.timeseries.tcorrel(x.array[0], x.array[1], - nstep=int(np.ceil(len(x.array[0])/float(ncorrel)))) - for x in xvgs] - DY = np.array([tc['sigma'] for tc in tc_data]) - tc = np.array([tc['tc'] for tc in tc_data]) - - self.results.dvdl[component] = {'lambdas':lambdas, 'mean':Y, 'error':DY, - 'stddev':stdY, 'tcorrel':tc} + tc_data = [ + numkit.timeseries.tcorrel( + x.array[0], + x.array[1], + nstep=int(np.ceil(len(x.array[0]) / float(ncorrel))), + ) + for x in xvgs + ] + DY = np.array([tc["sigma"] for tc in tc_data]) + tc = np.array([tc["tc"] for tc in tc_data]) + + self.results.dvdl[component] = { + "lambdas": lambdas, + "mean": Y, + "error": DY, + "stddev": stdY, + "tcorrel": tc, + } # Combined Simpson rule integration: # even="last" because dV/dl is smoother at the beginning so using trapezoidal # integration there makes less of an error (one hopes...) - a = scipy.integrate.simps(Y, x=lambdas, even='last') - da = numkit.integration.simps_error(DY, x=lambdas, even='last') + a = scipy.integrate.simps(Y, x=lambdas, even="last") + da = numkit.integration.simps_error(DY, x=lambdas, even="last") self.results.DeltaA[component] = QuantityWithError(a, da) - GibbsFreeEnergy += self.results.DeltaA[component] # error propagation is automagic! + GibbsFreeEnergy += self.results.DeltaA[ + component + ] # error propagation is automagic! # hydration free energy Delta A = -(Delta A_coul + Delta A_vdw) GibbsFreeEnergy *= -1 @@ -1034,20 +1156,24 @@ def analyze(self, force=False, stride=None, autosave=True, ncorrel=25000): self.logger_DeltaA0() return self.results.DeltaA.Gibbs - def collect_alchemlyb(self, SI=True, start=0, stop=None, stride=None, autosave=True, autocompress=True): + def collect_alchemlyb( + self, SI=True, start=0, stop=None, stride=None, autosave=True, autocompress=True + ): """Collect the data files using alchemlyb. Unlike :meth:`collect`, this method can subsample with the statistical inefficiency (parameter `SI`). """ - extract = self.estimators[self.method]['extract'] + extract = self.estimators[self.method]["extract"] if autocompress: # must be done before adding to results.xvg or we will not find the file later self.compress_dgdl_xvg() - logger.info("[%(dirname)s] Finding dgdl xvg files, reading with " - "stride=%(stride)d permissive=%(permissive)r." % vars(self)) + logger.info( + "[%(dirname)s] Finding dgdl xvg files, reading with " + "stride=%(stride)d permissive=%(permissive)r." % vars(self) + ) for component, lambdas in self.lambdas.items(): val = [] for l in lambdas: @@ -1055,21 +1181,34 @@ def collect_alchemlyb(self, SI=True, start=0, stop=None, stride=None, autosave=T xvg_df = extract(xvg_file, T=self.Temperature).iloc[start:stop:stride] full_len = len(xvg_df) if SI: - logger.info("Performing statistical inefficiency analysis for window %s %04d" % (component, 1000 * l)) + logger.info( + "Performing statistical inefficiency analysis for window %s %04d" + % (component, 1000 * l) + ) ts = _extract_dataframe(xvg_file).iloc[start:stop:stride] - ts = pd.DataFrame({'time': ts.iloc[:,0], 'dhdl': ts.iloc[:,1]}) - ts = ts.set_index('time') + ts = pd.DataFrame({"time": ts.iloc[:, 0], "dhdl": ts.iloc[:, 1]}) + ts = ts.set_index("time") # use the statistical_inefficiency function to subsample the data xvg_df = statistical_inefficiency(xvg_df, ts, conservative=True) - logger.info("The statistical inefficiency value is {:.4f}.".format(full_len/len(xvg_df)/2)) - logger.info("The data are subsampled every {:d} frames.".format(int(np.ceil(full_len/len(xvg_df)/2)))) + logger.info( + "The statistical inefficiency value is {:.4f}.".format( + full_len / len(xvg_df) / 2 + ) + ) + logger.info( + "The data are subsampled every {:d} frames.".format( + int(np.ceil(full_len / len(xvg_df) / 2)) + ) + ) val.append(xvg_df) self.results.xvg[component] = (np.array(lambdas), pd.concat(val)) if autosave: self.save() - def analyze_alchemlyb(self, SI=True, start=0, stop=None, stride=None, force=False, autosave=True): + def analyze_alchemlyb( + self, SI=True, start=0, stop=None, stride=None, force=False, autosave=True + ): """Compute the free energy from the simulation data with alchemlyb. Unlike :meth:`analyze`, the MBAR estimator is available (in @@ -1081,12 +1220,12 @@ def analyze_alchemlyb(self, SI=True, start=0, stop=None, stride=None, force=Fals start = start or self.start stop = stop or self.stop - logger.info("Analysis stride is %s.",stride) - logger.info("Analysis starts from frame %s.",start) + logger.info("Analysis stride is %s.", stride) + logger.info("Analysis starts from frame %s.", start) logger.info("Analysis stops at frame %s.", stop) - if self.method in ['TI', 'BAR', 'MBAR']: - estimator = self.estimators[self.method]['estimator'] + if self.method in ["TI", "BAR", "MBAR"]: + estimator = self.estimators[self.method]["estimator"] else: errmsg = "The method is not supported." logger.error(errmsg) @@ -1106,13 +1245,13 @@ def analyze_alchemlyb(self, SI=True, start=0, stop=None, stride=None, force=Fals logger.info("Analyzing stored data.") # total free energy difference at const P (all simulations are done in NPT) - GibbsFreeEnergy = QuantityWithError(0,0) + GibbsFreeEnergy = QuantityWithError(0, 0) for component, (lambdas, xvgs) in self.results.xvg.items(): result = estimator().fit(xvgs) - if self.method == 'BAR': - DeltaA = QuantityWithError(0,0) - a_s= np.diagonal(result.delta_f_, offset=1) + if self.method == "BAR": + DeltaA = QuantityWithError(0, 0) + a_s = np.diagonal(result.delta_f_, offset=1) da_s = np.diagonal(result.d_delta_f_, offset=1) for a, da in zip(a_s, da_s): DeltaA += QuantityWithError(a, da) @@ -1120,8 +1259,12 @@ def analyze_alchemlyb(self, SI=True, start=0, stop=None, stride=None, force=Fals else: a = result.delta_f_.loc[0.00, 1.00] da = result.d_delta_f_.loc[0.00, 1.00] - self.results.DeltaA[component] = kBT_to_kJ(QuantityWithError(a, da), self.Temperature) - GibbsFreeEnergy += self.results.DeltaA[component] # error propagation is automagic! + self.results.DeltaA[component] = kBT_to_kJ( + QuantityWithError(a, da), self.Temperature + ) + GibbsFreeEnergy += self.results.DeltaA[ + component + ] # error propagation is automagic! # hydration free energy Delta A = -(Delta A_coul + Delta A_vdw) GibbsFreeEnergy *= -1 @@ -1136,7 +1279,7 @@ def analyze_alchemlyb(self, SI=True, start=0, stop=None, stride=None, force=Fals if autosave: self.save() - def write_DeltaA0(self, filename, mode='w'): + def write_DeltaA0(self, filename, mode="w"): """Write free energy components to a file. :Arguments: @@ -1150,7 +1293,7 @@ def write_DeltaA0(self, filename, mode='w'): molecule solvent total coulomb vdw """ with open(filename, mode) as tab: - tab.write(self.summary() + '\n') + tab.write(self.summary() + "\n") def summary(self): """Return a string that summarizes the energetics. @@ -1165,21 +1308,29 @@ def summary(self): """ fmt = "%-10s %-14s %+8.2f %8.2f %+8.2f %8.2f %+8.2f %8.2f" d = self.results.DeltaA - return fmt % ((self.molecule, self.solvent_type) + \ - d.Gibbs.astuple() + d.coulomb.astuple() + \ - d.vdw.astuple()) + return fmt % ( + (self.molecule, self.solvent_type) + + d.Gibbs.astuple() + + d.coulomb.astuple() + + d.vdw.astuple() + ) def logger_DeltaA0(self): """Print the free energy contributions (errors in parentheses).""" - if not 'DeltaA' in self.results or len(self.results.DeltaA) == 0: + if not "DeltaA" in self.results or len(self.results.DeltaA) == 0: logger.info("No DeltaA free energies computed yet.") return logger.info("DeltaG0 = -(DeltaG_coul + DeltaG_vdw)") for component, energy in self.results.DeltaA.items(): - logger.info("[%s] %s solvation free energy (%s) %g (%.2f) kJ/mol", - self.molecule, self.solvent_type.capitalize(), component, - energy.value, energy.error) + logger.info( + "[%s] %s solvation free energy (%s) %g (%.2f) kJ/mol", + self.molecule, + self.solvent_type.capitalize(), + component, + energy.value, + energy.error, + ) def has_dVdl(self): """Check if dV/dl data have already been collected. @@ -1192,7 +1343,9 @@ def has_dVdl(self): return False except AttributeError: return False - return np.all(np.array([len(xvgs) for (lambdas,xvgs) in self.results.xvg.values()]) > 0) + return np.all( + np.array([len(xvgs) for (lambdas, xvgs) in self.results.xvg.values()]) > 0 + ) def plot(self, **kwargs): """Plot the TI data with error bars. @@ -1206,9 +1359,9 @@ def plot(self, **kwargs): import matplotlib import matplotlib.pyplot as plt - kwargs.setdefault('color', 'black') - kwargs.setdefault('capsize', 0) - kwargs.setdefault('elinewidth', 2) + kwargs.setdefault("color", "black") + kwargs.setdefault("capsize", 0) + kwargs.setdefault("elinewidth", 2) try: if self.results.DeltaA.Gibbs is None or len(self.results.dvdl) == 0: @@ -1221,20 +1374,32 @@ def plot(self, **kwargs): nplots = len(dvdl) fig, axs = plt.subplots(nrows=1, ncols=nplots) for i, component in enumerate(np.sort(dvdl.keys())): # stable plot order - x,y,dy = [dvdl[component][k] for k in ('lambdas', 'mean', 'error')] + x, y, dy = [dvdl[component][k] for k in ("lambdas", "mean", "error")] iplot = i ax = axs[i] energy = self.results.DeltaA[component] - label = r"$\Delta A^{\rm{%s}}_{\rm{%s}} = %.2f\pm%.2f$ kJ/mol" \ - % (component, self.solvent_type, energy.value, energy.error) + label = r"$\Delta A^{\rm{%s}}_{\rm{%s}} = %.2f\pm%.2f$ kJ/mol" % ( + component, + self.solvent_type, + energy.value, + energy.error, + ) ax.errorbar(x, y, yerr=dy, label=label, **kwargs) - ax.set_xlabel(r'$\lambda$') - ax.legend(loc='best') + ax.set_xlabel(r"$\lambda$") + ax.legend(loc="best") ax.set_xlim(-0.05, 1.05) - axs[0].set_ylabel(r'$dV/d\lambda$ in kJ/mol') - fig.suptitle(r"Free energy difference $\Delta A^{0}_{\rm{%s}}$ for %s: $%.2f\pm%.2f$ kJ/mol" % - ((self.solvent_type, self.molecule,) + self.results.DeltaA.Gibbs.astuple())) - fig.savefig('DeltaA.png') + axs[0].set_ylabel(r"$dV/d\lambda$ in kJ/mol") + fig.suptitle( + r"Free energy difference $\Delta A^{0}_{\rm{%s}}$ for %s: $%.2f\pm%.2f$ kJ/mol" + % ( + ( + self.solvent_type, + self.molecule, + ) + + self.results.DeltaA.Gibbs.astuple() + ) + ) + fig.savefig("DeltaA.png") plt.close() return fig @@ -1259,8 +1424,10 @@ def choose_script_from(scripts): with in_dir(self.dirname, create=False): for component, scripts in self.scripts.items(): - s = relpath(choose_script_from(scripts), self.dirname) # relative to dirname - cmd = ['qsub', s] + s = relpath( + choose_script_from(scripts), self.dirname + ) # relative to dirname + cmd = ["qsub", s] logger.debug("[%s] submitting locally: %s", " ".join(cmd), component) rc = call(cmd) if rc != 0: @@ -1268,7 +1435,9 @@ def choose_script_from(scripts): logger.error(errmsg) raise OSError(errmsg) - logger.info("[%r] Submitted jobs locally for %r", self.dirname, self.scripts.keys()) + logger.info( + "[%r] Submitted jobs locally for %r", self.dirname, self.scripts.keys() + ) def __repr__(self): return "%s(filename=%r)" % (self.__class__.__name__, self.filename) @@ -1276,6 +1445,7 @@ def __repr__(self): class Ghyd(Gsolv): """Sets up and analyses MD to obtain the hydration free energy of a solute.""" + solvent_default = "water" dirname_default = os.path.join(Gsolv.topdir_default, solvent_default) @@ -1292,28 +1462,50 @@ class Goct(Gsolv): part of the dV/dl curve is quite sensitive. By adding two additional points we hope to reduce the overall error on the dis-charging free energy. """ + solvent_default = "octanol" dirname_default = os.path.join(Gsolv.topdir_default, solvent_default) - schedules = {'coulomb': - FEPschedule(name='coulomb', - description="dis-charging vdw+q --> vdw", - label='Coul', - couple_lambda0='vdw-q', couple_lambda1='vdw', - sc_alpha=0, # linear scaling for coulomb - #lambdas=[0, 0.25, 0.5, 0.75, 1.0], # default - lambdas=[0, 0.125, 0.25, 0.375, 0.5, 0.75, 1.0], # +0.125, 0.375 enhanced - ), - 'vdw': - FEPschedule(name='vdw', - description="decoupling vdw --> none", - label='VDW', - couple_lambda0='vdw', couple_lambda1='none', - sc_alpha=0.5, sc_power=1, sc_sigma=0.3, # recommended values - lambdas=[0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, # defaults - 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1], - ), - } + schedules = { + "coulomb": FEPschedule( + name="coulomb", + description="dis-charging vdw+q --> vdw", + label="Coul", + couple_lambda0="vdw-q", + couple_lambda1="vdw", + sc_alpha=0, # linear scaling for coulomb + # lambdas=[0, 0.25, 0.5, 0.75, 1.0], # default + lambdas=[0, 0.125, 0.25, 0.375, 0.5, 0.75, 1.0], # +0.125, 0.375 enhanced + ), + "vdw": FEPschedule( + name="vdw", + description="decoupling vdw --> none", + label="VDW", + couple_lambda0="vdw", + couple_lambda1="none", + sc_alpha=0.5, + sc_power=1, + sc_sigma=0.3, # recommended values + lambdas=[ + 0.0, + 0.05, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, # defaults + 0.65, + 0.7, + 0.75, + 0.8, + 0.85, + 0.9, + 0.95, + 1, + ], + ), + } class Gwoct(Goct): @@ -1323,13 +1515,14 @@ class Gwoct(Goct): part of the dV/dl curve is quite sensitive. By adding two additional points we hope to reduce the overall error on the dis-charging free energy. """ + solvent_default = "wetoctanol" dirname_default = os.path.join(Gsolv.topdir_default, solvent_default) class Gtol(Gsolv): - """Sets up and analyses MD to obtain the solvation free energy of a solute in toluene. - """ + """Sets up and analyses MD to obtain the solvation free energy of a solute in toluene.""" + solvent_default = "toluene" dirname_default = os.path.join(Gsolv.topdir_default, solvent_default) @@ -1388,10 +1581,13 @@ def p_transfer(G1, G2, **kwargs): """ - kwargs.setdefault('force', False) - estimator = kwargs.pop('estimator', 'alchemlyb') - if not estimator in ('mdpow', 'alchemlyb'): - errmsg = "estimator = %r is not supported, must be 'mdpow' or 'alchemlyb'" % estimator + kwargs.setdefault("force", False) + estimator = kwargs.pop("estimator", "alchemlyb") + if not estimator in ("mdpow", "alchemlyb"): + errmsg = ( + "estimator = %r is not supported, must be 'mdpow' or 'alchemlyb'" + % estimator + ) logger.error(errmsg) raise ValueError(errmsg) @@ -1400,40 +1596,49 @@ def p_transfer(G1, G2, **kwargs): if G1.Temperature != G2.Temperature: raise ValueError("The two simulations were done at different temperatures.") - logger.info("[%s] transfer free energy %s --> %s calculation", - G1.molecule, G1.solvent_type, G2.solvent_type) + logger.info( + "[%s] transfer free energy %s --> %s calculation", + G1.molecule, + G1.solvent_type, + G2.solvent_type, + ) for G in (G1, G2): G_kwargs = kwargs.copy() # for fep files generated with old code which doesn't have these attributes - if not hasattr(G, 'start'): - G.start = G_kwargs.pop('start', 0) - if not hasattr(G, 'stop'): - G.stop = G_kwargs.pop('stop', None) - if not hasattr(G, 'SI'): - G_kwargs.setdefault('SI', True) + if not hasattr(G, "start"): + G.start = G_kwargs.pop("start", 0) + if not hasattr(G, "stop"): + G.stop = G_kwargs.pop("stop", None) + if not hasattr(G, "SI"): + G_kwargs.setdefault("SI", True) else: - G_kwargs.setdefault('SI', G.SI) + G_kwargs.setdefault("SI", G.SI) # for this version. use the method given instead of the one in the input cfg file - G.method = G_kwargs.pop('method', 'MBAR') - if estimator == 'mdpow': + G.method = G_kwargs.pop("method", "MBAR") + if estimator == "mdpow": if G.method != "TI": - errmsg = "Method %s is not implemented in MDPOW, use estimator='alchemlyb'" % G.method + errmsg = ( + "Method %s is not implemented in MDPOW, use estimator='alchemlyb'" + % G.method + ) logger.error(errmsg) raise ValueError(errmsg) logger.info("The solvent is %s .", G.solvent_type) - if kwargs['force'] or 'Gibbs' not in G.results.DeltaA: + if kwargs["force"] or "Gibbs" not in G.results.DeltaA: # write out the settings when the analysis is performed logger.info("Estimator is %s.", estimator) logger.info("Free energy calculation method is %s.", G.method) - if estimator == 'mdpow': - G_kwargs.pop('SI', None) # G.analyze() does not know SI + if estimator == "mdpow": + G_kwargs.pop("SI", None) # G.analyze() does not know SI G.analyze(**G_kwargs) - elif estimator == 'alchemlyb': - logger.info("Statistical inefficiency analysis will %s be performed." % - ("" if G_kwargs['SI'] else "not")) + elif estimator == "alchemlyb": + logger.info( + "Statistical inefficiency analysis will %s be performed." + % ("" if G_kwargs["SI"] else "not") + ) G.analyze_alchemlyb(**G_kwargs) else: logger.info("Using already calculated free energy DeltaA") @@ -1451,18 +1656,25 @@ def p_transfer(G1, G2, **kwargs): # water -> cyclohexane: P_cw # water -> toluene: P_tw coefficient = "P_{0}{1}".format( - G2.solvent_type.lower()[0], G1.solvent_type.lower()[0]) + G2.solvent_type.lower()[0], G1.solvent_type.lower()[0] + ) logger.info("[%s] Values at T = %g K", molecule, G1.Temperature) - logger.info("[%s] Free energy of transfer %s --> %s: %.3f (%.3f) kJ/mol", - molecule, - G1.solvent_type, G2.solvent_type, - transferFE.value, transferFE.error) - logger.info("[%s] log %s: %.3f (%.3f)", - molecule, coefficient, logPow.value, logPow.error) + logger.info( + "[%s] Free energy of transfer %s --> %s: %.3f (%.3f) kJ/mol", + molecule, + G1.solvent_type, + G2.solvent_type, + transferFE.value, + transferFE.error, + ) + logger.info( + "[%s] log %s: %.3f (%.3f)", molecule, coefficient, logPow.value, logPow.error + ) return transferFE, logPow + def pOW(G1, G2, **kwargs): """Compute water-octanol partition coefficient from two :class:`Gsolv` objects. @@ -1503,11 +1715,13 @@ def pOW(G1, G2, **kwargs): args = (G2, G1) else: msg = "For pOW need water and octanol simulations but instead got {0} and {1}".format( - G1.solvent_type, G2.solvent_type) + G1.solvent_type, G2.solvent_type + ) logger.error(msg) raise ValueError(msg) return p_transfer(*args, **kwargs) + def pCW(G1, G2, **kwargs): """Compute water-cyclohexane partition coefficient from two :class:`Gsolv` objects. @@ -1548,11 +1762,13 @@ def pCW(G1, G2, **kwargs): args = (G2, G1) else: msg = "For pCW need water and cyclohexane simulations but instead got {0} and {1}".format( - G1.solvent_type, G2.solvent_type) + G1.solvent_type, G2.solvent_type + ) logger.error(msg) raise ValueError(msg) return p_transfer(*args, **kwargs) + def pTW(G1, G2, **kwargs): """Compute water-toluene partition coefficient from two :class:`Gsolv` objects. @@ -1593,7 +1809,8 @@ def pTW(G1, G2, **kwargs): args = (G2, G1) else: msg = "For pTW need water and toluene simulations but instead got {0} and {1}".format( - G1.solvent_type, G2.solvent_type) + G1.solvent_type, G2.solvent_type + ) logger.error(msg) raise ValueError(msg) - return p_transfer(*args, **kwargs) \ No newline at end of file + return p_transfer(*args, **kwargs) diff --git a/mdpow/filelock.py b/mdpow/filelock.py index f01dc0fc..948b2a96 100644 --- a/mdpow/filelock.py +++ b/mdpow/filelock.py @@ -30,18 +30,20 @@ import time import errno + class FileLockException(Exception): pass + class FileLock(object): - """ A file locking mechanism that has context-manager support so - you can use it in a with statement. This should be relatively cross - compatible as it doesn't rely on msvcrt or fcntl for the locking. + """A file locking mechanism that has context-manager support so + you can use it in a with statement. This should be relatively cross + compatible as it doesn't rely on msvcrt or fcntl for the locking. """ - def __init__(self, file_name, timeout=10, delay=.05): - """ Prepare the file locker. Specify the file to lock and optionally - the maximum timeout and the delay between each attempt to lock. + def __init__(self, file_name, timeout=10, delay=0.05): + """Prepare the file locker. Specify the file to lock and optionally + the maximum timeout and the delay between each attempt to lock. """ self.is_locked = False self.lockfile = os.path.join(os.getcwd(), "%s.lock" % file_name) @@ -49,17 +51,16 @@ def __init__(self, file_name, timeout=10, delay=.05): self.timeout = timeout self.delay = delay - def acquire(self): - """ Acquire the lock, if possible. If the lock is in use, it check again - every `wait` seconds. It does this until it either gets the lock or - exceeds `timeout` number of seconds, in which case it throws - an exception. + """Acquire the lock, if possible. If the lock is in use, it check again + every `wait` seconds. It does this until it either gets the lock or + exceeds `timeout` number of seconds, in which case it throws + an exception. """ start_time = time.time() while True: try: - self.fd = os.open(self.lockfile, os.O_CREAT|os.O_EXCL|os.O_RDWR) + self.fd = os.open(self.lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR) break except OSError as e: if e.errno != errno.EEXIST: @@ -69,37 +70,33 @@ def acquire(self): time.sleep(self.delay) self.is_locked = True - def release(self): - """ Get rid of the lock by deleting the lockfile. - When working in a `with` statement, this gets automatically - called at the end. + """Get rid of the lock by deleting the lockfile. + When working in a `with` statement, this gets automatically + called at the end. """ if self.is_locked: os.close(self.fd) os.unlink(self.lockfile) self.is_locked = False - def __enter__(self): - """ Activated when used in the with statement. - Should automatically acquire a lock to be used in the with block. + """Activated when used in the with statement. + Should automatically acquire a lock to be used in the with block. """ if not self.is_locked: self.acquire() return self - def __exit__(self, type, value, traceback): - """ Activated at the end of the with statement. - It automatically releases the lock if it isn't locked. + """Activated at the end of the with statement. + It automatically releases the lock if it isn't locked. """ if self.is_locked: self.release() - def __del__(self): - """ Make sure that the FileLock instance doesn't leave a lockfile - lying around. + """Make sure that the FileLock instance doesn't leave a lockfile + lying around. """ self.release() diff --git a/mdpow/forcefields.py b/mdpow/forcefields.py index 7ad9ebe3..348944da 100644 --- a/mdpow/forcefields.py +++ b/mdpow/forcefields.py @@ -58,6 +58,7 @@ from collections import defaultdict import logging + logger = logging.getLogger("mdpow.forecefields") #: Default force field. At the moment, OPLS-AA, CHARMM/CGENFF, and AMBER/GAFF @@ -65,13 +66,13 @@ #: default here as this behavior is not tested. DEFAULT_FORCEFIELD = "OPLS-AA" -#------------------------------------------------------------ +# ------------------------------------------------------------ # Gromacs water models -#------------------------------------------------------------ +# ------------------------------------------------------------ #: See the file ``top/oplsaa.ff/watermodels.dat`` for a description of #: available water models that are bundled with MDPOW. -GMX_WATERMODELS_DAT=""" +GMX_WATERMODELS_DAT = """ tip4p TIP4P TIP 4-point, recommended tip3p TIP3P TIP 3-point tip5p TIP5P TIP 5-point @@ -82,14 +83,25 @@ tip4pew TIP4PEW TIP 4-point modified for use with Ewald techniques (TIP4PEW) """ + class GromacsSolventModel(object): """Data for a solvent model in Gromacs.""" - def __init__(self, identifier, name=None, itp=None, coordinates=None, - description=None, forcefield="OPLS-AA"): + + def __init__( + self, + identifier, + name=None, + itp=None, + coordinates=None, + description=None, + forcefield="OPLS-AA", + ): self.identifier = identifier self.name = name if name is not None else str(identifier).upper() - self.itp = itp if itp is not None else self.guess_filename('itp') - self.coordinates = coordinates if coordinates is not None else self.guess_filename('gro') + self.itp = itp if itp is not None else self.guess_filename("itp") + self.coordinates = ( + coordinates if coordinates is not None else self.guess_filename("gro") + ) self.description = description self.forcefield = forcefield @@ -98,35 +110,44 @@ def guess_filename(self, extension): return self.identifier.lower() + os.extsep + str(extension) def __repr__(self): - return "<{0[name]} water: identifier={0[identifier]}, ff={0[forcefield]}>".format(vars(self)) + return ( + "<{0[name]} water: identifier={0[identifier]}, ff={0[forcefield]}>".format( + vars(self) + ) + ) + #: For some water models we cannot derive the filename for the equilibrated box #: so we supply them explicitly. SPECIAL_WATER_COORDINATE_FILES = defaultdict( lambda: None, - spc='spc216.gro', - spce='spc216.gro', - tip3p='spc216.gro', - m24='spc216.gro', - tip4pd='tip4p.gro', - tip4pew='tip4p.gro', + spc="spc216.gro", + spce="spc216.gro", + tip3p="spc216.gro", + m24="spc216.gro", + tip4pd="tip4p.gro", + tip4pew="tip4p.gro", ) + def _create_water_models(watermodelsdat): models = {} - for line in GMX_WATERMODELS_DAT.split('\n'): + for line in GMX_WATERMODELS_DAT.split("\n"): line = line.strip() - if not line or line.startswith('#'): + if not line or line.startswith("#"): continue fields = line.split() identifier, name = fields[:2] description = " ".join(fields[2:]) models[identifier] = GromacsSolventModel( - identifier, name=name, + identifier, + name=name, coordinates=SPECIAL_WATER_COORDINATE_FILES[identifier], - description=description) + description=description, + ) return models + #: Use the default water model unless another water model is chosen in the #: :ref:`run input file ` file by setting the #: ``setup.watermodel`` parameter. @@ -145,6 +166,7 @@ def _create_water_models(watermodelsdat): #: For OPLS-AA the following ones are available. GROMACS_WATER_MODELS = _create_water_models(GMX_WATERMODELS_DAT) + def get_water_model(watermodel=DEFAULT_WATER_MODEL): """Return a :class:`GromacsSolventModel` corresponding to identifier *watermodel*""" @@ -152,63 +174,84 @@ def get_water_model(watermodel=DEFAULT_WATER_MODEL): return GROMACS_WATER_MODELS[watermodel] except KeyError: msg = "{0} is not a valid water model: choose one from {1}".format( - watermodel, ", ".join(GROMACS_WATER_MODELS.keys())) + watermodel, ", ".join(GROMACS_WATER_MODELS.keys()) + ) logger.error(msg) raise ValueError(msg) + #: Other solvents (not water, see :data:`GROMACS_WATER_MODELS` for those). -new_octanol = '''Zangi R (2018) Refinement of the OPLSAA force-field +new_octanol = """Zangi R (2018) Refinement of the OPLSAA force-field for liquid alcohols.; ACS Omega 3(12):18089-18099. - doi: 10.1021/acsomega.8b03132''' + doi: 10.1021/acsomega.8b03132""" OPLS_SOLVENT_MODELS = { - 'octanol': GromacsSolventModel( - identifier="octanol", itp="1oct.itp", coordinates="1oct.gro"), - 'octanolnew': GromacsSolventModel( - identifier="octanol", itp="1octnew.itp", coordinates="1oct.gro", - description=new_octanol), - 'cyclohexane': GromacsSolventModel( - identifier="cyclohexane", itp="1cyclo.itp", coordinates="1cyclo.gro"), - 'wetoctanol': GromacsSolventModel( - identifier="wetoctanol", itp="1octwet.itp", coordinates="1octwet.gro"), - 'wetoctanolnew': GromacsSolventModel( - identifier="wetoctanol", itp="1octwetnew.itp", coordinates="1octwet.gro", - description=new_octanol), - 'toluene': GromacsSolventModel( - identifier="toluene", itp="1tol.itp", coordinates="1tol_oplsaa.gro"), - } + "octanol": GromacsSolventModel( + identifier="octanol", itp="1oct.itp", coordinates="1oct.gro" + ), + "octanolnew": GromacsSolventModel( + identifier="octanol", + itp="1octnew.itp", + coordinates="1oct.gro", + description=new_octanol, + ), + "cyclohexane": GromacsSolventModel( + identifier="cyclohexane", itp="1cyclo.itp", coordinates="1cyclo.gro" + ), + "wetoctanol": GromacsSolventModel( + identifier="wetoctanol", itp="1octwet.itp", coordinates="1octwet.gro" + ), + "wetoctanolnew": GromacsSolventModel( + identifier="wetoctanol", + itp="1octwetnew.itp", + coordinates="1octwet.gro", + description=new_octanol, + ), + "toluene": GromacsSolventModel( + identifier="toluene", itp="1tol.itp", coordinates="1tol_oplsaa.gro" + ), +} CHARMM_SOLVENT_MODELS = { - 'octanol': GromacsSolventModel( - identifier="octanol", itp="1oct.itp", coordinates="1oct_charmm.gro"), - 'wetoctanol': GromacsSolventModel( - identifier="wetoctanol", itp="1octwet.itp", coordinates="1octwet_charmm.gro"), - 'cyclohexane': GromacsSolventModel( - identifier="cyclohexane", itp="1cyclo.itp", coordinates="1cyclo_charmm.gro"), - 'toluene': GromacsSolventModel( - identifier="toluene", itp="1tol.itp", coordinates="1tol_charmm.gro"), - } + "octanol": GromacsSolventModel( + identifier="octanol", itp="1oct.itp", coordinates="1oct_charmm.gro" + ), + "wetoctanol": GromacsSolventModel( + identifier="wetoctanol", itp="1octwet.itp", coordinates="1octwet_charmm.gro" + ), + "cyclohexane": GromacsSolventModel( + identifier="cyclohexane", itp="1cyclo.itp", coordinates="1cyclo_charmm.gro" + ), + "toluene": GromacsSolventModel( + identifier="toluene", itp="1tol.itp", coordinates="1tol_charmm.gro" + ), +} AMBER_SOLVENT_MODELS = { - 'octanol': GromacsSolventModel( - identifier="octanol", itp="1oct.itp", coordinates="1oct_amber.gro"), - 'wetoctanol': GromacsSolventModel( - identifier="wetoctanol", itp="1octwet.itp", coordinates="1octwet_amber.gro"), - 'cyclohexane': GromacsSolventModel( - identifier="cyclohexane", itp="1cyclo.itp", coordinates="1cyclo_amber.gro"), - 'toluene': GromacsSolventModel( - identifier="toluene", itp="1tol.itp", coordinates="1tol_amber.gro"), - } + "octanol": GromacsSolventModel( + identifier="octanol", itp="1oct.itp", coordinates="1oct_amber.gro" + ), + "wetoctanol": GromacsSolventModel( + identifier="wetoctanol", itp="1octwet.itp", coordinates="1octwet_amber.gro" + ), + "cyclohexane": GromacsSolventModel( + identifier="cyclohexane", itp="1cyclo.itp", coordinates="1cyclo_amber.gro" + ), + "toluene": GromacsSolventModel( + identifier="toluene", itp="1tol.itp", coordinates="1tol_amber.gro" + ), +} #: Solvents available in GROMACS; the keys of the dictionary #: are the forcefields. GROMACS_SOLVENT_MODELS = { - 'OPLS-AA': OPLS_SOLVENT_MODELS, - 'CHARMM': CHARMM_SOLVENT_MODELS, - 'AMBER': AMBER_SOLVENT_MODELS, - } + "OPLS-AA": OPLS_SOLVENT_MODELS, + "CHARMM": CHARMM_SOLVENT_MODELS, + "AMBER": AMBER_SOLVENT_MODELS, +} -def get_solvent_identifier(solvent_type, model=None, forcefield='OPLS-AA'): + +def get_solvent_identifier(solvent_type, model=None, forcefield="OPLS-AA"): """Get the identifier for a solvent model. The identifier is needed to access a water model (i.e., a @@ -232,9 +275,9 @@ def get_solvent_identifier(solvent_type, model=None, forcefield='OPLS-AA'): :Returns: Either an identifier or ``None`` """ - + if solvent_type == "water": - identifier = model if not model in (None, 'water') else DEFAULT_WATER_MODEL + identifier = model if not model in (None, "water") else DEFAULT_WATER_MODEL return identifier if identifier in GROMACS_WATER_MODELS else None if model not in GROMACS_SOLVENT_MODELS[forcefield]: if solvent_type in GROMACS_SOLVENT_MODELS[forcefield]: @@ -244,7 +287,7 @@ def get_solvent_identifier(solvent_type, model=None, forcefield='OPLS-AA'): return model -def get_solvent_model(identifier, forcefield='OPLS-AA'): +def get_solvent_model(identifier, forcefield="OPLS-AA"): """Return a :class:`GromacsSolventModel` corresponding to identifier *identifier*. If identifier is "water" then the :data:`DEFAULT_WATER_MODEL` is assumed. @@ -263,19 +306,20 @@ def get_solvent_model(identifier, forcefield='OPLS-AA'): raise ValueError(msg) -def get_ff_paths(forcefield='OPLS-AA'): +def get_ff_paths(forcefield="OPLS-AA"): """Return a :list: containing the forcefield directory, paths of ions and default watermodel itp files. """ - + settings = { - 'OPLS-AA': ['oplsaa.ff/', 'oplsaa.ff/ions_opls.itp', - 'oplsaa.ff/tip4p.itp'], - 'AMBER': ['amber99sb.ff/', 'amber99sb.ff/ions.itp', - 'amber99sb.ff/tip3p.itp'], - 'CHARMM': ['charmm36-mar2019.ff/', 'charmm36-mar2019.ff/ions.itp', - 'charmm36-mar2019.ff/tip3p.itp'], - } + "OPLS-AA": ["oplsaa.ff/", "oplsaa.ff/ions_opls.itp", "oplsaa.ff/tip4p.itp"], + "AMBER": ["amber99sb.ff/", "amber99sb.ff/ions.itp", "amber99sb.ff/tip3p.itp"], + "CHARMM": [ + "charmm36-mar2019.ff/", + "charmm36-mar2019.ff/ions.itp", + "charmm36-mar2019.ff/tip3p.itp", + ], + } try: return settings[forcefield] except KeyError: @@ -286,10 +330,14 @@ def get_ff_paths(forcefield='OPLS-AA'): def get_top_template(identifier): """Return the topology file template suitable for the solvent model.""" - - templates = {'water': 'system.top', 'octanol': 'system.top', - 'cyclohexane': 'system.top', 'wetoctanol': 'system_octwet.top', - 'toluene': 'system.top',} + + templates = { + "water": "system.top", + "octanol": "system.top", + "cyclohexane": "system.top", + "wetoctanol": "system_octwet.top", + "toluene": "system.top", + } try: return templates[identifier] except KeyError: diff --git a/mdpow/log.py b/mdpow/log.py index f2727293..fbe0b31d 100644 --- a/mdpow/log.py +++ b/mdpow/log.py @@ -26,6 +26,7 @@ import logging + def create(logname, logfile): """Create a top level logger. @@ -38,7 +39,9 @@ def create(logname, logfile): logger.setLevel(logging.DEBUG) logfile = logging.FileHandler(logfile) - logfile_formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s') + logfile_formatter = logging.Formatter( + "%(asctime)s %(name)-12s %(levelname)-8s %(message)s" + ) logfile.setFormatter(logfile_formatter) logger.addHandler(logfile) @@ -46,12 +49,13 @@ def create(logname, logfile): console = logging.StreamHandler() console.setLevel(logging.INFO) # set a format which is simpler for console use - formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') + formatter = logging.Formatter("%(name)-12s: %(levelname)-8s %(message)s") console.setFormatter(formatter) logger.addHandler(console) return logger + def clear_handlers(logger): """clean out handlers in the library top level logger @@ -59,5 +63,3 @@ def clear_handlers(logger): """ for h in logger.handlers: logger.removeHandler(h) - - diff --git a/mdpow/restart.py b/mdpow/restart.py index 02d55f6e..2b2e362a 100644 --- a/mdpow/restart.py +++ b/mdpow/restart.py @@ -26,16 +26,20 @@ import os import logging -logger = logging.getLogger('mdpow.checkpoint') + +logger = logging.getLogger("mdpow.checkpoint") + def checkpoint(name, sim, filename): """Execute the :meth:`Journalled.save` method and log the event.""" logger.info("checkpoint: %(name)s", vars()) sim.save(filename) + class JournalSequenceError(Exception): """Raised when a stage is started without another one having been completed.""" + class Journal(object): """Class that keeps track of the stage in a protocol. @@ -69,6 +73,7 @@ class Journal(object): # raises JournalSequenceError """ + def __init__(self, stages): """Initialise the journal that keeps track of stages. @@ -92,8 +97,10 @@ def current(self): @current.setter def current(self, stage): if not stage in self.stages: - raise ValueError("Can only assign a registered stage from %r, not %r" % - (self.stages, stage)) + raise ValueError( + "Can only assign a registered stage from %r, not %r" + % (self.stages, stage) + ) self.__current = stage @current.deleter @@ -108,7 +115,9 @@ def incomplete(self): @incomplete.setter def incomplete(self, stage): if not stage in self.stages: - raise ValueError("can only assign a registered stage from %(stages)r" % vars(self)) + raise ValueError( + "can only assign a registered stage from %(stages)r" % vars(self) + ) self.__incomplete = stage @incomplete.deleter @@ -126,15 +135,19 @@ def history(self): def completed(self, stage): """Record completed stage and reset :attr:`Journal.current`""" - assert stage == self.current, "Program logic error: can only complete the current stage" + assert ( + stage == self.current + ), "Program logic error: can only complete the current stage" self.__history.append(self.current) del self.current def start(self, stage): """Record that *stage* is starting.""" if self.current is not None: - errmsg = "Cannot start stage %s because previously started stage %s " \ + errmsg = ( + "Cannot start stage %s because previously started stage %s " "has not been completed." % (stage, self.current) + ) logger.error(errmsg) raise JournalSequenceError(errmsg) self.current = stage @@ -155,12 +168,14 @@ def clear(self): def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self.stages) + class Journalled(object): """A base class providing methods for journalling and restarts. It installs an instance of :class:`Journal` in the attribute :attr:`Journalled.journal` if it does not exist already. """ + #: Class-attribute that contains the names of computation protocols #: supported by the class. These are either method names or dummy names, #: whose logic is provided by an external callback function. @@ -199,7 +214,9 @@ def get_protocol(self, protocol): """ if protocol not in self.protocols: - raise ValueError("%r: protocol must be one of %r" % (protocol, self.protocols)) + raise ValueError( + "%r: protocol must be one of %r" % (protocol, self.protocols) + ) try: return self.__getattribute__(protocol) except AttributeError: @@ -216,6 +233,7 @@ def dummy_protocol(*args, **kwargs): if success: self.journal.completed(protocol) return success + return dummy_protocol def save(self, filename=None): @@ -237,7 +255,7 @@ def save(self, filename=None): raise ValueError(errmsg) else: self.filename = os.path.abspath(filename) - with open(self.filename, 'wb') as f: + with open(self.filename, "wb") as f: pickle.dump(self, f) logger.debug("Instance pickled to %(filename)r" % vars(self)) @@ -264,12 +282,12 @@ def load(self, filename=None): # Do not remove this code when dropping Py 2.7 support as it is needed to # be able to read old data files with Python 3 MDPOW. - with open(filename, 'rb') as f: + with open(filename, "rb") as f: try: instance = pickle.load(f) except UnicodeDecodeError: logger.debug("Reading old Python 2 Pickle file %(filename)r" % vars()) - instance = pickle.load(f, encoding='latin1') + instance = pickle.load(f, encoding="latin1") self.__dict__.update(instance.__dict__) logger.debug("Instance loaded from %(filename)r" % vars()) diff --git a/mdpow/run.py b/mdpow/run.py index 4738634f..44469626 100644 --- a/mdpow/run.py +++ b/mdpow/run.py @@ -44,14 +44,14 @@ import gromacs.run import gromacs.exceptions -from .config import (get_configuration, set_gromacsoutput, - NoSectionError) +from .config import get_configuration, set_gromacsoutput, NoSectionError from . import equil from . import fep from .restart import checkpoint import logging -logger = logging.getLogger('mdpow.run') + +logger = logging.getLogger("mdpow.run") def setupMD(S, protocol, cfg): @@ -63,37 +63,48 @@ def setupMD(S, protocol, cfg): maxwarn = 0 simulation_protocol = S.get_protocol(protocol) - params = simulation_protocol(runtime=cfg.getfloat(protocol, "runtime"), - qscript=cfg.getlist(protocol, "qscript"), - maxwarn=maxwarn, - ) + params = simulation_protocol( + runtime=cfg.getfloat(protocol, "runtime"), + qscript=cfg.getlist(protocol, "qscript"), + maxwarn=maxwarn, + ) return params + def get_mdp_files(cfg, protocols): """Get file names of MDP files from *cfg* for all *protocols*""" mdpfiles = {} for protocol in protocols: try: - mdp = cfg.findfile(protocol, 'mdp') + mdp = cfg.findfile(protocol, "mdp") except NoSectionError: # skip anything for which we do not define sections, such as # the dummy run protocols mdp = None except ValueError: # But it is a problem if we can't find a file! - logger.critical("Failed to find custom MDP file %r for protocol [%s]", - cfg.get(protocol, 'mdp'), protocol) + logger.critical( + "Failed to find custom MDP file %r for protocol [%s]", + cfg.get(protocol, "mdp"), + protocol, + ) raise else: if mdp is None: # Should not happen... let's continue and wait for hard-coded defaults - logger.warning("No 'mdp' config file entry for protocol [%s]---check input files! " - "Using package defaults.", protocol) + logger.warning( + "No 'mdp' config file entry for protocol [%s]---check input files! " + "Using package defaults.", + protocol, + ) if mdp: mdpfiles[protocol] = mdp - logger.debug("%(protocol)s: Using MDP file %(mdp)r from config file", vars()) + logger.debug( + "%(protocol)s: Using MDP file %(mdp)r from config file", vars() + ) return mdpfiles + def runMD_or_exit(S, protocol, params, cfg, exit_on_error=True, **kwargs): """run simulation @@ -129,15 +140,17 @@ def runMD_or_exit(S, protocol, params, cfg, exit_on_error=True, **kwargs): raise ValueError("supply dirname as a keyword argument") simulation_done = False if cfg.getboolean(protocol, "runlocal"): - logger.info("Running %s (%s.log) ... stand by.", protocol, params['deffnm']) + logger.info("Running %s (%s.log) ... stand by.", protocol, params["deffnm"]) logger.info("Run directory: %(dirname)s", vars()) mdrun = gromacs.run.MDrunner( - dirname=dirname, deffnm=params['deffnm'], - v=cfg.getboolean('mdrun','verbose'), - stepout=cfg.getint('mdrun','stepout'), - nice=cfg.getint('mdrun','nice'), - nt=cfg.get('mdrun','maxthreads'), - cpi=True) + dirname=dirname, + deffnm=params["deffnm"], + v=cfg.getboolean("mdrun", "verbose"), + stepout=cfg.getint("mdrun", "stepout"), + nice=cfg.getint("mdrun", "nice"), + nt=cfg.get("mdrun", "maxthreads"), + cpi=True, + ) simulation_done = mdrun.run_check() if not simulation_done: # should probably stop @@ -146,12 +159,13 @@ def runMD_or_exit(S, protocol, params, cfg, exit_on_error=True, **kwargs): sys.exit(1) else: raise gromacs.exceptions.GromacsError( - f"Failed {protocol}, investigate manually.") + f"Failed {protocol}, investigate manually." + ) else: # must check if the simulation was run externally - logfile = os.path.join(dirname, params['deffnm']+os.extsep+"log") + logfile = os.path.join(dirname, params["deffnm"] + os.extsep + "log") logger.debug("Checking logfile %r if simulation has been completed.", logfile) - simulation_done = gromacs.run.check_mdrun_success(logfile) ### broken?? + simulation_done = gromacs.run.check_mdrun_success(logfile) ### broken?? if simulation_done is None: logger.info("Now go and run %(protocol)s in directory %(dirname)r.", vars()) if exit_on_error: @@ -159,12 +173,16 @@ def runMD_or_exit(S, protocol, params, cfg, exit_on_error=True, **kwargs): else: return simulation_done elif simulation_done is False: - logger.warning("Simulation %(protocol)s in directory %(dirname)r is incomplete (log=%(logfile)s).", vars()) + logger.warning( + "Simulation %(protocol)s in directory %(dirname)r is incomplete (log=%(logfile)s).", + vars(), + ) if exit_on_error: sys.exit(1) else: raise gromacs.exceptions.MissingDataError( - f"Simulation {protocol} in directory {dirname} is incomplete (log={logfile}).") + f"Simulation {protocol} in directory {dirname} is incomplete (log={logfile})." + ) logger.info("Simulation %(protocol)s seems complete (log=%(logfile)s)", vars()) return simulation_done @@ -178,19 +196,20 @@ def equilibrium_simulation(cfg, solvent, **kwargs): (``runlocal``), :program:`mdrun` is executed at various stages, and hence this process can take a while. """ - deffnm = kwargs.pop('deffnm', "md") + deffnm = kwargs.pop("deffnm", "md") Simulations = { - 'water': equil.WaterSimulation, - 'octanol': equil.OctanolSimulation, - 'wetoctanol': equil.WetOctanolSimulation, - 'cyclohexane':equil.CyclohexaneSimulation, - 'toluene': equil.TolueneSimulation, - } + "water": equil.WaterSimulation, + "octanol": equil.OctanolSimulation, + "wetoctanol": equil.WetOctanolSimulation, + "cyclohexane": equil.CyclohexaneSimulation, + "toluene": equil.TolueneSimulation, + } try: Simulation = Simulations[solvent] except KeyError: - raise ValueError("solvent must be one of {0}".format( - ", ".join(Simulations.keys()))) + raise ValueError( + "solvent must be one of {0}".format(", ".join(Simulations.keys())) + ) # generate a canonical path under dirname topdir = kwargs.get("dirname", None) @@ -208,18 +227,17 @@ def equilibrium_simulation(cfg, solvent, **kwargs): # custom mdp files mdpfiles = get_mdp_files(cfg, Simulation.protocols) try: - distance = cfg.get('setup', 'distance') + distance = cfg.get("setup", "distance") except KeyError: - distance = None # if no distance is specified, None = default + distance = None # if no distance is specified, None = default try: - boxtype = cfg.get('setup', 'boxtype') + boxtype = cfg.get("setup", "boxtype") except KeyError: - boxtype = None # if no distance is specified, None = default - + boxtype = None # if no distance is specified, None = default solventmodel = None try: - solventmodel = cfg.get('setup', 'solventmodel') + solventmodel = cfg.get("setup", "solventmodel") logger.info("Selected solvent model: {0}".format(solventmodel)) except KeyError: solventmodel = None @@ -228,22 +246,28 @@ def equilibrium_simulation(cfg, solvent, **kwargs): # parameterization included and hence there is no mechanism to # choose between different models. - S = Simulation(molecule=cfg.get("setup", "molecule"), - forcefield=cfg.get("setup", "forcefield"), - dirname=dirname, deffnm=deffnm, mdp=mdpfiles, - distance=distance, - solventmodel=solventmodel) + S = Simulation( + molecule=cfg.get("setup", "molecule"), + forcefield=cfg.get("setup", "forcefield"), + dirname=dirname, + deffnm=deffnm, + mdp=mdpfiles, + distance=distance, + solventmodel=solventmodel, + ) if S.journal.has_not_completed("energy_minimize"): maxwarn = cfg.getint("setup", "maxwarn") or None prm = cfg.get("setup", "prm") or None - maxthreads = cfg.get('mdrun', 'maxthreads') or None + maxthreads = cfg.get("mdrun", "maxthreads") or None S.topology(itp=cfg.getpath("setup", "itp"), prm=prm) - S.solvate(struct=cfg.getpath("setup", "structure"), - bt=cfg.get("setup", "boxtype"), - maxwarn=maxwarn) - S.energy_minimize(maxwarn=maxwarn, mdrun_args={'nt': maxthreads}) - checkpoint('energy_minize', S, savefilename) + S.solvate( + struct=cfg.getpath("setup", "structure"), + bt=cfg.get("setup", "boxtype"), + maxwarn=maxwarn, + ) + S.energy_minimize(maxwarn=maxwarn, mdrun_args={"nt": maxthreads}) + checkpoint("energy_minize", S, savefilename) else: logger.info("Fast-forwarding: setup + energy_minimize done") @@ -251,12 +275,14 @@ def equilibrium_simulation(cfg, solvent, **kwargs): params = setupMD(S, "MD_relaxed", cfg) checkpoint("MD_relaxed", S, savefilename) else: - params = {'deffnm': deffnm} + params = {"deffnm": deffnm} logger.info("Fast-forwarding: MD_relaxed (setup) done") if S.journal.has_not_completed("MD_relaxed_run"): wrapper = S.get_protocol("MD_relaxed_run") - success = wrapper(runMD_or_exit, S, "MD_relaxed", params, cfg) # note: MD_relaxed! + success = wrapper( + runMD_or_exit, S, "MD_relaxed", params, cfg + ) # note: MD_relaxed! checkpoint("MD_relaxed_run", S, savefilename) else: logger.info("Fast-forwarding: MD_relaxed (run) done") @@ -269,13 +295,15 @@ def equilibrium_simulation(cfg, solvent, **kwargs): if S.journal.has_not_completed("MD_NPT_run"): wrapper = S.get_protocol("MD_NPT_run") - success = wrapper(runMD_or_exit, S, "MD_NPT", params, cfg) # note: MD_NPT + success = wrapper(runMD_or_exit, S, "MD_NPT", params, cfg) # note: MD_NPT checkpoint("MD_NPT_run", S, savefilename) else: logger.info("Fast-forwarding: MD_NPT (run) done") - logger.info("Equilibrium simulation phase complete, use %(savefilename)r to continue.", - vars()) + logger.info( + "Equilibrium simulation phase complete, use %(savefilename)r to continue.", + vars(), + ) return savefilename @@ -290,27 +318,29 @@ def fep_simulation(cfg, solvent, **kwargs): recommended to use ``runlocal = False`` in the run input file and submit all window simulations to a cluster. """ - exit_on_error = kwargs.pop('exit_on_error', True) - deffnm = kwargs.pop('deffnm', "md") + exit_on_error = kwargs.pop("exit_on_error", True) + deffnm = kwargs.pop("deffnm", "md") EquilSimulations = { - 'water': equil.WaterSimulation, - 'octanol': equil.OctanolSimulation, - 'wetoctanol': equil.WetOctanolSimulation, - 'cyclohexane': equil.CyclohexaneSimulation, - 'toluene': equil.TolueneSimulation, - } + "water": equil.WaterSimulation, + "octanol": equil.OctanolSimulation, + "wetoctanol": equil.WetOctanolSimulation, + "cyclohexane": equil.CyclohexaneSimulation, + "toluene": equil.TolueneSimulation, + } Simulations = { - 'water': fep.Ghyd, - 'octanol': fep.Goct, - 'wetoctanol': fep.Gwoct, - 'cyclohexane': fep.Gcyclo, - 'toluene': fep.Gtol, - } + "water": fep.Ghyd, + "octanol": fep.Goct, + "wetoctanol": fep.Gwoct, + "cyclohexane": fep.Gcyclo, + "toluene": fep.Gtol, + } try: EquilSimulation = EquilSimulations[solvent] Simulation = Simulations[solvent] except KeyError: - raise ValueError("solvent must be 'water', 'octanol', 'wetoctanol', 'cyclohexane' or 'toluene'") + raise ValueError( + "solvent must be 'water', 'octanol', 'wetoctanol', 'cyclohexane' or 'toluene'" + ) # generate a canonical path under dirname topdir = kwargs.get("dirname", None) if topdir is None: @@ -320,15 +350,19 @@ def fep_simulation(cfg, solvent, **kwargs): # Gsolv ... should be a static method or something else I can use before # the class is instantiated. Note that the pickle files live under dirname # and NOT topdir (bit of an historic inconsistency) - savefilename = os.path.join(dirname, Simulation.__name__ + os.extsep + 'fep') + savefilename = os.path.join(dirname, Simulation.__name__ + os.extsep + "fep") # need pickle files for the equilibrium simulation ... another nasty guess: equil_savefilename = os.path.join(topdir, "%(solvent)s.simulation" % vars()) try: equil_S = EquilSimulation(filename=equil_savefilename) except IOError as err: if err.errno == errno.ENOENT: - logger.critical("Missing the equilibrium simulation %(equil_savefilename)r.", vars()) - logger.critical("Run `mdpow-equilibrium -S %s %s' first!", solvent, "RUNINPUT.cfg") + logger.critical( + "Missing the equilibrium simulation %(equil_savefilename)r.", vars() + ) + logger.critical( + "Run `mdpow-equilibrium -S %s %s' first!", solvent, "RUNINPUT.cfg" + ) raise # output to screen or hidden? @@ -347,33 +381,44 @@ def fep_simulation(cfg, solvent, **kwargs): logger.debug("Using [FEP] MDP file %r from config file", mdp) # lambda schedules can be read from [FEP_schedule_*] sections - schedules = {'coulomb': fep.FEPschedule.load(cfg, "FEP_schedule_Coulomb"), - 'vdw': fep.FEPschedule.load(cfg, "FEP_schedule_VDW"), - } + schedules = { + "coulomb": fep.FEPschedule.load(cfg, "FEP_schedule_Coulomb"), + "vdw": fep.FEPschedule.load(cfg, "FEP_schedule_VDW"), + } logger.debug("Loaded FEP schedules %r from config file", schedules.keys()) # Note that we set basedir=topdir (and *not* dirname=dirname!)...FEP is a bit convoluted - S = Simulation(simulation=equil_S, runtime=cfg.getfloat("FEP", "runtime"), - basedir=topdir, deffnm=deffnm, mdp=mdp, schedules=schedules, - method=method) + S = Simulation( + simulation=equil_S, + runtime=cfg.getfloat("FEP", "runtime"), + basedir=topdir, + deffnm=deffnm, + mdp=mdp, + schedules=schedules, + method=method, + ) if S.journal.has_not_completed("setup"): - params = S.setup(qscript=cfg.getlist("FEP", "qscript"), - maxwarn=cfg.getint("FEP", "maxwarn")) + params = S.setup( + qscript=cfg.getlist("FEP", "qscript"), maxwarn=cfg.getint("FEP", "maxwarn") + ) checkpoint("setup", S, savefilename) else: - params = {'deffnm': deffnm} + params = {"deffnm": deffnm} logger.info("Fast-forwarding: FEP setup done") if S.journal.has_not_completed("fep_run"): + def run_all_FEPS(): for wdir in S.fep_dirs(): runMD_or_exit(S, "FEP", params, cfg, dirname=wdir, dgdl=True) + wrapper = S.get_protocol("fep_run") wrapper(run_all_FEPS) checkpoint("fep_run", S, savefilename) else: logger.info("Fast-forwarding: fep (run) done") - logger.info("FEP simulation phase complete, use %(savefilename)r to continue.", - vars()) + logger.info( + "FEP simulation phase complete, use %(savefilename)r to continue.", vars() + ) return savefilename diff --git a/mdpow/tests/__init__.py b/mdpow/tests/__init__.py index df6a7e6a..a59ab5ed 100644 --- a/mdpow/tests/__init__.py +++ b/mdpow/tests/__init__.py @@ -4,7 +4,7 @@ from pkg_resources import resource_filename -RESOURCES = py.path.local(resource_filename(__name__, 'testing_resources')) +RESOURCES = py.path.local(resource_filename(__name__, "testing_resources")) MANIFEST = RESOURCES / "manifest.yml" diff --git a/mdpow/tests/tempdir.py b/mdpow/tests/tempdir.py index 3e865d58..28c5c943 100644 --- a/mdpow/tests/tempdir.py +++ b/mdpow/tests/tempdir.py @@ -28,11 +28,11 @@ class TempDir(object): - """ class for temporary directories -creates a (named) directory which is deleted after use. -All files created within the directory are destroyed -Might not work on windows when the files are still opened -""" + """class for temporary directories + creates a (named) directory which is deleted after use. + All files created within the directory are destroyed + Might not work on windows when the files are still opened""" + def __init__(self, suffix="", prefix="tmp", basedir=None): self.name = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=basedir) @@ -57,13 +57,15 @@ def dissolve(self): def __str__(self): if self.name: - return "temporary directory at: {}".format(self.name,) + return "temporary directory at: {}".format( + self.name, + ) else: return "dissolved temporary directory" class in_tempdir(object): - """Create a temporary directory and change to it. """ + """Create a temporary directory and change to it.""" def __init__(self, *args, **kwargs): self.tmpdir = TempDir(*args, **kwargs) @@ -82,10 +84,13 @@ def run_in_tempdir(*args, **kwargs): """Make a function execute in a new tempdir. Any time the function is called, a new tempdir is created and destroyed. """ + def change_dird(fnc): @wraps(fnc) def wrapper(*funcargs, **funckwargs): with in_tempdir(*args, **kwargs): return fnc(*funcargs, **funckwargs) + return wrapper + return change_dird diff --git a/mdpow/tests/test_Gsolv.py b/mdpow/tests/test_Gsolv.py index bd9e603c..6fda59ec 100644 --- a/mdpow/tests/test_Gsolv.py +++ b/mdpow/tests/test_Gsolv.py @@ -9,19 +9,21 @@ from . import RESOURCES -class Test_Gsolv_manual(object): +class Test_Gsolv_manual(object): def setup_method(self): self.tmpdir = td.TempDir() - self.m = pybol.Manifest(str(RESOURCES / 'manifest.yml')) - self.m.assemble('md_npt',self.tmpdir.name) - simulation_filename = os.path.join(self.tmpdir.name,'benzene', - 'water.simulation') - self.S = equil.Simulation(filename = simulation_filename) - - self.S.make_paths_relative(prefix=os.path.join( - self.tmpdir.name,'benzene', 'Equilibrium', 'water')) - self.S.dirs.includes = os.path.join(self.tmpdir.name, 'top') + self.m = pybol.Manifest(str(RESOURCES / "manifest.yml")) + self.m.assemble("md_npt", self.tmpdir.name) + simulation_filename = os.path.join( + self.tmpdir.name, "benzene", "water.simulation" + ) + self.S = equil.Simulation(filename=simulation_filename) + + self.S.make_paths_relative( + prefix=os.path.join(self.tmpdir.name, "benzene", "Equilibrium", "water") + ) + self.S.dirs.includes = os.path.join(self.tmpdir.name, "top") self.S.save() def teardown_method(self): @@ -30,19 +32,18 @@ def teardown_method(self): def _setup(self, **kwargs): mdp = config.get_template("bar_opls.mdp") with in_dir(self.tmpdir.name, create=False): - self.Gsolv = fep.Gsolv(simulation=self.S, molecule='BNZ', - mdp=mdp ,**kwargs) + self.Gsolv = fep.Gsolv(simulation=self.S, molecule="BNZ", mdp=mdp, **kwargs) self.Gsolv.setup(maxwarn=1) def test_default_setup(self): self._setup() def test_list_foreign_lambdas(self): - lambda_coulomb = [0,0.5,1.0] - lambda_vdw = [0,0.2,1.0] + lambda_coulomb = [0, 0.5, 1.0] + lambda_vdw = [0, 0.2, 1.0] self._setup(lambda_coulomb=lambda_coulomb, lambda_vdw=lambda_vdw) def test_array_foreign_lambdas(self): - lambda_coulomb = np.array([0,0.5,1.0]) - lambda_vdw = np.array([0,0.2,1.0]) + lambda_coulomb = np.array([0, 0.5, 1.0]) + lambda_vdw = np.array([0, 0.2, 1.0]) self._setup(lambda_coulomb=lambda_coulomb, lambda_vdw=lambda_vdw) diff --git a/mdpow/tests/test_analysis.py b/mdpow/tests/test_analysis.py index bf242919..d4b21e22 100644 --- a/mdpow/tests/test_analysis.py +++ b/mdpow/tests/test_analysis.py @@ -17,6 +17,7 @@ MANIFEST = RESOURCES.join("manifest.yml") + def fix_manifest(topdir): """Create a temporary manifest with a custom `path`. @@ -46,7 +47,7 @@ def fix_manifest(topdir): manifest = yaml.safe_load(MANIFEST.open()) # simple heuristic: last element of the recorded manifest::path is the name # of the states directory, typically 'states' (from .../testing_resources/states) - manifest['path'] = RESOURCES.join(os.path.basename(manifest['path'])).strpath + manifest["path"] = RESOURCES.join(os.path.basename(manifest["path"])).strpath new_manifest = topdir.join("local_manifest.yml") yaml.dump(manifest, stream=new_manifest.open("w")) return new_manifest @@ -54,13 +55,15 @@ def fix_manifest(topdir): # session scope if read-only use + @pytest.fixture(scope="function") def fep_benzene_directory(tmpdir_factory): - topdir = tmpdir_factory.mktemp('analysis') + topdir = tmpdir_factory.mktemp("analysis") m = pybol.Manifest(fix_manifest(topdir).strpath) - m.assemble('FEP', topdir.strpath) + m.assemble("FEP", topdir.strpath) return topdir.join("benzene") + class TestAnalyze(object): def get_Gsolv(self, pth): gsolv = pth.join("FEP", "water", "Gsolv.fep") @@ -82,15 +85,17 @@ def assert_DeltaA(G): # - June 2023: in CI, >= 3.8 results differ from reference values (although # locally no changes are obvious) after ~4 decimals for unknown reasons. DeltaA = G.results.DeltaA - assert_array_almost_equal(DeltaA.Gibbs.astuple(), - (-3.7217472974883794, 2.3144288928034911), - decimal=3) - assert_array_almost_equal(DeltaA.coulomb.astuple(), - (8.3346255170099575, 0.73620918517131495), - decimal=3) - assert_array_almost_equal(DeltaA.vdw.astuple(), - (-4.6128782195215781, 2.1942144688960972), - decimal=3) + assert_array_almost_equal( + DeltaA.Gibbs.astuple(), (-3.7217472974883794, 2.3144288928034911), decimal=3 + ) + assert_array_almost_equal( + DeltaA.coulomb.astuple(), + (8.3346255170099575, 0.73620918517131495), + decimal=3, + ) + assert_array_almost_equal( + DeltaA.vdw.astuple(), (-4.6128782195215781, 2.1942144688960972), decimal=3 + ) def test_convert_edr(self, fep_benzene_directory): G = self.get_Gsolv(fep_benzene_directory) @@ -98,11 +103,13 @@ def test_convert_edr(self, fep_benzene_directory): with pytest.warns(numkit.LowAccuracyWarning): G.analyze(force=True, autosave=False) except IOError as err: - raise AssertionError("Failed to auto-convert edr to xvg: {0}: {1}".format( - err.strerror, err.filename)) + raise AssertionError( + "Failed to auto-convert edr to xvg: {0}: {1}".format( + err.strerror, err.filename + ) + ) self.assert_DeltaA(G) - def test_TI(self, fep_benzene_directory): G = self.get_Gsolv(fep_benzene_directory) # ensure conversion EDR to XVG.bz2; if the fixture is session scoped @@ -114,6 +121,9 @@ def test_TI(self, fep_benzene_directory): with pytest.warns(numkit.LowAccuracyWarning): G.analyze(force=True, autosave=False) except IOError as err: - raise AssertionError("Failed to convert edr to xvg: {0}: {1}".format( - err.strerror, err.filename)) + raise AssertionError( + "Failed to convert edr to xvg: {0}: {1}".format( + err.strerror, err.filename + ) + ) self.assert_DeltaA(G) diff --git a/mdpow/tests/test_analysis_alchemlyb.py b/mdpow/tests/test_analysis_alchemlyb.py index 2452b6bc..9dbb0f1f 100644 --- a/mdpow/tests/test_analysis_alchemlyb.py +++ b/mdpow/tests/test_analysis_alchemlyb.py @@ -15,6 +15,7 @@ MANIFEST = RESOURCES.join("manifest.yml") + def fix_manifest(topdir): """Create a temporary manifest with a custom `path`. @@ -44,7 +45,7 @@ def fix_manifest(topdir): manifest = yaml.safe_load(MANIFEST.open()) # simple heuristic: last element of the recorded manifest::path is the name # of the states directory, typically 'states' (from .../testing_resources/states) - manifest['path'] = RESOURCES.join(os.path.basename(manifest['path'])).strpath + manifest["path"] = RESOURCES.join(os.path.basename(manifest["path"])).strpath new_manifest = topdir.join("local_manifest.yml") yaml.dump(manifest, stream=new_manifest.open("w")) return new_manifest @@ -52,13 +53,15 @@ def fix_manifest(topdir): # session scope if read-only use + @pytest.fixture(scope="function") def fep_benzene_directory(tmpdir_factory): - topdir = tmpdir_factory.mktemp('analysis') + topdir = tmpdir_factory.mktemp("analysis") m = pybol.Manifest(fix_manifest(topdir).strpath) - m.assemble('FEP', topdir.strpath) + m.assemble("FEP", topdir.strpath) return topdir.join("benzene") + class TestAnalyze(object): def get_Gsolv(self, pth): gsolv = pth.join("FEP", "water", "Gsolv.fep") @@ -70,25 +73,26 @@ def get_Gsolv(self, pth): G.filename = gsolv.strpath return G - @pytest.mark.parametrize('method, Gibbs, coulomb, vdw', [ - ('TI', - (-3.901068, 0.550272), - (8.417035, 0.22289), - (-4.515967, 0.50311)), - ('BAR', - (-4.091241, 0.385413), - (8.339705, 0.166802), - (-4.248463, 0.347449)), - ('MBAR', - (-6.793117, 0.475149), - (8.241836, 0.219235), - (-1.448719, 0.421548)) - ]) - - @pytest.mark.xfail(pandas.__version__.startswith("1.3.0"), - reason="bug in pandas 1.3.0 see alchemistry/alchemlyb#147") - def test_estimator_alchemlyb(self, fep_benzene_directory, method, - Gibbs, coulomb, vdw): + @pytest.mark.parametrize( + "method, Gibbs, coulomb, vdw", + [ + ("TI", (-3.901068, 0.550272), (8.417035, 0.22289), (-4.515967, 0.50311)), + ("BAR", (-4.091241, 0.385413), (8.339705, 0.166802), (-4.248463, 0.347449)), + ( + "MBAR", + (-6.793117, 0.475149), + (8.241836, 0.219235), + (-1.448719, 0.421548), + ), + ], + ) + @pytest.mark.xfail( + pandas.__version__.startswith("1.3.0"), + reason="bug in pandas 1.3.0 see alchemistry/alchemlyb#147", + ) + def test_estimator_alchemlyb( + self, fep_benzene_directory, method, Gibbs, coulomb, vdw + ): G = self.get_Gsolv(fep_benzene_directory) G.method = method G.start = 0 @@ -101,42 +105,58 @@ def test_estimator_alchemlyb(self, fep_benzene_directory, method, try: G.analyze_alchemlyb(force=True, autosave=False, SI=False) except IOError as err: - raise AssertionError("Failed to convert edr to xvg: {0}: {1}".format( - err.strerror, err.filename)) + raise AssertionError( + "Failed to convert edr to xvg: {0}: {1}".format( + err.strerror, err.filename + ) + ) DeltaA = G.results.DeltaA - assert_array_almost_equal(DeltaA.Gibbs.astuple(), Gibbs, - decimal=5) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") - assert_array_almost_equal(DeltaA.coulomb.astuple(), coulomb, - decimal=5) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") - assert_array_almost_equal(DeltaA.vdw.astuple(), vdw, - decimal=5) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") - - @pytest.mark.xfail(pandas.__version__.startswith("1.3.0"), - reason="bug in pandas 1.3.0 see alchemistry/alchemlyb#147") + assert_array_almost_equal( + DeltaA.Gibbs.astuple(), Gibbs, decimal=5 + ) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") + assert_array_almost_equal( + DeltaA.coulomb.astuple(), coulomb, decimal=5 + ) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") + assert_array_almost_equal( + DeltaA.vdw.astuple(), vdw, decimal=5 + ) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") + + @pytest.mark.xfail( + pandas.__version__.startswith("1.3.0"), + reason="bug in pandas 1.3.0 see alchemistry/alchemlyb#147", + ) def test_SI(self, fep_benzene_directory): G = self.get_Gsolv(fep_benzene_directory) - G.method = 'TI' + G.method = "TI" G.start = 0 G.stop = None G.convert_edr() try: G.analyze_alchemlyb(force=True, SI=True, autosave=False) except IOError as err: - raise AssertionError("Failed to convert edr to xvg: {0}: {1}".format( - err.strerror, err.filename)) + raise AssertionError( + "Failed to convert edr to xvg: {0}: {1}".format( + err.strerror, err.filename + ) + ) DeltaA = G.results.DeltaA - assert_array_almost_equal(DeltaA.Gibbs.astuple(), (-2.908885, 2.175976), - decimal=5) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") - assert_array_almost_equal(DeltaA.coulomb.astuple(), (7.755779, 0.531481), - decimal=5) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") - assert_array_almost_equal(DeltaA.vdw.astuple(), (-4.846894, 2.110071), - decimal=5) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") - - @pytest.mark.xfail(pandas.__version__.startswith("1.3.0"), - reason="bug in pandas 1.3.0 see alchemistry/alchemlyb#147") + assert_array_almost_equal( + DeltaA.Gibbs.astuple(), (-2.908885, 2.175976), decimal=5 + ) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") + assert_array_almost_equal( + DeltaA.coulomb.astuple(), (7.755779, 0.531481), decimal=5 + ) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") + assert_array_almost_equal( + DeltaA.vdw.astuple(), (-4.846894, 2.110071), decimal=5 + ) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") + + @pytest.mark.xfail( + pandas.__version__.startswith("1.3.0"), + reason="bug in pandas 1.3.0 see alchemistry/alchemlyb#147", + ) def test_start_stop_stride(self, fep_benzene_directory): G = self.get_Gsolv(fep_benzene_directory) - G.method = 'TI' + G.method = "TI" G.start = 10 G.stride = 2 G.stop = 200 @@ -144,12 +164,18 @@ def test_start_stop_stride(self, fep_benzene_directory): try: G.analyze_alchemlyb(force=True, autosave=False, SI=False) except IOError as err: - raise AssertionError("Failed to convert edr to xvg: {0}: {1}".format( - err.strerror, err.filename)) + raise AssertionError( + "Failed to convert edr to xvg: {0}: {1}".format( + err.strerror, err.filename + ) + ) DeltaA = G.results.DeltaA - assert_array_almost_equal(DeltaA.Gibbs.astuple(), (-3.318109, 0.905128), - decimal=5) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") - assert_array_almost_equal(DeltaA.coulomb.astuple(), (8.146806, 0.348866), - decimal=5) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") - assert_array_almost_equal(DeltaA.vdw.astuple(), (-4.828696, 0.835195), - decimal=5) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") + assert_array_almost_equal( + DeltaA.Gibbs.astuple(), (-3.318109, 0.905128), decimal=5 + ) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") + assert_array_almost_equal( + DeltaA.coulomb.astuple(), (8.146806, 0.348866), decimal=5 + ) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") + assert_array_almost_equal( + DeltaA.vdw.astuple(), (-4.828696, 0.835195), decimal=5 + ) # with more recent versions of pandas/alchemlyb/numpy the original values are only reproduced to 5 decimals, see PR #166") diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index b5f70139..0bc1850f 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -18,20 +18,21 @@ from pkg_resources import resource_filename from mdpow.workflows import dihedrals -RESOURCES = pathlib.PurePath(resource_filename(__name__, 'testing_resources')) +RESOURCES = pathlib.PurePath(resource_filename(__name__, "testing_resources")) MANIFEST = RESOURCES / "manifest.yml" resname = "UNK" molname = "SM25" + @pytest.fixture -def molname_workflows_directory(tmp_path, molname='SM25'): +def molname_workflows_directory(tmp_path, molname="SM25"): m = pybol.Manifest(str(MANIFEST)) - m.assemble('workflows', tmp_path) + m.assemble("workflows", tmp_path) return tmp_path / molname -class TestAutomatedDihedralAnalysis(object): +class TestAutomatedDihedralAnalysis(object): @pytest.fixture def SM25_tmp_dir(self, molname_workflows_directory): dirname = molname_workflows_directory @@ -50,7 +51,9 @@ def atom_indices(self, mol_sol_data): # testing optional user input of alternate SMARTS string # for automated dihedral atom group selection - atom_group_indices_alt = dihedrals.get_atom_indices(mol=mol, SMARTS='[!$(*#*)&!D1]-!@[!$(*#*)&!D1]') + atom_group_indices_alt = dihedrals.get_atom_indices( + mol=mol, SMARTS="[!$(*#*)&!D1]-!@[!$(*#*)&!D1]" + ) return atom_group_indices, atom_group_indices_alt # fixture output, tuple: # atom_indices[0]=atom_group_indices @@ -67,15 +70,17 @@ def bond_indices(self, mol_sol_data, atom_indices): def dihedral_groups(self, mol_sol_data, atom_indices): _, solute = mol_sol_data atom_index, _ = atom_indices - dihedral_groups = dihedrals.get_dihedral_groups(solute=solute, atom_indices=atom_index) + dihedral_groups = dihedrals.get_dihedral_groups( + solute=solute, atom_indices=atom_index + ) return dihedral_groups @pytest.fixture def dihedral_data(self, SM25_tmp_dir, atom_indices): atom_group_indices, _ = atom_indices - df = dihedrals.dihedral_groups_ensemble(atom_indices=atom_group_indices, - dirname=SM25_tmp_dir, - solvents=('water',)) + df = dihedrals.dihedral_groups_ensemble( + atom_indices=atom_group_indices, dirname=SM25_tmp_dir, solvents=("water",) + ) df_aug = dihedrals.periodic_angle_padding(df) return df, df_aug # fixture output, tuple: @@ -84,58 +89,129 @@ def dihedral_data(self, SM25_tmp_dir, atom_indices): # tuple-tuples of dihedral atom group indices # collected using mdpow.workflows.dihedrals.SMARTS_DEFAULT - check_atom_group_indices = ((0, 1, 2, 3),(0, 1, 12, 13),(1, 2, 3, 11),(1, 2, 3, 10), - (1, 2, 3, 4),(1, 12, 13, 14),(2, 3, 4, 5),(2, 3, 4, 9), - (2, 1, 12, 13),(3, 2, 1, 12),(5, 4, 3, 11),(5, 4, 3, 10), - (9, 4, 3, 11),(9, 4, 3, 10),(12, 13, 14, 15),(12, 13, 14, 19)) + check_atom_group_indices = ( + (0, 1, 2, 3), + (0, 1, 12, 13), + (1, 2, 3, 11), + (1, 2, 3, 10), + (1, 2, 3, 4), + (1, 12, 13, 14), + (2, 3, 4, 5), + (2, 3, 4, 9), + (2, 1, 12, 13), + (3, 2, 1, 12), + (5, 4, 3, 11), + (5, 4, 3, 10), + (9, 4, 3, 11), + (9, 4, 3, 10), + (12, 13, 14, 15), + (12, 13, 14, 19), + ) # tuple-tuples of dihedral atom group indices # collected using alternate SMARTS input (explicitly defined) # see: fixture - atom_indices().atom_group_indices_alt check_atom_group_indices_alt = ((1, 2), (1, 12), (2, 3), (3, 4), (12, 13), (13, 14)) - check_atom_name_index_pairs = {'O1-C2-N3-S4': (0, 1, 2, 3), - 'O1-C2-C13-C14': (0, 1, 12, 13), - 'C2-N3-S4-O12': (1, 2, 3, 11), - 'C2-N3-S4-O11': (1, 2, 3, 10), - 'C2-N3-S4-C5': (1, 2, 3, 4), - 'C2-C13-C14-C15': (1, 12, 13, 14), - 'N3-S4-C5-C6': (2, 3, 4, 5), - 'N3-S4-C5-C10': (2, 3, 4, 9), - 'N3-C2-C13-C14': (2, 1, 12, 13), - 'S4-N3-C2-C13': (3, 2, 1, 12), - 'C6-C5-S4-O12': (5, 4, 3, 11), - 'C6-C5-S4-O11': (5, 4, 3, 10), - 'C10-C5-S4-O12': (9, 4, 3, 11), - 'C10-C5-S4-O11': (9, 4, 3, 10), - 'C13-C14-C15-C16': (12, 13, 14, 15), - 'C13-C14-C15-C20': (12, 13, 14, 19)} - - check_groups = [np.array(['O1', 'C2', 'N3', 'S4'], dtype=object), - np.array(['O1', 'C2', 'C13', 'C14'], dtype=object), - np.array(['C2', 'N3', 'S4', 'O12'], dtype=object), - np.array(['C2', 'N3', 'S4', 'O11'], dtype=object), - np.array(['C2', 'N3', 'S4', 'C5'], dtype=object), - np.array(['C2', 'C13', 'C14', 'C15'], dtype=object), - np.array(['N3', 'S4', 'C5', 'C6'], dtype=object), - np.array(['N3', 'S4', 'C5', 'C10'], dtype=object), - np.array(['N3', 'C2', 'C13', 'C14'], dtype=object), - np.array(['S4', 'N3', 'C2', 'C13'], dtype=object), - np.array(['C6', 'C5', 'S4', 'O12'], dtype=object), - np.array(['C6', 'C5', 'S4', 'O11'], dtype=object), - np.array(['C10', 'C5', 'S4', 'O12'], dtype=object), - np.array(['C10', 'C5', 'S4', 'O11'], dtype=object), - np.array(['C13', 'C14', 'C15', 'C16'], dtype=object), - np.array(['C13', 'C14', 'C15', 'C20'], dtype=object)] - - universe_solute_atom_names = np.array(['O1', 'C2', 'N3', 'S4', 'C5', 'C6', 'C7', 'C8', - 'C9', 'C10', 'O11', 'O12', 'C13', 'C14', 'C15', - 'C16', 'C17', 'C18', 'C19', 'C20', 'H21', 'H22', - 'H23', 'H24', 'H25', 'H26', 'H27', 'H28', 'H29', - 'H30', 'H31', 'H32', 'H33', 'H34', 'H35'], dtype=object) - - check_hydrogens = np.array(['H21', 'H22', 'H23', 'H24', 'H25', 'H26', 'H27', 'H28', - 'H29', 'H30', 'H31', 'H32', 'H33', 'H34', 'H35'], dtype=object) + check_atom_name_index_pairs = { + "O1-C2-N3-S4": (0, 1, 2, 3), + "O1-C2-C13-C14": (0, 1, 12, 13), + "C2-N3-S4-O12": (1, 2, 3, 11), + "C2-N3-S4-O11": (1, 2, 3, 10), + "C2-N3-S4-C5": (1, 2, 3, 4), + "C2-C13-C14-C15": (1, 12, 13, 14), + "N3-S4-C5-C6": (2, 3, 4, 5), + "N3-S4-C5-C10": (2, 3, 4, 9), + "N3-C2-C13-C14": (2, 1, 12, 13), + "S4-N3-C2-C13": (3, 2, 1, 12), + "C6-C5-S4-O12": (5, 4, 3, 11), + "C6-C5-S4-O11": (5, 4, 3, 10), + "C10-C5-S4-O12": (9, 4, 3, 11), + "C10-C5-S4-O11": (9, 4, 3, 10), + "C13-C14-C15-C16": (12, 13, 14, 15), + "C13-C14-C15-C20": (12, 13, 14, 19), + } + + check_groups = [ + np.array(["O1", "C2", "N3", "S4"], dtype=object), + np.array(["O1", "C2", "C13", "C14"], dtype=object), + np.array(["C2", "N3", "S4", "O12"], dtype=object), + np.array(["C2", "N3", "S4", "O11"], dtype=object), + np.array(["C2", "N3", "S4", "C5"], dtype=object), + np.array(["C2", "C13", "C14", "C15"], dtype=object), + np.array(["N3", "S4", "C5", "C6"], dtype=object), + np.array(["N3", "S4", "C5", "C10"], dtype=object), + np.array(["N3", "C2", "C13", "C14"], dtype=object), + np.array(["S4", "N3", "C2", "C13"], dtype=object), + np.array(["C6", "C5", "S4", "O12"], dtype=object), + np.array(["C6", "C5", "S4", "O11"], dtype=object), + np.array(["C10", "C5", "S4", "O12"], dtype=object), + np.array(["C10", "C5", "S4", "O11"], dtype=object), + np.array(["C13", "C14", "C15", "C16"], dtype=object), + np.array(["C13", "C14", "C15", "C20"], dtype=object), + ] + + universe_solute_atom_names = np.array( + [ + "O1", + "C2", + "N3", + "S4", + "C5", + "C6", + "C7", + "C8", + "C9", + "C10", + "O11", + "O12", + "C13", + "C14", + "C15", + "C16", + "C17", + "C18", + "C19", + "C20", + "H21", + "H22", + "H23", + "H24", + "H25", + "H26", + "H27", + "H28", + "H29", + "H30", + "H31", + "H32", + "H33", + "H34", + "H35", + ], + dtype=object, + ) + + check_hydrogens = np.array( + [ + "H21", + "H22", + "H23", + "H24", + "H25", + "H26", + "H27", + "H28", + "H29", + "H30", + "H31", + "H32", + "H33", + "H34", + "H35", + ], + dtype=object, + ) # pre 'angle padding' - scipy.stats for # dihedral atom group: O1-C2-N3-S4 @@ -156,7 +232,7 @@ def dihedral_data(self, SM25_tmp_dir, atom_indices): def test_build_universe(self, SM25_tmp_dir): u = dihedrals.build_universe(dirname=SM25_tmp_dir) - solute = u.select_atoms('resname UNK') + solute = u.select_atoms("resname UNK") solute_names = solute.atoms.names assert solute_names.all() == self.universe_solute_atom_names.all() @@ -164,7 +240,6 @@ def test_build_universe(self, SM25_tmp_dir): # between RDKIT versions; issue raised (#239) to identify and # resolve exact package/version responsible def test_dihedral_indices(self, atom_indices): - atom_group_indices = atom_indices[0] assert set(atom_group_indices) == set(self.check_atom_group_indices) @@ -188,14 +263,17 @@ def test_dihedral_groups(self, dihedral_groups): # bond indices are determined by atom indices and are subsequently self-consistent # dihedral group names are determined by the MDAnalysis solute object from RDKit-derived atom indices # this test checks if indexing schemes for RDKit and MDAnalysis are consistent - def test_RDKit_MDAnalysis_atom_index_consistency(self, atom_indices, bond_indices, dihedral_groups): + def test_RDKit_MDAnalysis_atom_index_consistency( + self, atom_indices, bond_indices, dihedral_groups + ): atom_index, _ = atom_indices bond_index = bond_indices groups = dihedral_groups - name_index_pairs = dihedrals.get_paired_indices(atom_indices=atom_index, bond_indices=bond_index, - dihedral_groups=groups) - + name_index_pairs = dihedrals.get_paired_indices( + atom_indices=atom_index, bond_indices=bond_index, dihedral_groups=groups + ) + atom_name_index_pairs = {} for key in name_index_pairs.keys(): @@ -205,14 +283,13 @@ def test_RDKit_MDAnalysis_atom_index_consistency(self, atom_indices, bond_indice # Possible ordering issue (#239) def test_dihedral_groups_ensemble(self, dihedral_data): - df, _ = dihedral_data - dh1_result = df.loc[df['selection'] == 'O1-C2-N3-S4']['dihedral'] + dh1_result = df.loc[df["selection"] == "O1-C2-N3-S4"]["dihedral"] dh1_mean = circmean(dh1_result, high=180, low=-180) dh1_var = circvar(dh1_result, high=180, low=-180) - dh2_result = df.loc[df['selection'] == 'C13-C14-C15-C20']['dihedral'] + dh2_result = df.loc[df["selection"] == "C13-C14-C15-C20"]["dihedral"] dh2_mean = circmean(dh2_result, high=180, low=-180) dh2_var = circvar(dh2_result, high=180, low=-180) @@ -225,22 +302,32 @@ def test_dihedral_groups_ensemble(self, dihedral_data): def test_save_df(self, dihedral_data, SM25_tmp_dir): df, _ = dihedral_data - dihedrals.save_df(df=df, df_save_dir=SM25_tmp_dir, resname='UNK', molname='SM25') - assert (SM25_tmp_dir / 'SM25' / 'SM25_full_df.csv.bz2').exists(), 'Compressed csv file not saved' + dihedrals.save_df( + df=df, df_save_dir=SM25_tmp_dir, resname="UNK", molname="SM25" + ) + assert ( + SM25_tmp_dir / "SM25" / "SM25_full_df.csv.bz2" + ).exists(), "Compressed csv file not saved" def test_save_df_info(self, dihedral_data, SM25_tmp_dir, caplog): df, _ = dihedral_data caplog.clear() - caplog.set_level(logging.INFO, logger='mdpow.workflows.dihedrals') - dihedrals.save_df(df=df, df_save_dir=SM25_tmp_dir, resname='UNK', molname='SM25') - assert f'Results DataFrame saved as {SM25_tmp_dir}/SM25/SM25_full_df.csv.bz2' in caplog.text, 'Save location not logged or returned' + caplog.set_level(logging.INFO, logger="mdpow.workflows.dihedrals") + dihedrals.save_df( + df=df, df_save_dir=SM25_tmp_dir, resname="UNK", molname="SM25" + ) + assert ( + f"Results DataFrame saved as {SM25_tmp_dir}/SM25/SM25_full_df.csv.bz2" + in caplog.text + ), "Save location not logged or returned" # Possible ordering issue (#239) def test_periodic_angle(self, dihedral_data): - _, df_aug = dihedral_data - aug_dh2_result = df_aug.loc[df_aug['selection'] == 'C13-C14-C15-C20']['dihedral'] + aug_dh2_result = df_aug.loc[df_aug["selection"] == "C13-C14-C15-C20"][ + "dihedral" + ] aug_dh2_mean = circmean(aug_dh2_result, high=180, low=-180) aug_dh2_var = circvar(aug_dh2_result, high=180, low=-180) @@ -252,47 +339,76 @@ def test_periodic_angle(self, dihedral_data): # Tests using similar instances of the automated analyses # will use module or class-scoped fixtures, pending #235 def test_save_fig(self, SM25_tmp_dir): - dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - resname=resname, molname='SM25', - solvents=('water',)) - assert (SM25_tmp_dir / 'SM25' / 'SM25_C10-C5-S4-O11_violins.pdf').exists(), 'PDF file not generated' + dihedrals.automated_dihedral_analysis( + dirname=SM25_tmp_dir, + figdir=SM25_tmp_dir, + resname=resname, + molname="SM25", + solvents=("water",), + ) + assert ( + SM25_tmp_dir / "SM25" / "SM25_C10-C5-S4-O11_violins.pdf" + ).exists(), "PDF file not generated" # Possible ordering issue (#239) # Tests using similar instances of the automated analyses # will use module or class-scoped fixtures, pending #235 def test_save_fig_info(self, SM25_tmp_dir, caplog): caplog.clear() - caplog.set_level(logging.INFO, logger='mdpow.workflows.dihedrals') - dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - resname=resname, molname='SM25', - solvents=('water',)) - assert f'Figure saved as {SM25_tmp_dir}/SM25/SM25_C10-C5-S4-O11_violins.pdf' in caplog.text, 'PDF file not saved' + caplog.set_level(logging.INFO, logger="mdpow.workflows.dihedrals") + dihedrals.automated_dihedral_analysis( + dirname=SM25_tmp_dir, + figdir=SM25_tmp_dir, + resname=resname, + molname="SM25", + solvents=("water",), + ) + assert ( + f"Figure saved as {SM25_tmp_dir}/SM25/SM25_C10-C5-S4-O11_violins.pdf" + in caplog.text + ), "PDF file not saved" # Tests using similar instances of the automated analyses # will use module or class-scoped fixtures, pending #235 def test_DataFrame_input(self, SM25_tmp_dir, dihedral_data): df, _ = dihedral_data - dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - resname=resname, molname=molname, - solvents=('water',), dataframe=df) - assert (SM25_tmp_dir / 'SM25' / 'SM25_C10-C5-S4-O11_violins.pdf').exists(), 'PDF file not generated' + dihedrals.automated_dihedral_analysis( + dirname=SM25_tmp_dir, + figdir=SM25_tmp_dir, + resname=resname, + molname=molname, + solvents=("water",), + dataframe=df, + ) + assert ( + SM25_tmp_dir / "SM25" / "SM25_C10-C5-S4-O11_violins.pdf" + ).exists(), "PDF file not generated" # Tests using similar instances of the automated analyses # will use module or class-scoped fixtures, pending #235 def test_DataFrame_input_info(self, SM25_tmp_dir, dihedral_data, caplog): caplog.clear() - caplog.set_level(logging.INFO, logger='mdpow.workflows.dihedrals') + caplog.set_level(logging.INFO, logger="mdpow.workflows.dihedrals") df, _ = dihedral_data - dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - resname=resname, molname=molname, - solvents=('water',), dataframe=df) - assert 'Proceeding with results DataFrame provided.' in caplog.text, 'No dataframe provided or dataframe not recognized' + dihedrals.automated_dihedral_analysis( + dirname=SM25_tmp_dir, + figdir=SM25_tmp_dir, + resname=resname, + molname=molname, + solvents=("water",), + dataframe=df, + ) + assert ( + "Proceeding with results DataFrame provided." in caplog.text + ), "No dataframe provided or dataframe not recognized" # testing resources only contain analyses with single solvent input def test_single_solvent(self, dihedral_data): df, _ = dihedral_data # all analysis data in one violin plot - g = dihedrals.dihedral_violins(df=df, width=0.9, solvents=('water',), plot_title='test') + g = dihedrals.dihedral_violins( + df=df, width=0.9, solvents=("water",), plot_title="test" + ) # number of solvents in DataFrame used to generate plot - number_of_solvents = g.data['solvent'].nunique() - assert number_of_solvents == 1 \ No newline at end of file + number_of_solvents = g.data["solvent"].nunique() + assert number_of_solvents == 1 diff --git a/mdpow/tests/test_config.py b/mdpow/tests/test_config.py index 4a34dd25..a3646539 100644 --- a/mdpow/tests/test_config.py +++ b/mdpow/tests/test_config.py @@ -7,33 +7,39 @@ import mdpow.config + @pytest.fixture def cfg(): # default bundled config return mdpow.config.get_configuration() + @pytest.fixture def minicfg(): s = StringIO("setup:\n name: 'Alice'\n gromacsoutput: False\n") yield s s.close() + class TestConfigurationParser: def test_get_NoSectionError(self): cfg = mdpow.config.POWConfigParser() - with pytest.raises(mdpow.config.NoSectionError, - match="Config file has no section Jabberwocky"): - cfg.get('Jabberwocky', 'elump') + with pytest.raises( + mdpow.config.NoSectionError, match="Config file has no section Jabberwocky" + ): + cfg.get("Jabberwocky", "elump") def test_get_NoOptionWarning_gives_None(self, cfg): - with pytest.warns(mdpow.config.NoOptionWarning, - match="Config file section FEP contains no " - "option Jabberwocky. Using 'None'."): + with pytest.warns( + mdpow.config.NoOptionWarning, + match="Config file section FEP contains no " + "option Jabberwocky. Using 'None'.", + ): item = cfg.get("FEP", "Jabberwocky") assert item is None def test_get(self, cfg): - item = cfg.get("setup","solventmodel") + item = cfg.get("setup", "solventmodel") assert isinstance(item, str) assert item == "tip4p" @@ -42,7 +48,7 @@ def test_get_None(self, cfg): assert item is None def test_getstr(self, cfg): - args = "setup","solventmodel" + args = "setup", "solventmodel" item = cfg.getstr(*args) assert isinstance(item, str) assert item == "tip4p" @@ -75,7 +81,7 @@ def test_getfloat(self, cfg): def test_getpath(self, cfg): pth = "~/mirrors/jbwck.itp" - cfg.conf['setup']['itp'] = pth + cfg.conf["setup"]["itp"] = pth item = cfg.getpath("setup", "itp") @@ -99,7 +105,7 @@ def test_getlist(self, cfg): item = cfg.getlist("FEP_schedule_Coulomb", "lambdas") assert isinstance(item, list) - assert item == ['0', '0.25', '0.5', '0.75', '1.0'] + assert item == ["0", "0.25", "0.5", "0.75", "1.0"] def test_getlist_empty(self, cfg): # get an option with None for this test diff --git a/mdpow/tests/test_dihedral.py b/mdpow/tests/test_dihedral.py index a8a0b7ee..0a48a62f 100644 --- a/mdpow/tests/test_dihedral.py +++ b/mdpow/tests/test_dihedral.py @@ -19,7 +19,7 @@ from pkg_resources import resource_filename -RESOURCES = py.path.local(resource_filename(__name__, 'testing_resources')) +RESOURCES = py.path.local(resource_filename(__name__, "testing_resources")) MANIFEST = RESOURCES.join("manifest.yml") @@ -31,49 +31,55 @@ class TestDihedral(object): def setup_method(self): self.tmpdir = td.TempDir() - self.m = pybol.Manifest(str(RESOURCES / 'manifest.yml')) - self.m.assemble('example_FEP', self.tmpdir.name) - self.Ens = Ensemble(dirname=self.tmpdir.name, solvents=['water']) + self.m = pybol.Manifest(str(RESOURCES / "manifest.yml")) + self.m.assemble("example_FEP", self.tmpdir.name) + self.Ens = Ensemble(dirname=self.tmpdir.name, solvents=["water"]) def teardown_method(self): self.tmpdir.dissolve() def test_dataframe(self): - dh1 = self.Ens.select_atoms('name C4', 'name C17', 'name S2', 'name N3') + dh1 = self.Ens.select_atoms("name C4", "name C17", "name S2", "name N3") dh_run = DihedralAnalysis([dh1]).run(start=0, stop=4, step=1) results = dh_run.results - assert results['selection'][0] == 'C4-C17-S2-N3' - for s in results['solvent']: - assert s == 'water' - for i in results['interaction'][:12]: - assert i == 'Coulomb' + assert results["selection"][0] == "C4-C17-S2-N3" + for s in results["solvent"]: + assert s == "water" + for i in results["interaction"][:12]: + assert i == "Coulomb" def test_selection_error(self): - dh1 = self.Ens.select_atoms('name C17', 'name S2', 'name N3') + dh1 = self.Ens.select_atoms("name C17", "name S2", "name N3") with pytest.raises(SelectionError): dh_run = DihedralAnalysis([dh1]).run(start=0, stop=4, step=1) def test_results_recursive1(self): - dh1 = self.Ens.select_atoms('name C11', 'name C10', 'name C9', 'name C4') - dh2 = self.Ens.select_atoms('name C11', 'name C10', 'name C9', 'name C4') + dh1 = self.Ens.select_atoms("name C11", "name C10", "name C9", "name C4") + dh2 = self.Ens.select_atoms("name C11", "name C10", "name C9", "name C4") dh_run1 = DihedralAnalysis([dh1]).run(start=0, stop=4, step=1) dh_run2 = DihedralAnalysis([dh2]).run(start=0, stop=4, step=1) - assert len(dh_run1.results['dihedral']) == len(dh_run2.results['dihedral']) - for i in range(len(dh_run1.results['dihedral'])): - assert dh_run1.results['dihedral'][i] == dh_run2.results['dihedral'][i] + assert len(dh_run1.results["dihedral"]) == len(dh_run2.results["dihedral"]) + for i in range(len(dh_run1.results["dihedral"])): + assert dh_run1.results["dihedral"][i] == dh_run2.results["dihedral"][i] - @pytest.mark.skipif(sys.version_info < (3, 8), reason="scipy circvar gives wrong answers") + @pytest.mark.skipif( + sys.version_info < (3, 8), reason="scipy circvar gives wrong answers" + ) def test_results_recursive2(self): - dh1 = self.Ens.select_atoms('name C11', 'name C10', 'name C9', 'name C4') - dh2 = self.Ens.select_atoms('name C8', 'name C4', 'name C9', 'name C10') + dh1 = self.Ens.select_atoms("name C11", "name C10", "name C9", "name C4") + dh2 = self.Ens.select_atoms("name C8", "name C4", "name C9", "name C10") dh_run = DihedralAnalysis([dh1, dh2]).run(start=0, stop=4, step=1) - dh1_result = dh_run.results.loc[dh_run.results['selection'] == 'C11-C10-C9-C4']['dihedral'] - dh2_result = dh_run.results.loc[dh_run.results['selection'] == 'C8-C4-C9-C10']['dihedral'] + dh1_result = dh_run.results.loc[dh_run.results["selection"] == "C11-C10-C9-C4"][ + "dihedral" + ] + dh2_result = dh_run.results.loc[dh_run.results["selection"] == "C8-C4-C9-C10"][ + "dihedral" + ] dh1_mean = circmean(dh1_result, high=180, low=-180) dh2_mean = circmean(dh2_result, high=180, low=-180) @@ -86,15 +92,15 @@ def test_results_recursive2(self): assert_almost_equal(dh2_var, self.DG491011_var, 6) def test_ValueError_different_ensemble(self): - other = Ensemble(dirname=self.tmpdir.name, solvents=['water']) - dh1 = self.Ens.select_atoms('name C11', 'name C10', 'name C9', 'name C4') - dh2 = other.select_atoms('name C8', 'name C4', 'name C9', 'name C10') - with pytest.raises(ValueError, - match='Dihedral selections from different Ensembles, '): + other = Ensemble(dirname=self.tmpdir.name, solvents=["water"]) + dh1 = self.Ens.select_atoms("name C11", "name C10", "name C9", "name C4") + dh2 = other.select_atoms("name C8", "name C4", "name C9", "name C10") + with pytest.raises( + ValueError, match="Dihedral selections from different Ensembles, " + ): DihedralAnalysis([dh1, dh2]) def test_single_universe(self): - dh = self.Ens.select_atoms('name C4', 'name C17', 'name S2', 'name N3') + dh = self.Ens.select_atoms("name C4", "name C17", "name S2", "name N3") with pytest.raises(NotImplementedError): - DihedralAnalysis([dh])._single_universe() diff --git a/mdpow/tests/test_ensemble.py b/mdpow/tests/test_ensemble.py index d6e95fde..75c7bb39 100644 --- a/mdpow/tests/test_ensemble.py +++ b/mdpow/tests/test_ensemble.py @@ -21,110 +21,120 @@ from pkg_resources import resource_filename -RESOURCES = py.path.local(resource_filename(__name__, 'testing_resources')) +RESOURCES = py.path.local(resource_filename(__name__, "testing_resources")) MANIFEST = RESOURCES.join("manifest.yml") -ensemble_keys = [('water', 'Coulomb', '0000'), - ('water', 'Coulomb', '0500'), - ('water', 'Coulomb', '1000'), - ('water', 'VDW', '0000'), - ('water', 'VDW', '0250'), - ('water', 'VDW', '0500'), - ('water', 'VDW', '1000')] +ensemble_keys = [ + ("water", "Coulomb", "0000"), + ("water", "Coulomb", "0500"), + ("water", "Coulomb", "1000"), + ("water", "VDW", "0000"), + ("water", "VDW", "0250"), + ("water", "VDW", "0500"), + ("water", "VDW", "1000"), +] class TestEnsemble(object): def setup_method(self): self.tmpdir = td.TempDir() - self.m = pybol.Manifest(str(RESOURCES / 'manifest.yml')) - self.m.assemble('example_FEP', self.tmpdir.name) + self.m = pybol.Manifest(str(RESOURCES / "manifest.yml")) + self.m.assemble("example_FEP", self.tmpdir.name) def teardown_method(self): self.tmpdir.dissolve() def test_build_ensemble(self): # Octanol will be added later - Sim = Ensemble(dirname=self.tmpdir.name, solvents=['water']) + Sim = Ensemble(dirname=self.tmpdir.name, solvents=["water"]) diff = set(Sim.keys()) ^ set(ensemble_keys) assert not diff def test_kwargs(self): - l_dir = os.path.abspath(os.path.join(self.tmpdir.name, 'FEP', 'md.gro')) - bnz = Ensemble(dirname=self.tmpdir.name, solvents=['water'], topology_paths={'water': l_dir}) + l_dir = os.path.abspath(os.path.join(self.tmpdir.name, "FEP", "md.gro")) + bnz = Ensemble( + dirname=self.tmpdir.name, + solvents=["water"], + topology_paths={"water": l_dir}, + ) diff = set(bnz.keys()) ^ set(ensemble_keys) assert not diff def test_add_remove_systems(self): with in_dir(self.tmpdir.name, create=False): bnz = Ensemble() - l_dir = os.path.join(os.curdir, 'FEP', 'water', 'Coulomb', '0000') - top_dir = os.path.join(l_dir, 'md.gro') - trj_dir = os.path.join(l_dir, 'md_red.xtc') + l_dir = os.path.join(os.curdir, "FEP", "water", "Coulomb", "0000") + top_dir = os.path.join(l_dir, "md.gro") + trj_dir = os.path.join(l_dir, "md_red.xtc") U = mda.Universe(top_dir, trj_dir) - bnz.add_system(('water', 'Coulomb', '0000'), U) - assert bnz.keys() == [('water', 'Coulomb', '0000')] + bnz.add_system(("water", "Coulomb", "0000"), U) + assert bnz.keys() == [("water", "Coulomb", "0000")] assert bnz._num_systems == 1 assert bnz.__repr__() == "" assert len(bnz) == 1 - bnz.pop(('water', 'Coulomb', '0000')) + bnz.pop(("water", "Coulomb", "0000")) assert bnz._num_systems == 0 assert len(bnz) == 0 def test_select_atoms(self): - Sim = Ensemble(dirname=self.tmpdir.name, solvents=['water']) - solute = Sim.select_atoms('not resname SOL') + Sim = Ensemble(dirname=self.tmpdir.name, solvents=["water"]) + solute = Sim.select_atoms("not resname SOL") assert len(solute) == 7 for k in solute.keys(): assert len(solute[k]) == 42 def test_select_systems(self): - Sim = Ensemble(dirname=self.tmpdir.name, solvents=['water']) - Sel1 = Sim.select_systems(keys=[('water', 'Coulomb', '0000'), - ('water', 'VDW', '0500')]) - assert Sel1.keys() == [('water', 'Coulomb', '0000'), - ('water', 'VDW', '0500')] - Sel2 = Sim.select_systems(solvents=['water'], interactions=['Coulomb'], - lambdas=['0000', '1000']) - assert Sel2.keys() == [('water', 'Coulomb', '0000'), - ('water', 'Coulomb', '1000')] - Sel3 = Sim.select_systems(solvents=['water'], interactions=['VDW'], - lambda_range=[0, 1]) + Sim = Ensemble(dirname=self.tmpdir.name, solvents=["water"]) + Sel1 = Sim.select_systems( + keys=[("water", "Coulomb", "0000"), ("water", "VDW", "0500")] + ) + assert Sel1.keys() == [("water", "Coulomb", "0000"), ("water", "VDW", "0500")] + Sel2 = Sim.select_systems( + solvents=["water"], interactions=["Coulomb"], lambdas=["0000", "1000"] + ) + assert Sel2.keys() == [ + ("water", "Coulomb", "0000"), + ("water", "Coulomb", "1000"), + ] + Sel3 = Sim.select_systems( + solvents=["water"], interactions=["VDW"], lambda_range=[0, 1] + ) diff = set(Sel3.keys()) ^ set(ensemble_keys[3:]) assert not diff def test_ensemble_ag_methods(self): - Solv_system = Ensemble(dirname=self.tmpdir.name, solvents=['water']) - Sol1 = Solv_system.select_atoms('resname SOL') - Sol2 = Sol1.select_atoms('resid 2') + Solv_system = Ensemble(dirname=self.tmpdir.name, solvents=["water"]) + Sol1 = Solv_system.select_atoms("resname SOL") + Sol2 = Sol1.select_atoms("resid 2") Sol2_pos = Sol2.positions() assert len(Sol2_pos) > 0 for k in Sol2_pos: assert np.shape(Sol2_pos[k]) == (3, 3) assert not Sol1 == Sol2 assert isinstance(Sol2, EnsembleAtomGroup) - assert Sol2 == Sol1.select_atoms('resid 2') + assert Sol2 == Sol1.select_atoms("resid 2") assert ensemble_keys.sort() == Sol1.ensemble.keys().sort() - Sol1._groups.pop(('water', 'Coulomb', '0000')) + Sol1._groups.pop(("water", "Coulomb", "0000")) Sol1._keys = Sol1._groups.keys() assert not Sol1 == Sol2 - pos2 = Sol2.positions(keys=[('water', 'Coulomb', '0000')]) - assert np.shape(pos2[('water', 'Coulomb', '0000')]) == (3, 3) + pos2 = Sol2.positions(keys=[("water", "Coulomb", "0000")]) + assert np.shape(pos2[("water", "Coulomb", "0000")]) == (3, 3) def test_ensemble_init_exception(self): with pytest.raises(FileNotFoundError): - Ens = Ensemble(dirname='foo') + Ens = Ensemble(dirname="foo") def test_ensemble_build_exceptions(self): with pytest.raises(NoDataError): - ens = Ensemble(self.tmpdir.name, solvents=['test_solv']) + ens = Ensemble(self.tmpdir.name, solvents=["test_solv"]) def test_ensemble_selection_error(self): - ens = Ensemble(dirname=self.tmpdir.name, solvents=['water']) - sel1 = ens.select_atoms('resid 1') + ens = Ensemble(dirname=self.tmpdir.name, solvents=["water"]) + sel1 = ens.select_atoms("resid 1") with pytest.raises(SelectionError): - ens.select_atoms('foo') + ens.select_atoms("foo") with pytest.raises(SelectionError): - sel1.select_atoms('foo') + sel1.select_atoms("foo") def test_ensemble_analysis(self): class TestAnalysis(EnsembleAnalysis): @@ -140,12 +150,12 @@ def _single_universe(self): self.key_list.append(self._key) def _single_frame(self): - assert len(self._system.select_atoms('not resname SOL')) == 42 + assert len(self._system.select_atoms("not resname SOL")) == 42 def _conclude_universe(self): assert self.n_frames == self.stop - Sim = Ensemble(dirname=self.tmpdir.name, solvents=['water']) + Sim = Ensemble(dirname=self.tmpdir.name, solvents=["water"]) TestRun = TestAnalysis(Sim).run(start=0, step=1, stop=10) assert Sim.keys() == TestRun.key_list @@ -159,7 +169,7 @@ def __init__(self, test_ensemble): def _single_universe(self): pass - Sim = Ensemble(dirname=self.tmpdir.name, solvents=['water']) + Sim = Ensemble(dirname=self.tmpdir.name, solvents=["water"]) TestRun = TestAnalysis(Sim) with pytest.raises(NotImplementedError): @@ -175,21 +185,21 @@ def __init__(self, test_ensemble): def _single_frame(self): pass - Sim = Ensemble(dirname=self.tmpdir.name, solvents=['water']) + Sim = Ensemble(dirname=self.tmpdir.name, solvents=["water"]) TestRun = TestAnalysis(Sim) with pytest.raises(NotImplementedError): TestRun._single_universe() def test_value_error(self): - ens = Ensemble(dirname=self.tmpdir.name, solvents=['water']) + ens = Ensemble(dirname=self.tmpdir.name, solvents=["water"]) copy_ens = Ensemble() copy_ens._ensemble_dir = self.tmpdir.name for k in ens.keys(): copy_ens.add_system(k, ens[k]) - dh1 = ens.select_atoms('name C4 or name C17 or name S2 or name N3') - dh2 = copy_ens.select_atoms('name C4 or name C17 or name S2 or name N3') - dh3 = ens.select_atoms('name C4 or name C17 or name S2 or name N3') - dh4 = ens.select_atoms('name C4 or name C17 or name S2 or name N3') + dh1 = ens.select_atoms("name C4 or name C17 or name S2 or name N3") + dh2 = copy_ens.select_atoms("name C4 or name C17 or name S2 or name N3") + dh3 = ens.select_atoms("name C4 or name C17 or name S2 or name N3") + dh4 = ens.select_atoms("name C4 or name C17 or name S2 or name N3") with pytest.raises(ValueError): dh_run = DihedralAnalysis([dh1, dh2, dh4, dh3]).run(start=0, stop=4, step=1) diff --git a/mdpow/tests/test_equilibration_script.py b/mdpow/tests/test_equilibration_script.py index 5b9c4cb9..c0b5f752 100644 --- a/mdpow/tests/test_equilibration_script.py +++ b/mdpow/tests/test_equilibration_script.py @@ -10,36 +10,52 @@ from . import RESOURCES + class TestEquilibriumScript(object): def setup_method(self): self.tmpdir = td.TempDir() self.old_path = os.getcwd() self.resources = RESOURCES - m = pybol.Manifest(str(self.resources / 'manifest.yml')) - m.assemble('base', self.tmpdir.name) + m = pybol.Manifest(str(self.resources / "manifest.yml")) + m.assemble("base", self.tmpdir.name) def teardown_method(self): self.tmpdir.dissolve() def _run_equil(self, solvent, dirname): - cfg = get_configuration('runinput.yml') + cfg = get_configuration("runinput.yml") self.S = equilibrium_simulation(cfg, solvent, dirname=dirname) def test_basic_run(self): with in_dir(self.tmpdir.name, create=False): try: - self._run_equil('water','benzene/') + self._run_equil("water", "benzene/") self._new_structures() except Exception as err: - raise AssertionError('Equilibration simulations failed with exception:\n{0}'.format(str(err))) + raise AssertionError( + "Equilibration simulations failed with exception:\n{0}".format( + str(err) + ) + ) def _new_structures(self): assert os.path.exists( - os.path.join(self.tmpdir.name, - 'benzene', 'Equilibrium', 'water', 'em', 'em.pdb')) + os.path.join( + self.tmpdir.name, "benzene", "Equilibrium", "water", "em", "em.pdb" + ) + ) assert os.path.exists( - os.path.join(self.tmpdir.name, - 'benzene', 'Equilibrium', 'water', 'solvation', 'solvated.gro')) + os.path.join( + self.tmpdir.name, + "benzene", + "Equilibrium", + "water", + "solvation", + "solvated.gro", + ) + ) assert os.path.exists( - os.path.join(self.tmpdir.name, - 'benzene', 'Equilibrium', 'water', 'MD_NPT', 'md.gro')) + os.path.join( + self.tmpdir.name, "benzene", "Equilibrium", "water", "MD_NPT", "md.gro" + ) + ) diff --git a/mdpow/tests/test_fep.py b/mdpow/tests/test_fep.py index 6bda9432..d9447ac3 100644 --- a/mdpow/tests/test_fep.py +++ b/mdpow/tests/test_fep.py @@ -10,46 +10,72 @@ import mdpow.config import mdpow.fep + def test_molar_to_nm3(): assert_almost_equal(mdpow.fep.molar_to_nm3(1.5), 0.9033212684) assert_almost_equal(mdpow.fep.molar_to_nm3(55.5), 33.42288693449999) + def test_bar_to_kJmolnm3(): assert_almost_equal(mdpow.fep.bar_to_kJmolnm3(1.0), 0.0602214179) + def test_kcal_to_kJ(): assert_almost_equal(mdpow.fep.kcal_to_kJ(10.0), 41.84) + def test_kJ_to_kcal(): assert_almost_equal(mdpow.fep.kJ_to_kcal(41.84), 10.0) + def test_kBT_to_kJ(): - ref = constants.N_A*constants.k*1e-3 + ref = constants.N_A * constants.k * 1e-3 assert_almost_equal(mdpow.fep.kBT_to_kJ(1, 1), ref) + class TestFEPschedule(object): reference = { - 'VDW': - {'couple_lambda0': 'vdw', - 'couple_lambda1': 'none', - 'description': 'decoupling vdw --> none', - 'label': 'VDW', - 'lambdas': np.array([ 0. , 0.05, 0.1 , 0.2 , 0.3 , 0.4 , 0.5 , 0.6 , 0.65, - 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1. ]), - 'name': 'vdw', - 'sc_alpha': 0.5, - 'sc_power': 1, - 'sc_sigma': 0.3}, - 'Coulomb': - {'couple_lambda0': 'vdw-q', - 'couple_lambda1': 'vdw', - 'description': 'dis-charging vdw+q --> vdw', - 'label': 'Coul', - 'lambdas': np.array([ 0. , 0.25, 0.5 , 0.75, 1. ]), - 'name': 'Coulomb', - 'sc_alpha': 0, - 'sc_power': 1, - 'sc_sigma': 0.3} + "VDW": { + "couple_lambda0": "vdw", + "couple_lambda1": "none", + "description": "decoupling vdw --> none", + "label": "VDW", + "lambdas": np.array( + [ + 0.0, + 0.05, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.65, + 0.7, + 0.75, + 0.8, + 0.85, + 0.9, + 0.95, + 1.0, + ] + ), + "name": "vdw", + "sc_alpha": 0.5, + "sc_power": 1, + "sc_sigma": 0.3, + }, + "Coulomb": { + "couple_lambda0": "vdw-q", + "couple_lambda1": "vdw", + "description": "dis-charging vdw+q --> vdw", + "label": "Coul", + "lambdas": np.array([0.0, 0.25, 0.5, 0.75, 1.0]), + "name": "Coulomb", + "sc_alpha": 0, + "sc_power": 1, + "sc_sigma": 0.3, + }, } @pytest.fixture @@ -58,14 +84,14 @@ def cfg(self): return mdpow.config.get_configuration() def test_VDW(self, cfg): - return self._test_schedule(cfg, 'VDW') + return self._test_schedule(cfg, "VDW") def test_Coulomb(self, cfg): - return self._test_schedule(cfg, 'Coulomb') + return self._test_schedule(cfg, "Coulomb") - @pytest.mark.parametrize('component', ['VDW', 'Coulomb']) + @pytest.mark.parametrize("component", ["VDW", "Coulomb"]) def test_copy(self, cfg, component): - section = 'FEP_schedule_{0}'.format(component) + section = "FEP_schedule_{0}".format(component) schedule = deepcopy(mdpow.fep.FEPschedule.load(cfg, section)) reference = self.reference[component] @@ -77,30 +103,39 @@ def test_copy(self, cfg, component): for k in schedule.keys(): if k == "lambdas": - assert_array_almost_equal(schedule[k], reference[k], - err_msg="FEP schedule {0} mismatch".format(k)) + assert_array_almost_equal( + schedule[k], + reference[k], + err_msg="FEP schedule {0} mismatch".format(k), + ) else: - assert schedule[k] == reference[k], \ - "mismatch between loaded FEP schedule entry {0} and reference".format(k) + assert ( + schedule[k] == reference[k] + ), "mismatch between loaded FEP schedule entry {0} and reference".format( + k + ) - @pytest.mark.parametrize('component', ['VDW', 'Coulomb']) + @pytest.mark.parametrize("component", ["VDW", "Coulomb"]) def test_write(self, cfg, component, tmp_path): filename = tmp_path / "cfg.yaml" cfg.write(filename) new_cfg = mdpow.config.get_configuration(filename) assert new_cfg.conf == cfg.conf - @pytest.mark.parametrize("x,ref", [ - ("test", False), - ([1, 1, 2, 3], True), - ({1, 2, 3}, True), - (None, False), - ]) + @pytest.mark.parametrize( + "x,ref", + [ + ("test", False), + ([1, 1, 2, 3], True), + ({1, 2, 3}, True), + (None, False), + ], + ) def test_iterable(self, x, ref): assert mdpow.config.iterable(x) == ref def _test_schedule(self, cfg, component): - section = 'FEP_schedule_{0}'.format(component) + section = "FEP_schedule_{0}".format(component) schedule = mdpow.fep.FEPschedule.load(cfg, section) reference = self.reference[component] @@ -112,20 +147,26 @@ def _test_schedule(self, cfg, component): for k in schedule.keys(): if k == "lambdas": - assert_array_almost_equal(schedule[k], reference[k], - err_msg="FEP schedule {0} mismatch".format(k)) + assert_array_almost_equal( + schedule[k], + reference[k], + err_msg="FEP schedule {0} mismatch".format(k), + ) else: - assert schedule[k] == reference[k], \ - "mismatch between loaded FEP schedule entry {0} and reference".format(k) + assert ( + schedule[k] == reference[k] + ), "mismatch between loaded FEP schedule entry {0} and reference".format( + k + ) def test_skip_empty_entries(self, cfg, section="FEP_schedule_Coulomb"): # remove some entries - del cfg.conf[section]['name'] # string - del cfg.conf[section]['lambdas'] # array + del cfg.conf[section]["name"] # string + del cfg.conf[section]["lambdas"] # array with pytest.warns(mdpow.config.NoOptionWarning): schedule = mdpow.fep.FEPschedule.load(cfg, section) - assert schedule['label'] == "Coul" - assert schedule['sc_power'] == 1 - assert 'name' not in schedule - assert 'lambdas' not in schedule + assert schedule["label"] == "Coul" + assert schedule["sc_power"] == 1 + assert "name" not in schedule + assert "lambdas" not in schedule diff --git a/mdpow/tests/test_fep_analysis.py b/mdpow/tests/test_fep_analysis.py index 8619e54e..e9bf3981 100644 --- a/mdpow/tests/test_fep_analysis.py +++ b/mdpow/tests/test_fep_analysis.py @@ -8,14 +8,16 @@ from . import STATES + @pytest.fixture def FEP_dir(tmpdir): - name = STATES['FEP'].basename # a py.path.local - fepdir = tmpdir.join(name) # a py.path.local - shutil.copytree(STATES['FEP'].strpath, fepdir.strpath) + name = STATES["FEP"].basename # a py.path.local + fepdir = tmpdir.join(name) # a py.path.local + shutil.copytree(STATES["FEP"].strpath, fepdir.strpath) assert os.path.isdir(fepdir.strpath) return fepdir + def setup_Ghyd(fepdir): basedir = fepdir.join("benzene") gsolv = basedir.join("FEP", "water", "Gsolv.fep") @@ -30,25 +32,31 @@ def setup_Ghyd(fepdir): def Ghyd(FEP_dir): return setup_Ghyd(FEP_dir) + @pytest.fixture def Ghyd_other(FEP_dir): return setup_Ghyd(FEP_dir) + def test_load_Ghyd(Ghyd): assert isinstance(Ghyd, mdpow.fep.Ghyd) -@pytest.mark.parametrize("kwargs", ( - {}, - {'SI': True, 'estimator': 'alchemlyb', 'method': 'TI'}, - {'SI': False, 'estimator': 'alchemlyb', 'method': 'MBAR', 'force': False}, - {'SI': False, 'estimator': 'mdpow', 'method': 'TI', 'force': True}, + +@pytest.mark.parametrize( + "kwargs", + ( + {}, + {"SI": True, "estimator": "alchemlyb", "method": "TI"}, + {"SI": False, "estimator": "alchemlyb", "method": "MBAR", "force": False}, + {"SI": False, "estimator": "mdpow", "method": "TI", "force": True}, ), - ids=["defaults", - "SI=True, estimator='alchemlyb', method='TI'", - "SI=False, estimator='alchemlyb', method='MBAR', force=False", - "SI=False, estimator='mdpow', method='TI', force=True", - ] - ) + ids=[ + "defaults", + "SI=True, estimator='alchemlyb', method='TI'", + "SI=False, estimator='alchemlyb', method='MBAR', force=False", + "SI=False, estimator='mdpow', method='TI', force=True", + ], +) def test_p_transfer(Ghyd, Ghyd_other, kwargs): """Test transfer water <-> water with same data.""" G1 = Ghyd @@ -59,13 +67,16 @@ def test_p_transfer(Ghyd, Ghyd_other, kwargs): assert transferFE == pytest.approx(0.0) assert logPow == pytest.approx(0.0) + def test_p_transfer_wrong_method(Ghyd, Ghyd_other): """Test transfer water <-> water with same data.""" G1 = Ghyd G2 = Ghyd_other - with pytest.raises(ValueError, - match="Method MBAR is not implemented in MDPOW, use estimator='alchemlyb'"): + with pytest.raises( + ValueError, + match="Method MBAR is not implemented in MDPOW, use estimator='alchemlyb'", + ): mdpow.fep.p_transfer(G1, G2, estimator="mdpow", method="MBAR") @@ -73,10 +84,12 @@ def test_pOW_error(Ghyd, Ghyd_other): with pytest.raises(ValueError): mdpow.fep.pOW(Ghyd, Ghyd_other) + def test_pCW_error(Ghyd, Ghyd_other): with pytest.raises(ValueError): mdpow.fep.pCW(Ghyd, Ghyd_other) + def test_pTW_error(Ghyd, Ghyd_other): with pytest.raises(ValueError): mdpow.fep.pTW(Ghyd, Ghyd_other) diff --git a/mdpow/tests/test_fep_script.py b/mdpow/tests/test_fep_script.py index f7e001f8..ff4c29b5 100644 --- a/mdpow/tests/test_fep_script.py +++ b/mdpow/tests/test_fep_script.py @@ -13,27 +13,30 @@ from . import RESOURCES + class TestFEPScript(object): def setup_method(self): self.tmpdir = td.TempDir() self.old_path = os.getcwd() self.resources = RESOURCES - self.m = pybol.Manifest(str(self.resources / 'manifest.yml')) - self.m.assemble('md_npt', self.tmpdir.name) + self.m = pybol.Manifest(str(self.resources / "manifest.yml")) + self.m.assemble("md_npt", self.tmpdir.name) - S = Simulation(filename=os.path.join( - self.tmpdir.name, 'benzene', 'water.simulation')) - S.make_paths_relative(prefix=os.path.join( - self.tmpdir.name,'benzene', 'Equilibrium', 'water')) - S.dirs.includes = os.path.join(self.tmpdir.name, 'top') + S = Simulation( + filename=os.path.join(self.tmpdir.name, "benzene", "water.simulation") + ) + S.make_paths_relative( + prefix=os.path.join(self.tmpdir.name, "benzene", "Equilibrium", "water") + ) + S.dirs.includes = os.path.join(self.tmpdir.name, "top") S.save() def teardown_method(self): self.tmpdir.dissolve() def _run_fep(self, solvent, dirname): - cfg = get_configuration('runinput.yml') - if gromacs.release.startswith('4'): + cfg = get_configuration("runinput.yml") + if gromacs.release.startswith("4"): # For GROMACS 4.6.5 explicitly enable the group neighbor # scheme by creating a copy of the MDP file in the current # directory with MDP cutoff-scheme option changed. The local @@ -41,15 +44,19 @@ def _run_fep(self, solvent, dirname): # in the templates. fep_mdp_name = cfg.get("FEP", "mdp") mdp = mdpow.config.get_template(fep_mdp_name) - gromacs.cbook.edit_mdp(mdp, - new_mdp=os.path.join(os.getcwd(), fep_mdp_name), - cutoff_scheme="group") - self.savefilename = fep_simulation(cfg, solvent, dirname=dirname, exit_on_error=False) + gromacs.cbook.edit_mdp( + mdp, + new_mdp=os.path.join(os.getcwd(), fep_mdp_name), + cutoff_scheme="group", + ) + self.savefilename = fep_simulation( + cfg, solvent, dirname=dirname, exit_on_error=False + ) def test_default_run(self, capsys): with gromacs.utilities.in_dir(self.tmpdir.name, create=False): try: - self._run_fep('water', 'benzene/') + self._run_fep("water", "benzene/") except Exception as err: # check if log file contains # 'There are 2 perturbed non-bonded pair interactions beyond the pair-list cutoff' @@ -58,12 +65,22 @@ def test_default_run(self, capsys): # internal problems.) captured = capsys.readouterr() for line in captured: - if "perturbed non-bonded pair interactions beyond the pair-list cutoff'" in line: - pytest.xfail("Stochastic test failure (perturbed non-bonded beyond cutoff). " - "Still works as expected, see #175.") + if ( + "perturbed non-bonded pair interactions beyond the pair-list cutoff'" + in line + ): + pytest.xfail( + "Stochastic test failure (perturbed non-bonded beyond cutoff). " + "Still works as expected, see #175." + ) break else: - raise AssertionError('FEP simulations failed with exception:\n{0}'.format(str(err))) + raise AssertionError( + "FEP simulations failed with exception:\n{0}".format(str(err)) + ) - assert os.path.exists(os.path.join(self.tmpdir.name, - 'benzene', 'FEP', 'water', 'VDW', '0000', 'md.edr')) + assert os.path.exists( + os.path.join( + self.tmpdir.name, "benzene", "FEP", "water", "VDW", "0000", "md.edr" + ) + ) diff --git a/mdpow/tests/test_filelock.py b/mdpow/tests/test_filelock.py index 1cbed583..a337cdae 100644 --- a/mdpow/tests/test_filelock.py +++ b/mdpow/tests/test_filelock.py @@ -4,6 +4,7 @@ from mdpow import filelock + def test_FileLock_acquire(tmpdir, filename="test.txt"): with tmpdir.as_cwd(): with filelock.FileLock(filename, timeout=2) as lock: @@ -11,6 +12,7 @@ def test_FileLock_acquire(tmpdir, filename="test.txt"): f.write("Humpty Dumpty sat on a wall") assert os.path.exists(filename) + def test_FileLock_lock(filename="test.txt"): with filelock.FileLock(filename, timeout=2) as lock: with pytest.raises(filelock.FileLockException): diff --git a/mdpow/tests/test_forcefields.py b/mdpow/tests/test_forcefields.py index 5e95be83..bc5c3112 100644 --- a/mdpow/tests/test_forcefields.py +++ b/mdpow/tests/test_forcefields.py @@ -6,8 +6,9 @@ import mdpow.forcefields # currently supported -WATERMODELS = ('tip4p', 'tip3p', 'tip5p', 'spc', 'spce', 'm24', 'tip4pd') -SOLVENTMODELS = ('water', 'cyclohexane', 'octanol', 'toluene') +WATERMODELS = ("tip4p", "tip3p", "tip5p", "spc", "spce", "m24", "tip4pd") +SOLVENTMODELS = ("water", "cyclohexane", "octanol", "toluene") + class TestIncludedForcefiels(object): @staticmethod @@ -18,35 +19,40 @@ def test_default_forcefield(): def test_oplsaa_itp(): assert "ffoplsaa.itp" in mdpow.config.topfiles assert mdpow.config.topfiles["ffoplsaa.itp"].endswith( - os.path.join('mdpow', 'top', 'ffoplsaa.itp')) + os.path.join("mdpow", "top", "ffoplsaa.itp") + ) @staticmethod def test_oplsaa_ff(): assert "oplsaa.ff" in mdpow.config.topfiles assert mdpow.config.topfiles["oplsaa.ff"].endswith( - os.path.join('mdpow', 'top', 'oplsaa.ff')) + os.path.join("mdpow", "top", "oplsaa.ff") + ) + class TestIncludedSolvents(object): solvents = { - 'tip4p': { - 'tip4p.itp': os.path.join('mdpow', 'top', 'oplsaa.ff', 'tip4p.itp'), - 'tip4p.gro': os.path.join('mdpow', 'top', 'tip4p.gro') + "tip4p": { + "tip4p.itp": os.path.join("mdpow", "top", "oplsaa.ff", "tip4p.itp"), + "tip4p.gro": os.path.join("mdpow", "top", "tip4p.gro"), }, - 'octanol': { - '1oct.gro': os.path.join('mdpow', 'top', '1oct.gro'), - '1oct.itp': os.path.join('mdpow', 'top', 'oplsaa.ff', '1oct.itp'), + "octanol": { + "1oct.gro": os.path.join("mdpow", "top", "1oct.gro"), + "1oct.itp": os.path.join("mdpow", "top", "oplsaa.ff", "1oct.itp"), }, - 'cyclohexane': { - '1cyclo.gro': os.path.join('mdpow', 'top', '1cyclo.gro'), - '1cyclo.itp': os.path.join('mdpow', 'top', 'oplsaa.ff', '1cyclo.itp') + "cyclohexane": { + "1cyclo.gro": os.path.join("mdpow", "top", "1cyclo.gro"), + "1cyclo.itp": os.path.join("mdpow", "top", "oplsaa.ff", "1cyclo.itp"), }, - 'toluene': { - '1tol_oplsaa.gro': os.path.join('mdpow', 'top', '1tol_oplsaa.gro'), - '1tol.itp': os.path.join('mdpow', 'top', 'oplsaa.ff', '1tol.itp') + "toluene": { + "1tol_oplsaa.gro": os.path.join("mdpow", "top", "1tol_oplsaa.gro"), + "1tol.itp": os.path.join("mdpow", "top", "oplsaa.ff", "1tol.itp"), }, } - @pytest.mark.parametrize("solvent_name", ["tip4p", "octanol", "cyclohexane", "toluene"]) + @pytest.mark.parametrize( + "solvent_name", ["tip4p", "octanol", "cyclohexane", "toluene"] + ) def test_solvent(self, solvent_name): solvent = self.solvents[solvent_name] for filename, path in solvent.items(): @@ -60,12 +66,14 @@ def test_default_water_model(): assert mdpow.forcefields.DEFAULT_WATER_MODEL == "tip4p" def test_watermodelsdat(self): - included_watermodels = open(mdpow.config.topfiles['watermodels.dat']).read() - for line, ref in zip(self._simple_line_parser(mdpow.forcefields.GMX_WATERMODELS_DAT), - self._simple_line_parser(included_watermodels)): + included_watermodels = open(mdpow.config.topfiles["watermodels.dat"]).read() + for line, ref in zip( + self._simple_line_parser(mdpow.forcefields.GMX_WATERMODELS_DAT), + self._simple_line_parser(included_watermodels), + ): assert line.strip() == ref.strip() - @pytest.mark.parametrize('identifier', WATERMODELS) + @pytest.mark.parametrize("identifier", WATERMODELS) def test_gromacs_water_models(self, identifier): models = mdpow.forcefields.GROMACS_WATER_MODELS @@ -75,33 +83,37 @@ def test_gromacs_water_models(self, identifier): assert model.itp in mdpow.config.topfiles assert model.coordinates in mdpow.config.topfiles - @staticmethod def _simple_line_parser(string): - for line in string.split('\n'): + for line in string.split("\n"): line = line.strip() - if not line or line.startswith('#'): + if not line or line.startswith("#"): continue yield line @staticmethod def test_get_water_model(): model = mdpow.forcefields.DEFAULT_WATER_MODEL - assert (mdpow.forcefields.get_water_model(model) is - mdpow.forcefields.GROMACS_WATER_MODELS[model]) + assert ( + mdpow.forcefields.get_water_model(model) + is mdpow.forcefields.GROMACS_WATER_MODELS[model] + ) @staticmethod def test_get_water_model_ValueError(): with pytest.raises(ValueError): mdpow.forcefields.get_water_model("The Jabberwock is an imaginary beast.") + class TestSolventModels(object): @staticmethod def test_get_solvent_default_water(): model = "water" defaultmodel = mdpow.forcefields.DEFAULT_WATER_MODEL - assert (mdpow.forcefields.get_solvent_model(model) is - mdpow.forcefields.GROMACS_WATER_MODELS[defaultmodel]) + assert ( + mdpow.forcefields.get_solvent_model(model) + is mdpow.forcefields.GROMACS_WATER_MODELS[defaultmodel] + ) @staticmethod def test_get_solvent_model_ValueError(): @@ -110,46 +122,57 @@ def test_get_solvent_model_ValueError(): @staticmethod def test_get_solvent_cyclohexane(): - model = 'cyclohexane' - forcefield = 'OPLS-AA' - assert (mdpow.forcefields.get_solvent_model(model) is - mdpow.forcefields.GROMACS_SOLVENT_MODELS[forcefield][model]) - - @pytest.mark.parametrize("forcefield", ['OPLS-AA', 'CHARMM', 'AMBER']) + model = "cyclohexane" + forcefield = "OPLS-AA" + assert ( + mdpow.forcefields.get_solvent_model(model) + is mdpow.forcefields.GROMACS_SOLVENT_MODELS[forcefield][model] + ) + + @pytest.mark.parametrize("forcefield", ["OPLS-AA", "CHARMM", "AMBER"]) def test_get_solvent_octanol(self, forcefield): - model = 'octanol' - assert (mdpow.forcefields.get_solvent_model(model, forcefield=forcefield) is - mdpow.forcefields.GROMACS_SOLVENT_MODELS[forcefield][model]) + model = "octanol" + assert ( + mdpow.forcefields.get_solvent_model(model, forcefield=forcefield) + is mdpow.forcefields.GROMACS_SOLVENT_MODELS[forcefield][model] + ) - @pytest.mark.parametrize("forcefield", ['OPLS-AA', 'CHARMM', 'AMBER']) + @pytest.mark.parametrize("forcefield", ["OPLS-AA", "CHARMM", "AMBER"]) def test_get_solvent_wetoctanol(self, forcefield): - model = 'wetoctanol' - assert (mdpow.forcefields.get_solvent_model(model, forcefield=forcefield) is - mdpow.forcefields.GROMACS_SOLVENT_MODELS[forcefield][model]) + model = "wetoctanol" + assert ( + mdpow.forcefields.get_solvent_model(model, forcefield=forcefield) + is mdpow.forcefields.GROMACS_SOLVENT_MODELS[forcefield][model] + ) - @pytest.mark.parametrize("forcefield", ['OPLS-AA', 'CHARMM', 'AMBER']) + @pytest.mark.parametrize("forcefield", ["OPLS-AA", "CHARMM", "AMBER"]) def test_get_solvent_toluene(self, forcefield): - model = 'toluene' - assert (mdpow.forcefields.get_solvent_model(model, forcefield=forcefield) is - mdpow.forcefields.GROMACS_SOLVENT_MODELS[forcefield][model]) + model = "toluene" + assert ( + mdpow.forcefields.get_solvent_model(model, forcefield=forcefield) + is mdpow.forcefields.GROMACS_SOLVENT_MODELS[forcefield][model] + ) @staticmethod def test_get_solvent_identifier_default_is_water(): - assert (mdpow.forcefields.get_solvent_identifier('water') is - mdpow.forcefields.DEFAULT_WATER_MODEL) + assert ( + mdpow.forcefields.get_solvent_identifier("water") + is mdpow.forcefields.DEFAULT_WATER_MODEL + ) @pytest.mark.parametrize("model", WATERMODELS) def test_get_solvent_identifier_water(self, model): - assert mdpow.forcefields.get_solvent_identifier('water', model=model) is model + assert mdpow.forcefields.get_solvent_identifier("water", model=model) is model - @pytest.mark.parametrize('solvent', - [model for model in SOLVENTMODELS if model != "water"]) - @pytest.mark.parametrize('model', [None, "Jabberwock model"]) + @pytest.mark.parametrize( + "solvent", [model for model in SOLVENTMODELS if model != "water"] + ) + @pytest.mark.parametrize("model", [None, "Jabberwock model"]) def test_get_solvent_identifier_solvents(self, solvent, model): # The model="Jabberwock model" checks that "model" is properly ignored. assert mdpow.forcefields.get_solvent_identifier(solvent, model=model) is solvent @staticmethod def test_get_solvent_identifier_None(): - assert mdpow.forcefields.get_solvent_identifier('water', model="foobar") is None - assert mdpow.forcefields.get_solvent_identifier('benzene') is None + assert mdpow.forcefields.get_solvent_identifier("water", model="foobar") is None + assert mdpow.forcefields.get_solvent_identifier("benzene") is None diff --git a/mdpow/tests/test_run.py b/mdpow/tests/test_run.py index 8c97d0d4..d774d31b 100644 --- a/mdpow/tests/test_run.py +++ b/mdpow/tests/test_run.py @@ -16,29 +16,33 @@ def cfg(): return mdpow.config.get_configuration() -@pytest.mark.parametrize("protocols", [["energy_minimize"], - ["MD_relaxed"], - ["MD_NPT", "FEP"], - ]) +@pytest.mark.parametrize( + "protocols", + [ + ["energy_minimize"], + ["MD_relaxed"], + ["MD_NPT", "FEP"], + ], +) def test_get_mdp_files(cfg, protocols): mdpfiles = mdpow.run.get_mdp_files(cfg, protocols) assert len(mdpfiles) == len(protocols) assert set(mdpfiles.keys()) == set(protocols) assert all([mdp.endswith(".mdp") for mdp in mdpfiles.values()]) -@pytest.mark.parametrize("protocols", [["FEP"], - ["Jabberwocky", "Mad Hatter"] - ]) + +@pytest.mark.parametrize("protocols", [["FEP"], ["Jabberwocky", "Mad Hatter"]]) def test_get_mdp_files_None(cfg, protocols): # modify cfg - del cfg.conf['FEP']['mdp'] + del cfg.conf["FEP"]["mdp"] with pytest.warns(mdpow.config.NoOptionWarning): mdpfiles = mdpow.run.get_mdp_files(cfg, ["FEP"]) assert mdpfiles == {} + def test_get_mdp_files_ValueError(cfg): # modify cfg with a non-existant file - cfg.conf['FEP']['mdp'] = "smoke_and_mirror.mdp" + cfg.conf["FEP"]["mdp"] = "smoke_and_mirror.mdp" with pytest.raises(ValueError): mdpow.run.get_mdp_files(cfg, ["MD_NPT", "FEP"]) @@ -47,12 +51,15 @@ def test_get_mdp_files_ValueError(cfg): # and methods that would return failures, so that we don't have to # actually run simulations. + @pytest.fixture def MDrunner_failure(monkeypatch): # mock gromacs.run.MDrunner: pretend that the simulation failed def mock_run_check(*args, **kwargs): return False - monkeypatch.setattr(gromacs.run.MDrunner, 'run_check', mock_run_check) + + monkeypatch.setattr(gromacs.run.MDrunner, "run_check", mock_run_check) + # mock gromacs.run.check_mdrun_success(logfile) @pytest.fixture @@ -60,7 +67,9 @@ def check_mdrun_success_failure(monkeypatch): # pretend simulation has not completed as indicated by log file def mock_check_mdrun_success(arg): return False - monkeypatch.setattr(gromacs.run, 'check_mdrun_success', mock_check_mdrun_success) + + monkeypatch.setattr(gromacs.run, "check_mdrun_success", mock_check_mdrun_success) + @pytest.fixture def check_mdrun_success_none(monkeypatch): @@ -68,54 +77,71 @@ def check_mdrun_success_none(monkeypatch): # check and check_mdrun_success() returns None def mock_check_mdrun_success(arg): return None - monkeypatch.setattr(gromacs.run, 'check_mdrun_success', mock_check_mdrun_success) - -@pytest.mark.parametrize("runlocal,exception", - [(True, gromacs.exceptions.GromacsError), - (False, gromacs.exceptions.MissingDataError)]) -def test_runMD_or_exit_exceptions(runlocal, exception, cfg, MDrunner_failure, check_mdrun_success_failure, - monkeypatch, tmpdir): - params = {'deffnm': 'md'} + monkeypatch.setattr(gromacs.run, "check_mdrun_success", mock_check_mdrun_success) + + +@pytest.mark.parametrize( + "runlocal,exception", + [ + (True, gromacs.exceptions.GromacsError), + (False, gromacs.exceptions.MissingDataError), + ], +) +def test_runMD_or_exit_exceptions( + runlocal, + exception, + cfg, + MDrunner_failure, + check_mdrun_success_failure, + monkeypatch, + tmpdir, +): + params = {"deffnm": "md"} S = {} def mock_getboolean(*args): return runlocal + monkeypatch.setattr(cfg, "getboolean", mock_getboolean) with pytest.raises(exception): - mdpow.run.runMD_or_exit(S, "FEP", params, cfg, - dirname=str(tmpdir), - exit_on_error=False) + mdpow.run.runMD_or_exit( + S, "FEP", params, cfg, dirname=str(tmpdir), exit_on_error=False + ) + def test_runMD_or_exit_None(cfg, check_mdrun_success_none, monkeypatch, tmpdir): # special case where runlocal=False and no simulation has been run # so there's no logfile to check and check_mdrun_success() returns # None - params = {'deffnm': 'md'} + params = {"deffnm": "md"} S = {} def mock_getboolean(*args): return False + monkeypatch.setattr(cfg, "getboolean", mock_getboolean) - return_value = mdpow.run.runMD_or_exit(S, "FEP", params, cfg, - dirname=str(tmpdir), - exit_on_error=False) + return_value = mdpow.run.runMD_or_exit( + S, "FEP", params, cfg, dirname=str(tmpdir), exit_on_error=False + ) assert return_value is None @pytest.mark.parametrize("runlocal", [True, False]) -def test_runMD_or_exit_SysExit(runlocal, cfg, MDrunner_failure, check_mdrun_success_failure, - monkeypatch, tmpdir): - params = {'deffnm': 'md'} +def test_runMD_or_exit_SysExit( + runlocal, cfg, MDrunner_failure, check_mdrun_success_failure, monkeypatch, tmpdir +): + params = {"deffnm": "md"} S = {} def mock_getboolean(*args): return runlocal + monkeypatch.setattr(cfg, "getboolean", mock_getboolean) with pytest.raises(SystemExit): - mdpow.run.runMD_or_exit(S, "FEP", params, cfg, - dirname=str(tmpdir), - exit_on_error=True) + mdpow.run.runMD_or_exit( + S, "FEP", params, cfg, dirname=str(tmpdir), exit_on_error=True + ) diff --git a/mdpow/tests/test_runinput.py b/mdpow/tests/test_runinput.py index dc416279..cbf4db9e 100644 --- a/mdpow/tests/test_runinput.py +++ b/mdpow/tests/test_runinput.py @@ -7,98 +7,85 @@ from mdpow import config + class TestAlteredConfig(object): params_altered = { - 'DEFAULT': - { - 'qscripts':'custom.sh' - }, - 'setup': - { - 'name': 'custom_name', - 'molecule': 'some_molecule_ident', - 'itp': 'some_molecules_itp', - 'structure': 'some_molecules_structure', - 'watermodel': 'spce', - 'maxwarn': 2, - 'distance': None, # default (not in this input file) - 'boxtype': 'dodecahedron', # default (not in this input file) - 'gromacsoutput': True, - }, - 'energy_minimize': - { - 'mdp': 'custom_emin.mdp' - }, - 'MD_relaxed': - { - 'qscript': 'MD_relaxed.sge', - 'runtime': 10, - 'runlocal': False, - 'mdp': 'MD_relaxed_NPT_opls.mdp' - }, - 'MD_NPT': - { - 'qscript': 'MD_NPT.sge', - 'runtime': 10000, - 'runlocal': True, - 'mdp': 'MD_NPT_opls.mdp', - }, - 'FEP': - { - 'method': 'TI', - 'qscript': 'FEP.sge', - 'runtime': 1000, - 'runlocal': True, - 'maxwarn': 3, - 'mdp': 'fep_custom_opls.mdp' - }, - 'FEP_schedule_Coulomb': - { - 'name': 'Coul', - 'description': 'transition_1', - 'label': 'coulomb', - 'couple_lambda0': 'vdw', - 'couple_lambda1': 'vdw-q', - 'sc_alpha': 0.2, - 'sc_power': 2, - 'sc_sigma': 0.6, - 'lambdas': np.array([ 0 , 0.1 , 0.2 , 0.3 , 0.4 , 0.5 , 0.6 , 0.7 , 0.8 , 0.9 , 1. ]) - }, - 'FEP_schedule_VDW': - { - 'name': 'VANDERWAALS', - 'description': 'transition_2', - 'label': 'vanderwaals', - 'couple_lambda0': 'none', - 'couple_lambda1': 'vdw', - 'sc_alpha': 0, - 'sc_power': 3, - 'sc_sigma': 0.1, - 'lambdas': np.array([ 0.0 , 0.25 , 0.50 , 0.75 , 1 ]) - }, - 'mdrun': - { - 'stepout': 12000, - 'verbose': False, - 'nice': 12, - 'maxthreads': 1 - } + "DEFAULT": {"qscripts": "custom.sh"}, + "setup": { + "name": "custom_name", + "molecule": "some_molecule_ident", + "itp": "some_molecules_itp", + "structure": "some_molecules_structure", + "watermodel": "spce", + "maxwarn": 2, + "distance": None, # default (not in this input file) + "boxtype": "dodecahedron", # default (not in this input file) + "gromacsoutput": True, + }, + "energy_minimize": {"mdp": "custom_emin.mdp"}, + "MD_relaxed": { + "qscript": "MD_relaxed.sge", + "runtime": 10, + "runlocal": False, + "mdp": "MD_relaxed_NPT_opls.mdp", + }, + "MD_NPT": { + "qscript": "MD_NPT.sge", + "runtime": 10000, + "runlocal": True, + "mdp": "MD_NPT_opls.mdp", + }, + "FEP": { + "method": "TI", + "qscript": "FEP.sge", + "runtime": 1000, + "runlocal": True, + "maxwarn": 3, + "mdp": "fep_custom_opls.mdp", + }, + "FEP_schedule_Coulomb": { + "name": "Coul", + "description": "transition_1", + "label": "coulomb", + "couple_lambda0": "vdw", + "couple_lambda1": "vdw-q", + "sc_alpha": 0.2, + "sc_power": 2, + "sc_sigma": 0.6, + "lambdas": np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]), + }, + "FEP_schedule_VDW": { + "name": "VANDERWAALS", + "description": "transition_2", + "label": "vanderwaals", + "couple_lambda0": "none", + "couple_lambda1": "vdw", + "sc_alpha": 0, + "sc_power": 3, + "sc_sigma": 0.1, + "lambdas": np.array([0.0, 0.25, 0.50, 0.75, 1]), + }, + "mdrun": {"stepout": 12000, "verbose": False, "nice": 12, "maxthreads": 1}, } @pytest.fixture def cfg(self): - return config.get_configuration(str(CONFIGURATIONS / 'altered_runinput.yml')) + return config.get_configuration(str(CONFIGURATIONS / "altered_runinput.yml")) def _test_section(self, cfg, section): section_dict = self.params_altered[section] for k in section_dict.keys(): - if k == 'lambdas': - parsed = np.array([float(x.strip()) for x in cfg.get(section,k).split(",")]) - assert_array_almost_equal(parsed, section_dict[k], - err_msg="mismatch in lambdas") + if k == "lambdas": + parsed = np.array( + [float(x.strip()) for x in cfg.get(section, k).split(",")] + ) + assert_array_almost_equal( + parsed, section_dict[k], err_msg="mismatch in lambdas" + ) else: - assert cfg.get(section,k) == section_dict[k], \ - "mismatch in {}:{}".format(section,k) + assert ( + cfg.get(section, k) == section_dict[k] + ), "mismatch in {}:{}".format(section, k) def test_DEFAULT(self, cfg): return self._test_section(cfg, "DEFAULT") diff --git a/mdpow/tests/test_solv_shell.py b/mdpow/tests/test_solv_shell.py index ac79ef13..0f020c98 100644 --- a/mdpow/tests/test_solv_shell.py +++ b/mdpow/tests/test_solv_shell.py @@ -18,40 +18,47 @@ from . import RESOURCES, MANIFEST + class TestSolvShell(object): def setup_method(self): self.tmpdir = td.TempDir() - self.m = pybol.Manifest(str(RESOURCES / 'manifest.yml')) - self.m.assemble('example_FEP', self.tmpdir.name) - self.ens = Ensemble(dirname=self.tmpdir.name, solvents=['water']) - self.solute = self.ens.select_atoms('not resname SOL') - self.solvent = self.ens.select_atoms('resname SOL and name OW') + self.m = pybol.Manifest(str(RESOURCES / "manifest.yml")) + self.m.assemble("example_FEP", self.tmpdir.name) + self.ens = Ensemble(dirname=self.tmpdir.name, solvents=["water"]) + self.solute = self.ens.select_atoms("not resname SOL") + self.solvent = self.ens.select_atoms("resname SOL and name OW") def teardown_method(self): self.tmpdir.dissolve() def test_dataframe(self): - solv = SolvationAnalysis(self.solute, self.solvent, [1.2]).run(start=0, stop=4, step=1) + solv = SolvationAnalysis(self.solute, self.solvent, [1.2]).run( + start=0, stop=4, step=1 + ) assert isinstance(solv.results, pd.DataFrame) - for d in solv.results['distance']: + for d in solv.results["distance"]: assert d == 1.2 - for s in solv.results['solvent']: - assert s == 'water' - for i in solv.results['interaction'][:12]: - assert i == 'Coulomb' + for s in solv.results["solvent"]: + assert s == "water" + for i in solv.results["interaction"][:12]: + assert i == "Coulomb" - @pytest.fixture(scope='class') + @pytest.fixture(scope="class") def solvation_analysis_list_results(self): self.setup_method() # Won't have solute and solvent without this - return SolvationAnalysis(self.solute, self.solvent, [2, 10]).run(start=0, stop=4, step=1) + return SolvationAnalysis(self.solute, self.solvent, [2, 10]).run( + start=0, stop=4, step=1 + ) - @pytest.mark.parametrize("d,ref_mean,ref_std", [(2, 1.10714285,2.07604166), - (10, 5306.89285714, 129.16720594)]) + @pytest.mark.parametrize( + "d,ref_mean,ref_std", + [(2, 1.10714285, 2.07604166), (10, 5306.89285714, 129.16720594)], + ) def test_selection(self, solvation_analysis_list_results, d, ref_mean, ref_std): results = solvation_analysis_list_results.results - mean = np.mean(results.loc[results['distance'] == d]['N_solvent']) - std = np.std(results.loc[results['distance'] == d]['N_solvent']) + mean = np.mean(results.loc[results["distance"] == d]["N_solvent"]) + std = np.std(results.loc[results["distance"] == d]["N_solvent"]) assert mean == pytest.approx(ref_mean) assert std == pytest.approx(ref_std) diff --git a/mdpow/tests/test_solvation.py b/mdpow/tests/test_solvation.py index d1d0c9e4..d2d75404 100644 --- a/mdpow/tests/test_solvation.py +++ b/mdpow/tests/test_solvation.py @@ -10,58 +10,67 @@ from . import RESOURCES -sims = {"water" : equil.WaterSimulation, - "octanol" : equil.OctanolSimulation, - "cyclohexane" : equil.CyclohexaneSimulation, - "wetoctanol" : equil.WetOctanolSimulation, - "toluene": equil.TolueneSimulation, - } - -test_file = {"OPLS-AA": 'benzene.itp', - "CHARMM": 'benzene_charmm.itp', - "AMBER": 'benzene_amber.itp', - } +sims = { + "water": equil.WaterSimulation, + "octanol": equil.OctanolSimulation, + "cyclohexane": equil.CyclohexaneSimulation, + "wetoctanol": equil.WetOctanolSimulation, + "toluene": equil.TolueneSimulation, +} + +test_file = { + "OPLS-AA": "benzene.itp", + "CHARMM": "benzene_charmm.itp", + "AMBER": "benzene_amber.itp", +} + @pytest.fixture def setup(tmpdir): - newdir = tmpdir.mkdir('resources') - files = ['benzene.pdb', 'benzene.itp', - 'benzene_charmm.itp', 'benzene_amber.itp'] + newdir = tmpdir.mkdir("resources") + files = ["benzene.pdb", "benzene.itp", "benzene_charmm.itp", "benzene_amber.itp"] for f in files: - orig = RESOURCES / 'molecules' / 'benzene' / f + orig = RESOURCES / "molecules" / "benzene" / f shutil.copy(orig, newdir.dirname) return newdir.dirname -def solvation(setup, solvent, ff='OPLS-AA'): + +def solvation(setup, solvent, ff="OPLS-AA"): itp = test_file[ff] with in_dir(setup, create=False): try: - S = sims[solvent](molecule='BNZ', forcefield=ff) + S = sims[solvent](molecule="BNZ", forcefield=ff) S.topology(itp=itp) - S.solvate(struct='benzene.pdb') + S.solvate(struct="benzene.pdb") except Exception: - raise AssertionError('Solvation failed.') + raise AssertionError("Solvation failed.") + -@pytest.mark.parametrize("ff", ['OPLS-AA', 'CHARMM', 'AMBER']) +@pytest.mark.parametrize("ff", ["OPLS-AA", "CHARMM", "AMBER"]) def test_solvation_water(setup, ff): solvation(setup, "water", ff) -@pytest.mark.parametrize("ff", ['OPLS-AA', 'CHARMM', 'AMBER']) + +@pytest.mark.parametrize("ff", ["OPLS-AA", "CHARMM", "AMBER"]) def test_solvation_octanol(setup, ff): solvation(setup, "octanol", ff) + def test_solvation_cyclohexane(setup): solvation(setup, "cyclohexane") -@pytest.mark.parametrize("ff", ['OPLS-AA', 'AMBER']) + +@pytest.mark.parametrize("ff", ["OPLS-AA", "AMBER"]) def test_solvation_toluene(setup, ff): solvation(setup, "toluene", ff) -@pytest.mark.xfail(gromacs.release.startswith('4') - or gromacs.release.startswith('5') - or gromacs.release.startswith('2016'), - reason="GROMACS < 2018 cannot easily work with mixed solvents " - "(see issue #111)") -@pytest.mark.parametrize("ff", ['OPLS-AA', 'CHARMM', 'AMBER']) + +@pytest.mark.xfail( + gromacs.release.startswith("4") + or gromacs.release.startswith("5") + or gromacs.release.startswith("2016"), + reason="GROMACS < 2018 cannot easily work with mixed solvents " "(see issue #111)", +) +@pytest.mark.parametrize("ff", ["OPLS-AA", "CHARMM", "AMBER"]) def test_solvation_wetoctanol(setup, ff): solvation(setup, "wetoctanol", ff) diff --git a/mdpow/tests/test_version.py b/mdpow/tests/test_version.py index 187f27d4..28968abd 100644 --- a/mdpow/tests/test_version.py +++ b/mdpow/tests/test_version.py @@ -2,16 +2,17 @@ import mdpow + @pytest.fixture(scope="module") def version(): return mdpow.__version__ + def test_version_string(version): assert isinstance(version, str) + def test_version(version): # generic non-empty check because versioneer can provide different # answers depending on VCS status assert version - - diff --git a/mdpow/tests/test_workflows_base.py b/mdpow/tests/test_workflows_base.py index 665e58d3..d46e1080 100644 --- a/mdpow/tests/test_workflows_base.py +++ b/mdpow/tests/test_workflows_base.py @@ -16,10 +16,11 @@ from pkg_resources import resource_filename from mdpow.workflows import base + @pytest.fixture def molname_workflows_directory(tmp_path): m = pybol.Manifest(str(MANIFEST)) - m.assemble('workflows', tmp_path) + m.assemble("workflows", tmp_path) return tmp_path @@ -32,30 +33,38 @@ def universe(request): u.add_TopologyAttr("masses", masses) return u -@pytest.mark.parametrize("universe,elements", - [ - [ - (np.array([12.011, 14.007, 0, 12.011, 35.45, 12.011]), - np.array(["C", "Nx", "DUMMY", "C0S", "Cl123", "C0U"])), - np.array(['C', 'N', 'DUMMY', 'C', 'CL', 'C']) - ], - [ - (np.array([12.011, 14.007, 0, 35.45]), - np.array(["C", "Nx", "DUMMY", "Cl123"])), - np.array(['C', 'N', 'DUMMY', 'CL']) - ], - [ - (np.array([15.999, 0, 40.08, 40.08, 40.08, 24.305, 132.9]), - np.array(["OW", "MW", "C0", "CAL", "CA2+", "MG2+", "CES"])), - np.array(['O', 'DUMMY', 'CA', 'CA', 'CA', 'MG', 'CS']) - ], - [ - (np.array([16, 1e-6, 40.085, 133]), - np.array(["OW", "MW", "CA2+", "CES"])), - np.array(['O', 'DUMMY', 'CA', 'CS']) - ], - ], - indirect=["universe"]) + +@pytest.mark.parametrize( + "universe,elements", + [ + [ + ( + np.array([12.011, 14.007, 0, 12.011, 35.45, 12.011]), + np.array(["C", "Nx", "DUMMY", "C0S", "Cl123", "C0U"]), + ), + np.array(["C", "N", "DUMMY", "C", "CL", "C"]), + ], + [ + ( + np.array([12.011, 14.007, 0, 35.45]), + np.array(["C", "Nx", "DUMMY", "Cl123"]), + ), + np.array(["C", "N", "DUMMY", "CL"]), + ], + [ + ( + np.array([15.999, 0, 40.08, 40.08, 40.08, 24.305, 132.9]), + np.array(["OW", "MW", "C0", "CAL", "CA2+", "MG2+", "CES"]), + ), + np.array(["O", "DUMMY", "CA", "CA", "CA", "MG", "CS"]), + ], + [ + (np.array([16, 1e-6, 40.085, 133]), np.array(["OW", "MW", "CA2+", "CES"])), + np.array(["O", "DUMMY", "CA", "CS"]), + ], + ], + indirect=["universe"], +) def test_guess_elements(universe, elements): u = universe guessed_elements = base.guess_elements(u.atoms) @@ -63,7 +72,6 @@ def test_guess_elements(universe, elements): assert_equal(guessed_elements, elements) - class TestWorkflowsBase(object): @pytest.fixture def SM_tmp_dir(self, molname_workflows_directory): @@ -72,14 +80,13 @@ def SM_tmp_dir(self, molname_workflows_directory): @pytest.fixture def csv_input_data(self): - csv_path = STATES['workflows'] / 'project_paths.csv' + csv_path = STATES["workflows"] / "project_paths.csv" csv_df = pd.read_csv(csv_path).reset_index(drop=True) return csv_path, csv_df @pytest.fixture def test_df_data(self): - test_dict = {'molecule' : ['SM25', 'SM26'], - 'resname' : ['SM25', 'SM26']} + test_dict = {"molecule": ["SM25", "SM26"], "resname": ["SM25", "SM26"]} test_df = pd.DataFrame(test_dict).reset_index(drop=True) return test_df @@ -92,10 +99,10 @@ def test_project_paths(self, test_df_data, project_paths_data): test_df = test_df_data project_paths = project_paths_data - assert project_paths['molecule'][0] == test_df['molecule'][0] - assert project_paths['molecule'][1] == test_df['molecule'][1] - assert project_paths['resname'][0] == test_df['resname'][0] - assert project_paths['resname'][1] == test_df['resname'][1] + assert project_paths["molecule"][0] == test_df["molecule"][0] + assert project_paths["molecule"][1] == test_df["molecule"][1] + assert project_paths["resname"][0] == test_df["resname"][0] + assert project_paths["resname"][1] == test_df["resname"][1] def test_project_paths_csv_input(self, csv_input_data): csv_path, csv_df = csv_input_data @@ -105,39 +112,51 @@ def test_project_paths_csv_input(self, csv_input_data): def test_dihedral_analysis_figdir_requirement(self, project_paths_data, caplog): caplog.clear() - caplog.set_level(logging.ERROR, logger='mdpow.workflows.base') + caplog.set_level(logging.ERROR, logger="mdpow.workflows.base") project_paths = project_paths_data # change resname to match topology (every SAMPL7 resname is 'UNK') # only necessary for this dataset, not necessary for normal use - project_paths['resname'] = 'UNK' - - with pytest.raises(AssertionError, - match="figdir MUST be set, even though it is a kwarg. Will be changed with #244"): + project_paths["resname"] = "UNK" - base.automated_project_analysis(project_paths, solvents=('water',), - ensemble_analysis='DihedralAnalysis') + with pytest.raises( + AssertionError, + match="figdir MUST be set, even though it is a kwarg. Will be changed with #244", + ): + base.automated_project_analysis( + project_paths, solvents=("water",), ensemble_analysis="DihedralAnalysis" + ) - assert 'all analyses completed' in caplog.text, ('automated_dihedral_analysis ' - 'did not iteratively run to completion for the provided project') + assert "all analyses completed" in caplog.text, ( + "automated_dihedral_analysis " + "did not iteratively run to completion for the provided project" + ) def test_automated_project_analysis_KeyError(self, project_paths_data, caplog): caplog.clear() - caplog.set_level(logging.ERROR, logger='mdpow.workflows.base') + caplog.set_level(logging.ERROR, logger="mdpow.workflows.base") project_paths = project_paths_data # change resname to match topology (every SAMPL7 resname is 'UNK') # only necessary for this dataset, not necessary for normal use - project_paths['resname'] = 'UNK' + project_paths["resname"] = "UNK" # test error output when raised - with pytest.raises(KeyError, - match="Invalid ensemble_analysis 'DarthVaderAnalysis'. " - "An EnsembleAnalysis type that corresponds to an existing " - "automated workflow module must be input as a kwarg. ex: " - "ensemble_analysis='DihedralAnalysis'"): - base.automated_project_analysis(project_paths, ensemble_analysis='DarthVaderAnalysis', solvents=('water',)) + with pytest.raises( + KeyError, + match="Invalid ensemble_analysis 'DarthVaderAnalysis'. " + "An EnsembleAnalysis type that corresponds to an existing " + "automated workflow module must be input as a kwarg. ex: " + "ensemble_analysis='DihedralAnalysis'", + ): + base.automated_project_analysis( + project_paths, + ensemble_analysis="DarthVaderAnalysis", + solvents=("water",), + ) # test logger error recording - assert "'DarthVaderAnalysis' is an invalid selection" in caplog.text, ('did not catch incorrect ' - 'key specification for workflows.registry that results in KeyError') + assert "'DarthVaderAnalysis' is an invalid selection" in caplog.text, ( + "did not catch incorrect " + "key specification for workflows.registry that results in KeyError" + ) diff --git a/mdpow/tests/test_workflows_registry.py b/mdpow/tests/test_workflows_registry.py index a1f7b708..50143818 100644 --- a/mdpow/tests/test_workflows_registry.py +++ b/mdpow/tests/test_workflows_registry.py @@ -2,5 +2,6 @@ from mdpow.workflows import registry + def test_registry(): - assert list(registry.registry.keys()) == ['DihedralAnalysis'] + assert list(registry.registry.keys()) == ["DihedralAnalysis"] diff --git a/mdpow/workflows/base.py b/mdpow/workflows/base.py index 8f4ef2e6..93ddb561 100644 --- a/mdpow/workflows/base.py +++ b/mdpow/workflows/base.py @@ -29,123 +29,126 @@ import pandas as pd from MDAnalysis.topology import guessers, tables -logger = logging.getLogger('mdpow.workflows.base') +logger = logging.getLogger("mdpow.workflows.base") + def project_paths(parent_directory=None, csv=None, csv_save_dir=None): """Takes a top directory containing MDPOW projects and determines - the molname, resname, and path, of each MDPOW project within. + the molname, resname, and path, of each MDPOW project within. - Optionally takes a .csv file containing `molname`, `resname`, and - `paths`, in that order. + Optionally takes a .csv file containing `molname`, `resname`, and + `paths`, in that order. - :keywords: + :keywords: - *parent_directory* - the path for the location of the top directory - under which the subdirectories of MDPOW simulation - data exist, additionally creates a 'project_paths.csv' file - for user manipulation of metadata and for future reference + *parent_directory* + the path for the location of the top directory + under which the subdirectories of MDPOW simulation + data exist, additionally creates a 'project_paths.csv' file + for user manipulation of metadata and for future reference - *csv* - .csv file containing the molecule names, resnames, - and paths, in that order, for the MDPOW simulation - data to be iterated over must contain header of the - form: `molecule,resname,path` + *csv* + .csv file containing the molecule names, resnames, + and paths, in that order, for the MDPOW simulation + data to be iterated over must contain header of the + form: `molecule,resname,path` - *csv_save_dir* - optionally provided directory to save .csv file, otherwise, - data will be saved in current working directory + *csv_save_dir* + optionally provided directory to save .csv file, otherwise, + data will be saved in current working directory - :returns: + :returns: - *project_paths* - :class:`pandas.DataFrame` containing MDPOW project metadata + *project_paths* + :class:`pandas.DataFrame` containing MDPOW project metadata - .. rubric:: Example + .. rubric:: Example - Typical Workflow:: + Typical Workflow:: - project_paths = project_paths(parent_directory='/foo/bar/MDPOW_projects') - automated_project_analysis(project_paths) + project_paths = project_paths(parent_directory='/foo/bar/MDPOW_projects') + automated_project_analysis(project_paths) - or:: + or:: - project_paths = project_paths(csv='/foo/bar/MDPOW.csv') - automated_project_analysis(project_paths) + project_paths = project_paths(csv='/foo/bar/MDPOW.csv') + automated_project_analysis(project_paths) """ if parent_directory is not None: - locations = [] - reg_compile = re.compile('FEP') + reg_compile = re.compile("FEP") for dirpath, dirnames, filenames in os.walk(parent_directory): - result = [dirpath.strip() for dirname in dirnames if reg_compile.match(dirname)] + result = [ + dirpath.strip() for dirname in dirnames if reg_compile.match(dirname) + ] if result: locations.append(result[0]) resnames = [] for loc in locations: - res_temp = loc.strip().split('/') + res_temp = loc.strip().split("/") resnames.append(res_temp[-1]) - project_paths = pd.DataFrame( - { - 'molecule': resnames, - 'resname': resnames, - 'path': locations - }).sort_values(by=['molecule', 'resname', 'path'] - ).reset_index(drop=True) + project_paths = ( + pd.DataFrame({"molecule": resnames, "resname": resnames, "path": locations}) + .sort_values(by=["molecule", "resname", "path"]) + .reset_index(drop=True) + ) if csv_save_dir is not None: - project_paths.to_csv(f'{csv_save_dir}/project_paths.csv', index=False) - logger.info(f'project_paths saved under {csv_save_dir}') + project_paths.to_csv(f"{csv_save_dir}/project_paths.csv", index=False) + logger.info(f"project_paths saved under {csv_save_dir}") else: current_directory = os.getcwd() - project_paths.to_csv('project_paths.csv', index=False) - logger.info(f'project_paths saved under {current_directory}') + project_paths.to_csv("project_paths.csv", index=False) + logger.info(f"project_paths saved under {current_directory}") elif csv is not None: locations = pd.read_csv(csv) - project_paths = locations.sort_values(by=['molecule', 'resname', 'path']).reset_index(drop=True) + project_paths = locations.sort_values( + by=["molecule", "resname", "path"] + ).reset_index(drop=True) return project_paths + def automated_project_analysis(project_paths, ensemble_analysis, **kwargs): """Takes a :class:`pandas.DataFrame` created by :func:`~mdpow.workflows.base.project_paths` - and iteratively runs the specified :class:`~mdpow.analysis.ensemble.EnsembleAnalysis` - for each of the projects by running the associated automated workflow - in each project directory returned by :func:`~mdpow.workflows.base.project_paths`. + and iteratively runs the specified :class:`~mdpow.analysis.ensemble.EnsembleAnalysis` + for each of the projects by running the associated automated workflow + in each project directory returned by :func:`~mdpow.workflows.base.project_paths`. - Compatibility with more automated analyses in development. + Compatibility with more automated analyses in development. - :keywords: + :keywords: - *project_paths* - :class:`pandas.DataFrame` that provides paths to MDPOW projects + *project_paths* + :class:`pandas.DataFrame` that provides paths to MDPOW projects - *ensemble_analysis* - name of the :class:`~mdpow.analysis.ensemble.EnsembleAnalysis` - that corresponds to the desired automated workflow module + *ensemble_analysis* + name of the :class:`~mdpow.analysis.ensemble.EnsembleAnalysis` + that corresponds to the desired automated workflow module - *kwargs* - keyword arguments for the supported automated workflows, - see the :mod:`~mdpow.workflows.registry` for all available - workflows and their call signatures + *kwargs* + keyword arguments for the supported automated workflows, + see the :mod:`~mdpow.workflows.registry` for all available + workflows and their call signatures - .. rubric:: Example + .. rubric:: Example - A typical workflow is the automated dihedral analysis from - :mod:`mdpow.workflows.dihedrals`, which applies the *ensemble analysis* - :class:`~mdpow.analysis.dihedral.DihedralAnalysis` to each project. - The :data:`~mdpow.workflows.registry.registry` contains this automated - workflow under the key *"DihedralAnalysis"* and so the automated execution - for all `project_paths` (obtained via :func:`project_paths`) is performed by - passing the specific key to :func:`automated_project_analysis`:: + A typical workflow is the automated dihedral analysis from + :mod:`mdpow.workflows.dihedrals`, which applies the *ensemble analysis* + :class:`~mdpow.analysis.dihedral.DihedralAnalysis` to each project. + The :data:`~mdpow.workflows.registry.registry` contains this automated + workflow under the key *"DihedralAnalysis"* and so the automated execution + for all `project_paths` (obtained via :func:`project_paths`) is performed by + passing the specific key to :func:`automated_project_analysis`:: - project_paths = project_paths(parent_directory='/foo/bar/MDPOW_projects') - automated_project_analysis(project_paths, ensemble_analysis='DihedralAnalysis', **kwargs) + project_paths = project_paths(parent_directory='/foo/bar/MDPOW_projects') + automated_project_analysis(project_paths, ensemble_analysis='DihedralAnalysis', **kwargs) """ # import inside function to avoid circular imports @@ -156,27 +159,34 @@ def automated_project_analysis(project_paths, ensemble_analysis, **kwargs): resname = row.resname dirname = row.path - logger.info(f'starting {molname}') + logger.info(f"starting {molname}") try: - registry[ensemble_analysis](dirname=dirname, resname=resname, molname=molname, **kwargs) - logger.info(f'{molname} completed') + registry[ensemble_analysis]( + dirname=dirname, resname=resname, molname=molname, **kwargs + ) + logger.info(f"{molname} completed") except KeyError as err: - msg = (f"Invalid ensemble_analysis {err}. An EnsembleAnalysis type that corresponds " - "to an existing automated workflow module must be input as a kwarg. " - "ex: ensemble_analysis='DihedralAnalysis'") - logger.error(f'{err} is an invalid selection') + msg = ( + f"Invalid ensemble_analysis {err}. An EnsembleAnalysis type that corresponds " + "to an existing automated workflow module must be input as a kwarg. " + "ex: ensemble_analysis='DihedralAnalysis'" + ) + logger.error(f"{err} is an invalid selection") raise KeyError(msg) except TypeError as err: - msg = (f"Invalid ensemble_analysis {ensemble_analysis}. An EnsembleAnalysis type that " - "corresponds to an existing automated workflow module must be input as a kwarg. " - "ex: ensemble_analysis='DihedralAnalysis'") - logger.error(f'workflow module for {ensemble_analysis} does not exist yet') + msg = ( + f"Invalid ensemble_analysis {ensemble_analysis}. An EnsembleAnalysis type that " + "corresponds to an existing automated workflow module must be input as a kwarg. " + "ex: ensemble_analysis='DihedralAnalysis'" + ) + logger.error(f"workflow module for {ensemble_analysis} does not exist yet") raise TypeError(msg) - logger.info('all analyses completed') + logger.info("all analyses completed") return + def guess_elements(atoms, rtol=1e-3): """guess elements for atoms from masses @@ -230,13 +240,15 @@ def guess_elements(atoms, rtol=1e-3): problems = np.logical_not(np.isclose(masses, guessed_masses, atol=ATOL, rtol=rtol)) # match only problematic masses against the MDA reference masses - iproblem, ielem = np.nonzero(np.isclose(masses[problems, np.newaxis], mda_masses, - atol=ATOL, rtol=rtol)) + iproblem, ielem = np.nonzero( + np.isclose(masses[problems, np.newaxis], mda_masses, atol=ATOL, rtol=rtol) + ) # We should normally find a match for each problem but just in case, assert and # give some useful information for debugging. - assert len(ielem) == sum(problems),\ - ("Not all masses could be assigned an element, " - f"missing names {set(names[problems]) - set(names[problems][iproblem])}") + assert len(ielem) == sum(problems), ( + "Not all masses could be assigned an element, " + f"missing names {set(names[problems]) - set(names[problems][iproblem])}" + ) guessed_elements[problems] = mda_elements[ielem] diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 61833108..000742eb 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -64,9 +64,9 @@ from .base import guess_elements from ..analysis import ensemble, dihedral -logger = logging.getLogger('mdpow.workflows.dihedrals') +logger = logging.getLogger("mdpow.workflows.dihedrals") -SOLVENTS_DEFAULT = ('water', 'octanol') +SOLVENTS_DEFAULT = ("water", "octanol") """Default solvents are water and octanol: * must match solvents used in project directory @@ -77,7 +77,7 @@ """ -INTERACTIONS_DEFAULT = ('Coulomb', 'VDW') +INTERACTIONS_DEFAULT = ("Coulomb", "VDW") """Default interactions set to Coulomb and VDW: * default values should not be changed @@ -85,7 +85,7 @@ """ -SMARTS_DEFAULT = '[!#1]~[!$(*#*)&!D1]-!@[!$(*#*)&!D1]~[!#1]' +SMARTS_DEFAULT = "[!#1]~[!$(*#*)&!D1]-!@[!$(*#*)&!D1]~[!#1]" """Default SMARTS string to identify relevant dihedral atom groups: * ``[!#1]`` : any atom, not Hydrogen @@ -109,89 +109,90 @@ default value: 190 mm = 718.110236229 pixels """ + def build_universe(dirname, solvents=SOLVENTS_DEFAULT): """Builds :class:`~MDAnalysis.core.universe.Universe` from the - ``./Coulomb/0000`` topology and trajectory of the project for - the first solvent specified. - - Output used by :func:`~mdpow.workflows.dihedrals.rdkit_conversion` - and :func:`~mdpow.workflows.dihedrals.get_atom_indices` to obtain atom indices - for each dihedral atom group. - - :keywords: - - *dirname* - Molecule Simulation directory. Loads simulation files present in - lambda directories into the new instance. With this method for - generating an :class:`~mdpow.analysis.ensemble.Ensemble` the - lambda directories are explored and - :meth:`~mdpow.analysis.ensemble.Ensemble._load_universe_from_dir` - searches for .gro, .gro.bz2, .gro.gz, and .tpr files for topology, - and .xtc files for trajectory. It will default to using the tpr file - available. - - *solvents* - The default solvents are documented under :data:`SOLVENTS_DEFAULT`. - Normally takes a two-tuple, but analysis is compatible with single solvent selections. - Single solvent analyses will result in a figure with fully filled violins for the single solvent. - - :returns: - - *u* - :class:`~MDAnalysis.core.universe.Universe` object + ``./Coulomb/0000`` topology and trajectory of the project for + the first solvent specified. + + Output used by :func:`~mdpow.workflows.dihedrals.rdkit_conversion` + and :func:`~mdpow.workflows.dihedrals.get_atom_indices` to obtain atom indices + for each dihedral atom group. + + :keywords: + + *dirname* + Molecule Simulation directory. Loads simulation files present in + lambda directories into the new instance. With this method for + generating an :class:`~mdpow.analysis.ensemble.Ensemble` the + lambda directories are explored and + :meth:`~mdpow.analysis.ensemble.Ensemble._load_universe_from_dir` + searches for .gro, .gro.bz2, .gro.gz, and .tpr files for topology, + and .xtc files for trajectory. It will default to using the tpr file + available. + + *solvents* + The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. + + :returns: + + *u* + :class:`~MDAnalysis.core.universe.Universe` object """ path = pathlib.Path(dirname) - topology = path / f'FEP/{solvents[0]}/Coulomb/0000' / 'md.tpr' - trajectory = path / f'FEP/{solvents[0]}/Coulomb/0000' / 'md.xtc' + topology = path / f"FEP/{solvents[0]}/Coulomb/0000" / "md.tpr" + trajectory = path / f"FEP/{solvents[0]}/Coulomb/0000" / "md.xtc" u = mda.Universe(str(topology), str(trajectory)) return u + def rdkit_conversion(u, resname): """Converts the solute, `resname`, of the - :class:`~MDAnalysis.core.universe.Universe` to :class:`rdkit.Chem.rdchem.Mol` object - for use with a SMARTS selection string to identify dihedral atom groups. + :class:`~MDAnalysis.core.universe.Universe` to :class:`rdkit.Chem.rdchem.Mol` object + for use with a SMARTS selection string to identify dihedral atom groups. - Accepts :class:`~MDAnalysis.core.universe.Universe` object made with - :func:`~mdpow.workflows.dihedrals.build_universe` and a `resname` as input. - Uses `resname` to select the solute for conversion by - :class:`~MDAnalysis.converters.RDKit.RDKitConverter` to :class:`rdkit.Chem.rdchem.Mol`, - and will add element attributes for Hydrogen if not listed in the topology, - using :func:`MDAnalysis.topology.guessers.guess_atom_element`. + Accepts :class:`~MDAnalysis.core.universe.Universe` object made with + :func:`~mdpow.workflows.dihedrals.build_universe` and a `resname` as input. + Uses `resname` to select the solute for conversion by + :class:`~MDAnalysis.converters.RDKit.RDKitConverter` to :class:`rdkit.Chem.rdchem.Mol`, + and will add element attributes for Hydrogen if not listed in the topology, + using :func:`MDAnalysis.topology.guessers.guess_atom_element`. - :keywords: + :keywords: - *u* - :class:`~MDAnalysis.core.universe.Universe` object + *u* + :class:`~MDAnalysis.core.universe.Universe` object - *resname* - `resname` for the molecule as defined in - the topology and trajectory + *resname* + `resname` for the molecule as defined in + the topology and trajectory - :returns: + :returns: - *tuple(mol, solute)* - function call returns tuple, see below + *tuple(mol, solute)* + function call returns tuple, see below - *mol* - :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` + *mol* + :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` - *solute* - the :any:`MDAnalysis` `AtomGroup` for the solute + *solute* + the :any:`MDAnalysis` `AtomGroup` for the solute """ - try: - solute = u.select_atoms(f'resname {resname}') - mol = solute.convert_to('RDKIT') + solute = u.select_atoms(f"resname {resname}") + mol = solute.convert_to("RDKIT") except AttributeError: guessed_elements = guess_elements(u.atoms) u.add_TopologyAttr("elements", guessed_elements) - solute = u.select_atoms(f'resname {resname}') - mol = solute.convert_to('RDKIT') + solute = u.select_atoms(f"resname {resname}") + mol = solute.convert_to("RDKIT") rdCoordGen.AddCoords(mol) @@ -200,58 +201,59 @@ def rdkit_conversion(u, resname): return mol, solute + def get_atom_indices(mol, SMARTS=SMARTS_DEFAULT): - '''Uses a SMARTS selection string to identify atom indices - for relevant dihedral atom groups. + """Uses a SMARTS selection string to identify atom indices + for relevant dihedral atom groups. - Requires a :class:`rdkit.Chem.rdchem.Mol` object as input - for the :data:`SMARTS_DEFAULT` kwarg to match patterns to and - identify relevant dihedral atom groups. + Requires a :class:`rdkit.Chem.rdchem.Mol` object as input + for the :data:`SMARTS_DEFAULT` kwarg to match patterns to and + identify relevant dihedral atom groups. - :keywords: + :keywords: - *mol* - :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` + *mol* + :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` - *SMARTS* - The default SMARTS string is described in detail under :data:`SMARTS_DEFAULT`. + *SMARTS* + The default SMARTS string is described in detail under :data:`SMARTS_DEFAULT`. - :returns: + :returns: - *atom_indices* - tuple of tuples of indices for each dihedral atom group + *atom_indices* + tuple of tuples of indices for each dihedral atom group - ''' + """ pattern = Chem.MolFromSmarts(SMARTS) atom_indices = mol.GetSubstructMatches(pattern) - + return atom_indices + def get_bond_indices(mol, atom_indices): - '''From the :class:`rdkit.Chem.rdchem.Mol` object, uses - `atom_indices` to determine the indices of the bonds - between those atoms for each dihedral atom group. + """From the :class:`rdkit.Chem.rdchem.Mol` object, uses + `atom_indices` to determine the indices of the bonds + between those atoms for each dihedral atom group. - :keywords: + :keywords: - *mol* - :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` + *mol* + :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` - *atom_indices* - tuple of tuples of indices for each dihedral atom group + *atom_indices* + tuple of tuples of indices for each dihedral atom group - :returns: + :returns: - *bond_indices* - tuple of tuples of indices for the bonds in each dihedral atom group + *bond_indices* + tuple of tuples of indices for the bonds in each dihedral atom group + + """ - ''' - bonds = [] for atom_index in atom_indices: - x = mol.GetBondBetweenAtoms(atom_index[0], atom_index[1]).GetIdx() y = mol.GetBondBetweenAtoms(atom_index[1], atom_index[2]).GetIdx() z = mol.GetBondBetweenAtoms(atom_index[2], atom_index[3]).GetIdx() @@ -263,137 +265,150 @@ def get_bond_indices(mol, atom_indices): return bond_indices + def get_dihedral_groups(solute, atom_indices): - '''Uses the 0-based `atom_indices` of the relevant dihedral atom groups - determined by :func:`~mdpow.workflows.dihedrals.get_atom_indices` - and returns the 1-based index names for each atom in each group. + """Uses the 0-based `atom_indices` of the relevant dihedral atom groups + determined by :func:`~mdpow.workflows.dihedrals.get_atom_indices` + and returns the 1-based index names for each atom in each group. - Requires the `atom_indices` from :func:`~mdpow.workflows.dihedrals.get_atom_indices` - to index the `solute` specified by :func:`~MDAnalysis.core.groups.select_atoms` - and return an array of the names of each atom within its respective - dihedral atom group as identified by the SMARTS selection string. + Requires the `atom_indices` from :func:`~mdpow.workflows.dihedrals.get_atom_indices` + to index the `solute` specified by :func:`~MDAnalysis.core.groups.select_atoms` + and return an array of the names of each atom within its respective + dihedral atom group as identified by the SMARTS selection string. - :keywords: + :keywords: - *solute* - the :any:`MDAnalysis` `AtomGroup` for the solute + *solute* + the :any:`MDAnalysis` `AtomGroup` for the solute - *atom_indices* - tuple of tuples of indices for each dihedral atom group + *atom_indices* + tuple of tuples of indices for each dihedral atom group - :returns: + :returns: - *dihedral_groups* - list of :func:`numpy.array` for atom names in each dihedral atom group + *dihedral_groups* + list of :func:`numpy.array` for atom names in each dihedral atom group - ''' + """ # currently uses RDKit Mol object atom indices to retrieve # atom names from the MDAnalysis solute object # RDKit-MDAnalysis index consistency is currently tested - dihedral_groups = [solute.atoms[list(atom_index)].names for atom_index - in atom_indices] + dihedral_groups = [ + solute.atoms[list(atom_index)].names for atom_index in atom_indices + ] return dihedral_groups + def get_paired_indices(atom_indices, bond_indices, dihedral_groups): - '''Combines `atom_indices` and `bond_indices` in tuples - to be paired with their respective dihedral atom groups. + """Combines `atom_indices` and `bond_indices` in tuples + to be paired with their respective dihedral atom groups. - A dictionary is created with key-value pairs as follows: - `atom_indices` and `bond_indices` are joined in a tuple - as the value, with the key being the respective member - of `dihedral_groups` to facilitate highlighting the - relevant dihedral atom group when generating violin plots. - As an example, `'C1-N2-O3-S4': ((0, 1, 2, 3), (0, 1, 2))`, - would be one key-value pair in the dictionary. + A dictionary is created with key-value pairs as follows: + `atom_indices` and `bond_indices` are joined in a tuple + as the value, with the key being the respective member + of `dihedral_groups` to facilitate highlighting the + relevant dihedral atom group when generating violin plots. + As an example, `'C1-N2-O3-S4': ((0, 1, 2, 3), (0, 1, 2))`, + would be one key-value pair in the dictionary. - :keywords: + :keywords: - *atom_indices* - tuple of tuples of indices for each dihedral atom group + *atom_indices* + tuple of tuples of indices for each dihedral atom group - *bond_indices* - tuple of tuples of indices for the bonds in each dihedral atom group + *bond_indices* + tuple of tuples of indices for the bonds in each dihedral atom group - *dihedral_groups* - list of :func:`numpy.array` for atom names in each dihedral atom group + *dihedral_groups* + list of :func:`numpy.array` for atom names in each dihedral atom group - :returns: + :returns: - *name_index_pairs* - dictionary with key-value pair for dihedral atom group, - atom indices, and bond indices - - ''' + *name_index_pairs* + dictionary with key-value pair for dihedral atom group, + atom indices, and bond indices + + """ - all_dgs = [f'{dg[0]}-{dg[1]}-{dg[2]}-{dg[3]}' for dg in dihedral_groups] + all_dgs = [f"{dg[0]}-{dg[1]}-{dg[2]}-{dg[3]}" for dg in dihedral_groups] name_index_pairs = {} - name_index_pairs = {all_dgs[i]: (atom_indices[i], bond_indices[i]) for i in range(len(all_dgs))} + name_index_pairs = { + all_dgs[i]: (atom_indices[i], bond_indices[i]) for i in range(len(all_dgs)) + } return name_index_pairs -def dihedral_groups_ensemble(dirname, atom_indices, - solvents=SOLVENTS_DEFAULT, - interactions=INTERACTIONS_DEFAULT, - start=None, stop=None, step=None): - '''Creates one :class:`~mdpow.analysis.ensemble.Ensemble` for the MDPOW - project and runs :class:`~mdpow.analysis.dihedral.DihedralAnalysis` - for each dihedral atom group identified by the SMARTS - selection string. - .. seealso:: +def dihedral_groups_ensemble( + dirname, + atom_indices, + solvents=SOLVENTS_DEFAULT, + interactions=INTERACTIONS_DEFAULT, + start=None, + stop=None, + step=None, +): + """Creates one :class:`~mdpow.analysis.ensemble.Ensemble` for the MDPOW + project and runs :class:`~mdpow.analysis.dihedral.DihedralAnalysis` + for each dihedral atom group identified by the SMARTS + selection string. - :func:`~mdpow.workflows.dihedrals.automated_dihedral_analysis`, - :class:`~mdpow.analysis.dihedral.DihedralAnalysis` + .. seealso:: - :keywords: + :func:`~mdpow.workflows.dihedrals.automated_dihedral_analysis`, + :class:`~mdpow.analysis.dihedral.DihedralAnalysis` - *dirname* - Molecule Simulation directory. Loads simulation files present in - lambda directories into the new instance. With this method for - generating an :class:`~mdpow.analysis.ensemble.Ensemble` the - lambda directories are explored and - :meth:`~mdpow.analysis.ensemble.Ensemble._load_universe_from_dir` - searches for .gro, .gro.bz2, .gro.gz, and .tpr files for topology, - and .xtc files for trajectory. It will default to using the tpr file - available. + :keywords: - *atom_indices* - tuples of atom indices for dihedral atom groups + *dirname* + Molecule Simulation directory. Loads simulation files present in + lambda directories into the new instance. With this method for + generating an :class:`~mdpow.analysis.ensemble.Ensemble` the + lambda directories are explored and + :meth:`~mdpow.analysis.ensemble.Ensemble._load_universe_from_dir` + searches for .gro, .gro.bz2, .gro.gz, and .tpr files for topology, + and .xtc files for trajectory. It will default to using the tpr file + available. - .. seealso:: :func:`~mdpow.workflows.dihedrals.get_atom_indices`, :data:`SMARTS_DEFAULT` + *atom_indices* + tuples of atom indices for dihedral atom groups - *solvents* - The default solvents are documented under :data:`SOLVENTS_DEFAULT`. - Normally takes a two-tuple, but analysis is compatible with single solvent selections. - Single solvent analyses will result in a figure with fully filled violins for the single solvent. + .. seealso:: :func:`~mdpow.workflows.dihedrals.get_atom_indices`, :data:`SMARTS_DEFAULT` - *interactions* - The default interactions are documented under :data:`INTERACTIONS_DEFAULT`. + *solvents* + The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. - *start, stop, step* - arguments passed to :func:`~mdpow.analysis.ensemble.EnsembleAnalysis.run`, - as parameters for iterating through the trajectories of the current ensemble + *interactions* + The default interactions are documented under :data:`INTERACTIONS_DEFAULT`. - .. seealso:: :class:`~mdpow.analysis.ensemble.EnsembleAnalysis` + *start, stop, step* + arguments passed to :func:`~mdpow.analysis.ensemble.EnsembleAnalysis.run`, + as parameters for iterating through the trajectories of the current ensemble - :returns: + .. seealso:: :class:`~mdpow.analysis.ensemble.EnsembleAnalysis` - *df* - :class:`pandas.DataFrame` of :class:`~mdpow.analysis.dihedral.DihedralAnalysis` - results, including all dihedral atom groups for molecule of current project + :returns: - ''' + *df* + :class:`pandas.DataFrame` of :class:`~mdpow.analysis.dihedral.DihedralAnalysis` + results, including all dihedral atom groups for molecule of current project - dih_ens = ensemble.Ensemble(dirname=dirname, - solvents=solvents, - interactions=interactions) + """ + + dih_ens = ensemble.Ensemble( + dirname=dirname, solvents=solvents, interactions=interactions + ) indices = atom_indices - all_dihedrals = [dih_ens.select_atoms(f'index {i[0]}', - f'index {i[1]}', - f'index {i[2]}', - f'index {i[3]}' ) for i in indices] + all_dihedrals = [ + dih_ens.select_atoms( + f"index {i[0]}", f"index {i[1]}", f"index {i[2]}", f"index {i[3]}" + ) + for i in indices + ] da = dihedral.DihedralAnalysis(all_dihedrals) da.run(start=start, stop=stop, step=step) @@ -401,42 +416,42 @@ def dihedral_groups_ensemble(dirname, atom_indices, return df + def save_df(df, df_save_dir, resname, molname=None): - '''Takes a :class:`pandas.DataFrame` of results from - :class:`~mdpow.analysis.dihedral.DihedralAnalysis` - as input before padding the angles to optionaly save the raw - data. + """Takes a :class:`pandas.DataFrame` of results from + :class:`~mdpow.analysis.dihedral.DihedralAnalysis` + as input before padding the angles to optionaly save the raw + data. - Optionally saves results before padding the angles for periodicity - and plotting dihedral angle frequencies as KDE violins - with :func:`~mdpow.workflows.dihedrals.dihedral_violins`. - Given a parent directory, creates subdirectory for molecule, - saves fully sampled, unpadded results :class:`pandas.DataFrame` - as a compressed csv file, default: .csv.bz2. + Optionally saves results before padding the angles for periodicity + and plotting dihedral angle frequencies as KDE violins + with :func:`~mdpow.workflows.dihedrals.dihedral_violins`. + Given a parent directory, creates subdirectory for molecule, + saves fully sampled, unpadded results :class:`pandas.DataFrame` + as a compressed csv file, default: .csv.bz2. - :keywords: + :keywords: - *df* - :class:`pandas.DataFrame` of :class:`~mdpow.analysis.dihedral.DihedralAnalysis` - results, including all dihedral atom groups for molecule of current project + *df* + :class:`pandas.DataFrame` of :class:`~mdpow.analysis.dihedral.DihedralAnalysis` + results, including all dihedral atom groups for molecule of current project - *df_save_dir* - optional, path to the location to save results :class:`pandas.DataFrame` + *df_save_dir* + optional, path to the location to save results :class:`pandas.DataFrame` - *resname* - `resname` for the molecule as defined in - the topology and trajectory + *resname* + `resname` for the molecule as defined in + the topology and trajectory - *molname* - molecule name to be used for labelling - plots, if different from `resname` + *molname* + molecule name to be used for labelling + plots, if different from `resname` - ''' + """ - df = df.sort_values(by=["selection", - "solvent", - "interaction", - "lambda"]).reset_index(drop=True) + df = df.sort_values( + by=["selection", "solvent", "interaction", "lambda"] + ).reset_index(drop=True) if molname is None: molname = resname @@ -446,10 +461,10 @@ def save_df(df, df_save_dir, resname, molname=None): os.mkdir(newdir) # time and compression level can be adjusted as kwargs - df.to_csv(f'{newdir}/{molname}_full_df.csv.bz2', - index=False, compression='bz2') + df.to_csv(f"{newdir}/{molname}_full_df.csv.bz2", index=False, compression="bz2") + + logger.info(f"Results DataFrame saved as {newdir}/{molname}_full_df.csv.bz2") - logger.info(f'Results DataFrame saved as {newdir}/{molname}_full_df.csv.bz2') def get_initial_dihedrals(df): '''Obtain the initial dihedral values for each atom group @@ -459,38 +474,38 @@ def get_initial_dihedrals(df): return def periodic_angle_padding(df, padding=45): - '''Pads the angles from the results :class:`~pandas.DataFrame` - to maintain periodicity in the violin plots. + """Pads the angles from the results :class:`~pandas.DataFrame` + to maintain periodicity in the violin plots. - Takes a :class:`pandas.DataFrame` of results from - :class:`~mdpow.analysis.dihedral.DihedralAnalysis` - or :func:`~mdpow.workflows.dihedrals.dihedral_groups_ensemble` - as input and pads the angles to maintain periodicity - for properly plotting dihedral angle frequencies as KDE violins - with :func:`~mdpow.workflows.dihedrals.dihedral_violins` and - :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins`. - Creates two new :class:`pandas.DataFrame` based on the - `padding` value specified, pads the angle values, concatenates - all three :class:`pandas.DataFrame`, maintaining original data and - adding padded values, and returns new augmented :class:`pandas.DataFrame`. + Takes a :class:`pandas.DataFrame` of results from + :class:`~mdpow.analysis.dihedral.DihedralAnalysis` + or :func:`~mdpow.workflows.dihedrals.dihedral_groups_ensemble` + as input and pads the angles to maintain periodicity + for properly plotting dihedral angle frequencies as KDE violins + with :func:`~mdpow.workflows.dihedrals.dihedral_violins` and + :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins`. + Creates two new :class:`pandas.DataFrame` based on the + `padding` value specified, pads the angle values, concatenates + all three :class:`pandas.DataFrame`, maintaining original data and + adding padded values, and returns new augmented :class:`pandas.DataFrame`. - :keywords: + :keywords: - *df* - :class:`pandas.DataFrame` of :class:`~mdpow.analysis.dihedral.DihedralAnalysis` - results, including all dihedral atom groups for molecule of current project + *df* + :class:`pandas.DataFrame` of :class:`~mdpow.analysis.dihedral.DihedralAnalysis` + results, including all dihedral atom groups for molecule of current project - *padding* - value in degrees to specify angle augmentation threshold - default: 45 + *padding* + value in degrees to specify angle augmentation threshold + default: 45 - :returns: + :returns: - *df_aug* - augmented results :class:`pandas.DataFrame` containing - padded dihedral angles as specified by `padding` + *df_aug* + augmented results :class:`pandas.DataFrame` containing + padded dihedral angles as specified by `padding` - ''' + """ df1 = df[df.dihedral > 180 - padding].copy(deep=True) df1.dihedral -= 360 @@ -500,132 +515,159 @@ def periodic_angle_padding(df, padding=45): return df_aug + def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): - '''Plots kernel density estimates (KDE) of dihedral angle frequencies for - one dihedral atom group as violin plots, using as input the augmented - :class:`pandas.DataFrame` from :func:`~mdpow.workflows.dihedrals.periodic_angle_padding`. + """Plots kernel density estimates (KDE) of dihedral angle frequencies for + one dihedral atom group as violin plots, using as input the augmented + :class:`pandas.DataFrame` from :func:`~mdpow.workflows.dihedrals.periodic_angle_padding`. - Output is converted to SVG by :func:`~mdpow.workflows.dihedrals.build_svg` - and final output is saved as PDF by :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins` + Output is converted to SVG by :func:`~mdpow.workflows.dihedrals.build_svg` + and final output is saved as PDF by :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins` - :keywords: + :keywords: - *df* - augmented results :class:`pandas.DataFrame` from - :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` + *df* + augmented results :class:`pandas.DataFrame` from + :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` - *width* - width of the violin element (>1 overlaps) - default: 0.9 + *width* + width of the violin element (>1 overlaps) + default: 0.9 - *solvents* - The default solvents are documented under :data:`SOLVENTS_DEFAULT`. - Normally takes a two-tuple, but analysis is compatible with single solvent selections. - Single solvent analyses will result in a figure with fully filled violins for the single solvent. + *solvents* + The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. - *plot_title* - generated by :func:`~mdpow.workflows.dihedrals.build_svg` using - `molname`, `dihedral_groups`, `atom_indices`, and `interactions` - in this order and format: f'{molname}, {name[0]} {a} | ''{col_name}' + *plot_title* + generated by :func:`~mdpow.workflows.dihedrals.build_svg` using + `molname`, `dihedral_groups`, `atom_indices`, and `interactions` + in this order and format: f'{molname}, {name[0]} {a} | ''{col_name}' - :returns: + :returns: - *g* - returns a :class:`seaborn.FacetGrid` object containing a violin plot of the - kernel density estimates (KDE) of the dihedral angle frequencies for each - dihedral atom group identified by :data:`SMARTS_DEFAULT` + *g* + returns a :class:`seaborn.FacetGrid` object containing a violin plot of the + kernel density estimates (KDE) of the dihedral angle frequencies for each + dihedral atom group identified by :data:`SMARTS_DEFAULT` - ''' + """ - df['lambda'] = df['lambda'].astype('float') / 1000 - df = df.sort_values(by=["selection", - "solvent", - "interaction", - "lambda"]).reset_index(drop=True) + df["lambda"] = df["lambda"].astype("float") / 1000 + df = df.sort_values( + by=["selection", "solvent", "interaction", "lambda"] + ).reset_index(drop=True) - width_ratios = [len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) + 1, - len(df[df['interaction'] == "VDW"]["lambda"].unique()), - len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) - 1] + width_ratios = [ + len(df[df["interaction"] == "Coulomb"]["lambda"].unique()) + 1, + len(df[df["interaction"] == "VDW"]["lambda"].unique()), + len(df[df["interaction"] == "Coulomb"]["lambda"].unique()) - 1, + ] # Usage in Jupyter causes matplotlib figure object output, not the completed figure # Upcoming fix in issue #260 - assert 0 < len(solvents) < 3, "one or two solvents must be specified, otherwise SOLVENTS_DEFAULT is used" + assert ( + 0 < len(solvents) < 3 + ), "one or two solvents must be specified, otherwise SOLVENTS_DEFAULT is used" split = len(solvents) > 1 - g = sns.catplot(data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", - kind="violin", split=split, width=width, inner=None, cut=0, - linewidth=0.5, - hue_order=list(solvents), col_order=["Coulomb", "VDW", "Structure"], - sharex=False, sharey=True, - height=3.0, aspect=2.0, - facet_kws={'ylim': (-180, 180), - 'gridspec_kws': {'width_ratios': width_ratios, - } - } - ) + g = sns.catplot( + data=df, + x="lambda", + y="dihedral", + hue="solvent", + col="interaction", + kind="violin", + split=split, + width=width, + inner=None, + cut=0, + linewidth=0.5, + hue_order=list(solvents), + col_order=["Coulomb", "VDW", "Structure"], + sharex=False, + sharey=True, + height=3.0, + aspect=2.0, + facet_kws={ + "ylim": (-180, 180), + "gridspec_kws": { + "width_ratios": width_ratios, + }, + }, + ) g.set_xlabels(r"$\lambda$") g.set_ylabels(r"dihedral angle $\phi$") g.despine(offset=5) - axC = g.axes_dict['Coulomb'] + axC = g.axes_dict["Coulomb"] axC.yaxis.set_major_locator(plt.matplotlib.ticker.MultipleLocator(60)) axC.yaxis.set_minor_locator(plt.matplotlib.ticker.MultipleLocator(30)) - axC.yaxis.set_major_formatter(plt.matplotlib.ticker.FormatStrFormatter(r"$%g^\circ$")) + axC.yaxis.set_major_formatter( + plt.matplotlib.ticker.FormatStrFormatter(r"$%g^\circ$") + ) - axV = g.axes_dict['VDW'] + axV = g.axes_dict["VDW"] axV.yaxis.set_visible(False) axV.spines["left"].set_visible(False) - axIM = g.axes_dict['Structure'] - axIM.axis('off') + axIM = g.axes_dict["Structure"] + axIM.axis("off") g.set_titles(plot_title) return g -def build_svg(mol, molname, name_index_pairs, atom_group_selection, - solvents=SOLVENTS_DEFAULT, width=0.9): - '''Converts and combines figure components into an - SVG object to be converted and saved as a publication - quality PDF. - :keywords: +def build_svg( + mol, + molname, + name_index_pairs, + atom_group_selection, + solvents=SOLVENTS_DEFAULT, + width=0.9, +): + """Converts and combines figure components into an + SVG object to be converted and saved as a publication + quality PDF. - *mol* - :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` + :keywords: - *molname* - molecule name to be used for labelling - plots, if different from `resname` - (in this case, carried over from an upstream - decision between the two) + *mol* + :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` - *name_index_pairs* - dictionary with key-value pair for dihedral atom group, - atom indices, and bond indices + *molname* + molecule name to be used for labelling + plots, if different from `resname` + (in this case, carried over from an upstream + decision between the two) - .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` + *name_index_pairs* + dictionary with key-value pair for dihedral atom group, + atom indices, and bond indices - *atom_group_selection* - `name` of each section in the `groupby` series of atom group selections + .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` - .. seealso:: :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins` + *atom_group_selection* + `name` of each section in the `groupby` series of atom group selections - *solvents* - The default solvents are documented under :data:`SOLVENTS_DEFAULT`. - Normally takes a two-tuple, but analysis is compatible with single solvent selections. - Single solvent analyses will result in a figure with fully filled violins for the single solvent. + .. seealso:: :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins` - *width* - width of the violin element (>1 overlaps) - default: 0.9 + *solvents* + The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. - :returns: + *width* + width of the violin element (>1 overlaps) + default: 0.9 - *fig* - :mod:`svgutils` SVG figure object + :returns: - ''' + *fig* + :mod:`svgutils` SVG figure object + + """ atom_index = name_index_pairs[atom_group_selection[0]][0] bond_index = name_index_pairs[atom_group_selection[0]][1] @@ -633,14 +675,16 @@ def build_svg(mol, molname, name_index_pairs, atom_group_selection, drawer = rdMolDraw2D.MolDraw2DSVG(250, 250) drawer.DrawMolecule(mol=mol, highlightAtoms=atom_index, highlightBonds=bond_index) drawer.FinishDrawing() - svg = drawer.GetDrawingText().replace('svg:','') + svg = drawer.GetDrawingText().replace("svg:", "") mol_svg = svgutils.transform.fromstring(svg) m = mol_svg.getroot() m.scale(0.0125).moveto(21.8, 0.35) - plot_title = f'{molname}, {atom_group_selection[0]} {atom_index} | ''{col_name}' - plot = dihedral_violins(atom_group_selection[1], width=width, solvents=solvents, plot_title=plot_title) + plot_title = f"{molname}, {atom_group_selection[0]} {atom_index} | " "{col_name}" + plot = dihedral_violins( + atom_group_selection[1], width=width, solvents=solvents, plot_title=plot_title + ) plot_svg = svgutils.transform.from_mpl(plot) p = plot_svg.getroot() @@ -651,65 +695,77 @@ def build_svg(mol, molname, name_index_pairs, atom_group_selection, return fig -def plot_dihedral_violins(df, resname, mol, name_index_pairs, figdir=None, molname=None, - width=0.9, plot_pdf_width=PLOT_WIDTH_DEFAULT, solvents=SOLVENTS_DEFAULT): - '''Coordinates plotting and saving figures for all dihedral atom groups. - - Makes a subdirectory for the current project within the specified - `figdir` using `resname` or `molname` as title and saves production - quality PDFs for each dihedral atom group separately. - .. seealso:: +def plot_dihedral_violins( + df, + resname, + mol, + name_index_pairs, + figdir=None, + molname=None, + width=0.9, + plot_pdf_width=PLOT_WIDTH_DEFAULT, + solvents=SOLVENTS_DEFAULT, +): + """Coordinates plotting and saving figures for all dihedral atom groups. - :func:`~mdpow.workflows.dihedrals.automated_dihedral_analysis`, - :func:`~mdpow.workflows.dihedrals.dihedral_violins`, - :func:`~mdpow.workflows.dihedrals.build_svg` + Makes a subdirectory for the current project within the specified + `figdir` using `resname` or `molname` as title and saves production + quality PDFs for each dihedral atom group separately. - :keywords: + .. seealso:: - *df* - augmented results :class:`pandas.DataFrame` from - :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` + :func:`~mdpow.workflows.dihedrals.automated_dihedral_analysis`, + :func:`~mdpow.workflows.dihedrals.dihedral_violins`, + :func:`~mdpow.workflows.dihedrals.build_svg` - *resname* - `resname` for the molecule as defined in - the topology and trajectory + :keywords: - *mol* - :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` + *df* + augmented results :class:`pandas.DataFrame` from + :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` - *name_index_pairs* - dictionary with key-value pair for dihedral atom group, - atom indices, and bond indices + *resname* + `resname` for the molecule as defined in + the topology and trajectory - .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` + *mol* + :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` - *figdir* - path to the location to save figures (REQUIRED but marked - as a kwarg for technical reasons; will be changed in #244) + *name_index_pairs* + dictionary with key-value pair for dihedral atom group, + atom indices, and bond indices - *molname* - molecule name to be used for labelling - plots, if different from `resname` + .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` - *width* - width of the violin element (>1 overlaps) - default: 0.9 + *figdir* + path to the location to save figures (REQUIRED but marked + as a kwarg for technical reasons; will be changed in #244) - .. seealso:: :func:`~mdpow.workflows.dihedrals.dihedral_violins` + *molname* + molecule name to be used for labelling + plots, if different from `resname` - *plot_pdf_width* - The default value for width of plot output is described in detail under - :data:`PLOT_WIDTH_DEFAULT`. + *width* + width of the violin element (>1 overlaps) + default: 0.9 - *solvents* - The default solvents are documented under :data:`SOLVENTS_DEFAULT`. - Normally takes a two-tuple, but analysis is compatible with single solvent selections. - Single solvent analyses will result in a figure with fully filled violins for the single solvent. + .. seealso:: :func:`~mdpow.workflows.dihedrals.dihedral_violins` - ''' + *plot_pdf_width* + The default value for width of plot output is described in detail under + :data:`PLOT_WIDTH_DEFAULT`. + + *solvents* + The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. + + """ - assert figdir is not None, "figdir MUST be set, even though it is a kwarg. Will be changed with #244" + assert ( + figdir is not None + ), "figdir MUST be set, even though it is a kwarg. Will be changed with #244" if molname is None: molname = resname @@ -725,17 +781,25 @@ def plot_dihedral_violins(df, resname, mol, name_index_pairs, figdir=None, molna pdf_list = [] for name in section: - - fig = build_svg(mol=mol, molname=molname, atom_group_selection=name, name_index_pairs=name_index_pairs, - solvents=solvents, width=width) + fig = build_svg( + mol=mol, + molname=molname, + atom_group_selection=name, + name_index_pairs=name_index_pairs, + solvents=solvents, + width=width, + ) figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" if figdir is not None: - plot_pdf = cairosvg.svg2pdf(bytestring=fig.tostr(), write_to=str(figfile), - output_width=plot_pdf_width_px) - + plot_pdf = cairosvg.svg2pdf( + bytestring=fig.tostr(), + write_to=str(figfile), + output_width=plot_pdf_width_px, + ) + # add PDF for each dihedral atom group to all_PDFs list - pdf_list.append(f'{figfile}') + pdf_list.append(f"{figfile}") logger.info(f"Figure saved as {figfile}") @@ -747,141 +811,169 @@ def plot_dihedral_violins(df, resname, mol, name_index_pairs, figdir=None, molna merger.append(pdf) merger.write(f"{figdir}/{molname}/{molname}_all_figures.pdf") merger.close() - logger.info(f"PDF of combined figures generated and saved as {figdir}/{molname}/{molname}_all_figures.pdf") + logger.info( + f"PDF of combined figures generated and saved as {figdir}/{molname}/{molname}_all_figures.pdf" + ) return None -def automated_dihedral_analysis(dirname, resname, - figdir=None, - # figdir is required and will cause issues if not specified - # figdir=None is a temporary way to satisfy - # workflows base tests until issue #244 is resolved - # because it currently uses a **kwargs convention and the - # positional argument figdir will not carry over nicely - df_save_dir=None, molname=None, - SMARTS=SMARTS_DEFAULT, plot_pdf_width=PLOT_WIDTH_DEFAULT, - dataframe=None, padding=45, width=0.9, - solvents=SOLVENTS_DEFAULT, - interactions=INTERACTIONS_DEFAULT, - start=None, stop=None, step=None): - '''Runs :class:`~mdpow.analysis.dihedral.DihedralAnalysis` for a single MDPOW - project and creates violin plots of dihedral angle frequencies for each - relevant dihedral atom group. - - For one MDPOW project, automatically determines all relevant dihedral atom groups - in the molecule, runs :class:`~mdpow.analysis.dihedral.DihedralAnalysis` for each group, - pads the dihedral angles to maintain periodicity, creates violin plots of dihedral angle - frequencies (KDEs), and saves publication quality PDF figures for each group, separately. - - Optionally saves all pre-padded :class:`~mdpow.analysis.dihedral.DihedralAnalysis` results - as a single :class:`pandas.DataFrame` in `df_save_dir` provided. - - :keywords: - - *dirname* - Molecule Simulation directory. Loads simulation files present in - lambda directories into the new instance. With this method for - generating an :class:`~mdpow.analysis.ensemble.Ensemble` the - lambda directories are explored and - :meth:`~mdpow.analysis.ensemble.Ensemble._load_universe_from_dir` - searches for .gro, .gro.bz2, .gro.gz, and .tpr files for topology, - and .xtc files for trajectory. It will default to using the tpr file - available. - - *figdir* - path to the location to save figures (REQUIRED but marked - as a kwarg for technical reasons; will be changed in #244) - - *resname* - `resname` for the molecule as defined in - the topology and trajectory - - *df_save_dir* - optional, path to the location to save results :class:`pandas.DataFrame` - - *molname* - molecule name to be used for labelling - plots, if different from `resname` - - *SMARTS* - The default SMARTS string is described in detail under :data:`SMARTS_DEFAULT`. - - *plot_pdf_width* - The default value for width of plot output is described in detail under - :data:`PLOT_WIDTH_DEFAULT`. - - *dataframe* - optional, if :class:`~mdpow.analysis.dihedral.DihedralAnalysis` - was done prior, then results :class:`pandas.DataFrame` can be - input to utilize angle padding and violin plotting functionality - - *padding* - value in degrees - default: 45 - - .. seealso:: :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` - - *width* - width of the violin element (>1 overlaps) - default: 0.9 - - .. seealso:: :func:`~mdpow.workflows.dihedrals.dihedral_violins` - - *solvents* - The default solvents are documented under :data:`SOLVENTS_DEFAULT`. - Normally takes a two-tuple, but analysis is compatible with single solvent selections. - Single solvent analyses will result in a figure with fully filled violins for the single solvent. - - *interactions* - The default interactions are documented under :data:`INTERACTIONS_DEFAULT`. - - *start, stop, step* - arguments passed to :func:`~mdpow.analysis.ensemble.EnsembleAnalysis.run`, - as parameters for iterating through the trajectories of the current ensemble - - .. seealso:: :class:`~mdpow.analysis.ensemble.EnsembleAnalysis` - - .. rubric:: Example - - Typical Workflow:: - - import dihedrals - - dihedrals.automated_dihedral_analysis(dirname='/foo/bar/MDPOW_project_data', - figdir='/foo/bar/MDPOW_figure_directory', - resname='UNK', molname='benzene', - padding=45, width=0.9, - solvents=('water','octanol'), - interactions=('Coulomb','VDW'), - start=0, stop=100, step=10) - ''' + +def automated_dihedral_analysis( + dirname, + resname, + figdir=None, + # figdir is required and will cause issues if not specified + # figdir=None is a temporary way to satisfy + # workflows base tests until issue #244 is resolved + # because it currently uses a **kwargs convention and the + # positional argument figdir will not carry over nicely + df_save_dir=None, + molname=None, + SMARTS=SMARTS_DEFAULT, + plot_pdf_width=PLOT_WIDTH_DEFAULT, + dataframe=None, + padding=45, + width=0.9, + solvents=SOLVENTS_DEFAULT, + interactions=INTERACTIONS_DEFAULT, + start=None, + stop=None, + step=None, +): + """Runs :class:`~mdpow.analysis.dihedral.DihedralAnalysis` for a single MDPOW + project and creates violin plots of dihedral angle frequencies for each + relevant dihedral atom group. + + For one MDPOW project, automatically determines all relevant dihedral atom groups + in the molecule, runs :class:`~mdpow.analysis.dihedral.DihedralAnalysis` for each group, + pads the dihedral angles to maintain periodicity, creates violin plots of dihedral angle + frequencies (KDEs), and saves publication quality PDF figures for each group, separately. + + Optionally saves all pre-padded :class:`~mdpow.analysis.dihedral.DihedralAnalysis` results + as a single :class:`pandas.DataFrame` in `df_save_dir` provided. + + :keywords: + + *dirname* + Molecule Simulation directory. Loads simulation files present in + lambda directories into the new instance. With this method for + generating an :class:`~mdpow.analysis.ensemble.Ensemble` the + lambda directories are explored and + :meth:`~mdpow.analysis.ensemble.Ensemble._load_universe_from_dir` + searches for .gro, .gro.bz2, .gro.gz, and .tpr files for topology, + and .xtc files for trajectory. It will default to using the tpr file + available. + + *figdir* + path to the location to save figures (REQUIRED but marked + as a kwarg for technical reasons; will be changed in #244) + + *resname* + `resname` for the molecule as defined in + the topology and trajectory + + *df_save_dir* + optional, path to the location to save results :class:`pandas.DataFrame` + + *molname* + molecule name to be used for labelling + plots, if different from `resname` + + *SMARTS* + The default SMARTS string is described in detail under :data:`SMARTS_DEFAULT`. + + *plot_pdf_width* + The default value for width of plot output is described in detail under + :data:`PLOT_WIDTH_DEFAULT`. + + *dataframe* + optional, if :class:`~mdpow.analysis.dihedral.DihedralAnalysis` + was done prior, then results :class:`pandas.DataFrame` can be + input to utilize angle padding and violin plotting functionality + + *padding* + value in degrees + default: 45 + + .. seealso:: :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` + + *width* + width of the violin element (>1 overlaps) + default: 0.9 + + .. seealso:: :func:`~mdpow.workflows.dihedrals.dihedral_violins` + + *solvents* + The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. + + *interactions* + The default interactions are documented under :data:`INTERACTIONS_DEFAULT`. + + *start, stop, step* + arguments passed to :func:`~mdpow.analysis.ensemble.EnsembleAnalysis.run`, + as parameters for iterating through the trajectories of the current ensemble + + .. seealso:: :class:`~mdpow.analysis.ensemble.EnsembleAnalysis` + + .. rubric:: Example + + Typical Workflow:: + + import dihedrals + + dihedrals.automated_dihedral_analysis(dirname='/foo/bar/MDPOW_project_data', + figdir='/foo/bar/MDPOW_figure_directory', + resname='UNK', molname='benzene', + padding=45, width=0.9, + solvents=('water','octanol'), + interactions=('Coulomb','VDW'), + start=0, stop=100, step=10) + """ u = build_universe(dirname=dirname, solvents=solvents) mol, solute = rdkit_conversion(u=u, resname=resname) atom_indices = get_atom_indices(mol=mol, SMARTS=SMARTS) bond_indices = get_bond_indices(mol=mol, atom_indices=atom_indices) dihedral_groups = get_dihedral_groups(solute=solute, atom_indices=atom_indices) - name_index_pairs = get_paired_indices(atom_indices=atom_indices, bond_indices=bond_indices, - dihedral_groups=dihedral_groups) + name_index_pairs = get_paired_indices( + atom_indices=atom_indices, + bond_indices=bond_indices, + dihedral_groups=dihedral_groups, + ) if dataframe is not None: - df = dataframe - logger.info(f'Proceeding with results DataFrame provided.') + logger.info(f"Proceeding with results DataFrame provided.") else: - - df = dihedral_groups_ensemble(dirname=dirname, atom_indices=atom_indices, - solvents=solvents, interactions=interactions, - start=start, stop=stop, step=step) + df = dihedral_groups_ensemble( + dirname=dirname, + atom_indices=atom_indices, + solvents=solvents, + interactions=interactions, + start=start, + stop=stop, + step=step, + ) if df_save_dir is not None: save_df(df=df, df_save_dir=df_save_dir, resname=resname, molname=molname) df_aug = periodic_angle_padding(df, padding=padding) - plot_dihedral_violins(df=df_aug, resname=resname, mol=mol, name_index_pairs=name_index_pairs, figdir=figdir, molname=molname, - width=width, plot_pdf_width=plot_pdf_width, solvents=solvents) + plot_dihedral_violins( + df=df_aug, + resname=resname, + mol=mol, + name_index_pairs=name_index_pairs, + figdir=figdir, + molname=molname, + width=width, + plot_pdf_width=plot_pdf_width, + solvents=solvents, + ) logger.info(f"DihedralAnalysis completed for all projects in {dirname}") diff --git a/mdpow/workflows/registry.py b/mdpow/workflows/registry.py index 33bdd543..5320b6b8 100644 --- a/mdpow/workflows/registry.py +++ b/mdpow/workflows/registry.py @@ -28,9 +28,7 @@ # NOTE: analysis modules should NOT import registry to avoid circular imports from . import dihedrals -registry = { - 'DihedralAnalysis' : dihedrals.automated_dihedral_analysis -} +registry = {"DihedralAnalysis": dihedrals.automated_dihedral_analysis} """ In the `registry`, each entry corresponds to an :class:`~mdpow.analysis.ensemble.EnsembleAnalysis` diff --git a/scripts/mdpow-cfg2yaml.py b/scripts/mdpow-cfg2yaml.py index a46f7c4c..4931750c 100755 --- a/scripts/mdpow-cfg2yaml.py +++ b/scripts/mdpow-cfg2yaml.py @@ -18,7 +18,7 @@ full_file = [] -with open(sys.argv[1],'r') as f: +with open(sys.argv[1], "r") as f: for line in f: newline = line.strip() if not newline.startswith("#") and not newline == "": @@ -32,28 +32,31 @@ sections = [] -for x in range(len(heads)-1): - sections.append(full_file[heads[x]:heads[x+1]]) +for x in range(len(heads) - 1): + sections.append(full_file[heads[x] : heads[x + 1]]) -sections.append(full_file[heads[-1]:]) +sections.append(full_file[heads[-1] :]) yaml_formatting = {} for x in sections: options = x[1:] - split = [[str(y.split("=")[0].strip()),str(y.split("=")[1].strip()).strip()] for y in options] - yaml_formatting[x[0][1:-1]]=split + split = [ + [str(y.split("=")[0].strip()), str(y.split("=")[1].strip()).strip()] + for y in options + ] + yaml_formatting[x[0][1:-1]] = split for x in yaml_formatting: - d = dict((key,value) for (key,value) in yaml_formatting[x]) + d = dict((key, value) for (key, value) in yaml_formatting[x]) yaml_formatting[x] = d -with open("result.yml",'w') as f: - f.write(yaml.dump(yaml_formatting,default_flow_style=False)) +with open("result.yml", "w") as f: + f.write(yaml.dump(yaml_formatting, default_flow_style=False)) -with open("result.yml",'r') as infile, open(sys.argv[-1],'w') as outfile: +with open("result.yml", "r") as infile, open(sys.argv[-1], "w") as outfile: data = infile.read() - data = data.replace("'","") + data = data.replace("'", "") outfile.write(data) os.remove("result.yml") diff --git a/setup.py b/setup.py index 262f113c..d6944529 100644 --- a/setup.py +++ b/setup.py @@ -6,67 +6,77 @@ from setuptools import setup, find_packages import versioneer -setup(name="MDPOW", - version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), - description="A library for computing solvation/water partitioning coefficients using molecular dynamics simulations", - long_description=open("README.rst").read(), - long_description_content_type="text/x-rst", - author="Oliver Beckstein", - author_email="orbeckst@gmail.com", - license="GPLv3", - url="https://github.com/Becksteinlab/MDPOW", - keywords="science Gromacs analysis 'molecular dynamics'", - classifiers=[ - "Development Status :: 4 - Beta", - "Environment :: Console", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", - "Operating System :: POSIX", - 'Operating System :: MacOS :: MacOS X', - 'Programming Language :: Python', - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Topic :: Scientific/Engineering :: Chemistry", - "Topic :: Scientific/Engineering :: Physics", - ], - packages=find_packages(exclude=['examples']), - scripts=['scripts/mdpow-pow', - 'scripts/mdpow-pcw', - 'scripts/mdpow-ptw', - 'scripts/mdpow-check', - 'scripts/mdpow-rebuild-fep', - 'scripts/mdpow-rebuild-simulation', - 'scripts/mdpow-equilibrium', - 'scripts/mdpow-fep', - 'scripts/mdpow-cfg2yaml.py', - 'scripts/mdpow-solvationenergy', - 'scripts/mdpow-get-runinput' - ], - package_data={'mdpow': ['top/*.dat', 'top/*.gro', 'top/*.itp', - 'top/oplsaa.ff/*', - 'top/charmm36-mar2019.ff/*', - 'top/amber99sb.ff/*', - 'templates/*'], }, - install_requires=['numpy>=1.6', 'scipy', - 'pyyaml', - 'GromacsWrapper>=0.5.1', - 'numkit', - 'six', - 'mdanalysis>=2', - 'alchemlyb>=2', - 'pandas', - 'pymbar>=4', - 'matplotlib', - 'seaborn', - 'rdkit', - 'svgutils', - 'cairosvg', - 'pypdf' - ], - #setup_requires=['pytest-runner',], - tests_require=['pytest', 'pybol', 'py'], - zip_safe=True, +setup( + name="MDPOW", + version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), + description="A library for computing solvation/water partitioning coefficients using molecular dynamics simulations", + long_description=open("README.rst").read(), + long_description_content_type="text/x-rst", + author="Oliver Beckstein", + author_email="orbeckst@gmail.com", + license="GPLv3", + url="https://github.com/Becksteinlab/MDPOW", + keywords="science Gromacs analysis 'molecular dynamics'", + classifiers=[ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", + "Operating System :: POSIX", + "Operating System :: MacOS :: MacOS X", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Topic :: Scientific/Engineering :: Chemistry", + "Topic :: Scientific/Engineering :: Physics", + ], + packages=find_packages(exclude=["examples"]), + scripts=[ + "scripts/mdpow-pow", + "scripts/mdpow-pcw", + "scripts/mdpow-ptw", + "scripts/mdpow-check", + "scripts/mdpow-rebuild-fep", + "scripts/mdpow-rebuild-simulation", + "scripts/mdpow-equilibrium", + "scripts/mdpow-fep", + "scripts/mdpow-cfg2yaml.py", + "scripts/mdpow-solvationenergy", + "scripts/mdpow-get-runinput", + ], + package_data={ + "mdpow": [ + "top/*.dat", + "top/*.gro", + "top/*.itp", + "top/oplsaa.ff/*", + "top/charmm36-mar2019.ff/*", + "top/amber99sb.ff/*", + "templates/*", + ], + }, + install_requires=[ + "numpy>=1.6", + "scipy", + "pyyaml", + "GromacsWrapper>=0.5.1", + "numkit", + "six", + "mdanalysis>=2", + "alchemlyb>=2", + "pandas", + "pymbar>=4", + "matplotlib", + "seaborn", + "rdkit", + "svgutils", + "cairosvg", + "pypdf", + ], + # setup_requires=['pytest-runner',], + tests_require=["pytest", "pybol", "py"], + zip_safe=True, ) diff --git a/versioneer.py b/versioneer.py index 64fea1c8..2b545405 100644 --- a/versioneer.py +++ b/versioneer.py @@ -1,4 +1,3 @@ - # Version: 0.18 """The Versioneer - like a rocketeer, but for versions. @@ -277,6 +276,7 @@ """ from __future__ import print_function + try: import configparser except ImportError: @@ -308,11 +308,13 @@ def get_root(): setup_py = os.path.join(root, "setup.py") versioneer_py = os.path.join(root, "versioneer.py") if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ("Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND').") + err = ( + "Versioneer was unable to run the project root directory. " + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND')." + ) raise VersioneerBadRootError(err) try: # Certain runtime workflows (setup.py install/develop in a setuptools @@ -325,8 +327,10 @@ def get_root(): me_dir = os.path.normcase(os.path.splitext(me)[0]) vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) if me_dir != vsr_dir: - print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(me), versioneer_py)) + print( + "Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(me), versioneer_py) + ) except NameError: pass return root @@ -348,6 +352,7 @@ def get(parser, name): if parser.has_option("versioneer", name): return parser.get("versioneer", name) return None + cfg = VersioneerConfig() cfg.VCS = VCS cfg.style = get(parser, "style") or "" @@ -372,17 +377,18 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" + def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f + return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None @@ -390,10 +396,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) break except EnvironmentError: e = sys.exc_info()[1] @@ -418,7 +427,9 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, return stdout, p.returncode -LONG_VERSION_PY['git'] = ''' +LONG_VERSION_PY[ + "git" +] = ''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -993,7 +1004,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1002,7 +1013,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = set([r for r in refs if re.search(r"\d", r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1010,19 +1021,26 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } @register_vcs_handler("git", "pieces_from_vcs") @@ -1037,8 +1055,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -1046,10 +1063,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -1072,17 +1098,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag @@ -1091,10 +1116,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -1105,13 +1132,13 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces @@ -1167,16 +1194,22 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -1205,11 +1238,13 @@ def versions_from_file(filename): contents = f.read() except EnvironmentError: raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) + mo = re.search( + r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S + ) if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) + mo = re.search( + r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S + ) if not mo: raise NotThisMethod("no version_json in _version.py") return json.loads(mo.group(1)) @@ -1218,8 +1253,7 @@ def versions_from_file(filename): def write_to_version_file(filename, versions): """Write the given version number to the given _version.py file.""" os.unlink(filename) - contents = json.dumps(versions, sort_keys=True, - indent=1, separators=(",", ": ")) + contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) @@ -1251,8 +1285,7 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -1366,11 +1399,13 @@ def render_git_describe_long(pieces): def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } if not style or style == "default": style = "pep440" # the default @@ -1390,9 +1425,13 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style) - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } class VersioneerBadRootError(Exception): @@ -1415,8 +1454,9 @@ def get_versions(verbose=False): handlers = HANDLERS.get(cfg.VCS) assert handlers, "unrecognized VCS '%s'" % cfg.VCS verbose = verbose or cfg.verbose - assert cfg.versionfile_source is not None, \ - "please set versioneer.versionfile_source" + assert ( + cfg.versionfile_source is not None + ), "please set versioneer.versionfile_source" assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" versionfile_abs = os.path.join(root, cfg.versionfile_source) @@ -1470,9 +1510,13 @@ def get_versions(verbose=False): if verbose: print("unable to compute version") - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, "error": "unable to compute version", - "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } def get_version(): @@ -1521,6 +1565,7 @@ def run(self): print(" date: %s" % vers.get("date")) if vers["error"]: print(" error: %s" % vers["error"]) + cmds["version"] = cmd_version # we override "build_py" in both distutils and setuptools @@ -1553,14 +1598,15 @@ def run(self): # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) + target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py if "cx_Freeze" in sys.modules: # cx_freeze enabled? from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. # setup(console=[{ @@ -1581,17 +1627,21 @@ def run(self): os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + cmds["build_exe"] = cmd_build_exe del cmds["build_py"] - if 'py2exe' in sys.modules: # py2exe enabled? + if "py2exe" in sys.modules: # py2exe enabled? try: from py2exe.distutils_buildexe import py2exe as _py2exe # py3 except ImportError: @@ -1610,13 +1660,17 @@ def run(self): os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + cmds["py2exe"] = cmd_py2exe # we override different "sdist" commands for both environments @@ -1643,8 +1697,10 @@ def make_release_tree(self, base_dir, files): # updated value target_versionfile = os.path.join(base_dir, cfg.versionfile_source) print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, - self._versioneer_generated_versions) + write_to_version_file( + target_versionfile, self._versioneer_generated_versions + ) + cmds["sdist"] = cmd_sdist return cmds @@ -1699,11 +1755,13 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except (EnvironmentError, configparser.NoSectionError, - configparser.NoOptionError) as e: + except ( + EnvironmentError, + configparser.NoSectionError, + configparser.NoOptionError, + ) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", - file=sys.stderr) + print("Adding sample versioneer config to setup.cfg", file=sys.stderr) with open(os.path.join(root, "setup.cfg"), "a") as f: f.write(SAMPLE_CONFIG) print(CONFIG_ERROR, file=sys.stderr) @@ -1712,15 +1770,18 @@ def do_setup(): print(" creating %s" % cfg.versionfile_source) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), - "__init__.py") + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: with open(ipy, "r") as f: @@ -1762,8 +1823,10 @@ def do_setup(): else: print(" 'versioneer.py' already in MANIFEST.in") if cfg.versionfile_source not in simple_includes: - print(" appending versionfile_source ('%s') to MANIFEST.in" % - cfg.versionfile_source) + print( + " appending versionfile_source ('%s') to MANIFEST.in" + % cfg.versionfile_source + ) with open(manifest_in, "a") as f: f.write("include %s\n" % cfg.versionfile_source) else: