Skip to content

Commit

Permalink
guess_elements() improvements
Browse files Browse the repository at this point in the history
- removed ALL loops and only use array lookups
- manually set all zero masses to DUMMY (MW is detected as DUMMY, DUMMY as D, so
  this makes all dummies DUMMY for consistency)
- hard code ATOL=1e-6 for detecting dummies with very small discrepancy from 0
- updated docs
- tests for approximate masses
  • Loading branch information
orbeckst committed Aug 12, 2023
1 parent 8d29e1d commit 3f90ad2
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 10 deletions.
11 changes: 8 additions & 3 deletions mdpow/tests/test_workflows_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,14 @@ def universe(request):
np.array(['C', 'N', 'DUMMY', 'CL'])
],
[
(np.array([16, 0, 0, 40.08, 40.08, 40.08, 24.305, 132.9]),
np.array(["OW", "MW", "DUMMY", "C0", "CAL", "CA2+", "MG2+", "CES"])),
np.array(['O', 'DUMMY', 'DUMMY', 'CA', 'CA', 'CA', 'MG', 'CS'])
(np.array([15.999, 0, 40.08, 40.08, 40.08, 24.305, 132.9]),
np.array(["OW", "MW", "C0", "CAL", "CA2+", "MG2+", "CES"])),
np.array(['O', 'DUMMY', 'CA', 'CA', 'CA', 'MG', 'CS'])
],
[
(np.array([16, 1e-6, 40.085, 133]),
np.array(["OW", "MW", "CA2+", "CES"])),
np.array(['O', 'DUMMY', 'CA', 'CS'])
],
],
indirect=["universe"])
Expand Down
31 changes: 24 additions & 7 deletions mdpow/workflows/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,9 @@ def guess_elements(atoms, rtol=1e-3):
"""guess elements for atoms from masses
Given masses, we perform a reverse lookup on
:data:`MDAnalysis.topology.tables.masses` to find the
corresponding element.
:data:`MDAnalysis.topology.tables.masses` to find the corresponding
element. Only atoms where the standard MDAnalysis guesser finds elements
with masses contradicting the topology masses are corrected.
.. Note:: This function *requires* correct masses to be present.
No sanity checks because MDPOW always uses TPR files that
Expand All @@ -196,7 +197,9 @@ def guess_elements(atoms, rtol=1e-3):
:keywords:
*rtol*
relative tolerance for a match (as used in :func:`numpy.isclose`)
relative tolerance for a match (as used in :func:`numpy.isclose`);
atol=1e-6 is at a fixed value, which means that "zero" is only
recognized for values =< 1e-6
.. note:: In order to reliably match GROMACS masses, *rtol* should
be at least 1e-3.
Expand All @@ -214,16 +217,30 @@ def guess_elements(atoms, rtol=1e-3):
atoms.add_TopologyAttr("elements", elements)
"""
ATOL = 1e-6

names = atoms.names
masses = atoms.masses

mda_elements = np.fromiter(tables.masses.keys(), dtype="U5")
mda_masses = np.fromiter(tables.masses.values(), dtype=np.float64)

# match all masses against the MDA reference masses
elix = np.isclose(masses[:, np.newaxis], mda_masses)
assert elix.any(axis=1).all(), "no match for a mass/element"
guessed_elements = guessers.guess_types(names)
guessed_masses = np.array([guessers.get_atom_mass(n) for n in guessed_elements])
problems = np.logical_not(np.isclose(masses, guessed_masses, atol=ATOL, rtol=rtol))

# match only problematic masses against the MDA reference masses
iproblem, ielem = np.nonzero(np.isclose(masses[problems, np.newaxis], mda_masses,
atol=ATOL, rtol=rtol))
# We should normally find a match for each problem but just in case, assert and
# give some useful information for debugging.
assert len(ielem) == sum(problems),\
("Not all masses could be assigned an element, "
f"missing names {set(names[problems]) - set(names[problems][iproblem])}")

guessed_elements[problems] = mda_elements[ielem]

guessed_elements = [mda_elements[ix][0] for ix in elix]
# manually fix some dummies that are labelled "D": set ALL zero masses to DUMMY
guessed_elements[np.isclose(masses, 0, atol=ATOL)] = "DUMMY"

return np.array(guessed_elements)

0 comments on commit 3f90ad2

Please sign in to comment.