Skip to content

Commit

Permalink
Merge pull request #307 from OpenBioSim/sync_exscientia
Browse files Browse the repository at this point in the history
Synchronise with Exscientia remote
  • Loading branch information
lohedges authored Jul 4, 2024
2 parents 81b48f0 + 44e83d1 commit bb5321a
Show file tree
Hide file tree
Showing 13 changed files with 379 additions and 135 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/Sandpit_exs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: ["ubuntu-latest", "macOS-latest",]
os: ["ubuntu-latest", ]
python-version: ["3.10",]

steps:
Expand All @@ -35,7 +35,7 @@ jobs:

- name: Install dependency
run: |
mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.10 ambertools gromacs "sire=2023.5" "alchemlyb>=2.1" pytest openff-interchange pint=0.21 rdkit "jaxlib>0.3.7" tqdm
mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.10 ambertools gromacs "sire=2024.1.0" "alchemlyb>=2.1" pytest openff-interchange pint=0.21 rdkit "jaxlib>0.3.7" tqdm
python -m pip install git+https://github.com/Exscientia/MDRestraintsGenerator.git
# For the testing of BSS.FreeEnergy.AlchemicalFreeEnergy.analysis
python -m pip install https://github.com/alchemistry/alchemtest/archive/master.zip
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@
install_command="pip install MDRestraintsGenerator",
)
if _have_imported(_MDRestraintsGenerator):
from Bio import BiopythonDeprecationWarning

_warnings.simplefilter("ignore", BiopythonDeprecationWarning)
from MDRestraintsGenerator import search as _search
from MDRestraintsGenerator.restraints import (
FindBoreschRestraint as _FindBoreschRestraint,
Expand Down
99 changes: 60 additions & 39 deletions python/BioSimSpace/Sandpit/Exscientia/Process/_amber.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,14 @@ def __init__(
property_map,
)

if (
not isinstance(protocol, _Protocol._FreeEnergyMixin)
and system.nPerturbableMolecules()
):
raise _IncompatibleError(
"Perturbable system is not compatible with none free energy protocol."
)

# Set the package name.
self._package_name = "AMBER"

Expand Down Expand Up @@ -2936,7 +2944,7 @@ def _init_stdout_dict(self):
for file in _Path(self.workDir()).glob("*.out.offset"):
os.remove(file)

def saveMetric(
def _saveMetric(
self, filename="metric.parquet", u_nk="u_nk.parquet", dHdl="dHdl.parquet"
):
"""
Expand All @@ -2945,41 +2953,54 @@ def saveMetric(
is Free Energy protocol, the dHdl and the u_nk data will be saved in the
same parquet format as well.
"""

_assert_imported(_alchemlyb)

self._init_stdout_dict()
datadict = dict()
if isinstance(self._protocol, _Protocol.Minimisation):
datadict_keys = [
("Time (ps)", None, "getStep"),
(
"PotentialEnergy (kJ/mol)",
_Units.Energy.kj_per_mol,
"getTotalEnergy",
),
]
else:
datadict_keys = [
("Time (ps)", _Units.Time.picosecond, "getTime"),
(
"PotentialEnergy (kJ/mol)",
_Units.Energy.kj_per_mol,
"getPotentialEnergy",
),
("Volume (nm^3)", _Units.Volume.nanometer3, "getVolume"),
("Pressure (bar)", _Units.Pressure.bar, "getPressure"),
("Temperature (kelvin)", _Units.Temperature.kelvin, "getTemperature"),
]
# # Disable this now
df = self._convert_datadict_keys(datadict_keys)
df.to_parquet(path=f"{self.workDir()}/{filename}", index=True)
if isinstance(self._protocol, _Protocol.FreeEnergy):
energy = _extract(
f"{self.workDir()}/{self._name}.out",
T=self._protocol.getTemperature() / _Units.Temperature.kelvin,
)
if "u_nk" in energy and energy["u_nk"] is not None:
energy["u_nk"].to_parquet(path=f"{self.workDir()}/{u_nk}", index=True)
if "dHdl" in energy and energy["dHdl"] is not None:
energy["dHdl"].to_parquet(path=f"{self.workDir()}/{dHdl}", index=True)
if filename is not None:
self._init_stdout_dict()
if isinstance(self._protocol, _Protocol.Minimisation):
datadict_keys = [
("Time (ps)", None, "getStep"),
(
"PotentialEnergy (kJ/mol)",
_Units.Energy.kj_per_mol,
"getTotalEnergy",
),
]
else:
datadict_keys = [
("Time (ps)", _Units.Time.picosecond, "getTime"),
(
"PotentialEnergy (kJ/mol)",
_Units.Energy.kj_per_mol,
"getPotentialEnergy",
),
("Volume (nm^3)", _Units.Volume.nanometer3, "getVolume"),
("Pressure (bar)", _Units.Pressure.bar, "getPressure"),
(
"Temperature (kelvin)",
_Units.Temperature.kelvin,
"getTemperature",
),
]
# # Disable this now
df = self._convert_datadict_keys(datadict_keys)
df.to_parquet(path=f"{self.workDir()}/{filename}", index=True)
if u_nk is not None or dHdl is not None:
_assert_imported(_alchemlyb)

if isinstance(self._protocol, _Protocol.FreeEnergy):
energy = _extract(
f"{self.workDir()}/{self._name}.out",
T=self._protocol.getTemperature() / _Units.Temperature.kelvin,
)
with _warnings.catch_warnings():
_warnings.filterwarnings(
"ignore",
message="The DataFrame has column names of mixed type.",
)
if "u_nk" in energy and energy["u_nk"] is not None:
energy["u_nk"].to_parquet(
path=f"{self.workDir()}/{u_nk}", index=True
)
if "dHdl" in energy and energy["dHdl"] is not None:
energy["dHdl"].to_parquet(
path=f"{self.workDir()}/{dHdl}", index=True
)
134 changes: 80 additions & 54 deletions python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@
_alchemlyb = _try_import("alchemlyb")

if _have_imported(_alchemlyb):
from alchemlyb.parsing.gmx import extract as _extract
from alchemlyb.parsing.gmx import extract_u_nk as _extract_u_nk
from alchemlyb.parsing.gmx import extract_dHdl as _extract_dHdl

from .. import _gmx_exe
from .. import _isVerbose
Expand Down Expand Up @@ -400,18 +401,19 @@ def _write_system(self, system, coord_file=None, topol_file=None, ref_file=None)
property_map=self._property_map,
)

# Write the restraint to the topology file
if self._restraint:
with open(topol_file, "a") as f:
f.write("\n")
f.write(
self._restraint.toString(
engine="GROMACS",
perturbation_type=self._protocol.getPerturbationType(),
restraint_lambda="restraint"
in self._protocol.getLambda(type="series"),
)
def _apply_ABFE_restraint(self):
# Write the restraint to the topology file
if self._restraint:
with open(self._top_file, "a") as f:
f.write("\n")
f.write(
self._restraint.toString(
engine="GROMACS",
perturbation_type=self._protocol.getPerturbationType(),
restraint_lambda="restraint"
in self._protocol.getLambda(type="series"),
)
)

def _generate_config(self):
"""Generate GROMACS configuration file strings."""
Expand Down Expand Up @@ -488,6 +490,7 @@ def _generate_config(self):
if isinstance(self._protocol, _Protocol._FreeEnergyMixin)
else None
)
self._apply_ABFE_restraint()
self.addToConfig(
config.generateGromacsConfig(
extra_options={**config_options, **self._extra_options},
Expand Down Expand Up @@ -841,7 +844,6 @@ def getSystem(self, block="AUTO"):
system : :class:`System <BioSimSpace._SireWrappers.System>`
The latest molecular system.
"""

# Wait for the process to finish.
if block is True:
self.wait()
Expand All @@ -856,6 +858,10 @@ def getSystem(self, block="AUTO"):
if block is True and not self.isError():
return self._getFinalFrame()
else:
raise ValueError(
"getSystem should not use `trjconv` to get the frame in production settings."
"Trigger an exception here to show the traceback when that happens."
)
# Minimisation trajectories have a single frame, i.e. the final state.
if isinstance(self._protocol, _Protocol.Minimisation):
time = 0 * _Units.Time.nanosecond
Expand Down Expand Up @@ -2396,22 +2402,27 @@ def _parse_energy_terms(text):
U-B, Proper-Dih.. Note that this order is absolute and will not be
changed by the input to `gmx energy`.
"""
sections = text.split("---")
# Get rid of all the nasty message from argo
content = text.split("End your selection with an empty line or a zero.")[1]
sections = content.split("---")
# Remove the empty sections
sections = [section for section in sections if section]
sections = [section for section in sections if section.strip()]
# Concatenate the lines
section = sections[1].replace("\n", "")
section = sections[0].replace("\n", "")
terms = section.split()
# Remove the possible '-' from the separation line
terms = [term for term in terms if term != "-"]
# Check if the index order is correct
indexes = [int(term) for term in terms[::2]]
try:
indexes = [int(term) for term in terms[::2]]
except ValueError:
raise ValueError("Cannot parse the terms: {}".format("\n".join(terms)))
energy_names = terms[1::2]
length_nomatch = len(indexes) != len(energy_names)
# -1 as the index is 1-based.
index_nomatch = (_np.arange(len(indexes)) != _np.array(indexes) - 1).any()
if length_nomatch or index_nomatch:
raise ValueError(f"Cannot parse the energy terms in the {edr_file} file.")
raise ValueError(f"Cannot parse the energy terms in the {text} file.")
else:
return energy_names

Expand Down Expand Up @@ -2858,7 +2869,7 @@ def _find_trajectory_file(self):
else:
return self._traj_file

def saveMetric(
def _saveMetric(
self, filename="metric.parquet", u_nk="u_nk.parquet", dHdl="dHdl.parquet"
):
"""
Expand All @@ -2867,41 +2878,56 @@ def saveMetric(
is Free Energy protocol, the dHdl and the u_nk data will be saved in the
same parquet format as well.
"""

_assert_imported(_alchemlyb)

self._update_energy_dict(initialise=True)
datadict_keys = [
("Time (ps)", _Units.Time.picosecond, "getTime"),
(
"PotentialEnergy (kJ/mol)",
_Units.Energy.kj_per_mol,
"getPotentialEnergy",
),
]
if not isinstance(self._protocol, _Protocol.Minimisation):
datadict_keys.extend(
[
("Volume (nm^3)", _Units.Volume.nanometer3, "getVolume"),
("Pressure (bar)", _Units.Pressure.bar, "getPressure"),
(
"Temperature (kelvin)",
_Units.Temperature.kelvin,
"getTemperature",
),
]
)
df = self._convert_datadict_keys(datadict_keys)
df.to_parquet(path=f"{self.workDir()}/{filename}", index=True)
if isinstance(self._protocol, _Protocol.FreeEnergy):
energy = _extract(
f"{self.workDir()}/{self._name}.xvg",
T=self._protocol.getTemperature() / _Units.Temperature.kelvin,
)
if "u_nk" in energy:
energy["u_nk"].to_parquet(path=f"{self.workDir()}/{u_nk}", index=True)
if "dHdl" in energy:
energy["dHdl"].to_parquet(path=f"{self.workDir()}/{dHdl}", index=True)
if filename is not None:
self._update_energy_dict(initialise=True)
datadict_keys = [
("Time (ps)", _Units.Time.picosecond, "getTime"),
(
"PotentialEnergy (kJ/mol)",
_Units.Energy.kj_per_mol,
"getPotentialEnergy",
),
]
if not isinstance(self._protocol, _Protocol.Minimisation):
datadict_keys.extend(
[
("Volume (nm^3)", _Units.Volume.nanometer3, "getVolume"),
("Pressure (bar)", _Units.Pressure.bar, "getPressure"),
(
"Temperature (kelvin)",
_Units.Temperature.kelvin,
"getTemperature",
),
]
)
df = self._convert_datadict_keys(datadict_keys)
df.to_parquet(path=f"{self.workDir()}/{filename}", index=True)
if u_nk is not None:
_assert_imported(_alchemlyb)
if isinstance(self._protocol, _Protocol.FreeEnergy):
energy = _extract_u_nk(
f"{self.workDir()}/{self._name}.xvg",
T=self._protocol.getTemperature() / _Units.Temperature.kelvin,
)
with _warnings.catch_warnings():
_warnings.filterwarnings(
"ignore",
message="The DataFrame has column names of mixed type.",
)
energy.to_parquet(path=f"{self.workDir()}/{u_nk}", index=True)
if dHdl is not None:
_assert_imported(_alchemlyb)
if isinstance(self._protocol, _Protocol.FreeEnergy):
energy = _extract_dHdl(
f"{self.workDir()}/{self._name}.xvg",
T=self._protocol.getTemperature() / _Units.Temperature.kelvin,
)
with _warnings.catch_warnings():
_warnings.filterwarnings(
"ignore",
message="The DataFrame has column names of mixed type.",
)
energy.to_parquet(path=f"{self.workDir()}/{dHdl}", index=True)


def _is_minimisation(config):
Expand Down
25 changes: 14 additions & 11 deletions python/BioSimSpace/Sandpit/Exscientia/Process/_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -911,16 +911,6 @@ def wait(self, max_time=None):

# The process isn't running.
if not self.isRunning():
try:
self.saveMetric()
except Exception:
exception_info = traceback.format_exc()
with open(f"{self.workDir()}/{self._name}.err", "a+") as f:
f.write("Exception Information during saveMetric():\n")
f.write("======================\n")
f.write(exception_info)
f.write("\n\n")

return

if max_time is not None:
Expand Down Expand Up @@ -1650,12 +1640,25 @@ def _generate_args(self):
"""Generate the dictionary of command-line arguments."""
self.clearArgs()

def _saveMetric(self, filename, u_nk, dHdl):
"""The abstract function to save the metric and free energy data. Need to be
defined for each MD engine."""
pass

def saveMetric(
self, filename="metric.parquet", u_nk="u_nk.parquet", dHdl="dHdl.parquet"
):
"""The abstract function to save the metric and free energy data. Need to be
defined for each MD engine."""
pass
try:
self._saveMetric(filename, u_nk, dHdl)
except Exception:
exception_info = traceback.format_exc()
with open(f"{self.workDir()}/{self._name}.err", "a+") as f:
f.write("Exception Information during saveMetric():\n")
f.write("======================\n")
f.write(exception_info)
f.write("\n\n")

def _convert_datadict_keys(self, datadict_keys):
"""This function is a helper function for saveMetric that converts a
Expand Down
Loading

0 comments on commit bb5321a

Please sign in to comment.