diff --git a/.github/workflows/Sandpit_exs.yml b/.github/workflows/Sandpit_exs.yml index f5b0fc0d3..50160fe36 100644 --- a/.github/workflows/Sandpit_exs.yml +++ b/.github/workflows/Sandpit_exs.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-latest", "macOS-latest",] + os: ["ubuntu-latest", ] python-version: ["3.10",] steps: @@ -30,12 +30,10 @@ jobs: python-version: ${{ matrix.python-version }} activate-environment: bss_build miniforge-version: latest - miniforge-variant: Mambaforge - use-mamba: true - name: Install dependency run: | - mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.10 ambertools gromacs "sire=2023.5" "alchemlyb>=2.1" pytest openff-interchange pint=0.21 rdkit "jaxlib>0.3.7" tqdm + conda install -c conda-forge -c openbiosim/label/main biosimspace python=3.10 ambertools gromacs "sire=2024.1.0" "alchemlyb>=2.1" pytest openff-interchange pint=0.21 rdkit "jaxlib>0.3.7" tqdm python -m pip install git+https://github.com/Exscientia/MDRestraintsGenerator.git # For the testing of BSS.FreeEnergy.AlchemicalFreeEnergy.analysis python -m pip install https://github.com/alchemistry/alchemtest/archive/master.zip diff --git a/.github/workflows/devel.yaml b/.github/workflows/devel.yaml index 216bbdc29..827c5dbcf 100644 --- a/.github/workflows/devel.yaml +++ b/.github/workflows/devel.yaml @@ -43,14 +43,12 @@ jobs: python-version: ${{ matrix.python-version }} activate-environment: bss_build miniforge-version: latest - miniforge-variant: Mambaforge - use-mamba: true # - name: Clone the devel branch run: git clone -b devel https://github.com/openbiosim/biosimspace # - name: Setup Conda - run: mamba install -y -c conda-forge boa anaconda-client packaging pip-requirements-parser + run: conda install -y -c conda-forge boa anaconda-client packaging pip-requirements-parser # - name: Update Conda recipe run: python ${{ github.workspace }}/biosimspace/actions/update_recipe.py @@ -58,8 +56,8 @@ jobs: - name: Prepare build location run: mkdir ${{ github.workspace }}/build # - - name: Build Conda package using mamba build - run: conda mambabuild -c conda-forge -c openbiosim/label/dev ${{ github.workspace }}/biosimspace/recipes/biosimspace + - name: Build Conda package using conda build + run: conda build -c conda-forge -c openbiosim/label/dev ${{ github.workspace }}/biosimspace/recipes/biosimspace # - name: Upload Conda package run: python ${{ github.workspace }}/biosimspace/actions/upload_package.py diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index d453c2dd8..a9d31edf0 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -40,14 +40,12 @@ jobs: python-version: ${{ matrix.python-version }} activate-environment: bss_build miniforge-version: latest - miniforge-variant: Mambaforge - use-mamba: true # - name: Clone the main branch run: git clone -b main https://github.com/openbiosim/biosimspace # - name: Setup Conda - run: mamba install -y -c conda-forge boa anaconda-client packaging pip-requirements-parser + run: conda install -y -c conda-forge boa anaconda-client packaging pip-requirements-parser # - name: Update Conda recipe run: python ${{ github.workspace }}/biosimspace/actions/update_recipe.py @@ -55,8 +53,8 @@ jobs: - name: Prepare build location run: mkdir ${{ github.workspace }}/build # - - name: Build Conda package using mamba build - run: conda mambabuild -c conda-forge -c openbiosim/label/main ${{ github.workspace }}/biosimspace/recipes/biosimspace + - name: Build Conda package using conda build + run: conda build -c conda-forge -c openbiosim/label/main ${{ github.workspace }}/biosimspace/recipes/biosimspace # - name: Upload Conda package run: python ${{ github.workspace }}/biosimspace/actions/upload_package.py diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 476d7a0fa..adf13e94c 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -46,14 +46,12 @@ jobs: python-version: ${{ matrix.python-version }} activate-environment: bss_build miniforge-version: latest - miniforge-variant: Mambaforge - use-mamba: true # - name: Clone the feature branch run: git clone -b ${{ github.head_ref }} --single-branch https://github.com/${{ env.REPO }} biosimspace # - name: Setup Conda - run: mamba install -y -c conda-forge boa anaconda-client packaging pip-requirements-parser + run: conda install -y -c conda-forge boa anaconda-client packaging pip-requirements-parser # - name: Update Conda recipe run: python ${{ github.workspace }}/biosimspace/actions/update_recipe.py @@ -61,10 +59,10 @@ jobs: - name: Prepare build location run: mkdir ${{ github.workspace }}/build # - - name: Build Conda package using mamba build using main channel + - name: Build Conda package using conda build using main channel if: ${{ github.base_ref == 'main' }} - run: conda mambabuild -c conda-forge -c openbiosim/label/main ${{ github.workspace }}/biosimspace/recipes/biosimspace + run: conda build -c conda-forge -c openbiosim/label/main ${{ github.workspace }}/biosimspace/recipes/biosimspace # - - name: Build Conda package using mamba build using dev channel + - name: Build Conda package using conda build using dev channel if: ${{ github.base_ref != 'main' }} - run: conda mambabuild -c conda-forge -c openbiosim/label/dev ${{ github.workspace }}/biosimspace/recipes/biosimspace + run: conda build -c conda-forge -c openbiosim/label/dev ${{ github.workspace }}/biosimspace/recipes/biosimspace diff --git a/SECURITY.md b/SECURITY.md index 96ba0bfe1..2b2d8dcf2 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -4,13 +4,13 @@ As we have limited resource, we only support the latest major release of BioSimSpace with security updates. For example, if the current version -is 2023.1.0, then only versions 2023.1.0 to 2023.1.X wil have updates, -which will be released as 2023.1.X+1. +is 2024.1.0, then only versions 2024.1.0 to 2024.1.X wil have updates, +which will be released as 2024.1.X+1. | Version | Supported | | ------- | ------------------ | -| 2023.5.x | :white_check_mark: | -| < 2023.5.x| :x: | +| 2024.1.x | :white_check_mark: | +| < 2024.1.x| :x: | ## Reporting a Vulnerability diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index 44d79ec3b..70f896098 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -9,6 +9,19 @@ company supporting open-source development of fostering academic/industrial coll within the biomolecular simulation community. Our software is hosted via the `OpenBioSim` `GitHub `__ organisation. +`2024.2.0 `_ - Jul 09 2024 +------------------------------------------------------------------------------------------------- + +* Fixed incorect use of ``self`` in :func:`Trajectory.getFrame ` function (`#281 `__). +* Use SDF as an intermediate for ``antechamber`` if the original molecule was loaded from that format (`#287 `__). +* Detect dummy atoms by checking ``element`` *and* ``ambertype`` properties when creating ``SOMD`` pert files (`#289 `__). +* Add missing ``match_water`` kwarg to ``prepareFEP`` node (`#292 `__). +* Add protein free-energy perturbation functionality (`@akalpokas `__). +* Ensure that the LJ sigma parameter for perturbed atoms is non-zero (`#295 `__). +* Fixed return type docstrings for functions in the :mod:`BioSimSpace.Parameters` module (`#298 `__). +* Don't use ``sire.legacy.Base.wrap`` with the ``file_format`` property to avoid (incorrect) auto string to unit conversion of ``mol2`` to moles squared (`#300 `__). +* Expose ``SOMD`` torsion modification kwargs (`#302 `__). + `2024.1.0 `_ - Apr 15 2024 ------------------------------------------------------------------------------------------------- diff --git a/doc/source/tutorials/images/aldose_reductase_mutation_mapping.png b/doc/source/tutorials/images/aldose_reductase_mutation_mapping.png new file mode 100644 index 000000000..21e12ac23 Binary files /dev/null and b/doc/source/tutorials/images/aldose_reductase_mutation_mapping.png differ diff --git a/doc/source/tutorials/images/pfep_thermodynamic_cycle.png b/doc/source/tutorials/images/pfep_thermodynamic_cycle.png new file mode 100644 index 000000000..7a5191da5 Binary files /dev/null and b/doc/source/tutorials/images/pfep_thermodynamic_cycle.png differ diff --git a/doc/source/tutorials/images/ubiquitin_mutation_mapping.png b/doc/source/tutorials/images/ubiquitin_mutation_mapping.png new file mode 100644 index 000000000..ae3ef4b3b Binary files /dev/null and b/doc/source/tutorials/images/ubiquitin_mutation_mapping.png differ diff --git a/doc/source/tutorials/index.rst b/doc/source/tutorials/index.rst index 7d911f076..612ad44f6 100644 --- a/doc/source/tutorials/index.rst +++ b/doc/source/tutorials/index.rst @@ -23,3 +23,4 @@ please :doc:`ask for support. <../support>` crystal_water hydration_freenrg metadynamics + protein_mutations diff --git a/doc/source/tutorials/protein_mutations.rst b/doc/source/tutorials/protein_mutations.rst new file mode 100644 index 000000000..50ed78ce2 --- /dev/null +++ b/doc/source/tutorials/protein_mutations.rst @@ -0,0 +1,396 @@ +Alchemical Protein Mutations +============================ + +In this tutorial you will learn how to use BioSimSpace’s mapping +functionality to set up alchemical calculations in order to compute the +change in the binding affinity of a ligand as a result of a protein +mutation. Specically, we are going to focus on two proteins, first a set +up of a single alchemical point mutation on ubiquitin, and second a set +up on `aldose +reductase `__ (AR), +which is a drug target for the treatment of diabetic nephropathy. It is +recommended to complete `previous BioSimSpace +tutorials `__ +before attempting this one. + +The relative change in the binding affinity as a result of a mutation, +:math:`\Delta \Delta G_{mut}` can be calculated from the difference +between free energy of mutation in the holo (bound) and apo (unbound) +simulation legs, i.e.: + +.. image:: images/pfep_thermodynamic_cycle.png + :width: 800 + +.. math:: + + + \Delta \Delta G_{mut} = \Delta G_{holo} - \Delta G_{apo} + +To get started, let’s go through a simple example of generating the +required input files in order to set up an alchemical mutation. + +Simple Case - Input File Generation +----------------------------------- + +In order to create an alchemical protein system in BioSimSpace, we need +two input protein structures, a wild-type and a mutant. We also need to +make sure that the atom ordering between the two proteins is identical. +Don’t worry, this is an easy assumption to satisfy. We will load a +structure ``1UBQ`` via `sire `__, which +comes with bundled with BioSimSpace: + +.. code:: ipython3 + + import BioSimSpace as BSS + import sire as sr + mols = sr.load("1UBQ") + + +.. parsed-literal:: + + INFO:rdkit:Enabling RDKit 2024.03.3 jupyter extensions + INFO:numexpr.utils:Note: NumExpr detected 20 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. + INFO:numexpr.utils:NumExpr defaulting to 8 threads. + + +.. parsed-literal:: + + Using cached download of 'https://files.rcsb.org/download/1UBQ.pdb.gz'... + Using cached unzipped file './1UBQ.pdb'... + + +There are multiple of ways of generating a mutant structure from a +wild-type protein, some examples are: - `Pymol Mutagenesis +Plugin `__ (when exporting +the mutant structure, you want to make you select ‘retain atom ids’ +under ‘PDB Options’, or pass both input structures through *pdb4amber*) +- +`HTMD `__ +- `FoldX `__ - +`pdb4amber `__ + +For this simple case we are going to use *pdb4amber* to mutate a +threonine at position 9 to an alanine residue. First we are going to +pass the wild-type protein from the crystal structure through +*pdb4amber* in order create a consistent atom ordering between wild-type +and mutant structures: + +.. code:: ipython3 + + !pdb4amber --reduce --dry --add-missing-atoms -o 1UBQ_dry_wt.pdb 1UBQ.pdb + + +.. parsed-literal:: + + + ================================================== + Summary of pdb4amber for: 1UBQ.pdb + =================================================== + + ----------Chains + The following (original) chains have been found: + A + + ---------- Alternate Locations (Original Residues!)) + + The following residues had alternate locations: + None + -----------Non-standard-resnames + + + ---------- Missing heavy atom(s) + + None + + +Next, we are going to create a mutant structure: + +.. code:: ipython3 + + !pdb4amber --reduce --dry -o 1UBQ_dry_t9a.pdb -m "9-ALA" --add-missing-atoms 1UBQ_dry_wt.pdb + + +.. parsed-literal:: + + + ================================================== + Summary of pdb4amber for: 1UBQ_dry_wt.pdb + =================================================== + + ----------Chains + The following (original) chains have been found: + + + ---------- Alternate Locations (Original Residues!)) + + The following residues had alternate locations: + None + -----------Non-standard-resnames + + + ---------- Missing heavy atom(s) + + ALA_9 misses 1 heavy atom(s) + + +.. container:: alert alert-block alert-warning + + Warning: This is a simple, but ultimately a crude way of generating a + mutant structure. Different factors such as sidechain rotomers, + packing and protonation states need to be taken into the account in + order to accurately and robustly describe the mutant end-state. + +Simple Case - Alchemical System Generation +------------------------------------------ + +Now that correct input files have been created, we can now proceed to +create an alchemical protein in BioSimSpace. Let’s load our two +proteins: + +.. code:: ipython3 + + protein_wt = BSS.IO.readMolecules("1UBQ_dry_wt.pdb")[0] + protein_mut = BSS.IO.readMolecules("1UBQ_dry_t9a.pdb")[0] + +Next, we want to parametrise them with our forcefield of choice: + +.. code:: ipython3 + + protein_wt = BSS.Parameters.ff14SB(protein_wt).getMolecule() + protein_mut = BSS.Parameters.ff14SB(protein_mut).getMolecule() + +Now we want to compute the mapping between the two proteins, first let’s +figure out the residue index of our residue of interest (ROI): + +.. code:: ipython3 + + protein_wt.getResidues()[7:10] + + + + +.. parsed-literal:: + + [, + , + ] + + + +.. code:: ipython3 + + protein_mut.getResidues()[7:10] + + + + +.. parsed-literal:: + + [, + , + ] + + + +We can see that the residue with the index value of 8 are different +between the two proteins. Let’s pass this value to the +```BioSimSpace.Align.matchAtoms`` `__ +function: + +.. code:: ipython3 + + mapping = BSS.Align.matchAtoms(molecule0=protein_wt, molecule1=protein_mut, roi=[8]) + +.. container:: alert alert-block alert-info + + Note: You can also pass multiple residues of interest indices to the + mapping if you wish to mutate several residues simultaneously. + +Now that the mapping has been computed, we can visualise it: + +.. code:: ipython3 + + BSS.Align.viewMapping(protein_wt, protein_mut, mapping, roi=8, pixels=500) + + + +.. image:: images/ubiquitin_mutation_mapping.png + :width: 800 + + +The computed atom mapping shows that both hydroxyl and methyl groups in +the threonine side chain will be transformed into hydrogen atoms +respectively. We can now proceed to align the two residues of interest: + +.. code:: ipython3 + + aligned_wt = BSS.Align.rmsdAlign(molecule0=protein_wt, molecule1=protein_mut, roi=[8]) + +Finally, we can create a merged alchemical protein system: + +.. code:: ipython3 + + merged_protein = BSS.Align.merge(aligned_wt, protein_mut, mapping, roi=[8]) + +The alchemical protein can now be solvated, ionised and exported to +different file formats, for example GROMACS or `SOMD2, our OpenMM-based +FEP engine `__: + +.. code:: ipython3 + + merged_system = merged_protein.toSystem() + + # solvate the system with the padding of 15 angstroms + padding = 15 * BSS.Units.Length.angstrom + box_min, box_max = merged_system.getAxisAlignedBoundingBox() + box_size = [y - x for x, y in zip(box_min, box_max)] + box_sizes = [x + padding for x in box_size] + + box, angles = BSS.Box.rhombicDodecahedronHexagon(max(box_sizes)) + solvated_system = BSS.Solvent.tip3p(molecule=merged_system, box=box, angles=angles, ion_conc=0.15) + +.. code:: ipython3 + + # export the solvated system to GROMACS input files + BSS.IO.saveMolecules("apo_ubiquitin_t9a", solvated_system, ["gro87", "grotop"]) + +.. code:: ipython3 + + # export the solvated system to SOMD2 input file + BSS.Stream.save(solvated_system, "apo_ubiquitin_t9a") + +Aldose Reductase - Alchemical System Generation +=============================================== + +Apo System +---------- + +Now we are going to focus on the aldose reductase system and set up an +alchemical transformation in both apo and holo forms of the protein. The +input files (2PDG_8.0) were taken from the SI of a `paper by Aldeghi et. +al `__, +residue 47 mutated via PyMol (V47I), and standardised via *pdb4amber*. + +.. code:: ipython3 + + protein_wt = BSS.IO.readMolecules(BSS.IO.expand(BSS.tutorialUrl(), "aldose_reductase_dry.pdb"))[0] + protein_mut = BSS.IO.readMolecules(BSS.IO.expand(BSS.tutorialUrl(), "aldose_reductase_v47i_dry.pdb"))[0] + +We can use ``ensure_compatible=False`` in order to get tLEaP to re-add +the hydrogens for us: + +.. code:: ipython3 + + protein_wt = BSS.Parameters.ff14SB(protein_wt, ensure_compatible=False).getMolecule() + protein_mut = BSS.Parameters.ff14SB(protein_mut, ensure_compatible=False).getMolecule() + +This time we are going to automatically detect the different residues +between the two proteins: + +.. code:: ipython3 + + roi = [] + for i, res in enumerate(protein_wt.getResidues()): + if res.name() != protein_mut.getResidues()[i].name(): + print(res, protein_mut.getResidues()[i]) + roi.append(res.index()) + + +.. parsed-literal:: + + + + +We can then pass these residue indices to the mapping function as +before: + +.. code:: ipython3 + + mapping = BSS.Align.matchAtoms(molecule0=protein_wt, molecule1=protein_mut, roi=roi) + +.. code:: ipython3 + + BSS.Align.viewMapping(protein_wt, protein_mut, mapping, roi=roi[0], pixels=500) + + + +.. image:: images/aldose_reductase_mutation_mapping.png + :width: 800 + + +The mapping shows that the perturbation will transform a hydrogen to a +methyl group. Is this what we would expect for a valine to isoleucine +transformation? If we are happy, we can proceed with the rest of the set +up as before: + +.. code:: ipython3 + + aligned_wt = BSS.Align.rmsdAlign(molecule0=protein_wt, molecule1=protein_mut, roi=roi) + merged_protein = BSS.Align.merge(aligned_wt, protein_mut, mapping, roi=roi) + +.. code:: ipython3 + + merged_system = merged_protein.toSystem() + +.. code:: ipython3 + + padding = 15 * BSS.Units.Length.angstrom + + box_min, box_max = merged_system.getAxisAlignedBoundingBox() + box_size = [y - x for x, y in zip(box_min, box_max)] + box_sizes = [x + padding for x in box_size] + +.. code:: ipython3 + + box, angles = BSS.Box.rhombicDodecahedronHexagon(max(box_sizes)) + solvated_system = BSS.Solvent.tip3p(molecule=merged_system, box=box, angles=angles, ion_conc=0.15) + +.. code:: ipython3 + + BSS.IO.saveMolecules("apo_aldose_reductase_v47i", solvated_system, ["gro87", "grotop"]) + +Holo System +----------- + +To set up a holo (bound) system, we are going to load in the associated +ligand and the cofactor of aldose reductase: + +.. code:: ipython3 + + ligand_47d = BSS.IO.readMolecules(BSS.IO.expand(BSS.tutorialUrl(), ["ligand_47_gaff2.gro", "ligand_47_gaff2.top"]))[0] + cofactor_nap = BSS.IO.readMolecules(BSS.IO.expand(BSS.tutorialUrl(), ["cofactor_nap_gaff2.gro", "cofactor_nap_gaff2.top"]))[0] + +We can use BioSimSpace’s AMBER parametrisation pipeline if we wish to, +but in this case the ligands have been parametrised for us so we can +skip the following cell: + +.. code:: ipython3 + + ligand_47d = BSS.Parameters.gaff2(ligand_47d, charge_method="BCC", net_charge=-1).getMolecule() + cofactor_nap = BSS.Parameters.gaff2(cofactor_nap, charge_method="BCC", net_charge=-4).getMolecule() + +We can simply add the ligands to our alchemical protein in order to +create an alchemical holo system. This way we are assuming that the +ligands are already placed correctly with respect to the protein: + +.. code:: ipython3 + + merged_system = merged_protein + ligand_47d + cofactor_nap + +As before we can now proceed to solvate, ionise and export our prepared +system or use BioSimSpace’s functionallity to `further set up and +execute the alchemical +simulations `__. + +.. code:: ipython3 + + padding = 15 * BSS.Units.Length.angstrom + + box_min, box_max = merged_system.getAxisAlignedBoundingBox() + box_size = [y - x for x, y in zip(box_min, box_max)] + box_sizes = [x + padding for x in box_size] + + box, angles = BSS.Box.rhombicDodecahedronHexagon(max(box_sizes)) + solvated_system = BSS.Solvent.tip3p(molecule=merged_system, box=box, angles=angles, ion_conc=0.15) + + BSS.IO.saveMolecules("holo_aldose_reductase_v47i", solvated_system, ["gro87", "grotop"]) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index bf4551787..1db09c51a 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -37,6 +37,8 @@ import os as _os import subprocess as _subprocess import sys as _sys +from typing import Any, Collection, Optional +from itertools import chain from .._Utils import _try_import, _have_imported, _assert_imported @@ -52,6 +54,8 @@ from rdkit import Chem as _Chem from rdkit.Chem import rdFMCS as _rdFMCS from rdkit import RDLogger as _RDLogger + from rdkit.Chem import Draw + from rdkit.Chem import AllChem # Disable RDKit warnings. _RDLogger.DisableLog("rdApp.*") @@ -720,6 +724,7 @@ def matchAtoms( timeout=5 * _Units.Time.second, complete_rings_only=True, max_scoring_matches=1000, + roi=None, property_map0={}, property_map1={}, ): @@ -729,7 +734,7 @@ def matchAtoms( When requesting more than one match, the mappings will be sorted using a scoring function and returned in order of best to worst score. (Note that, depending on the scoring function the "best" score may have the - lowest value.). + lowest value). Parameters ---------- @@ -751,7 +756,7 @@ def matchAtoms( computing the above RMSD score. - "rmsd_flex_align" Flexibly align molecule0 to molecule1 based on the mapping - before computing the above RMSD score. (based on the + before computing the above RMSD score. (Requires the 'fkcombu'. package: https://pdbj.org/kcombu) matches : int @@ -778,6 +783,10 @@ def matchAtoms( option is only relevant to MCS performed using RDKit and will be ignored when falling back on Sire. + roi : list + The region of interest to match. + Consists of a list of ROI residue indices. + property_map0 : dict A dictionary that maps "properties" in molecule0 to their user defined values. This allows the user to refer to properties @@ -826,8 +835,56 @@ def matchAtoms( >>> import BioSimSpace as BSS >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1, prematch={0 : 10, 3 : 7}) + + Find the best maximum common substructure mapping between two molecules + with a region of interest defined as a list of residues. + + >>> import BioSimSpace as BSS + >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1, roi=[12]) + + Find the mapping between two molecules with multiple regions of interest. + + >>> import BioSimSpace as BSS + >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1, roi=[12, 13, 14]) """ + if roi is None: + return _matchAtoms( + molecule0=molecule0, + molecule1=molecule1, + scoring_function=scoring_function, + matches=matches, + return_scores=return_scores, + prematch=prematch, + timeout=timeout, + complete_rings_only=complete_rings_only, + max_scoring_matches=max_scoring_matches, + property_map0=property_map0, + property_map1=property_map1, + ) + else: + return _roiMatch( + molecule0=molecule0, + molecule1=molecule1, + roi=roi, + use_kartograf=False, + kartograf_kwargs={}, + ) + + +def _matchAtoms( + molecule0, + molecule1, + scoring_function, + matches, + return_scores, + prematch, + timeout, + complete_rings_only, + max_scoring_matches, + property_map0, + property_map1, +): # A list of supported scoring functions. scoring_functions = ["RMSD", "RMSDALIGN", "RMSDFLEXALIGN"] @@ -897,9 +954,6 @@ def matchAtoms( # Convert the timeout to seconds and take the value as an integer. timeout = int(timeout.seconds().value()) - # Create the working directory. - work_dir = _Utils.WorkDir() - # Use RDKkit to find the maximum common substructure. try: @@ -1030,7 +1084,349 @@ def matchAtoms( return mappings[0:matches] -def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map1={}): +def _kartograf_map(molecule0, molecule1, kartograf_kwargs): + """ + A wrapper function for kartograf mapping algorithm. + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule of interest. + + molecule1 : :class:`Molecule ` + The reference molecule. + + kartograf_kwargs : dict + A dictionary of keyword arguments to be passed to kartograf. + + Returns + ------- + + kartograf_mapping : gufe.mapping.ligandatommapping.LigandAtomMapping + The kartograf mapping object. + + """ + # try to import kartograf + try: + from kartograf.atom_aligner import align_mol_shape as _align_mol_shape + from kartograf.atom_mapping_scorer import ( + MappingRMSDScorer as _MappingRMSDScorer, + ) + from kartograf import ( + KartografAtomMapper, + SmallMoleculeComponent as _SmallMoleculeComponent, + ) + except ImportError: + raise ImportError( + "Unable to import Kartograf. Make sure Kartograf is installed properly to use this function." + ) + + # Validate input + if not isinstance(molecule0, _Molecule): + raise TypeError( + "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if not isinstance(molecule1, _Molecule): + raise TypeError( + "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + from ..Convert import toRDKit as _toRDKit + + rdkit_mol0 = _toRDKit(molecule0) + rdkit_mol1 = _toRDKit(molecule1) + rdkit_mols = [rdkit_mol0, rdkit_mol1] + + # build small molecule components + mol0, mol1 = [_SmallMoleculeComponent.from_rdkit(m) for m in rdkit_mols] + + # align molecules first + a_mol1 = _align_mol_shape(mol1, ref_mol=mol0) + + # build Kartograf Atom Mapper + mapper = KartografAtomMapper(**kartograf_kwargs) + + # get the mapping + kartograf_mapping = next(mapper.suggest_mappings(mol0, a_mol1)) + + # score the mapping + rmsd_scorer = _MappingRMSDScorer() + score = rmsd_scorer(mapping=kartograf_mapping) + print(f"RMSD score: {score:.2f}") + + return kartograf_mapping + + +def _roiMatch( + molecule0, + molecule1, + roi, + **kwargs, +): + """ + Matching of two molecules based on a region of interest (ROI). + The ROI is defined as a list of residues in the molecule/protein. + The function will attempt to match the ROI in the two molecules and + return the mapping between the two molecules. Multiple ROIs can be + provided. + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule of interest. + + molecule1 : :class:`Molecule ` + The reference molecule. + + roi : list + The region of interest to match. + Consists of a list of ROI residue indices + + use_kartograf : bool, optional, default=False + If set to True, will use the kartograf algorithm to match the + molecules. + + kartograf_kwargs : dict, optional, default={} + A dictionary of keyword arguments to be passed to kartograf. + + Returns + ------- + + full_mapping : dict + A dictionary of the mapping between the two molecules. + + Notes + ----- + + The key assumption of this function is that the two molecules are + structurally identical except for the region of interest (ROI). The ROI + could be a point mutation, or a residue that has been covalently modified. + The function will attempt to match the atoms in the ROI based on the + maximum common substructure (MCS) algorithm. First, the ROI is extracted + from the two molecules and then the atoms in the ROI are matched using + a mapping function such as BioSimSpace.Align.matchAtoms for example. + The function will return the mapping between the two molecules. + This "relative" mapping will then be used to map the atoms in the ROI to + the "absolute" indices in the molecule. + So for example the relative mapping could be {0: 3, 1: 2, 2: 5} and + the absolute mapping could be {100: 103, 101: 102, 102: 105}. This way we + can bypass the need to map the entire molecule and only focus on the ROI, + which is significantly faster for large molecules. The rest of the mapping + is then composed of atoms before the ROI (pre-ROI) and after the ROI. + Every time we map the atoms in the ROI, we append the ROI + mapping to the pre-ROI mapping, which will then be used as the pre-ROI + mapping for the next ROI in the list. + + Examples + -------- + + Find the best maximum common substructure mapping between two molecules + with a region of interest defined as a list of residues. + + >>> import BioSimSpace as BSS + >>> mapping = BSS.Align._align._roiMatch(molecule0, molecule1, roi=[12]) + + Find the mapping between two molecules with multiple regions of interest. + + >>> import BioSimSpace as BSS + >>> mapping = BSS.Align._align._roiMatch(molecule0, molecule1, roi=[12, 13, 14]) + + Find the best maximum common substructure mapping between two molecules, + using Kartograf as the MCS algorithm. + + >>> import BioSimSpace as BSS + >>> mapping = BSS.Align._align._roiMatch(molecule0, molecule1, roi=[12], use_kartograf=True) + """ + + # Validate input + if not isinstance(molecule0, _Molecule): + raise TypeError( + "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if not isinstance(molecule1, _Molecule): + raise TypeError( + "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + if roi is None: + raise ValueError("No region of interest (ROI) has been provided.") + else: + _validate_roi([molecule0, molecule1], roi) + + # Check kwargs + use_kartograf = kwargs.get("use_kartograf", False) + kartograf_kwargs = kwargs.get("kartograf_kwargs", {}) + + # Make sure that the atoms in the pre-ROI region between two molecules are + # in the same order. While the residue sequences between two molecules + # might be the same, saving the molecules in different formats/editors + # might change the order of the atoms. The mapping function will not work + # if the atoms are not in the same order outside the ROI region. + # We will only test the first residue in the protein, as doing this for + # every residue would be computationally expensive. + if roi[0] != 0: + molecule0_res = molecule0.getResidues()[0] + molecule1_res = molecule1.getResidues()[0] + if [a.name() for a in molecule0_res.getAtoms()] != [ + b.name() for b in molecule1_res.getAtoms() + ]: + raise ValueError( + "The atoms outside the ROI region between the two molecules are not in the same order." + ) + # If the ROI is the first residue, then we will test the atoms in the last + # residue of the molecule. + else: + molecule0_res = molecule0.getResidues()[-1] + molecule1_res = molecule1.getResidues()[-1] + if [a.name() for a in molecule0_res.getAtoms()] != [ + b.name() for b in molecule1_res.getAtoms() + ]: + raise ValueError( + "The atoms outside the ROI region between the two molecules are not in the same order." + ) + + # Get the atoms before the ROI. + # This is being done so that when we map the atoms in ROI, we can append + # the ROI mapping to this pre-ROI mapping which will then be used as + # the pre-ROI mapping for the next ROI in the list, i.e. + # pre_roi_mapping = pre_roi_mapping + roi mapping + mapping to next ROI + pre_roi_molecule0 = molecule0.search(f"residue[0:{roi[0]}]") + pre_roi_atom_idx_molecule0 = [a.index() for a in pre_roi_molecule0.atoms()] + + pre_roi_molecule1 = molecule1.search(f"residue[0:{roi[0]}]") + pre_roi_atom_idx_molecule1 = [a.index() for a in pre_roi_molecule1.atoms()] + + pre_roi_mapping = dict(zip(pre_roi_atom_idx_molecule0, pre_roi_atom_idx_molecule1)) + + # Loop over the residues of interest + for i, res_idx in enumerate(roi): + molecule0_roi = molecule0.getResidues()[res_idx] + molecule1_roi = molecule1.getResidues()[res_idx] + + # Warn if matching between the same residues, in a case where we are + # transforming from one enantiomer to another, the atomtypes will + # be the same and trigger this warning. + if ( + molecule0_roi.name() == molecule1_roi.name() + and molecule0_roi.nAtoms() == molecule1_roi.nAtoms() + ): + molecule0_atoms = [a.name() for a in molecule0_roi.getAtoms()] + molecule1_atoms = [a.name() for a in molecule1_roi.getAtoms()] + if molecule0_atoms == molecule1_atoms: + _warnings.warn( + f"Residues {res_idx} between molecule0 and molecule1 have " + "identical atomtypes, which means you are likely attempting " + "to match two identical residues." + ) + + res0_idx = [a.index() for a in molecule0_roi] + res1_idx = [a.index() for a in molecule1_roi] + + # Extract the residues of interest from the molecules + res0_extracted = molecule0.extract(res0_idx) + res1_extracted = molecule1.extract(res1_idx) + + if use_kartograf: + kartograf_mapping = _kartograf_map( + res0_extracted, res1_extracted, kartograf_kwargs + ) + mapping = kartograf_mapping.componentA_to_componentB + else: + mapping = matchAtoms( + res0_extracted, + res1_extracted, + ) + + # Look up the absolute atom indices in the molecule + res0_lookup_table = list(mapping.keys()) + absolute_mapped_atoms_res0 = [res0_idx[i] for i in res0_lookup_table] + + res1_lookup_table = list(mapping.values()) + absolute_mapped_atoms_res1 = [res1_idx[i] for i in res1_lookup_table] + + absolute_roi_mapping = dict( + zip(absolute_mapped_atoms_res0, absolute_mapped_atoms_res1) + ) + + # If we are at the last residue of interest, we don't need to worry + # too much about the after ROI region as this region will be all of the + # molecule atoms after the last residue of interest. + # In the case when we are not at the last residue of interest, + # we need to map the atoms to the next ROI. + if res_idx != roi[-1]: + + # If the next ROI residue index in the ROI list is next to + # the current ROI index, after_roi atom index list will be empty + # i.e. if we're currently at residue 10 and the next ROI is 11, + # we don't need to map the atoms. + # If we were at residue 10 and the next residue of interest is 12, + # we would need to map the atoms between residues 10 and 12. + if roi[i + 1] - roi[i] == 1: + after_roi_atom_idx_molecule0 = [] + after_roi_atom_idx_molecule1 = [] + else: + after_roi_molecule0 = molecule0.search( + f"residue[{res_idx+1}:{roi[i+1]}]" + ) + after_roi_atom_idx_molecule0 = [ + a.index() for a in after_roi_molecule0.atoms() + ] + + after_roi_molecule1 = molecule1.search( + f"residue[{res_idx+1}:{roi[i+1]}]" + ) + after_roi_atom_idx_molecule1 = [ + b.index() for b in after_roi_molecule1.atoms() + ] + + after_roi_mapping = dict( + zip( + after_roi_atom_idx_molecule0, + after_roi_atom_idx_molecule1, + ) + ) + + # Append the mappings to the pre_roi_mapping, which will then be + # used as the pre_roi_mapping for the next ROI in the list. + pre_roi_mapping = { + **pre_roi_mapping, + **absolute_roi_mapping, + **after_roi_mapping, + } + else: + # Get all of the remaining atoms after the last ROI + after_roi_molecule0 = molecule0.search(f"residue[{res_idx+1}:]") + after_roi_atom_idx_molecule0 = [ + a.index() for a in after_roi_molecule0.atoms() + ] + + after_roi_molecule1 = molecule1.search(f"residue[{res_idx+1}:]") + after_roi_atom_idx_molecule1 = [ + b.index() for b in after_roi_molecule1.atoms() + ] + + after_roi_mapping = dict( + zip( + after_roi_atom_idx_molecule0, + after_roi_atom_idx_molecule1, + ) + ) + + # Combine the dictionaries to get the full mapping + full_mapping = { + **pre_roi_mapping, + **absolute_roi_mapping, + **after_roi_mapping, + } + + return full_mapping + + +def rmsdAlign( + molecule0, molecule1, mapping=None, roi=None, property_map0={}, property_map1={} +): """ Align atoms in molecule0 to those in molecule1 using the mapping between matched atom indices. The molecule is aligned using rigid-body @@ -1050,6 +1446,10 @@ def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map mapping : dict A dictionary mapping atoms in molecule0 to those in molecule1. + roi : list + The region of interest to align. + Consists of a list of ROI residue indices. + property_map0 : dict A dictionary that maps "properties" in molecule0 to their user defined values. This allows the user to refer to properties @@ -1079,8 +1479,38 @@ def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map >>> import BioSimSpace as BSS >>> molecule0 = BSS.Align.rmsdAlign(molecule0, molecule1) + + Align residue of interest from molecule0 to molecule1. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.rmsdAlign(molecule0, molecule1, roi=[12]) + + Align multiple residues of interest from molecule0 to molecule1. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.rmsdAlign(molecule0, molecule1, roi=[12,13]) """ + if roi is None: + return _rmsdAlign( + molecule0, + molecule1, + mapping=mapping, + property_map0=property_map0, + property_map1=property_map1, + ) + else: + return _roiAlign( + molecule0, + molecule1, + roi=roi, + align_function="rmsd", + property_map0=property_map0, + property_map1=property_map1, + ) + + +def _rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map1={}): if not isinstance(molecule0, _Molecule): raise TypeError( "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" @@ -1143,6 +1573,7 @@ def flexAlign( molecule1, mapping=None, fkcombu_exe=None, + roi=None, property_map0={}, property_map1={}, ): @@ -1166,6 +1597,10 @@ def flexAlign( Path to the fkcombu executable. If None is passed, then BioSimSpace will attempt to find fkcombu by searching your PATH. + roi : list + The region of interest to align. + Consists of a list of ROI residue indices. + property_map0 : dict A dictionary that maps "properties" in molecule0 to their user defined values. This allows the user to refer to properties @@ -1195,8 +1630,47 @@ def flexAlign( >>> import BioSimSpace as BSS >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1) + + Align residue of interest from molecule0 to molecule1. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1, roi=[12]) + + Align multiple residues of interest from molecule0 to molecule1. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1, roi=[12,13]) """ + if roi is None: + return _flexAlign( + molecule0, + molecule1, + mapping=mapping, + fkcombu_exe=fkcombu_exe, + property_map0=property_map0, + property_map1=property_map1, + ) + else: + return _roiAlign( + molecule0, + molecule1, + roi=roi, + align_function="rmsd_flex_align", + fkcombu_exe=fkcombu_exe, + property_map0=property_map0, + property_map1=property_map1, + ) + + +def _flexAlign( + molecule0, + molecule1, + mapping, + fkcombu_exe, + property_map0, + property_map1, +): # Check that we found fkcombu in the PATH. if fkcombu_exe is None: if _fkcombu_exe is None: @@ -1297,6 +1771,134 @@ def flexAlign( return _Molecule(molecule0) +def _roiAlign( + molecule0, + molecule1, + roi, + align_function, + property_map0, + property_map1, + fkcombu_exe=None, +): + """ + Flexibly align residue of interest (ROI) in molecule0 to that in molecule1 + using BioSimSpace.Align._flexAlign(). + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule to align. + + molecule1 : :class:`Molecule ` + The reference molecule. + + roi : list + The region of interest to align. + Consists of a list of ROI residue indices. + + align_function : str + The alignment function used to align atoms. Available options are: + - "rmsd" + Align atoms in molecule0 to those in molecule1 using the mapping + between matched atom indices. + Uses :class:`rmsdAlign ` to align the atoms in the ROI. + - "rmsd_flex_align" + Flexibly align roi from molecule0 to molecule1 based on the mapping. + Uses :class:`flexAlign ` to align the atoms in the ROI. + + fkcombu_exe : str + Path to the fkcombu executable. Will only be used if aligning with + "rmsd_flex_align" function. If None is passed, then BioSimSpace + will attempt to find fkcombu by searching your PATH. + + property_map0 : dict + A dictionary that maps "properties" in molecule0 to their user + defined values. This allows the user to refer to properties + with their own naming scheme, e.g. { "charge" : "my-charge" } + + property_map1 : dict + A dictionary that maps "properties" in molecule1 to their user + defined values. + + Returns + ------- + + molecule : :class:`Molecule ` + The aligned molecule. + """ + + if not isinstance(molecule0, _Molecule): + raise TypeError( + "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if not isinstance(molecule1, _Molecule): + raise TypeError( + "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + if roi is None: + raise ValueError("No region of interest (ROI) has been provided.") + else: + _validate_roi([molecule0, molecule1], roi) + + if align_function not in ["rmsd", "rmsd_flex_align"]: + raise ValueError( + "Invalid alignment function. Available options are: 'rmsd', 'rmsd_flex_align'" + ) + + # Get the property name for the coordinates + prop0 = property_map0.get("coordinates", "coordinates") + + for roi_idx in roi: + res0 = molecule0.getResidues()[roi_idx] + res1 = molecule1.getResidues()[roi_idx] + + res0_idx = [a.index() for a in res0.getAtoms()] + res1_idx = [a.index() for a in res1.getAtoms()] + + # Extract the residues of interest from the molecules + res0_extracted = molecule0.extract(res0_idx) + res1_extracted = molecule1.extract(res1_idx) + + if align_function == "rmsd": + res0_aligned = rmsdAlign(molecule0=res0_extracted, molecule1=res1_extracted) + + elif align_function == "rmsd_flex_align": + res0_aligned = flexAlign( + molecule0=res0_extracted, + molecule1=res1_extracted, + fkcombu_exe=fkcombu_exe, + ) + + # Now update molecule0 with the aligned residue coordinates + mol0 = molecule0._getSireObject() + res0_aligned_coords = res0_aligned._getSireObject().property(prop0) + + # A list to store the updated coordinates for molecule0 + mol0_coords = [] + for i, res in enumerate(mol0.residues()): + if i == roi_idx: + mol0_coords.append(res0_aligned_coords) + else: + mol0_coords.append(res.property(prop0)) + + # Flatten the list + mol0_coords = [item for sublist in mol0_coords for item in sublist] + + # Create a cursor for updating the coordinates property + c = mol0.cursor() + for i, atom in enumerate(c.atoms()): + atom[prop0] = mol0_coords[i] + mol0 = c.commit() + + # Convert the Sire molecule back to a BioSimSpace molecule so we can + # loop over it again if needed + molecule0 = _Molecule(mol0) + + return molecule0 + + def merge( molecule0, molecule1, @@ -1304,6 +1906,7 @@ def merge( allow_ring_breaking=False, allow_ring_size_change=False, force=False, + roi=None, property_map0={}, property_map1={}, ): @@ -1340,6 +1943,10 @@ def merge( takes precedence over 'allow_ring_breaking' and 'allow_ring_size_change'. + roi : list + The region of interest to merge. + Consists of a list of ROI residue indices. + property_map0 : dict A dictionary that maps "properties" in molecule0 to their user defined values. This allows the user to refer to properties @@ -1363,6 +1970,12 @@ def merge( >>> import BioSimSpace as BSS >>> merged = BSS.Align.merge(molecule0, molecule1, mapping) + Merge molecule0 and molecule1 based on a precomputed mapping and a region + of interest. + + >>> import BioSimSpace as BSS + >>> merged = BSS.Align.merge(molecule0, molecule1, mapping, roi=[12]) + Merge molecule0 with molecule1. Since no mapping is passed one will be autogenerated using :class:`matchAtoms ` with default options, following which :class:`rmsdAlign ` @@ -1397,6 +2010,10 @@ def merge( if not isinstance(force, bool): raise TypeError("'force' must be of type 'bool'") + if roi is not None: + if not isinstance(roi, list): + raise TypeError("'roi' must be of type 'list'.") + # The user has passed an atom mapping. if mapping is not None: if not isinstance(mapping, dict): @@ -1426,6 +2043,7 @@ def merge( allow_ring_breaking=allow_ring_breaking, allow_ring_size_change=allow_ring_size_change, force=force, + roi=roi, property_map0=property_map0, property_map1=property_map1, ) @@ -1435,14 +2053,14 @@ def viewMapping( molecule0, molecule1, mapping=None, + roi=None, + pixels=300, property_map0={}, property_map1={}, - style=None, - orientation="horizontal", - pixels=900, + **kwargs, ): """ - Visualise the mapping between molecule0 and molecule1. This draws a 3D + Visualise the mapping between molecule0 and molecule1. This draws a 2D depiction of both molecules with the mapped atoms highlighted in green. Labels specify the indices of the atoms, along with the indices of the atoms to which they map in the other molecule. @@ -1459,6 +2077,12 @@ def viewMapping( mapping : dict A dictionary mapping atoms in molecule0 to those in molecule1. + roi : int + The region of interest to highlight. + + pixels : int + The size in pixels of the 2D drawing. + property_map0 : dict A dictionary that maps "properties" in molecule0 to their user defined values. This allows the user to refer to properties @@ -1468,31 +2092,15 @@ def viewMapping( A dictionary that maps "properties" in molecule1 to their user defined values. - style : dict - Drawing style. See https://3dmol.csb.pitt.edu/doc/$3Dmol.GLViewer.html - for some examples. - - orientation : str - Whether to display the two molecules in a "horizontal" or "vertical" - arrangement. - - pixels : int - The size of the largest view dimension in pixel, i.e. either the - "horizontal" or "vertical" size. - - Returns - ------- - - view : py3Dmol.view - A view of the two molecules with the mapped atoms highlighted and - labelled. + show_adjacent_residues : bool, optional default=False + If set to True, will show neighouring residues to the ROI region. """ - # Adapted from: https://gist.github.com/cisert/d05664d4c98ac1cf86ee70b8700e56a9 - # Only draw within a notebook. if not _is_notebook: return None + else: + from IPython.display import display, Image _assert_imported(_rdkit) @@ -1506,27 +2114,15 @@ def viewMapping( "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" ) + if roi is not None and not isinstance(roi, int): + raise TypeError("'roi' must be of type 'int'") + if not isinstance(property_map0, dict): raise TypeError("'property_map0' must be of type 'dict'") if not isinstance(property_map1, dict): raise TypeError("'property_map1' must be of type 'dict'") - if style is not None: - if not isinstance(style, dict): - raise TypeError("'style' must be of type 'dict'") - - if not isinstance(orientation, str): - raise TypeError("'orientation' must be of type 'str'") - else: - # Convert to lower case and strip whitespace. - orientation = orientation.lower().replace(" ", "") - - if orientation not in ["horizontal", "vertical"]: - raise ValueError( - "'orientation' must be equal to 'horizontal' " "or 'vertical'." - ) - if isinstance(pixels, float): pixels = int(pixels) if not type(pixels) is int: @@ -1534,6 +2130,9 @@ def viewMapping( if pixels <= 0: raise ValueError("pixels' must be > 0!") + # Get kwargs for the view + show_adjacent_residues = kwargs.get("show_adjacent_residues", False) + # The user has passed an atom mapping. if mapping is not None: if not isinstance(mapping, dict): @@ -1552,84 +2151,276 @@ def viewMapping( ) molecule0 = rmsdAlign(molecule0, molecule1, mapping) + if roi is not None: + if show_adjacent_residues: + # Extract the region of interest from the molecules plus one residue on each side. + # residue[roi-1:roi+1] would only extract the ROI residue. + roi0_region = molecule0.search(f"residue[{roi - 2}:{roi + 2}]") + roi1_region = molecule1.search(f"residue[{roi - 2}:{roi + 2}]") + else: + roi0_region = molecule0.search(f"residue[{roi - 1}:{roi + 1}]") + roi1_region = molecule1.search(f"residue[{roi - 1}:{roi + 1}]") + + roi0_idx = [a.index() for a in roi0_region.atoms()] + roi1_idx = [a.index() for a in roi1_region.atoms()] + + molecule0 = molecule0.extract(roi0_idx) + molecule1 = molecule1.extract(roi1_idx) + + # find the key in the mapping that corresponds to the ROI atoms + mapping = {k: v for k, v in mapping.items() if k in roi0_idx} + + # now we need to update the mapping to reflect the new atom indices + mapping = {roi0_idx.index(k): roi1_idx.index(v) for k, v in mapping.items()} + # Convert the molecules to RDKit format. rdmol0 = _Convert.toRDKit(molecule0, property_map=property_map0) rdmol1 = _Convert.toRDKit(molecule1, property_map=property_map1) + text = _draw_mapping(mapping, rdmol0, rdmol1, pixels=pixels) + img = Image(data=text) + display(img) - import py3Dmol as _py3Dmol - # Set grid view properties. - viewer0 = (0, 0) - if orientation == "horizontal": - viewergrid = (1, 2) - viewer1 = (0, 1) - width = pixels - height = int(pixels / 2) - else: - viewergrid = (2, 1) - viewer1 = (1, 0) - width = int(pixels / 2) - height = pixels - - # Create the view. - view = _py3Dmol.view( - linked=False, width=width, height=height, viewergrid=viewergrid - ) +# This code is adopted from OpenFE and is licensed under the MIT license. +# For details, see https://github.com/OpenFreeEnergy/gufe/visualization/mapping_visualization.py +def _match_elements(mol1: _Chem.Mol, idx1: int, mol2: _Chem.Mol, idx2: int) -> bool: + """ + Convenience method to check if elements between two molecules (molA + and molB) are the same. - # Set default drawing style. - if style is None: - style = {"stick": {"colorscheme": "grayCarbon", "linewidth": 0.1}} - - # Add the molecules to the views. - view.addModel(_Chem.MolToMolBlock(rdmol0), "mol0", viewer=viewer0) - view.addModel(_Chem.MolToMolBlock(rdmol1), "mol1", viewer=viewer1) - - # Set the style. - view.setStyle({"model": 0}, style, viewer=viewer0) - view.setStyle({"model": 0}, style, viewer=viewer1) - - # Highlight the atoms from the mapping. - for atom0, atom1 in mapping.items(): - p = rdmol0.GetConformer().GetAtomPosition(atom0) - view.addSphere( - { - "center": {"x": p.x, "y": p.y, "z": p.z}, - "radius": 0.5, - "color": "green", - "alpha": 0.8, - }, - viewer=viewer0, - ) - view.addLabel( - f"{atom0} \u2192 {atom1}", - {"position": {"x": p.x, "y": p.y, "z": p.z}}, - viewer=viewer0, - ) - p = rdmol1.GetConformer().GetAtomPosition(atom1) - view.addSphere( - { - "center": {"x": p.x, "y": p.y, "z": p.z}, - "radius": 0.5, - "color": "green", - "alpha": 0.8, - }, - viewer=viewer1, + Parameters + ---------- + mol1 : RDKit.Mol + RDKit representation of molecule 1. + idx1 : int + Index of atom to check in molecule 1. + mol2 : RDKit.Mol + RDKit representation of molecule 2. + idx2 : int + Index of atom to check in molecule 2. + + Returns + ------- + bool + True if elements are the same, False otherwise. + """ + elem_mol1 = mol1.GetAtomWithIdx(idx1).GetAtomicNum() + elem_mol2 = mol2.GetAtomWithIdx(idx2).GetAtomicNum() + return elem_mol1 == elem_mol2 + + +def _get_unique_bonds_and_atoms( + mapping: dict[int, int], mol1: _Chem.Mol, mol2: _Chem.Mol +) -> dict: + """ + Given an input mapping, returns new atoms, element changes, and + involved bonds. + + Parameters + ---------- + mapping : dict of int:int + Dictionary describing the atom mapping between molecules 1 and 2. + mol1 : RDKit.Mol + RDKit representation of molecule 1. + mol2 : RDKit.Mol + RDKit representation of molecule 2. + + Returns + ------- + uniques : dict + Dictionary containing; unique atoms ("atoms"), new elements + ("elements"), deleted bonds ("bond_deletions) and altered bonds + ("bond_changes) for molecule 1. + """ + + uniques: dict[str, set] = { + "atoms": set(), # atoms which fully don't exist in molB + "elements": set(), # atoms which exist but change elements in molB + "bond_deletions": set(), # bonds which are removed + "bond_changes": set(), # bonds which change + } + + for at in mol1.GetAtoms(): + idx = at.GetIdx() + if idx not in mapping: + uniques["atoms"].add(idx) + elif not _match_elements(mol1, idx, mol2, mapping[idx]): + uniques["elements"].add(idx) + + for bond in mol1.GetBonds(): + bond_at_idxs = [bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()] + for at in chain(uniques["atoms"], uniques["elements"]): + if at in bond_at_idxs: + bond_idx = bond.GetIdx() + + if any(i in uniques["atoms"] for i in bond_at_idxs): + uniques["bond_deletions"].add(bond_idx) + else: + uniques["bond_changes"].add(bond_idx) + + return uniques + + +def _draw_molecules( + d2d, + mols: Collection[_Chem.Mol], + atoms_list: Collection[set[int]], + bonds_list: Collection[set[int]], + atom_colors: Collection[dict[Any, tuple[float, float, float, float]]], + bond_colors: Collection[dict[int, tuple[float, float, float, float]]], + highlight_color: tuple[float, float, float, float], + pixels: int, + atom_mapping: Optional[dict[tuple[int, int], dict[int, int]]] = None, +) -> str: + """ + Internal method to visualize a molecule, possibly with mapping info + + Parameters + ---------- + d2d : + renderer to draw the molecule; currently we only support + rdkit.rdMolDraw2D + mols : Collection[RDKitMol] + molecules to visualize + atoms_list: Collection[Set[int]] + iterable containing one set per molecule in ``mols``, with each set + containing the indices of the atoms to highlight + bonds_list: Collection[Set[int]] + iterable containing one set per molecule in ``mols``, with each set + containing the indices of the atoms involved in bonds to highlight + atom_colors: Collection[Dict[Any, Tuple[float, float, float, float]]] + iterable containing one dict per molecule in ``mols``, with each + dict containing a mapping of RDKit atom to color, expressed as an + RGBA tuple, for atoms that need special coloring (e.g., element + changes) + bond_colors: Collection[dict[int, tuple[float, float, float, float]]] + one dict for each molecule, each dict mapping + highlight_color: Tuple[float, float, float, float] + RGBA tuple for the default highlight color used in the mapping + visualization + pixels: int + size of the 2D image in pixels + atom_mapping: Dict[Tuple[int,int], Dict[int, int]], optional + used to align the molecules to each othter for clearer visualization. + The structure contains the indices of the molecules in mols as key + Tuple[molA, molB] and maps the atom indices via the value Dict[ + molA_atom_idx, molB_atom_idx] + default None + """ + # input standardization: + if atom_mapping is None: + atom_mapping = {} + + if d2d is None: + # select default layout based on number of molecules + grid_x, grid_y = { + 1: (1, 1), + 2: (2, 1), + }[len(mols)] + d2d = Draw.rdMolDraw2D.MolDraw2DCairo( + grid_x * pixels, grid_y * pixels, pixels, pixels ) - view.addLabel( - f"{atom1} \u2192 {atom0}", - {"position": {"x": p.x, "y": p.y, "z": p.z}}, - viewer=viewer1, + + # get molecule name labels + # labels = [m.GetProp("ofe-name") if(m.HasProp("ofe-name")) + # else "test" for m in mols] + + labels = ["molecule0", "molecule1"] + + # squash to 2D + copies = [_Chem.Mol(mol) for mol in mols] + for mol in copies: + AllChem.Compute2DCoords(mol) + + # mol alignments if atom_mapping present + for (i, j), atomMap in atom_mapping.items(): + AllChem.AlignMol( + copies[j], copies[i], atomMap=[(k, v) for v, k in atomMap.items()] ) - # Set background colour. - view.setBackgroundColor("white", viewer=viewer0) - view.setBackgroundColor("white", viewer=viewer1) + # standard settings for visualization + d2d.drawOptions().useBWAtomPalette() + d2d.drawOptions().continousHighlight = False + d2d.drawOptions().setHighlightColour(highlight_color) + d2d.drawOptions().addAtomIndices = True + d2d.DrawMolecules( + copies, + highlightAtoms=atoms_list, + highlightBonds=bonds_list, + highlightAtomColors=atom_colors, + highlightBondColors=bond_colors, + legends=labels, + ) + d2d.FinishDrawing() + return d2d.GetDrawingText() + - # Zoom to molecule. - view.zoomTo(viewer=viewer0) - view.zoomTo(viewer=viewer1) +def _draw_mapping( + mol1_to_mol2: dict[int, int], mol1: _Chem.Mol, mol2: _Chem.Mol, d2d=None, pixels=300 +): + """ + Method to visualise the atom map correspondence between two rdkit + molecules given an input mapping. - return view + Legend: + * Red highlighted atoms: unique atoms, i.e. atoms which are not + mapped. + * Blue highlighted atoms: element changes, i.e. atoms which are + mapped but change elements. + * Red highlighted bonds: any bond which involves at least one + unique atom or one element change. + + Parameters + ---------- + mol1_to_mol2 : dict of int:int + Atom mapping between input molecules. + mol1 : RDKit.Mol + RDKit representation of molecule 1 + mol2 : RDKit.Mol + RDKit representation of molecule 2 + d2d : :class:`rdkit.Chem.Draw.rdMolDraw2D.MolDraw2D` + Optional MolDraw2D backend to use for visualisation. + pixels : int + Size of the 2D image in pixels. + """ + # highlight core element changes differently from unique atoms + # RGBA color value needs to be between 0 and 1, so divide by 255 + RED = (220 / 255, 50 / 255, 32 / 255, 1.0) + BLUE = (0.0, 90 / 255, 181 / 255, 1.0) + mol1_uniques = _get_unique_bonds_and_atoms(mol1_to_mol2, mol1, mol2) + + # invert map + mol2_to_mol1_map = {v: k for k, v in mol1_to_mol2.items()} + mol2_uniques = _get_unique_bonds_and_atoms(mol2_to_mol1_map, mol2, mol1) + + atoms_list = [ + mol1_uniques["atoms"] | mol1_uniques["elements"], + mol2_uniques["atoms"] | mol2_uniques["elements"], + ] + bonds_list = [ + mol1_uniques["bond_deletions"] | mol1_uniques["bond_changes"], + mol2_uniques["bond_deletions"] | mol2_uniques["bond_changes"], + ] + + at1_colors = {at: BLUE for at in mol1_uniques["elements"]} + at2_colors = {at: BLUE for at in mol2_uniques["elements"]} + bd1_colors = {bd: BLUE for bd in mol1_uniques["bond_changes"]} + bd2_colors = {bd: BLUE for bd in mol2_uniques["bond_changes"]} + + atom_colors = [at1_colors, at2_colors] + bond_colors = [bd1_colors, bd2_colors] + + return _draw_molecules( + d2d, + [mol1, mol2], + atoms_list=atoms_list, + bonds_list=bonds_list, + atom_colors=atom_colors, + bond_colors=bond_colors, + highlight_color=RED, + pixels=pixels, + atom_mapping={(0, 1): mol1_to_mol2}, + ) def _score_rdkit_mappings( @@ -2074,6 +2865,35 @@ def _validate_mapping(molecule0, molecule1, mapping, name): ) +def _validate_roi(molecules, roi): + """ + Internal function to validate that a region of interest (ROI) is a list + of integers that are within the range of the molecule. + + Parameters + ---------- + + molecules : list + Consits of a list of :class:`Molecule `. + + roi : list + The region of interest. + Consists of a list of ROI residue indices. + """ + + if not isinstance(roi, list): + raise TypeError("'roi' must be of type 'list'.") + + for mol in molecules: + for idx in roi: + if not isinstance(idx, int): + raise TypeError("'roi' must be a list of integers.") + if idx < 0 or idx > (mol.nResidues() - 1): + raise ValueError( + f"Residue index {idx} is out of range! The molecule contains {mol.nResidues()} residues." + ) + + def _to_sire_mapping(mapping): """ Internal function to convert a regular mapping to Sire AtomIdx format. diff --git a/python/BioSimSpace/Align/_merge.py b/python/BioSimSpace/Align/_merge.py index 7c0b376ab..ba8b61295 100644 --- a/python/BioSimSpace/Align/_merge.py +++ b/python/BioSimSpace/Align/_merge.py @@ -43,6 +43,7 @@ def merge( allow_ring_breaking=False, allow_ring_size_change=False, force=False, + roi=None, property_map0={}, property_map1={}, ): @@ -74,6 +75,10 @@ def merge( takes precedence over 'allow_ring_breaking' and 'allow_ring_size_change'. + roi : list + The region of interest to merge. + Consists of a list of ROI residue indices. + property_map0 : dict A dictionary that maps "properties" in this molecule to their user defined values. This allows the user to refer to properties @@ -135,6 +140,12 @@ def merge( "key:value pairs in 'mapping' must be of type 'Sire.Mol.AtomIdx'" ) + # Validate the region of interest. + if roi is not None: + from ._align import _validate_roi + + _validate_roi([molecule0, molecule1], roi) + # Set 'allow_ring_breaking' and 'allow_ring_size_change' to true if the # user has requested to 'force' the merge, i.e. the 'force' argument # takes precedence. @@ -156,6 +167,10 @@ def merge( # Create the reverse mapping: molecule1 --> molecule0 inv_mapping = {v: k for k, v in mapping.items()} + # Generate the mappings from each molecule to the merged molecule + mol0_merged_mapping = {} + mol1_merged_mapping = {} + # Invert the user property mappings. inv_property_map0 = {v: k for k, v in property_map0.items()} inv_property_map1 = {v: k for k, v in property_map1.items()} @@ -202,35 +217,79 @@ def merge( # Create a new molecule to hold the merged molecule. molecule = _SireMol.Molecule("Merged_Molecule") + # Only part of the ligand is to be merged + if roi is not None: + if molecule0.nResidues() != molecule1.nResidues(): + raise ValueError( + "The two molecules need to have the same number of residues." + ) - # Add a single residue called LIG. - res = molecule.edit().add(_SireMol.ResNum(1)) - res.rename(_SireMol.ResName("LIG")) - - # Create a single cut-group. - cg = res.molecule().add(_SireMol.CGName("1")) - - # Counter for the number of atoms. - num = 1 - - # First add all of the atoms from molecule0. - for atom in molecule0.atoms(): - # Add the atom. - added = cg.add(atom.name()) - added.renumber(_SireMol.AtomNum(num)) - added.reparent(_SireMol.ResIdx(0)) - num += 1 - - # Now add all of the atoms from molecule1 that aren't mapped from molecule0. - for atom in atoms1: - added = cg.add(atom.name()) - added.renumber(_SireMol.AtomNum(num)) - added.reparent(_SireMol.ResIdx(0)) - inv_mapping[atom.index()] = _SireMol.AtomIdx(num - 1) - num += 1 + num = 1 + for idx, (mol0_res, mol1_res) in enumerate( + zip(molecule0.residues(), molecule1.residues()) + ): + res = molecule.edit().add(_SireMol.ResNum(idx + 1)) + if mol0_res.name() == mol1_res.name(): + res.rename(mol0_res.name()) + else: + resname = "MUT" if len(molecule0.residues()) > 1 else "LIG" + res.rename(_SireMol.ResName(resname)) + + cg = res.molecule().add(_SireMol.CGName(f"{idx}")) + for atom in mol0_res.atoms(): + mol0_merged_mapping[atom.index()] = _SireMol.AtomIdx(num - 1) + added = cg.add(atom.name()) + added.renumber(_SireMol.AtomNum(num)) + added.reparent(_SireMol.ResIdx(idx)) + num += 1 + + for atom in mol1_res.atoms(): + if atom.index() in atoms1_idx: + added = cg.add(atom.name()) + added.renumber(_SireMol.AtomNum(num)) + added.reparent(_SireMol.ResIdx(idx)) + mol1_merged_mapping[atom.index()] = _SireMol.AtomIdx(num - 1) + num += 1 + else: + mol1_merged_mapping[atom.index()] = mol0_merged_mapping[ + inv_mapping[atom.index()] + ] + molecule = cg.molecule().commit() + else: + # Add a single residue called LIG. + res = molecule.edit().add(_SireMol.ResNum(1)) + res.rename(_SireMol.ResName("LIG")) + + # Create a single cut-group. + cg = res.molecule().add(_SireMol.CGName("1")) + + # Counter for the number of atoms. + num = 1 + + # First add all of the atoms from molecule0. + for atom in molecule0.atoms(): + # Add the atom. + added = cg.add(atom.name()) + added.renumber(_SireMol.AtomNum(num)) + added.reparent(_SireMol.ResIdx(0)) + mol0_merged_mapping[atom.index()] = _SireMol.AtomIdx(num - 1) + num += 1 + + # Now add all of the atoms from molecule1 that aren't mapped from molecule0. + for atom in molecule1.atoms(): + if atom in atoms1: + added = cg.add(atom.name()) + added.renumber(_SireMol.AtomNum(num)) + added.reparent(_SireMol.ResIdx(0)) + mol1_merged_mapping[atom.index()] = _SireMol.AtomIdx(num - 1) + num += 1 + else: + mol1_merged_mapping[atom.index()] = mol0_merged_mapping[ + inv_mapping[atom.index()] + ] - # Commit the changes to the molecule. - molecule = cg.molecule().commit() + # Commit the changes to the molecule. + molecule = cg.molecule().commit() # Make the molecule editable. edit_mol = molecule.edit() @@ -340,11 +399,12 @@ def merge( # Add the atom properties from molecule0. for atom in molecule0.atoms(): + # Get the atom index in the merged molecule. + idx = mol0_merged_mapping[atom.index()] + # Add an "name0" property. edit_mol = ( - edit_mol.atom(atom.index()) - .setProperty("name0", atom.name().value()) - .molecule() + edit_mol.atom(idx).setProperty("name0", atom.name().value()).molecule() ) # Loop over all atom properties. @@ -358,15 +418,13 @@ def merge( # Add the property to the atom in the merged molecule. edit_mol = ( - edit_mol.atom(atom.index()) - .setProperty(name, atom.property(prop)) - .molecule() + edit_mol.atom(idx).setProperty(name, atom.property(prop)).molecule() ) # Add the atom properties from molecule1. for atom in atoms1: # Get the atom index in the merged molecule. - idx = inv_mapping[atom.index()] + idx = mol1_merged_mapping[atom.index()] # Add an "name0" property. edit_mol = ( @@ -433,8 +491,8 @@ def merge( # Add all of the bonds from molecule0. for bond in bonds0.potentials(): - atom0 = info0.atomIdx(bond.atom0()) - atom1 = info0.atomIdx(bond.atom1()) + atom0 = mol0_merged_mapping[info0.atomIdx(bond.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(bond.atom1())] bonds.set(atom0, atom1, bond.function()) # Loop over all bonds in molecule1. @@ -450,8 +508,8 @@ def merge( exprn = bond.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] # Set the new bond. bonds.set(atom0, atom1, exprn) @@ -478,9 +536,9 @@ def merge( # Add all of the angles from molecule0. for angle in angles0.potentials(): - atom0 = info0.atomIdx(angle.atom0()) - atom1 = info0.atomIdx(angle.atom1()) - atom2 = info0.atomIdx(angle.atom2()) + atom0 = mol0_merged_mapping[info0.atomIdx(angle.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(angle.atom1())] + atom2 = mol0_merged_mapping[info0.atomIdx(angle.atom2())] angles.set(atom0, atom1, atom2, angle.function()) # Loop over all angles in molecule1. @@ -498,9 +556,9 @@ def merge( exprn = angle.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] - atom2 = inv_mapping[atom2] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] + atom2 = mol1_merged_mapping[atom2] # Set the new angle. angles.set(atom0, atom1, atom2, exprn) @@ -527,10 +585,10 @@ def merge( # Add all of the dihedrals from molecule0. for dihedral in dihedrals0.potentials(): - atom0 = info0.atomIdx(dihedral.atom0()) - atom1 = info0.atomIdx(dihedral.atom1()) - atom2 = info0.atomIdx(dihedral.atom2()) - atom3 = info0.atomIdx(dihedral.atom3()) + atom0 = mol0_merged_mapping[info0.atomIdx(dihedral.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(dihedral.atom1())] + atom2 = mol0_merged_mapping[info0.atomIdx(dihedral.atom2())] + atom3 = mol0_merged_mapping[info0.atomIdx(dihedral.atom3())] dihedrals.set(atom0, atom1, atom2, atom3, dihedral.function()) # Loop over all dihedrals in molecule1. @@ -550,10 +608,10 @@ def merge( exprn = dihedral.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] - atom2 = inv_mapping[atom2] - atom3 = inv_mapping[atom3] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] + atom2 = mol1_merged_mapping[atom2] + atom3 = mol1_merged_mapping[atom3] # Set the new dihedral. dihedrals.set(atom0, atom1, atom2, atom3, exprn) @@ -580,10 +638,10 @@ def merge( # Add all of the impropers from molecule0. for improper in impropers0.potentials(): - atom0 = info0.atomIdx(improper.atom0()) - atom1 = info0.atomIdx(improper.atom1()) - atom2 = info0.atomIdx(improper.atom2()) - atom3 = info0.atomIdx(improper.atom3()) + atom0 = mol0_merged_mapping[info0.atomIdx(improper.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(improper.atom1())] + atom2 = mol0_merged_mapping[info0.atomIdx(improper.atom2())] + atom3 = mol0_merged_mapping[info0.atomIdx(improper.atom3())] impropers.set(atom0, atom1, atom2, atom3, improper.function()) # Loop over all impropers in molecule1. @@ -603,10 +661,10 @@ def merge( exprn = improper.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] - atom2 = inv_mapping[atom2] - atom3 = inv_mapping[atom3] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] + atom2 = mol1_merged_mapping[atom2] + atom3 = mol1_merged_mapping[atom3] # Set the new improper. impropers.set(atom0, atom1, atom2, atom3, exprn) @@ -621,7 +679,7 @@ def merge( # Add the atom properties from molecule1. for atom in molecule1.atoms(): # Get the atom index in the merged molecule. - idx = inv_mapping[atom.index()] + idx = mol1_merged_mapping[atom.index()] # Add an "name1" property. edit_mol = ( @@ -644,11 +702,12 @@ def merge( # Add the properties from atoms unique to molecule0. for atom in atoms0: + # Get the atom index in the merged molecule. + idx = mol0_merged_mapping[atom.index()] + # Add an "name1" property. edit_mol = ( - edit_mol.atom(atom.index()) - .setProperty("name1", atom.name().value()) - .molecule() + edit_mol.atom(idx).setProperty("name1", atom.name().value()).molecule() ) # Loop over all atom properties. @@ -659,25 +718,21 @@ def merge( # Zero the "charge" and "LJ" property for atoms that are unique to molecule0. if name == "charge": edit_mol = ( - edit_mol.atom(atom.index()) + edit_mol.atom(idx) .setProperty("charge1", 0 * _SireUnits.e_charge) .molecule() ) elif name == "LJ": edit_mol = ( - edit_mol.atom(atom.index()) + edit_mol.atom(idx) .setProperty("LJ1", _SireMM.LJParameter()) .molecule() ) elif name == "ambertype": - edit_mol = ( - edit_mol.atom(atom.index()) - .setProperty("ambertype1", "du") - .molecule() - ) + edit_mol = edit_mol.atom(idx).setProperty("ambertype1", "du").molecule() elif name == "element": edit_mol = ( - edit_mol.atom(atom.index()) + edit_mol.atom(idx) .setProperty("element1", _SireMol.Element(0)) .molecule() ) @@ -688,9 +743,7 @@ def merge( # Add the property to the atom in the merged molecule. edit_mol = ( - edit_mol.atom(atom.index()) - .setProperty(name, atom.property(prop)) - .molecule() + edit_mol.atom(idx).setProperty(name, atom.property(prop)).molecule() ) # Tolerance for zero sigma values. @@ -751,8 +804,8 @@ def merge( exprn = bond.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] # Set the new bond. bonds.set(atom0, atom1, exprn) @@ -765,8 +818,8 @@ def merge( or info0.atomIdx(bond.atom1()) in atoms0_idx ): # Extract the bond information. - atom0 = info0.atomIdx(bond.atom0()) - atom1 = info0.atomIdx(bond.atom1()) + atom0 = mol0_merged_mapping[info0.atomIdx(bond.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(bond.atom1())] exprn = bond.function() # Set the new bond. @@ -801,9 +854,9 @@ def merge( exprn = angle.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] - atom2 = inv_mapping[atom2] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] + atom2 = mol1_merged_mapping[atom2] # Set the new angle. angles.set(atom0, atom1, atom2, exprn) @@ -817,9 +870,9 @@ def merge( or info0.atomIdx(angle.atom2()) in atoms0_idx ): # Extract the angle information. - atom0 = info0.atomIdx(angle.atom0()) - atom1 = info0.atomIdx(angle.atom1()) - atom2 = info0.atomIdx(angle.atom2()) + atom0 = mol0_merged_mapping[info0.atomIdx(angle.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(angle.atom1())] + atom2 = mol0_merged_mapping[info0.atomIdx(angle.atom2())] exprn = angle.function() # Set the new angle. @@ -855,10 +908,10 @@ def merge( exprn = dihedral.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] - atom2 = inv_mapping[atom2] - atom3 = inv_mapping[atom3] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] + atom2 = mol1_merged_mapping[atom2] + atom3 = mol1_merged_mapping[atom3] # Set the new dihedral. dihedrals.set(atom0, atom1, atom2, atom3, exprn) @@ -873,10 +926,10 @@ def merge( or info0.atomIdx(dihedral.atom3()) in atoms0_idx ): # Extract the dihedral information. - atom0 = info0.atomIdx(dihedral.atom0()) - atom1 = info0.atomIdx(dihedral.atom1()) - atom2 = info0.atomIdx(dihedral.atom2()) - atom3 = info0.atomIdx(dihedral.atom3()) + atom0 = mol0_merged_mapping[info0.atomIdx(dihedral.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(dihedral.atom1())] + atom2 = mol0_merged_mapping[info0.atomIdx(dihedral.atom2())] + atom3 = mol0_merged_mapping[info0.atomIdx(dihedral.atom3())] exprn = dihedral.function() # Set the new dihedral. @@ -912,10 +965,10 @@ def merge( exprn = improper.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] - atom2 = inv_mapping[atom2] - atom3 = inv_mapping[atom3] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] + atom2 = mol1_merged_mapping[atom2] + atom3 = mol1_merged_mapping[atom3] # Set the new improper. impropers.set(atom0, atom1, atom2, atom3, exprn) @@ -930,10 +983,10 @@ def merge( or info0.atomIdx(improper.atom3()) in atoms0_idx ): # Extract the improper information. - atom0 = info0.atomIdx(improper.atom0()) - atom1 = info0.atomIdx(improper.atom1()) - atom2 = info0.atomIdx(improper.atom2()) - atom3 = info0.atomIdx(improper.atom3()) + atom0 = mol0_merged_mapping[info0.atomIdx(improper.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(improper.atom1())] + atom2 = mol0_merged_mapping[info0.atomIdx(improper.atom2())] + atom3 = mol0_merged_mapping[info0.atomIdx(improper.atom3())] exprn = improper.function() # Set the new improper. @@ -956,156 +1009,183 @@ def merge( "or 'allow_ring_size_change' options." ) - # Create the connectivity objects. + # Create the connectivity object. + conn = _SireMol.Connectivity(edit_mol.info()).edit() + + # Connectivity in the merged molecule. conn0 = _SireMol.Connectivity(edit_mol.info()).edit() conn1 = _SireMol.Connectivity(edit_mol.info()).edit() - # Connect the bonded atoms in both end states. - for bond in edit_mol.property("bond0").potentials(): - conn0.connect(bond.atom0(), bond.atom1()) + bonds_atoms0 = { + frozenset([bond.atom0(), bond.atom1()]) + for bond in edit_mol.property("bond0").potentials() + } + bonds_atoms1 = { + frozenset([bond.atom0(), bond.atom1()]) + for bond in edit_mol.property("bond1").potentials() + } + + for atom0, atom1 in bonds_atoms0: + conn0.connect(atom0, atom1) + + for atom0, atom1 in bonds_atoms1: + conn1.connect(atom0, atom1) + + # We only add a bond to the total connectivity if it's defined in both states + # This results in a "broken" topology if one writes it in GROMACS + # But GROMACS can't handle bond breaks anyway + for atom0, atom1 in bonds_atoms0 & bonds_atoms1: + conn.connect(atom0, atom1) + + conn = conn.commit() conn0 = conn0.commit() - for bond in edit_mol.property("bond1").potentials(): - conn1.connect(bond.atom0(), bond.atom1()) conn1 = conn1.commit() - # Get the original connectivity of the two molecules. + # Get the connectivity of the two molecules. c0 = molecule0.property("connectivity") c1 = molecule1.property("connectivity") # Check that the merge hasn't modified the connectivity. - # molecule0 - for x in range(0, molecule0.nAtoms()): - # Convert to an AtomIdx. - idx = _SireMol.AtomIdx(x) - - for y in range(x + 1, molecule0.nAtoms()): + # The checking was blocked when merging a protein + if roi is None: + # molecule0 + for x in range(0, molecule0.nAtoms()): # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(y) + idx = _SireMol.AtomIdx(x) - # Was a ring opened/closed? - is_ring_broken = _is_ring_broken(c0, conn1, idx, idy, idx, idy) + # Map the index to its position in the merged molecule. + idx_map = mol0_merged_mapping[idx] - # A ring was broken and it is not allowed. - if is_ring_broken and not allow_ring_breaking: - raise _IncompatibleError( - "The merge has opened/closed a ring. To allow this " - "perturbation, set the 'allow_ring_breaking' option " - "to 'True'." - ) + for y in range(x + 1, molecule0.nAtoms()): + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(y) - # Did a ring change size? - is_ring_size_change = _is_ring_size_changed(c0, conn1, idx, idy, idx, idy) + # Map the index to its position in the merged molecule. + idy_map = mol0_merged_mapping[idy] - # A ring changed size and it is not allowed. - if ( - not is_ring_broken - and is_ring_size_change - and not allow_ring_size_change - ): - raise _IncompatibleError( - "The merge has changed the size of a ring. To allow this " - "perturbation, set the 'allow_ring_size_change' option " - "to 'True'. Be aware that this perturbation may not work " - "and a transition through an intermediate state may be " - "preferable." - ) + # Was a ring opened/closed? + is_ring_broken = _is_ring_broken(c0, conn, idx, idy, idx_map, idy_map) - # The connectivity has changed. - if c0.connectionType(idx, idy) != conn1.connectionType(idx, idy): - # The connectivity changed for an unknown reason. - if not (is_ring_broken or is_ring_size_change) and not force: + # A ring was broken and it is not allowed. + if is_ring_broken and not allow_ring_breaking: raise _IncompatibleError( - "The merge has changed the molecular connectivity " - "but a ring didn't open/close or change size. " - "If you want to proceed with this mapping pass " - "'force=True'. You are warned that the resulting " - "perturbation will likely be unstable." + "The merge has opened/closed a ring. To allow this " + "perturbation, set the 'allow_ring_breaking' option " + "to 'True'." ) - # molecule1 - for x in range(0, molecule1.nAtoms()): - # Convert to an AtomIdx. - idx = _SireMol.AtomIdx(x) - # Map the index to its position in the merged molecule. - idx_map = inv_mapping[idx] + # Did a ring change size? + is_ring_size_change = _is_ring_size_changed( + c0, conn, idx, idy, idx_map, idy_map + ) + + # A ring changed size and it is not allowed. + if ( + not is_ring_broken + and is_ring_size_change + and not allow_ring_size_change + ): + raise _IncompatibleError( + "The merge has changed the size of a ring. To allow this " + "perturbation, set the 'allow_ring_size_change' option " + "to 'True'. Be aware that this perturbation may not work " + "and a transition through an intermediate state may be " + "preferable." + ) - for y in range(x + 1, molecule1.nAtoms()): + # The connectivity has changed. + if c0.connectionType(idx, idy) != conn.connectionType(idx_map, idy_map): + # The connectivity changed for an unknown reason. + if not (is_ring_broken or is_ring_size_change) and not force: + raise _IncompatibleError( + "The merge has changed the molecular connectivity " + "but a ring didn't open/close or change size. " + "If you want to proceed with this mapping pass " + "'force=True'. You are warned that the resulting " + "perturbation will likely be unstable." + ) + # molecule1 + for x in range(0, molecule1.nAtoms()): # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(y) + idx = _SireMol.AtomIdx(x) # Map the index to its position in the merged molecule. - idy_map = inv_mapping[idy] + idx_map = mol1_merged_mapping[idx] - # Was a ring opened/closed? - is_ring_broken = _is_ring_broken(c1, conn0, idx, idy, idx_map, idy_map) + for y in range(x + 1, molecule1.nAtoms()): + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(y) - # A ring was broken and it is not allowed. - if is_ring_broken and not allow_ring_breaking: - raise _IncompatibleError( - "The merge has opened/closed a ring. To allow this " - "perturbation, set the 'allow_ring_breaking' option " - "to 'True'." - ) + # Map the index to its position in the merged molecule. + idy_map = mol1_merged_mapping[idy] - # Did a ring change size? - is_ring_size_change = _is_ring_size_changed( - c1, conn0, idx, idy, idx_map, idy_map - ) + # Was a ring opened/closed? + is_ring_broken = _is_ring_broken(c1, conn, idx, idy, idx_map, idy_map) - # A ring changed size and it is not allowed. - if ( - not is_ring_broken - and is_ring_size_change - and not allow_ring_size_change - ): - raise _IncompatibleError( - "The merge has changed the size of a ring. To allow this " - "perturbation, set the 'allow_ring_size_change' option " - "to 'True'. Be aware that this perturbation may not work " - "and a transition through an intermediate state may be " - "preferable." + # A ring was broken and it is not allowed. + if is_ring_broken and not allow_ring_breaking: + raise _IncompatibleError( + "The merge has opened/closed a ring. To allow this " + "perturbation, set the 'allow_ring_breaking' option " + "to 'True'." + ) + + # Did a ring change size? + is_ring_size_change = _is_ring_size_changed( + c1, conn, idx, idy, idx_map, idy_map ) - # The connectivity has changed. - if c1.connectionType(idx, idy) != conn0.connectionType(idx_map, idy_map): - # The connectivity changed for an unknown reason. - if not (is_ring_broken or is_ring_size_change) and not force: + # A ring changed size and it is not allowed. + if ( + not is_ring_broken + and is_ring_size_change + and not allow_ring_size_change + ): raise _IncompatibleError( - "The merge has changed the molecular connectivity " - "but a ring didn't open/close or change size. " - "If you want to proceed with this mapping pass " - "'force=True'. You are warned that the resulting " - "perturbation will likely be unstable." + "The merge has changed the size of a ring. To allow this " + "perturbation, set the 'allow_ring_size_change' option " + "to 'True'. Be aware that this perturbation may not work " + "and a transition through an intermediate state may be " + "preferable." ) - # Set the "connectivity" property. If the end state connectivity is the same, - # then we can just set the "connectivity" property. - if conn0 == conn1: - edit_mol.setProperty("connectivity", conn0) - else: - edit_mol.setProperty("connectivity0", conn0) - edit_mol.setProperty("connectivity1", conn1) + # The connectivity has changed. + if c1.connectionType(idx, idy) != conn.connectionType(idx_map, idy_map): + # The connectivity changed for an unknown reason. + if not (is_ring_broken or is_ring_size_change) and not force: + raise _IncompatibleError( + "The merge has changed the molecular connectivity " + "but a ring didn't open/close or change size. " + "If you want to proceed with this mapping pass " + "'force=True'. You are warned that the resulting " + "perturbation will likely be unstable." + ) + + # Set the "connectivity" property. + edit_mol.setProperty("connectivity", conn) # Create the CLJNBPairs matrices. ff = molecule0.property(ff0) - # Initialise the intrascale matrices for both end states. - clj_nb_pairs0 = _SireMM.CLJNBPairs(edit_mol.info(), _SireMM.CLJScaleFactor(0, 0)) - clj_nb_pairs1 = _SireMM.CLJNBPairs(edit_mol.info(), _SireMM.CLJScaleFactor(0, 0)) + scale_factor_14 = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs0 = _SireMM.CLJNBPairs(conn0, scale_factor_14) + clj_nb_pairs1 = _SireMM.CLJNBPairs(conn1, scale_factor_14) # Loop over all atoms unique to molecule0. for idx0 in atoms0_idx: + # Map the index to its position in the merged molecule. + idx0 = mol0_merged_mapping[idx0] + # Loop over all atoms unique to molecule1. for idx1 in atoms1_idx: # Map the index to its position in the merged molecule. - idx1 = inv_mapping[idx1] + idx1 = mol1_merged_mapping[idx1] - # Work out the connection type between the atoms in both end states. + # Work out the connection type between the atoms, in molecule 0. conn_type0 = conn0.connectionType(idx0, idx1) - conn_type1 = conn1.connectionType(idx0, idx1) - - # Lambda = 0 # The atoms aren't bonded. if conn_type0 == 0: @@ -1119,7 +1199,13 @@ def merge( ) clj_nb_pairs0.set(idx0, idx1, clj_scale_factor) - # Lambda = 1 + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs0.set(idx0, idx1, clj_scale_factor) + + # Work out the connection type between the atoms, in molecule 1. + conn_type1 = conn1.connectionType(idx0, idx1) # The atoms aren't bonded. if conn_type1 == 0: @@ -1133,77 +1219,155 @@ def merge( ) clj_nb_pairs1.set(idx0, idx1, clj_scale_factor) - # Get the user defined "intrascale" property names. - prop0 = inv_property_map0.get("intrascale", "intrascale") - prop1 = inv_property_map1.get("intrascale", "intrascale") - - # Get the "intrascale" property from the two molecules. - intrascale0 = molecule0.property(prop0) - intrascale1 = molecule1.property(prop1) + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs1.set(idx0, idx1, clj_scale_factor) # Copy the intrascale from molecule1 into clj_nb_pairs0. # Perform a triangular loop over atoms from molecule1. - for x in range(0, molecule1.nAtoms()): - # Convert to an AtomIdx. - idx = _SireMol.AtomIdx(x) - - # Map the index to its position in the merged molecule. - idx = inv_mapping[idx] - - for y in range(x + 1, molecule1.nAtoms()): + if roi is None: + iterlen = molecule1.nAtoms() + iterrange = list(range(molecule1.nAtoms())) + # When region of interest is defined, perfrom loop from these indices + else: + for roi_res in roi: + # Get atom indices of ROI residue in molecule1 + molecule1_roi = molecule1.residues()[roi_res] + molecule1_roi_idx = [a.index() for a in molecule1_roi] + iterlen = len(molecule1_roi_idx) + iterrange = molecule1_roi_idx + + for x in range(0, iterlen): # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(y) + idx = iterrange[x] + idx = _SireMol.AtomIdx(idx) # Map the index to its position in the merged molecule. - idy = inv_mapping[idy] - - # Get the intrascale value. - intra = intrascale1.get(_SireMol.AtomIdx(x), _SireMol.AtomIdx(y)) + idx_map = mol1_merged_mapping[idx] + + for y in range(x + 1, iterlen): + idy = iterrange[y] + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(idy) + + # Map the index to its position in the merged molecule. + idy_map = mol1_merged_mapping[idy] + + conn_type = conn0.connectionType(idx_map, idy_map) + # The atoms aren't bonded. + if conn_type == 0: + clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + + # The atoms are part of a dihedral. + elif conn_type == 4: + clj_scale_factor = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) - # Only set if there is a non-zero value. - # Set using the re-mapped atom indices. - if not intra.coulomb() == 0: - clj_nb_pairs0.set(idx, idy, intra) + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) # Now copy in all intrascale values from molecule0 into both # clj_nb_pairs matrices. - - # Perform a triangular loop over atoms from molecule0. - for x in range(0, molecule0.nAtoms()): - for y in range(x + 1, molecule0.nAtoms()): - # Get the intrascale value. - intra = intrascale0.get(_SireMol.AtomIdx(x), _SireMol.AtomIdx(y)) - - # Set the value in the new matrix, overwriting existing value. - clj_nb_pairs0.set(_SireMol.AtomIdx(x), _SireMol.AtomIdx(y), intra) - - # Only set if there is a non-zero value. - if not intra.coulomb() == 0: - clj_nb_pairs1.set(_SireMol.AtomIdx(x), _SireMol.AtomIdx(y), intra) + if roi is None: + iterlen = molecule0.nAtoms() + iterrange = list(range(molecule0.nAtoms())) + # When region of interest is defined, perfrom loop from these indices + else: + for roi_res in roi: + # Get atom indices of ROI residue in molecule0 + molecule0_roi = molecule0.residues()[roi_res] + molecule0_roi_idx = [a.index() for a in molecule0_roi] + iterlen = len(molecule0_roi_idx) + iterrange = molecule0_roi_idx + + # Perform a triangular loop over atoms from molecule0. + for x in range(0, iterlen): + # Convert to an AtomIdx. + idx = iterrange[x] + idx = _SireMol.AtomIdx(idx) + + # Map the index to its position in the merged molecule. + idx_map = mol0_merged_mapping[idx] + + for y in range(x + 1, iterlen): + idy = iterrange[y] + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(idy) + + # Map the index to its position in the merged molecule. + idy_map = mol0_merged_mapping[idy] + + conn_type = conn0.connectionType(idx_map, idy_map) + # The atoms aren't bonded. + if conn_type == 0: + clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + + # The atoms are part of a dihedral. + elif conn_type == 4: + clj_scale_factor = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) # Finally, copy the intrascale from molecule1 into clj_nb_pairs1. + if roi is None: + iterlen = molecule1.nAtoms() + iterrange = list(range(molecule1.nAtoms())) - # Perform a triangular loop over atoms from molecule1. - for x in range(0, molecule1.nAtoms()): - # Convert to an AtomIdx. - idx = _SireMol.AtomIdx(x) - - # Map the index to its position in the merged molecule. - idx = inv_mapping[idx] - - for y in range(x + 1, molecule1.nAtoms()): - # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(y) - - # Map the index to its position in the merged molecule. - idy = inv_mapping[idy] - - # Get the intrascale value. - intra = intrascale1.get(_SireMol.AtomIdx(x), _SireMol.AtomIdx(y)) - - # Set the value in the new matrix, overwriting existing value. - clj_nb_pairs1.set(idx, idy, intra) + else: + for roi_res in roi: + molecule1_roi = molecule1.residues()[roi_res] + molecule1_roi_idx = [a.index() for a in molecule1_roi] + iterlen = len(molecule1_roi_idx) + iterrange = molecule1_roi_idx + + # Perform a triangular loop over atoms from molecule1. + for x in range(0, iterlen): + # Convert to an AtomIdx. + idx = iterrange[x] + idx = _SireMol.AtomIdx(idx) + + # Map the index to its position in the merged molecule. + idx = mol1_merged_mapping[idx] + + for y in range(x + 1, iterlen): + idy = iterrange[y] + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(idy) + + # Map the index to its position in the merged molecule. + idy = mol1_merged_mapping[idy] + + conn_type = conn1.connectionType(idx, idy) + + if conn_type == 0: + clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) + + # The atoms are part of a dihedral. + elif conn_type == 4: + clj_scale_factor = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) + + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) # Store the two molecular components. edit_mol.setProperty("molecule0", molecule0) @@ -1422,8 +1586,7 @@ def _is_on_ring(idx, conn): def _removeDummies(molecule, is_lambda1): """ - Internal function which removes the dummy atoms from one of the endstates - of a merged molecule. + Internal function which removes the dummy atoms from one of the endstates of a merged molecule. Parameters ---------- diff --git a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py index 31a53e956..2fa2c1b90 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py +++ b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py @@ -92,6 +92,9 @@ install_command="pip install MDRestraintsGenerator", ) if _have_imported(_MDRestraintsGenerator): + from Bio import BiopythonDeprecationWarning + + _warnings.simplefilter("ignore", BiopythonDeprecationWarning) from MDRestraintsGenerator import search as _search from MDRestraintsGenerator.restraints import ( FindBoreschRestraint as _FindBoreschRestraint, diff --git a/python/BioSimSpace/Sandpit/Exscientia/Process/_amber.py b/python/BioSimSpace/Sandpit/Exscientia/Process/_amber.py index 64c7a173f..8b557d51f 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Process/_amber.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Process/_amber.py @@ -145,6 +145,14 @@ def __init__( property_map, ) + if ( + not isinstance(protocol, _Protocol._FreeEnergyMixin) + and system.nPerturbableMolecules() + ): + raise _IncompatibleError( + "Perturbable system is not compatible with none free energy protocol." + ) + # Set the package name. self._package_name = "AMBER" @@ -2936,7 +2944,7 @@ def _init_stdout_dict(self): for file in _Path(self.workDir()).glob("*.out.offset"): os.remove(file) - def saveMetric( + def _saveMetric( self, filename="metric.parquet", u_nk="u_nk.parquet", dHdl="dHdl.parquet" ): """ @@ -2945,41 +2953,54 @@ def saveMetric( is Free Energy protocol, the dHdl and the u_nk data will be saved in the same parquet format as well. """ - - _assert_imported(_alchemlyb) - - self._init_stdout_dict() - datadict = dict() - if isinstance(self._protocol, _Protocol.Minimisation): - datadict_keys = [ - ("Time (ps)", None, "getStep"), - ( - "PotentialEnergy (kJ/mol)", - _Units.Energy.kj_per_mol, - "getTotalEnergy", - ), - ] - else: - datadict_keys = [ - ("Time (ps)", _Units.Time.picosecond, "getTime"), - ( - "PotentialEnergy (kJ/mol)", - _Units.Energy.kj_per_mol, - "getPotentialEnergy", - ), - ("Volume (nm^3)", _Units.Volume.nanometer3, "getVolume"), - ("Pressure (bar)", _Units.Pressure.bar, "getPressure"), - ("Temperature (kelvin)", _Units.Temperature.kelvin, "getTemperature"), - ] - # # Disable this now - df = self._convert_datadict_keys(datadict_keys) - df.to_parquet(path=f"{self.workDir()}/{filename}", index=True) - if isinstance(self._protocol, _Protocol.FreeEnergy): - energy = _extract( - f"{self.workDir()}/{self._name}.out", - T=self._protocol.getTemperature() / _Units.Temperature.kelvin, - ) - if "u_nk" in energy and energy["u_nk"] is not None: - energy["u_nk"].to_parquet(path=f"{self.workDir()}/{u_nk}", index=True) - if "dHdl" in energy and energy["dHdl"] is not None: - energy["dHdl"].to_parquet(path=f"{self.workDir()}/{dHdl}", index=True) + if filename is not None: + self._init_stdout_dict() + if isinstance(self._protocol, _Protocol.Minimisation): + datadict_keys = [ + ("Time (ps)", None, "getStep"), + ( + "PotentialEnergy (kJ/mol)", + _Units.Energy.kj_per_mol, + "getTotalEnergy", + ), + ] + else: + datadict_keys = [ + ("Time (ps)", _Units.Time.picosecond, "getTime"), + ( + "PotentialEnergy (kJ/mol)", + _Units.Energy.kj_per_mol, + "getPotentialEnergy", + ), + ("Volume (nm^3)", _Units.Volume.nanometer3, "getVolume"), + ("Pressure (bar)", _Units.Pressure.bar, "getPressure"), + ( + "Temperature (kelvin)", + _Units.Temperature.kelvin, + "getTemperature", + ), + ] + # # Disable this now + df = self._convert_datadict_keys(datadict_keys) + df.to_parquet(path=f"{self.workDir()}/{filename}", index=True) + if u_nk is not None or dHdl is not None: + _assert_imported(_alchemlyb) + + if isinstance(self._protocol, _Protocol.FreeEnergy): + energy = _extract( + f"{self.workDir()}/{self._name}.out", + T=self._protocol.getTemperature() / _Units.Temperature.kelvin, + ) + with _warnings.catch_warnings(): + _warnings.filterwarnings( + "ignore", + message="The DataFrame has column names of mixed type.", + ) + if "u_nk" in energy and energy["u_nk"] is not None: + energy["u_nk"].to_parquet( + path=f"{self.workDir()}/{u_nk}", index=True + ) + if "dHdl" in energy and energy["dHdl"] is not None: + energy["dHdl"].to_parquet( + path=f"{self.workDir()}/{dHdl}", index=True + ) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py b/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py index 788450c84..b6e02992f 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py @@ -54,7 +54,8 @@ _alchemlyb = _try_import("alchemlyb") if _have_imported(_alchemlyb): - from alchemlyb.parsing.gmx import extract as _extract + from alchemlyb.parsing.gmx import extract_u_nk as _extract_u_nk + from alchemlyb.parsing.gmx import extract_dHdl as _extract_dHdl from .. import _gmx_exe from .. import _isVerbose @@ -400,18 +401,19 @@ def _write_system(self, system, coord_file=None, topol_file=None, ref_file=None) property_map=self._property_map, ) - # Write the restraint to the topology file - if self._restraint: - with open(topol_file, "a") as f: - f.write("\n") - f.write( - self._restraint.toString( - engine="GROMACS", - perturbation_type=self._protocol.getPerturbationType(), - restraint_lambda="restraint" - in self._protocol.getLambda(type="series"), - ) + def _apply_ABFE_restraint(self): + # Write the restraint to the topology file + if self._restraint: + with open(self._top_file, "a") as f: + f.write("\n") + f.write( + self._restraint.toString( + engine="GROMACS", + perturbation_type=self._protocol.getPerturbationType(), + restraint_lambda="restraint" + in self._protocol.getLambda(type="series"), ) + ) def _generate_config(self): """Generate GROMACS configuration file strings.""" @@ -488,6 +490,7 @@ def _generate_config(self): if isinstance(self._protocol, _Protocol._FreeEnergyMixin) else None ) + self._apply_ABFE_restraint() self.addToConfig( config.generateGromacsConfig( extra_options={**config_options, **self._extra_options}, @@ -841,7 +844,6 @@ def getSystem(self, block="AUTO"): system : :class:`System ` The latest molecular system. """ - # Wait for the process to finish. if block is True: self.wait() @@ -856,6 +858,10 @@ def getSystem(self, block="AUTO"): if block is True and not self.isError(): return self._getFinalFrame() else: + raise ValueError( + "getSystem should not use `trjconv` to get the frame in production settings." + "Trigger an exception here to show the traceback when that happens." + ) # Minimisation trajectories have a single frame, i.e. the final state. if isinstance(self._protocol, _Protocol.Minimisation): time = 0 * _Units.Time.nanosecond @@ -2396,22 +2402,27 @@ def _parse_energy_terms(text): U-B, Proper-Dih.. Note that this order is absolute and will not be changed by the input to `gmx energy`. """ - sections = text.split("---") + # Get rid of all the nasty message from argo + content = text.split("End your selection with an empty line or a zero.")[1] + sections = content.split("---") # Remove the empty sections - sections = [section for section in sections if section] + sections = [section for section in sections if section.strip()] # Concatenate the lines - section = sections[1].replace("\n", "") + section = sections[0].replace("\n", "") terms = section.split() # Remove the possible '-' from the separation line terms = [term for term in terms if term != "-"] # Check if the index order is correct - indexes = [int(term) for term in terms[::2]] + try: + indexes = [int(term) for term in terms[::2]] + except ValueError: + raise ValueError("Cannot parse the terms: {}".format("\n".join(terms))) energy_names = terms[1::2] length_nomatch = len(indexes) != len(energy_names) # -1 as the index is 1-based. index_nomatch = (_np.arange(len(indexes)) != _np.array(indexes) - 1).any() if length_nomatch or index_nomatch: - raise ValueError(f"Cannot parse the energy terms in the {edr_file} file.") + raise ValueError(f"Cannot parse the energy terms in the {text} file.") else: return energy_names @@ -2858,7 +2869,7 @@ def _find_trajectory_file(self): else: return self._traj_file - def saveMetric( + def _saveMetric( self, filename="metric.parquet", u_nk="u_nk.parquet", dHdl="dHdl.parquet" ): """ @@ -2867,41 +2878,56 @@ def saveMetric( is Free Energy protocol, the dHdl and the u_nk data will be saved in the same parquet format as well. """ - - _assert_imported(_alchemlyb) - - self._update_energy_dict(initialise=True) - datadict_keys = [ - ("Time (ps)", _Units.Time.picosecond, "getTime"), - ( - "PotentialEnergy (kJ/mol)", - _Units.Energy.kj_per_mol, - "getPotentialEnergy", - ), - ] - if not isinstance(self._protocol, _Protocol.Minimisation): - datadict_keys.extend( - [ - ("Volume (nm^3)", _Units.Volume.nanometer3, "getVolume"), - ("Pressure (bar)", _Units.Pressure.bar, "getPressure"), - ( - "Temperature (kelvin)", - _Units.Temperature.kelvin, - "getTemperature", - ), - ] - ) - df = self._convert_datadict_keys(datadict_keys) - df.to_parquet(path=f"{self.workDir()}/{filename}", index=True) - if isinstance(self._protocol, _Protocol.FreeEnergy): - energy = _extract( - f"{self.workDir()}/{self._name}.xvg", - T=self._protocol.getTemperature() / _Units.Temperature.kelvin, - ) - if "u_nk" in energy: - energy["u_nk"].to_parquet(path=f"{self.workDir()}/{u_nk}", index=True) - if "dHdl" in energy: - energy["dHdl"].to_parquet(path=f"{self.workDir()}/{dHdl}", index=True) + if filename is not None: + self._update_energy_dict(initialise=True) + datadict_keys = [ + ("Time (ps)", _Units.Time.picosecond, "getTime"), + ( + "PotentialEnergy (kJ/mol)", + _Units.Energy.kj_per_mol, + "getPotentialEnergy", + ), + ] + if not isinstance(self._protocol, _Protocol.Minimisation): + datadict_keys.extend( + [ + ("Volume (nm^3)", _Units.Volume.nanometer3, "getVolume"), + ("Pressure (bar)", _Units.Pressure.bar, "getPressure"), + ( + "Temperature (kelvin)", + _Units.Temperature.kelvin, + "getTemperature", + ), + ] + ) + df = self._convert_datadict_keys(datadict_keys) + df.to_parquet(path=f"{self.workDir()}/{filename}", index=True) + if u_nk is not None: + _assert_imported(_alchemlyb) + if isinstance(self._protocol, _Protocol.FreeEnergy): + energy = _extract_u_nk( + f"{self.workDir()}/{self._name}.xvg", + T=self._protocol.getTemperature() / _Units.Temperature.kelvin, + ) + with _warnings.catch_warnings(): + _warnings.filterwarnings( + "ignore", + message="The DataFrame has column names of mixed type.", + ) + energy.to_parquet(path=f"{self.workDir()}/{u_nk}", index=True) + if dHdl is not None: + _assert_imported(_alchemlyb) + if isinstance(self._protocol, _Protocol.FreeEnergy): + energy = _extract_dHdl( + f"{self.workDir()}/{self._name}.xvg", + T=self._protocol.getTemperature() / _Units.Temperature.kelvin, + ) + with _warnings.catch_warnings(): + _warnings.filterwarnings( + "ignore", + message="The DataFrame has column names of mixed type.", + ) + energy.to_parquet(path=f"{self.workDir()}/{dHdl}", index=True) def _is_minimisation(config): diff --git a/python/BioSimSpace/Sandpit/Exscientia/Process/_process.py b/python/BioSimSpace/Sandpit/Exscientia/Process/_process.py index 1228a11ed..45579e0c2 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Process/_process.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Process/_process.py @@ -911,16 +911,6 @@ def wait(self, max_time=None): # The process isn't running. if not self.isRunning(): - try: - self.saveMetric() - except Exception: - exception_info = traceback.format_exc() - with open(f"{self.workDir()}/{self._name}.err", "a+") as f: - f.write("Exception Information during saveMetric():\n") - f.write("======================\n") - f.write(exception_info) - f.write("\n\n") - return if max_time is not None: @@ -1650,12 +1640,25 @@ def _generate_args(self): """Generate the dictionary of command-line arguments.""" self.clearArgs() + def _saveMetric(self, filename, u_nk, dHdl): + """The abstract function to save the metric and free energy data. Need to be + defined for each MD engine.""" + pass + def saveMetric( self, filename="metric.parquet", u_nk="u_nk.parquet", dHdl="dHdl.parquet" ): """The abstract function to save the metric and free energy data. Need to be defined for each MD engine.""" - pass + try: + self._saveMetric(filename, u_nk, dHdl) + except Exception: + exception_info = traceback.format_exc() + with open(f"{self.workDir()}/{self._name}.err", "a+") as f: + f.write("Exception Information during saveMetric():\n") + f.write("======================\n") + f.write(exception_info) + f.write("\n\n") def _convert_datadict_keys(self, datadict_keys): """This function is a helper function for saveMetric that converts a diff --git a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py index 99446e14b..3daebf40d 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py @@ -247,10 +247,24 @@ def generateAmberConfig(self, extra_options=None, extra_lines=None): # Restrain the backbone. restraint = self.protocol.getRestraint() + # Convert to a squashed representation, if needed + if isinstance(self.protocol, _Protocol._FreeEnergyMixin): + atom_mapping0 = _squashed_atom_mapping(self.system, is_lambda1=False) + atom_mapping1 = _squashed_atom_mapping(self.system, is_lambda1=True) + if self.system.getAlchemicalIon(): alchem_ion_idx = self.system.getAlchemicalIonIdx() protein_com_idx = _get_protein_com_idx(self.system) - alchemical_ion_mask = f"@{alchem_ion_idx} | @{protein_com_idx}" + atom_idxs = [alchem_ion_idx, protein_com_idx] + if isinstance(self.protocol, _Protocol._FreeEnergyMixin): + atom_idxs = sorted( + {atom_mapping0[x] for x in atom_idxs if x in atom_mapping0} + | {atom_mapping1[x] for x in atom_idxs if x in atom_mapping1} + ) + # Convert to 1-based index + alchemical_ion_mask = "@" + ",".join( + [str(atom_idx + 1) for atom_idx in atom_idxs] + ) else: alchemical_ion_mask = None @@ -263,10 +277,6 @@ def generateAmberConfig(self, extra_options=None, extra_lines=None): # Convert to a squashed representation, if needed if isinstance(self.protocol, _Protocol._FreeEnergyMixin): - atom_mapping0 = _squashed_atom_mapping( - self.system, is_lambda1=False - ) - atom_mapping1 = _squashed_atom_mapping(self.system, is_lambda1=True) atom_idxs = sorted( {atom_mapping0[x] for x in atom_idxs if x in atom_mapping0} | {atom_mapping1[x] for x in atom_idxs if x in atom_mapping1} diff --git a/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py b/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py index 64426775e..f5c578ece 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py +++ b/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py @@ -1359,7 +1359,11 @@ def getAlchemicalIon(self): The Alchemical Ion or None if there isn't any. """ try: - return self.search("mols with property AlchemicalIon").molecules()[0] + return ( + self.search("mols with property AlchemicalIon") + .molecules()[0] + .getAtoms()[0] + ) except: return None diff --git a/python/setup.py b/python/setup.py index 453781143..f5eb8b8e4 100644 --- a/python/setup.py +++ b/python/setup.py @@ -110,7 +110,7 @@ def clear_installed_list(): conda_deps = [ "alchemlyb", # known not available on aarch64 "configargparse", - "ipywidgets<8", + "ipywidgets", "kcombu_bss", "lomap2", "mdtraj", # known not available on aarch64 diff --git a/requirements.txt b/requirements.txt index 0bc33ac3a..54bef5a18 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ # BioSimSpace runtime requirements. # main -sire~=2024.1.0 +sire~=2024.2.0 # devel -#sire==2024.1.0.dev +#sire==2024.3.0.dev configargparse ipywidgets diff --git a/tests/Align/test_align.py b/tests/Align/test_align.py index ab022a8e4..341e4bf4f 100644 --- a/tests/Align/test_align.py +++ b/tests/Align/test_align.py @@ -3,6 +3,7 @@ from sire.legacy.MM import InternalFF, IntraCLJFF, IntraFF from sire.legacy.Mol import AtomIdx, Element, PartialMolecule +from tests.conftest import has_amber import BioSimSpace as BSS @@ -470,3 +471,205 @@ def test_hydrogen_mass_repartitioning(): assert mass0 == masses1[idx] for idx, mass1 in dummy_masses1: assert mass1 == masses0[idx] + + +@pytest.fixture( + params=[ + ( + "single_mutant", + { + 0: 0, + 1: 1, + 2: 2, + 3: 3, + 4: 4, + 5: 5, + 6: 6, + 7: 7, + 8: 8, + 9: 9, + 10: 10, + 11: 11, + 12: 12, + 13: 13, + 14: 14, + 15: 15, + 16: 16, + 17: 17, + 18: 18, + 19: 19, + 20: 20, + 21: 23, + 22: 21, + 23: 22, + 25: 24, + 26: 25, + 27: 26, + 28: 27, + 29: 28, + 30: 29, + 31: 30, + 32: 31, + 33: 32, + 34: 33, + 35: 34, + 36: 35, + 37: 36, + 38: 37, + 39: 38, + 40: 39, + 41: 40, + 42: 41, + }, + [2], + ), + ( + "double_neighbour_mutant", + { + 0: 0, + 1: 1, + 2: 2, + 3: 3, + 4: 4, + 5: 5, + 6: 6, + 7: 7, + 8: 8, + 9: 9, + 10: 10, + 11: 11, + 12: 12, + 13: 13, + 14: 14, + 15: 15, + 16: 16, + 17: 17, + 18: 18, + 19: 19, + 20: 20, + 21: 24, + 22: 25, + 23: 26, + 24: 27, + 25: 28, + 26: 29, + 27: 30, + 28: 34, + 29: 35, + 30: 36, + 31: 37, + 32: 38, + 33: 39, + 34: 40, + 35: 41, + 36: 42, + 37: 43, + 38: 44, + 39: 45, + 40: 46, + 41: 47, + 42: 48, + 43: 49, + 44: 50, + 45: 51, + 46: 52, + 47: 53, + 48: 54, + 49: 55, + 50: 56, + 51: 57, + 52: 58, + 53: 59, + 54: 60, + 55: 61, + }, + [2, 3], + ), + ], + ids=["single_mutant", "double_neighbour_mutant"], +) +def protein_inputs(request): + return request.param + + +def test_roi_match(protein_inputs): + proteins, protein_mapping, roi = protein_inputs + p0 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_mut_peptide.pdb") + )[0] + p1 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_wt_peptide.pdb") + )[0] + mapping = BSS.Align.matchAtoms(p0, p1, roi=roi) + assert mapping == protein_mapping + + +def test_roi_align(protein_inputs): + # p0 has been translated by 10 A in each direction. + proteins, protein_mapping, roi = protein_inputs + p0 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_mut_peptide.pdb") + )[0] + p1 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_wt_peptide.pdb") + )[0] + + aligned_p0 = BSS.Align.rmsdAlign(p0, p1, roi=roi) + for res in roi: + # Extract sire objects for the ROI and compare their coordinates + aligned_roi = aligned_p0.extract( + [a.index() for a in aligned_p0.getResidues()[res].getAtoms()] + ) + aligned_roi_coords = aligned_roi._sire_object.coordinates() + + p1_roi = p1.extract([a.index() for a in p1.getResidues()[res].getAtoms()]) + p1_roi_coords = p1_roi._sire_object.coordinates() + + for i, coord in enumerate(aligned_roi_coords): + # assume that the test passes if the coordinates are within 0.5 A + assert coord.value() == pytest.approx(p1_roi_coords[i].value(), abs=0.5) + + +def test_roi_flex_align(protein_inputs): + # p0 has been translated by 10 A in each direction. + proteins, protein_mapping, roi = protein_inputs + p0 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_mut_peptide.pdb") + )[0] + p1 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_wt_peptide.pdb") + )[0] + + aligned_p0 = BSS.Align.flexAlign(p0, p1, roi=roi) + for res in roi: + # Extract sire objects for the ROI and compare their coordinates + aligned_roi = aligned_p0.extract( + [a.index() for a in aligned_p0.getResidues()[res].getAtoms()] + ) + aligned_roi_coords = aligned_roi._sire_object.coordinates() + + p1_roi = p1.extract([a.index() for a in p1.getResidues()[res].getAtoms()]) + p1_roi_coords = p1_roi._sire_object.coordinates() + + for i, coord in enumerate(aligned_roi_coords): + # assume that the test passes if the coordinates are within 0.5 A + assert coord.value() == pytest.approx(p1_roi_coords[i].value(), abs=0.5) + + +@pytest.mark.skipif(has_amber is False, reason="Requires AMBER and to be installed.") +def test_roi_merge(protein_inputs): + proteins, protein_mapping, roi = protein_inputs + p0 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_mut_peptide.pdb") + )[0] + p1 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_wt_peptide.pdb") + )[0] + + p0 = BSS.Parameters.ff14SB(p0).getMolecule() + p1 = BSS.Parameters.ff14SB(p1).getMolecule() + + aligned_p0 = BSS.Align.rmsdAlign(p0, p1, roi=roi) + merged = BSS.Align.merge(aligned_p0, p1, protein_mapping, roi=roi) + merged_system = merged.toSystem() + assert merged_system.nPerturbableMolecules() == 1 diff --git a/tests/Sandpit/Exscientia/Align/test_alchemical_ion.py b/tests/Sandpit/Exscientia/Align/test_alchemical_ion.py index a3320e5da..f06471e6a 100644 --- a/tests/Sandpit/Exscientia/Align/test_alchemical_ion.py +++ b/tests/Sandpit/Exscientia/Align/test_alchemical_ion.py @@ -5,7 +5,7 @@ _get_protein_com_idx, _mark_alchemical_ion, ) -from BioSimSpace.Sandpit.Exscientia._SireWrappers import Molecule +from BioSimSpace.Sandpit.Exscientia._SireWrappers import Atom from tests.conftest import has_gromacs, root_fp @@ -45,13 +45,13 @@ def test_getAlchemicalIon(input_system, isalchem, request): if isalchem is None: assert ion is None else: - assert isinstance(ion, Molecule) + assert isinstance(ion, Atom) @pytest.mark.skipif(has_gromacs is False, reason="Requires GROMACS to be installed.") def test_getAlchemicalIonIdx(alchemical_ion_system): index = alchemical_ion_system.getAlchemicalIonIdx() - assert index == 680 + assert index == 2053 @pytest.mark.skipif(has_gromacs is False, reason="Requires GROMACS to be installed.") diff --git a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py index 0149a64e3..a82c89be7 100644 --- a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py +++ b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py @@ -1,20 +1,22 @@ -import pytest - import numpy as np - -from sire.legacy.Units import angstrom3 as _Sire_angstrom3 -from sire.legacy.Units import k_boltz as _k_boltz -from sire.legacy.Units import meter3 as _Sire_meter3 -from sire.legacy.Units import mole as _Sire_mole +import pytest +from sire.legacy.Units import ( + angstrom3 as _Sire_angstrom3, + k_boltz as _k_boltz, + meter3 as _Sire_meter3, + mole as _Sire_mole, +) import BioSimSpace.Sandpit.Exscientia as BSS from BioSimSpace.Sandpit.Exscientia.Align import decouple from BioSimSpace.Sandpit.Exscientia.FreeEnergy import Restraint -from BioSimSpace.Sandpit.Exscientia.Units.Length import angstrom -from BioSimSpace.Sandpit.Exscientia.Units.Angle import radian, degree +from BioSimSpace.Sandpit.Exscientia.Units.Angle import degree, radian from BioSimSpace.Sandpit.Exscientia.Units.Energy import kcal_per_mol +from BioSimSpace.Sandpit.Exscientia.Units.Length import angstrom from BioSimSpace.Sandpit.Exscientia.Units.Temperature import kelvin +from tests.conftest import has_gromacs + # Store the tutorial URL. url = BSS.tutorialUrl() @@ -75,6 +77,29 @@ def boresch_restraint_component(): return system, restraint_dict +@pytest.mark.skipif(has_gromacs is False, reason="Requires GROMACS to be installed.") +@pytest.mark.parametrize( + ("protocol", "posres"), + [ + (BSS.Protocol.FreeEnergy, "heavy"), + (BSS.Protocol.FreeEnergy, None), + ], +) +def test_top_write( + boresch_restraint_component, protocol, posres, tmp_path, boresch_restraint +): + system, restraint_dict = boresch_restraint_component + bss_protocol = protocol(restraint=posres) + BSS.Process.Gromacs( + system, + bss_protocol, + work_dir=str(tmp_path), + restraint=boresch_restraint, + ) + with open(str(tmp_path / "gromacs.top"), "r") as f: + assert "intermolecular_interactions" in f.read() + + @pytest.fixture(scope="session") def boresch_restraint(boresch_restraint_component): """Generate the Boresch restraint object.""" diff --git a/tests/Sandpit/Exscientia/Process/test_amber.py b/tests/Sandpit/Exscientia/Process/test_amber.py index 1a6daded5..02ad1239a 100644 --- a/tests/Sandpit/Exscientia/Process/test_amber.py +++ b/tests/Sandpit/Exscientia/Process/test_amber.py @@ -7,7 +7,12 @@ import pytest import BioSimSpace.Sandpit.Exscientia as BSS -from tests.Sandpit.Exscientia.conftest import has_amber, has_pyarrow +from BioSimSpace.Sandpit.Exscientia._Exceptions import IncompatibleError +from tests.Sandpit.Exscientia.conftest import ( + has_amber, + has_pyarrow, + url, +) from tests.conftest import root_fp @@ -373,6 +378,10 @@ def test_parse_fep_output(system, protocol): assert len(records_sc1) != 0 +@pytest.mark.skipif( + has_amber is False or has_pyarrow is False, + reason="Requires AMBER and pyarrow to be installed.", +) class TestsaveMetric: @staticmethod @pytest.fixture() @@ -401,13 +410,29 @@ def setup(alchemical_system): process.saveMetric() return process + def test_incompatible_error(self): + alchemical_system = BSS.IO.readPerturbableSystem( + f"{url}/complex_vac0.prm7.bz2", + f"{url}/complex_vac0.rst7.bz2", + f"{url}/complex_vac1.prm7.bz2", + f"{url}/complex_vac1.rst7.bz2", + ) + with pytest.raises( + IncompatibleError, + match="Perturbable system is not compatible with none free energy protocol.", + ): + BSS.Process.Amber( + alchemical_system, + BSS.Protocol.Minimisation(), + ) + def test_error_alchemlyb_extract(self, alchemical_system): # Create a process using any system and the protocol. process = BSS.Process.Amber( alchemical_system, BSS.Protocol.FreeEnergy(temperature=298 * BSS.Units.Temperature.kelvin), ) - process.wait() + process.saveMetric() with open(process.workDir() + "/amber.err", "r") as f: text = f.read() assert "Exception Information" in text @@ -433,3 +458,37 @@ def test_no_output(self, system): process = BSS.Process.Amber(system, BSS.Protocol.Production()) with pytest.warns(match="Simulation didn't produce any output."): process.saveMetric() + + @staticmethod + @pytest.mark.parametrize( + ("filename", "u_nk", "dHdl"), + [ + ("metric.parquet", None, None), + (None, "u_nk.parquet", None), + (None, None, "dHdl.parquet"), + ], + ) + def test_selective(alchemical_system, filename, u_nk, dHdl): + # Create a process using any system and the protocol. + process = BSS.Process.Amber( + alchemical_system, + BSS.Protocol.FreeEnergy(temperature=298 * BSS.Units.Temperature.kelvin), + ) + shutil.copyfile( + f"{root_fp}/Sandpit/Exscientia/output/amber_fep.out", + process.workDir() + "/amber.out", + ) + process.saveMetric(filename, u_nk, dHdl) + if filename is not None: + assert (Path(process.workDir()) / filename).exists() + else: + assert not (Path(process.workDir()) / "metric.parquet").exists() + if u_nk is not None: + assert (Path(process.workDir()) / u_nk).exists() + else: + assert not (Path(process.workDir()) / "u_nk.parquet").exists() + if dHdl is not None: + # The file doesn't contain dHdl information + pass + else: + assert not (Path(process.workDir()) / "dHdl.parquet").exists() diff --git a/tests/Sandpit/Exscientia/Process/test_gromacs.py b/tests/Sandpit/Exscientia/Process/test_gromacs.py index 9a1886e97..70250a847 100644 --- a/tests/Sandpit/Exscientia/Process/test_gromacs.py +++ b/tests/Sandpit/Exscientia/Process/test_gromacs.py @@ -226,7 +226,27 @@ def test_regular_protocol(self, setup, tmp_path_factory): # Run the process and check that it finishes without error. run_process(system, protocol, restraint=restraint, work_dir=str(tmp_path)) with open(tmp_path / "test.top", "r") as f: - assert "intermolecular_interactions" in f.read() + text = f.read() + assert text.count("intermolecular_interactions") == 1 + + def test_position_restraint_protocol(self, setup, tmp_path_factory): + """Test if the restraint has been written in a way that could be processed + correctly. + """ + tmp_path = tmp_path_factory.mktemp("out") + system, restraint = setup + # Create a short production protocol. + protocol = BSS.Protocol.FreeEnergy( + runtime=BSS.Types.Time(0.0001, "nanoseconds"), + perturbation_type="full", + restraint="heavy", + ) + + # Run the process and check that it finishes without error. + run_process(system, protocol, restraint=restraint, work_dir=str(tmp_path)) + with open(tmp_path / "test.top", "r") as f: + text = f.read() + assert text.count("intermolecular_interactions") == 1 def test_restraint_lambda(self, setup, tmp_path_factory): """Test if the restraint has been written correctly when restraint lambda is evoked.""" @@ -383,11 +403,64 @@ def extract(*args): perturbable_system, BSS.Protocol.FreeEnergy(temperature=298 * BSS.Units.Temperature.kelvin), ) - process.wait() + process.saveMetric() with open(process.workDir() + "/gromacs.err", "r") as f: text = f.read() assert "Exception Information" in text + @staticmethod + @pytest.mark.parametrize( + ("filename", "u_nk", "dHdl"), + [ + ("metric.parquet", None, None), + (None, "u_nk.parquet", None), + (None, None, "dHdl.parquet"), + ], + ) + def test_selective(perturbable_system, filename, u_nk, dHdl): + from alchemtest.gmx import load_ABFE + + protocol = BSS.Protocol.FreeEnergy( + runtime=BSS.Types.Time(60, "picosecond"), + timestep=BSS.Types.Time(4, "femtosecond"), + report_interval=200, + ) + process = BSS.Process.Gromacs(perturbable_system, protocol) + shutil.copyfile( + f"{root_fp}/Sandpit/Exscientia/output/gromacs.edr", + process.workDir() + "/gromacs.edr", + ) + shutil.copyfile( + load_ABFE().data["ligand"][0], + process.workDir() + "/gromacs.xvg", + ) + + process.saveMetric(filename, u_nk, dHdl) + if filename is not None: + assert (Path(process.workDir()) / filename).exists() + else: + assert not (Path(process.workDir()) / "metric.parquet").exists() + if u_nk is not None: + assert (Path(process.workDir()) / u_nk).exists() + else: + assert not (Path(process.workDir()) / "u_nk.parquet").exists() + if dHdl is not None: + assert (Path(process.workDir()) / dHdl).exists() + else: + assert not (Path(process.workDir()) / "dHdl.parquet").exists() + + +@pytest.mark.skipif( + has_gromacs is False or has_pyarrow is False, + reason="Requires GROMACS and pyarrow to be installed.", +) +def test_error_saveMetric(perturbable_system): + protocol = BSS.Protocol.FreeEnergy() + process = BSS.Process.Gromacs(perturbable_system, protocol) + process.saveMetric() + with open(f"{process.workDir()}/{process._name}.err", "r") as f: + assert "Exception Information during saveMetric():" in f.read() + @pytest.mark.skipif( has_amber is False diff --git a/tests/Sandpit/Exscientia/Process/test_position_restraint.py b/tests/Sandpit/Exscientia/Process/test_position_restraint.py index b82ec3c1f..542b71c54 100644 --- a/tests/Sandpit/Exscientia/Process/test_position_restraint.py +++ b/tests/Sandpit/Exscientia/Process/test_position_restraint.py @@ -36,12 +36,20 @@ def alchemical_ion_system(): ) ion = solvated.getMolecule(-1) pert_ion = BSS.Align.merge(ion, ion, mapping={0: 0}) + for lambda_ in [0, 1]: + atomtype = pert_ion._sire_object.property(f"atomtype{lambda_}") + pert_ion._sire_object = ( + pert_ion._sire_object.edit() + .setProperty(f"ambertype{lambda_}", atomtype) + .molecule() + ) pert_ion._sire_object = ( pert_ion.getAtoms()[0] ._sire_object.edit() .setProperty("charge1", 0 * SireUnits.mod_electron) .molecule() ) + alchemcial_ion = _mark_alchemical_ion(pert_ion) solvated.updateMolecule(solvated.getIndex(ion), alchemcial_ion) return solvated @@ -164,6 +172,8 @@ def test_gromacs(protocol, system, ref_system, tmp_path): reason="Requires AMBER and openff to be installed", ) def test_amber(protocol, system, ref_system, tmp_path): + if not isinstance(protocol, BSS.Protocol._FreeEnergyMixin): + pytest.skip("AMBER position restraint only works for free energy protocol") proc = BSS.Process.Amber( system, protocol, reference_system=ref_system, work_dir=str(tmp_path) ) @@ -240,19 +250,27 @@ def test_gromacs_alchemical_ion( reason="Requires AMBER, GROMACS and OpenFF to be installed", ) @pytest.mark.parametrize( - ("restraint", "target"), + ("restraint", "protocol", "target"), [ - ("backbone", "@5-7,9,15-17 | @2148 | @8"), - ("heavy", "@2,5-7,9,11,15-17,19 | @2148 | @8"), - ("all", "@1-22 | @2148 | @8"), - ("none", "@2148 | @8"), + ( + "backbone", + BSS.Protocol.FreeEnergyEquilibration, + "@5-7,9,15-17 | @9,6442,6443", + ), + ( + "heavy", + BSS.Protocol.FreeEnergyEquilibration, + "@2,5-7,9,11,15-17,19 | @9,6442,6443", + ), + ("all", BSS.Protocol.FreeEnergyEquilibration, "@1-22 | @9,6442,6443"), + ("none", BSS.Protocol.FreeEnergyEquilibration, "@9,6442,6443"), ], ) def test_amber_alchemical_ion( - alchemical_ion_system, restraint, target, alchemical_ion_system_psores + alchemical_ion_system, restraint, protocol, target, alchemical_ion_system_psores ): # Create an equilibration protocol with backbone restraints. - protocol = BSS.Protocol.Equilibration(restraint=restraint) + protocol = protocol(restraint=restraint) # Create the process object. process = BSS.Process.Amber( diff --git a/tests/Sandpit/Exscientia/Process/test_restart.py b/tests/Sandpit/Exscientia/Process/test_restart.py index 64c00f58d..c4873f7ce 100644 --- a/tests/Sandpit/Exscientia/Process/test_restart.py +++ b/tests/Sandpit/Exscientia/Process/test_restart.py @@ -116,6 +116,8 @@ def test_gromacs(protocol, system, tmp_path): reason="Requires AMBER and OpenFF to be installed.", ) def test_amber(protocol, system, tmp_path): + if not isinstance(protocol, BSS.Protocol._FreeEnergyMixin): + pytest.skip("AMBER position restraint only works for free energy protocol") BSS.Process.Amber(system, protocol, work_dir=str(tmp_path)) with open(tmp_path / "amber.cfg", "r") as f: cfg = f.read()