From 8896e633d9b0fbe96920bce386240f4f9e34893e Mon Sep 17 00:00:00 2001 From: Benjamin Ries Date: Thu, 4 Jul 2024 17:36:37 +0200 Subject: [PATCH 01/14] [WIP] Using the robust solver for pyMBAR - avoiding convergence Failures. ## Description In this PR, we would like to propose switching the default solver, if pyMBAR > 4.0.0, such we have an improved convergence rate at the cost of minimal more time. -> less errors thrown. ## Todos - [ ] Implement feature / fix bug - [ ] Add [tests](https://github.com/choderalab/openmmtools/tree/master/openmmtools/tests) - [ ] Update [documentation](https://github.com/choderalab/openmmtools/tree/master/docs) as needed - [ ] Update [changelog](https://github.com/choderalab/openmmtools/blob/master/docs/releasehistory.rst) to summarize changes in behavior, enhancements, and bugfixes implemented in this PR ## Status - [ ] Ready to go ## Changelog message ``` ``` --- openmmtools/multistate/multistateanalyzer.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/openmmtools/multistate/multistateanalyzer.py b/openmmtools/multistate/multistateanalyzer.py index ae7fd793..cf30bc24 100644 --- a/openmmtools/multistate/multistateanalyzer.py +++ b/openmmtools/multistate/multistateanalyzer.py @@ -38,6 +38,7 @@ from scipy.special import logsumexp from openmmtools import multistate, utils, forces +from openmmtools.multistate import pymbar from openmmtools.multistate.pymbar import ( statistical_inefficiency_multiple, subsample_correlated_data, @@ -567,6 +568,10 @@ def __init__(self, reporter, name=None, reference_states=(0, -1), self.reference_states = reference_states self._user_extra_analysis_kwargs = analysis_kwargs # Store the user-specified (higher priority) keywords + # Use the robust approach as default, avoiding Convergence problems. + if pymbar.version.short_version >= "4.0.0" and "solver_protocol" not in self._user_extra_analysis_kwargs: + self._user_extra_analysis_kwargs["solver_protocol"] = "robust" + # Initialize cached values that are read or derived from the Reporter. self._cache = {} # This cache should be always set with _update_cache(). self.clear() From d72fe8ef3a9b140d96561497551d92ea09942e01 Mon Sep 17 00:00:00 2001 From: Mike Henry <11765982+mikemhenry@users.noreply.github.com> Date: Tue, 13 Aug 2024 08:01:26 -0700 Subject: [PATCH 02/14] bump ci From 060d5c196c6c31ec40fc9067ccac2c60252bb819 Mon Sep 17 00:00:00 2001 From: Mike Henry <11765982+mikemhenry@users.noreply.github.com> Date: Wed, 25 Sep 2024 12:13:48 -0700 Subject: [PATCH 03/14] doing this a different way --- openmmtools/multistate/multistateanalyzer.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/openmmtools/multistate/multistateanalyzer.py b/openmmtools/multistate/multistateanalyzer.py index cf30bc24..ae7fd793 100644 --- a/openmmtools/multistate/multistateanalyzer.py +++ b/openmmtools/multistate/multistateanalyzer.py @@ -38,7 +38,6 @@ from scipy.special import logsumexp from openmmtools import multistate, utils, forces -from openmmtools.multistate import pymbar from openmmtools.multistate.pymbar import ( statistical_inefficiency_multiple, subsample_correlated_data, @@ -568,10 +567,6 @@ def __init__(self, reporter, name=None, reference_states=(0, -1), self.reference_states = reference_states self._user_extra_analysis_kwargs = analysis_kwargs # Store the user-specified (higher priority) keywords - # Use the robust approach as default, avoiding Convergence problems. - if pymbar.version.short_version >= "4.0.0" and "solver_protocol" not in self._user_extra_analysis_kwargs: - self._user_extra_analysis_kwargs["solver_protocol"] = "robust" - # Initialize cached values that are read or derived from the Reporter. self._cache = {} # This cache should be always set with _update_cache(). self.clear() From 39b683bb3523f10f86f14eee452774f6c0911b9d Mon Sep 17 00:00:00 2001 From: Mike Henry <11765982+mikemhenry@users.noreply.github.com> Date: Wed, 25 Sep 2024 12:31:11 -0700 Subject: [PATCH 04/14] use Version to compare versions --- openmmtools/multistate/multistateanalyzer.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/openmmtools/multistate/multistateanalyzer.py b/openmmtools/multistate/multistateanalyzer.py index ae7fd793..49537c02 100644 --- a/openmmtools/multistate/multistateanalyzer.py +++ b/openmmtools/multistate/multistateanalyzer.py @@ -24,6 +24,7 @@ import copy import inspect import logging +from packaging.version import Version import re from typing import Optional, NamedTuple, Union @@ -37,6 +38,7 @@ import simtk.unit as units from scipy.special import logsumexp +from openmmtools.multistate import pymbar from openmmtools import multistate, utils, forces from openmmtools.multistate.pymbar import ( statistical_inefficiency_multiple, @@ -567,6 +569,11 @@ def __init__(self, reporter, name=None, reference_states=(0, -1), self.reference_states = reference_states self._user_extra_analysis_kwargs = analysis_kwargs # Store the user-specified (higher priority) keywords + # If we are using pymbar 4, change the default behavior to use the robust solver protocol if the user + # didn't set a kwarg to control the solver protocol + if Version(pymbar.version.short_version) >= Version("4") and "solver_protocol" not in self._user_extra_analysis_kwargs: + self._user_extra_analysis_kwargs["solver_protocol"] = "robust" + # Initialize cached values that are read or derived from the Reporter. self._cache = {} # This cache should be always set with _update_cache(). self.clear() From 50b058fdd72050d26533972dfa0d676f46b5b355 Mon Sep 17 00:00:00 2001 From: Mike Henry <11765982+mikemhenry@users.noreply.github.com> Date: Thu, 26 Sep 2024 08:32:06 -0700 Subject: [PATCH 05/14] fix micromamba, see https://github.com/mamba-org/micromamba-releases/issues/58 --- .github/workflows/CI.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index e5a7d4bc..994745f2 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -59,6 +59,7 @@ jobs: - name: Setup micromamba uses: mamba-org/setup-micromamba@v1 with: + micromamba-version: '2.0.0-0' environment-file: devtools/conda-envs/test_env.yaml environment-name: openmmtools-test create-args: >- From 41ce6328627196333b19c1d29369f22045555246 Mon Sep 17 00:00:00 2001 From: Mike Henry <11765982+mikemhenry@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:33:01 -0700 Subject: [PATCH 06/14] fix version check --- openmmtools/multistate/multistateanalyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmmtools/multistate/multistateanalyzer.py b/openmmtools/multistate/multistateanalyzer.py index 49537c02..23ff16a3 100644 --- a/openmmtools/multistate/multistateanalyzer.py +++ b/openmmtools/multistate/multistateanalyzer.py @@ -571,7 +571,7 @@ def __init__(self, reporter, name=None, reference_states=(0, -1), # If we are using pymbar 4, change the default behavior to use the robust solver protocol if the user # didn't set a kwarg to control the solver protocol - if Version(pymbar.version.short_version) >= Version("4") and "solver_protocol" not in self._user_extra_analysis_kwargs: + if Version(pymbar.__version__) >= Version("4") and "solver_protocol" not in self._user_extra_analysis_kwargs: self._user_extra_analysis_kwargs["solver_protocol"] = "robust" # Initialize cached values that are read or derived from the Reporter. From c1ac13e905b4ae41da4cd2462e05e7df9b16e3eb Mon Sep 17 00:00:00 2001 From: Mike Henry <11765982+mikemhenry@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:56:20 -0700 Subject: [PATCH 07/14] forgot how we import pymbar in this package --- openmmtools/multistate/pymbar.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openmmtools/multistate/pymbar.py b/openmmtools/multistate/pymbar.py index 7df80578..d8e34516 100644 --- a/openmmtools/multistate/pymbar.py +++ b/openmmtools/multistate/pymbar.py @@ -10,7 +10,7 @@ subsample_correlated_data, statistical_inefficiency ) - from pymbar import MBAR + from pymbar import MBAR, __version__ from pymbar.utils import ParameterError except ImportError: # pymbar < 4 @@ -20,7 +20,7 @@ subsampleCorrelatedData as subsample_correlated_data, statisticalInefficiency as statistical_inefficiency ) - from pymbar import MBAR + from pymbar import MBAR, __version__ from pymbar.utils import ParameterError From 4916e4b29e69996a02b68051b42cbedbfd21450b Mon Sep 17 00:00:00 2001 From: Mike Henry <11765982+mikemhenry@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:12:23 -0700 Subject: [PATCH 08/14] pymbar 3 stores version differently --- openmmtools/multistate/pymbar.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openmmtools/multistate/pymbar.py b/openmmtools/multistate/pymbar.py index d8e34516..4bd97754 100644 --- a/openmmtools/multistate/pymbar.py +++ b/openmmtools/multistate/pymbar.py @@ -20,8 +20,9 @@ subsampleCorrelatedData as subsample_correlated_data, statisticalInefficiency as statistical_inefficiency ) - from pymbar import MBAR, __version__ + from pymbar import MBAR from pymbar.utils import ParameterError + from pymbar.version import short_version as __version__ def _pymbar_bar( From 7d3dc30e749a08e89151b0e4655ec321dc4dd940 Mon Sep 17 00:00:00 2001 From: Mike Henry <11765982+mikemhenry@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:59:32 -0700 Subject: [PATCH 09/14] added test from pymbar issue 419 --- openmmtools/tests/test_sampling.py | 40 +++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/openmmtools/tests/test_sampling.py b/openmmtools/tests/test_sampling.py index f16188bf..0b85a148 100644 --- a/openmmtools/tests/test_sampling.py +++ b/openmmtools/tests/test_sampling.py @@ -22,12 +22,14 @@ import shutil import sys import tempfile +import time from io import StringIO import numpy as np import yaml import pytest +import requests try: import openmm @@ -1861,7 +1863,7 @@ def test_analysis_opens_without_checkpoint(self): del reporter self.REPORTER(storage_path, checkpoint_storage=cp_file_mod, open_mode="r") - @pytest.mark.flaky(reruns=3) + @pytest.mark.skipif(sys.platform == "darwin", reason="seg faults on osx sometimes") def test_storage_reporter_and_string(self): """Test that creating a MultiState by storage string and reporter is the same""" thermodynamic_states, sampler_states, unsampled_states = copy.deepcopy( @@ -2612,6 +2614,42 @@ def test_resume_velocities_from_legacy_storage(self): state.velocities.value_in_unit_system(unit.md_unit_system) != 0 ), "At least some velocity in sampler state from new checkpoint is expected to different from zero." +@pytest.fixture +def download_nc_file(tmpdir): + FILE_URL = "https://github.com/user-attachments/files/17156868/ala-thr.zip" + MAX_RETRIES = 3 + RETRY_DELAY = 2 # Delay between retries (in seconds) + file_name = os.path.join(tmpdir, "ala-thr.nc") + retries = 0 + while retries < MAX_RETRIES: + try: + # Send GET request to download the file + response = requests.get(FILE_URL, timeout=20) # Timeout to avoid hanging + response.raise_for_status() # Raise HTTPError for bad responses (4xx/5xx) + with open(file_name, "wb") as f: + f.write(response.content) + # File downloaded successfully, break out of retry loop + break + + except (requests.exceptions.RequestException, requests.exceptions.HTTPError) as e: + retries += 1 + if retries >= MAX_RETRIES: + pytest.fail(f"Failed to download file after {MAX_RETRIES} retries: {e}") + else: + print(f"Retrying download... ({retries}/{MAX_RETRIES})") + time.sleep(RETRY_DELAY) # Wait before retrying + yield file_name + + +def test_pymbar_issue_419(download_nc_file): + from openmmtools.multistate import MultiStateReporter, MultiStateSamplerAnalyzer + + n_iterations = 1000 + reporter_file = download_nc_file + reporter = MultiStateReporter(reporter_file) + analyzer = MultiStateSamplerAnalyzer(reporter, max_n_iterations=n_iterations) + f_ij, df_ij = analyzer.get_free_energy() + # ============================================================================== # MAIN AND TESTS From fc0f4adede9fa8c9c9abc3c80b7d1f0353e557e1 Mon Sep 17 00:00:00 2001 From: Mike Henry <11765982+mikemhenry@users.noreply.github.com> Date: Thu, 26 Sep 2024 16:32:08 -0700 Subject: [PATCH 10/14] re-run flaky tests --- openmmtools/tests/test_sampling.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openmmtools/tests/test_sampling.py b/openmmtools/tests/test_sampling.py index 0b85a148..9d7b3c6b 100644 --- a/openmmtools/tests/test_sampling.py +++ b/openmmtools/tests/test_sampling.py @@ -308,6 +308,7 @@ def run(self, include_unsampled_states=False): # Clean up. del simulation + @pytest.mark.flaky(reruns=3) def test_with_unsampled_states(self): """Test multistate sampler on a harmonic oscillator with unsampled endstates""" self.run(include_unsampled_states=True) From f8458bf9b1f838c9e2abd3686b5b1b39d7c709c3 Mon Sep 17 00:00:00 2001 From: Mike Henry <11765982+mikemhenry@users.noreply.github.com> Date: Mon, 30 Sep 2024 14:23:32 -0700 Subject: [PATCH 11/14] check to see if we get an expected value --- openmmtools/tests/test_sampling.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/openmmtools/tests/test_sampling.py b/openmmtools/tests/test_sampling.py index 9d7b3c6b..6cbbb660 100644 --- a/openmmtools/tests/test_sampling.py +++ b/openmmtools/tests/test_sampling.py @@ -2650,6 +2650,10 @@ def test_pymbar_issue_419(download_nc_file): reporter = MultiStateReporter(reporter_file) analyzer = MultiStateSamplerAnalyzer(reporter, max_n_iterations=n_iterations) f_ij, df_ij = analyzer.get_free_energy() + # free energy + assert f_ij[0, -1] == -52.00083148433459 + # error + assert df_ij[0, -1] == 0.21365627649558516 # ============================================================================== From a3dab116e805ca589e375b33a773a7269fbf4b0a Mon Sep 17 00:00:00 2001 From: Mike Henry <11765982+mikemhenry@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:10:16 -0700 Subject: [PATCH 12/14] Add some tol --- openmmtools/tests/test_sampling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openmmtools/tests/test_sampling.py b/openmmtools/tests/test_sampling.py index 6cbbb660..4714cc5a 100644 --- a/openmmtools/tests/test_sampling.py +++ b/openmmtools/tests/test_sampling.py @@ -2651,9 +2651,9 @@ def test_pymbar_issue_419(download_nc_file): analyzer = MultiStateSamplerAnalyzer(reporter, max_n_iterations=n_iterations) f_ij, df_ij = analyzer.get_free_energy() # free energy - assert f_ij[0, -1] == -52.00083148433459 + assert abs(f_ij[0, -1] - -52.00083148433459) < 1e-5 # error - assert df_ij[0, -1] == 0.21365627649558516 + assert abs(df_ij[0, -1] - 0.21365627649558516) < 1e-5 # ============================================================================== From 51cc3d07f64c8674e0ae71396376d15ae288417f Mon Sep 17 00:00:00 2001 From: Mike Henry <11765982+mikemhenry@users.noreply.github.com> Date: Tue, 1 Oct 2024 13:49:26 -0700 Subject: [PATCH 13/14] use pytest.approx to make @IAlibay happy ;) --- openmmtools/tests/test_sampling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openmmtools/tests/test_sampling.py b/openmmtools/tests/test_sampling.py index 4714cc5a..6b02cf87 100644 --- a/openmmtools/tests/test_sampling.py +++ b/openmmtools/tests/test_sampling.py @@ -2651,9 +2651,9 @@ def test_pymbar_issue_419(download_nc_file): analyzer = MultiStateSamplerAnalyzer(reporter, max_n_iterations=n_iterations) f_ij, df_ij = analyzer.get_free_energy() # free energy - assert abs(f_ij[0, -1] - -52.00083148433459) < 1e-5 + assert f_ij[0, -1] == pytest.approx(-52.00083148433459) # error - assert abs(df_ij[0, -1] - 0.21365627649558516) < 1e-5 + assert df_ij[0, -1] == pytest.approx(0.21365627649558516) # ============================================================================== From b1badbbb60010261a27f6a403ce5693cdd696f98 Mon Sep 17 00:00:00 2001 From: Mike Henry <11765982+mikemhenry@users.noreply.github.com> Date: Wed, 2 Oct 2024 14:53:13 -0700 Subject: [PATCH 14/14] update the doc strings to explain where the file came from --- openmmtools/tests/test_sampling.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/openmmtools/tests/test_sampling.py b/openmmtools/tests/test_sampling.py index 6b02cf87..c4905b25 100644 --- a/openmmtools/tests/test_sampling.py +++ b/openmmtools/tests/test_sampling.py @@ -2617,6 +2617,9 @@ def test_resume_velocities_from_legacy_storage(self): @pytest.fixture def download_nc_file(tmpdir): + # See https://github.com/choderalab/pymbar/issues/419#issuecomment-1718386779 + # and https://github.com/choderalab/openmmtools/pull/735#issuecomment-2378070388 + # if this file ever starts to 404 FILE_URL = "https://github.com/user-attachments/files/17156868/ala-thr.zip" MAX_RETRIES = 3 RETRY_DELAY = 2 # Delay between retries (in seconds) @@ -2643,6 +2646,21 @@ def download_nc_file(tmpdir): def test_pymbar_issue_419(download_nc_file): + """ + This test checks that a nc file from a ala-thr mutation simulation converges. + + With pymbar 4 default (as of 2024-10-02) solver fails to converge. + With pymbar 3 defaults, the solver does converge. + + With PR #735 (https://github.com/choderalab/openmmtools/pull/735) we updated + the MultiStateSamplerAnalyzer to use the "robust" sampler when using pymbar4. + + See https://github.com/choderalab/pymbar/issues/419#issuecomment-1718386779 for more + information on how the file was generated. + + """ + + from openmmtools.multistate import MultiStateReporter, MultiStateSamplerAnalyzer n_iterations = 1000