From bd2ef5805b2c9f960dbb1011b36cef4d51339b33 Mon Sep 17 00:00:00 2001
From: Sikorski <sikorski@zib.de>
Date: Tue, 10 Sep 2024 11:56:49 +0200
Subject: [PATCH] refactor OpenMMSimulation to use minimalist type

closes #31
---
 src/precompile.jl         |   0
 src/simulators/mopenmm.py |   6 +-
 src/simulators/openmm.jl  | 318 ++++++++++++++------------------------
 3 files changed, 117 insertions(+), 207 deletions(-)
 create mode 100644 src/precompile.jl

diff --git a/src/precompile.jl b/src/precompile.jl
new file mode 100644
index 0000000..e69de29
diff --git a/src/simulators/mopenmm.py b/src/simulators/mopenmm.py
index 1a0e77e..5c7be4e 100644
--- a/src/simulators/mopenmm.py
+++ b/src/simulators/mopenmm.py
@@ -18,7 +18,7 @@ def singlerun(i):
         except OpenMMException as e:
           print("Error integrating trajectory", e)
           return get_numpy_state(c, withmomenta).fill(np.nan)
-        
+
         x = get_numpy_state(c, withmomenta)
         return x
 
@@ -34,7 +34,7 @@ def trajectory(sim, x0, stepsize, steps, saveevery, mmthreads, withmomenta):
   trajectory[0] = x0
 
   c = newcontext(sim.context, mmthreads)
-  
+
   set_numpy_state(c, x0, withmomenta)
   c.getIntegrator().setStepSize(stepsize)
 
@@ -123,7 +123,7 @@ def set_numpy_state(context, x, withmomenta):
         context.setPositions(x)
         context.setVelocitiesToTemperature(context.getIntegrator().getTemperature())
 
-def set_random_velocities(context,  mmthreads):
+def set_random_velocities(context):
     context.setVelocitiesToTemperature(context.getIntegrator().getTemperature())
     v = context.getState(getVelocities=True).getVelocities(asNumpy=True).value_in_unit(nanometer/picosecond).flatten()
     return v
diff --git a/src/simulators/openmm.jl b/src/simulators/openmm.jl
index 7727a69..58bc86b 100644
--- a/src/simulators/openmm.jl
+++ b/src/simulators/openmm.jl
@@ -7,7 +7,7 @@ import JLD2
 import ..ISOKANN: ISOKANN, IsoSimulation,
     propagate, dim, randx0,
     featurizer, defaultmodel,
-    savecoords, getcoords, force, pdb,
+    savecoords, getcoords, force, pdbfile,
     force, potential, lagtime, trajectory
 
 export OpenMMSimulation, FORCE_AMBER, FORCE_AMBER_IMPLICIT
@@ -34,49 +34,15 @@ end
 
 ###
 
-""" A Simulation wrapping the Python OpenMM Simulation object """
-struct OpenMMSimulation <: IsoSimulation
-    pysim::PyObject
-    pdb
-    ligand
-    forcefields
-    temp
-    friction
-    step
-    steps
-    features
-    nthreads::Int
-    mmthreads::Union{Int,String}
-    momenta::Bool
-end
-
-steps(sim) = sim.steps::Int
-# TODO: we have redundant values, decide which ones to use
-friction(pysim::PyObject) = pysim.integrator.getFriction()._value # 1/ps
-temp(pysim::PyObject) = pysim.integrator.getTemperature()._value # kelvin
-stepsize(pysim::PyObject) = pysim.integrator.getStepSize()._value # ps
-
-friction(sim) = friction(sim.pysim)
-temp(sim) = temp(sim.pysim)
-stepsize(sim) = stepsize(sim.pysim)
-
-function OpenMMScript(filename::String;
-    steps::Int,
-    features=nothing,
-)
-    @pyinclude(filename)
-    pysim = py"simulation"
-    pdb = py"pdb"
-    mmthreads = CUDA.has_cuda() ? "gpu" : 1
-
-    OpenMMSimulation(pysim, pdb, nothing, nothing, temp(pysim), friction(pysim), stepsize(pysim), steps, features, 1, mmthreads, false)
-end
-
+#abstract type OpenMMSimulation <: IsoSimulation end
 
 """
-    OpenMMSimulation(; pdb=DEFAULT_PDB, ligand="", forcefields=["amber14-all.xml", "amber14/tip3pfb.xml"], temp=298, friction=1, step=0.002, steps=100, features=nothing, minimize=false)
+    OpenMMSimulation(; pdb, steps, ...)
+    OpenMMSimulation(; py, steps)
 
 Constructs an OpenMM simulation object.
+Either use  `OpenMMSimulation(;py, steps)` where `py`` is the location of a .py python script creating a OpenMM simulation object
+or supply a .pdb file via `pdb` and the following parameters (see also defaultsystem):
 
 ## Arguments
 - `pdb::String`: Path to the PDB file.
@@ -99,66 +65,85 @@ Constructs an OpenMM simulation object.
 - `OpenMMSimulation`: An OpenMMSimulation object.
 
 """
-function OpenMMSimulation(;
+struct OpenMMSimulation
+    pysim::PyObject
+    steps::Int
+    constructor
+
+    function OpenMMSimulation(; steps, k...)
+        k = NamedTuple(k)
+        if haskey(k, :py)
+            @pyinclude(k.py)
+            pysim = py"simulation"
+            return new(pysim, steps, k)
+        else
+            pysim = defaultsystem(; k...)
+            return new(pysim, steps, k)
+        end
+    end
+end
+
+function defaultsystem(;
     pdb=DEFAULT_PDB,
     ligand="",
     forcefields=["amber14-all.xml"],
-    temp=298, # kelvin
+    temp=310, # kelvin
     friction=1,  # 1/picosecond
     step=0.002, # picoseconds
-    steps=100,
-    features=:all,
     minimize=false,
-    gpu=false,
-    nthreads=gpu ? 1 : Threads.nthreads(),
-    mmthreads=gpu ? "gpu" : 1,
     addwater=false,
     padding=3,
     ionicstrength=0.0,
     forcefield_kwargs=Dict(),
-    momenta=false)
+    kwargs...
+)
+    mmthreads = get(kwargs, :mmthreads, CUDA.has_cuda() ? "gpu" : 1)
+    platform, properties = mmthreads == "gpu" ? ("CUDA", Dict()) : ("CPU", Dict("Threads" => "$mmthreads"))
 
-    platform, properties = if mmthreads == "gpu"
-        "CUDA", Dict()
-    else
-        "CPU", Dict("Threads" => "$mmthreads")
-    end
+    @pycall py"defaultsystem"(pdb, ligand, forcefields, temp, friction, step, minimize; addwater, padding, ionicstrength, forcefield_kwargs, platform, properties)::PyObject
+end
 
-    pysim = @pycall py"defaultsystem"(pdb, ligand, forcefields, temp, friction, step, minimize; addwater, padding, ionicstrength, forcefield_kwargs, platform, properties)::PyObject
 
-    if features isa Number
-        radius = features
-        features = [calpha_pairs(pysim); local_atom_pairs(pysim, radius)] |> unique
-    end
+# TODO: this are remnants of the old detailed openmm system, remove them eventually
+mmthreads(sim::OpenMMSimulation) = get(sim.constructor, :mmthreads, CUDA.has_cuda() ? "gpu" : 1)
+nthreads(sim::OpenMMSimulation) = get(sim.constructor, :nthreads, CUDA.has_cuda() ? 1 : Threads.nthreads())
+momenta(sim::OpenMMSimulation) = get(sim.constructor, :momenta, false)
+pdbfile(sim::OpenMMSimulation) = get(sim.constructor, :pdb, () -> createpdb(sim))
 
-    return OpenMMSimulation(pysim::PyObject, pdb, ligand, forcefields, temp, friction, step, steps, features, nthreads, mmthreads, momenta)
-end
+steps(sim) = sim.steps::Int
+friction(sim) = friction(sim.pysim)
+temp(sim) = temp(sim.pysim)
+stepsize(sim) = stepsize(sim.pysim)
 
-#=
-function featurizer(sim::OpenMMSimulation)
-    if sim.features isa (Vector{Int})
-        ix = vec([1, 2, 3] .+ ((sim.features .- 1) * 3)')
-        return ISOKANN.pairdistfeatures(ix)
-    elseif sim.features isa (Vector{Tuple{Int,Int}}) # local pairwise distances
-        inds = sim.features
-        return coords -> ISOKANN.pdists(coords, inds)
-    elseif sim.features == :all
-        if length(getcoords(sim)) > 100
-            @warn "Computing _all_ pairwise distances for a bigger (>100 atoms) molecule. Try using a cutoff by setting features::Number in OpenMMSimulatioon"
-        end
-        return ISOKANN.flatpairdists
-    else
-        error("unknown featurizer")
-    end
+
+lagtime(sim::OpenMMSimulation) = steps(sim) * stepsize(sim) # in ps
+dim(sim::OpenMMSimulation) = length(getcoords(sim))
+defaultmodel(sim::OpenMMSimulation; kwargs...) = ISOKANN.pairnet(; kwargs...)
+
+getcoords(sim::OpenMMSimulation) = getcoords(sim.pysim, momenta(sim))::Vector{Float64}
+setcoords(sim::OpenMMSimulation, coords) = setcoords(sim.pysim, coords, momenta(sim))
+natoms(sim::OpenMMSimulation) = div(dim(sim), 3)
+
+friction(pysim::PyObject) = pysim.integrator.getFriction()._value # 1/ps
+temp(pysim::PyObject) = pysim.integrator.getTemperature()._value # kelvin
+stepsize(pysim::PyObject) = pysim.integrator.getStepSize()._value # ps
+getcoords(sim::PyObject, momenta) = py"get_numpy_state($sim.context, $momenta).flatten()"
+
+function createpdb(sim)
+    pysim = sim.pysim
+    file = tempname() * ".pdb"
+    pdb = py"app.PDBFile"
+    pdb.writeFile(pysim.topology, pysim.positions, file)  # TODO: fix this
+    return file
 end
-=#
 
-featurizer(sim::OpenMMSimulation) = featurizer(sim, sim.features)
+featurizer(sim::OpenMMSimulation) = featurizer(sim, get(sim.constructor, :features, nothing))
 
 featurizer(sim, ::Nothing) = error("No default featurizer specified")
 featurizer(sim, atoms::Vector{Int}) = FeaturesAtoms(atoms)
 featurizer(sim, pairs::Vector{Tuple{Int,Int}}) = FeaturesPairs(pairs)
 featurizer(sim, features::Function) = features
+featurizer(sim, radius::Number) = FeaturesPairs([calpha_pairs(sim.pysim); local_atom_pairs(sim.pysim, radius)] |> unique)
 
 function featurizer(sim, features::Symbol)
     @assert features == :all
@@ -167,9 +152,8 @@ function featurizer(sim, features::Symbol)
 end
 
 
-
-
-# implementation of some featurizers
+struct FeaturesCoords end
+(f::FeaturesCoords)(coords) = coords
 
 struct FeaturesAll end
 (f::FeaturesAll)(coords) = ISOKANN.flatpairdists(coords)
@@ -177,57 +161,29 @@ struct FeaturesAll end
 struct FeaturesAtoms
     atominds::Vector{Int}
 end
-
 (f::FeaturesAtoms)(coords) = ISOKANN.flatpairdists(coords, f.atominds)
 
 struct FeaturesPairs
     pairs::Vector{Tuple{Int,Int}}
 end
-
 (f::FeaturesPairs)(coords) = ISOKANN.pdists(coords, f.pairs)
 
-function FeaturesPairs(sim::OpenMMSimulation, maxdist::Number, atomfilter::Function)
+
+import StatsBase
+function FeaturesPairs(sim::OpenMMSimulation; maxdist::Number, atomfilter::Function=a -> true, maxfeatures::Int=0)
     pairs = local_atom_pairs(sim.pysim, maxdist; atomfilter=atomfilter)
-    return FeaturesPairs(pairs)
-end
 
-# N random pairs
-function FeaturesRandomPairs(sim::OpenMMSimulation, features::Int)
-    n = natoms(sim)
-    @assert features <= (n * n - 1) / 2
-    p = Set{Tuple{Int,Int}}()
-    while length(p) < features
-        i = rand(1:n)
-        j = rand(1:n)
-        i < j || continue
-        push!(p, (i, j))
+    if 0 < maxfeatures < length(pairs)
+        pairs = StatsBase.sample(pairs, maxfeatures, replace=false)
     end
-    pairs = sort(collect(p))
-    FeaturesPairs(pairs)
-end
-
-
 
-""" generate `n` random inintial points for the simulation `mm` """
-function randx0(sim::OpenMMSimulation, n)
-    x0 = hcat(getcoords(sim))
-    xs = propagate(sim, x0, n)
-    return dropdims(xs, dims=3)
+    return FeaturesPairs(pairs)
 end
 
+# TODO: replace this with laggedtrajectory?
+""" generate `n` random inintial points for the simulation `mm` """
+randx0(sim::OpenMMSimulation, n) = ISOKANN.laggedtrajectory(sim, n)
 
-" compute the lagtime in ps "
-lagtime(sim::OpenMMSimulation) = sim.step * sim.steps
-dim(sim::OpenMMSimulation) = return length(getcoords(sim))
-defaultmodel(sim::OpenMMSimulation; kwargs...) = ISOKANN.pairnet(; kwargs...)
-
-pdbfile(s::OpenMMSimulation) = pdbfile(s.pdb)
-pdbfile(str::String) = str
-function pdbfile(pdb::PyObject)
-    file = tempname()
-    pdb.writeFile(pdb.topology, pdb.positions, file)
-    file
-end
 
 """
     propagate(s::OpenMMSimulation, x0::AbstractMatrix{T}, ny; nthreads=Threads.nthreads(), mmthreads=1) where {T}
@@ -246,20 +202,18 @@ Propagates `ny` replicas of the OpenMMSimulation `s` from the inintial states `x
 Note: For CPU we observed better performance with nthreads = num cpus, mmthreads = 1 then the other way around.
 With GPU nthreads > 1 should be supported, but on our machine lead to slower performance then nthreads=1.
 """
-function propagate(s::OpenMMSimulation, x0::AbstractMatrix{T}, ny; stepsize=s.step, steps=s.steps, nthreads=s.nthreads, mmthreads=s.mmthreads, momenta=s.momenta) where {T}
-    s.mmthreads == "gpu" && CUDA.has_cuda() && CUDA.reclaim()
+function propagate(sim::OpenMMSimulation, x0::AbstractMatrix{T}, ny; stepsize=stepsize(sim), steps=steps(sim), nthreads=nthreads(sim), mmthreads=mmthreads(sim), momenta=momenta(sim)) where {T}
+    mmthreads == "gpu" && CUDA.has_cuda() && CUDA.reclaim()
     dim, nx = size(x0)
     xs = repeat(x0, outer=[1, ny])
     xs = permutedims(reinterpret(Tuple{T,T,T}, xs))
-    ys = @pycall py"threadedrun"(xs, s.pysim, stepsize, steps, nthreads, mmthreads, momenta)::Vector{Float32}
+    ys = @pycall py"threadedrun"(xs, sim.pysim, stepsize, steps, nthreads, mmthreads, momenta)::Vector{Float32}
     ys = reshape(ys, dim, nx, ny)
     ys = permutedims(ys, (1, 3, 2))
     checkoverflow(ys)  # control the simulated data for NaNs and too large entries and throws an error
     return ys#convert(Array{Float32,3}, ys)
 end
 
-#propagate(s::OpenMMSimulation, x0::CuArray, ny; nthreads=Threads.nthreads()) = cu(propagate(s, collect(x0), ny; nthreads))
-
 struct OpenMMOverflow{T} <: Exception where {T}
     result::T
     select::Vector{Bool}  # flags which results are valid
@@ -277,34 +231,27 @@ end
 
 Return the coordinates of a single trajectory started at `x0` for the given number of `steps` where each `saveevery` step is stored.
 """
-function trajectory(s::OpenMMSimulation; x0::AbstractVector{T}=getcoords(s), steps=s.steps, saveevery=1, stepsize=s.step, mmthreads=s.mmthreads, momenta=s.momenta) where {T}
+function trajectory(sim::OpenMMSimulation; x0::AbstractVector{T}=getcoords(sim), steps=steps(sim), saveevery=1, stepsize=stepsize(sim), mmthreads=mmthreads(sim), momenta=momenta(sim)) where {T}
     x0 = reinterpret(Tuple{T,T,T}, x0)
-    xs = py"trajectory"(s.pysim, x0, stepsize, steps, saveevery, mmthreads, momenta)
+    xs = py"trajectory"(sim.pysim, x0, stepsize, steps, saveevery, mmthreads, momenta)
     xs = permutedims(xs, (3, 2, 1))
     xs = reshape(xs, :, size(xs, 3))
     return xs
 end
 
-@deprecate trajectory(s, x0, steps, saveevery; kwargs...) trajectory(s; x0, steps, saveevery, kwargs...)
+@deprecate trajectory(sim, x0, steps, saveevery; kwargs...) trajectory(sim; x0, steps, saveevery, kwargs...)
 
-function ISOKANN.laggedtrajectory(s::OpenMMSimulation, n_lags, steps_per_lag=s.steps; x0=getcoords(s), keepstart=false)
+function ISOKANN.laggedtrajectory(sim::OpenMMSimulation, n_lags, steps_per_lag=steps(sim); x0=getcoords(sim), keepstart=false)
     steps = steps_per_lag * n_lags
     saveevery = steps_per_lag
-    xs = trajectory(s, x0, steps, saveevery)
+    xs = trajectory(sim, x0, steps, saveevery)
     return keepstart ? xs : xs[:, 2:end]
 end
 
-getcoords(sim::OpenMMSimulation) = getcoords(sim.pysim, sim.momenta)::Vector{Float64}
-setcoords(sim::OpenMMSimulation, coords) = setcoords(sim.pysim, coords, sim.momenta)
-
-getcoords(sim::PyObject, momenta) = py"get_numpy_state($sim.context, $momenta).flatten()"
-
-natoms(sim::OpenMMSimulation) = div(length(getcoords(sim)), 3)
-
-function minimize(sim::OpenMMSimulation, coords, iter=100)
+function minimize!(sim::OpenMMSimulation, coords=getcoords(sim); iter=100)
     setcoords(sim, coords)
-    sim.pysim.minimizeEnergy(maxIterations=iter)
-    return getcoords(sim)
+    return sim.pysim.minimizeEnergy(maxIterations=iter)
+    return nothing
 end
 
 function setcoords(sim::PyObject, coords::AbstractVector{T}, momenta) where {T}
@@ -320,10 +267,9 @@ end
 
 """ mutates the state in sim """
 function set_random_velocities!(sim, x)
-    v = py"set_random_velocities($(sim.pysim.context), $(sim.mmthreads))"
+    v = py"set_random_velocities($(sim.pysim.context))"
     n = length(x) ÷ 2
     x[n+1:end] = v
-
     return x
 end
 
@@ -334,15 +280,8 @@ masses(sim::OpenMMSimulation) = [sim.pysim.system.getParticleMass(i - 1)._value
 function force(sim::OpenMMSimulation, x)
     CUDA.reclaim()
     setcoords(sim, x)
-    sys = sim.pysim.system
-
-    f = sim.pysim.context.getState(getForces=true).getForces(asNumpy=true)
-    f = f.value_in_unit(f.unit) |> permutedims
-    #m = [sys.getParticleMass(i - 1)._value for i in 1:sys.getNumParticles()]
-    #f ./= m'
-    f = vec(f)
-    @assert(!any(isnan.(f)))
-    f
+    pysim = sim.pysim
+    return PyArray(py"$pysim.context.getState(getForces=True).getForces(asNumpy=True)._value.reshape(-1)"o)
 end
 
 function potential(sim::OpenMMSimulation, x)
@@ -352,8 +291,6 @@ function potential(sim::OpenMMSimulation, x)
     v = v.value_in_unit(v.unit)
 end
 
-### PYTHON CODE
-
 
 function savecoords(path, sim::OpenMMSimulation, coords::AbstractArray{T}) where {T}
     coords = ISOKANN.cpu(coords)
@@ -371,14 +308,14 @@ end
 
 ### FEATURE FILTERS
 
-function atomfilter(atom)
+function remove_H_H2O_NACL(atom)
     !(
         atom.element.symbol == "H" ||
         atom.residue.name in ["HOH", "NA", "CL"]
     )
 end
 
-function local_atom_pairs(pysim::PyObject, radius; atomfilter=atomfilter)
+function local_atom_pairs(pysim::PyObject, radius; atomfilter=remove_H_H2O_NACL)
     coords = reshape(getcoords(pysim, false), 3, :)
     atoms = filter(atomfilter, pysim.topology.atoms() |> collect)
     inds = map(atom -> atom.index + 1, atoms)
@@ -409,69 +346,42 @@ end
 ### SERIALIZATION
 
 struct OpenMMSimulationSerialized
-    pdb
-    ligand
-    forcefields
-    temp
-    friction
-    step
     steps
-    features
-    nthreads
-    mmthreads
+    constructor
 end
 
 JLD2.writeas(::Type{OpenMMSimulation}) = OpenMMSimulationSerialized
 
-Base.convert(::Type{OpenMMSimulationSerialized}, a::OpenMMSimulation) =
-    OpenMMSimulationSerialized(
-        a.pdb,
-        a.ligand,
-        a.forcefields,
-        a.temp,
-        a.friction,
-        a.step,
-        a.steps,
-        a.features,
-        a.nthreads,
-        a.mmthreads)
-
-Base.convert(::Type{OpenMMSimulation}, a::OpenMMSimulationSerialized) =
-    OpenMMSimulation(;
-        pdb=a.pdb,
-        ligand=a.ligand,
-        forcefields=a.forcefields,
-        temp=a.temp,
-        friction=a.friction,
-        step=a.step,
-        steps=a.steps,
-        features=a.features,
-        nthreads=a.nthreads,
-        mmthreads=a.mmthreads)
+Base.convert(::Type{OpenMMSimulationSerialized}, sim::OpenMMSimulation) =
+    OpenMMSimulationSerialized(steps(sim), sim.constructor)
+
+Base.convert(::Type{OpenMMSimulation}, s::OpenMMSimulationSerialized) =
+    OpenMMSimulation(; steps=s.steps, s.constructor...)
 
 function Base.show(io::IO, mime::MIME"text/plain", sim::OpenMMSimulation)#
-    featstr = if sim.features isa Vector{Tuple{Int,Int}}
-        "{$(length(sim.features)) pairwise distances}"
-    else
-        sim.features
-    end
+    #featstr = if sim.features isa Vector{Tuple{Int,Int}}
+    #    "{$(length(sim.features)) pairwise distances}"
+    #else
+    #    sim.features
+    #end
+    # features=$featstr)
+    #ligand="$(sim.ligand)",
+    #        forcefields=$(sim.forcefields),
     println(
         io, """
         OpenMMSimulation(;
-            pdb="$(sim.pdb)",
-            ligand="$(sim.ligand)",
-            forcefields=$(sim.forcefields),
-            temp=$(sim.temp),
-            friction=$(sim.friction),
-            step=$(sim.step),
-            steps=$(sim.steps),
-            features=$featstr)
-        with $(div(length(getcoords(sim)),3)) atoms"""
+            pdb="$(pdbfile(sim))",
+            temp=$(temp(sim)),
+            friction=$(friction(sim)),
+            step=$(stepsize(sim)),
+            steps=$(steps(sim))
+        with $(natoms(sim)) atoms"""
     )
+
 end
 
 
-""" 
+"""
     integrate_langevin(sim::OpenMMSimulation, x0=getcoords(sim); steps=steps(sim), F_ext::Union{Function,Nothing}=nothing, saveevery::Union{Int, nothing}=nothing)
 
 Integrate the Langevin equations with a Euler-Maruyama scheme, allowing for external forces.
@@ -483,7 +393,7 @@ function integrate_langevin(sim::OpenMMSimulation, x0=getcoords(sim); steps=step
     x = copy(x0)
     v = zero(x) # this should be either provided or drawn from the Maxwell Boltzmann distribution
     kBT = 0.008314463 * temp(sim)
-    dt = step(sim) # sim,step is in picosecond, we calculate in nanoseconds
+    dt = stepsize(sim) # sim,step is in picosecond, we calculate in nanoseconds
     gamma = friction(sim) # convert 1/ps = 1000/ns
     m = repeat(masses(sim), inner=3)
     out = isnothing(saveevery) ? x : similar(x, length(x), cld(steps, saveevery))
@@ -506,7 +416,7 @@ end
 function integrate_girsanov(sim::OpenMMSimulation; x0=getcoords(sim), steps=steps(sim), u::Union{Function,Nothing}=nothing)
     # TODO: check units on the following three lines
     kB = 0.008314463
-    dt = step(sim)
+    dt = stepsize(sim)
     gamma = friction(sim)
 
     sigma = sqrt(2 * gamma * kB * temp(sim))