Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bump monty to use the latest monty.json import speedup patch, add import test regression test, lazy load some rarely used but costly modules #4128

Open
wants to merge 44 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
d2447ed
copy pyproject from 4073
DanielYang59 Oct 22, 2024
1eb855b
bump monty
DanielYang59 Oct 22, 2024
aae9f4b
recover networkx pin, and bump sympy
DanielYang59 Oct 22, 2024
e08454e
pin monty to lower ver to see what is causing the failure
DanielYang59 Oct 22, 2024
f8b5723
revert all changes to pyproject but monty
DanielYang59 Oct 22, 2024
1948442
bump sympy
DanielYang59 Oct 22, 2024
03df9b0
sort and group optional deps
DanielYang59 Oct 22, 2024
4492816
loose networkx pin for compatibility
DanielYang59 Oct 22, 2024
a123174
lazy import sympy
DanielYang59 Oct 22, 2024
9688244
lazy load scipy
DanielYang59 Oct 22, 2024
6e0a89c
Revert "lazy load scipy"
DanielYang59 Oct 22, 2024
75e23b1
try netcdf4 install with delvewheel, https://github.com/materialsproj…
DanielYang59 Oct 22, 2024
4320868
Revert "try netcdf4 install with delvewheel, https://github.com/mater…
DanielYang59 Oct 22, 2024
77ebf2a
Merge branch 'master' into bump-monty-json
shyuep Oct 23, 2024
206ea52
test netcdf4 1.7.1.post2, notice new release is out today
DanielYang59 Oct 23, 2024
25975dd
netcdf4 1.7.1.post2 doesn't work, try latest 1.7.2
DanielYang59 Oct 23, 2024
762b85a
reset netcdf4 pin
DanielYang59 Oct 23, 2024
038ce9a
why <= doesn't work?
DanielYang59 Oct 23, 2024
24fdcdc
add comment
DanielYang59 Oct 23, 2024
022b91a
exclude 1.7.2 as well (perhaps conditionally skip that test?)
DanielYang59 Oct 23, 2024
3dd2cdf
add place holder
DanielYang59 Oct 23, 2024
9b7f35a
enhance comment
DanielYang59 Oct 26, 2024
87267f4
add TODO tag
DanielYang59 Oct 26, 2024
3435e99
Merge remote-tracking branch 'upstream/master' into bump-monty-json
DanielYang59 Oct 26, 2024
aa4153a
Merge branch 'master' into bump-monty-json
DanielYang59 Oct 26, 2024
313d553
tweak placeholder
DanielYang59 Oct 26, 2024
e69e344
Merge branch 'bump-monty-json' of https://github.com/DanielYang59/pym…
DanielYang59 Oct 26, 2024
fa17222
replace assert_allclose for scalar compare
DanielYang59 Oct 26, 2024
74ec0c9
replace assert_allclose with isclose
DanielYang59 Oct 26, 2024
df8e2ff
fix is close
DanielYang59 Oct 26, 2024
2778e28
use np.allclose over np.all(np.isclose())
DanielYang59 Oct 26, 2024
8952c4a
lazy import AseAtomsAdaptor
DanielYang59 Oct 26, 2024
c37ba45
guard type check import
DanielYang59 Oct 26, 2024
1674a96
lazy import matplotlib
DanielYang59 Oct 26, 2024
8ac0c7f
toggle reference generation
DanielYang59 Oct 26, 2024
2f7f993
update import time records
DanielYang59 Oct 26, 2024
abf5f8b
skip in not CI env
DanielYang59 Oct 26, 2024
de4dc3b
include current time in err msg
DanielYang59 Oct 26, 2024
8f5c2ac
loose hard threshold to 100%, as it appear macos runner is prone to f…
DanielYang59 Oct 26, 2024
4849da0
update type
DanielYang59 Oct 26, 2024
aceb5b5
skip test for macos
DanielYang59 Oct 26, 2024
eb51fee
add PR tag for easy tracing
DanielYang59 Oct 26, 2024
c64ecae
use perf_counter_ns for precision
DanielYang59 Oct 26, 2024
54752f7
Merge branch 'master' into bump-monty-json
DanielYang59 Oct 27, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 15 additions & 21 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ classifiers = [
dependencies = [
"joblib>=1",
"matplotlib>=3.8",
"monty>=2024.7.29",
"networkx>=3", # PR #4116
"monty>=2024.10.21",
"networkx>=2.7", # PR #4116
"palettable>=3.3.3",
"pandas>=2",
"plotly>=4.5.0",
Expand All @@ -69,7 +69,7 @@ dependencies = [
# https://github.com/scipy/scipy/issues/21052
"scipy>=1.14.1; platform_system == 'Windows'",
"spglib>=2.5.0",
"sympy>=1.2",
"sympy>=1.3", # PR #4116
"tabulate>=0.9",
"tqdm>=4.60",
"uncertainties>=3.1.4",
Expand All @@ -87,40 +87,34 @@ Issues = "https://github.com/materialsproject/pymatgen/issues"
Pypi = "https://pypi.org/project/pymatgen"

[project.optional-dependencies]
# PR4128: netcdf4 1.7.[0/1] yanked, 1.7.1.post[1/2]/1.7.2 cause CI error
abinit = ["netcdf4>=1.6.5,!=1.7.1.post1,!=1.7.1.post2,!=1.7.2"]
ase = ["ase>=3.23.0"]
# tblite only support Python 3.12+ through conda-forge
# https://github.com/tblite/tblite/issues/175
tblite = ["tblite[ase]>=0.3.0; python_version<'3.12'"]
vis = ["vtk>=6.0.0"]
abinit = ["netcdf4>=1.7.1"]
DanielYang59 marked this conversation as resolved.
Show resolved Hide resolved
mlp = ["chgnet>=0.3.8", "matgl>=1.1.3"]
electronic_structure = ["fdint>=2.0.2"]
ci = ["pytest-cov>=4", "pytest-split>=0.8", "pytest>=8"]
docs = ["invoke", "sphinx", "sphinx_markdown_builder", "sphinx_rtd_theme"]
electronic_structure = ["fdint>=2.0.2"]
mlp = ["chgnet>=0.3.8", "matgl>=1.1.3"]
numba = ["numba>=0.55"]
numpy-v1 = ["numpy>=1.25.0,<2"] # Test NP1 on Windows (quite buggy ATM)
optional = [
"ase>=3.23.0",
"pymatgen[abinit,ase,mlp,tblite]",
"beautifulsoup4",
# BoltzTraP2 build fails on Windows GitHub runners
"BoltzTraP2>=24.9.4 ; platform_system != 'Windows'",
"chemview>=0.6",
"chgnet>=0.3.8",
"f90nml>=1.1.2",
"galore>=0.6.1",
"h5py>=3.11.0",
"hiphive>=1.3.1",
"jarvis-tools>=2020.7.14",
"matgl>=1.1.3",
"matplotlib>=3.8",
"netCDF4>=1.6.5",
"phonopy>=2.23",
"seekpath>=2.0.1",
# tblite only support Python 3.12+ through conda-forge
# https://github.com/tblite/tblite/issues/175
"hiphive>=1.3.1",
"openbabel-wheel>=3.1.1.20",
"tblite[ase]>=0.3.0; platform_system=='Linux' and python_version<'3.12'",
]
numba = ["numba>=0.55"]
numpy-v1 = ["numpy>=1.25.0,<2"] # Test NP1 on Windows (quite buggy ATM)
# tblite only support Python 3.12+ through conda-forge
# https://github.com/tblite/tblite/issues/175
tblite = [ "tblite[ase]>=0.3.0; platform_system=='Linux' and python_version<'3.12'"]
vis = ["vtk>=6.0.0"]

[project.scripts]
pmg = "pymatgen.cli.pmg:main"
Expand Down
53 changes: 16 additions & 37 deletions src/pymatgen/analysis/interfaces/coherent_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from typing import TYPE_CHECKING

import numpy as np
from numpy.testing import assert_allclose
from scipy.linalg import polar

from pymatgen.analysis.elasticity.strain import Deformation
Expand Down Expand Up @@ -101,21 +100,15 @@ def _find_matches(self) -> None:
for match in self.zsl_matches:
xform = get_2d_transform(film_vectors, match.film_vectors)
strain, _rot = polar(xform)
assert_allclose(
Copy link
Contributor Author

@DanielYang59 DanielYang59 Oct 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly, np.testing.assert_allclose is not suitable for production code (~ 2.5x slower than np.allclose), though it provides more detailed debug info for testing.

Note the default tolerances are a bit stricter for assert_allclose, this commit also migrated the tolerances as is without changing behaviour.

testing.assert_allclose(actual, desired, rtol=1e-07, atol=0, equal_nan=True, err_msg='', verbose=True, *, strict=False)

numpy.allclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False)


Test script:

import numpy as np
from numpy.testing import assert_allclose
import timeit


def run_tests():
    # Array sizes to test
    sizes = [(10, 10), (100, 100), (1000, 1000), (10000, 10000)]

    rtol = 1e-5
    atol = 1e-8

    # Run tests for each size
    for size in sizes:
        arr1 = np.random.rand(*size)
        arr2 = np.copy(arr1)  # Exact copy of arr1

        assert_allclose(arr1, arr2, rtol=rtol, atol=atol)
        assert np.allclose(arr1, arr2, rtol=rtol, atol=atol)

        # Measure performance in milliseconds
        num_runs = 10
        assert_time = timeit.timeit(lambda: assert_allclose(arr1, arr2, rtol=rtol, atol=atol), number=num_runs) * 1000
        allclose_time = timeit.timeit(lambda: np.allclose(arr1, arr2, rtol=rtol, atol=atol), number=num_runs) * 1000

        print(f"\nPerformance results for array size {size} over {num_runs} runs:")
        print(f"assert_allclose time: {assert_time:.3f} ms")
        print(f"np.allclose time: {allclose_time:.3f} ms")
        print(f"Speedup: {assert_time / allclose_time:.2f}x")

run_tests()

I got:

Performance results for array size (10, 10) over 10 runs:
assert_allclose time: 0.308 ms
np.allclose time: 0.102 ms
Speedup: 3.03x

Performance results for array size (100, 100) over 10 runs:
assert_allclose time: 0.563 ms
np.allclose time: 0.244 ms
Speedup: 2.30x

Performance results for array size (1000, 1000) over 10 runs:
assert_allclose time: 55.887 ms
np.allclose time: 22.179 ms
Speedup: 2.52x

Performance results for array size (10000, 10000) over 10 runs:
assert_allclose time: 12374.578 ms
np.allclose time: 2916.093 ms
Speedup: 4.24x

strain,
np.round(strain),
atol=1e-12,
err_msg="Film lattice vectors changed during ZSL match, check your ZSL Generator parameters",
)
if not np.allclose(strain, np.round(strain), atol=1e-12):
raise ValueError("Film lattice vectors changed during ZSL match, check your ZSL Generator parameters")

xform = get_2d_transform(substrate_vectors, match.substrate_vectors)
strain, _rot = polar(xform)
assert_allclose(
strain,
strain.astype(int),
atol=1e-12,
err_msg="Substrate lattice vectors changed during ZSL match, check your ZSL Generator parameters",
)
if not np.allclose(strain, strain.astype(int), atol=1e-12):
raise ValueError(
"Substrate lattice vectors changed during ZSL match, check your ZSL Generator parameters"
)

def _find_terminations(self):
"""Find all terminations."""
Expand Down Expand Up @@ -226,37 +219,23 @@ def get_interfaces(
).astype(int)
film_sl_slab = film_slab.copy()
film_sl_slab.make_supercell(super_film_transform)
assert_allclose(
film_sl_slab.lattice.matrix[2],
film_slab.lattice.matrix[2],
atol=1e-08,
err_msg="2D transformation affected C-axis for Film transformation",
)
assert_allclose(
film_sl_slab.lattice.matrix[:2],
match.film_sl_vectors,
atol=1e-08,
err_msg="Transformation didn't make proper supercell for film",
)
if not np.allclose(film_sl_slab.lattice.matrix[2], film_slab.lattice.matrix[2], atol=1e-08):
raise ValueError(
"2D transformation affected C-axis for Film transformation",
)
if not np.allclose(film_sl_slab.lattice.matrix[:2], match.film_sl_vectors, atol=1e-08):
raise ValueError("Transformation didn't make proper supercell for film")

# Build substrate superlattice
super_sub_transform = np.round(
from_2d_to_3d(get_2d_transform(sub_slab.lattice.matrix[:2], match.substrate_sl_vectors))
).astype(int)
sub_sl_slab = sub_slab.copy()
sub_sl_slab.make_supercell(super_sub_transform)
assert_allclose(
sub_sl_slab.lattice.matrix[2],
sub_slab.lattice.matrix[2],
atol=1e-08,
err_msg="2D transformation affected C-axis for Film transformation",
)
assert_allclose(
sub_sl_slab.lattice.matrix[:2],
match.substrate_sl_vectors,
atol=1e-08,
err_msg="Transformation didn't make proper supercell for substrate",
)
if not np.allclose(sub_sl_slab.lattice.matrix[2], sub_slab.lattice.matrix[2], atol=1e-08):
raise ValueError("2D transformation affected C-axis for Film transformation")
if not np.allclose(sub_sl_slab.lattice.matrix[:2], match.substrate_sl_vectors, atol=1e-08):
raise ValueError("Transformation didn't make proper supercell for substrate")

# Add extra info
match_dict = match.as_dict()
Expand Down
15 changes: 10 additions & 5 deletions src/pymatgen/core/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

import numpy as np
from monty.fractions import lcm
from numpy.testing import assert_allclose
from scipy.cluster.hierarchy import fcluster, linkage
from scipy.spatial.distance import squareform

Expand Down Expand Up @@ -2784,10 +2783,16 @@ def from_slabs(
substrate_slab = substrate_slab.get_orthogonal_c_slab()
if isinstance(film_slab, Slab):
film_slab = film_slab.get_orthogonal_c_slab()
assert_allclose(film_slab.lattice.alpha, 90, 0.1)
Copy link
Contributor Author

@DanielYang59 DanielYang59 Oct 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

math.isclose is much faster (~ 500x/ ~ 188x) for comparing scalar than assert_allclose or np.isclose.

import math
import numpy as np
from numpy.testing import assert_allclose
import timeit


def run_tests():
    a = 0.123456789
    b = 0.123456789

    rtol = 1e-5
    atol = 1e-8

    assert math.isclose(a, b, rel_tol=rtol, abs_tol=atol)
    assert np.isclose(a, b, rtol=rtol, atol=atol)
    assert_allclose(a, b, rtol=rtol, atol=atol)

    # Measure performance in milliseconds
    num_runs = 1000000
    isclose_time_math = timeit.timeit(lambda: math.isclose(a, b, rel_tol=rtol, abs_tol=atol), number=num_runs) * 1000
    isclose_time_np = timeit.timeit(lambda: np.isclose(a, b, rtol=rtol, atol=atol), number=num_runs) * 1000
    assert_allclose_time = timeit.timeit(lambda: assert_allclose(a, b, rtol=rtol, atol=atol), number=num_runs) * 1000

    print(f"\nPerformance results over {num_runs} runs with single float values:")
    print(f"math.isclose time: {isclose_time_math:.3f} ms")
    print(f"np.isclose time: {isclose_time_np:.3f} ms")
    print(f"assert_allclose time: {assert_allclose_time:.3f} ms")
    print(f"Speedup (math.isclose vs np.isclose): {isclose_time_np / isclose_time_math:.2f}x")
    print(f"Speedup (math.isclose vs assert_allclose): {assert_allclose_time / isclose_time_math:.2f}x")

run_tests()

Gives:

Performance results over 1000000 runs with single float values:

math.isclose time: 37.568 ms
np.isclose time: 7065.625 ms
assert_allclose time: 19688.410 ms

Speedup (math.isclose vs np.isclose): 188.07x
Speedup (math.isclose vs assert_allclose): 524.07x

assert_allclose(film_slab.lattice.beta, 90, 0.1)
assert_allclose(substrate_slab.lattice.alpha, 90, 0.1)
assert_allclose(substrate_slab.lattice.beta, 90, 0.1)

if not math.isclose(film_slab.lattice.alpha, 90, rel_tol=0.1) or not math.isclose(
film_slab.lattice.beta, 90, rel_tol=0.1
):
raise ValueError("The lattice angles alpha or beta of the film slab are not approximately 90 degrees.")

if not math.isclose(substrate_slab.lattice.alpha, 90, rel_tol=0.1) or not math.isclose(
substrate_slab.lattice.beta, 90, rel_tol=0.1
):
raise ValueError("The lattice angles alpha or beta of the substrate slab are not approximately 90 degrees.")

# Ensure sub is right-handed
# IE sub has surface facing "up"
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/core/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ def as_xyz_str(self) -> str:
Only works for integer rotation matrices.
"""
# Check for invalid rotation matrix
if not np.all(np.isclose(self.rotation_matrix, np.round(self.rotation_matrix))):
if not np.allclose(self.rotation_matrix, np.round(self.rotation_matrix)):
warnings.warn("Rotation matrix should be integer")

return transformation_to_string(
Expand Down
4 changes: 3 additions & 1 deletion src/pymatgen/core/trajectory.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from monty.json import MSONable

from pymatgen.core.structure import Composition, DummySpecies, Element, Lattice, Molecule, Species, Structure
from pymatgen.io.ase import AseAtomsAdaptor

if TYPE_CHECKING:
from collections.abc import Iterator
Expand Down Expand Up @@ -580,9 +579,12 @@ def from_file(cls, filename: str | Path, constant_lattice: bool = True, **kwargs
try:
from ase.io.trajectory import Trajectory as AseTrajectory

from pymatgen.io.ase import AseAtomsAdaptor
Copy link
Contributor Author

@DanielYang59 DanielYang59 Oct 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AseAtomsAdaptor is only used in one of the many try-except branches (other branches also lazy import the corresponding modules), ~10% speed up.

if fnmatch(filename, "*XDATCAR*"):
from pymatgen.io.vasp.outputs import Xdatcar
structures = Xdatcar(filename).structures
elif fnmatch(filename, "vasprun*.xml*"):
from pymatgen.io.vasp.outputs import Vasprun
structures = Vasprun(filename).structures
elif fnmatch(filename, "*.traj"):
try:
from ase.io.trajectory import Trajectory as AseTrajectory
ase_traj = AseTrajectory(filename)
# Periodic boundary conditions should be the same for all frames so just check the first
pbc = ase_traj[0].pbc
if any(pbc):
structures = [AseAtomsAdaptor.get_structure(atoms) for atoms in ase_traj]
else:
molecules = [AseAtomsAdaptor.get_molecule(atoms) for atoms in ase_traj]
is_mol = True
except ImportError as exc:
raise ImportError("ASE is required to read .traj files. pip install ase") from exc

image


ase_traj = AseTrajectory(filename)
# Periodic boundary conditions should be the same for all frames so just check the first
pbc = ase_traj[0].pbc

if any(pbc):
structures = [AseAtomsAdaptor.get_structure(atoms) for atoms in ase_traj]
else:
Expand Down
5 changes: 2 additions & 3 deletions src/pymatgen/io/vasp/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from monty.json import MSONable, jsanitize
from monty.os.path import zpath
from monty.re import regrep
from numpy.testing import assert_allclose
from tqdm import tqdm

from pymatgen.core import Composition, Element, Lattice, Structure
Expand Down Expand Up @@ -4978,8 +4977,8 @@ def __init__(

if i_spin == 0:
self.kpoints.append(kpoint)
else:
assert_allclose(self.kpoints[i_nk], kpoint)
elif not np.allclose(self.kpoints[i_nk], kpoint, rtol=1e-7, atol=0):
raise ValueError(f"kpoints of {i_nk=} mismatch")

if verbose:
print(f"kpoint {i_nk: 4} with {nplane: 5} plane waves at {kpoint}")
Expand Down
6 changes: 4 additions & 2 deletions src/pymatgen/symmetry/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
from typing import TYPE_CHECKING

import numpy as np
from sympy import Matrix
from sympy.parsing.sympy_parser import parse_expr

from pymatgen.core.lattice import Lattice
from pymatgen.core.operations import MagSymmOp, SymmOp
Expand Down Expand Up @@ -100,6 +98,10 @@ def parse_transformation_string(
Returns:
tuple[list[list[float]] | np.ndarray, list[float]]: transformation matrix & vector
"""
# Import sympy is expensive (PR4128)
from sympy import Matrix
from sympy.parsing.sympy_parser import parse_expr

try:
a, b, c = np.eye(3)
b_change, o_shift = transformation_string.split(";")
Expand Down
2 changes: 2 additions & 0 deletions tests/io/abinit/test_netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ def test_read_fe(self):
ref_magmom_collinear = [-0.5069359730980665]
path = os.path.join(tmp_dir, "Fe_magmoms_collinear_GSR.nc")

# TODO: PR4128, EtsfReader would fail in Ubuntu CI with netCDF4 > 1.6.5
# Need someone with knowledge in netCDF4 to fix it
with EtsfReader(path) as data:
structure = data.read_structure()
assert structure.site_properties["magmom"] == ref_magmom_collinear
Expand Down
14 changes: 14 additions & 0 deletions tests/performance/test_performance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""
Test the import time of several important modules.

TODO:
Test the runtime of several important modules.
"""

from __future__ import annotations


class TestImportTime:
"""
Test import time of important modules.
"""