diff --git a/.lockfiles/py310-dev.lock b/.lockfiles/py310-dev.lock
index 1d59d53b5..ff4431390 100644
--- a/.lockfiles/py310-dev.lock
+++ b/.lockfiles/py310-dev.lock
@@ -182,7 +182,9 @@ future==1.0.0
 gitdb==4.0.11
     # via gitpython
 gitpython==3.1.43
-    # via streamlit
+    # via
+    #   baybe (pyproject.toml)
+    #   streamlit
 googleapis-common-protos==1.63.2
     # via
     #   opentelemetry-exporter-otlp-proto-grpc
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f4c94b96d..03431427a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - `allow_missing` and `allow_extra` keyword arguments to `Objective.transform`
 - Example for a traditional mixture
 - `add_noise_to_perturb_degenerate_rows` utility
+- `benchmarks` subpackage for defining and running performance tests
 
 ### Changed
 - `SubstanceParameter` encodings are now computed exclusively with the
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 6f67b86aa..0b1252070 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -28,3 +28,5 @@
   Bernoulli multi-armed bandit and Thompson sampling
 - Karin Hrovatin (Merck KGaA, Darmstadt, Germany):\
   `scikit-fingerprints` support
+- Fabian Liebig (Merck KGaA, Darmstadt, Germany):\
+  Benchmarking structure
diff --git a/README.md b/README.md
index 0f2529c7d..18f179d72 100644
--- a/README.md
+++ b/README.md
@@ -299,6 +299,7 @@ The available groups are:
 - `polars`: Required for optimized search space construction via [Polars](https://docs.pola.rs/)
 - `simulation`: Enabling the [simulation](https://emdgroup.github.io/baybe/stable/_autosummary/baybe.simulation.html) module.
 - `test`: Required for running the tests.
+- `benchmarking`: Required for running the benchmarking module.
 - `dev`: All of the above plus `tox` and `pip-audit`. For code contributors.
 
 ## 📡 Telemetry
diff --git a/baybe/serialization/core.py b/baybe/serialization/core.py
index 2947d4cd0..e3e53f052 100644
--- a/baybe/serialization/core.py
+++ b/baybe/serialization/core.py
@@ -3,6 +3,7 @@
 import base64
 import pickle
 from collections.abc import Callable
+from datetime import datetime, timedelta
 from typing import Any, TypeVar, get_type_hints
 
 import attrs
@@ -163,3 +164,9 @@ def select_constructor_hook(specs: dict, cls: type[_T]) -> _T:
 # Register custom un-/structure hooks
 converter.register_unstructure_hook(pd.DataFrame, _unstructure_dataframe_hook)
 converter.register_structure_hook(pd.DataFrame, _structure_dataframe_hook)
+converter.register_unstructure_hook(datetime, lambda x: x.isoformat())
+converter.register_structure_hook(datetime, lambda x, _: datetime.fromisoformat(x))
+converter.register_unstructure_hook(timedelta, lambda x: f"{x.total_seconds()}s")
+converter.register_structure_hook(
+    timedelta, lambda x, _: timedelta(seconds=float(x.removesuffix("s")))
+)
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 000000000..fb631ee0c
--- /dev/null
+++ b/benchmarks/__init__.py
@@ -0,0 +1,9 @@
+"""Benchmarking module for performance tracking."""
+
+from benchmarks.definition import Benchmark
+from benchmarks.result import Result
+
+__all__ = [
+    "Result",
+    "Benchmark",
+]
diff --git a/benchmarks/__main__.py b/benchmarks/__main__.py
new file mode 100644
index 000000000..11fe8aff6
--- /dev/null
+++ b/benchmarks/__main__.py
@@ -0,0 +1,16 @@
+"""Executes the benchmarking module."""
+# Run this via 'python -m benchmarks' from the root directory.
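+# Each entry in BENCHMARKS is a `Benchmark` object; calling it executes the wrapped
+# benchmark function under the configured random seed and returns a `Result`.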
+
+from benchmarks.domains import BENCHMARKS
+
+
+def main():
+    """Run all benchmarks."""
+    for benchmark in BENCHMARKS:
+        benchmark()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/definition/__init__.py b/benchmarks/definition/__init__.py
new file mode 100644
index 000000000..970a3dacb
--- /dev/null
+++ b/benchmarks/definition/__init__.py
@@ -0,0 +1,13 @@
+"""Benchmark task definitions."""
+
+from benchmarks.definition.config import (
+    Benchmark,
+    BenchmarkSettings,
+    ConvergenceExperimentSettings,
+)
+
+__all__ = [
+    "ConvergenceExperimentSettings",
+    "Benchmark",
+    "BenchmarkSettings",
+]
diff --git a/benchmarks/definition/config.py b/benchmarks/definition/config.py
new file mode 100644
index 000000000..e7bc75dd9
--- /dev/null
+++ b/benchmarks/definition/config.py
@@ -0,0 +1,90 @@
+"""Benchmark configurations."""
+
+import time
+from abc import ABC
+from collections.abc import Callable
+from datetime import datetime, timedelta, timezone
+from typing import Any, Generic, TypeVar
+
+from attrs import define, field
+from attrs.validators import instance_of
+from pandas import DataFrame
+
+from baybe.serialization.mixin import SerialMixin
+from baybe.utils.random import temporary_seed
+from benchmarks.result import Result, ResultMetadata
+
+
+@define(frozen=True)
+class BenchmarkSettings(SerialMixin, ABC):
+    """The base class for benchmark configurations."""
+
+    random_seed: int = field(validator=instance_of(int), kw_only=True, default=1337)
+    """The random seed for reproducibility."""
+
+
+BenchmarkSettingsType = TypeVar("BenchmarkSettingsType", bound=BenchmarkSettings)
+
+
+@define(frozen=True)
+class ConvergenceExperimentSettings(BenchmarkSettings):
+    """Benchmark configuration for recommender convergence analyses."""
+
+    batch_size: int = field(validator=instance_of(int))
+    """The recommendation batch size."""
+
+    n_doe_iterations: int = field(validator=instance_of(int))
+    """The number of Design of Experiments iterations."""
+
+    n_mc_iterations: int = field(validator=instance_of(int))
+    """The number of Monte Carlo iterations."""
+
+
+@define(frozen=True)
+class Benchmark(Generic[BenchmarkSettingsType]):
+    """The base class for a benchmark executable."""
+
+    settings: BenchmarkSettingsType = field()
+    """The benchmark configuration."""
+
+    function: Callable[[BenchmarkSettingsType], DataFrame] = field()
+    """The callable that contains the benchmarking logic."""
+
+    name: str = field(init=False)
+    """The name of the benchmark."""
+
+    best_possible_result: float | None = field(default=None)
+    """The best possible result that can be achieved in the optimization process."""
+
+    optimal_function_inputs: list[dict[str, Any]] | None = field(default=None)
+    """The input points that yield the best possible result."""
+
+    @property
+    def description(self) -> str:
+        """The description of the benchmark function."""
+        if self.function.__doc__ is not None:
+            return self.function.__doc__
+        return "No description available."
+
+    @name.default
+    def _default_name(self):
+        """Return the name of the benchmark function."""
+        return self.function.__name__
+
+    def __call__(self) -> Result:
+        """Execute the benchmark and return the result."""
+        start_datetime = datetime.now(timezone.utc)
+
+        with temporary_seed(self.settings.random_seed):
+            start_sec = time.perf_counter()
+            result = self.function(self.settings)
+            stop_sec = time.perf_counter()
+
+        duration = timedelta(seconds=stop_sec - start_sec)
+
+        metadata = ResultMetadata(
+            start_datetime=start_datetime,
+            duration=duration,
+        )
+
+        return Result(self.name, result, metadata)
diff --git a/benchmarks/domains/__init__.py b/benchmarks/domains/__init__.py
new file mode 100644
index 000000000..4a0e956a8
--- /dev/null
+++ b/benchmarks/domains/__init__.py
@@ -0,0 +1,10 @@
+"""Benchmark domains."""
+
+from benchmarks.definition.config import Benchmark
+from benchmarks.domains.synthetic_2C1D_1C import synthetic_2C1D_1C_benchmark
+
+BENCHMARKS: list[Benchmark] = [
+    synthetic_2C1D_1C_benchmark,
+]
+
+__all__ = ["BENCHMARKS"]
diff --git a/benchmarks/domains/synthetic_2C1D_1C.py b/benchmarks/domains/synthetic_2C1D_1C.py
new file mode 100644
index 000000000..abb8ab176
--- /dev/null
+++ b/benchmarks/domains/synthetic_2C1D_1C.py
@@ -0,0 +1,123 @@
+"""Synthetic function with two continuous and one discrete input."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import numpy as np
+from numpy import pi, sin, sqrt
+from pandas import DataFrame
+
+from baybe.campaign import Campaign
+from baybe.parameters import NumericalContinuousParameter, NumericalDiscreteParameter
+from baybe.recommenders.pure.nonpredictive.sampling import RandomRecommender
+from baybe.searchspace import SearchSpace
+from baybe.simulation import simulate_scenarios
+from baybe.targets import NumericalTarget, TargetMode
+from benchmarks.definition import (
+    Benchmark,
+    ConvergenceExperimentSettings,
+)
+
+if TYPE_CHECKING:
+    from mpl_toolkits.mplot3d import Axes3D
+
+
+def _lookup(z: np.ndarray, x: np.ndarray, y: np.ndarray) -> np.ndarray:
+    """Look up the synthetic objective values for the given benchmark inputs."""
+    try:
+        assert np.all(-2 * pi <= x) and np.all(x <= 2 * pi)
+        assert np.all(-2 * pi <= y) and np.all(y <= 2 * pi)
+        assert np.all(np.isin(z, [1, 2, 3, 4]))
+    except AssertionError:
+        raise ValueError("Inputs are not in the valid ranges.")
+
+    return (
+        (z == 1) * sin(x) * (1 + sin(y))
+        + (z == 2) * (x * sin(0.9 * x) + sin(x) * sin(y))
+        + (z == 3) * (sqrt(x + 8) * sin(x) + sin(x) * sin(y))
+        + (z == 4) * (x * sin(1.666 * sqrt(x + 8)) + sin(x) * sin(y))
+    )
+
+
+def synthetic_2C1D_1C(settings: ConvergenceExperimentSettings) -> DataFrame:
+    """Hybrid synthetic test function.
+
+    Inputs:
+        z   discrete   {1,2,3,4}
+        x   continuous [-2*pi, 2*pi]
+        y   continuous [-2*pi, 2*pi]
+    Output: continuous
+    Objective: Maximization
+    Optimal Inputs:
+        {x: 1.610, y: 1.571, z: 3}
+        {x: 1.610, y: -4.712, z: 3}
+    Optimal Output: 4.09685
+    """
+    parameters = [
+        NumericalContinuousParameter("x", (-2 * pi, 2 * pi)),
+        NumericalContinuousParameter("y", (-2 * pi, 2 * pi)),
+        NumericalDiscreteParameter("z", (1, 2, 3, 4)),
+    ]
+
+    objective = NumericalTarget(name="target", mode=TargetMode.MAX).to_objective()
+    search_space = SearchSpace.from_product(parameters=parameters)
+
+    scenarios: dict[str, Campaign] = {
+        "Random Recommender": Campaign(
+            searchspace=search_space,
+            recommender=RandomRecommender(),
+            objective=objective,
+        ),
+        "Default Recommender": Campaign(
+            searchspace=search_space,
+            objective=objective,
+        ),
+    }
+
+    return simulate_scenarios(
+        scenarios,
+        _lookup,
+        batch_size=settings.batch_size,
+        n_doe_iterations=settings.n_doe_iterations,
+        n_mc_iterations=settings.n_mc_iterations,
+        impute_mode="error",
+    )
+
+
+benchmark_config = ConvergenceExperimentSettings(
+    batch_size=5,
+    n_doe_iterations=30,
+    n_mc_iterations=50,
+)
+
+synthetic_2C1D_1C_benchmark = Benchmark(
+    function=synthetic_2C1D_1C,
+    best_possible_result=4.09685,
+    settings=benchmark_config,
+    optimal_function_inputs=[
+        {"x": 1.610, "y": 1.571, "z": 3},
+        {"x": 1.610, "y": -4.712, "z": 3},
+    ],
+)
+
+
+if __name__ == "__main__":
+    # Visualize the domain
+
+    import matplotlib.pyplot as plt
+
+    X = np.linspace(-2 * pi, 2 * pi)
+    Y = np.linspace(-2 * pi, 2 * pi)
+    Z = [1, 2, 3, 4]
+
+    x_mesh, y_mesh = np.meshgrid(X, Y)
+
+    fig = plt.figure(figsize=(10, 10))
+    for i, z in enumerate(Z):
+        ax: Axes3D = fig.add_subplot(2, 2, i + 1, projection="3d")
+        t_mesh = _lookup(np.asarray(z), x_mesh, y_mesh)
+        ax.plot_surface(x_mesh, y_mesh, t_mesh)
+        plt.title(f"{z=}")
+
+    plt.show()
diff --git a/benchmarks/result/__init__.py b/benchmarks/result/__init__.py
new file mode 100644
index 000000000..fb4380da7
--- /dev/null
+++ b/benchmarks/result/__init__.py
@@ -0,0 +1,6 @@
+"""Benchmark results."""
+
+from benchmarks.result.metadata import ResultMetadata
+from benchmarks.result.result import Result
+
+__all__ = ["Result", "ResultMetadata"]
diff --git a/benchmarks/result/metadata.py b/benchmarks/result/metadata.py
new file mode 100644
index 000000000..44bfdc24b
--- /dev/null
+++ b/benchmarks/result/metadata.py
@@ -0,0 +1,51 @@
+"""Benchmark result metadata."""
+
+from datetime import datetime, timedelta
+
+import git
+from attrs import define, field
+from attrs.validators import instance_of
+from cattrs.gen import make_dict_unstructure_fn
+
+from baybe.serialization.core import converter
+from baybe.serialization.mixin import SerialMixin
+
+
+@define(frozen=True)
+class ResultMetadata(SerialMixin):
+    """The metadata of a benchmark result."""
+
+    start_datetime: datetime = field(validator=instance_of(datetime))
+    """The start datetime of the benchmark."""
+
+    duration: timedelta = field(validator=instance_of(timedelta))
+    """The time it took to complete the benchmark."""
+
+    commit_hash: str = field(validator=instance_of(str), init=False)
+    """The commit hash of the used BayBE code."""
+
+    latest_baybe_tag: str = field(validator=instance_of(str), init=False)
+    """The latest BayBE tag reachable in the ancestor commit history."""
+
+    @commit_hash.default
+    def _default_commit_hash(self) -> str:
+        """Extract the git commit hash."""
+        repo = git.Repo(search_parent_directories=True)
+        sha = repo.head.object.hexsha
+        return sha
+
+    @latest_baybe_tag.default
+    def _default_latest_baybe_tag(self) -> str:
+        """Extract the latest reachable BayBE tag."""
+        repo = git.Repo(search_parent_directories=True)
+        latest_tag = repo.git.describe(tags=True, abbrev=0)
+        return latest_tag
+
+
+# Register un-/structure hooks
+converter.register_unstructure_hook(
+    ResultMetadata,
+    make_dict_unstructure_fn(
+        ResultMetadata, converter, _cattrs_include_init_false=True
+    ),
+)
diff --git a/benchmarks/result/result.py b/benchmarks/result/result.py
new file mode 100644
index 000000000..8fbaa0995
--- /dev/null
+++ b/benchmarks/result/result.py
@@ -0,0 +1,24 @@
+"""Basic result classes for benchmarking."""
+
+from __future__ import annotations
+
+from attrs import define, field
+from attrs.validators import instance_of
+from pandas import DataFrame
+
+from baybe.serialization.mixin import SerialMixin
+from benchmarks.result import ResultMetadata
+
+
+@define(frozen=True)
+class Result(SerialMixin):
+    """A single benchmarking result."""
+
+    benchmark_identifier: str = field(validator=instance_of(str))
+    """The identifier of the benchmark that produced the result."""
+
+    data: DataFrame = field(validator=instance_of(DataFrame))
+    """The result of the benchmarked callable."""
+
+    metadata: ResultMetadata = field(validator=instance_of(ResultMetadata))
+    """The metadata associated with the benchmark result."""
diff --git a/mypy.ini b/mypy.ini
index 2eb5718bf..4521667dd 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,5 +1,5 @@
 [mypy]
-packages = baybe
+packages = baybe,benchmarks
 
 ; Avoid false positives for `type[P]` when `P` is abstract.
 ; * https://svcs.hynek.me/en/stable/typing-caveats.html#abstract-classes-and-pep-544
@@ -23,6 +23,9 @@ ignore_missing_imports = True
 [mypy-gpytorch.*]
 ignore_missing_imports = True
 
+[mypy-git.*]
+ignore_missing_imports = True
+
 [mypy-joblib.*]
 ignore_missing_imports = True
 
diff --git a/pyproject.toml b/pyproject.toml
index 69298cc60..fc28a80d5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -87,6 +87,7 @@ dev = [
     "baybe[polars]",
     "baybe[simulation]",
     "baybe[test]",
+    "baybe[benchmarking]",
     "pip-audit>=2.5.5",
     "tox-uv>=1.7.0",
     "uv>=0.3.0",  # `uv lock` (for lockfiles) is stable since 0.3.0: https://github.com/astral-sh/uv/issues/2679#event-13950215962
@@ -140,6 +141,13 @@ simulation = [
     "xyzpy>=1.2.1",
 ]
 
+benchmarking = [
+    "baybe[chem]",
+    "baybe[onnx]",
+    "baybe[simulation]",
+    "GitPython>=3.0.6",  # GitPython>=3.0.6 is necessary since older versions rely on a pinned GitDB release: https://github.com/gitpython-developers/GitPython/issues/983
+]
+
 test = [
     "hypothesis[pandas]>=6.88.4",
     "tenacity>=8.5.0",
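
For reference, a minimal usage sketch of the new `benchmarks` subpackage (not part of the diff above). It assumes the package layout introduced in this change set and BayBE's existing `SerialMixin.to_json` API; running it executes the full convergence simulation, which can take a while:

from benchmarks.domains.synthetic_2C1D_1C import synthetic_2C1D_1C_benchmark

# Executes both campaigns under the configured random seed (default 1337) and
# wraps the simulation DataFrame together with timing and git metadata.
result = synthetic_2C1D_1C_benchmark()

print(result.benchmark_identifier)  # "synthetic_2C1D_1C", taken from the function name
print(result.metadata.duration)     # wall-clock runtime as a timedelta

# Serialization relies on the datetime/timedelta hooks registered in
# baybe/serialization/core.py by this change set.
serialized = result.to_json()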