From d8112405e786380c26e7aedddde5717370ca4d3c Mon Sep 17 00:00:00 2001 From: Di Jin Date: Tue, 2 Jul 2024 11:10:50 +0200 Subject: [PATCH 01/67] Enable cardinality constraint in botorch recommender via sampling inactive parameters --- baybe/recommenders/pure/bayesian/botorch.py | 69 ++++++++++++----- baybe/searchspace/continuous.py | 74 +++++++++++++++++++ .../test_cardinality_constraint_continuous.py | 50 ++++--------- .../test_constraints_continuous.py | 33 +++++++++ 4 files changed, 173 insertions(+), 53 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index a3601c3fa..07703b936 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -21,6 +21,8 @@ sample_numerical_df, ) +N_RESTART_CARDINALITY = 5 + @define(kw_only=True) class BotorchRecommender(BayesianRecommender): @@ -153,25 +155,56 @@ def _recommend_continuous( import torch from botorch.optim import optimize_acqf + from torch import Tensor + + def _recommend_continuous_on_subspace( + _subspace_continuous: SubspaceContinuous + ) -> tuple[Tensor, Tensor]: + """Define a helper function with only one parameter.""" + _points, _acqf_values = optimize_acqf( + acq_function=self._botorch_acqf, + bounds=torch.from_numpy(_subspace_continuous.param_bounds_comp), + q=batch_size, + num_restarts=5, # TODO make choice for num_restarts + raw_samples=10, # TODO make choice for raw_samples + equality_constraints=[ + c.to_botorch(_subspace_continuous.parameters) + for c in _subspace_continuous.constraints_lin_eq + ] + or None, # TODO: https://github.com/pytorch/botorch/issues/2042 + inequality_constraints=[ + c.to_botorch(_subspace_continuous.parameters) + for c in _subspace_continuous.constraints_lin_ineq + ] + or None, # TODO: https://github.com/pytorch/botorch/issues/2042 + sequential=self.sequential_continuous, + ) + return _points, _acqf_values + + if len(subspace_continuous.constraints_cardinality): + 
acqf_values_all: list[Tensor] = [] + points_all: list[Tensor] = [] + for _ in range(N_RESTART_CARDINALITY): + # Randomly set some parameters inactive + inactive_params_sample = ( + subspace_continuous._sample_inactive_parameters(1)[0] + ) + # Create a new subspace + subspace_renewed = subspace_continuous._ensure_nonzero_parameters( + inactive_params_sample + ) - points, _ = optimize_acqf( - acq_function=self._botorch_acqf, - bounds=torch.from_numpy(subspace_continuous.param_bounds_comp), - q=batch_size, - num_restarts=5, # TODO make choice for num_restarts - raw_samples=10, # TODO make choice for raw_samples - equality_constraints=[ - c.to_botorch(subspace_continuous.parameters) - for c in subspace_continuous.constraints_lin_eq - ] - or None, # TODO: https://github.com/pytorch/botorch/issues/2042 - inequality_constraints=[ - c.to_botorch(subspace_continuous.parameters) - for c in subspace_continuous.constraints_lin_ineq - ] - or None, # TODO: https://github.com/pytorch/botorch/issues/2042 - sequential=self.sequential_continuous, - ) + ( + points_all_i, + acqf_values_i, + ) = _recommend_continuous_on_subspace( + subspace_renewed, + ) + points_all.append(points_all_i.unsqueeze(0)) + acqf_values_all.append(acqf_values_i.unsqueeze(0)) + points = torch.cat(points_all)[torch.argmax(torch.cat(acqf_values_all)), :] + else: + points, _ = _recommend_continuous_on_subspace(subspace_continuous) # Return optimized points as dataframe rec = pd.DataFrame(points, columns=subspace_continuous.param_names) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 5b687d668..0bdc7eb9d 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -36,6 +36,7 @@ from baybe.searchspace.core import SearchSpace _MAX_CARDINALITY_SAMPLING_ATTEMPTS = 10_000 +ZERO_THRESHOLD = 1e-5 @define @@ -247,6 +248,14 @@ def param_names(self) -> tuple[str, ...]: """Return list of parameter names.""" return tuple(p.name for p in self.parameters) + 
@property + def param_names_in_cardinality_constraint(self) -> tuple[str, ...]: + """Return list of parameter names involved in cardinality constraints.""" + params_per_cardinatliy_constraint = [ + c.parameters for c in self.constraints_cardinality + ] + return tuple(chain(*params_per_cardinatliy_constraint)) + @property def param_bounds_comp(self) -> np.ndarray: """Return bounds as numpy array.""" @@ -454,6 +463,71 @@ def _sample_inactive_parameters(self, batch_size: int = 1) -> list[set[str]]: ] return [set(chain(*x)) for x in zip(*inactives_per_constraint)] + def _ensure_nonzero_parameters( + self, + inactive_parameters: Collection[str], + zero_threshold: float = ZERO_THRESHOLD, + ) -> SubspaceContinuous: + """Create a new subspace with following several actions. + + * Ensure inactive parameter = 0.0. + * Ensure active parameter != 0.0. + * Remove cardinality constraint. + + Args: + inactive_parameters: A list of inactive parameters. + zero_threshold: Threshold for checking whether a value is zero. + + Returns: + A new subspace object. + """ + # Active parameters: parameters involved in cardinality constraints + active_params_sample = set( + self.param_names_in_cardinality_constraint + ).difference(set(inactive_parameters)) + + constraints_lin_ineq = list(self.constraints_lin_ineq) + for active_param in active_params_sample: + index = self.param_names.index(active_param) + + # Ensure x != 0 when bounds = [..., 0]. This is needed, otherwise + # the minimum cardinality constraint is easily violated + # TODO: Ensure x != 0 when x in [..., 0, ...] 
is not done + # TODO: To ensure the minimum cardinaltiy constraints, shall we keep the x + # != 0 operations or shall we have instead skip the invalid results + if self.parameters[index].bounds.upper == 0: + constraints_lin_ineq.append( + ContinuousLinearInequalityConstraint( + parameters=[active_param], + coefficients=[-1.0], + rhs=min(zero_threshold, -self.parameters[index].bounds.lower), + ) + ) + # Ensure x != 0 when bounds = [0, ...] + elif self.parameters[index].bounds.lower == 0: + constraints_lin_ineq.append( + ContinuousLinearInequalityConstraint( + parameters=[active_param], + coefficients=[1.0], + rhs=min(zero_threshold, self.parameters[index].bounds.upper), + ), + ) + + # Ensure inactive parameters must be 0 + constraints_lin_eq = list(self.constraints_lin_eq) + for inactive_param in inactive_parameters: + constraints_lin_eq.append( + ContinuousLinearEqualityConstraint( + parameters=[inactive_param], coefficients=[1.0], rhs=0.0 + ) + ) + + return SubspaceContinuous( + parameters=tuple(self.parameters), + constraints_lin_eq=tuple(constraints_lin_eq), + constraints_lin_ineq=tuple(constraints_lin_ineq), + ) + def samples_full_factorial(self, n_points: int = 1) -> pd.DataFrame: """Deprecated!""" # noqa: D401 warnings.warn( diff --git a/tests/constraints/test_cardinality_constraint_continuous.py b/tests/constraints/test_cardinality_constraint_continuous.py index 0d45f5a5f..7afb60b7e 100644 --- a/tests/constraints/test_cardinality_constraint_continuous.py +++ b/tests/constraints/test_cardinality_constraint_continuous.py @@ -11,13 +11,16 @@ ContinuousLinearEqualityConstraint, ContinuousLinearInequalityConstraint, ) -from baybe.parameters import NumericalContinuousParameter -from baybe.recommenders.pure.nonpredictive.sampling import RandomRecommender +from baybe.parameters.numerical import NumericalContinuousParameter from baybe.searchspace.core import SearchSpace, SubspaceContinuous def _validate_samples( - samples: pd.DataFrame, max_cardinality: int, 
min_cardinality: int, batch_size: int + samples: pd.DataFrame, + max_cardinality: int, + min_cardinality: int, + batch_size: int, + threshold: float = 0.0, ): """Validate if cardinality-constrained samples fulfill the necessary conditions. @@ -31,16 +34,21 @@ def _validate_samples( max_cardinality: Maximum allowed cardinality min_cardinality: Minimum required cardinality batch_size: Requested batch size + threshold: Threshold for checking whether a value is treated as zero. """ # Assert that cardinality constraint is fulfilled - n_nonzero = np.sum(~np.isclose(samples, 0.0), axis=1) + n_nonzero = np.sum(samples.abs().ge(threshold), axis=1) + # n_nonzero = np.sum(~np.isclose(samples, 0.0, rtol=threshold), axis=1) assert np.all(n_nonzero >= min_cardinality) and np.all(n_nonzero <= max_cardinality) # Assert that we obtain as many samples as requested - assert len(samples) == batch_size + assert samples.shape[0] == batch_size - # If there are duplicates, they must all come from the case cardinality = 0 - assert np.all(samples[samples.duplicated()] == 0.0) + # If all rows are duplicates of the first row, they must all come from the case + # cardinality = 0 (all rows are zeros) + all_zero_rows = (samples == 0).all(axis=1) + duplicated_rows = samples.duplicated() + assert ~np.all(duplicated_rows[1:]) | np.all(all_zero_rows) # Combinations of cardinalities to be tested @@ -138,31 +146,3 @@ def test_polytope_sampling_with_cardinality_constraint(): .ge(rhs_inequality - TOLERANCE) .all() ) - - -@pytest.mark.parametrize( - "parameter_names", [["Conti_finite1", "Conti_finite2", "Conti_finite3"]] -) -@pytest.mark.parametrize("constraint_names", [["ContiConstraint_5"]]) -@pytest.mark.parametrize("batch_size", [5], ids=["b5"]) -def test_random_recommender_with_cardinality_constraint( - parameters: list[NumericalContinuousParameter], - constraints: list[ContinuousCardinalityConstraint], - batch_size: int, -): - """Recommendations generated by a `RandomRecommender` under a 
cardinality constraint - have the expected number of nonzero elements.""" # noqa - - searchspace = SearchSpace.from_product( - parameters=parameters, constraints=constraints - ) - recommender = RandomRecommender() - recommendations = recommender.recommend( - searchspace=searchspace, - batch_size=batch_size, - ) - - # Assert that conditions listed in_validate_samples() are fulfilled - _validate_samples( - recommendations, max_cardinality=2, min_cardinality=1, batch_size=batch_size - ) diff --git a/tests/constraints/test_constraints_continuous.py b/tests/constraints/test_constraints_continuous.py index 6b850b3c7..8d03726ff 100644 --- a/tests/constraints/test_constraints_continuous.py +++ b/tests/constraints/test_constraints_continuous.py @@ -7,7 +7,9 @@ ContinuousLinearEqualityConstraint, ContinuousLinearInequalityConstraint, ) +from baybe.searchspace.continuous import ZERO_THRESHOLD from tests.conftest import run_iterations +from tests.constraints.test_cardinality_constraint_continuous import _validate_samples @pytest.mark.parametrize("parameter_names", [["Conti_finite1", "Conti_finite2"]]) @@ -58,6 +60,37 @@ def test_inequality2(campaign, n_iterations, batch_size): assert (1.0 * res["Conti_finite1"] + 3.0 * res["Conti_finite2"]).ge(0.299).all() +@pytest.mark.slow +@pytest.mark.parametrize( + "parameter_names", [["Conti_finite1", "Conti_finite2", "Conti_finite3"]] +) +@pytest.mark.parametrize("constraint_names", [["ContiConstraint_5"]]) +@pytest.mark.parametrize("batch_size", [5], ids=["b5"]) +def test_cardinality_constraint(campaign, n_iterations, batch_size): + """Test cardinality constraint for both random recommender and botorch + recommender.""" # noqa + + MIN_CARDINALITY = 0 + MAX_CARDINALITY = 2 + run_iterations(campaign, n_iterations, batch_size, add_noise=False) + recommendations = campaign.measurements + + print(recommendations) + + # Assert that conditions listed in_validate_samples() are fulfilled + for i_batch in range(2): + _validate_samples( + 
recommendations.loc[ + 0 + i_batch * batch_size : (i_batch + 1) * batch_size - 1, + ["Conti_finite1", "Conti_finite2", "Conti_finite3"], + ], + max_cardinality=MAX_CARDINALITY, + min_cardinality=MIN_CARDINALITY, + batch_size=batch_size, + threshold=ZERO_THRESHOLD, + ) + + @pytest.mark.slow @pytest.mark.parametrize( "parameter_names", From da813f5b64d2747e5d51bb6539e21946a51b3fbc Mon Sep 17 00:00:00 2001 From: Di Jin Date: Tue, 2 Jul 2024 15:00:12 +0200 Subject: [PATCH 02/67] Make inactive parameters fixed features --- baybe/recommenders/pure/bayesian/botorch.py | 20 ++++++++++++++++++-- baybe/searchspace/continuous.py | 18 ++++-------------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 07703b936..c531c4833 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -158,15 +158,17 @@ def _recommend_continuous( from torch import Tensor def _recommend_continuous_on_subspace( - _subspace_continuous: SubspaceContinuous + _subspace_continuous: SubspaceContinuous, + _fixed_parameters: dict[int, float] | None = None, ) -> tuple[Tensor, Tensor]: - """Define a helper function with only one parameter.""" + """Define a helper function on a subset of parameters.""" _points, _acqf_values = optimize_acqf( acq_function=self._botorch_acqf, bounds=torch.from_numpy(_subspace_continuous.param_bounds_comp), q=batch_size, num_restarts=5, # TODO make choice for num_restarts raw_samples=10, # TODO make choice for raw_samples + fixed_features=_fixed_parameters, equality_constraints=[ c.to_botorch(_subspace_continuous.parameters) for c in _subspace_continuous.constraints_lin_eq @@ -189,6 +191,19 @@ def _recommend_continuous_on_subspace( inactive_params_sample = ( subspace_continuous._sample_inactive_parameters(1)[0] ) + + if len(inactive_params_sample): + # Turn inactive parameters to fixed features (used as input in + # 
optimize_acqf()) + indices_inactive_params = [ + subspace_continuous.param_names.index(key) + for key in subspace_continuous.param_names + if key in inactive_params_sample + ] + fixed_parameters = {ind: 0.0 for ind in indices_inactive_params} + else: + fixed_parameters = None + # Create a new subspace subspace_renewed = subspace_continuous._ensure_nonzero_parameters( inactive_params_sample @@ -199,6 +214,7 @@ def _recommend_continuous_on_subspace( acqf_values_i, ) = _recommend_continuous_on_subspace( subspace_renewed, + fixed_parameters, ) points_all.append(points_all_i.unsqueeze(0)) acqf_values_all.append(acqf_values_i.unsqueeze(0)) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 0bdc7eb9d..4c49eae19 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -470,7 +470,6 @@ def _ensure_nonzero_parameters( ) -> SubspaceContinuous: """Create a new subspace with following several actions. - * Ensure inactive parameter = 0.0. * Ensure active parameter != 0.0. * Remove cardinality constraint. @@ -490,11 +489,11 @@ def _ensure_nonzero_parameters( for active_param in active_params_sample: index = self.param_names.index(active_param) + # TODO: Ensure x != 0 when x in [..., 0, ...] is not done. Do we need it? + # TODO: To ensure the minimum cardinality constraints, shall we keep the x + # != 0 operations or shall we instead skip the invalid results at the end # Ensure x != 0 when bounds = [..., 0]. This is needed, otherwise # the minimum cardinality constraint is easily violated - # TODO: Ensure x != 0 when x in [..., 0, ...] 
is not done - # TODO: To ensure the minimum cardinaltiy constraints, shall we keep the x - # != 0 operations or shall we have instead skip the invalid results if self.parameters[index].bounds.upper == 0: constraints_lin_ineq.append( ContinuousLinearInequalityConstraint( @@ -513,18 +512,9 @@ def _ensure_nonzero_parameters( ), ) - # Ensure inactive parameters must be 0 - constraints_lin_eq = list(self.constraints_lin_eq) - for inactive_param in inactive_parameters: - constraints_lin_eq.append( - ContinuousLinearEqualityConstraint( - parameters=[inactive_param], coefficients=[1.0], rhs=0.0 - ) - ) - return SubspaceContinuous( parameters=tuple(self.parameters), - constraints_lin_eq=tuple(constraints_lin_eq), + constraints_lin_eq=self.constraints_lin_eq, constraints_lin_ineq=tuple(constraints_lin_ineq), ) From adf5cc2c3daa7979de9c5dde5c1ae53a81294e3f Mon Sep 17 00:00:00 2001 From: Di Jin Date: Thu, 4 Jul 2024 11:06:55 +0200 Subject: [PATCH 03/67] Fix bug in test file --- .../test_cardinality_constraint_continuous.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/constraints/test_cardinality_constraint_continuous.py b/tests/constraints/test_cardinality_constraint_continuous.py index 7afb60b7e..765084131 100644 --- a/tests/constraints/test_cardinality_constraint_continuous.py +++ b/tests/constraints/test_cardinality_constraint_continuous.py @@ -37,8 +37,13 @@ def _validate_samples( threshold: Threshold for checking whether a value is treated as zero. """ # Assert that cardinality constraint is fulfilled - n_nonzero = np.sum(samples.abs().ge(threshold), axis=1) - # n_nonzero = np.sum(~np.isclose(samples, 0.0, rtol=threshold), axis=1) + if threshold == 0.0: + # When threshold is zero, abs(value) > threshold is treated as non-zero. + n_nonzero = len(samples.columns) - np.sum(samples.abs().le(threshold), axis=1) + else: + # When threshold is non-zero, abs(value) >= threshold is treated as non-zero. 
+ n_nonzero = np.sum(samples.abs().ge(threshold), axis=1) + assert np.all(n_nonzero >= min_cardinality) and np.all(n_nonzero <= max_cardinality) # Assert that we obtain as many samples as requested From e69ceffaa3757f7900031367600eb29e837f0e45 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Tue, 2 Jul 2024 17:08:19 +0200 Subject: [PATCH 04/67] Validate bounds of cardinality constraint parameters --- baybe/constraints/validation.py | 45 ++++++++++++++++++++++++++++++++- baybe/searchspace/continuous.py | 4 +++ tests/test_searchspace.py | 19 ++++++++++++++ 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/baybe/constraints/validation.py b/baybe/constraints/validation.py index f9c34f9aa..f8c872414 100644 --- a/baybe/constraints/validation.py +++ b/baybe/constraints/validation.py @@ -1,6 +1,6 @@ """Validation functionality for constraints.""" -from collections.abc import Collection +from collections.abc import Collection, Sequence from itertools import combinations from baybe.constraints.base import Constraint @@ -8,6 +8,7 @@ from baybe.constraints.discrete import ( DiscreteDependenciesConstraint, ) +from baybe.parameters import NumericalContinuousParameter from baybe.parameters.base import Parameter @@ -26,6 +27,8 @@ def validate_constraints( # noqa: DOC101, DOC103 ValueError: If any discrete constraint includes a continuous parameter. ValueError: If any discrete constraint that is valid only for numerical discrete parameters includes non-numerical discrete parameters. + ValueError: If the bounds of any parameter in a cardinality constraint does + not cover zero. 
""" if sum(isinstance(itm, DiscreteDependenciesConstraint) for itm in constraints) > 1: raise ValueError( @@ -41,6 +44,9 @@ def validate_constraints( # noqa: DOC101, DOC103 param_names_discrete = [p.name for p in parameters if p.is_discrete] param_names_continuous = [p.name for p in parameters if p.is_continuous] param_names_non_numerical = [p.name for p in parameters if not p.is_numerical] + params_continuous: list[NumericalContinuousParameter] = [ + p for p in parameters if p.is_continuous + ] for constraint in constraints: if not all(p in param_names_all for p in constraint.parameters): @@ -78,6 +84,11 @@ def validate_constraints( # noqa: DOC101, DOC103 f"Parameter list of the affected constraint: {constraint.parameters}." ) + if isinstance(constraint, ContinuousCardinalityConstraint): + validate_parameters_bounds_in_cardinality_constraint( + params_continuous, constraint + ) + def validate_cardinality_constraints_are_nonoverlapping( constraints: Collection[ContinuousCardinalityConstraint], @@ -98,3 +109,35 @@ def validate_cardinality_constraints_are_nonoverlapping( f"cannot share the same parameters. Found the following overlapping " f"parameter sets: {s1}, {s2}." ) + + +def validate_parameters_bounds_in_cardinality_constraint( + parameters: Sequence[NumericalContinuousParameter], + constraint: ContinuousCardinalityConstraint, +) -> None: + """Validate that the bounds of all parameters in a cardinality constraint cover + zero. + + Args: + parameters: A collection of continuous numerical parameters. + constraint: A continuous cardinality constraint. + + Raises: + ValueError: If the bounds of any parameter of a constraint does not cover zero. + """ # noqa D205 + param_names = [p.name for p in parameters] + for param_in_constraint in constraint.parameters: + # Note that this implementation checks implicitly that all constraint + # parameters must be included in the list of parameters. Otherwise Runtime + # error occurs. 
+ if ( + param := parameters[param_names.index(param_in_constraint)] + ) and not param.is_in_range(0.0): + raise ValueError( + f"The bounds of all parameters in a constraint of type " + f"`{ContinuousCardinalityConstraint.__name__}` must cover " + f"zero. Either correct the parameter ({param}) bounds:" + f" {param.bounds=} or remove the parameter {param} from the " + f"{constraint=} and update the minimum/maximum cardinality " + f"accordingly." + ) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 4c49eae19..cae12caac 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -19,6 +19,7 @@ from baybe.constraints.base import ContinuousConstraint, ContinuousNonlinearConstraint from baybe.constraints.validation import ( validate_cardinality_constraints_are_nonoverlapping, + validate_parameters_bounds_in_cardinality_constraint, ) from baybe.parameters import NumericalContinuousParameter from baybe.parameters.base import ContinuousParameter @@ -118,6 +119,9 @@ def _validate_constraints_nonlin(self, _, __) -> None: self.constraints_cardinality ) + for con in self.constraints_cardinality: + validate_parameters_bounds_in_cardinality_constraint(self.parameters, con) + def to_searchspace(self) -> SearchSpace: """Turn the subspace into a search space with no discrete part.""" from baybe.searchspace.core import SearchSpace diff --git a/tests/test_searchspace.py b/tests/test_searchspace.py index 71190f83d..73359943f 100644 --- a/tests/test_searchspace.py +++ b/tests/test_searchspace.py @@ -281,3 +281,22 @@ def test_cardinality_constraints_with_overlapping_parameters(): ), ), ) + + +def test_cardinality_constraint_with_invalid_parameter_bounds(): + """Impose a cardinality constraint on a parameter whose valid area does not + include zero raises an error.""" # noqa + parameters = ( + NumericalContinuousParameter("c1", (0, 1)), + NumericalContinuousParameter("c2", (1, 2)), + ) + with pytest.raises(ValueError, 
match="must cover zero"): + SubspaceContinuous( + parameters=parameters, + constraints_nonlin=( + ContinuousCardinalityConstraint( + parameters=["c1", "c2"], + max_cardinality=1, + ), + ), + ) From 2270350c369cbeaf1e37c39551aea1af040208e3 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Wed, 3 Jul 2024 09:36:25 +0200 Subject: [PATCH 05/67] Add second option: iterate through combinatorial list --- baybe/constraints/continuous.py | 24 ++++++++++++++ baybe/recommenders/pure/bayesian/botorch.py | 33 ++++++++++++++++--- baybe/searchspace/continuous.py | 35 +++++++++++++++++++-- 3 files changed, 85 insertions(+), 7 deletions(-) diff --git a/baybe/constraints/continuous.py b/baybe/constraints/continuous.py index db128e012..089c3df06 100644 --- a/baybe/constraints/continuous.py +++ b/baybe/constraints/continuous.py @@ -1,6 +1,8 @@ """Continuous constraints.""" import math +from itertools import combinations +from math import comb import numpy as np from attrs import define @@ -46,6 +48,28 @@ class ContinuousCardinalityConstraint( ): """Class for continuous cardinality constraints.""" + @property + def combinatorial_counts_zero_parameters(self) -> int: + """Return the total number of all possible combinations of zero parameters.""" + combinatorial_counts = 0 + for i_zeros in range( + len(self.parameters) - self.max_cardinality, + len(self.parameters) - self.min_cardinality + 1, + ): + combinatorial_counts += comb(len(self.parameters), i_zeros) + return combinatorial_counts + + @property + def combinatorial_zero_parameters(self) -> list[tuple[str, ...]]: + """Return a combinatorial list of all possible zero parameters.""" + combinatorial_zeros = [] + for i_zeros in range( + len(self.parameters) - self.max_cardinality, + len(self.parameters) - self.min_cardinality + 1, + ): + combinatorial_zeros.extend(combinations(self.parameters, i_zeros)) + return combinatorial_zeros + def sample_inactive_parameters(self, batch_size: int = 1) -> list[set[str]]: """Sample sets of inactive 
parameters according to the cardinality constraints. diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index c531c4833..fdb9be536 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -21,7 +21,7 @@ sample_numerical_df, ) -N_RESTART_CARDINALITY = 5 +N_ITER_THRESHOLD = 10 @define(kw_only=True) @@ -186,11 +186,34 @@ def _recommend_continuous_on_subspace( if len(subspace_continuous.constraints_cardinality): acqf_values_all: list[Tensor] = [] points_all: list[Tensor] = [] - for _ in range(N_RESTART_CARDINALITY): - # Randomly set some parameters inactive - inactive_params_sample = ( - subspace_continuous._sample_inactive_parameters(1)[0] + + # When the size of the full list of inactive parameters is not too large, + # we can iterate through the full list; otherwise we randomly set some + # parameters inactive. + _iterator = ( + subspace_continuous.combinatorial_zero_parameters + if ( + combinatorial_counts + := subspace_continuous.combinatorial_counts_zero_parameters ) + <= N_ITER_THRESHOLD + else range(N_ITER_THRESHOLD) + ) + + for inactive_params_generator in _iterator: + if combinatorial_counts <= N_ITER_THRESHOLD: + # Iterate through the combinations of all possible inactive + # parameters. 
+ inactive_params_sample = { + param + for sublist in inactive_params_generator + for param in sublist + } + else: + # Randomly set some parameters inactive + inactive_params_sample = ( + subspace_continuous._sample_inactive_parameters(1)[0] + ) if len(inactive_params_sample): # Turn inactive parameters to fixed features (used as input in diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index cae12caac..885467419 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -3,8 +3,9 @@ from __future__ import annotations import warnings -from collections.abc import Collection, Sequence -from itertools import chain +from collections.abc import Collection, Iterable, Sequence +from functools import reduce +from itertools import chain, product from typing import TYPE_CHECKING, Any, cast import numpy as np @@ -111,6 +112,36 @@ def constraints_cardinality(self) -> tuple[ContinuousCardinalityConstraint, ...] if isinstance(c, ContinuousCardinalityConstraint) ) + @property + def combinatorial_counts_zero_parameters(self) -> int: + """Return the total number of all possible combinations of zero parameters.""" + # Note that both continuous subspace and continuous cardinality constraint + # have this property. This property is the counts for the subspace + # parameters; while the latter one is the counts only for that constraint. + if self.constraints_cardinality: + return reduce( + lambda x, y: x * y, + [ + con.combinatorial_counts_zero_parameters + for con in self.constraints_cardinality + ], + ) + else: + return 0 + + @property + def combinatorial_zero_parameters(self) -> Iterable[tuple[str, ...]]: + """Return a combinatorial list of all possible zero parameters on subspace.""" + # The comments on the difference in `combinatorial_counts_zero_parameters` + # applies here as well. 
+ if self.constraints_cardinality: + return product( + *[ + con.combinatorial_zero_parameters + for con in self.constraints_cardinality + ] + ) + @constraints_nonlin.validator def _validate_constraints_nonlin(self, _, __) -> None: """Validate nonlinear constraints.""" From 6483e4bc1a7cd294d2f1aec3542c30586903cc77 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Wed, 3 Jul 2024 21:53:36 +0200 Subject: [PATCH 06/67] Fix type error --- baybe/constraints/continuous.py | 2 +- baybe/constraints/validation.py | 2 +- baybe/recommenders/pure/bayesian/botorch.py | 51 ++++++++++++++------- baybe/searchspace/continuous.py | 6 ++- 4 files changed, 41 insertions(+), 20 deletions(-) diff --git a/baybe/constraints/continuous.py b/baybe/constraints/continuous.py index 089c3df06..6a97e9661 100644 --- a/baybe/constraints/continuous.py +++ b/baybe/constraints/continuous.py @@ -62,7 +62,7 @@ def combinatorial_counts_zero_parameters(self) -> int: @property def combinatorial_zero_parameters(self) -> list[tuple[str, ...]]: """Return a combinatorial list of all possible zero parameters.""" - combinatorial_zeros = [] + combinatorial_zeros: list[tuple[str, ...]] = [] for i_zeros in range( len(self.parameters) - self.max_cardinality, len(self.parameters) - self.min_cardinality + 1, diff --git a/baybe/constraints/validation.py b/baybe/constraints/validation.py index f8c872414..61512fac6 100644 --- a/baybe/constraints/validation.py +++ b/baybe/constraints/validation.py @@ -45,7 +45,7 @@ def validate_constraints( # noqa: DOC101, DOC103 param_names_continuous = [p.name for p in parameters if p.is_continuous] param_names_non_numerical = [p.name for p in parameters if not p.is_numerical] params_continuous: list[NumericalContinuousParameter] = [ - p for p in parameters if p.is_continuous + p for p in parameters if isinstance(p, NumericalContinuousParameter) ] for constraint in constraints: diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 
fdb9be536..e86e9fc93 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -1,6 +1,7 @@ """Botorch recommender.""" import math +from collections.abc import Iterable from typing import Any, ClassVar import pandas as pd @@ -142,6 +143,7 @@ def _recommend_continuous( Raises: NoMCAcquisitionFunctionError: If a non-Monte Carlo acquisition function is used with a batch size > 1. + RuntimeError: If the combinatorial list of inactive parameters is None. Returns: A dataframe containing the recommendations as individual rows. @@ -187,21 +189,41 @@ def _recommend_continuous_on_subspace( acqf_values_all: list[Tensor] = [] points_all: list[Tensor] = [] - # When the size of the full list of inactive parameters is not too large, - # we can iterate through the full list; otherwise we randomly set some - # parameters inactive. - _iterator = ( - subspace_continuous.combinatorial_zero_parameters - if ( - combinatorial_counts - := subspace_continuous.combinatorial_counts_zero_parameters + # The key steps of handling cardinality constraint are + # * Determine several configurations of inactive parameters based on the + # cardinality constraints. + # * Optimize the acquisition function for different configurations and + # pick the best one. + # There are two mechanisms for inactive parameter configurations. The + # full list of different inactive parameter configurations is used, + # when its size is not too large; otherwise we randomly pick a + # fixed number of inactive parameter configurations. + + # Create an iterable that either iterates through range() or iterates + # through the full list configuration. 
+ if ( + subspace_continuous.combinatorial_counts_zero_parameters + > N_ITER_THRESHOLD + ): + _iterator: Iterable[tuple[tuple[str, ...], ...]] | range = range( + N_ITER_THRESHOLD + ) + elif subspace_continuous.combinatorial_zero_parameters is not None: + _iterator = subspace_continuous.combinatorial_zero_parameters + else: + raise RuntimeError( + f"The attribute" + f"{SubspaceContinuous.combinatorial_zero_parameters.__name__}" + f"should not be None." ) - <= N_ITER_THRESHOLD - else range(N_ITER_THRESHOLD) - ) for inactive_params_generator in _iterator: - if combinatorial_counts <= N_ITER_THRESHOLD: + if isinstance(inactive_params_generator, int): + # Randomly set some parameters inactive + inactive_params_sample = ( + subspace_continuous._sample_inactive_parameters(1)[0] + ) + else: # Iterate through the combinations of all possible inactive # parameters. inactive_params_sample = { @@ -209,11 +231,6 @@ def _recommend_continuous_on_subspace( for sublist in inactive_params_generator for param in sublist } - else: - # Randomly set some parameters inactive - inactive_params_sample = ( - subspace_continuous._sample_inactive_parameters(1)[0] - ) if len(inactive_params_sample): # Turn inactive parameters to fixed features (used as input in diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 885467419..95703f137 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -130,7 +130,9 @@ def combinatorial_counts_zero_parameters(self) -> int: return 0 @property - def combinatorial_zero_parameters(self) -> Iterable[tuple[str, ...]]: + def combinatorial_zero_parameters( + self + ) -> Iterable[tuple[tuple[str, ...], ...]] | None: """Return a combinatorial list of all possible zero parameters on subspace.""" # The comments on the difference in `combinatorial_counts_zero_parameters` # applies here as well. 
@@ -141,6 +143,8 @@ def combinatorial_zero_parameters(self) -> Iterable[tuple[str, ...]]: for con in self.constraints_cardinality ] ) + else: + return None @constraints_nonlin.validator def _validate_constraints_nonlin(self, _, __) -> None: From ae919d4c82a24cc0ed84a90ec5194e8ac7ea19d9 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Thu, 4 Jul 2024 10:34:27 +0200 Subject: [PATCH 07/67] Revise botorch+cardinality constraint for enhanced clarity --- baybe/recommenders/pure/bayesian/botorch.py | 123 ++++++++++++-------- 1 file changed, 72 insertions(+), 51 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index e86e9fc93..455d1c00d 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -1,7 +1,6 @@ """Botorch recommender.""" import math -from collections.abc import Iterable from typing import Any, ClassVar import pandas as pd @@ -159,18 +158,44 @@ def _recommend_continuous( from botorch.optim import optimize_acqf from torch import Tensor - def _recommend_continuous_on_subspace( + def _recommend_continuous_with_inactive_parameters( _subspace_continuous: SubspaceContinuous, - _fixed_parameters: dict[int, float] | None = None, + inactive_parameters: tuple[str, ...] | None = None, ) -> tuple[Tensor, Tensor]: - """Define a helper function on a subset of parameters.""" + """Define a helper function that can deal with inactive parameters.""" + if _subspace_continuous.constraints_cardinality: + # When there are cardinality constraints present. + if inactive_parameters is None: + # When no parameters are constrained to zeros + inactive_parameters = () + fixed_parameters = None + else: + # When certain parameters are constrained to zeros. 
+ + # Cast the inactive parameters to the format of fixed features used + # in optimize_acqf()) + indices_inactive_params = [ + _subspace_continuous.param_names.index(key) + for key in _subspace_continuous.param_names + if key in inactive_parameters + ] + fixed_parameters = {ind: 0.0 for ind in indices_inactive_params} + + # Create a new subspace by ensuring all active parameters are non-zeros + _subspace_continuous = _subspace_continuous._ensure_nonzero_parameters( + inactive_parameters + ) + else: + # When there is no cardinality constraint + fixed_parameters = None + _points, _acqf_values = optimize_acqf( acq_function=self._botorch_acqf, bounds=torch.from_numpy(_subspace_continuous.param_bounds_comp), q=batch_size, num_restarts=5, # TODO make choice for num_restarts raw_samples=10, # TODO make choice for raw_samples - fixed_features=_fixed_parameters, + fixed_features=fixed_parameters, equality_constraints=[ c.to_botorch(_subspace_continuous.parameters) for c in _subspace_continuous.constraints_lin_eq @@ -194,73 +219,69 @@ def _recommend_continuous_on_subspace( # cardinality constraints. # * Optimize the acquisition function for different configurations and # pick the best one. - # There are two mechanisms for inactive parameter configurations. The + # There are two mechanisms for the inactive parameter configurations. The # full list of different inactive parameter configurations is used, # when its size is not too large; otherwise we randomly pick a # fixed number of inactive parameter configurations. - # Create an iterable that either iterates through range() or iterates - # through the full list configuration. 
if ( subspace_continuous.combinatorial_counts_zero_parameters > N_ITER_THRESHOLD ): - _iterator: Iterable[tuple[tuple[str, ...], ...]] | range = range( - N_ITER_THRESHOLD - ) - elif subspace_continuous.combinatorial_zero_parameters is not None: - _iterator = subspace_continuous.combinatorial_zero_parameters - else: - raise RuntimeError( - f"The attribute" - f"{SubspaceContinuous.combinatorial_zero_parameters.__name__}" - f"should not be None." - ) - - for inactive_params_generator in _iterator: - if isinstance(inactive_params_generator, int): - # Randomly set some parameters inactive + # When the size of full list is too large, randomly set some + # parameters inactive. + for _ in range(N_ITER_THRESHOLD): inactive_params_sample = ( subspace_continuous._sample_inactive_parameters(1)[0] ) - else: - # Iterate through the combinations of all possible inactive - # parameters. + + ( + points_i, + acqf_values_i, + ) = _recommend_continuous_with_inactive_parameters( + subspace_continuous, + tuple(inactive_params_sample), + ) + + points_all.append(points_i.unsqueeze(0)) + acqf_values_all.append(acqf_values_i.unsqueeze(0)) + + elif subspace_continuous.combinatorial_zero_parameters is not None: + # When the size of full list is not too large, iterate the combinations + # of all possible inactive parameters. 
+ for ( + inactive_params_generator + ) in subspace_continuous.combinatorial_zero_parameters: + # flatten inactive parameters inactive_params_sample = { param for sublist in inactive_params_generator for param in sublist } - if len(inactive_params_sample): - # Turn inactive parameters to fixed features (used as input in - # optimize_acqf()) - indices_inactive_params = [ - subspace_continuous.param_names.index(key) - for key in subspace_continuous.param_names - if key in inactive_params_sample - ] - fixed_parameters = {ind: 0.0 for ind in indices_inactive_params} - else: - fixed_parameters = None - - # Create a new subspace - subspace_renewed = subspace_continuous._ensure_nonzero_parameters( - inactive_params_sample - ) + ( + points_i, + acqf_values_i, + ) = _recommend_continuous_with_inactive_parameters( + subspace_continuous, + tuple(inactive_params_sample), + ) - ( - points_all_i, - acqf_values_i, - ) = _recommend_continuous_on_subspace( - subspace_renewed, - fixed_parameters, + points_all.append(points_i.unsqueeze(0)) + acqf_values_all.append(acqf_values_i.unsqueeze(0)) + else: + raise RuntimeError( + f"The attribute" + f"{SubspaceContinuous.combinatorial_zero_parameters.__name__}" + f"should not be None." 
) - points_all.append(points_all_i.unsqueeze(0)) - acqf_values_all.append(acqf_values_i.unsqueeze(0)) + # Find the best option points = torch.cat(points_all)[torch.argmax(torch.cat(acqf_values_all)), :] else: - points, _ = _recommend_continuous_on_subspace(subspace_continuous) + # When there is no cardinality constraint + points, _ = _recommend_continuous_with_inactive_parameters( + subspace_continuous + ) # Return optimized points as dataframe rec = pd.DataFrame(points, columns=subspace_continuous.param_names) From 9ab8fda7427091d092661f8707cd3ffe53904689 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Thu, 11 Jul 2024 10:17:54 +0200 Subject: [PATCH 08/67] Fix property names and its docstrings --- baybe/constraints/continuous.py | 20 ++++++++++---------- baybe/recommenders/pure/bayesian/botorch.py | 8 ++++---- baybe/searchspace/continuous.py | 19 ++++++++++--------- 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/baybe/constraints/continuous.py b/baybe/constraints/continuous.py index 6a97e9661..479d0f420 100644 --- a/baybe/constraints/continuous.py +++ b/baybe/constraints/continuous.py @@ -49,26 +49,26 @@ class ContinuousCardinalityConstraint( """Class for continuous cardinality constraints.""" @property - def combinatorial_counts_zero_parameters(self) -> int: - """Return the total number of all possible combinations of zero parameters.""" - combinatorial_counts = 0 + def n_combinatorial_inactive_parameters(self) -> int: + """Counts of elements in the combinatorial list of inactive parameters.""" + n_combinatorial_inactive_params = 0 for i_zeros in range( len(self.parameters) - self.max_cardinality, len(self.parameters) - self.min_cardinality + 1, ): - combinatorial_counts += comb(len(self.parameters), i_zeros) - return combinatorial_counts + n_combinatorial_inactive_params += comb(len(self.parameters), i_zeros) + return n_combinatorial_inactive_params @property - def combinatorial_zero_parameters(self) -> list[tuple[str, ...]]: - """Return a 
combinatorial list of all possible zero parameters.""" - combinatorial_zeros: list[tuple[str, ...]] = [] + def combinatorial_inactive_parameters(self) -> list[tuple[str, ...]]: + """Combinatorial list of inactive parameters.""" + combinatorial_inactive_params: list[tuple[str, ...]] = [] for i_zeros in range( len(self.parameters) - self.max_cardinality, len(self.parameters) - self.min_cardinality + 1, ): - combinatorial_zeros.extend(combinations(self.parameters, i_zeros)) - return combinatorial_zeros + combinatorial_inactive_params.extend(combinations(self.parameters, i_zeros)) + return combinatorial_inactive_params def sample_inactive_parameters(self, batch_size: int = 1) -> list[set[str]]: """Sample sets of inactive parameters according to the cardinality constraints. diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 455d1c00d..7bedde9f6 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -225,7 +225,7 @@ def _recommend_continuous_with_inactive_parameters( # fixed number of inactive parameter configurations. if ( - subspace_continuous.combinatorial_counts_zero_parameters + subspace_continuous.n_combinatorial_inactive_parameters > N_ITER_THRESHOLD ): # When the size of full list is too large, randomly set some @@ -246,12 +246,12 @@ def _recommend_continuous_with_inactive_parameters( points_all.append(points_i.unsqueeze(0)) acqf_values_all.append(acqf_values_i.unsqueeze(0)) - elif subspace_continuous.combinatorial_zero_parameters is not None: + elif subspace_continuous.combinatorial_inactive_parameters is not None: # When the size of full list is not too large, iterate the combinations # of all possible inactive parameters. 
for ( inactive_params_generator - ) in subspace_continuous.combinatorial_zero_parameters: + ) in subspace_continuous.combinatorial_inactive_parameters: # flatten inactive parameters inactive_params_sample = { param @@ -272,7 +272,7 @@ def _recommend_continuous_with_inactive_parameters( else: raise RuntimeError( f"The attribute" - f"{SubspaceContinuous.combinatorial_zero_parameters.__name__}" + f"{SubspaceContinuous.combinatorial_inactive_parameters.__name__}" f"should not be None." ) # Find the best option diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 95703f137..06837e205 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -113,16 +113,17 @@ def constraints_cardinality(self) -> tuple[ContinuousCardinalityConstraint, ...] ) @property - def combinatorial_counts_zero_parameters(self) -> int: - """Return the total number of all possible combinations of zero parameters.""" + def n_combinatorial_inactive_parameters(self) -> int: + """Counts of elements in the combinatorial list of inactive parameters.""" # Note that both continuous subspace and continuous cardinality constraint - # have this property. This property is the counts for the subspace - # parameters; while the latter one is the counts only for that constraint. + # have this property. Both differs in that the former one refers to the + # parameters in the subspace while the latter one refers only to the + # constraint parameters. 
if self.constraints_cardinality: return reduce( lambda x, y: x * y, [ - con.combinatorial_counts_zero_parameters + con.n_combinatorial_inactive_parameters for con in self.constraints_cardinality ], ) @@ -130,16 +131,16 @@ def combinatorial_counts_zero_parameters(self) -> int: return 0 @property - def combinatorial_zero_parameters( + def combinatorial_inactive_parameters( self ) -> Iterable[tuple[tuple[str, ...], ...]] | None: - """Return a combinatorial list of all possible zero parameters on subspace.""" - # The comments on the difference in `combinatorial_counts_zero_parameters` + """Combinatorial list of inactive parameters on subspace.""" + # The comments on the difference in `n_combinatorial_inactive_parameters` # applies here as well. if self.constraints_cardinality: return product( *[ - con.combinatorial_zero_parameters + con.combinatorial_inactive_parameters for con in self.constraints_cardinality ] ) From c3831c71b2cbf9b760f7ad3ae545763435b67833 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Thu, 11 Jul 2024 10:35:24 +0200 Subject: [PATCH 09/67] Use guard clause --- baybe/searchspace/continuous.py | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 06837e205..c5bfda5b6 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -119,17 +119,17 @@ def n_combinatorial_inactive_parameters(self) -> int: # have this property. Both differs in that the former one refers to the # parameters in the subspace while the latter one refers only to the # constraint parameters. 
- if self.constraints_cardinality: - return reduce( - lambda x, y: x * y, - [ - con.n_combinatorial_inactive_parameters - for con in self.constraints_cardinality - ], - ) - else: + if not self.constraints_cardinality: return 0 + return reduce( + lambda x, y: x * y, + [ + con.n_combinatorial_inactive_parameters + for con in self.constraints_cardinality + ], + ) + @property def combinatorial_inactive_parameters( self @@ -137,16 +137,16 @@ def combinatorial_inactive_parameters( """Combinatorial list of inactive parameters on subspace.""" # The comments on the difference in `n_combinatorial_inactive_parameters` # applies here as well. - if self.constraints_cardinality: - return product( - *[ - con.combinatorial_inactive_parameters - for con in self.constraints_cardinality - ] - ) - else: + if not self.constraints_cardinality: return None + return product( + *[ + con.combinatorial_inactive_parameters + for con in self.constraints_cardinality + ] + ) + @constraints_nonlin.validator def _validate_constraints_nonlin(self, _, __) -> None: """Validate nonlinear constraints.""" From 2f49f5a00081d602d6e9bb2622b41a540fffb17d Mon Sep 17 00:00:00 2001 From: Di Jin Date: Thu, 11 Jul 2024 11:47:40 +0200 Subject: [PATCH 10/67] Simplify syntax with 'prod' --- baybe/searchspace/continuous.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index c5bfda5b6..434d071bc 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -2,9 +2,9 @@ from __future__ import annotations +import math import warnings from collections.abc import Collection, Iterable, Sequence -from functools import reduce from itertools import chain, product from typing import TYPE_CHECKING, Any, cast @@ -122,13 +122,11 @@ def n_combinatorial_inactive_parameters(self) -> int: if not self.constraints_cardinality: return 0 - return reduce( - lambda x, y: x * y, - [ - 
con.n_combinatorial_inactive_parameters - for con in self.constraints_cardinality - ], - ) + n_combinatorial_inactive_params = [ + con.n_combinatorial_inactive_parameters + for con in self.constraints_cardinality + ] + return math.prod(n_combinatorial_inactive_params) @property def combinatorial_inactive_parameters( From d46fd60837e30da7e70eecd3db624629a38f3129 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Fri, 12 Jul 2024 09:22:24 +0200 Subject: [PATCH 11/67] Refactor botorch+cardinality constraint --- baybe/recommenders/pure/bayesian/botorch.py | 305 ++++++++++++-------- baybe/searchspace/continuous.py | 4 +- 2 files changed, 181 insertions(+), 128 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 7bedde9f6..91a28ec1a 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -1,12 +1,15 @@ """Botorch recommender.""" +from __future__ import annotations + import math -from typing import Any, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar import pandas as pd from attr.converters import optional from attrs import define, field +from baybe.constraints import ContinuousCardinalityConstraint from baybe.exceptions import NoMCAcquisitionFunctionError from baybe.recommenders.pure.bayesian.base import BayesianRecommender from baybe.searchspace import ( @@ -21,7 +24,14 @@ sample_numerical_df, ) -N_ITER_THRESHOLD = 10 +if TYPE_CHECKING: + from torch import Tensor + +N_THRESHOLD_INACTIVE_PARAMETERS_GENERATOR: int = 10 +"""This threshold controls which inactive parameters generator is chosen. 
There are +two mechanisms: +* Iterating the combinatorial list of all possible inactive parameters, +* Iterate a fixed number of randomly generated inactive parameter configurations.""" @define(kw_only=True) @@ -142,7 +152,6 @@ def _recommend_continuous( Raises: NoMCAcquisitionFunctionError: If a non-Monte Carlo acquisition function is used with a batch size > 1. - RuntimeError: If the combinatorial list of inactive parameters is None. Returns: A dataframe containing the recommendations as individual rows. @@ -154,138 +163,184 @@ def _recommend_continuous( f"acquisition functions for batch sizes > 1." ) + if len(subspace_continuous.constraints_cardinality): + points, _ = self._recommend_continuous_with_cardinality_constraints( + subspace_continuous, + batch_size, + ) + else: + points, _ = self._recommend_continuous_without_cardinality_constraints( + subspace_continuous, + batch_size, + ) + + # Return optimized points as dataframe + rec = pd.DataFrame(points, columns=subspace_continuous.param_names) + return rec + + def _recommend_continuous_with_cardinality_constraints( + self, + subspace_continuous: SubspaceContinuous, + batch_size: int, + ) -> tuple[Tensor, Tensor]: + """Recommend from a continuous search space with cardinality constraints. + + Args: + subspace_continuous: The continuous subspace from which to generate + recommendations. + batch_size: The size of the recommendation batch. + + Returns: + The recommendations. + The acquisition values. + + Raises: + RuntimeError: If the continuous search space has no cardinality constraint. + """ import torch - from botorch.optim import optimize_acqf - from torch import Tensor - - def _recommend_continuous_with_inactive_parameters( - _subspace_continuous: SubspaceContinuous, - inactive_parameters: tuple[str, ...] 
| None = None, - ) -> tuple[Tensor, Tensor]: - """Define a helper function that can deal with inactive parameters.""" - if _subspace_continuous.constraints_cardinality: - # When there are cardinality constraints present. - if inactive_parameters is None: - # When no parameters are constrained to zeros - inactive_parameters = () - fixed_parameters = None - else: - # When certain parameters are constrained to zeros. - - # Cast the inactive parameters to the format of fixed features used - # in optimize_acqf()) - indices_inactive_params = [ - _subspace_continuous.param_names.index(key) - for key in _subspace_continuous.param_names - if key in inactive_parameters - ] - fixed_parameters = {ind: 0.0 for ind in indices_inactive_params} - - # Create a new subspace by ensuring all active parameters are non-zeros - _subspace_continuous = _subspace_continuous._ensure_nonzero_parameters( - inactive_parameters - ) - else: - # When there is no cardinality constraint - fixed_parameters = None - - _points, _acqf_values = optimize_acqf( - acq_function=self._botorch_acqf, - bounds=torch.from_numpy(_subspace_continuous.param_bounds_comp), - q=batch_size, - num_restarts=5, # TODO make choice for num_restarts - raw_samples=10, # TODO make choice for raw_samples - fixed_features=fixed_parameters, - equality_constraints=[ - c.to_botorch(_subspace_continuous.parameters) - for c in _subspace_continuous.constraints_lin_eq - ] - or None, # TODO: https://github.com/pytorch/botorch/issues/2042 - inequality_constraints=[ - c.to_botorch(_subspace_continuous.parameters) - for c in _subspace_continuous.constraints_lin_ineq - ] - or None, # TODO: https://github.com/pytorch/botorch/issues/2042 - sequential=self.sequential_continuous, + + if not subspace_continuous.constraints_cardinality: + raise RuntimeError( + f"This method expects a subspace object with constraints of type " + f"{ContinuousCardinalityConstraint.__name__}. 
For a subspace object " + f"without constraints of type" + f" {ContinuousCardinalityConstraint.__name__}, " + f"{self._recommend_continuous_without_cardinality_constraints.__name__}." # noqa ) - return _points, _acqf_values - if len(subspace_continuous.constraints_cardinality): - acqf_values_all: list[Tensor] = [] - points_all: list[Tensor] = [] - - # The key steps of handling cardinality constraint are - # * Determine several configurations of inactive parameters based on the - # cardinality constraints. - # * Optimize the acquisition function for different configurations and - # pick the best one. - # There are two mechanisms for the inactive parameter configurations. The - # full list of different inactive parameter configurations is used, - # when its size is not too large; otherwise we randomly pick a - # fixed number of inactive parameter configurations. - - if ( - subspace_continuous.n_combinatorial_inactive_parameters - > N_ITER_THRESHOLD - ): - # When the size of full list is too large, randomly set some - # parameters inactive. - for _ in range(N_ITER_THRESHOLD): - inactive_params_sample = ( - subspace_continuous._sample_inactive_parameters(1)[0] - ) - - ( - points_i, - acqf_values_i, - ) = _recommend_continuous_with_inactive_parameters( - subspace_continuous, - tuple(inactive_params_sample), - ) - - points_all.append(points_i.unsqueeze(0)) - acqf_values_all.append(acqf_values_i.unsqueeze(0)) - - elif subspace_continuous.combinatorial_inactive_parameters is not None: - # When the size of full list is not too large, iterate the combinations - # of all possible inactive parameters. 
- for ( - inactive_params_generator - ) in subspace_continuous.combinatorial_inactive_parameters: - # flatten inactive parameters - inactive_params_sample = { + acqf_values_all: list[Tensor] = [] + points_all: list[Tensor] = [] + + def append_recommendation_for_inactive_parameters_setting( + inactive_parameters: tuple[str, ...], + ): + """Append the recommendation for each inactive parameter configuration. + + Args: + inactive_parameters: A list of inactive parameters. + """ + # Create a new subspace by ensuring all active parameters being + # non-zeros. + subspace_continuous_with_active_params = ( + subspace_continuous._ensure_nonzero_parameters(inactive_parameters) + ) + # Optimize the acquisition function + ( + points_i, + acqf_values_i, + ) = self._recommend_continuous_without_cardinality_constraints( + subspace_continuous_with_active_params, + batch_size, + inactive_parameters, + ) + # Append recommendation list and acquisition function values + points_all.append(points_i.unsqueeze(0)) + acqf_values_all.append(acqf_values_i.unsqueeze(0)) + + # Below we start recommendation + if ( + subspace_continuous.n_combinatorial_inactive_parameters + > N_THRESHOLD_INACTIVE_PARAMETERS_GENERATOR + ): + # When the combinatorial list is too large, randomly set some parameters + # inactive. + for _ in range(N_THRESHOLD_INACTIVE_PARAMETERS_GENERATOR): + inactive_params_sample = tuple( + subspace_continuous._sample_inactive_parameters(1)[0] + ) + append_recommendation_for_inactive_parameters_setting( + inactive_params_sample + ) + else: + # When the combinatorial list is not too large, iterate the combinatorial + # list of all possible inactive parameters. 
+ for ( + inactive_params_generator + ) in subspace_continuous.combinatorial_inactive_parameters: + # Flatten inactive parameter generator + inactive_params_sample = tuple( + { param for sublist in inactive_params_generator for param in sublist } - - ( - points_i, - acqf_values_i, - ) = _recommend_continuous_with_inactive_parameters( - subspace_continuous, - tuple(inactive_params_sample), - ) - - points_all.append(points_i.unsqueeze(0)) - acqf_values_all.append(acqf_values_i.unsqueeze(0)) - else: - raise RuntimeError( - f"The attribute" - f"{SubspaceContinuous.combinatorial_inactive_parameters.__name__}" - f"should not be None." ) - # Find the best option - points = torch.cat(points_all)[torch.argmax(torch.cat(acqf_values_all)), :] - else: - # When there is no cardinality constraint - points, _ = _recommend_continuous_with_inactive_parameters( - subspace_continuous + append_recommendation_for_inactive_parameters_setting( + inactive_params_sample + ) + + # Find the best option + points = torch.cat(points_all)[torch.argmax(torch.cat(acqf_values_all)), :] + acqf_values = torch.max(torch.cat(acqf_values_all)) + return points, acqf_values + + def _recommend_continuous_without_cardinality_constraints( + self, + subspace_continuous: SubspaceContinuous, + batch_size: int, + inactive_parameters: tuple[str, ...] | None = None, + ) -> tuple[Tensor, Tensor]: + """Recommend from a continuous search space without cardinality constraints. + + Args: + subspace_continuous: The continuous subspace from which to generate + recommendations. + batch_size: The size of the recommendation batch. + inactive_parameters: A list of inactive parameters. + + Returns: + The recommendations. + The acquisition values. + + Raises: + RuntimeError: If the continuous search space has any cardinality + constraints. 
+ """ + import torch + from botorch.optim import optimize_acqf + + if subspace_continuous.constraints_cardinality: + raise RuntimeError( + f"This method expects only subspace object without constraints of type " + f"{ContinuousCardinalityConstraint.__name__}. For a subspace object " + f"with constraints of type {ContinuousCardinalityConstraint.__name__}, " + f"try method {self._recommend_continuous.__name__}." ) - # Return optimized points as dataframe - rec = pd.DataFrame(points, columns=subspace_continuous.param_names) - return rec + if not inactive_parameters: + fixed_parameters = None + else: + # Cast the inactive parameters to the format of fixed features used + # in optimize_acqf()) + indices_inactive_params = [ + subspace_continuous.param_names.index(key) + for key in subspace_continuous.param_names + if key in inactive_parameters + ] + fixed_parameters = {ind: 0.0 for ind in indices_inactive_params} + + points, acqf_values = optimize_acqf( + acq_function=self._botorch_acqf, + bounds=torch.from_numpy(subspace_continuous.param_bounds_comp), + q=batch_size, + num_restarts=5, # TODO make choice for num_restarts + raw_samples=10, # TODO make choice for raw_samples + fixed_features=fixed_parameters, + equality_constraints=[ + c.to_botorch(subspace_continuous.parameters) + for c in subspace_continuous.constraints_lin_eq + ] + or None, + # TODO: https://github.com/pytorch/botorch/issues/2042 + inequality_constraints=[ + c.to_botorch(subspace_continuous.parameters) + for c in subspace_continuous.constraints_lin_ineq + ] + or None, + # TODO: https://github.com/pytorch/botorch/issues/2042 + sequential=self.sequential_continuous, + ) + return points, acqf_values def _recommend_hybrid( self, diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 434d071bc..4a65ea816 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -131,12 +131,10 @@ def n_combinatorial_inactive_parameters(self) -> int: @property def 
combinatorial_inactive_parameters( self - ) -> Iterable[tuple[tuple[str, ...], ...]] | None: + ) -> Iterable[tuple[tuple[str, ...], ...]]: """Combinatorial list of inactive parameters on subspace.""" # The comments on the difference in `n_combinatorial_inactive_parameters` # applies here as well. - if not self.constraints_cardinality: - return None return product( *[ From 293e2efd2c9a70e9a0569909b60e4c682a119351 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Fri, 12 Jul 2024 11:58:42 +0200 Subject: [PATCH 12/67] Make 'n_threshold_inactive_parameters_generator' an attribute of botorch recommender --- baybe/recommenders/pure/bayesian/botorch.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 91a28ec1a..d749343f6 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -8,6 +8,7 @@ import pandas as pd from attr.converters import optional from attrs import define, field +from attrs.validators import ge, instance_of from baybe.constraints import ContinuousCardinalityConstraint from baybe.exceptions import NoMCAcquisitionFunctionError @@ -27,12 +28,6 @@ if TYPE_CHECKING: from torch import Tensor -N_THRESHOLD_INACTIVE_PARAMETERS_GENERATOR: int = 10 -"""This threshold controls which inactive parameters generator is chosen. There are -two mechanisms: -* Iterating the combinatorial list of all possible inactive parameters, -* Iterate a fixed number of randomly generated inactive parameter configurations.""" - @define(kw_only=True) class BotorchRecommender(BayesianRecommender): @@ -70,6 +65,16 @@ class BotorchRecommender(BayesianRecommender): """Percentage of discrete search space that is sampled when performing hybrid search space optimization. 
Ignored when ``hybrid_sampler="None"``.""" + n_threshold_inactive_parameters_generator: int = field( + default=10, validator=[instance_of(int), ge(1)] + ) + """Threshold used for checking which inactive parameters generator is used when + cardinality constraints are present. When the size of the combinatorial list of + all possible inactive parameters is larger than the threshold, a fixed number of + randomly generated inactive parameter configurations are used and the best + optimum among them is recommended; Otherwise, we find the best one by iterating the + combinatorial list of all possible inactive parameters """ + @sampling_percentage.validator def _validate_percentage( # noqa: DOC101, DOC103 self, _: Any, value: float @@ -240,11 +245,11 @@ def append_recommendation_for_inactive_parameters_setting( # Below we start recommendation if ( subspace_continuous.n_combinatorial_inactive_parameters - > N_THRESHOLD_INACTIVE_PARAMETERS_GENERATOR + > self.n_threshold_inactive_parameters_generator ): # When the combinatorial list is too large, randomly set some parameters # inactive. 
- for _ in range(N_THRESHOLD_INACTIVE_PARAMETERS_GENERATOR): + for _ in range(self.n_threshold_inactive_parameters_generator): inactive_params_sample = tuple( subspace_continuous._sample_inactive_parameters(1)[0] ) From f8d0713c0986dc7d021455ef6692a4a14cd082f3 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 15 Aug 2024 09:53:48 +0200 Subject: [PATCH 13/67] Refactor combinatorial properties of cardinality constraint --- baybe/constraints/continuous.py | 33 ++++++++++++++++----------------- baybe/searchspace/continuous.py | 6 +++--- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/baybe/constraints/continuous.py b/baybe/constraints/continuous.py index 479d0f420..c50193910 100644 --- a/baybe/constraints/continuous.py +++ b/baybe/constraints/continuous.py @@ -1,6 +1,7 @@ """Continuous constraints.""" import math +from collections.abc import Iterator from itertools import combinations from math import comb @@ -49,26 +50,24 @@ class ContinuousCardinalityConstraint( """Class for continuous cardinality constraints.""" @property - def n_combinatorial_inactive_parameters(self) -> int: - """Counts of elements in the combinatorial list of inactive parameters.""" - n_combinatorial_inactive_params = 0 - for i_zeros in range( - len(self.parameters) - self.max_cardinality, - len(self.parameters) - self.min_cardinality + 1, - ): - n_combinatorial_inactive_params += comb(len(self.parameters), i_zeros) - return n_combinatorial_inactive_params + def n_inactive_parameter_combinations(self) -> int: + """The number of possible inactive parameter combinations.""" + return sum( + comb(len(self.parameters), n_inactive_parameters) + for n_inactive_parameters in self._inactive_set_sizes() + ) - @property - def combinatorial_inactive_parameters(self) -> list[tuple[str, ...]]: - """Combinatorial list of inactive parameters.""" - combinatorial_inactive_params: list[tuple[str, ...]] = [] - for i_zeros in range( + def _inactive_set_sizes(self) -> Iterator[int]: + """Iterate 
over all possible sizes of inactive parameter sets.""" + return range( len(self.parameters) - self.max_cardinality, len(self.parameters) - self.min_cardinality + 1, - ): - combinatorial_inactive_params.extend(combinations(self.parameters, i_zeros)) - return combinatorial_inactive_params + ) + + def inactive_parameter_combinations(self) -> Iterator[frozenset[str]]: + """Iterate over all possible combinations of inactive parameters.""" + for n_inactive_parameters in self._inactive_set_sizes(): + yield from combinations(self.parameters, n_inactive_parameters) def sample_inactive_parameters(self, batch_size: int = 1) -> list[set[str]]: """Sample sets of inactive parameters according to the cardinality constraints. diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 4a65ea816..abdfd25d0 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -123,14 +123,14 @@ def n_combinatorial_inactive_parameters(self) -> int: return 0 n_combinatorial_inactive_params = [ - con.n_combinatorial_inactive_parameters + con.n_inactive_parameter_combinations for con in self.constraints_cardinality ] return math.prod(n_combinatorial_inactive_params) @property def combinatorial_inactive_parameters( - self + self, ) -> Iterable[tuple[tuple[str, ...], ...]]: """Combinatorial list of inactive parameters on subspace.""" # The comments on the difference in `n_combinatorial_inactive_parameters` @@ -138,7 +138,7 @@ def combinatorial_inactive_parameters( return product( *[ - con.combinatorial_inactive_parameters + con.inactive_parameter_combinations() for con in self.constraints_cardinality ] ) From 76b5d727c3791d21e6c00c36f8aa0cdbdd824af5 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 15 Aug 2024 10:18:21 +0200 Subject: [PATCH 14/67] Refactor combinatorial properties of continuous subspace --- baybe/recommenders/pure/bayesian/botorch.py | 14 ++------- baybe/searchspace/continuous.py | 34 ++++++--------------- 2 files changed, 13 
insertions(+), 35 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index d749343f6..30f0525d9 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -244,7 +244,7 @@ def append_recommendation_for_inactive_parameters_setting( # Below we start recommendation if ( - subspace_continuous.n_combinatorial_inactive_parameters + subspace_continuous.n_inactive_parameter_combinations > self.n_threshold_inactive_parameters_generator ): # When the combinatorial list is too large, randomly set some parameters @@ -261,17 +261,9 @@ def append_recommendation_for_inactive_parameters_setting( # list of all possible inactive parameters. for ( inactive_params_generator - ) in subspace_continuous.combinatorial_inactive_parameters: - # Flatten inactive parameter generator - inactive_params_sample = tuple( - { - param - for sublist in inactive_params_generator - for param in sublist - } - ) + ) in subspace_continuous.inactive_parameter_combinations(): append_recommendation_for_inactive_parameters_setting( - inactive_params_sample + inactive_params_generator ) # Find the best option diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index abdfd25d0..a3e6e6099 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -113,35 +113,21 @@ def constraints_cardinality(self) -> tuple[ContinuousCardinalityConstraint, ...] ) @property - def n_combinatorial_inactive_parameters(self) -> int: - """Counts of elements in the combinatorial list of inactive parameters.""" - # Note that both continuous subspace and continuous cardinality constraint - # have this property. Both differs in that the former one refers to the - # parameters in the subspace while the latter one refers only to the - # constraint parameters. 
- if not self.constraints_cardinality: - return 0 - - n_combinatorial_inactive_params = [ - con.n_inactive_parameter_combinations - for con in self.constraints_cardinality - ] - return math.prod(n_combinatorial_inactive_params) - - @property - def combinatorial_inactive_parameters( - self, - ) -> Iterable[tuple[tuple[str, ...], ...]]: - """Combinatorial list of inactive parameters on subspace.""" - # The comments on the difference in `n_combinatorial_inactive_parameters` - # applies here as well. + def n_inactive_parameter_combinations(self) -> int: + """The number of possible inactive parameter combinations.""" + return math.prod( + c.n_inactive_parameter_combinations for c in self.constraints_cardinality + ) - return product( + def inactive_parameter_combinations(self) -> Iterable[frozenset[str]]: + """Iterate over all possible combinations of inactive parameters.""" + for combination in product( *[ con.inactive_parameter_combinations() for con in self.constraints_cardinality ] - ) + ): + yield frozenset(chain(*combination)) @constraints_nonlin.validator def _validate_constraints_nonlin(self, _, __) -> None: From 5c92079b98245b1f1420fae0c85da9a1474c7c41 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 15 Aug 2024 11:34:03 +0200 Subject: [PATCH 15/67] Refactor constraint validation --- baybe/constraints/validation.py | 63 +++++++++++++++++++-------------- baybe/searchspace/continuous.py | 4 +-- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/baybe/constraints/validation.py b/baybe/constraints/validation.py index 61512fac6..17a2489a9 100644 --- a/baybe/constraints/validation.py +++ b/baybe/constraints/validation.py @@ -1,6 +1,6 @@ """Validation functionality for constraints.""" -from collections.abc import Collection, Sequence +from collections.abc import Collection from itertools import combinations from baybe.constraints.base import Constraint @@ -11,6 +11,11 @@ from baybe.parameters import NumericalContinuousParameter from 
baybe.parameters.base import Parameter +try: # For python < 3.11, use the exceptiongroup backport + ExceptionGroup +except NameError: + from exceptiongroup import ExceptionGroup + def validate_constraints( # noqa: DOC101, DOC103 constraints: Collection[Constraint], parameters: Collection[Parameter] @@ -27,8 +32,8 @@ def validate_constraints( # noqa: DOC101, DOC103 ValueError: If any discrete constraint includes a continuous parameter. ValueError: If any discrete constraint that is valid only for numerical discrete parameters includes non-numerical discrete parameters. - ValueError: If the bounds of any parameter in a cardinality constraint does - not cover zero. + ValueError: If any parameter affected by a cardinality constraint does + not include zero. """ if sum(isinstance(itm, DiscreteDependenciesConstraint) for itm in constraints) > 1: raise ValueError( @@ -85,8 +90,8 @@ def validate_constraints( # noqa: DOC101, DOC103 ) if isinstance(constraint, ContinuousCardinalityConstraint): - validate_parameters_bounds_in_cardinality_constraint( - params_continuous, constraint + validate_cardinality_constraint_parameter_bounds( + constraint, params_continuous ) @@ -111,33 +116,37 @@ def validate_cardinality_constraints_are_nonoverlapping( ) -def validate_parameters_bounds_in_cardinality_constraint( - parameters: Sequence[NumericalContinuousParameter], +def validate_cardinality_constraint_parameter_bounds( constraint: ContinuousCardinalityConstraint, + parameters: Collection[NumericalContinuousParameter], ) -> None: - """Validate that the bounds of all parameters in a cardinality constraint cover - zero. + """Validate that all parameters of a continuous cardinality constraint include zero. Args: - parameters: A collection of continuous numerical parameters. constraint: A continuous cardinality constraint. + parameters: A collection of parameters, including those affected by the + constraint. 
Raises: - ValueError: If the bounds of any parameter of a constraint does not cover zero. - """ # noqa D205 - param_names = [p.name for p in parameters] - for param_in_constraint in constraint.parameters: - # Note that this implementation checks implicitly that all constraint - # parameters must be included in the list of parameters. Otherwise Runtime - # error occurs. - if ( - param := parameters[param_names.index(param_in_constraint)] - ) and not param.is_in_range(0.0): - raise ValueError( - f"The bounds of all parameters in a constraint of type " - f"`{ContinuousCardinalityConstraint.__name__}` must cover " - f"zero. Either correct the parameter ({param}) bounds:" - f" {param.bounds=} or remove the parameter {param} from the " - f"{constraint=} and update the minimum/maximum cardinality " - f"accordingly." + ValueError: If one of the affected parameters does not include zero. + ExceptionGroup: If several of the affected parameters do not include zero. + """ + exceptions = [] + for name in constraint.parameters: + # We implicitly assume that the corresponding parameter exists + parameter = next(p for p in parameters if p.name == name) + + if not parameter.is_in_range(0.0): + exceptions.append( + ValueError( + f"The bounds of all parameters affected by a constraint of type " + f"'{ContinuousCardinalityConstraint.__name__}' must include zero, " + f"but the bounds of parameter '{name}' are: " + f"{parameter.bounds.to_tuple()}" + ) ) + + if exceptions: + if len(exceptions) == 1: + raise exceptions[0] + raise ExceptionGroup("invalid parameter bounds", exceptions) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index a3e6e6099..f766ea2f5 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -19,8 +19,8 @@ ) from baybe.constraints.base import ContinuousConstraint, ContinuousNonlinearConstraint from baybe.constraints.validation import ( + validate_cardinality_constraint_parameter_bounds, 
validate_cardinality_constraints_are_nonoverlapping, - validate_parameters_bounds_in_cardinality_constraint, ) from baybe.parameters import NumericalContinuousParameter from baybe.parameters.base import ContinuousParameter @@ -138,7 +138,7 @@ def _validate_constraints_nonlin(self, _, __) -> None: ) for con in self.constraints_cardinality: - validate_parameters_bounds_in_cardinality_constraint(self.parameters, con) + validate_cardinality_constraint_parameter_bounds(con, self.parameters) def to_searchspace(self) -> SearchSpace: """Turn the subspace into a search space with no discrete part.""" From f07a452e59e9932c5e269e1d0b40e26ffab72f95 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 15 Aug 2024 13:24:11 +0200 Subject: [PATCH 16/67] Move factory code up --- baybe/searchspace/continuous.py | 110 ++++++++++++++++---------------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index f766ea2f5..696b28077 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -304,6 +304,61 @@ def _drop_parameters(self, parameter_names: Collection[str]) -> SubspaceContinuo ], ) + def _ensure_nonzero_parameters( + self, + inactive_parameters: Collection[str], + zero_threshold: float = ZERO_THRESHOLD, + ) -> SubspaceContinuous: + """Create a new subspace with following several actions. + + * Ensure active parameter != 0.0. + * Remove cardinality constraint. + + Args: + inactive_parameters: A list of inactive parameters. + zero_threshold: Threshold for checking whether a value is zero. + + Returns: + A new subspace object. 
+ """ + # Active parameters: parameters involved in cardinality constraints + active_params_sample = set( + self.param_names_in_cardinality_constraint + ).difference(set(inactive_parameters)) + + constraints_lin_ineq = list(self.constraints_lin_ineq) + for active_param in active_params_sample: + index = self.param_names.index(active_param) + + # TODO: Ensure x != 0 when x in [..., 0, ...] is not done. Do we need it? + # TODO: To ensure the minimum cardinality constraints, shall we keep the x + # != 0 operations or shall we instead skip the invalid results at the end + # Ensure x != 0 when bounds = [..., 0]. This is needed, otherwise + # the minimum cardinality constraint is easily violated + if self.parameters[index].bounds.upper == 0: + constraints_lin_ineq.append( + ContinuousLinearInequalityConstraint( + parameters=[active_param], + coefficients=[-1.0], + rhs=min(zero_threshold, -self.parameters[index].bounds.lower), + ) + ) + # Ensure x != 0 when bounds = [0, ...] + elif self.parameters[index].bounds.lower == 0: + constraints_lin_ineq.append( + ContinuousLinearInequalityConstraint( + parameters=[active_param], + coefficients=[1.0], + rhs=min(zero_threshold, self.parameters[index].bounds.upper), + ), + ) + + return SubspaceContinuous( + parameters=tuple(self.parameters), + constraints_lin_eq=self.constraints_lin_eq, + constraints_lin_ineq=tuple(constraints_lin_ineq), + ) + def transform( self, df: pd.DataFrame | None = None, @@ -485,61 +540,6 @@ def _sample_inactive_parameters(self, batch_size: int = 1) -> list[set[str]]: ] return [set(chain(*x)) for x in zip(*inactives_per_constraint)] - def _ensure_nonzero_parameters( - self, - inactive_parameters: Collection[str], - zero_threshold: float = ZERO_THRESHOLD, - ) -> SubspaceContinuous: - """Create a new subspace with following several actions. - - * Ensure active parameter != 0.0. - * Remove cardinality constraint. - - Args: - inactive_parameters: A list of inactive parameters. 
- zero_threshold: Threshold for checking whether a value is zero. - - Returns: - A new subspace object. - """ - # Active parameters: parameters involved in cardinality constraints - active_params_sample = set( - self.param_names_in_cardinality_constraint - ).difference(set(inactive_parameters)) - - constraints_lin_ineq = list(self.constraints_lin_ineq) - for active_param in active_params_sample: - index = self.param_names.index(active_param) - - # TODO: Ensure x != 0 when x in [..., 0, ...] is not done. Do we need it? - # TODO: To ensure the minimum cardinality constraints, shall we keep the x - # != 0 operations or shall we instead skip the invalid results at the end - # Ensure x != 0 when bounds = [..., 0]. This is needed, otherwise - # the minimum cardinality constraint is easily violated - if self.parameters[index].bounds.upper == 0: - constraints_lin_ineq.append( - ContinuousLinearInequalityConstraint( - parameters=[active_param], - coefficients=[-1.0], - rhs=min(zero_threshold, -self.parameters[index].bounds.lower), - ) - ) - # Ensure x != 0 when bounds = [0, ...] 
- elif self.parameters[index].bounds.lower == 0: - constraints_lin_ineq.append( - ContinuousLinearInequalityConstraint( - parameters=[active_param], - coefficients=[1.0], - rhs=min(zero_threshold, self.parameters[index].bounds.upper), - ), - ) - - return SubspaceContinuous( - parameters=tuple(self.parameters), - constraints_lin_eq=self.constraints_lin_eq, - constraints_lin_ineq=tuple(constraints_lin_ineq), - ) - def samples_full_factorial(self, n_points: int = 1) -> pd.DataFrame: """Deprecated!""" # noqa: D401 warnings.warn( From c5b014df7af969ddc3e35bca0159a6488ef081bd Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 15 Aug 2024 13:50:48 +0200 Subject: [PATCH 17/67] Simplify constructor code --- baybe/searchspace/continuous.py | 38 +++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 696b28077..049755b32 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -38,7 +38,6 @@ from baybe.searchspace.core import SearchSpace _MAX_CARDINALITY_SAMPLING_ATTEMPTS = 10_000 -ZERO_THRESHOLD = 1e-5 @define @@ -273,6 +272,7 @@ def param_names(self) -> tuple[str, ...]: @property def param_names_in_cardinality_constraint(self) -> tuple[str, ...]: """Return list of parameter names involved in cardinality constraints.""" + # TODO: Is this property really needed? If so, apply naming conventions. params_per_cardinatliy_constraint = [ c.parameters for c in self.constraints_cardinality ] @@ -307,7 +307,7 @@ def _drop_parameters(self, parameter_names: Collection[str]) -> SubspaceContinuo def _ensure_nonzero_parameters( self, inactive_parameters: Collection[str], - zero_threshold: float = ZERO_THRESHOLD, + inactivity_threshold: float = 1e-5, ) -> SubspaceContinuous: """Create a new subspace with following several actions. @@ -316,47 +316,57 @@ def _ensure_nonzero_parameters( Args: inactive_parameters: A list of inactive parameters. 
- zero_threshold: Threshold for checking whether a value is zero. + inactivity_threshold: Threshold for checking whether a value is zero. Returns: A new subspace object. """ + # TODO: Revise function name/docstring and arguments. In particular: why + # does the function expect the inactive parameters instead of the active ones? + + # TODO: Shouldn't the x != 0 constraints be applied on the level of the + # individual constrains, also taking into account whether min_cardinality > 0? + + # TODO: Instead of adding additional constraints, why not alter the parameter + # bounds? In case we keep the constraints: is the sign of the threshold + # correct? + # Active parameters: parameters involved in cardinality constraints - active_params_sample = set( + active_parameter_names = set( self.param_names_in_cardinality_constraint ).difference(set(inactive_parameters)) constraints_lin_ineq = list(self.constraints_lin_ineq) - for active_param in active_params_sample: - index = self.param_names.index(active_param) + for name in active_parameter_names: + parameter = next(p for p in self.parameters if p.name == name) # TODO: Ensure x != 0 when x in [..., 0, ...] is not done. Do we need it? # TODO: To ensure the minimum cardinality constraints, shall we keep the x # != 0 operations or shall we instead skip the invalid results at the end # Ensure x != 0 when bounds = [..., 0]. This is needed, otherwise # the minimum cardinality constraint is easily violated - if self.parameters[index].bounds.upper == 0: + if parameter.bounds.upper == 0: constraints_lin_ineq.append( ContinuousLinearInequalityConstraint( - parameters=[active_param], + parameters=[name], coefficients=[-1.0], - rhs=min(zero_threshold, -self.parameters[index].bounds.lower), + rhs=min(inactivity_threshold, -parameter.bounds.lower), ) ) # Ensure x != 0 when bounds = [0, ...] 
- elif self.parameters[index].bounds.lower == 0: + elif parameter.bounds.lower == 0: constraints_lin_ineq.append( ContinuousLinearInequalityConstraint( - parameters=[active_param], + parameters=[name], coefficients=[1.0], - rhs=min(zero_threshold, self.parameters[index].bounds.upper), + rhs=min(inactivity_threshold, parameter.bounds.upper), ), ) return SubspaceContinuous( - parameters=tuple(self.parameters), + parameters=self.parameters, constraints_lin_eq=self.constraints_lin_eq, - constraints_lin_ineq=tuple(constraints_lin_ineq), + constraints_lin_ineq=constraints_lin_ineq, ) def transform( From 306c9d2d7674825acb94aa5b6f2c4d08b20f0c62 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 15 Aug 2024 14:36:05 +0200 Subject: [PATCH 18/67] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f85c11b08..4de24b099 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added - `py.typed` file to enable the use of type checkers on the user side +- `ContinuousCardinalityConstraint` is now compatible with `BotorchRecommender` +- Utilities `inactive_parameter_combinations` and`n_inactive_parameter_combinations` + in both `ContinuousCardinalityConstraint`and `SubspaceContinuous` +- Attribute `n_threshold_inactive_parameters_generator` added to `BotorchRecommender` ### Fixed - `CategoricalParameter` and `TaskParameter` no longer incorrectly coerce a single From 7687e39699af4273336dffda5b9f71e81400503b Mon Sep 17 00:00:00 2001 From: Di Jin Date: Fri, 23 Aug 2024 15:45:42 +0200 Subject: [PATCH 19/67] Ensure active parameters by altering parameters bounds --- baybe/searchspace/continuous.py | 75 +++++++++---------- .../test_constraints_continuous.py | 5 +- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 
049755b32..e98bd0a5f 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -3,6 +3,7 @@ from __future__ import annotations import math +import sys import warnings from collections.abc import Collection, Iterable, Sequence from itertools import chain, product @@ -32,6 +33,7 @@ from baybe.serialization import SerialMixin, converter, select_constructor_hook from baybe.utils.basic import to_tuple from baybe.utils.dataframe import pretty_print_df +from baybe.utils.interval import Interval from baybe.utils.numerical import DTypeFloatNumpy if TYPE_CHECKING: @@ -306,16 +308,16 @@ def _drop_parameters(self, parameter_names: Collection[str]) -> SubspaceContinuo def _ensure_nonzero_parameters( self, - inactive_parameters: Collection[str], - inactivity_threshold: float = 1e-5, + inactive_parameter_names: Collection[str], + inactivity_threshold: float = sys.float_info.min, ) -> SubspaceContinuous: """Create a new subspace with following several actions. - * Ensure active parameter != 0.0. - * Remove cardinality constraint. + * Remove cardinality constraints. + * Ensure active parameters != 0.0 when its bounds locate on zero. Args: - inactive_parameters: A list of inactive parameters. + inactive_parameter_names: A list of inactive parameters. inactivity_threshold: Threshold for checking whether a value is zero. Returns: @@ -327,46 +329,43 @@ def _ensure_nonzero_parameters( # TODO: Shouldn't the x != 0 constraints be applied on the level of the # individual constrains, also taking into account whether min_cardinality > 0? - # TODO: Instead of adding additional constraints, why not alter the parameter - # bounds? In case we keep the constraints: is the sign of the threshold - # correct? 
+ def ensure_active_parameters( + parameters: tuple[NumericalContinuousParameter, ...], + active_parameter_names: Collection[str], + ) -> tuple[NumericalContinuousParameter, ...]: + parameters_active_guaranteed = [] + for p in parameters: + if p.name not in active_parameter_names: + bounds = p.bounds + # Active parameter x with bounds [..., 0], ensure x != 0 + elif p.bounds.upper == 0.0: + bounds = Interval(lower=p.bounds.lower, upper=inactivity_threshold) + # Active parameter x with bounds [0, ...], ensure x != 0 + elif p.bounds.lower == 0.0: + bounds = Interval(lower=inactivity_threshold, upper=p.bounds.upper) + # TODO: For active parameter x in [..., 0, ...], ensure x != 0 is not + # done. + else: + bounds = p.bounds + parameters_active_guaranteed.append( + NumericalContinuousParameter( + name=p.name, + bounds=bounds, + ) + ) + return tuple(parameters_active_guaranteed) # Active parameters: parameters involved in cardinality constraints active_parameter_names = set( self.param_names_in_cardinality_constraint - ).difference(set(inactive_parameters)) - - constraints_lin_ineq = list(self.constraints_lin_ineq) - for name in active_parameter_names: - parameter = next(p for p in self.parameters if p.name == name) - - # TODO: Ensure x != 0 when x in [..., 0, ...] is not done. Do we need it? - # TODO: To ensure the minimum cardinality constraints, shall we keep the x - # != 0 operations or shall we instead skip the invalid results at the end - # Ensure x != 0 when bounds = [..., 0]. This is needed, otherwise - # the minimum cardinality constraint is easily violated - if parameter.bounds.upper == 0: - constraints_lin_ineq.append( - ContinuousLinearInequalityConstraint( - parameters=[name], - coefficients=[-1.0], - rhs=min(inactivity_threshold, -parameter.bounds.lower), - ) - ) - # Ensure x != 0 when bounds = [0, ...] 
- elif parameter.bounds.lower == 0: - constraints_lin_ineq.append( - ContinuousLinearInequalityConstraint( - parameters=[name], - coefficients=[1.0], - rhs=min(inactivity_threshold, parameter.bounds.upper), - ), - ) + ).difference(set(inactive_parameter_names)) return SubspaceContinuous( - parameters=self.parameters, + parameters=ensure_active_parameters( + self.parameters, active_parameter_names + ), constraints_lin_eq=self.constraints_lin_eq, - constraints_lin_ineq=constraints_lin_ineq, + constraints_lin_ineq=self.constraints_lin_ineq, ) def transform( diff --git a/tests/constraints/test_constraints_continuous.py b/tests/constraints/test_constraints_continuous.py index 8d03726ff..b67d4796e 100644 --- a/tests/constraints/test_constraints_continuous.py +++ b/tests/constraints/test_constraints_continuous.py @@ -1,5 +1,7 @@ """Test for imposing continuous constraints.""" +import sys + import numpy as np import pytest @@ -7,7 +9,6 @@ ContinuousLinearEqualityConstraint, ContinuousLinearInequalityConstraint, ) -from baybe.searchspace.continuous import ZERO_THRESHOLD from tests.conftest import run_iterations from tests.constraints.test_cardinality_constraint_continuous import _validate_samples @@ -87,7 +88,7 @@ def test_cardinality_constraint(campaign, n_iterations, batch_size): max_cardinality=MAX_CARDINALITY, min_cardinality=MIN_CARDINALITY, batch_size=batch_size, - threshold=ZERO_THRESHOLD, + threshold=sys.float_info.min, ) From 66c3278a4554df29da5bae651ade627df92414e0 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Fri, 23 Aug 2024 23:18:59 +0200 Subject: [PATCH 20/67] Fix continuous constraint test --- tests/constraints/test_constraints_continuous.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/constraints/test_constraints_continuous.py b/tests/constraints/test_constraints_continuous.py index b67d4796e..c6449caf8 100644 --- a/tests/constraints/test_constraints_continuous.py +++ b/tests/constraints/test_constraints_continuous.py @@ -71,7 +71,7 
@@ def test_cardinality_constraint(campaign, n_iterations, batch_size): """Test cardinality constraint for both random recommender and botorch recommender.""" # noqa - MIN_CARDINALITY = 0 + MIN_CARDINALITY = 1 MAX_CARDINALITY = 2 run_iterations(campaign, n_iterations, batch_size, add_noise=False) recommendations = campaign.measurements From a1d11e799af0f736501c8052bad55bd71df3af53 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Fri, 23 Aug 2024 23:25:31 +0200 Subject: [PATCH 21/67] Refactor botorch interface using fixed parameter class --- baybe/parameters/numerical.py | 23 ++++++++++++++ baybe/recommenders/pure/bayesian/botorch.py | 33 +++++++++----------- baybe/searchspace/continuous.py | 34 ++++++++++++++++----- 3 files changed, 63 insertions(+), 27 deletions(-) diff --git a/baybe/parameters/numerical.py b/baybe/parameters/numerical.py index f1c7b3e46..8e90afa90 100644 --- a/baybe/parameters/numerical.py +++ b/baybe/parameters/numerical.py @@ -142,3 +142,26 @@ def summary(self) -> dict: # noqa: D102 Upper_Bound=self.bounds.upper, ) return param_dict + + +@define(frozen=True, slots=False) +class _FixedNumericalContinuousParameter(ContinuousParameter): + """Parameter class for fixed numerical parameters.""" + + is_numeric: ClassVar[bool] = True + # See base class. + + value: float = field(converter=float) + """The fixed value of the parameter.""" + + @property + def bounds(self) -> Interval: + """The value of the parameter as a degenerate interval.""" + return Interval(self.value, self.value) + + def is_in_range(self, item: float) -> bool: + # See base class. 
+ return item == self.value + + def summary(self) -> dict: + raise NotImplementedError() diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 30f0525d9..2b103cce3 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -12,6 +12,7 @@ from baybe.constraints import ContinuousCardinalityConstraint from baybe.exceptions import NoMCAcquisitionFunctionError +from baybe.parameters.numerical import _FixedNumericalContinuousParameter from baybe.recommenders.pure.bayesian.base import BayesianRecommender from baybe.searchspace import ( SearchSpace, @@ -209,7 +210,7 @@ def _recommend_continuous_with_cardinality_constraints( f"This method expects a subspace object with constraints of type " f"{ContinuousCardinalityConstraint.__name__}. For a subspace object " f"without constraints of type" - f" {ContinuousCardinalityConstraint.__name__}, " + f" {ContinuousCardinalityConstraint.__name__}, use method" f"{self._recommend_continuous_without_cardinality_constraints.__name__}." # noqa ) @@ -226,22 +227,24 @@ def append_recommendation_for_inactive_parameters_setting( """ # Create a new subspace by ensuring all active parameters being # non-zeros. 
- subspace_continuous_with_active_params = ( - subspace_continuous._ensure_nonzero_parameters(inactive_parameters) + subspace_continuous_without_cardinality_constraints = ( + subspace_continuous._remove_cardinality_constraints(inactive_parameters) ) # Optimize the acquisition function ( points_i, acqf_values_i, ) = self._recommend_continuous_without_cardinality_constraints( - subspace_continuous_with_active_params, + subspace_continuous_without_cardinality_constraints, batch_size, - inactive_parameters, ) # Append recommendation list and acquisition function values points_all.append(points_i.unsqueeze(0)) acqf_values_all.append(acqf_values_i.unsqueeze(0)) + # TODO: For certain setting of inactive parameters, the resulting problem may + # be infeasible. Add "try" section to handle it. + # Below we start recommendation if ( subspace_continuous.n_inactive_parameter_combinations @@ -275,7 +278,6 @@ def _recommend_continuous_without_cardinality_constraints( self, subspace_continuous: SubspaceContinuous, batch_size: int, - inactive_parameters: tuple[str, ...] | None = None, ) -> tuple[Tensor, Tensor]: """Recommend from a continuous search space without cardinality constraints. @@ -283,7 +285,6 @@ def _recommend_continuous_without_cardinality_constraints( subspace_continuous: The continuous subspace from which to generate recommendations. batch_size: The size of the recommendation batch. - inactive_parameters: A list of inactive parameters. Returns: The recommendations. @@ -304,17 +305,11 @@ def _recommend_continuous_without_cardinality_constraints( f"try method {self._recommend_continuous.__name__}." 
) - if not inactive_parameters: - fixed_parameters = None - else: - # Cast the inactive parameters to the format of fixed features used - # in optimize_acqf()) - indices_inactive_params = [ - subspace_continuous.param_names.index(key) - for key in subspace_continuous.param_names - if key in inactive_parameters - ] - fixed_parameters = {ind: 0.0 for ind in indices_inactive_params} + fixed_parameters = { + idx: p.value + for (idx, p) in enumerate(subspace_continuous.parameters) + if isinstance(p, _FixedNumericalContinuousParameter) + } points, acqf_values = optimize_acqf( acq_function=self._botorch_acqf, @@ -322,7 +317,7 @@ def _recommend_continuous_without_cardinality_constraints( q=batch_size, num_restarts=5, # TODO make choice for num_restarts raw_samples=10, # TODO make choice for raw_samples - fixed_features=fixed_parameters, + fixed_features=fixed_parameters or None, equality_constraints=[ c.to_botorch(subspace_continuous.parameters) for c in subspace_continuous.constraints_lin_eq diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index e98bd0a5f..3c0113892 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -25,6 +25,7 @@ ) from baybe.parameters import NumericalContinuousParameter from baybe.parameters.base import ContinuousParameter +from baybe.parameters.numerical import _FixedNumericalContinuousParameter from baybe.parameters.utils import get_parameters_from_dataframe from baybe.searchspace.validation import ( get_transform_parameters, @@ -306,22 +307,19 @@ def _drop_parameters(self, parameter_names: Collection[str]) -> SubspaceContinuo ], ) - def _ensure_nonzero_parameters( + def _remove_cardinality_constraints( self, inactive_parameter_names: Collection[str], inactivity_threshold: float = sys.float_info.min, ) -> SubspaceContinuous: - """Create a new subspace with following several actions. - - * Remove cardinality constraints. - * Ensure active parameters != 0.0 when its bounds locate on zero. 
+ """Create a copy of the subspace with cardinality constraints removed. Args: inactive_parameter_names: A list of inactive parameters. inactivity_threshold: Threshold for checking whether a value is zero. Returns: - A new subspace object. + A new subspace object without cardinality constraints. """ # TODO: Revise function name/docstring and arguments. In particular: why # does the function expect the inactive parameters instead of the active ones? @@ -329,10 +327,21 @@ def _ensure_nonzero_parameters( # TODO: Shouldn't the x != 0 constraints be applied on the level of the # individual constrains, also taking into account whether min_cardinality > 0? + # TODO: Merge _drop_parameters() to this method. def ensure_active_parameters( parameters: tuple[NumericalContinuousParameter, ...], active_parameter_names: Collection[str], ) -> tuple[NumericalContinuousParameter, ...]: + """Ensure certain parameters being non-zero by adjusting bounds. + + Args: + parameters: A list of parameters. + active_parameter_names: A list of parameters names that must be + non-zero. + + Returns: + A list of parameters with certain parameters guaranteed to be non-zero. 
+ """ parameters_active_guaranteed = [] for p in parameters: if p.name not in active_parameter_names: @@ -360,9 +369,18 @@ def ensure_active_parameters( self.param_names_in_cardinality_constraint ).difference(set(inactive_parameter_names)) + active_parameters_guaranteed = ensure_active_parameters( + self.parameters, active_parameter_names + ) + return SubspaceContinuous( - parameters=ensure_active_parameters( - self.parameters, active_parameter_names + parameters=tuple( + [ + _FixedNumericalContinuousParameter(name=p.name, value=0.0) + if p.name in inactive_parameter_names + else p + for p in active_parameters_guaranteed + ] ), constraints_lin_eq=self.constraints_lin_eq, constraints_lin_ineq=self.constraints_lin_ineq, From 22fd94243ccefdf15f5f5e86fe1c5548986642e4 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Mon, 26 Aug 2024 10:08:21 +0200 Subject: [PATCH 22/67] Add try-except block to handle infeasible problem at certain inactive parameter setting --- baybe/recommenders/pure/bayesian/botorch.py | 32 ++++++++++++--------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 2b103cce3..653744e11 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -230,20 +230,24 @@ def append_recommendation_for_inactive_parameters_setting( subspace_continuous_without_cardinality_constraints = ( subspace_continuous._remove_cardinality_constraints(inactive_parameters) ) - # Optimize the acquisition function - ( - points_i, - acqf_values_i, - ) = self._recommend_continuous_without_cardinality_constraints( - subspace_continuous_without_cardinality_constraints, - batch_size, - ) - # Append recommendation list and acquisition function values - points_all.append(points_i.unsqueeze(0)) - acqf_values_all.append(acqf_values_i.unsqueeze(0)) - - # TODO: For certain setting of inactive parameters, the resulting problem may - # be 
infeasible. Add "try" section to handle it. + try: + # Optimize the acquisition function + ( + points_i, + acqf_values_i, + ) = self._recommend_continuous_without_cardinality_constraints( + subspace_continuous_without_cardinality_constraints, + batch_size, + ) + # Append recommendation list and acquisition function values + points_all.append(points_i.unsqueeze(0)) + acqf_values_all.append(acqf_values_i.unsqueeze(0)) + + # The optimization problem may be infeasible for certain inactive + # parameters. The optimize_acqf raises a ValueError when the optimization + # problem is infeasible. + except ValueError: + pass # Below we start recommendation if ( From 120d71782e933028deef31827ca41f7f1a132324 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Mon, 26 Aug 2024 10:13:55 +0200 Subject: [PATCH 23/67] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4de24b099..ef817bc64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Utilities `inactive_parameter_combinations` and`n_inactive_parameter_combinations` in both `ContinuousCardinalityConstraint`and `SubspaceContinuous` - Attribute `n_threshold_inactive_parameters_generator` added to `BotorchRecommender` +- Class `_FixedNumericalContinuousParameter` ### Fixed - `CategoricalParameter` and `TaskParameter` no longer incorrectly coerce a single From e6248b59d1bc654704011d9b9dac9a3b77887a43 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Mon, 26 Aug 2024 11:03:58 +0200 Subject: [PATCH 24/67] Fix type hint --- baybe/constraints/continuous.py | 4 ++-- baybe/recommenders/pure/bayesian/botorch.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/baybe/constraints/continuous.py b/baybe/constraints/continuous.py index c50193910..343cea239 100644 --- a/baybe/constraints/continuous.py +++ b/baybe/constraints/continuous.py @@ -1,7 +1,7 @@ """Continuous 
constraints.""" import math -from collections.abc import Iterator +from collections.abc import Iterable, Iterator from itertools import combinations from math import comb @@ -57,7 +57,7 @@ def n_inactive_parameter_combinations(self) -> int: for n_inactive_parameters in self._inactive_set_sizes() ) - def _inactive_set_sizes(self) -> Iterator[int]: + def _inactive_set_sizes(self) -> Iterable[int]: """Iterate over all possible sizes of inactive parameter sets.""" return range( len(self.parameters) - self.max_cardinality, diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 653744e11..bd2f1c8af 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -3,6 +3,7 @@ from __future__ import annotations import math +from collections.abc import Collection from typing import TYPE_CHECKING, Any, ClassVar import pandas as pd @@ -218,7 +219,7 @@ def _recommend_continuous_with_cardinality_constraints( points_all: list[Tensor] = [] def append_recommendation_for_inactive_parameters_setting( - inactive_parameters: tuple[str, ...], + inactive_parameters: Collection[str], ): """Append the recommendation for each inactive parameter configuration. 
From e0508b902d8ecb2508a43c054001eb9d73936876 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Mon, 26 Aug 2024 12:21:42 +0200 Subject: [PATCH 25/67] Fix test by repacing match text --- tests/test_searchspace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_searchspace.py b/tests/test_searchspace.py index 73359943f..eb19f15bb 100644 --- a/tests/test_searchspace.py +++ b/tests/test_searchspace.py @@ -290,7 +290,7 @@ def test_cardinality_constraint_with_invalid_parameter_bounds(): NumericalContinuousParameter("c1", (0, 1)), NumericalContinuousParameter("c2", (1, 2)), ) - with pytest.raises(ValueError, match="must cover zero"): + with pytest.raises(ValueError, match="must include zero"): SubspaceContinuous( parameters=parameters, constraints_nonlin=( From 04d89d10522c4e90af412f095ce8691d042001f9 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 15 Oct 2024 11:28:38 +0200 Subject: [PATCH 26/67] Refine docstrings --- baybe/constraints/continuous.py | 4 ++-- baybe/searchspace/continuous.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/baybe/constraints/continuous.py b/baybe/constraints/continuous.py index 343cea239..70a3aaf7a 100644 --- a/baybe/constraints/continuous.py +++ b/baybe/constraints/continuous.py @@ -58,14 +58,14 @@ def n_inactive_parameter_combinations(self) -> int: ) def _inactive_set_sizes(self) -> Iterable[int]: - """Iterate over all possible sizes of inactive parameter sets.""" + """Get all possible sizes of inactive parameter sets.""" return range( len(self.parameters) - self.max_cardinality, len(self.parameters) - self.min_cardinality + 1, ) def inactive_parameter_combinations(self) -> Iterator[frozenset[str]]: - """Iterate over all possible combinations of inactive parameters.""" + """Get an iterator over all possible combinations of inactive parameters.""" for n_inactive_parameters in self._inactive_set_sizes(): yield from combinations(self.parameters, n_inactive_parameters) diff --git 
a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 3c0113892..ebc86ca21 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -122,7 +122,7 @@ def n_inactive_parameter_combinations(self) -> int: ) def inactive_parameter_combinations(self) -> Iterable[frozenset[str]]: - """Iterate over all possible combinations of inactive parameters.""" + """Get an iterator over all possible combinations of inactive parameters.""" for combination in product( *[ con.inactive_parameter_combinations() From 102ef07907bafbd2cd0d555113e82aa187116ead Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 15 Oct 2024 11:28:54 +0200 Subject: [PATCH 27/67] Fix method return type --- baybe/constraints/continuous.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/baybe/constraints/continuous.py b/baybe/constraints/continuous.py index 70a3aaf7a..aca1c907e 100644 --- a/baybe/constraints/continuous.py +++ b/baybe/constraints/continuous.py @@ -1,7 +1,7 @@ """Continuous constraints.""" import math -from collections.abc import Iterable, Iterator +from collections.abc import Iterator from itertools import combinations from math import comb @@ -57,7 +57,7 @@ def n_inactive_parameter_combinations(self) -> int: for n_inactive_parameters in self._inactive_set_sizes() ) - def _inactive_set_sizes(self) -> Iterable[int]: + def _inactive_set_sizes(self) -> range: """Get all possible sizes of inactive parameter sets.""" return range( len(self.parameters) - self.max_cardinality, From 3d72f04f7b4aceabb17839e5c53c05561e5a51f4 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 25 Oct 2024 13:32:02 +0200 Subject: [PATCH 28/67] Fix capitalization in exception group --- baybe/constraints/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baybe/constraints/validation.py b/baybe/constraints/validation.py index 17a2489a9..f1dcc0986 100644 --- a/baybe/constraints/validation.py +++ b/baybe/constraints/validation.py 
@@ -149,4 +149,4 @@ def validate_cardinality_constraint_parameter_bounds( if exceptions: if len(exceptions) == 1: raise exceptions[0] - raise ExceptionGroup("invalid parameter bounds", exceptions) + raise ExceptionGroup("Invalid parameter bounds", exceptions) From ed8054ba9c575f91cd7d9399ebfe701edd571d85 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 25 Oct 2024 13:49:22 +0200 Subject: [PATCH 29/67] Add explicit error handling to validator --- baybe/constraints/validation.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/baybe/constraints/validation.py b/baybe/constraints/validation.py index f1dcc0986..8c80a172f 100644 --- a/baybe/constraints/validation.py +++ b/baybe/constraints/validation.py @@ -133,8 +133,13 @@ def validate_cardinality_constraint_parameter_bounds( """ exceptions = [] for name in constraint.parameters: - # We implicitly assume that the corresponding parameter exists - parameter = next(p for p in parameters if p.name == name) + try: + parameter = next(p for p in parameters if p.name == name) + except StopIteration as ex: + raise ValueError( + f"The parameter '{name}' referenced by the constraint is not contained " + f"in the given collection of parameters." 
+ ) from ex if not parameter.is_in_range(0.0): exceptions.append( From 756bb09d3cdd8e1fb45480673c082695a9d5c15d Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 25 Oct 2024 14:19:19 +0200 Subject: [PATCH 30/67] Clean up cardinality constraint helper property --- baybe/searchspace/continuous.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 0d23e859d..1c933854c 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -300,13 +300,10 @@ def comp_rep_columns(self) -> tuple[str, ...]: return tuple(chain.from_iterable(p.comp_rep_columns for p in self.parameters)) @property - def param_names_in_cardinality_constraint(self) -> tuple[str, ...]: - """Return list of parameter names involved in cardinality constraints.""" - # TODO: Is this property really needed? If so, apply naming conventions. - params_per_cardinatliy_constraint = [ - c.parameters for c in self.constraints_cardinality - ] - return tuple(chain(*params_per_cardinatliy_constraint)) + def parameter_names_in_cardinality_constraints(self) -> tuple[str, ...]: + """The names of all parameters affected by cardinality constraints.""" + names_per_constraint = (c.parameters for c in self.constraints_cardinality) + return tuple(chain(*names_per_constraint)) @property def comp_rep_bounds(self) -> pd.DataFrame: @@ -394,7 +391,7 @@ def ensure_active_parameters( # Active parameters: parameters involved in cardinality constraints active_parameter_names = set( - self.param_names_in_cardinality_constraint + self.parameter_names_in_cardinality_constraints ).difference(set(inactive_parameter_names)) active_parameters_guaranteed = ensure_active_parameters( From b0c422e6dc1a87373ba907467d1d5189cae4f3d6 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 25 Oct 2024 15:26:37 +0200 Subject: [PATCH 31/67] Refactor parameter activation logic --- baybe/parameters/utils.py | 50 
+++++++++++++++++++++++++++++++ baybe/searchspace/continuous.py | 53 +++++++-------------------------- 2 files changed, 61 insertions(+), 42 deletions(-) diff --git a/baybe/parameters/utils.py b/baybe/parameters/utils.py index ec33ce455..31d4f1d43 100644 --- a/baybe/parameters/utils.py +++ b/baybe/parameters/utils.py @@ -4,8 +4,10 @@ from typing import Any, TypeVar import pandas as pd +from attrs import evolve from baybe.parameters.base import Parameter +from baybe.parameters.numerical import NumericalContinuousParameter _TParameter = TypeVar("_TParameter", bound=Parameter) @@ -87,3 +89,51 @@ def get_parameters_from_dataframe( def sort_parameters(parameters: Collection[Parameter]) -> tuple[Parameter, ...]: """Sort parameters alphabetically by their names.""" return tuple(sorted(parameters, key=lambda p: p.name)) + + +def activate_parameter( + parameter: NumericalContinuousParameter, threshold: float +) -> NumericalContinuousParameter: + """Activates a given parameter by moving its bounds away from zero. + + Important: + Parameters whose ranges include zero but whose bounds do not overlap with the + inactive range (i.e. parameters that contain the value zero far from their + boundary values) remain unchanged, because the corresponding activated parameter + would no longer have a continuous value range. + + Args: + parameter: The parameter to be activated. + threshold: The threshold for a parameter to be considered active. + + Returns: + A copy of the parameter with adjusted bounds. + + Raises: + ValueError: If the parameter cannot be activated since both its bounds are + in the inactive range. 
+ """ + lower = parameter.bounds.lower + upper = parameter.bounds.upper + + def in_inactive_range(x: float, /) -> bool: + return -threshold <= x <= threshold + + # Upper bound is in inactive range + if lower < -threshold and in_inactive_range(upper): + return evolve(parameter, bounds=(lower, -threshold)) + + # Lower bound is in inactive range + if upper > threshold and in_inactive_range(lower): + return evolve(parameter, bounds=(threshold, upper)) + + # Both bounds in inactive range + if in_inactive_range(lower) and in_inactive_range(upper): + raise ValueError( + f"Parameter '{parameter.name}' cannot be set active since its " + f"bounds {parameter.bounds.to_tuple()} are entirely contained in the " + f"inactive range [-{threshold}, {threshold}]." + ) + + # Both bounds separated from inactive range + return parameter diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 1c933854c..c569c75ec 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -27,14 +27,17 @@ from baybe.parameters import NumericalContinuousParameter from baybe.parameters.base import ContinuousParameter from baybe.parameters.numerical import _FixedNumericalContinuousParameter -from baybe.parameters.utils import get_parameters_from_dataframe, sort_parameters +from baybe.parameters.utils import ( + activate_parameter, + get_parameters_from_dataframe, + sort_parameters, +) from baybe.searchspace.validation import ( validate_parameter_names, ) from baybe.serialization import SerialMixin, converter, select_constructor_hook from baybe.utils.basic import to_tuple from baybe.utils.dataframe import get_transform_objects, pretty_print_df -from baybe.utils.interval import Interval from baybe.utils.plotting import to_string if TYPE_CHECKING: @@ -352,51 +355,17 @@ def _remove_cardinality_constraints( # TODO: Shouldn't the x != 0 constraints be applied on the level of the # individual constrains, also taking into account whether min_cardinality > 0? 
- # TODO: Merge _drop_parameters() to this method. - def ensure_active_parameters( - parameters: tuple[NumericalContinuousParameter, ...], - active_parameter_names: Collection[str], - ) -> tuple[NumericalContinuousParameter, ...]: - """Ensure certain parameters being non-zero by adjusting bounds. - - Args: - parameters: A list of parameters. - active_parameter_names: A list of parameters names that must be - non-zero. - - Returns: - A list of parameters with certain parameters guaranteed to be non-zero. - """ - parameters_active_guaranteed = [] - for p in parameters: - if p.name not in active_parameter_names: - bounds = p.bounds - # Active parameter x with bounds [..., 0], ensure x != 0 - elif p.bounds.upper == 0.0: - bounds = Interval(lower=p.bounds.lower, upper=inactivity_threshold) - # Active parameter x with bounds [0, ...], ensure x != 0 - elif p.bounds.lower == 0.0: - bounds = Interval(lower=inactivity_threshold, upper=p.bounds.upper) - # TODO: For active parameter x in [..., 0, ...], ensure x != 0 is not - # done. 
- else: - bounds = p.bounds - parameters_active_guaranteed.append( - NumericalContinuousParameter( - name=p.name, - bounds=bounds, - ) - ) - return tuple(parameters_active_guaranteed) - # Active parameters: parameters involved in cardinality constraints active_parameter_names = set( self.parameter_names_in_cardinality_constraints ).difference(set(inactive_parameter_names)) - active_parameters_guaranteed = ensure_active_parameters( - self.parameters, active_parameter_names - ) + active_parameters_guaranteed = [ + activate_parameter(p, inactivity_threshold) + if p.name in active_parameter_names + else p + for p in self.parameters + ] return SubspaceContinuous( parameters=tuple( From ddabcf5324f763e1c788745592a15d5c15f77dbb Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 25 Oct 2024 16:12:58 +0200 Subject: [PATCH 32/67] Refactor method for enforcing cardinality constraints --- baybe/recommenders/pure/bayesian/botorch.py | 4 +- baybe/searchspace/continuous.py | 65 +++++++++------------ 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index d647a2ed7..192e234ae 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -252,7 +252,9 @@ def append_recommendation_for_inactive_parameters_setting( # Create a new subspace by ensuring all active parameters being # non-zeros. 
subspace_continuous_without_cardinality_constraints = ( - subspace_continuous._remove_cardinality_constraints(inactive_parameters) + subspace_continuous._enforce_cardinality_constraints_via_assignment( + inactive_parameters + ) ) try: # Optimize the acquisition function diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index c569c75ec..15398dc58 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -4,7 +4,6 @@ import gc import math -import sys import warnings from collections.abc import Collection, Iterable, Sequence from itertools import chain, product @@ -12,7 +11,7 @@ import numpy as np import pandas as pd -from attrs import define, field, fields +from attrs import define, evolve, field, fields from typing_extensions import override from baybe.constraints import ( @@ -335,50 +334,44 @@ def _drop_parameters(self, parameter_names: Collection[str]) -> SubspaceContinuo ], ) - def _remove_cardinality_constraints( + def _enforce_cardinality_constraints_via_assignment( self, inactive_parameter_names: Collection[str], - inactivity_threshold: float = sys.float_info.min, + threshold: float = 1e-8, ) -> SubspaceContinuous: - """Create a copy of the subspace with cardinality constraints removed. + """Create a copy of the subspace with fixed inactive parameters. + + The returned subspace requires no cardinality constraints since – for the + given separation of parameter into active an inactive sets – the + cardinality constraints are implemented by fixing the inactive parameters to + zero and bounding the active parameters away from zero. Args: - inactive_parameter_names: A list of inactive parameters. - inactivity_threshold: Threshold for checking whether a value is zero. + inactive_parameter_names: The names of the parameter to be inactivated. + threshold: The threshold for a parameter to be considered active. Returns: - A new subspace object without cardinality constraints. 
+ A new subspace with fixed inactive parameters and no cardinality + constraints. """ - # TODO: Revise function name/docstring and arguments. In particular: why - # does the function expect the inactive parameters instead of the active ones? - - # TODO: Shouldn't the x != 0 constraints be applied on the level of the - # individual constrains, also taking into account whether min_cardinality > 0? - - # Active parameters: parameters involved in cardinality constraints + # Extract active parameters involved in cardinality constraints active_parameter_names = set( self.parameter_names_in_cardinality_constraints - ).difference(set(inactive_parameter_names)) - - active_parameters_guaranteed = [ - activate_parameter(p, inactivity_threshold) - if p.name in active_parameter_names - else p - for p in self.parameters - ] - - return SubspaceContinuous( - parameters=tuple( - [ - _FixedNumericalContinuousParameter(name=p.name, value=0.0) - if p.name in inactive_parameter_names - else p - for p in active_parameters_guaranteed - ] - ), - constraints_lin_eq=self.constraints_lin_eq, - constraints_lin_ineq=self.constraints_lin_ineq, - ) + ).difference(inactive_parameter_names) + + # Adjust parameters depending on their in-/activity assignment + adjusted_parameters: list[ContinuousParameter] = [] + p_adjusted: ContinuousParameter + for p in self.parameters: + if p.name in inactive_parameter_names: + p_adjusted = _FixedNumericalContinuousParameter(name=p.name, value=0.0) + elif p.name in active_parameter_names: + p_adjusted = activate_parameter(p, threshold) + else: + p_adjusted = p + adjusted_parameters.append(p_adjusted) + + return evolve(self, parameters=adjusted_parameters, constraints_nonlin=()) def transform( self, From b8d24d7dfa1aed15ceb5b16a287ac3bce5e1ee7e Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Mon, 28 Oct 2024 18:19:12 +0100 Subject: [PATCH 33/67] Fix exception types and messages --- baybe/recommenders/pure/bayesian/botorch.py | 24 +++++++++------------ 1 file 
changed, 10 insertions(+), 14 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 192e234ae..17d10303a 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -225,17 +225,15 @@ def _recommend_continuous_with_cardinality_constraints( The acquisition values. Raises: - RuntimeError: If the continuous search space has no cardinality constraint. + ValueError: If the continuous search space has no cardinality constraints. """ import torch if not subspace_continuous.constraints_cardinality: - raise RuntimeError( - f"This method expects a subspace object with constraints of type " - f"{ContinuousCardinalityConstraint.__name__}. For a subspace object " - f"without constraints of type" - f" {ContinuousCardinalityConstraint.__name__}, use method" - f"{self._recommend_continuous_without_cardinality_constraints.__name__}." # noqa + raise ValueError( + f"'{self._recommend_continuous_with_cardinality_constraints.__name__}' " + f"expects a subspace with constraints of type " + f"'{ContinuousCardinalityConstraint.__name__}'. " ) acqf_values_all: list[Tensor] = [] @@ -321,18 +319,16 @@ def _recommend_continuous_without_cardinality_constraints( The acquisition values. Raises: - RuntimeError: If the continuous search space has any cardinality - constraints. + ValueError: If the continuous search space has cardinality constraints. """ import torch from botorch.optim import optimize_acqf if subspace_continuous.constraints_cardinality: - raise RuntimeError( - f"This method expects only subspace object without constraints of type " - f"{ContinuousCardinalityConstraint.__name__}. For a subspace object " - f"with constraints of type {ContinuousCardinalityConstraint.__name__}, " - f"try method {self._recommend_continuous.__name__}." 
+ raise ValueError( + f"'{self._recommend_continuous_without_cardinality_constraints.__name__}' " # noqa: E501 + f"expects a subspace without constraints of type " + f"'{ContinuousCardinalityConstraint.__name__}'. " ) fixed_parameters = { From 492bb3b8f2013ba1a9859a9acffa932ee8a760a5 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Mon, 28 Oct 2024 18:20:58 +0100 Subject: [PATCH 34/67] Apply minor formatting and documentation fixes --- baybe/recommenders/pure/bayesian/botorch.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 17d10303a..6e3d17ad8 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -186,7 +186,6 @@ def _recommend_continuous( Returns: A dataframe containing the recommendations as individual rows. """ - # For batch size > 1, this optimizer needs a MC acquisition function if batch_size > 1 and not self.acquisition_function.is_mc: raise IncompatibleAcquisitionFunctionError( f"The '{self.__class__.__name__}' only works with Monte Carlo " @@ -195,18 +194,14 @@ def _recommend_continuous( if len(subspace_continuous.constraints_cardinality): points, _ = self._recommend_continuous_with_cardinality_constraints( - subspace_continuous, - batch_size, + subspace_continuous, batch_size ) else: points, _ = self._recommend_continuous_without_cardinality_constraints( - subspace_continuous, - batch_size, + subspace_continuous, batch_size ) - # Return optimized points as dataframe - rec = pd.DataFrame(points, columns=subspace_continuous.parameter_names) - return rec + return pd.DataFrame(points, columns=subspace_continuous.parameter_names) def _recommend_continuous_with_cardinality_constraints( self, @@ -221,8 +216,7 @@ def _recommend_continuous_with_cardinality_constraints( batch_size: The size of the recommendation batch. Returns: - The recommendations. 
- The acquisition values. + The recommendations and corresponding acquisition values. Raises: ValueError: If the continuous search space has no cardinality constraints. @@ -315,8 +309,7 @@ def _recommend_continuous_without_cardinality_constraints( batch_size: The size of the recommendation batch. Returns: - The recommendations. - The acquisition values. + The recommendations and corresponding acquisition values. Raises: ValueError: If the continuous search space has cardinality constraints. @@ -343,7 +336,9 @@ def _recommend_continuous_without_cardinality_constraints( q=batch_size, num_restarts=self.n_restarts, raw_samples=self.n_raw_samples, + # TODO: https://github.com/pytorch/botorch/issues/2042 fixed_features=fixed_parameters or None, + # TODO: https://github.com/pytorch/botorch/issues/2042 equality_constraints=[ c.to_botorch(subspace_continuous.parameters) for c in subspace_continuous.constraints_lin_eq @@ -355,7 +350,6 @@ def _recommend_continuous_without_cardinality_constraints( for c in subspace_continuous.constraints_lin_ineq ] or None, - # TODO: https://github.com/pytorch/botorch/issues/2042 sequential=self.sequential_continuous, ) return points, acqf_values From 584d8d91d8ffb0985cd7e6287314f813850b025d Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Mon, 28 Oct 2024 18:21:22 +0100 Subject: [PATCH 35/67] Remove unnecessary `len` call --- baybe/recommenders/pure/bayesian/botorch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 6e3d17ad8..ab895b2ec 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -192,7 +192,7 @@ def _recommend_continuous( f"acquisition functions for batch sizes > 1." 
) - if len(subspace_continuous.constraints_cardinality): + if subspace_continuous.constraints_cardinality: points, _ = self._recommend_continuous_with_cardinality_constraints( subspace_continuous, batch_size ) From 5744e31812c6dd6f5a27822321fb46f004dbb8f1 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Mon, 28 Oct 2024 18:22:06 +0100 Subject: [PATCH 36/67] Remove unnecessary function layer --- baybe/recommenders/pure/bayesian/botorch.py | 52 +++++++-------------- 1 file changed, 17 insertions(+), 35 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index ab895b2ec..3edc732ff 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -4,7 +4,7 @@ import gc import math -from collections.abc import Collection +from collections.abc import Collection, Iterable from typing import TYPE_CHECKING, Any, ClassVar import pandas as pd @@ -230,19 +230,25 @@ def _recommend_continuous_with_cardinality_constraints( f"'{ContinuousCardinalityConstraint.__name__}'. " ) + # Determine search scope based on number of inactive parameter combinations + exhaustive_search = ( + subspace_continuous.n_inactive_parameter_combinations + <= self.n_threshold_inactive_parameters_generator + ) + iterator: Iterable[Collection[str]] + if exhaustive_search: + # If manageable, evaluate all combinations of inactive parameters + iterator = subspace_continuous.inactive_parameter_combinations() + else: + # Otherwise, draw a random subset of inactive parameter combinations + iterator = subspace_continuous._sample_inactive_parameters( + self.n_threshold_inactive_parameters_generator + ) + acqf_values_all: list[Tensor] = [] points_all: list[Tensor] = [] - def append_recommendation_for_inactive_parameters_setting( - inactive_parameters: Collection[str], - ): - """Append the recommendation for each inactive parameter configuration. - - Args: - inactive_parameters: A list of inactive parameters. 
- """ - # Create a new subspace by ensuring all active parameters being - # non-zeros. + for inactive_parameters in iterator: subspace_continuous_without_cardinality_constraints = ( subspace_continuous._enforce_cardinality_constraints_via_assignment( inactive_parameters @@ -267,30 +273,6 @@ def append_recommendation_for_inactive_parameters_setting( except ValueError: pass - # Below we start recommendation - if ( - subspace_continuous.n_inactive_parameter_combinations - > self.n_threshold_inactive_parameters_generator - ): - # When the combinatorial list is too large, randomly set some parameters - # inactive. - for _ in range(self.n_threshold_inactive_parameters_generator): - inactive_params_sample = tuple( - subspace_continuous._sample_inactive_parameters(1)[0] - ) - append_recommendation_for_inactive_parameters_setting( - inactive_params_sample - ) - else: - # When the combinatorial list is not too large, iterate the combinatorial - # list of all possible inactive parameters. - for ( - inactive_params_generator - ) in subspace_continuous.inactive_parameter_combinations(): - append_recommendation_for_inactive_parameters_setting( - inactive_params_generator - ) - # Find the best option points = torch.cat(points_all)[torch.argmax(torch.cat(acqf_values_all)), :] acqf_values = torch.max(torch.cat(acqf_values_all)) From e4afdccc852db1f6eb2fbc70875f2d3ebc4574e1 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Mon, 28 Oct 2024 18:47:55 +0100 Subject: [PATCH 37/67] Extract loop into general function optimizing subspaces --- baybe/recommenders/pure/bayesian/botorch.py | 67 ++++++++++++--------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 3edc732ff..73229adc4 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -221,8 +221,6 @@ def _recommend_continuous_with_cardinality_constraints( Raises: ValueError: If 
the continuous search space has no cardinality constraints. """ - import torch - if not subspace_continuous.constraints_cardinality: raise ValueError( f"'{self._recommend_continuous_with_cardinality_constraints.__name__}' " @@ -245,38 +243,19 @@ def _recommend_continuous_with_cardinality_constraints( self.n_threshold_inactive_parameters_generator ) - acqf_values_all: list[Tensor] = [] - points_all: list[Tensor] = [] - - for inactive_parameters in iterator: - subspace_continuous_without_cardinality_constraints = ( + # Create iterable of subspaces to be optimized + subspaces = ( + ( subspace_continuous._enforce_cardinality_constraints_via_assignment( inactive_parameters ) ) - try: - # Optimize the acquisition function - ( - points_i, - acqf_values_i, - ) = self._recommend_continuous_without_cardinality_constraints( - subspace_continuous_without_cardinality_constraints, - batch_size, - ) - # Append recommendation list and acquisition function values - points_all.append(points_i.unsqueeze(0)) - acqf_values_all.append(acqf_values_i.unsqueeze(0)) - - # The optimization problem may be infeasible for certain inactive - # parameters. The optimize_acqf raises a ValueError when the optimization - # problem is infeasible. 
- except ValueError: - pass + for inactive_parameters in iterator + ) - # Find the best option - points = torch.cat(points_all)[torch.argmax(torch.cat(acqf_values_all)), :] - acqf_values = torch.max(torch.cat(acqf_values_all)) - return points, acqf_values + return self._optimize_subspaces_without_cardinality_constraints( + subspaces, batch_size + ) def _recommend_continuous_without_cardinality_constraints( self, @@ -467,6 +446,36 @@ def __str__(self) -> str: ] return to_string(self.__class__.__name__, *fields) + def _optimize_subspaces_without_cardinality_constraints( + self, subspaces: Iterable[SubspaceContinuous], batch_size: int + ) -> tuple[Tensor, Tensor]: + import torch + + acqf_values_all: list[Tensor] = [] + points_all: list[Tensor] = [] + + for subspace in subspaces: + try: + # Optimize the acquisition function + f = self._recommend_continuous_without_cardinality_constraints + points_i, acqf_values_i = f(subspace, batch_size) + + # Append recommendation list and acquisition function values + points_all.append(points_i.unsqueeze(0)) + acqf_values_all.append(acqf_values_i.unsqueeze(0)) + + # # The optimization problem may be infeasible for certain inactive + # # parameters. The optimize_acqf raises a ValueError when the optimization + # # problem is infeasible. 
+ except ValueError: + pass + + # Find the best option + points = torch.cat(points_all)[torch.argmax(torch.cat(acqf_values_all)), :] + acqf_values = torch.max(torch.cat(acqf_values_all)) + + return points, acqf_values + # Collect leftover original slotted classes processed by `attrs.define` gc.collect() From 788a5ba14be7a2439f3bb08cdcf8f96d9253f938 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 29 Oct 2024 08:51:27 +0100 Subject: [PATCH 38/67] Simplify multi-space optimization logic --- baybe/recommenders/pure/bayesian/botorch.py | 29 +++++++++++++-------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 73229adc4..fe931888a 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -7,6 +7,7 @@ from collections.abc import Collection, Iterable from typing import TYPE_CHECKING, Any, ClassVar +import numpy as np import pandas as pd from attrs import define, field from attrs.converters import optional as optional_c @@ -449,8 +450,15 @@ def __str__(self) -> str: def _optimize_subspaces_without_cardinality_constraints( self, subspaces: Iterable[SubspaceContinuous], batch_size: int ) -> tuple[Tensor, Tensor]: - import torch + """Find the optimum candidates from multiple subspaces. + + Args: + subspaces: The subspaces to consider for the optimization. + batch_size: The number of points to be recommended. + Returns: + The batch of candidates and the corresponding acquisition value. 
+ """ acqf_values_all: list[Tensor] = [] points_all: list[Tensor] = [] @@ -460,21 +468,20 @@ def _optimize_subspaces_without_cardinality_constraints( f = self._recommend_continuous_without_cardinality_constraints points_i, acqf_values_i = f(subspace, batch_size) - # Append recommendation list and acquisition function values - points_all.append(points_i.unsqueeze(0)) - acqf_values_all.append(acqf_values_i.unsqueeze(0)) + # Append optimization results + points_all.append(points_i) + acqf_values_all.append(acqf_values_i) - # # The optimization problem may be infeasible for certain inactive - # # parameters. The optimize_acqf raises a ValueError when the optimization - # # problem is infeasible. + # The optimization problem may be infeasible in certain subspaces except ValueError: pass - # Find the best option - points = torch.cat(points_all)[torch.argmax(torch.cat(acqf_values_all)), :] - acqf_values = torch.max(torch.cat(acqf_values_all)) + # Find the best option f + best_idx = np.argmax(acqf_values_all) + points = points_all[best_idx] + acqf_value = acqf_values_all[best_idx] - return points, acqf_values + return points, acqf_value # Collect leftover original slotted classes processed by `attrs.define` From 046a8e2ec45baae8dfd55da655f348d21ccd197f Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 29 Oct 2024 18:12:34 +0100 Subject: [PATCH 39/67] Remove restriction on subspaces without cardinality constraints --- baybe/recommenders/pure/bayesian/botorch.py | 29 ++++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index fe931888a..4c13da97d 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -193,17 +193,23 @@ def _recommend_continuous( f"acquisition functions for batch sizes > 1." 
) + points, _ = self._recommend_continuous_torch(subspace_continuous, batch_size) + + return pd.DataFrame(points, columns=subspace_continuous.parameter_names) + + def _recommend_continuous_torch( + self, subspace_continuous: SubspaceContinuous, batch_size: int + ) -> tuple[Tensor, Tensor]: + """Dispatcher selecting continuous optimization routine.""" if subspace_continuous.constraints_cardinality: - points, _ = self._recommend_continuous_with_cardinality_constraints( + return self._recommend_continuous_with_cardinality_constraints( subspace_continuous, batch_size ) else: - points, _ = self._recommend_continuous_without_cardinality_constraints( + return self._recommend_continuous_without_cardinality_constraints( subspace_continuous, batch_size ) - return pd.DataFrame(points, columns=subspace_continuous.parameter_names) - def _recommend_continuous_with_cardinality_constraints( self, subspace_continuous: SubspaceContinuous, @@ -254,9 +260,7 @@ def _recommend_continuous_with_cardinality_constraints( for inactive_parameters in iterator ) - return self._optimize_subspaces_without_cardinality_constraints( - subspaces, batch_size - ) + return self._optimize_continuous_subspaces(subspaces, batch_size) def _recommend_continuous_without_cardinality_constraints( self, @@ -447,10 +451,10 @@ def __str__(self) -> str: ] return to_string(self.__class__.__name__, *fields) - def _optimize_subspaces_without_cardinality_constraints( + def _optimize_continuous_subspaces( self, subspaces: Iterable[SubspaceContinuous], batch_size: int ) -> tuple[Tensor, Tensor]: - """Find the optimum candidates from multiple subspaces. + """Find the optimum candidates from multiple continuous subspaces. Args: subspaces: The subspaces to consider for the optimization. 
@@ -465,12 +469,11 @@ def _optimize_subspaces_without_cardinality_constraints( for subspace in subspaces: try: # Optimize the acquisition function - f = self._recommend_continuous_without_cardinality_constraints - points_i, acqf_values_i = f(subspace, batch_size) + p, acqf = self._recommend_continuous_torch(subspace, batch_size) # Append optimization results - points_all.append(points_i) - acqf_values_all.append(acqf_values_i) + points_all.append(p) + acqf_values_all.append(acqf) # The optimization problem may be infeasible in certain subspaces except ValueError: From 7aac4d318aee3d32b21ef4966998e1f1dfb37ec0 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 29 Oct 2024 18:19:44 +0100 Subject: [PATCH 40/67] Move __str__ method to top --- baybe/recommenders/pure/bayesian/botorch.py | 36 ++++++++++----------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 4c13da97d..d282047a2 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -109,6 +109,24 @@ def _validate_percentage( # noqa: DOC101, DOC103 f"Hybrid sampling percentage needs to be between 0 and 1 but is {value}" ) + @override + def __str__(self) -> str: + fields = [ + to_string("Surrogate", self._surrogate_model), + to_string( + "Acquisition function", self.acquisition_function, single_line=True + ), + to_string("Compatibility", self.compatibility, single_line=True), + to_string( + "Sequential continuous", self.sequential_continuous, single_line=True + ), + to_string("Hybrid sampler", self.hybrid_sampler, single_line=True), + to_string( + "Sampling percentage", self.sampling_percentage, single_line=True + ), + ] + return to_string(self.__class__.__name__, *fields) + @override def _recommend_discrete( self, @@ -433,24 +451,6 @@ def _recommend_hybrid( return rec_exp - @override - def __str__(self) -> str: - fields = [ - to_string("Surrogate", 
self._surrogate_model), - to_string( - "Acquisition function", self.acquisition_function, single_line=True - ), - to_string("Compatibility", self.compatibility, single_line=True), - to_string( - "Sequential continuous", self.sequential_continuous, single_line=True - ), - to_string("Hybrid sampler", self.hybrid_sampler, single_line=True), - to_string( - "Sampling percentage", self.sampling_percentage, single_line=True - ), - ] - return to_string(self.__class__.__name__, *fields) - def _optimize_continuous_subspaces( self, subspaces: Iterable[SubspaceContinuous], batch_size: int ) -> tuple[Tensor, Tensor]: From 3c829d048991abc70380e0859da0974f792c5bf7 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 29 Oct 2024 20:44:47 +0100 Subject: [PATCH 41/67] Rename threshold attribute --- baybe/recommenders/pure/bayesian/botorch.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index d282047a2..68f4b2289 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -85,15 +85,12 @@ class BotorchRecommender(BayesianRecommender): optimization. **Does not affect purely discrete optimization**. """ - n_threshold_inactive_parameters_generator: int = field( - default=10, validator=[instance_of(int), ge(1)] - ) - """Threshold used for checking which inactive parameters generator is used when - cardinality constraints are present. 
When the size of the combinatorial list of - all possible inactive parameters is larger than the threshold, a fixed number of - randomly generated inactive parameter configurations are used and the best - optimum among them is recommended; Otherwise, we find the best one by iterating the - combinatorial list of all possible inactive parameters """ + max_n_subspaces: int = field(default=10, validator=[instance_of(int), ge(1)]) + """Threshold defining the maximum number of subspaces to consider for exhaustive + search in the presence of cardinality constraints. If the combinatorial number of + groupings into active and inactive parameters dictated by the constraints is greater + than this number, that many randomly selected combinations are selected for + optimization.""" @sampling_percentage.validator def _validate_percentage( # noqa: DOC101, DOC103 @@ -256,7 +253,7 @@ def _recommend_continuous_with_cardinality_constraints( # Determine search scope based on number of inactive parameter combinations exhaustive_search = ( subspace_continuous.n_inactive_parameter_combinations - <= self.n_threshold_inactive_parameters_generator + <= self.max_n_subspaces ) iterator: Iterable[Collection[str]] if exhaustive_search: @@ -265,7 +262,7 @@ def _recommend_continuous_with_cardinality_constraints( else: # Otherwise, draw a random subset of inactive parameter combinations iterator = subspace_continuous._sample_inactive_parameters( - self.n_threshold_inactive_parameters_generator + self.max_n_subspaces ) # Create iterable of subspaces to be optimized From bc697c09e9010b74cf61df786e0a3f5ab7c232a5 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Wed, 30 Oct 2024 08:08:33 +0100 Subject: [PATCH 42/67] Add item to README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 91383dfd1..c11280842 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ Besides functionality to perform a typical recommend-measure loop, BayBE's highl - 🎭 Hybrid (mixed 
continuous and discrete) spaces - 🚀 Transfer learning: Mix data from multiple campaigns and accelerate optimization - 🎰 Bandit models: Efficiently find the best among many options in noisy environments (e.g. A/B Testing) +- 🔢 Cardinality constraints: Control the number of active factors in your design - 🌎 Distributed workflows: Run campaigns asynchronously with pending experiments - 🎓 Active learning: Perform smart data acquisition campaigns - ⚙️ Custom surrogate models: Enhance your predictions through mechanistic understanding From b9038b82c98a30caafb1b32f0a2ddea893ed2435 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Wed, 30 Oct 2024 08:27:45 +0100 Subject: [PATCH 43/67] Implement summary method --- baybe/parameters/numerical.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/baybe/parameters/numerical.py b/baybe/parameters/numerical.py index c1973eafa..439a80050 100644 --- a/baybe/parameters/numerical.py +++ b/baybe/parameters/numerical.py @@ -176,7 +176,11 @@ def comp_rep_columns(self) -> tuple[str, ...]: @override def summary(self) -> dict: - raise NotImplementedError() + return dict( + Name=self.name, + Type=self.__class__.__name__, + Value=self.value, + ) # Collect leftover original slotted classes processed by `attrs.define` From c2c8b99f066a10cceba635f02795348a328860dd Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 1 Nov 2024 10:35:41 +0100 Subject: [PATCH 44/67] Update CHANGELOG.md --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea32d7b7a..1d17deb88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,10 +8,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - `allow_missing` and `allow_extra` keyword arguments to `Objective.transform` - `ContinuousCardinalityConstraint` is now compatible with `BotorchRecommender` +- Attribute `max_n_subspaces` to `BotorchRecommender`, allowing to control + optimization 
behavior in the presence of multiple subspaces - Utilities `inactive_parameter_combinations` and`n_inactive_parameter_combinations` in both `ContinuousCardinalityConstraint`and `SubspaceContinuous` -- Attribute `n_threshold_inactive_parameters_generator` added to `BotorchRecommender` -- Class `_FixedNumericalContinuousParameter` ### Deprecations - Passing a dataframe via the `data` argument to `Objective.transform` is no longer From a721f21a60edb78a3e6b2fcccd24e6b1974f8455 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 1 Nov 2024 11:23:55 +0100 Subject: [PATCH 45/67] Fix tests --- .../test_cardinality_constraint_continuous.py | 59 ++++++++-------- .../test_constraints_continuous.py | 70 +++++++++++-------- tests/test_searchspace.py | 4 +- 3 files changed, 70 insertions(+), 63 deletions(-) diff --git a/tests/constraints/test_cardinality_constraint_continuous.py b/tests/constraints/test_cardinality_constraint_continuous.py index aaff41ccf..acf512663 100644 --- a/tests/constraints/test_cardinality_constraint_continuous.py +++ b/tests/constraints/test_cardinality_constraint_continuous.py @@ -14,45 +14,40 @@ from baybe.searchspace.core import SearchSpace, SubspaceContinuous -def _validate_samples( - samples: pd.DataFrame, - max_cardinality: int, +def _validate_cardinality_constrained_batch( + batch: pd.DataFrame, min_cardinality: int, + max_cardinality: int, batch_size: int, threshold: float = 0.0, ): - """Validate if cardinality-constrained samples fulfill the necessary conditions. - - Conditions to check: - * Cardinality is in requested range - * The batch contains right number of samples - * The samples are free of duplicates (except all zeros) + """Validate that a cardinality-constrained batch fulfills the necessary conditions. Args: - samples: Samples to check - max_cardinality: Maximum allowed cardinality - min_cardinality: Minimum required cardinality - batch_size: Requested batch size + batch: Batch to validate. 
+ min_cardinality: Minimum required cardinality. + max_cardinality: Maximum allowed cardinality. + batch_size: Requested batch size. threshold: Threshold for checking whether a value is treated as zero. """ # Assert that cardinality constraint is fulfilled - if threshold == 0.0: - # When threshold is zero, abs(value) > threshold is treated as non-zero. - n_nonzero = len(samples.columns) - np.sum(samples.abs().le(threshold), axis=1) - else: - # When threshold is non-zero, abs(value) >= threshold is treated as non-zero. - n_nonzero = np.sum(samples.abs().ge(threshold), axis=1) - + n_nonzero = np.sum(~np.isclose(batch, 0.0, atol=threshold), axis=1) assert np.all(n_nonzero >= min_cardinality) and np.all(n_nonzero <= max_cardinality) # Assert that we obtain as many samples as requested - assert samples.shape[0] == batch_size + assert batch.shape[0] == batch_size - # If all rows are duplicates of the first row, they must all come from the case - # cardinality = 0 (all rows are zeros) - all_zero_rows = (samples == 0).all(axis=1) - duplicated_rows = samples.duplicated() - assert ~np.all(duplicated_rows[1:]) | np.all(all_zero_rows) + # Sanity check: If all recommendations in the batch are identical, something is + # fishy – unless the cardinality is 0, in which case the entire batch must contain + # zeros. Technically, the probability of getting such a degenerate batch + # is not zero, hence this is not a strict requirement. However, in earlier BoTorch + # versions, this simply happened due to a bug in their sampler: + # https://github.com/pytorch/botorch/issues/2351 + # We thus include this check as a safety net for catching regressions. If it + # turns out the check fails because we observe degenerate batches as actual + # recommendations, we need to invent something smarter. 
+ if len(unique_row := batch.drop_duplicates()) == 1: + assert (unique_row.iloc[0] == 0.0).all() and (max_cardinality == 0) # Combinations of cardinalities to be tested @@ -88,8 +83,10 @@ def test_sampling_cardinality_constraint(cardinality_bounds: tuple[int, int]): subspace = SubspaceContinuous(parameters=parameters, constraints_nonlin=constraints) samples = subspace.sample_uniform(BATCH_SIZE) - # Assert that conditions listed in_validate_samples() are fulfilled - _validate_samples(samples, max_cardinality, min_cardinality, BATCH_SIZE) + # Assert that the constraint conditions hold + _validate_cardinality_constrained_batch( + samples, min_cardinality, max_cardinality, BATCH_SIZE + ) def test_polytope_sampling_with_cardinality_constraint(): @@ -136,9 +133,9 @@ def test_polytope_sampling_with_cardinality_constraint(): samples = searchspace.continuous.sample_uniform(BATCH_SIZE) - # Assert that conditions listed in_validate_samples() are fulfilled - _validate_samples( - samples[params_cardinality], MAX_CARDINALITY, MIN_CARDINALITY, BATCH_SIZE + # Assert that the constraint conditions hold + _validate_cardinality_constrained_batch( + samples[params_cardinality], MIN_CARDINALITY, MAX_CARDINALITY, BATCH_SIZE ) # Assert that linear equality constraint is fulfilled diff --git a/tests/constraints/test_constraints_continuous.py b/tests/constraints/test_constraints_continuous.py index 065af451c..4463cbec2 100644 --- a/tests/constraints/test_constraints_continuous.py +++ b/tests/constraints/test_constraints_continuous.py @@ -1,14 +1,22 @@ """Test for imposing continuous constraints.""" -import sys - import numpy as np +import pandas as pd import pytest from pytest import param from baybe.constraints import ContinuousLinearConstraint +from baybe.constraints.continuous import ContinuousCardinalityConstraint +from baybe.parameters.numerical import NumericalContinuousParameter +from baybe.recommenders.pure.bayesian.base import BayesianRecommender +from 
baybe.recommenders.pure.bayesian.botorch import BotorchRecommender +from baybe.recommenders.pure.nonpredictive.sampling import RandomRecommender +from baybe.searchspace.core import SearchSpace +from baybe.targets.numerical import NumericalTarget from tests.conftest import run_iterations -from tests.constraints.test_cardinality_constraint_continuous import _validate_samples +from tests.constraints.test_cardinality_constraint_continuous import ( + _validate_cardinality_constrained_batch, +) @pytest.mark.parametrize("parameter_names", [["Conti_finite1", "Conti_finite2"]]) @@ -71,35 +79,37 @@ def test_inequality3(campaign, n_iterations, batch_size): assert (1.0 * res["Conti_finite1"] + 3.0 * res["Conti_finite2"]).le(0.301).all() -@pytest.mark.slow -@pytest.mark.parametrize( - "parameter_names", [["Conti_finite1", "Conti_finite2", "Conti_finite3"]] -) -@pytest.mark.parametrize("constraint_names", [["ContiConstraint_5"]]) -@pytest.mark.parametrize("batch_size", [5], ids=["b5"]) -def test_cardinality_constraint(campaign, n_iterations, batch_size): - """Test cardinality constraint for both random recommender and botorch - recommender.""" # noqa +@pytest.mark.parametrize("recommender", [RandomRecommender(), BotorchRecommender()]) +def test_cardinality_constraint(recommender): + """Cardinality constraints are taken into account by the recommender.""" + MIN_CARDINALITY = 4 + MAX_CARDINALITY = 7 + BATCH_SIZE = 10 - MIN_CARDINALITY = 1 - MAX_CARDINALITY = 2 - run_iterations(campaign, n_iterations, batch_size, add_noise=False) - recommendations = campaign.measurements - - print(recommendations) - - # Assert that conditions listed in_validate_samples() are fulfilled - for i_batch in range(2): - _validate_samples( - recommendations.loc[ - 0 + i_batch * batch_size : (i_batch + 1) * batch_size - 1, - ["Conti_finite1", "Conti_finite2", "Conti_finite3"], - ], - max_cardinality=MAX_CARDINALITY, - min_cardinality=MIN_CARDINALITY, - batch_size=batch_size, - threshold=sys.float_info.min, 
+ parameters = [NumericalContinuousParameter(str(i), (0, 1)) for i in range(10)] + constraints = [ + ContinuousCardinalityConstraint( + [p.name for p in parameters], MIN_CARDINALITY, MAX_CARDINALITY ) + ] + searchspace = SearchSpace.from_product(parameters, constraints) + + if isinstance(recommender, BayesianRecommender): + objective = NumericalTarget("t", "MAX").to_objective() + measurements = pd.DataFrame(searchspace.continuous.sample_uniform(2)) + measurements["t"] = np.random.random(len(measurements)) + else: + objective = None + measurements = None + + recommendation = recommender.recommend( + BATCH_SIZE, searchspace, objective, measurements + ) + + # Assert that the constraint conditions hold + _validate_cardinality_constrained_batch( + recommendation, MIN_CARDINALITY, MAX_CARDINALITY, BATCH_SIZE + ) @pytest.mark.slow diff --git a/tests/test_searchspace.py b/tests/test_searchspace.py index 6356968a1..68bb5dd29 100644 --- a/tests/test_searchspace.py +++ b/tests/test_searchspace.py @@ -271,8 +271,8 @@ def test_cardinality_constraints_with_overlapping_parameters(): def test_cardinality_constraint_with_invalid_parameter_bounds(): - """Impose a cardinality constraint on a parameter whose valid area does not - include zero raises an error.""" # noqa + """Imposing a cardinality constraint on a parameter whose range does not include + zero raises an error.""" # noqa parameters = ( NumericalContinuousParameter("c1", (0, 1)), NumericalContinuousParameter("c2", (1, 2)), From e84dda902ce4f11dbb93ac9cff9b624eaddf92fa Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 1 Nov 2024 12:18:46 +0100 Subject: [PATCH 46/67] Explain mechanism of recommending with cardinality constraints --- baybe/recommenders/pure/bayesian/botorch.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 68f4b2289..56df659ca 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ 
b/baybe/recommenders/pure/bayesian/botorch.py @@ -232,6 +232,14 @@ def _recommend_continuous_with_cardinality_constraints( ) -> tuple[Tensor, Tensor]: """Recommend from a continuous search space with cardinality constraints. + This is achieved by considering the individual restricted subspaces that can be + obtained by splitting the parameters into sets of active and inactive + parameters, according to what is allowed by the cardinality constraints. In each + of these spaces, the in-/activity assignment is fixed, so that the cardinality + constraints can be removed and a regular optimization can be performed. The + recommendation is then constructed from the combined optimization results of the + unconstrained spaces. + Args: subspace_continuous: The continuous subspace from which to generate recommendations. From fa1326763e48bed5993f8093a23b0a623e1e96ad Mon Sep 17 00:00:00 2001 From: Di Jin Date: Fri, 13 Dec 2024 11:51:38 +0100 Subject: [PATCH 47/67] Add near-zero threshold to continuous numerical parameter --- baybe/parameters/numerical.py | 18 ++++++++++++++++++ baybe/parameters/utils.py | 16 ++++++++-------- baybe/searchspace/continuous.py | 2 +- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/baybe/parameters/numerical.py b/baybe/parameters/numerical.py index 439a80050..6f4158f8d 100644 --- a/baybe/parameters/numerical.py +++ b/baybe/parameters/numerical.py @@ -112,6 +112,9 @@ class NumericalContinuousParameter(ContinuousParameter): bounds: Interval = field(default=None, converter=convert_bounds) """The bounds of the parameter.""" + near_zero_threshold: float = field(default=1e-8, converter=float) + """A threshold for determining if the value is considered near-zero.""" + @bounds.validator def _validate_bounds(self, _: Any, value: Interval) -> None: # noqa: DOC101, DOC103 """Validate bounds. @@ -149,6 +152,21 @@ def summary(self) -> dict: ) return param_dict + def is_near_zero(self, item: float) -> bool: + """Return whether an item is near-zero. 
+ + Important: + Value in the open interval (-near_zero_threshold, near_zero_threshold) + will be treated as near_zero. + + Args: + item: The value to be checked. + + Returns: + ``True`` if the value is near-zero, ``False`` otherwise. + """ + return abs(item) < self.near_zero_threshold + @define(frozen=True, slots=False) class _FixedNumericalContinuousParameter(ContinuousParameter): diff --git a/baybe/parameters/utils.py b/baybe/parameters/utils.py index 31d4f1d43..c29a71523 100644 --- a/baybe/parameters/utils.py +++ b/baybe/parameters/utils.py @@ -92,7 +92,7 @@ def sort_parameters(parameters: Collection[Parameter]) -> tuple[Parameter, ...]: def activate_parameter( - parameter: NumericalContinuousParameter, threshold: float + parameter: NumericalContinuousParameter, ) -> NumericalContinuousParameter: """Activates a given parameter by moving its bounds away from zero. @@ -104,7 +104,6 @@ def activate_parameter( Args: parameter: The parameter to be activated. - threshold: The threshold for a parameter to be considered active. Returns: A copy of the parameter with adjusted bounds. 
@@ -117,22 +116,23 @@ def activate_parameter( upper = parameter.bounds.upper def in_inactive_range(x: float, /) -> bool: - return -threshold <= x <= threshold + return -parameter.near_zero_threshold <= x <= parameter.near_zero_threshold # Upper bound is in inactive range - if lower < -threshold and in_inactive_range(upper): - return evolve(parameter, bounds=(lower, -threshold)) + if lower < -parameter.near_zero_threshold and in_inactive_range(upper): + return evolve(parameter, bounds=(lower, -parameter.near_zero_threshold)) # Lower bound is in inactive range - if upper > threshold and in_inactive_range(lower): - return evolve(parameter, bounds=(threshold, upper)) + if upper > parameter.near_zero_threshold and in_inactive_range(lower): + return evolve(parameter, bounds=(parameter.near_zero_threshold, upper)) # Both bounds in inactive range if in_inactive_range(lower) and in_inactive_range(upper): raise ValueError( f"Parameter '{parameter.name}' cannot be set active since its " f"bounds {parameter.bounds.to_tuple()} are entirely contained in the " - f"inactive range [-{threshold}, {threshold}]." + f"inactive range [-{parameter.near_zero_threshold}," + f" {parameter.near_zero_threshold}]." 
) # Both bounds separated from inactive range diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 15398dc58..9f1784e2b 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -366,7 +366,7 @@ def _enforce_cardinality_constraints_via_assignment( if p.name in inactive_parameter_names: p_adjusted = _FixedNumericalContinuousParameter(name=p.name, value=0.0) elif p.name in active_parameter_names: - p_adjusted = activate_parameter(p, threshold) + p_adjusted = activate_parameter(p) else: p_adjusted = p adjusted_parameters.append(p_adjusted) From 7aa7c3fec2d09f01bc01fcb98ec4e2ffb8d921bf Mon Sep 17 00:00:00 2001 From: Di Jin Date: Fri, 13 Dec 2024 11:57:48 +0100 Subject: [PATCH 48/67] Refine activate parameter helper function - Replace redundant function - Ensure near-zero range being an open interval --- baybe/parameters/utils.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/baybe/parameters/utils.py b/baybe/parameters/utils.py index c29a71523..108a8b56c 100644 --- a/baybe/parameters/utils.py +++ b/baybe/parameters/utils.py @@ -115,19 +115,16 @@ def activate_parameter( lower = parameter.bounds.lower upper = parameter.bounds.upper - def in_inactive_range(x: float, /) -> bool: - return -parameter.near_zero_threshold <= x <= parameter.near_zero_threshold - - # Upper bound is in inactive range - if lower < -parameter.near_zero_threshold and in_inactive_range(upper): + # Upper bound is in near-zero range + if lower <= -parameter.near_zero_threshold and parameter.is_near_zero(upper): return evolve(parameter, bounds=(lower, -parameter.near_zero_threshold)) - # Lower bound is in inactive range - if upper > parameter.near_zero_threshold and in_inactive_range(lower): + # Lower bound is in near-zero range + if upper > parameter.near_zero_threshold and parameter.is_near_zero(lower): return evolve(parameter, bounds=(parameter.near_zero_threshold, upper)) # Both bounds in inactive range - if
in_inactive_range(lower) and in_inactive_range(upper): + if parameter.is_near_zero(lower) and parameter.is_near_zero(upper): raise ValueError( f"Parameter '{parameter.name}' cannot be set active since its " f"bounds {parameter.bounds.to_tuple()} are entirely contained in the " From cfdf1e32d40a1e27796aac90802f40e0e970bdc0 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Sat, 14 Dec 2024 16:51:13 +0100 Subject: [PATCH 49/67] Show warnings when any minimum cardinality constraints are violated. --- baybe/exceptions.py | 4 ++ baybe/recommenders/pure/bayesian/botorch.py | 17 ++++- baybe/utils/cardinality_constraints.py | 76 +++++++++++++++++++++ 3 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 baybe/utils/cardinality_constraints.py diff --git a/baybe/exceptions.py b/baybe/exceptions.py index 661f61a97..3fd5aaf33 100644 --- a/baybe/exceptions.py +++ b/baybe/exceptions.py @@ -9,6 +9,10 @@ class UnusedObjectWarning(UserWarning): """ +class MinimumCardinalityViolatedWarning(UserWarning): + """Minimum cardinality constraints are violated.""" + + ##### Exceptions ##### diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 56df659ca..1bdbb5bb4 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -4,6 +4,7 @@ import gc import math +import warnings from collections.abc import Collection, Iterable from typing import TYPE_CHECKING, Any, ClassVar @@ -19,6 +20,7 @@ from baybe.exceptions import ( IncompatibilityError, IncompatibleAcquisitionFunctionError, + MinimumCardinalityViolatedWarning, ) from baybe.parameters.numerical import _FixedNumericalContinuousParameter from baybe.recommenders.pure.bayesian.base import BayesianRecommender @@ -28,6 +30,7 @@ SubspaceContinuous, SubspaceDiscrete, ) +from baybe.utils.cardinality_constraints import is_min_cardinality_fulfilled from baybe.utils.dataframe import to_tensor from baybe.utils.plotting import to_string from 
baybe.utils.sampling_algorithms import ( @@ -283,7 +286,19 @@ def _recommend_continuous_with_cardinality_constraints( for inactive_parameters in iterator ) - return self._optimize_continuous_subspaces(subspaces, batch_size) + points, acqf_value = self._optimize_continuous_subspaces(subspaces, batch_size) + + # Check if any minimum cardinality constraints are violated + if not is_min_cardinality_fulfilled( + subspace_continuous, + pd.DataFrame(points, columns=subspace_continuous.parameter_names), + ): + warnings.warn( + "Minimum cardinality constraints are not guaranteed.", + MinimumCardinalityViolatedWarning, + ) + + return points, acqf_value def _recommend_continuous_without_cardinality_constraints( self, diff --git a/baybe/utils/cardinality_constraints.py b/baybe/utils/cardinality_constraints.py new file mode 100644 index 000000000..92cd8ede6 --- /dev/null +++ b/baybe/utils/cardinality_constraints.py @@ -0,0 +1,76 @@ +"""Utilities related to cardinality constraints.""" + +import numpy as np +import pandas as pd + +from baybe.parameters import NumericalContinuousParameter +from baybe.searchspace import SubspaceContinuous + + +def count_near_zeros( + parameters: tuple[NumericalContinuousParameter, ...], points: pd.DataFrame +) -> np.ndarray: + """Return the counts of near-zeros in the recommendations. + + Args: + parameters: A list of parameter objects according to which the counts of + near-zeros in the recommendations should be calculated. + points: The recommendations of the parameter objects. + + Raises: + ValueError: If the dimensionality of parameters does not match that of points. + + Returns: + The counts of near-zero values in the recommendations. + + + """ + if len(parameters) != points.shape[1]: + raise ValueError( + "Dimensionality mismatch: number of parameters = {len(" + "parameters)}, parameters in recommendations " + "= {points.shape[1]}." + ) + + # Boolean values indicating whether candidate is near-zero: True for near-zero. 
+ p_thresholds = np.array([p.near_zero_threshold for p in parameters]) + p_thresholds_mask = np.broadcast_to(p_thresholds, points.shape) + near_zero_flags = (points > -p_thresholds_mask) & (points < p_thresholds_mask) + return np.sum(near_zero_flags, axis=1) + + +def is_min_cardinality_fulfilled( + subspace_continuous: SubspaceContinuous, batch: pd.DataFrame +) -> bool: + """Check whether any minimum cardinality constraints are fulfilled. + + Args: + subspace_continuous: The continuous subspace from which candidates are + generated. + batch: The recommended batch + + Returns: + Return "True" if all minimum cardinality constraints are fulfilled; "False" + otherwise. + """ + if len(subspace_continuous.constraints_cardinality) == 0: + return True + + for c in subspace_continuous.constraints_cardinality: + if c.min_cardinality == 0: + continue + + # TODO: Is the parameters in constraints sorted or not? Can we assume the + # order of parameters in constraints align with that in the subspace? 
+ + # Counts the near-zero elements + batch_related_to_c = batch[c.parameters] + parameters_related_to_c = tuple( + p for p in subspace_continuous.parameters if p.name in c.parameters + ) + n_near_zeros = count_near_zeros(parameters_related_to_c, batch_related_to_c) + + # When the minimum cardinality is violated + if np.any(len(c.parameters) - n_near_zeros < c.min_cardinality): + return False + return True From e3c6620962e2f6a01962fda3003411641abbbff6 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Sun, 15 Dec 2024 19:55:42 +0100 Subject: [PATCH 50/67] Update test related to cardinality constraints --- .../test_cardinality_constraint_continuous.py | 39 ++++++++++++++----- .../test_constraints_continuous.py | 16 ++++++-- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/tests/constraints/test_cardinality_constraint_continuous.py b/tests/constraints/test_cardinality_constraint_continuous.py index acf512663..a685d9c31 100644 --- a/tests/constraints/test_cardinality_constraint_continuous.py +++ b/tests/constraints/test_cardinality_constraint_continuous.py @@ -1,6 +1,8 @@ """Tests for the continuous cardinality constraint.""" +import warnings from itertools import combinations_with_replacement +from warnings import WarningMessage import numpy as np import pandas as pd @@ -12,6 +14,7 @@ ) from baybe.parameters.numerical import NumericalContinuousParameter from baybe.searchspace.core import SearchSpace, SubspaceContinuous +from baybe.utils.cardinality_constraints import count_near_zeros def _validate_cardinality_constrained_batch( @@ -19,7 +22,8 @@ def _validate_cardinality_constrained_batch( min_cardinality: int, max_cardinality: int, batch_size: int, - threshold: float = 0.0, + parameters: tuple[NumericalContinuousParameter], + captured_warnings: list[WarningMessage | None], ): """Validate that a cardinality-constrained batch fulfills the necessary conditions. 
@@ -28,11 +32,20 @@ def _validate_cardinality_constrained_batch( min_cardinality: Minimum required cardinality. max_cardinality: Maximum allowed cardinality. batch_size: Requested batch size. - threshold: Threshold for checking whether a value is treated as zero. + parameters: A list of parameters for which recommendations are provided. + captured_warnings: A list of captured warnings. """ - # Assert that cardinality constraint is fulfilled - n_nonzero = np.sum(~np.isclose(batch, 0.0, atol=threshold), axis=1) - assert np.all(n_nonzero >= min_cardinality) and np.all(n_nonzero <= max_cardinality) + # Assert that the maximum cardinality constraint is fulfilled + n_nonzeros = len(parameters) - count_near_zeros(parameters, batch) + assert np.all(n_nonzeros <= max_cardinality) + + # Check whether the minimum cardinality constraint is fulfilled + is_min_cardinality_fulfilled = np.all(n_nonzeros >= min_cardinality) + + # A warning must be raised when the minimum cardinality constraint is not fulfilled + if not is_min_cardinality_fulfilled: + w_message = "Minimum cardinality constraints are not guaranteed." 
+ assert any(str(w.message) == w_message for w in captured_warnings) # Assert that we obtain as many samples as requested assert batch.shape[0] == batch_size @@ -81,11 +94,13 @@ def test_sampling_cardinality_constraint(cardinality_bounds: tuple[int, int]): ) subspace = SubspaceContinuous(parameters=parameters, constraints_nonlin=constraints) - samples = subspace.sample_uniform(BATCH_SIZE) + + with warnings.catch_warnings(record=True) as w: + samples = subspace.sample_uniform(BATCH_SIZE) # Assert that the constraint conditions hold _validate_cardinality_constrained_batch( - samples, min_cardinality, max_cardinality, BATCH_SIZE + samples, min_cardinality, max_cardinality, BATCH_SIZE, parameters, w ) @@ -131,11 +146,17 @@ def test_polytope_sampling_with_cardinality_constraint(): ] searchspace = SearchSpace.from_product(parameters, constraints) - samples = searchspace.continuous.sample_uniform(BATCH_SIZE) + with warnings.catch_warnings(record=True) as w: + samples = searchspace.continuous.sample_uniform(BATCH_SIZE) # Assert that the constraint conditions hold _validate_cardinality_constrained_batch( - samples[params_cardinality], MIN_CARDINALITY, MAX_CARDINALITY, BATCH_SIZE + samples[params_cardinality], + MIN_CARDINALITY, + MAX_CARDINALITY, + BATCH_SIZE, + tuple(p for p in parameters if p.name in params_cardinality), + w, ) # Assert that linear equality constraint is fulfilled diff --git a/tests/constraints/test_constraints_continuous.py b/tests/constraints/test_constraints_continuous.py index 4463cbec2..e326ebb83 100644 --- a/tests/constraints/test_constraints_continuous.py +++ b/tests/constraints/test_constraints_continuous.py @@ -1,5 +1,7 @@ """Test for imposing continuous constraints.""" +import warnings + import numpy as np import pandas as pd import pytest @@ -102,13 +104,19 @@ def test_cardinality_constraint(recommender): objective = None measurements = None - recommendation = recommender.recommend( - BATCH_SIZE, searchspace, objective, measurements - ) + with 
warnings.catch_warnings(record=True) as w: + recommendation = recommender.recommend( + BATCH_SIZE, searchspace, objective, measurements + ) # Assert that the constraint conditions hold _validate_cardinality_constrained_batch( - recommendation, MIN_CARDINALITY, MAX_CARDINALITY, BATCH_SIZE + recommendation, + MIN_CARDINALITY, + MAX_CARDINALITY, + BATCH_SIZE, + tuple(parameters), + w, ) From b85924fbc2372baeac4868eafae7c401bd2914fb Mon Sep 17 00:00:00 2001 From: Di Jin Date: Sun, 15 Dec 2024 20:21:22 +0100 Subject: [PATCH 51/67] Add to-dos - Add to-do related to customized error in botorch - Add to-do related to active parameters guarantee in random sampler --- baybe/searchspace/continuous.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 9f1784e2b..139440ea9 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -517,11 +517,14 @@ def _sample_from_polytope_with_cardinality_constraints( # Randomly set some parameters inactive inactive_params_sample = self._sample_inactive_parameters(1)[0] + # TODO: active parameters must be guaranteed non-zero! # Remove the inactive parameters from the search space subspace_without_cardinality_constraint = self._drop_parameters( inactive_params_sample ) + # TODO: Replace ValueError with customized error.
See + # https://github.com/pytorch/botorch/pull/2652 # Sample from the reduced space try: sample = subspace_without_cardinality_constraint.sample_uniform(1) From 22b19f9698e068d5e61aa53155232cb82a65083a Mon Sep 17 00:00:00 2001 From: Di Jin Date: Mon, 16 Dec 2024 09:53:44 +0100 Subject: [PATCH 52/67] Add test on catching warning related to violation of minimum cardinality constraint --- baybe/parameters/numerical.py | 2 +- .../test_cardinality_constraint_continuous.py | 63 +++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/baybe/parameters/numerical.py b/baybe/parameters/numerical.py index 6f4158f8d..4c087199e 100644 --- a/baybe/parameters/numerical.py +++ b/baybe/parameters/numerical.py @@ -112,7 +112,7 @@ class NumericalContinuousParameter(ContinuousParameter): bounds: Interval = field(default=None, converter=convert_bounds) """The bounds of the parameter.""" - near_zero_threshold: float = field(default=1e-8, converter=float) + near_zero_threshold: float = field(default=1e-5, converter=float) """A threshold for determining if the value is considered near-zero.""" @bounds.validator diff --git a/tests/constraints/test_cardinality_constraint_continuous.py b/tests/constraints/test_cardinality_constraint_continuous.py index a685d9c31..a2d804acb 100644 --- a/tests/constraints/test_cardinality_constraint_continuous.py +++ b/tests/constraints/test_cardinality_constraint_continuous.py @@ -12,8 +12,11 @@ ContinuousCardinalityConstraint, ContinuousLinearConstraint, ) +from baybe.exceptions import MinimumCardinalityViolatedWarning from baybe.parameters.numerical import NumericalContinuousParameter +from baybe.recommenders import BotorchRecommender from baybe.searchspace.core import SearchSpace, SubspaceContinuous +from baybe.targets.numerical import NumericalTarget from baybe.utils.cardinality_constraints import count_near_zeros @@ -172,3 +175,63 @@ def test_polytope_sampling_with_cardinality_constraint(): .ge(rhs_inequality - TOLERANCE) .all() 
) + + +def test_min_cardinality_warning(): + """Providing candidates violating minimum cardinality constraint raises a + warning. + """ # noqa + N_PARAMETERS = 2 + MIN_CARDINALITY = 2 + MAX_CARDINALITY = 2 + BATCH_SIZE = 20 + + lower_bound = -0.5 + upper_bound = 0.5 + stepsize = 0.05 + parameters = [ + NumericalContinuousParameter(name=f"x_{i+1}", bounds=(lower_bound, upper_bound)) + for i in range(N_PARAMETERS) + ] + + constraints = [ + ContinuousCardinalityConstraint( + parameters=[p.name for p in parameters], + max_cardinality=MAX_CARDINALITY, + min_cardinality=MIN_CARDINALITY, + ), + ] + + searchspace = SearchSpace.from_product(parameters, constraints) + objective = NumericalTarget("t", "MAX").to_objective() + + # Create a scenario in which + # - The optimum of the target function is at the origin + # - The Botorch recommender is likely to provide candidates at the origin, + # which violates the minimum cardinality constraint. + def custom_target(x1: np.ndarray, x2: np.ndarray) -> np.ndarray: + """A custom target function with maximum at the origin.""" + return -abs(x1) - abs(x2) + + def prepare_measurements() -> pd.DataFrame: + """Prepare measurements.""" + x1 = np.arange(lower_bound, upper_bound + stepsize, stepsize) + # Exclude 0 from the array + X1, X2 = np.meshgrid(x1[abs(x1) > stepsize / 2], x1[abs(x1) > stepsize / 2]) + + return pd.DataFrame( + { + "x_1": X1.flatten(), + "x_2": X2.flatten(), + "t": custom_target(X1.flatten(), X2.flatten()), + } + ) + + with warnings.catch_warnings(record=True) as captured_warnings: + BotorchRecommender().recommend( + BATCH_SIZE, searchspace, objective, prepare_measurements() + ) + assert any( + issubclass(w.category, MinimumCardinalityViolatedWarning) + for w in captured_warnings + ) From b0dc037fb42bf79fdae01ba437a54c1f0eb5a0b0 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Mon, 16 Dec 2024 10:54:11 +0100 Subject: [PATCH 53/67] Update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index 1d17deb88..91e84cb95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 optimization behavior in the presence of multiple subspaces - Utilities `inactive_parameter_combinations` and`n_inactive_parameter_combinations` in both `ContinuousCardinalityConstraint`and `SubspaceContinuous` +- Attribute `near_zero_threshold` and utility `is_near_zero` to + `NumericalContinuousParameter` +- Warning `MinimumCardinalityViolatedWarning` is triggered when any minimum + cardinality is violated in `BotorchRecommender` +- Utilities `count_near_zeros` and `is_min_cardinality_fulfilled` ### Deprecations - Passing a dataframe via the `data` argument to `Objective.transform` is no longer From 35825b84f4268257c9d865141b36f4dedc4fe114 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Mon, 16 Dec 2024 11:38:13 +0100 Subject: [PATCH 54/67] Clean up merge conflict code --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24bf72211..ae87bf664 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,7 +45,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `NumericalTarget` now raises an error - Crash when using `ContinuousCardinalityConstraint` caused by an unintended interplay between constraints and dropped parameters yielding empty parameter sets ->>>>>>> main ### Deprecations - Passing a dataframe via the `data` argument to `Objective.transform` is no longer From 3e275e4454556d6ae42d21cd77e0035e5613a5cc Mon Sep 17 00:00:00 2001 From: Di Jin Date: Mon, 16 Dec 2024 14:03:21 +0100 Subject: [PATCH 55/67] Refine logic in counting the near-zero elements --- baybe/utils/cardinality_constraints.py | 29 ++++++++++++++------------ 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/baybe/utils/cardinality_constraints.py b/baybe/utils/cardinality_constraints.py index 92cd8ede6..5c2b178cc 
100644 --- a/baybe/utils/cardinality_constraints.py +++ b/baybe/utils/cardinality_constraints.py @@ -18,22 +18,29 @@ def count_near_zeros( points: The recommendations of the parameter objects. Raises: - ValueError: If the dimensionality of parameters does not match that of points. + ValueError: If parameters does not cover all parameters present in points. Returns: The counts of near-zero values in the recommendations. """ - if len(parameters) != points.shape[1]: + p_names = [p.name for p in parameters] + if not set(points.columns).issubset(set(p_names)): raise ValueError( - "Dimensionality mismatch: number of parameters = {len(" - "parameters)}, parameters in recommendations " - "= {points.shape[1]}." + "Parameters must cover all parameters present in points: " + "parameter names in parameters are: {p_name} and parameter " + "names from points are: {points.columns}." ) - # Boolean values indicating whether candidate is near-zero: True for near-zero. - p_thresholds = np.array([p.near_zero_threshold for p in parameters]) + # Only keep parameters that are present in points; The order of parameters + # aligns with that in points. + parameters_filtered_sorted = ( + p for p_name in points.columns for p in parameters if p.name == p_name + ) + + # Boolean values indicating whether the candidate is near-zero: True for near-zero. + p_thresholds = np.array([p.near_zero_threshold for p in parameters_filtered_sorted]) p_thresholds_mask = np.broadcast_to(p_thresholds, points.shape) near_zero_flags = (points > -p_thresholds_mask) & (points < p_thresholds_mask) return np.sum(near_zero_flags, axis=1) @@ -60,15 +67,11 @@ def is_min_cardinality_fulfilled( if c.min_cardinality == 0: continue - # TODO: Is the parameters in constraints sorted or not? Can we assume the - # order of parameters in constraints align with that in the subspace? 
- # Counts the near-zero elements batch_related_to_c = batch[c.parameters] - parameters_related_to_c = tuple( - p for p in subspace_continuous.parameters if p.name in c.parameters + n_near_zeros = count_near_zeros( + subspace_continuous.parameters, batch_related_to_c ) - n_near_zeros = count_near_zeros(parameters_related_to_c, batch_related_to_c) # When the minimum cardinality is violated if np.any(len(c.parameters) - n_near_zeros < c.min_cardinality): From 62f0ed62c3224998d28992947eff95f63072555b Mon Sep 17 00:00:00 2001 From: Di Jin Date: Wed, 8 Jan 2025 10:28:59 +0100 Subject: [PATCH 56/67] Add TODO related to customized infeasibility error in botorch --- baybe/recommenders/pure/bayesian/botorch.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 1bdbb5bb4..356aae00f 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -476,6 +476,11 @@ def _optimize_continuous_subspaces( ) -> tuple[Tensor, Tensor]: """Find the optimum candidates from multiple continuous subspaces. + **Important**: A subspace without a feasible solution will be ignored + silently, and no warning will be raised. This design is intentional to + accommodate recommendations with cardinality constraints. Please be mindful + of this behavior when invoking this method. + Args: subspaces: The subspaces to consider for the optimization. batch_size: The number of points to be recommended. @@ -495,6 +500,8 @@ def _optimize_continuous_subspaces( points_all.append(p) acqf_values_all.append(acqf) + # TODO: Replace ValueError with customized error.
See + # https://github.com/pytorch/botorch/pull/2652 # The optimization problem may be infeasible in certain subspaces except ValueError: pass From 9af846b2734522e79bf3a4925175aba0fd251a75 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Wed, 8 Jan 2025 11:32:54 +0100 Subject: [PATCH 57/67] Add threshold to continuous cardinality constraint --- baybe/constraints/continuous.py | 45 ++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/baybe/constraints/continuous.py b/baybe/constraints/continuous.py index fbcaa18e9..4675ac9d1 100644 --- a/baybe/constraints/continuous.py +++ b/baybe/constraints/continuous.py @@ -10,7 +10,7 @@ from typing import TYPE_CHECKING, Any import numpy as np -from attr.validators import in_ +from attr.validators import gt, in_, lt from attrs import define, field from baybe.constraints.base import ( @@ -19,6 +19,7 @@ ContinuousNonlinearConstraint, ) from baybe.parameters import NumericalContinuousParameter +from baybe.utils.interval import Interval from baybe.utils.numerical import DTypeFloatNumpy from baybe.utils.validation import finite_float @@ -140,6 +141,11 @@ class ContinuousCardinalityConstraint( ): """Class for continuous cardinality constraints.""" + relative_threshold: float = field( + default=1e-2, converter=float, validator=[gt(0.0), lt(1.0)] + ) + """A relative threshold for determining if the value is considered zero.""" + @property def n_inactive_parameter_combinations(self) -> int: """The number of possible inactive parameter combinations.""" @@ -198,6 +204,43 @@ def sample_inactive_parameters(self, batch_size: int = 1) -> list[set[str]]: return inactive_params + def get_threshold(self, parameter: NumericalContinuousParameter) -> Interval: + """Get the threshold values of a parameter. + + This method calculates the thresholds based on the parameter's bounds + and the relative threshold. 
+ + Note: + Thresholds (lower, upper) are defined below: + * If lower < 0 and upper > 0, any value v with lower < v < upper are treated + zero; + * If lower = 0 and upper > 0, any value v with lower <= v < upper are + treated zero; + * If lower < 0 and upper = 0, any value v with lower < v <= upper are + treated zero. + + + Args: + parameter: The parameter object. + + Returns: + The lower and upper thresholds. + + Raises: + ValueError: when parameter_name is not present in parameter list of this + constraint. + """ + if parameter.name not in self.parameters: + raise ValueError( + f"The given parameter with name: {parameter.name} cannot " + f"be found in the parameter list: {self.parameters}." + ) + + return Interval( + lower=self.relative_threshold * parameter.bounds.lower, + upper=self.relative_threshold * parameter.bounds.upper, + ) + # Collect leftover original slotted classes processed by `attrs.define` gc.collect() From 10e08124469d6b61e174499f58b839df5234f801 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Wed, 8 Jan 2025 15:54:23 +0100 Subject: [PATCH 58/67] Adapt activate_parameter towards threshold per cardinality constraints --- baybe/parameters/utils.py | 55 ++++++++++++++++++++++++--------- baybe/searchspace/continuous.py | 10 ++++-- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/baybe/parameters/utils.py b/baybe/parameters/utils.py index 108a8b56c..0acb65896 100644 --- a/baybe/parameters/utils.py +++ b/baybe/parameters/utils.py @@ -3,11 +3,17 @@ from collections.abc import Callable, Collection from typing import Any, TypeVar +import numpy as np import pandas as pd from attrs import evolve from baybe.parameters.base import Parameter from baybe.parameters.numerical import NumericalContinuousParameter +from baybe.utils.interval import Interval + +# TODO: Check whether it has been defined in BayBE? 
+SMALLEST_FLOAT32 = np.finfo(np.float32).tiny +"""The smallest 32 bit float number.""" _TParameter = TypeVar("_TParameter", bound=Parameter) @@ -93,6 +99,7 @@ def sort_parameters(parameters: Collection[Parameter]) -> tuple[Parameter, ...]: def activate_parameter( parameter: NumericalContinuousParameter, + thresholds: Interval, ) -> NumericalContinuousParameter: """Activates a given parameter by moving its bounds away from zero. @@ -104,33 +111,53 @@ def activate_parameter( Args: parameter: The parameter to be activated. + thresholds: The thresholds of the inactive region of the parameter. Returns: A copy of the parameter with adjusted bounds. Raises: + ValueError: If the threshold does not cover zero. ValueError: If the parameter cannot be activated since both its bounds are in the inactive range. """ - lower = parameter.bounds.lower - upper = parameter.bounds.upper - - # Upper bound is in near-zero range - if lower <= -parameter.near_zero_threshold and parameter.is_near_zero(upper): - return evolve(parameter, bounds=(lower, -parameter.near_zero_threshold)) - - # Lower bound is in near-zero range - if upper > parameter.near_zero_threshold and parameter.is_near_zero(lower): - return evolve(parameter, bounds=(parameter.near_zero_threshold, upper)) + lower_bound = parameter.bounds.lower + upper_bound = parameter.bounds.upper + + if not thresholds.contains(0.0): + raise ValueError("The thresholds must cover zero.") + + # When the lower/upper threshold is zero, it is slightly adjusted to and used as + # threshold for checking the inactive range. + # Check ContinuousCardinalityConstraint.get_threshold(parameter) for the definition + # of threshold of inactive (near-zero) region.
+ lower_threshold_for_inactive_range = min(thresholds.lower, -SMALLEST_FLOAT32) + upper_threshold_for_inactive_range = max(thresholds.upper, SMALLEST_FLOAT32) + + def in_inactive_range(x: float) -> bool: + """Return true when x is within the inactive range.""" + return ( + lower_threshold_for_inactive_range < x < upper_threshold_for_inactive_range + ) - # Both bounds in inactive range - if parameter.is_near_zero(lower) and parameter.is_near_zero(upper): + # When both bounds in inactive range + if in_inactive_range(lower_bound) and in_inactive_range(upper_bound): raise ValueError( f"Parameter '{parameter.name}' cannot be set active since its " f"bounds {parameter.bounds.to_tuple()} are entirely contained in the " - f"inactive range [-{parameter.near_zero_threshold}," - f" {parameter.near_zero_threshold}]." + f"inactive range [-{lower_threshold_for_inactive_range}," + f" {upper_threshold_for_inactive_range}]." ) + # When the upper bound is in near-zero range, reduce it to the lower threshold of + # inactive region. + if lower_bound <= thresholds.lower and in_inactive_range(upper_bound): + return evolve(parameter, bounds=(lower_bound, thresholds.lower)) + + # When the lower bound is in near-zero range, uplift it to the upper threshold of + # the inactive region + if upper_bound >= thresholds.upper and in_inactive_range(lower_bound): + return evolve(parameter, bounds=(thresholds.upper, upper_bound)) + # Both bounds separated from inactive range return parameter diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 83910b27f..3d0481f05 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -345,7 +345,6 @@ def _drop_parameters(self, parameter_names: Collection[str]) -> SubspaceContinuo def _enforce_cardinality_constraints_via_assignment( self, inactive_parameter_names: Collection[str], - threshold: float = 1e-8, ) -> SubspaceContinuous: """Create a copy of the subspace with fixed inactive parameters. 
@@ -356,7 +355,6 @@ def _enforce_cardinality_constraints_via_assignment( Args: inactive_parameter_names: The names of the parameter to be inactivated. - threshold: The threshold for a parameter to be considered active. Returns: A new subspace with fixed inactive parameters and no cardinality @@ -374,7 +372,13 @@ def _enforce_cardinality_constraints_via_assignment( if p.name in inactive_parameter_names: p_adjusted = _FixedNumericalContinuousParameter(name=p.name, value=0.0) elif p.name in active_parameter_names: - p_adjusted = activate_parameter(p) + # cardinality constraint object containing the current parameter + cardinality_constraint_with_p = [ + c for c in self.constraints_cardinality if p.name in c.parameters + ][0] + p_adjusted = activate_parameter( + p, cardinality_constraint_with_p.get_threshold(p) + ) else: p_adjusted = p adjusted_parameters.append(p_adjusted) From 142b1ecb4b5eeac90a584ad502e93fbc4bc70879 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Wed, 8 Jan 2025 21:59:26 +0100 Subject: [PATCH 59/67] Refine check cardinaltiy constraint fulfillment logic - Support checking minimum cardinality or maximum cardinatliy - Adapt to threshold per cardinality - Update related tests --- baybe/recommenders/pure/bayesian/botorch.py | 7 +- baybe/utils/cardinality_constraints.py | 99 +++++++++++++------ .../test_cardinality_constraint_continuous.py | 46 ++++----- .../test_constraints_continuous.py | 6 +- 4 files changed, 99 insertions(+), 59 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 356aae00f..4f5a61028 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -30,7 +30,7 @@ SubspaceContinuous, SubspaceDiscrete, ) -from baybe.utils.cardinality_constraints import is_min_cardinality_fulfilled +from baybe.utils.cardinality_constraints import is_cardinality_fulfilled from baybe.utils.dataframe import to_tensor from baybe.utils.plotting import 
to_string from baybe.utils.sampling_algorithms import ( @@ -289,12 +289,13 @@ def _recommend_continuous_with_cardinality_constraints( points, acqf_value = self._optimize_continuous_subspaces(subspaces, batch_size) # Check if any minimum cardinality constraints are violated - if not is_min_cardinality_fulfilled( + if not is_cardinality_fulfilled( subspace_continuous, pd.DataFrame(points, columns=subspace_continuous.parameter_names), + "min", ): warnings.warn( - "Minimum cardinality constraints are not guaranteed.", + "At least one minimum cardinality constraint is violated.", MinimumCardinalityViolatedWarning, ) diff --git a/baybe/utils/cardinality_constraints.py b/baybe/utils/cardinality_constraints.py index 5c2b178cc..4be92ec7f 100644 --- a/baybe/utils/cardinality_constraints.py +++ b/baybe/utils/cardinality_constraints.py @@ -1,79 +1,120 @@ """Utilities related to cardinality constraints.""" +from typing import Literal + import numpy as np import pandas as pd -from baybe.parameters import NumericalContinuousParameter +from baybe.parameters.utils import SMALLEST_FLOAT32 from baybe.searchspace import SubspaceContinuous +from baybe.utils.interval import Interval def count_near_zeros( - parameters: tuple[NumericalContinuousParameter, ...], points: pd.DataFrame + thresholds: tuple[Interval, ...], points: pd.DataFrame ) -> np.ndarray: """Return the counts of near-zeros in the recommendations. Args: - parameters: A list of parameter objects according to which the counts of + thresholds: A list of thresholds for according to which the counts of near-zeros in the recommendations should be calculated. points: The recommendations of the parameter objects. Raises: - ValueError: If parameters does not cover all parameters present in points. + ValueError: If number of thresholds differs from the number of + parameters in points. Returns: The counts of near-zero values in the recommendations. 
""" - p_names = [p.name for p in parameters] - if not set(points.columns).issubset(set(p_names)): + if len(thresholds) != len(points.columns): raise ValueError( - "Parameters must cover all parameters present in points: " - "parameter names in parameters are: {p_name} and parameter " - "names from points are: {points.columns}." + f"The size of thresholds ({len(thresholds)}) must be the same as the " + f"number of parameters ({len(points.columns)}) in points." ) + # Get the lower threshold for determining zeros/non-zeros. When the + # lower_threshold is zero, we replace it with a very small negative value to have + # the threshold being an open-support. + lower_threshold = np.array( + [min(threshold.lower, -SMALLEST_FLOAT32) for threshold in thresholds] + ) + lower_threshold = np.broadcast_to(lower_threshold, points.shape) - # Only keep parameters that are present in points; The order of parameters - # aligns with that in points. - parameters_filtered_sorted = ( - p for p_name in points.columns for p in parameters if p.name == p_name + # Get the upper threshold for determining zeros/non-zeros. When the + # upper_threshold is zero, we replace it with a very small positive value. + upper_threshold = np.array( + [max(threshold.upper, SMALLEST_FLOAT32) for threshold in thresholds] ) + upper_threshold = np.broadcast_to(upper_threshold, points.shape) - # Boolean values indicating whether the candidate is near-zero: True for near-zero. - p_thresholds = np.array([p.near_zero_threshold for p in parameters_filtered_sorted]) - p_thresholds_mask = np.broadcast_to(p_thresholds, points.shape) - near_zero_flags = (points > -p_thresholds_mask) & (points < p_thresholds_mask) + # Boolean values indicating whether the candidates is near-zero: True for is + # near-zero. 
+ near_zero_flags = (points > lower_threshold) & (points < upper_threshold) return np.sum(near_zero_flags, axis=1) -def is_min_cardinality_fulfilled( - subspace_continuous: SubspaceContinuous, batch: pd.DataFrame +def is_cardinality_fulfilled( + subspace_continuous: SubspaceContinuous, + batch: pd.DataFrame, + type_cardinality: Literal["min", "max"], ) -> bool: - """Check whether any minimum cardinality constraints are fulfilled. + """Check whether all minimum cardinality constraints are fulfilled. Args: - subspace_continuous: The continuous subspace from which candidates are - generated. + subspace_continuous: + The continuous subspace from which candidates are generated. batch: The recommended batch + type_cardinality: + "min" or "max". "min" indicates all minimum cardinality constraints are + checked; "max" for all maximum cardinality constraints. Returns: Return "True" if all minimum cardinality constraints are fulfilled; "False" otherwise. + + Raises: + ValueError: If type_cardinality is neither "min" nor "max". """ + if type_cardinality not in ["min", "max"]: + raise ValueError( + f"Unknown type of cardinality. Only support min or max but " + f"{type_cardinality=}." 
+ ) + if len(subspace_continuous.constraints_cardinality) == 0: return True for c in subspace_continuous.constraints_cardinality: - if c.min_cardinality == 0: + # No need to check this redundant cardinality constraint + if (c.min_cardinality == 0) and type_cardinality == "min": continue - # Counts the near-zero elements + if (c.max_cardinality == len(c.parameters)) and type_cardinality == "max": + continue + + # Batch of parameters that are related to cardinality constraint batch_related_to_c = batch[c.parameters] - n_near_zeros = count_near_zeros( - subspace_continuous.parameters, batch_related_to_c - ) - # When the minimum cardinality is violated - if np.any(len(c.parameters) - n_near_zeros < c.min_cardinality): + # Parameters related to cardinality constraint + parameters_in_c = subspace_continuous.get_parameters_by_name(c.parameters) + + # Thresholds of parameters that are related to the cardinality constraint + thresholds = tuple(c.get_threshold(p) for p in parameters_in_c) + + # Count the number of near-zero elements + n_near_zeros = count_near_zeros(thresholds, batch_related_to_c) + + # When any minimum cardinality is violated + if type_cardinality == "min" and np.any( + len(c.parameters) - n_near_zeros < c.min_cardinality + ): + return False + + # When any maximum cardinality is violated + if type_cardinality == "max" and np.any( + len(c.parameters) - n_near_zeros > c.max_cardinality + ): return False return True diff --git a/tests/constraints/test_cardinality_constraint_continuous.py b/tests/constraints/test_cardinality_constraint_continuous.py index 8f5bbbc2c..5c4780109 100644 --- a/tests/constraints/test_cardinality_constraint_continuous.py +++ b/tests/constraints/test_cardinality_constraint_continuous.py @@ -17,33 +17,30 @@ from baybe.recommenders import BotorchRecommender from baybe.searchspace.core import SearchSpace, SubspaceContinuous from baybe.targets.numerical import NumericalTarget -from baybe.utils.cardinality_constraints import 
count_near_zeros +from baybe.utils.cardinality_constraints import is_cardinality_fulfilled def _validate_cardinality_constrained_batch( + subspace_continuous: SubspaceContinuous, batch: pd.DataFrame, - min_cardinality: int, - max_cardinality: int, batch_size: int, - parameters: tuple[NumericalContinuousParameter], captured_warnings: list[WarningMessage | None], ): """Validate that a cardinality-constrained batch fulfills the necessary conditions. Args: + subspace_continuous: The continuous subspace from which to recommend the points. batch: Batch to validate. - min_cardinality: Minimum required cardinality. - max_cardinality: Maximum allowed cardinality. - batch_size: Requested batch size. - parameters: A list of parameters for which recommendations are provided. + batch_size: The number of points to be recommended. captured_warnings: A list of captured warnings. """ # Assert that the maximum cardinality constraint is fulfilled - n_nonzeros = len(parameters) - count_near_zeros(parameters, batch) - assert np.all(n_nonzeros <= max_cardinality) + assert is_cardinality_fulfilled(subspace_continuous, batch, "max") # Check whether the minimum cardinality constraint is fulfilled - is_min_cardinality_fulfilled = np.all(n_nonzeros >= min_cardinality) + is_min_cardinality_fulfilled = is_cardinality_fulfilled( + subspace_continuous, batch, "min" + ) # A warning must be raised when the minimum cardinality constraint is not fulfilled if not is_min_cardinality_fulfilled: @@ -62,8 +59,13 @@ def _validate_cardinality_constrained_batch( # We thus include this check as a safety net for catching regressions. If it # turns out the check fails because we observe degenerate batches as actual # recommendations, we need to invent something smarter. 
+ max_cardinalities = [ + c.max_cardinality for c in subspace_continuous.constraints_cardinality + ] if len(unique_row := batch.drop_duplicates()) == 1: - assert (unique_row.iloc[0] == 0.0).all() and (max_cardinality == 0) + assert (unique_row.iloc[0] == 0.0).all() and all( + max_cardinality == 0 for max_cardinality in max_cardinalities + ) # Combinations of cardinalities to be tested @@ -96,15 +98,15 @@ def test_sampling_cardinality_constraint(cardinality_bounds: tuple[int, int]): ), ) - subspace = SubspaceContinuous(parameters=parameters, constraints_nonlin=constraints) + subspace_continous = SubspaceContinuous( + parameters=parameters, constraints_nonlin=constraints + ) with warnings.catch_warnings(record=True) as w: - samples = subspace.sample_uniform(BATCH_SIZE) + samples = subspace_continous.sample_uniform(BATCH_SIZE) # Assert that the constraint conditions hold - _validate_cardinality_constrained_batch( - samples, min_cardinality, max_cardinality, BATCH_SIZE, parameters, w - ) + _validate_cardinality_constrained_batch(subspace_continous, samples, BATCH_SIZE, w) def test_polytope_sampling_with_cardinality_constraint(): @@ -147,18 +149,16 @@ def test_polytope_sampling_with_cardinality_constraint(): min_cardinality=MIN_CARDINALITY, ), ] - searchspace = SearchSpace.from_product(parameters, constraints) + subspace_continous = SubspaceContinuous.from_product(parameters, constraints) with warnings.catch_warnings(record=True) as w: - samples = searchspace.continuous.sample_uniform(BATCH_SIZE) + samples = subspace_continous.sample_uniform(BATCH_SIZE) # Assert that the constraint conditions hold _validate_cardinality_constrained_batch( - samples[params_cardinality], - MIN_CARDINALITY, - MAX_CARDINALITY, + subspace_continous, + samples, BATCH_SIZE, - tuple(p for p in parameters if p.name in params_cardinality), w, ) diff --git a/tests/constraints/test_constraints_continuous.py b/tests/constraints/test_constraints_continuous.py index e326ebb83..182f18da0 100644 --- 
a/tests/constraints/test_constraints_continuous.py +++ b/tests/constraints/test_constraints_continuous.py @@ -13,7 +13,7 @@ from baybe.recommenders.pure.bayesian.base import BayesianRecommender from baybe.recommenders.pure.bayesian.botorch import BotorchRecommender from baybe.recommenders.pure.nonpredictive.sampling import RandomRecommender -from baybe.searchspace.core import SearchSpace +from baybe.searchspace import SearchSpace from baybe.targets.numerical import NumericalTarget from tests.conftest import run_iterations from tests.constraints.test_cardinality_constraint_continuous import ( @@ -111,11 +111,9 @@ def test_cardinality_constraint(recommender): # Assert that the constraint conditions hold _validate_cardinality_constrained_batch( + searchspace.continuous, recommendation, - MIN_CARDINALITY, - MAX_CARDINALITY, BATCH_SIZE, - tuple(parameters), w, ) From 04f145c72923ffe22f55d16a3e13d30069e0f102 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Thu, 9 Jan 2025 09:07:51 +0100 Subject: [PATCH 60/67] Remove threshold related attribute and method in numerical continuous parameter --- baybe/parameters/numerical.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/baybe/parameters/numerical.py b/baybe/parameters/numerical.py index 4c087199e..439a80050 100644 --- a/baybe/parameters/numerical.py +++ b/baybe/parameters/numerical.py @@ -112,9 +112,6 @@ class NumericalContinuousParameter(ContinuousParameter): bounds: Interval = field(default=None, converter=convert_bounds) """The bounds of the parameter.""" - near_zero_threshold: float = field(default=1e-5, converter=float) - """A threshold for determining if the value is considered near-zero.""" - @bounds.validator def _validate_bounds(self, _: Any, value: Interval) -> None: # noqa: DOC101, DOC103 """Validate bounds. @@ -152,21 +149,6 @@ def summary(self) -> dict: ) return param_dict - def is_near_zero(self, item: float) -> bool: - """Return whether an item is near-zero. 
- - Important: - Value in the open interval (-near_zero_threshold, near_zero_threshold) - will be treated as near_zero. - - Args: - item: The value to be checked. - - Returns: - ``True`` if the value is near-zero, ``False`` otherwise. - """ - return abs(item) < self.near_zero_threshold - @define(frozen=True, slots=False) class _FixedNumericalContinuousParameter(ContinuousParameter): From 55a7ba3730955af2eb88197155ac6abd55975886 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Thu, 9 Jan 2025 13:59:35 +0100 Subject: [PATCH 61/67] Make zero-checking and threshold definition compatible * Assure parameter bounds cover zero * Check invalid "activate_parameter" option first --- baybe/parameters/utils.py | 65 ++++++++++++-------- baybe/utils/cardinality_constraints.py | 85 ++++++++++++-------------- 2 files changed, 78 insertions(+), 72 deletions(-) diff --git a/baybe/parameters/utils.py b/baybe/parameters/utils.py index 0acb65896..c81b038ae 100644 --- a/baybe/parameters/utils.py +++ b/baybe/parameters/utils.py @@ -3,18 +3,16 @@ from collections.abc import Callable, Collection from typing import Any, TypeVar -import numpy as np import pandas as pd from attrs import evolve from baybe.parameters.base import Parameter -from baybe.parameters.numerical import NumericalContinuousParameter +from baybe.parameters.numerical import ( + NumericalContinuousParameter, + _FixedNumericalContinuousParameter, +) from baybe.utils.interval import Interval -# TODO: Check whether it has been defined in BayBE? 
-SMALLEST_FLOAT32 = np.finfo(np.float32).tiny -"""The smallest 32 bit float number.""" - _TParameter = TypeVar("_TParameter", bound=Parameter) @@ -100,7 +98,7 @@ def sort_parameters(parameters: Collection[Parameter]) -> tuple[Parameter, ...]: def activate_parameter( parameter: NumericalContinuousParameter, thresholds: Interval, -) -> NumericalContinuousParameter: +) -> NumericalContinuousParameter | _FixedNumericalContinuousParameter: """Activates a given parameter by moving its bounds away from zero. Important: @@ -111,7 +109,7 @@ def activate_parameter( Args: parameter: The parameter to be activated. - thresholds: The thresholds of the inactive region of the parameter. + thresholds: The thresholds of the inactive range of the parameter. Returns: A copy of the parameter with adjusted bounds. @@ -125,39 +123,54 @@ def activate_parameter( upper_bound = parameter.bounds.upper if not thresholds.contains(0.0): - raise ValueError("The thresholds must cover zero.") + raise ValueError( + f"The thresholds must cover zero but ({thresholds.lower}, " + f"{thresholds.upper}) is given." + ) - # When the lower/upper threshold is zero, it is slightly adjusted to and used as - # thresholf for checking the inactive range. - # Check ContinuousCardinalityConstraint.get_threshold(parameter) for the definition - # of threshold of inactive (near-zero) region. - lower_threshold_for_inactive_range = min(thresholds.lower, -SMALLEST_FLOAT32) - upper_threshold_for_inactive_range = max(thresholds.upper, SMALLEST_FLOAT32) + if not parameter.bounds.contains(0.0): + raise ValueError( + f"The parameter bounds must cover zero but " + f"({parameter.bounds.lower}, {parameter.bounds.upper}) is " + f"given." 
+ ) def in_inactive_range(x: float) -> bool: """Return true when x is within the inactive range.""" - return ( - lower_threshold_for_inactive_range < x < upper_threshold_for_inactive_range - ) - - # When both bounds in inactive range + if thresholds.lower == 0.0: + return thresholds.lower <= x < thresholds.upper + if thresholds.upper == 0.0: + return thresholds.lower < x <= thresholds.upper + return thresholds.lower < x < thresholds.upper + + # Note: When both bounds in inactive range. This step must be checked first to catch + # all possible cases when a parameter cannot be activated. if in_inactive_range(lower_bound) and in_inactive_range(upper_bound): raise ValueError( f"Parameter '{parameter.name}' cannot be set active since its " f"bounds {parameter.bounds.to_tuple()} are entirely contained in the " - f"inactive range [-{lower_threshold_for_inactive_range}," - f" {upper_threshold_for_inactive_range}]." + f"inactive range ({thresholds.lower}, {thresholds.upper})." ) - # When the upper bound is in near-zero range, reduce it to the lower threshold of + # When the upper bound is in inactive range, move it to the lower threshold of the # inactive region. 
- if lower_bound <= thresholds.lower and in_inactive_range(upper_bound): + if lower_bound < thresholds.lower and in_inactive_range(upper_bound): return evolve(parameter, bounds=(lower_bound, thresholds.lower)) - # When the lower bound is in near-zero range, uplift it to the upper threshold of + if lower_bound == thresholds.lower and in_inactive_range(upper_bound): + return _FixedNumericalContinuousParameter( + name=parameter.name, value=thresholds.lower + ) + + # When the lower bound is in inactive range, move it to the upper threshold of # the inactive region - if upper_bound >= thresholds.upper and in_inactive_range(lower_bound): + if upper_bound > thresholds.upper and in_inactive_range(lower_bound): return evolve(parameter, bounds=(thresholds.upper, upper_bound)) + if upper_bound == thresholds.upper and in_inactive_range(lower_bound): + return _FixedNumericalContinuousParameter( + name=parameter.name, value=thresholds.upper + ) + # Both bounds separated from inactive range return parameter diff --git a/baybe/utils/cardinality_constraints.py b/baybe/utils/cardinality_constraints.py index 4be92ec7f..f93d0ec5c 100644 --- a/baybe/utils/cardinality_constraints.py +++ b/baybe/utils/cardinality_constraints.py @@ -5,54 +5,46 @@ import numpy as np import pandas as pd -from baybe.parameters.utils import SMALLEST_FLOAT32 from baybe.searchspace import SubspaceContinuous from baybe.utils.interval import Interval -def count_near_zeros( - thresholds: tuple[Interval, ...], points: pd.DataFrame -) -> np.ndarray: - """Return the counts of near-zeros in the recommendations. +def count_zeros(thresholds: tuple[Interval, ...], points: pd.DataFrame) -> np.ndarray: + """Return the counts of zeros in the recommendations. Args: - thresholds: A list of thresholds for according to which the counts of - near-zeros in the recommendations should be calculated. + thresholds: A list of thresholds according to which the counts of zeros + in the recommendations should be calculated. 
points: The recommendations of the parameter objects. - Raises: - ValueError: If number of thresholds differs from the number of - parameters in points. - Returns: - The counts of near-zero values in the recommendations. - + The counts of zero parameters in the recommendations. + Raises: + ValueError: If the number of thresholds differs from the number of + parameters in points. """ if len(thresholds) != len(points.columns): raise ValueError( f"The size of thresholds ({len(thresholds)}) must be the same as the " f"number of parameters ({len(points.columns)}) in points." ) - # Get the lower threshold for determining zeros/non-zeros. When the - # lower_threshold is zero, we replace it with a very small negative value to have - # the threshold being an open-support. - lower_threshold = np.array( - [min(threshold.lower, -SMALLEST_FLOAT32) for threshold in thresholds] - ) - lower_threshold = np.broadcast_to(lower_threshold, points.shape) - - # Get the upper threshold for determining zeros/non-zeros. When the - # upper_threshold is zero, we replace it with a very small positive value. - upper_threshold = np.array( - [max(threshold.upper, SMALLEST_FLOAT32) for threshold in thresholds] - ) - upper_threshold = np.broadcast_to(upper_threshold, points.shape) - - # Boolean values indicating whether the candidates is near-zero: True for is - # near-zero. 
- near_zero_flags = (points > lower_threshold) & (points < upper_threshold) - return np.sum(near_zero_flags, axis=1) + # Get the lower/upper thresholds for determining zeros/non-zeros + lower_thresholds = np.array([threshold.lower for threshold in thresholds]) + lower_thresholds = np.broadcast_to(lower_thresholds, points.shape) + + upper_thresholds = np.array([threshold.upper for threshold in thresholds]) + upper_thresholds = np.broadcast_to(upper_thresholds, points.shape) + + # Boolean values indicating whether the candidates are treated zeros: True for zero + zero_flags = (points > lower_thresholds) & (points < upper_thresholds) + + # Correct the comparison on the special boundary: zero. This step is needed + # because when the lower_threshold = 0, a value v with lower_threshold <= v < + # upper_threshold should be treated zero. + zero_flags = (points == 0.0) | zero_flags + + return np.sum(zero_flags, axis=1) def is_cardinality_fulfilled( @@ -60,19 +52,18 @@ def is_cardinality_fulfilled( batch: pd.DataFrame, type_cardinality: Literal["min", "max"], ) -> bool: - """Check whether all minimum cardinality constraints are fulfilled. + """Check whether all minimum (or maximum) cardinality constraints are fulfilled. Args: - subspace_continuous: - The continuous subspace from which candidates are generated. + subspace_continuous: The continuous subspace from which candidates are + generated. batch: The recommended batch - type_cardinality: - "min" or "max". "min" indicates all minimum cardinality constraints are - checked; "max" for all maximum cardinality constraints. + type_cardinality: "min" or "max". "min" indicates all minimum cardinality + constraints will be checked; "max" for all maximum cardinality constraints. Returns: - Return "True" if all minimum cardinality constraints are fulfilled; "False" - otherwise. + Return "True" if all minimum (or maximum) cardinality constraints are + fulfilled; "False" otherwise. 
Raises: ValueError: If type_cardinality is neither "min" nor "max". @@ -80,14 +71,16 @@ def is_cardinality_fulfilled( if type_cardinality not in ["min", "max"]: raise ValueError( f"Unknown type of cardinality. Only support min or max but " - f"{type_cardinality=}." + f"{type_cardinality=} is given." ) if len(subspace_continuous.constraints_cardinality) == 0: return True for c in subspace_continuous.constraints_cardinality: - # No need to check this redundant cardinality constraint + # No need to check the redundant cardinality constraints that are + # - min_cardinality = 0 + # - max_cardinality = len(parameters) if (c.min_cardinality == 0) and type_cardinality == "min": continue @@ -103,18 +96,18 @@ def is_cardinality_fulfilled( # Thresholds of parameters that are related to the cardinality constraint thresholds = tuple(c.get_threshold(p) for p in parameters_in_c) - # Count the number of near-zero elements - n_near_zeros = count_near_zeros(thresholds, batch_related_to_c) + # Count the number of zeros + n_zeros = count_zeros(thresholds, batch_related_to_c) # When any minimum cardinality is violated if type_cardinality == "min" and np.any( - len(c.parameters) - n_near_zeros < c.min_cardinality + len(c.parameters) - n_zeros < c.min_cardinality ): return False # When any maximum cardinality is violated if type_cardinality == "max" and np.any( - len(c.parameters) - n_near_zeros > c.max_cardinality + len(c.parameters) - n_zeros > c.max_cardinality ): return False return True From 78b115f1cba5db9fdf450684b26d762773e05b15 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Thu, 9 Jan 2025 14:46:51 +0100 Subject: [PATCH 62/67] Add activate parameter step in random sampler --- baybe/searchspace/continuous.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 3d0481f05..70493e7e4 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -529,10 +529,16 @@ 
def _sample_from_polytope_with_cardinality_constraints( # Randomly set some parameters inactive inactive_params_sample = self._sample_inactive_parameters(1)[0] - # TODO: active parameters must be guaranteed non-zero! - # Remove the inactive parameters from the search space - subspace_without_cardinality_constraint = self._drop_parameters( - inactive_params_sample + # Remove the inactive parameters from the search space. In the first + # step, the active parameters get activated and inactive parameters are + # fixed to zero. The first step helps ensure active parameters stay + # non-zero, especially when one boundary is zero. The second step is + # optional and it helps reduce the parameter space with certain + # computational cost. + subspace_without_cardinality_constraint = ( + self._enforce_cardinality_constraints_via_assignment( + inactive_params_sample + )._drop_parameters(inactive_params_sample) ) # TODO: Replace ValueError with customized erorr. See From 68045a7220332ad1f9ac4fe73e7adee15d40aab0 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Thu, 9 Jan 2025 16:47:42 +0100 Subject: [PATCH 63/67] Update CHANGELOG.md --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae87bf664..4f84d4c58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,9 +20,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 optimization behavior in the presence of multiple subspaces - Utilities `inactive_parameter_combinations` and`n_inactive_parameter_combinations` in both `ContinuousCardinalityConstraint`and `SubspaceContinuous` -- Attribute `near_zero_threshold` and utility `is_near_zero` to - `NumericalContinuousParameter` -- Utilities `count_near_zeros` and `is_min_cardinality_fulfilled` +- Attribute `relative_threshold` and method `get_threshold` to + `ContinuousCardinalityConstraint` +- Utilities `count_zeros` and `is_cardinality_fulfilled` ### Changed - `SubstanceParameter` 
encodings are now computed exclusively with the From e6e2e97c4319d4f6fd77d4d81100c172807e64a3 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Thu, 9 Jan 2025 17:03:09 +0100 Subject: [PATCH 64/67] Fix type hint in continuous numerical parameter classes --- baybe/parameters/numerical.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/baybe/parameters/numerical.py b/baybe/parameters/numerical.py index 439a80050..e58afa6af 100644 --- a/baybe/parameters/numerical.py +++ b/baybe/parameters/numerical.py @@ -136,7 +136,7 @@ def is_in_range(self, item: float) -> bool: @override @property - def comp_rep_columns(self) -> tuple[str, ...]: + def comp_rep_columns(self) -> tuple[str]: return (self.name,) @override @@ -171,7 +171,7 @@ def is_in_range(self, item: float) -> bool: @override @property - def comp_rep_columns(self) -> tuple[str, ...]: + def comp_rep_columns(self) -> tuple[str]: return (self.name,) @override From a30b0092e8482c621c0b9c882737783468dc7d5f Mon Sep 17 00:00:00 2001 From: Di Jin Date: Mon, 13 Jan 2025 17:07:57 +0100 Subject: [PATCH 65/67] Test activate parameter function --- tests/utils/test_parameters.py | 199 +++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 tests/utils/test_parameters.py diff --git a/tests/utils/test_parameters.py b/tests/utils/test_parameters.py new file mode 100644 index 000000000..b038e4e9d --- /dev/null +++ b/tests/utils/test_parameters.py @@ -0,0 +1,199 @@ +"""Tests for parameter utilities.""" + +import pytest +from pytest import param + +from baybe.parameters import NumericalContinuousParameter +from baybe.parameters.numerical import _FixedNumericalContinuousParameter +from baybe.parameters.utils import activate_parameter +from baybe.utils.interval import Interval + + +def mirror_interval(interval: Interval) -> Interval: + """Return an interval copy mirrored around the origin.""" + return Interval(lower=-interval.upper, upper=-interval.lower) + + +@pytest.mark.parametrize( + ( + 
"bounds", + "thresholds", + "is_valid", + "expected_bounds", + ), + [ + # one-side bounds, two-side thresholds + param( + Interval(lower=0.0, upper=1.0), + Interval(lower=-1.0, upper=1.5), + False, + None, + id="oneside_bounds_in_twoside_thresholds", + ), + param( + Interval(lower=0.0, upper=1.0), + Interval(lower=-1.0, upper=1.0), + True, + Interval(lower=1.0, upper=1.0), + id="oneside_bounds_in_twoside_thresholds_fixed_value", + ), + param( + Interval(lower=0.0, upper=1.0), + Interval(lower=-1.0, upper=0.5), + True, + Interval(lower=0.5, upper=1.0), + id="oneside_bounds_intersected_with_twoside_thresholds", + ), + # one-side bounds, one-side thresholds + param( + Interval(lower=0.0, upper=1.0), + Interval(lower=-1.0, upper=0.0), + True, + Interval(lower=0.0, upper=1.0), + id="oneside_bounds_intersected_on_single_point_with_oneside_thresholds", + ), + param( + Interval(lower=0.0, upper=1.0), + Interval(lower=0.0, upper=0.5), + True, + Interval(lower=0.5, upper=1.0), + id="oneside_bounds_cover_oneside_thresholds", + ), + param( + Interval(lower=0.0, upper=1.0), + Interval(lower=0.0, upper=1.0), + True, + Interval(lower=1.0, upper=1.0), + id="oneside_bounds_match_oneside_thresholds", + ), + param( + Interval(lower=0.0, upper=1.0), + Interval(lower=0.0, upper=1.1), + False, + None, + id="oneside_bounds_in_oneside_thresholds", + ), + # Two-side bounds. One-side thresholds do not differ from two-side threshold + # in these cases. Hence, use two-side thresholds. 
+ param( + Interval(lower=-0.5, upper=1.0), + Interval(lower=-1.0, upper=1.1), + False, + None, + id="twoside_bounds_in_twoside_thresholds", + ), + param( + Interval(lower=-0.5, upper=1.0), + Interval(lower=-0.5, upper=1.0), + True, + Interval(lower=-0.5, upper=1.0), + id="twoside_bounds_match_twoside_thresholds", + ), + param( + Interval(lower=-0.6, upper=1.1), + Interval(lower=-0.5, upper=1.0), + True, + Interval(lower=-0.6, upper=1.1), + id="twoside_bounds_cover_twoside_thresholds", + ), + param( + Interval(lower=-0.6, upper=1.1), + Interval(lower=-1.0, upper=0.5), + True, + Interval(lower=0.5, upper=1.1), + id="twoside_bounds_intersected_with_twoside_thresholds", + ), + param( + Interval(lower=-0.6, upper=0.5), + Interval(lower=-1.0, upper=0.5), + True, + Interval(lower=0.5, upper=0.5), + id="twoside_bounds_partial_in_twoside_thresholds", + ), + param( + Interval(lower=-1.0, upper=0.5), + Interval(lower=-0.6, upper=0.5), + True, + Interval(lower=-1.0, upper=0.5), + id="twoside_bounds_partial_cover_twoside_thresholds", + ), + ], +) +@pytest.mark.parametrize("mirror", [False, True]) +def test_activate_parameter( + bounds: Interval, + thresholds: Interval, + is_valid: bool, + expected_bounds: Interval | None, + mirror: bool, +) -> None: + """Test that the utility correctly activate a parameter. 
+ + Args: + bounds: the bounds of the parameter to activate + thresholds: the thresholds of inactive range + is_valid: boolean variable indicating whether a parameter is returned from + activate_parameter + expected_bounds: the bounds of the activated parameter if one is returned + mirror: if true both bounds and thresholds get mirrored + + Returns: + None + """ + if mirror: + bounds = mirror_interval(bounds) + thresholds = mirror_interval(thresholds) + if mirror and is_valid: + expected_bounds = mirror_interval(expected_bounds) + + parameter = NumericalContinuousParameter("parameter", bounds=bounds) + + if is_valid: + activated_parameter = activate_parameter(parameter, thresholds) + assert activated_parameter.bounds == expected_bounds + if expected_bounds.is_degenerate: + assert isinstance(activated_parameter, _FixedNumericalContinuousParameter) + else: + with pytest.raises(ValueError, match="cannot be set active"): + activate_parameter(parameter, thresholds) + + +@pytest.mark.parametrize( + ("bounds", "thresholds", "match"), + [ + param( + Interval(lower=-0.5, upper=0.5), + Interval(lower=0.5, upper=1.0), + "The thresholds must cover zero", + id="invalid_thresholds", + ), + param( + Interval(lower=0.5, upper=1.0), + Interval(lower=-0.5, upper=0.5), + "The parameter bounds must cover zero", + id="invalid_bounds", + ), + ], +) +@pytest.mark.parametrize("mirror", [False, True]) +def test_invalid_activate_parameter( + bounds: Interval, thresholds: Interval, match: str, mirror: bool +) -> None: + """Test that invalid bounds or thresholds raise an error.
+ + Args: + bounds: the bounds of the parameter to activate + thresholds: the thresholds of inactive range + match: error message to match + mirror: if true both bounds and thresholds get mirrored + + Returns: + None + """ + if mirror: + bounds = mirror_interval(bounds) + thresholds = mirror_interval(thresholds) + + parameter = NumericalContinuousParameter("parameter", bounds=bounds) + with pytest.raises(ValueError, match=match): + activate_parameter(parameter, thresholds) From 983b1a9b727f595d6c8119d356b9ef46421bf406 Mon Sep 17 00:00:00 2001 From: Di Jin Date: Mon, 13 Jan 2025 22:37:36 +0100 Subject: [PATCH 66/67] Correct logic on boundary handling in activate parameter --- baybe/parameters/utils.py | 34 ++++++------- tests/utils/test_parameters.py | 90 +++++++++------------------------- 2 files changed, 36 insertions(+), 88 deletions(-) diff --git a/baybe/parameters/utils.py b/baybe/parameters/utils.py index c81b038ae..b6dc18c38 100644 --- a/baybe/parameters/utils.py +++ b/baybe/parameters/utils.py @@ -9,7 +9,6 @@ from baybe.parameters.base import Parameter from baybe.parameters.numerical import ( NumericalContinuousParameter, - _FixedNumericalContinuousParameter, ) from baybe.utils.interval import Interval @@ -98,7 +97,7 @@ def sort_parameters(parameters: Collection[Parameter]) -> tuple[Parameter, ...]: def activate_parameter( parameter: NumericalContinuousParameter, thresholds: Interval, -) -> NumericalContinuousParameter | _FixedNumericalContinuousParameter: +) -> NumericalContinuousParameter: """Activates a given parameter by moving its bounds away from zero. Important: @@ -135,16 +134,21 @@ def activate_parameter( f"given." ) + # Note that the definition on the boundary (lower/upper threshold) is vague. + # The value on the lower/upper boundary is determined as within inactive_range; + # while an activated parameter may take this boundary value (lower/upper + # threshold).
We allow the misuse of boundary in the "in_inactive_range" and it + # is just a utility for checking the condition. Ultimately, the "key" threshold + # boundary appears as a bound of the activated parameter and this is compatible + # with the thresholds defined in ContinuousCardinalityConstraint, as long as the + # "key" threshold boundary is not zero. The "key" threshold boundary is always + # non-zero when the thresholds are inferred from the bounds of this parameter. + def in_inactive_range(x: float) -> bool: """Return true when x is within the inactive range.""" - if thresholds.lower == 0.0: - return thresholds.lower <= x < thresholds.upper - if thresholds.upper == 0.0: - return thresholds.lower < x <= thresholds.upper - return thresholds.lower < x < thresholds.upper - - # Note: When both bounds in inactive range. This step must be checked first to catch - # all possible cases when a parameter cannot be activated. + return thresholds.lower <= x <= thresholds.upper + + # When both bounds in inactive range.
if in_inactive_range(lower_bound) and in_inactive_range(upper_bound): raise ValueError( f"Parameter '{parameter.name}' cannot be set active since its " @@ -157,20 +161,10 @@ def in_inactive_range(x: float) -> bool: if lower_bound < thresholds.lower and in_inactive_range(upper_bound): return evolve(parameter, bounds=(lower_bound, thresholds.lower)) - if lower_bound == thresholds.lower and in_inactive_range(upper_bound): - return _FixedNumericalContinuousParameter( - name=parameter.name, value=thresholds.lower - ) - # When the lower bound is in inactive range, move it to the upper threshold of # the inactive region if upper_bound > thresholds.upper and in_inactive_range(lower_bound): return evolve(parameter, bounds=(thresholds.upper, upper_bound)) - if upper_bound == thresholds.upper and in_inactive_range(lower_bound): - return _FixedNumericalContinuousParameter( - name=parameter.name, value=thresholds.upper - ) - # Both bounds separated from inactive range return parameter diff --git a/tests/utils/test_parameters.py b/tests/utils/test_parameters.py index b038e4e9d..2c4a3af8c 100644 --- a/tests/utils/test_parameters.py +++ b/tests/utils/test_parameters.py @@ -22,100 +22,54 @@ def mirror_interval(interval: Interval) -> Interval: "expected_bounds", ), [ - # one-side bounds, two-side thresholds param( - Interval(lower=0.0, upper=1.0), - Interval(lower=-1.0, upper=1.5), + Interval(lower=-1.0, upper=1.0), + Interval(lower=-1.0, upper=1.0), False, None, - id="oneside_bounds_in_twoside_thresholds", + id="bounds_on_thresholds", ), param( - Interval(lower=0.0, upper=1.0), Interval(lower=-1.0, upper=1.0), - True, - Interval(lower=1.0, upper=1.0), - id="oneside_bounds_in_twoside_thresholds_fixed_value", - ), - param( - Interval(lower=0.0, upper=1.0), - Interval(lower=-1.0, upper=0.5), - True, - Interval(lower=0.5, upper=1.0), - id="oneside_bounds_intersected_with_twoside_thresholds", - ), - # one-side bounds, one-side thresholds - param( - Interval(lower=0.0, upper=1.0), - 
Interval(lower=-1.0, upper=0.0), - True, - Interval(lower=0.0, upper=1.0), - id="oneside_bounds_intersected_on_single_point_with_oneside_thresholds", - ), - param( - Interval(lower=0.0, upper=1.0), - Interval(lower=0.0, upper=0.5), - True, - Interval(lower=0.5, upper=1.0), - id="oneside_bounds_cover_oneside_thresholds", - ), - param( - Interval(lower=0.0, upper=1.0), - Interval(lower=0.0, upper=1.0), - True, - Interval(lower=1.0, upper=1.0), - id="oneside_bounds_match_oneside_thresholds", - ), - param( - Interval(lower=0.0, upper=1.0), - Interval(lower=0.0, upper=1.1), + Interval(lower=-1.5, upper=1.5), False, None, - id="oneside_bounds_in_oneside_thresholds", + id="bounds_in_thresholds", ), - # Two-side bounds. One-side thresholds do not differ from two-side threshold - # in these cases. Hence, use two-side thresholds. param( - Interval(lower=-0.5, upper=1.0), - Interval(lower=-1.0, upper=1.1), + Interval(lower=-1.0, upper=1.0), + Interval(lower=-1.5, upper=1.0), False, None, - id="twoside_bounds_in_twoside_thresholds", + id="bounds_in_thresholds_single_side_match", ), param( - Interval(lower=-0.5, upper=1.0), - Interval(lower=-0.5, upper=1.0), + Interval(lower=-1.0, upper=1.0), + Interval(lower=-0.5, upper=0.5), True, - Interval(lower=-0.5, upper=1.0), - id="twoside_bounds_match_twoside_thresholds", + Interval(lower=-1.0, upper=1.0), + id="thresholds_in_bounds", ), param( - Interval(lower=-0.6, upper=1.1), + Interval(lower=-1.0, upper=1.0), Interval(lower=-0.5, upper=1.0), True, - Interval(lower=-0.6, upper=1.1), - id="twoside_bounds_cover_twoside_thresholds", - ), - param( - Interval(lower=-0.6, upper=1.1), - Interval(lower=-1.0, upper=0.5), - True, - Interval(lower=0.5, upper=1.1), - id="twoside_bounds_intersected_with_twoside_thresholds", + Interval(lower=-1.0, upper=-0.5), + id="thresholds_in_bounds_single_side_match", ), param( - Interval(lower=-0.6, upper=0.5), + Interval(lower=-0.5, upper=1.0), Interval(lower=-1.0, upper=0.5), True, - Interval(lower=0.5, 
upper=0.5), - id="twoside_bounds_partial_in_twoside_thresholds", + Interval(lower=0.5, upper=1.0), + id="bounds_intersected_with_thresholds", ), param( - Interval(lower=-1.0, upper=0.5), - Interval(lower=-0.6, upper=0.5), + Interval(lower=0.0, upper=1.0), + Interval(lower=-1.0, upper=0.0), True, - Interval(lower=-1.0, upper=0.5), - id="twoside_bounds_partial_cover_twoside_thresholds", + Interval(lower=0.0, upper=1.0), + id="bounds_intersected_with_thresholds_on_one_point", ), ], ) From bddab62f82e9435a67f41ca0a30f984be64f236e Mon Sep 17 00:00:00 2001 From: Di Jin Date: Tue, 14 Jan 2025 09:29:13 +0100 Subject: [PATCH 67/67] Ensure parameter bounds cover zero --- baybe/constraints/continuous.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/baybe/constraints/continuous.py b/baybe/constraints/continuous.py index 4675ac9d1..ce70d00c7 100644 --- a/baybe/constraints/continuous.py +++ b/baybe/constraints/continuous.py @@ -219,7 +219,6 @@ def get_threshold(self, parameter: NumericalContinuousParameter) -> Interval: * If lower < 0 and upper = 0, any value v with lower < v <= upper are treated zero. - Args: parameter: The parameter object. @@ -229,12 +228,18 @@ def get_threshold(self, parameter: NumericalContinuousParameter) -> Interval: Raises: ValueError: when parameter_name is not present in parameter list of this constraint. + ValueError: when parameter bounds do not cover zero. """ if parameter.name not in self.parameters: raise ValueError( f"The given parameter with name: {parameter.name} cannot " f"be found in the parameter list: {self.parameters}." ) + if not parameter.bounds.contains(0.0): + raise ValueError( + f"The bounds of the given parameter must cover zero but its bounds " + f"are ({parameter.bounds.lower}, {parameter.bounds.upper})." + ) return Interval( lower=self.relative_threshold * parameter.bounds.lower,