From 0d44dc996cb514b590ee17bf8b3b08df52f3e0d8 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 4 Oct 2024 17:07:36 +0200 Subject: [PATCH 01/23] Add files --- examples/Mixtures/Mixtures.md | 4 + .../slot_based.py} | 5 +- examples/Mixtures/traditional.py | 76 +++++++++++++++++++ 3 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 examples/Mixtures/Mixtures.md rename examples/{Constraints_Discrete/mixture_constraints.py => Mixtures/slot_based.py} (98%) create mode 100644 examples/Mixtures/traditional.py diff --git a/examples/Mixtures/Mixtures.md b/examples/Mixtures/Mixtures.md new file mode 100644 index 000000000..c4847d0f0 --- /dev/null +++ b/examples/Mixtures/Mixtures.md @@ -0,0 +1,4 @@ +# Mixtures + +These examples demonstrate how to set up mixture use cases in the traditional as well +as in the slot-based representation. \ No newline at end of file diff --git a/examples/Constraints_Discrete/mixture_constraints.py b/examples/Mixtures/slot_based.py similarity index 98% rename from examples/Constraints_Discrete/mixture_constraints.py rename to examples/Mixtures/slot_based.py index 3a07922a1..8a1c54a52 100644 --- a/examples/Constraints_Discrete/mixture_constraints.py +++ b/examples/Mixtures/slot_based.py @@ -1,4 +1,4 @@ -## Example for using a mixture use case in a discrete searchspace +## Example for Modelling a Slot-Based Mixture # Example for imposing sum constraints for discrete parameters. # The constraints simulate a situation where we want to mix up to three solvents. 
@@ -108,10 +108,9 @@ objective = SingleTargetObjective(target=NumericalTarget(name="Target_1", mode="MAX")) -### Creating and printing the campaign +### Creating the Campaign campaign = Campaign(searchspace=searchspace, objective=objective) -print(campaign) ### Manual verification of the constraint diff --git a/examples/Mixtures/traditional.py b/examples/Mixtures/traditional.py new file mode 100644 index 000000000..fe2b43567 --- /dev/null +++ b/examples/Mixtures/traditional.py @@ -0,0 +1,76 @@ +## Example for Modelling a Slot-Based Mixture + +# Explanation + +### Imports + +from baybe import Campaign +from baybe.constraints import ContinuousLinearConstraint +from baybe.parameters import NumericalContinuousParameter +from baybe.searchspace import SearchSpace, SubspaceContinuous +from baybe.targets import NumericalTarget + +# List of substance labels, divided into subgroups + +g1 = ["A", "B"] +g2 = ["mol1", "mol2"] +g3 = ["substance1", "substance2"] + +# Make continuous concentration parameters for each group + +p_g1_concentrations = [ + NumericalContinuousParameter(name=f"{name}", bounds=(0, 20)) for name in g1 +] +p_g2_concentrations = [ + NumericalContinuousParameter(name=f"{name}", bounds=(0, 40)) for name in g2 +] +p_g3_concentrations = [ + NumericalContinuousParameter(name=f"{name}", bounds=(0, 60)) for name in g3 +] + +# Ensure total sum is 100 + +c_total_sum = ContinuousLinearConstraint( + parameters=g1 + g2 + g3, + operator="=", + coefficients=[1.0] * len(g1 + g2 + g3), + rhs=100.0, +) + +# Ensure sum of group 1 is smaller than 40 + +c_g1_max = ContinuousLinearConstraint( + parameters=g1, + operator="<=", + coefficients=[1.0] * len(g1), + rhs=40, +) + +# Ensure sum of group 2 is larger than 60 + +c_g2_min = ContinuousLinearConstraint( + parameters=g2, + operator=">=", + coefficients=[1.0] * len(g2), + rhs=60, +) + +# Create the Campaign +searchspace = SearchSpace( + continuous=SubspaceContinuous( + parameters=p_g1_concentrations + p_g2_concentrations + 
p_g3_concentrations, + constraints_lin_eq=[c_total_sum], + constraints_lin_ineq=[c_g1_max, c_g2_min], + ), +) +campaign = Campaign( + searchspace=searchspace, + objective=NumericalTarget(name="MyTarget", mode="MAX").to_objective(), +) + +#### Look at some recommendations + +# We can quickly verify that the constraints imposed above are respected + +rec = campaign.recommend(10) +print(rec) From 9315451f7fd49d5bca9acfad65d25bd832abb193 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 4 Oct 2024 18:13:00 +0200 Subject: [PATCH 02/23] Rework slot-based mixture --- examples/Mixtures/slot_based.py | 218 ++++++++++++++++++++++---------- 1 file changed, 150 insertions(+), 68 deletions(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index 8a1c54a52..dece5b64d 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -1,14 +1,37 @@ ## Example for Modelling a Slot-Based Mixture -# Example for imposing sum constraints for discrete parameters. -# The constraints simulate a situation where we want to mix up to three solvents. -# However, their respective fractions need to sum up to 100. -# Also, the solvents should never be chosen twice, which requires various other constraints. - -# This example assumes some basic familiarity with using BayBE. -# We thus refer to [`campaign`](./../Basics/campaign.md) for a basic example. - -### Necessary imports for this example +### Terminology + +# Modelling a mixture is possible on a non-traditional way with something we refer to as +# **slot**. A slot consists of one parameter indicating the amount of a substance and +# another parameter indicating the type of substance (as label) that is in the slot. +# Contrary to traditional mixture modelling, the total number of parameters is not +# defined by how many substance choices we have, but by the maximum number of slots we +# want to allow. 
For instance, if we want to design a mixture with *up to five* +# components, we would need 5 slots, i.e. 10 parameters. + +# A corresponding search space with three slots could look like this: +# | Slot1_Label | Slot1_Amount | Slot2_Label | Slot2_Amount | Slot3_Label | Slot3_Amount | +# |:------------|:-------------|:------------|:-------------|:------------|:-------------| +# | Solvent1 | 10 | Solvent5 | 20 | Solvent4 | 30 | +# | Solvent1 | 30 | Solvent8 | 40 | Solvent2 | 30 | +# | Solvent3 | 20 | Solvent1 | 35 | Solvent9 | 30 | +# | Solvent2 | 15 | Solvent3 | 10 | Solvent1 | 30 | + +# This slot-based representation has one decided advantage compared to traditional +# modelling: We can utilize BayBE's label encodings for the label parameters. For +# instance, when mixing small molecules, the +# [`SubstanceParameter`](baybe.parameters.substance.SubstanceParameter) can be used to +# smartly encode the slot labels, enabling the algorithm to perform a chemically-aware +# mixture optimization. + +# In this example, we show how to design the search space and the various discrete +# constraints we need to impose. We simulate a situation where we want to mix up to +# three solvents, i.e. we will have 3 slots (6 parameters). Their respective amounts +# need to sum up to 100. Also, a solvents should never be chosen twice, which +# requires various other constraints. + +### Imports import math import os @@ -23,22 +46,21 @@ DiscreteSumConstraint, ThresholdCondition, ) -from baybe.objectives import SingleTargetObjective from baybe.parameters import NumericalDiscreteParameter, SubstanceParameter from baybe.searchspace import SearchSpace from baybe.targets import NumericalTarget from baybe.utils.dataframe import add_fake_measurements -### Experiment setup +### Parameter Setup -# This parameter denotes the tolerance with regard to the calculation of the sum. - -SUM_TOLERANCE = 1.0 +# Set some basic settings. 
+SUM_TOLERANCE = 0.1 # The tolerance we allow for the fulfillment of sum constraints SMOKE_TEST = "SMOKE_TEST" in os.environ +RESOLUTION = 5 if SMOKE_TEST else 11 # resolution of the discretization -# This parameter denotes the resolution of the discretization of the parameters -RESOLUTION = 5 if SMOKE_TEST else 12 +# Create the parameters for the slot labels. Each of our slots offers a choice between +# 4 solvents. dict_solvents = { "water": "O", @@ -46,75 +68,110 @@ "C2": "CC", "C3": "CCC", } -solvent1 = SubstanceParameter(name="Solv1", data=dict_solvents, encoding="MORDRED") -solvent2 = SubstanceParameter(name="Solv2", data=dict_solvents, encoding="MORDRED") -solvent3 = SubstanceParameter(name="Solv3", data=dict_solvents, encoding="MORDRED") +slot1_label = SubstanceParameter( + name="Slot1_Label", data=dict_solvents, encoding="MORDRED" +) +slot2_label = SubstanceParameter( + name="Slot2_Label", data=dict_solvents, encoding="MORDRED" +) +slot3_label = SubstanceParameter( + name="Slot3_Label", data=dict_solvents, encoding="MORDRED" +) -# Parameters for representing the fraction. +# Create the parameters for the slot amounts. -fraction1 = NumericalDiscreteParameter( - name="Frac1", values=list(np.linspace(0, 100, RESOLUTION)), tolerance=0.2 +slot1_amount = NumericalDiscreteParameter( + name="Slot1_Amount", values=list(np.linspace(0, 100, RESOLUTION)), tolerance=0.2 ) -fraction2 = NumericalDiscreteParameter( - name="Frac2", values=list(np.linspace(0, 100, RESOLUTION)), tolerance=0.2 +slot2_amount = NumericalDiscreteParameter( + name="Slot2_Amount", values=list(np.linspace(0, 100, RESOLUTION)), tolerance=0.2 ) -fraction3 = NumericalDiscreteParameter( - name="Frac3", values=list(np.linspace(0, 100, RESOLUTION)), tolerance=0.2 +slot3_amount = NumericalDiscreteParameter( + name="Slot3_Amount", values=list(np.linspace(0, 100, RESOLUTION)), tolerance=0.2 ) -parameters = [solvent1, solvent2, solvent3, fraction1, fraction2, fraction3] +# Store all parameters. 
+ +parameters = [ + slot1_label, + slot2_label, + slot3_label, + slot1_amount, + slot2_amount, + slot3_amount, +] + +### Constraint Setup + +# Like for all mixtures, let us ensure that the overall sum of slot amounts is always +# 100. + +sum_constraint = DiscreteSumConstraint( + parameters=["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"], + condition=ThresholdCondition(threshold=100, operator="=", tolerance=SUM_TOLERANCE), +) + +# We could have a situation where we do not care about the order of addition of +# components to the mixture. This comes with two additional constraints. + +# If there is no order of addition, it does not matter whether we have two slots +# with the same substance or just one holding the combined amounts of two slots +# with the same slot ingredient. Thus, let us make sure that no slot contains a +# duplicate label entry. -### Creating the constraint +no_duplicates_constraint = DiscreteNoLabelDuplicatesConstraint( + parameters=["Slot1_Label", "Slot2_Label", "Slot3_Label"] +) -# Since the constraints are required for the creation of the searchspace, we create -# them next. -# Note that we need a `PermutationInvarianceConstraint` here. -# The reason is that constraints are normally applied in a specific order. -# However, the fractions should be invariant under permutations. -# We thus require an explicit constraint for this. +# Next, we need to take care of permutation invariance. If our order of addition does +# not matter, the result of exchanging slot 1 with slot 3 does not change the mixture, +# i.e. the mixture slots are permutation invariant. + +# One complication arising for the permutation invariance in this case stems from the +# fact that we not only have a label per slot, but also a numerical amount. Now if +# this amount is zero, it actually does not matter what label the slot has (i.e. +# what substance should be considered for that slot), because we are adding 0 of it to +# the mixture anyway. 
In BayBE, we call this a "dependency", i.e. the slot labels +# depend on the slot amounts and are only relevant if the amount fulfills some +# condition (in this case "amount > 0"). The `DiscreteDependenciesConstraint` tells +# the `DiscretePermutationInvarianceConstraint` about these dependencies so that they +# are correctly included in the filtering process. perm_inv_constraint = DiscretePermutationInvarianceConstraint( - parameters=["Solv1", "Solv2", "Solv3"], + parameters=["Slot1_Label", "Slot2_Label", "Slot3_Label"], dependencies=DiscreteDependenciesConstraint( - parameters=["Frac1", "Frac2", "Frac3"], + parameters=["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"], conditions=[ ThresholdCondition(threshold=0.0, operator=">"), ThresholdCondition(threshold=0.0, operator=">"), ThresholdCondition(threshold=0.0, operator=">"), ], - affected_parameters=[["Solv1"], ["Solv2"], ["Solv3"]], + affected_parameters=[["Slot1_Label"], ["Slot2_Label"], ["Slot3_Label"]], ), ) -# This is now the actual sum constraint - -sum_constraint = DiscreteSumConstraint( - parameters=["Frac1", "Frac2", "Frac3"], - condition=ThresholdCondition(threshold=100, operator="=", tolerance=SUM_TOLERANCE), -) - -# The permutation invariance might create duplicate labels. -# We thus include a constraint to remove them. - -no_duplicates_constraint = DiscreteNoLabelDuplicatesConstraint( - parameters=["Solv1", "Solv2", "Solv3"] -) +# Store all constraints. constraints = [perm_inv_constraint, sum_constraint, no_duplicates_constraint] -### Creating the searchspace and the objective +# ```{admonition} Order of Addition +# :class: note +# Whether you need to impose the constraints for removing duplicates and +# permutation invariance depends on your use case. If the order of addition is relevant +# to your mixture, there is no permutation invariance and one could argue that +# duplicates should also be allowed if subsequent steps can add the same substance. 
+# ``` -searchspace = SearchSpace.from_product(parameters=parameters, constraints=constraints) - -objective = SingleTargetObjective(target=NumericalTarget(name="Target_1", mode="MAX")) - -### Creating the Campaign +### Campaign Setup +searchspace = SearchSpace.from_product(parameters=parameters, constraints=constraints) +objective = NumericalTarget(name="Target_1", mode="MAX").to_objective() campaign = Campaign(searchspace=searchspace, objective=objective) -### Manual verification of the constraint +### Verification of Constraints -# The following loop performs some recommendations and manually verifies the given constraints. +# Now let us take a look at some recommendations for this campaign and check whether +# the constraints we imposed are indeed adhered to. N_ITERATIONS = 2 if SMOKE_TEST else 3 for kIter in range(N_ITERATIONS): @@ -122,8 +179,10 @@ print("## ASSERTS ##") print( - "No. of searchspace entries where fractions do not sum to 100.0: ", - campaign.searchspace.discrete.exp_rep[["Frac1", "Frac2", "Frac3"]] + "No. of searchspace entries where amounts do not sum to 100.0: ", + campaign.searchspace.discrete.exp_rep[ + ["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"] + ] .sum(axis=1) .apply(lambda x: x - 100.0) .abs() @@ -131,18 +190,26 @@ .sum(), ) print( - "No. of searchspace entries that have duplicate solvent labels: ", - campaign.searchspace.discrete.exp_rep[["Solv1", "Solv2", "Solv3"]] + "No. of searchspace entries that have duplicate slot labels: ", + campaign.searchspace.discrete.exp_rep[ + ["Slot1_Label", "Slot2_Label", "Slot3_Label"] + ] .nunique(axis=1) .ne(3) .sum(), ) print( - "No. of searchspace entries with permutation-invariant combinations: ", - campaign.searchspace.discrete.exp_rep[["Solv1", "Solv2", "Solv3"]] + "No. 
of searchspace entries with permutation-invariant combinations:", + campaign.searchspace.discrete.exp_rep[ + ["Slot1_Label", "Slot2_Label", "Slot3_Label"] + ] .apply(frozenset, axis=1) .to_frame() - .join(campaign.searchspace.discrete.exp_rep[["Frac1", "Frac2", "Frac3"]]) + .join( + campaign.searchspace.discrete.exp_rep[ + ["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"] + ] + ) .duplicated() .sum(), ) @@ -151,7 +218,12 @@ # points than intended due to numeric rounding print( f"No. of unique 1-solvent entries (exp. {math.comb(len(dict_solvents), 1)*1})", - (campaign.searchspace.discrete.exp_rep[["Frac1", "Frac2", "Frac3"]] == 0.0) + ( + campaign.searchspace.discrete.exp_rep[ + ["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"] + ] + == 0.0 + ) .sum(axis=1) .eq(2) .sum(), @@ -159,7 +231,12 @@ print( f"No. of unique 2-solvent entries (exp." f" {math.comb(len(dict_solvents), 2)*(RESOLUTION-2)})", - (campaign.searchspace.discrete.exp_rep[["Frac1", "Frac2", "Frac3"]] == 0.0) + ( + campaign.searchspace.discrete.exp_rep[ + ["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"] + ] + == 0.0 + ) .sum(axis=1) .eq(1) .sum(), @@ -167,7 +244,12 @@ print( f"No. of unique 3-solvent entries (exp." 
f" {math.comb(len(dict_solvents), 3)*((RESOLUTION-3)*(RESOLUTION-2))//2})", - (campaign.searchspace.discrete.exp_rep[["Frac1", "Frac2", "Frac3"]] == 0.0) + ( + campaign.searchspace.discrete.exp_rep[ + ["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"] + ] + == 0.0 + ) .sum(axis=1) .eq(0) .sum(), From 729a8710e326bfebd2cbb45d3104b45de93c8140 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 4 Oct 2024 18:38:17 +0200 Subject: [PATCH 03/23] Add traditional mixture example --- examples/Mixtures/traditional.py | 78 +++++++++++++++++++------------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/examples/Mixtures/traditional.py b/examples/Mixtures/traditional.py index fe2b43567..e83d8d8ab 100644 --- a/examples/Mixtures/traditional.py +++ b/examples/Mixtures/traditional.py @@ -1,6 +1,14 @@ -## Example for Modelling a Slot-Based Mixture +## Example for Modelling a Mixture in Traditional Representation -# Explanation +# When modelling mixtures one is typically confronted with a large set of substances to +# chose from. In the traditional representation, for each of these substance choices we +# would have one single parameter describing the amount of that substance which should +# go into the mixture. Then, there is one overall constraint to ensure that all substance +# amounts sum to 100. Additionally, there could be more constraints, for instance if +# there are subgroups of substances that have their own constraints. + +# In this example we will create a simple mixture of up to 6 components. There are +# three subgroups of substances: solvents, bases and phase agents. ### Imports @@ -10,25 +18,31 @@ from baybe.searchspace import SearchSpace, SubspaceContinuous from baybe.targets import NumericalTarget -# List of substance labels, divided into subgroups +### Parameters Setup + +# List of substance labels, divided into subgroups. 
-g1 = ["A", "B"] -g2 = ["mol1", "mol2"] -g3 = ["substance1", "substance2"] +g1 = ["Solvent1", "Solvent2"] +g2 = ["Base1", "Base2"] +g3 = ["PhaseAgent1", "PhaseAgent2"] -# Make continuous concentration parameters for each group +# Make continuous parameters for each subtance amount for each group. Here, the maximum +# amount for each substance depends on the group, i.e. we would allow more addition of each +# solvent compared to bases or phase agents. -p_g1_concentrations = [ - NumericalContinuousParameter(name=f"{name}", bounds=(0, 20)) for name in g1 +p_g1_amounts = [ + NumericalContinuousParameter(name=f"{name}", bounds=(0, 80)) for name in g1 ] -p_g2_concentrations = [ - NumericalContinuousParameter(name=f"{name}", bounds=(0, 40)) for name in g2 +p_g2_amounts = [ + NumericalContinuousParameter(name=f"{name}", bounds=(0, 20)) for name in g2 ] -p_g3_concentrations = [ - NumericalContinuousParameter(name=f"{name}", bounds=(0, 60)) for name in g3 +p_g3_amounts = [ + NumericalContinuousParameter(name=f"{name}", bounds=(0, 5)) for name in g3 ] -# Ensure total sum is 100 +### Constraints Setup + +# Ensure total sum is 100%. c_total_sum = ContinuousLinearConstraint( parameters=g1 + g2 + g3, @@ -37,30 +51,30 @@ rhs=100.0, ) -# Ensure sum of group 1 is smaller than 40 - -c_g1_max = ContinuousLinearConstraint( - parameters=g1, - operator="<=", - coefficients=[1.0] * len(g1), - rhs=40, -) - -# Ensure sum of group 2 is larger than 60 +# Ensure bases make up at least 10% of the mixture. c_g2_min = ContinuousLinearConstraint( parameters=g2, operator=">=", coefficients=[1.0] * len(g2), - rhs=60, + rhs=10, +) + +# Ensure phase agents make up no more than 5%. 
+ +c_g3_max = ContinuousLinearConstraint( + parameters=g3, + operator="<=", + coefficients=[1.0] * len(g3), + rhs=5, ) -# Create the Campaign +### Campaign Setup + searchspace = SearchSpace( - continuous=SubspaceContinuous( - parameters=p_g1_concentrations + p_g2_concentrations + p_g3_concentrations, - constraints_lin_eq=[c_total_sum], - constraints_lin_ineq=[c_g1_max, c_g2_min], + continuous=SubspaceContinuous.from_product( + parameters=p_g1_amounts + p_g2_amounts + p_g3_amounts, + constraints=[c_total_sum, c_g2_min, c_g3_max], ), ) campaign = Campaign( @@ -68,9 +82,9 @@ objective=NumericalTarget(name="MyTarget", mode="MAX").to_objective(), ) -#### Look at some recommendations +### Inspect Some Recommendations -# We can quickly verify that the constraints imposed above are respected +# We can quickly verify that the constraints imposed above are respected. rec = campaign.recommend(10) print(rec) From 1829de6a5aac62da5c6b4f5a4887c63c187d5013 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 4 Oct 2024 18:40:28 +0200 Subject: [PATCH 04/23] Update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 391cf9bb5..31137a7bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added - `allow_missing` and `allow_extra` keyword arguments to `Objective.transform` +- Example for a traditional mixture + +### Changed +- Example for slot-based mixtures has been revised and grouped together with the new + traditional mixture example ### Deprecations - Passing a dataframe via the `data` argument to `Objective.transform` is no longer From 4d38805a2a6dc2635dde39645b33e235cd762737 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 4 Oct 2024 18:49:51 +0200 Subject: [PATCH 05/23] Fix filename --- examples/Mixtures/{Mixtures.md => Mixtures_Header.md} | 0 1 file changed, 0 insertions(+), 0 
deletions(-) rename examples/Mixtures/{Mixtures.md => Mixtures_Header.md} (100%) diff --git a/examples/Mixtures/Mixtures.md b/examples/Mixtures/Mixtures_Header.md similarity index 100% rename from examples/Mixtures/Mixtures.md rename to examples/Mixtures/Mixtures_Header.md From 247df6707b5839377bdadcaa21b1a36f1a4e97ca Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 4 Oct 2024 19:02:54 +0200 Subject: [PATCH 06/23] Fix references --- docs/userguide/constraints.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/userguide/constraints.md b/docs/userguide/constraints.md index ec9d564e9..7212fcaa2 100644 --- a/docs/userguide/constraints.md +++ b/docs/userguide/constraints.md @@ -197,7 +197,7 @@ Without this constraint, combinations like below would be possible: | 3 | Octanol | Octanol | would be excluded | The usage of `DiscreteNoLabelDuplicatesConstraint` is part of the -[example on mixtures](../../examples/Constraints_Discrete/mixture_constraints). +[example on slot-based mixtures](../../examples/Mixtures/slot_based). ### DiscreteLinkedParametersConstraint The [`DiscreteLinkedParametersConstraint`](baybe.constraints.discrete.DiscreteLinkedParametersConstraint) @@ -367,7 +367,7 @@ DiscretePermutationInvarianceConstraint( ``` The usage of `DiscretePermutationInvarianceConstraint` is also part of the -[example on mixtures](../../examples/Constraints_Discrete/mixture_constraints). +[example on slot-based mixtures](../../examples/Mixtures/slot_based). 
### DiscreteCustomConstraint With a [`DiscreteCustomConstraint`](baybe.constraints.discrete.DiscreteCustomConstraint) From 6d11d8de5215291857b41104b553187b7f968b5f Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Tue, 15 Oct 2024 00:12:22 +0200 Subject: [PATCH 07/23] Improve text --- examples/Mixtures/slot_based.py | 26 +++++++++++++++++--------- examples/Mixtures/traditional.py | 4 ++-- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index dece5b64d..210419aa3 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -2,7 +2,7 @@ ### Terminology -# Modelling a mixture is possible on a non-traditional way with something we refer to as +# Modelling a mixture is possible in a non-traditional way with something we refer to as # **slot**. A slot consists of one parameter indicating the amount of a substance and # another parameter indicating the type of substance (as label) that is in the slot. # Contrary to traditional mixture modelling, the total number of parameters is not @@ -13,10 +13,10 @@ # A corresponding search space with three slots could look like this: # | Slot1_Label | Slot1_Amount | Slot2_Label | Slot2_Amount | Slot3_Label | Slot3_Amount | # |:------------|:-------------|:------------|:-------------|:------------|:-------------| -# | Solvent1 | 10 | Solvent5 | 20 | Solvent4 | 30 | +# | Solvent1 | 10 | Solvent5 | 20 | Solvent4 | 70 | # | Solvent1 | 30 | Solvent8 | 40 | Solvent2 | 30 | -# | Solvent3 | 20 | Solvent1 | 35 | Solvent9 | 30 | -# | Solvent2 | 15 | Solvent3 | 10 | Solvent1 | 30 | +# | Solvent3 | 20 | Solvent1 | 35 | Solvent9 | 45 | +# | Solvent2 | 15 | Solvent3 | 10 | Solvent1 | 45 | # This slot-based representation has one decided advantage compared to traditional # modelling: We can utilize BayBE's label encodings for the label parameters. For @@ -31,6 +31,14 @@ # need to sum up to 100. 
Also, a solvents should never be chosen twice, which # requires various other constraints. +# ```{admonition} Discrete vs. Continuous Modelling +# :class: important +# In here, we utilize only discrete parameters, although in principle, the parameters +# corresponding to amounts could also be modelled as continuous numbers. This however, +# would mean some of the constraints we need act between discrete and continuous +# parameters - which is not supported at the moment. +# ``` + ### Imports import math @@ -51,15 +59,15 @@ from baybe.targets import NumericalTarget from baybe.utils.dataframe import add_fake_measurements -### Parameter Setup - -# Set some basic settings. +# Basic example settings. SUM_TOLERANCE = 0.1 # The tolerance we allow for the fulfillment of sum constraints SMOKE_TEST = "SMOKE_TEST" in os.environ RESOLUTION = 5 if SMOKE_TEST else 11 # resolution of the discretization -# Create the parameters for the slot labels. Each of our slots offers a choice between +### Parameter Setup + +# Create parameters for the slot labels. Each of our slots offers a choice between # 4 solvents. dict_solvents = { @@ -78,7 +86,7 @@ name="Slot3_Label", data=dict_solvents, encoding="MORDRED" ) -# Create the parameters for the slot amounts. +# Create parameters for the slot amounts. slot1_amount = NumericalDiscreteParameter( name="Slot1_Amount", values=list(np.linspace(0, 100, RESOLUTION)), tolerance=0.2 diff --git a/examples/Mixtures/traditional.py b/examples/Mixtures/traditional.py index e83d8d8ab..dfb74f2ec 100644 --- a/examples/Mixtures/traditional.py +++ b/examples/Mixtures/traditional.py @@ -1,13 +1,13 @@ ## Example for Modelling a Mixture in Traditional Representation -# When modelling mixtures one is typically confronted with a large set of substances to +# When modelling mixtures, one is typically confronted with a large set of substances to # chose from. 
In the traditional representation, for each of these substance choices we # would have one single parameter describing the amount of that substance which should # go into the mixture. Then, there is one overall constraint to ensure that all substance # amounts sum to 100. Additionally, there could be more constraints, for instance if # there are subgroups of substances that have their own constraints. -# In this example we will create a simple mixture of up to 6 components. There are +# In this example, we will create a simple mixture of up to 6 components. There are # three subgroups of substances: solvents, bases and phase agents. ### Imports From 5d4e45e450668f2950871fb302c22f9424dd31ea Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Mon, 21 Oct 2024 15:39:54 +0200 Subject: [PATCH 08/23] Update text --- examples/Mixtures/slot_based.py | 8 ++------ examples/Mixtures/traditional.py | 4 ++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index 210419aa3..c41f95e1a 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -3,7 +3,7 @@ ### Terminology # Modelling a mixture is possible in a non-traditional way with something we refer to as -# **slot**. A slot consists of one parameter indicating the amount of a substance and +# **slots**. A slot consists of one parameter indicating the amount of a substance and # another parameter indicating the type of substance (as label) that is in the slot. 
# Contrary to traditional mixture modelling, the total number of parameters is not # defined by how many substance choices we have, but by the maximum number of slots we @@ -16,7 +16,7 @@ # | Solvent1 | 10 | Solvent5 | 20 | Solvent4 | 70 | # | Solvent1 | 30 | Solvent8 | 40 | Solvent2 | 30 | # | Solvent3 | 20 | Solvent1 | 35 | Solvent9 | 45 | -# | Solvent2 | 15 | Solvent3 | 10 | Solvent1 | 45 | +# | Solvent2 | 15 | Solvent3 | 40 | Solvent1 | 45 | # This slot-based representation has one decided advantage compared to traditional # modelling: We can utilize BayBE's label encodings for the label parameters. For @@ -98,8 +98,6 @@ name="Slot3_Amount", values=list(np.linspace(0, 100, RESOLUTION)), tolerance=0.2 ) -# Store all parameters. - parameters = [ slot1_label, slot2_label, @@ -158,8 +156,6 @@ ), ) -# Store all constraints. - constraints = [perm_inv_constraint, sum_constraint, no_duplicates_constraint] # ```{admonition} Order of Addition diff --git a/examples/Mixtures/traditional.py b/examples/Mixtures/traditional.py index dfb74f2ec..06933a93f 100644 --- a/examples/Mixtures/traditional.py +++ b/examples/Mixtures/traditional.py @@ -4,7 +4,7 @@ # chose from. In the traditional representation, for each of these substance choices we # would have one single parameter describing the amount of that substance which should # go into the mixture. Then, there is one overall constraint to ensure that all substance -# amounts sum to 100. Additionally, there could be more constraints, for instance if +# amounts sum to 100%. Additionally, there could be more constraints, for instance if # there are subgroups of substances that have their own constraints. # In this example, we will create a simple mixture of up to 6 components. There are @@ -20,7 +20,7 @@ ### Parameters Setup -# List of substance labels, divided into subgroups. +# Create lists of substance labels, divided into subgroups. 
g1 = ["Solvent1", "Solvent2"] g2 = ["Base1", "Base2"] From 86d5bcbca125096b4858625ea748fd028d00e21a Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 22 Oct 2024 10:59:50 +0200 Subject: [PATCH 09/23] Revise slot example --- examples/Mixtures/slot_based.py | 144 ++++++++++++++++++-------------- 1 file changed, 79 insertions(+), 65 deletions(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index c41f95e1a..241467485 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -1,42 +1,41 @@ -## Example for Modelling a Slot-Based Mixture +## Modeling a Slot-Based Mixture ### Terminology -# Modelling a mixture is possible in a non-traditional way with something we refer to as -# **slots**. A slot consists of one parameter indicating the amount of a substance and -# another parameter indicating the type of substance (as label) that is in the slot. -# Contrary to traditional mixture modelling, the total number of parameters is not -# defined by how many substance choices we have, but by the maximum number of slots we -# want to allow. For instance, if we want to design a mixture with *up to five* -# components, we would need 5 slots, i.e. 10 parameters. +# Modeling a mixture is possible in a non-traditional way by using a concept we +# refer to as a **slot**. A slot is represented through the combination of two +# parameters: one indicating the amount of a mixture ingredient, and another indicating +# the type of ingredient (as a label) populating the slot. Unlike [traditional +# mixture modeling](/examples/Mixtures/traditional.md), the total number of parameters +# is not determined by how many ingredient choices we have, but by the maximum number of +# slots we allow. For instance, if we want to design a mixture with *up to three* +# ingredients, we can do so by creating three slots represented by six parameters. 
-# A corresponding search space with three slots could look like this: +# A corresponding search space could look like this: # | Slot1_Label | Slot1_Amount | Slot2_Label | Slot2_Amount | Slot3_Label | Slot3_Amount | # |:------------|:-------------|:------------|:-------------|:------------|:-------------| -# | Solvent1 | 10 | Solvent5 | 20 | Solvent4 | 70 | +# | Solvent1 | 10 | Solvent5 | 20 | Solvent4 | 30 | # | Solvent1 | 30 | Solvent8 | 40 | Solvent2 | 30 | -# | Solvent3 | 20 | Solvent1 | 35 | Solvent9 | 45 | -# | Solvent2 | 15 | Solvent3 | 40 | Solvent1 | 45 | +# | Solvent3 | 20 | Solvent1 | 35 | Solvent9 | 30 | +# | Solvent2 | 15 | Solvent3 | 10 | Solvent1 | 30 | -# This slot-based representation has one decided advantage compared to traditional -# modelling: We can utilize BayBE's label encodings for the label parameters. For +# The slot-based representation has one decided advantage over traditional +# modeling: We can use BayBE's label encodings for the label parameters. For # instance, when mixing small molecules, the # [`SubstanceParameter`](baybe.parameters.substance.SubstanceParameter) can be used to # smartly encode the slot labels, enabling the algorithm to perform a chemically-aware # mixture optimization. -# In this example, we show how to design the search space and the various discrete -# constraints we need to impose. We simulate a situation where we want to mix up to -# three solvents, i.e. we will have 3 slots (6 parameters). Their respective amounts -# need to sum up to 100. Also, a solvents should never be chosen twice, which -# requires various other constraints. +# In this example, we show how to design such a search space, including the various +# discrete constraints we need to impose. We simulate a situation where we want to mix +# up to three solvents, whose respective amounts must add up to 100. -# ```{admonition} Discrete vs. Continuous Modelling +# ```{admonition} Discrete vs. 
Continuous Modeling # :class: important -# In here, we utilize only discrete parameters, although in principle, the parameters -# corresponding to amounts could also be modelled as continuous numbers. This however, -# would mean some of the constraints we need act between discrete and continuous -# parameters - which is not supported at the moment. +# Here, we only use discrete parameters, although in principle the parameters +# corresponding to amounts could also be modeled as continuous numbers. However, this +# would imply that some of the constraints would have to act on both discrete and +# continuous parameters, which is not currently supported. # ``` ### Imports @@ -59,16 +58,16 @@ from baybe.targets import NumericalTarget from baybe.utils.dataframe import add_fake_measurements -# Basic example settings. +# Basic example settings: -SUM_TOLERANCE = 0.1 # The tolerance we allow for the fulfillment of sum constraints SMOKE_TEST = "SMOKE_TEST" in os.environ -RESOLUTION = 5 if SMOKE_TEST else 11 # resolution of the discretization +SUM_TOLERANCE = 0.1 # tolerance allowed to fulfill the sum constraints +RESOLUTION = 5 if SMOKE_TEST else 11 # resolution for discretizing the slot amounts ### Parameter Setup -# Create parameters for the slot labels. Each of our slots offers a choice between -# 4 solvents. +# First, we create the parameters for the slot labels. Each slot offers a choice of +# four solvents: dict_solvents = { "water": "O", @@ -86,18 +85,20 @@ name="Slot3_Label", data=dict_solvents, encoding="MORDRED" ) -# Create parameters for the slot amounts. 
+# Next, we create the parameters representing the slot amounts: slot1_amount = NumericalDiscreteParameter( - name="Slot1_Amount", values=list(np.linspace(0, 100, RESOLUTION)), tolerance=0.2 + name="Slot1_Amount", values=np.linspace(0, 100, RESOLUTION), tolerance=0.2 ) slot2_amount = NumericalDiscreteParameter( - name="Slot2_Amount", values=list(np.linspace(0, 100, RESOLUTION)), tolerance=0.2 + name="Slot2_Amount", values=np.linspace(0, 100, RESOLUTION), tolerance=0.2 ) slot3_amount = NumericalDiscreteParameter( - name="Slot3_Amount", values=list(np.linspace(0, 100, RESOLUTION)), tolerance=0.2 + name="Slot3_Amount", values=np.linspace(0, 100, RESOLUTION), tolerance=0.2 ) +# We collect all parameters in a single list: + parameters = [ slot1_label, slot2_label, @@ -109,39 +110,44 @@ ### Constraint Setup -# Like for all mixtures, let us ensure that the overall sum of slot amounts is always -# 100. - -sum_constraint = DiscreteSumConstraint( - parameters=["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"], - condition=ThresholdCondition(threshold=100, operator="=", tolerance=SUM_TOLERANCE), -) +# For the sake of demonstration, we consider a scenario where we do *not* care about the +# order of addition of components to the mixture, which imposes two additional +# constraints. +# +# ```{admonition} Order of Addition +# :class: note +# Whether you need to impose the constraints for removing duplicates and imposing +# permutation invariance depends on your use case. If the order of addition is relevant +# to your mixture, the permutation invariance constraint should be discarded and one +# could further argue that adding the same substance multiple times should be allowed. +# ``` -# We could have a situation where we do not care about the order of addition of -# components to the mixture. This comes with two additional constraints. 
+#### Duplicate Substances -# If there is no order of addition, it does not matter whether we have two slots -# with the same substance or just one holding the combined amounts of two slots -# with the same slot ingredient. Thus, let us make sure that no slot contains a -# duplicate label entry. +# Assuming that the order of addition is irrelevant, there is no difference between +# having two slots with the same substance or having only one slot with the combined +# amounts. Thus, we want to make sure that there are no such duplicate label entries: no_duplicates_constraint = DiscreteNoLabelDuplicatesConstraint( parameters=["Slot1_Label", "Slot2_Label", "Slot3_Label"] ) +#### Permutation Invariance + # Next, we need to take care of permutation invariance. If our order of addition does -# not matter, the result of exchanging slot 1 with slot 3 does not change the mixture, -# i.e. the mixture slots are permutation invariant. - -# One complication arising for the permutation invariance in this case stems from the -# fact that we not only have a label per slot, but also a numerical amount. Now if -# this amount is zero, it actually does not matter what label the slot has (i.e. -# what substance should be considered for that slot), because we are adding 0 of it to -# the mixture anyway. In BayBE, we call this a "dependency", i.e. the slot labels -# depend on the slot amounts and are only relevant if the amount fulfills some -# condition (in this case "amount > 0"). The `DiscreteDependenciesConstraint` tells -# the `DiscretePermutationInvarianceConstraint` about these dependencies so that they -# are correctly included in the filtering process. +# not matter, the result of interchanging any two slots does not alter the overall +# mixture, i.e. the mixture slots are are considered permutation-invariant. + +# A complication with permutation invariance arises from the fact that we have not +# only a label per slot, but also a numerical amount. 
If this amount is zero, then the +# label of the slot becomes meaningless (i.e. which ingredient should be considered for the +# slot), because adding zero of it does not change the mixture. In BayBE, we call +# this a "dependency", i.e. the slot labels depend on the slot amounts and are only +# relevant if the amount satisfies some condition (in this case "amount > 0"). + +# The {class}`~baybe.constraints.discrete.DiscreteDependenciesConstraint` informs the +# {class}`~baybe.constraints.discrete.DiscretePermutationInvarianceConstraint` about +# these dependencies so that they are correctly included in the filtering process: perm_inv_constraint = DiscretePermutationInvarianceConstraint( parameters=["Slot1_Label", "Slot2_Label", "Slot3_Label"], @@ -156,18 +162,26 @@ ), ) +#### Substance Amounts + +# Interpreting the slot amounts as percentages, we need to ensure that their total is +# always 100: + +sum_constraint = DiscreteSumConstraint( + parameters=["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"], + condition=ThresholdCondition(threshold=100, operator="=", tolerance=SUM_TOLERANCE), +) + + +# We store all constraints in a single list: + constraints = [perm_inv_constraint, sum_constraint, no_duplicates_constraint] -# ```{admonition} Order of Addition -# :class: note -# Whether you need to impose the constraints for removing duplicates and -# permutation invariance depends on your use case. If the order of addition is relevant -# to your mixture, there is no permutation invariance and one could argue that -# duplicates should also be allowed if subsequent steps can add the same substance. 
-# ``` ### Campaign Setup +# With all basic building blocks in place, we can now assemble our campaign: + searchspace = SearchSpace.from_product(parameters=parameters, constraints=constraints) objective = NumericalTarget(name="Target_1", mode="MAX").to_objective() campaign = Campaign(searchspace=searchspace, objective=objective) From a4a9b4c7cd04681975884522e621c58c56340a2b Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 22 Oct 2024 23:51:51 +0200 Subject: [PATCH 10/23] Rework assertion part * Drop campaign, iterations and smoke test * Show the search space * Group into constraint and span verification * Explain stars and bars logic --- examples/Mixtures/slot_based.py | 172 ++++++++++++++------------------ 1 file changed, 76 insertions(+), 96 deletions(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index 241467485..af32ea820 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -41,11 +41,9 @@ ### Imports import math -import os import numpy as np -from baybe import Campaign from baybe.constraints import ( DiscreteDependenciesConstraint, DiscreteNoLabelDuplicatesConstraint, @@ -54,15 +52,12 @@ ThresholdCondition, ) from baybe.parameters import NumericalDiscreteParameter, SubstanceParameter -from baybe.searchspace import SearchSpace -from baybe.targets import NumericalTarget -from baybe.utils.dataframe import add_fake_measurements +from baybe.searchspace.discrete import SubspaceDiscrete # Basic example settings: -SMOKE_TEST = "SMOKE_TEST" in os.environ SUM_TOLERANCE = 0.1 # tolerance allowed to fulfill the sum constraints -RESOLUTION = 5 if SMOKE_TEST else 11 # resolution for discretizing the slot amounts +RESOLUTION = 5 # resolution for discretizing the slot amounts ### Parameter Setup @@ -178,101 +173,86 @@ constraints = [perm_inv_constraint, sum_constraint, no_duplicates_constraint] -### Campaign Setup +### Search Space Creation -# With all basic building blocks in place, we can now assemble 
our campaign: +# With all building blocks in place, we can now assemble our discrete space and inspect +# its configurations: -searchspace = SearchSpace.from_product(parameters=parameters, constraints=constraints) -objective = NumericalTarget(name="Target_1", mode="MAX").to_objective() -campaign = Campaign(searchspace=searchspace, objective=objective) +space = SubspaceDiscrete.from_product(parameters=parameters, constraints=constraints) +print(space.exp_rep) ### Verification of Constraints -# Now let us take a look at some recommendations for this campaign and check whether -# the constraints we imposed are indeed adhered to. +# Let us programmatically assert that all constraints are satisfied: -N_ITERATIONS = 2 if SMOKE_TEST else 3 -for kIter in range(N_ITERATIONS): - print(f"\n#### ITERATION {kIter+1} ####") +amounts = space.exp_rep[["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"]] +labels = space.exp_rep[["Slot1_Label", "Slot2_Label", "Slot3_Label"]] - print("## ASSERTS ##") - print( - "No. of searchspace entries where amounts do not sum to 100.0: ", - campaign.searchspace.discrete.exp_rep[ - ["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"] - ] - .sum(axis=1) - .apply(lambda x: x - 100.0) - .abs() - .gt(SUM_TOLERANCE) - .sum(), - ) - print( - "No. of searchspace entries that have duplicate slot labels: ", - campaign.searchspace.discrete.exp_rep[ - ["Slot1_Label", "Slot2_Label", "Slot3_Label"] - ] - .nunique(axis=1) - .ne(3) - .sum(), - ) - print( - "No. of searchspace entries with permutation-invariant combinations:", - campaign.searchspace.discrete.exp_rep[ - ["Slot1_Label", "Slot2_Label", "Slot3_Label"] - ] - .apply(frozenset, axis=1) - .to_frame() - .join( - campaign.searchspace.discrete.exp_rep[ - ["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"] - ] - ) - .duplicated() - .sum(), - ) - # The following asserts only work if the tolerance for the threshold condition in - # the constraint are not 0. 
Otherwise, the sum/prod constraints will remove more - # points than intended due to numeric rounding - print( - f"No. of unique 1-solvent entries (exp. {math.comb(len(dict_solvents), 1)*1})", - ( - campaign.searchspace.discrete.exp_rep[ - ["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"] - ] - == 0.0 - ) - .sum(axis=1) - .eq(2) - .sum(), - ) - print( - f"No. of unique 2-solvent entries (exp." - f" {math.comb(len(dict_solvents), 2)*(RESOLUTION-2)})", - ( - campaign.searchspace.discrete.exp_rep[ - ["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"] - ] - == 0.0 - ) - .sum(axis=1) - .eq(1) - .sum(), - ) + +# * All amounts sum to 100: + +n_wrong_sum = amounts.sum(axis=1).apply(lambda x: x - 100).abs().gt(SUM_TOLERANCE).sum() +assert n_wrong_sum == 0 +print("Number of configurations whose amounts do not sum to 100: ", n_wrong_sum) + + +# * There are no duplicate slot labels: + +n_duplicates = labels.nunique(axis=1).ne(3).sum() +assert n_duplicates == 0 +print("Number of configurations with duplicate slot labels: ", n_duplicates) + + +# * There are no permutation-invariant configurations: + +n_permute = labels.apply(frozenset, axis=1).to_frame().join(amounts).duplicated().sum() +assert n_permute == 0 +print("Number of permuted configurations: ", n_permute) + + +### Verification of Span + +# Finally, we also assert that we have completely spanned the space of allowed +# configurations by comparing the numbers of unique `K`-solvent entries against their +# theoretical values. + +# ```{admonition} Theoretical Span +# :class: info + +# The number of possible `K`-solvent entries can be found by imagining the corresponding +# [traditional mixture representation](/examples/Mixtures/traditional.md) and solving a +# slightly more complex version of the ["stars and bars" +# problem](https://en.wikipedia.org/wiki/Stars_and_bars_(combinatorics)), where the +# number of non-empty bins is fixed.
That is, we need to ask how many possible ways +# exist to distribute `N` items (= increase from one percentage amount to the next) +# across `M` bins (= number of solvents) if exactly `K` bins are non-empty (= number of +# solvents in the configurations). +# +# There are `(M choose K)` ways to select the non-empty buckets. When distributing the +# `N` items, one item needs to go to each of the `K` buckets for it to be non-empty. +# The remaining `N - K` items can be freely distributed among the `K` buckets. The +# number of configurations for the latter is given by the "stars and bars" formula, +# which states that `X` indistinguishable items can be placed in `Y` distinguishable +# bins in `((X + Y -1) choose (Y - 1))` ways. Setting `X`=`N-K` and `Y`=`K` gives +# `((N - 1) choose (K - 1))`. Combined with the former count, we get the formula +# implemented in the helper function below. +# ``` + +# Helper function to compute the theoretical numbers: + + +def n_combinations(N: int, M: int, K: int) -> int: + """Get number of ways to put `N` items into `M` bins yielding `K` non-empty bins.""" + return math.comb(M, K) * math.comb(N - 1, K - 1) + + +# Verify that the space is fully spanned: + +for K in range(1, 4): + n_combinations_expected = n_combinations(RESOLUTION - 1, len(dict_solvents), K) + n_combinations_actual = (amounts != 0).sum(axis=1).eq(K).sum() + assert n_combinations_expected == n_combinations_actual print( - f"No. of unique 3-solvent entries (exp." 
- f" {math.comb(len(dict_solvents), 3)*((RESOLUTION-3)*(RESOLUTION-2))//2})", - ( - campaign.searchspace.discrete.exp_rep[ - ["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"] - ] - == 0.0 - ) - .sum(axis=1) - .eq(0) - .sum(), + f"Number of unique {K}-solvent entries: " + f"{n_combinations_actual} ({n_combinations_expected} expected)" ) - - rec = campaign.recommend(batch_size=5) - add_fake_measurements(rec, campaign.targets) - campaign.add_measurements(rec) From 2b52297aefa5efa7f490550ff2e5b5f108d46b66 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 24 Oct 2024 13:37:40 +0200 Subject: [PATCH 11/23] Apply minor corrections --- examples/Mixtures/slot_based.py | 50 +++++++++++++++++---------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index af32ea820..56ded1fee 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -2,22 +2,22 @@ ### Terminology -# Modeling a mixture is possible in a non-traditional way by using a concept we -# refer to as a **slot**. A slot is represented through the combination of two -# parameters: one indicating the amount of a mixture ingredient, and another indicating -# the type of ingredient (as a label) populating the slot. Unlike [traditional -# mixture modeling](/examples/Mixtures/traditional.md), the total number of parameters -# is not determined by how many ingredient choices we have, but by the maximum number of -# slots we allow. For instance, if we want to design a mixture with *up to three* -# ingredients, we can do so by creating three slots represented by six parameters. +# Modeling a mixture is possible in a non-traditional way by using a concept we refer to +# as a **slot**. A slot is represented through the combination of two parameters: one +# indicating the *amount* of a mixture ingredient, and another indicating the *type* of +# the ingredient (as a label) populating the slot. 
Unlike in [traditional mixture +# modeling](/examples/Mixtures/traditional.md), the total number of parameters is not +# determined by how many ingredient choices we have, but by the maximum number of slots +# we allow. For instance, if we want to design a mixture with *up to three* ingredients, +# we can do so by creating three slots represented by six parameters. # A corresponding search space could look like this: # | Slot1_Label | Slot1_Amount | Slot2_Label | Slot2_Amount | Slot3_Label | Slot3_Amount | # |:------------|:-------------|:------------|:-------------|:------------|:-------------| -# | Solvent1 | 10 | Solvent5 | 20 | Solvent4 | 30 | +# | Solvent1 | 10 | Solvent5 | 20 | Solvent4 | 70 | # | Solvent1 | 30 | Solvent8 | 40 | Solvent2 | 30 | -# | Solvent3 | 20 | Solvent1 | 35 | Solvent9 | 30 | -# | Solvent2 | 15 | Solvent3 | 10 | Solvent1 | 30 | +# | Solvent3 | 20 | Solvent1 | 35 | Solvent9 | 45 | +# | Solvent2 | 15 | Solvent3 | 40 | Solvent1 | 45 | # The slot-based representation has one decided advantage over traditional # modeling: We can use BayBE's label encodings for the label parameters. For @@ -27,7 +27,7 @@ # mixture optimization. # In this example, we show how to design such a search space, including the various -# discrete constraints we need to impose. We simulate a situation where we want to mix +# discrete constraints we need to impose. We consider a situation where we want to mix # up to three solvents, whose respective amounts must add up to 100. # ```{admonition} Discrete vs. Continuous Modeling @@ -52,7 +52,7 @@ ThresholdCondition, ) from baybe.parameters import NumericalDiscreteParameter, SubstanceParameter -from baybe.searchspace.discrete import SubspaceDiscrete +from baybe.searchspace import SubspaceDiscrete # Basic example settings: @@ -107,7 +107,7 @@ # For the sake of demonstration, we consider a scenario where we do *not* care about the # order of addition of components to the mixture, which imposes two additional -# constraints. 
+# constraints: one for removing duplicates and one for imposing permutation invariance. # # ```{admonition} Order of Addition # :class: note @@ -121,7 +121,9 @@ # Assuming that the order of addition is irrelevant, there is no difference between # having two slots with the same substance or having only one slot with the combined -# amounts. Thus, we want to make sure that there are no such duplicate label entries: +# amounts. Thus, we want to make sure that there are no such duplicate label entries, +# which can be achieved using a +# {class}`~baybe.constraints.discrete.DiscreteNoLabelDuplicatesConstraint`: no_duplicates_constraint = DiscreteNoLabelDuplicatesConstraint( parameters=["Slot1_Label", "Slot2_Label", "Slot3_Label"] @@ -133,12 +135,12 @@ # not matter, the result of interchanging any two slots does not alter the overall # mixture, i.e. the mixture slots are are considered permutation-invariant. -# A complication with permutation invariance arises from the fact that we have not -# only a label per slot, but also a numerical amount. If this amount is zero, then the -# label of the slot becomes meaningless (i.e. which ingredient should be considered for the -# slot), because adding zero of it does not change the mixture. In BayBE, we call -# this a "dependency", i.e. the slot labels depend on the slot amounts and are only -# relevant if the amount satisfies some condition (in this case "amount > 0"). +# A complication with permutation invariance arises from the fact that we have not only +# a label per slot, but also a numerical amount. If this amount is zero, then the label +# of the slot becomes meaningless, because adding zero of the corresponding substance +# does not change the mixture. In BayBE, we call this a "dependency", i.e. the slot +# labels depend on the slot amounts and are only relevant if the amount satisfies some +# condition (in this case "amount > 0"). 
# The {class}`~baybe.constraints.discrete.DiscreteDependenciesConstraint` informs the # {class}`~baybe.constraints.discrete.DiscretePermutationInvarianceConstraint` about @@ -224,9 +226,9 @@ # slightly more complex version of the ["stars and bars" # problem](https://en.wikipedia.org/wiki/Stars_and_bars_(combinatorics)), where the # number of non-empty bins is fixed. That is, we need to ask how many possible ways -# exist to distribute `N` items (= increase from one percentage amount to the next) -# across `M` bins (= number of solvents) if exactly `K` bins are non-empty (= number of -# solvents in the configurations). +# exist to distribute `N` items (= increase from one percentage level to the next) +# across `M` bins (= number of available solvents) if exactly `K` bins are non-empty (= +# number of solvents allowed in the mixture). # # There are `(M choose K)` ways to select the non-empty buckets. When distributing the # `N` items, one item needs to go to each of the `K` buckets for it to be non-empty. From 5bef4cd8dcff5afb55f37bd601de88351a6bb3ed Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 25 Oct 2024 10:51:12 +0200 Subject: [PATCH 12/23] Revise traditional example --- examples/Mixtures/traditional.py | 109 +++++++++++++++++++------------ 1 file changed, 68 insertions(+), 41 deletions(-) diff --git a/examples/Mixtures/traditional.py b/examples/Mixtures/traditional.py index 06933a93f..7aeb98564 100644 --- a/examples/Mixtures/traditional.py +++ b/examples/Mixtures/traditional.py @@ -1,34 +1,46 @@ -## Example for Modelling a Mixture in Traditional Representation - -# When modelling mixtures, one is typically confronted with a large set of substances to -# chose from. In the traditional representation, for each of these substance choices we -# would have one single parameter describing the amount of that substance which should -# go into the mixture. Then, there is one overall constraint to ensure that all substance -# amounts sum to 100%. 
Additionally, there could be more constraints, for instance if -# there are subgroups of substances that have their own constraints. - -# In this example, we will create a simple mixture of up to 6 components. There are -# three subgroups of substances: solvents, bases and phase agents. +## Modelling a Mixture in Traditional Representation + +# When modelling mixtures, we are often faced with a large set of ingredients to choose +# from. A common way to formalize this type of selection problem is to assign each +# ingredient its own numerical parameter representing the amount of the ingredient in +# the mixture. A sum constraint imposed on all parameters then ensures that the total +# amount of ingredients in the mix is always 100%. In addition, there could be other +# constraints, for instance, to impose further restrictions on individual subgroups of +# ingredients. In BayBE's language, we call this the *traditional mixture +# representation*. + +# In this example, we demonstrate how to create a search space in this representation, +# using a simple mixture of up to six components, which are divided into three +# subgroups: solvents, bases and phase agents. + +# ```{admonition} Slot-based Representation +# :class: seealso +# For an alternative way to describe mixtures, see our +# [slot-based representation](/examples/Mixtures/slot_based.md). +# ``` ### Imports -from baybe import Campaign +import numpy as np +import pandas as pd + from baybe.constraints import ContinuousLinearConstraint from baybe.parameters import NumericalContinuousParameter -from baybe.searchspace import SearchSpace, SubspaceContinuous -from baybe.targets import NumericalTarget +from baybe.recommenders import RandomRecommender +from baybe.searchspace import SubspaceContinuous -### Parameters Setup +### Parameter Setup -# Create lists of substance labels, divided into subgroups. 
+# We start by creating lists containing our substance labels according to their +# subgroups: g1 = ["Solvent1", "Solvent2"] g2 = ["Base1", "Base2"] g3 = ["PhaseAgent1", "PhaseAgent2"] -# Make continuous parameters for each subtance amount for each group. Here, the maximum -# amount for each substance depends on the group, i.e. we would allow more addition of each -# solvent compared to bases or phase agents. +# Next, we create continuous parameters describing the substance amounts for each group. +# Here, the maximum amount for each substance depends on its group, i.e. we allow +# adding more of a solvent compared to a base or a phase agent: p_g1_amounts = [ NumericalContinuousParameter(name=f"{name}", bounds=(0, 80)) for name in g1 @@ -42,49 +54,64 @@ ### Constraints Setup -# Ensure total sum is 100%. +# Now, we set up our constraints. We start with the overall mixture constraint, ensuring +# the total of all ingredients is 100%: c_total_sum = ContinuousLinearConstraint( parameters=g1 + g2 + g3, operator="=", - coefficients=[1.0] * len(g1 + g2 + g3), - rhs=100.0, + coefficients=[1] * len(g1 + g2 + g3), + rhs=100, ) -# Ensure bases make up at least 10% of the mixture. +# Additionally, we require that bases make up at least 10% of the mixture: c_g2_min = ContinuousLinearConstraint( parameters=g2, operator=">=", - coefficients=[1.0] * len(g2), + coefficients=[1] * len(g2), rhs=10, ) -# Ensure phase agents make up no more than 5%.
+# By contrast, phase agents should make up no more than 5%: c_g3_max = ContinuousLinearConstraint( parameters=g3, operator="<=", - coefficients=[1.0] * len(g3), + coefficients=[1] * len(g3), rhs=5, ) -### Campaign Setup +### Search Space Creation -searchspace = SearchSpace( - continuous=SubspaceContinuous.from_product( - parameters=p_g1_amounts + p_g2_amounts + p_g3_amounts, - constraints=[c_total_sum, c_g2_min, c_g3_max], - ), -) -campaign = Campaign( - searchspace=searchspace, - objective=NumericalTarget(name="MyTarget", mode="MAX").to_objective(), -) +# Having both parameter and constraint definitions at hand, we can create our +# search space: + +searchspace = SubspaceContinuous.from_product( + parameters=[*p_g1_amounts, *p_g2_amounts, *p_g3_amounts], + constraints=[c_total_sum, c_g2_min, c_g3_max], +).to_searchspace() -### Inspect Some Recommendations -# We can quickly verify that the constraints imposed above are respected. +### Verification of Constraints + +# To verify that the constraints imposed above are fulfilled, let us draw some +# random points from the search space: + +recommendations = RandomRecommender().recommend(batch_size=10, searchspace=searchspace) +print(recommendations) + +# Computing the respective row sums reveals the expected result: + +stats = pd.DataFrame( + { + "Total": recommendations.sum(axis=1), + "Total_Bases": recommendations[g2].sum(axis=1), + "Total_Phase_Agents": recommendations[g3].sum(axis=1), + } +) +print(stats) -rec = campaign.recommend(10) -print(rec) +assert np.allclose(stats["Total"], 100) +assert (stats["Total_Bases"] >= 10).all() +assert (stats["Total_Phase_Agents"] <= 5).all() From cb55bbf6e9feb5d12c72a887d74f54d270884267 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 25 Oct 2024 10:51:24 +0200 Subject: [PATCH 13/23] Synchronize headings --- examples/Mixtures/slot_based.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index 
56ded1fee..934e0afdf 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -1,4 +1,4 @@ -## Modeling a Slot-Based Mixture +## Modelling a Mixture in Slot-Based Representation ### Terminology From 05e6e2771bc9f2f3d5cdc3167e6b1e9f47dc2531 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 1 Nov 2024 17:28:12 +0100 Subject: [PATCH 14/23] Correctly render search space dataframe --- examples/Mixtures/slot_based.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index 934e0afdf..0b338725b 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -53,6 +53,7 @@ ) from baybe.parameters import NumericalDiscreteParameter, SubstanceParameter from baybe.searchspace import SubspaceDiscrete +from baybe.utils.dataframe import pretty_print_df # Basic example settings: @@ -181,7 +182,13 @@ # its configurations: space = SubspaceDiscrete.from_product(parameters=parameters, constraints=constraints) -print(space.exp_rep) +print( + pretty_print_df( + space.exp_rep, + max_rows=len(space.exp_rep), + max_columns=len(space.exp_rep.columns), + ) +) ### Verification of Constraints From 37cab9cc8dcc543ac3ebadda15c4d58af010aef8 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 1 Nov 2024 17:46:14 +0100 Subject: [PATCH 15/23] Add admonition mentioning from_simplex constructor --- examples/Mixtures/slot_based.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index 0b338725b..fd6f93b70 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -32,6 +32,7 @@ # ```{admonition} Discrete vs. Continuous Modeling # :class: important +# # Here, we only use discrete parameters, although in principle the parameters # corresponding to amounts could also be modeled as continuous numbers. 
However, this # would imply that some of the constraints would have to act on both discrete and @@ -181,6 +182,17 @@ # With all building blocks in place, we can now assemble our discrete space and inspect # its configurations: +# ```{admonition} Simplex Construction +# :class: tip +# +# In this example, we use the +# {meth}`~baybe.searchspace.discrete.SubspaceDiscrete.from_product` constructor in order +# to demonstrate the explicit creation of all involved constraint objects. However, for +# creating mixture representations, the +# {meth}`~baybe.searchspace.discrete.SubspaceDiscrete.from_simplex` constructor should +# generally be preferred, as it provides a more efficient path to the same result. +# ``` + space = SubspaceDiscrete.from_product(parameters=parameters, constraints=constraints) print( pretty_print_df( @@ -227,7 +239,7 @@ # ```{admonition} Theoretical Span # :class: info - +# # The number of possible `K`-solvent entries can be found by imagining the corresponding # [traditional mixture representation](/examples/Mixtures/traditional.md) and solving a # slightly more complex version of the ["stars and bars" From cf198d974e2340882484172ef6a97e4c81797edf Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 Nov 2024 16:15:20 +0100 Subject: [PATCH 16/23] Use realistic substances --- examples/Mixtures/slot_based.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index fd6f93b70..939f0fce9 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -68,9 +68,9 @@ dict_solvents = { "water": "O", - "C1": "C", - "C2": "CC", - "C3": "CCC", + "ethanol": "CCO", + "methanol": "CO", + "acetone": "CC(=O)C", } slot1_label = SubstanceParameter( name="Slot1_Label", data=dict_solvents, encoding="MORDRED" From 435713321829491ea50684017b693423ca15a291 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 Nov 2024 16:53:01 +0100 Subject: [PATCH 17/23]
Improve text --- examples/Mixtures/slot_based.py | 18 +++++++++--------- examples/Mixtures/traditional.py | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index 939f0fce9..83f549e46 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -1,4 +1,4 @@ -## Modelling a Mixture in Slot-Based Representation +## Modeling a Mixture in Slot-Based Representation ### Terminology @@ -135,14 +135,14 @@ # Next, we need to take care of permutation invariance. If our order of addition does # not matter, the result of interchanging any two slots does not alter the overall -# mixture, i.e. the mixture slots are are considered permutation-invariant. - -# A complication with permutation invariance arises from the fact that we have not only -# a label per slot, but also a numerical amount. If this amount is zero, then the label -# of the slot becomes meaningless, because adding zero of the corresponding substance -# does not change the mixture. In BayBE, we call this a "dependency", i.e. the slot -# labels depend on the slot amounts and are only relevant if the amount satisfies some -# condition (in this case "amount > 0"). +# mixture, i.e. the mixture slots are considered permutation-invariant. + +# A complication with permutation invariance arises from the fact that we do not only +# have a label per slot, but also a numerical amount. If this amount is zero, then the +# label of the slot becomes meaningless, because adding zero of the corresponding +# substance does not change the mixture. In BayBE, we call this a "dependency", i.e. +# the slot labels depend on the slot amounts and are only relevant if the amount +# satisfies some condition (in this case "amount > 0"). 
# The {class}`~baybe.constraints.discrete.DiscreteDependenciesConstraint` informs the # {class}`~baybe.constraints.discrete.DiscretePermutationInvarianceConstraint` about diff --git a/examples/Mixtures/traditional.py b/examples/Mixtures/traditional.py index 7aeb98564..8fe8c552b 100644 --- a/examples/Mixtures/traditional.py +++ b/examples/Mixtures/traditional.py @@ -1,6 +1,6 @@ -## Modelling a Mixture in Traditional Representation +## Modeling a Mixture in Traditional Representation -# When modelling mixtures, we are often faced with a large set of ingredients to choose +# When modeling mixtures, we are often faced with a large set of ingredients to choose # from. A common way to formalize this type of selection problem is to assign each # ingredient its own numerical parameter representing the amount of the ingredient in # the mixture. A sum constraint imposed on all parameters then ensures that the total From a3aa0b1731be9f2794a0e2f3a618da36d188b777 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 Nov 2024 16:55:32 +0100 Subject: [PATCH 18/23] Point out alternative --- examples/Mixtures/traditional.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/Mixtures/traditional.py b/examples/Mixtures/traditional.py index 8fe8c552b..38b7b3f46 100644 --- a/examples/Mixtures/traditional.py +++ b/examples/Mixtures/traditional.py @@ -90,7 +90,7 @@ searchspace = SubspaceContinuous.from_product( parameters=[*p_g1_amounts, *p_g2_amounts, *p_g3_amounts], constraints=[c_total_sum, c_g2_min, c_g3_max], -).to_searchspace() +).to_searchspace() # alternatively use SearchSpace.from_product ### Verification of Constraints From dca173494611e4973217ad83e56ac3160399d2ae Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 Nov 2024 17:00:10 +0100 Subject: [PATCH 19/23] Add more explanation for N --- examples/Mixtures/slot_based.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/Mixtures/slot_based.py 
b/examples/Mixtures/slot_based.py index 83f549e46..ca4b54a00 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -245,9 +245,9 @@ # slightly more complex version of the ["stars and bars" # problem](https://en.wikipedia.org/wiki/Stars_and_bars_(combinatorics)), where the # number of non-empty bins is fixed. That is, we need to ask how many possible ways -# exist to distribute `N` items (= increase from one percentage level to the next) -# across `M` bins (= number of available solvents) if exactly `K` bins are non-empty (= -# number of solvents allowed in the mixture). +# exist to distribute `N` items (= number of elemental steps for the amounts, in our +# case `RESOLUTION-1`) across `M` bins (= number of available solvents) if exactly +# `K` bins are non-empty (= number of solvents allowed in the mixture). # # There are `(M choose K)` ways to select the non-empty buckets. When distributing the # `N` items, one item needs to go to each of the `K` buckets for it to be non-empty. From 438c2ecc6a7c8b41f8091f0afd67a4d5d3d43afc Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 Nov 2024 17:06:43 +0100 Subject: [PATCH 20/23] Improve from_simplex mention --- examples/Mixtures/slot_based.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index ca4b54a00..8ebcf5ead 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -187,11 +187,12 @@ # # In this example, we use the # {meth}`~baybe.searchspace.discrete.SubspaceDiscrete.from_product` constructor in order -# to demonstrate the explicit creation all involved constraint objects. However, for +# to demonstrate the explicit creation of all involved constraints. 
However, for # creating mixture representations, the # {meth}`~baybe.searchspace.discrete.SubspaceDiscrete.from_simplex` constructor should -# generally be preferred, as it provides a more efficient path to the same result. -# ```` +# generally be used. It takes care of the overall sum constraint already during search +# space creation, providing a more efficient path to the same result. +# ``` space = SubspaceDiscrete.from_product(parameters=parameters, constraints=constraints) print( From 6f936398066e3ff2a45ed6db9061b52ff9c6a406 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 Nov 2024 20:10:42 +0100 Subject: [PATCH 21/23] Add simplex code example in admonition --- examples/Mixtures/slot_based.py | 34 +++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index 8ebcf5ead..7dfff9021 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -182,9 +182,18 @@ # With all building blocks in place, we can now assemble our discrete space and inspect # its configurations: -# ```{admonition} Simplex Construction + +space = SubspaceDiscrete.from_product(parameters=parameters, constraints=constraints) +print( + pretty_print_df( + space.exp_rep, + max_rows=len(space.exp_rep), + max_columns=len(space.exp_rep.columns), + ) +) + +# ````{admonition} Simplex Construction # :class: tip -# # In this example, we use the # {meth}`~baybe.searchspace.discrete.SubspaceDiscrete.from_product` constructor in order # to demonstrate the explicit creation of all involved constraints. However, for @@ -192,16 +201,21 @@ # {meth}`~baybe.searchspace.discrete.SubspaceDiscrete.from_simplex` constructor should # generally be used. It takes care of the overall sum constraint already during search # space creation, providing a more efficient path to the same result. 
+# +# The alternative in our case would look like: +# ```python +# space = SubspaceDiscrete.from_simplex( +# max_sum=100.0, +# boundary_only=True, +# simplex_parameters=[slot1_amount, slot2_amount, slot3_amount], +# product_parameters=[slot1_label, slot2_label, slot3_label], +# constraints=[perm_inv_constraint, no_duplicates_constraint], +# ) # ``` +# Note that `from_simplex` explicitly ensures the sum constraint, hence we do not pass +# it to `constraints`. +# ```` -space = SubspaceDiscrete.from_product(parameters=parameters, constraints=constraints) -print( - pretty_print_df( - space.exp_rep, - max_rows=len(space.exp_rep), - max_columns=len(space.exp_rep.columns), - ) -) ### Verification of Constraints From abe88c28e5e00c5eea9a03b3a3cc5c0d3f98128c Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 Nov 2024 20:31:09 +0100 Subject: [PATCH 22/23] Improve the permutation check --- examples/Mixtures/slot_based.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index 7dfff9021..512163fa4 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -44,6 +44,7 @@ import math import numpy as np +import pandas as pd from baybe.constraints import ( DiscreteDependenciesConstraint, @@ -223,7 +224,12 @@ amounts = space.exp_rep[["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"]] labels = space.exp_rep[["Slot1_Label", "Slot2_Label", "Slot3_Label"]] - +slots = space.exp_rep.apply( + lambda row: pd.Series( + [(row[f"Slot{k}_Label"], row[f"Slot{k}_Amount"]) for k in range(1, 4)] + ), + axis=1, +) # * All amounts sum to 100: @@ -241,11 +247,10 @@ # * There are no permutation-invariant configurations: -n_permute = labels.apply(frozenset, axis=1).to_frame().join(amounts).duplicated().sum() +n_permute = slots.apply(frozenset, axis=1).duplicated().sum() assert n_permute == 0 print("Number of permuted configurations: ", n_permute) - ### Verification of Span # 
Finally, we also assert if we have completely spanned the space of allowed From 61cadb7b99d4b1c6cda276a508317a0e59c7165a Mon Sep 17 00:00:00 2001 From: Martin Fitzner <17951239+Scienfitz@users.noreply.github.com> Date: Mon, 11 Nov 2024 11:39:45 +0100 Subject: [PATCH 23/23] Update text Co-authored-by: AdrianSosic --- examples/Mixtures/slot_based.py | 4 ++-- examples/Mixtures/traditional.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index 512163fa4..ee1cef9a7 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -213,8 +213,8 @@ # constraints=[perm_inv_constraint, no_duplicates_constraint], # ) # ``` -# Note that `from_simplex` explicitly ensures the sum constraint, hence we do not pass -# it to `constraints`. +# Note that {meth}`~baybe.searchspace.discrete.SubspaceDiscrete.from_simplex` +# inherently ensures the sum constraint, hence we do not pass it to `constraints`. # ```` diff --git a/examples/Mixtures/traditional.py b/examples/Mixtures/traditional.py index 38b7b3f46..fbefc78f3 100644 --- a/examples/Mixtures/traditional.py +++ b/examples/Mixtures/traditional.py @@ -27,7 +27,7 @@ from baybe.constraints import ContinuousLinearConstraint from baybe.parameters import NumericalContinuousParameter from baybe.recommenders import RandomRecommender -from baybe.searchspace import SubspaceContinuous +from baybe.searchspace import SearchSpace ### Parameter Setup @@ -87,10 +87,10 @@ # Having both parameter and constraint definitions at hand, we can create our # search space: -searchspace = SubspaceContinuous.from_product( +searchspace = SearchSpace.from_product( parameters=[*p_g1_amounts, *p_g2_amounts, *p_g3_amounts], constraints=[c_total_sum, c_g2_min, c_g3_max], -).to_searchspace() # alternatively use SearchSpace.from_product +) ### Verification of Constraints