Skip to content

Commit

Permalink
Merge branch 'emdgroup:main' into feat/benchmark_capabilities
Browse files Browse the repository at this point in the history
  • Loading branch information
fabianliebig authored Oct 22, 2024
2 parents 5320fd9 + de4c2e4 commit 3af57a5
Show file tree
Hide file tree
Showing 14 changed files with 98 additions and 45 deletions.
4 changes: 2 additions & 2 deletions .lockfiles/py310-dev.lock
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ python-json-logger==2.0.7
# via jupyter-events
pytz==2024.1
# via pandas
pyupgrade==3.16.0
pyupgrade==3.19.0
# via baybe (pyproject.toml)
pywin32==306 ; platform_python_implementation != 'PyPy' and sys_platform == 'win32'
# via jupyter-core
Expand Down Expand Up @@ -854,7 +854,7 @@ threadpoolctl==3.5.0
# via scikit-learn
tinycss2==1.3.0
# via nbconvert
tokenize-rt==5.2.0
tokenize-rt==6.1.0
# via pyupgrade
toml==0.10.2
# via
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ repos:
args: [--fix, --exit-non-zero-on-fix]
- id: ruff-format
- repo: https://github.com/asottile/pyupgrade
rev: v3.16.0
rev: v3.19.0
hooks:
- id: pyupgrade
args: [--py310-plus]
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- The new `allow_extra` flag is automatically set to `True` in `Objective.transform`
when left unspecified
- `get_transform_parameters` has been replaced with `get_transform_objects`
- Passing a dataframe via the `data` argument to `Target.transform` is deprecated.
The data must now be passed as a series as the first positional argument.

## [0.11.2] - 2024-10-11
### Added
Expand Down
2 changes: 1 addition & 1 deletion baybe/objectives/desirability.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def transform(

# Transform all targets individually
for target in self.targets:
transformed[target.name] = target.transform(df[[target.name]])
transformed[target.name] = target.transform(df[target.name])

# Scalarize the transformed targets into desirability values
vals = scalarize(transformed.values, self.scalarizer, self._normalized_weights)
Expand Down
11 changes: 6 additions & 5 deletions baybe/objectives/single.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,15 @@ def transform(
)
# <<<<<<<<<< Deprecation

# Even for a single target, it is convenient to use the existing machinery
# instead of re-implementing the validation logic
targets = get_transform_objects(
# Even for a single target, it is convenient to use the existing validation
# machinery instead of re-implementing it
get_transform_objects(
df, [self._target], allow_missing=allow_missing, allow_extra=allow_extra
)
target_data = df[[t.name for t in targets]].copy()

return self._target.transform(target_data)
target_data = df[self._target.name].copy()

return self._target.transform(target_data).to_frame()


# Collect leftover original slotted classes processed by `attrs.define`
Expand Down
8 changes: 5 additions & 3 deletions baybe/parameters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,15 @@ def is_in_range(self, item: Any) -> bool:
return item in self.values

def transform(self, series: pd.Series, /) -> pd.DataFrame:
"""Transform parameter values from experimental to computational representation.
"""Transform parameter values to computational representation.
Args:
series: The parameter values to be transformed.
series: The parameter values in experimental representation to be
transformed.
Returns:
The transformed parameter values.
A series containing the transformed values. The series name matches
that of the input.
"""
if self.encoding:
# replace each label with the corresponding encoding
Expand Down
2 changes: 1 addition & 1 deletion baybe/searchspace/continuous.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def transform(
# >>>>>>>>>> Deprecation
if not ((df is None) ^ (data is None)):
raise ValueError(
"Provide the dataframe to be transformed as argument to `df`."
"Provide the data to be transformed as first positional argument."
)

if data is not None:
Expand Down
2 changes: 1 addition & 1 deletion baybe/searchspace/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ def transform(
# >>>>>>>>>> Deprecation
if not ((df is None) ^ (data is None)):
raise ValueError(
"Provide the dataframe to be transformed as argument to `df`."
"Provide the data to be transformed as first positional argument."
)

if data is not None:
Expand Down
2 changes: 1 addition & 1 deletion baybe/searchspace/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,7 @@ def transform(
# >>>>>>>>>> Deprecation
if not ((df is None) ^ (data is None)):
raise ValueError(
"Provide the dataframe to be transformed as argument to `df`."
"Provide the data to be transformed as first positional argument."
)

if data is not None:
Expand Down
13 changes: 6 additions & 7 deletions baybe/targets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,16 @@ def to_objective(self) -> SingleTargetObjective:
return SingleTargetObjective(self)

@abstractmethod
def transform(self, data: pd.DataFrame) -> pd.DataFrame:
"""Transform data into computational representation.
The transformation depends on the target mode, e.g. minimization, maximization,
matching, etc.
def transform(self, series: pd.Series, /) -> pd.Series:
"""Transform target measurements to computational representation.
Args:
data: The data to be transformed.
series: The target measurements in experimental representation to be
transformed.
Returns:
A dataframe containing the transformed data.
A series containing the transformed measurements. The series name matches
that of the input.
"""

@abstractmethod
Expand Down
44 changes: 31 additions & 13 deletions baybe/targets/binary.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Binary targets."""

import gc
import warnings
from typing import TypeAlias

import numpy as np
Expand Down Expand Up @@ -55,29 +56,46 @@ def _validate_values(self, _, value):
)

@override
def transform(self, data: pd.DataFrame) -> pd.DataFrame:
# TODO: The method (signature) needs to be refactored, potentially when
# enabling multi-target settings. The current input type suggests that passing
# dataframes is allowed, but the code was designed for single targets and
# desirability objectives, where only one column is present.
assert data.shape[1] == 1
def transform(
self, series: pd.Series | None = None, /, *, data: pd.DataFrame | None = None
) -> pd.Series:
# >>>>>>>>>> Deprecation
if not ((series is None) ^ (data is None)):
raise ValueError(
"Provide the data to be transformed as first positional argument."
)

if data is not None:
assert data.shape[1] == 1
series = data.iloc[:, 0]
warnings.warn(
"Providing a dataframe via the `data` argument is deprecated and "
"will be removed in a future version. Please pass your data "
"in form of a series as positional argument instead.",
DeprecationWarning,
)

# Mypy does not infer from the above that `series` must be a series here
assert isinstance(series, pd.Series)
# <<<<<<<<<< Deprecation

# Validate target values
col = data.iloc[:, [0]]
invalid = col[~col.isin([self.success_value, self.failure_value]).values]
invalid = series[
~series.isin([self.success_value, self.failure_value]).to_numpy()
]
if len(invalid) > 0:
raise InvalidTargetValueError(
f"The following values entered for target '{self.name}' are not in the "
f"set of accepted choice values "
f"{{self.success_value, self.failure_value}}: \n{invalid}"
f"{set((self.success_value, self.failure_value))}: {set(invalid)}"
)

# Transform
success_idx = data.iloc[:, 0] == self.success_value
return pd.DataFrame(
success_idx = series == self.success_value
return pd.Series(
np.where(success_idx, _SUCCESS_VALUE_COMP, _FAILURE_VALUE_COMP),
index=data.index,
columns=data.columns,
index=series.index,
name=series.name,
)

@override
Expand Down
36 changes: 27 additions & 9 deletions baybe/targets/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,28 @@ def _is_transform_normalized(self) -> bool:
return (self.bounds.is_bounded) and (self.transformation is not None)

@override
def transform(self, data: pd.DataFrame) -> pd.DataFrame:
# TODO: The method (signature) needs to be refactored, potentially when
# enabling multi-target settings. The current input type suggests that passing
# dataframes is allowed, but the code was designed for single targets and
# desirability objectives, where only one column is present.
assert data.shape[1] == 1
def transform(
self, series: pd.Series | None = None, /, *, data: pd.DataFrame | None = None
) -> pd.Series:
# >>>>>>>>>> Deprecation
if not ((series is None) ^ (data is None)):
raise ValueError(
"Provide the data to be transformed as first positional argument."
)

if data is not None:
assert data.shape[1] == 1
series = data.iloc[:, 0]
warnings.warn(
"Providing a dataframe via the `data` argument is deprecated and "
"will be removed in a future version. Please pass your data "
"in form of a series as positional argument instead.",
DeprecationWarning,
)

# Mypy does not infer from the above that `series` must be a series here
assert isinstance(series, pd.Series)
# <<<<<<<<<< Deprecation

# When a transformation is specified, apply it
if self.transformation is not None:
Expand All @@ -148,11 +164,13 @@ def transform(self, data: pd.DataFrame) -> pd.DataFrame:
self.mode,
cast(TargetTransformation, self.transformation),
)
transformed = pd.DataFrame(
func(data, *self.bounds.to_tuple()), index=data.index
transformed = pd.Series(
func(series, *self.bounds.to_tuple()),
index=series.index,
name=series.name,
)
else:
transformed = data.copy()
transformed = series.copy()

return transformed

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ lint = [
"flake8==7.1.0", # see DEV TOOLS NOTE
"pre-commit==3.7.1", # see DEV TOOLS NOTE
"pydoclint==0.5.5", # see DEV TOOLS NOTE
"pyupgrade==3.16.0", # see DEV TOOLS NOTE
"pyupgrade==3.19.0", # see DEV TOOLS NOTE
"ruff==0.5.2", # see DEV TOOLS NOTE
]

Expand Down
13 changes: 13 additions & 0 deletions tests/test_deprecations.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
)
from baybe.searchspace.continuous import SubspaceContinuous
from baybe.searchspace.validation import get_transform_parameters
from baybe.targets.binary import BinaryTarget
from baybe.targets.numerical import NumericalTarget


Expand Down Expand Up @@ -224,3 +225,15 @@ def test_deprecated_get_transform_parameters():
DeprecationWarning, match="'get_transform_parameters' has been deprecated"
):
get_transform_parameters(pd.DataFrame(), [])


def test_target_transform_interface():
    """Using the deprecated transform interface raises a warning."""
    # Each target is paired with the name of the single column it transforms
    cases = [
        (NumericalTarget("num", "MAX"), "num"),
        (BinaryTarget("bin"), "bin"),
    ]

    # Passing dataframe via `data`: the message is pinned via `match` so that an
    # unrelated deprecation warning (e.g. one raised by a third-party package)
    # cannot make the test pass by accident
    for target, column in cases:
        with pytest.warns(
            DeprecationWarning,
            match="Providing a dataframe via the `data` argument is deprecated",
        ):
            target.transform(data=pd.DataFrame(columns=[column]))

0 comments on commit 3af57a5

Please sign in to comment.