diff --git a/.lockfiles/py310-dev.lock b/.lockfiles/py310-dev.lock index 4731ed102..8aef1fe6d 100644 --- a/.lockfiles/py310-dev.lock +++ b/.lockfiles/py310-dev.lock @@ -683,7 +683,7 @@ python-json-logger==2.0.7 # via jupyter-events pytz==2024.1 # via pandas -pyupgrade==3.16.0 +pyupgrade==3.19.0 # via baybe (pyproject.toml) pywin32==306 ; platform_python_implementation != 'PyPy' and sys_platform == 'win32' # via jupyter-core @@ -854,7 +854,7 @@ threadpoolctl==3.5.0 # via scikit-learn tinycss2==1.3.0 # via nbconvert -tokenize-rt==5.2.0 +tokenize-rt==6.1.0 # via pyupgrade toml==0.10.2 # via diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f717aa286..75d484e9e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: args: [--fix, --exit-non-zero-on-fix] - id: ruff-format - repo: https://github.com/asottile/pyupgrade - rev: v3.16.0 + rev: v3.19.0 hooks: - id: pyupgrade args: [--py310-plus] diff --git a/CHANGELOG.md b/CHANGELOG.md index a4aab98b5..2a22f2004 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - The new `allow_extra` flag is automatically set to `True` in `Objective.transform` when left unspecified - `get_transform_parameters` has been replaced with `get_transform_objects` +- Passing a dataframe via the `data` argument to `Target.transform` is deprecated. + The data should now be passed as a series as first positional argument. 
## [0.11.2] - 2024-10-11 ### Added diff --git a/baybe/objectives/desirability.py b/baybe/objectives/desirability.py index ebdad88d8..fe015a9e9 100644 --- a/baybe/objectives/desirability.py +++ b/baybe/objectives/desirability.py @@ -188,7 +188,7 @@ def transform( # Transform all targets individually for target in self.targets: - transformed[target.name] = target.transform(df[[target.name]]) + transformed[target.name] = target.transform(df[target.name]) # Scalarize the transformed targets into desirability values vals = scalarize(transformed.values, self.scalarizer, self._normalized_weights) diff --git a/baybe/objectives/single.py b/baybe/objectives/single.py index 705062dba..b089b918a 100644 --- a/baybe/objectives/single.py +++ b/baybe/objectives/single.py @@ -78,14 +78,15 @@ def transform( ) # <<<<<<<<<< Deprecation - # Even for a single target, it is convenient to use the existing machinery - # instead of re-implementing the validation logic - targets = get_transform_objects( + # Even for a single target, it is convenient to use the existing validation + # machinery instead of re-implementing it + get_transform_objects( df, [self._target], allow_missing=allow_missing, allow_extra=allow_extra ) - target_data = df[[t.name for t in targets]].copy() - return self._target.transform(target_data) + target_data = df[self._target.name].copy() + + return self._target.transform(target_data).to_frame() # Collect leftover original slotted classes processed by `attrs.define` diff --git a/baybe/parameters/base.py b/baybe/parameters/base.py index 7e80d3a82..e591a663a 100644 --- a/baybe/parameters/base.py +++ b/baybe/parameters/base.py @@ -124,13 +124,15 @@ def is_in_range(self, item: Any) -> bool: return item in self.values def transform(self, series: pd.Series, /) -> pd.DataFrame: - """Transform parameter values from experimental to computational representation. + """Transform parameter values to computational representation. 
Args: - series: The parameter values to be transformed. + series: The parameter values in experimental representation to be + transformed. Returns: - The transformed parameter values. + A dataframe containing the transformed parameter values in + computational representation. """ if self.encoding: # replace each label with the corresponding encoding diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 51fc4726a..308d15999 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -314,7 +314,7 @@ def transform( # >>>>>>>>>> Deprecation if not ((df is None) ^ (data is None)): raise ValueError( - "Provide the dataframe to be transformed as argument to `df`." + "Provide the data to be transformed as first positional argument." ) if data is not None: diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index b307e3243..1711352ab 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -379,7 +379,7 @@ def transform( # >>>>>>>>>> Deprecation if not ((df is None) ^ (data is None)): raise ValueError( - "Provide the dataframe to be transformed as argument to `df`." + "Provide the data to be transformed as first positional argument." ) if data is not None: diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index 88f26fef9..1229a008e 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -725,7 +725,7 @@ def transform( # >>>>>>>>>> Deprecation if not ((df is None) ^ (data is None)): raise ValueError( - "Provide the dataframe to be transformed as argument to `df`." + "Provide the data to be transformed as first positional argument." 
) if data is not None: diff --git a/baybe/targets/base.py b/baybe/targets/base.py index 6240bf426..ae55c7d86 100644 --- a/baybe/targets/base.py +++ b/baybe/targets/base.py @@ -38,17 +38,16 @@ def to_objective(self) -> SingleTargetObjective: return SingleTargetObjective(self) @abstractmethod - def transform(self, data: pd.DataFrame) -> pd.DataFrame: - """Transform data into computational representation. - - The transformation depends on the target mode, e.g. minimization, maximization, - matching, etc. + def transform(self, series: pd.Series, /) -> pd.Series: + """Transform target measurements to computational representation. Args: - data: The data to be transformed. + series: The target measurements in experimental representation to be + transformed. Returns: - A dataframe containing the transformed data. + A series containing the transformed measurements. The series name matches + that of the input. """ @abstractmethod diff --git a/baybe/targets/binary.py b/baybe/targets/binary.py index 060e32720..d7090bba6 100644 --- a/baybe/targets/binary.py +++ b/baybe/targets/binary.py @@ -1,6 +1,7 @@ """Binary targets.""" import gc +import warnings from typing import TypeAlias import numpy as np @@ -55,29 +56,46 @@ def _validate_values(self, _, value): ) @override - def transform(self, data: pd.DataFrame) -> pd.DataFrame: - # TODO: The method (signature) needs to be refactored, potentially when - # enabling multi-target settings. The current input type suggests that passing - # dataframes is allowed, but the code was designed for single targets and - # desirability objectives, where only one column is present. - assert data.shape[1] == 1 + def transform( + self, series: pd.Series | None = None, /, *, data: pd.DataFrame | None = None + ) -> pd.Series: + # >>>>>>>>>> Deprecation + if not ((series is None) ^ (data is None)): + raise ValueError( + "Provide the data to be transformed as first positional argument." 
+ ) + + if data is not None: + assert data.shape[1] == 1 + series = data.iloc[:, 0] + warnings.warn( + "Providing a dataframe via the `data` argument is deprecated and " + "will be removed in a future version. Please pass your data " + "in form of a series as positional argument instead.", + DeprecationWarning, + ) + + # Mypy does not infer from the above that `series` must be a series here + assert isinstance(series, pd.Series) + # <<<<<<<<<< Deprecation # Validate target values - col = data.iloc[:, [0]] - invalid = col[~col.isin([self.success_value, self.failure_value]).values] + invalid = series[ + ~series.isin([self.success_value, self.failure_value]).to_numpy() + ] if len(invalid) > 0: raise InvalidTargetValueError( f"The following values entered for target '{self.name}' are not in the " f"set of accepted choice values " - f"{{self.success_value, self.failure_value}}: \n{invalid}" + f"{set((self.success_value, self.failure_value))}: {set(invalid)}" ) # Transform - success_idx = data.iloc[:, 0] == self.success_value - return pd.DataFrame( + success_idx = series == self.success_value + return pd.Series( np.where(success_idx, _SUCCESS_VALUE_COMP, _FAILURE_VALUE_COMP), - index=data.index, - columns=data.columns, + index=series.index, + name=series.name, ) @override diff --git a/baybe/targets/numerical.py b/baybe/targets/numerical.py index f4ba653cf..dc3fa84ff 100644 --- a/baybe/targets/numerical.py +++ b/baybe/targets/numerical.py @@ -132,12 +132,28 @@ def _is_transform_normalized(self) -> bool: return (self.bounds.is_bounded) and (self.transformation is not None) @override - def transform(self, data: pd.DataFrame) -> pd.DataFrame: - # TODO: The method (signature) needs to be refactored, potentially when - # enabling multi-target settings. The current input type suggests that passing - # dataframes is allowed, but the code was designed for single targets and - # desirability objectives, where only one column is present. 
- assert data.shape[1] == 1 + def transform( + self, series: pd.Series | None = None, /, *, data: pd.DataFrame | None = None + ) -> pd.Series: + # >>>>>>>>>> Deprecation + if not ((series is None) ^ (data is None)): + raise ValueError( + "Provide the data to be transformed as first positional argument." + ) + + if data is not None: + assert data.shape[1] == 1 + series = data.iloc[:, 0] + warnings.warn( + "Providing a dataframe via the `data` argument is deprecated and " + "will be removed in a future version. Please pass your data " + "in form of a series as positional argument instead.", + DeprecationWarning, + ) + + # Mypy does not infer from the above that `series` must be a series here + assert isinstance(series, pd.Series) + # <<<<<<<<<< Deprecation # When a transformation is specified, apply it if self.transformation is not None: @@ -148,11 +164,13 @@ def transform(self, data: pd.DataFrame) -> pd.DataFrame: self.mode, cast(TargetTransformation, self.transformation), ) - transformed = pd.DataFrame( - func(data, *self.bounds.to_tuple()), index=data.index + transformed = pd.Series( + func(series, *self.bounds.to_tuple()), + index=series.index, + name=series.name, ) else: - transformed = data.copy() + transformed = series.copy() return transformed diff --git a/pyproject.toml b/pyproject.toml index 846c94cf8..435799c17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -124,7 +124,7 @@ lint = [ "flake8==7.1.0", # see DEV TOOLS NOTE "pre-commit==3.7.1", # see DEV TOOLS NOTE "pydoclint==0.5.5", # see DEV TOOLS NOTE - "pyupgrade==3.16.0", # see DEV TOOLS NOTE + "pyupgrade==3.19.0", # see DEV TOOLS NOTE "ruff==0.5.2", # see DEV TOOLS NOTE ] diff --git a/tests/test_deprecations.py b/tests/test_deprecations.py index 01e9352e3..0b818f5b6 100644 --- a/tests/test_deprecations.py +++ b/tests/test_deprecations.py @@ -24,6 +24,7 @@ ) from baybe.searchspace.continuous import SubspaceContinuous from baybe.searchspace.validation import get_transform_parameters +from 
baybe.targets.binary import BinaryTarget from baybe.targets.numerical import NumericalTarget @@ -224,3 +225,15 @@ def test_deprecated_get_transform_parameters(): DeprecationWarning, match="'get_transform_parameters' has been deprecated" ): get_transform_parameters(pd.DataFrame(), []) + + +def test_target_transform_interface(): + """Using the deprecated transform interface raises a warning.""" + numerical = NumericalTarget("num", "MAX") + binary = BinaryTarget("bin") + + # Passing dataframe via `data` + with pytest.warns(DeprecationWarning): + numerical.transform(data=pd.DataFrame(columns=["num"])) + with pytest.warns(DeprecationWarning): + binary.transform(data=pd.DataFrame(columns=["bin"]))