From 278d2ee28e9366631537c78de5a4778ae6c76b0d Mon Sep 17 00:00:00 2001 From: Niclas Rieger <45175997+nicrie@users.noreply.github.com> Date: Sun, 1 Sep 2024 15:23:43 +0200 Subject: [PATCH] feat: change default value of parameter `normalized` (#207) --- xeofs/models/_base_model_cross_set.py | 2 +- xeofs/models/_base_model_single_set.py | 30 +++++++++++++++++--------- xeofs/models/cpcca_rotator.py | 14 ++++++------ xeofs/models/eof.py | 22 +++++++++++++------ xeofs/models/sparse_pca.py | 4 ++-- 5 files changed, 45 insertions(+), 27 deletions(-) diff --git a/xeofs/models/_base_model_cross_set.py b/xeofs/models/_base_model_cross_set.py index 87c8f51..1939435 100644 --- a/xeofs/models/_base_model_cross_set.py +++ b/xeofs/models/_base_model_cross_set.py @@ -317,7 +317,7 @@ def transform( X, Y: DataObject | None Data to be transformed. At least one of them must be provided. normalized: bool, default=False - Whether to return normalized scores. + Whether to return L2 normalized scores. Returns ------- diff --git a/xeofs/models/_base_model_single_set.py b/xeofs/models/_base_model_single_set.py index 1964323..3530e05 100644 --- a/xeofs/models/_base_model_single_set.py +++ b/xeofs/models/_base_model_single_set.py @@ -178,14 +178,14 @@ def _fit_algorithm(self, data: DataArray) -> Self: """ raise NotImplementedError - def transform(self, data: DataObject, normalized=True) -> DataArray: + def transform(self, data: DataObject, normalized=False) -> DataArray: """Project data onto the components. Parameters ---------- data: DataObject Data to be transformed. - normalized: bool, default=True + normalized: bool, default=False Whether to normalize the scores by the L2 norm. Returns @@ -250,7 +250,7 @@ def fit_transform( return self.fit(data, dim, weights).transform(data, **kwargs) def inverse_transform( - self, scores: DataArray, normalized: bool = True + self, scores: DataArray, normalized: bool = False ) -> DataObject: """Reconstruct the original data from transformed data. 
@@ -260,7 +260,7 @@ def inverse_transform( Transformed data to be reconstructed. This could be a subset of the `scores` data of a fitted model, or unseen data. Must have a 'mode' dimension. - normalized: bool, default=True + normalized: bool, default=False Whether the scores data have been normalized by the L2 norm. Returns @@ -305,12 +305,23 @@ def _inverse_transform_algorithm(self, scores: DataArray) -> DataArray: """ raise NotImplementedError - def components(self) -> DataObject: - """Get the components.""" + def components(self, normalized: bool = True) -> DataObject: + """Get the components. + + Parameters + ---------- + normalized: bool, default=True + Whether to normalize the components by the L2 norm. + + """ components = self.data["components"] + if not normalized: + name = components.name + components = components * self.data["norms"] + components.name = name return self.preprocessor.inverse_transform_components(components) - def scores(self, normalized=True) -> DataArray: + def scores(self, normalized: bool = False) -> DataArray: """Get the scores. Parameters @@ -320,8 +331,7 @@ def scores(self, normalized=True) -> DataArray: """ scores = self.data["scores"].copy() if normalized: - attrs = scores.attrs.copy() + name = scores.name scores = scores / self.data["norms"] - scores.attrs.update(attrs) - scores.name = "scores" + scores.name = name return self.preprocessor.inverse_transform_scores(scores) diff --git a/xeofs/models/cpcca_rotator.py b/xeofs/models/cpcca_rotator.py index 9113b50..b6853b8 100644 --- a/xeofs/models/cpcca_rotator.py +++ b/xeofs/models/cpcca_rotator.py @@ -310,19 +310,19 @@ def transform( Y: DataObject | None = None, normalized: bool = False, ) -> DataArray | List[DataArray]: - """Project new "unseen" data onto the rotated singular vectors. + """Transform the data. Parameters ---------- - X : DataObject - Data to be projected onto the rotated singular vectors of the first dataset. 
- Y : DataObject - Data to be projected onto the rotated singular vectors of the second dataset. + X, Y: DataObject | None + Data to be transformed. At least one of them must be provided. + normalized: bool, default=False + Whether to return L2 normalized scores. Returns ------- - DataArray | List[DataArray] - Projected data. + Sequence[DataArray] | DataArray + Transformed data. """ # raise error if no data is provided diff --git a/xeofs/models/eof.py b/xeofs/models/eof.py index a011d96..4b234ab 100644 --- a/xeofs/models/eof.py +++ b/xeofs/models/eof.py @@ -156,11 +156,10 @@ def _inverse_transform_algorithm(self, scores: DataArray) -> DataArray: return reconstructed_data - def components(self) -> DataObject: - """Return the (EOF) components. + def components(self, normalized: bool = True) -> DataObject: + """Return the components. - The components in EOF anaylsis are the eigenvectors of the covariance/correlation matrix. - Other names include the principal components or EOFs. + The components are also referred to as eigenvectors, EOFs or loadings depending on the context. Returns ------- @@ -168,9 +167,9 @@ def components(self) -> DataObject: Components of the fitted model. """ - return super().components() + return super().components(normalized=normalized) - def scores(self, normalized: bool = True) -> DataArray: + def scores(self, normalized: bool = False) -> DataArray: """Return the (PC) scores. The scores in EOF anaylsis are the projection of the data matrix onto the @@ -341,7 +340,7 @@ def _fit_algorithm(self, X: DataArray) -> Self: return super()._fit_algorithm(X) - def components_amplitude(self) -> DataObject: + def components_amplitude(self, normalized=True) -> DataObject: """Return the amplitude of the (EOF) components. The amplitude of the components are defined as @@ -352,6 +351,11 @@ def components_amplitude(self) -> DataObject: where :math:`C_{ij}` is the :math:`i`-th entry of the :math:`j`-th component and :math:`|\\cdot|` denotes the absolute value. 
+ Parameters + ---------- + normalized : bool, default=True + Whether to normalize the components by the singular values + Returns ------- components_amplitude: DataArray | Dataset | List[DataArray] @@ -359,6 +363,10 @@ def components_amplitude(self) -> DataObject: """ amplitudes = abs(self.data["components"]) + + if not normalized: + amplitudes = amplitudes * self.data["norms"] + amplitudes.name = "components_amplitude" return self.preprocessor.inverse_transform_components(amplitudes) diff --git a/xeofs/models/sparse_pca.py b/xeofs/models/sparse_pca.py index d5aaebc..4a5c473 100644 --- a/xeofs/models/sparse_pca.py +++ b/xeofs/models/sparse_pca.py @@ -297,7 +297,7 @@ def components(self) -> DataObject: """ return super().components() - def scores(self, normalized: bool = True) -> DataArray: + def scores(self, normalized: bool = False) -> DataArray: """Return the component scores. The component scores :math:`U` are defined as the projection of the fitted @@ -309,7 +309,7 @@ def scores(self, normalized: bool = True) -> DataArray: Parameters ---------- - normalized : bool, default=True + normalized : bool, default=False Whether to normalize the scores by the L2 norm. Returns