From a77fe980e66c0e9c731120f9cae9b5dd870d1c3c Mon Sep 17 00:00:00 2001 From: vnmabus Date: Thu, 3 Dec 2020 17:41:48 +0100 Subject: [PATCH 001/417] First version of hierarchical clustering. --- skfda/ml/clustering/_hierarchical.py | 132 +++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 skfda/ml/clustering/_hierarchical.py diff --git a/skfda/ml/clustering/_hierarchical.py b/skfda/ml/clustering/_hierarchical.py new file mode 100644 index 000000000..ca01da4d2 --- /dev/null +++ b/skfda/ml/clustering/_hierarchical.py @@ -0,0 +1,132 @@ +import sklearn.cluster +from sklearn.base import BaseEstimator, ClusterMixin + + +class AgglomerativeClustering(ClusterMixin, BaseEstimator): + """ + Agglomerative Clustering + Recursively merges the pair of clusters that minimally increases + a given linkage distance. + Read more in the :ref:`User Guide `. + Parameters + ---------- + n_clusters : int or None, default=2 + The number of clusters to find. It must be ``None`` if + ``distance_threshold`` is not ``None``. + metric : str or callable, default='euclidean' + Metric used to compute the linkage. Can be "euclidean", "l1", "l2", + "manhattan", "cosine", or "precomputed". + If linkage is "ward", only "euclidean" is accepted. + If "precomputed", a distance matrix (instead of a similarity matrix) + is needed as input for the fit method. + memory : str or object with the joblib.Memory interface, default=None + Used to cache the output of the computation of the tree. + By default, no caching is done. If a string is given, it is the + path to the caching directory. + connectivity : array-like or callable, default=None + Connectivity matrix. Defines for each sample the neighboring + samples following a given structure of the data. + This can be a connectivity matrix itself or a callable that transforms + the data into a connectivity matrix, such as derived from + kneighbors_graph. Default is None, i.e, the + hierarchical clustering algorithm is unstructured. 
+ compute_full_tree : 'auto' or bool, default='auto' + Stop early the construction of the tree at n_clusters. This is useful + to decrease computation time if the number of clusters is not small + compared to the number of samples. This option is useful only when + specifying a connectivity matrix. Note also that when varying the + number of clusters and using caching, it may be advantageous to compute + the full tree. It must be ``True`` if ``distance_threshold`` is not + ``None``. By default `compute_full_tree` is "auto", which is equivalent + to `True` when `distance_threshold` is not `None` or that `n_clusters` + is inferior to the maximum between 100 or `0.02 * n_samples`. + Otherwise, "auto" is equivalent to `False`. + linkage : {"ward", "complete", "average", "single"}, default="ward" + Which linkage criterion to use. The linkage criterion determines which + distance to use between sets of observation. The algorithm will merge + the pairs of cluster that minimize this criterion. + - ward minimizes the variance of the clusters being merged. + - average uses the average of the distances of each observation of + the two sets. + - complete or maximum linkage uses the maximum distances between + all observations of the two sets. + - single uses the minimum of the distances between all observations + of the two sets. + .. versionadded:: 0.20 + Added the 'single' option + distance_threshold : float, default=None + The linkage distance threshold above which, clusters will not be + merged. If not ``None``, ``n_clusters`` must be ``None`` and + ``compute_full_tree`` must be ``True``. + .. versionadded:: 0.21 + Attributes + ---------- + n_clusters_ : int + The number of clusters found by the algorithm. If + ``distance_threshold=None``, it will be equal to the given + ``n_clusters``. + labels_ : ndarray of shape (n_samples) + cluster labels for each point + n_leaves_ : int + Number of leaves in the hierarchical tree. 
+ n_connected_components_ : int + The estimated number of connected components in the graph. + .. versionadded:: 0.21 + ``n_connected_components_`` was added to replace ``n_components_``. + children_ : array-like of shape (n_samples-1, 2) + The children of each non-leaf node. Values less than `n_samples` + correspond to leaves of the tree which are the original samples. + A node `i` greater than or equal to `n_samples` is a non-leaf + node and has children `children_[i - n_samples]`. Alternatively + at the i-th iteration, children[i][0] and children[i][1] + are merged to form node `n_samples + i` + Examples + -------- + >>> from sklearn.cluster import AgglomerativeClustering + >>> import numpy as np + >>> X = np.array([[1, 2], [1, 4], [1, 0], + ... [4, 2], [4, 4], [4, 0]]) + >>> clustering = AgglomerativeClustering().fit(X) + >>> clustering + AgglomerativeClustering() + >>> clustering.labels_ + array([1, 1, 1, 0, 0, 0]) + """ + + def __init__(self, n_clusters=2, *, metric="euclidean", + memory=None, + connectivity=None, compute_full_tree='auto', + linkage='ward', distance_threshold=None): + self.n_clusters = n_clusters + self.metric = metric + self.memory = memory + self.connectivity = connectivity + self.compute_full_tree = compute_full_tree + self.linkage = linkage + self.distance_threshold = distance_threshold + + def _init_estimator(self): + self._estimator = sklearn.cluster.AgglomerativeClustering( + n_clusters=self.n_clusters, + affinity="precomputed", + memory=self.memory, + connectivity=self.connectivity, + compute_full_tree=self.compute_full_tree, + linkage=self.linkage, + distance_threshold=self.distance_threshold, + ) + + def fit(self, X, y=None): + self._init_estimator() + + # TODO: Compute precomputed + + return self._estimator.fit(X, y) + + def fit_predict(self, X, y=None): + + self._init_estimator() + + # TODO: Compute precomputed + + return self._estimator.fit_predict(X, y) From 4ffa9326e72e23475a2eb3e14b74aa83cf808019 Mon Sep 17 00:00:00 2001 From: 
pedrorponga Date: Mon, 7 Dec 2020 14:55:44 +0100 Subject: [PATCH 002/417] DDTransform --- docs/modules/ml/classification.rst | 3 + setup.cfg | 23 +- skfda/ml/classification/__init__.py | 2 +- .../classification/_centroid_classifiers.py | 14 +- skfda/ml/classification/_depth_classifiers.py | 139 +++++++++++- .../classification/_neighbors_classifiers.py | 210 +++++++++++------- 6 files changed, 280 insertions(+), 111 deletions(-) diff --git a/docs/modules/ml/classification.rst b/docs/modules/ml/classification.rst index e4c2d0a77..a82dcae65 100644 --- a/docs/modules/ml/classification.rst +++ b/docs/modules/ml/classification.rst @@ -22,3 +22,6 @@ it is explained the basic usage of these estimators. skfda.ml.classification.KNeighborsClassifier skfda.ml.classification.RadiusNeighborsClassifier skfda.ml.classification.NearestCentroid + skfda.ml.classification.DTMClassifier + skfda.ml.classification.MaximumDepthClassifier + skfda.ml.classification.DDTransform diff --git a/setup.cfg b/setup.cfg index 54ab6552e..86790b45d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,8 +12,8 @@ ignore = D105, # No docstrings in __init__ D107, - # Uppercase arguments like X are common in scikit-learn - N803, + # Uppercase arguments and variables like X are common in scikit-learn + N803, N806, # There are no bad quotes Q000, # Google Python style is not RST until after processed by Napoleon @@ -38,7 +38,7 @@ ignore = WPS436, # Our private objects are fine to import WPS450 - + per-file-ignores = __init__.py: # Unused modules are allowed in `__init__.py`, to reduce imports @@ -48,36 +48,43 @@ per-file-ignores = # Tests benefit from magic numbers test_*.py: WPS432 - + rst-directives = # These are sorted alphabetically - but that does not matter autosummary,data,currentmodule,deprecated, glossary,moduleauthor,plot,testcode, versionadded,versionchanged, - + rst-roles = attr,class,func,meth,mod,obj,ref,term, # Needs to be tuned +max-arguments = 10 +max-base-classes = 5 +max-expressions = 15 
max-line-complexity = 25 -max-methods = 30 max-local-variables = 15 -max-expressions = 15 +max-methods = 30 max-module-expressions = 15 max-module-members = 10 max-string-usages = 10 + ignore-decorators = property strictness = long +# Beautify output and make it more informative +format = wemake +show-source = true + [coverage:run] omit = # Omit reporting for dataset module */datasets/* # Omit reporting for __init__.py files */__init__.py - + [isort] multi_line_output = 3 include_trailing_comma = true diff --git a/skfda/ml/classification/__init__.py b/skfda/ml/classification/__init__.py index da2bca618..76b445f27 100644 --- a/skfda/ml/classification/__init__.py +++ b/skfda/ml/classification/__init__.py @@ -1,6 +1,6 @@ """Classification.""" from ._centroid_classifiers import DTMClassifier, NearestCentroid -from ._depth_classifiers import MaximumDepthClassifier +from ._depth_classifiers import DDTransform, MaximumDepthClassifier from ._neighbors_classifiers import ( KNeighborsClassifier, RadiusNeighborsClassifier, diff --git a/skfda/ml/classification/_centroid_classifiers.py b/skfda/ml/classification/_centroid_classifiers.py index 8b36fc001..79df49ef4 100644 --- a/skfda/ml/classification/_centroid_classifiers.py +++ b/skfda/ml/classification/_centroid_classifiers.py @@ -69,9 +69,9 @@ def fit(self, X, y): Args: X (:class:`FDataGrid`, array_matrix): Training data. FDataGrid with the training data or array matrix with shape - [n_samples, n_samples] if metric='precomputed'. + (n_samples, n_samples) if metric='precomputed'. y (array-like or sparse matrix): Target values of - shape = [n_samples] or [n_samples, n_outputs]. + shape = (n_samples) or (n_samples, n_outputs). Returns: self (object) @@ -94,8 +94,8 @@ def predict(self, X): X (:class:`FDataGrid`): FDataGrid with the test samples. Returns: - y (np.array): array of shape [n_samples] or - [n_samples, n_outputs] with class labels for each data sample. 
+ y (np.array): array of shape (n_samples) or + (n_samples, n_outputs) with class labels for each data sample. """ sklearn_check_is_fitted(self) @@ -184,7 +184,7 @@ def fit(self, X, y): Args: X (:class:`FDataGrid`): FDataGrid with the training data. - y (array-like): Target values of shape = [n_samples]. + y (array-like): Target values of shape = (n_samples). Returns: self (object) @@ -208,7 +208,7 @@ def predict(self, X): X (:class:`FDataGrid`): FDataGrid with the test samples. Returns: - y (np.array): array of shape [n_samples] or - [n_samples, n_outputs] with class labels for each data sample. + y (np.array): array of shape (n_samples) or + (n_samples, n_outputs) with class labels for each data sample. """ return self._clf.predict(X) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index 37aa27f42..f3248887b 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -1,11 +1,18 @@ """Depth-based models for supervised classification.""" +from typing import List + import numpy as np -from sklearn.base import BaseEstimator, ClassifierMixin, clone +from sklearn.base import ( + BaseEstimator, + ClassifierMixin, + TransformerMixin, + clone, +) from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted from ..._utils import _classifier_get_classes -from ...exploratory.depth import Depth, ModifiedBandDepth +from ...exploratory.depth import Depth, IntegratedDepth, ModifiedBandDepth class MaximumDepthClassifier(BaseEstimator, ClassifierMixin): @@ -41,7 +48,7 @@ class MaximumDepthClassifier(BaseEstimator, ClassifierMixin): >>> clf.predict(X_test) # Predict labels for test samples array([1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, - 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1]) + 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1]) Finally, we calculate the mean accuracy for the test data @@ -49,6 +56,7 @@ class MaximumDepthClassifier(BaseEstimator, ClassifierMixin): 0.875 
See also: + :class:`~skfda.ml.classification.DDClassifier` :class:`~skfda.ml.classification.DTMClassifier` References: @@ -57,8 +65,6 @@ class MaximumDepthClassifier(BaseEstimator, ClassifierMixin): """ def __init__(self, depth_method: Depth = None): - self.depth_method = depth_method - if depth_method is None: self.depth_method = ModifiedBandDepth() else: @@ -69,11 +75,10 @@ def fit(self, X, y): Args: X (:class:`FDataGrid`): FDataGrid with the training data. - y (array-like): Target values of shape = [n_samples]. + y (array-like): Target values of shape = (n_samples). Returns: self (object) - """ classes_, y_ind = _classifier_get_classes(y) @@ -92,9 +97,8 @@ def predict(self, X): X (:class:`FDataGrid`): FDataGrid with the test samples. Returns: - y (np.array): array of shape [n_samples] with class labels + y (np.array): array of shape (n_samples) with class labels for each data sample. - """ sklearn_check_is_fitted(self) @@ -104,3 +108,120 @@ def predict(self, X): ] return self.classes_[np.argmax(depths, axis=0)] + + +class DDTransform(BaseEstimator, TransformerMixin): + r"""Depth-versus-depth (DD) transformer for functional data. + + This transformer takes a list of k depths and performs the following map: + + .. math:: + \mathcal{X} &\rightarrow \mathbb{R}^G \\ + x &\rightarrow \textbf{d} = (D_1^1(x),...,D_g^k(x)) + + Where :math:`D_i^j(x)` is the depth of the point :math:`x` with respect to + the data in the :math:`i`-th group using the :math:`j`-th depth of the + provided list. + + Note that :math:`\mathcal{X}` is possibly multivariate, that is, + :math:`\mathcal{X} = \mathcal{X}_1 \times ... \times \mathcal{X}_p`. + + Parameters: + depth_methods (default + :class:`ModifiedBandDepth `): + List of depth classes to use when calculating the depth of a test + sample in a class. See the documentation of the depths module + for a list of available depths. By default it is the list + containing ModifiedBandDepth. 
+ + Examples: + Firstly, we will import and split the Berkeley Growth Study dataset + + >>> from skfda.datasets import fetch_growth + >>> from sklearn.model_selection import train_test_split + >>> dataset = fetch_growth() + >>> fd = dataset['data'] + >>> y = dataset['target'] + >>> X_train, X_test, y_train, y_test = train_test_split( + ... fd, y, test_size=0.25, stratify=y, random_state=0) + + >>> from skfda.ml.classification import DDTransform + >>> from sklearn.pipeline import make_pipeline + >>> from sklearn.neighbors import KNeighborsClassifier + + We classify by first transforming our data using the defined map + and then using KNN + + >>> pipe = make_pipeline(DDTransform(), KNeighborsClassifier()) + >>> pipe.fit(X_train, y_train) + Pipeline(steps=[('ddtransform', + DDTransform(depth_methods=[ModifiedBandDepth(), + IntegratedDepth()])), + ('kneighborsclassifier', KNeighborsClassifier())]) + + We can predict the class of new samples + + >>> pipe.predict(X_test) + array([1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, + 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1]) + + Finally, we calculate the mean accuracy for the test data + + >>> pipe.score(X_test, y_test) + 0.875 + + See also: + :class:`~skfda.ml.classification.DTMClassifier` + :class:`~skfda.ml.classification.MaximumDepthClassifier` + + References: + Li, J., Cuesta-Albertos, J. A., and Liu, R. Y. (2012). DD-classifier: + Nonparametric classification procedure based on DD-plot. Journal of + the American Statistical Association, 107(498):737-753. + + Cuesta-Albertos, J.A., Febrero-Bande, M. and Oviedo de la Fuente, M. + (2017) The DDG-classifier in the functional setting. TEST, 26. 119-142. + """ + + def __init__(self, depth_methods: List[Depth] = None): # FIXME + if depth_methods is None: + self.depth_methods = [ModifiedBandDepth(), IntegratedDepth()] + else: + self.depth_methods = depth_methods + + def fit(self, X, y): # FIXME + """Fit the model using X as training data and y as target values. 
+ + Args: + X (:class:`FDataGrid`): FDataGrid with the training data. + y (array-like): Target values of shape = (n_samples). + + Returns: + self (object) + """ + classes_, y_ind = _classifier_get_classes(y) + + self.classes_ = classes_ + self.distributions_ = [ + clone(depth_method).fit(X[y_ind == cur_class]) + for cur_class in range(self.classes_.size) + for depth_method in self.depth_methods + ] + + return self + + def transform(self, X): # FIXME + """Transform the provided data using the defined map. + + Args: + X (:class:`FDataGrid`): FDataGrid with the test samples. + + Returns: + X_new (array-like): array of shape (n_samples, G). FIXME + """ + sklearn_check_is_fitted(self) + + return np.transpose([ + distribution.predict(X) + for distribution in self.distributions_ + ]) diff --git a/skfda/ml/classification/_neighbors_classifiers.py b/skfda/ml/classification/_neighbors_classifiers.py index 743d269ee..e2348b402 100644 --- a/skfda/ml/classification/_neighbors_classifiers.py +++ b/skfda/ml/classification/_neighbors_classifiers.py @@ -15,18 +15,23 @@ ) -class KNeighborsClassifier(NeighborsBase, NeighborsMixin, KNeighborsMixin, - ClassifierMixin, NeighborsClassifierMixin): +class KNeighborsClassifier( + NeighborsBase, + NeighborsMixin, + KNeighborsMixin, + ClassifierMixin, + NeighborsClassifierMixin, +): """Classifier implementing the k-nearest neighbors vote. Parameters: - n_neighbors: int, optional (default = 5) + n_neighbors (int, default = 5): Number of neighbors to use by default for :meth:`kneighbors` queries. - weights: str or callable, optional (default = 'uniform') - weight function used in prediction. Possible values: - - - 'uniform': uniform weights. All points in each neighborhood + weights (str or callable, default = 'uniform'): + Weight function used in prediction. + Possible values: + - 'uniform': uniform weights. All points in each neighborhood are weighted equally. - 'distance': weight points by the inverse of their distance. 
in this case, closer neighbors of a query point will have a @@ -34,34 +39,31 @@ class KNeighborsClassifier(NeighborsBase, NeighborsMixin, KNeighborsMixin, - [callable]: a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights. - - algorithm: {'auto', 'ball_tree', 'brute'}, optional + algorithm (string, optional): Algorithm used to compute the nearest neighbors: - - 'ball_tree' will use :class:`sklearn.neighbors.BallTree`. - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. - - leaf_size: int, optional (default = 30) - Leaf size passed to BallTree or KDTree. This can affect the + leaf_size (int, default = 30): + Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory - required to store the tree. The optimal value depends on the + required to store the tree. The optimal value depends on the nature of the problem. - metric: string or callable, (default - :func:`lp_distance `) - the distance metric to use for the tree. The default metric is + metric (string or callable, default + :func:`l2_distance `): + the distance metric to use for the tree. The default metric is the L2 distance. See the documentation of the metrics module for a list of available metrics. - metric_params: dict, optional (default = None) + metric_params (dict, optional): Additional keyword arguments for the metric function. - n_jobs: int or None, optional (default=None) + n_jobs (int or None, optional): The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. Doesn't affect :meth:`fit` method. 
- multivariate_metric: boolean, optional (default = False) + multivariate_metric (boolean, default = False): Indicates if the metric used is a sklearn distance between vectors (see :class:`~sklearn.neighbors.DistanceMetric`) or a functional metric of the module `skfda.misc.metrics` if ``False``. @@ -116,43 +118,39 @@ class KNeighborsClassifier(NeighborsBase, NeighborsMixin, KNeighborsMixin, https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm """ - def __init__(self, n_neighbors=5, weights='uniform', algorithm='auto', - leaf_size=30, metric='l2', metric_params=None, - n_jobs=1, multivariate_metric=False): - """Initialize the classifier.""" - super().__init__(n_neighbors=n_neighbors, - weights=weights, algorithm=algorithm, - leaf_size=leaf_size, metric=metric, - metric_params=metric_params, n_jobs=n_jobs, - multivariate_metric=multivariate_metric) - - def _init_estimator(self, sklearn_metric): - """Initialize the sklearn K neighbors estimator. - - Args: - sklearn_metric (pyfunc or 'precomputed'): Metric compatible with - sklearn API or matrix (n_samples, n_samples) with precomputed - distances. - - Returns: - Sklearn K Neighbors estimator initialized. - """ - return _KNeighborsClassifier( - n_neighbors=self.n_neighbors, weights=self.weights, - algorithm=self.algorithm, leaf_size=self.leaf_size, - metric=sklearn_metric, metric_params=self.metric_params, - n_jobs=self.n_jobs) + def __init__( + self, + n_neighbors=5, + weights='uniform', + algorithm='auto', + leaf_size=30, + metric='l2', + metric_params=None, + n_jobs=1, + multivariate_metric=False, + ): + super().__init__( + n_neighbors=n_neighbors, + weights=weights, + algorithm=algorithm, + leaf_size=leaf_size, + metric=metric, + metric_params=metric_params, + n_jobs=n_jobs, + multivariate_metric=multivariate_metric, + ) def predict_proba(self, X): - """Return probability estimates for the test data X. + """Calculate probability estimates for the test data X. 
Args: X (:class:`FDataGrid` or array-like): FDataGrid with the test samples or array (n_query, n_indexed) if metric == 'precomputed'. + Returns: - p: array of shape = [n_samples, n_classes], or a list of n_outputs - of such arrays if n_outputs > 1. + p (array of shape = (n_samples, n_classes), or a list of n_outputs + of such arrays if n_outputs > 1): The class probabilities of the input samples. Classes are ordered by lexicographic order. """ @@ -162,20 +160,45 @@ def predict_proba(self, X): return self.estimator_.predict_proba(X) + def _init_estimator(self, sklearn_metric): + """Initialize the sklearn K neighbors estimator. + + Args: + sklearn_metric (pyfunc or 'precomputed'): Metric compatible with + sklearn API or matrix (n_samples, n_samples) with precomputed + distances. + + Returns: + Sklearn K Neighbors estimator initialized. + """ + return _KNeighborsClassifier( + n_neighbors=self.n_neighbors, + weights=self.weights, + algorithm=self.algorithm, + leaf_size=self.leaf_size, + metric=sklearn_metric, + metric_params=self.metric_params, + n_jobs=self.n_jobs, + ) + -class RadiusNeighborsClassifier(NeighborsBase, NeighborsMixin, - RadiusNeighborsMixin, ClassifierMixin, - NeighborsClassifierMixin): +class RadiusNeighborsClassifier( + NeighborsBase, + NeighborsMixin, + RadiusNeighborsMixin, + ClassifierMixin, + NeighborsClassifierMixin, +): """Classifier implementing a vote among neighbors within a given radius. Parameters: - radius: float, optional (default = 1.0) + radius (float, default = 1.0): Range of parameter space to use by default for :meth:`radius_neighbors` queries. - weights: str or callable - weight function used in prediction. Possible values: - - - 'uniform': uniform weights. All points in each neighborhood + weights (str or callable, default = 'uniform'): + Weight function used in prediction. + Possible values: + - 'uniform': uniform weights. All points in each neighborhood are weighted equally. 
- 'distance': weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a @@ -183,41 +206,37 @@ class RadiusNeighborsClassifier(NeighborsBase, NeighborsMixin, - [callable]: a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights. - - Uniform weights are used by default. - algorithm: {'auto', 'ball_tree', 'brute'}, optional + algorithm (string, optional): Algorithm used to compute the nearest neighbors: - - 'ball_tree' will use :class:`sklearn.neighbors.BallTree`. - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. - - leaf_size: int, optional (default = 30) - Leaf size passed to BallTree. This can affect the + leaf_size (int, default = 30): + Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem. - metric: string or callable, (default - :func:`lp_distance `) - the distance metric to use for the tree. The default metric is + metric (string or callable, default + :func:`l2_distance `): + the distance metric to use for the tree. The default metric is the L2 distance. See the documentation of the metrics module for a list of available metrics. - outlier_label: int, optional (default = None) + outlier_label (int, optional): Label, which is given for outlier samples (samples with no neighbors on given radius). If set to None, ValueError is raised, when outlier is detected. - metric_params: dict, optional (default = None) + metric_params (dict, optional): Additional keyword arguments for the metric function. - n_jobs: int or None, optional (default=None) + n_jobs (int or None, optional): The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. 
``-1`` means using all processors. - multivariate_metric: boolean, optional (default = False) + multivariate_metric (boolean, default = False): Indicates if the metric used is a sklearn distance between vectors - (see :class:`sklearn.neighbors.DistanceMetric`) or a functional - metric of the module :mod:`skfda.misc.metrics`. + (see :class:`~sklearn.neighbors.DistanceMetric`) or a functional + metric of the module `skfda.misc.metrics` if ``False``. Examples: Firstly, we will create a toy dataset with 2 classes. @@ -258,14 +277,28 @@ class RadiusNeighborsClassifier(NeighborsBase, NeighborsMixin, https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm """ - def __init__(self, radius=1.0, weights='uniform', algorithm='auto', - leaf_size=30, metric='l2', metric_params=None, - outlier_label=None, n_jobs=1, multivariate_metric=False): - """Initialize the classifier.""" - super().__init__(radius=radius, weights=weights, algorithm=algorithm, - leaf_size=leaf_size, metric=metric, - metric_params=metric_params, n_jobs=n_jobs, - multivariate_metric=multivariate_metric) + def __init__( + self, + radius=1.0, + weights='uniform', + algorithm='auto', + leaf_size=30, + metric='l2', + metric_params=None, + outlier_label=None, + n_jobs=1, + multivariate_metric=False, + ): + super().__init__( + radius=radius, + weights=weights, + algorithm=algorithm, + leaf_size=leaf_size, + metric=metric, + metric_params=metric_params, + n_jobs=n_jobs, + multivariate_metric=multivariate_metric, + ) self.outlier_label = outlier_label @@ -273,7 +306,7 @@ def _init_estimator(self, sklearn_metric): """Initialize the sklearn radius neighbors estimator. Args: - sklearn_metric: (pyfunc or 'precomputed'): Metric compatible with + sklearn_metric (pyfunc or 'precomputed'): Metric compatible with sklearn API or matrix (n_samples, n_samples) with precomputed distances. @@ -281,7 +314,12 @@ def _init_estimator(self, sklearn_metric): Sklearn Radius Neighbors estimator initialized. 
""" return _RadiusNeighborsClassifier( - radius=self.radius, weights=self.weights, - algorithm=self.algorithm, leaf_size=self.leaf_size, - metric=sklearn_metric, metric_params=self.metric_params, - outlier_label=self.outlier_label, n_jobs=self.n_jobs) + radius=self.radius, + weights=self.weights, + algorithm=self.algorithm, + leaf_size=self.leaf_size, + metric=sklearn_metric, + metric_params=self.metric_params, + outlier_label=self.outlier_label, + n_jobs=self.n_jobs, + ) From 36bdfd087ab437b7318b0ab7b181749141157272 Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Mon, 7 Dec 2020 15:03:10 +0100 Subject: [PATCH 003/417] Remove FIXMEs --- skfda/ml/classification/_depth_classifiers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index f3248887b..68da77cf1 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -183,13 +183,13 @@ class DDTransform(BaseEstimator, TransformerMixin): (2017) The DDG-classifier in the functional setting. TEST, 26. 119-142. """ - def __init__(self, depth_methods: List[Depth] = None): # FIXME + def __init__(self, depth_methods: List[Depth] = None): if depth_methods is None: self.depth_methods = [ModifiedBandDepth(), IntegratedDepth()] else: self.depth_methods = depth_methods - def fit(self, X, y): # FIXME + def fit(self, X, y): """Fit the model using X as training data and y as target values. Args: @@ -210,14 +210,14 @@ def fit(self, X, y): # FIXME return self - def transform(self, X): # FIXME + def transform(self, X): """Transform the provided data using the defined map. Args: X (:class:`FDataGrid`): FDataGrid with the test samples. Returns: - X_new (array-like): array of shape (n_samples, G). FIXME + X_new (array-like): array of shape (n_samples, G). 
""" sklearn_check_is_fitted(self) From 1654273802d4feb351b24749be9299c14634759c Mon Sep 17 00:00:00 2001 From: vnmabus Date: Tue, 22 Dec 2020 02:37:35 +0100 Subject: [PATCH 004/417] First working version. --- skfda/misc/metrics.py | 7 ++- skfda/ml/_neighbors_base.py | 8 ++-- skfda/ml/clustering/__init__.py | 1 + skfda/ml/clustering/_hierarchical.py | 69 ++++++++++++++++++---------- 4 files changed, 53 insertions(+), 32 deletions(-) diff --git a/skfda/misc/metrics.py b/skfda/misc/metrics.py index 2fc60799c..a26ee114a 100644 --- a/skfda/misc/metrics.py +++ b/skfda/misc/metrics.py @@ -1,14 +1,13 @@ from builtins import isinstance -import scipy.integrate - import numpy as np +import scipy.integrate from .._utils import _pairwise_commutative -from ..preprocessing.registration import normalize_warping, ElasticRegistration +from ..preprocessing.registration import ElasticRegistration, normalize_warping from ..preprocessing.registration._warping import _normalize_scale from ..preprocessing.registration.elastic import SRSF -from ..representation import FData, FDataGrid, FDataBasis +from ..representation import FData, FDataBasis, FDataGrid def _check_compatible(fdata1, fdata2): diff --git a/skfda/ml/_neighbors_base.py b/skfda/ml/_neighbors_base.py index ac7b02662..4718eccd6 100644 --- a/skfda/ml/_neighbors_base.py +++ b/skfda/ml/_neighbors_base.py @@ -2,13 +2,11 @@ from abc import ABC -from sklearn.base import BaseEstimator -from sklearn.base import RegressorMixin -from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted - import numpy as np +from sklearn.base import BaseEstimator, RegressorMixin +from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted -from .. import FDataGrid, FData +from .. 
import FData, FDataGrid from ..misc.metrics import lp_distance diff --git a/skfda/ml/clustering/__init__.py b/skfda/ml/clustering/__init__.py index 1ac00d603..dc414ebf9 100644 --- a/skfda/ml/clustering/__init__.py +++ b/skfda/ml/clustering/__init__.py @@ -1,3 +1,4 @@ """Clustering.""" +from ._hierarchical import AgglomerativeClustering from ._kmeans import BaseKMeans, FuzzyCMeans, KMeans from ._neighbors_clustering import NearestNeighbors diff --git a/skfda/ml/clustering/_hierarchical.py b/skfda/ml/clustering/_hierarchical.py index ca01da4d2..45233a780 100644 --- a/skfda/ml/clustering/_hierarchical.py +++ b/skfda/ml/clustering/_hierarchical.py @@ -1,8 +1,14 @@ +from typing import Any + +import numpy as np import sklearn.cluster from sklearn.base import BaseEstimator, ClusterMixin +from ...misc.metrics import l2_distance, pairwise_distance +from ...representation import FData + -class AgglomerativeClustering(ClusterMixin, BaseEstimator): +class AgglomerativeClustering(ClusterMixin, BaseEstimator): # type: ignore """ Agglomerative Clustering Recursively merges the pair of clusters that minimally increases @@ -80,23 +86,33 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator): node and has children `children_[i - n_samples]`. Alternatively at the i-th iteration, children[i][0] and children[i][1] are merged to form node `n_samples + i` - Examples - -------- - >>> from sklearn.cluster import AgglomerativeClustering - >>> import numpy as np - >>> X = np.array([[1, 2], [1, 4], [1, 0], - ... [4, 2], [4, 4], [4, 0]]) - >>> clustering = AgglomerativeClustering().fit(X) - >>> clustering - AgglomerativeClustering() - >>> clustering.labels_ - array([1, 1, 1, 0, 0, 0]) + + Examples: + + >>> from skfda import FDataGrid + >>> from skfda.ml.clustering import AgglomerativeClustering + >>> import numpy as np + >>> data_matrix = np.array([[1, 2], [1, 4], [1, 0], + ... 
[4, 2], [4, 4], [4, 0]]) + >>> X = FDataGrid(data_matrix) + >>> clustering = AgglomerativeClustering().fit(X) + >>> clustering + AgglomerativeClustering() + >>> clustering.labels_ + array([0, 0, 1, 0, 0, 1], dtype=int64) """ - def __init__(self, n_clusters=2, *, metric="euclidean", - memory=None, - connectivity=None, compute_full_tree='auto', - linkage='ward', distance_threshold=None): + def __init__( + self, + n_clusters: int = 2, + *, + metric=l2_distance, + memory=None, + connectivity=None, + compute_full_tree='auto', + linkage='complete', + distance_threshold=None + ) -> None: self.n_clusters = n_clusters self.metric = metric self.memory = memory @@ -105,10 +121,10 @@ def __init__(self, n_clusters=2, *, metric="euclidean", self.linkage = linkage self.distance_threshold = distance_threshold - def _init_estimator(self): + def _init_estimator(self) -> None: self._estimator = sklearn.cluster.AgglomerativeClustering( n_clusters=self.n_clusters, - affinity="precomputed", + affinity='precomputed', memory=self.memory, connectivity=self.connectivity, compute_full_tree=self.compute_full_tree, @@ -116,17 +132,24 @@ def _init_estimator(self): distance_threshold=self.distance_threshold, ) - def fit(self, X, y=None): + def fit(self, X: FData, y: Any = None) -> 'AgglomerativeClustering': + self._init_estimator() - # TODO: Compute precomputed + if self.metric != 'precomputed': + data = pairwise_distance(self.metric)(X) - return self._estimator.fit(X, y) + self._estimator.fit(data, y) + return self def fit_predict(self, X, y=None): self._init_estimator() - # TODO: Compute precomputed + if self.metric != 'precomputed': + data = pairwise_distance(self.metric)(X) + + return self._estimator.fit_predict(data, y) - return self._estimator.fit_predict(X, y) + def __getattr__(self, attr): + return getattr(self._estimator, attr) From 37156c0a5b0b67eff90695151fb2a3045d7a17d6 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Thu, 31 Dec 2020 17:09:27 +0100 Subject: [PATCH 005/417] Remove Python 
3.6 support. --- .github/workflows/tests.yml | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e1c1eae94..73a717a71 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -11,7 +11,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ['3.6', '3.7', '3.8'] + python-version: ['3.7', '3.8'] steps: - uses: actions/checkout@v2 diff --git a/setup.py b/setup.py index 4a840bc26..19493fdcc 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ platforms=['any'], license='BSD', packages=find_packages(), - python_requires='>=3.6, <4', + python_requires='>=3.7, <4', classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', @@ -53,8 +53,8 @@ 'License :: OSI Approved :: BSD License', 'Natural Language :: English', 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', 'Topic :: Scientific/Engineering :: Mathematics', 'Topic :: Software Development :: Libraries :: Python Modules', ], From 4e625ad0ae37342629993b8ea206d50fe3b9341e Mon Sep 17 00:00:00 2001 From: vnmabus Date: Sat, 2 Jan 2021 13:28:44 +0100 Subject: [PATCH 006/417] Typed basis and constant. 
--- skfda/_utils/__init__.py | 2 +- skfda/_utils/_utils.py | 33 ++-- skfda/representation/_functional_data.py | 2 +- skfda/representation/_typing.py | 9 ++ skfda/representation/basis/_basis.py | 161 +++++++++++++------- skfda/representation/basis/_bspline.py | 4 +- skfda/representation/basis/_constant.py | 49 +++--- skfda/representation/basis/_fourier.py | 4 +- skfda/representation/basis/_vector_basis.py | 34 +++-- skfda/representation/grid.py | 51 ++++--- 10 files changed, 222 insertions(+), 127 deletions(-) create mode 100644 skfda/representation/_typing.py diff --git a/skfda/_utils/__init__.py b/skfda/_utils/__init__.py index 8852c319e..f988f773f 100644 --- a/skfda/_utils/__init__.py +++ b/skfda/_utils/__init__.py @@ -5,7 +5,6 @@ _check_array_key, _check_estimator, _classifier_get_classes, - _domain_range, _evaluate_grid, _FDataCallable, _int_to_real, @@ -13,6 +12,7 @@ _reshape_eval_points, _same_domain, _to_array_maybe_ragged, + _to_domain_range, _to_grid, _tuple_of_arrays, check_is_univariate, diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index ba6808e3d..25fc89623 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -1,17 +1,24 @@ """Module with generic methods""" +from __future__ import annotations + import functools import numbers -from typing import Any, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast import numpy as np import scipy.integrate from pandas.api.indexers import check_array_indexer +from ..representation._typing import DomainRange, DomainRangeLike from ..representation.evaluator import Evaluator RandomStateLike = Optional[Union[int, np.random.RandomState]] +if TYPE_CHECKING: + from ..representation import FData + from ..representation.basis import Basis + class _FDataCallable(): @@ -117,19 +124,23 @@ def _tuple_of_arrays(original_array): return tuple(_int_to_real(np.asarray(i)) for i in original_array) -def _domain_range(sequence): +def _to_domain_range(sequence: 
DomainRangeLike) -> DomainRange: + """Convert sequence to a proper domain range.""" - try: - iter(sequence[0]) - except TypeError: - sequence = (sequence,) + seq_aux = cast( + Sequence[Sequence[float]], + (sequence,) if isinstance(sequence[0], numbers.Real) else sequence, + ) - sequence = tuple(tuple(s) for s in sequence) + tuple_aux = tuple(tuple(s) for s in seq_aux) - if not all(len(s) == 2 for s in sequence): - raise ValueError("Domain intervals should have 2 bounds each") + if not all(len(s) == 2 and s[0] <= s[1] for s in tuple_aux): + raise ValueError( + "Domain intervals should have 2 bounds for " + "dimension: (lower, upper).", + ) - return sequence + return cast(DomainRange, tuple_aux) def _to_array_maybe_ragged(array, *, row_shape=None): @@ -212,7 +223,7 @@ def _cartesian_product(axes, flatten=True, return_shape=False): return cartesian -def _same_domain(fd, fd2): +def _same_domain(fd: Union[Basis, FData], fd2: Union[Basis, FData]) -> bool: """Check if the domain range of two objects is the same.""" return np.array_equal(fd.domain_range, fd2.domain_range) diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index 5843778fb..849b15260 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -23,6 +23,7 @@ import pandas.api.extensions from .._utils import _evaluate_grid, _reshape_eval_points +from ._typing import DomainRange from .evaluator import Evaluator from .extrapolation import _parse_extrapolation @@ -31,7 +32,6 @@ from .basis import Basis T = TypeVar('T', bound='FData') -DomainRange = Tuple[Tuple[float, float], ...] LabelTuple = Tuple[Optional[str], ...] diff --git a/skfda/representation/_typing.py b/skfda/representation/_typing.py new file mode 100644 index 000000000..52a530deb --- /dev/null +++ b/skfda/representation/_typing.py @@ -0,0 +1,9 @@ +"""Common types.""" +from typing import Sequence, Tuple, Union + +DomainRange = Tuple[Tuple[float, float], ...] 
+DomainRangeLike = Union[ + DomainRange, + Sequence[float], + Sequence[Sequence[float]] +] diff --git a/skfda/representation/basis/_basis.py b/skfda/representation/basis/_basis.py index e6e666e5f..5491a4c5b 100644 --- a/skfda/representation/basis/_basis.py +++ b/skfda/representation/basis/_basis.py @@ -4,34 +4,39 @@ the corresponding basis classes. """ +from __future__ import annotations + import copy import warnings from abc import ABC, abstractmethod -from typing import Tuple +from typing import Any, Optional, Tuple, TypeVar, Union import numpy as np +from matplotlib.figure import Figure -from ..._utils import _domain_range, _reshape_eval_points, _same_domain +from ..._utils import _reshape_eval_points, _same_domain, _to_domain_range +from .._typing import DomainRange, DomainRangeLike from . import _fdatabasis - -def _check_domain(domain_range): - for domain in domain_range: - if len(domain) != 2 or domain[0] >= domain[1]: - raise ValueError(f"The interval {domain} is not well-defined.") +T = TypeVar("T", bound='Basis') class Basis(ABC): """Defines the structure of a basis function system. Attributes: - domain_range (tuple): a tuple of length 2 containing the initial and + domain_range: a tuple of length 2 containing the initial and end values of the interval over which the basis can be evaluated. - n_basis (int): number of functions in the basis. + n_basis: number of functions in the basis. """ - def __init__(self, *, domain_range=None, n_basis: int = 1): + def __init__( + self, + *, + domain_range: Optional[DomainRangeLike] = None, + n_basis: int = 1, + ) -> None: """Basis constructor. 
Args: @@ -42,10 +47,7 @@ def __init__(self, *, domain_range=None, n_basis: int = 1): """ if domain_range is not None: - domain_range = _domain_range(domain_range) - - # Some checks - _check_domain(domain_range) + domain_range = _to_domain_range(domain_range) if n_basis < 1: raise ValueError( @@ -57,9 +59,28 @@ def __init__(self, *, domain_range=None, n_basis: int = 1): super().__init__() - def __call__(self, *args, **kwargs) -> np.ndarray: - """Evaluate the basis using :meth:`evaluate`.""" - return self.evaluate(*args, **kwargs) + def __call__( + self, + eval_points: np.ndarray, + *, + derivative: int = 0, + ) -> np.ndarray: + """Evaluate Basis objects. + + Evaluates the basis function system or its derivatives at a list of + given values. + + Args: + eval_points (array_like): List of points where the basis is + evaluated. + + Returns: + Matrix whose rows are the values of the each + basis function or its derivatives at the values specified in + eval_points. + + """ + return self.evaluate(eval_points, derivative=derivative) @property def dim_domain(self) -> int: @@ -70,7 +91,7 @@ def dim_codomain(self) -> int: return 1 @property - def domain_range(self) -> Tuple[Tuple[float, float], ...]: + def domain_range(self) -> DomainRange: if self._domain_range is None: return ((0, 1),) * self.dim_domain else: @@ -81,11 +102,19 @@ def n_basis(self) -> int: return self._n_basis @abstractmethod - def _evaluate(self, eval_points) -> np.ndarray: + def _evaluate( + self, + eval_points: np.ndarray, + ) -> np.ndarray: """Subclasses must override this to provide basis evaluation.""" pass - def evaluate(self, eval_points, *, derivative: int = 0) -> np.ndarray: + def evaluate( + self, + eval_points: np.ndarray, + *, + derivative: int = 0, + ) -> np.ndarray: """Evaluate Basis objects and its derivatives. 
Evaluates the basis function system or its derivatives at a list of @@ -119,7 +148,7 @@ def evaluate(self, eval_points, *, derivative: int = 0) -> np.ndarray: def __len__(self) -> int: return self.n_basis - def derivative(self, *, order: int = 1) -> '_fdatabasis.FDataBasis': + def derivative(self, *, order: int = 1) -> _fdatabasis.FDataBasis: """Construct a FDataBasis object containing the derivative. Args: @@ -129,10 +158,13 @@ def derivative(self, *, order: int = 1) -> '_fdatabasis.FDataBasis': Derivative object. """ - return self.to_basis().derivative(order=order) - def _derivative_basis_and_coefs(self, coefs: np.ndarray, order: int = 1): + def _derivative_basis_and_coefs( + self: T, + coefs: np.ndarray, + order: int = 1, + ) -> Tuple[T, np.ndarray]: """ Subclasses can override this to provide derivative construction. @@ -141,11 +173,12 @@ def _derivative_basis_and_coefs(self, coefs: np.ndarray, order: int = 1): although is recommended to provide both if possible. """ - raise NotImplementedError(f"{type(self)} basis does not support " - "the construction of a basis of the " - "derivatives.") + raise NotImplementedError( + f"{type(self)} basis does not support the construction of a " + "basis of the derivatives.", + ) - def plot(self, chart=None, **kwargs): + def plot(self, *args: Any, **kwargs: Any) -> Figure: """Plot the basis object or its derivatives. Args: @@ -159,9 +192,13 @@ def plot(self, chart=None, **kwargs): fig (figure): figure object in which the graphs are plotted. """ - self.to_basis().plot(chart=chart, **kwargs) + self.to_basis().plot(*args, **kwargs) - def _coordinate_nonfull(self, fdatabasis, key): + def _coordinate_nonfull( + self, + fdatabasis: _fdatabasis.FDataBasis, + key: Union[int, range], + ) -> _fdatabasis.FDataBasis: """ Returns a fdatagrid for the coordinate functions indexed by key. 
@@ -173,8 +210,12 @@ def _coordinate_nonfull(self, fdatabasis, key): """ raise NotImplementedError("Coordinate indexing not implemented") - def _coordinate(self, fdatabasis, key): - """Returns a fdatagrid for the coordinate functions indexed by key.""" + def _coordinate( + self, + fdatabasis: _fdatabasis.FDataBasis, + key: Union[int, slice], + ) -> _fdatabasis.FDataBasis: + """Returns a fdatabasis for the coordinate functions indexed by key.""" # Raises error if not in range and normalize key r_key = range(self.dim_codomain)[key] @@ -192,34 +233,34 @@ def _coordinate(self, fdatabasis, key): return self._coordinate_nonfull(fdatabasis=fdatabasis, key=r_key) - def rescale(self, domain_range=None): + def rescale(self: T, domain_range: Optional[DomainRangeLike] = None) -> T: r"""Return a copy of the basis with a new :term:`domain` range, with the corresponding values rescaled to the new bounds. Args: - domain_range (tuple, optional): Definition of the interval + domain_range: Definition of the interval where the basis defines a space. Defaults uses the same as the original basis. + + Return: + Rescaled copy- """ return self.copy(domain_range=domain_range) - def copy(self, domain_range=None): + def copy(self: T, domain_range: Optional[DomainRangeLike] = None) -> T: """Basis copy""" new_copy = copy.deepcopy(self) if domain_range is not None: - domain_range = _domain_range(domain_range) - - # Some checks - _check_domain(domain_range) + domain_range = _to_domain_range(domain_range) new_copy._domain_range = domain_range return new_copy - def to_basis(self) -> '_fdatabasis.FDataBasis': + def to_basis(self) -> _fdatabasis.FDataBasis: """Convert the Basis to FDatabasis. Returns: @@ -230,16 +271,10 @@ def to_basis(self) -> '_fdatabasis.FDataBasis': from . 
import FDataBasis return FDataBasis(self.copy(), np.identity(self.n_basis)) - def _list_to_R(self, knots): - retstring = "c(" - for i in range(0, len(knots)): - retstring = retstring + str(knots[i]) + ", " - return retstring[0:len(retstring) - 2] + ")" - - def _to_R(self): + def _to_R(self) -> str: raise NotImplementedError - def inner_product_matrix(self, other: 'Basis' = None) -> np.array: + def inner_product_matrix(self, other: Optional[Basis] = None) -> np.array: r"""Return the Inner Product Matrix of a pair of basis. The Inner Product Matrix is defined as @@ -311,27 +346,47 @@ def gram_matrix(self) -> np.array: return gram - def _add_same_basis(self, coefs1, coefs2): + def _add_same_basis( + self: T, + coefs1: np.ndarray, + coefs2: np.ndarray, + ) -> Tuple[T, np.ndarray]: return self.copy(), coefs1 + coefs2 - def _add_constant(self, coefs, constant): + def _add_constant( + self: T, + coefs: np.ndarray, + constant: float + ) -> Tuple[T, np.ndarray]: coefs = coefs.copy() constant = np.array(constant) coefs[:, 0] = coefs[:, 0] + constant return self.copy(), coefs - def _sub_same_basis(self, coefs1, coefs2): + def _sub_same_basis( + self: T, + coefs1: np.ndarray, + coefs2: np.ndarray, + ) -> Tuple[T, np.ndarray]: return self.copy(), coefs1 - coefs2 - def _sub_constant(self, coefs, other): + def _sub_constant( + self: T, + coefs: np.ndarray, + other: float, + ) -> Tuple[T, np.ndarray]: coefs = coefs.copy() other = np.array(other) coefs[:, 0] = coefs[:, 0] - other return self.copy(), coefs - def _mul_constant(self, coefs, other): + def _mul_constant( + self: T, + coefs: np.ndarray, + other: float, + ) -> Tuple[T, np.ndarray]: coefs = coefs.copy() other = np.atleast_2d(other).reshape(-1, 1) coefs = coefs * other @@ -343,7 +398,7 @@ def __repr__(self) -> str: return (f"{self.__class__.__name__}(domain_range={self.domain_range}, " f"n_basis={self.n_basis})") - def __eq__(self, other) -> bool: + def __eq__(self, other: Any) -> bool: """Equality of Basis""" return 
(type(self) == type(other) and _same_domain(self, other) diff --git a/skfda/representation/basis/_bspline.py b/skfda/representation/basis/_bspline.py index 791699398..bc8365631 100644 --- a/skfda/representation/basis/_bspline.py +++ b/skfda/representation/basis/_bspline.py @@ -3,7 +3,7 @@ from numpy import polyint, polymul, polyval from scipy.interpolate import BSpline as SciBSpline, PPoly -from ..._utils import _domain_range +from ..._utils import _to_domain_range from ._basis import Basis @@ -101,7 +101,7 @@ def __init__(self, domain_range=None, n_basis=None, order=4, knots=None): """ if domain_range is not None: - domain_range = _domain_range(domain_range) + domain_range = _to_domain_range(domain_range) if len(domain_range) != 1: raise ValueError("Domain range should be unidimensional.") diff --git a/skfda/representation/basis/_constant.py b/skfda/representation/basis/_constant.py index 220adc8b6..9854cdbd8 100644 --- a/skfda/representation/basis/_constant.py +++ b/skfda/representation/basis/_constant.py @@ -1,15 +1,19 @@ +from typing import Optional, Tuple, TypeVar + import numpy as np -from ..._utils import _same_domain +from .._typing import DomainRangeLike from ._basis import Basis +T = TypeVar("T", bound='Constant') + class Constant(Basis): """Constant basis. Basis for constant functions - Attributes: + Parameters: domain_range (tuple): a tuple of length 2 containing the initial and end values of the interval over which the basis can be evaluated. @@ -21,28 +25,29 @@ class Constant(Basis): """ - def __init__(self, domain_range=None): - """Constant basis constructor. - - Args: - domain_range (tuple): Tuple defining the domain over which the - function is defined. 
- - """ + def __init__(self, domain_range: Optional[DomainRangeLike] = None) -> None: + """Constant basis constructor.""" super().__init__(domain_range=domain_range, n_basis=1) - def _evaluate(self, eval_points): + def _evaluate(self, eval_points: np.ndarray) -> np.ndarray: return np.ones((1, len(eval_points))) - def _derivative_basis_and_coefs(self, coefs, order=1): - return ((self.copy(), coefs.copy()) if order == 0 - else (self.copy(), np.zeros(coefs.shape))) - - def _gram_matrix(self): - return np.array([[self.domain_range[0][1] - - self.domain_range[0][0]]]) - - def _to_R(self): + def _derivative_basis_and_coefs( + self: T, + coefs: np.ndarray, + order: int = 1, + ) -> Tuple[T, np.ndarray]: + return ( + (self.copy(), coefs.copy()) if order == 0 + else (self.copy(), np.zeros(coefs.shape)) + ) + + def _gram_matrix(self) -> np.ndarray: + return np.array( + [[self.domain_range[0][1] - self.domain_range[0][0]]], + ) + + def _to_R(self) -> str: # noqa: N802 drange = self.domain_range[0] - return "create.constant.basis(rangeval = c(" + str(drange[0]) + "," +\ - str(drange[1]) + "))" + drange_str = f"c({str(drange[0])}, {str(drange[1])})" + return f"create.constant.basis(rangeval = {drange_str})" diff --git a/skfda/representation/basis/_fourier.py b/skfda/representation/basis/_fourier.py index a6d89623e..fce88f1c8 100644 --- a/skfda/representation/basis/_fourier.py +++ b/skfda/representation/basis/_fourier.py @@ -1,6 +1,6 @@ import numpy as np -from ..._utils import _domain_range +from ..._utils import _to_domain_range from ._basis import Basis @@ -83,7 +83,7 @@ def __init__(self, domain_range=None, n_basis=3, period=None): """ if domain_range is not None: - domain_range = _domain_range(domain_range) + domain_range = _to_domain_range(domain_range) if len(domain_range) != 1: raise ValueError("Domain range should be unidimensional.") diff --git a/skfda/representation/basis/_vector_basis.py b/skfda/representation/basis/_vector_basis.py index 29b7080c0..4a9a234fe 100644 
--- a/skfda/representation/basis/_vector_basis.py +++ b/skfda/representation/basis/_vector_basis.py @@ -1,8 +1,10 @@ +from typing import Iterable + import numpy as np import scipy.linalg from ..._utils import _same_domain -from ._basis import Basis +from ._basis import Basis, Tuple class VectorValued(Basis): @@ -57,39 +59,43 @@ class VectorValued(Basis): """ - def __init__(self, basis_list): + def __init__(self, basis_list: Iterable[Basis]) -> None: basis_list = tuple(basis_list) if not all(b.dim_codomain == 1 for b in basis_list): - raise ValueError("The basis functions must be " - "scalar valued") + raise ValueError( + "The basis functions must be scalar valued", + ) if any(b.dim_domain != basis_list[0].dim_domain or not _same_domain(b, basis_list[0]) for b in basis_list): - raise ValueError("The basis must all have the same domain " - "dimension an range") + raise ValueError( + "The basis must all have the same domain " + "dimension and range", + ) self._basis_list = basis_list super().__init__( domain_range=basis_list[0].domain_range, - n_basis=sum(b.n_basis for b in basis_list)) + n_basis=sum(b.n_basis for b in basis_list), + ) @property - def basis_list(self): + def basis_list(self) -> Tuple[Basis, ...]: return self._basis_list @property - def dim_domain(self): + def dim_domain(self) -> int: return self.basis_list[0].dim_domain @property - def dim_codomain(self): + def dim_codomain(self) -> int: return len(self.basis_list) - def _evaluate(self, eval_points): + def _evaluate(self, eval_points: np.ndarray) -> np.ndarray: matrix = np.zeros((self.n_basis, len(eval_points), self.dim_codomain)) n_basis_evaluated = 0 @@ -103,7 +109,11 @@ def _evaluate(self, eval_points): return matrix - def _derivative_basis_and_coefs(self, coefs, order=1): + def _derivative_basis_and_coefs( + self, + coefs: np.ndarray, + order: int = 1, + ) -> Tuple[Basis, np.ndarray]: n_basis_list = [b.n_basis for b in self.basis_list] indexes = np.cumsum(n_basis_list) diff --git 
a/skfda/representation/grid.py b/skfda/representation/grid.py index b75caf0b2..f7bbf231e 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -18,8 +18,8 @@ from .._utils import ( _check_array_key, - _domain_range, _int_to_real, + _to_domain_range, _tuple_of_arrays, constants, ) @@ -207,7 +207,7 @@ def __init__(self, data_matrix, grid_points=None, # Default value for domain_range is a list of tuples with # the first and last element of each list of the grid_points. - self._domain_range = _domain_range(domain_range) + self._domain_range = _to_domain_range(domain_range) if len(self._domain_range) != self.dim_domain: raise ValueError("Incorrect shape of domain_range.") @@ -858,18 +858,20 @@ def to_grid(self, grid_points=None, *, sample_points=None): return self.copy(data_matrix=self.evaluate(grid_points, grid=True), grid_points=grid_points) - def copy(self, *, - deep=False, # For Pandas compatibility - data_matrix=None, - grid_points=None, - sample_points=None, - domain_range=None, - dataset_name=None, - argument_names=None, - coordinate_names=None, - sample_names=None, - extrapolation=None, - interpolation=None): + def copy( + self, *, + deep=False, # For Pandas compatibility + data_matrix=None, + grid_points=None, + sample_points=None, + domain_range=None, + dataset_name=None, + argument_names=None, + coordinate_names=None, + sample_names=None, + extrapolation=None, + interpolation=None, + ): """Returns a copy of the FDataGrid. 
If an argument is provided the corresponding attribute in the new copy @@ -915,14 +917,17 @@ def copy(self, *, if interpolation is None: interpolation = self.interpolation - return FDataGrid(data_matrix, grid_points=grid_points, - domain_range=domain_range, - dataset_name=dataset_name, - argument_names=argument_names, - coordinate_names=coordinate_names, - sample_names=sample_names, - extrapolation=extrapolation, - interpolation=interpolation) + return FDataGrid( + data_matrix, + grid_points=grid_points, + domain_range=domain_range, + dataset_name=dataset_name, + argument_names=argument_names, + coordinate_names=coordinate_names, + sample_names=sample_names, + extrapolation=extrapolation, + interpolation=interpolation, + ) def shift(self, shifts, *, restrict_domain=False, extrapolation=None, eval_points=None): @@ -1186,7 +1191,7 @@ def __init__(self, grid_points, dim_codomain, domain_range=None) -> None: domain_range = np.array( [(s[0], s[-1]) for s in self.grid_points]) - self.domain_range = _domain_range(domain_range) + self.domain_range = _to_domain_range(domain_range) self.dim_codomain = dim_codomain @classmethod From 54c2e780801b97b82fb7cffda1eed0d6b29ea2f1 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Sat, 2 Jan 2021 19:56:44 +0100 Subject: [PATCH 007/417] Style and documentation improvements. --- docs/_templates/autosummary/class.rst | 13 +- docs/conf.py | 5 +- docs/glossary.rst | 6 + docs/modules/representation.rst | 10 ++ setup.cfg | 1 + skfda/representation/basis/_basis.py | 170 +++++++++++----------- skfda/representation/basis/_constant.py | 4 +- skfda/representation/basis/_fdatabasis.py | 2 +- skfda/representation/grid.py | 2 +- 9 files changed, 112 insertions(+), 101 deletions(-) diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst index c97621a73..fede4ca4e 100644 --- a/docs/_templates/autosummary/class.rst +++ b/docs/_templates/autosummary/class.rst @@ -14,18 +14,9 @@ {%- endfor %} {% endif %} - .. 
automethod:: __init__ - {% endblock %} - - {% block attributes %} - {% if attributes %} - .. rubric:: Attributes - - .. autosummary:: - {% for item in attributes %} - ~{{ name }}.{{ item }} + {% for item in methods %} + .. automethod:: {{ item }} {%- endfor %} - {% endif %} {% endblock %} .. include:: {{package}}/backreferences/{{fullname}}.examples \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 2be688a8e..46bc37d2a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,6 +22,9 @@ import sys import pkg_resources +# -- Extensions to the Napoleon GoogleDocstring class --------------------- +from sphinx.ext.napoleon.docstring import GoogleDocstring + try: release = pkg_resources.get_distribution('scikit-fda').version except pkg_resources.DistributionNotFound: @@ -245,8 +248,6 @@ # Taken from # https://michaelgoerz.net/notes/extending-sphinx-napoleon-docstring-sections.html -# -- Extensions to the Napoleon GoogleDocstring class --------------------- -from sphinx.ext.napoleon.docstring import GoogleDocstring # first, we define new methods for any new sections and add them to the class diff --git a/docs/glossary.rst b/docs/glossary.rst index 502fb2829..13ccd49dc 100644 --- a/docs/glossary.rst +++ b/docs/glossary.rst @@ -29,6 +29,12 @@ General Concepts domain The set of possible input values of a function. + domain range + The valid range where a function can be evaluated. It is a Python + sequence that contains, for each dimension of the domain, a tuple with + the minimum and maximum values for that dimension. Usually used in + plotting functions and as the domain of integration for this function. 
+ FDA Functional Data Analysis The branch of statistics that deals with curves, surfaces or other diff --git a/docs/modules/representation.rst b/docs/modules/representation.rst index 83efe532a..a5f53408a 100644 --- a/docs/modules/representation.rst +++ b/docs/modules/representation.rst @@ -73,6 +73,16 @@ several :math:`\mathbb{R}^n \to \mathbb{R}` bases. :toctree: autosummary skfda.representation.basis.VectorValued + +All the aforementioned basis inherit the basics from an +abstract base class :class:`Basis`. Users can create their own +basis subclassing this class and implementing the required +methods. + +.. autosummary:: + :toctree: autosummary + + skfda.representation.basis.Basis Generic representation ---------------------- diff --git a/setup.cfg b/setup.cfg index a3072c196..383ac3358 100644 --- a/setup.cfg +++ b/setup.cfg @@ -90,6 +90,7 @@ rst-roles = allowed-domain-names = data, obj, result, val, value, values, var # Needs to be tuned +max-imports = 20 max-arguments = 10 max-attributes = 10 max-line-complexity = 25 diff --git a/skfda/representation/basis/_basis.py b/skfda/representation/basis/_basis.py index 5491a4c5b..9dcef68e4 100644 --- a/skfda/representation/basis/_basis.py +++ b/skfda/representation/basis/_basis.py @@ -1,9 +1,5 @@ -"""Module for functional data manipulation in a basis system. +"""Abstract base class for basis.""" -Defines functional data object in a basis function system representation and -the corresponding basis classes. - -""" from __future__ import annotations import copy @@ -22,11 +18,11 @@ class Basis(ABC): - """Defines the structure of a basis function system. + """Defines the structure of a basis of functions. - Attributes: - domain_range: a tuple of length 2 containing the initial and - end values of the interval over which the basis can be evaluated. + Parameters: + domain_range: The :term:`domain range` over which the basis can be + evaluated. n_basis: number of functions in the basis. 
""" @@ -37,14 +33,7 @@ def __init__( domain_range: Optional[DomainRangeLike] = None, n_basis: int = 1, ) -> None: - """Basis constructor. - - Args: - domain_range (tuple or list of tuples, optional): Definition of the - interval where the basis defines a space. Defaults to (0,1). - n_basis: Number of functions that form the basis. Defaults to 1. - - """ + """Basis constructor.""" if domain_range is not None: domain_range = _to_domain_range(domain_range) @@ -67,12 +56,15 @@ def __call__( ) -> np.ndarray: """Evaluate Basis objects. - Evaluates the basis function system or its derivatives at a list of - given values. + Evaluates the basis functions at a list of given values. Args: - eval_points (array_like): List of points where the basis is + eval_points: List of points where the basis is evaluated. + derivative: order of the derivative. + + .. deprecated:: 0.4 + Use `derivative` method instead. Returns: Matrix whose rows are the values of the each @@ -94,8 +86,8 @@ def dim_codomain(self) -> int: def domain_range(self) -> DomainRange: if self._domain_range is None: return ((0, 1),) * self.dim_domain - else: - return self._domain_range + + return self._domain_range @property def n_basis(self) -> int: @@ -106,7 +98,12 @@ def _evaluate( self, eval_points: np.ndarray, ) -> np.ndarray: - """Subclasses must override this to provide basis evaluation.""" + """ + Evaluate Basis object. + + Subclasses must override this to provide basis evaluation. + + """ pass def evaluate( @@ -117,12 +114,15 @@ def evaluate( ) -> np.ndarray: """Evaluate Basis objects and its derivatives. - Evaluates the basis function system or its derivatives at a list of - given values. + Evaluates the basis functions at a list of given values. Args: - eval_points (array_like): List of points where the basis is + eval_points: List of points where the basis is evaluated. + derivative: order of the derivative. + + .. deprecated:: 0.4 + Use `derivative` method instead. 
Returns: Matrix whose rows are the values of the each @@ -133,17 +133,23 @@ def evaluate( if derivative < 0: raise ValueError("derivative only takes non-negative values.") elif derivative != 0: - warnings.warn("Parameter derivative is deprecated. Use the " - "derivative function instead.", DeprecationWarning) + warnings.warn( + "Parameter derivative is deprecated. Use the " + "derivative method instead.", + DeprecationWarning, + ) return self.derivative(order=derivative)(eval_points) - eval_points = _reshape_eval_points(eval_points, - aligned=True, - n_samples=self.n_basis, - dim_domain=self.dim_domain) + eval_points = _reshape_eval_points( + eval_points, + aligned=True, + n_samples=self.n_basis, + dim_domain=self.dim_domain, + ) return self._evaluate(eval_points).reshape( - (self.n_basis, len(eval_points), self.dim_codomain)) + (self.n_basis, len(eval_points), self.dim_codomain), + ) def __len__(self) -> int: return self.n_basis @@ -161,16 +167,14 @@ def derivative(self, *, order: int = 1) -> _fdatabasis.FDataBasis: return self.to_basis().derivative(order=order) def _derivative_basis_and_coefs( - self: T, - coefs: np.ndarray, - order: int = 1, + self: T, + coefs: np.ndarray, + order: int = 1, ) -> Tuple[T, np.ndarray]: """ - Subclasses can override this to provide derivative construction. + Return basis and coefficients of the derivative. - A basis can provide derivative evaluation at given points - without providing a basis representation for its derivatives, - although is recommended to provide both if possible. + Subclasses can override this to provide derivative construction. """ raise NotImplementedError( @@ -182,14 +186,13 @@ def plot(self, *args: Any, **kwargs: Any) -> Figure: """Plot the basis object or its derivatives. Args: - chart (figure object, axe or list of axes, optional): figure over - with the graphs are plotted or axis over where the graphs are - plotted. 
- **kwargs: keyword arguments to be passed to the + args: arguments to be passed to the + fdata.plot function. + kwargs: keyword arguments to be passed to the fdata.plot function. Returns: - fig (figure): figure object in which the graphs are plotted. + Figure object in which the graphs are plotted. """ self.to_basis().plot(*args, **kwargs) @@ -200,7 +203,7 @@ def _coordinate_nonfull( key: Union[int, range], ) -> _fdatabasis.FDataBasis: """ - Returns a fdatagrid for the coordinate functions indexed by key. + Return a fdatagrid for the coordinate functions indexed by key. Subclasses can override this to provide coordinate indexing. @@ -215,8 +218,7 @@ def _coordinate( fdatabasis: _fdatabasis.FDataBasis, key: Union[int, slice], ) -> _fdatabasis.FDataBasis: - """Returns a fdatabasis for the coordinate functions indexed by key.""" - + """Return a fdatabasis for the coordinate functions indexed by key.""" # Raises error if not in range and normalize key r_key = range(self.dim_codomain)[key] @@ -224,39 +226,37 @@ def _coordinate( raise IndexError("Empty number of coordinates selected") # Full fdatabasis case - if (self.dim_codomain == 1 and r_key == 0) or ( - isinstance(r_key, range) and len(r_key) == self.dim_codomain): - + if ( + (self.dim_codomain == 1 and r_key == 0) + or (isinstance(r_key, range) and len(r_key) == self.dim_codomain) + ): return fdatabasis.copy() - else: - - return self._coordinate_nonfull(fdatabasis=fdatabasis, key=r_key) + return self._coordinate_nonfull(fdatabasis=fdatabasis, key=r_key) def rescale(self: T, domain_range: Optional[DomainRangeLike] = None) -> T: - r"""Return a copy of the basis with a new :term:`domain` range, with - the corresponding values rescaled to the new bounds. + r""" + Return a copy of the basis with a new :term:`domain` range. - Args: - domain_range: Definition of the interval - where the basis defines a space. Defaults uses the same as - the original basis. 
+ Args: + domain_range: Definition of the interval + where the basis defines a space. Defaults uses the same as + the original basis. - Return: - Rescaled copy- - """ + Returns: + Rescaled copy. + """ return self.copy(domain_range=domain_range) def copy(self: T, domain_range: Optional[DomainRangeLike] = None) -> T: - """Basis copy""" - + """Basis copy.""" new_copy = copy.deepcopy(self) if domain_range is not None: domain_range = _to_domain_range(domain_range) - new_copy._domain_range = domain_range + new_copy._domain_range = domain_range # noqa: WPS437 return new_copy @@ -271,7 +271,7 @@ def to_basis(self) -> _fdatabasis.FDataBasis: from . import FDataBasis return FDataBasis(self.copy(), np.identity(self.n_basis)) - def _to_R(self) -> str: + def _to_R(self) -> str: # noqa: N802 raise NotImplementedError def inner_product_matrix(self, other: Optional[Basis] = None) -> np.array: @@ -280,9 +280,9 @@ def inner_product_matrix(self, other: Optional[Basis] = None) -> np.array: The Inner Product Matrix is defined as .. math:: - IP_{ij} = \langle\phi_i, \theta_j\rangle + I_{ij} = \langle\phi_i, \theta_j\rangle - where :math:`\phi_i` is the ith element of the basi and + where :math:`\phi_i` is the ith element of the basis and :math:`\theta_j` is the jth element of the second basis. This matrix helps on the calculation of the inner product between objects on two basis and for the change of basis. @@ -304,10 +304,7 @@ def inner_product_matrix(self, other: Optional[Basis] = None) -> np.array: return inner_product_matrix(self, other) def _gram_matrix_numerical(self) -> np.array: - """ - Compute the Gram matrix numerically. - - """ + """Compute the Gram matrix numerically.""" from ...misc import inner_product_matrix return inner_product_matrix(self, force_numerical=True) @@ -323,7 +320,8 @@ def _gram_matrix(self) -> np.array: return self._gram_matrix_numerical() def gram_matrix(self) -> np.array: - r"""Return the Gram Matrix of a basis + r""" + Return the Gram Matrix of a basis. 
The Gram Matrix is defined as @@ -337,7 +335,6 @@ def gram_matrix(self) -> np.array: Gram Matrix of the basis. """ - gram = getattr(self, "_gram_matrix_cached", None) if gram is None: @@ -356,7 +353,7 @@ def _add_same_basis( def _add_constant( self: T, coefs: np.ndarray, - constant: float + constant: float, ) -> Tuple[T, np.ndarray]: coefs = coefs.copy() constant = np.array(constant) @@ -389,21 +386,26 @@ def _mul_constant( ) -> Tuple[T, np.ndarray]: coefs = coefs.copy() other = np.atleast_2d(other).reshape(-1, 1) - coefs = coefs * other + coefs *= other return self.copy(), coefs def __repr__(self) -> str: """Representation of a Basis object.""" - return (f"{self.__class__.__name__}(domain_range={self.domain_range}, " - f"n_basis={self.n_basis})") + return ( + f"{self.__class__.__name__}(" + f"domain_range={self.domain_range}, " + f"n_basis={self.n_basis})" + ) def __eq__(self, other: Any) -> bool: - """Equality of Basis""" - return (type(self) == type(other) - and _same_domain(self, other) - and self.n_basis == other.n_basis) + """Test equality of Basis.""" + return ( + isinstance(other, type(self)) + and _same_domain(self, other) + and self.n_basis == other.n_basis + ) def __hash__(self) -> int: - """Hash of Basis""" + """Hash a Basis.""" return hash((self.domain_range, self.n_basis)) diff --git a/skfda/representation/basis/_constant.py b/skfda/representation/basis/_constant.py index 9854cdbd8..3d49af323 100644 --- a/skfda/representation/basis/_constant.py +++ b/skfda/representation/basis/_constant.py @@ -14,8 +14,8 @@ class Constant(Basis): Basis for constant functions Parameters: - domain_range (tuple): a tuple of length 2 containing the initial and - end values of the interval over which the basis can be evaluated. + domain_range: The :term:`domain range` over which the basis can be + evaluated. 
Examples: Defines a contant base over the interval :math:`[0, 5]` consisting diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 7c324bad7..3525eb51f 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -627,7 +627,7 @@ def equals(self, other): def __eq__(self, other): """Elementwise equality of FDataBasis""" - if not isinstance(self, type(other)) or self.dtype != other.dtype: + if not isinstance(other, type(self)) or self.dtype != other.dtype: if other is pandas.NA: return self.isna() if pandas.api.types.is_list_like(other) and not isinstance( diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index f7bbf231e..4f75d1078 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -567,7 +567,7 @@ def equals(self, other): def __eq__(self, other): """Elementwise equality of FDataGrid""" - if not isinstance(self, type(other)) or self.dtype != other.dtype: + if not isinstance(other, type(self)) or self.dtype != other.dtype: if other is pandas.NA: return self.isna() if pandas.api.types.is_list_like(other) and not isinstance( From a94a5c23430abac92f19db2378a9147f886ca9cd Mon Sep 17 00:00:00 2001 From: vnmabus Date: Sun, 3 Jan 2021 03:19:49 +0100 Subject: [PATCH 008/417] Fix style in vector basis. 
--- skfda/representation/basis/_vector_basis.py | 83 ++++++++++++++------- 1 file changed, 56 insertions(+), 27 deletions(-) diff --git a/skfda/representation/basis/_vector_basis.py b/skfda/representation/basis/_vector_basis.py index 4a9a234fe..1d6996d0c 100644 --- a/skfda/representation/basis/_vector_basis.py +++ b/skfda/representation/basis/_vector_basis.py @@ -1,10 +1,17 @@ -from typing import Iterable +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Iterable, Tuple, TypeVar, Union import numpy as np import scipy.linalg from ..._utils import _same_domain -from ._basis import Basis, Tuple +from ._basis import Basis + +if TYPE_CHECKING: + from .. import FDataBasis + +T = TypeVar("T", bound='VectorValued') class VectorValued(Basis): @@ -68,9 +75,11 @@ def __init__(self, basis_list: Iterable[Basis]) -> None: "The basis functions must be scalar valued", ) - if any(b.dim_domain != basis_list[0].dim_domain or - not _same_domain(b, basis_list[0]) - for b in basis_list): + if any( + b.dim_domain != basis_list[0].dim_domain + or not _same_domain(b, basis_list[0]) + for b in basis_list + ): raise ValueError( "The basis must all have the same domain " "dimension and range", @@ -98,70 +107,90 @@ def dim_codomain(self) -> int: def _evaluate(self, eval_points: np.ndarray) -> np.ndarray: matrix = np.zeros((self.n_basis, len(eval_points), self.dim_codomain)) - n_basis_evaluated = 0 + n_basis_eval = 0 - basis_evaluations = [b._evaluate(eval_points) for b in self.basis_list] + basis_evaluations = [b.evaluate(eval_points) for b in self.basis_list] for i, ev in enumerate(basis_evaluations): - matrix[n_basis_evaluated:n_basis_evaluated + len(ev), :, i] = ev - n_basis_evaluated += len(ev) + matrix[n_basis_eval:n_basis_eval + len(ev), :, i] = ev[..., 0] + n_basis_eval += len(ev) return matrix def _derivative_basis_and_coefs( - self, - coefs: np.ndarray, - order: int = 1, - ) -> Tuple[Basis, np.ndarray]: + self: T, + coefs: np.ndarray, + order: int = 1, + ) 
-> Tuple[T, np.ndarray]: n_basis_list = [b.n_basis for b in self.basis_list] indexes = np.cumsum(n_basis_list) coefs_per_basis = np.hsplit(coefs, indexes[:-1]) - basis_and_coefs = [b._derivative_basis_and_coefs( - c, order=order) for b, c in zip(self.basis_list, coefs_per_basis)] + basis_and_coefs = [ + b._derivative_basis_and_coefs(c, order=order) # noqa: WPS437 + for b, c in zip(self.basis_list, coefs_per_basis) + ] new_basis_list, new_coefs_list = zip(*basis_and_coefs) - new_basis = VectorValued(new_basis_list) + new_basis = type(self)(new_basis_list) new_coefs = np.hstack(new_coefs_list) return new_basis, new_coefs - def _gram_matrix(self): + def _gram_matrix(self) -> np.ndarray: gram_matrices = [b.gram_matrix() for b in self.basis_list] return scipy.linalg.block_diag(*gram_matrices) - def _coordinate_nonfull(self, fdatabasis, key): + def _coordinate_nonfull( + self, + fdatabasis: FDataBasis, + key: Union[int, range], + ) -> FDataBasis: r_key = key if isinstance(r_key, int): r_key = range(r_key, r_key + 1) - s_key = slice(r_key.start, r_key.stop, r_key.step) + + s_key = slice(r_key.start, r_key.stop, r_key.step) coef_indexes = np.concatenate([ np.ones(b.n_basis, dtype=np.bool_) if i in r_key else np.zeros(b.n_basis, dtype=np.bool_) - for i, b in enumerate(self.basis_list)]) + for i, b in enumerate(self.basis_list) + ]) - new_basis_list = self.basis_list[key] + new_basis_list = self.basis_list[s_key] - basis = (new_basis_list if isinstance(new_basis_list, Basis) - else VectorValued(new_basis_list)) + basis = ( + new_basis_list[0] if isinstance(key, int) + else VectorValued(new_basis_list) + ) coefs = fdatabasis.coefficients[:, coef_indexes] coordinate_names = np.array(fdatabasis.coordinate_names)[s_key] - return fdatabasis.copy(basis=basis, coefficients=coefs, - coordinate_names=coordinate_names) + return fdatabasis.copy( + basis=basis, + coefficients=coefs, + coordinate_names=coordinate_names, + ) + + def __repr__(self) -> str: + """Representation of a Basis 
object.""" + return ( + f"{self.__class__.__name__}(" + f"basis_list={self.basis_list})" + ) - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: return super().__eq__(other) and self.basis_list == other.basis_list - def __hash__(self): + def __hash__(self) -> int: return hash((super().__hash__(), self.basis_list)) From 3859dd0f58257bfb7e1af6c65f8e9b8f48585f40 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Sun, 3 Jan 2021 13:27:18 +0100 Subject: [PATCH 009/417] Fix style in Fourier basis. --- skfda/representation/basis/_fourier.py | 140 +++++++++++++++---------- 1 file changed, 86 insertions(+), 54 deletions(-) diff --git a/skfda/representation/basis/_fourier.py b/skfda/representation/basis/_fourier.py index fce88f1c8..ffdaa1a16 100644 --- a/skfda/representation/basis/_fourier.py +++ b/skfda/representation/basis/_fourier.py @@ -1,8 +1,13 @@ +from typing import Any, Optional, Tuple, TypeVar + import numpy as np from ..._utils import _to_domain_range +from .._typing import DomainRangeLike from ._basis import Basis +T = TypeVar("T", bound='Fourier') + class Fourier(Basis): r"""Fourier basis. @@ -24,11 +29,11 @@ class Fourier(Basis): Actually this basis functions are not orthogonal but not orthonormal. To achieve this they are divided by its norm: :math:`\sqrt{\frac{T}{2}}`. - Attributes: - domain_range (tuple): A tuple of length 2 containing the initial and + Parameters: + domain_range: A tuple of length 2 containing the initial and end values of the interval over which the basis can be evaluated. - n_basis (int): Number of functions in the basis. - period (int or float): Period (:math:`T`). + n_basis: Number of functions in the basis. + period: Period (:math:`T`). Examples: Constructs specifying number of basis, definition interval and period. @@ -67,21 +72,26 @@ class Fourier(Basis): """ - def __init__(self, domain_range=None, n_basis=3, period=None): - """Construct a Fourier object. 
+ def __init__( + self, + domain_range: Optional[DomainRangeLike] = None, + n_basis: int = 3, + period: Optional[float] = None, + ) -> None: + """ + Construct a Fourier object. It forces the object to have an odd number of basis. If n_basis is even, it is incremented by one. Args: - domain_range (tuple): Tuple defining the domain over which the - function is defined. - n_basis (int): Number of basis functions. - period (int or float): Period of the trigonometric functions that + domain_range: Tuple defining the domain over which the + function is defined. + n_basis: Number of basis functions. + period: Period of the trigonometric functions that define the basis. """ - if domain_range is not None: domain_range = _to_domain_range(domain_range) @@ -96,13 +106,13 @@ def __init__(self, domain_range=None, n_basis=3, period=None): super().__init__(domain_range=domain_range, n_basis=n_basis) @property - def period(self): + def period(self) -> float: if self._period is None: return self.domain_range[0][1] - self.domain_range[0][0] - else: - return self._period - def _evaluate(self, eval_points): + return self._period + + def _evaluate(self, eval_points: np.ndarray) -> np.ndarray: # Input is scalar eval_points = eval_points[..., 0] @@ -120,7 +130,7 @@ def _evaluate(self, eval_points): res = np.einsum('ij,k->ijk', phase_coefs, eval_points) # Apply odd and even functions - for i in [0, 1]: + for i in (0, 1): functions[i](res[:, i, :], out=res[:, i, :]) res = res.reshape(-1, len(eval_points)) @@ -128,21 +138,26 @@ def _evaluate(self, eval_points): constant_basis = np.full( shape=(1, len(eval_points)), - fill_value=1 / (np.sqrt(2) * normalization_denominator)) - - res = np.concatenate((constant_basis, res)) + fill_value=1 / (np.sqrt(2) * normalization_denominator), + ) - return res + return np.concatenate((constant_basis, res)) - def _derivative_basis_and_coefs(self, coefs, order=1): + def _derivative_basis_and_coefs( + self: T, + coefs: np.ndarray, + order: int = 1, + ) -> 
Tuple[T, np.ndarray]: omega = 2 * np.pi / self.period deriv_factor = (np.arange(1, (self.n_basis + 1) / 2) * omega) ** order deriv_coefs = np.zeros(coefs.shape) - cos_sign, sin_sign = ((-1) ** int((order + 1) / 2), - (-1) ** int(order / 2)) + cos_sign, sin_sign = ( + (-1) ** int((order + 1) / 2), + (-1) ** int(order / 2), + ) if order % 2 == 0: deriv_coefs[:, 1::2] = sin_sign * coefs[:, 1::2] * deriv_factor @@ -154,27 +169,35 @@ def _derivative_basis_and_coefs(self, coefs, order=1): # normalise return self.copy(), deriv_coefs - def _gram_matrix(self): + def _gram_matrix(self) -> np.ndarray: # Orthogonal in this case if self.period == (self.domain_range[0][1] - self.domain_range[0][0]): return np.identity(self.n_basis) - else: - return super()._gram_matrix() - - def rescale(self, domain_range=None, *, rescale_period=False): - r"""Return a copy of the basis with a new domain range, with the - corresponding values rescaled to the new bounds. - - Args: - domain_range (tuple, optional): Definition of the interval - where the basis defines a space. Defaults uses the same as - the original basis. - rescale_period (bool, optional): If true the period will be - rescaled using the ratio between the lengths of the new - and old interval. Defaults to False. - """ + return super()._gram_matrix() + + def rescale( + self: T, + domain_range: Optional[DomainRangeLike] = None, + *, + rescale_period: bool = False, + ) -> T: + r""" + Return a copy of the basis with a new domain range. + + Args: + domain_range: Definition of the interval + where the basis defines a space. Defaults uses the same as + the original basis. + rescale_period: If true the period will be + rescaled using the ratio between the lengths of the new + and old interval. Defaults to False. + + Returns: + Rescaled basis. 
+ + """ rescale_basis = super().rescale(domain_range) if rescale_period is True: @@ -182,26 +205,35 @@ def rescale(self, domain_range=None, *, rescale_period=False): domain_rescaled = rescale_basis.domain_range[0] domain = self.domain_range[0] - rescale_basis._period = ( - self.period * - (domain_rescaled[1] - domain_rescaled[0]) / - (domain[1] - domain[0])) + rescale_basis._period = ( # noqa: WPS437 + self.period + * (domain_rescaled[1] - domain_rescaled[0]) + / (domain[1] - domain[0]) + ) return rescale_basis - def _to_R(self): + def _to_R(self) -> str: # noqa: N802 drange = self.domain_range[0] - return ("create.fourier.basis(rangeval = c(" + str(drange[0]) + "," + - str(drange[1]) + "), nbasis = " + str(self.n_basis) + - ", period = " + str(self.period) + ")") - - def __repr__(self): + rangeval = f"c({drange[0]}, {drange[1]})" + return ( + f"create.fourier.basis(" + f"rangeval = {rangeval}, " + f"nbasis = {self.n_basis}, " + f"period = {self.period})" + ) + + def __repr__(self) -> str: """Representation of a Fourier basis.""" - return (f"{self.__class__.__name__}(domain_range={self.domain_range}, " - f"n_basis={self.n_basis}, period={self.period})") - - def __eq__(self, other): + return ( + f"{self.__class__.__name__}(" + f"domain_range={self.domain_range}, " + f"n_basis={self.n_basis}, " + f"period={self.period})" + ) + + def __eq__(self, other: Any) -> bool: return super().__eq__(other) and self.period == other.period - def __hash__(self): + def __hash__(self) -> int: return hash((super().__hash__(), self.period)) From a3467fbe06617c88089dbf52e9e038db91e3b473 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Sun, 3 Jan 2021 16:57:39 +0100 Subject: [PATCH 010/417] Fixes style in Monomial basis. 
--- skfda/representation/basis/_fourier.py | 14 --------- skfda/representation/basis/_monomial.py | 41 +++++++++++++++++-------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/skfda/representation/basis/_fourier.py b/skfda/representation/basis/_fourier.py index ffdaa1a16..46b95986a 100644 --- a/skfda/representation/basis/_fourier.py +++ b/skfda/representation/basis/_fourier.py @@ -183,21 +183,7 @@ def rescale( *, rescale_period: bool = False, ) -> T: - r""" - Return a copy of the basis with a new domain range. - Args: - domain_range: Definition of the interval - where the basis defines a space. Defaults uses the same as - the original basis. - rescale_period: If true the period will be - rescaled using the ratio between the lengths of the new - and old interval. Defaults to False. - - Returns: - Rescaled basis. - - """ rescale_basis = super().rescale(domain_range) if rescale_period is True: diff --git a/skfda/representation/basis/_monomial.py b/skfda/representation/basis/_monomial.py index 273b31c97..811fc4d36 100644 --- a/skfda/representation/basis/_monomial.py +++ b/skfda/representation/basis/_monomial.py @@ -1,8 +1,12 @@ +from typing import Tuple, TypeVar + import numpy as np import scipy.linalg from ._basis import Basis +T = TypeVar("T", bound='Monomial') + class Monomial(Basis): """Monomial basis. @@ -13,9 +17,9 @@ class Monomial(Basis): 1, t, t^2, t^3... Attributes: - domain_range (tuple): a tuple of length 2 containing the initial and + domain_range: a tuple of length 2 containing the initial and end values of the interval over which the basis can be evaluated. - n_basis (int): number of functions in the basis. + n_basis: number of functions in the basis. 
Examples: Defines a monomial base over the interval :math:`[0, 5]` consisting @@ -63,7 +67,7 @@ class Monomial(Basis): [ 2.]]]) """ - def _evaluate(self, eval_points): + def _evaluate(self, eval_points: np.ndarray) -> np.ndarray: # Input is scalar eval_points = eval_points[..., 0] @@ -73,31 +77,37 @@ def _evaluate(self, eval_points): return raised.T - def _derivative_basis_and_coefs(self, coefs, order=1): + def _derivative_basis_and_coefs( + self: T, + coefs: np.ndarray, + order: int = 1, + ) -> Tuple[T, np.ndarray]: if order >= self.n_basis: return ( - Monomial(domain_range=self.domain_range, n_basis=1), + type(self)(domain_range=self.domain_range, n_basis=1), np.zeros((len(coefs), 1)), ) return ( - Monomial( + type(self)( domain_range=self.domain_range, n_basis=self.n_basis - order, ), np.array([np.polyder(x[::-1], order)[::-1] for x in coefs]), ) - def _gram_matrix(self): + def _gram_matrix(self) -> np.ndarray: integral_coefs = np.polyint(np.ones(2 * self.n_basis - 1)) # We obtain the powers of both extremes in the domain range power_domain_limits = np.vander( - self.domain_range[0], 2 * self.n_basis) + self.domain_range[0], 2 * self.n_basis, + ) # Subtract the powers (Barrow's rule) power_domain_limits_diff = ( - power_domain_limits[1] - power_domain_limits[0]) + power_domain_limits[1] - power_domain_limits[0] + ) # Multiply the constants that appear in the integration evaluated_points = integral_coefs * power_domain_limits_diff @@ -109,9 +119,14 @@ def _gram_matrix(self): # Build the matrix return scipy.linalg.hankel( ordered_evaluated_points[:self.n_basis], - ordered_evaluated_points[self.n_basis - 1:]) + ordered_evaluated_points[self.n_basis - 1:], + ) - def _to_R(self): + def _to_R(self) -> str: # noqa: N802 drange = self.domain_range[0] - return "create.monomial.basis(rangeval = c(" + str(drange[0]) + "," +\ - str(drange[1]) + "), nbasis = " + str(self.n_basis) + ")" + rangeval = f"c({drange[0]}, {drange[1]})" + return ( + f"create.monomial.basis(" + 
f"rangeval = {rangeval}, " + f"nbasis = {self.n_basis})" + ) From 89e4a147377e129e5ff3d7d629642212afdef8cc Mon Sep 17 00:00:00 2001 From: vnmabus Date: Sun, 3 Jan 2021 19:18:29 +0100 Subject: [PATCH 011/417] Fix style for BSpline basis. --- setup.cfg | 2 + skfda/representation/basis/_bspline.py | 235 ++++++++++++++----------- skfda/representation/basis/_fourier.py | 2 +- 3 files changed, 134 insertions(+), 105 deletions(-) diff --git a/setup.cfg b/setup.cfg index 383ac3358..a7d21a3f8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,6 +31,8 @@ ignore = W503, # Short names like X or y are common in scikit-learn WPS111, + # We do not like this underscored numbers convention + WPS114, # Trailing underscores are a scikit-learn convention WPS120, # The number of imported things may be large, especially for typing diff --git a/skfda/representation/basis/_bspline.py b/skfda/representation/basis/_bspline.py index bc8365631..9054de46e 100644 --- a/skfda/representation/basis/_bspline.py +++ b/skfda/representation/basis/_bspline.py @@ -1,11 +1,16 @@ +from typing import Any, Optional, Sequence, Tuple, Type, TypeVar + import numpy as np import scipy.interpolate from numpy import polyint, polymul, polyval from scipy.interpolate import BSpline as SciBSpline, PPoly from ..._utils import _to_domain_range +from .._typing import DomainRangeLike from ._basis import Basis +T = TypeVar("T", bound='BSpline') + class BSpline(Basis): r"""BSpline basis. @@ -27,12 +32,12 @@ class BSpline(Basis): boundaries [RS05]_. This is automatically done so that the user only has to specify a single knot at the boundaries. - Attributes: - domain_range (tuple): A tuple of length 2 containing the initial and + Parameters: + domain_range: A tuple of length 2 containing the initial and end values of the interval over which the basis can be evaluated. - n_basis (int): Number of functions in the basis. - order (int): Order of the splines. One greather than their degree. 
- knots (list): List of knots of the spline functions. + n_basis: Number of functions in the basis. + order: Order of the splines. One greather than their degree. + knots: List of knots of the spline functions. Examples: Constructs specifying number of basis and order. @@ -83,23 +88,14 @@ class BSpline(Basis): """ - def __init__(self, domain_range=None, n_basis=None, order=4, knots=None): - """Bspline basis constructor. - - Args: - domain_range (tuple, optional): Definition of the interval where - the basis defines a space. Defaults to (0,1) if knots are not - specified. If knots are specified defaults to the first and - last element of the knots. - n_basis (int, optional): Number of splines that form the basis. - order (int, optional): Order of the splines. One greater that - their degree. Defaults to 4 which mean cubic splines. - knots (array_like): List of knots of the splines. If domain_range - is specified the first and last elements of the knots have to - match with it. - - """ - + def __init__( + self, + domain_range: Optional[DomainRangeLike] = None, + n_basis: Optional[int] = None, + order: int = 4, + knots: Optional[Sequence[float]] = None, + ) -> None: + """Bspline basis constructor.""" if domain_range is not None: domain_range = _to_domain_range(domain_range) @@ -109,28 +105,32 @@ def __init__(self, domain_range=None, n_basis=None, order=4, knots=None): domain_range = domain_range[0] # Knots default to equally space points in the domain_range - if knots is None: - if n_basis is None: - raise ValueError("Must provide either a list of knots or the" - "number of basis.") - else: + if knots is not None: knots = tuple(knots) knots = sorted(knots) if domain_range is None: domain_range = (knots[0], knots[-1]) - else: - if domain_range[0] != knots[0] or domain_range[1] != knots[-1]: - raise ValueError("The ends of the knots must be the same " - "as the domain_range.") + elif domain_range[0] != knots[0] or domain_range[1] != knots[-1]: + raise ValueError( + "The 
ends of the knots must be the same " + "as the domain_range.", + ) # n_basis default to number of knots + order of the splines - 2 if n_basis is None: + if knots is None: + raise ValueError( + "Must provide either a list of knots or the" + "number of basis.", + ) n_basis = len(knots) + order - 2 if (n_basis - order + 2) < 2: - raise ValueError(f"The number of basis ({n_basis}) minus the " - f"order of the bspline ({order}) should be " - f"greater than 3.") + raise ValueError( + f"The number of basis ({n_basis}) minus the " + f"order of the bspline ({order}) should be " + f"greater than 3.", + ) self._order = order self._knots = None if knots is None else tuple(knots) @@ -138,35 +138,43 @@ def __init__(self, domain_range=None, n_basis=None, order=4, knots=None): # Checks if self.n_basis != self.order + len(self.knots) - 2: - raise ValueError(f"The number of basis ({self.n_basis}) has to " - f"equal the order ({self.order}) plus the " - f"number of knots ({len(self.knots)}) minus 2.") + raise ValueError( + f"The number of basis ({self.n_basis}) has to " + f"equal the order ({self.order}) plus the " + f"number of knots ({len(self.knots)}) minus 2.", + ) @property - def knots(self): + def knots(self) -> Tuple[float, ...]: if self._knots is None: - return tuple(np.linspace(*self.domain_range[0], - self.n_basis - self.order + 2)) - else: - return self._knots + return tuple(np.linspace( + *self.domain_range[0], + self.n_basis - self.order + 2, + )) + + return self._knots @property - def order(self): + def order(self) -> int: return self._order - def _evaluation_knots(self): + def _evaluation_knots(self) -> Tuple[float, ...]: """ - Get the knots adding m knots to the boundary in order to allow a - discontinuous behaviour at the boundaries of the domain [RS05]_. + Get the knots adding m knots to the boundary. + + This needs to be done in order to allow a discontinuous behaviour + at the boundaries of the domain [RS05]_. References: .. [RS05] Ramsay, J., Silverman, B. W. 
(2005). *Functional Data Analysis*. Springer. 50-51. """ - return np.array((self.knots[0],) * (self.order - 1) + self.knots + - (self.knots[-1],) * (self.order - 1)) + return tuple( + (self.knots[0],) * (self.order - 1) + self.knots + + (self.knots[-1],) * (self.order - 1), + ) - def _evaluate(self, eval_points): + def _evaluate(self, eval_points: np.ndarray) -> np.ndarray: # Input is scalar eval_points = eval_points[..., 0] @@ -186,45 +194,53 @@ def _evaluate(self, eval_points): # iteration c[i] = 1 # compute the spline - mat[i] = scipy.interpolate.splev(eval_points, - (knots, c, self.order - 1)) + mat[i] = scipy.interpolate.splev( + eval_points, + (knots, c, self.order - 1), + ) c[i] = 0 return mat - def _derivative_basis_and_coefs(self, coefs, order=1): + def _derivative_basis_and_coefs( + self: T, + coefs: np.ndarray, + order: int = 1, + ) -> Tuple[T, np.ndarray]: if order >= self.order: return ( - BSpline(n_basis=1, domain_range=self.domain_range, order=1), - np.zeros((len(coefs), 1))) + type(self)(n_basis=1, domain_range=self.domain_range, order=1), + np.zeros((len(coefs), 1)), + ) - deriv_splines = [self._to_scipy_BSpline(coefs[i]).derivative(order) - for i in range(coefs.shape[0])] + deriv_splines = [ + self._to_scipy_bspline(coefs[i]).derivative(order) + for i in range(coefs.shape[0]) + ] - deriv_coefs = [BSpline._from_scipy_BSpline(spline)[1] - for spline in deriv_splines] + deriv_coefs = [ + self._from_scipy_bspline(spline)[1] + for spline in deriv_splines + ] - deriv_basis = BSpline._from_scipy_BSpline(deriv_splines[0])[0] + deriv_basis = self._from_scipy_bspline(deriv_splines[0])[0] return deriv_basis, np.array(deriv_coefs)[:, 0:deriv_basis.n_basis] - def rescale(self, domain_range=None): - r"""Return a copy of the basis with a new domain range, with the - corresponding values rescaled to the new bounds. - The knots of the BSpline will be rescaled in the new interval. 
- - Args: - domain_range (tuple, optional): Definition of the interval - where the basis defines a space. Defaults uses the same as - the original basis. - """ + def rescale( # noqa: D102 + self: T, + domain_range: Optional[DomainRangeLike] = None, + ) -> T: knots = np.array(self.knots, dtype=np.dtype('float')) if domain_range is not None: # Rescales the knots + domain_range = _to_domain_range(domain_range)[0] knots -= knots[0] - knots *= ((domain_range[1] - domain_range[0] - ) / (self.knots[-1] - self.knots[0])) + knots *= ( + (domain_range[1] - domain_range[0]) + / (self.knots[-1] - self.knots[0]) + ) knots += domain_range[0] # Fix possible round error @@ -235,15 +251,17 @@ def rescale(self, domain_range=None): # TODO: Allow multiple dimensions domain_range = self.domain_range[0] - return BSpline(domain_range, self.n_basis, self.order, knots) + return type(self)(domain_range, self.n_basis, self.order, knots) - def __repr__(self): + def __repr__(self) -> str: """Representation of a BSpline basis.""" - return (f"{self.__class__.__name__}(domain_range={self.domain_range}, " - f"n_basis={self.n_basis}, order={self.order}, " - f"knots={self.knots})") + return ( + f"{self.__class__.__name__}(domain_range={self.domain_range}, " + f"n_basis={self.n_basis}, order={self.order}, " + f"knots={self.knots})" + ) - def _gram_matrix(self): + def _gram_matrix(self) -> np.ndarray: # Places m knots at the boundaries knots = self._evaluation_knots() @@ -257,11 +275,11 @@ def _gram_matrix(self): no_0_intervals = np.where(np.diff(knots) > 0)[0] # For each basis gets its piecewise polynomial representation - for i in range(self.n_basis): + for n in range(self.n_basis): # Write a 1 in c in the position of the spline # transformed in each iteration - c[i] = 1 + c[n] = 1 # Gets the piecewise polynomial representation and gets # only the positions for no zero length intervals @@ -276,24 +294,29 @@ def _gram_matrix(self): # (x - a), so we will need to subtract a when computing the # 
definite integral ppoly_lst.append(pp_coefs) - c[i] = 0 + c[n] = 0 # Now for each pair of basis computes the inner product after # applying the linear differential operator matrix = np.zeros((self.n_basis, self.n_basis)) - for interval in range(len(no_0_intervals)): + for interval, _ in enumerate(no_0_intervals): for i in range(self.n_basis): - poly_i = np.trim_zeros(ppoly_lst[i][:, - interval], 'f') + poly_i = np.trim_zeros( + ppoly_lst[i][:, interval], + 'f', + ) # Indefinite integral square = polymul(poly_i, poly_i) integral = polyint(square) # Definite integral - matrix[i, i] += np.diff(polyval( - integral, self.knots[interval: interval + 2] - - self.knots[interval]))[0] + matrix[i, i] += np.diff( + polyval( + integral, np.array(self.knots[interval: interval + 2]) + - self.knots[interval], + ), + )[0] # The Gram matrix is banded, so not all intervals are used for j in range(i + 1, min(i + self.order, self.n_basis)): @@ -303,9 +326,12 @@ def _gram_matrix(self): integral = polyint(polymul(poly_i, poly_j)) # Definite integral - matrix[i, j] += np.diff(polyval( - integral, self.knots[interval: interval + 2] - - self.knots[interval]) + matrix[i, j] += np.diff( + polyval( + integral, + np.array(self.knots[interval: interval + 2]) + - self.knots[interval], + ), )[0] # The matrix is symmetric @@ -313,17 +339,21 @@ def _gram_matrix(self): return matrix - def _to_scipy_BSpline(self, coefs): + def _to_scipy_bspline(self, coefs: np.ndarray) -> SciBSpline: knots = np.concatenate(( np.repeat(self.knots[0], self.order - 1), self.knots, - np.repeat(self.knots[-1], self.order - 1))) + np.repeat(self.knots[-1], self.order - 1), + )) return SciBSpline(knots, coefs, self.order - 1) - @staticmethod - def _from_scipy_BSpline(bspline): + @classmethod + def _from_scipy_bspline( + cls: Type[T], + bspline: SciBSpline, + ) -> Tuple[T, np.ndarray]: order = bspline.k knots = bspline.t @@ -334,17 +364,14 @@ def _from_scipy_BSpline(bspline): coefs = bspline.c domain_range = [knots[0], 
knots[-1]] - return BSpline(domain_range, order=order + 1, knots=knots), coefs - - @property - def inknots(self): - """Return number of basis.""" - return self.knots[1:len(self.knots) - 1] + return cls(domain_range, order=order + 1, knots=knots), coefs - def __eq__(self, other): - return (super().__eq__(other) - and self.order == other.order - and self.knots == other.knots) + def __eq__(self, other: Any) -> bool: + return ( + super().__eq__(other) + and self.order == other.order + and self.knots == other.knots + ) - def __hash__(self): + def __hash__(self) -> int: return hash((super().__hash__(), self.order, self.knots)) diff --git a/skfda/representation/basis/_fourier.py b/skfda/representation/basis/_fourier.py index 46b95986a..f1f5e619d 100644 --- a/skfda/representation/basis/_fourier.py +++ b/skfda/representation/basis/_fourier.py @@ -177,7 +177,7 @@ def _gram_matrix(self) -> np.ndarray: return super()._gram_matrix() - def rescale( + def rescale( # noqa: D102 self: T, domain_range: Optional[DomainRangeLike] = None, *, From 457ddfe3f0f3f3553632ec8c4e72e23c75689389 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Sun, 3 Jan 2021 19:24:25 +0100 Subject: [PATCH 012/417] Fix style errors. 
--- skfda/_utils/_utils.py | 1 - skfda/representation/_typing.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 25fc89623..0807ecd5a 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -126,7 +126,6 @@ def _tuple_of_arrays(original_array): def _to_domain_range(sequence: DomainRangeLike) -> DomainRange: """Convert sequence to a proper domain range.""" - seq_aux = cast( Sequence[Sequence[float]], (sequence,) if isinstance(sequence[0], numbers.Real) else sequence, diff --git a/skfda/representation/_typing.py b/skfda/representation/_typing.py index 52a530deb..f60ff4fda 100644 --- a/skfda/representation/_typing.py +++ b/skfda/representation/_typing.py @@ -5,5 +5,5 @@ DomainRangeLike = Union[ DomainRange, Sequence[float], - Sequence[Sequence[float]] + Sequence[Sequence[float]], ] From 98cf019f83ac6d1ba22a08475191b338f969c263 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Sun, 3 Jan 2021 23:03:58 +0100 Subject: [PATCH 013/417] Typing for FDataBasis --- skfda/_utils/_utils.py | 2 +- skfda/representation/_functional_data.py | 40 +-- skfda/representation/_typing.py | 10 +- skfda/representation/basis/_basis.py | 18 +- skfda/representation/basis/_fdatabasis.py | 282 +++++++++++++--------- 5 files changed, 205 insertions(+), 147 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 0807ecd5a..eafd4d972 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -441,7 +441,7 @@ def _pairwise_commutative(function, arg1, arg2=None, **kwargs): (len(arg1), len(arg2))) -def _int_to_real(array): +def _int_to_real(array: np.ndarray) -> np.ndarray: """ Convert integer arrays to floating point. 
""" diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index 849b15260..415cc1825 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -4,6 +4,8 @@ objects of the package and contains some commons methods. """ +from __future__ import annotations + import warnings from abc import ABC, abstractmethod from typing import ( @@ -14,7 +16,6 @@ NoReturn, Optional, Sequence, - Tuple, TypeVar, Union, ) @@ -23,7 +24,7 @@ import pandas.api.extensions from .._utils import _evaluate_grid, _reshape_eval_points -from ._typing import DomainRange +from ._typing import DomainRange, LabelTuple, LabelTupleLike from .evaluator import Evaluator from .extrapolation import _parse_extrapolation @@ -32,7 +33,6 @@ from .basis import Basis T = TypeVar('T', bound='FData') -LabelTuple = Tuple[Optional[str], ...] class FData( # noqa: WPS214 @@ -57,16 +57,16 @@ class FData( # noqa: WPS214 def __init__( self, *, - extrapolation: Evaluator, + extrapolation: Optional[Union[str, Evaluator]] = None, dataset_name: Optional[str] = None, dataset_label: Optional[str] = None, - axes_labels: Optional[LabelTuple] = None, - argument_names: Optional[LabelTuple] = None, - coordinate_names: Optional[LabelTuple] = None, - sample_names: Optional[LabelTuple] = None, + axes_labels: Optional[LabelTupleLike] = None, + argument_names: Optional[LabelTupleLike] = None, + coordinate_names: Optional[LabelTupleLike] = None, + sample_names: Optional[LabelTupleLike] = None, ) -> None: - self.extrapolation = extrapolation + self.extrapolation = extrapolation # type: ignore self.dataset_name = dataset_name if dataset_label is not None: @@ -75,7 +75,7 @@ def __init__( self.argument_names = argument_names # type: ignore self.coordinate_names = coordinate_names # type: ignore if axes_labels is not None: - self.axes_labels = axes_labels + self.axes_labels = axes_labels # type: ignore self.sample_names = sample_names # type: ignore 
@property @@ -103,7 +103,7 @@ def argument_names(self) -> LabelTuple: @argument_names.setter def argument_names( self, - names: Optional[LabelTuple], + names: Optional[LabelTupleLike], ) -> None: if names is None: names = (None,) * self.dim_domain @@ -124,7 +124,7 @@ def coordinate_names(self) -> LabelTuple: @coordinate_names.setter def coordinate_names( self, - names: Optional[LabelTuple], + names: Optional[LabelTupleLike], ) -> None: if names is None: names = (None,) * self.dim_codomain @@ -150,7 +150,7 @@ def axes_labels(self) -> LabelTuple: return self.argument_names + self.coordinate_names @axes_labels.setter - def axes_labels(self, labels: LabelTuple) -> None: + def axes_labels(self, labels: LabelTupleLike) -> None: """Set the list of labels.""" if labels is not None: @@ -182,7 +182,7 @@ def sample_names(self) -> LabelTuple: return self._sample_names @sample_names.setter - def sample_names(self, names: Optional[LabelTuple]) -> None: + def sample_names(self, names: Optional[LabelTupleLike]) -> None: if names is None: names = (None,) * self.n_samples else: @@ -229,7 +229,7 @@ def dim_codomain(self) -> int: @property @abstractmethod - def coordinates(self: T) -> T: + def coordinates(self: T) -> Sequence[T]: r"""Return a component of the FDataGrid. If the functional object contains multivariate samples @@ -528,7 +528,7 @@ def __call__( ) @abstractmethod - def derivative(self: T, order: int = 1) -> T: + def derivative(self: T, *, order: int = 1) -> T: """Differentiate a FData object. Args: @@ -609,7 +609,7 @@ def copy(self: T, **kwargs: Any) -> T: def sum( # noqa: WPS125 self: T, *, - axis: None = None, + axis: Optional[int] = None, out: None = None, keepdims: bool = False, skipna: bool = False, @@ -676,7 +676,7 @@ def mean( ) @abstractmethod - def to_grid(self, grid_points: np.ndarray = None) -> 'FDataGrid': + def to_grid(self, grid_points: np.ndarray = None) -> FDataGrid: """Return the discrete representation of the object. 
Args: @@ -693,9 +693,9 @@ def to_grid(self, grid_points: np.ndarray = None) -> 'FDataGrid': @abstractmethod def to_basis( self, - basis: 'Basis', + basis: Basis, **kwargs: Any, - ) -> 'FDataBasis': + ) -> FDataBasis: """Return the basis representation of the object. Args: diff --git a/skfda/representation/_typing.py b/skfda/representation/_typing.py index f60ff4fda..3865337a6 100644 --- a/skfda/representation/_typing.py +++ b/skfda/representation/_typing.py @@ -1,5 +1,7 @@ """Common types.""" -from typing import Sequence, Tuple, Union +from typing import Optional, Sequence, Tuple, Union + +import numpy as np DomainRange = Tuple[Tuple[float, float], ...] DomainRangeLike = Union[ @@ -7,3 +9,9 @@ Sequence[float], Sequence[Sequence[float]], ] + +LabelTuple = Tuple[Optional[str], ...] +LabelTupleLike = Sequence[Optional[str]] + +GridPoints = Tuple[np.ndarray, ...] +GridPointsLike = Sequence[np.ndarray] diff --git a/skfda/representation/basis/_basis.py b/skfda/representation/basis/_basis.py index 9dcef68e4..9f19cbcec 100644 --- a/skfda/representation/basis/_basis.py +++ b/skfda/representation/basis/_basis.py @@ -5,14 +5,16 @@ import copy import warnings from abc import ABC, abstractmethod -from typing import Any, Optional, Tuple, TypeVar, Union +from typing import TYPE_CHECKING, Any, Optional, Tuple, TypeVar, Union import numpy as np from matplotlib.figure import Figure from ..._utils import _reshape_eval_points, _same_domain, _to_domain_range from .._typing import DomainRange, DomainRangeLike -from . import _fdatabasis + +if TYPE_CHECKING: + from . import FDataBasis T = TypeVar("T", bound='Basis') @@ -154,7 +156,7 @@ def evaluate( def __len__(self) -> int: return self.n_basis - def derivative(self, *, order: int = 1) -> _fdatabasis.FDataBasis: + def derivative(self, *, order: int = 1) -> FDataBasis: """Construct a FDataBasis object containing the derivative. 
Args: @@ -199,9 +201,9 @@ def plot(self, *args: Any, **kwargs: Any) -> Figure: def _coordinate_nonfull( self, - fdatabasis: _fdatabasis.FDataBasis, + fdatabasis: FDataBasis, key: Union[int, range], - ) -> _fdatabasis.FDataBasis: + ) -> FDataBasis: """ Return a fdatagrid for the coordinate functions indexed by key. @@ -215,9 +217,9 @@ def _coordinate_nonfull( def _coordinate( self, - fdatabasis: _fdatabasis.FDataBasis, + fdatabasis: FDataBasis, key: Union[int, slice], - ) -> _fdatabasis.FDataBasis: + ) -> FDataBasis: """Return a fdatabasis for the coordinate functions indexed by key.""" # Raises error if not in range and normalize key r_key = range(self.dim_codomain)[key] @@ -260,7 +262,7 @@ def copy(self: T, domain_range: Optional[DomainRangeLike] = None) -> T: return new_copy - def to_basis(self) -> _fdatabasis.FDataBasis: + def to_basis(self) -> FDataBasis: """Convert the Basis to FDatabasis. Returns: diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 3525eb51f..19cc6c173 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -1,8 +1,18 @@ +from __future__ import annotations + import copy -import numbers import warnings from builtins import isinstance -from typing import Any +from typing import ( + Any, + Generic, + Iterator, + Optional, + Sequence, + Type, + TypeVar, + Union, +) import numpy as np import pandas.api.extensions @@ -10,10 +20,11 @@ from ..._utils import _check_array_key, _int_to_real, constants from .. import grid from .._functional_data import FData +from .._typing import DomainRange, GridPointsLike, LabelTupleLike +from ..evaluator import Evaluator +from . import Basis - -def _same_domain(one_domain_range, other_domain_range): - return np.array_equal(one_domain_range, other_domain_range) +T = TypeVar('T', bound='FDataBasis') class FDataBasis(FData): @@ -30,21 +41,21 @@ class FDataBasis(FData): ..., \phi_K)` the basis function system. 
Attributes: - basis (:obj:`Basis`): Basis function system. - coefficients (numpy.darray): List or matrix of coefficients. Has to + basis: Basis function system. + coefficients: List or matrix of coefficients. Has to have the same length or number of columns as the number of basis function in the basis. If a matrix, each row contains the coefficients that multiplied by the basis functions produce each functional datum. - domain_range (numpy.ndarray): 2 dimension matrix where each row + domain_range: 2 dimension matrix where each row contains the bounds of the interval in which the functional data is considered to exist for each one of the axies. - dataset_name (str): name of the dataset. - argument_names (tuple): tuple containing the names of the different + dataset_name: name of the dataset. + argument_names: tuple containing the names of the different arguments. - coordinate_names (tuple): tuple containing the names of the different + coordinate_names: tuple containing the names of the different coordinate functions. - extrapolation (str or Extrapolation): defines the default type of + extrapolation: defines the default type of extrapolation. By default None, which does not apply any type of extrapolation. See `Extrapolation` for detailled information of the types of extrapolation. @@ -61,46 +72,21 @@ class FDataBasis(FData): ...) """ - class _CoordinateIterator: - """Internal class to iterate through the image coordinates. - - Dummy object. Should be change to support multidimensional objects. 
- - """ - - def __init__(self, fdatabasis): - """Create an iterator through the image coordinates.""" - self._fdatabasis = fdatabasis - - def __iter__(self): - """Return an iterator through the image coordinates.""" - - for i in range(len(self)): - yield self[i] - def __getitem__(self, key): - """Get a specific coordinate.""" - - return self._fdatabasis.basis._coordinate(self._fdatabasis, key) - - def __len__(self): - """Return the number of coordinates.""" - return self._fdatabasis.dim_codomain - - def __init__(self, basis, coefficients, *, dataset_label=None, - dataset_name=None, - axes_labels=None, argument_names=None, - coordinate_names=None, - sample_names=None, - extrapolation=None): - """Construct a FDataBasis object. - - Args: - basis (:obj:`Basis`): Basis function system. - coefficients (array_like): List or matrix of coefficients. Has to - have the same length or number of columns as the number of - basis function in the basis. - """ + def __init__( + self, + basis: Basis, + coefficients: np.ndarray, + *, + dataset_label: Optional[str] = None, + dataset_name: Optional[str] = None, + axes_labels: Optional[LabelTupleLike] = None, + argument_names: Optional[LabelTupleLike] = None, + coordinate_names: Optional[LabelTupleLike] = None, + sample_names: Optional[LabelTupleLike] = None, + extrapolation: Optional[Union[str, Evaluator]] = None + ) -> None: + """Construct a FDataBasis object.""" coefficients = _int_to_real(np.atleast_2d(coefficients)) if coefficients.shape[1] != basis.n_basis: raise ValueError("The length or number of columns of coefficients " @@ -109,19 +95,26 @@ def __init__(self, basis, coefficients, *, dataset_label=None, self.basis = basis self.coefficients = coefficients - super().__init__(extrapolation=extrapolation, - dataset_label=dataset_label, - dataset_name=dataset_name, - axes_labels=axes_labels, - argument_names=argument_names, - coordinate_names=coordinate_names, - sample_names=sample_names) + super().__init__( + 
extrapolation=extrapolation, + dataset_label=dataset_label, + dataset_name=dataset_name, + axes_labels=axes_labels, + argument_names=argument_names, + coordinate_names=coordinate_names, + sample_names=sample_names + ) @classmethod - def from_data(cls, data_matrix, *, basis, - grid_points=None, - sample_points=None, - method='cholesky'): + def from_data( + cls, + data_matrix: np.ndarray, + *, + basis: Basis, + grid_points: Optional[GridPointsLike] = None, + sample_points: Optional[GridPointsLike] = None, + method: str = 'cholesky', + ) -> FDataBasis: r"""Transform raw data to a smooth functional form. Takes functional data in a discrete form and makes an approximates it @@ -213,19 +206,19 @@ def from_data(cls, data_matrix, *, basis, return fd.to_basis(basis=basis, method=method) @property - def n_samples(self): - return self.coefficients.shape[0] + def n_samples(self) -> int: + return len(self.coefficients) @property - def dim_domain(self): + def dim_domain(self) -> int: return self.basis.dim_domain @property - def dim_codomain(self): + def dim_codomain(self) -> int: return self.basis.dim_codomain @property - def coordinates(self): + def coordinates(self: T) -> _CoordinateIterator[T]: r"""Return a component of the FDataBasis. 
If the functional object contains samples @@ -239,19 +232,24 @@ def coordinates(self): """ - return FDataBasis._CoordinateIterator(self) + return _CoordinateIterator(self) @property - def n_basis(self): + def n_basis(self) -> int: """Return number of basis.""" return self.basis.n_basis @property - def domain_range(self): + def domain_range(self) -> DomainRange: return self.basis.domain_range - def _evaluate(self, eval_points, *, aligned=True): + def _evaluate( + self, + eval_points: np.ndarray, + *, + aligned: bool = True, + ) -> np.ndarray: if aligned: @@ -276,8 +274,15 @@ def _evaluate(self, eval_points, *, aligned=True): return res_matrix - def shift(self, shifts, *, restrict_domain=False, extrapolation=None, - eval_points=None, **kwargs): + def shift( + self: T, + shifts: np.ndarray, + *, + restrict_domain: bool = False, + extrapolation: Optional[Union[str, Evaluator]] = None, + eval_points: Optional[np.ndarray] = None, + **kwargs: Any, + ) -> T: r"""Perform a shift of the curves. Args: @@ -297,7 +302,7 @@ def shift(self, shifts, *, restrict_domain=False, extrapolation=None, passed it calls numpy.linspace with bounds equal to the ones defined in fd.domain_range and the number of points the maximum between 201 and 10 times the number of basis plus 1. - **kwargs: Keyword arguments to be passed to :meth:`from_data`. + kwargs: Keyword arguments to be passed to :meth:`from_data`. Returns: :obj:`FDataBasis` with the shifted data. @@ -353,13 +358,7 @@ def shift(self, shifts, *, restrict_domain=False, extrapolation=None, return FDataBasis.from_data(_data_matrix, grid_points=eval_points, basis=_basis, **kwargs) - def derivative(self, *, order=1): - r"""Differentiate a FDataBasis object. - - - Args: - order (int, optional): Order of the derivative. Defaults to one. 
- """ + def derivative(self: T, *, order: int = 1) -> T: if order < 0: raise ValueError("order only takes non-negative integer values.") @@ -372,8 +371,15 @@ def derivative(self, *, order=1): return FDataBasis(basis, coefficients) - def sum(self, *, axis=None, out=None, keepdims=False, skipna=False, - min_count=0): + def sum( + self: T, + *, + axis: Optional[int] = None, + out: None = None, + keepdims: bool = False, + skipna: bool = False, + min_count: int = 0, + ) -> T: """Compute the sum of all the samples in a FDataBasis object. Returns: @@ -406,7 +412,7 @@ def sum(self, *, axis=None, out=None, keepdims=False, skipna=False, return self.copy(coefficients=coefs, sample_names=(None,)) - def gmean(self, eval_points=None): + def gmean(self: T, eval_points: Optional[np.ndarray] = None) -> T: """Compute the geometric mean of the functional data object. A numerical approach its used. The object its transformed into its @@ -427,7 +433,7 @@ def gmean(self, eval_points=None): """ return self.to_grid(eval_points).gmean().to_basis(self.basis) - def var(self, eval_points=None): + def var(self: T, eval_points: Optional[np.ndarray] = None) -> T: """Compute the variance of the functional data object. A numerical approach its used. The object its transformed into its @@ -448,7 +454,7 @@ def var(self, eval_points=None): """ return self.to_grid(eval_points).var().to_basis(self.basis) - def cov(self, eval_points=None): + def cov(self, eval_points: Optional[np.ndarray] = None): """Compute the covariance of the functional data object. A numerical approach its used. The object its transformed into its @@ -516,7 +522,12 @@ def to_grid(self, grid_points=None, *, sample_points=None): grid_points=grid_points, domain_range=self.domain_range) - def to_basis(self, basis, eval_points=None, **kwargs): + def to_basis( + self, + basis: Basis, + eval_points: Optional[np.ndarray] = None, + **kwargs: Any + ) -> FDataBasis: """Return the basis representation of the object. 
Args: @@ -535,12 +546,17 @@ def to_basis(self, basis, eval_points=None, **kwargs): return self.to_grid(eval_points=eval_points).to_basis(basis, **kwargs) - def copy(self, *, basis=None, coefficients=None, - dataset_name=None, - argument_names=None, - coordinate_names=None, - sample_names=None, - extrapolation=None): + def copy( + self: T, + *, + basis: Optional[Basis] = None, + coefficients: Optional[np.ndarray] = None, + dataset_name: Optional[str] = None, + argument_names: Optional[LabelTupleLike] = None, + coordinate_names: Optional[LabelTupleLike] = None, + sample_names: Optional[LabelTupleLike] = None, + extrapolation: Optional[Union[str, Evaluator]] = None, + ) -> T: """FDataBasis copy""" if basis is None: @@ -574,18 +590,19 @@ def copy(self, *, basis=None, coefficients=None, sample_names=sample_names, extrapolation=extrapolation) - def _to_R(self): + def _to_R(self) -> str: """Gives the code to build the object on fda package on R""" return ("fd(coef = " + self._array_to_R(self.coefficients, True) + ", basisobj = " + self.basis._to_R() + ")") - def _array_to_R(self, coefficients, transpose=False): + def _array_to_R( + self, + coefficients: np.ndarray, + transpose: bool = False, + ) -> str: if len(coefficients.shape) == 1: coefficients = coefficients.reshape((1, coefficients.shape[0])) - if len(coefficients.shape) > 2: - return NotImplementedError - if transpose is True: coefficients = np.transpose(coefficients) @@ -598,7 +615,7 @@ def _array_to_R(self, coefficients, transpose=False): return (retstring[0:len(retstring) - 2] + "), nrow = " + str(rows) + ", ncol = " + str(cols) + ")") - def __repr__(self): + def __repr__(self) -> str: """Representation of FDataBasis object.""" return (f"{self.__class__.__name__}(" @@ -610,21 +627,21 @@ def __repr__(self): f"\nextrapolation={self.extrapolation})").replace( '\n', '\n ') - def __str__(self): + def __str__(self) -> str: """Return str(self).""" return (f"{self.__class__.__name__}(" f"\n_basis={self.basis}," 
f"\ncoefficients={self.coefficients})").replace('\n', '\n ') - def equals(self, other): + def equals(self, other: Any) -> bool: """Equality of FDataBasis""" # TODO check all other params return (super().equals(other) and self.basis == other.basis and np.array_equal(self.coefficients, other.coefficients)) - def __eq__(self, other): + def __eq__(self, other: Any) -> np.ndarray: """Elementwise equality of FDataBasis""" if not isinstance(other, type(self)) or self.dtype != other.dtype: @@ -644,7 +661,11 @@ def __eq__(self, other): return np.all(self.coefficients == other.coefficients, axis=1) - def concatenate(self, *others, as_coordinates=False): + def concatenate( + self: T, + *others: T, + as_coordinates: bool = False + ) -> T: """Join samples from a similar FDataBasis object. Joins samples from another FDataBasis object if they have the same @@ -705,7 +726,7 @@ def compose(self, fd, *, eval_points=None, **kwargs): return composition - def __getitem__(self, key): + def __getitem__(self: T, key: Union[int, slice]) -> T: """Return self[key].""" key = _check_array_key(self.coefficients, key) @@ -713,7 +734,7 @@ def __getitem__(self, key): return self.copy(coefficients=self.coefficients[key], sample_names=np.array(self.sample_names)[key]) - def __add__(self, other): + def __add__(self: T, other: Union[T, np.ndarray, float]) -> T: """Addition for FDataBasis object.""" if isinstance(other, FDataBasis): @@ -732,12 +753,12 @@ def __add__(self, other): return self._copy_op(other, basis=basis, coefficients=coefs) - def __radd__(self, other): + def __radd__(self: T, other: Union[T, np.ndarray, float]) -> T: """Addition for FDataBasis object.""" return self.__add__(other) - def __sub__(self, other): + def __sub__(self: T, other: Union[T, np.ndarray, float]) -> T: """Subtraction for FDataBasis object.""" if isinstance(other, FDataBasis): if self.basis != other.basis: @@ -754,11 +775,11 @@ def __sub__(self, other): return self._copy_op(other, basis=basis, coefficients=coefs) - 
def __rsub__(self, other): + def __rsub__(self: T, other: Union[T, np.ndarray, float]) -> T: """Right subtraction for FDataBasis object.""" return (self * -1).__add__(other) - def __mul__(self, other): + def __mul__(self: T, other: Union[np.ndarray, float]) -> T: """Multiplication for FDataBasis object.""" if isinstance(other, FDataBasis): return NotImplemented @@ -770,11 +791,11 @@ def __mul__(self, other): return self._copy_op(other, basis=basis, coefficients=coefs) - def __rmul__(self, other): + def __rmul__(self: T, other: Union[np.ndarray, float]) -> T: """Multiplication for FDataBasis object.""" return self.__mul__(other) - def __truediv__(self, other): + def __truediv__(self: T, other: Union[np.ndarray, float]) -> T: """Division for FDataBasis object.""" other = np.array(other) @@ -786,7 +807,7 @@ def __truediv__(self, other): return self * other - def __rtruediv__(self, other): + def __rtruediv__(self: T, other: Union[np.ndarray, float]) -> T: """Right division for FDataBasis object.""" return NotImplemented @@ -795,7 +816,7 @@ def __rtruediv__(self, other): # Pandas ExtensionArray methods ##################################################################### @property - def dtype(self): + def dtype(self) -> FDataBasisDType: """The dtype for this extension array, FDataGridDType""" return FDataBasisDType(basis=self.basis) @@ -806,7 +827,7 @@ def nbytes(self) -> int: """ return self.coefficients.nbytes - def isna(self): + def isna(self) -> np.ndarray: """ A 1-D array indicating if each value is missing. 
@@ -816,7 +837,7 @@ def isna(self): return np.all(np.isnan(self.coefficients), axis=1) -class FDataBasisDType(pandas.api.extensions.ExtensionDtype): +class FDataBasisDType(pandas.api.extensions.ExtensionDtype): # type: ignore """ DType corresponding to FDataBasis in Pandas """ @@ -827,11 +848,11 @@ class FDataBasisDType(pandas.api.extensions.ExtensionDtype): _metadata = ("basis") - def __init__(self, basis) -> None: + def __init__(self, basis: Basis) -> None: self.basis = basis @classmethod - def construct_array_type(cls) -> type: + def construct_array_type(cls) -> Type[FDataBasis]: return FDataBasis def _na_repr(self) -> FDataBasis: @@ -857,3 +878,30 @@ def __eq__(self, other: Any) -> bool: def __hash__(self) -> int: return hash(self.basis) + + +class _CoordinateIterator(Sequence[T]): + """Internal class to iterate through the image coordinates. + + Dummy object. Should be change to support multidimensional objects. + + """ + + def __init__(self, fdatabasis: T) -> None: + """Create an iterator through the image coordinates.""" + self._fdatabasis = fdatabasis + + def __iter__(self) -> Iterator[T]: + """Return an iterator through the image coordinates.""" + + for i in range(len(self)): + yield self[i] + + def __getitem__(self, key: Union[int, slice]) -> T: + """Get a specific coordinate.""" + + return self._fdatabasis.basis._coordinate(self._fdatabasis, key) + + def __len__(self) -> int: + """Return the number of coordinates.""" + return self._fdatabasis.dim_codomain From 0928e354f2ad61d55aff579fb4566464066d0707 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Tue, 5 Jan 2021 02:33:33 +0100 Subject: [PATCH 014/417] Partial typing of FDataGrid. 
--- skfda/_utils/__init__.py | 2 +- skfda/_utils/_utils.py | 23 +- .../visualization/representation.py | 14 +- skfda/ml/clustering/_kmeans.py | 11 +- skfda/representation/_functional_data.py | 19 +- skfda/representation/basis/_fdatabasis.py | 46 ++- skfda/representation/grid.py | 376 +++++++++--------- 7 files changed, 260 insertions(+), 231 deletions(-) diff --git a/skfda/_utils/__init__.py b/skfda/_utils/__init__.py index f988f773f..a8359d1c6 100644 --- a/skfda/_utils/__init__.py +++ b/skfda/_utils/__init__.py @@ -14,7 +14,7 @@ _to_array_maybe_ragged, _to_domain_range, _to_grid, - _tuple_of_arrays, + _to_grid_points, check_is_univariate, nquad_vec, ) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index eafd4d972..69ac94a11 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -10,7 +10,12 @@ import scipy.integrate from pandas.api.indexers import check_array_indexer -from ..representation._typing import DomainRange, DomainRangeLike +from ..representation._typing import ( + DomainRange, + DomainRangeLike, + GridPoints, + GridPointsLike, +) from ..representation.evaluator import Evaluator RandomStateLike = Optional[Union[int, np.random.RandomState]] @@ -92,8 +97,8 @@ def _to_grid(X, y, eval_points=None): return X, y -def _tuple_of_arrays(original_array): - """Convert to a list of arrays. +def _to_grid_points(grid_points_like: GridPointsLike) -> GridPoints: + """Convert to grid points. If the original list is one-dimensional (e.g. [1, 2, 3]), return list to array (in this case [array([1, 2, 3])]). 
@@ -109,19 +114,19 @@ def _tuple_of_arrays(original_array): unidimensional = False try: - iter(original_array) + iter(grid_points_like) except TypeError: - original_array = [original_array] + grid_points_like = [grid_points_like] try: - iter(original_array[0]) + iter(grid_points_like[0]) except TypeError: unidimensional = True if unidimensional: - return (_int_to_real(np.asarray(original_array)),) + return (_int_to_real(np.asarray(grid_points_like)),) else: - return tuple(_int_to_real(np.asarray(i)) for i in original_array) + return tuple(_int_to_real(np.asarray(i)) for i in grid_points_like) def _to_domain_range(sequence: DomainRangeLike) -> DomainRange: @@ -288,7 +293,7 @@ def _one_grid_to_points(axes, *, dim_domain): Returns also the shape containing the information of how each point is formed. """ - axes = _tuple_of_arrays(axes) + axes = _to_grid_points(axes) if len(axes) != dim_domain: raise ValueError(f"Length of axes should be " diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 7ccb90794..f60228835 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -1,12 +1,14 @@ import matplotlib.cm import matplotlib.patches - import numpy as np -from ..._utils import _tuple_of_arrays, constants -from ._utils import (_get_figure_and_axes, _set_figure_layout_for_fdata, - _set_labels) +from ..._utils import _to_domain_range, constants +from ._utils import ( + _get_figure_and_axes, + _set_figure_layout_for_fdata, + _set_labels, +) def _get_label_colors(n_labels, group_colors=None): @@ -149,7 +151,7 @@ def plot_graph(fdata, chart=None, *, fig=None, axes=None, if domain_range is None: domain_range = fdata.domain_range else: - domain_range = _tuple_of_arrays(domain_range) + domain_range = _to_domain_range(domain_range) sample_colors, patches = _get_color_info( fdata, group, group_names, group_colors, legend, kwargs) @@ -283,7 +285,7 @@ def 
plot_scatter(fdata, chart=None, *, grid_points=None, if domain_range is None: domain_range = fdata.domain_range else: - domain_range = _tuple_of_arrays(domain_range) + domain_range = _to_domain_range(domain_range) sample_colors, patches = _get_color_info( fdata, group, group_names, group_colors, legend, kwargs) diff --git a/skfda/ml/clustering/_kmeans.py b/skfda/ml/clustering/_kmeans.py index 2f9ef8bc2..fecc4e35f 100644 --- a/skfda/ml/clustering/_kmeans.py +++ b/skfda/ml/clustering/_kmeans.py @@ -1,15 +1,14 @@ """K-Means Algorithms Module.""" -from abc import abstractmethod import warnings +from abc import abstractmethod +import numpy as np from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin from sklearn.utils import check_random_state from sklearn.utils.validation import check_is_fitted -import numpy as np - -from ...misc.metrics import pairwise_distance, lp_distance +from ...misc.metrics import lp_distance, pairwise_distance class BaseKMeans(BaseEstimator, ClusterMixin, TransformerMixin): @@ -89,7 +88,7 @@ def _check_clustering(self, fdata): "because the init parameter is set.") if self.init is not None and self.init.data_matrix.shape != ( - self.n_clusters, fdata.ncol, fdata.dim_codomain): + self.n_clusters,) + fdata.data_matrix.shape[1:]: raise ValueError("The init FDataGrid data_matrix should be of " "shape (n_clusters, n_features, dim_codomain) " "and gives the initial centers.") @@ -182,7 +181,7 @@ def _algorithm(self, fdata, random_state): """ repetitions = 0 centroids_old_matrix = np.zeros( - (self.n_clusters, fdata.ncol, fdata.dim_codomain)) + (self.n_clusters,) + fdata.data_matrix.shape[1:]) membership_matrix = self._create_membership(fdata.n_samples) centroids = self._init_centroids(fdata, random_state) diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index 415cc1825..b2da39c8e 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -548,7 
+548,6 @@ def shift( restrict_domain: bool = False, extrapolation: Optional[Union[str, Evaluator]] = None, eval_points: np.ndarray = None, - **kwargs: Any, ) -> T: """Perform a shift of the curves. @@ -569,7 +568,6 @@ def shift( passed it calls np.linspace with bounds equal to the ones defined in fd.domain_range and the number of points the maximum between 201 and 10 times the number of basis plus 1. - kwargs: Additional arguments. Returns: :class:`FData` with the shifted functional data. @@ -593,12 +591,18 @@ def plot(self, *args: Any, **kwargs: Any) -> Any: return plot_graph(self, *args, **kwargs) @abstractmethod - def copy(self: T, **kwargs: Any) -> T: + def copy( + self: T, + *, + deep: bool = False, # For Pandas compatibility + dataset_name: Optional[str] = None, + argument_names: Optional[LabelTupleLike] = None, + coordinate_names: Optional[LabelTupleLike] = None, + sample_names: Optional[LabelTupleLike] = None, + extrapolation: Optional[Union[str, Evaluator]] = None, + ) -> T: """Make a copy of the object. - Args: - kwargs: named args with attributes to be changed in the new copy. - Returns: A copy of the FData object. @@ -737,7 +741,6 @@ def compose( fd: T, *, eval_points: np.ndarray = None, - **kwargs: Any, ) -> T: """Composition of functions. @@ -748,8 +751,6 @@ def compose( have the same number of samples and image dimension equal to the domain dimension of the object composed. eval_points: Points to perform the evaluation. - kwargs: Named arguments to be passed to the composition method of - the specific functional object. """ pass diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 19cc6c173..7f3cf1285 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -4,6 +4,7 @@ import warnings from builtins import isinstance from typing import ( + TYPE_CHECKING, Any, Generic, Iterator, @@ -24,6 +25,9 @@ from ..evaluator import Evaluator from . 
import Basis +if TYPE_CHECKING: + from .. import FDataGrid + T = TypeVar('T', bound='FDataBasis') @@ -148,13 +152,13 @@ def from_data( [RS05-5-2-7]_ Args: - data_matrix (array_like): List or matrix containing the + data_matrix: List or matrix containing the observations. If a matrix each row represents a single functional datum and the columns the different observations. - grid_points (array_like): Values of the domain where the previous + grid_points: Values of the domain where the previous data were taken. - basis: (Basis): Basis used. - method (str): Algorithm used for calculating the coefficients using + basis: Basis used. + method: Algorithm used for calculating the coefficients using the least squares method. The values admitted are 'cholesky' and 'qr' for Cholesky and QR factorisation methods respectively. @@ -225,13 +229,7 @@ def coordinates(self: T) -> _CoordinateIterator[T]: :math:`f: \mathbb{R}^n \rightarrow \mathbb{R}^d`, this object allows a component of the vector :math:`f = (f_1, ..., f_d)`. - - Todo: - By the moment, only unidimensional objects are supported in basis - form. - """ - return _CoordinateIterator(self) @property @@ -286,17 +284,17 @@ def shift( r"""Perform a shift of the curves. Args: - shifts (array_like or numeric): List with the the shift + shifts: List with the the shift corresponding for each sample or numeric with the shift to apply to all samples. - restrict_domain (bool, optional): If True restricts the domain to + restrict_domain: If True restricts the domain to avoid evaluate points outside the domain using extrapolation. Defaults uses extrapolation. - extrapolation (str or Extrapolation, optional): Controls the + extrapolation: Controls the extrapolation mode for elements outside the domain range. By default uses the method defined in fd. See extrapolation to more information. 
- eval_points (array_like, optional): Set of points where + eval_points: Set of points where the functions are evaluated to obtain the discrete representation of the object to operate. If an empty list is passed it calls numpy.linspace with bounds equal to the ones @@ -383,7 +381,7 @@ def sum( """Compute the sum of all the samples in a FDataBasis object. Returns: - :obj:`FDataBasis`: A FDataBais object with just one sample + A FDataBais object with just one sample representing the sum of all the samples in the original FDataBasis object. @@ -420,7 +418,7 @@ def gmean(self: T, eval_points: Optional[np.ndarray] = None) -> T: then the object is taken back to the basis representation. Args: - eval_points (array_like, optional): Set of points where the + eval_points: Set of points where the functions are evaluated to obtain the discrete representation of the object. If none are passed it calls numpy.linspace with bounds equal to the ones defined in @@ -441,7 +439,7 @@ def var(self: T, eval_points: Optional[np.ndarray] = None) -> T: then the object is taken back to the basis representation. Args: - eval_points (array_like, optional): Set of points where the + eval_points: Set of points where the functions are evaluated to obtain the discrete representation of the object. If none are passed it calls numpy.linspace with bounds equal to the ones defined in @@ -449,7 +447,7 @@ def var(self: T, eval_points: Optional[np.ndarray] = None) -> T: between 501 and 10 times the number of basis. Returns: - FDataBasis: Variance of the original object. + Variance of the original object. """ return self.to_grid(eval_points).var().to_basis(self.basis) @@ -461,7 +459,7 @@ def cov(self, eval_points: Optional[np.ndarray] = None): discrete representation and then the covariance matrix is computed. Args: - eval_points (array_like, optional): Set of points where the + eval_points: Set of points where the functions are evaluated to obtain the discrete representation of the object. 
If none are passed it calls numpy.linspace with bounds equal to the ones defined in @@ -469,12 +467,17 @@ def cov(self, eval_points: Optional[np.ndarray] = None): between 501 and 10 times the number of basis. Returns: - numpy.darray: Matrix of covariances. + Matrix of covariances. """ return self.to_grid(eval_points).cov() - def to_grid(self, grid_points=None, *, sample_points=None): + def to_grid( + self, + grid_points: Optional[GridPointsLike] = None, + *, + sample_points: np.ndarray = None, + ) -> FDataGrid: """Return the discrete representation of the object. Args: @@ -549,6 +552,7 @@ def to_basis( def copy( self: T, *, + deep: bool = False, # For Pandas compatibility basis: Optional[Basis] = None, coefficients: Optional[np.ndarray] = None, dataset_name: Optional[str] = None, diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 4f75d1078..b63f075a1 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -5,30 +5,51 @@ list of discretisation points. """ +from __future__ import annotations import copy import numbers import warnings -from typing import Any +from typing import ( + TYPE_CHECKING, + Any, + Iterator, + Optional, + Sequence, + Type, + TypeVar, + Union, +) import findiff import numpy as np import pandas.api.extensions import scipy.stats.mstats +from matplotlib.figure import Figure from .._utils import ( _check_array_key, _int_to_real, _to_domain_range, - _tuple_of_arrays, + _to_grid_points, constants, ) -from . import basis as fdbasis from ._functional_data import FData +from ._typing import ( + DomainRange, + DomainRangeLike, + GridPoints, + GridPointsLike, + LabelTupleLike, +) +from .basis import Basis +from .evaluator import Evaluator from .interpolation import SplineInterpolation -__author__ = "Miguel Carbajo Berrocal" -__email__ = "miguel.carbajo@estudiante.uam.es" +if TYPE_CHECKING: + from .. 
import FDataBasis + +T = TypeVar("T", bound='FDataGrid') class FDataGrid(FData): @@ -38,24 +59,24 @@ class FDataGrid(FData): in a grid of points. Attributes: - data_matrix (numpy.ndarray): a matrix where each entry of the first + data_matrix: a matrix where each entry of the first axis contains the values of a functional datum evaluated at the points of discretisation. - grid_points (numpy.ndarray): 2 dimension matrix where each row + grid_points: 2 dimension matrix where each row contains the points of dicretisation for each axis of data_matrix. - domain_range (numpy.ndarray): 2 dimension matrix where each row + domain_range: 2 dimension matrix where each row contains the bounds of the interval in which the functional data is considered to exist for each one of the axies. - dataset_name (str): name of the dataset. - argument_names (tuple): tuple containing the names of the different + dataset_name: name of the dataset. + argument_names: tuple containing the names of the different arguments. - coordinate_names (tuple): tuple containing the names of the different + coordinate_names: tuple containing the names of the different coordinate functions. - extrapolation (str or Extrapolation): defines the default type of + extrapolation: defines the default type of extrapolation. By default None, which does not apply any type of extrapolation. See `Extrapolation` for detailled information of the types of extrapolation. - interpolation (GridInterpolation): Defines the type of interpolation + interpolation: Defines the type of interpolation applied in `evaluate`. 
Examples: @@ -108,65 +129,39 @@ class FDataGrid(FData): """ - class _CoordinateIterator: - """Internal class to iterate through the image coordinates.""" - - def __init__(self, fdatagrid): - """Create an iterator through the image coordinates.""" - self._fdatagrid = fdatagrid - - def __iter__(self): - """Return an iterator through the image coordinates.""" - - for i in range(len(self)): - yield self[i] - - def __getitem__(self, key): - """Get a specific coordinate.""" - - s_key = key - if isinstance(s_key, int): - s_key = slice(s_key, s_key + 1) - - coordinate_names = np.array( - self._fdatagrid.coordinate_names)[s_key] - - return self._fdatagrid.copy( - data_matrix=self._fdatagrid.data_matrix[..., key], - coordinate_names=coordinate_names) - - def __len__(self): - """Return the number of coordinates.""" - return self._fdatagrid.dim_codomain - - def __init__(self, data_matrix, grid_points=None, - *, - sample_points=None, - domain_range=None, - dataset_label=None, - dataset_name=None, - argument_names=None, - coordinate_names=None, - sample_names=None, - axes_labels=None, extrapolation=None, - interpolation=None): + def __init__( + self, + data_matrix: np.ndarray, + grid_points: Optional[GridPointsLike] = None, + *, + sample_points: Optional[GridPointsLike] = None, + domain_range: Optional[DomainRangeLike] = None, + dataset_label: Optional[str] = None, + dataset_name: Optional[str] = None, + argument_names: Optional[LabelTupleLike] = None, + coordinate_names: Optional[LabelTupleLike] = None, + sample_names: Optional[LabelTupleLike] = None, + axes_labels: Optional[LabelTupleLike] = None, + extrapolation: Optional[Union[str, Evaluator]] = None, + interpolation: Optional[Evaluator] = None + ): """Construct a FDataGrid object. Args: - data_matrix (array_like): a matrix where each row contains the + data_matrix: a matrix where each row contains the values of a functional datum evaluated at the points of discretisation. 
- grid_points (array_like, optional): an array containing the + grid_points: an array containing the points of discretisation where values have been recorded or a list of lists with each of the list containing the points of dicretisation for each axis. - domain_range (tuple or list of tuples, optional): contains the + domain_range: contains the edges of the interval in which the functional data is considered to exist (if the argument has 2 dimensions each row is interpreted as the limits of one of the dimension of the domain). - dataset_label (str, optional): name of the dataset. - axes_labels (list, optional): list containing the labels of the + dataset_label: name of the dataset. + axes_labels: list containing the labels of the different axes. The length of the list must be equal to the sum of the number of dimensions of the domain plus the number of dimensions of the image. @@ -180,7 +175,7 @@ def __init__(self, data_matrix, grid_points=None, self.data_matrix = _int_to_real(np.atleast_2d(data_matrix)) if grid_points is None: - self.grid_points = _tuple_of_arrays( + self.grid_points = _to_grid_points( [np.linspace(0., 1., self.data_matrix.shape[i]) for i in range(1, self.data_matrix.ndim)]) @@ -188,7 +183,7 @@ def __init__(self, data_matrix, grid_points=None, # Check that the dimension of the data matches the grid_points # list - self.grid_points = _tuple_of_arrays(grid_points) + self.grid_points = _to_grid_points(grid_points) data_shape = self.data_matrix.shape[1: 1 + self.dim_domain] grid_points_shape = [len(i) for i in self.grid_points] @@ -223,7 +218,7 @@ def __init__(self, data_matrix, grid_points=None, if self.data_matrix.ndim == 1 + self.dim_domain: self.data_matrix = self.data_matrix[..., np.newaxis] - self.interpolation = interpolation + self.interpolation = interpolation # type: ignore super().__init__(extrapolation=extrapolation, dataset_label=dataset_label, @@ -233,35 +228,34 @@ def __init__(self, data_matrix, grid_points=None, 
coordinate_names=coordinate_names, sample_names=sample_names) - def round(self, decimals=0): + def round(self: T, decimals: int = 0) -> T: """Evenly round to the given number of decimals. Args: - decimals (int, optional): Number of decimal places to round to. + decimals: Number of decimal places to round to. If decimals is negative, it specifies the number of positions to the left of the decimal point. Defaults to 0. Returns: - :obj:FDataGrid: Returns a FDataGrid object where all elements - in its data_matrix are rounded .The real and - imaginary parts of complex numbers are rounded separately. + Returns a FDataGrid object where all elements + in its data_matrix are rounded. """ return self.copy(data_matrix=self.data_matrix.round(decimals)) @property - def sample_points(self): + def sample_points(self) -> GridPoints: warnings.warn("Parameter sample_points is deprecated. Use the " "parameter grid_points instead.", DeprecationWarning) return self.grid_points @property - def dim_domain(self): + def dim_domain(self) -> int: return len(self.grid_points) @property - def dim_codomain(self): + def dim_codomain(self) -> int: try: # The dimension of the image is the length of the array that can # be extracted from the data_matrix using all the dimensions of @@ -272,7 +266,7 @@ def dim_codomain(self): return 1 @property - def coordinates(self): + def coordinates(self: T) -> _CoordinateIterator[T]: r"""Returns an object to access to the image coordinates. If the functional object contains multivariate samples @@ -326,10 +320,10 @@ def coordinates(self): """ - return FDataGrid._CoordinateIterator(self) + return _CoordinateIterator(self) @property - def n_samples(self): + def n_samples(self) -> int: """Return number of rows of the data_matrix. Also the number of samples. Returns: @@ -340,19 +334,7 @@ def n_samples(self): return self.data_matrix.shape[0] @property - def ncol(self): - """Return number of columns of the data_matrix. - - Also the number of points of discretisation. 
- - Returns: - int: Number of columns of the data_matrix. - - """ - return self.data_matrix.shape[1] - - @property - def sample_range(self): + def sample_range(self) -> DomainRange: """Return the edges of the interval in which the functional data is considered to exist by the sample points. @@ -361,7 +343,7 @@ def sample_range(self): return self._sample_range @property - def domain_range(self): + def domain_range(self) -> DomainRange: """Return the edges of the interval in which the functional data is considered to exist by the sample points. @@ -370,24 +352,29 @@ def domain_range(self): return self._domain_range @property - def interpolation(self): + def interpolation(self) -> Evaluator: """Defines the type of interpolation applied in `evaluate`.""" return self._interpolation @interpolation.setter - def interpolation(self, new_interpolation): + def interpolation(self, new_interpolation: Optional[Evaluator]) -> None: """Sets the interpolation of the FDataGrid.""" if new_interpolation is None: new_interpolation = SplineInterpolation() self._interpolation = new_interpolation - def _evaluate(self, eval_points, *, aligned=True): + def _evaluate( + self, + eval_points: np.ndarray, + *, + aligned: bool = True, + ) -> np.ndarray: return self.interpolation.evaluate(self, eval_points, aligned=aligned) - def derivative(self, *, order=1): + def derivative(self: T, *, order: int=1) -> T: r"""Differentiate a FDataGrid object. It is calculated using central finite differences when possible. In @@ -395,7 +382,7 @@ def derivative(self, *, order=1): 2 are used. Args: - order (int, optional): Order of the derivative. Defaults to one. + order: Order of the derivative. Defaults to one. 
Examples: First order derivative @@ -436,25 +423,31 @@ def derivative(self, *, order=1): zip(self.grid_points, order_list))]) data_matrix = operator(self.data_matrix.astype(float)) - if self.dataset_name: - dataset_name = "{} - {} derivative".format(self.dataset_name, - order) - else: - dataset_name = None + dataset_name = "{} - {} derivative".format( + self.dataset_name, + order + ) if self.dataset_name else None fdatagrid = self.copy(data_matrix=data_matrix, dataset_name=dataset_name) return fdatagrid - def __check_same_dimensions(self, other): + def __check_same_dimensions(self: T, other: T) -> None: if self.data_matrix.shape[1:-1] != other.data_matrix.shape[1:-1]: raise ValueError("Error in columns dimensions") if not np.array_equal(self.grid_points, other.grid_points): raise ValueError("Sample points for both objects must be equal") - def sum(self, *, axis=None, out=None, keepdims=False, skipna=False, - min_count=0): + def sum( + self: T, + *, + axis: Optional[int] = None, + out: None = None, + keepdims: bool = False, + skipna: bool = False, + min_count: int = 0 + ) -> T: """Compute the sum of all the samples. Returns: @@ -487,7 +480,7 @@ def sum(self, *, axis=None, out=None, keepdims=False, skipna=False, return self.copy(data_matrix=data, sample_names=(None,)) - def var(self): + def var(self: T) -> T: """Compute the variance of a set of samples in a FDataGrid object. Returns: @@ -498,7 +491,7 @@ def var(self): return self.copy(data_matrix=[np.var(self.data_matrix, 0)], sample_names=("variance",)) - def cov(self): + def cov(self: T) -> T: """Compute the covariance. Calculates the covariance matrix representing the covariance of the @@ -528,7 +521,7 @@ def cov(self): argument_names=self.argument_names * 2, sample_names=("covariance",)) - def gmean(self): + def gmean(self: T) -> T: """Compute the geometric mean of all samples in the FDataGrid object. 
Returns: @@ -541,7 +534,7 @@ def gmean(self): scipy.stats.mstats.gmean(self.data_matrix, 0)], sample_names=("geometric mean",)) - def equals(self, other): + def equals(self, other: Any) -> bool: """Comparison of FDataGrid objects""" if not super().equals(other): return False @@ -564,7 +557,7 @@ def equals(self, other): return True - def __eq__(self, other): + def __eq__(self, other: Any) -> np.ndarray: """Elementwise equality of FDataGrid""" if not isinstance(other, type(self)) or self.dtype != other.dtype: @@ -585,7 +578,7 @@ def __eq__(self, other): return np.all(self.data_matrix == other.data_matrix, axis=tuple(range(1, self.data_matrix.ndim))) - def _get_op_matrix(self, other): + def _get_op_matrix(self, other: Union[T, np.ndarray, float]) -> np.ndarray: if isinstance(other, numbers.Number): return other elif isinstance(other, np.ndarray): @@ -606,12 +599,7 @@ def _get_op_matrix(self, other): else: return None - def __add__(self, other): - """Addition for FDataGrid object. - - It supports other FDataGrid objects, numpy.ndarray and numbers. - - """ + def __add__(self: T, other: Union[T, np.ndarray, float]) -> T: data_matrix = self._get_op_matrix(other) if data_matrix is None: @@ -619,84 +607,55 @@ def __add__(self, other): return self._copy_op(other, data_matrix=self.data_matrix + data_matrix) - def __radd__(self, other): - """Addition for FDataGrid object. - - It supports other FDataGrid objects, numpy.ndarray and numbers. - - """ + def __radd__(self: T, other: Union[T, np.ndarray, float]) -> T: return self.__add__(other) - def __sub__(self, other): - """Subtraction for FDataGrid object. - - It supports other FDataGrid objects, numpy.ndarray and numbers. + def __sub__(self: T, other: Union[T, np.ndarray, float]) -> T: - """ data_matrix = self._get_op_matrix(other) if data_matrix is None: return NotImplemented return self._copy_op(other, data_matrix=self.data_matrix - data_matrix) - def __rsub__(self, other): - """Right Subtraction for FDataGrid object. 
- - It supports other FDataGrid objects, numpy.ndarray and numbers. + def __rsub__(self: T, other: Union[T, np.ndarray, float]) -> T: - """ data_matrix = self._get_op_matrix(other) if data_matrix is None: return NotImplemented return self.copy(data_matrix=data_matrix - self.data_matrix) - def __mul__(self, other): - """Multiplication for FDataGrid object. + def __mul__(self: T, other: Union[T, np.ndarray, float]) -> T: - It supports other FDataGrid objects, numpy.ndarray and numbers. - - """ data_matrix = self._get_op_matrix(other) if data_matrix is None: return NotImplemented return self._copy_op(other, data_matrix=self.data_matrix * data_matrix) - def __rmul__(self, other): - """Multiplication for FDataGrid object. - - It supports other FDataGrid objects, numpy.ndarray and numbers. + def __rmul__(self: T, other: Union[T, np.ndarray, float]) -> T: - """ return self.__mul__(other) - def __truediv__(self, other): - """Division for FDataGrid object. - - It supports other FDataGrid objects, numpy.ndarray and numbers. + def __truediv__(self: T, other: Union[T, np.ndarray, float]) -> T: - """ data_matrix = self._get_op_matrix(other) if data_matrix is None: return NotImplemented return self._copy_op(other, data_matrix=self.data_matrix / data_matrix) - def __rtruediv__(self, other): - """Division for FDataGrid object. + def __rtruediv__(self: T, other: Union[T, np.ndarray, float]) -> T: - It supports other FDataGrid objects, numpy.ndarray and numbers. - - """ data_matrix = self._get_op_matrix(other) if data_matrix is None: return NotImplemented return self._copy_op(other, data_matrix=data_matrix / self.data_matrix) - def concatenate(self, *others, as_coordinates=False): + def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: """Join samples from a similar FDataGrid object. 
Joins samples from another FDataGrid object if it has the same @@ -765,7 +724,7 @@ def concatenate(self, *others, as_coordinates=False): return self.copy(data_matrix=np.concatenate(data, axis=0), sample_names=sum(sample_names, ())) - def scatter(self, *args, **kwargs): + def scatter(self, *args: Any, **kwargs: Any) -> Figure: """Scatter plot of the FDatGrid object. Args: @@ -792,7 +751,7 @@ def scatter(self, *args, **kwargs): return plot_scatter(self, *args, **kwargs) - def to_basis(self, basis, **kwargs): + def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: """Return the basis representation of the object. Args: @@ -844,7 +803,12 @@ def to_basis(self, basis, **kwargs): return smoother.fit_transform(self) - def to_grid(self, grid_points=None, *, sample_points=None): + def to_grid( + self: T, + grid_points: Optional[GridPointsLike] = None, + *, + sample_points: Optional[GridPointsLike] = None + ) -> T: if sample_points is not None: warnings.warn("Parameter sample_points is deprecated. 
Use the " @@ -859,19 +823,20 @@ def to_grid(self, grid_points=None, *, sample_points=None): grid_points=grid_points) def copy( - self, *, - deep=False, # For Pandas compatibility - data_matrix=None, - grid_points=None, - sample_points=None, - domain_range=None, - dataset_name=None, - argument_names=None, - coordinate_names=None, - sample_names=None, - extrapolation=None, - interpolation=None, - ): + self: T, + *, + deep: bool = False, # For Pandas compatibility + data_matrix: Optional[np.ndarray] = None, + grid_points: Optional[GridPointsLike] = None, + sample_points: Optional[GridPointsLike] = None, + domain_range: Optional[DomainRangeLike] = None, + dataset_name: Optional[str] = None, + argument_names: Optional[LabelTupleLike] = None, + coordinate_names: Optional[LabelTupleLike] = None, + sample_names: Optional[LabelTupleLike] = None, + extrapolation: Optional[Union[str, Evaluator]] = None, + interpolation: Optional[Evaluator] = None, + ) -> T: """Returns a copy of the FDataGrid. If an argument is provided the corresponding attribute in the new copy @@ -929,8 +894,14 @@ def copy( interpolation=interpolation, ) - def shift(self, shifts, *, restrict_domain=False, extrapolation=None, - eval_points=None): + def shift( + self: T, + shifts: Union[np.ndarray, float], + *, + restrict_domain: bool = False, + extrapolation: Optional[Union[str, Evaluator]] = None, + eval_points: np.ndarray = None, + ) -> T: """Perform a shift of the curves. Args: @@ -1021,7 +992,12 @@ def shift(self, shifts, *, restrict_domain=False, extrapolation=None, return self.copy(data_matrix=data_matrix, grid_points=eval_points, domain_range=domain) - def compose(self, fd, *, eval_points=None): + def compose( + self: T, + fd: T, + *, + eval_points: np.ndarray = None + ) -> T: """Composition of functions. Performs the composition of functions. 
@@ -1078,13 +1054,13 @@ def compose(self, fd, *, eval_points=None): domain_range=fd.domain_range, argument_names=fd.argument_names) - def __str__(self): + def __str__(self) -> str: """Return str(self).""" return ('Data set: ' + str(self.data_matrix) + '\ngrid_points: ' + str(self.grid_points) + '\ntime range: ' + str(self.domain_range)) - def __repr__(self): + def __repr__(self) -> str: """Return repr(self).""" return (f"FDataGrid(" @@ -1098,7 +1074,7 @@ def __repr__(self): f"\ninterpolation={repr(self.interpolation)})").replace( '\n', '\n ') - def __getitem__(self, key): + def __getitem__(self: T, key: Union[int, slice]) -> T: """Return self[key].""" key = _check_array_key(self.data_matrix, key) @@ -1110,7 +1086,13 @@ def __getitem__(self, key): # Numpy methods ##################################################################### - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + def __array_ufunc__( + self, + ufunc: Any, + method: str, + *inputs: Any, + **kwargs: Any + ) -> Any: for i in inputs: if isinstance(i, FDataGrid) and not np.array_equal( @@ -1147,7 +1129,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): # Pandas ExtensionArray methods ##################################################################### @property - def dtype(self): + def dtype(self) -> FDataGridDType: """The dtype for this extension array, FDataGridDType""" return FDataGridDType( grid_points=self.grid_points, @@ -1162,7 +1144,7 @@ def nbytes(self) -> int: return self.data_matrix.nbytes + sum( p.nbytes for p in self.grid_points) - def isna(self): + def isna(self) -> np.ndarray: """ A 1-D array indicating if each value is missing. 
@@ -1173,7 +1155,7 @@ def isna(self): axis=tuple(range(1, self.data_matrix.ndim))) -class FDataGridDType(pandas.api.extensions.ExtensionDtype): +class FDataGridDType(pandas.api.extensions.ExtensionDtype): # type: ignore """ DType corresponding to FDataGrid in Pandas """ @@ -1182,8 +1164,13 @@ class FDataGridDType(pandas.api.extensions.ExtensionDtype): type = FDataGrid na_value = pandas.NA - def __init__(self, grid_points, dim_codomain, domain_range=None) -> None: - grid_points = _tuple_of_arrays(grid_points) + def __init__( + self, + grid_points: GridPointsLike, + dim_codomain: int, + domain_range: Optional[DomainRangeLike] = None + ) -> None: + grid_points = _to_grid_points(grid_points) self.grid_points = tuple(tuple(s) for s in grid_points) @@ -1195,7 +1182,7 @@ def __init__(self, grid_points, dim_codomain, domain_range=None) -> None: self.dim_codomain = dim_codomain @classmethod - def construct_array_type(cls): + def construct_array_type(cls) -> Type[FDataGrid]: return FDataGrid def _na_repr(self) -> FDataGrid: @@ -1232,3 +1219,34 @@ def __eq__(self, other: Any) -> bool: def __hash__(self) -> int: return hash((self.grid_points, self.domain_range, self.dim_codomain)) + + +class _CoordinateIterator(Sequence[T]): + """Internal class to iterate through the image coordinates.""" + + def __init__(self, fdatagrid: T) -> None: + """Create an iterator through the image coordinates.""" + self._fdatagrid = fdatagrid + + def __iter__(self) -> Iterator[T]: + """Return an iterator through the image coordinates.""" + for i in range(len(self)): + yield self[i] + + def __getitem__(self, key: Union[int, slice]) -> T: + """Get a specific coordinate.""" + + s_key = key + if isinstance(s_key, int): + s_key = slice(s_key, s_key + 1) + + coordinate_names = np.array( + self._fdatagrid.coordinate_names)[s_key] + + return self._fdatagrid.copy( + data_matrix=self._fdatagrid.data_matrix[..., key], + coordinate_names=coordinate_names) + + def __len__(self) -> int: + """Return the number of 
coordinates.""" + return self._fdatagrid.dim_codomain From cfa149ba8c11cb39ddc59f3a50f8bbad81aa7e6b Mon Sep 17 00:00:00 2001 From: vnmabus Date: Tue, 5 Jan 2021 18:23:52 +0100 Subject: [PATCH 015/417] Some style changes for grid. --- setup.cfg | 4 +- skfda/representation/grid.py | 699 ++++++++++++++++++++--------------- 2 files changed, 400 insertions(+), 303 deletions(-) diff --git a/setup.cfg b/setup.cfg index a7d21a3f8..dd1df4f64 100644 --- a/setup.cfg +++ b/setup.cfg @@ -64,6 +64,8 @@ ignore = WPS450, # Explicit len compare is better than implicit WPS507, + # Comparison with not is not the same as with equality + WPS520, per-file-ignores = __init__.py: @@ -89,7 +91,7 @@ rst-directives = rst-roles = attr,class,func,meth,mod,obj,ref,term, -allowed-domain-names = data, obj, result, val, value, values, var +allowed-domain-names = data, obj, result, results, val, value, values, var # Needs to be tuned max-imports = 20 diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index b63f075a1..8c3cdbe8d 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -10,16 +10,7 @@ import copy import numbers import warnings -from typing import ( - TYPE_CHECKING, - Any, - Iterator, - Optional, - Sequence, - Type, - TypeVar, - Union, -) +from typing import TYPE_CHECKING, Any, Optional, Sequence, Type, TypeVar, Union import findiff import numpy as np @@ -143,41 +134,24 @@ def __init__( sample_names: Optional[LabelTupleLike] = None, axes_labels: Optional[LabelTupleLike] = None, extrapolation: Optional[Union[str, Evaluator]] = None, - interpolation: Optional[Evaluator] = None + interpolation: Optional[Evaluator] = None, ): - """Construct a FDataGrid object. - - Args: - data_matrix: a matrix where each row contains the - values of a functional datum evaluated at the - points of discretisation. 
- grid_points: an array containing the - points of discretisation where values have been recorded or a - list of lists with each of the list containing the points of - dicretisation for each axis. - domain_range: contains the - edges of the interval in which the functional data is - considered to exist (if the argument has 2 dimensions each - row is interpreted as the limits of one of the dimension of - the domain). - dataset_label: name of the dataset. - axes_labels: list containing the labels of the - different axes. The length of the list must be equal to the sum - of the number of dimensions of the domain plus the number of - dimensions of the image. - """ + """Construct a FDataGrid object.""" if sample_points is not None: - warnings.warn("Parameter sample_points is deprecated. Use the " - "parameter grid_points instead.", - DeprecationWarning) + warnings.warn( + "Parameter sample_points is deprecated. Use the " + "parameter grid_points instead.", + DeprecationWarning, + ) grid_points = sample_points self.data_matrix = _int_to_real(np.atleast_2d(data_matrix)) if grid_points is None: - self.grid_points = _to_grid_points( - [np.linspace(0., 1., self.data_matrix.shape[i]) for i in - range(1, self.data_matrix.ndim)]) + self.grid_points = _to_grid_points([ + np.linspace(0.0, 1.0, self.data_matrix.shape[i]) + for i in range(1, self.data_matrix.ndim) + ]) else: # Check that the dimension of the data matches the grid_points @@ -189,13 +163,15 @@ def __init__( grid_points_shape = [len(i) for i in self.grid_points] if not np.array_equal(data_shape, grid_points_shape): - raise ValueError("Incorrect dimension in data_matrix and " - "grid_points. Data has shape {} and grid " - "points have shape {}" - .format(data_shape, grid_points_shape)) + raise ValueError( + f"Incorrect dimension in data_matrix and " + f"grid_points. 
Data has shape {data_shape} and grid " + f"points have shape {grid_points_shape}", + ) self._sample_range = np.array( - [(s[0], s[-1]) for s in self.grid_points]) + [(s[0], s[-1]) for s in self.grid_points], + ) if domain_range is None: domain_range = self.sample_range @@ -208,11 +184,13 @@ def __init__( raise ValueError("Incorrect shape of domain_range.") for i in range(self.dim_domain): - if (self._domain_range[i][0] > self.grid_points[i][0] - or self._domain_range[i][-1] < self.grid_points[i] - [-1]): - raise ValueError("Sample points must be within the domain " - "range.") + if ( + self._domain_range[i][0] > self.grid_points[i][0] + or self._domain_range[i][-1] < self.grid_points[i][-1] + ): + raise ValueError( + "Sample points must be within the domain range.", + ) # Adjust the data matrix if the dimension of the image is one if self.data_matrix.ndim == 1 + self.dim_domain: @@ -220,15 +198,17 @@ def __init__( self.interpolation = interpolation # type: ignore - super().__init__(extrapolation=extrapolation, - dataset_label=dataset_label, - dataset_name=dataset_name, - axes_labels=axes_labels, - argument_names=argument_names, - coordinate_names=coordinate_names, - sample_names=sample_names) + super().__init__( + extrapolation=extrapolation, + dataset_label=dataset_label, + dataset_name=dataset_name, + axes_labels=axes_labels, + argument_names=argument_names, + coordinate_names=coordinate_names, + sample_names=sample_names, + ) - def round(self: T, decimals: int = 0) -> T: + def round(self: T, decimals: int = 0) -> T: # noqa: WPS125 """Evenly round to the given number of decimals. Args: @@ -245,9 +225,11 @@ def round(self: T, decimals: int = 0) -> T: @property def sample_points(self) -> GridPoints: - warnings.warn("Parameter sample_points is deprecated. Use the " - "parameter grid_points instead.", - DeprecationWarning) + warnings.warn( + "Parameter sample_points is deprecated. 
Use the " + "parameter grid_points instead.", + DeprecationWarning, + ) return self.grid_points @property @@ -324,41 +306,47 @@ def coordinates(self: T) -> _CoordinateIterator[T]: @property def n_samples(self) -> int: - """Return number of rows of the data_matrix. Also the number of samples. + """ + Return the number of samples. + + This is also the number of rows of the data_matrix. Returns: - int: Number of samples of the FDataGrid object. Also the number of - rows of the data_matrix. + Number of samples of the FDataGrid object. """ return self.data_matrix.shape[0] @property def sample_range(self) -> DomainRange: - """Return the edges of the interval in which the functional data is - considered to exist by the sample points. + """ + Return the sample range of the function. + + This contains the minimum and maximum values of the grid points in + each dimension. - Do not have to be equal to the domain_range. + It does not have to be equal to the `domain_range`. """ return self._sample_range @property def domain_range(self) -> DomainRange: - """Return the edges of the interval in which the functional data is - considered to exist by the sample points. + """ + Return the :term:`domain range` of the function. + + It does not have to be equal to the `sample_range`. - Do not have to be equal to the sample_range. 
""" return self._domain_range @property def interpolation(self) -> Evaluator: - """Defines the type of interpolation applied in `evaluate`.""" + """Define the type of interpolation applied in `evaluate`.""" return self._interpolation @interpolation.setter def interpolation(self, new_interpolation: Optional[Evaluator]) -> None: - """Sets the interpolation of the FDataGrid.""" + if new_interpolation is None: new_interpolation = SplineInterpolation() @@ -371,10 +359,13 @@ def _evaluate( aligned: bool = True, ) -> np.ndarray: - return self.interpolation.evaluate(self, eval_points, - aligned=aligned) + return self.interpolation.evaluate( + self, + eval_points, + aligned=aligned, + ) - def derivative(self: T, *, order: int=1) -> T: + def derivative(self: T, *, order: int = 1) -> T: r"""Differentiate a FDataGrid object. It is calculated using central finite differences when possible. In @@ -384,6 +375,9 @@ def derivative(self: T, *, order: int=1) -> T: Args: order: Order of the derivative. Defaults to one. + Returns: + Derivative function. 
+ Examples: First order derivative @@ -418,44 +412,55 @@ def derivative(self: T, *, order: int=1) -> T: if order_list.ndim != 1 or len(order_list) != self.dim_domain: raise ValueError("The order for each partial should be specified.") - operator = findiff.FinDiff(*[(1 + i, p, o) - for i, (p, o) in enumerate( - zip(self.grid_points, order_list))]) + operator = findiff.FinDiff(*[ + (1 + i, *z) + for i, z in enumerate( + zip(self.grid_points, order_list), + ) + ]) data_matrix = operator(self.data_matrix.astype(float)) - dataset_name = "{} - {} derivative".format( - self.dataset_name, - order - ) if self.dataset_name else None - - fdatagrid = self.copy(data_matrix=data_matrix, - dataset_name=dataset_name) + dataset_name = ( + f"{self.dataset_name} - {order} derivative" + if self.dataset_name else None + ) - return fdatagrid + return self.copy( + data_matrix=data_matrix, + dataset_name=dataset_name, + ) - def __check_same_dimensions(self: T, other: T) -> None: + def _check_same_dimensions(self: T, other: T) -> None: if self.data_matrix.shape[1:-1] != other.data_matrix.shape[1:-1]: raise ValueError("Error in columns dimensions") if not np.array_equal(self.grid_points, other.grid_points): raise ValueError("Sample points for both objects must be equal") - def sum( + def sum( # noqa: WPS125 self: T, *, axis: Optional[int] = None, out: None = None, keepdims: bool = False, skipna: bool = False, - min_count: int = 0 + min_count: int = 0, ) -> T: """Compute the sum of all the samples. + Args: + axis: Used for compatibility with numpy. Must be None or 0. + out: Used for compatibility with numpy. Must be None. + keepdims: Used for compatibility with numpy. Must be False. + skipna: Wether the NaNs are ignored or not. + min_count: Number of valid (non NaN) data to have in order + for the a variable to not be NaN when `skipna` is + `True`. 
+ Returns: - FDataGrid : A FDataGrid object with just one sample representing + A FDataGrid object with just one sample representing the sum of all the samples in the original object. Examples: - >>> from skfda import FDataGrid >>> data_matrix = [[0.5, 1, 2, .5], [1.5, 1, 4, .5]] >>> FDataGrid(data_matrix).sum() @@ -469,27 +474,33 @@ def sum( """ super().sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna) - data = (np.nansum(self.data_matrix, axis=0, keepdims=True) if skipna - else np.sum(self.data_matrix, axis=0, keepdims=True)) + data = ( + np.nansum(self.data_matrix, axis=0, keepdims=True) if skipna + else np.sum(self.data_matrix, axis=0, keepdims=True) + ) if min_count > 0: valid = ~np.isnan(self.data_matrix) n_valid = np.sum(valid, axis=0) data[n_valid < min_count] = np.NaN - return self.copy(data_matrix=data, - sample_names=(None,)) + return self.copy( + data_matrix=data, + sample_names=(None,), + ) def var(self: T) -> T: """Compute the variance of a set of samples in a FDataGrid object. Returns: - FDataGrid: A FDataGrid object with just one sample representing the + A FDataGrid object with just one sample representing the variance of all the samples in the original FDataGrid object. """ - return self.copy(data_matrix=[np.var(self.data_matrix, 0)], - sample_names=("variance",)) + return self.copy( + data_matrix=[np.var(self.data_matrix, 0)], + sample_names=("variance",), + ) def cov(self: T) -> T: """Compute the covariance. @@ -498,44 +509,56 @@ def cov(self: T) -> T: functional samples at the observation points. Returns: - numpy.darray: Matrix of covariances. + Covariance function. 
""" - - if self.dataset_name is not None: - dataset_name = self.dataset_name + ' - covariance' - else: - dataset_name = None + dataset_name = ( + f"{self.dataset_name} - covariance" + if self.dataset_name is not None else None + ) if self.dim_domain != 1 or self.dim_codomain != 1: - raise NotImplementedError("Covariance only implemented " - "for univariate functions") - - return self.copy(data_matrix=np.cov(self.data_matrix[..., 0], - rowvar=False)[np.newaxis, ...], - grid_points=[self.grid_points[0], - self.grid_points[0]], - domain_range=[self.domain_range[0], - self.domain_range[0]], - dataset_name=dataset_name, - argument_names=self.argument_names * 2, - sample_names=("covariance",)) + raise NotImplementedError( + "Covariance only implemented " + "for univariate functions", + ) + + return self.copy( + data_matrix=np.cov( + self.data_matrix[..., 0], + rowvar=False, + )[np.newaxis, ...], + grid_points=[ + self.grid_points[0], + self.grid_points[0], + ], + domain_range=[ + self.domain_range[0], + self.domain_range[0], + ], + dataset_name=dataset_name, + argument_names=self.argument_names * 2, + sample_names=("covariance",), + ) def gmean(self: T) -> T: """Compute the geometric mean of all samples in the FDataGrid object. Returns: - FDataGrid: A FDataGrid object with just one sample representing + A FDataGrid object with just one sample representing the geometric mean of all the samples in the original FDataGrid object. 
""" - return self.copy(data_matrix=[ - scipy.stats.mstats.gmean(self.data_matrix, 0)], - sample_names=("geometric mean",)) + return self.copy( + data_matrix=[ + scipy.stats.mstats.gmean(self.data_matrix, 0), + ], + sample_names=("geometric mean",), + ) def equals(self, other: Any) -> bool: - """Comparison of FDataGrid objects""" + """Comparison of FDataGrid objects.""" if not super().equals(other): return False @@ -558,8 +581,7 @@ def equals(self, other: Any) -> bool: return True def __eq__(self, other: Any) -> np.ndarray: - """Elementwise equality of FDataGrid""" - + """Elementwise equality of FDataGrid.""" if not isinstance(other, type(self)) or self.dtype != other.dtype: if other is pandas.NA: return self.isna() @@ -567,37 +589,46 @@ def __eq__(self, other: Any) -> np.ndarray: other, (pandas.Series, pandas.Index, pandas.DataFrame), ): return np.concatenate([x == y for x, y in zip(self, other)]) - else: - return NotImplemented - if len(self) != len(other) and len(self) != 1 and len(other) != 1: - raise ValueError(f"Different lengths: " - f"len(self)={len(self)} and " - f"len(other)={len(other)}") + return NotImplemented - return np.all(self.data_matrix == other.data_matrix, - axis=tuple(range(1, self.data_matrix.ndim))) + if len(self) != len(other) and len(self) != 1 and len(other) != 1: + raise ValueError( + f"Different lengths: " + f"len(self)={len(self)} and " + f"len(other)={len(other)}", + ) + + return np.all( + self.data_matrix == other.data_matrix, + axis=tuple(range(1, self.data_matrix.ndim)), + ) - def _get_op_matrix(self, other: Union[T, np.ndarray, float]) -> np.ndarray: + def _get_op_matrix( + self, + other: Union[T, np.ndarray, float], + ) -> Optional[np.ndarray]: if isinstance(other, numbers.Number): return other elif isinstance(other, np.ndarray): - if other.shape == () or other.shape == (1,): + if other.shape in {(), (1,)}: return other elif other.shape == (self.n_samples,): - other_index = ((slice(None),) + (np.newaxis,) * - 
(self.data_matrix.ndim - 1)) + other_index = ( + (slice(None),) + (np.newaxis,) + * (self.data_matrix.ndim - 1) + ) return other[other_index] - else: - return None + + return None elif isinstance(other, FDataGrid): - self.__check_same_dimensions(other) + self._check_same_dimensions(other) return other.data_matrix - else: - return None + + return None def __add__(self: T, other: Union[T, np.ndarray, float]) -> T: @@ -662,14 +693,13 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: dimensions and sampling points. Args: - others (:obj:`FDataGrid`): Objects to be concatenated. - as_coordinates (boolean, optional): If False concatenates as + others: Objects to be concatenated. + as_coordinates: If False concatenates as new samples, else, concatenates the other functions as new components of the image. Defaults to false. Returns: - :obj:`FDataGrid`: FDataGrid object with the samples from the - original objects. + FDataGrid object with the samples from the original objects. 
Examples: >>> fd = FDataGrid([1,2,4,5,8], range(5)) @@ -695,55 +725,54 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: # Checks if not as_coordinates: for other in others: - self.__check_same_dimensions(other) + self._check_same_dimensions(other) - elif not all([np.array_equal(self.grid_points, other.grid_points) - for other in others]): - raise ValueError("All the FDataGrids must be sampled in the same " - "sample points.") + elif not all( + np.array_equal(self.grid_points, other.grid_points) + for other in others + ): + raise ValueError( + "All the FDataGrids must be sampled in the same " + "sample points.", + ) - elif any([self.n_samples != other.n_samples for other in others]): + elif any(self.n_samples != other.n_samples for other in others): - raise ValueError(f"All the FDataGrids must contain the same " - f"number of samples {self.n_samples} to " - f"concatenate as a new coordinate.") + raise ValueError( + f"All the FDataGrids must contain the same " + f"number of samples {self.n_samples} to " + f"concatenate as a new coordinate.", + ) data = [self.data_matrix] + [other.data_matrix for other in others] if as_coordinates: - coordinate_names = [fd.coordinate_names for fd in [self, *others]] + coordinate_names = [fd.coordinate_names for fd in (self, *others)] - return self.copy(data_matrix=np.concatenate(data, axis=-1), - coordinate_names=sum(coordinate_names, ())) - - else: + return self.copy( + data_matrix=np.concatenate(data, axis=-1), + coordinate_names=sum(coordinate_names, ()), + ) - sample_names = [fd.sample_names for fd in [self, *others]] + sample_names = [fd.sample_names for fd in (self, *others)] - return self.copy(data_matrix=np.concatenate(data, axis=0), - sample_names=sum(sample_names, ())) + return self.copy( + data_matrix=np.concatenate(data, axis=0), + sample_names=sum(sample_names, ()), + ) def scatter(self, *args: Any, **kwargs: Any) -> Figure: """Scatter plot of the FDatGrid object. 
Args: - fig (figure object, optional): figure over with the graphs are - plotted in case ax is not specified. If None and ax is also - None, the figure is initialized. - axes (list of axis objects, optional): axis over where the graphs - are plotted. If None, see param fig. - n_rows(int, optional): designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols(int, optional): designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. + args: positional arguments to be passed to the + matplotlib.pyplot.scatter function. kwargs: keyword arguments to be passed to the - matplotlib.pyplot.scatter function; + matplotlib.pyplot.scatter function. Returns: - fig (figure): figure object in which the graphs are plotted. + Figure object in which the graphs are plotted. """ @@ -757,7 +786,7 @@ def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: Args: basis(Basis): basis object in which the functional data are going to be represented. - **kwargs: keyword arguments to be passed to + kwargs: keyword arguments to be passed to FDataBasis.from_data(). 
Returns: @@ -782,15 +811,19 @@ def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: from ..preprocessing.smoothing import BasisSmoother if self.dim_domain != basis.dim_domain: - raise ValueError(f"The domain of the function has " - f"dimension {self.dim_domain} " - f"but the domain of the basis has " - f"dimension {basis.dim_domain}") + raise ValueError( + f"The domain of the function has " + f"dimension {self.dim_domain} " + f"but the domain of the basis has " + f"dimension {basis.dim_domain}", + ) elif self.dim_codomain != basis.dim_codomain: - raise ValueError(f"The codomain of the function has " - f"dimension {self.dim_codomain} " - f"but the codomain of the basis has " - f"dimension {basis.dim_codomain}") + raise ValueError( + f"The codomain of the function has " + f"dimension {self.dim_codomain} " + f"but the codomain of the basis has " + f"dimension {basis.dim_codomain}", + ) # Readjust the domain range if there was not an explicit one if basis._domain_range is None: @@ -799,30 +832,35 @@ def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: smoother = BasisSmoother( basis=basis, **kwargs, - return_basis=True) + return_basis=True, + ) return smoother.fit_transform(self) - def to_grid( + def to_grid( # noqa: D102 self: T, grid_points: Optional[GridPointsLike] = None, *, - sample_points: Optional[GridPointsLike] = None + sample_points: Optional[GridPointsLike] = None, ) -> T: if sample_points is not None: - warnings.warn("Parameter sample_points is deprecated. Use the " - "parameter grid_points instead.", - DeprecationWarning) + warnings.warn( + "Parameter sample_points is deprecated. 
Use the " + "parameter grid_points instead.", + DeprecationWarning, + ) grid_points = sample_points if grid_points is None: grid_points = self.grid_points - return self.copy(data_matrix=self.evaluate(grid_points, grid=True), - grid_points=grid_points) + return self.copy( + data_matrix=self.evaluate(grid_points, grid=True), + grid_points=grid_points, + ) - def copy( + def copy( # noqa: WPS211 self: T, *, deep: bool = False, # For Pandas compatibility @@ -837,17 +875,19 @@ def copy( extrapolation: Optional[Union[str, Evaluator]] = None, interpolation: Optional[Evaluator] = None, ) -> T: - """Returns a copy of the FDataGrid. + """ + Return a copy of the FDataGrid. If an argument is provided the corresponding attribute in the new copy is updated. """ - if sample_points is not None: - warnings.warn("Parameter sample_points is deprecated. Use the " - "parameter grid_points instead.", - DeprecationWarning) + warnings.warn( + "Parameter sample_points is deprecated. Use the " + "parameter grid_points instead.", + DeprecationWarning, + ) grid_points = sample_points if data_matrix is None: @@ -905,17 +945,17 @@ def shift( """Perform a shift of the curves. Args: - shifts (array_like or numeric): List with the shifts + shifts: List with the shifts corresponding for each sample or numeric with the shift to apply to all samples. - restrict_domain (bool, optional): If True restricts the domain to + restrict_domain: If True restricts the domain to avoid evaluate points outside the domain using extrapolation. Defaults uses extrapolation. - extrapolation (str or Extrapolation, optional): Controls the + extrapolation: Controls the extrapolation mode for elements outside the domain range. By default uses the method defined in fd. See extrapolation to more information. - eval_points (array_like, optional): Set of points where + eval_points: Set of points where the functions are evaluated to obtain the discrete representation of the object to operate. 
If an empty list the current grid_points are used to unificate the domain of the @@ -923,8 +963,8 @@ def shift( Returns: :class:`FDataGrid` with the shifted data. - """ + """ if np.isscalar(shifts): shifts = [shifts] @@ -943,12 +983,17 @@ def shift( grid_points = self.grid_points + shifts domain_range = self.domain_range + shifts - return self.copy(grid_points=grid_points, - domain_range=domain_range) + return self.copy( + grid_points=grid_points, + domain_range=domain_range, + ) + if shifts.shape[0] != self.n_samples: - raise ValueError(f"shifts vector ({shifts.shape[0]}) must have the" - f" same length than the number of samples " - f"({self.n_samples})") + raise ValueError( + f"shifts vector ({shifts.shape[0]}) must have the" + f" same length than the number of samples " + f"({self.n_samples})", + ) if eval_points is None: eval_points = self.grid_points @@ -962,73 +1007,100 @@ def shift( domain = np.vstack((a, b)).T - eval_points = [eval_points[i][ - np.logical_and(eval_points[i] >= domain[i, 0], - eval_points[i] <= domain[i, 1])] - for i in range(self.dim_domain)] + eval_points = [ + eval_points[i][ + np.logical_and( + eval_points[i] >= domain[i, 0], + eval_points[i] <= domain[i, 1], + )] + for i in range(self.dim_domain) + ] else: domain = self.domain_range eval_points = np.asarray(eval_points) - eval_points_repeat = np.repeat(eval_points[np.newaxis, :], - self.n_samples, axis=0) + eval_points_repeat = np.repeat( + eval_points[np.newaxis, :], + self.n_samples, + axis=0, + ) # Solve problem with cartesian and matrix indexing if self.dim_domain > 1: shifts[:, :2] = np.flip(shifts[:, :2], axis=1) - shifts = np.repeat(shifts[..., np.newaxis], - eval_points.shape[1], axis=2) + shifts = np.repeat( + shifts[..., np.newaxis], + eval_points.shape[1], + axis=2, + ) eval_points_shifted = eval_points_repeat + shifts - data_matrix = self.evaluate(eval_points_shifted, - extrapolation=extrapolation, - aligned=False, - grid=True) + data_matrix = self.evaluate( + 
eval_points_shifted, + extrapolation=extrapolation, + aligned=False, + grid=True, + ) - return self.copy(data_matrix=data_matrix, grid_points=eval_points, - domain_range=domain) + return self.copy( + data_matrix=data_matrix, + grid_points=eval_points, + domain_range=domain, + ) def compose( self: T, fd: T, *, - eval_points: np.ndarray = None + eval_points: np.ndarray = None, ) -> T: """Composition of functions. Performs the composition of functions. Args: - fd (:class:`FData`): FData object to make the composition. Should + fd: FData object to make the composition. Should have the same number of samples and image dimension equal to 1. - eval_points (array_like): Points to perform the evaluation. - """ + eval_points: Points to perform the evaluation. + + Returns: + Function representing the composition. + """ if self.dim_domain != fd.dim_codomain: - raise ValueError(f"Dimension of codomain of first function do not " - f"match with the domain of the second function " - f"({self.dim_domain})!=({fd.dim_codomain}).") + raise ValueError( + f"Dimension of codomain of first function do not " + f"match with the domain of the second function " + f"{self.dim_domain} != {fd.dim_codomain}.", + ) # All composed with same function if fd.n_samples == 1 and self.n_samples != 1: - fd = fd.copy(data_matrix=np.repeat(fd.data_matrix, self.n_samples, - axis=0)) + fd = fd.copy(data_matrix=np.repeat( + fd.data_matrix, + self.n_samples, + axis=0, + )) if fd.dim_domain == 1: if eval_points is None: try: eval_points = fd.grid_points[0] except AttributeError: - eval_points = np.linspace(*fd.domain_range[0], - constants.N_POINTS_COARSE_MESH) + eval_points = np.linspace( + *fd.domain_range[0], + constants.N_POINTS_COARSE_MESH, + ) eval_points_transformation = fd(eval_points) - data_matrix = self(eval_points_transformation, - aligned=False) + data_matrix = self( + eval_points_transformation, + aligned=False, + ) else: if eval_points is None: eval_points = fd.grid_points @@ -1037,50 +1109,62 @@ 
def compose( lengths = [len(ax) for ax in eval_points] - eval_points_transformation = np.empty((self.n_samples, - np.prod(lengths), - self.dim_domain)) + eval_points_transformation = np.empty(( + self.n_samples, + np.prod(lengths), + self.dim_domain, + )) for i in range(self.n_samples): eval_points_transformation[i] = np.array( - list(map(np.ravel, grid_transformation[i].T)) + list(map(np.ravel, grid_transformation[i].T)), ).T - data_matrix = self(eval_points_transformation, - aligned=False) + data_matrix = self( + eval_points_transformation, + aligned=False, + ) - return self.copy(data_matrix=data_matrix, - grid_points=eval_points, - domain_range=fd.domain_range, - argument_names=fd.argument_names) + return self.copy( + data_matrix=data_matrix, + grid_points=eval_points, + domain_range=fd.domain_range, + argument_names=fd.argument_names, + ) def __str__(self) -> str: """Return str(self).""" - return ('Data set: ' + str(self.data_matrix) - + '\ngrid_points: ' + str(self.grid_points) - + '\ntime range: ' + str(self.domain_range)) + return ( + f"Data set: {self.data_matrix}\n" + f"grid_points: {self.grid_points}\n" + f"time range: {self.domain_range}" + ) def __repr__(self) -> str: """Return repr(self).""" - - return (f"FDataGrid(" - f"\n{repr(self.data_matrix)}," - f"\ngrid_points={repr(self.grid_points)}," - f"\ndomain_range={repr(self.domain_range)}," - f"\ndataset_name={repr(self.dataset_name)}," - f"\nargument_names={repr(self.argument_names)}," - f"\ncoordinate_names={repr(self.coordinate_names)}," - f"\nextrapolation={repr(self.extrapolation)}," - f"\ninterpolation={repr(self.interpolation)})").replace( - '\n', '\n ') + return ( + f"FDataGrid(" + f"\n{repr(self.data_matrix)}," + f"\ngrid_points={repr(self.grid_points)}," + f"\ndomain_range={repr(self.domain_range)}," + f"\ndataset_name={repr(self.dataset_name)}," + f"\nargument_names={repr(self.argument_names)}," + f"\ncoordinate_names={repr(self.coordinate_names)}," + 
f"\nextrapolation={repr(self.extrapolation)}," + f"\ninterpolation={repr(self.interpolation)})" + ).replace( + '\n', + '\n ', + ) def __getitem__(self: T, key: Union[int, slice]) -> T: """Return self[key].""" - key = _check_array_key(self.data_matrix, key) - return self.copy(data_matrix=self.data_matrix[key], - sample_names=np.array(self.sample_names)[key]) + return self.copy( + data_matrix=self.data_matrix[key], + sample_names=np.array(self.sample_names)[key], + ) ##################################################################### # Numpy methods @@ -1091,21 +1175,27 @@ def __array_ufunc__( ufunc: Any, method: str, *inputs: Any, - **kwargs: Any + **kwargs: Any, ) -> Any: for i in inputs: - if isinstance(i, FDataGrid) and not np.array_equal( - i.grid_points, self.grid_points): + if ( + isinstance(i, FDataGrid) + and not np.array_equal(i.grid_points, self.grid_points) + ): return NotImplemented - new_inputs = [i.data_matrix if isinstance(i, FDataGrid) - else i for i in inputs] + new_inputs = [ + i.data_matrix if isinstance(i, FDataGrid) + else i for i in inputs + ] outputs = kwargs.pop('out', None) if outputs: - new_outputs = [o.data_matrix if isinstance(o, FDataGrid) - else o for o in outputs] + new_outputs = [ + o.data_matrix if isinstance(o, FDataGrid) + else o for o in outputs + ] kwargs['out'] = tuple(new_outputs) else: new_outputs = (None,) * ufunc.nout @@ -1117,9 +1207,10 @@ def __array_ufunc__( if ufunc.nout == 1: results = (results,) - results = tuple((result - if output is None else output) - for result, output in zip(results, new_outputs)) + results = tuple( + (result if output is None else output) + for result, output in zip(results, new_outputs) + ) results = [self.copy(data_matrix=r) for r in results] @@ -1134,7 +1225,8 @@ def dtype(self) -> FDataGridDType: return FDataGridDType( grid_points=self.grid_points, domain_range=self.domain_range, - dim_codomain=self.dim_codomain) + dim_codomain=self.dim_codomain, + ) @property def nbytes(self) -> int: @@ 
-1142,33 +1234,35 @@ def nbytes(self) -> int: The number of bytes needed to store this object in memory. """ return self.data_matrix.nbytes + sum( - p.nbytes for p in self.grid_points) + p.nbytes for p in self.grid_points + ) def isna(self) -> np.ndarray: """ - A 1-D array indicating if each value is missing. + Return a 1-D array indicating if each value is missing. Returns: - na_values (np.ndarray): Positions of NA. + na_values: Positions of NA. """ - return np.all(np.isnan(self.data_matrix), - axis=tuple(range(1, self.data_matrix.ndim))) + return np.all( + np.isnan(self.data_matrix), + axis=tuple(range(1, self.data_matrix.ndim)), + ) class FDataGridDType(pandas.api.extensions.ExtensionDtype): # type: ignore - """ - DType corresponding to FDataGrid in Pandas - """ + """DType corresponding to FDataGrid in Pandas.""" + name = 'FDataGrid' kind = 'O' - type = FDataGrid + type = FDataGrid # noqa: WPS125 na_value = pandas.NA def __init__( self, grid_points: GridPointsLike, dim_codomain: int, - domain_range: Optional[DomainRangeLike] = None + domain_range: Optional[DomainRangeLike] = None, ) -> None: grid_points = _to_grid_points(grid_points) @@ -1176,30 +1270,36 @@ def __init__( if domain_range is None: domain_range = np.array( - [(s[0], s[-1]) for s in self.grid_points]) + [(s[0], s[-1]) for s in self.grid_points], + ) self.domain_range = _to_domain_range(domain_range) self.dim_codomain = dim_codomain @classmethod - def construct_array_type(cls) -> Type[FDataGrid]: + def construct_array_type(cls) -> Type[FDataGrid]: # noqa: D102 return FDataGrid def _na_repr(self) -> FDataGrid: - shape = ((1,) - + tuple(len(s) for s in self.grid_points) - + (self.dim_codomain,)) + shape = ( + (1,) + + tuple(len(s) for s in self.grid_points) + + (self.dim_codomain,) + ) data_matrix = np.full(shape=shape, fill_value=np.NaN) return FDataGrid( grid_points=self.grid_points, domain_range=self.domain_range, - data_matrix=data_matrix) + data_matrix=data_matrix, + ) def __eq__(self, other: Any) 
-> bool: """ + Compare dtype equality. + Rules for equality (similar to categorical): 1) Any FData is equal to the string 'category' 2) Any FData is equal to itself @@ -1210,15 +1310,16 @@ def __eq__(self, other: Any) -> bool: return other == self.name elif other is self: return True - else: - return (isinstance(other, FDataGridDType) - and self.dim_codomain == other.dim_codomain - and self.domain_range == other.domain_range - and self.grid_points == other.grid_points) + + return ( + isinstance(other, FDataGridDType) + and self.dim_codomain == other.dim_codomain + and self.domain_range == other.domain_range + and self.grid_points == other.grid_points + ) def __hash__(self) -> int: - return hash((self.grid_points, - self.domain_range, self.dim_codomain)) + return hash((self.grid_points, self.domain_range, self.dim_codomain)) class _CoordinateIterator(Sequence[T]): @@ -1228,24 +1329,18 @@ def __init__(self, fdatagrid: T) -> None: """Create an iterator through the image coordinates.""" self._fdatagrid = fdatagrid - def __iter__(self) -> Iterator[T]: - """Return an iterator through the image coordinates.""" - for i in range(len(self)): - yield self[i] - def __getitem__(self, key: Union[int, slice]) -> T: """Get a specific coordinate.""" - s_key = key if isinstance(s_key, int): s_key = slice(s_key, s_key + 1) - coordinate_names = np.array( - self._fdatagrid.coordinate_names)[s_key] + coordinate_names = np.array(self._fdatagrid.coordinate_names)[s_key] return self._fdatagrid.copy( data_matrix=self._fdatagrid.data_matrix[..., key], - coordinate_names=coordinate_names) + coordinate_names=coordinate_names, + ) def __len__(self) -> int: """Return the number of coordinates.""" From 81e7a4ef061d22e1e4c219be1a7bb69af9b9d433 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Wed, 6 Jan 2021 22:59:21 +0100 Subject: [PATCH 016/417] Fix some style problems for FDataBasis. 
--- skfda/representation/basis/_fdatabasis.py | 396 +++++++++++++--------- 1 file changed, 229 insertions(+), 167 deletions(-) diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 7f3cf1285..4959c694e 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -3,17 +3,7 @@ import copy import warnings from builtins import isinstance -from typing import ( - TYPE_CHECKING, - Any, - Generic, - Iterator, - Optional, - Sequence, - Type, - TypeVar, - Union, -) +from typing import TYPE_CHECKING, Any, Optional, Sequence, Type, TypeVar, Union import numpy as np import pandas.api.extensions @@ -88,14 +78,16 @@ def __init__( argument_names: Optional[LabelTupleLike] = None, coordinate_names: Optional[LabelTupleLike] = None, sample_names: Optional[LabelTupleLike] = None, - extrapolation: Optional[Union[str, Evaluator]] = None + extrapolation: Optional[Union[str, Evaluator]] = None, ) -> None: """Construct a FDataBasis object.""" coefficients = _int_to_real(np.atleast_2d(coefficients)) if coefficients.shape[1] != basis.n_basis: - raise ValueError("The length or number of columns of coefficients " - "has to be the same equal to the number of " - "elements of the basis.") + raise ValueError( + "The length or number of columns of coefficients " + "has to be the same equal to the number of " + "elements of the basis.", + ) self.basis = basis self.coefficients = coefficients @@ -106,7 +98,7 @@ def __init__( axes_labels=axes_labels, argument_names=argument_names, coordinate_names=coordinate_names, - sample_names=sample_names + sample_names=sample_names, ) @classmethod @@ -162,6 +154,10 @@ def from_data( the least squares method. The values admitted are 'cholesky' and 'qr' for Cholesky and QR factorisation methods respectively. + sample_points: Old name for `grid_points`. New code should + use `grid_points` instead. + + .. 
deprecated:: 0.5 Returns: FDataBasis: Represention of the data in a functional form as @@ -190,12 +186,12 @@ def from_data( Data Analysis* (pp. 86-87). Springer. """ - from ..grid import FDataGrid - if sample_points is not None: - warnings.warn("Parameter sample_points is deprecated. Use the " - "parameter grid_points instead.", - DeprecationWarning) + warnings.warn( + "Parameter sample_points is deprecated. Use the " + "parameter grid_points instead.", + DeprecationWarning, + ) grid_points = sample_points # n is the samples @@ -205,7 +201,7 @@ def from_data( # Each sample in a column (m x n) data_matrix = np.atleast_2d(data_matrix) - fd = FDataGrid(data_matrix=data_matrix, grid_points=grid_points) + fd = grid.FDataGrid(data_matrix=data_matrix, grid_points=grid_points) return fd.to_basis(basis=basis, method=method) @@ -257,20 +253,20 @@ def _evaluate( res = np.tensordot(self.coefficients, basis_values, axes=(1, 0)) return res.reshape( - (self.n_samples, len(eval_points), self.dim_codomain)) - - else: + (self.n_samples, len(eval_points), self.dim_codomain), + ) - res_matrix = np.empty( - (self.n_samples, eval_points.shape[1], self.dim_codomain)) + res_matrix = np.empty( + (self.n_samples, eval_points.shape[1], self.dim_codomain), + ) - for i in range(self.n_samples): - basis_values = self.basis.evaluate(eval_points[i]) + for i in range(self.n_samples): + basis_values = self.basis.evaluate(eval_points[i]) - values = self.coefficients[i] * basis_values.T - np.sum(values.T, axis=0, out=res_matrix[i]) + values = self.coefficients[i] * basis_values.T + np.sum(values.T, axis=0, out=res_matrix[i]) - return res_matrix + return res_matrix def shift( self: T, @@ -304,8 +300,8 @@ def shift( Returns: :obj:`FDataBasis` with the shifted data. 
- """ + """ if self.dim_codomain > 1 or self.dim_domain > 1: raise ValueError @@ -319,44 +315,62 @@ def shift( if np.isscalar(shifts): # Special case, all curves with same shift - _basis = self.basis.rescale((domain_range[0] + shifts, - domain_range[1] + shifts)) + basis = self.basis.rescale(( + domain_range[0] + shifts, + domain_range[1] + shifts, + )) - return FDataBasis.from_data(self.evaluate(eval_points), - grid_points=eval_points + shifts, - basis=_basis, **kwargs) + return FDataBasis.from_data( + self.evaluate(eval_points), + grid_points=eval_points + shifts, + basis=basis, + **kwargs, + ) elif len(shifts) != self.n_samples: - raise ValueError(f"shifts vector ({len(shifts)}) must have the " - f"same length than the number of samples " - f"({self.n_samples})") + raise ValueError( + f"shifts vector ({len(shifts)}) must have the " + f"same length than the number of samples " + f"({self.n_samples})", + ) if restrict_domain: a = domain_range[0] - min(np.min(shifts), 0) b = domain_range[1] - max(np.max(shifts), 0) domain = (a, b) eval_points = eval_points[ - np.logical_and(eval_points >= a, - eval_points <= b)] + np.logical_and( + eval_points >= a, + eval_points <= b, + ) + ] else: domain = domain_range - points_shifted = np.outer(np.ones(self.n_samples), - eval_points) + points_shifted = np.outer( + np.ones(self.n_samples), + eval_points, + ) points_shifted += np.atleast_2d(shifts).T # Matrix of shifted values - _data_matrix = self(points_shifted, - aligned=False, - extrapolation=extrapolation)[..., 0] + data_matrix = self( + points_shifted, + aligned=False, + extrapolation=extrapolation, + )[..., 0] - _basis = self.basis.rescale(domain) + basis = self.basis.rescale(domain) - return FDataBasis.from_data(_data_matrix, grid_points=eval_points, - basis=_basis, **kwargs) + return FDataBasis.from_data( + data_matrix, + grid_points=eval_points, + basis=basis, + **kwargs, + ) - def derivative(self: T, *, order: int = 1) -> T: + def derivative(self: T, *, order: int = 1) 
-> T: # noqa: D102 if order < 0: raise ValueError("order only takes non-negative integer values.") @@ -365,11 +379,13 @@ def derivative(self: T, *, order: int = 1) -> T: return self.copy() basis, coefficients = self.basis._derivative_basis_and_coefs( - self.coefficients, order) + self.coefficients, + order, + ) return FDataBasis(basis, coefficients) - def sum( + def sum( # noqa: WPS125 self: T, *, axis: Optional[int] = None, @@ -380,13 +396,21 @@ def sum( ) -> T: """Compute the sum of all the samples in a FDataBasis object. + Args: + axis: Used for compatibility with numpy. Must be None or 0. + out: Used for compatibility with numpy. Must be None. + keepdims: Used for compatibility with numpy. Must be False. + skipna: Wether the NaNs are ignored or not. + min_count: Number of valid (non NaN) data to have in order + for the a variable to not be NaN when `skipna` is + `True`. + Returns: A FDataBais object with just one sample representing the sum of all the samples in the original FDataBasis object. Examples: - >>> from skfda.representation.basis import FDataBasis, Monomial >>> basis = Monomial(n_basis=4) >>> coefficients = [[0.5, 1, 2, .5], [1.5, 1, 4, .5]] @@ -399,16 +423,20 @@ def sum( """ super().sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna) - coefs = (np.nansum(self.coefficients, axis=0) if skipna - else np.sum(self.coefficients, axis=0)) + coefs = ( + np.nansum(self.coefficients, axis=0) if skipna + else np.sum(self.coefficients, axis=0) + ) if min_count > 0: valid = ~np.isnan(self.coefficients) n_valid = np.sum(valid, axis=0) coefs[n_valid < min_count] = np.NaN - return self.copy(coefficients=coefs, - sample_names=(None,)) + return self.copy( + coefficients=coefs, + sample_names=(None,), + ) def gmean(self: T, eval_points: Optional[np.ndarray] = None) -> T: """Compute the geometric mean of the functional data object. @@ -426,7 +454,7 @@ def gmean(self: T, eval_points: Optional[np.ndarray] = None) -> T: between 501 and 10 times the number of basis. 
Returns: - FDataBasis: Geometric mean of the original object. + Geometric mean of the original object. """ return self.to_grid(eval_points).gmean().to_basis(self.basis) @@ -486,13 +514,16 @@ def to_grid( numpy.linspace with bounds equal to the ones defined in self.domain_range and the number of points the maximum between 501 and 10 times the number of basis. + sample_points: Old name for `grid_points`. New code should + use `grid_points` instead. + + .. deprecated:: 0.5 Returns: FDataGrid: Discrete representation of the functional data object. Examples: - >>> from skfda.representation.basis import FDataBasis, Monomial >>> fd = FDataBasis(coefficients=[[1, 1, 1], [1, 0, 1]], ... basis=Monomial(domain_range=(0,5), n_basis=3)) @@ -510,40 +541,50 @@ def to_grid( """ if sample_points is not None: - warnings.warn("Parameter sample_points is deprecated. Use the " - "parameter grid_points instead.", - DeprecationWarning) + warnings.warn( + "Parameter sample_points is deprecated. Use the " + "parameter grid_points instead.", + DeprecationWarning, + ) grid_points = sample_points if grid_points is None: - npoints = max(constants.N_POINTS_FINE_MESH, - constants.BASIS_MIN_FACTOR * self.n_basis) - grid_points = [np.linspace(*r, npoints) - for r in self.domain_range] - - return grid.FDataGrid(self.evaluate(grid_points, grid=True), - grid_points=grid_points, - domain_range=self.domain_range) + npoints = max( + constants.N_POINTS_FINE_MESH, + constants.BASIS_MIN_FACTOR * self.n_basis, + ) + grid_points = [ + np.linspace(*r, npoints) + for r in self.domain_range + ] + + return grid.FDataGrid( + self.evaluate(grid_points, grid=True), + grid_points=grid_points, + domain_range=self.domain_range, + ) def to_basis( self, basis: Basis, eval_points: Optional[np.ndarray] = None, - **kwargs: Any + **kwargs: Any, ) -> FDataBasis: - """Return the basis representation of the object. + """ + Return the basis representation of the object. 
Args: - basis(Basis): basis object in which the functional data are + basis: Basis object in which the functional data are going to be represented. - **kwargs: keyword arguments to be passed to + eval_points: Evaluation points used to discretize the function + if the basis is going to be changed. + kwargs: Keyword arguments to be passed to FDataBasis.from_data(). Returns: - FDataBasis: Basis representation of the funtional data - object. - """ + Basis representation of the funtional data object. + """ if basis == self.basis: return self.copy() @@ -561,8 +602,7 @@ def copy( sample_names: Optional[LabelTupleLike] = None, extrapolation: Optional[Union[str, Evaluator]] = None, ) -> T: - """FDataBasis copy""" - + """Copy the FDataBasis.""" if basis is None: basis = copy.deepcopy(self.basis) @@ -587,19 +627,24 @@ def copy( if extrapolation is None: extrapolation = self.extrapolation - return FDataBasis(basis, coefficients, - dataset_name=dataset_name, - argument_names=argument_names, - coordinate_names=coordinate_names, - sample_names=sample_names, - extrapolation=extrapolation) + return FDataBasis( + basis, + coefficients, + dataset_name=dataset_name, + argument_names=argument_names, + coordinate_names=coordinate_names, + sample_names=sample_names, + extrapolation=extrapolation, + ) - def _to_R(self) -> str: - """Gives the code to build the object on fda package on R""" - return ("fd(coef = " + self._array_to_R(self.coefficients, True) + - ", basisobj = " + self.basis._to_R() + ")") + def _to_R(self) -> str: # noqa: N802 + """Return the code to build the object on fda package on R.""" + return ( + f"fd(coef = {self._array_to_R(self.coefficients, transpose=True)}," + f" basisobj = {self.basis._to_R()})" + ) - def _array_to_R( + def _array_to_R( # noqa: N802 self, coefficients: np.ndarray, transpose: bool = False, @@ -612,42 +657,48 @@ def _array_to_R( (rows, cols) = coefficients.shape retstring = "matrix(c(" - for j in range(cols): - for i in range(rows): - retstring = 
retstring + str(coefficients[i, j]) + ", " + retstring += "".join( + f"{coefficients[i, j]}, " + for j in range(cols) + for i in range(rows) + ) - return (retstring[0:len(retstring) - 2] + "), nrow = " + str(rows) + - ", ncol = " + str(cols) + ")") + return ( + retstring[:len(retstring) - 2] + + f"), nrow = {rows}, ncol = {cols})" + ) def __repr__(self) -> str: - """Representation of FDataBasis object.""" - return (f"{self.__class__.__name__}(" - f"\nbasis={self.basis}," - f"\ncoefficients={self.coefficients}," - f"\ndataset_name={self.dataset_name}," - f"\nargument_names={repr(self.argument_names)}," - f"\ncoordinate_names={repr(self.coordinate_names)}," - f"\nextrapolation={self.extrapolation})").replace( - '\n', '\n ') + return ( + f"{self.__class__.__name__}(" + f"\nbasis={self.basis}," + f"\ncoefficients={self.coefficients}," + f"\ndataset_name={self.dataset_name}," + f"\nargument_names={repr(self.argument_names)}," + f"\ncoordinate_names={repr(self.coordinate_names)}," + f"\nextrapolation={self.extrapolation})" + ).replace('\n', '\n ') def __str__(self) -> str: - """Return str(self).""" - return (f"{self.__class__.__name__}(" - f"\n_basis={self.basis}," - f"\ncoefficients={self.coefficients})").replace('\n', '\n ') + return ( + f"{self.__class__.__name__}(" + f"\n_basis={self.basis}," + f"\ncoefficients={self.coefficients})" + ).replace('\n', '\n ') def equals(self, other: Any) -> bool: - """Equality of FDataBasis""" + """Equality of FDataBasis.""" # TODO check all other params - return (super().equals(other) - and self.basis == other.basis - and np.array_equal(self.coefficients, other.coefficients)) + return ( + super().equals(other) + and self.basis == other.basis + and np.array_equal(self.coefficients, other.coefficients) + ) def __eq__(self, other: Any) -> np.ndarray: - """Elementwise equality of FDataBasis""" - + """Elementwise equality of FDataBasis.""" if not isinstance(other, type(self)) or self.dtype != other.dtype: if other is pandas.NA: return 
self.isna() @@ -655,29 +706,32 @@ def __eq__(self, other: Any) -> np.ndarray: other, (pandas.Series, pandas.Index, pandas.DataFrame), ): return np.concatenate([x == y for x, y in zip(self, other)]) - else: - return NotImplemented + + return NotImplemented if len(self) != len(other) and len(self) != 1 and len(other) != 1: - raise ValueError(f"Different lengths: " - f"len(self)={len(self)} and " - f"len(other)={len(other)}") + raise ValueError( + f"Different lengths: " + f"len(self)={len(self)} and " + f"len(other)={len(other)}", + ) return np.all(self.coefficients == other.coefficients, axis=1) def concatenate( self: T, *others: T, - as_coordinates: bool = False + as_coordinates: bool = False, ) -> T: - """Join samples from a similar FDataBasis object. + """ + Join samples from a similar FDataBasis object. Joins samples from another FDataBasis object if they have the same basis. Args: - others (:class:`FDataBasis`): Objects to be concatenated. - as_coordinates (boolean, optional): If False concatenates as + others: Objects to be concatenated. + as_coordinates: If False concatenates as new samples, else, concatenates the other functions as new components of the image. Defaults to False. @@ -688,8 +742,8 @@ def concatenate( Todo: By the moment, only unidimensional objects are supported in basis representation. - """ + """ # TODO: Change to support multivariate functions # in basis representation if as_coordinates: @@ -701,13 +755,16 @@ def concatenate( data = [self.coefficients] + [other.coefficients for other in others] - sample_names = [fd.sample_names for fd in [self, *others]] + sample_names = [fd.sample_names for fd in (self, *others)] - return self.copy(coefficients=np.concatenate(data, axis=0), - sample_names=sum(sample_names, ())) + return self.copy( + coefficients=np.concatenate(data, axis=0), + sample_names=sum(sample_names, ()), + ) def compose(self, fd, *, eval_points=None, **kwargs): - """Composition of functions. + """ + Composition of functions. 
Performs the composition of functions. The basis is discretized to compute the composition. @@ -717,8 +774,11 @@ def compose(self, fd, *, eval_points=None, **kwargs): have the same number of samples and image dimension equal to 1. eval_points (array_like): Points to perform the evaluation. kwargs: Named arguments to be passed to :func:`from_data`. - """ + Returns: + Function resulting from the composition. + + """ grid = self.to_grid().compose(fd, eval_points=eval_points) if fd.dim_domain == 1: @@ -732,26 +792,30 @@ def compose(self, fd, *, eval_points=None, **kwargs): def __getitem__(self: T, key: Union[int, slice]) -> T: """Return self[key].""" - key = _check_array_key(self.coefficients, key) - return self.copy(coefficients=self.coefficients[key], - sample_names=np.array(self.sample_names)[key]) + return self.copy( + coefficients=self.coefficients[key], + sample_names=np.array(self.sample_names)[key], + ) def __add__(self: T, other: Union[T, np.ndarray, float]) -> T: """Addition for FDataBasis object.""" - if isinstance(other, FDataBasis): - if self.basis != other.basis: - return NotImplemented + if self.basis == other.basis: + basis, coefs = self.basis._add_same_basis( + self.coefficients, + other.coefficients, + ) else: - basis, coefs = self.basis._add_same_basis(self.coefficients, - other.coefficients) + return NotImplemented else: try: - basis, coefs = self.basis._add_constant(self.coefficients, - other) + basis, coefs = self.basis._add_constant( + self.coefficients, + other, + ) except Exception: return NotImplemented @@ -759,21 +823,24 @@ def __add__(self: T, other: Union[T, np.ndarray, float]) -> T: def __radd__(self: T, other: Union[T, np.ndarray, float]) -> T: """Addition for FDataBasis object.""" - return self.__add__(other) def __sub__(self: T, other: Union[T, np.ndarray, float]) -> T: """Subtraction for FDataBasis object.""" if isinstance(other, FDataBasis): - if self.basis != other.basis: - return NotImplemented + if self.basis == other.basis: + 
basis, coefs = self.basis._sub_same_basis( + self.coefficients, + other.coefficients, + ) else: - basis, coefs = self.basis._sub_same_basis(self.coefficients, - other.coefficients) + return NotImplemented else: try: - basis, coefs = self.basis._sub_constant(self.coefficients, - other) + basis, coefs = self.basis._sub_constant( + self.coefficients, + other, + ) except Exception: return NotImplemented @@ -801,7 +868,6 @@ def __rmul__(self: T, other: Union[np.ndarray, float]) -> T: def __truediv__(self: T, other: Union[np.ndarray, float]) -> T: """Division for FDataBasis object.""" - other = np.array(other) try: @@ -813,7 +879,6 @@ def __truediv__(self: T, other: Union[np.ndarray, float]) -> T: def __rtruediv__(self: T, other: Union[np.ndarray, float]) -> T: """Right division for FDataBasis object.""" - return NotImplemented ##################################################################### @@ -833,7 +898,7 @@ def nbytes(self) -> int: def isna(self) -> np.ndarray: """ - A 1-D array indicating if each value is missing. + Return a 1-D array indicating if each value is missing. Returns: na_values (np.ndarray): Positions of NA. @@ -842,11 +907,10 @@ def isna(self) -> np.ndarray: class FDataBasisDType(pandas.api.extensions.ExtensionDtype): # type: ignore - """ - DType corresponding to FDataBasis in Pandas - """ + """DType corresponding to FDataBasis in Pandas.""" + kind = 'O' - type = FDataBasis + type = FDataBasis # noqa: WPS125 name = 'FDataBasis' na_value = pandas.NA @@ -856,16 +920,19 @@ def __init__(self, basis: Basis) -> None: self.basis = basis @classmethod - def construct_array_type(cls) -> Type[FDataBasis]: + def construct_array_type(cls) -> Type[FDataBasis]: # noqa: D102 return FDataBasis def _na_repr(self) -> FDataBasis: return FDataBasis( basis=self.basis, - coefficients=((np.NaN,) * self.basis.n_basis,)) + coefficients=((np.NaN,) * self.basis.n_basis,), + ) def __eq__(self, other: Any) -> bool: """ + Compare dtype equality. 
+ Rules for equality (similar to categorical): 1) Any FData is equal to the string 'category' 2) Any FData is equal to itself @@ -876,9 +943,11 @@ def __eq__(self, other: Any) -> bool: return other == self.name elif other is self: return True - else: - return (isinstance(other, FDataBasisDType) - and self.basis == other.basis) + + return ( + isinstance(other, FDataBasisDType) + and self.basis == other.basis + ) def __hash__(self) -> int: return hash(self.basis) @@ -895,15 +964,8 @@ def __init__(self, fdatabasis: T) -> None: """Create an iterator through the image coordinates.""" self._fdatabasis = fdatabasis - def __iter__(self) -> Iterator[T]: - """Return an iterator through the image coordinates.""" - - for i in range(len(self)): - yield self[i] - def __getitem__(self, key: Union[int, slice]) -> T: """Get a specific coordinate.""" - return self._fdatabasis.basis._coordinate(self._fdatabasis, key) def __len__(self) -> int: From 7314dda631b37854c8fe650aa2b79a4f965172fe Mon Sep 17 00:00:00 2001 From: vnmabus Date: Thu, 7 Jan 2021 17:24:42 +0100 Subject: [PATCH 017/417] Refactor FDataBasis operations. 
--- skfda/representation/basis/_basis.py | 43 ++-------- skfda/representation/basis/_constant.py | 2 +- skfda/representation/basis/_fdatabasis.py | 95 ++++++++++++----------- skfda/representation/basis/_monomial.py | 2 +- tests/test_basis.py | 24 ------ 5 files changed, 56 insertions(+), 110 deletions(-) diff --git a/skfda/representation/basis/_basis.py b/skfda/representation/basis/_basis.py index 9f19cbcec..6fc1ac7f7 100644 --- a/skfda/representation/basis/_basis.py +++ b/skfda/representation/basis/_basis.py @@ -3,6 +3,7 @@ from __future__ import annotations import copy +import numbers import warnings from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Any, Optional, Tuple, TypeVar, Union @@ -263,7 +264,8 @@ def copy(self: T, domain_range: Optional[DomainRangeLike] = None) -> T: return new_copy def to_basis(self) -> FDataBasis: - """Convert the Basis to FDatabasis. + """ + Convert the Basis to FDatabasis. Returns: FDataBasis with this basis as its basis, and all basis functions @@ -277,7 +279,8 @@ def _to_R(self) -> str: # noqa: N802 raise NotImplementedError def inner_product_matrix(self, other: Optional[Basis] = None) -> np.array: - r"""Return the Inner Product Matrix of a pair of basis. + r""" + Return the Inner Product Matrix of a pair of basis. 
The Inner Product Matrix is defined as @@ -345,42 +348,6 @@ def gram_matrix(self) -> np.array: return gram - def _add_same_basis( - self: T, - coefs1: np.ndarray, - coefs2: np.ndarray, - ) -> Tuple[T, np.ndarray]: - return self.copy(), coefs1 + coefs2 - - def _add_constant( - self: T, - coefs: np.ndarray, - constant: float, - ) -> Tuple[T, np.ndarray]: - coefs = coefs.copy() - constant = np.array(constant) - coefs[:, 0] = coefs[:, 0] + constant - - return self.copy(), coefs - - def _sub_same_basis( - self: T, - coefs1: np.ndarray, - coefs2: np.ndarray, - ) -> Tuple[T, np.ndarray]: - return self.copy(), coefs1 - coefs2 - - def _sub_constant( - self: T, - coefs: np.ndarray, - other: float, - ) -> Tuple[T, np.ndarray]: - coefs = coefs.copy() - other = np.array(other) - coefs[:, 0] = coefs[:, 0] - other - - return self.copy(), coefs - def _mul_constant( self: T, coefs: np.ndarray, diff --git a/skfda/representation/basis/_constant.py b/skfda/representation/basis/_constant.py index 3d49af323..cb50cfc60 100644 --- a/skfda/representation/basis/_constant.py +++ b/skfda/representation/basis/_constant.py @@ -1,4 +1,4 @@ -from typing import Optional, Tuple, TypeVar +from typing import Any, Optional, Tuple, TypeVar import numpy as np diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 4959c694e..2944c8ae6 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -801,81 +801,84 @@ def __getitem__(self: T, key: Union[int, slice]) -> T: def __add__(self: T, other: Union[T, np.ndarray, float]) -> T: """Addition for FDataBasis object.""" - if isinstance(other, FDataBasis): - if self.basis == other.basis: - basis, coefs = self.basis._add_same_basis( - self.coefficients, - other.coefficients, - ) - else: - return NotImplemented + if isinstance(other, FDataBasis) and self.basis == other.basis: - else: - try: - basis, coefs = self.basis._add_constant( - self.coefficients, - other, - ) - 
except Exception: - return NotImplemented + return self._copy_op( + other, + basis=self.basis, + coefficients=self.coefficients + other.coefficients, + ) - return self._copy_op(other, basis=basis, coefficients=coefs) + return NotImplemented def __radd__(self: T, other: Union[T, np.ndarray, float]) -> T: """Addition for FDataBasis object.""" - return self.__add__(other) + if isinstance(other, FDataBasis) and self.basis == other.basis: + + return self._copy_op( + other, + basis=self.basis, + coefficients=self.coefficients + other.coefficients, + ) + + return NotImplemented def __sub__(self: T, other: Union[T, np.ndarray, float]) -> T: """Subtraction for FDataBasis object.""" - if isinstance(other, FDataBasis): - if self.basis == other.basis: - basis, coefs = self.basis._sub_same_basis( - self.coefficients, - other.coefficients, - ) - else: - return NotImplemented - else: - try: - basis, coefs = self.basis._sub_constant( - self.coefficients, - other, - ) - except Exception: - return NotImplemented + if isinstance(other, FDataBasis) and self.basis == other.basis: - return self._copy_op(other, basis=basis, coefficients=coefs) + return self._copy_op( + other, + basis=self.basis, + coefficients=self.coefficients - other.coefficients, + ) + + return NotImplemented def __rsub__(self: T, other: Union[T, np.ndarray, float]) -> T: """Right subtraction for FDataBasis object.""" - return (self * -1).__add__(other) + if isinstance(other, FDataBasis) and self.basis == other.basis: - def __mul__(self: T, other: Union[np.ndarray, float]) -> T: - """Multiplication for FDataBasis object.""" - if isinstance(other, FDataBasis): - return NotImplemented + return self._copy_op( + other, + basis=self.basis, + coefficients=other.coefficients - self.coefficients, + ) + return NotImplemented + + def _mul_scalar(self: T, other: Union[np.ndarray, float]) -> T: + """Multiplication by scalar.""" try: - basis, coefs = self.basis._mul_constant(self.coefficients, other) + vector = 
np.atleast_1d(other) except Exception: return NotImplemented - return self._copy_op(other, basis=basis, coefficients=coefs) + if vector.ndim > 1: + return NotImplemented + + return self._copy_op( + other, + basis=self.basis, + coefficients=self.coefs * vector, + ) + + def __mul__(self: T, other: Union[np.ndarray, float]) -> T: + """Multiplication for FDataBasis object.""" + return self._mul_scalar(other) def __rmul__(self: T, other: Union[np.ndarray, float]) -> T: """Multiplication for FDataBasis object.""" - return self.__mul__(other) + return self._mul_scalar(other) def __truediv__(self: T, other: Union[np.ndarray, float]) -> T: """Division for FDataBasis object.""" - other = np.array(other) - try: other = 1 / other except Exception: return NotImplemented - return self * other + return self._mul_scalar(other) def __rtruediv__(self: T, other: Union[np.ndarray, float]) -> T: """Right division for FDataBasis object.""" diff --git a/skfda/representation/basis/_monomial.py b/skfda/representation/basis/_monomial.py index 811fc4d36..9cd76ed9a 100644 --- a/skfda/representation/basis/_monomial.py +++ b/skfda/representation/basis/_monomial.py @@ -1,4 +1,4 @@ -from typing import Tuple, TypeVar +from typing import Any, Optional, Tuple, TypeVar import numpy as np import scipy.linalg diff --git a/tests/test_basis.py b/tests/test_basis.py index 276bd23fc..27c065868 100644 --- a/tests/test_basis.py +++ b/tests/test_basis.py @@ -175,18 +175,6 @@ def test_fdatabasis__add__(self): self.assertTrue((monomial1 + monomial2).equals( FDataBasis(Monomial(n_basis=3), [[2, 4, 6], [4, 6, 8]]))) - self.assertTrue((monomial2 + 1).equals( - FDataBasis(Monomial(n_basis=3), - [[2, 2, 3], [4, 4, 5]]))) - self.assertTrue((1 + monomial2).equals( - FDataBasis(Monomial(n_basis=3), - [[2, 2, 3], [4, 4, 5]]))) - self.assertTrue((monomial2 + [1, 2]).equals( - FDataBasis(Monomial(n_basis=3), - [[2, 2, 3], [5, 4, 5]]))) - self.assertTrue(([1, 2] + monomial2).equals( - FDataBasis(Monomial(n_basis=3), - [[2, 
2, 3], [5, 4, 5]]))) with np.testing.assert_raises(TypeError): monomial2 + FDataBasis(Fourier(n_basis=3), @@ -199,18 +187,6 @@ def test_fdatabasis__sub__(self): self.assertTrue((monomial1 - monomial2).equals( FDataBasis(Monomial(n_basis=3), [[0, 0, 0], [-2, -2, -2]]))) - self.assertTrue((monomial2 - 1).equals( - FDataBasis(Monomial(n_basis=3), - [[0, 2, 3], [2, 4, 5]]))) - self.assertTrue((1 - monomial2).equals( - FDataBasis(Monomial(n_basis=3), - [[0, -2, -3], [-2, -4, -5]]))) - self.assertTrue((monomial2 - [1, 2]).equals( - FDataBasis(Monomial(n_basis=3), - [[0, 2, 3], [1, 4, 5]]))) - self.assertTrue(([1, 2] - monomial2).equals( - FDataBasis(Monomial(n_basis=3), - [[0, -2, -3], [-1, -4, -5]]))) with np.testing.assert_raises(TypeError): monomial2 - FDataBasis(Fourier(n_basis=3), From 64bb0e47bf12ba51bf58f98bf9848dfecc24ecca Mon Sep 17 00:00:00 2001 From: vnmabus Date: Thu, 7 Jan 2021 18:11:24 +0100 Subject: [PATCH 018/417] Fix tests. --- skfda/representation/basis/_fdatabasis.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 2944c8ae6..dd5ffe3d5 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -857,10 +857,12 @@ def _mul_scalar(self: T, other: Union[np.ndarray, float]) -> T: if vector.ndim > 1: return NotImplemented + vector = vector[:, np.newaxis] + return self._copy_op( other, basis=self.basis, - coefficients=self.coefs * vector, + coefficients=self.coefficients * vector, ) def __mul__(self: T, other: Union[np.ndarray, float]) -> T: @@ -874,7 +876,7 @@ def __rmul__(self: T, other: Union[np.ndarray, float]) -> T: def __truediv__(self: T, other: Union[np.ndarray, float]) -> T: """Division for FDataBasis object.""" try: - other = 1 / other + other = 1 / np.asarray(other) except Exception: return NotImplemented From bdcb051ddd087801af61c300cbcdddfb4cb31516 Mon Sep 17 00:00:00 2001 From: vnmabus 
Date: Fri, 8 Jan 2021 14:10:09 +0100 Subject: [PATCH 019/417] Refactor coordinate indexing for FDataBasis. --- skfda/representation/basis/_basis.py | 49 ++++++++++++++++----- skfda/representation/basis/_fdatabasis.py | 30 +++++++++---- skfda/representation/basis/_vector_basis.py | 41 ++++++----------- 3 files changed, 72 insertions(+), 48 deletions(-) diff --git a/skfda/representation/basis/_basis.py b/skfda/representation/basis/_basis.py index 6fc1ac7f7..02db6817f 100644 --- a/skfda/representation/basis/_basis.py +++ b/skfda/representation/basis/_basis.py @@ -177,6 +177,13 @@ def _derivative_basis_and_coefs( """ Return basis and coefficients of the derivative. + Args: + coefs: Coefficients of a vector expressed in this basis. + order: Order of the derivative. + + Returns: + Tuple with the basis of the derivative and its coefficients. + Subclasses can override this to provide derivative construction. """ @@ -185,6 +192,24 @@ def _derivative_basis_and_coefs( "basis of the derivatives.", ) + def derivative_basis_and_coefs( + self: T, + coefs: np.ndarray, + order: int = 1, + ) -> Tuple[T, np.ndarray]: + """ + Return basis and coefficients of the derivative. + + Args: + coefs: Coefficients of a vector expressed in this basis. + order: Order of the derivative. + + Returns: + Tuple with the basis of the derivative and its coefficients. + + """ + return self._derivative_basis_and_coefs(coefs, order) + def plot(self, *args: Any, **kwargs: Any) -> Figure: """Plot the basis object or its derivatives. @@ -202,25 +227,22 @@ def plot(self, *args: Any, **kwargs: Any) -> Figure: def _coordinate_nonfull( self, - fdatabasis: FDataBasis, - key: Union[int, range], - ) -> FDataBasis: + coefs: np.ndarray, + key: Union[int, slice], + ) -> Tuple[Basis, np.ndarray]: """ - Return a fdatagrid for the coordinate functions indexed by key. + Return a basis and coefficients for the indexed coordinate functions. Subclasses can override this to provide coordinate indexing. 
- The key parameter has been already validated and is an integer or - slice in the range [0, self.dim_codomain. - """ raise NotImplementedError("Coordinate indexing not implemented") - def _coordinate( + def coordinate_basis_and_coefs( self, - fdatabasis: FDataBasis, + coefs: np.ndarray, key: Union[int, slice], - ) -> FDataBasis: + ) -> Tuple[Basis, np.ndarray]: """Return a fdatabasis for the coordinate functions indexed by key.""" # Raises error if not in range and normalize key r_key = range(self.dim_codomain)[key] @@ -233,9 +255,12 @@ def _coordinate( (self.dim_codomain == 1 and r_key == 0) or (isinstance(r_key, range) and len(r_key) == self.dim_codomain) ): - return fdatabasis.copy() + return self, np.copy(coefs) - return self._coordinate_nonfull(fdatabasis=fdatabasis, key=r_key) + return self._coordinate_nonfull( + coefs=coefs, + key=key, + ) def rescale(self: T, domain_range: Optional[DomainRangeLike] = None) -> T: r""" diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index dd5ffe3d5..f8171e4ef 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -21,7 +21,7 @@ T = TypeVar('T', bound='FDataBasis') -class FDataBasis(FData): +class FDataBasis(FData): # noqa: WPS214 r"""Basis representation of functional data. 
Class representation for functional data in the form of a set of basis @@ -378,7 +378,7 @@ def derivative(self: T, *, order: int = 1) -> T: # noqa: D102 if order == 0: return self.copy() - basis, coefficients = self.basis._derivative_basis_and_coefs( + basis, coefficients = self.basis.derivative_basis_and_coefs( self.coefficients, order, ) @@ -640,7 +640,8 @@ def copy( def _to_R(self) -> str: # noqa: N802 """Return the code to build the object on fda package on R.""" return ( - f"fd(coef = {self._array_to_R(self.coefficients, transpose=True)}," + f"fd(" # noqa: WPS437 + f"coef = {self._array_to_R(self.coefficients, transpose=True)}," f" basisobj = {self.basis._to_R()})" ) @@ -671,7 +672,7 @@ def _array_to_R( # noqa: N802 def __repr__(self) -> str: return ( - f"{self.__class__.__name__}(" + f"{self.__class__.__name__}(" # noqa: WPS221 f"\nbasis={self.basis}," f"\ncoefficients={self.coefficients}," f"\ndataset_name={self.dataset_name}," @@ -779,14 +780,14 @@ def compose(self, fd, *, eval_points=None, **kwargs): Function resulting from the composition. 
""" - grid = self.to_grid().compose(fd, eval_points=eval_points) + fd_grid = self.to_grid().compose(fd, eval_points=eval_points) if fd.dim_domain == 1: basis = self.basis.rescale(fd.domain_range[0]) - composition = grid.to_basis(basis, **kwargs) + composition = fd_grid.to_basis(basis, **kwargs) else: #  Cant be convertered to basis due to the dimensions - composition = grid + composition = fd_grid return composition @@ -971,7 +972,20 @@ def __init__(self, fdatabasis: T) -> None: def __getitem__(self, key: Union[int, slice]) -> T: """Get a specific coordinate.""" - return self._fdatabasis.basis._coordinate(self._fdatabasis, key) + basis, coefs = self._fdatabasis.basis.coordinate_basis_and_coefs( + self._fdatabasis.coefficients, + key, + ) + + coord_names = self._fdatabasis.coordinate_names[key] + if coord_names is None or isinstance(coord_names, str): + coord_names = (coord_names,) + + return self._fdatabasis.copy( + basis=basis, + coefficients=coefs, + coordinate_names=coord_names, + ) def __len__(self) -> int: """Return the number of coordinates.""" diff --git a/skfda/representation/basis/_vector_basis.py b/skfda/representation/basis/_vector_basis.py index 1d6996d0c..2f6e69a72 100644 --- a/skfda/representation/basis/_vector_basis.py +++ b/skfda/representation/basis/_vector_basis.py @@ -149,38 +149,23 @@ def _gram_matrix(self) -> np.ndarray: def _coordinate_nonfull( self, - fdatabasis: FDataBasis, - key: Union[int, range], - ) -> FDataBasis: - - r_key = key - if isinstance(r_key, int): - r_key = range(r_key, r_key + 1) - - s_key = slice(r_key.start, r_key.stop, r_key.step) - - coef_indexes = np.concatenate([ - np.ones(b.n_basis, dtype=np.bool_) if i in r_key - else np.zeros(b.n_basis, dtype=np.bool_) - for i, b in enumerate(self.basis_list) - ]) - - new_basis_list = self.basis_list[s_key] + coefs: np.ndarray, + key: Union[int, slice], + ) -> Tuple[Basis, np.ndarray]: - basis = ( - new_basis_list[0] if isinstance(key, int) - else VectorValued(new_basis_list) - ) + 
basis_sizes = [b.n_basis for b in self.basis_list] + basis_indexes = np.cumsum(basis_sizes) + coef_splits = np.split(coefs, basis_indexes[:-1], axis=1) - coefs = fdatabasis.coefficients[:, coef_indexes] + new_basis = self.basis_list[key] + if not isinstance(new_basis, Basis): + new_basis = VectorValued(new_basis) - coordinate_names = np.array(fdatabasis.coordinate_names)[s_key] + new_coefs = coef_splits[key] + if not isinstance(new_coefs, np.ndarray): + new_coefs = np.concatenate(coef_splits[key], axis=1) - return fdatabasis.copy( - basis=basis, - coefficients=coefs, - coordinate_names=coordinate_names, - ) + return new_basis, new_coefs def __repr__(self) -> str: """Representation of a Basis object.""" From c05daed44e14c706fe88233b8e6288634397399a Mon Sep 17 00:00:00 2001 From: vnmabus Date: Fri, 8 Jan 2021 17:53:32 +0100 Subject: [PATCH 020/417] Style fixes. --- skfda/representation/basis/_basis.py | 14 ++++++++++++++ skfda/representation/grid.py | 27 +++++++++++++-------------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/skfda/representation/basis/_basis.py b/skfda/representation/basis/_basis.py index 02db6817f..896539e05 100644 --- a/skfda/representation/basis/_basis.py +++ b/skfda/representation/basis/_basis.py @@ -96,6 +96,20 @@ def domain_range(self) -> DomainRange: def n_basis(self) -> int: return self._n_basis + def is_domain_range_fixed(self) -> bool: + """ + Return wether the :term:`domain ramge` has been set explicitly. + + This is useful when using a basis for converting a dataset, since + if this is not explicitly assigned it can be changed to the domain of + the data. + + Returns: + `True` if the domain range has been fixed. `False` otherwise. 
+ + """ + return self._domain_range is not None + @abstractmethod def _evaluate( self, diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 8c3cdbe8d..06ada531d 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -43,7 +43,7 @@ T = TypeVar("T", bound='FDataGrid') -class FDataGrid(FData): +class FDataGrid(FData): # noqa: WPS214 r"""Represent discretised functional data. Class for representing functional data as a set of curves discretised @@ -120,7 +120,7 @@ class FDataGrid(FData): """ - def __init__( + def __init__( # noqa: WPS211 self, data_matrix: np.ndarray, grid_points: Optional[GridPointsLike] = None, @@ -565,14 +565,15 @@ def equals(self, other: Any) -> bool: if not np.array_equal(self.data_matrix, other.data_matrix): return False - if len(self.grid_points) != len(other.grid_points): - return False - - for a, b in zip(self.grid_points, other.grid_points): - if not np.array_equal(a, b): - return False - - if not np.array_equal(self.domain_range, other.domain_range): + # Comparison of the domain + if ( + not np.array_equal(self.domain_range, other.domain_range) + or len(self.grid_points) != len(other.grid_points) + or not all( + np.array_equal(a, b) + for a, b in zip(self.grid_points, other.grid_points) + ) + ): return False if self.interpolation != other.interpolation: @@ -622,8 +623,6 @@ def _get_op_matrix( return other[other_index] - return None - elif isinstance(other, FDataGrid): self._check_same_dimensions(other) return other.data_matrix @@ -826,7 +825,7 @@ def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: ) # Readjust the domain range if there was not an explicit one - if basis._domain_range is None: + if not basis.is_domain_range_fixed(): basis = basis.copy(domain_range=self.domain_range) smoother = BasisSmoother( @@ -1143,7 +1142,7 @@ def __str__(self) -> str: def __repr__(self) -> str: """Return repr(self).""" return ( - f"FDataGrid(" + f"FDataGrid(" # noqa: WPS221 
f"\n{repr(self.data_matrix)}," f"\ngrid_points={repr(self.grid_points)}," f"\ndomain_range={repr(self.domain_range)}," From fc4e653948239b0c4974c9b706433821f1d2b124 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Fri, 8 Jan 2021 18:46:01 +0100 Subject: [PATCH 021/417] Fix documentation. --- skfda/exploratory/visualization/_boxplot.py | 14 ++++++++------ skfda/representation/basis/_basis.py | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 191f888a7..07f69c25c 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -4,19 +4,21 @@ visualize it. """ -from abc import ABC, abstractmethod import math +from abc import ABC, abstractmethod import matplotlib - import matplotlib.pyplot as plt import numpy as np from ..depth import ModifiedBandDepth from ..outliers import _envelopes -from ._utils import (_figure_to_svg, _get_figure_and_axes, - _set_figure_layout_for_fdata, _set_labels) - +from ._utils import ( + _figure_to_svg, + _get_figure_and_axes, + _set_figure_layout_for_fdata, + _set_labels, +) __author__ = "Amanda Hernando Bernabé" __email__ = "amanda.hernando@estudiante.uam.es" @@ -417,7 +419,7 @@ def plot(self, chart=None, *, fig=None, axes=None, color=self.barcol, zorder=4) # vertical lines - index = math.ceil(self.fdatagrid.ncol / 2) + index = math.ceil(len(self.fdatagrid.grid_points[0]) / 2) x = self.fdatagrid.grid_points[0][index] axes[m].plot([x, x], [self.non_outlying_envelope[0][..., m][index], diff --git a/skfda/representation/basis/_basis.py b/skfda/representation/basis/_basis.py index 896539e05..8fc6dd33e 100644 --- a/skfda/representation/basis/_basis.py +++ b/skfda/representation/basis/_basis.py @@ -98,7 +98,7 @@ def n_basis(self) -> int: def is_domain_range_fixed(self) -> bool: """ - Return wether the :term:`domain ramge` has been set explicitly. 
+ Return wether the :term:`domain range` has been set explicitly. This is useful when using a basis for converting a dataset, since if this is not explicitly assigned it can be changed to the domain of From 8ddfb84f4ac2310300fefaf5be5943b0bfc1a90f Mon Sep 17 00:00:00 2001 From: vnmabus Date: Fri, 8 Jan 2021 20:16:05 +0100 Subject: [PATCH 022/417] Fix style errors. --- skfda/_utils/_utils.py | 5 ++--- skfda/representation/_functional_data.py | 9 ++------- skfda/representation/basis/_basis.py | 1 - skfda/representation/basis/_constant.py | 2 +- skfda/representation/basis/_fdatabasis.py | 12 +++++++++--- skfda/representation/basis/_monomial.py | 2 +- 6 files changed, 15 insertions(+), 16 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 69ac94a11..99e30d90d 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -110,7 +110,6 @@ def _to_grid_points(grid_points_like: GridPointsLike) -> GridPoints: In any other case the behaviour is unespecified. """ - unidimensional = False try: @@ -125,8 +124,8 @@ def _to_grid_points(grid_points_like: GridPointsLike) -> GridPoints: if unidimensional: return (_int_to_real(np.asarray(grid_points_like)),) - else: - return tuple(_int_to_real(np.asarray(i)) for i in grid_points_like) + + return tuple(_int_to_real(np.asarray(i)) for i in grid_points_like) def _to_domain_range(sequence: DomainRangeLike) -> DomainRange: diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index b2da39c8e..e5b70865b 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -601,12 +601,7 @@ def copy( sample_names: Optional[LabelTupleLike] = None, extrapolation: Optional[Union[str, Evaluator]] = None, ) -> T: - """Make a copy of the object. - - Returns: - A copy of the FData object. 
- - """ + """Make a copy of the object.""" pass @abstractmethod # noqa: WPS125 @@ -741,7 +736,7 @@ def compose( fd: T, *, eval_points: np.ndarray = None, - ) -> T: + ) -> FData: """Composition of functions. Performs the composition of functions. diff --git a/skfda/representation/basis/_basis.py b/skfda/representation/basis/_basis.py index 8fc6dd33e..db17e34a0 100644 --- a/skfda/representation/basis/_basis.py +++ b/skfda/representation/basis/_basis.py @@ -3,7 +3,6 @@ from __future__ import annotations import copy -import numbers import warnings from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Any, Optional, Tuple, TypeVar, Union diff --git a/skfda/representation/basis/_constant.py b/skfda/representation/basis/_constant.py index cb50cfc60..3d49af323 100644 --- a/skfda/representation/basis/_constant.py +++ b/skfda/representation/basis/_constant.py @@ -1,4 +1,4 @@ -from typing import Any, Optional, Tuple, TypeVar +from typing import Optional, Tuple, TypeVar import numpy as np diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index f8171e4ef..77472c354 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -480,7 +480,7 @@ def var(self: T, eval_points: Optional[np.ndarray] = None) -> T: """ return self.to_grid(eval_points).var().to_basis(self.basis) - def cov(self, eval_points: Optional[np.ndarray] = None): + def cov(self, eval_points: Optional[np.ndarray] = None) -> FData: """Compute the covariance of the functional data object. A numerical approach its used. 
The object its transformed into its @@ -588,7 +588,7 @@ def to_basis( if basis == self.basis: return self.copy() - return self.to_grid(eval_points=eval_points).to_basis(basis, **kwargs) + return self.to_grid(grid_points=eval_points).to_basis(basis, **kwargs) def copy( self: T, @@ -763,7 +763,13 @@ def concatenate( sample_names=sum(sample_names, ()), ) - def compose(self, fd, *, eval_points=None, **kwargs): + def compose( + self, + fd: FData, + *, + eval_points: np.ndarray = None, + **kwargs: Any + ) -> FData: """ Composition of functions. diff --git a/skfda/representation/basis/_monomial.py b/skfda/representation/basis/_monomial.py index 9cd76ed9a..811fc4d36 100644 --- a/skfda/representation/basis/_monomial.py +++ b/skfda/representation/basis/_monomial.py @@ -1,4 +1,4 @@ -from typing import Any, Optional, Tuple, TypeVar +from typing import Tuple, TypeVar import numpy as np import scipy.linalg From 3fe2ba9237c8e7adeab94430713f72a017eb1b85 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Fri, 8 Jan 2021 20:41:48 +0100 Subject: [PATCH 023/417] Fix trailing comma. --- skfda/representation/basis/_fdatabasis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 77472c354..c62d9de6a 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -768,7 +768,7 @@ def compose( fd: FData, *, eval_points: np.ndarray = None, - **kwargs: Any + **kwargs: Any, ) -> FData: """ Composition of functions. From b7a723ce7e0cd1a6e5d928a04a91d4311cfecb2f Mon Sep 17 00:00:00 2001 From: vnmabus Date: Sat, 9 Jan 2021 20:54:15 +0100 Subject: [PATCH 024/417] Deprecate basic math functions. 
--- skfda/misc/_math.py | 151 +++++++++++++++++++++++++--------- skfda/representation/grid.py | 32 ++++++- tests/test_fdatagrid_numpy.py | 78 ++++++++++++------ 3 files changed, 195 insertions(+), 66 deletions(-) diff --git a/skfda/misc/_math.py b/skfda/misc/_math.py index 8af2af8c2..73cbb05a8 100644 --- a/skfda/misc/_math.py +++ b/skfda/misc/_math.py @@ -4,19 +4,18 @@ package. FDataBasis and FDataGrid. """ +import warnings from builtins import isinstance -from typing import Union +from typing import Any, TypeVar, Union, cast import multimethod -import scipy.integrate - import numpy as np +import scipy.integrate -from .._utils import _same_domain, nquad_vec, _pairwise_commutative -from ..representation import FDataGrid, FDataBasis +from .._utils import _same_domain, nquad_vec +from ..representation import FDataBasis, FDataGrid from ..representation.basis import Basis - __author__ = "Miguel Carbajo Berrocal" __license__ = "GPL3" __version__ = "" @@ -24,111 +23,176 @@ __email__ = "" __status__ = "Development" +Vector = TypeVar("Vector") + -def sqrt(fdatagrid): +def sqrt(fdatagrid: FDataGrid) -> FDataGrid: """Perform a element wise square root operation. + .. deprecated:: 0.6 + Use :func:`numpy.sqrt` function instead. + Args: - fdatagrid (FDataGrid): Object to whose elements the square root + fdatagrid: Object to whose elements the square root operation is going to be applied. Returns: - FDataGrid: Object whose elements are the square roots of the original. + FDataGrid object whose elements are the square roots of the original. """ - return fdatagrid.copy(data_matrix=np.sqrt(fdatagrid.data_matrix)) + warnings.warn( + "Function sqrt is deprecated. Use numpy.sqrt with a FDataGrid " + "parameter instead.", + DeprecationWarning, + ) + + return cast(FDataGrid, np.sqrt(fdatagrid)) -def absolute(fdatagrid): +def absolute(fdatagrid: FDataGrid) -> FDataGrid: """Get the absolute value of all elements in the FDataGrid object. + .. 
deprecated:: 0.6 + Use :func:`numpy.absolute` function instead. + Args: - fdatagrid (FDataGrid): Object from whose elements the absolute value + fdatagrid: Object from whose elements the absolute value is going to be retrieved. Returns: - FDataGrid: Object whose elements are the absolute values of the + FDataGrid object whose elements are the absolute values of the original. """ - return fdatagrid.copy(data_matrix=np.absolute(fdatagrid.data_matrix)) + warnings.warn( + "Function absolute is deprecated. Use numpy.absolute with a FDataGrid " + "parameter instead.", + DeprecationWarning, + ) + return cast(FDataGrid, np.absolute(fdatagrid)) -def round(fdatagrid, decimals=0): + +def round(fdatagrid: FDataGrid, decimals: int = 0) -> FDataGrid: """Round all elements of the object. + .. deprecated:: 0.6 + Use :func:`numpy.round` function instead. + Args: - fdatagrid (FDataGrid): Object to whose elements are going to be + fdatagrid: Object to whose elements are going to be rounded. - decimals (int, optional): Number of decimals wanted. Defaults to 0. + decimals: Number of decimals wanted. Defaults to 0. Returns: - FDataGrid: Object whose elements are rounded. + FDataGrid object whose elements are rounded. """ - return fdatagrid.round(decimals) + warnings.warn( + "Function round is deprecated. Use numpy.round with a FDataGrid " + "parameter instead.", + DeprecationWarning, + ) + return cast(FDataGrid, np.round(fdatagrid, decimals)) -def exp(fdatagrid): + +def exp(fdatagrid: FDataGrid) -> FDataGrid: """Perform a element wise exponential operation. + .. deprecated:: 0.6 + Use :func:`numpy.exp` function instead. + Args: - fdatagrid (FDataGrid): Object to whose elements the exponential + fdatagrid: Object to whose elements the exponential operation is going to be applied. Returns: - FDataGrid: Object whose elements are the result of exponentiating + FDataGrid object whose elements are the result of exponentiating the elements of the original. 
""" - return fdatagrid.copy(data_matrix=np.exp(fdatagrid.data_matrix)) + warnings.warn( + "Function exp is deprecated. Use numpy.exp with a FDataGrid " + "parameter instead.", + DeprecationWarning, + ) + + return cast(FDataGrid, np.exp(fdatagrid)) -def log(fdatagrid): +def log(fdatagrid: FDataGrid) -> FDataGrid: """Perform a element wise logarithm operation. + .. deprecated:: 0.6 + Use :func:`numpy.log` function instead. + Args: - fdatagrid (FDataGrid): Object to whose elements the logarithm + fdatagrid: Object to whose elements the logarithm operation is going to be applied. Returns: - FDataGrid: Object whose elements are the logarithm of the original. + FDataGrid object whose elements are the logarithm of the original. """ - return fdatagrid.copy(data_matrix=np.log(fdatagrid.data_matrix)) + warnings.warn( + "Function log is deprecated. Use numpy.log with a FDataGrid " + "parameter instead.", + DeprecationWarning, + ) + + return cast(FDataGrid, np.log(fdatagrid)) -def log10(fdatagrid): +def log10(fdatagrid: FDataGrid) -> FDataGrid: """Perform an element wise base 10 logarithm operation. + .. deprecated:: 0.6 + Use :func:`numpy.log10` function instead. + Args: - fdatagrid (FDataGrid): Object to whose elements the base 10 logarithm + fdatagrid: Object to whose elements the base 10 logarithm operation is going to be applied. Returns: - FDataGrid: Object whose elements are the base 10 logarithm of the + FDataGrid object whose elements are the base 10 logarithm of the original. """ - return fdatagrid.copy(data_matrix=np.log10(fdatagrid.data_matrix)) + warnings.warn( + "Function log10 is deprecated. Use numpy.log10 with a FDataGrid " + "parameter instead.", + DeprecationWarning, + ) + + return cast(FDataGrid, np.log10(fdatagrid)) -def log2(fdatagrid): +def log2(fdatagrid: FDataGrid) -> FDataGrid: """Perform an element wise binary logarithm operation. + .. deprecated:: 0.6 + Use :func:`numpy.log2` function instead. 
+ Args: - fdatagrid (FDataGrid): Object to whose elements the binary logarithm + fdatagrid: Object to whose elements the binary logarithm operation is going to be applied. Returns: - FDataGrid: Object whose elements are the binary logarithm of the + FDataGrid object whose elements are the binary logarithm of the original. """ - return fdatagrid.copy(data_matrix=np.log2(fdatagrid.data_matrix)) + warnings.warn( + "Function log2 is deprecated. Use numpy.log2 with a FDataGrid " + "parameter instead.", + DeprecationWarning, + ) + + return cast(FDataGrid, np.log2(fdatagrid)) -def cumsum(fdatagrid): +def cumsum(fdatagrid: FDataGrid) -> FDataGrid: """Return the cumulative sum of the samples. Args: @@ -144,7 +208,13 @@ def cumsum(fdatagrid): @multimethod.multidispatch -def inner_product(arg1, arg2, *, matrix=False, **kwargs): +def inner_product( + arg1, + arg2, + *, + matrix=False, + **kwargs, +): r"""Return the usual (:math:`L_2`) inner product. Calculates the inner product between matching samples in two @@ -255,7 +325,12 @@ def inner_product(arg1, arg2, *, matrix=False, **kwargs): @inner_product.register -def inner_product_fdatagrid(arg1: FDataGrid, arg2: FDataGrid, *, matrix=False): +def inner_product_fdatagrid( + arg1: FDataGrid, + arg2: FDataGrid, + *, + matrix=False +): if not np.array_equal(arg1.grid_points, arg2.grid_points): diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 06ada531d..2c2da86fe 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -208,9 +208,16 @@ def __init__( # noqa: WPS211 sample_names=sample_names, ) - def round(self: T, decimals: int = 0) -> T: # noqa: WPS125 + def round( # noqa: WPS125 + self, + decimals: int = 0, + out: Optional[FDataGrid] = None, + ) -> FDataGrid: """Evenly round to the given number of decimals. + .. deprecated:: 0.6 + Use :func:`numpy.round` function instead. + Args: decimals: Number of decimal places to round to. 
If decimals is negative, it specifies the number of @@ -221,7 +228,28 @@ def round(self: T, decimals: int = 0) -> T: # noqa: WPS125 in its data_matrix are rounded. """ - return self.copy(data_matrix=self.data_matrix.round(decimals)) + out_matrix = None if out is None else out.data_matrix + + if ( + out is not None + and ( + self.domain_range != out.domain_range + or not all( + np.array_equal(a, b) + for a, b in zip(self.grid_points, out.grid_points) + ) + or self.data_matrix.shape != out.data_matrix.shape + ) + ): + raise ValueError("out parameter is not valid") + + data_matrix = np.round( + self.data_matrix, + decimals=decimals, + out=out_matrix + ) + + return self.copy(data_matrix=data_matrix) if out is None else out @property def sample_points(self) -> GridPoints: diff --git a/tests/test_fdatagrid_numpy.py b/tests/test_fdatagrid_numpy.py index e58d5396d..d54f902b4 100644 --- a/tests/test_fdatagrid_numpy.py +++ b/tests/test_fdatagrid_numpy.py @@ -1,47 +1,73 @@ -from skfda import FDataGrid +"""Tests of compatibility between numpy ufuncs and FDataGrid.""" + import unittest +from typing import Any, Callable, TypeVar + import numpy as np +import pytest + +from skfda import FDataGrid + + +@pytest.fixture(params=[ + np.sqrt, + np.absolute, + np.round, + np.exp, + np.log, + np.log10, + np.log2, +]) +def monary(request: Any) -> Any: + """ + A fixture providing the monary function to validate. + + Not all of them are ufuncs. 
+ + """ + return request.param + +T = TypeVar("T", np.ndarray, FDataGrid) -class TestFDataGridNumpy(unittest.TestCase): - def test_monary_ufunc(self): - data_matrix = np.arange(15).reshape(3, 5) +def test_monary_ufuncs(monary: Callable[[T], T]) -> None: + """Test that unary ufuncs can be applied to FDataGrid.""" + data_matrix = np.arange(15).reshape(3, 5) + 1 - fd = FDataGrid(data_matrix) + fd = FDataGrid(data_matrix) - fd_sqrt = np.sqrt(fd) + fd_monary = monary(fd) - fd_sqrt_build = FDataGrid(np.sqrt(data_matrix)) + fd_monary_build = FDataGrid(monary(data_matrix)) - self.assertTrue(fd_sqrt.equals(fd_sqrt_build)) + assert fd_monary.equals(fd_monary_build) - def test_binary_ufunc(self): - data_matrix = np.arange(15).reshape(3, 5) - data_matrix2 = 2 * np.arange(15).reshape(3, 5) - fd = FDataGrid(data_matrix) - fd2 = FDataGrid(data_matrix2) +def test_binary_ufunc() -> None: + """Test that binary ufuncs can be applied to FDataGrid.""" + data_matrix = np.arange(15).reshape(3, 5) + data_matrix2 = 2 * np.arange(15).reshape(3, 5) - fd_mul = np.multiply(fd, fd2) + fd = FDataGrid(data_matrix) + fd2 = FDataGrid(data_matrix2) - fd_mul_build = FDataGrid(data_matrix * data_matrix2) + fd_mul = np.multiply(fd, fd2) - self.assertTrue(fd_mul.equals(fd_mul_build)) + fd_mul_build = FDataGrid(data_matrix * data_matrix2) - def test_out_ufunc(self): - data_matrix = np.arange(15.).reshape(3, 5) - data_matrix_copy = np.copy(data_matrix) + assert fd_mul.equals(fd_mul_build) - fd = FDataGrid(data_matrix) - np.sqrt(fd, out=fd) +def test_out_ufunc(monary) -> None: + """Test that the out parameter of ufuncs work for FDataGrid.""" + data_matrix = np.arange(15).reshape(3, 5) + 1 + data_matrix_copy = np.copy(data_matrix) - fd_sqrt_build = FDataGrid(np.sqrt(data_matrix_copy)) + fd = FDataGrid(data_matrix) - self.assertTrue(fd.equals(fd_sqrt_build)) + monary(fd, out=fd) + fd_monary_build = FDataGrid(monary(data_matrix_copy)) -if __name__ == '__main__': - print() - unittest.main() + assert 
fd.equals(fd_monary_build) From b4627fad0310e4d5c8fd5ab551e151c9d7489f7f Mon Sep 17 00:00:00 2001 From: vnmabus Date: Sun, 10 Jan 2021 00:15:18 +0100 Subject: [PATCH 025/417] Add typing to math. --- skfda/_utils/_utils.py | 18 +++++++++++++--- skfda/misc/_math.py | 47 ++++++++++++++++++++++++++---------------- 2 files changed, 44 insertions(+), 21 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 99e30d90d..ccde70bff 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -4,7 +4,16 @@ import functools import numbers -from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Optional, + Sequence, + Tuple, + Union, + cast, +) import numpy as np import scipy.integrate @@ -396,11 +405,14 @@ def _evaluate_grid( return res -def nquad_vec(func, ranges): +def nquad_vec( + func: Callable[[np.ndarray], np.ndarray], + ranges: Sequence[Tuple[float, float]], +) -> np.ndarray: initial_depth = len(ranges) - 1 - def integrate(*args, depth): + def integrate(*args: Any, depth: int) -> np.ndarray: if depth == 0: f = functools.partial(func, *args) diff --git a/skfda/misc/_math.py b/skfda/misc/_math.py index 73cbb05a8..42d8f5c49 100644 --- a/skfda/misc/_math.py +++ b/skfda/misc/_math.py @@ -6,14 +6,14 @@ """ import warnings from builtins import isinstance -from typing import Any, TypeVar, Union, cast +from typing import Any, Optional, TypeVar, Union, cast import multimethod import numpy as np import scipy.integrate from .._utils import _same_domain, nquad_vec -from ..representation import FDataBasis, FDataGrid +from ..representation import FData, FDataBasis, FDataGrid from ..representation.basis import Basis __author__ = "Miguel Carbajo Berrocal" @@ -209,12 +209,12 @@ def cumsum(fdatagrid: FDataGrid) -> FDataGrid: @multimethod.multidispatch def inner_product( - arg1, - arg2, + arg1: Any, + arg2: Any, *, matrix=False, **kwargs, -): +) -> np.ndarray: r"""Return the usual 
(:math:`L_2`) inner product. Calculates the inner product between matching samples in two @@ -329,8 +329,8 @@ def inner_product_fdatagrid( arg1: FDataGrid, arg2: FDataGrid, *, - matrix=False -): + matrix=False, +) -> np.ndarray: if not np.array_equal(arg1.grid_points, arg2.grid_points): @@ -371,12 +371,14 @@ def inner_product_fdatagrid( @inner_product.register(FDataBasis, Basis) @inner_product.register(Basis, FDataBasis) @inner_product.register(Basis, Basis) -def inner_product_fdatabasis(arg1: Union[FDataBasis, Basis], - arg2: Union[FDataBasis, Basis], - *, - matrix=False, - inner_product_matrix=None, - force_numerical=False): +def inner_product_fdatabasis( + arg1: Union[FDataBasis, Basis], + arg2: Union[FDataBasis, Basis], + *, + matrix=False, + inner_product_matrix=None, + force_numerical=False, +) -> np.ndarray: if not _same_domain(arg1, arg2): raise ValueError("Both Objects should have the same domain_range") @@ -425,15 +427,20 @@ def inner_product_fdatabasis(arg1: Union[FDataBasis, Basis], return _inner_product_integrate(arg1, arg2, matrix=matrix) -def _inner_product_integrate(arg1, arg2, *, matrix=False): +def _inner_product_integrate( + arg1: FData, + arg2: FData, + *, + matrix: bool = False, +) -> np.ndarray: if not np.array_equal(arg1.domain_range, arg2.domain_range): raise ValueError("Domain range for both objects must be equal") - def integrand(*args): - f1 = arg1([*args])[:, 0, :] - f2 = arg2([*args])[:, 0, :] + def integrand(*args: np.ndarray) -> np.ndarray: + f1 = arg1(args)[:, 0, :] + f2 = arg2(args)[:, 0, :] if matrix: ret = np.einsum('n...,m...->nm...', f1, f2) @@ -454,7 +461,11 @@ def integrand(*args): return summation -def inner_product_matrix(arg1, arg2=None, **kwargs): +def inner_product_matrix( + arg1: Vector, + arg2: Optional[Vector] = None, + **kwargs: Any +) -> np.ndarray: """ Returns the inner product matrix between is arguments. 
From ae09501d9c831704cef227b1aeee2d1447c8191c Mon Sep 17 00:00:00 2001 From: vnmabus Date: Sun, 10 Jan 2021 03:34:46 +0100 Subject: [PATCH 026/417] Allow None in FDataBasis to_basis. --- skfda/representation/basis/_fdatabasis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index c62d9de6a..492c388b0 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -566,7 +566,7 @@ def to_grid( def to_basis( self, - basis: Basis, + basis: Optional[Basis] = None, eval_points: Optional[np.ndarray] = None, **kwargs: Any, ) -> FDataBasis: @@ -585,7 +585,7 @@ def to_basis( Basis representation of the funtional data object. """ - if basis == self.basis: + if basis is None or basis == self.basis: return self.copy() return self.to_grid(grid_points=eval_points).to_basis(basis, **kwargs) From 48baacbc3aaa75160fea8083147dba3ba928628d Mon Sep 17 00:00:00 2001 From: vnmabus Date: Tue, 12 Jan 2021 14:14:21 +0100 Subject: [PATCH 027/417] Fix style in math module. 
--- setup.cfg | 2 +- skfda/misc/_math.py | 145 ++++++++++++++++++++++++-------------------- 2 files changed, 80 insertions(+), 67 deletions(-) diff --git a/setup.cfg b/setup.cfg index dd1df4f64..9ff2a35d4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -102,7 +102,7 @@ max-methods = 30 max-local-variables = 15 max-expressions = 15 max-module-expressions = 15 -max-module-members = 10 +max-module-members = 15 max-string-usages = 10 max-cognitive-score = 30 diff --git a/skfda/misc/_math.py b/skfda/misc/_math.py index 42d8f5c49..594c607c3 100644 --- a/skfda/misc/_math.py +++ b/skfda/misc/_math.py @@ -16,13 +16,6 @@ from ..representation import FData, FDataBasis, FDataGrid from ..representation.basis import Basis -__author__ = "Miguel Carbajo Berrocal" -__license__ = "GPL3" -__version__ = "" -__maintainer__ = "" -__email__ = "" -__status__ = "Development" - Vector = TypeVar("Vector") @@ -73,7 +66,10 @@ def absolute(fdatagrid: FDataGrid) -> FDataGrid: return cast(FDataGrid, np.absolute(fdatagrid)) -def round(fdatagrid: FDataGrid, decimals: int = 0) -> FDataGrid: +def round( # noqa: WPS125 + fdatagrid: FDataGrid, + decimals: int = 0, +) -> FDataGrid: """Round all elements of the object. .. deprecated:: 0.6 @@ -203,8 +199,9 @@ def cumsum(fdatagrid: FDataGrid) -> FDataGrid: FDataGrid: Object with the sample wise cumulative sum. """ - return fdatagrid.copy(data_matrix=np.cumsum(fdatagrid.data_matrix, - axis=0)) + return fdatagrid.copy( + data_matrix=np.cumsum(fdatagrid.data_matrix, axis=0), + ) @multimethod.multidispatch @@ -212,8 +209,7 @@ def inner_product( arg1: Any, arg2: Any, *, - matrix=False, - **kwargs, + _matrix=False, ) -> np.ndarray: r"""Return the usual (:math:`L_2`) inner product. @@ -236,17 +232,14 @@ def inner_product( contain only one sample (and will be broadcasted). Args: - arg1: First sample. arg2: Second sample. Returns: - numpy.darray: Vector with the inner products of each pair of samples. Examples: - This function can compute the multivariate inner product. 
>>> import numpy as np @@ -316,24 +309,27 @@ def inner_product( array([ 0.5 , 0.25]) """ - if callable(arg1): - return _inner_product_integrate(arg1, arg2, matrix=matrix) - else: - return (np.einsum('n...,m...->nm...', arg1, arg2).sum(axis=-1) - if matrix else (arg1 * arg2).sum(axis=-1)) + return _inner_product_integrate(arg1, arg2, _matrix=_matrix) + + return ( + np.einsum('n...,m...->nm...', arg1, arg2).sum(axis=-1) + if _matrix else (arg1 * arg2).sum(axis=-1) + ) @inner_product.register -def inner_product_fdatagrid( +def _inner_product_fdatagrid( arg1: FDataGrid, arg2: FDataGrid, *, - matrix=False, + _matrix=False, ) -> np.ndarray: - if not np.array_equal(arg1.grid_points, - arg2.grid_points): + if not np.array_equal( + arg1.grid_points, + arg2.grid_points, + ): raise ValueError("Sample points for both objects must be equal") d1 = arg1.data_matrix @@ -341,7 +337,7 @@ def inner_product_fdatagrid( einsum_broadcast_list = (np.arange(d1.ndim - 1) + 2).tolist() - if matrix: + if _matrix: d1 = np.copy(d1) @@ -352,30 +348,35 @@ def inner_product_fdatagrid( index = (slice(None),) + (np.newaxis,) * (i + 1) d1 *= weights[index] - return np.einsum(d1, [0] + einsum_broadcast_list, - d2, [1] + einsum_broadcast_list, - [0, 1]) + return np.einsum( + d1, + [0] + einsum_broadcast_list, + d2, + [1] + einsum_broadcast_list, + [0, 1], + ) - else: - integrand = d1 * d2 + integrand = d1 * d2 - for s in arg1.grid_points[::-1]: - integrand = scipy.integrate.simps(integrand, - x=s, - axis=-2) + for g in arg1.grid_points[::-1]: + integrand = scipy.integrate.simps( + integrand, + x=g, + axis=-2, + ) - return np.sum(integrand, axis=-1) + return np.sum(integrand, axis=-1) @inner_product.register(FDataBasis, FDataBasis) @inner_product.register(FDataBasis, Basis) @inner_product.register(Basis, FDataBasis) @inner_product.register(Basis, Basis) -def inner_product_fdatabasis( +def _inner_product_fdatabasis( arg1: Union[FDataBasis, Basis], arg2: Union[FDataBasis, Basis], *, - matrix=False, + 
_matrix=False, inner_product_matrix=None, force_numerical=False, ) -> np.ndarray: @@ -402,12 +403,15 @@ def inner_product_fdatabasis( # The number of operations is less using the matrix n_ops_best_with_matrix = max( - arg1.n_samples, arg2.n_samples) > arg1.n_basis * arg2.n_basis + arg1.n_samples, + arg2.n_samples, + ) > arg1.n_basis * arg2.n_basis if not force_numerical and ( - inner_product_matrix is not None - or same_basis - or n_ops_best_with_matrix): + inner_product_matrix is not None + or same_basis + or n_ops_best_with_matrix + ): if inner_product_matrix is None: inner_product_matrix = arg1.basis.inner_product_matrix(arg2.basis) @@ -415,47 +419,54 @@ def inner_product_fdatabasis( coef1 = arg1.coefficients coef2 = arg2.coefficients - if matrix: - return np.einsum('nb,bc,mc->nm', - coef1, inner_product_matrix, coef2) - else: - return (coef1 @ - inner_product_matrix * - coef2).sum(axis=-1) + if _matrix: + return np.einsum( + 'nb,bc,mc->nm', + coef1, + inner_product_matrix, + coef2, + ) - else: - return _inner_product_integrate(arg1, arg2, matrix=matrix) + return ( + coef1 + @ inner_product_matrix + * coef2 + ).sum(axis=-1) + + return _inner_product_integrate(arg1, arg2, _matrix=_matrix) def _inner_product_integrate( arg1: FData, arg2: FData, *, - matrix: bool = False, + _matrix: bool = False, ) -> np.ndarray: - if not np.array_equal(arg1.domain_range, - arg2.domain_range): + if not np.array_equal( + arg1.domain_range, + arg2.domain_range, + ): raise ValueError("Domain range for both objects must be equal") - def integrand(*args: np.ndarray) -> np.ndarray: + def integrand(*args: np.ndarray) -> np.ndarray: # noqa: WPS430 f1 = arg1(args)[:, 0, :] f2 = arg2(args)[:, 0, :] - if matrix: + if _matrix: ret = np.einsum('n...,m...->nm...', f1, f2) - ret = ret.reshape((-1,) + ret.shape[2:]) - return ret - else: - return f1 * f2 + return ret.reshape((-1,) + ret.shape[2:]) + + return f1 * f2 integral = nquad_vec( integrand, - arg1.domain_range) + arg1.domain_range, + ) 
summation = np.sum(integral, axis=-1) - if matrix: + if _matrix: summation = summation.reshape((len(arg1), len(arg2))) return summation @@ -464,20 +475,22 @@ def integrand(*args: np.ndarray) -> np.ndarray: def inner_product_matrix( arg1: Vector, arg2: Optional[Vector] = None, - **kwargs: Any + **kwargs: Any, ) -> np.ndarray: """ - Returns the inner product matrix between is arguments. + Return the inner product matrix between is arguments. If arg2 is ``None`` returns the Gram matrix. Args: - arg1: First sample. arg2: Second sample. + kwargs: Keyword arguments for inner product. - """ + Returns: + Inner product matrix between samples. + """ if isinstance(arg1, Basis): arg1 = arg1.to_basis() if isinstance(arg2, Basis): @@ -486,4 +499,4 @@ def inner_product_matrix( if arg2 is None: arg2 = arg1 - return inner_product(arg1, arg2, matrix=True, **kwargs) + return inner_product(arg1, arg2, _matrix=True, **kwargs) From 6fe7144c7a8a54b222d448520f11ec94b5495d22 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Tue, 12 Jan 2021 16:38:23 +0100 Subject: [PATCH 028/417] Fix style errors. 
--- setup.cfg | 2 ++ skfda/_utils/_utils.py | 3 ++- skfda/representation/grid.py | 3 ++- tests/test_fdatagrid_numpy.py | 4 ++-- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/setup.cfg b/setup.cfg index 9ff2a35d4..ee1ec96cc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,6 +27,8 @@ ignore = # Google Python style is not RST until after processed by Napoleon # See https://github.com/peterjc/flake8-rst-docstrings/issues/17 RST201, RST203, RST301, + # assert is used by pytest tests + S101, # Line break occurred before a binary operator (antipattern) W503, # Short names like X or y are common in scikit-learn diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index ccde70bff..d889652a0 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -409,10 +409,11 @@ def nquad_vec( func: Callable[[np.ndarray], np.ndarray], ranges: Sequence[Tuple[float, float]], ) -> np.ndarray: + """Perform multiple integration of vector valued functions.""" initial_depth = len(ranges) - 1 - def integrate(*args: Any, depth: int) -> np.ndarray: + def integrate(*args: Any, depth: int) -> np.ndarray: # noqa: WPS430 if depth == 0: f = functools.partial(func, *args) diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 2c2da86fe..5475cda49 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -222,6 +222,7 @@ def round( # noqa: WPS125 decimals: Number of decimal places to round to. If decimals is negative, it specifies the number of positions to the left of the decimal point. Defaults to 0. + out: FDataGrid where to place the result, if any. 
Returns: Returns a FDataGrid object where all elements @@ -246,7 +247,7 @@ def round( # noqa: WPS125 data_matrix = np.round( self.data_matrix, decimals=decimals, - out=out_matrix + out=out_matrix, ) return self.copy(data_matrix=data_matrix) if out is None else out diff --git a/tests/test_fdatagrid_numpy.py b/tests/test_fdatagrid_numpy.py index d54f902b4..ef3455844 100644 --- a/tests/test_fdatagrid_numpy.py +++ b/tests/test_fdatagrid_numpy.py @@ -20,7 +20,7 @@ ]) def monary(request: Any) -> Any: """ - A fixture providing the monary function to validate. + Fixture providing the monary function to validate. Not all of them are ufuncs. @@ -59,7 +59,7 @@ def test_binary_ufunc() -> None: assert fd_mul.equals(fd_mul_build) -def test_out_ufunc(monary) -> None: +def test_out_ufunc(monary: Callable[..., Any]) -> None: """Test that the out parameter of ufuncs work for FDataGrid.""" data_matrix = np.arange(15).reshape(3, 5) + 1 data_matrix_copy = np.copy(data_matrix) From 35075eb64bd49d9c5f35130ad397756875bf589e Mon Sep 17 00:00:00 2001 From: vnmabus Date: Tue, 12 Jan 2021 18:35:31 +0100 Subject: [PATCH 029/417] Fix blank line after docstring. --- skfda/_utils/_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index d889652a0..e9c7fb26c 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -410,7 +410,6 @@ def nquad_vec( ranges: Sequence[Tuple[float, float]], ) -> np.ndarray: """Perform multiple integration of vector valued functions.""" - initial_depth = len(ranges) - 1 def integrate(*args: Any, depth: int) -> np.ndarray: # noqa: WPS430 From 73b3f4cdbc333c53dbbd7cdf97dab4541622d4e4 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Wed, 13 Jan 2021 17:35:12 +0100 Subject: [PATCH 030/417] Typing and style for multivariate depth functions. 
--- skfda/exploratory/depth/multivariate.py | 215 ++++++++++++++---------- 1 file changed, 130 insertions(+), 85 deletions(-) diff --git a/skfda/exploratory/depth/multivariate.py b/skfda/exploratory/depth/multivariate.py index 3930526f2..346c445e3 100644 --- a/skfda/exploratory/depth/multivariate.py +++ b/skfda/exploratory/depth/multivariate.py @@ -1,76 +1,101 @@ +"""Depth and outlyingness ABCs and implementations for multivariate data.""" + +from __future__ import annotations + import abc import math -from scipy.special import comb +from typing import Generic, Optional, TypeVar +import numpy as np import scipy.stats import sklearn +from scipy.special import comb +from typing_extensions import Literal -import numpy as np - +T = TypeVar("T") +SelfType = TypeVar("SelfType") +_Side = Literal["left", "right"] -class _DepthOrOutlyingness(abc.ABC, sklearn.base.BaseEstimator): - """ - Abstract class representing a depth or outlyingness function. - """ +class _DepthOrOutlyingness( + abc.ABC, + sklearn.base.BaseEstimator, # type: ignore + Generic[T], +): + """Abstract class representing a depth or outlyingness function.""" - def fit(self, X, y=None): + def fit(self: SelfType, X: T, y: None = None) -> SelfType: """ Learn the distribution from the observations. Args: X: Functional dataset from which the distribution of the data is - inferred. + inferred. y: Unused. Kept only for convention. Returns: - self: Fitted estimator. + Fitted estimator. """ return self @abc.abstractmethod - def predict(self, X): + def predict(self, X: T) -> np.ndarray: """ Compute the depth or outlyingness inside the learned distribution. Args: X: Points whose depth is going to be evaluated. + Returns: + Depth of each observation. + """ pass - def fit_predict(self, X, y=None): + def fit_predict(self, X: T, y: None = None) -> np.ndarray: """ - Compute the depth or outlyingness of each observation with respect to - the whole dataset. + Compute the depth or outlyingness of each observation. 
+ + This computation is done with respect to the whole dataset. Args: X: Dataset. y: Unused. Kept only for convention. + Returns: + Depth of each observation. + """ return self.fit(X).predict(X) - def __call__(self, X, *, distribution=None): + def __call__( + self, + X: T, + *, + distribution: Optional[T] = None, + ) -> np.ndarray: """ - Allows the depth or outlyingness to be used as a function. + Allow the depth or outlyingness to be used as a function. Args: X: Points whose depth is going to be evaluated. distribution: Functional dataset from which the distribution of the data is inferred. If ``None`` it is the same as ``X``. + Returns: + Depth of each observation. + """ copy = sklearn.base.clone(self) if distribution is None: return copy.fit_predict(X) - else: - return copy.fit(distribution).predict(X) - @property - def max(self): + return copy.fit(distribution).predict(X) + + @property # noqa: WPS125 + def max(self) -> float: """ Maximum (or supremum if there is no maximum) of the possibly predicted values. @@ -78,8 +103,8 @@ def max(self): """ return 1 - @property - def min(self): + @property # noqa: WPS125 + def min(self) -> float: """ Minimum (or infimum if there is no maximum) of the possibly predicted values. @@ -88,41 +113,41 @@ def min(self): return 0 -class Depth(_DepthOrOutlyingness): - """ - Abstract class representing a depth function. - - """ - pass - - -class Outlyingness(_DepthOrOutlyingness): - """ - Abstract class representing an outlyingness function. 
+class Depth(_DepthOrOutlyingness[T]): + """Abstract class representing a depth function.""" - """ - pass +class Outlyingness(_DepthOrOutlyingness[T]): + """Abstract class representing an outlyingness function.""" -def _searchsorted_one_dim(array, values, *, side='left'): - searched_index = np.searchsorted(array, values, side=side) - return searched_index +def _searchsorted_one_dim( + array: np.ndarray, + values: np.ndarray, + *, + side: _Side = 'left', +) -> np.ndarray: + return np.searchsorted(array, values, side=side) _searchsorted_vectorized = np.vectorize( _searchsorted_one_dim, signature='(n),(m),()->(m)', - excluded='side') + excluded='side', +) -def _searchsorted_ordered(array, values, *, side='left'): +def _searchsorted_ordered( + array: np.ndarray, + values: np.ndarray, + *, + side: _Side = 'left', +) -> np.ndarray: return _searchsorted_vectorized(array, values, side=side) -def _cumulative_distribution(column): - """Calculates the cumulative distribution function of the values passed to - the function and evaluates it at each point. +def _cumulative_distribution(column: np.ndarray) -> np.ndarray: + """Calculate the cumulative distribution function at each point. Args: column (numpy.darray): Array containing the values over which the @@ -137,11 +162,14 @@ def _cumulative_distribution(column): array([ 0.4, 0.9, 1. , 0.4, 0.6, 0.6, 0.9, 0.4, 0.4, 0.7]) """ - return _searchsorted_ordered(np.sort(column), column, - side='right') / len(column) + return _searchsorted_ordered( + np.sort(column), + column, + side='right', + ) / len(column) -class _UnivariateFraimanMuniz(Depth): +class _UnivariateFraimanMuniz(Depth[np.ndarray]): r""" Univariate depth used to compute the Fraiman an Muniz depth. 
@@ -157,24 +185,26 @@ class _UnivariateFraimanMuniz(Depth): """ - def fit(self, X, y=None): + def fit(self: SelfType, X: np.ndarray, y: None = None) -> SelfType: self._sorted_values = np.sort(X, axis=0) return self - def predict(self, X): + def predict(self, X: np.ndarray) -> np.ndarray: cum_dist = _searchsorted_ordered( np.moveaxis(self._sorted_values, 0, -1), - np.moveaxis(X, 0, -1), side='right') / len(self._sorted_values) + np.moveaxis(X, 0, -1), + side='right', + ) / len(self._sorted_values) assert cum_dist.shape[-2] == 1 return 1 - np.abs(0.5 - np.moveaxis(cum_dist, -1, 0)[..., 0]) - @property - def min(self): + @property # noqa: WPS125 + def min(self) -> float: return 1 / 2 -class SimplicialDepth(Depth): +class SimplicialDepth(Depth[np.ndarray]): r""" Simplicial depth. @@ -183,38 +213,46 @@ class SimplicialDepth(Depth): :math:`p + 1` points sampled from :math:`F` contains :math:`x`. References: - Liu, R. Y. (1990). On a Notion of Data Depth Based on Random Simplices. The Annals of Statistics, 18(1), 405–414. 
""" - def fit(self, X, y=None): + def fit( # noqa: D102 + self, + X: np.ndarray, + y: None = None, + ) -> SimplicialDepth: self._dim = X.shape[-1] if self._dim == 1: self.sorted_values = np.sort(X, axis=0) else: - raise NotImplementedError("SimplicialDepth is currently only " - "implemented for one-dimensional data.") + raise NotImplementedError( + "SimplicialDepth is currently only " + "implemented for one-dimensional data.", + ) return self - def predict(self, X): + def predict(self, X: np.ndarray) -> np.ndarray: # noqa: D102 assert self._dim == X.shape[-1] if self._dim == 1: positions_left = _searchsorted_ordered( np.moveaxis(self.sorted_values, 0, -1), - np.moveaxis(X, 0, -1)) + np.moveaxis(X, 0, -1), + ) positions_left = np.moveaxis(positions_left, -1, 0)[..., 0] positions_right = _searchsorted_ordered( np.moveaxis(self.sorted_values, 0, -1), - np.moveaxis(X, 0, -1), side='right') + np.moveaxis(X, 0, -1), + side='right', + ) positions_right = np.moveaxis(positions_right, -1, 0)[..., 0] @@ -223,11 +261,13 @@ def predict(self, X): total_pairs = comb(len(self.sorted_values), 2) - return (total_pairs - comb(num_strictly_below, 2) - - comb(num_strictly_above, 2)) / total_pairs + return ( + total_pairs - comb(num_strictly_below, 2) + - comb(num_strictly_above, 2) + ) / total_pairs -class OutlyingnessBasedDepth(Depth): +class OutlyingnessBasedDepth(Depth[T]): r""" Computes depth based on an outlyingness measure. @@ -249,34 +289,37 @@ class OutlyingnessBasedDepth(Depth): outlyingness (Outlyingness): Outlyingness object. References: - Serfling, R. (2006). Depth functions in nonparametric multivariate inference. DIMACS Series in Discrete Mathematics and Theoretical Computer Science, 72, 1. 
""" - def __init__(self, outlyingness): + def __init__(self, outlyingness: Outlyingness[T]): self.outlyingness = outlyingness - def fit(self, X, y=None): + def fit( # noqa: D102 + self, + X: T, + y: None = None, + ) -> OutlyingnessBasedDepth: self.outlyingness.fit(X) return self - def predict(self, X): + def predict(self, X: np.ndarray) -> np.ndarray: # noqa: D102 outlyingness_values = self.outlyingness.predict(X) min_val = self.outlyingness.min max_val = self.outlyingness.max - if(math.isinf(max_val)): + if math.isinf(max_val): return 1 / (1 + outlyingness_values - min_val) - else: - return 1 - (outlyingness_values - min_val) / (max_val - min_val) + + return 1 - (outlyingness_values - min_val) / (max_val - min_val) -class StahelDonohoOutlyingness(Outlyingness): +class StahelDonohoOutlyingness(Outlyingness[np.ndarray]): r""" Computes Stahel-Donoho outlyingness. @@ -290,44 +333,47 @@ class StahelDonohoOutlyingness(Outlyingness): median absolute deviation. References: - Zuo, Y., Cui, H., & He, X. (2004). On the Stahel-Donoho estimator and depth-weighted means of multivariate data. Annals of Statistics, 32(1), 167–188. 
https://doi.org/10.1214/aos/1079120132 """ - def fit(self, X, y=None): + def fit( # noqa: D102 + self, + X: np.ndarray, + y: None = None, + ) -> StahelDonohoOutlyingness: dim = X.shape[-1] if dim == 1: self._location = np.median(X, axis=0) - self._scale = scipy.stats.median_abs_deviation( - X, axis=0) + self._scale = scipy.stats.median_abs_deviation(X, axis=0) else: raise NotImplementedError("Only implemented for one dimension") return self - def predict(self, X): + def predict(self, X: np.ndarray) -> np.ndarray: # noqa: D102 dim = X.shape[-1] if dim == 1: # Special case, can be computed exactly - return (np.abs(X - self._location) / - self._scale)[..., 0] + return ( + np.abs(X - self._location) + / self._scale + )[..., 0] - else: - raise NotImplementedError("Only implemented for one dimension") + raise NotImplementedError("Only implemented for one dimension") - @property - def max(self): - return np.inf + @property # noqa: WPS125 + def max(self) -> float: + return math.inf -class ProjectionDepth(OutlyingnessBasedDepth): +class ProjectionDepth(OutlyingnessBasedDepth[np.ndarray]): r""" Computes Projection depth. @@ -338,12 +384,11 @@ class ProjectionDepth(OutlyingnessBasedDepth): :class:`StahelDonohoOutlyingness`: Stahel-Donoho outlyingness. References: - Zuo, Y., Cui, H., & He, X. (2004). On the Stahel-Donoho estimator and depth-weighted means of multivariate data. Annals of Statistics, 32(1), 167–188. https://doi.org/10.1214/aos/1079120132 """ - def __init__(self): + def __init__(self) -> None: super().__init__(outlyingness=StahelDonohoOutlyingness()) From 2950a854944ee2a638bc8dd274978c0402c1cdc5 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Wed, 13 Jan 2021 18:40:12 +0100 Subject: [PATCH 031/417] Typing and style for depth functions. 
--- skfda/exploratory/depth/_depth.py | 110 ++++++++++++++---------- skfda/exploratory/depth/multivariate.py | 2 +- 2 files changed, 66 insertions(+), 46 deletions(-) diff --git a/skfda/exploratory/depth/_depth.py b/skfda/exploratory/depth/_depth.py index 04e188413..e8dd197c4 100644 --- a/skfda/exploratory/depth/_depth.py +++ b/skfda/exploratory/depth/_depth.py @@ -1,23 +1,24 @@ -"""Depth Measures Module. +""" +Depth Measures Module. This module includes different methods to order functional data, -from the center (larger values) outwards(smaller ones).""" +from the center (larger values) outwards(smaller ones). -import itertools +""" +from __future__ import annotations -import scipy.integrate +import itertools +from typing import Optional import numpy as np +import scipy.integrate +from ... import FDataGrid from . import multivariate -from .multivariate import Depth +from .multivariate import Depth, _UnivariateFraimanMuniz -__author__ = "Amanda Hernando Bernabé" -__email__ = "amanda.hernando@estudiante.uam.es" - - -class IntegratedDepth(Depth): +class IntegratedDepth(Depth[FDataGrid]): r""" Functional depth as the integral of a multivariate depth. @@ -29,7 +30,6 @@ class IntegratedDepth(Depth): D(x) = 1 - \left\lvert \frac{1}{2}- F(x)\right\rvert Examples: - >>> import skfda >>> >>> data_matrix = [[1, 1, 2, 3, 2.5, 2], @@ -43,49 +43,66 @@ class IntegratedDepth(Depth): array([ 0.5 , 0.75 , 0.925, 0.875]) References: - Fraiman, R., & Muniz, G. (2001). Trimmed means for functional data. Test, 10(2), 419–440. 
https://doi.org/10.1007/BF02595706 """ - def __init__(self, *, - multivariate_depth=multivariate._UnivariateFraimanMuniz()): + def __init__( + self, + *, + multivariate_depth: Optional[Depth[np.ndarray]] = None, + ) -> None: self.multivariate_depth = multivariate_depth - def fit(self, X, y=None): + def fit( # noqa: D102 + self, + X: FDataGrid, + y: None = None, + ) -> IntegratedDepth: + + self.multivariate_depth_: Depth[np.ndarray] + + if self.multivariate_depth is None: + self.multivariate_depth_ = _UnivariateFraimanMuniz() + else: + self.multivariate_depth_ = self.multivariate_depth self._domain_range = X.domain_range self._grid_points = X.grid_points - self.multivariate_depth.fit(X.data_matrix) + self.multivariate_depth_.fit(X.data_matrix) return self - def predict(self, X): + def predict(self, X: FDataGrid) -> np.ndarray: # noqa: D102 - pointwise_depth = self.multivariate_depth.predict(X.data_matrix) + pointwise_depth = self.multivariate_depth_.predict(X.data_matrix) - interval_len = (self._domain_range[0][1] - - self._domain_range[0][0]) + interval_len = ( + self._domain_range[0][1] + - self._domain_range[0][0] + ) integrand = pointwise_depth for d, s in zip(X.domain_range, X.grid_points): - integrand = scipy.integrate.simps(integrand, - x=s, - axis=1) + integrand = scipy.integrate.simps( + integrand, + x=s, + axis=1, + ) interval_len = d[1] - d[0] integrand /= interval_len return integrand - @property - def max(self): - return self.multivariate_depth.max + @property # noqa: WPS125 + def max(self) -> float: + return self.multivariate_depth_.max - @property - def min(self): - return self.multivariate_depth.min + @property # noqa: WPS125 + def min(self) -> float: + return self.multivariate_depth_.min class ModifiedBandDepth(IntegratedDepth): @@ -99,7 +116,6 @@ class ModifiedBandDepth(IntegratedDepth): determine the bands. 
Examples: - >>> import skfda >>> >>> data_matrix = [[1, 1, 2, 3, 2.5, 2], @@ -114,18 +130,17 @@ class ModifiedBandDepth(IntegratedDepth): array([ 0.5 , 0.83, 0.73, 0.67]) References: - López-Pintado, S., & Romo, J. (2009). On the Concept of Depth for Functional Data. Journal of the American Statistical Association, 104(486), 718–734. https://doi.org/10.1198/jasa.2009.0108 """ - def __init__(self): + def __init__(self) -> None: super().__init__(multivariate_depth=multivariate.SimplicialDepth()) -class BandDepth(Depth): +class BandDepth(Depth[FDataGrid]): r""" Implementation of Band Depth for functional data. @@ -136,7 +151,6 @@ class BandDepth(Depth): hyperplanes determine the bands. Examples: - >>> import skfda >>> >>> data_matrix = [[1, 1, 2, 3, 2.5, 2], @@ -150,7 +164,6 @@ class BandDepth(Depth): array([ 0.5 , 0.83333333, 0.5 , 0.5 ]) References: - López-Pintado, S., & Romo, J. (2009). On the Concept of Depth for Functional Data. Journal of the American Statistical Association, 104(486), 718–734. 
@@ -158,31 +171,38 @@ class BandDepth(Depth): """ - def fit(self, X, y=None): + def fit(self, X: FDataGrid, y: None = None) -> BandDepth: # noqa: D102 if X.dim_codomain != 1: - raise NotImplementedError("Band depth not implemented for vector " - "valued functions") + raise NotImplementedError( + "Band depth not implemented for vector valued functions", + ) self._distribution = X return self - def predict(self, X): + def predict(self, X: FDataGrid) -> np.ndarray: # noqa: D102 num_in = 0 n_total = 0 for f1, f2 in itertools.combinations(self._distribution, 2): - between_range_1 = (f1.data_matrix <= X.data_matrix) & ( - X.data_matrix <= f2.data_matrix) + between_range_1 = ( + (f1.data_matrix <= X.data_matrix) + & (X.data_matrix <= f2.data_matrix) + ) - between_range_2 = (f2.data_matrix <= X.data_matrix) & ( - X.data_matrix <= f1.data_matrix) + between_range_2 = ( + (f2.data_matrix <= X.data_matrix) + & (X.data_matrix <= f1.data_matrix) + ) between_range = between_range_1 | between_range_2 - num_in += np.all(between_range, - axis=tuple(range(1, X.data_matrix.ndim))) + num_in += np.all( + between_range, + axis=tuple(range(1, X.data_matrix.ndim)), + ) n_total += 1 return num_in / n_total diff --git a/skfda/exploratory/depth/multivariate.py b/skfda/exploratory/depth/multivariate.py index 346c445e3..d98413c01 100644 --- a/skfda/exploratory/depth/multivariate.py +++ b/skfda/exploratory/depth/multivariate.py @@ -12,7 +12,7 @@ from scipy.special import comb from typing_extensions import Literal -T = TypeVar("T") +T = TypeVar("T", contravariant=True) SelfType = TypeVar("SelfType") _Side = Literal["left", "right"] From 58f3bb37481b387b5f93482583fc8a470edfeb36 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Wed, 13 Jan 2021 18:46:40 +0100 Subject: [PATCH 032/417] Fix style errors. 
--- skfda/exploratory/depth/_depth.py | 4 ++-- skfda/exploratory/depth/multivariate.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/skfda/exploratory/depth/_depth.py b/skfda/exploratory/depth/_depth.py index e8dd197c4..30947e36f 100644 --- a/skfda/exploratory/depth/_depth.py +++ b/skfda/exploratory/depth/_depth.py @@ -97,11 +97,11 @@ def predict(self, X: FDataGrid) -> np.ndarray: # noqa: D102 return integrand @property # noqa: WPS125 - def max(self) -> float: + def max(self) -> float: # noqa: WPS125 return self.multivariate_depth_.max @property # noqa: WPS125 - def min(self) -> float: + def min(self) -> float: # noqa: WPS125 return self.multivariate_depth_.min diff --git a/skfda/exploratory/depth/multivariate.py b/skfda/exploratory/depth/multivariate.py index d98413c01..f7c1d5081 100644 --- a/skfda/exploratory/depth/multivariate.py +++ b/skfda/exploratory/depth/multivariate.py @@ -95,7 +95,7 @@ def __call__( return copy.fit(distribution).predict(X) @property # noqa: WPS125 - def max(self) -> float: + def max(self) -> float: # noqa: WPS125 """ Maximum (or supremum if there is no maximum) of the possibly predicted values. @@ -104,7 +104,7 @@ def max(self) -> float: return 1 @property # noqa: WPS125 - def min(self) -> float: + def min(self) -> float: # noqa: WPS125 """ Minimum (or infimum if there is no maximum) of the possibly predicted values. 
@@ -200,7 +200,7 @@ def predict(self, X: np.ndarray) -> np.ndarray: return 1 - np.abs(0.5 - np.moveaxis(cum_dist, -1, 0)[..., 0]) @property # noqa: WPS125 - def min(self) -> float: + def min(self) -> float: # noqa: WPS125 return 1 / 2 @@ -302,7 +302,7 @@ def fit( # noqa: D102 self, X: T, y: None = None, - ) -> OutlyingnessBasedDepth: + ) -> OutlyingnessBasedDepth[T]: self.outlyingness.fit(X) return self @@ -369,7 +369,7 @@ def predict(self, X: np.ndarray) -> np.ndarray: # noqa: D102 raise NotImplementedError("Only implemented for one dimension") @property # noqa: WPS125 - def max(self) -> float: + def max(self) -> float: # noqa: WPS125 return math.inf From 5a616ce8ea762fd7e8e9e71cf9e8caa4713613c3 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Sat, 16 Jan 2021 18:08:17 +0100 Subject: [PATCH 033/417] Refactored norm. --- skfda/exploratory/stats/_stats.py | 4 +- skfda/misc/metrics.py | 665 ++++++++++++++++++++++-------- skfda/misc/operators/_identity.py | 4 +- skfda/ml/_neighbors_base.py | 12 +- skfda/representation/_typing.py | 22 +- tests/test_metrics.py | 101 +++-- 6 files changed, 582 insertions(+), 226 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 32871ffb9..6aaac3871 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -5,7 +5,7 @@ import numpy as np -from ...misc.metrics import l2_distance, lp_norm +from ...misc.metrics import l2_distance, l2_norm from ...representation import FData, FDataGrid from ..depth import Depth, ModifiedBandDepth @@ -177,7 +177,7 @@ def geometric_median( median_new = _weighted_average(X, weights_new) - if lp_norm(median_new - median) < tol: + if l2_norm(median_new - median) < tol: return median_new distances = metric(X, median_new) diff --git a/skfda/misc/metrics.py b/skfda/misc/metrics.py index 2fc60799c..972847d85 100644 --- a/skfda/misc/metrics.py +++ b/skfda/misc/metrics.py @@ -1,17 +1,58 @@ +import math +import warnings +from abc import 
abstractmethod from builtins import isinstance - -import scipy.integrate +from typing import Any, Optional, Tuple, TypeVar, Union import numpy as np +import scipy.integrate +from typing_extensions import Protocol from .._utils import _pairwise_commutative -from ..preprocessing.registration import normalize_warping, ElasticRegistration +from ..preprocessing.registration import ElasticRegistration, normalize_warping from ..preprocessing.registration._warping import _normalize_scale from ..preprocessing.registration.elastic import SRSF -from ..representation import FData, FDataGrid, FDataBasis +from ..representation import FData, FDataBasis, FDataGrid +from ..representation._typing import Vector + +T = TypeVar("T", bound=FData) +VectorType = TypeVar("VectorType", contravariant=True, bound=Vector) +MetricElementType = TypeVar("MetricElementType", contravariant=True) + + +class Norm(Protocol[VectorType]): + """Protocol for a norm of a vector.""" + + @abstractmethod + def __call__(self, __vector: VectorType) -> np.ndarray: + """Compute the norm of a vector.""" + +class Metric(Protocol[MetricElementType]): + """Protocol for a metric between two elements of a metric space.""" -def _check_compatible(fdata1, fdata2): + @abstractmethod + def __call__( + self, + __e1: MetricElementType, + __e2: MetricElementType, + ) -> np.ndarray: + """Compute the norm of a vector.""" + + +class PairwiseMetric(Protocol[MetricElementType]): + """Protocol for a pairwise metric between elements of a metric space.""" + + @abstractmethod + def __call__( + self, + __e1: MetricElementType, + __e2: Optional[MetricElementType] = None, + ) -> np.ndarray: + """Compute the norm of a vector.""" + + +def _check_compatible(fdata1: T, fdata2: T) -> None: if isinstance(fdata1, FData) and isinstance(fdata2, FData): if (fdata2.dim_codomain != fdata1.dim_codomain or @@ -22,7 +63,12 @@ def _check_compatible(fdata1, fdata2): raise ValueError("Domain ranges for both objects must be equal") -def 
_cast_to_grid(fdata1, fdata2, eval_points=None, _check=True, **kwargs): +def _cast_to_grid( + fdata1: FData, + fdata2: FData, + eval_points: np.ndarray = None, + _check: bool = True, +) -> Tuple[FDataGrid, FDataGrid]: """Convert fdata1 and fdata2 to FDatagrid. Checks if the fdatas passed as argument are unidimensional and compatible @@ -67,89 +113,175 @@ def _cast_to_grid(fdata1, fdata2, eval_points=None, _check=True, **kwargs): return fdata1, fdata2 -def distance_from_norm(norm, **kwargs): - r"""Return the distance induced by a norm. +class LpNorm(Norm[FData]): + r""" + Norm of all the observations in a FDataGrid object. - Given a norm :math:`\| \cdot \|: X \rightarrow \mathbb{R}`, - returns the distance :math:`d: X \times X \rightarrow \mathbb{R}` induced - by the norm: + For each observation f the Lp norm is defined as: .. math:: - d(f,g) = \|f - g\| + \| f \| = \left( \int_D \| f \|^p dx \right)^{ + \frac{1}{p}} - Args: - norm (:obj:`Function`): Norm function `norm(fdata, **kwargs)`. - **kwargs (dict, optional): Named parameters to be passed to the norm - function. + Where D is the :term:`domain` over which the functions are defined. - Returns: - :obj:`Function`: Distance function `norm_distance(fdata1, fdata2)`. + The integral is approximated using Simpson's rule. - Examples: - Computes the :math:`\mathbb{L}^2` distance between an object containing - functional data corresponding to the function :math:`y(x) = x` defined - over the interval [0, 1] and another one containing data of the - function :math:`y(x) = x/2`. + In general, if f is a multivariate function :math:`(f_1, ..., f_d)`, and + :math:`D \subset \mathbb{R}^n`, it is applied the following generalization + of the Lp norm. - Firstly we create the functional data. + .. math:: + \| f \| = \left( \int_D \| f \|_{*}^p dx \right)^{ + \frac{1}{p}} + + Where :math:`\| \cdot \|_*` denotes a vectorial norm. See + :func:`vectorial_norm` to more information. 
+ + For example, if :math:`f: \mathbb{R}^2 \rightarrow \mathbb{R}^2`, and + :math:`\| \cdot \|_*` is the euclidean norm + :math:`\| (x,y) \|_* = \sqrt{x^2 + y^2}`, the lp norm applied is + + .. math:: + \| f \| = \left( \int \int_D \left ( \sqrt{ \| f_1(x,y) + \|^2 + \| f_2(x,y) \|^2 } \right )^p dxdy \right)^{ + \frac{1}{p}} + The objects `l1_norm`, `l2_norm` and `linf_norm` are instances of this + class with commonly used values of `p`, namely 1, 2 and infinity. + + Args: + fdata (FData): FData object. + p: p of the lp norm. Must be greater or equal + than 1. If p='inf' or p=np.inf it is used the L infinity metric. + Defaults to 2. + p2: vector norm to apply. If it is a float, is the index of the + multivariate lp norm. Defaults to the same as `p`. + + Examples: + Calculates the norm of a FDataGrid containing the functions y = 1 + and y = x defined in the interval [0,1]. + + >>> import skfda + >>> import numpy as np + >>> >>> x = np.linspace(0, 1, 1001) - >>> fd = FDataGrid([x], x) - >>> fd2 = FDataGrid([x/2], x) + >>> fd = skfda.FDataGrid([np.ones(len(x)), x] ,x) + >>> norm = skfda.misc.metrics.LpNorm(2) + >>> norm(fd).round(2) + array([ 1. , 0.58]) - To construct the :math:`\mathbb{L}^2` distance it is used the - :math:`\mathbb{L}^2` norm wich it is used to compute the distance. + As the norm with `p=2` is a common choice, one can use `l2_norm` + directly: + >>> skfda.misc.metrics.l2_norm(fd).round(2) + array([ 1. , 0.58]) - >>> l2_distance = distance_from_norm(lp_norm, p=2) - >>> d = l2_distance(fd, fd2) - >>> float('%.3f'% d) - 0.289 + The lp norm is only defined if p >= 1. + + >>> norm = skfda.misc.metrics.LpNorm(0.5) + Traceback (most recent call last): + .... + ValueError: p must be equal or greater than 1. 
""" - def norm_distance(fdata1, fdata2): - # Substract operation checks if objects are compatible - return norm(fdata1 - fdata2, **kwargs) - norm_distance.__name__ = f"{norm.__name__}_distance" + def __init__( + self, p: float, + vector_norm: Optional[Union[Norm[np.ndarray], float]] = None, + ) -> None: - return norm_distance + # Checks that the lp normed is well defined + if not np.isinf(p) and p < 1: + raise ValueError(f"p must be equal or greater than 1.") + self.p = p + self.vector_norm = vector_norm -def pairwise_distance(distance, **kwargs): - r"""Return a pairwise distance function for FData objects. + def __repr__(self) -> str: + return ( + f"{type(self).__name__}(" + f"p={self.p}, vector_norm={self.vector_norm})" + ) - Given a distance it returns the corresponding pairwise distance function. + def __call__(self, fdata: FData) -> np.ndarray: + from ..misc import inner_product - The returned pairwise distance function calculates the distance between - all possible pairs consisting of one observation of the first FDataGrid - object and one of the second one. + vector_norm = self.vector_norm - The matrix returned by the pairwise distance is a matrix with as many rows - as observations in the first object and as many columns as observations in - the second one. Each element (i, j) of the matrix is the distance between - the ith observation of the first object and the jth observation of the - second one. + if vector_norm is None: + vector_norm = self.p - Args: - distance (:obj:`Function`): Distance functions between two functional - objects `distance(fdata1, fdata2, **kwargs)`. - **kwargs (:obj:`dict`, optional): parameters dictionary to be passed - to the distance function. + # Special case, the inner product is heavily optimized + if self.p == vector_norm == 2: + return np.sqrt(inner_product(fdata, fdata)) - Returns: - :obj:`Function`: Pairwise distance function, wich accepts two - functional data objects and returns the pairwise distance matrix. 
- """ - def pairwise(fdata1, fdata2=None): + if isinstance(fdata, FDataBasis): + if self.p != 2: + raise NotImplementedError - return _pairwise_commutative(distance, fdata1, fdata2) + start, end = fdata.domain_range[0] + integral = scipy.integrate.quad_vec( + lambda x: np.power(np.abs(fdata(x)), self.p), + start, + end, + ) + res = np.sqrt(integral[0]).flatten() - pairwise.__name__ = f"pairwise_{distance.__name__}" + else: + if fdata.dim_codomain > 1: + data_matrix = fdata.data_matrix + original_shape = data_matrix.shape + data_matrix = data_matrix.reshape(-1, original_shape[-1]) + + data_matrix = (np.linalg.norm( + fdata.data_matrix, + ord=vector_norm, + axis=-1, + keepdims=True, + ) if isinstance(vector_norm, (float, int)) + else vector_norm(data_matrix) + ) + data_matrix = data_matrix.reshape(original_shape[:-1] + (1,)) + else: + data_matrix = np.abs(fdata.data_matrix) - return pairwise + if np.isinf(self.p): + if fdata.dim_domain == 1: + res = np.max(data_matrix[..., 0], axis=1) + else: + res = np.array([np.max(observation) + for observation in data_matrix]) -def lp_norm(fdata, p=2, p2=None): + elif fdata.dim_domain == 1: + + # Computes the norm, approximating the integral with Simpson's + # rule. + res = scipy.integrate.simps( + data_matrix[..., 0] ** self.p, + x=fdata.grid_points, + ) ** (1 / self.p) + + else: + # Needed to perform surface integration + return NotImplemented + + if len(res) == 1: + return res[0] + + return res + + +l1_norm = LpNorm(1) +l2_norm = LpNorm(2) +linf_norm = LpNorm(math.inf) + + +def lp_norm( + fdata: FData, + p: float, + vector_norm: Optional[Union[Norm[np.ndarray], float]] = None, +) -> np.ndarray: r"""Calculate the norm of all the observations in a FDataGrid object. For each observation f the Lp norm is defined as: @@ -182,7 +314,6 @@ def lp_norm(fdata, p=2, p2=None): \|^2 + \| f_2(x,y) \|^2 } \right )^p dxdy \right)^{ \frac{1}{p}} - Args: fdata (FData): FData object. p (int, optional): p of the lp norm. 
Must be greater or equal @@ -202,77 +333,174 @@ def lp_norm(fdata, p=2, p2=None): Calculates the norm of a FDataGrid containing the functions y = 1 and y = x defined in the interval [0,1]. - + >>> import skfda + >>> import numpy as np + >>> >>> x = np.linspace(0,1,1001) - >>> fd = FDataGrid([np.ones(len(x)), x] ,x) - >>> lp_norm(fd).round(2) + >>> fd = skfda.FDataGrid([np.ones(len(x)), x] ,x) + >>> skfda.misc.metrics.lp_norm(fd, p=2).round(2) + array([ 1. , 0.58]) + + As the norm with `p=2` is a common choice, one can use `l2_norm` + directly: + >>> skfda.misc.metrics.l2_norm(fd).round(2) array([ 1. , 0.58]) The lp norm is only defined if p >= 1. - >>> lp_norm(fd, p = 0.5) + >>> skfda.misc.metrics.lp_norm(fd, p=0.5) Traceback (most recent call last): .... ValueError: p must be equal or greater than 1. """ - from ..misc import inner_product - - if p2 is None: - p2 = p - - # Special case, the inner product is heavily optimized - if p == p2 == 2: - return np.sqrt(inner_product(fdata, fdata)) - - # Checks that the lp normed is well defined - if not (p == 'inf' or np.isinf(p)) and p < 1: - raise ValueError(f"p must be equal or greater than 1.") - - if isinstance(fdata, FDataBasis): - if fdata.dim_codomain > 1 or p != 2: - raise NotImplementedError - - start, end = fdata.domain_range[0] - integral = scipy.integrate.quad_vec( - lambda x: np.power(np.abs(fdata(x)), p), start, end) - res = np.sqrt(integral[0]).flatten() - - else: - if fdata.dim_codomain > 1: - if p2 == 'inf': - p2 = np.inf - data_matrix = np.linalg.norm(fdata.data_matrix, ord=p2, axis=-1, - keepdims=True) - else: - data_matrix = np.abs(fdata.data_matrix) + return LpNorm(p=p, vector_norm=vector_norm)(fdata) - if p == 'inf' or np.isinf(p): - if fdata.dim_domain == 1: - res = np.max(data_matrix[..., 0], axis=1) - else: - res = np.array([np.max(observation) - for observation in data_matrix]) +class NormInducedMetric(Metric[VectorType]): + r""" + Metric induced by a norm. 
- elif fdata.dim_domain == 1: + Given a norm :math:`\| \cdot \|: X \rightarrow \mathbb{R}`, + returns the metric :math:`d: X \times X \rightarrow \mathbb{R}` induced + by the norm: - # Computes the norm, approximating the integral with Simpson's - # rule. - res = scipy.integrate.simps(data_matrix[..., 0] ** p, - x=fdata.grid_points) ** (1 / p) + .. math:: + d(f,g) = \|f - g\| + + Args: + norm (:obj:`Function`): Norm function `norm(fdata, **kwargs)`. + + Examples: + Computes the :math:`\mathbb{L}^2` distance between an object containing + functional data corresponding to the function :math:`y(x) = x` defined + over the interval [0, 1] and another one containing data of the + function :math:`y(x) = x/2`. + + Firstly we create the functional data. + + >>> import skfda + >>> + >>> x = np.linspace(0, 1, 1001) + >>> fd = FDataGrid([x], x) + >>> fd2 = FDataGrid([x/2], x) + + To construct the :math:`\mathbb{L}^2` distance it is used the + :math:`\mathbb{L}^2` norm wich it is used to compute the distance. + + >>> l2_distance = distance_from_norm(lp_norm, p=2) + >>> d = l2_distance(fd, fd2) + >>> float('%.3f'% d) + 0.289 + + """ + + def __init__(self, norm: Norm[VectorType]): + self.norm = norm + + def __call__(self, elem1: VectorType, elem2: VectorType) -> np.ndarray: + return self.norm(elem1 - elem2) + + def __repr__(self) -> str: + return f"{type(self).__name__}(norm={self.norm})" + + +def distance_from_norm( + norm: Norm[VectorType], + **kwargs: Any, +) -> Metric[VectorType]: + r"""Return the distance induced by a norm. + + Given a norm :math:`\| \cdot \|: X \rightarrow \mathbb{R}`, + returns the distance :math:`d: X \times X \rightarrow \mathbb{R}` induced + by the norm: + + .. math:: + d(f,g) = \|f - g\| + + Args: + norm (:obj:`Function`): Norm function `norm(fdata, **kwargs)`. + **kwargs (dict, optional): Named parameters to be passed to the norm + function. + + Returns: + :obj:`Function`: Distance function `norm_distance(fdata1, fdata2)`. 
+ + Examples: + Computes the :math:`\mathbb{L}^2` distance between an object containing + functional data corresponding to the function :math:`y(x) = x` defined + over the interval [0, 1] and another one containing data of the + function :math:`y(x) = x/2`. + + Firstly we create the functional data. + + >>> x = np.linspace(0, 1, 1001) + >>> fd = FDataGrid([x], x) + >>> fd2 = FDataGrid([x/2], x) + + To construct the :math:`\mathbb{L}^2` distance it is used the + :math:`\mathbb{L}^2` norm wich it is used to compute the distance. + + >>> l2_distance = distance_from_norm(lp_norm, p=2) + >>> d = l2_distance(fd, fd2) + >>> float('%.3f'% d) + 0.289 + + """ + def norm_distance(fdata1: VectorType, fdata2: VectorType) -> np.ndarray: + # Substract operation checks if objects are compatible + return norm(fdata1 - fdata2, **kwargs) # type: ignore + + norm_distance.__name__ = f"{norm.__name__}_distance" + + return norm_distance - else: - # Needed to perform surface integration - return NotImplemented - if len(res) == 1: - return res[0] +def pairwise_distance( + distance: Metric[MetricElementType], + **kwargs, +) -> PairwiseMetric[MetricElementType]: + r"""Return a pairwise distance function for FData objects. - return res + Given a distance it returns the corresponding pairwise distance function. + The returned pairwise distance function calculates the distance between + all possible pairs consisting of one observation of the first FDataGrid + object and one of the second one. + + The matrix returned by the pairwise distance is a matrix with as many rows + as observations in the first object and as many columns as observations in + the second one. Each element (i, j) of the matrix is the distance between + the ith observation of the first object and the jth observation of the + second one. 
-def lp_distance(fdata1, fdata2, p=2, p2=2, *, eval_points=None, _check=True): + Args: + distance (:obj:`Function`): Distance functions between two functional + objects `distance(fdata1, fdata2, **kwargs)`. + **kwargs (:obj:`dict`, optional): parameters dictionary to be passed + to the distance function. + + Returns: + :obj:`Function`: Pairwise distance function, wich accepts two + functional data objects and returns the pairwise distance matrix. + """ + def pairwise(fdata1, fdata2=None): + + return _pairwise_commutative(distance, fdata1, fdata2, **kwargs) + + pairwise.__name__ = f"pairwise_{distance.__name__}" + + return pairwise + + +def lp_distance( + fdata1: T, + fdata2: T, + p: int = 2, + p2: int = 2, + *, + eval_points: np.ndarray = None, +) -> np.ndarray: r"""Lp distance for FDataGrid objects. Calculates the distance between two functional objects. @@ -325,10 +553,15 @@ def lp_distance(fdata1, fdata2, p=2, p2=2, *, eval_points=None, _check=True): """ _check_compatible(fdata1, fdata2) - return lp_norm(fdata1 - fdata2, p=p, p2=p2) + return lp_norm(fdata1 - fdata2, p=p, vector_norm=p2) -def l1_distance(fdata1, fdata2, *, eval_points=None, _check=True): +def l1_distance( + fdata1: T, + fdata2: T, + *, + eval_points: np.ndarray = None, +) -> np.ndarray: r"""L1 distance for FDataGrid objects. Calculates the L1 distance between fdata1 and fdata2: @@ -342,11 +575,21 @@ def l1_distance(fdata1, fdata2, *, eval_points=None, _check=True): :func:`~skfda.misc.metrics.l2_distance :func:`~skfda.misc.metrics.linf_distance """ - return lp_distance(fdata1, fdata2, p=1, p2=1, - eval_points=eval_points, _check=_check) - - -def l2_distance(fdata1, fdata2, *, eval_points=None, _check=True): + return lp_distance( + fdata1, + fdata2, + p=1, + p2=1, + eval_points=eval_points, + ) + + +def l2_distance( + fdata1: T, + fdata2: T, + *, + eval_points: np.ndarray = None, +) -> np.ndarray: r"""L2 distance for FDataGrid objects. 
Calculates the euclidean distance between fdata1 and fdata2: @@ -360,11 +603,21 @@ def l2_distance(fdata1, fdata2, *, eval_points=None, _check=True): :func:`~skfda.misc.metrics.l1_distance :func:`~skfda.misc.metrics.linf_distance """ - return lp_distance(fdata1, fdata2, p=2, p2=2, - eval_points=eval_points, _check=_check) - - -def linf_distance(fdata1, fdata2, *, eval_points=None, _check=True): + return lp_distance( + fdata1, + fdata2, + p=2, + p2=2, + eval_points=eval_points, + ) + + +def linf_distance( + fdata1: T, + fdata2: T, + *, + eval_points: np.ndarray = None, +) -> np.ndarray: r"""L_infinity distance for FDataGrid objects. Calculates the L_infinity distance between fdata1 and fdata2: @@ -377,11 +630,22 @@ def linf_distance(fdata1, fdata2, *, eval_points=None, _check=True): :func:`~skfda.misc.metrics.l1_distance :func:`~skfda.misc.metrics.l2_distance """ - return lp_distance(fdata1, fdata2, p=np.inf, p2=np.inf, - eval_points=eval_points, _check=_check) - - -def fisher_rao_distance(fdata1, fdata2, *, eval_points=None, _check=True): + return lp_distance( + fdata1, + fdata2, + p=np.inf, + p2=np.inf, + eval_points=eval_points, + ) + + +def fisher_rao_distance( + fdata1: T, + fdata2: T, + *, + eval_points: np.ndarray = None, + _check: bool = True, +) -> np.ndarray: r"""Compute the Fisher-Rao distance between two functional objects. Let :math:`f_i` and :math:`f_j` be two functional observations, and let @@ -402,9 +666,9 @@ def fisher_rao_distance(fdata1, fdata2, *, eval_points=None, _check=True): transformation. Args: - fdata1 (FData): First FData object. - fdata2 (FData): Second FData object. - eval_points (array_like, optional): Array with points of evaluation. + fdata1: First FData object. + fdata2: Second FData object. + eval_points: Array with points of evaluation. Returns: Fisher rao distance. @@ -418,17 +682,25 @@ def fisher_rao_distance(fdata1, fdata2, *, eval_points=None, _check=True): Metric* (pp. 5-7). arXiv:1103.3817v2. 
""" - fdata1, fdata2 = _cast_to_grid(fdata1, fdata2, eval_points=eval_points, - _check=_check) + fdata1, fdata2 = _cast_to_grid( + fdata1, + fdata2, + eval_points=eval_points, + _check=_check, + ) # Both should have the same grid points eval_points_normalized = _normalize_scale(fdata1.grid_points[0]) # Calculate the corresponding srsf and normalize to (0,1) - fdata1 = fdata1.copy(grid_points=eval_points_normalized, - domain_range=(0, 1)) - fdata2 = fdata2.copy(grid_points=eval_points_normalized, - domain_range=(0, 1)) + fdata1 = fdata1.copy( + grid_points=eval_points_normalized, + domain_range=(0, 1), + ) + fdata2 = fdata2.copy( + grid_points=eval_points_normalized, + domain_range=(0, 1), + ) srsf = SRSF(initial_value=0) fdata1_srsf = srsf.fit_transform(fdata1) @@ -438,8 +710,15 @@ def fisher_rao_distance(fdata1, fdata2, *, eval_points=None, _check=True): return lp_distance(fdata1_srsf, fdata2_srsf, p=2) -def amplitude_distance(fdata1, fdata2, *, lam=0., eval_points=None, - _check=True, **kwargs): +def amplitude_distance( + fdata1: T, + fdata2: T, + *, + lam: float = 0.0, + eval_points: np.ndarray = None, + _check: bool = True, + **kwargs: Any, +) -> np.ndarray: r"""Compute the amplitude distance between two functional objects. Let :math:`f_i` and :math:`f_j` be two functional observations, and let @@ -470,15 +749,15 @@ def amplitude_distance(fdata1, fdata2, *, lam=0., eval_points=None, transformation. Args: - fdata1 (FData): First FData object. - fdata2 (FData): Second FData object. - lam (float, optional): Penalty term to restric the elasticity. - eval_points (array_like, optional): Array with points of evaluation. - **kwargs (dict): Name arguments to be passed to + fdata1: First FData object. + fdata2: Second FData object. + lam: Penalty term to restric the elasticity. + eval_points: Array with points of evaluation. + **kwargs: Name arguments to be passed to :func:`elastic_registration_warping`. Returns: - float: Elastic distance. + Elastic distance. 
Raises: ValueError: If the objects are not unidimensional. @@ -488,23 +767,32 @@ def amplitude_distance(fdata1, fdata2, *, lam=0., eval_points=None, Functional and shape data analysis. In *Amplitude Space and a Metric Structure* (pp. 107-109). Springer. """ - fdata1, fdata2 = _cast_to_grid(fdata1, fdata2, eval_points=eval_points, - _check=_check) + fdata1, fdata2 = _cast_to_grid( + fdata1, + fdata2, + eval_points=eval_points, + _check=_check, + ) # Both should have the same grid points eval_points_normalized = _normalize_scale(fdata1.grid_points[0]) # Calculate the corresponding srsf and normalize to (0,1) - fdata1 = fdata1.copy(grid_points=eval_points_normalized, - domain_range=(0, 1)) - fdata2 = fdata2.copy(grid_points=eval_points_normalized, - domain_range=(0, 1)) + fdata1 = fdata1.copy( + grid_points=eval_points_normalized, + domain_range=(0, 1), + ) + fdata2 = fdata2.copy( + grid_points=eval_points_normalized, + domain_range=(0, 1), + ) elastic_registration = ElasticRegistration( template=fdata2, penalty=lam, output_points=eval_points_normalized, - **kwargs) + **kwargs, + ) fdata1_reg = elastic_registration.fit_transform(fdata1) @@ -527,8 +815,14 @@ def amplitude_distance(fdata1, fdata2, *, lam=0., eval_points=None, return distance -def phase_distance(fdata1, fdata2, *, lam=0., eval_points=None, _check=True, - **kwargs): +def phase_distance( + fdata1: T, + fdata2: T, + *, + lam: float = 0.0, + eval_points: np.ndarray = None, + _check: bool = True, +) -> np.ndarray: r"""Compute the phase distance between two functional objects. Let :math:`f_i` and :math:`f_j` be two functional observations, and let @@ -548,15 +842,15 @@ def phase_distance(fdata1, fdata2, *, lam=0., eval_points=None, _check=True, transformation. Args: - fdata1 (FData): First FData object. - fdata2 (FData): Second FData object. - lambda (float, optional): Penalty term to restric the elasticity. + fdata1: First FData object. + fdata2: Second FData object. 
+ lambda: Penalty term to restric the elasticity. eval_points (array_like, optional): Array with points of evaluation. **kwargs (dict): Name arguments to be passed to :func:`elastic_registration_warping`. Returns: - float: Phase distance between the objects. + Phase distance between the objects. Raises: ValueError: If the objects are not unidimensional. @@ -566,21 +860,30 @@ def phase_distance(fdata1, fdata2, *, lam=0., eval_points=None, _check=True, Functional and shape data analysis. In *Phase Space and a Metric Structure* (pp. 109-111). Springer. """ - fdata1, fdata2 = _cast_to_grid(fdata1, fdata2, eval_points=eval_points, - _check=_check) + fdata1, fdata2 = _cast_to_grid( + fdata1, + fdata2, + eval_points=eval_points, + _check=_check, + ) # Rescale in (0,1) eval_points_normalized = _normalize_scale(fdata1.grid_points[0]) # Calculate the corresponding srsf and normalize to (0,1) - fdata1 = fdata1.copy(grid_points=eval_points_normalized, - domain_range=(0, 1)) - fdata2 = fdata2.copy(grid_points=eval_points_normalized, - domain_range=(0, 1)) + fdata1 = fdata1.copy( + grid_points=eval_points_normalized, + domain_range=(0, 1), + ) + fdata2 = fdata2.copy( + grid_points=eval_points_normalized, + domain_range=(0, 1), + ) elastic_registration = ElasticRegistration( penalty=lam, template=fdata2, - output_points=eval_points_normalized) + output_points=eval_points_normalized, + ) elastic_registration.fit_transform(fdata1) @@ -595,7 +898,13 @@ def phase_distance(fdata1, fdata2, *, lam=0., eval_points=None, _check=True, return np.arccos(d) -def warping_distance(warping1, warping2, *, eval_points=None, _check=True): +def warping_distance( + warping1: T, + warping2: T, + *, + eval_points: np.ndarray = None, + _check: bool = True, +) -> np.ndarray: r"""Compute the distance between warpings functions. 
Let :math:`\gamma_i` and :math:`\gamma_j` be two warpings, defined in @@ -614,12 +923,12 @@ def warping_distance(warping1, warping2, *, eval_points=None, _check=True): to change the :term:`domain`. Args: - fdata1 (:obj:`FData`): First warping. - fdata2 (:obj:`FData`): Second warping. - eval_points (array_like, optional): Array with points of evaluation. + warping1: First warping. + warping2: Second warping. + eval_points: Array with points of evaluation. Returns: - float: Distance between warpings: + Distance between warpings: Raises: ValueError: If the objects are not unidimensional. @@ -630,8 +939,12 @@ def warping_distance(warping1, warping2, *, eval_points=None, _check=True): Functions* (pp. 113-117). Springer. """ - warping1, warping2 = _cast_to_grid(warping1, warping2, - eval_points=eval_points, _check=_check) + warping1, warping2 = _cast_to_grid( + warping1, + warping2, + eval_points=eval_points, + _check=_check, + ) # Normalization of warping to (0,1)x(0,1) warping1 = normalize_warping(warping1, (0, 1)) diff --git a/skfda/misc/operators/_identity.py b/skfda/misc/operators/_identity.py index 16067002e..d2d3ac259 100644 --- a/skfda/misc/operators/_identity.py +++ b/skfda/misc/operators/_identity.py @@ -33,6 +33,6 @@ def basis_penalty_matrix_optimized( def fdatagrid_penalty_matrix_optimized( linear_operator: Identity, basis: FDataGrid): - from ..metrics import lp_norm + from ..metrics import l2_norm - return np.diag(lp_norm(basis)**2) + return np.diag(l2_norm(basis)**2) diff --git a/skfda/ml/_neighbors_base.py b/skfda/ml/_neighbors_base.py index ac7b02662..64d3a057a 100644 --- a/skfda/ml/_neighbors_base.py +++ b/skfda/ml/_neighbors_base.py @@ -2,13 +2,11 @@ from abc import ABC -from sklearn.base import BaseEstimator -from sklearn.base import RegressorMixin -from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted - import numpy as np +from sklearn.base import BaseEstimator, RegressorMixin +from sklearn.utils.validation import 
check_is_fitted as sklearn_check_is_fitted -from .. import FDataGrid, FData +from .. import FData, FDataGrid from ..misc.metrics import lp_distance @@ -84,11 +82,11 @@ def _to_multivariate_metric(metric, grid_points): # Shape -> (n_samples = 1, domain_dims...., image_dimension (-1)) shape = [1] + [len(axis) for axis in grid_points] + [-1] - def multivariate_metric(x, y, _check=False, **kwargs): + def multivariate_metric(x, y, **kwargs): return metric(_from_multivariate(x, grid_points, shape), _from_multivariate(y, grid_points, shape), - _check=_check, **kwargs) + **kwargs) return multivariate_metric diff --git a/skfda/representation/_typing.py b/skfda/representation/_typing.py index 3865337a6..78a87f2fe 100644 --- a/skfda/representation/_typing.py +++ b/skfda/representation/_typing.py @@ -1,7 +1,10 @@ """Common types.""" -from typing import Optional, Sequence, Tuple, Union +from typing import Optional, Sequence, Tuple, TypeVar, Union import numpy as np +from typing_extensions import Protocol + +VectorType = TypeVar("VectorType") DomainRange = Tuple[Tuple[float, float], ...] DomainRangeLike = Union[ @@ -15,3 +18,20 @@ GridPoints = Tuple[np.ndarray, ...] GridPointsLike = Sequence[np.ndarray] + + +class Vector(Protocol): + """ + Protocol representing a generic vector. + + It should accept numpy arrays and FData, among other things. 
+ """ + + def __add__(self: VectorType, __other: VectorType) -> VectorType: + pass + + def __sub__(self: VectorType, __other: VectorType) -> VectorType: + pass + + def __mul__(self: VectorType, __other: float) -> VectorType: + pass diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 9c38b5db7..aefb779d1 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -1,49 +1,65 @@ -from skfda import FDataGrid, FDataBasis -from skfda.datasets import make_multimodal_samples -from skfda.exploratory import stats -from skfda.misc.metrics import lp_distance, lp_norm -from skfda.representation.basis import Monomial -import unittest +"""Tests for the metrics module.""" -import scipy.stats.mstats +import unittest import numpy as np +from skfda import FDataBasis, FDataGrid +from skfda.datasets import make_multimodal_samples +from skfda.misc.metrics import lp_distance, lp_norm +from skfda.representation.basis import Monomial + class TestLpMetrics(unittest.TestCase): + """Test the lp metrics.""" - def setUp(self): + def setUp(self) -> None: + """Create a few functional data objects.""" grid_points = [1, 2, 3, 4, 5] - self.fd = FDataGrid([[2, 3, 4, 5, 6], [1, 4, 9, 16, 25]], - grid_points=grid_points) + self.fd = FDataGrid( + [ + [2, 3, 4, 5, 6], + [1, 4, 9, 16, 25], + ], + grid_points=grid_points, + ) basis = Monomial(n_basis=3, domain_range=(1, 5)) self.fd_basis = FDataBasis(basis, [[1, 1, 0], [0, 0, 1]]) self.fd_curve = self.fd.concatenate(self.fd, as_coordinates=True) - self.fd_surface = make_multimodal_samples(n_samples=3, dim_domain=2, - random_state=0) - - def test_lp_norm(self): - - np.testing.assert_allclose(lp_norm(self.fd, p=1), [16., 41.33333333]) - np.testing.assert_allclose(lp_norm(self.fd, p='inf'), [6, 25]) - - def test_lp_norm_curve(self): - - np.testing.assert_allclose(lp_norm(self.fd_curve, p=1, p2=1), - [32., 82.666667]) - np.testing.assert_allclose(lp_norm(self.fd_curve, p='inf', p2='inf'), - [6, 25]) - - def test_lp_norm_surface_inf(self): - 
np.testing.assert_allclose(lp_norm(self.fd_surface, p='inf').round(5), - [0.99994, 0.99793, 0.99868]) - - def test_lp_norm_surface(self): + self.fd_surface = make_multimodal_samples( + n_samples=3, + dim_domain=2, + random_state=0, + ) + + def test_lp_norm(self) -> None: + + np.testing.assert_allclose(lp_norm(self.fd, p=1), [16.0, 41.33333333]) + np.testing.assert_allclose(lp_norm(self.fd, p=np.inf), [6, 25]) + + def test_lp_norm_curve(self) -> None: + + np.testing.assert_allclose( + lp_norm(self.fd_curve, p=1), + [32.0, 82.666667], + ) + np.testing.assert_allclose( + lp_norm(self.fd_curve, p=np.inf), + [6, 25], + ) + + def test_lp_norm_surface_inf(self) -> None: + np.testing.assert_allclose( + lp_norm(self.fd_surface, p=np.inf).round(5), + [0.99994, 0.99793, 0.99868], + ) + + def test_lp_norm_surface(self) -> None: # Integration of surfaces not implemented, add test case after # implementation self.assertEqual(lp_norm(self.fd_surface, p=1), NotImplemented) - def test_lp_error_dimensions(self): + def test_lp_error_dimensions(self) -> None: # Case internal arrays with np.testing.assert_raises(ValueError): lp_distance(self.fd, self.fd_surface) @@ -54,23 +70,32 @@ def test_lp_error_dimensions(self): with np.testing.assert_raises(ValueError): lp_distance(self.fd_surface, self.fd_curve) - def test_lp_error_domain_ranges(self): + def test_lp_error_domain_ranges(self) -> None: grid_points = [2, 3, 4, 5, 6] - fd2 = FDataGrid([[2, 3, 4, 5, 6], [1, 4, 9, 16, 25]], - grid_points=grid_points) + fd2 = FDataGrid( + [ + [2, 3, 4, 5, 6], + [1, 4, 9, 16, 25], + ], + grid_points=grid_points, + ) with np.testing.assert_raises(ValueError): lp_distance(self.fd, fd2) - def test_lp_error_grid_points(self): + def test_lp_error_grid_points(self) -> None: grid_points = [1, 2, 4, 4.3, 5] - fd2 = FDataGrid([[2, 3, 4, 5, 6], [1, 4, 9, 16, 25]], - grid_points=grid_points) + fd2 = FDataGrid( + [ + [2, 3, 4, 5, 6], + [1, 4, 9, 16, 25], + ], + grid_points=grid_points, + ) with 
np.testing.assert_raises(ValueError): lp_distance(self.fd, fd2) if __name__ == '__main__': - print() unittest.main() From 9a4fd2611a17ce1dd5fad7cc4de0278578b839a8 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Sat, 16 Jan 2021 19:48:57 +0100 Subject: [PATCH 034/417] Changes in lp norm. --- skfda/misc/metrics.py | 193 +++++++++++++++++++++--------------------- 1 file changed, 98 insertions(+), 95 deletions(-) diff --git a/skfda/misc/metrics.py b/skfda/misc/metrics.py index 972847d85..11fe04b48 100644 --- a/skfda/misc/metrics.py +++ b/skfda/misc/metrics.py @@ -279,6 +279,7 @@ def __call__(self, fdata: FData) -> np.ndarray: def lp_norm( fdata: FData, + *, p: float, vector_norm: Optional[Union[Norm[np.ndarray], float]] = None, ) -> np.ndarray: @@ -314,6 +315,12 @@ def lp_norm( \|^2 + \| f_2(x,y) \|^2 } \right )^p dxdy \right)^{ \frac{1}{p}} + Note: + This function is a wrapper of :class:`LpNorm`, available only for + convenience. As the parameter `p` is mandatory, it cannot be used + where a fully-defined norm is required: use an instance of + :class:`LpNorm` in those cases. + Args: fdata (FData): FData object. p (int, optional): p of the lp norm. Must be greater or equal @@ -353,6 +360,9 @@ def lp_norm( .... ValueError: p must be equal or greater than 1. + See also: + :class:`LpNorm` + """ return LpNorm(p=p, vector_norm=vector_norm)(fdata) @@ -380,15 +390,16 @@ class NormInducedMetric(Metric[VectorType]): Firstly we create the functional data. >>> import skfda + >>> import numpy as np >>> >>> x = np.linspace(0, 1, 1001) - >>> fd = FDataGrid([x], x) - >>> fd2 = FDataGrid([x/2], x) + >>> fd = skfda.FDataGrid([x], x) + >>> fd2 = skfda.FDataGrid([x/2], x) To construct the :math:`\mathbb{L}^2` distance it is used the :math:`\mathbb{L}^2` norm wich it is used to compute the distance. 
- >>> l2_distance = distance_from_norm(lp_norm, p=2) + >>> l2_distance = skfda.misc.metrics.NormInducedMetric(l2_norm) >>> d = l2_distance(fd, fd2) >>> float('%.3f'% d) 0.289 @@ -407,7 +418,6 @@ def __repr__(self) -> str: def distance_from_norm( norm: Norm[VectorType], - **kwargs: Any, ) -> Metric[VectorType]: r"""Return the distance induced by a norm. @@ -441,19 +451,13 @@ def distance_from_norm( To construct the :math:`\mathbb{L}^2` distance it is used the :math:`\mathbb{L}^2` norm wich it is used to compute the distance. - >>> l2_distance = distance_from_norm(lp_norm, p=2) + >>> l2_distance = distance_from_norm(l2_norm) >>> d = l2_distance(fd, fd2) >>> float('%.3f'% d) 0.289 """ - def norm_distance(fdata1: VectorType, fdata2: VectorType) -> np.ndarray: - # Substract operation checks if objects are compatible - return norm(fdata1 - fdata2, **kwargs) # type: ignore - - norm_distance.__name__ = f"{norm.__name__}_distance" - - return norm_distance + return NormInducedMetric(norm) def pairwise_distance( @@ -493,14 +497,7 @@ def pairwise(fdata1, fdata2=None): return pairwise -def lp_distance( - fdata1: T, - fdata2: T, - p: int = 2, - p2: int = 2, - *, - eval_points: np.ndarray = None, -) -> np.ndarray: +class LpDistance(NormInducedMetric[FData]): r"""Lp distance for FDataGrid objects. Calculates the distance between two functional objects. @@ -513,6 +510,9 @@ def lp_distance( where :math:`\| {}\cdot{} \|_p` denotes the :func:`Lp norm `. + The objects `l1_distance`, `l2_distance` and `linf_distance` are instances + of this class with commonly used values of `p`, namely 1, 2 and infinity. + Args: fdatagrid (FDataGrid): FDataGrid object. p (int, optional): p of the lp norm. Must be greater or equal @@ -528,10 +528,14 @@ def lp_distance( = 0 and y = x/2. The result then is an array 2x2 with the computed l2 distance between every pair of functions. 
+ >>> import skfda + >>> import numpy as np + >>> >>> x = np.linspace(0, 1, 1001) - >>> fd = FDataGrid([np.ones(len(x))], x) - >>> fd2 = FDataGrid([np.zeros(len(x))], x) - >>> lp_distance(fd, fd2).round(2) + >>> fd = skfda.FDataGrid([np.ones(len(x))], x) + >>> fd2 = skfda.FDataGrid([np.zeros(len(x))], x) + >>> + >>> skfda.misc.metrics.lp_distance(fd, fd2).round(2) array([ 1.]) @@ -540,103 +544,102 @@ def lp_distance( >>> x = np.linspace(0, 2, 1001) >>> fd2 = FDataGrid([np.zeros(len(x)), x/2 + 0.5], x) - >>> lp_distance(fd, fd2) + >>> skfda.misc.metrics.lp_distance(fd, fd2) Traceback (most recent call last): .... ValueError: ... - See also: - :func:`~skfda.misc.metrics.l1_distance - :func:`~skfda.misc.metrics.l2_distance - :func:`~skfda.misc.metrics.linf_distance - """ - _check_compatible(fdata1, fdata2) - return lp_norm(fdata1 - fdata2, p=p, vector_norm=p2) + def __init__( + self, p: float, + vector_norm: Optional[Union[Norm[np.ndarray], float]] = None, + ) -> None: + self.p = p + self.vector_norm = vector_norm + norm = LpNorm(p=p, vector_norm=vector_norm) -def l1_distance( - fdata1: T, - fdata2: T, - *, - eval_points: np.ndarray = None, -) -> np.ndarray: - r"""L1 distance for FDataGrid objects. + super().__init__(norm) - Calculates the L1 distance between fdata1 and fdata2: - .. 
math:: - d(fdata1, fdata2) = - \left( \int_D \| fdata1(x)-fdata2(x) \| dx - \right) + def __repr__(self) -> str: + return ( + f"{type(self).__name__}(" + f"p={self.p}, vector_norm={self.vector_norm})" + ) - See also: - :func:`~skfda.misc.metrics.lp_distance - :func:`~skfda.misc.metrics.l2_distance - :func:`~skfda.misc.metrics.linf_distance - """ - return lp_distance( - fdata1, - fdata2, - p=1, - p2=1, - eval_points=eval_points, - ) + +l1_distance = LpDistance(p=1) +l2_distance = LpDistance(p=2) +linf_distance = LpDistance(p=math.inf) -def l2_distance( +def lp_distance( fdata1: T, fdata2: T, *, - eval_points: np.ndarray = None, + p: float, + vector_norm: Optional[Union[Norm[np.ndarray], float]], ) -> np.ndarray: - r"""L2 distance for FDataGrid objects. + r"""Lp distance for FDataGrid objects. + + Calculates the distance between two functional objects. + + For each pair of observations f and g the distance between them is defined + as: - Calculates the euclidean distance between fdata1 and fdata2: .. math:: - d(fdata1, fdata2) = - \left( \int_D \| fdata1(x)-fdata2(x) \|^2 dx - \right)^{\frac{1}{2}} + d(f, g) = d(g, f) = \| f - g \|_p - See also: - :func:`~skfda.misc.metrics.lp_distance - :func:`~skfda.misc.metrics.l1_distance - :func:`~skfda.misc.metrics.linf_distance - """ - return lp_distance( - fdata1, - fdata2, - p=2, - p2=2, - eval_points=eval_points, - ) + where :math:`\| {}\cdot{} \|_p` denotes the :func:`Lp norm `. + Note: + This function is a wrapper of :class:`LpDistance`, available only for + convenience. As the parameter `p` is mandatory, it cannot be used + where a fully-defined metric is required: use an instance of + :class:`LpDistance` in those cases. -def linf_distance( - fdata1: T, - fdata2: T, - *, - eval_points: np.ndarray = None, -) -> np.ndarray: - r"""L_infinity distance for FDataGrid objects. + Args: + fdatagrid (FDataGrid): FDataGrid object. + p (int, optional): p of the lp norm. Must be greater or equal + than 1. 
If p='inf' or p=np.inf it is used the L infinity metric. + Defaults to 2. + p2 (int, optional): p index of the vectorial norm applied in case of + multivariate objects. Defaults to 2. See :func:`lp_norm`. - Calculates the L_infinity distance between fdata1 and fdata2: - .. math:: - d(fdata1, fdata2) \equiv \inf \{ C\ge 0 : |fdata1(x)-fdata2(x)| - \le C a.e. \}. + Examples: + Computes the distances between an object containing functional data + corresponding to the functions y = 1 and y = x defined over the + interval [0, 1] and another ones containing data of the functions y + = 0 and y = x/2. The result then is an array 2x2 with the computed + l2 distance between every pair of functions. + + >>> import skfda + >>> import numpy as np + >>> + >>> x = np.linspace(0, 1, 1001) + >>> fd = skfda.FDataGrid([np.ones(len(x))], x) + >>> fd2 = skfda.FDataGrid([np.zeros(len(x))], x) + >>> + >>> skfda.misc.metrics.lp_distance(fd, fd2).round(2) + array([ 1.]) + + + If the functional data are defined over a different set of points of + discretisation the functions returns an exception. + + >>> x = np.linspace(0, 2, 1001) + >>> fd2 = FDataGrid([np.zeros(len(x)), x/2 + 0.5], x) + >>> skfda.misc.metrics.lp_distance(fd, fd2) + Traceback (most recent call last): + .... + ValueError: ... See also: - :func:`~skfda.misc.metrics.lp_distance - :func:`~skfda.misc.metrics.l1_distance - :func:`~skfda.misc.metrics.l2_distance + :class:`~skfda.misc.metrics.LpDistance + """ - return lp_distance( - fdata1, - fdata2, - p=np.inf, - p2=np.inf, - eval_points=eval_points, - ) + return LpDistance(p=p, vector_norm=vector_norm)(fdata1, fdata2) def fisher_rao_distance( From b22f4fc5953a79fac76d8252eb23a0bee839fa03 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Mon, 18 Jan 2021 03:23:57 +0100 Subject: [PATCH 035/417] Refactor pairwise. 
--- .../plot_radius_neighbors_classification.py | 12 ++- skfda/_utils/__init__.py | 2 +- skfda/_utils/_utils.py | 76 +++++++++++++++-- .../exploratory/outliers/neighbors_outlier.py | 6 +- skfda/misc/metrics.py | 83 ++++++++++++------- skfda/ml/_neighbors_base.py | 14 ++-- .../classification/_centroid_classifiers.py | 10 +-- .../classification/_neighbors_classifiers.py | 4 +- skfda/ml/clustering/_kmeans.py | 26 +++--- skfda/ml/clustering/_neighbors_clustering.py | 2 +- skfda/ml/regression/_neighbors_regression.py | 4 +- .../preprocessing/registration/validation.py | 10 +-- tests/test_elastic.py | 36 +++++--- tests/test_math.py | 9 +- tests/test_metrics.py | 12 +-- tests/test_neighbors.py | 20 ++--- 16 files changed, 208 insertions(+), 118 deletions(-) diff --git a/examples/plot_radius_neighbors_classification.py b/examples/plot_radius_neighbors_classification.py index 57dd64a00..5a34c267d 100644 --- a/examples/plot_radius_neighbors_classification.py +++ b/examples/plot_radius_neighbors_classification.py @@ -11,15 +11,13 @@ # sphinx_gallery_thumbnail_number = 2 -import skfda -from skfda.misc.metrics import pairwise_distance, lp_distance -from skfda.ml.classification import RadiusNeighborsClassifier - -from sklearn.model_selection import train_test_split - import matplotlib.pyplot as plt import numpy as np +from sklearn.model_selection import train_test_split +import skfda +from skfda.misc.metrics import PairwiseMetric, linf_distance +from skfda.ml.classification import RadiusNeighborsClassifier ############################################################################## # @@ -89,7 +87,7 @@ # Creation of pairwise distance -l_inf = pairwise_distance(lp_distance, p=np.inf) +l_inf = PairwiseMetric(linf_distance) distances = l_inf(sample, X_train)[0] # L_inf distances to 'sample' # Plot samples in the ball diff --git a/skfda/_utils/__init__.py b/skfda/_utils/__init__.py index a8359d1c6..a652a0b82 100644 --- a/skfda/_utils/__init__.py +++ b/skfda/_utils/__init__.py @@ 
-8,7 +8,7 @@ _evaluate_grid, _FDataCallable, _int_to_real, - _pairwise_commutative, + _pairwise_symmetric, _reshape_eval_points, _same_domain, _to_array_maybe_ragged, diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index e9c7fb26c..69c2cd46c 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -8,6 +8,7 @@ TYPE_CHECKING, Any, Callable, + List, Optional, Sequence, Tuple, @@ -424,20 +425,72 @@ def integrate(*args: Any, depth: int) -> np.ndarray: # noqa: WPS430 return integrate(depth=initial_depth) -def _pairwise_commutative(function, arg1, arg2=None, **kwargs): +def _map_in_batches( + function: Callable[..., np.ndarray], + arguments: Tuple[Union[FData, np.ndarray], ...], + indexes: Tuple[np.ndarray, ...], + memory_per_batch: Optional[int] = None, + **kwargs: Any, +) -> np.ndarray: + """ + Map a function over samples of FData or ndarray tuples efficiently. + + This function prevents a large set of indexes to use all available + memory and hang the PC. + + """ + if memory_per_batch is None: + # 256MB is not too big + memory_per_batch = 256 * 1024 * 1024 + + memory_per_element = sum(a.nbytes // len(a) for a in arguments) + n_elements_per_batch_allowed = memory_per_batch // memory_per_element + print(n_elements_per_batch_allowed) + if n_elements_per_batch_allowed < 1: + raise ValueError("Too few memory allowed for the operation") + + n_indexes = len(indexes[0]) + + assert all(n_indexes == len(i) for i in indexes) + + batches: List[np.ndarray] = [] + + for pos in range(0, n_indexes, n_elements_per_batch_allowed): + print(pos) + batch_args = tuple( + a[i[pos:pos + n_elements_per_batch_allowed]] + for a, i in zip(arguments, indexes) + ) + + batches.append(function(*batch_args, **kwargs)) + + return np.concatenate(batches, axis=0) + + +def _pairwise_symmetric( + function: Callable[..., np.ndarray], + arg1: Union[FData, np.ndarray], + arg2: Optional[Union[FData, np.ndarray]] = None, + memory_per_batch: Optional[int] = None, + **kwargs: Any, +) -> 
np.ndarray: """ Compute pairwise a commutative function. """ - if arg2 is None: + if arg2 is None or arg2 is arg1: indices = np.triu_indices(len(arg1)) matrix = np.empty((len(arg1), len(arg1))) - triang_vec = function( - arg1[indices[0]], arg1[indices[1]], - **kwargs) + triang_vec = _map_in_batches( + function, + (arg1, arg1), + indices, + memory_per_batch=memory_per_batch, + **kwargs, + ) # Set upper matrix matrix[indices] = triang_vec @@ -451,10 +504,15 @@ def _pairwise_commutative(function, arg1, arg2=None, **kwargs): indices = np.indices((len(arg1), len(arg2))) - return function( - arg1[indices[0].ravel()], arg2[indices[1].ravel()], - **kwargs).reshape( - (len(arg1), len(arg2))) + vec = _map_in_batches( + function, + (arg1, arg2), + (indices[0].ravel(), indices[1].ravel()), + memory_per_batch=memory_per_batch, + **kwargs, + ) + + return vec.reshape((len(arg1), len(arg2))) def _int_to_real(array: np.ndarray) -> np.ndarray: diff --git a/skfda/exploratory/outliers/neighbors_outlier.py b/skfda/exploratory/outliers/neighbors_outlier.py index d9fd591a0..ce12611d9 100644 --- a/skfda/exploratory/outliers/neighbors_outlier.py +++ b/skfda/exploratory/outliers/neighbors_outlier.py @@ -1,7 +1,7 @@ """Neighbors outlier detection methods.""" from sklearn.base import OutlierMixin -from ...misc.metrics import lp_distance +from ...misc.metrics import l2_distance from ...ml._neighbors_base import ( KNeighborsMixin, NeighborsBase, @@ -51,7 +51,7 @@ class LocalOutlierFactor(NeighborsBase, NeighborsMixin, KNeighborsMixin, required to store the tree. The optimal value depends on the nature of the problem. metric : string or callable, (default - :func:`lp_distance `) + :func:`l2_distance `) the distance metric to use for the tree. The default metric is the L2 distance. See the documentation of the metrics module for a list of available metrics. 
@@ -286,7 +286,7 @@ def fit_predict(self, X, y=None): if not self.multivariate_metric: # Constructs sklearn metric to manage vector if self.metric == 'l2': - metric = lp_distance + metric = l2_distance else: metric = self.metric sklearn_metric = _to_multivariate_metric(metric, diff --git a/skfda/misc/metrics.py b/skfda/misc/metrics.py index 11fe04b48..32483a6f4 100644 --- a/skfda/misc/metrics.py +++ b/skfda/misc/metrics.py @@ -2,13 +2,13 @@ import warnings from abc import abstractmethod from builtins import isinstance -from typing import Any, Optional, Tuple, TypeVar, Union +from typing import Any, Generic, Optional, Tuple, TypeVar, Union import numpy as np import scipy.integrate from typing_extensions import Protocol -from .._utils import _pairwise_commutative +from .._utils import _pairwise_symmetric from ..preprocessing.registration import ElasticRegistration, normalize_warping from ..preprocessing.registration._warping import _normalize_scale from ..preprocessing.registration.elastic import SRSF @@ -40,18 +40,6 @@ def __call__( """Compute the norm of a vector.""" -class PairwiseMetric(Protocol[MetricElementType]): - """Protocol for a pairwise metric between elements of a metric space.""" - - @abstractmethod - def __call__( - self, - __e1: MetricElementType, - __e2: Optional[MetricElementType] = None, - ) -> np.ndarray: - """Compute the norm of a vector.""" - - def _check_compatible(fdata1: T, fdata2: T) -> None: if isinstance(fdata1, FData) and isinstance(fdata2, FData): @@ -460,9 +448,40 @@ def distance_from_norm( return NormInducedMetric(norm) +class PairwiseMetric(Generic[MetricElementType]): + r"""Pairwise metric function. + + Computes a given metric pairwise. The matrix returned by the pairwise + metric is a matrix with as many rows as observations in the first object + and as many columns as observations in the second one. 
Each element + (i, j) of the matrix is the distance between the ith observation of the + first object and the jth observation of the second one. + + Args: + metric: Metric between two elements of a metric + space. + + """ + + def __init__( + self, + metric: Metric[MetricElementType], + ): + self.metric = metric + + def __call__( + self, + elem1: MetricElementType, + elem2: Optional[MetricElementType] = None, + ) -> np.ndarray: + return _pairwise_symmetric(self.metric, elem1, elem2) + + def __repr__(self) -> str: + return f"{type(self).__name__}(metric={self.metric})" + + def pairwise_distance( - distance: Metric[MetricElementType], - **kwargs, + distance: Metric[MetricElementType] ) -> PairwiseMetric[MetricElementType]: r"""Return a pairwise distance function for FData objects. @@ -478,6 +497,9 @@ def pairwise_distance( the ith observation of the first object and the jth observation of the second one. + .. deprecated:: 0.6 + Use class :class:`PairwiseMetric` instead. + Args: distance (:obj:`Function`): Distance functions between two functional objects `distance(fdata1, fdata2, **kwargs)`. @@ -488,13 +510,14 @@ def pairwise_distance( :obj:`Function`: Pairwise distance function, wich accepts two functional data objects and returns the pairwise distance matrix. """ - def pairwise(fdata1, fdata2=None): - - return _pairwise_commutative(distance, fdata1, fdata2, **kwargs) - pairwise.__name__ = f"pairwise_{distance.__name__}" + warnings.warn( + "Function pairwise_distance is deprecated. 
Use the " + "class PairwiseMetric instead.", + DeprecationWarning, + ) - return pairwise + return PairwiseMetric(distance) class LpDistance(NormInducedMetric[FData]): @@ -535,7 +558,8 @@ class LpDistance(NormInducedMetric[FData]): >>> fd = skfda.FDataGrid([np.ones(len(x))], x) >>> fd2 = skfda.FDataGrid([np.zeros(len(x))], x) >>> - >>> skfda.misc.metrics.lp_distance(fd, fd2).round(2) + >>> distance = skfda.misc.metrics.LpDistance(p=2) + >>> distance(fd, fd2).round(2) array([ 1.]) @@ -544,9 +568,10 @@ class LpDistance(NormInducedMetric[FData]): >>> x = np.linspace(0, 2, 1001) >>> fd2 = FDataGrid([np.zeros(len(x)), x/2 + 0.5], x) - >>> skfda.misc.metrics.lp_distance(fd, fd2) + >>> distance = skfda.misc.metrics.LpDistance(p=2) + >>> distance(fd, fd2) Traceback (most recent call last): - .... + ... ValueError: ... """ @@ -579,7 +604,7 @@ def lp_distance( fdata2: T, *, p: float, - vector_norm: Optional[Union[Norm[np.ndarray], float]], + vector_norm: Optional[Union[Norm[np.ndarray], float]] = None, ) -> np.ndarray: r"""Lp distance for FDataGrid objects. @@ -621,7 +646,7 @@ def lp_distance( >>> fd = skfda.FDataGrid([np.ones(len(x))], x) >>> fd2 = skfda.FDataGrid([np.zeros(len(x))], x) >>> - >>> skfda.misc.metrics.lp_distance(fd, fd2).round(2) + >>> skfda.misc.metrics.lp_distance(fd, fd2, p=2).round(2) array([ 1.]) @@ -630,7 +655,7 @@ def lp_distance( >>> x = np.linspace(0, 2, 1001) >>> fd2 = FDataGrid([np.zeros(len(x)), x/2 + 0.5], x) - >>> skfda.misc.metrics.lp_distance(fd, fd2) + >>> skfda.misc.metrics.lp_distance(fd, fd2, p=2) Traceback (most recent call last): .... ValueError: ... 
@@ -710,7 +735,7 @@ def fisher_rao_distance( fdata2_srsf = srsf.transform(fdata2) # Return the L2 distance of the SRSF - return lp_distance(fdata1_srsf, fdata2_srsf, p=2) + return l2_distance(fdata1_srsf, fdata2_srsf) def amplitude_distance( @@ -802,7 +827,7 @@ def amplitude_distance( srsf = SRSF(initial_value=0) fdata1_reg_srsf = srsf.fit_transform(fdata1_reg) fdata2_srsf = srsf.transform(fdata2) - distance = lp_distance(fdata1_reg_srsf, fdata2_srsf) + distance = l2_distance(fdata1_reg_srsf, fdata2_srsf) if lam != 0.0: # L2 norm || sqrt(Dh) - 1 ||^2 diff --git a/skfda/ml/_neighbors_base.py b/skfda/ml/_neighbors_base.py index 64d3a057a..5d2ffebd0 100644 --- a/skfda/ml/_neighbors_base.py +++ b/skfda/ml/_neighbors_base.py @@ -7,7 +7,7 @@ from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted from .. import FData, FDataGrid -from ..misc.metrics import lp_distance +from ..misc.metrics import l2_distance def _to_multivariate(fdatagrid): @@ -61,7 +61,7 @@ def _to_multivariate_metric(metric, grid_points): >>> import numpy as np >>> from skfda import FDataGrid - >>> from skfda.misc.metrics import lp_distance + >>> from skfda.misc.metrics import l2_distance >>> from skfda.ml._neighbors_base import _to_multivariate_metric Calculate the Lp distance between fd and fd2. @@ -69,13 +69,13 @@ def _to_multivariate_metric(metric, grid_points): >>> x = np.linspace(0, 1, 101) >>> fd = FDataGrid([np.ones(len(x))], x) >>> fd2 = FDataGrid([np.zeros(len(x))], x) - >>> lp_distance(fd, fd2).round(2) + >>> l2_distance(fd, fd2).round(2) array([ 1.]) Creation of the sklearn-style metric. 
- >>> sklearn_lp_distance = _to_multivariate_metric(lp_distance, [x]) - >>> sklearn_lp_distance(np.ones(len(x)), np.zeros(len(x))).round(2) + >>> sklearn_l2_distance = _to_multivariate_metric(l2_distance, [x]) + >>> sklearn_l2_distance(np.ones(len(x)), np.zeros(len(x))).round(2) array([ 1.]) """ @@ -160,7 +160,7 @@ def fit(self, X, y=None): if not self.multivariate_metric: # Constructs sklearn metric to manage vector if self.metric == 'l2': - metric = lp_distance + metric = l2_distance else: metric = self.metric @@ -497,7 +497,7 @@ def _functional_fit(self, X, y): if not self.multivariate_metric: if self.metric == 'l2': - metric = lp_distance + metric = l2_distance else: metric = self.metric diff --git a/skfda/ml/classification/_centroid_classifiers.py b/skfda/ml/classification/_centroid_classifiers.py index 8b36fc001..3f7d112f9 100644 --- a/skfda/ml/classification/_centroid_classifiers.py +++ b/skfda/ml/classification/_centroid_classifiers.py @@ -8,7 +8,7 @@ from ..._utils import _classifier_get_classes from ...exploratory.depth import Depth, ModifiedBandDepth from ...exploratory.stats import mean, trim_mean -from ...misc.metrics import l2_distance, lp_distance, pairwise_distance +from ...misc.metrics import PairwiseMetric, l2_distance class NearestCentroid(BaseEstimator, ClassifierMixin): @@ -19,7 +19,7 @@ class NearestCentroid(BaseEstimator, ClassifierMixin): Parameters: metric: callable, (default - :func:`lp_distance `) + :func:`l2_distance `) The metric to use when calculating distance between test samples and centroids. See the documentation of the metrics module for a list of available metrics. Defaults used L2 distance. @@ -99,7 +99,7 @@ def predict(self, X): """ sklearn_check_is_fitted(self) - return self.classes_[pairwise_distance(self.metric)( + return self.classes_[PairwiseMetric(self.metric)( X, self.centroids_, ).argmin(axis=1) @@ -122,7 +122,7 @@ class DTMClassifier(BaseEstimator, ClassifierMixin): the depths module for a list of available depths. 
By default it is ModifiedBandDepth. metric (Callable, default - :func:`lp_distance `): + :func:`l2_distance `): Distance function between two functional objects. See the documentation of the metrics module for a list of available metrics. @@ -168,7 +168,7 @@ def __init__( self, proportiontocut: float, depth_method: Depth = None, - metric: Callable = lp_distance, + metric: Callable = l2_distance, ) -> None: self.proportiontocut = proportiontocut diff --git a/skfda/ml/classification/_neighbors_classifiers.py b/skfda/ml/classification/_neighbors_classifiers.py index 743d269ee..6339ba542 100644 --- a/skfda/ml/classification/_neighbors_classifiers.py +++ b/skfda/ml/classification/_neighbors_classifiers.py @@ -49,7 +49,7 @@ class KNeighborsClassifier(NeighborsBase, NeighborsMixin, KNeighborsMixin, required to store the tree. The optimal value depends on the nature of the problem. metric: string or callable, (default - :func:`lp_distance `) + :func:`l2_distance `) the distance metric to use for the tree. The default metric is the L2 distance. See the documentation of the metrics module for a list of available metrics. @@ -199,7 +199,7 @@ class RadiusNeighborsClassifier(NeighborsBase, NeighborsMixin, required to store the tree. The optimal value depends on the nature of the problem. metric: string or callable, (default - :func:`lp_distance `) + :func:`l2_distance `) the distance metric to use for the tree. The default metric is the L2 distance. See the documentation of the metrics module for a list of available metrics. 
diff --git a/skfda/ml/clustering/_kmeans.py b/skfda/ml/clustering/_kmeans.py index fecc4e35f..09e644b22 100644 --- a/skfda/ml/clustering/_kmeans.py +++ b/skfda/ml/clustering/_kmeans.py @@ -8,7 +8,7 @@ from sklearn.utils import check_random_state from sklearn.utils.validation import check_is_fitted -from ...misc.metrics import lp_distance, pairwise_distance +from ...misc.metrics import PairwiseMetric, l2_distance class BaseKMeans(BaseEstimator, ClusterMixin, TransformerMixin): @@ -33,7 +33,7 @@ def __init__(self, n_clusters, init, metric, n_init, max_iter, tol, fdatagrid.dim_codomain). Defaults to None, and the centers are initialized randomly. metric (optional): functional data metric. Defaults to - *lp_distance*. + *l2_distance*. n_init (int, optional): Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of @@ -187,7 +187,7 @@ def _algorithm(self, fdata, random_state): centroids = self._init_centroids(fdata, random_state) centroids_old = centroids.copy(data_matrix=centroids_old_matrix) - pairwise_metric = pairwise_distance(self.metric) + pairwise_metric = PairwiseMetric(self.metric) tolerance = self._tolerance(fdata) @@ -197,8 +197,7 @@ def _algorithm(self, fdata, random_state): centroids_old.data_matrix[...] 
= centroids.data_matrix - distances_to_centroids = pairwise_metric(fdata1=fdata, - fdata2=centroids) + distances_to_centroids = pairwise_metric(fdata, centroids) self._update( fdata=fdata, @@ -290,10 +289,9 @@ def predict(self, X, sample_weight=None): membership_matrix = self._create_membership(X.n_samples) centroids = self.cluster_centers_.copy() - pairwise_metric = pairwise_distance(self.metric) + pairwise_metric = PairwiseMetric(self.metric) - distances_to_centroids = pairwise_metric(fdata1=X, - fdata2=centroids) + distances_to_centroids = pairwise_metric(X, centroids) self._update( fdata=X, @@ -420,7 +418,7 @@ class KMeans(BaseKMeans): fdatagrid.dim_codomain). Defaults to None, and the centers are initialized randomly. metric (optional): functional data metric. Defaults to - *lp_distance*. + *l2_distance*. n_init (int, optional): Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia. @@ -475,7 +473,7 @@ class KMeans(BaseKMeans): """ def __init__(self, n_clusters=2, init=None, - metric=lp_distance, + metric=l2_distance, n_init=1, max_iter=100, tol=1e-4, random_state=0): """Initialization of the KMeans class. @@ -488,7 +486,7 @@ def __init__(self, n_clusters=2, init=None, fdatagrid.dim_codomain). Defaults to None, and the centers are initialized randomly. metric (optional): functional data metric. Defaults to - *lp_distance*. + *l2_distance*. n_init (int, optional): Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms @@ -600,7 +598,7 @@ class FuzzyCMeans(BaseKMeans): fdatagrid.dim_codomain). Defaults to None, and the centers are initialized randomly. metric (optional): functional data metric. Defaults to - *lp_distance*. + *l2_distance*. n_init (int, optional): Number of time the k-means algorithm will be run with different centroid seeds. 
The final results will be the best output of n_init consecutive runs in terms of inertia. @@ -655,7 +653,7 @@ class FuzzyCMeans(BaseKMeans): """ def __init__(self, n_clusters=2, init=None, - metric=lp_distance, n_init=1, max_iter=100, + metric=l2_distance, n_init=1, max_iter=100, tol=1e-4, random_state=0, fuzzifier=2): """Initialization of the FuzzyKMeans class. @@ -668,7 +666,7 @@ def __init__(self, n_clusters=2, init=None, fdatagrid.dim_codomain). Defaults to None, and the centers are initialized randomly. metric (optional): functional data metric. Defaults to - *lp_distance*. + *l2_distance*. n_init (int, optional): Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia. diff --git a/skfda/ml/clustering/_neighbors_clustering.py b/skfda/ml/clustering/_neighbors_clustering.py index 3b305b961..143f62338 100644 --- a/skfda/ml/clustering/_neighbors_clustering.py +++ b/skfda/ml/clustering/_neighbors_clustering.py @@ -33,7 +33,7 @@ class NearestNeighbors(NeighborsBase, NeighborsMixin, KNeighborsMixin, required to store the tree. The optimal value depends on the nature of the problem. metric : string or callable, (default - :func:`lp_distance `) + :func:`l2_distance `) the distance metric to use for the tree. The default metric is the L2 distance. See the documentation of the metrics module for a list of available metrics. diff --git a/skfda/ml/regression/_neighbors_regression.py b/skfda/ml/regression/_neighbors_regression.py index 9308f824b..7da0e4316 100644 --- a/skfda/ml/regression/_neighbors_regression.py +++ b/skfda/ml/regression/_neighbors_regression.py @@ -57,7 +57,7 @@ class KNeighborsRegressor(NeighborsBase, NeighborsRegressorMixin, required to store the tree. The optimal value depends on the nature of the problem. metric : string or callable, (default - :func:`lp_distance `) + :func:`l2_distance `) the distance metric to use for the tree. 
The default metric is the L2 distance. See the documentation of the metrics module for a list of available metrics. @@ -219,7 +219,7 @@ class RadiusNeighborsRegressor(NeighborsBase, NeighborsRegressorMixin, required to store the tree. The optimal value depends on the nature of the problem. metric : string or callable, (default - :func:`lp_distance `) + :func:`l2_distance `) the distance metric to use for the tree. The default metric is the L2 distance. See the documentation of the metrics module for a list of available metrics. diff --git a/skfda/preprocessing/registration/validation.py b/skfda/preprocessing/registration/validation.py index fc27ee72f..49d9b8126 100644 --- a/skfda/preprocessing/registration/validation.py +++ b/skfda/preprocessing/registration/validation.py @@ -4,7 +4,7 @@ import numpy as np -from ..._utils import check_is_univariate, _to_grid +from ..._utils import _to_grid, check_is_univariate class RegistrationScorer(): @@ -435,7 +435,7 @@ def score_function(self, X, y): float: Score of the transformation. """ - from ...misc.metrics import pairwise_distance, lp_distance + from ...misc.metrics import PairwiseMetric, l2_distance check_is_univariate(X) check_is_univariate(y) @@ -456,7 +456,7 @@ def score_function(self, X, y): mean_y = C2 * y.mean() # Compute distance to mean - distance = pairwise_distance(lp_distance) + distance = PairwiseMetric(l2_distance) ls_x = distance(X, mean_X).flatten() ls_y = distance(y, mean_y).flatten() @@ -550,7 +550,7 @@ def score_function(self, X, y): float: Score of the transformation. 
""" - from ...misc.metrics import pairwise_distance, lp_distance + from ...misc.metrics import PairwiseMetric, l2_distance check_is_univariate(X) check_is_univariate(y) @@ -563,7 +563,7 @@ def score_function(self, X, y): X, y = _to_grid(X, y, eval_points=self.eval_points) # L2 distance to mean - distance = pairwise_distance(lp_distance) + distance = PairwiseMetric(l2_distance) sls_x = distance(X, X.mean()) sls_y = distance(y, y.mean()) diff --git a/tests/test_elastic.py b/tests/test_elastic.py index 1671cd5a4..b5d3dd3e6 100644 --- a/tests/test_elastic.py +++ b/tests/test_elastic.py @@ -1,20 +1,30 @@ -from skfda import FDataGrid -from skfda.datasets import make_multimodal_samples, make_random_warping -from skfda.misc.metrics import (fisher_rao_distance, amplitude_distance, - phase_distance, pairwise_distance, lp_distance, - warping_distance) -from skfda.preprocessing.registration import (ElasticRegistration, - invert_warping, - normalize_warping) -from skfda.preprocessing.registration.elastic import (SRSF, elastic_mean, - warping_mean) import unittest import numpy as np - -metric = pairwise_distance(lp_distance) -pairwise_fisher_rao = pairwise_distance(fisher_rao_distance) +from skfda import FDataGrid +from skfda.datasets import make_multimodal_samples, make_random_warping +from skfda.misc.metrics import ( + PairwiseMetric, + amplitude_distance, + fisher_rao_distance, + l2_distance, + phase_distance, + warping_distance, +) +from skfda.preprocessing.registration import ( + ElasticRegistration, + invert_warping, + normalize_warping, +) +from skfda.preprocessing.registration.elastic import ( + SRSF, + elastic_mean, + warping_mean, +) + +metric = PairwiseMetric(l2_distance) +pairwise_fisher_rao = PairwiseMetric(fisher_rao_distance) class TestElasticRegistration(unittest.TestCase): diff --git a/tests/test_math.py b/tests/test_math.py index d86be7b40..d2da4017c 100644 --- a/tests/test_math.py +++ b/tests/test_math.py @@ -1,10 +1,11 @@ -import skfda -from skfda._utils 
import _pairwise_commutative -from skfda.representation.basis import Monomial, Tensor, VectorValued import unittest import numpy as np +import skfda +from skfda._utils import _pairwise_symmetric +from skfda.representation.basis import Monomial, Tensor, VectorValued + def ndm(*args): return [x[(None,) * i + (slice(None),) + (None,) * (len(args) - i - 1)] @@ -92,7 +93,7 @@ def test_matrix(self): np.testing.assert_allclose(gram, gram_basis, rtol=1e-2) - gram_pairwise = _pairwise_commutative( + gram_pairwise = _pairwise_symmetric( skfda.misc.inner_product, X, Y) np.testing.assert_allclose(gram, gram_pairwise) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index aefb779d1..2b5fbca0a 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -6,7 +6,7 @@ from skfda import FDataBasis, FDataGrid from skfda.datasets import make_multimodal_samples -from skfda.misc.metrics import lp_distance, lp_norm +from skfda.misc.metrics import l2_distance, lp_norm from skfda.representation.basis import Monomial @@ -62,13 +62,13 @@ def test_lp_norm_surface(self) -> None: def test_lp_error_dimensions(self) -> None: # Case internal arrays with np.testing.assert_raises(ValueError): - lp_distance(self.fd, self.fd_surface) + l2_distance(self.fd, self.fd_surface) with np.testing.assert_raises(ValueError): - lp_distance(self.fd, self.fd_curve) + l2_distance(self.fd, self.fd_curve) with np.testing.assert_raises(ValueError): - lp_distance(self.fd_surface, self.fd_curve) + l2_distance(self.fd_surface, self.fd_curve) def test_lp_error_domain_ranges(self) -> None: grid_points = [2, 3, 4, 5, 6] @@ -81,7 +81,7 @@ def test_lp_error_domain_ranges(self) -> None: ) with np.testing.assert_raises(ValueError): - lp_distance(self.fd, fd2) + l2_distance(self.fd, fd2) def test_lp_error_grid_points(self) -> None: grid_points = [1, 2, 4, 4.3, 5] @@ -94,7 +94,7 @@ def test_lp_error_grid_points(self) -> None: ) with np.testing.assert_raises(ValueError): - lp_distance(self.fd, fd2) + 
l2_distance(self.fd, fd2) if __name__ == '__main__': diff --git a/tests/test_neighbors.py b/tests/test_neighbors.py index e699afa22..903a9528d 100644 --- a/tests/test_neighbors.py +++ b/tests/test_neighbors.py @@ -7,7 +7,7 @@ from skfda.datasets import make_multimodal_samples, make_sinusoidal_process from skfda.exploratory.outliers import LocalOutlierFactor # Pending theory from skfda.exploratory.stats import mean -from skfda.misc.metrics import l2_distance, lp_distance, pairwise_distance +from skfda.misc.metrics import PairwiseMetric, l2_distance from skfda.ml.classification import ( KNeighborsClassifier, NearestCentroid, @@ -71,7 +71,7 @@ def test_predict_classifier(self): KNeighborsClassifier(), RadiusNeighborsClassifier(radius=0.1), NearestCentroid(), - NearestCentroid(metric=lp_distance, centroid=mean), + NearestCentroid(metric=l2_distance, centroid=mean), ): neigh.fit(self.X, self.y) @@ -84,7 +84,7 @@ def test_predict_classifier(self): def test_predict_proba_classifier(self): """Tests predict proba for k neighbors classifier.""" - neigh = KNeighborsClassifier(metric=lp_distance) + neigh = KNeighborsClassifier(metric=l2_distance) neigh.fit(self.X, self.y) probs = neigh.predict_proba(self.X) @@ -139,7 +139,7 @@ def test_kneighbors(self): graph = neigh.kneighbors_graph(self.X[:4]) - dist_kneigh = lp_distance(self.X[0], self.X[7]) + dist_kneigh = l2_distance(self.X[0], self.X[7]) np.testing.assert_array_almost_equal(dist[0, 1], dist_kneigh) @@ -167,7 +167,7 @@ def test_radius_neighbors(self): np.testing.assert_array_equal(links[2], np.array([2, 17, 22, 27])) np.testing.assert_array_equal(links[3], np.array([3, 4, 9])) - dist_kneigh = lp_distance(self.X[0], self.X[7]) + dist_kneigh = l2_distance(self.X[0], self.X[7]) np.testing.assert_array_almost_equal(dist[0][1], dist_kneigh) @@ -209,7 +209,7 @@ def test_knn_functional_response_precomputed(self): weights='distance', metric='precomputed', ) - d = pairwise_distance(lp_distance) + d = PairwiseMetric(l2_distance) 
distances = d(self.X[:4], self.X[:4]) knnr.fit(distances, self.X[:4]) @@ -221,7 +221,7 @@ def test_knn_functional_response_precomputed(self): def test_radius_functional_response(self): knnr = RadiusNeighborsRegressor( - metric=lp_distance, + metric=l2_distance, weights='distance', ) @@ -251,7 +251,7 @@ def test_functional_regression_distance_weights(self): knnr.fit(self.X[:10], self.X[:10]) res = knnr.predict(self.X[11]) - d = pairwise_distance(lp_distance) + d = PairwiseMetric(l2_distance) distances = d(self.X[:10], self.X[11]).flatten() weights = 1 / distances @@ -311,7 +311,7 @@ def test_functional_regressor_exceptions(self): knnr.fit(self.X[:3], self.X[:4]) def test_search_neighbors_precomputed(self): - d = pairwise_distance(lp_distance) + d = PairwiseMetric(l2_distance) distances = d(self.X[:4], self.X[:4]) nn = NearestNeighbors(metric='precomputed', n_neighbors=2) @@ -401,7 +401,7 @@ def test_lof_fit_predict(self): res2 = lof2.fit_predict(self.fd_lof) np.testing.assert_array_equal(expected, res2) - d = pairwise_distance(lp_distance) + d = PairwiseMetric(l2_distance) distances = d(self.fd_lof, self.fd_lof) # With precompute distances From f310b8c3db675d877bab7df0be99253c66164918 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Mon, 18 Jan 2021 23:03:22 +0100 Subject: [PATCH 036/417] Optimize L2 distance. 
--- skfda/_utils/_utils.py | 2 -- skfda/misc/metrics.py | 66 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 69c2cd46c..6a4aecd48 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -445,7 +445,6 @@ def _map_in_batches( memory_per_element = sum(a.nbytes // len(a) for a in arguments) n_elements_per_batch_allowed = memory_per_batch // memory_per_element - print(n_elements_per_batch_allowed) if n_elements_per_batch_allowed < 1: raise ValueError("Too few memory allowed for the operation") @@ -456,7 +455,6 @@ def _map_in_batches( batches: List[np.ndarray] = [] for pos in range(0, n_indexes, n_elements_per_batch_allowed): - print(pos) batch_args = tuple( a[i[pos:pos + n_elements_per_batch_allowed]] for a, i in zip(arguments, indexes) diff --git a/skfda/misc/metrics.py b/skfda/misc/metrics.py index 32483a6f4..b8d3a6b8a 100644 --- a/skfda/misc/metrics.py +++ b/skfda/misc/metrics.py @@ -4,6 +4,7 @@ from builtins import isinstance from typing import Any, Generic, Optional, Tuple, TypeVar, Union +import multimethod import numpy as np import scipy.integrate from typing_extensions import Protocol @@ -448,6 +449,20 @@ def distance_from_norm( return NormInducedMetric(norm) +@multimethod.multidispatch +def pairwise_metric_optimization( + metric: Any, + elem1: Any, + elem2: Optional[Any], +) -> np.ndarray: + r""" + Generic function that can be subclassed for different combinations of + metric and operators in order to provide a more efficient implementation + for the pairwise metric matrix. + """ + return NotImplemented + + class PairwiseMetric(Generic[MetricElementType]): r"""Pairwise metric function. 
@@ -474,7 +489,13 @@ def __call__( elem1: MetricElementType, elem2: Optional[MetricElementType] = None, ) -> np.ndarray: - return _pairwise_symmetric(self.metric, elem1, elem2) + optimized = pairwise_metric_optimization(self.metric, elem1, elem2) + + return ( + _pairwise_symmetric(self.metric, elem1, elem2) + if optimized is NotImplemented + else optimized + ) def __repr__(self) -> str: return f"{type(self).__name__}(metric={self.metric})" @@ -599,6 +620,49 @@ def __repr__(self) -> str: linf_distance = LpDistance(p=math.inf) +@pairwise_metric_optimization.register +def _pairwise_metric_optimization_lp_fdata( + metric: LpDistance, + elem1: FData, + elem2: Optional[FData], +) -> np.ndarray: + from ..misc import inner_product, inner_product_matrix + + vector_norm = metric.vector_norm + + if vector_norm is None: + vector_norm = metric.p + + # Special case, the inner product is heavily optimized + if metric.p == vector_norm == 2: + diag1 = inner_product(elem1, elem1) + diag2 = diag1 if elem2 is None else inner_product(elem2, elem2) + + if elem2 is None: + elem2 = elem1 + + inner_matrix = inner_product_matrix(elem1, elem2) + + distance_matrix_sqr = ( + -2 * inner_matrix + + diag1[:, np.newaxis] + + diag2[np.newaxis, :] + ) + + np.clip( + distance_matrix_sqr, + a_min=0, + a_max=None, + out=distance_matrix_sqr, + ) + + distance_matrix = np.sqrt(distance_matrix_sqr) + + return distance_matrix + + return NotImplemented + + def lp_distance( fdata1: T, fdata2: T, From 614d35b6869fc86ddc01baf3d0b0c188ba74d5d3 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Tue, 19 Jan 2021 22:18:39 +0100 Subject: [PATCH 037/417] Improve style in metrics module. 
--- skfda/misc/metrics.py | 185 +++++++++++++++++++++++------------------- 1 file changed, 101 insertions(+), 84 deletions(-) diff --git a/skfda/misc/metrics.py b/skfda/misc/metrics.py index b8d3a6b8a..5d4f48854 100644 --- a/skfda/misc/metrics.py +++ b/skfda/misc/metrics.py @@ -1,3 +1,4 @@ +"""Norms and metrics for functional data objects.""" import math import warnings from abc import abstractmethod @@ -25,7 +26,7 @@ class Norm(Protocol[VectorType]): """Protocol for a norm of a vector.""" @abstractmethod - def __call__(self, __vector: VectorType) -> np.ndarray: + def __call__(self, __vector: VectorType) -> np.ndarray: # noqa: WPS112 """Compute the norm of a vector.""" @@ -35,8 +36,8 @@ class Metric(Protocol[MetricElementType]): @abstractmethod def __call__( self, - __e1: MetricElementType, - __e2: MetricElementType, + __e1: MetricElementType, # noqa: WPS112 + __e2: MetricElementType, # noqa: WPS112 ) -> np.ndarray: """Compute the norm of a vector.""" @@ -44,8 +45,10 @@ def __call__( def _check_compatible(fdata1: T, fdata2: T) -> None: if isinstance(fdata1, FData) and isinstance(fdata2, FData): - if (fdata2.dim_codomain != fdata1.dim_codomain or - fdata2.dim_domain != fdata1.dim_domain): + if ( + fdata2.dim_codomain != fdata1.dim_codomain + or fdata2.dim_domain != fdata1.dim_domain + ): raise ValueError("Objects should have the same dimensions") if not np.array_equal(fdata1.domain_range, fdata2.domain_range): @@ -64,11 +67,12 @@ def _cast_to_grid( and converts them to FDatagrid to compute their distances. Args: - fdata1: (:obj:`FData`): First functional object. - fdata2: (:obj:`FData`): Second functional object. + fdata1: First functional object. + fdata2: Second functional object. + eval_points: Evaluation points. Returns: - tuple: Tuple with two :obj:`FDataGrid` with the same grid points. + Tuple with two :obj:`FDataGrid` with the same grid points. 
""" # Dont perform any check if not _check: @@ -77,7 +81,7 @@ def _cast_to_grid( _check_compatible(fdata1, fdata2) # Case new evaluation points specified - if eval_points is not None: + if eval_points is not None: # noqa: WPS223 fdata1 = fdata1.to_grid(eval_points) fdata2 = fdata2.to_grid(eval_points) @@ -87,17 +91,23 @@ def _cast_to_grid( elif not isinstance(fdata2, FDataGrid) and isinstance(fdata1, FDataGrid): fdata2 = fdata2.to_grid(fdata1.grid_points[0]) - elif (not isinstance(fdata1, FDataGrid) and - not isinstance(fdata2, FDataGrid)): + elif ( + not isinstance(fdata1, FDataGrid) + and not isinstance(fdata2, FDataGrid) + ): domain = fdata1.domain_range[0] grid_points = np.linspace(*domain) fdata1 = fdata1.to_grid(grid_points) fdata2 = fdata2.to_grid(grid_points) - elif not np.array_equal(fdata1.grid_points, - fdata2.grid_points): - raise ValueError("Grid points for both objects must be equal or" - "a new list evaluation points must be specified") + elif not np.array_equal( + fdata1.grid_points, + fdata2.grid_points, + ): + raise ValueError( + "Grid points for both objects must be equal or" + "a new list evaluation points must be specified", + ) return fdata1, fdata2 @@ -136,16 +146,16 @@ class LpNorm(Norm[FData]): \|^2 + \| f_2(x,y) \|^2 } \right )^p dxdy \right)^{ \frac{1}{p}} - The objects `l1_norm`, `l2_norm` and `linf_norm` are instances of this - class with commonly used values of `p`, namely 1, 2 and infinity. + The objects ``l1_norm``, ``l2_norm`` and ``linf_norm`` are instances of + this class with commonly used values of ``p``, namely 1, 2 and infinity. Args: - fdata (FData): FData object. + fdata: FData object. p: p of the lp norm. Must be greater or equal - than 1. If p='inf' or p=np.inf it is used the L infinity metric. + than 1. If ``p=math.inf`` it is used the L infinity metric. Defaults to 2. - p2: vector norm to apply. If it is a float, is the index of the - multivariate lp norm. Defaults to the same as `p`. + vector_norm: vector norm to apply. 
If it is a float, is the index of + the multivariate lp norm. Defaults to the same as ``p``. Examples: Calculates the norm of a FDataGrid containing the functions y = 1 @@ -175,13 +185,14 @@ class with commonly used values of `p`, namely 1, 2 and infinity. """ def __init__( - self, p: float, - vector_norm: Optional[Union[Norm[np.ndarray], float]] = None, + self, + p: float, + vector_norm: Union[Norm[np.ndarray], float, None] = None, ) -> None: # Checks that the lp normed is well defined if not np.isinf(p) and p < 1: - raise ValueError(f"p must be equal or greater than 1.") + raise ValueError(f"p (={p}) must be equal or greater than 1.") self.p = p self.vector_norm = vector_norm @@ -193,6 +204,7 @@ def __repr__(self) -> str: ) def __call__(self, fdata: FData) -> np.ndarray: + """Compute the Lp norm of a functional data object.""" from ..misc import inner_product vector_norm = self.vector_norm @@ -239,8 +251,10 @@ def __call__(self, fdata: FData) -> np.ndarray: if fdata.dim_domain == 1: res = np.max(data_matrix[..., 0], axis=1) else: - res = np.array([np.max(observation) - for observation in data_matrix]) + res = np.array([ + np.max(observation) + for observation in data_matrix + ]) elif fdata.dim_domain == 1: @@ -270,7 +284,7 @@ def lp_norm( fdata: FData, *, p: float, - vector_norm: Optional[Union[Norm[np.ndarray], float]] = None, + vector_norm: Union[Norm[np.ndarray], float, None] = None, ) -> np.ndarray: r"""Calculate the norm of all the observations in a FDataGrid object. @@ -306,17 +320,17 @@ def lp_norm( Note: This function is a wrapper of :class:`LpNorm`, available only for - convenience. As the parameter `p` is mandatory, it cannot be used + convenience. As the parameter ``p`` is mandatory, it cannot be used where a fully-defined norm is required: use an instance of :class:`LpNorm` in those cases. Args: - fdata (FData): FData object. - p (int, optional): p of the lp norm. Must be greater or equal - than 1. 
If p='inf' or p=np.inf it is used the L infinity metric. + fdata: FData object. + p: p of the lp norm. Must be greater or equal + than 1. If ``p=math.inf`` it is used the L infinity metric. Defaults to 2. - p2 (int, optional): p index of the vectorial norm applied in case of - multivariate objects. Defaults to 2. + vector_norm: vector norm to apply. If it is a float, is the index of + the multivariate lp norm. Defaults to the same as ``p``. Returns: numpy.darray: Matrix with as many rows as observations in the first @@ -337,7 +351,7 @@ def lp_norm( >>> skfda.misc.metrics.lp_norm(fd, p=2).round(2) array([ 1. , 0.58]) - As the norm with `p=2` is a common choice, one can use `l2_norm` + As the norm with ``p=2`` is a common choice, one can use ``l2_norm`` directly: >>> skfda.misc.metrics.l2_norm(fd).round(2) array([ 1. , 0.58]) @@ -368,7 +382,7 @@ class NormInducedMetric(Metric[VectorType]): d(f,g) = \|f - g\| Args: - norm (:obj:`Function`): Norm function `norm(fdata, **kwargs)`. + norm: Norm used to induce the metric. Examples: Computes the :math:`\mathbb{L}^2` distance between an object containing @@ -399,6 +413,7 @@ def __init__(self, norm: Norm[VectorType]): self.norm = norm def __call__(self, elem1: VectorType, elem2: VectorType) -> np.ndarray: + """Compute the induced norm between two vectors.""" return self.norm(elem1 - elem2) def __repr__(self) -> str: @@ -418,9 +433,7 @@ def distance_from_norm( d(f,g) = \|f - g\| Args: - norm (:obj:`Function`): Norm function `norm(fdata, **kwargs)`. - **kwargs (dict, optional): Named parameters to be passed to the norm - function. + norm: Norm used to induce the metric. Returns: :obj:`Function`: Distance function `norm_distance(fdata1, fdata2)`. @@ -456,9 +469,11 @@ def pairwise_metric_optimization( elem2: Optional[Any], ) -> np.ndarray: r""" - Generic function that can be subclassed for different combinations of - metric and operators in order to provide a more efficient implementation - for the pairwise metric matrix. 
+ Optimized computation of a pairwise metric. + + This is a generic function that can be subclassed for different + combinations of metric and operators in order to provide a more + efficient implementation for the pairwise metric matrix. """ return NotImplemented @@ -489,6 +504,7 @@ def __call__( elem1: MetricElementType, elem2: Optional[MetricElementType] = None, ) -> np.ndarray: + """Evaluate the pairwise metric.""" optimized = pairwise_metric_optimization(self.metric, elem1, elem2) return ( @@ -502,7 +518,7 @@ def __repr__(self) -> str: def pairwise_distance( - distance: Metric[MetricElementType] + distance: Metric[MetricElementType], ) -> PairwiseMetric[MetricElementType]: r"""Return a pairwise distance function for FData objects. @@ -522,16 +538,13 @@ def pairwise_distance( Use class :class:`PairwiseMetric` instead. Args: - distance (:obj:`Function`): Distance functions between two functional - objects `distance(fdata1, fdata2, **kwargs)`. - **kwargs (:obj:`dict`, optional): parameters dictionary to be passed - to the distance function. + distance: Distance function between two functional data objects. Returns: - :obj:`Function`: Pairwise distance function, wich accepts two - functional data objects and returns the pairwise distance matrix. - """ + Pairwise distance function, wich accepts two functional data objects + and returns the pairwise distance matrix. + """ warnings.warn( "Function pairwise_distance is deprecated. Use the " "class PairwiseMetric instead.", @@ -554,16 +567,16 @@ class LpDistance(NormInducedMetric[FData]): where :math:`\| {}\cdot{} \|_p` denotes the :func:`Lp norm `. - The objects `l1_distance`, `l2_distance` and `linf_distance` are instances - of this class with commonly used values of `p`, namely 1, 2 and infinity. + The objects ``l1_distance``, ``l2_distance`` and ``linf_distance`` are + instances of this class with commonly used values of ``p``, namely 1, 2 and + infinity. Args: - fdatagrid (FDataGrid): FDataGrid object. 
- p (int, optional): p of the lp norm. Must be greater or equal - than 1. If p='inf' or p=np.inf it is used the L infinity metric. + p: p of the lp norm. Must be greater or equal + than 1. If ``p=math.inf`` it is used the L infinity metric. Defaults to 2. - p2 (int, optional): p index of the vectorial norm applied in case of - multivariate objects. Defaults to 2. See :func:`lp_norm`. + vector_norm: vector norm to apply. If it is a float, is the index of + the multivariate lp norm. Defaults to the same as ``p``. Examples: Computes the distances between an object containing functional data @@ -595,11 +608,12 @@ class LpDistance(NormInducedMetric[FData]): ... ValueError: ... - """ + """ # noqa: P102 def __init__( - self, p: float, - vector_norm: Optional[Union[Norm[np.ndarray], float]] = None, + self, + p: float, + vector_norm: Union[Norm[np.ndarray], float, None] = None, ) -> None: self.p = p @@ -656,9 +670,7 @@ def _pairwise_metric_optimization_lp_fdata( out=distance_matrix_sqr, ) - distance_matrix = np.sqrt(distance_matrix_sqr) - - return distance_matrix + return np.sqrt(distance_matrix_sqr) return NotImplemented @@ -668,9 +680,10 @@ def lp_distance( fdata2: T, *, p: float, - vector_norm: Optional[Union[Norm[np.ndarray], float]] = None, + vector_norm: Union[Norm[np.ndarray], float, None] = None, ) -> np.ndarray: - r"""Lp distance for FDataGrid objects. + r""" + Lp distance for FDataGrid objects. Calculates the distance between two functional objects. @@ -684,17 +697,23 @@ def lp_distance( Note: This function is a wrapper of :class:`LpDistance`, available only for - convenience. As the parameter `p` is mandatory, it cannot be used + convenience. As the parameter ``p`` is mandatory, it cannot be used where a fully-defined metric is required: use an instance of :class:`LpDistance` in those cases. Args: - fdatagrid (FDataGrid): FDataGrid object. - p (int, optional): p of the lp norm. Must be greater or equal - than 1. 
If p='inf' or p=np.inf it is used the L infinity metric. + fdata1: First FData object. + fdata2: Second FData object. + p: p of the lp norm. Must be greater or equal + than 1. If ``p=math.inf`` it is used the L infinity metric. Defaults to 2. - p2 (int, optional): p index of the vectorial norm applied in case of - multivariate objects. Defaults to 2. See :func:`lp_norm`. + vector_norm: vector norm to apply. If it is a float, is the index of + the multivariate lp norm. Defaults to the same as ``p``. + + Returns: + Numpy vector where the i-th coordinate has the distance between the + i-th element of the first object and the i-th element of the second + one. Examples: Computes the distances between an object containing functional data @@ -713,7 +732,6 @@ def lp_distance( >>> skfda.misc.metrics.lp_distance(fd, fd2, p=2).round(2) array([ 1.]) - If the functional data are defined over a different set of points of discretisation the functions returns an exception. @@ -721,13 +739,13 @@ def lp_distance( >>> fd2 = FDataGrid([np.zeros(len(x)), x/2 + 0.5], x) >>> skfda.misc.metrics.lp_distance(fd, fd2, p=2) Traceback (most recent call last): - .... + ... ValueError: ... See also: - :class:`~skfda.misc.metrics.LpDistance + :class:`~skfda.misc.metrics.LpDistance` - """ + """ # noqa: P102 return LpDistance(p=p, vector_norm=vector_norm)(fdata1, fdata2) @@ -768,7 +786,7 @@ def fisher_rao_distance( Raises: ValueError: If the objects are not unidimensional. - Refereces: + References: .. [S11-2] Srivastava, Anuj et. al. Registration of Functional Data Using Fisher-Rao Metric (2011). In *Function Representation and Metric* (pp. 5-7). arXiv:1103.3817v2. @@ -845,7 +863,7 @@ def amplitude_distance( fdata2: Second FData object. lam: Penalty term to restric the elasticity. eval_points: Array with points of evaluation. - **kwargs: Name arguments to be passed to + kwargs: Name arguments to be passed to :func:`elastic_registration_warping`. 
Returns: @@ -854,7 +872,7 @@ def amplitude_distance( Raises: ValueError: If the objects are not unidimensional. - Refereces: + References: .. [SK16-4-10-1] Srivastava, Anuj & Klassen, Eric P. (2016). Functional and shape data analysis. In *Amplitude Space and a Metric Structure* (pp. 107-109). Springer. @@ -936,10 +954,8 @@ def phase_distance( Args: fdata1: First FData object. fdata2: Second FData object. - lambda: Penalty term to restric the elasticity. + lam: Penalty term to restric the elasticity. eval_points (array_like, optional): Array with points of evaluation. - **kwargs (dict): Name arguments to be passed to - :func:`elastic_registration_warping`. Returns: Phase distance between the objects. @@ -947,7 +963,7 @@ def phase_distance( Raises: ValueError: If the objects are not unidimensional. - Refereces: + References: .. [SK16-4-10-2] Srivastava, Anuj & Klassen, Eric P. (2016). Functional and shape data analysis. In *Phase Space and a Metric Structure* (pp. 109-111). Springer. @@ -959,7 +975,7 @@ def phase_distance( _check=_check, ) - # Rescale in (0,1) + # Rescale in the interval (0,1) eval_points_normalized = _normalize_scale(fdata1.grid_points[0]) # Calculate the corresponding srsf and normalize to (0,1) @@ -973,7 +989,8 @@ def phase_distance( ) elastic_registration = ElasticRegistration( - penalty=lam, template=fdata2, + penalty=lam, + template=fdata2, output_points=eval_points_normalized, ) @@ -1025,7 +1042,7 @@ def warping_distance( Raises: ValueError: If the objects are not unidimensional. - Refereces: + References: .. [SK16-4-11-2] Srivastava, Anuj & Klassen, Eric P. (2016). Functional and shape data analysis. In *Probability Density Functions* (pp. 113-117). Springer. From 9afe6cbf6b768c2235e78d489044f99b004a6f44 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Tue, 19 Jan 2021 23:31:57 +0100 Subject: [PATCH 038/417] Improve online docs. 
--- docs/modules/misc/metrics.rst | 49 +++++++++++++++++--- skfda/misc/metrics.py | 86 ++--------------------------------- 2 files changed, 46 insertions(+), 89 deletions(-) diff --git a/docs/modules/misc/metrics.rst b/docs/modules/misc/metrics.rst index a7ac7c7e7..9cb424244 100644 --- a/docs/modules/misc/metrics.rst +++ b/docs/modules/misc/metrics.rst @@ -7,15 +7,33 @@ This module contains multiple functional distances and norms. Lp Spaces --------- -The following functions computes the norms and distances used in Lp spaces. +The following classes compute the norms and metrics used in Lp spaces. One +first has to create an instance for the class, specifying the desired value +for ``p``, and use this instance to evaluate the norm or distance over +:term:`functional data objects`. .. autosummary:: :toctree: autosummary - skfda.misc.metrics.lp_norm - skfda.misc.metrics.lp_distance + skfda.misc.metrics.LpNorm + skfda.misc.metrics.LpDistance + +As the :math:`L_1`, :math:`L_2` and :math:`L_{\infty}` norms are very common +in :term:`FDA`, instances for these have been created, called respectively +``l1_norm``, ``l2_norm`` and ``linf_norm``. The same is true for metrics, +having ``l1_distance``, ``l2_distance`` and ``linf_distance`` already +created. +The following functions are wrappers for convenience, in case that one +only wants to evaluate the norm/metric for a value of ``p``. These functions +cannot be used in objects or methods that require a norm or metric, as the +value of ``p`` must be explicitly passed in each call. +.. autosummary:: + :toctree: autosummary + + skfda.misc.metrics.lp_norm + skfda.misc.metrics.lp_distance Elastic distances ----------------- @@ -32,11 +50,28 @@ analysis and registration of functional data. 
skfda.misc.metrics.warping_distance -Utils ------ +Metric induced by a norm +------------------------ + +If a norm has been defined, it is possible to construct a metric between two +elements simply subtracting one from the other and computing the norm of the +result. Such a metric is called the metric induced by the norm, and the +:math:`Lp` distance is an example of these. The following class can be used +to construct a metric from a norm in this way: + +.. autosummary:: + :toctree: autosummary + + skfda.misc.metrics.NormInducedMetric + + +Pairwise metric +--------------- + +Some tasks require the computation of all possible distances between pairs +of objets. The following class can compute that efficiently: .. autosummary:: :toctree: autosummary - skfda.misc.metrics.distance_from_norm - skfda.misc.metrics.pairwise_distance + skfda.misc.metrics.PairwiseMetric diff --git a/skfda/misc/metrics.py b/skfda/misc/metrics.py index 5d4f48854..2ccca4462 100644 --- a/skfda/misc/metrics.py +++ b/skfda/misc/metrics.py @@ -150,7 +150,6 @@ class LpNorm(Norm[FData]): this class with commonly used values of ``p``, namely 1, 2 and infinity. Args: - fdata: FData object. p: p of the lp norm. Must be greater or equal than 1. If ``p=math.inf`` it is used the L infinity metric. Defaults to 2. @@ -172,6 +171,7 @@ class LpNorm(Norm[FData]): As the norm with `p=2` is a common choice, one can use `l2_norm` directly: + >>> skfda.misc.metrics.l2_norm(fd).round(2) array([ 1. , 0.58]) @@ -180,7 +180,7 @@ class LpNorm(Norm[FData]): >>> norm = skfda.misc.metrics.LpNorm(0.5) Traceback (most recent call last): .... - ValueError: p must be equal or greater than 1. + ValueError: p (=0.5) must be equal or greater than 1. """ @@ -353,6 +353,7 @@ def lp_norm( As the norm with ``p=2`` is a common choice, one can use ``l2_norm`` directly: + >>> skfda.misc.metrics.l2_norm(fd).round(2) array([ 1. 
, 0.58]) @@ -361,7 +362,7 @@ def lp_norm( >>> skfda.misc.metrics.lp_norm(fd, p=0.5) Traceback (most recent call last): .... - ValueError: p must be equal or greater than 1. + ValueError: p (=0.5) must be equal or greater than 1. See also: :class:`LpNorm` @@ -420,48 +421,6 @@ def __repr__(self) -> str: return f"{type(self).__name__}(norm={self.norm})" -def distance_from_norm( - norm: Norm[VectorType], -) -> Metric[VectorType]: - r"""Return the distance induced by a norm. - - Given a norm :math:`\| \cdot \|: X \rightarrow \mathbb{R}`, - returns the distance :math:`d: X \times X \rightarrow \mathbb{R}` induced - by the norm: - - .. math:: - d(f,g) = \|f - g\| - - Args: - norm: Norm used to induce the metric. - - Returns: - :obj:`Function`: Distance function `norm_distance(fdata1, fdata2)`. - - Examples: - Computes the :math:`\mathbb{L}^2` distance between an object containing - functional data corresponding to the function :math:`y(x) = x` defined - over the interval [0, 1] and another one containing data of the - function :math:`y(x) = x/2`. - - Firstly we create the functional data. - - >>> x = np.linspace(0, 1, 1001) - >>> fd = FDataGrid([x], x) - >>> fd2 = FDataGrid([x/2], x) - - To construct the :math:`\mathbb{L}^2` distance it is used the - :math:`\mathbb{L}^2` norm wich it is used to compute the distance. - - >>> l2_distance = distance_from_norm(l2_norm) - >>> d = l2_distance(fd, fd2) - >>> float('%.3f'% d) - 0.289 - - """ - return NormInducedMetric(norm) - - @multimethod.multidispatch def pairwise_metric_optimization( metric: Any, @@ -517,43 +476,6 @@ def __repr__(self) -> str: return f"{type(self).__name__}(metric={self.metric})" -def pairwise_distance( - distance: Metric[MetricElementType], -) -> PairwiseMetric[MetricElementType]: - r"""Return a pairwise distance function for FData objects. - - Given a distance it returns the corresponding pairwise distance function. 
- - The returned pairwise distance function calculates the distance between - all possible pairs consisting of one observation of the first FDataGrid - object and one of the second one. - - The matrix returned by the pairwise distance is a matrix with as many rows - as observations in the first object and as many columns as observations in - the second one. Each element (i, j) of the matrix is the distance between - the ith observation of the first object and the jth observation of the - second one. - - .. deprecated:: 0.6 - Use class :class:`PairwiseMetric` instead. - - Args: - distance: Distance function between two functional data objects. - - Returns: - Pairwise distance function, wich accepts two functional data objects - and returns the pairwise distance matrix. - - """ - warnings.warn( - "Function pairwise_distance is deprecated. Use the " - "class PairwiseMetric instead.", - DeprecationWarning, - ) - - return PairwiseMetric(distance) - - class LpDistance(NormInducedMetric[FData]): r"""Lp distance for FDataGrid objects. From 86c70ef9ec750c108e6ea9ab4fe9631f8aa075cd Mon Sep 17 00:00:00 2001 From: vnmabus Date: Wed, 20 Jan 2021 14:16:04 +0100 Subject: [PATCH 039/417] Improve style and tests. 
--- skfda/_utils/_utils.py | 22 +++++++----- skfda/misc/metrics.py | 41 +++++++++------------- skfda/representation/_typing.py | 15 ++++++-- tests/test_metrics.py | 62 ++++++++++++++++++++++++++------- 4 files changed, 93 insertions(+), 47 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 6a4aecd48..e498c2c8f 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -441,7 +441,7 @@ def _map_in_batches( """ if memory_per_batch is None: # 256MB is not too big - memory_per_batch = 256 * 1024 * 1024 + memory_per_batch = 256 * 1024 * 1024 # noqa: WPS432 memory_per_element = sum(a.nbytes // len(a) for a in arguments) n_elements_per_batch_allowed = memory_per_batch // memory_per_element @@ -472,10 +472,7 @@ def _pairwise_symmetric( memory_per_batch: Optional[int] = None, **kwargs: Any, ) -> np.ndarray: - """ - Compute pairwise a commutative function. - - """ + """Compute pairwise a commutative function.""" if arg2 is None or arg2 is arg1: indices = np.triu_indices(len(arg1)) @@ -484,7 +481,10 @@ def _pairwise_symmetric( triang_vec = _map_in_batches( function, - (arg1, arg1), + ( + arg1, + arg1, + ), indices, memory_per_batch=memory_per_batch, **kwargs, @@ -504,8 +504,14 @@ def _pairwise_symmetric( vec = _map_in_batches( function, - (arg1, arg2), - (indices[0].ravel(), indices[1].ravel()), + ( + arg1, + arg2, + ), + ( + indices[0].ravel(), + indices[1].ravel(), + ), memory_per_batch=memory_per_batch, **kwargs, ) diff --git a/skfda/misc/metrics.py b/skfda/misc/metrics.py index 2ccca4462..ddf7a85bc 100644 --- a/skfda/misc/metrics.py +++ b/skfda/misc/metrics.py @@ -1,6 +1,5 @@ """Norms and metrics for functional data objects.""" import math -import warnings from abc import abstractmethod from builtins import isinstance from typing import Any, Generic, Optional, Tuple, TypeVar, Union @@ -229,32 +228,26 @@ def __call__(self, fdata: FData) -> np.ndarray: res = np.sqrt(integral[0]).flatten() else: - if fdata.dim_codomain > 1: - data_matrix 
= fdata.data_matrix - original_shape = data_matrix.shape - data_matrix = data_matrix.reshape(-1, original_shape[-1]) - - data_matrix = (np.linalg.norm( - fdata.data_matrix, - ord=vector_norm, - axis=-1, - keepdims=True, - ) if isinstance(vector_norm, (float, int)) - else vector_norm(data_matrix) - ) - data_matrix = data_matrix.reshape(original_shape[:-1] + (1,)) - else: - data_matrix = np.abs(fdata.data_matrix) + data_matrix = fdata.data_matrix + original_shape = data_matrix.shape + data_matrix = data_matrix.reshape(-1, original_shape[-1]) + + data_matrix = (np.linalg.norm( + fdata.data_matrix, + ord=vector_norm, + axis=-1, + keepdims=True, + ) if isinstance(vector_norm, (float, int)) + else vector_norm(data_matrix) + ) + data_matrix = data_matrix.reshape(original_shape[:-1] + (1,)) if np.isinf(self.p): - if fdata.dim_domain == 1: - res = np.max(data_matrix[..., 0], axis=1) - else: - res = np.array([ - np.max(observation) - for observation in data_matrix - ]) + res = np.max( + data_matrix, + axis=tuple(range(1, data_matrix.ndim)), + ) elif fdata.dim_domain == 1: diff --git a/skfda/representation/_typing.py b/skfda/representation/_typing.py index 78a87f2fe..38ad1fe6a 100644 --- a/skfda/representation/_typing.py +++ b/skfda/representation/_typing.py @@ -27,11 +27,20 @@ class Vector(Protocol): It should accept numpy arrays and FData, among other things. 
""" - def __add__(self: VectorType, __other: VectorType) -> VectorType: + def __add__( + self: VectorType, + __other: VectorType, # noqa: WPS112 + ) -> VectorType: pass - def __sub__(self: VectorType, __other: VectorType) -> VectorType: + def __sub__( + self: VectorType, + __other: VectorType, # noqa: WPS112 + ) -> VectorType: pass - def __mul__(self: VectorType, __other: float) -> VectorType: + def __mul__( + self: VectorType, + __other: float, # noqa: WPS112 + ) -> VectorType: pass diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 2b5fbca0a..7593bc109 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -6,12 +6,18 @@ from skfda import FDataBasis, FDataGrid from skfda.datasets import make_multimodal_samples -from skfda.misc.metrics import l2_distance, lp_norm +from skfda.misc.metrics import ( + l1_norm, + l2_distance, + l2_norm, + linf_norm, + lp_norm, +) from skfda.representation.basis import Monomial -class TestLpMetrics(unittest.TestCase): - """Test the lp metrics.""" +class TestLp(unittest.TestCase): + """Test the lp norms and distances.""" def setUp(self) -> None: """Create a few functional data objects.""" @@ -25,52 +31,83 @@ def setUp(self) -> None: ) basis = Monomial(n_basis=3, domain_range=(1, 5)) self.fd_basis = FDataBasis(basis, [[1, 1, 0], [0, 0, 1]]) - self.fd_curve = self.fd.concatenate(self.fd, as_coordinates=True) + self.fd_vector_valued = self.fd.concatenate( + self.fd, + as_coordinates=True, + ) self.fd_surface = make_multimodal_samples( n_samples=3, dim_domain=2, random_state=0, ) - def test_lp_norm(self) -> None: + def test_lp_norm_grid(self) -> None: + """Test that the Lp norms work with FDataGrid.""" + np.testing.assert_allclose( + l1_norm(self.fd), + [16.0, 41.33333333], + ) + + np.testing.assert_allclose( + l2_norm(self.fd), + [8.326664, 25.006666], + ) + + np.testing.assert_allclose( + lp_norm(self.fd, p=3), + [6.839904, 22.401268], + ) - np.testing.assert_allclose(lp_norm(self.fd, p=1), [16.0, 41.33333333]) - 
np.testing.assert_allclose(lp_norm(self.fd, p=np.inf), [6, 25]) + np.testing.assert_allclose( + linf_norm(self.fd), + [6, 25], + ) - def test_lp_norm_curve(self) -> None: + def test_lp_norm_basis(self) -> None: + """Test that the L2 norm works with FDataBasis.""" + np.testing.assert_allclose( + l2_norm(self.fd_basis), + [8.326664, 24.996], + ) + def test_lp_norm_vector_valued(self) -> None: + """Test that the Lp norms work with vector-valued FDataGrid.""" np.testing.assert_allclose( - lp_norm(self.fd_curve, p=1), + l1_norm(self.fd_vector_valued), [32.0, 82.666667], ) np.testing.assert_allclose( - lp_norm(self.fd_curve, p=np.inf), + linf_norm(self.fd_vector_valued), [6, 25], ) def test_lp_norm_surface_inf(self) -> None: + """Test that the Linf norm works with multidimensional domains.""" np.testing.assert_allclose( lp_norm(self.fd_surface, p=np.inf).round(5), [0.99994, 0.99793, 0.99868], ) def test_lp_norm_surface(self) -> None: + """Test that integration of surfaces has not been implemented.""" # Integration of surfaces not implemented, add test case after # implementation self.assertEqual(lp_norm(self.fd_surface, p=1), NotImplemented) def test_lp_error_dimensions(self) -> None: + """Test error on metric between different kind of objects.""" # Case internal arrays with np.testing.assert_raises(ValueError): l2_distance(self.fd, self.fd_surface) with np.testing.assert_raises(ValueError): - l2_distance(self.fd, self.fd_curve) + l2_distance(self.fd, self.fd_vector_valued) with np.testing.assert_raises(ValueError): - l2_distance(self.fd_surface, self.fd_curve) + l2_distance(self.fd_surface, self.fd_vector_valued) def test_lp_error_domain_ranges(self) -> None: + """Test error on metric between objects with different domains.""" grid_points = [2, 3, 4, 5, 6] fd2 = FDataGrid( [ @@ -84,6 +121,7 @@ def test_lp_error_domain_ranges(self) -> None: l2_distance(self.fd, fd2) def test_lp_error_grid_points(self) -> None: + """Test error on metric for FDataGrids with different grid 
points.""" grid_points = [1, 2, 4, 4.3, 5] fd2 = FDataGrid( [ From f8d9ac190c8ab2370ef34389177957d9c578277b Mon Sep 17 00:00:00 2001 From: vnmabus Date: Wed, 20 Jan 2021 14:20:14 +0100 Subject: [PATCH 040/417] Fix style error. --- skfda/representation/_typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/representation/_typing.py b/skfda/representation/_typing.py index 38ad1fe6a..1c14ffbdb 100644 --- a/skfda/representation/_typing.py +++ b/skfda/representation/_typing.py @@ -42,5 +42,5 @@ def __sub__( def __mul__( self: VectorType, __other: float, # noqa: WPS112 - ) -> VectorType: + ) -> VectorType: pass From 880cd5f3785131565b16ea305f6b1996aeae69f4 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Wed, 20 Jan 2021 19:27:33 +0100 Subject: [PATCH 041/417] Fix docs. --- examples/plot_magnitude_shape.py | 7 +++---- .../outliers/_directional_outlyingness.py | 18 ++++++++++++------ .../visualization/_magnitude_shape_plot.py | 4 +--- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/examples/plot_magnitude_shape.py b/examples/plot_magnitude_shape.py index 9b4e19751..777b1524e 100644 --- a/examples/plot_magnitude_shape.py +++ b/examples/plot_magnitude_shape.py @@ -10,15 +10,14 @@ # sphinx_gallery_thumbnail_number = 2 +import matplotlib.pyplot as plt +import numpy as np + from skfda import datasets from skfda.exploratory.depth import IntegratedDepth from skfda.exploratory.depth.multivariate import SimplicialDepth from skfda.exploratory.visualization import MagnitudeShapePlot -import matplotlib.pyplot as plt -import numpy as np - - ############################################################################## # First, the Canadian Weather dataset is downloaded from the package 'fda' in # CRAN. 
It contains a FDataGrid with daily temperatures and precipitations, diff --git a/skfda/exploratory/outliers/_directional_outlyingness.py b/skfda/exploratory/outliers/_directional_outlyingness.py index 6b00efc0c..91f917fe1 100644 --- a/skfda/exploratory/outliers/_directional_outlyingness.py +++ b/skfda/exploratory/outliers/_directional_outlyingness.py @@ -1,14 +1,14 @@ -from skfda.exploratory.depth.multivariate import ProjectionDepth import typing -from numpy import linalg as la +import numpy as np import scipy.integrate -from scipy.stats import f import scipy.stats +from numpy import linalg as la +from scipy.stats import f from sklearn.base import BaseEstimator, OutlierMixin from sklearn.covariance import MinCovDet -import numpy as np +from skfda.exploratory.depth.multivariate import ProjectionDepth from ... import FDataGrid @@ -303,7 +303,9 @@ class DirectionalOutlierDetector(BaseEstimator, OutlierMixin): """ def __init__( - self, *, multivariate_depth=ProjectionDepth(), + self, + *, + multivariate_depth=None, pointwise_weights=None, assume_centered=False, support_fraction=None, @@ -321,10 +323,14 @@ def __init__( self._force_asymptotic = _force_asymptotic def _compute_points(self, X): + multivariate_depth = self.multivariate_depth + if multivariate_depth is None: + multivariate_depth = ProjectionDepth() + # The depths of the samples are calculated giving them an ordering. 
*_, mean_dir_outl, variation_dir_outl = directional_outlyingness_stats( X, - multivariate_depth=self.multivariate_depth, + multivariate_depth=multivariate_depth, pointwise_weights=self.pointwise_weights) points = np.concatenate((mean_dir_outl, diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py index 9e3e5e4d7..7e7c53dc6 100644 --- a/skfda/exploratory/visualization/_magnitude_shape_plot.py +++ b/skfda/exploratory/visualization/_magnitude_shape_plot.py @@ -7,14 +7,12 @@ """ import matplotlib - import matplotlib.pyplot as plt import numpy as np from ..outliers import DirectionalOutlierDetector from ._utils import _figure_to_svg, _get_figure_and_axes, _set_figure_layout - __author__ = "Amanda Hernando Bernabé" __email__ = "amanda.hernando@estudiante.uam.es" @@ -136,7 +134,7 @@ class MagnitudeShapePlot: grid_points=(array([ 0., 2., 4., 6., 8., 10.]),), domain_range=((0.0, 10.0),), ...), - multivariate_depth=ProjectionDepth(), + multivariate_depth=None, pointwise_weights=None, alpha=0.993, points=array([[ 1.66666667, 0.12777778], From c10485b3c0dcbf760c73136150c362900842bc06 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 21 Jan 2021 20:23:48 +0100 Subject: [PATCH 042/417] ddplot class --- .../visualization/representation.py | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index f60228835..b1ad57359 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -325,3 +325,82 @@ def plot_scatter(fdata, chart=None, *, grid_points=None, _set_labels(fdata, fig, axes, patches) return fig + + +class DDPlotDisplay: + + """DDPlot visualization. Plot the depth of our fdata elements in two + different distributions, one in each axis. 
It is useful to understand + how our data is more related with one subset of data / distribution + than another one. + + Args: + fdata: functional data set that we want to examine. + dist1: functional data set that represents the first distribution that + we want to use to compute the depth (Depth X). + dist2: functional data set that represents the second distribution that + we want to use to compute the depth (Depth Y). + depth_method: method that will be used to compute the depths of the + data with respect to the distributions. + + """ + def __init__(self, fdata, dist1, dist2, depth_method): + self.fdata = fdata + self.dist1 = dist1 + self.dist2 = dist2 + self.depth_method = depth_method + + def plot(self, chart=None, *, fig=None, axes=None, + n_rows=None, n_cols=None, **kwargs): + """Plot the depth of our fdata elements in the two different distributions, + one in each axis. It is useful to understand how our data is more related with + one subset of data / distribution than another one. + + Args: + chart (figure object, axis or list of axes, optional): figure over + which the graphs are plotted or axis over where the graphs are + plotted. If None and ax is also None, the figure is + initialized. + fig (figure object, optional): figure over which the graphs are + plotted in case ax is not specified. If None and ax is also + None, the figure is initialized. + axes (list of axis objects, optional): axis over where the graphs are + plotted. If None, see param fig. + n_rows (int, optional): designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols(int, optional): designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None.
+ **kwargs: if dim_domain is 1, keyword arguments to be passed to + the matplotlib.pyplot.plot function; if dim_domain is 2, + keyword arguments to be passed to the + matplotlib.pyplot.plot_surface function. + + Returns: + fig (figure object): figure object in which the depths will be scattered. + + """ + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata(self.fdata, fig, axes, n_rows, n_cols) + + depth_dist1 = self.depth_method.__call__(self.fdata, distribution = self.dist1) + depth_dist2 = self.depth_method.__call__(self.fdata, distribution = self.dist2) + + if self.fdata.dim_domain == 1: + + for i in range(self.fdata.dim_codomain): + + axes[i].scatter(depth_dist1, depth_dist2, + **kwargs) + + #Set labels of graph + fig.suptitle("DDPlot") + for i in range(self.fdata.dim_codomain): + axes[i].set_xlabel("X depth") + axes[i].set_ylabel("Y depth") + axes[i].set_xlim([self.depth_method.min, self.depth_method.max]) + axes[i].set_ylim([self.depth_method.min, self.depth_method.max]) + axes[i].plot([0,1], linewidth = 0.2, color = "gray") + + return fig \ No newline at end of file From 90803ef21a2c81aec35ce70579ef8607eda83c8d Mon Sep 17 00:00:00 2001 From: vnmabus Date: Thu, 21 Jan 2021 23:24:53 +0100 Subject: [PATCH 043/417] Refactor metrics. Extracted enum values. 
--- setup.cfg | 7 + skfda/misc/metrics.py | 994 ------------------------- skfda/misc/metrics/__init__.py | 22 + skfda/misc/metrics/_elastic_metrics.py | 348 +++++++++ skfda/misc/metrics/_lp_distances.py | 209 ++++++ skfda/misc/metrics/_lp_norms.py | 264 +++++++ skfda/misc/metrics/_typing.py | 75 ++ skfda/misc/metrics/_utils.py | 187 +++++ skfda/ml/clustering/_hierarchical.py | 235 +++--- 9 files changed, 1254 insertions(+), 1087 deletions(-) delete mode 100644 skfda/misc/metrics.py create mode 100644 skfda/misc/metrics/__init__.py create mode 100644 skfda/misc/metrics/_elastic_metrics.py create mode 100644 skfda/misc/metrics/_lp_distances.py create mode 100644 skfda/misc/metrics/_lp_norms.py create mode 100644 skfda/misc/metrics/_typing.py create mode 100644 skfda/misc/metrics/_utils.py diff --git a/setup.cfg b/setup.cfg index ee1ec96cc..9c80fea61 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,10 +31,14 @@ ignore = S101, # Line break occurred before a binary operator (antipattern) W503, + # Utils is used as a module name + WPS100, # Short names like X or y are common in scikit-learn WPS111, # We do not like this underscored numbers convention WPS114, + # Attributes in uppercase are used in enums + WPS115, # Trailing underscores are a scikit-learn convention WPS120, # The number of imported things may be large, especially for typing @@ -140,6 +144,9 @@ ignore_missing_imports = True [mypy-findiff.*] ignore_missing_imports = True +[mypy-joblib.*] +ignore_missing_imports = True + [mypy-matplotlib.*] ignore_missing_imports = True diff --git a/skfda/misc/metrics.py b/skfda/misc/metrics.py deleted file mode 100644 index ddf7a85bc..000000000 --- a/skfda/misc/metrics.py +++ /dev/null @@ -1,994 +0,0 @@ -"""Norms and metrics for functional data objects.""" -import math -from abc import abstractmethod -from builtins import isinstance -from typing import Any, Generic, Optional, Tuple, TypeVar, Union - -import multimethod -import numpy as np -import scipy.integrate -from 
typing_extensions import Protocol - -from .._utils import _pairwise_symmetric -from ..preprocessing.registration import ElasticRegistration, normalize_warping -from ..preprocessing.registration._warping import _normalize_scale -from ..preprocessing.registration.elastic import SRSF -from ..representation import FData, FDataBasis, FDataGrid -from ..representation._typing import Vector - -T = TypeVar("T", bound=FData) -VectorType = TypeVar("VectorType", contravariant=True, bound=Vector) -MetricElementType = TypeVar("MetricElementType", contravariant=True) - - -class Norm(Protocol[VectorType]): - """Protocol for a norm of a vector.""" - - @abstractmethod - def __call__(self, __vector: VectorType) -> np.ndarray: # noqa: WPS112 - """Compute the norm of a vector.""" - - -class Metric(Protocol[MetricElementType]): - """Protocol for a metric between two elements of a metric space.""" - - @abstractmethod - def __call__( - self, - __e1: MetricElementType, # noqa: WPS112 - __e2: MetricElementType, # noqa: WPS112 - ) -> np.ndarray: - """Compute the norm of a vector.""" - - -def _check_compatible(fdata1: T, fdata2: T) -> None: - - if isinstance(fdata1, FData) and isinstance(fdata2, FData): - if ( - fdata2.dim_codomain != fdata1.dim_codomain - or fdata2.dim_domain != fdata1.dim_domain - ): - raise ValueError("Objects should have the same dimensions") - - if not np.array_equal(fdata1.domain_range, fdata2.domain_range): - raise ValueError("Domain ranges for both objects must be equal") - - -def _cast_to_grid( - fdata1: FData, - fdata2: FData, - eval_points: np.ndarray = None, - _check: bool = True, -) -> Tuple[FDataGrid, FDataGrid]: - """Convert fdata1 and fdata2 to FDatagrid. - - Checks if the fdatas passed as argument are unidimensional and compatible - and converts them to FDatagrid to compute their distances. - - Args: - fdata1: First functional object. - fdata2: Second functional object. - eval_points: Evaluation points. 
- - Returns: - Tuple with two :obj:`FDataGrid` with the same grid points. - """ - # Dont perform any check - if not _check: - return fdata1, fdata2 - - _check_compatible(fdata1, fdata2) - - # Case new evaluation points specified - if eval_points is not None: # noqa: WPS223 - fdata1 = fdata1.to_grid(eval_points) - fdata2 = fdata2.to_grid(eval_points) - - elif not isinstance(fdata1, FDataGrid) and isinstance(fdata2, FDataGrid): - fdata1 = fdata1.to_grid(fdata2.grid_points[0]) - - elif not isinstance(fdata2, FDataGrid) and isinstance(fdata1, FDataGrid): - fdata2 = fdata2.to_grid(fdata1.grid_points[0]) - - elif ( - not isinstance(fdata1, FDataGrid) - and not isinstance(fdata2, FDataGrid) - ): - domain = fdata1.domain_range[0] - grid_points = np.linspace(*domain) - fdata1 = fdata1.to_grid(grid_points) - fdata2 = fdata2.to_grid(grid_points) - - elif not np.array_equal( - fdata1.grid_points, - fdata2.grid_points, - ): - raise ValueError( - "Grid points for both objects must be equal or" - "a new list evaluation points must be specified", - ) - - return fdata1, fdata2 - - -class LpNorm(Norm[FData]): - r""" - Norm of all the observations in a FDataGrid object. - - For each observation f the Lp norm is defined as: - - .. math:: - \| f \| = \left( \int_D \| f \|^p dx \right)^{ - \frac{1}{p}} - - Where D is the :term:`domain` over which the functions are defined. - - The integral is approximated using Simpson's rule. - - In general, if f is a multivariate function :math:`(f_1, ..., f_d)`, and - :math:`D \subset \mathbb{R}^n`, it is applied the following generalization - of the Lp norm. - - .. math:: - \| f \| = \left( \int_D \| f \|_{*}^p dx \right)^{ - \frac{1}{p}} - - Where :math:`\| \cdot \|_*` denotes a vectorial norm. See - :func:`vectorial_norm` to more information. - - For example, if :math:`f: \mathbb{R}^2 \rightarrow \mathbb{R}^2`, and - :math:`\| \cdot \|_*` is the euclidean norm - :math:`\| (x,y) \|_* = \sqrt{x^2 + y^2}`, the lp norm applied is - - .. 
math:: - \| f \| = \left( \int \int_D \left ( \sqrt{ \| f_1(x,y) - \|^2 + \| f_2(x,y) \|^2 } \right )^p dxdy \right)^{ - \frac{1}{p}} - - The objects ``l1_norm``, ``l2_norm`` and ``linf_norm`` are instances of - this class with commonly used values of ``p``, namely 1, 2 and infinity. - - Args: - p: p of the lp norm. Must be greater or equal - than 1. If ``p=math.inf`` it is used the L infinity metric. - Defaults to 2. - vector_norm: vector norm to apply. If it is a float, is the index of - the multivariate lp norm. Defaults to the same as ``p``. - - Examples: - Calculates the norm of a FDataGrid containing the functions y = 1 - and y = x defined in the interval [0,1]. - - >>> import skfda - >>> import numpy as np - >>> - >>> x = np.linspace(0, 1, 1001) - >>> fd = skfda.FDataGrid([np.ones(len(x)), x] ,x) - >>> norm = skfda.misc.metrics.LpNorm(2) - >>> norm(fd).round(2) - array([ 1. , 0.58]) - - As the norm with `p=2` is a common choice, one can use `l2_norm` - directly: - - >>> skfda.misc.metrics.l2_norm(fd).round(2) - array([ 1. , 0.58]) - - The lp norm is only defined if p >= 1. - - >>> norm = skfda.misc.metrics.LpNorm(0.5) - Traceback (most recent call last): - .... - ValueError: p (=0.5) must be equal or greater than 1. 
- - """ - - def __init__( - self, - p: float, - vector_norm: Union[Norm[np.ndarray], float, None] = None, - ) -> None: - - # Checks that the lp normed is well defined - if not np.isinf(p) and p < 1: - raise ValueError(f"p (={p}) must be equal or greater than 1.") - - self.p = p - self.vector_norm = vector_norm - - def __repr__(self) -> str: - return ( - f"{type(self).__name__}(" - f"p={self.p}, vector_norm={self.vector_norm})" - ) - - def __call__(self, fdata: FData) -> np.ndarray: - """Compute the Lp norm of a functional data object.""" - from ..misc import inner_product - - vector_norm = self.vector_norm - - if vector_norm is None: - vector_norm = self.p - - # Special case, the inner product is heavily optimized - if self.p == vector_norm == 2: - return np.sqrt(inner_product(fdata, fdata)) - - if isinstance(fdata, FDataBasis): - if self.p != 2: - raise NotImplementedError - - start, end = fdata.domain_range[0] - integral = scipy.integrate.quad_vec( - lambda x: np.power(np.abs(fdata(x)), self.p), - start, - end, - ) - res = np.sqrt(integral[0]).flatten() - - else: - data_matrix = fdata.data_matrix - original_shape = data_matrix.shape - data_matrix = data_matrix.reshape(-1, original_shape[-1]) - - data_matrix = (np.linalg.norm( - fdata.data_matrix, - ord=vector_norm, - axis=-1, - keepdims=True, - ) if isinstance(vector_norm, (float, int)) - else vector_norm(data_matrix) - ) - data_matrix = data_matrix.reshape(original_shape[:-1] + (1,)) - - if np.isinf(self.p): - - res = np.max( - data_matrix, - axis=tuple(range(1, data_matrix.ndim)), - ) - - elif fdata.dim_domain == 1: - - # Computes the norm, approximating the integral with Simpson's - # rule. 
- res = scipy.integrate.simps( - data_matrix[..., 0] ** self.p, - x=fdata.grid_points, - ) ** (1 / self.p) - - else: - # Needed to perform surface integration - return NotImplemented - - if len(res) == 1: - return res[0] - - return res - - -l1_norm = LpNorm(1) -l2_norm = LpNorm(2) -linf_norm = LpNorm(math.inf) - - -def lp_norm( - fdata: FData, - *, - p: float, - vector_norm: Union[Norm[np.ndarray], float, None] = None, -) -> np.ndarray: - r"""Calculate the norm of all the observations in a FDataGrid object. - - For each observation f the Lp norm is defined as: - - .. math:: - \| f \| = \left( \int_D \| f \|^p dx \right)^{ - \frac{1}{p}} - - Where D is the :term:`domain` over which the functions are defined. - - The integral is approximated using Simpson's rule. - - In general, if f is a multivariate function :math:`(f_1, ..., f_d)`, and - :math:`D \subset \mathbb{R}^n`, it is applied the following generalization - of the Lp norm. - - .. math:: - \| f \| = \left( \int_D \| f \|_{*}^p dx \right)^{ - \frac{1}{p}} - - Where :math:`\| \cdot \|_*` denotes a vectorial norm. See - :func:`vectorial_norm` to more information. - - For example, if :math:`f: \mathbb{R}^2 \rightarrow \mathbb{R}^2`, and - :math:`\| \cdot \|_*` is the euclidean norm - :math:`\| (x,y) \|_* = \sqrt{x^2 + y^2}`, the lp norm applied is - - .. math:: - \| f \| = \left( \int \int_D \left ( \sqrt{ \| f_1(x,y) - \|^2 + \| f_2(x,y) \|^2 } \right )^p dxdy \right)^{ - \frac{1}{p}} - - Note: - This function is a wrapper of :class:`LpNorm`, available only for - convenience. As the parameter ``p`` is mandatory, it cannot be used - where a fully-defined norm is required: use an instance of - :class:`LpNorm` in those cases. - - Args: - fdata: FData object. - p: p of the lp norm. Must be greater or equal - than 1. If ``p=math.inf`` it is used the L infinity metric. - Defaults to 2. - vector_norm: vector norm to apply. If it is a float, is the index of - the multivariate lp norm. Defaults to the same as ``p``. 
- - Returns: - numpy.darray: Matrix with as many rows as observations in the first - object and as many columns as observations in the second one. Each - element (i, j) of the matrix is the inner product of the ith - observation of the first object and the jth observation of the second - one. - - Examples: - Calculates the norm of a FDataGrid containing the functions y = 1 - and y = x defined in the interval [0,1]. - - >>> import skfda - >>> import numpy as np - >>> - >>> x = np.linspace(0,1,1001) - >>> fd = skfda.FDataGrid([np.ones(len(x)), x] ,x) - >>> skfda.misc.metrics.lp_norm(fd, p=2).round(2) - array([ 1. , 0.58]) - - As the norm with ``p=2`` is a common choice, one can use ``l2_norm`` - directly: - - >>> skfda.misc.metrics.l2_norm(fd).round(2) - array([ 1. , 0.58]) - - The lp norm is only defined if p >= 1. - - >>> skfda.misc.metrics.lp_norm(fd, p=0.5) - Traceback (most recent call last): - .... - ValueError: p (=0.5) must be equal or greater than 1. - - See also: - :class:`LpNorm` - - """ - return LpNorm(p=p, vector_norm=vector_norm)(fdata) - - -class NormInducedMetric(Metric[VectorType]): - r""" - Metric induced by a norm. - - Given a norm :math:`\| \cdot \|: X \rightarrow \mathbb{R}`, - returns the metric :math:`d: X \times X \rightarrow \mathbb{R}` induced - by the norm: - - .. math:: - d(f,g) = \|f - g\| - - Args: - norm: Norm used to induce the metric. - - Examples: - Computes the :math:`\mathbb{L}^2` distance between an object containing - functional data corresponding to the function :math:`y(x) = x` defined - over the interval [0, 1] and another one containing data of the - function :math:`y(x) = x/2`. - - Firstly we create the functional data. - - >>> import skfda - >>> import numpy as np - >>> - >>> x = np.linspace(0, 1, 1001) - >>> fd = skfda.FDataGrid([x], x) - >>> fd2 = skfda.FDataGrid([x/2], x) - - To construct the :math:`\mathbb{L}^2` distance it is used the - :math:`\mathbb{L}^2` norm wich it is used to compute the distance. 
- - >>> l2_distance = skfda.misc.metrics.NormInducedMetric(l2_norm) - >>> d = l2_distance(fd, fd2) - >>> float('%.3f'% d) - 0.289 - - """ - - def __init__(self, norm: Norm[VectorType]): - self.norm = norm - - def __call__(self, elem1: VectorType, elem2: VectorType) -> np.ndarray: - """Compute the induced norm between two vectors.""" - return self.norm(elem1 - elem2) - - def __repr__(self) -> str: - return f"{type(self).__name__}(norm={self.norm})" - - -@multimethod.multidispatch -def pairwise_metric_optimization( - metric: Any, - elem1: Any, - elem2: Optional[Any], -) -> np.ndarray: - r""" - Optimized computation of a pairwise metric. - - This is a generic function that can be subclassed for different - combinations of metric and operators in order to provide a more - efficient implementation for the pairwise metric matrix. - """ - return NotImplemented - - -class PairwiseMetric(Generic[MetricElementType]): - r"""Pairwise metric function. - - Computes a given metric pairwise. The matrix returned by the pairwise - metric is a matrix with as many rows as observations in the first object - and as many columns as observations in the second one. Each element - (i, j) of the matrix is the distance between the ith observation of the - first object and the jth observation of the second one. - - Args: - metric: Metric between two elements of a metric - space. - - """ - - def __init__( - self, - metric: Metric[MetricElementType], - ): - self.metric = metric - - def __call__( - self, - elem1: MetricElementType, - elem2: Optional[MetricElementType] = None, - ) -> np.ndarray: - """Evaluate the pairwise metric.""" - optimized = pairwise_metric_optimization(self.metric, elem1, elem2) - - return ( - _pairwise_symmetric(self.metric, elem1, elem2) - if optimized is NotImplemented - else optimized - ) - - def __repr__(self) -> str: - return f"{type(self).__name__}(metric={self.metric})" - - -class LpDistance(NormInducedMetric[FData]): - r"""Lp distance for FDataGrid objects. 
- - Calculates the distance between two functional objects. - - For each pair of observations f and g the distance between them is defined - as: - - .. math:: - d(f, g) = d(g, f) = \| f - g \|_p - - where :math:`\| {}\cdot{} \|_p` denotes the :func:`Lp norm `. - - The objects ``l1_distance``, ``l2_distance`` and ``linf_distance`` are - instances of this class with commonly used values of ``p``, namely 1, 2 and - infinity. - - Args: - p: p of the lp norm. Must be greater or equal - than 1. If ``p=math.inf`` it is used the L infinity metric. - Defaults to 2. - vector_norm: vector norm to apply. If it is a float, is the index of - the multivariate lp norm. Defaults to the same as ``p``. - - Examples: - Computes the distances between an object containing functional data - corresponding to the functions y = 1 and y = x defined over the - interval [0, 1] and another ones containing data of the functions y - = 0 and y = x/2. The result then is an array 2x2 with the computed - l2 distance between every pair of functions. - - >>> import skfda - >>> import numpy as np - >>> - >>> x = np.linspace(0, 1, 1001) - >>> fd = skfda.FDataGrid([np.ones(len(x))], x) - >>> fd2 = skfda.FDataGrid([np.zeros(len(x))], x) - >>> - >>> distance = skfda.misc.metrics.LpDistance(p=2) - >>> distance(fd, fd2).round(2) - array([ 1.]) - - - If the functional data are defined over a different set of points of - discretisation the functions returns an exception. - - >>> x = np.linspace(0, 2, 1001) - >>> fd2 = FDataGrid([np.zeros(len(x)), x/2 + 0.5], x) - >>> distance = skfda.misc.metrics.LpDistance(p=2) - >>> distance(fd, fd2) - Traceback (most recent call last): - ... - ValueError: ... 
- - """ # noqa: P102 - - def __init__( - self, - p: float, - vector_norm: Union[Norm[np.ndarray], float, None] = None, - ) -> None: - - self.p = p - self.vector_norm = vector_norm - norm = LpNorm(p=p, vector_norm=vector_norm) - - super().__init__(norm) - - def __repr__(self) -> str: - return ( - f"{type(self).__name__}(" - f"p={self.p}, vector_norm={self.vector_norm})" - ) - - -l1_distance = LpDistance(p=1) -l2_distance = LpDistance(p=2) -linf_distance = LpDistance(p=math.inf) - - -@pairwise_metric_optimization.register -def _pairwise_metric_optimization_lp_fdata( - metric: LpDistance, - elem1: FData, - elem2: Optional[FData], -) -> np.ndarray: - from ..misc import inner_product, inner_product_matrix - - vector_norm = metric.vector_norm - - if vector_norm is None: - vector_norm = metric.p - - # Special case, the inner product is heavily optimized - if metric.p == vector_norm == 2: - diag1 = inner_product(elem1, elem1) - diag2 = diag1 if elem2 is None else inner_product(elem2, elem2) - - if elem2 is None: - elem2 = elem1 - - inner_matrix = inner_product_matrix(elem1, elem2) - - distance_matrix_sqr = ( - -2 * inner_matrix - + diag1[:, np.newaxis] - + diag2[np.newaxis, :] - ) - - np.clip( - distance_matrix_sqr, - a_min=0, - a_max=None, - out=distance_matrix_sqr, - ) - - return np.sqrt(distance_matrix_sqr) - - return NotImplemented - - -def lp_distance( - fdata1: T, - fdata2: T, - *, - p: float, - vector_norm: Union[Norm[np.ndarray], float, None] = None, -) -> np.ndarray: - r""" - Lp distance for FDataGrid objects. - - Calculates the distance between two functional objects. - - For each pair of observations f and g the distance between them is defined - as: - - .. math:: - d(f, g) = d(g, f) = \| f - g \|_p - - where :math:`\| {}\cdot{} \|_p` denotes the :func:`Lp norm `. - - Note: - This function is a wrapper of :class:`LpDistance`, available only for - convenience. 
As the parameter ``p`` is mandatory, it cannot be used - where a fully-defined metric is required: use an instance of - :class:`LpDistance` in those cases. - - Args: - fdata1: First FData object. - fdata2: Second FData object. - p: p of the lp norm. Must be greater or equal - than 1. If ``p=math.inf`` it is used the L infinity metric. - Defaults to 2. - vector_norm: vector norm to apply. If it is a float, is the index of - the multivariate lp norm. Defaults to the same as ``p``. - - Returns: - Numpy vector where the i-th coordinate has the distance between the - i-th element of the first object and the i-th element of the second - one. - - Examples: - Computes the distances between an object containing functional data - corresponding to the functions y = 1 and y = x defined over the - interval [0, 1] and another ones containing data of the functions y - = 0 and y = x/2. The result then is an array 2x2 with the computed - l2 distance between every pair of functions. - - >>> import skfda - >>> import numpy as np - >>> - >>> x = np.linspace(0, 1, 1001) - >>> fd = skfda.FDataGrid([np.ones(len(x))], x) - >>> fd2 = skfda.FDataGrid([np.zeros(len(x))], x) - >>> - >>> skfda.misc.metrics.lp_distance(fd, fd2, p=2).round(2) - array([ 1.]) - - If the functional data are defined over a different set of points of - discretisation the functions returns an exception. - - >>> x = np.linspace(0, 2, 1001) - >>> fd2 = FDataGrid([np.zeros(len(x)), x/2 + 0.5], x) - >>> skfda.misc.metrics.lp_distance(fd, fd2, p=2) - Traceback (most recent call last): - ... - ValueError: ... - - See also: - :class:`~skfda.misc.metrics.LpDistance` - - """ # noqa: P102 - return LpDistance(p=p, vector_norm=vector_norm)(fdata1, fdata2) - - -def fisher_rao_distance( - fdata1: T, - fdata2: T, - *, - eval_points: np.ndarray = None, - _check: bool = True, -) -> np.ndarray: - r"""Compute the Fisher-Rao distance between two functional objects. 
- - Let :math:`f_i` and :math:`f_j` be two functional observations, and let - :math:`q_i` and :math:`q_j` be the corresponding SRSF - (see :class:`SRSF`), the fisher rao distance is defined as - - .. math:: - d_{FR}(f_i, f_j) = \| q_i - q_j \|_2 = - \left ( \int_0^1 sgn(\dot{f_i}(t))\sqrt{|\dot{f_i}(t)|} - - sgn(\dot{f_j}(t))\sqrt{|\dot{f_j}(t)|} dt \right )^{\frac{1}{2}} - - If the observations are distributions of random variables the distance will - match with the usual fisher-rao distance in non-parametric form for - probability distributions [S11-2]_. - - If the observations are defined in a :term:`domain` different than (0,1) - their domains are normalized to this interval with an affine - transformation. - - Args: - fdata1: First FData object. - fdata2: Second FData object. - eval_points: Array with points of evaluation. - - Returns: - Fisher rao distance. - - Raises: - ValueError: If the objects are not unidimensional. - - References: - .. [S11-2] Srivastava, Anuj et. al. Registration of Functional Data - Using Fisher-Rao Metric (2011). In *Function Representation and - Metric* (pp. 5-7). arXiv:1103.3817v2. 
- - """ - fdata1, fdata2 = _cast_to_grid( - fdata1, - fdata2, - eval_points=eval_points, - _check=_check, - ) - - # Both should have the same grid points - eval_points_normalized = _normalize_scale(fdata1.grid_points[0]) - - # Calculate the corresponding srsf and normalize to (0,1) - fdata1 = fdata1.copy( - grid_points=eval_points_normalized, - domain_range=(0, 1), - ) - fdata2 = fdata2.copy( - grid_points=eval_points_normalized, - domain_range=(0, 1), - ) - - srsf = SRSF(initial_value=0) - fdata1_srsf = srsf.fit_transform(fdata1) - fdata2_srsf = srsf.transform(fdata2) - - # Return the L2 distance of the SRSF - return l2_distance(fdata1_srsf, fdata2_srsf) - - -def amplitude_distance( - fdata1: T, - fdata2: T, - *, - lam: float = 0.0, - eval_points: np.ndarray = None, - _check: bool = True, - **kwargs: Any, -) -> np.ndarray: - r"""Compute the amplitude distance between two functional objects. - - Let :math:`f_i` and :math:`f_j` be two functional observations, and let - :math:`q_i` and :math:`q_j` be the corresponding SRSF - (see :class:`SRSF`), the amplitude distance is defined as - - .. math:: - d_{A}(f_i, f_j)=min_{\gamma \in \Gamma}d_{FR}(f_i \circ \gamma,f_j) - - A penalty term could be added to restrict the ammount of elasticity in the - alignment used. - - .. math:: - d_{\lambda}^2(f_i, f_j) =min_{\gamma \in \Gamma} \{ - d_{FR}^2(f_i \circ \gamma, f_j) + \lambda \mathcal{R}(\gamma) \} - - - Where :math:`d_{FR}` is the Fisher-Rao distance and the penalty term is - given by - - .. math:: - \mathcal{R}(\gamma) = \|\sqrt{\dot{\gamma}}- 1 \|_{\mathbb{L}^2}^2 - - See [SK16-4-10-1]_ for a detailed explanation. - - If the observations are defined in a :term:`domain` different than (0,1) - their domains are normalized to this interval with an affine - transformation. - - Args: - fdata1: First FData object. - fdata2: Second FData object. - lam: Penalty term to restric the elasticity. - eval_points: Array with points of evaluation. 
- kwargs: Name arguments to be passed to - :func:`elastic_registration_warping`. - - Returns: - Elastic distance. - - Raises: - ValueError: If the objects are not unidimensional. - - References: - .. [SK16-4-10-1] Srivastava, Anuj & Klassen, Eric P. (2016). - Functional and shape data analysis. In *Amplitude Space and a - Metric Structure* (pp. 107-109). Springer. - """ - fdata1, fdata2 = _cast_to_grid( - fdata1, - fdata2, - eval_points=eval_points, - _check=_check, - ) - - # Both should have the same grid points - eval_points_normalized = _normalize_scale(fdata1.grid_points[0]) - - # Calculate the corresponding srsf and normalize to (0,1) - fdata1 = fdata1.copy( - grid_points=eval_points_normalized, - domain_range=(0, 1), - ) - fdata2 = fdata2.copy( - grid_points=eval_points_normalized, - domain_range=(0, 1), - ) - - elastic_registration = ElasticRegistration( - template=fdata2, - penalty=lam, - output_points=eval_points_normalized, - **kwargs, - ) - - fdata1_reg = elastic_registration.fit_transform(fdata1) - - srsf = SRSF(initial_value=0) - fdata1_reg_srsf = srsf.fit_transform(fdata1_reg) - fdata2_srsf = srsf.transform(fdata2) - distance = l2_distance(fdata1_reg_srsf, fdata2_srsf) - - if lam != 0.0: - # L2 norm || sqrt(Dh) - 1 ||^2 - warping_deriv = elastic_registration.warping_.derivative() - penalty = warping_deriv(eval_points_normalized)[0, ..., 0] - penalty = np.sqrt(penalty, out=penalty) - penalty -= 1 - penalty = np.square(penalty, out=penalty) - penalty = scipy.integrate.simps(penalty, x=eval_points_normalized) - - distance = np.sqrt(distance**2 + lam * penalty) - - return distance - - -def phase_distance( - fdata1: T, - fdata2: T, - *, - lam: float = 0.0, - eval_points: np.ndarray = None, - _check: bool = True, -) -> np.ndarray: - r"""Compute the phase distance between two functional objects. 
- - Let :math:`f_i` and :math:`f_j` be two functional observations, and let - :math:`\gamma_{ij}` the corresponding warping used in the elastic - registration to align :math:`f_i` to :math:`f_j` (see - :func:`elastic_registration`). The phase distance between :math:`f_i` - and :math:`f_j` is defined as - - .. math:: - d_{P}(f_i, f_j) = d_{FR}(\gamma_{ij}, \gamma_{id}) = - arcos \left ( \int_0^1 \sqrt {\dot \gamma_{ij}(t)} dt \right ) - - See [SK16-4-10-2]_ for a detailed explanation. - - If the observations are defined in a :term:`domain` different than (0,1) - their domains are normalized to this interval with an affine - transformation. - - Args: - fdata1: First FData object. - fdata2: Second FData object. - lam: Penalty term to restric the elasticity. - eval_points (array_like, optional): Array with points of evaluation. - - Returns: - Phase distance between the objects. - - Raises: - ValueError: If the objects are not unidimensional. - - References: - .. [SK16-4-10-2] Srivastava, Anuj & Klassen, Eric P. (2016). - Functional and shape data analysis. In *Phase Space and a Metric - Structure* (pp. 109-111). Springer. 
- """ - fdata1, fdata2 = _cast_to_grid( - fdata1, - fdata2, - eval_points=eval_points, - _check=_check, - ) - - # Rescale in the interval (0,1) - eval_points_normalized = _normalize_scale(fdata1.grid_points[0]) - - # Calculate the corresponding srsf and normalize to (0,1) - fdata1 = fdata1.copy( - grid_points=eval_points_normalized, - domain_range=(0, 1), - ) - fdata2 = fdata2.copy( - grid_points=eval_points_normalized, - domain_range=(0, 1), - ) - - elastic_registration = ElasticRegistration( - penalty=lam, - template=fdata2, - output_points=eval_points_normalized, - ) - - elastic_registration.fit_transform(fdata1) - - warping_deriv = elastic_registration.warping_.derivative() - derivative_warping = warping_deriv(eval_points_normalized)[0, ..., 0] - - derivative_warping = np.sqrt(derivative_warping, out=derivative_warping) - - d = scipy.integrate.simps(derivative_warping, x=eval_points_normalized) - d = np.clip(d, -1, 1) - - return np.arccos(d) - - -def warping_distance( - warping1: T, - warping2: T, - *, - eval_points: np.ndarray = None, - _check: bool = True, -) -> np.ndarray: - r"""Compute the distance between warpings functions. - - Let :math:`\gamma_i` and :math:`\gamma_j` be two warpings, defined in - :math:`\gamma_i:[a,b] \rightarrow [a,b]`. The distance in the - space of warping functions, :math:`\Gamma`, with the riemannian metric - given by the fisher-rao inner product can be computed using the structure - of hilbert sphere in their srsf's. - - .. math:: - d_{\Gamma}(\gamma_i, \gamma_j) = cos^{-1} \left ( \int_0^1 - \sqrt{\dot \gamma_i(t)\dot \gamma_j(t)}dt \right ) - - See [SK16-4-11-2]_ for a detailed explanation. - - If the warpings are not defined in [0,1], an affine transformation is maked - to change the :term:`domain`. - - Args: - warping1: First warping. - warping2: Second warping. - eval_points: Array with points of evaluation. - - Returns: - Distance between warpings: - - Raises: - ValueError: If the objects are not unidimensional. 
- - References: - .. [SK16-4-11-2] Srivastava, Anuj & Klassen, Eric P. (2016). - Functional and shape data analysis. In *Probability Density - Functions* (pp. 113-117). Springer. - - """ - warping1, warping2 = _cast_to_grid( - warping1, - warping2, - eval_points=eval_points, - _check=_check, - ) - - # Normalization of warping to (0,1)x(0,1) - warping1 = normalize_warping(warping1, (0, 1)) - warping2 = normalize_warping(warping2, (0, 1)) - - warping1_data = warping1.derivative().data_matrix[0, ..., 0] - warping2_data = warping2.derivative().data_matrix[0, ..., 0] - - # Derivative approximations can have negatives, specially in the - # borders. - warping1_data[warping1_data < 0] = 0 - warping2_data[warping2_data < 0] = 0 - - # In this case the srsf is the sqrt(gamma') - srsf_warping1 = np.sqrt(warping1_data, out=warping1_data) - srsf_warping2 = np.sqrt(warping2_data, out=warping2_data) - - product = np.multiply(srsf_warping1, srsf_warping2, out=srsf_warping1) - - d = scipy.integrate.simps(product, x=warping1.grid_points[0]) - d = np.clip(d, -1, 1) - - return np.arccos(d) diff --git a/skfda/misc/metrics/__init__.py b/skfda/misc/metrics/__init__.py new file mode 100644 index 000000000..d8dc69ebd --- /dev/null +++ b/skfda/misc/metrics/__init__.py @@ -0,0 +1,22 @@ +"""Metrics, norms and related utilities.""" + +from ._elastic_metrics import ( + amplitude_distance, + fisher_rao_distance, + phase_distance, + warping_distance, +) +from ._lp_distances import ( + LpDistance, + l1_distance, + l2_distance, + linf_distance, + lp_distance, +) +from ._lp_norms import LpNorm, l1_norm, l2_norm, linf_norm, lp_norm +from ._typing import PRECOMPUTED, Metric, Norm +from ._utils import ( + NormInducedMetric, + PairwiseMetric, + pairwise_metric_optimization, +) diff --git a/skfda/misc/metrics/_elastic_metrics.py b/skfda/misc/metrics/_elastic_metrics.py new file mode 100644 index 000000000..871bac8bd --- /dev/null +++ b/skfda/misc/metrics/_elastic_metrics.py @@ -0,0 +1,348 @@ +"""Elastic 
metrics.""" + +from typing import Any, TypeVar + +import numpy as np +import scipy.integrate + +from ...preprocessing.registration import ( + ElasticRegistration, + normalize_warping, +) +from ...preprocessing.registration._warping import _normalize_scale +from ...preprocessing.registration.elastic import SRSF +from ...representation import FData +from ._lp_distances import l2_distance +from ._utils import _cast_to_grid + +T = TypeVar("T", bound=FData) + + +def fisher_rao_distance( + fdata1: T, + fdata2: T, + *, + eval_points: np.ndarray = None, + _check: bool = True, +) -> np.ndarray: + r"""Compute the Fisher-Rao distance between two functional objects. + + Let :math:`f_i` and :math:`f_j` be two functional observations, and let + :math:`q_i` and :math:`q_j` be the corresponding SRSF + (see :class:`SRSF`), the fisher rao distance is defined as + + .. math:: + d_{FR}(f_i, f_j) = \| q_i - q_j \|_2 = + \left ( \int_0^1 sgn(\dot{f_i}(t))\sqrt{|\dot{f_i}(t)|} - + sgn(\dot{f_j}(t))\sqrt{|\dot{f_j}(t)|} dt \right )^{\frac{1}{2}} + + If the observations are distributions of random variables the distance will + match with the usual fisher-rao distance in non-parametric form for + probability distributions [S11-2]_. + + If the observations are defined in a :term:`domain` different than (0,1) + their domains are normalized to this interval with an affine + transformation. + + Args: + fdata1: First FData object. + fdata2: Second FData object. + eval_points: Array with points of evaluation. + + Returns: + Fisher rao distance. + + Raises: + ValueError: If the objects are not unidimensional. + + References: + .. [S11-2] Srivastava, Anuj et. al. Registration of Functional Data + Using Fisher-Rao Metric (2011). In *Function Representation and + Metric* (pp. 5-7). arXiv:1103.3817v2. 
+ + """ + fdata1, fdata2 = _cast_to_grid( + fdata1, + fdata2, + eval_points=eval_points, + _check=_check, + ) + + # Both should have the same grid points + eval_points_normalized = _normalize_scale(fdata1.grid_points[0]) + + # Calculate the corresponding srsf and normalize to (0,1) + fdata1 = fdata1.copy( + grid_points=eval_points_normalized, + domain_range=(0, 1), + ) + fdata2 = fdata2.copy( + grid_points=eval_points_normalized, + domain_range=(0, 1), + ) + + srsf = SRSF(initial_value=0) + fdata1_srsf = srsf.fit_transform(fdata1) + fdata2_srsf = srsf.transform(fdata2) + + # Return the L2 distance of the SRSF + return l2_distance(fdata1_srsf, fdata2_srsf) + + +def amplitude_distance( + fdata1: T, + fdata2: T, + *, + lam: float = 0.0, + eval_points: np.ndarray = None, + _check: bool = True, + **kwargs: Any, +) -> np.ndarray: + r"""Compute the amplitude distance between two functional objects. + + Let :math:`f_i` and :math:`f_j` be two functional observations, and let + :math:`q_i` and :math:`q_j` be the corresponding SRSF + (see :class:`SRSF`), the amplitude distance is defined as + + .. math:: + d_{A}(f_i, f_j)=min_{\gamma \in \Gamma}d_{FR}(f_i \circ \gamma,f_j) + + A penalty term could be added to restrict the ammount of elasticity in the + alignment used. + + .. math:: + d_{\lambda}^2(f_i, f_j) =min_{\gamma \in \Gamma} \{ + d_{FR}^2(f_i \circ \gamma, f_j) + \lambda \mathcal{R}(\gamma) \} + + + Where :math:`d_{FR}` is the Fisher-Rao distance and the penalty term is + given by + + .. math:: + \mathcal{R}(\gamma) = \|\sqrt{\dot{\gamma}}- 1 \|_{\mathbb{L}^2}^2 + + See [SK16-4-10-1]_ for a detailed explanation. + + If the observations are defined in a :term:`domain` different than (0,1) + their domains are normalized to this interval with an affine + transformation. + + Args: + fdata1: First FData object. + fdata2: Second FData object. + lam: Penalty term to restric the elasticity. + eval_points: Array with points of evaluation. 
+ kwargs: Name arguments to be passed to + :func:`elastic_registration_warping`. + + Returns: + Elastic distance. + + Raises: + ValueError: If the objects are not unidimensional. + + References: + .. [SK16-4-10-1] Srivastava, Anuj & Klassen, Eric P. (2016). + Functional and shape data analysis. In *Amplitude Space and a + Metric Structure* (pp. 107-109). Springer. + """ + fdata1, fdata2 = _cast_to_grid( + fdata1, + fdata2, + eval_points=eval_points, + _check=_check, + ) + + # Both should have the same grid points + eval_points_normalized = _normalize_scale(fdata1.grid_points[0]) + + # Calculate the corresponding srsf and normalize to (0,1) + fdata1 = fdata1.copy( + grid_points=eval_points_normalized, + domain_range=(0, 1), + ) + fdata2 = fdata2.copy( + grid_points=eval_points_normalized, + domain_range=(0, 1), + ) + + elastic_registration = ElasticRegistration( + template=fdata2, + penalty=lam, + output_points=eval_points_normalized, + **kwargs, + ) + + fdata1_reg = elastic_registration.fit_transform(fdata1) + + srsf = SRSF(initial_value=0) + fdata1_reg_srsf = srsf.fit_transform(fdata1_reg) + fdata2_srsf = srsf.transform(fdata2) + distance = l2_distance(fdata1_reg_srsf, fdata2_srsf) + + if lam != 0.0: + # L2 norm || sqrt(Dh) - 1 ||^2 + warping_deriv = elastic_registration.warping_.derivative() + penalty = warping_deriv(eval_points_normalized)[0, ..., 0] + penalty = np.sqrt(penalty, out=penalty) + penalty -= 1 + penalty = np.square(penalty, out=penalty) + penalty = scipy.integrate.simps(penalty, x=eval_points_normalized) + + distance = np.sqrt(distance**2 + lam * penalty) + + return distance + + +def phase_distance( + fdata1: T, + fdata2: T, + *, + lam: float = 0.0, + eval_points: np.ndarray = None, + _check: bool = True, +) -> np.ndarray: + r"""Compute the phase distance between two functional objects. 
+ + Let :math:`f_i` and :math:`f_j` be two functional observations, and let + :math:`\gamma_{ij}` the corresponding warping used in the elastic + registration to align :math:`f_i` to :math:`f_j` (see + :func:`elastic_registration`). The phase distance between :math:`f_i` + and :math:`f_j` is defined as + + .. math:: + d_{P}(f_i, f_j) = d_{FR}(\gamma_{ij}, \gamma_{id}) = + arcos \left ( \int_0^1 \sqrt {\dot \gamma_{ij}(t)} dt \right ) + + See [SK16-4-10-2]_ for a detailed explanation. + + If the observations are defined in a :term:`domain` different than (0,1) + their domains are normalized to this interval with an affine + transformation. + + Args: + fdata1: First FData object. + fdata2: Second FData object. + lam: Penalty term to restric the elasticity. + eval_points (array_like, optional): Array with points of evaluation. + + Returns: + Phase distance between the objects. + + Raises: + ValueError: If the objects are not unidimensional. + + References: + .. [SK16-4-10-2] Srivastava, Anuj & Klassen, Eric P. (2016). + Functional and shape data analysis. In *Phase Space and a Metric + Structure* (pp. 109-111). Springer. 
+ """ + fdata1, fdata2 = _cast_to_grid( + fdata1, + fdata2, + eval_points=eval_points, + _check=_check, + ) + + # Rescale in the interval (0,1) + eval_points_normalized = _normalize_scale(fdata1.grid_points[0]) + + # Calculate the corresponding srsf and normalize to (0,1) + fdata1 = fdata1.copy( + grid_points=eval_points_normalized, + domain_range=(0, 1), + ) + fdata2 = fdata2.copy( + grid_points=eval_points_normalized, + domain_range=(0, 1), + ) + + elastic_registration = ElasticRegistration( + penalty=lam, + template=fdata2, + output_points=eval_points_normalized, + ) + + elastic_registration.fit_transform(fdata1) + + warping_deriv = elastic_registration.warping_.derivative() + derivative_warping = warping_deriv(eval_points_normalized)[0, ..., 0] + + derivative_warping = np.sqrt(derivative_warping, out=derivative_warping) + + d = scipy.integrate.simps(derivative_warping, x=eval_points_normalized) + d = np.clip(d, -1, 1) + + return np.arccos(d) + + +def warping_distance( + warping1: T, + warping2: T, + *, + eval_points: np.ndarray = None, + _check: bool = True, +) -> np.ndarray: + r"""Compute the distance between warpings functions. + + Let :math:`\gamma_i` and :math:`\gamma_j` be two warpings, defined in + :math:`\gamma_i:[a,b] \rightarrow [a,b]`. The distance in the + space of warping functions, :math:`\Gamma`, with the riemannian metric + given by the fisher-rao inner product can be computed using the structure + of hilbert sphere in their srsf's. + + .. math:: + d_{\Gamma}(\gamma_i, \gamma_j) = cos^{-1} \left ( \int_0^1 + \sqrt{\dot \gamma_i(t)\dot \gamma_j(t)}dt \right ) + + See [SK16-4-11-2]_ for a detailed explanation. + + If the warpings are not defined in [0,1], an affine transformation is maked + to change the :term:`domain`. + + Args: + warping1: First warping. + warping2: Second warping. + eval_points: Array with points of evaluation. + + Returns: + Distance between warpings: + + Raises: + ValueError: If the objects are not unidimensional. 
+ + References: + .. [SK16-4-11-2] Srivastava, Anuj & Klassen, Eric P. (2016). + Functional and shape data analysis. In *Probability Density + Functions* (pp. 113-117). Springer. + + """ + warping1, warping2 = _cast_to_grid( + warping1, + warping2, + eval_points=eval_points, + _check=_check, + ) + + # Normalization of warping to (0,1)x(0,1) + warping1 = normalize_warping(warping1, (0, 1)) + warping2 = normalize_warping(warping2, (0, 1)) + + warping1_data = warping1.derivative().data_matrix[0, ..., 0] + warping2_data = warping2.derivative().data_matrix[0, ..., 0] + + # Derivative approximations can have negatives, specially in the + # borders. + warping1_data[warping1_data < 0] = 0 + warping2_data[warping2_data < 0] = 0 + + # In this case the srsf is the sqrt(gamma') + srsf_warping1 = np.sqrt(warping1_data, out=warping1_data) + srsf_warping2 = np.sqrt(warping2_data, out=warping2_data) + + product = np.multiply(srsf_warping1, srsf_warping2, out=srsf_warping1) + + d = scipy.integrate.simps(product, x=warping1.grid_points[0]) + d = np.clip(d, -1, 1) + + return np.arccos(d) diff --git a/skfda/misc/metrics/_lp_distances.py b/skfda/misc/metrics/_lp_distances.py new file mode 100644 index 000000000..0c6e2d6f1 --- /dev/null +++ b/skfda/misc/metrics/_lp_distances.py @@ -0,0 +1,209 @@ +"""Implementation of Lp distances.""" + +import math +from typing import Optional, TypeVar, Union + +import numpy as np +from typing_extensions import Final + +from ...representation import FData +from ._lp_norms import LpNorm +from ._typing import Norm +from ._utils import NormInducedMetric, pairwise_metric_optimization + +T = TypeVar("T", bound=FData) + + +class LpDistance(NormInducedMetric[FData]): + r"""Lp distance for FDataGrid objects. + + Calculates the distance between two functional objects. + + For each pair of observations f and g the distance between them is defined + as: + + .. 
math:: + d(f, g) = d(g, f) = \| f - g \|_p + + where :math:`\| {}\cdot{} \|_p` denotes the :func:`Lp norm `. + + The objects ``l1_distance``, ``l2_distance`` and ``linf_distance`` are + instances of this class with commonly used values of ``p``, namely 1, 2 and + infinity. + + Args: + p: p of the lp norm. Must be greater or equal + than 1. If ``p=math.inf`` it is used the L infinity metric. + Defaults to 2. + vector_norm: vector norm to apply. If it is a float, is the index of + the multivariate lp norm. Defaults to the same as ``p``. + + Examples: + Computes the distances between an object containing functional data + corresponding to the functions y = 1 and y = x defined over the + interval [0, 1] and another ones containing data of the functions y + = 0 and y = x/2. The result then is an array 2x2 with the computed + l2 distance between every pair of functions. + + >>> import skfda + >>> import numpy as np + >>> + >>> x = np.linspace(0, 1, 1001) + >>> fd = skfda.FDataGrid([np.ones(len(x))], x) + >>> fd2 = skfda.FDataGrid([np.zeros(len(x))], x) + >>> + >>> distance = skfda.misc.metrics.LpDistance(p=2) + >>> distance(fd, fd2).round(2) + array([ 1.]) + + + If the functional data are defined over a different set of points of + discretisation the functions returns an exception. + + >>> x = np.linspace(0, 2, 1001) + >>> fd2 = skfda.FDataGrid([np.zeros(len(x)), x/2 + 0.5], x) + >>> distance = skfda.misc.metrics.LpDistance(p=2) + >>> distance(fd, fd2) + Traceback (most recent call last): + ... + ValueError: ... 
+ + """ # noqa: P102 + + def __init__( + self, + p: float, + vector_norm: Union[Norm[np.ndarray], float, None] = None, + ) -> None: + + self.p = p + self.vector_norm = vector_norm + norm = LpNorm(p=p, vector_norm=vector_norm) + + super().__init__(norm) + + def __repr__(self) -> str: + return ( + f"{type(self).__name__}(" + f"p={self.p}, vector_norm={self.vector_norm})" + ) + + +l1_distance: Final = LpDistance(p=1) +l2_distance: Final = LpDistance(p=2) +linf_distance: Final = LpDistance(p=math.inf) + + +@pairwise_metric_optimization.register +def _pairwise_metric_optimization_lp_fdata( + metric: LpDistance, + elem1: FData, + elem2: Optional[FData], +) -> np.ndarray: + from ...misc import inner_product, inner_product_matrix + + vector_norm = metric.vector_norm + + if vector_norm is None: + vector_norm = metric.p + + # Special case, the inner product is heavily optimized + if metric.p == vector_norm == 2: + diag1 = inner_product(elem1, elem1) + diag2 = diag1 if elem2 is None else inner_product(elem2, elem2) + + if elem2 is None: + elem2 = elem1 + + inner_matrix = inner_product_matrix(elem1, elem2) + + distance_matrix_sqr = ( + -2 * inner_matrix + + diag1[:, np.newaxis] + + diag2[np.newaxis, :] + ) + + np.clip( + distance_matrix_sqr, + a_min=0, + a_max=None, + out=distance_matrix_sqr, + ) + + return np.sqrt(distance_matrix_sqr) + + return NotImplemented + + +def lp_distance( + fdata1: T, + fdata2: T, + *, + p: float, + vector_norm: Union[Norm[np.ndarray], float, None] = None, +) -> np.ndarray: + r""" + Lp distance for FDataGrid objects. + + Calculates the distance between two functional objects. + + For each pair of observations f and g the distance between them is defined + as: + + .. math:: + d(f, g) = d(g, f) = \| f - g \|_p + + where :math:`\| {}\cdot{} \|_p` denotes the :func:`Lp norm `. + + Note: + This function is a wrapper of :class:`LpDistance`, available only for + convenience. 
As the parameter ``p`` is mandatory, it cannot be used + where a fully-defined metric is required: use an instance of + :class:`LpDistance` in those cases. + + Args: + fdata1: First FData object. + fdata2: Second FData object. + p: p of the lp norm. Must be greater or equal + than 1. If ``p=math.inf`` it is used the L infinity metric. + Defaults to 2. + vector_norm: vector norm to apply. If it is a float, is the index of + the multivariate lp norm. Defaults to the same as ``p``. + + Returns: + Numpy vector where the i-th coordinate has the distance between the + i-th element of the first object and the i-th element of the second + one. + + Examples: + Computes the distances between an object containing functional data + corresponding to the functions y = 1 and y = x defined over the + interval [0, 1] and another ones containing data of the functions y + = 0 and y = x/2. The result then is an array 2x2 with the computed + l2 distance between every pair of functions. + + >>> import skfda + >>> import numpy as np + >>> + >>> x = np.linspace(0, 1, 1001) + >>> fd = skfda.FDataGrid([np.ones(len(x))], x) + >>> fd2 = skfda.FDataGrid([np.zeros(len(x))], x) + >>> + >>> skfda.misc.metrics.lp_distance(fd, fd2, p=2).round(2) + array([ 1.]) + + If the functional data are defined over a different set of points of + discretisation the functions returns an exception. + + >>> x = np.linspace(0, 2, 1001) + >>> fd2 = skfda.FDataGrid([np.zeros(len(x)), x/2 + 0.5], x) + >>> skfda.misc.metrics.lp_distance(fd, fd2, p=2) + Traceback (most recent call last): + ... + ValueError: ... 
+ + See also: + :class:`~skfda.misc.metrics.LpDistance` + + """ # noqa: P102 + return LpDistance(p=p, vector_norm=vector_norm)(fdata1, fdata2) diff --git a/skfda/misc/metrics/_lp_norms.py b/skfda/misc/metrics/_lp_norms.py new file mode 100644 index 000000000..e2db76cda --- /dev/null +++ b/skfda/misc/metrics/_lp_norms.py @@ -0,0 +1,264 @@ +"""Implementation of Lp norms.""" +import math +from builtins import isinstance +from typing import Union + +import numpy as np +import scipy.integrate +from typing_extensions import Final + +from ...representation import FData, FDataBasis +from ._typing import Norm + + +class LpNorm(Norm[FData]): + r""" + Norm of all the observations in a FDataGrid object. + + For each observation f the Lp norm is defined as: + + .. math:: + \| f \| = \left( \int_D \| f \|^p dx \right)^{ + \frac{1}{p}} + + Where D is the :term:`domain` over which the functions are defined. + + The integral is approximated using Simpson's rule. + + In general, if f is a multivariate function :math:`(f_1, ..., f_d)`, and + :math:`D \subset \mathbb{R}^n`, it is applied the following generalization + of the Lp norm. + + .. math:: + \| f \| = \left( \int_D \| f \|_{*}^p dx \right)^{ + \frac{1}{p}} + + Where :math:`\| \cdot \|_*` denotes a vectorial norm. See + :func:`vectorial_norm` to more information. + + For example, if :math:`f: \mathbb{R}^2 \rightarrow \mathbb{R}^2`, and + :math:`\| \cdot \|_*` is the euclidean norm + :math:`\| (x,y) \|_* = \sqrt{x^2 + y^2}`, the lp norm applied is + + .. math:: + \| f \| = \left( \int \int_D \left ( \sqrt{ \| f_1(x,y) + \|^2 + \| f_2(x,y) \|^2 } \right )^p dxdy \right)^{ + \frac{1}{p}} + + The objects ``l1_norm``, ``l2_norm`` and ``linf_norm`` are instances of + this class with commonly used values of ``p``, namely 1, 2 and infinity. + + Args: + p: p of the lp norm. Must be greater or equal + than 1. If ``p=math.inf`` it is used the L infinity metric. + Defaults to 2. + vector_norm: vector norm to apply. 
If it is a float, is the index of + the multivariate lp norm. Defaults to the same as ``p``. + + Examples: + Calculates the norm of a FDataGrid containing the functions y = 1 + and y = x defined in the interval [0,1]. + + >>> import skfda + >>> import numpy as np + >>> + >>> x = np.linspace(0, 1, 1001) + >>> fd = skfda.FDataGrid([np.ones(len(x)), x] ,x) + >>> norm = skfda.misc.metrics.LpNorm(2) + >>> norm(fd).round(2) + array([ 1. , 0.58]) + + As the norm with `p=2` is a common choice, one can use `l2_norm` + directly: + + >>> skfda.misc.metrics.l2_norm(fd).round(2) + array([ 1. , 0.58]) + + The lp norm is only defined if p >= 1. + + >>> norm = skfda.misc.metrics.LpNorm(0.5) + Traceback (most recent call last): + .... + ValueError: p (=0.5) must be equal or greater than 1. + + """ + + def __init__( + self, + p: float, + vector_norm: Union[Norm[np.ndarray], float, None] = None, + ) -> None: + + # Checks that the lp normed is well defined + if not np.isinf(p) and p < 1: + raise ValueError(f"p (={p}) must be equal or greater than 1.") + + self.p = p + self.vector_norm = vector_norm + + def __repr__(self) -> str: + return ( + f"{type(self).__name__}(" + f"p={self.p}, vector_norm={self.vector_norm})" + ) + + def __call__(self, fdata: FData) -> np.ndarray: + """Compute the Lp norm of a functional data object.""" + from ...misc import inner_product + + vector_norm = self.vector_norm + + if vector_norm is None: + vector_norm = self.p + + # Special case, the inner product is heavily optimized + if self.p == vector_norm == 2: + return np.sqrt(inner_product(fdata, fdata)) + + if isinstance(fdata, FDataBasis): + if self.p != 2: + raise NotImplementedError + + start, end = fdata.domain_range[0] + integral = scipy.integrate.quad_vec( + lambda x: np.power(np.abs(fdata(x)), self.p), + start, + end, + ) + res = np.sqrt(integral[0]).flatten() + + else: + data_matrix = fdata.data_matrix + original_shape = data_matrix.shape + data_matrix = data_matrix.reshape(-1, original_shape[-1]) + 
+ data_matrix = (np.linalg.norm( + fdata.data_matrix, + ord=vector_norm, + axis=-1, + keepdims=True, + ) if isinstance(vector_norm, (float, int)) + else vector_norm(data_matrix) + ) + data_matrix = data_matrix.reshape(original_shape[:-1] + (1,)) + + if np.isinf(self.p): + + res = np.max( + data_matrix, + axis=tuple(range(1, data_matrix.ndim)), + ) + + elif fdata.dim_domain == 1: + + # Computes the norm, approximating the integral with Simpson's + # rule. + res = scipy.integrate.simps( + data_matrix[..., 0] ** self.p, + x=fdata.grid_points, + ) ** (1 / self.p) + + else: + # Needed to perform surface integration + return NotImplemented + + if len(res) == 1: + return res[0] + + return res + + +l1_norm: Final = LpNorm(1) +l2_norm: Final = LpNorm(2) +linf_norm: Final = LpNorm(math.inf) + + +def lp_norm( + fdata: FData, + *, + p: float, + vector_norm: Union[Norm[np.ndarray], float, None] = None, +) -> np.ndarray: + r"""Calculate the norm of all the observations in a FDataGrid object. + + For each observation f the Lp norm is defined as: + + .. math:: + \| f \| = \left( \int_D \| f \|^p dx \right)^{ + \frac{1}{p}} + + Where D is the :term:`domain` over which the functions are defined. + + The integral is approximated using Simpson's rule. + + In general, if f is a multivariate function :math:`(f_1, ..., f_d)`, and + :math:`D \subset \mathbb{R}^n`, it is applied the following generalization + of the Lp norm. + + .. math:: + \| f \| = \left( \int_D \| f \|_{*}^p dx \right)^{ + \frac{1}{p}} + + Where :math:`\| \cdot \|_*` denotes a vectorial norm. See + :func:`vectorial_norm` to more information. + + For example, if :math:`f: \mathbb{R}^2 \rightarrow \mathbb{R}^2`, and + :math:`\| \cdot \|_*` is the euclidean norm + :math:`\| (x,y) \|_* = \sqrt{x^2 + y^2}`, the lp norm applied is + + .. 
math:: + \| f \| = \left( \int \int_D \left ( \sqrt{ \| f_1(x,y) + \|^2 + \| f_2(x,y) \|^2 } \right )^p dxdy \right)^{ + \frac{1}{p}} + + Note: + This function is a wrapper of :class:`LpNorm`, available only for + convenience. As the parameter ``p`` is mandatory, it cannot be used + where a fully-defined norm is required: use an instance of + :class:`LpNorm` in those cases. + + Args: + fdata: FData object. + p: p of the lp norm. Must be greater than or equal + to 1. If ``p=math.inf`` the L infinity norm is used. + This parameter is mandatory. + vector_norm: vector norm to apply. If it is a float, is the index of + the multivariate lp norm. Defaults to the same as ``p``. + + Returns: + numpy.ndarray: Vector with as many entries as observations in + the functional data object. The i-th entry of the vector is + the Lp norm, for the given values of ``p`` and + ``vector_norm``, of the i-th observation of the + object. + + Examples: + Calculates the norm of a FDataGrid containing the functions y = 1 + and y = x defined in the interval [0,1]. + + >>> import skfda + >>> import numpy as np + >>> + >>> x = np.linspace(0,1,1001) + >>> fd = skfda.FDataGrid([np.ones(len(x)), x] ,x) + >>> skfda.misc.metrics.lp_norm(fd, p=2).round(2) + array([ 1. , 0.58]) + + As the norm with ``p=2`` is a common choice, one can use ``l2_norm`` + directly: + + >>> skfda.misc.metrics.l2_norm(fd).round(2) + array([ 1. , 0.58]) + + The lp norm is only defined if p >= 1. + + >>> skfda.misc.metrics.lp_norm(fd, p=0.5) + Traceback (most recent call last): + .... + ValueError: p (=0.5) must be equal or greater than 1.
+ + See also: + :class:`LpNorm` + + """ + return LpNorm(p=p, vector_norm=vector_norm)(fdata) diff --git a/skfda/misc/metrics/_typing.py b/skfda/misc/metrics/_typing.py new file mode 100644 index 000000000..1b3e76c84 --- /dev/null +++ b/skfda/misc/metrics/_typing.py @@ -0,0 +1,75 @@ +"""Typing for norms and metrics.""" +import enum +from abc import abstractmethod +from builtins import isinstance +from typing import Any, TypeVar, Union, overload + +import numpy as np +from typing_extensions import Final, Literal, Protocol + +from ...representation._typing import Vector + +VectorType = TypeVar("VectorType", contravariant=True, bound=Vector) +MetricElementType = TypeVar("MetricElementType", contravariant=True) + + +class _MetricSingletons(enum.Enum): + PRECOMPUTED = "precomputed" + + +PRECOMPUTED: Final = _MetricSingletons.PRECOMPUTED + +_PrecomputedTypes = Literal[ + _MetricSingletons.PRECOMPUTED, + "precomputed", +] + + +class Norm(Protocol[VectorType]): + """Protocol for a norm of a vector.""" + + @abstractmethod + def __call__(self, __vector: VectorType) -> np.ndarray: # noqa: WPS112 + """Compute the norm of a vector.""" + + +class Metric(Protocol[MetricElementType]): + """Protocol for a metric between two elements of a metric space.""" + + @abstractmethod + def __call__( + self, + __e1: MetricElementType, # noqa: WPS112 + __e2: MetricElementType, # noqa: WPS112 + ) -> np.ndarray: + """Compute the distance between two elements.""" + + +_NonStringMetric = TypeVar( + "_NonStringMetric", + bound=Union[ + Metric[Any], + _MetricSingletons, + ], +) + + +@overload +def _parse_metric( + metric: str, +) -> _MetricSingletons: + pass + + +@overload +def _parse_metric( + metric: _NonStringMetric, +) -> _NonStringMetric: + pass + + +def _parse_metric( + metric: Union[Metric[Any], _MetricSingletons, str], +) -> Union[Metric[Any], _MetricSingletons]: + + return _MetricSingletons(metric) if isinstance(metric, str) else metric diff --git a/skfda/misc/metrics/_utils.py
b/skfda/misc/metrics/_utils.py new file mode 100644 index 000000000..cced983ac --- /dev/null +++ b/skfda/misc/metrics/_utils.py @@ -0,0 +1,187 @@ +"""Utilities for norms and metrics.""" +from typing import Any, Generic, Optional, Tuple, TypeVar + +import multimethod +import numpy as np + +from ..._utils import _pairwise_symmetric +from ...representation import FData, FDataGrid +from ._typing import Metric, MetricElementType, Norm, VectorType + +T = TypeVar("T", bound=FData) + + +def _check_compatible(fdata1: T, fdata2: T) -> None: + + if isinstance(fdata1, FData) and isinstance(fdata2, FData): + if ( + fdata2.dim_codomain != fdata1.dim_codomain + or fdata2.dim_domain != fdata1.dim_domain + ): + raise ValueError("Objects should have the same dimensions") + + if not np.array_equal(fdata1.domain_range, fdata2.domain_range): + raise ValueError("Domain ranges for both objects must be equal") + + +def _cast_to_grid( + fdata1: FData, + fdata2: FData, + eval_points: np.ndarray = None, + _check: bool = True, +) -> Tuple[FDataGrid, FDataGrid]: + """Convert fdata1 and fdata2 to FDatagrid. + + Checks if the fdatas passed as argument are unidimensional and compatible + and converts them to FDatagrid to compute their distances. + + Args: + fdata1: First functional object. + fdata2: Second functional object. + eval_points: Evaluation points. + + Returns: + Tuple with two :obj:`FDataGrid` with the same grid points. 
+ """ + # Dont perform any check + if not _check: + return fdata1, fdata2 + + _check_compatible(fdata1, fdata2) + + # Case new evaluation points specified + if eval_points is not None: # noqa: WPS223 + fdata1 = fdata1.to_grid(eval_points) + fdata2 = fdata2.to_grid(eval_points) + + elif not isinstance(fdata1, FDataGrid) and isinstance(fdata2, FDataGrid): + fdata1 = fdata1.to_grid(fdata2.grid_points[0]) + + elif not isinstance(fdata2, FDataGrid) and isinstance(fdata1, FDataGrid): + fdata2 = fdata2.to_grid(fdata1.grid_points[0]) + + elif ( + not isinstance(fdata1, FDataGrid) + and not isinstance(fdata2, FDataGrid) + ): + domain = fdata1.domain_range[0] + grid_points = np.linspace(*domain) + fdata1 = fdata1.to_grid(grid_points) + fdata2 = fdata2.to_grid(grid_points) + + elif not np.array_equal( + fdata1.grid_points, + fdata2.grid_points, + ): + raise ValueError( + "Grid points for both objects must be equal or" + "a new list evaluation points must be specified", + ) + + return fdata1, fdata2 + + +class NormInducedMetric(Metric[VectorType]): + r""" + Metric induced by a norm. + + Given a norm :math:`\| \cdot \|: X \rightarrow \mathbb{R}`, + returns the metric :math:`d: X \times X \rightarrow \mathbb{R}` induced + by the norm: + + .. math:: + d(f,g) = \|f - g\| + + Args: + norm: Norm used to induce the metric. + + Examples: + Computes the :math:`\mathbb{L}^2` distance between an object containing + functional data corresponding to the function :math:`y(x) = x` defined + over the interval [0, 1] and another one containing data of the + function :math:`y(x) = x/2`. + + Firstly we create the functional data. + + >>> import skfda + >>> import numpy as np + >>> from skfda.misc.metrics import l2_norm, NormInducedMetric + >>> + >>> x = np.linspace(0, 1, 1001) + >>> fd = skfda.FDataGrid([x], x) + >>> fd2 = skfda.FDataGrid([x/2], x) + + To construct the :math:`\mathbb{L}^2` distance it is used the + :math:`\mathbb{L}^2` norm wich it is used to compute the distance. 
+ + >>> l2_distance = NormInducedMetric(l2_norm) + >>> d = l2_distance(fd, fd2) + >>> float('%.3f'% d) + 0.289 + + """ + + def __init__(self, norm: Norm[VectorType]): + self.norm = norm + + def __call__(self, elem1: VectorType, elem2: VectorType) -> np.ndarray: + """Compute the distance induced by the norm between two vectors.""" + return self.norm(elem1 - elem2) + + def __repr__(self) -> str: + return f"{type(self).__name__}(norm={self.norm})" + + +@multimethod.multidispatch +def pairwise_metric_optimization( + metric: Any, + elem1: Any, + elem2: Optional[Any], +) -> np.ndarray: + r""" + Optimized computation of a pairwise metric. + + This is a generic function that can be overloaded for different + combinations of metric and operands in order to provide a more + efficient implementation for the pairwise metric matrix. + """ + return NotImplemented + + +class PairwiseMetric(Generic[MetricElementType]): + r"""Pairwise metric function. + + Computes a given metric pairwise. The matrix returned by the pairwise + metric is a matrix with as many rows as observations in the first object + and as many columns as observations in the second one. Each element + (i, j) of the matrix is the distance between the ith observation of the + first object and the jth observation of the second one. + + Args: + metric: Metric between two elements of a metric + space.
+ + """ + + def __init__( + self, + metric: Metric[MetricElementType], + ): + self.metric = metric + + def __call__( + self, + elem1: MetricElementType, + elem2: Optional[MetricElementType] = None, + ) -> np.ndarray: + """Evaluate the pairwise metric.""" + optimized = pairwise_metric_optimization(self.metric, elem1, elem2) + + return ( + _pairwise_symmetric(self.metric, elem1, elem2) + if optimized is NotImplemented + else optimized + ) + + def __repr__(self) -> str: + return f"{type(self).__name__}(metric={self.metric})" diff --git a/skfda/ml/clustering/_hierarchical.py b/skfda/ml/clustering/_hierarchical.py index 45233a780..09fbcfaac 100644 --- a/skfda/ml/clustering/_hierarchical.py +++ b/skfda/ml/clustering/_hierarchical.py @@ -1,91 +1,123 @@ -from typing import Any +from __future__ import annotations +import enum +from typing import Callable, Generic, Optional, TypeVar, Union + +import joblib import numpy as np import sklearn.cluster from sklearn.base import BaseEstimator, ClusterMixin +from typing_extensions import Literal -from ...misc.metrics import l2_distance, pairwise_distance +from ...misc.metrics import PRECOMPUTED, Metric, PairwiseMetric, l2_distance +from ...misc.metrics._typing import _parse_metric, _PrecomputedTypes from ...representation import FData +kk = ["ward", "average", "complete"] + +MetricElementType = TypeVar( + "MetricElementType", + contravariant=True, + bound=FData, +) + + +class LinkageCriterion(enum.Enum): + # WARD = "ward" Not until + # https://github.com/scikit-learn/scikit-learn/issues/15287 is solved + COMPLETE = "complete" + AVERAGE = "average" + SINGLE = "single" + + +LinkageCriterionLike = Union[ + LinkageCriterion, + Literal["ward", "complete", "average", "single"] +] + -class AgglomerativeClustering(ClusterMixin, BaseEstimator): # type: ignore +class AgglomerativeClustering( + ClusterMixin, # type: ignore + BaseEstimator, # type: ignore + Generic[MetricElementType], +): """ Agglomerative Clustering + Recursively merges the 
pair of clusters that minimally increases a given linkage distance. - Read more in the :ref:`User Guide `. - Parameters - ---------- - n_clusters : int or None, default=2 - The number of clusters to find. It must be ``None`` if - ``distance_threshold`` is not ``None``. - metric : str or callable, default='euclidean' - Metric used to compute the linkage. Can be "euclidean", "l1", "l2", - "manhattan", "cosine", or "precomputed". - If linkage is "ward", only "euclidean" is accepted. - If "precomputed", a distance matrix (instead of a similarity matrix) - is needed as input for the fit method. - memory : str or object with the joblib.Memory interface, default=None - Used to cache the output of the computation of the tree. - By default, no caching is done. If a string is given, it is the - path to the caching directory. - connectivity : array-like or callable, default=None - Connectivity matrix. Defines for each sample the neighboring - samples following a given structure of the data. - This can be a connectivity matrix itself or a callable that transforms - the data into a connectivity matrix, such as derived from - kneighbors_graph. Default is None, i.e, the - hierarchical clustering algorithm is unstructured. - compute_full_tree : 'auto' or bool, default='auto' - Stop early the construction of the tree at n_clusters. This is useful - to decrease computation time if the number of clusters is not small - compared to the number of samples. This option is useful only when - specifying a connectivity matrix. Note also that when varying the - number of clusters and using caching, it may be advantageous to compute - the full tree. It must be ``True`` if ``distance_threshold`` is not - ``None``. By default `compute_full_tree` is "auto", which is equivalent - to `True` when `distance_threshold` is not `None` or that `n_clusters` - is inferior to the maximum between 100 or `0.02 * n_samples`. - Otherwise, "auto" is equivalent to `False`. 
- linkage : {"ward", "complete", "average", "single"}, default="ward" - Which linkage criterion to use. The linkage criterion determines which - distance to use between sets of observation. The algorithm will merge - the pairs of cluster that minimize this criterion. - - ward minimizes the variance of the clusters being merged. - - average uses the average of the distances of each observation of - the two sets. - - complete or maximum linkage uses the maximum distances between - all observations of the two sets. - - single uses the minimum of the distances between all observations - of the two sets. - .. versionadded:: 0.20 - Added the 'single' option - distance_threshold : float, default=None - The linkage distance threshold above which, clusters will not be - merged. If not ``None``, ``n_clusters`` must be ``None`` and - ``compute_full_tree`` must be ``True``. - .. versionadded:: 0.21 - Attributes - ---------- - n_clusters_ : int - The number of clusters found by the algorithm. If - ``distance_threshold=None``, it will be equal to the given - ``n_clusters``. - labels_ : ndarray of shape (n_samples) - cluster labels for each point - n_leaves_ : int - Number of leaves in the hierarchical tree. - n_connected_components_ : int - The estimated number of connected components in the graph. - .. versionadded:: 0.21 - ``n_connected_components_`` was added to replace ``n_components_``. - children_ : array-like of shape (n_samples-1, 2) - The children of each non-leaf node. Values less than `n_samples` - correspond to leaves of the tree which are the original samples. - A node `i` greater than or equal to `n_samples` is a non-leaf - node and has children `children_[i - n_samples]`. Alternatively - at the i-th iteration, children[i][0] and children[i][1] - are merged to form node `n_samples + i` + + Notes: + This class is an extension of + :class:`sklearn.cluster.AgglomerativeClustering` that accepts + functional data objects and metrics. 
Please check also the + documentation of the original class. + + Parameters: + n_clusters: + The number of clusters to find. It must be ``None`` if + ``distance_threshold`` is not ``None``. + metric: + Metric used to compute the linkage. + If it is ``skfda.misc.metrics.PRECOMPUTED`` or the string + ``"precomputed"``, a distance matrix (instead of a similarity + matrix) is needed as input for the fit method. + memory: + Used to cache the output of the computation of the tree. + By default, no caching is done. If a string is given, it is the + path to the caching directory. + connectivity: + Connectivity matrix. Defines for each sample the neighboring + samples following a given structure of the data. + This can be a connectivity matrix itself or a callable that + transforms the data into a connectivity matrix, such as derived + from kneighbors_graph. Default is None, i.e, the + hierarchical clustering algorithm is unstructured. + compute_full_tree: + Stop early the construction of the tree at n_clusters. This is + useful to decrease computation time if the number of clusters + is not small compared to the number of samples. This option is + useful only when specifying a connectivity matrix. Note also + that when varying the number of clusters and using caching, it + may be advantageous to compute the full tree. It must be ``True`` + if ``distance_threshold`` is not ``None``. By default + `compute_full_tree` is "auto", which is equivalent to `True` when + `distance_threshold` is not `None` or that `n_clusters` is + inferior to the maximum between 100 or `0.02 * n_samples`. + Otherwise, "auto" is equivalent to `False`. + linkage: + Which linkage criterion to use. The linkage criterion determines + which distance to use between sets of observation. The algorithm + will merge the pairs of cluster that minimize this criterion. + - average uses the average of the distances of each observation of + the two sets. 
+ - complete or maximum linkage uses the maximum distances between + all observations of the two sets. + - single uses the minimum of the distances between all observations + of the two sets. + distance_threshold: + The linkage distance threshold above which, clusters will not be + merged. If not ``None``, ``n_clusters`` must be ``None`` and + ``compute_full_tree`` must be ``True``. + + Attributes: + n_clusters_: + The number of clusters found by the algorithm. If + ``distance_threshold=None``, it will be equal to the given + ``n_clusters``. + labels_: + cluster labels for each point + n_leaves_: + Number of leaves in the hierarchical tree. + n_connected_components_: + The estimated number of connected components in the graph. + children_ : + The children of each non-leaf node. Values less than `n_samples` + correspond to leaves of the tree which are the original samples. + A node `i` greater than or equal to `n_samples` is a non-leaf + node and has children `children_[i - n_samples]`. Alternatively + at the i-th iteration, children[i][0] and children[i][1] + are merged to form node `n_samples + i` Examples: @@ -95,23 +127,34 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator): # type: ignore >>> data_matrix = np.array([[1, 2], [1, 4], [1, 0], ... [4, 2], [4, 4], [4, 0]]) >>> X = FDataGrid(data_matrix) - >>> clustering = AgglomerativeClustering().fit(X) - >>> clustering - AgglomerativeClustering() + >>> clustering = AgglomerativeClustering( + ... linkage=AgglomerativeClustering.LinkageCriterion.COMPLETE, + ... ) + >>> clustering.fit(X) + AgglomerativeClustering(...) 
>>> clustering.labels_ array([0, 0, 1, 0, 0, 1], dtype=int64) """ + LinkageCriterion = LinkageCriterion + def __init__( self, - n_clusters: int = 2, + n_clusters: Optional[int] = 2, *, - metric=l2_distance, - memory=None, - connectivity=None, - compute_full_tree='auto', - linkage='complete', - distance_threshold=None + metric: Union[ + Metric[MetricElementType], + _PrecomputedTypes, + ] = l2_distance, + memory: Union[str, joblib.Memory, None] = None, + connectivity: Union[ + np.ndarray, + Callable[[MetricElementType], np.ndarray], + None, + ] = None, + compute_full_tree: Union[Literal['auto'], bool] = 'auto', + linkage: LinkageCriterionLike, + distance_threshold: Optional[float] = None, ) -> None: self.n_clusters = n_clusters self.metric = metric @@ -122,32 +165,38 @@ def __init__( self.distance_threshold = distance_threshold def _init_estimator(self) -> None: + linkage = LinkageCriterion(self.linkage) + self._estimator = sklearn.cluster.AgglomerativeClustering( n_clusters=self.n_clusters, affinity='precomputed', memory=self.memory, connectivity=self.connectivity, compute_full_tree=self.compute_full_tree, - linkage=self.linkage, + linkage=linkage.value, distance_threshold=self.distance_threshold, ) - def fit(self, X: FData, y: Any = None) -> 'AgglomerativeClustering': + def fit(self, X: MetricElementType, y: None = None) -> AgglomerativeClustering: self._init_estimator() - if self.metric != 'precomputed': - data = pairwise_distance(self.metric)(X) + metric = _parse_metric(self.metric) + + if metric is not PRECOMPUTED: + data = PairwiseMetric(metric)(X) self._estimator.fit(data, y) return self - def fit_predict(self, X, y=None): + def fit_predict(self, X: MetricElementType, y: None = None) -> np.ndarray: self._init_estimator() - if self.metric != 'precomputed': - data = pairwise_distance(self.metric)(X) + metric = _parse_metric(self.metric) + + if metric is not PRECOMPUTED: + data = PairwiseMetric(metric)(X) return self._estimator.fit_predict(data, y) From 
647262bafa4362fd3dec56e1199e6a43172c6e77 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Fri, 22 Jan 2021 19:36:31 +0100 Subject: [PATCH 044/417] Improve docs. --- docs/modules/ml/clustering.rst | 20 +++++++ skfda/ml/clustering/_hierarchical.py | 84 ++++++++++++++++++---------- 2 files changed, 74 insertions(+), 30 deletions(-) diff --git a/docs/modules/ml/clustering.rst b/docs/modules/ml/clustering.rst index 3bdb59647..9101dd5a1 100644 --- a/docs/modules/ml/clustering.rst +++ b/docs/modules/ml/clustering.rst @@ -34,3 +34,23 @@ searches. :toctree: autosummary skfda.ml.clustering.NearestNeighbors + +Hierarchical clustering +----------------------- + +Hierarchical clusterings are constructed by iteratively merging or splitting +clusters given a metric between their elements, in order to cluster together +elements that are close to each other. This is repeated until a desired +number of clusters is obtained. The resulting hierarchy of clusters can be +represented as a tree, called a dendrogram. The following hierarchical +clusterings are supported: + +.. autosummary:: + :toctree: autosummary + + skfda.ml.clustering.AgglomerativeClustering + +..
autosummary:: + :toctree: autosummary + + skfda.ml.clustering.Agglomerative clustering diff --git a/skfda/ml/clustering/_hierarchical.py b/skfda/ml/clustering/_hierarchical.py index 09fbcfaac..0f7230412 100644 --- a/skfda/ml/clustering/_hierarchical.py +++ b/skfda/ml/clustering/_hierarchical.py @@ -21,8 +21,17 @@ bound=FData, ) +MetricOrPrecomputed = Union[Metric[MetricElementType], _PrecomputedTypes] +Connectivity = Union[ + np.ndarray, + Callable[[MetricElementType], np.ndarray], + None, +] + class LinkageCriterion(enum.Enum): + """Linkage criterion to use in :class:`AgglomerativeClustering`.""" + # WARD = "ward" Not until # https://github.com/scikit-learn/scikit-learn/issues/15287 is solved COMPLETE = "complete" @@ -32,17 +41,17 @@ class LinkageCriterion(enum.Enum): LinkageCriterionLike = Union[ LinkageCriterion, - Literal["ward", "complete", "average", "single"] + Literal["ward", "complete", "average", "single"], ] -class AgglomerativeClustering( - ClusterMixin, # type: ignore - BaseEstimator, # type: ignore - Generic[MetricElementType], +class AgglomerativeClustering( # noqa: WPS230 + ClusterMixin, # type: ignore + BaseEstimator, # type: ignore + Generic[MetricElementType], ): - """ - Agglomerative Clustering + r""" + Agglomerative Clustering. Recursively merges the pair of clusters that minimally increases a given linkage distance. @@ -88,7 +97,8 @@ class AgglomerativeClustering( linkage: Which linkage criterion to use. The linkage criterion determines which distance to use between sets of observation. The algorithm - will merge the pairs of cluster that minimize this criterion. + will merge the pairs of clusters that minimize this criterion. + - average uses the average of the distances of each observation of the two sets. - complete or maximum linkage uses the maximum distances between @@ -101,17 +111,17 @@ class AgglomerativeClustering( ``compute_full_tree`` must be ``True``. 
Attributes: - n_clusters_: + n_clusters\_: The number of clusters found by the algorithm. If ``distance_threshold=None``, it will be equal to the given ``n_clusters``. - labels_: + labels\_: cluster labels for each point - n_leaves_: + n_leaves\_: Number of leaves in the hierarchical tree. - n_connected_components_: + n_connected_components\_: The estimated number of connected components in the graph. - children_ : + children\_ : The children of each non-leaf node. Values less than `n_samples` correspond to leaves of the tree which are the original samples. A node `i` greater than or equal to `n_samples` is a non-leaf @@ -119,8 +129,8 @@ class AgglomerativeClustering( at the i-th iteration, children[i][0] and children[i][1] are merged to form node `n_samples + i` - Examples: + Examples: >>> from skfda import FDataGrid >>> from skfda.ml.clustering import AgglomerativeClustering >>> import numpy as np @@ -132,8 +142,8 @@ class AgglomerativeClustering( ... ) >>> clustering.fit(X) AgglomerativeClustering(...) 
- >>> clustering.labels_ - array([0, 0, 1, 0, 0, 1], dtype=int64) + >>> clustering.labels_.astype(np.int_) + array([0, 0, 1, 0, 0, 1]) """ LinkageCriterion = LinkageCriterion @@ -142,16 +152,9 @@ def __init__( self, n_clusters: Optional[int] = 2, *, - metric: Union[ - Metric[MetricElementType], - _PrecomputedTypes, - ] = l2_distance, + metric: MetricOrPrecomputed[MetricElementType] = l2_distance, memory: Union[str, joblib.Memory, None] = None, - connectivity: Union[ - np.ndarray, - Callable[[MetricElementType], np.ndarray], - None, - ] = None, + connectivity: Connectivity[MetricElementType] = None, compute_full_tree: Union[Literal['auto'], bool] = 'auto', linkage: LinkageCriterionLike, distance_threshold: Optional[float] = None, @@ -177,7 +180,20 @@ def _init_estimator(self) -> None: distance_threshold=self.distance_threshold, ) - def fit(self, X: MetricElementType, y: None = None) -> AgglomerativeClustering: + def _copy_attrs(self) -> None: + self.n_clusters_: int = self._estimator.n_clusters_ + self.labels_: np.ndarray = self._estimator.labels_ + self.n_leaves_: int = self._estimator.n_leaves_ + self.n_connected_components_: int = ( + self._estimator.n_connected_components_ + ) + self.children_: np.ndarray = self._estimator.children_ + + def fit( # noqa: D102 + self, + X: MetricElementType, + y: None = None, + ) -> AgglomerativeClustering[MetricElementType]: self._init_estimator() @@ -187,9 +203,16 @@ def fit(self, X: MetricElementType, y: None = None) -> AgglomerativeClustering: data = PairwiseMetric(metric)(X) self._estimator.fit(data, y) + + self._copy_attrs() + return self - def fit_predict(self, X: MetricElementType, y: None = None) -> np.ndarray: + def fit_predict( # noqa: D102 + self, + X: MetricElementType, + y: None = None, + ) -> np.ndarray: self._init_estimator() @@ -198,7 +221,8 @@ def fit_predict(self, X: MetricElementType, y: None = None) -> np.ndarray: if metric is not PRECOMPUTED: data = PairwiseMetric(metric)(X) - return 
self._estimator.fit_predict(data, y) + predicted = self._estimator.fit_predict(data, y) + + self._copy_attrs() - def __getattr__(self, attr): - return getattr(self._estimator, attr) + return predicted From 7463be1597472b255f8d94b04603e90a13f36c66 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Tue, 26 Jan 2021 18:53:22 +0100 Subject: [PATCH 045/417] First implementation of Finite Element basis. --- skfda/representation/basis/__init__.py | 1 + skfda/representation/basis/_basis.py | 4 +- skfda/representation/basis/_finite_element.py | 128 ++++++++++++++++++ skfda/representation/basis/_tensor_basis.py | 4 - 4 files changed, 132 insertions(+), 5 deletions(-) create mode 100644 skfda/representation/basis/_finite_element.py diff --git a/skfda/representation/basis/__init__.py b/skfda/representation/basis/__init__.py index 7b2fa39e7..28c891554 100644 --- a/skfda/representation/basis/__init__.py +++ b/skfda/representation/basis/__init__.py @@ -3,6 +3,7 @@ from ._coefficients_transformer import CoefficientsTransformer from ._constant import Constant from ._fdatabasis import FDataBasis, FDataBasisDType +from ._finite_element import FiniteElement from ._fourier import Fourier from ._monomial import Monomial from ._tensor_basis import Tensor diff --git a/skfda/representation/basis/_basis.py b/skfda/representation/basis/_basis.py index db17e34a0..45fe9fd15 100644 --- a/skfda/representation/basis/_basis.py +++ b/skfda/representation/basis/_basis.py @@ -78,7 +78,9 @@ def __call__( @property def dim_domain(self) -> int: - return 1 + if self._domain_range is None: + return 1 + return len(self._domain_range) @property def dim_codomain(self) -> int: diff --git a/skfda/representation/basis/_finite_element.py b/skfda/representation/basis/_finite_element.py new file mode 100644 index 000000000..43de6a370 --- /dev/null +++ b/skfda/representation/basis/_finite_element.py @@ -0,0 +1,128 @@ +from typing import Optional, Tuple, TypeVar + +import numpy as np +import scipy.linalg + +from .._typing 
import DomainRangeLike +from ._basis import Basis + +T = TypeVar("T", bound='FiniteElement') + + +class FiniteElement(Basis): + """Finite element basis. + + Given a n-dimensional grid made of simplices, each element of the basis + is a piecewise linear function that takes the value 1 at exactly one + vertex and 0 in the other vertices. + + Parameters: + vertices: The vertices of the grid. + cells: A list of individual cells, consisting in the indexes of + :math:`n+1` vertices for an n-dimensional domain space. + + Examples: + + >>> basis = FiniteElement( + ... vertices=[[0, 0], [0, 1], [1, 0], [1, 1]], + ... cells=[[0, 1, 2], [1, 2, 3]], + ... domain_range=[(0, 1), (0, 1)], + ... ) + + Evaluates all the functions in the basis in a list of discrete + values. + + >>> basis([[0.1, 0.1], [0.6, 0.6], [0.1, 0.2], [0.8, 0.9]]) + array([[[ 0.8], + [ 0. ], + [ 0.7], + [ 0. ]], + [[ 0.1], + [ 0.4], + [ 0.2], + [ 0.2]], + [[ 0.1], + [ 0.4], + [ 0.1], + [ 0.1]], + [[ 0. ], + [ 0.2], + [ 0. ], + [ 0.7]]]) + + """ + + def __init__( + self, + vertices: np.ndarray, + cells: np.ndarray, + domain_range: Optional[DomainRangeLike] = None, + )-> None: + Basis.__init__(self, domain_range=domain_range, n_basis=len(vertices)) + self.vertices = np.asarray(vertices) + self.cells = np.asarray(cells) + + def _barycentric_coords(self, points: np.ndarray) -> np.ndarray: + """ + Find the barycentric coordinates of each point in each cell. + + Only works for simplex cells. 
+ + """ + cell_coordinates = self.vertices[self.cells] + + cartesian_matrix = np.append( + cell_coordinates, + np.ones(cell_coordinates.shape[:-1] + (1,)), + axis=-1, + ) + + cartesian_vector = np.append( + points, + np.ones(points.shape[:-1] + (1,)), + axis=-1, + ) + + coords = np.linalg.solve( + np.swapaxes(cartesian_matrix, -2, -1), + cartesian_vector.T[np.newaxis, ...], + ) + + return np.swapaxes(coords, -2, -1) + + def _cell_points_values(self, points: np.ndarray) -> np.ndarray: + """ + Compute the values of each point in each of the vertices of each cell. + + Only works for simplex cells. + + """ + barycentric_coords = self._barycentric_coords(points) + + # Remove values outside each cell + wrong_vals = np.any( + (barycentric_coords < 0) | (barycentric_coords > 1), + axis=-1, + ) + + barycentric_coords[wrong_vals] = 0 + + points_in_cells = np.any(barycentric_coords, axis=-1) + n_cells_per_point = np.sum(points_in_cells, axis=0) + + barycentric_coords /= n_cells_per_point[:, np.newaxis] + + return barycentric_coords + + def _evaluate(self, eval_points: np.ndarray) -> np.ndarray: + + points_values_per_cell = self._cell_points_values(eval_points) + + cell_points_values = np.swapaxes(points_values_per_cell, -2, -1) + cell_points_values = cell_points_values.reshape(-1, len(eval_points)) + indexes = self.cells.ravel() + + eval_matrix = np.zeros((self.n_basis, len(eval_points))) + np.add.at(eval_matrix, indexes, cell_points_values) + + return eval_matrix diff --git a/skfda/representation/basis/_tensor_basis.py b/skfda/representation/basis/_tensor_basis.py index ae625a1a9..8d6a07191 100644 --- a/skfda/representation/basis/_tensor_basis.py +++ b/skfda/representation/basis/_tensor_basis.py @@ -80,10 +80,6 @@ def __init__(self, basis_list: Iterable[Basis]): def basis_list(self) -> Tuple[Basis, ...]: return self._basis_list - @property - def dim_domain(self) -> int: - return len(self.basis_list) - def _evaluate(self, eval_points: np.ndarray) -> np.ndarray: matrix = 
np.zeros((self.n_basis, len(eval_points), self.dim_codomain)) From f8278db448f3da6d43aedb13ab94bffefe38d296 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Tue, 26 Jan 2021 23:43:43 +0100 Subject: [PATCH 046/417] Fix points left outside because of fp errors. --- skfda/representation/basis/_finite_element.py | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/skfda/representation/basis/_finite_element.py b/skfda/representation/basis/_finite_element.py index 43de6a370..73d617f51 100644 --- a/skfda/representation/basis/_finite_element.py +++ b/skfda/representation/basis/_finite_element.py @@ -1,7 +1,6 @@ -from typing import Optional, Tuple, TypeVar +from typing import Optional, TypeVar import numpy as np -import scipy.linalg from .._typing import DomainRangeLike from ._basis import Basis @@ -23,6 +22,7 @@ class FiniteElement(Basis): Examples: + >>> from skfda.representation.basis import FiniteElement >>> basis = FiniteElement( ... vertices=[[0, 0], [0, 1], [1, 0], [1, 1]], ... cells=[[0, 1, 2], [1, 2, 3]], @@ -50,6 +50,20 @@ class FiniteElement(Basis): [ 0. ], [ 0.7]]]) + + >>> from scipy.spatial import Delaunay + >>> import numpy as np + >>> + >>> n_points = 10 + >>> points = np.random.uniform(size=(n_points, 2)) + >>> delaunay = Delaunay(points) + >>> basis = FiniteElement( + ... vertices=delaunay.points, + ... cells=delaunay.simplices, + ... ) + >>> basis.n_basis + 10 + """ def __init__( @@ -62,6 +76,10 @@ def __init__( self.vertices = np.asarray(vertices) self.cells = np.asarray(cells) + @property + def dim_domain(self) -> int: + return self.vertices.shape[-1] + def _barycentric_coords(self, points: np.ndarray) -> np.ndarray: """ Find the barycentric coordinates of each point in each cell. 
@@ -101,7 +119,8 @@ def _cell_points_values(self, points: np.ndarray) -> np.ndarray: # Remove values outside each cell wrong_vals = np.any( - (barycentric_coords < 0) | (barycentric_coords > 1), + ((barycentric_coords < 0) & ~np.isclose(barycentric_coords + 1, 1)) + | ((barycentric_coords > 1) & ~np.isclose(barycentric_coords, 1)), axis=-1, ) @@ -110,6 +129,7 @@ def _cell_points_values(self, points: np.ndarray) -> np.ndarray: points_in_cells = np.any(barycentric_coords, axis=-1) n_cells_per_point = np.sum(points_in_cells, axis=0) + n_cells_per_point[n_cells_per_point == 0] = 1 barycentric_coords /= n_cells_per_point[:, np.newaxis] return barycentric_coords From c1c99340d90117240f82aeb983142a090755d271 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 27 Jan 2021 21:31:31 +0100 Subject: [PATCH 047/417] error npoints instead of n_points --- .../visualization/representation.py | 93 ++----------------- 1 file changed, 7 insertions(+), 86 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index b1ad57359..74c04ca61 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -180,16 +180,16 @@ def plot_graph(fdata, chart=None, *, fig=None, axes=None, # Selects the number of points if n_points is None: - npoints = 2 * (constants.N_POINTS_SURFACE_PLOT_AX,) - elif np.isscalar(npoints): - npoints = (npoints, npoints) - elif len(npoints) != 2: + n_points = 2 * (constants.N_POINTS_SURFACE_PLOT_AX,) + elif np.isscalar(n_points): + n_points = (n_points, n_points) + elif len(n_points) != 2: raise ValueError(f"n_points should be a number or a tuple of " - f"length 2, and has length {len(npoints)}") + f"length 2, and has length {len(n_points)}") # Axes where will be evaluated - x = np.linspace(*domain_range[0], npoints[0]) - y = np.linspace(*domain_range[1], npoints[1]) + x = np.linspace(*domain_range[0], n_points[0]) + y = 
np.linspace(*domain_range[1], n_points[1]) # Evaluation of the functional object Z = fdata((x, y), grid=True) @@ -325,82 +325,3 @@ def plot_scatter(fdata, chart=None, *, grid_points=None, _set_labels(fdata, fig, axes, patches) return fig - - -class DDPlotDisplay: - - """DDPlot visualization. Plot the depth of our fdata elements in two - different distributions, one in each axis. It is useful to understand - how our data is more related with one subset of data / distribution - than another one. - - Args: - fdata: functional data set that we want to examine. - dist1: functional data set that represents the first distribution that - we want to use to compute the depth (Depth X). - dist2: functional data set that represents the second distribution that - we want to use to compute the depth (Depth Y). - depth_method: method that will be used to compute the depths of the - data with respect to the distributions. - - """ - def __init__(self, fdata, dist1, dist2, depth_method): - self.fdata = fdata - self.dist1 = dist1 - self.dist2 = dist2 - self.depth_method = depth_method - - def plot(self, chart=None, *, fig=None, axes=None, - n_rows=None, n_cols=None, **kwargs): - """Plot the depth of our fdata elements in the two different distributions, - one in each axis. It is useful to understand how our data is more related with - one subset of data / distribution than another one. - - Args: - chart (figure object, axe or list of axes, optional): figure over - with the graphs are plotted or axis over where the graphs are - plotted. If None and ax is also None, the figure is - initialized. - fig (figure object, optional): figure over with the graphs are - plotted in case ax is not specified. If None and ax is also - None, the figure is initialized. - axes (list of axis objects, optional): axis over where the graphs are - plotted. If None, see param fig. - n_rows (int, optional): designates the number of rows of the figure - to plot the different dimensions of the image. 
Only specified - if fig and ax are None. - n_cols(int, optional): designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. - **kwargs: if dim_domain is 1, keyword arguments to be passed to - the matplotlib.pyplot.plot function; if dim_domain is 2, - keyword arguments to be passed to the - matplotlib.pyplot.plot_surface function. - - Returns: - fig (figure object): figure object in which the depths will be scattered. - - """ - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata(self.fdata, fig, axes, n_rows, n_cols) - - depth_dist1 = self.depth_method.__call__(self.fdata, distribution = self.dist1) - depth_dist2 = self.depth_method.__call__(self.fdata, distribution = self.dist2) - - if self.fdata.dim_domain == 1: - - for i in range(self.fdata.dim_codomain): - - axes[i].scatter(depth_dist1, depth_dist2, - **kwargs) - - #Set labels of graph - fig.suptitle("DDPlot") - for i in range(self.fdata.dim_codomain): - axes[i].set_xlabel("X depth") - axes[i].set_ylabel("Y depth") - axes[i].set_xlim([self.depth_method.min, self.depth_method.max]) - axes[i].set_ylim([self.depth_method.min, self.depth_method.max]) - axes[i].plot([0,1], linewidth = 0.2, color = "gray") - - return fig \ No newline at end of file From e79a098f522c504c24b89ba5883206f64e3f7514 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 27 Jan 2021 21:32:11 +0100 Subject: [PATCH 048/417] npoints change --- .vscode/settings.json | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..3b6641073 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "git.ignoreLimitWarning": true +} \ No newline at end of file From f745118fc23b91e4adfc114d6790e79d08c877c0 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 27 Jan 2021 21:34:01 +0100 Subject: [PATCH 
049/417] delete --- .vscode/settings.json | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 3b6641073..000000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "git.ignoreLimitWarning": true -} \ No newline at end of file From 1239d39ce70ce68ec9589b4937f5acc79871b143 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Wed, 27 Jan 2021 23:34:33 +0100 Subject: [PATCH 050/417] Better explanation of kernel smoothers example. --- examples/plot_kernel_smoothing.py | 62 +++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/examples/plot_kernel_smoothing.py b/examples/plot_kernel_smoothing.py index 7f661e89d..5473abc54 100644 --- a/examples/plot_kernel_smoothing.py +++ b/examples/plot_kernel_smoothing.py @@ -35,36 +35,66 @@ ############################################################################## # Here we show the general cross validation scores for different values of the -# parameters given to the different smoothing methods. +# parameters given to the different smoothing methods. Currently we have +# three kernel smoothing methods implemented: Nadaraya Watson, Local Linear +# Regression and K Nearest Neighbors (k-NN) -param_values_knn = np.arange(1, 24, 2) -param_values_others = param_values_knn / 32 +############################################################################## +# The smoothing parameter for k-NN is the number of neighbors. We will choose +# this parameter between 1 and 23 in this example. + +n_neighbors = np.arange(1, 24) + +############################################################################## +# The smoothing parameter for Nadaraya Watson and Local Linear Regression is +# a bandwidth parameter, with the same units as the domain of the function. 
+# As we want to compare the results of these smoothers with k-NN, with uses +# as the smoothing parameter the number of neighbors, we want to use a +# comparable range of values. In this case, we know that our grid points are +# equispaced, so a given bandwidth ``B`` will include +# ``B * N / D grid points``, where ``N`` is the total number of grid points +# and ``D`` the size of the whole domain range. Thus, if we pick +# ``B = n_neighbors * D / N``, ``B`` will include ``n_neighbors`` grid points +# and we could compare the results of the different smoothers. + +scale_factor = ( + (fd.domain_range[0][1] - fd.domain_range[0][0]) + / len(fd.grid_points[0]) +) + +bandwidth = n_neighbors * scale_factor + +# K-nearest neighbours kernel smoothing. +knn = val.SmoothingParameterSearch( + ks.KNeighborsSmoother(), n_neighbors) +knn.fit(fd) +knn_fd = knn.transform(fd) # Local linear regression kernel smoothing. llr = val.SmoothingParameterSearch( - ks.LocalLinearRegressionSmoother(), param_values_others) + ks.LocalLinearRegressionSmoother(), bandwidth) llr.fit(fd) llr_fd = llr.transform(fd) # Nadaraya-Watson kernel smoothing. nw = val.SmoothingParameterSearch( - ks.NadarayaWatsonSmoother(), param_values_others) + ks.NadarayaWatsonSmoother(), bandwidth) nw.fit(fd) nw_fd = nw.transform(fd) -# K-nearest neighbours kernel smoothing. -knn = val.SmoothingParameterSearch( - ks.KNeighborsSmoother(), param_values_knn) -knn.fit(fd) -knn_fd = knn.transform(fd) +############################################################################## +# The plot of the mean test scores for all smoothers are shown below. +# As the X axis we will use the neighbors for all the smoothers in order +# to compare k-NN with the others, but remember that the bandwidth is +# this quantity scaled by ``scale_factor``! 
fig = plt.figure() ax = fig.add_subplot(1, 1, 1) -ax.plot(param_values_knn, knn.cv_results_['mean_test_score'], +ax.plot(n_neighbors, knn.cv_results_['mean_test_score'], label='k-nearest neighbors') -ax.plot(param_values_knn, llr.cv_results_['mean_test_score'], +ax.plot(n_neighbors, llr.cv_results_['mean_test_score'], label='local linear regression') -ax.plot(param_values_knn, nw.cv_results_['mean_test_score'], +ax.plot(n_neighbors, nw.cv_results_['mean_test_score'], label='Nadaraya-Watson') ax.legend() fig @@ -117,9 +147,11 @@ # the following plots. fd_us = ks.NadarayaWatsonSmoother( - smoothing_parameter=2 / 32).fit_transform(fd[10]) + smoothing_parameter=2 * scale_factor, +).fit_transform(fd[10]) fd_os = ks.NadarayaWatsonSmoother( - smoothing_parameter=15 / 32).fit_transform(fd[10]) + smoothing_parameter=15 * scale_factor, +).fit_transform(fd[10]) ############################################################################## # Under-smoothed From 7b845994bdaef56054a14368e85759c8eb228de0 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Wed, 27 Jan 2021 23:59:57 +0100 Subject: [PATCH 051/417] Fix style. --- examples/plot_kernel_smoothing.py | 46 ++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/examples/plot_kernel_smoothing.py b/examples/plot_kernel_smoothing.py index 5473abc54..b01a18ffb 100644 --- a/examples/plot_kernel_smoothing.py +++ b/examples/plot_kernel_smoothing.py @@ -66,19 +66,25 @@ # K-nearest neighbours kernel smoothing. knn = val.SmoothingParameterSearch( - ks.KNeighborsSmoother(), n_neighbors) + ks.KNeighborsSmoother(), + n_neighbors, +) knn.fit(fd) knn_fd = knn.transform(fd) # Local linear regression kernel smoothing. llr = val.SmoothingParameterSearch( - ks.LocalLinearRegressionSmoother(), bandwidth) + ks.LocalLinearRegressionSmoother(), + bandwidth, +) llr.fit(fd) llr_fd = llr.transform(fd) # Nadaraya-Watson kernel smoothing. 
nw = val.SmoothingParameterSearch( - ks.NadarayaWatsonSmoother(), bandwidth) + ks.NadarayaWatsonSmoother(), + bandwidth, +) nw.fit(fd) nw_fd = nw.transform(fd) @@ -90,12 +96,21 @@ fig = plt.figure() ax = fig.add_subplot(1, 1, 1) -ax.plot(n_neighbors, knn.cv_results_['mean_test_score'], - label='k-nearest neighbors') -ax.plot(n_neighbors, llr.cv_results_['mean_test_score'], - label='local linear regression') -ax.plot(n_neighbors, nw.cv_results_['mean_test_score'], - label='Nadaraya-Watson') +ax.plot( + n_neighbors, + knn.cv_results_['mean_test_score'], + label='k-nearest neighbors', +) +ax.plot( + n_neighbors, + llr.cv_results_['mean_test_score'], + label='local linear regression', +) +ax.plot( + n_neighbors, + nw.cv_results_['mean_test_score'], + label='Nadaraya-Watson', +) ax.legend() fig @@ -114,10 +129,15 @@ knn_fd[10].plot(fig=fig) llr_fd[10].plot(fig=fig) nw_fd[10].plot(fig=fig) -ax.legend(['original data', 'k-nearest neighbors', - 'local linear regression', - 'Nadaraya-Watson'], - title='Smoothing method') +ax.legend( + [ + 'original data', + 'k-nearest neighbors', + 'local linear regression', + 'Nadaraya-Watson', + ], + title='Smoothing method', +) fig ############################################################################## From cf84e12ae5e431ee5941bfc482ef85cc6b487f65 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Thu, 28 Jan 2021 14:59:01 +0100 Subject: [PATCH 052/417] Fix typos. --- examples/plot_kernel_smoothing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/plot_kernel_smoothing.py b/examples/plot_kernel_smoothing.py index b01a18ffb..bda9d1689 100644 --- a/examples/plot_kernel_smoothing.py +++ b/examples/plot_kernel_smoothing.py @@ -52,7 +52,7 @@ # as the smoothing parameter the number of neighbors, we want to use a # comparable range of values. 
In this case, we know that our grid points are # equispaced, so a given bandwidth ``B`` will include -# ``B * N / D grid points``, where ``N`` is the total number of grid points +# ``B * N / D`` grid points, where ``N`` is the total number of grid points # and ``D`` the size of the whole domain range. Thus, if we pick # ``B = n_neighbors * D / N``, ``B`` will include ``n_neighbors`` grid points # and we could compare the results of the different smoothers. @@ -89,7 +89,7 @@ nw_fd = nw.transform(fd) ############################################################################## -# The plot of the mean test scores for all smoothers are shown below. +# The plot of the mean test scores for all smoothers is shown below. # As the X axis we will use the neighbors for all the smoothers in order # to compare k-NN with the others, but remember that the bandwidth is # this quantity scaled by ``scale_factor``! From d70389f8aaa34cca2b860f434d8a9e986214ffac Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 3 Feb 2021 00:17:57 +0100 Subject: [PATCH 053/417] Fix style. --- skfda/representation/basis/_finite_element.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/skfda/representation/basis/_finite_element.py b/skfda/representation/basis/_finite_element.py index 73d617f51..d2d03a85c 100644 --- a/skfda/representation/basis/_finite_element.py +++ b/skfda/representation/basis/_finite_element.py @@ -21,7 +21,6 @@ class FiniteElement(Basis): :math:`n+1` vertices for an n-dimensional domain space. Examples: - >>> from skfda.representation.basis import FiniteElement >>> basis = FiniteElement( ... 
vertices=[[0, 0], [0, 1], [1, 0], [1, 1]], @@ -71,8 +70,12 @@ def __init__( vertices: np.ndarray, cells: np.ndarray, domain_range: Optional[DomainRangeLike] = None, - )-> None: - Basis.__init__(self, domain_range=domain_range, n_basis=len(vertices)) + ) -> None: + super().__init__( + self, + domain_range=domain_range, + n_basis=len(vertices), + ) self.vertices = np.asarray(vertices) self.cells = np.asarray(cells) From 802cfa246a85e67f07f0acf2edf7c3db39e4cc4a Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 3 Feb 2021 00:29:19 +0100 Subject: [PATCH 054/417] Fix style. --- skfda/representation/basis/_finite_element.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skfda/representation/basis/_finite_element.py b/skfda/representation/basis/_finite_element.py index d2d03a85c..33412f6fd 100644 --- a/skfda/representation/basis/_finite_element.py +++ b/skfda/representation/basis/_finite_element.py @@ -72,7 +72,6 @@ def __init__( domain_range: Optional[DomainRangeLike] = None, ) -> None: super().__init__( - self, domain_range=domain_range, n_basis=len(vertices), ) From 74c08046dee1192f2aeb68e9a70750909af2f51b Mon Sep 17 00:00:00 2001 From: lena123315 <32038332+lena123315@users.noreply.github.com> Date: Wed, 3 Feb 2021 18:45:04 +0100 Subject: [PATCH 055/417] Update kernel_smoothers.py --- .../smoothing/kernel_smoothers.py | 48 +++++++++++-------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/skfda/preprocessing/smoothing/kernel_smoothers.py b/skfda/preprocessing/smoothing/kernel_smoothers.py index 263496fa7..f68bbd2b5 100644 --- a/skfda/preprocessing/smoothing/kernel_smoothers.py +++ b/skfda/preprocessing/smoothing/kernel_smoothers.py @@ -82,14 +82,14 @@ class NadarayaWatsonSmoother(_LinearKernelSmoother): It is a linear kernel smoothing method. Uses an smoothing matrix :math:`\hat{H}` for the discretisation points in argvals by the Nadaraya-Watson estimator. 
The smoothed - values :math:`\hat{Y}` can be calculated as :math:`\hat{ - Y} = \hat{H}Y` where :math:`Y` is the vector of observations at the - points of discretisation :math:`(x_1, x_2, ..., x_n)`. + values :math:`\hat{X}` at the points :math:`(t_1', t_2', ..., t_m')` + can be calculated as :math:`\hat{X} = \hat{H}X` where :math:`X` is the + vector of observations at the points of discretisation + :math:`(t_1, t_2, ..., t_n)`. .. math:: - \hat{H}_{i,j} = \frac{K\left(\frac{x_i-x_j}{h}\right)}{\sum_{k=1}^{ - n}K\left( - \frac{x_i-x_k}{h}\right)} + \hat{H}_{i,j} = \frac{K\left(\frac{t_j-t_i'}{h}\right)}{\sum_{k=1}^{ + n}K\left(\frac{t_k-t_i'}{h}\right)} where :math:`K(\cdot)` is a kernel function and :math:`h` the kernel window width or smoothing parameter. @@ -179,19 +179,20 @@ class LocalLinearRegressionSmoother(_LinearKernelSmoother): It is a linear kernel smoothing method. Uses an smoothing matrix :math:`\hat{H}` for the discretisation points in argvals by the local linear regression estimator. The smoothed - values :math:`\hat{Y}` can be calculated as :math:`\hat{ - Y} = \hat{H}Y` where :math:`Y` is the vector of observations at the points - of discretisation :math:`(x_1, x_2, ..., x_n)`. + values :math:`\hat{X}` at the points :math:`(t_1', t_2', ..., t_m')` + can be calculated as :math:`\hat{X} = \hat{H}X` where :math:`X` is the + vector of observations at the points of discretisation + :math:`(t_1, t_2, ..., t_n)`. .. math:: - \hat{H}_{i,j} = \frac{b_i(x_j)}{\sum_{k=1}^{n}b_k(x_j)} + \hat{H}_{i,j} = \frac{b_j(t_i')}{\sum_{k=1}^{n}b_k(t_i')} .. math:: - b_i(x) = K\left(\frac{x_i - x}{h}\right) S_{n,2}(x) - (x_i - x)S_{n, - 1}(x) + b_j(t') = K\left(\frac{t_j - t'}{h}\right) S_{n,2}(t') - (t_j - t')S_{n, + 1}(t') .. math:: - S_{n,k} = \sum_{i=1}^{n}K\left(\frac{x_i-x}{h}\right)(x_i-x)^k + S_{n,k}(t') = \sum_{j=1}^{n}K\left(\frac{t_j-t'}{h}\right)(t_j-t')^k where :math:`K(\cdot)` is a kernel function and :math:`h` the kernel window width. 
@@ -276,11 +277,24 @@ def _hat_matrix_function_not_normalized(self, *, delta_x, class KNeighborsSmoother(_LinearKernelSmoother): - """K-nearest neighbour kernel smoother. + r"""K-nearest neighbour kernel smoother. It is a linear kernel smoothing method. Uses an smoothing matrix S for the discretisation points in argvals by - the k nearest neighbours estimator. + the :math:`k` nearest neighbours estimator. + + The smoothed values :math:`\hat{X}` at the points + :math:`(t_1', t_2', ..., t_m')` can be calculated as + :math:`\hat{X} = \hat{H}X` where :math:`X` is the vector of observations + at the points of discretisation :math:`(t_1, t_2, ..., t_n)`. + + .. math:: + + H_{i,j} =\frac{K\left(\frac{t_j-t_i'}{h_{ik}}\right)}{\sum_{r=1}^n + K\left(\frac{t_r-t_i'}{h_{ik}}\right)} + + :math:`K(\cdot)` is a kernel function and :math:`h_{ik}` the is the distance + from :math:`t_i'` to the 𝑘-th nearest neighbor of :math:`t_i'`. Usually used with the uniform kernel, it takes the average of the closest k points to a given point. @@ -393,9 +407,5 @@ def _hat_matrix_function_not_normalized(self, *, delta_x, axis=1, interpolation='lower') + tol rr = kernel((delta_x.T / vec).T) - # Applies the kernel to the result of dividing each row by the result - # of the previous operation, all the discretisation points - # corresponding to the knn are below 1 and the rest above 1 so the - # kernel returns values distinct to 0 only for the knn. 
return rr From f90562998e7e423f718071de1ff87921714eb340 Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Wed, 3 Feb 2021 20:46:52 +0100 Subject: [PATCH 056/417] DD-G --- docs/modules/ml/classification.rst | 2 +- skfda/_utils/__init__.py | 30 +++-- skfda/_utils/_utils.py | 37 +++-- skfda/ml/classification/__init__.py | 2 +- .../classification/_centroid_classifiers.py | 2 +- skfda/ml/classification/_depth_classifiers.py | 115 ++++++++-------- skfda/preprocessing/dim_reduction/__init__.py | 2 + .../feature_extraction/__init__.py | 1 + .../feature_extraction/_ddg_transformer.py | 127 ++++++++++++++++++ 9 files changed, 235 insertions(+), 83 deletions(-) create mode 100644 skfda/preprocessing/dim_reduction/feature_extraction/__init__.py create mode 100644 skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py diff --git a/docs/modules/ml/classification.rst b/docs/modules/ml/classification.rst index a82dcae65..1fd1e63bd 100644 --- a/docs/modules/ml/classification.rst +++ b/docs/modules/ml/classification.rst @@ -24,4 +24,4 @@ it is explained the basic usage of these estimators. skfda.ml.classification.NearestCentroid skfda.ml.classification.DTMClassifier skfda.ml.classification.MaximumDepthClassifier - skfda.ml.classification.DDTransform + skfda.ml.classification.DDGClassifier diff --git a/skfda/_utils/__init__.py b/skfda/_utils/__init__.py index 9b78fb479..042b82e4e 100644 --- a/skfda/_utils/__init__.py +++ b/skfda/_utils/__init__.py @@ -1,11 +1,21 @@ from . 
import constants - -from ._utils import (_tuple_of_arrays, _cartesian_product, - _check_estimator, _int_to_real, - _to_grid, check_is_univariate, - _same_domain, _to_array_maybe_ragged, - _reshape_eval_points, - _evaluate_grid, nquad_vec, - _FDataCallable, _pairwise_commutative, - _domain_range, _check_array_key, - _classifier_get_classes) +from ._utils import ( + _cartesian_product, + _check_array_key, + _check_estimator, + _classifier_fit_distributions, + _classifier_get_classes, + _classifier_get_distributions, + _domain_range, + _evaluate_grid, + _FDataCallable, + _int_to_real, + _pairwise_commutative, + _reshape_eval_points, + _same_domain, + _to_array_maybe_ragged, + _to_grid, + _tuple_of_arrays, + check_is_univariate, + nquad_vec, +) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 821b440ed..00654b470 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -8,6 +8,8 @@ from pandas.api.indexers import check_array_indexer import scipy.integrate +from sklearn.base import clone + import numpy as np @@ -54,12 +56,12 @@ def check_is_univariate(fd): """ if fd.dim_domain != 1 or fd.dim_codomain != 1: raise ValueError(f"The functional data must be univariate, i.e., " + - f"with dim_domain=1 " + - (f"" if fd.dim_domain == 1 - else f"(currently is {fd.dim_domain}) ") + - f"and dim_codomain=1 " + - (f"" if fd.dim_codomain == 1 else - f"(currently is {fd.dim_codomain})")) + f"with dim_domain=1 " + + (f"" if fd.dim_domain == 1 + else f"(currently is {fd.dim_domain}) ") + + f"and dim_codomain=1 " + + (f"" if fd.dim_codomain == 1 else + f"(currently is {fd.dim_codomain})")) def _to_grid(X, y, eval_points=None): @@ -355,8 +357,8 @@ def _evaluate_grid(axes, *, evaluate_method, # Reshape the result if aligned: - res = res.reshape([n_samples] + - list(shape) + [dim_codomain]) + res = res.reshape([n_samples] + + list(shape) + [dim_codomain]) else: @@ -464,3 +466,22 @@ def _classifier_get_classes(y): raise ValueError(f'The number of classes has to 
be greater than' f' one; got {classes.size} class') return classes, y_ind + + +def _classifier_get_distributions(classes, X, y_ind, depth_methods): + distributions = [ + clone(depth_method).fit(X[y_ind == cur_class]) + for cur_class in range(classes.size) + for depth_method in depth_methods + ] + return distributions + + +def _classifier_fit_distributions(X, y, depth_methods): + classes_, y_ind = _classifier_get_classes(y) + + distributions_ = _classifier_get_distributions( + classes_, X, y_ind, depth_methods, + ) + + return classes_, distributions_ diff --git a/skfda/ml/classification/__init__.py b/skfda/ml/classification/__init__.py index 76b445f27..5498de463 100644 --- a/skfda/ml/classification/__init__.py +++ b/skfda/ml/classification/__init__.py @@ -1,6 +1,6 @@ """Classification.""" from ._centroid_classifiers import DTMClassifier, NearestCentroid -from ._depth_classifiers import DDTransform, MaximumDepthClassifier +from ._depth_classifiers import DDGClassifier, MaximumDepthClassifier from ._neighbors_classifiers import ( KNeighborsClassifier, RadiusNeighborsClassifier, diff --git a/skfda/ml/classification/_centroid_classifiers.py b/skfda/ml/classification/_centroid_classifiers.py index 79df49ef4..49aef2754 100644 --- a/skfda/ml/classification/_centroid_classifiers.py +++ b/skfda/ml/classification/_centroid_classifiers.py @@ -157,7 +157,7 @@ class DTMClassifier(BaseEstimator, ClassifierMixin): 0.875 See also: - :class:`~skfda.ml.classification.MaximumDepthClassifier` + :class:`~skfda.ml.classification.NearestCentroid` References: Fraiman, R. and Muniz, G. (2001). 
Trimmed means for functional diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index 68da77cf1..dae284409 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -3,16 +3,13 @@ from typing import List import numpy as np -from sklearn.base import ( - BaseEstimator, - ClassifierMixin, - TransformerMixin, - clone, -) +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.pipeline import make_pipeline from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted -from ..._utils import _classifier_get_classes -from ...exploratory.depth import Depth, IntegratedDepth, ModifiedBandDepth +from ..._utils import _classifier_fit_distributions +from ...exploratory.depth import Depth, ModifiedBandDepth +from ...preprocessing.dim_reduction.feature_extraction import DDGTransformer class MaximumDepthClassifier(BaseEstimator, ClassifierMixin): @@ -56,19 +53,15 @@ class MaximumDepthClassifier(BaseEstimator, ClassifierMixin): 0.875 See also: - :class:`~skfda.ml.classification.DDClassifier` - :class:`~skfda.ml.classification.DTMClassifier` + :class:`~skfda.ml.classification.DDGClassifier` References: Ghosh, A. K. and Chaudhuri, P. (2005b). On maximum depth and related classifiers. Scandinavian Journal of Statistics, 32, 327–350. """ - def __init__(self, depth_method: Depth = None): - if depth_method is None: - self.depth_method = ModifiedBandDepth() - else: - self.depth_method = depth_method + def __init__(self, depth_method: Depth = ModifiedBandDepth()): + self.depth_method = depth_method def fit(self, X, y): """Fit the model using X as training data and y as target values. 
@@ -80,13 +73,12 @@ def fit(self, X, y): Returns: self (object) """ - classes_, y_ind = _classifier_get_classes(y) + classes_, distributions_ = _classifier_fit_distributions( + X, y, [self.depth_method], + ) self.classes_ = classes_ - self.distributions_ = [ - clone(self.depth_method).fit(X[y_ind == cur_class]) - for cur_class in range(self.classes_.size) - ] + self.distributions_ = distributions_ return self @@ -110,14 +102,16 @@ def predict(self, X): return self.classes_[np.argmax(depths, axis=0)] -class DDTransform(BaseEstimator, TransformerMixin): - r"""Depth-versus-depth (DD) transformer for functional data. +class DDGClassifier(BaseEstimator, ClassifierMixin): + r"""Generalized depth-versus-depth (DD) classifer for functional data. + + This classifier builds an interface around the DDGTransfomer. - This transformer takes a list of k depths and performs the following map: + The transformer takes a list of k depths and performs the following map: .. math:: \mathcal{X} &\rightarrow \mathbb{R}^G \\ - x &\rightarrow \textbf{d} = (D_1^1(x),...,D_g^k(x)) + x &\rightarrow \textbf{d} = (D_1^1(x), D_1^2(x),...,D_g^k(x)) Where :math:`D_i^j(x)` is the depth of the point :math:`x` with respect to the data in the :math:`i`-th group using the :math:`j`-th depth of the @@ -126,13 +120,21 @@ class DDTransform(BaseEstimator, TransformerMixin): Note that :math:`\mathcal{X}` is possibly multivariate, that is, :math:`\mathcal{X} = \mathcal{X}_1 \times ... \times \mathcal{X}_p`. + In the G dimensional space the classification is performed using a + multivariate classifer. + Parameters: - depth_methods (default + depth_method (default :class:`ModifiedBandDepth `): + The depth class to use when calculating the depth of a test + sample in a class. See the documentation of the depths module + for a list of available depths. By default it is ModifiedBandDepth. + depth_methods (optional): List of depth classes to use when calculating the depth of a test sample in a class. 
See the documentation of the depths module - for a list of available depths. By default it is the list - containing ModifiedBandDepth. + for a list of available depths. By default it is None. + multivariate_classifier (): + The multivariate classifier to use in the DDG-plot. Examples: Firstly, we will import and split the Berkeley Growth Study dataset @@ -145,34 +147,29 @@ class DDTransform(BaseEstimator, TransformerMixin): >>> X_train, X_test, y_train, y_test = train_test_split( ... fd, y, test_size=0.25, stratify=y, random_state=0) - >>> from skfda.ml.classification import DDTransform - >>> from sklearn.pipeline import make_pipeline >>> from sklearn.neighbors import KNeighborsClassifier - We classify by first transforming our data using the defined map - and then using KNN + We will fit a Maximum depth classifier using KNN - >>> pipe = make_pipeline(DDTransform(), KNeighborsClassifier()) - >>> pipe.fit(X_train, y_train) - Pipeline(steps=[('ddtransform', - DDTransform(depth_methods=[ModifiedBandDepth(), - IntegratedDepth()])), - ('kneighborsclassifier', KNeighborsClassifier())]) + >>> from skfda.ml.classification import DDGClassifier + >>> clf = DDGClassifier(KNeighborsClassifier()) + >>> clf.fit(X_train, y_train) + DDGClassifier(...) We can predict the class of new samples - >>> pipe.predict(X_test) + >>> clf.predict(X_test) array([1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1]) Finally, we calculate the mean accuracy for the test data - >>> pipe.score(X_test, y_test) + >>> clf.score(X_test, y_test) 0.875 See also: - :class:`~skfda.ml.classification.DTMClassifier` :class:`~skfda.ml.classification.MaximumDepthClassifier` + :class:`~skfda.preprocessing.dim_reduction.feature_extraction._ddg_transformer` References: Li, J., Cuesta-Albertos, J. A., and Liu, R. Y. (2012). DD-classifier: @@ -183,11 +180,16 @@ class DDTransform(BaseEstimator, TransformerMixin): (2017) The DDG-classifier in the functional setting. TEST, 26. 119-142. 
""" - def __init__(self, depth_methods: List[Depth] = None): - if depth_methods is None: - self.depth_methods = [ModifiedBandDepth(), IntegratedDepth()] - else: - self.depth_methods = depth_methods + def __init__( + self, + multivariate_classifier: ClassifierMixin = None, + depth_method: Depth = ModifiedBandDepth(), + depth_methods: List[Depth] = None, + ): + self.pipeline = make_pipeline( + DDGTransformer(depth_method, depth_methods), + multivariate_classifier, + ) def fit(self, X, y): """Fit the model using X as training data and y as target values. @@ -199,29 +201,18 @@ def fit(self, X, y): Returns: self (object) """ - classes_, y_ind = _classifier_get_classes(y) - - self.classes_ = classes_ - self.distributions_ = [ - clone(depth_method).fit(X[y_ind == cur_class]) - for cur_class in range(self.classes_.size) - for depth_method in self.depth_methods - ] + self.pipeline.fit(X, y) return self - def transform(self, X): - """Transform the provided data using the defined map. + def predict(self, X): + """Predict the class labels for the provided data. Args: X (:class:`FDataGrid`): FDataGrid with the test samples. Returns: - X_new (array-like): array of shape (n_samples, G). + y (np.array): array of shape (n_samples) with class labels + for each data sample. """ - sklearn_check_is_fitted(self) - - return np.transpose([ - distribution.predict(X) - for distribution in self.distributions_ - ]) + return self.pipeline.predict(X) diff --git a/skfda/preprocessing/dim_reduction/__init__.py b/skfda/preprocessing/dim_reduction/__init__.py index b079520b4..b73126bad 100644 --- a/skfda/preprocessing/dim_reduction/__init__.py +++ b/skfda/preprocessing/dim_reduction/__init__.py @@ -1,2 +1,4 @@ +from . import feature_selection +from . import feature_extraction from . import projection from . 
import variable_selection diff --git a/skfda/preprocessing/dim_reduction/feature_extraction/__init__.py b/skfda/preprocessing/dim_reduction/feature_extraction/__init__.py new file mode 100644 index 000000000..7c0c539f3 --- /dev/null +++ b/skfda/preprocessing/dim_reduction/feature_extraction/__init__.py @@ -0,0 +1 @@ +from ._ddg_transformer import DDGTransformer diff --git a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py new file mode 100644 index 000000000..b41ce8b19 --- /dev/null +++ b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py @@ -0,0 +1,127 @@ +"""Feature extraction transformers for dimensionality reduction.""" + +from typing import List + +import numpy as np +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted + +from ...._utils import _classifier_fit_distributions +from ....exploratory.depth import Depth, ModifiedBandDepth + + +class DDGTransformer(BaseEstimator, TransformerMixin): + r"""Generalized depth-versus-depth (DD) transformer for functional data. + + This transformer takes a list of k depths and performs the following map: + + .. math:: + \mathcal{X} &\rightarrow \mathbb{R}^G \\ + x &\rightarrow \textbf{d} = (D_1^1(x), D_1^2(x),...,D_g^k(x)) + + Where :math:`D_i^j(x)` is the depth of the point :math:`x` with respect to + the data in the :math:`i`-th group using the :math:`j`-th depth of the + provided list. + + Note that :math:`\mathcal{X}` is possibly multivariate, that is, + :math:`\mathcal{X} = \mathcal{X}_1 \times ... \times \mathcal{X}_p`. + + Parameters: + depth_method (default + :class:`ModifiedBandDepth `): + The depth class to use when calculating the depth of a test + sample in a class. See the documentation of the depths module + for a list of available depths. By default it is ModifiedBandDepth. 
+ depth_methods (optional): + List of depth classes to use when calculating the depth of a test + sample in a class. See the documentation of the depths module + for a list of available depths. By default it is None. + If a list is provided, the parameter depth_method will be ignored. + + Examples: + Firstly, we will import and split the Berkeley Growth Study dataset + + >>> from skfda.datasets import fetch_growth + >>> from sklearn.model_selection import train_test_split + >>> dataset = fetch_growth() + >>> fd = dataset['data'] + >>> y = dataset['target'] + >>> X_train, X_test, y_train, y_test = train_test_split( + ... fd, y, test_size=0.25, stratify=y, random_state=0) + + >>> from skfda.preprocessing.dim_reduction.feature_extraction import \ + ... DDGTransformer + >>> from sklearn.pipeline import make_pipeline + >>> from sklearn.neighbors import KNeighborsClassifier + + We classify by first transforming our data using the defined map + and then using KNN + + >>> pipe = make_pipeline(DDGTransformer(), KNeighborsClassifier()) + >>> pipe.fit(X_train, y_train) + Pipeline(steps=[('ddgtransformer', + DDGTransformer(depth_method=None, + depth_methods=[ModifiedBandDepth()])), + ('kneighborsclassifier', KNeighborsClassifier())]) + + We can predict the class of new samples + + >>> pipe.predict(X_test) + array([1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, + 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1]) + + Finally, we calculate the mean accuracy for the test data + + >>> pipe.score(X_test, y_test) + 0.875 + + References: + Cuesta-Albertos, J. A., Febrero-Bande, M. and Oviedo de la Fuente, M. + (2017). The DDG-classifier in the functional setting. + TEST, 26. 119-142. + """ + + def __init__( + self, + depth_method: Depth = ModifiedBandDepth(), + depth_methods: List[Depth] = None, + ): + if depth_methods is None: + self.depth_methods = [depth_method] + else: + self.depth_methods = depth_methods + + def fit(self, X, y): + """Fit the model using X as training data and y as target values. 
+ + Args: + X (:class:`FDataGrid`): FDataGrid with the training data. + y (array-like): Target values of shape = (n_samples). + + Returns: + self (object) + """ + classes_, distributions_ = _classifier_fit_distributions( + X, y, self.depth_methods, + ) + + self.classes_ = classes_ + self.distributions_ = distributions_ + + return self + + def transform(self, X): + """Transform the provided data using the defined map. + + Args: + X (:class:`FDataGrid`): FDataGrid with the test samples. + + Returns: + X_new (array-like): array of shape (n_samples, G). + """ + sklearn_check_is_fitted(self) + + return np.transpose([ + distribution.predict(X) + for distribution in self.distributions_ + ]) From 02dd5093da88b9dffa388765a95b938ffd9acba2 Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Wed, 3 Feb 2021 20:53:49 +0100 Subject: [PATCH 057/417] feature_selection is still empty --- skfda/preprocessing/dim_reduction/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skfda/preprocessing/dim_reduction/__init__.py b/skfda/preprocessing/dim_reduction/__init__.py index b73126bad..1473aa4b5 100644 --- a/skfda/preprocessing/dim_reduction/__init__.py +++ b/skfda/preprocessing/dim_reduction/__init__.py @@ -1,4 +1,3 @@ -from . import feature_selection from . import feature_extraction from . import projection from . 
import variable_selection From bea33ddb8229edbbf800140e140145fe26bcf5cb Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Wed, 3 Feb 2021 21:32:13 +0100 Subject: [PATCH 058/417] depth_method attribute --- .../dim_reduction/feature_extraction/_ddg_transformer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py index b41ce8b19..08a8bbc08 100644 --- a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py +++ b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py @@ -60,8 +60,7 @@ class DDGTransformer(BaseEstimator, TransformerMixin): >>> pipe = make_pipeline(DDGTransformer(), KNeighborsClassifier()) >>> pipe.fit(X_train, y_train) Pipeline(steps=[('ddgtransformer', - DDGTransformer(depth_method=None, - depth_methods=[ModifiedBandDepth()])), + DDGTransformer(depth_methods=[ModifiedBandDepth()])), ('kneighborsclassifier', KNeighborsClassifier())]) We can predict the class of new samples @@ -86,6 +85,7 @@ def __init__( depth_method: Depth = ModifiedBandDepth(), depth_methods: List[Depth] = None, ): + self.depth_method = depth_method if depth_methods is None: self.depth_methods = [depth_method] else: From b16cd12bcf550c6ab667c5da7dd6650a02b55292 Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Wed, 3 Feb 2021 21:43:46 +0100 Subject: [PATCH 059/417] depth_method? 
--- skfda/ml/classification/_depth_classifiers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index dae284409..5d0ac214b 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -186,6 +186,7 @@ def __init__( depth_method: Depth = ModifiedBandDepth(), depth_methods: List[Depth] = None, ): + self.depth_method = depth_method self.pipeline = make_pipeline( DDGTransformer(depth_method, depth_methods), multivariate_classifier, From efe19d9547fa540bf7380f5a3d758c1da12aa6b6 Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Wed, 3 Feb 2021 22:26:32 +0100 Subject: [PATCH 060/417] Attributes --- skfda/ml/classification/_depth_classifiers.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index 5d0ac214b..41bc94c99 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -1,5 +1,6 @@ """Depth-based models for supervised classification.""" +from skfda.exploratory.depth import multivariate from typing import List import numpy as np @@ -186,11 +187,9 @@ def __init__( depth_method: Depth = ModifiedBandDepth(), depth_methods: List[Depth] = None, ): + self.multivariate_classifier = multivariate_classifier self.depth_method = depth_method - self.pipeline = make_pipeline( - DDGTransformer(depth_method, depth_methods), - multivariate_classifier, - ) + self.depth_methods = depth_methods def fit(self, X, y): """Fit the model using X as training data and y as target values. 
@@ -202,6 +201,11 @@ def fit(self, X, y): Returns: self (object) """ + self.pipeline = make_pipeline( + DDGTransformer(self.depth_method, self.depth_methods), + self.multivariate_classifier, + ) + self.pipeline.fit(X, y) return self From a5c540e5d3c9f729c79299e6fa51a02bc629045e Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Wed, 3 Feb 2021 23:42:08 +0100 Subject: [PATCH 061/417] depth_method and some types --- skfda/_utils/_utils.py | 5 ++-- skfda/ml/classification/_depth_classifiers.py | 21 ++++++--------- .../feature_extraction/_ddg_transformer.py | 27 +++++++------------ 3 files changed, 21 insertions(+), 32 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 491bd4ff0..a3ef5626d 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -396,8 +396,9 @@ def _evaluate_grid( # Reshape the result if aligned: - res = res.reshape([n_samples] - + list(shape) + [dim_codomain]) + res = res.reshape( + [n_samples] + list(shape) + [dim_codomain] + ) else: diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index 41bc94c99..379c847cf 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -1,7 +1,7 @@ """Depth-based models for supervised classification.""" from skfda.exploratory.depth import multivariate -from typing import List +from typing import Sequence, Union import numpy as np from sklearn.base import BaseEstimator, ClassifierMixin @@ -127,14 +127,11 @@ class DDGClassifier(BaseEstimator, ClassifierMixin): Parameters: depth_method (default :class:`ModifiedBandDepth `): - The depth class to use when calculating the depth of a test - sample in a class. See the documentation of the depths module - for a list of available depths. By default it is ModifiedBandDepth. - depth_methods (optional): - List of depth classes to use when calculating the depth of a test - sample in a class. 
See the documentation of the depths module - for a list of available depths. By default it is None. - multivariate_classifier (): + The depth class or sequence of depths to use when calculating + the depth of a test sample in a class. See the documentation of + the depths module for a list of available depths. By default it + is ModifiedBandDepth. + multivariate_classifier: The multivariate classifier to use in the DDG-plot. Examples: @@ -184,12 +181,10 @@ class DDGClassifier(BaseEstimator, ClassifierMixin): def __init__( self, multivariate_classifier: ClassifierMixin = None, - depth_method: Depth = ModifiedBandDepth(), - depth_methods: List[Depth] = None, + depth_method: Union[Depth, Sequence[Depth]] = ModifiedBandDepth(), ): self.multivariate_classifier = multivariate_classifier self.depth_method = depth_method - self.depth_methods = depth_methods def fit(self, X, y): """Fit the model using X as training data and y as target values. @@ -202,7 +197,7 @@ def fit(self, X, y): self (object) """ self.pipeline = make_pipeline( - DDGTransformer(self.depth_method, self.depth_methods), + DDGTransformer(self.depth_method), self.multivariate_classifier, ) diff --git a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py index 08a8bbc08..4fcf91006 100644 --- a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py +++ b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py @@ -1,6 +1,6 @@ """Feature extraction transformers for dimensionality reduction.""" -from typing import List +from typing import List, Sequence, Union import numpy as np from sklearn.base import BaseEstimator, TransformerMixin @@ -29,14 +29,10 @@ class DDGTransformer(BaseEstimator, TransformerMixin): Parameters: depth_method (default :class:`ModifiedBandDepth `): - The depth class to use when calculating the depth of a test - sample in a class. 
See the documentation of the depths module - for a list of available depths. By default it is ModifiedBandDepth. - depth_methods (optional): - List of depth classes to use when calculating the depth of a test - sample in a class. See the documentation of the depths module - for a list of available depths. By default it is None. - If a list is provided, the parameter depth_method will be ignored. + The depth class or sequence of depths to use when calculating + the depth of a test sample in a class. See the documentation of + the depths module for a list of available depths. By default it + is ModifiedBandDepth. Examples: Firstly, we will import and split the Berkeley Growth Study dataset @@ -60,7 +56,7 @@ class DDGTransformer(BaseEstimator, TransformerMixin): >>> pipe = make_pipeline(DDGTransformer(), KNeighborsClassifier()) >>> pipe.fit(X_train, y_train) Pipeline(steps=[('ddgtransformer', - DDGTransformer(depth_methods=[ModifiedBandDepth()])), + DDGTransformer(depth_method=[ModifiedBandDepth()])), ('kneighborsclassifier', KNeighborsClassifier())]) We can predict the class of new samples @@ -82,14 +78,11 @@ class DDGTransformer(BaseEstimator, TransformerMixin): def __init__( self, - depth_method: Depth = ModifiedBandDepth(), - depth_methods: List[Depth] = None, + depth_method: Union[Depth, Sequence[Depth]] = ModifiedBandDepth(), ): + if isinstance(depth_method, Depth): + depth_method = [depth_method] self.depth_method = depth_method - if depth_methods is None: - self.depth_methods = [depth_method] - else: - self.depth_methods = depth_methods def fit(self, X, y): """Fit the model using X as training data and y as target values. 
@@ -102,7 +95,7 @@ def fit(self, X, y): self (object) """ classes_, distributions_ = _classifier_fit_distributions( - X, y, self.depth_methods, + X, y, self.depth_method, ) self.classes_ = classes_ From 0516b89a6fb995d7b2412e647cfaaf6626dc075b Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Thu, 4 Feb 2021 23:11:40 +0100 Subject: [PATCH 062/417] Added some types --- setup.cfg | 4 +- skfda/_utils/_utils.py | 9 ++-- skfda/ml/classification/_depth_classifiers.py | 45 ++++++++++--------- .../feature_extraction/_ddg_transformer.py | 25 ++++++----- 4 files changed, 44 insertions(+), 39 deletions(-) diff --git a/setup.cfg b/setup.cfg index 64780c834..f7131ccb6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -81,7 +81,7 @@ per-file-ignores = WPS235, # Logic is allowec in `__init__.py` WPS412 - + # There are many datasets _real_datasets.py: WPS202 @@ -96,7 +96,7 @@ rst-directives = rst-roles = attr,class,func,meth,mod,obj,ref,term, - + allowed-domain-names = data, obj, result, results, val, value, values, var # Needs to be tuned diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index a3ef5626d..bbc4eba50 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -1,4 +1,4 @@ -"""Module with generic methods""" +"""Module with generic methods.""" from __future__ import annotations @@ -19,7 +19,6 @@ import numpy as np import scipy.integrate from pandas.api.indexers import check_array_indexer - from sklearn.base import clone from ..representation._typing import ( @@ -547,7 +546,9 @@ def _check_array_key(array, key): def _check_estimator(estimator): from sklearn.utils.estimator_checks import ( - check_get_params_invariance, check_set_params) + check_get_params_invariance, + check_set_params, + ) name = estimator.__name__ instance = estimator() @@ -556,8 +557,8 @@ def _check_estimator(estimator): def _classifier_get_classes(y): - from sklearn.utils.multiclass import check_classification_targets from sklearn.preprocessing import LabelEncoder + from 
sklearn.utils.multiclass import check_classification_targets check_classification_targets(y) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index 379c847cf..ea14eea2c 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -1,17 +1,20 @@ """Depth-based models for supervised classification.""" - -from skfda.exploratory.depth import multivariate from typing import Sequence, Union import numpy as np +from numpy import ndarray from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.pipeline import make_pipeline from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted +from skfda.representation.grid import FDataGrid + from ..._utils import _classifier_fit_distributions from ...exploratory.depth import Depth, ModifiedBandDepth from ...preprocessing.dim_reduction.feature_extraction import DDGTransformer +default_depth = ModifiedBandDepth() + class MaximumDepthClassifier(BaseEstimator, ClassifierMixin): """Maximum depth classifier for functional data. @@ -19,8 +22,7 @@ class MaximumDepthClassifier(BaseEstimator, ClassifierMixin): Test samples are classified to the class where they are deeper. Parameters: - depth_method (Depth, default - :class:`ModifiedBandDepth `): + depth_method: The depth class to use when calculating the depth of a test sample in a class. See the documentation of the depths module for a list of available depths. By default it is ModifiedBandDepth. @@ -61,15 +63,15 @@ class MaximumDepthClassifier(BaseEstimator, ClassifierMixin): related classifiers. Scandinavian Journal of Statistics, 32, 327–350. """ - def __init__(self, depth_method: Depth = ModifiedBandDepth()): + def __init__(self, depth_method: Depth = default_depth) -> None: self.depth_method = depth_method - def fit(self, X, y): + def fit(self, X: FDataGrid, y: ndarray): """Fit the model using X as training data and y as target values. 
Args: - X (:class:`FDataGrid`): FDataGrid with the training data. - y (array-like): Target values of shape = (n_samples). + X: FDataGrid with the training data. + y: Target values of shape = (n_samples). Returns: self (object) @@ -83,15 +85,15 @@ def fit(self, X, y): return self - def predict(self, X): + def predict(self, X: FDataGrid) -> ndarray: """Predict the class labels for the provided data. Args: - X (:class:`FDataGrid`): FDataGrid with the test samples. + X: FDataGrid with the test samples. Returns: - y (np.array): array of shape (n_samples) with class labels - for each data sample. + ndarray: array of shape (n_samples) with class labels + for each data sample. """ sklearn_check_is_fitted(self) @@ -125,8 +127,7 @@ class DDGClassifier(BaseEstimator, ClassifierMixin): multivariate classifer. Parameters: - depth_method (default - :class:`ModifiedBandDepth `): + depth_method: The depth class or sequence of depths to use when calculating the depth of a test sample in a class. See the documentation of the depths module for a list of available depths. By default it @@ -181,17 +182,17 @@ class DDGClassifier(BaseEstimator, ClassifierMixin): def __init__( self, multivariate_classifier: ClassifierMixin = None, - depth_method: Union[Depth, Sequence[Depth]] = ModifiedBandDepth(), - ): + depth_method: Union[Depth, Sequence[Depth]] = default_depth, + ) -> None: self.multivariate_classifier = multivariate_classifier self.depth_method = depth_method - def fit(self, X, y): + def fit(self, X: FDataGrid, y: ndarray): """Fit the model using X as training data and y as target values. Args: - X (:class:`FDataGrid`): FDataGrid with the training data. - y (array-like): Target values of shape = (n_samples). + X: FDataGrid with the training data. + y: Target values of shape = (n_samples). Returns: self (object) @@ -205,14 +206,14 @@ def fit(self, X, y): return self - def predict(self, X): + def predict(self, X: FDataGrid) -> ndarray: """Predict the class labels for the provided data. 
Args: - X (:class:`FDataGrid`): FDataGrid with the test samples. + X: FDataGrid with the test samples. Returns: - y (np.array): array of shape (n_samples) with class labels + ndarray: array of shape (n_samples) with class labels for each data sample. """ return self.pipeline.predict(X) diff --git a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py index 4fcf91006..8fcc25853 100644 --- a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py +++ b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py @@ -1,13 +1,17 @@ """Feature extraction transformers for dimensionality reduction.""" -from typing import List, Sequence, Union +from typing import Sequence, Union import numpy as np +from numpy import ndarray from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted from ...._utils import _classifier_fit_distributions from ....exploratory.depth import Depth, ModifiedBandDepth +from ....representation.grid import FDataGrid + +default_depth = ModifiedBandDepth() class DDGTransformer(BaseEstimator, TransformerMixin): @@ -27,8 +31,7 @@ class DDGTransformer(BaseEstimator, TransformerMixin): :math:`\mathcal{X} = \mathcal{X}_1 \times ... \times \mathcal{X}_p`. Parameters: - depth_method (default - :class:`ModifiedBandDepth `): + depth_method: The depth class or sequence of depths to use when calculating the depth of a test sample in a class. See the documentation of the depths module for a list of available depths. 
By default it @@ -78,18 +81,18 @@ class DDGTransformer(BaseEstimator, TransformerMixin): def __init__( self, - depth_method: Union[Depth, Sequence[Depth]] = ModifiedBandDepth(), - ): + depth_method: Union[Depth, Sequence[Depth]] = default_depth, + ) -> None: if isinstance(depth_method, Depth): depth_method = [depth_method] self.depth_method = depth_method - def fit(self, X, y): + def fit(self, X: FDataGrid, y: ndarray): """Fit the model using X as training data and y as target values. Args: - X (:class:`FDataGrid`): FDataGrid with the training data. - y (array-like): Target values of shape = (n_samples). + X: FDataGrid with the training data. + y: Target values of shape = (n_samples). Returns: self (object) @@ -103,14 +106,14 @@ def fit(self, X, y): return self - def transform(self, X): + def transform(self, X: FDataGrid) -> ndarray: """Transform the provided data using the defined map. Args: - X (:class:`FDataGrid`): FDataGrid with the test samples. + X: FDataGrid with the test samples. Returns: - X_new (array-like): array of shape (n_samples, G). + ndarray: array of shape (n_samples, G). 
""" sklearn_check_is_fitted(self) From 01e9b72c981f895ee0fe59e2e67d8d3e24260e91 Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Fri, 5 Feb 2021 19:08:15 +0100 Subject: [PATCH 063/417] _utils reviewed --- setup.cfg | 2 +- skfda/_utils/_utils.py | 186 ++++++++++-------- skfda/ml/classification/_depth_classifiers.py | 7 +- skfda/preprocessing/dim_reduction/__init__.py | 5 +- .../feature_extraction/__init__.py | 1 + .../feature_extraction/_ddg_transformer.py | 3 +- 6 files changed, 107 insertions(+), 97 deletions(-) diff --git a/setup.cfg b/setup.cfg index f7131ccb6..3baa6db1d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -103,7 +103,7 @@ allowed-domain-names = data, obj, result, results, val, value, values, var max-imports = 20 max-arguments = 10 max-attributes = 10 -max-line-complexity = 25 +max-line-complexity = 30 max-local-variables = 15 max-methods = 30 max-module-expressions = 15 diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index bbc4eba50..49b322da5 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -18,6 +18,7 @@ import numpy as np import scipy.integrate +from numpy import ndarray from pandas.api.indexers import check_array_indexer from sklearn.base import clone @@ -32,8 +33,10 @@ RandomStateLike = Optional[Union[int, np.random.RandomState]] if TYPE_CHECKING: + from ..exploratory.depth import Depth from ..representation import FData from ..representation.basis import Basis + from ..representation.grid import FDataGrid class _FDataCallable(): @@ -60,13 +63,15 @@ def new_function(*args, **kwargs): tmp = np.empty(self.n_samples) new_nsamples = len(tmp[key]) - return _FDataCallable(new_function, - domain_range=self.domain_range, - n_samples=new_nsamples) + return _FDataCallable( + new_function, + domain_range=self.domain_range, + n_samples=new_nsamples, + ) def check_is_univariate(fd): - """Checks if an FData is univariate and raises an error + """Check if an FData is univariate and raises an error. 
Args: fd (:class:`~skfda.FData`): Functional object to check if is @@ -78,18 +83,17 @@ def check_is_univariate(fd): """ if fd.dim_domain != 1 or fd.dim_codomain != 1: - raise ValueError(f"The functional data must be univariate, i.e., " + - f"with dim_domain=1 " - + (f"" if fd.dim_domain == 1 - else f"(currently is {fd.dim_domain}) ") - + f"and dim_codomain=1 " - + (f"" if fd.dim_codomain == 1 else - f"(currently is {fd.dim_codomain})")) + raise ValueError( + f'The functional data must be univariate, i.e., ' + f'with dim_domain=1 ' + f'{"" if fd.dim_domain == 1 else f"(now {fd.dim_domain}) "}' + f'and dim_codomain=1 ' + f'{"" if fd.dim_codomain == 1 else f"(now {fd.dim_codomain})"}', + ) def _to_grid(X, y, eval_points=None): """Transform a pair of FDatas in grids to perform calculations.""" - from .. import FDataGrid x_is_grid = isinstance(X, FDataGrid) y_is_grid = isinstance(y, FDataGrid) @@ -178,31 +182,32 @@ def convert_row(row): if all(s == shapes[0] for s in shapes): return np.array(array_list) - else: - res = np.empty(len(array_list), dtype=np.object_) - for i, a in enumerate(array_list): - res[i] = a + res = np.empty(len(array_list), dtype=np.object_) - return res + for i, a in enumerate(array_list): + res[i] = a + + return res def _cartesian_product(axes, flatten=True, return_shape=False): - """Computes the cartesian product of the axes. + """Compute the cartesian product of the axes. Computes the cartesian product of the axes and returns a numpy array of 1 dimension with all the possible combinations, for an arbitrary number of dimensions. Args: - Axes (array_like): List with axes. + axes (array_like): List with axes. + flatten: Boolean flag. True if the product is flattened. + return_shape: Boolean flag. True if the shape is returned. - Return: + Returns: (np.ndarray): Numpy 2-D array with all the possible combinations. The entry (i,j) represent the j-th coordinate of the i-th point. 
Examples: - >>> from skfda._utils import _cartesian_product >>> axes = [[0,1],[2,3]] >>> _cartesian_product(axes) @@ -222,7 +227,6 @@ def _cartesian_product(axes, flatten=True, return_shape=False): >>> _cartesian_product(axes) array([[0], [1]]) - """ cartesian = np.stack(np.meshgrid(*axes, indexing='ij'), -1) @@ -233,8 +237,8 @@ def _cartesian_product(axes, flatten=True, return_shape=False): if return_shape: return cartesian, shape - else: - return cartesian + + return cartesian def _same_domain(fd: Union[Basis, FData], fd2: Union[Basis, FData]) -> bool: @@ -249,8 +253,7 @@ def _reshape_eval_points( n_samples: int, dim_domain: int, ) -> np.ndarray: - """Convert and reshape the eval_points to ndarray with the - corresponding shape. + """Convert and reshape the eval_points to ndarray. Args: eval_points: Evaluation points to be reshaped. @@ -267,31 +270,36 @@ def _reshape_eval_points( x `dim_domain`. """ - if aligned: eval_points = np.asarray(eval_points) else: eval_points = _to_array_maybe_ragged( - eval_points, row_shape=(-1, dim_domain)) + eval_points, row_shape=(-1, dim_domain), + ) # Case evaluation of a single value, i.e., f(0) # Only allowed for aligned evaluation - if aligned and (eval_points.shape == (dim_domain,) - or (eval_points.ndim == 0 and dim_domain == 1)): + if aligned and ( + eval_points.shape == (dim_domain,) + or (eval_points.ndim == 0 and dim_domain == 1) + ): eval_points = np.array([eval_points]) if aligned: # Samples evaluated at same eval points - eval_points = eval_points.reshape((eval_points.shape[0], - dim_domain)) + eval_points = eval_points.reshape(( + eval_points.shape[0], + dim_domain, + )) else: # Different eval_points for each sample if eval_points.shape[0] != n_samples: - - raise ValueError(f"eval_points should be a list " - f"of length {n_samples} with the " - f"evaluation points for each sample.") + raise ValueError( + f'eval_points should be a list ' + f'of length {n_samples} with the ' + f'evaluation points for each sample.', + ) 
return eval_points @@ -306,8 +314,9 @@ def _one_grid_to_points(axes, *, dim_domain): axes = _to_grid_points(axes) if len(axes) != dim_domain: - raise ValueError(f"Length of axes should be " - f"{dim_domain}") + raise ValueError( + f'Length of axes should be {dim_domain}', + ) cartesian, shape = _cartesian_product(axes, return_shape=True) @@ -358,6 +367,10 @@ def _evaluate_grid( object. aligned: If False evaluates each sample in a different grid. + evaluate_method: method to use to evaluate the points + n_samples: number of samples + dim_domain: dimension of the domain + dim_codomain: dimensions of the codomain Returns: Numpy array with dim_domain + 1 dimensions with @@ -368,7 +381,6 @@ def _evaluate_grid( dimension. """ - # Compute intersection points and resulting shapes if aligned: @@ -379,31 +391,36 @@ def _evaluate_grid( axes = list(axes) if len(axes) != n_samples: - raise ValueError("Should be provided a list of axis per " - "sample") + raise ValueError( + "A list of axis per sample should be provided", + ) eval_points, shape = zip( - *[_one_grid_to_points(a, dim_domain=dim_domain) for a in axes]) + *[_one_grid_to_points(a, dim_domain=dim_domain) for a in axes], + ) eval_points = _to_array_maybe_ragged(eval_points) # Evaluate the points - res = evaluate_method(eval_points, - extrapolation=extrapolation, - aligned=aligned) + res = evaluate_method( + eval_points, + extrapolation=extrapolation, + aligned=aligned, + ) # Reshape the result if aligned: res = res.reshape( - [n_samples] + list(shape) + [dim_codomain] + [n_samples] + list(shape) + [dim_codomain], ) else: res = _to_array_maybe_ragged([ r.reshape(list(s) + [dim_codomain]) - for r, s in zip(res, shape)]) + for r, s in zip(res, shape) + ]) return res @@ -475,18 +492,15 @@ def _pairwise_symmetric( **kwargs: Any, ) -> np.ndarray: """Compute pairwise a commutative function.""" + dim1 = len(arg1) if arg2 is None or arg2 is arg1: + indices = np.triu_indices(dim1) - indices = np.triu_indices(len(arg1)) - - 
matrix = np.empty((len(arg1), len(arg1))) + matrix = np.empty((dim1, dim1)) triang_vec = _map_in_batches( function, - ( - arg1, - arg1, - ), + (arg1, arg1), indices, memory_per_batch=memory_per_batch, **kwargs, @@ -500,39 +514,27 @@ def _pairwise_symmetric( return matrix - else: + dim2 = len(arg2) + indices = np.indices((dim1, dim2)) - indices = np.indices((len(arg1), len(arg2))) - - vec = _map_in_batches( - function, - ( - arg1, - arg2, - ), - ( - indices[0].ravel(), - indices[1].ravel(), - ), - memory_per_batch=memory_per_batch, - **kwargs, - ) + vec = _map_in_batches( + function, + (arg1, arg2), + (indices[0].ravel(), indices[1].ravel()), + memory_per_batch=memory_per_batch, + **kwargs, + ) - return vec.reshape((len(arg1), len(arg2))) + return vec.reshape((dim1, dim2)) def _int_to_real(array: np.ndarray) -> np.ndarray: - """ - Convert integer arrays to floating point. - """ + """Convert integer arrays to floating point.""" return array + 0.0 def _check_array_key(array, key): - """ - Checks a getitem key. 
- """ - + """Check a getitem key.""" key = check_array_indexer(array, key) if isinstance(key, numbers.Integral): # To accept also numpy ints @@ -540,8 +542,8 @@ def _check_array_key(array, key): key = range(len(array))[key] return slice(key, key + 1) - else: - return key + + return key def _check_estimator(estimator): @@ -556,7 +558,7 @@ def _check_estimator(estimator): check_set_params(name, instance) -def _classifier_get_classes(y): +def _classifier_get_classes(y: ndarray) -> Tuple[ndarray, ndarray]: from sklearn.preprocessing import LabelEncoder from sklearn.utils.multiclass import check_classification_targets @@ -568,21 +570,31 @@ def _classifier_get_classes(y): classes = le.classes_ if classes.size < 2: - raise ValueError(f'The number of classes has to be greater than' - f' one; got {classes.size} class') + raise ValueError( + f'The number of classes has to be greater than' + f'one; got {classes.size} class', + ) return classes, y_ind -def _classifier_get_distributions(classes, X, y_ind, depth_methods): - distributions = [ +def _classifier_get_distributions( + classes: ndarray, + X: FDataGrid, + y_ind: ndarray, + depth_methods: Sequence[Depth], +) -> Sequence[Depth]: + return [ clone(depth_method).fit(X[y_ind == cur_class]) for cur_class in range(classes.size) for depth_method in depth_methods ] - return distributions -def _classifier_fit_distributions(X, y, depth_methods): +def _classifier_fit_distributions( + X: FDataGrid, + y: ndarray, + depth_methods: Sequence[Depth], +) -> Tuple[ndarray, Sequence[Depth]]: classes_, y_ind = _classifier_get_classes(y) distributions_ = _classifier_get_distributions( diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index ea14eea2c..8d85d5437 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -7,11 +7,10 @@ from sklearn.pipeline import make_pipeline from sklearn.utils.validation import check_is_fitted as 
sklearn_check_is_fitted -from skfda.representation.grid import FDataGrid - from ..._utils import _classifier_fit_distributions from ...exploratory.depth import Depth, ModifiedBandDepth from ...preprocessing.dim_reduction.feature_extraction import DDGTransformer +from ...representation.grid import FDataGrid default_depth = ModifiedBandDepth() @@ -66,7 +65,7 @@ class MaximumDepthClassifier(BaseEstimator, ClassifierMixin): def __init__(self, depth_method: Depth = default_depth) -> None: self.depth_method = depth_method - def fit(self, X: FDataGrid, y: ndarray): + def fit(self, X: FDataGrid, y: ndarray) -> 'MaximumDepthClassifier': """Fit the model using X as training data and y as target values. Args: @@ -187,7 +186,7 @@ def __init__( self.multivariate_classifier = multivariate_classifier self.depth_method = depth_method - def fit(self, X: FDataGrid, y: ndarray): + def fit(self, X: FDataGrid, y: ndarray) -> 'DDGClassifier': """Fit the model using X as training data and y as target values. Args: diff --git a/skfda/preprocessing/dim_reduction/__init__.py b/skfda/preprocessing/dim_reduction/__init__.py index 1473aa4b5..3dec4569f 100644 --- a/skfda/preprocessing/dim_reduction/__init__.py +++ b/skfda/preprocessing/dim_reduction/__init__.py @@ -1,3 +1,2 @@ -from . import feature_extraction -from . import projection -from . import variable_selection +"""Dim reduction.""" +from . 
import feature_extraction, projection, variable_selection diff --git a/skfda/preprocessing/dim_reduction/feature_extraction/__init__.py b/skfda/preprocessing/dim_reduction/feature_extraction/__init__.py index 7c0c539f3..a288b52a6 100644 --- a/skfda/preprocessing/dim_reduction/feature_extraction/__init__.py +++ b/skfda/preprocessing/dim_reduction/feature_extraction/__init__.py @@ -1 +1,2 @@ +"""Feature extraction.""" from ._ddg_transformer import DDGTransformer diff --git a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py index 8fcc25853..aea4e854a 100644 --- a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py +++ b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py @@ -1,5 +1,4 @@ """Feature extraction transformers for dimensionality reduction.""" - from typing import Sequence, Union import numpy as np @@ -87,7 +86,7 @@ def __init__( depth_method = [depth_method] self.depth_method = depth_method - def fit(self, X: FDataGrid, y: ndarray): + def fit(self, X: FDataGrid, y: ndarray) -> 'DDGTransformer': """Fit the model using X as training data and y as target values. 
Args: From c06a62e33088a57a5ee050f0b1d02c1c66619bd2 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sat, 6 Feb 2021 16:52:42 +0100 Subject: [PATCH 064/417] plotgr --- .../visualization/representation.py | 146 ++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 74c04ca61..30b1ed43f 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -325,3 +325,149 @@ def plot_scatter(fdata, chart=None, *, grid_points=None, _set_labels(fdata, fig, axes, patches) return fig + + +def plot_color_gradient(fdata, chart=None, *, fig=None, axes=None, + n_rows=None, n_cols=None, n_points=None, + domain_range=None, gradient_color_list, + max_grad = None, min_grad = None, + colormap_name = 'autumn', + **kwargs): + """Plot the FDataGrid object graph as hypersurfaces, representing each + instance depending on a color defined by the gradient_color_list. + + Plots each coordinate separately. If the domain is one dimensional, the + plots will be curves, and if it is two dimensional, they will be surfaces. + + Args: + fdata: functional data to be represented. + chart (figure object, axis or list of axes, optional): figure over + which the graphs are plotted or axis over where the graphs are + plotted. If None and ax is also None, the figure is + initialized. + fig (figure object, optional): figure over which the graphs are + plotted in case ax is not specified. If None and ax is also + None, the figure is initialized. + axes (list of axis objects, optional): axis over where the graphs are + plotted. If None, see param fig. + n_rows (int, optional): designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols(int, optional): designates the number of columns of the + figure to plot the different dimensions of the image. 
Only + specified if fig and ax are None. + n_points (int or tuple, optional): Number of points to evaluate in + the plot. In case of surfaces a tuple of length 2 can be passed + with the number of points to plot in each axis, otherwise the + same number of points will be used in the two axes. By default + in unidimensional plots will be used 501 points; in surfaces + will be used 30 points per axis, which makes a grid with 900 + points. + domain_range (tuple or list of tuples, optional): Range where the + function will be plotted. In objects with unidimensional domain + the domain range should be a tuple with the bounds of the + interval; in the case of surfaces a list with 2 tuples with + the ranges for each dimension. Default uses the domain range + of the functional object. + gradient_color_list: list of real values used to determine the color + in which each of the instances will be plotted. The size + max_grad: maximum value that the gradient_list can take, it will be + used to normalize the gradient_color_list in order to get values that + can be used in the function colormap.__call__(). If not declared + it will be initialized to the maximum value of gradient_list + min_grad: minimum value that the gradient_list can take, it will be + used to normalize the gradient_color_list in order to get values that + can be used in the function colormap.__call__(). If not declared + it will be initialized to the minimum value of gradient_list + colormap_name: name of the colormap to be used. By default we will + use autumn. + **kwargs: if dim_domain is 1, keyword arguments to be passed to + the matplotlib.pyplot.plot function; if dim_domain is 2, + keyword arguments to be passed to the + matplotlib.pyplot.plot_surface function. + + Returns: + fig (figure object): figure object in which the graphs are plotted. 
+ + """ + + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata(fdata, fig, axes, n_rows, n_cols) + + if domain_range is None: + domain_range = fdata.domain_range + else: + domain_range = _to_domain_range(domain_range) + + if len(gradient_color_list) != fdata.n_samples: + raise ValueError("The length of the gradient color" + "list should be the same as the number" + "of samples in fdata") + + colormap = matplotlib.cm.get_cmap(colormap_name) + colormap = colormap.reversed() + if min_grad is None: + min_grad = min(gradient_color_list) + + if max_grad is None: + max_grad = max(gradient_color_list) + + gradient_list = (gradient_color_list-min_grad)/(max_grad-min_grad) + + sample_colors = [None] * fdata.n_samples + for i in range(fdata.n_samples): + sample_colors[i] = colormap.__call__(gradient_list[i]) + + + if fdata.dim_domain == 1: + if n_points is None: + n_points = constants.N_POINTS_UNIDIMENSIONAL_PLOT_MESH + + # Evaluates the object in a linspace + eval_points = np.linspace(*domain_range[0], n_points) + mat = fdata(eval_points) + + color_dict = {} + + for i in range(fdata.dim_codomain): + for j in range(fdata.n_samples): + + if sample_colors is not None: + color_dict["color"] = sample_colors[j] + + axes[i].plot(eval_points, mat[j, ..., i].T, + **color_dict, **kwargs) + + else: + # Selects the number of points + if n_points is None: + n_points = 2 * (constants.N_POINTS_SURFACE_PLOT_AX,) + elif np.isscalar(n_points): + n_points = (n_points, n_points) + elif len(n_points) != 2: + raise ValueError(f"n_points should be a number or a tuple of " + f"length 2, and has length {len(n_points)}") + + # Axes where will be evaluated + x = np.linspace(*domain_range[0], n_points[0]) + y = np.linspace(*domain_range[1], n_points[1]) + + # Evaluation of the functional object + Z = fdata((x, y), grid=True) + + X, Y = np.meshgrid(x, y, indexing='ij') + + color_dict = {} + + for i in range(fdata.dim_codomain): + for j in 
range(fdata.n_samples): + + if sample_colors is not None: + color_dict["color"] = sample_colors[j] + + axes[i].plot_surface(X, Y, Z[j, ..., i], + **color_dict, **kwargs) + + _set_labels(fdata, fig, axes) + + return fig \ No newline at end of file From 71e675d01e97586082b897ea1951fd2ebab8b2bc Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 7 Feb 2021 11:09:00 +0100 Subject: [PATCH 065/417] Design matrix. --- .../ml/regression/_historical_linear_model.py | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 skfda/ml/regression/_historical_linear_model.py diff --git a/skfda/ml/regression/_historical_linear_model.py b/skfda/ml/regression/_historical_linear_model.py new file mode 100644 index 000000000..e1f6d9666 --- /dev/null +++ b/skfda/ml/regression/_historical_linear_model.py @@ -0,0 +1,97 @@ +from typing import Tuple + +import numpy as np +import scipy.integrate + +from ..._utils import _pairwise_symmetric +from ...representation import FDataBasis, FDataGrid +from ...representation.basis._finite_element import FiniteElement + + +def _fem_inner_product_matrix( + fem_basis: FiniteElement, + fd: FDataGrid, + limits: Tuple[float, float], + y_val: float, +) -> np.ndarray: + """ + Computes the matrix of inner products of an FEM basis with a functional + data object over a range of x-values for a fixed y-value. The numerical + integration uses Romberg integration with the trapezoidal rule. + + Arguments: + fem_basis: an FEM basis defined by a triangulation within a + rectangular domain. It is assumed that only the part of the mesh + that is within the upper left triangular is of interest. + fd: a regular functional data object. + limits: limits of integration, as a tuple of form + (lower limit, upper limit) + y_val: the fixed y value. 
+ + """ + + fem_basis_fd = fem_basis.to_basis() + grid = fd.grid_points[0] + grid_index = (grid >= limits[0]) & (grid <= limits[1]) + grid = grid[grid_index] + + def _pairwise_fem_inner_product( + fem_basis_fd: FDataBasis, + fd: FDataGrid, + ) -> np.ndarray: + + eval_grid_fem = np.concatenate( + ( + grid[:, None], + np.full( + shape=(len(grid), 1), + fill_value=y_val, + ) + ), + axis=1, + ) + + eval_fem = fem_basis_fd(eval_grid_fem) + eval_fd = fd(grid) + + # Only for scalar valued functions for now + assert eval_fem.shape[-1] == 1 + assert eval_fd.shape[-1] == 1 + + prod = eval_fem[..., 0] * eval_fd[..., 0] + + return scipy.integrate.simps(prod, grid, axis=1) + + return _pairwise_symmetric( + _pairwise_fem_inner_product, + fem_basis_fd, + fd, + ) + + +def _design_matrix( + fem_basis: FiniteElement, + fd: FDataGrid, + pred_points: np.ndarray, +) -> np.ndarray: + """ + Computes the indefinite integrals of the curves over s up to each t-value. + + Arguments: + fem_basis: an FEM basis defined by a triangulation within a + rectangular domain. It is assumed that only the part of the mesh + that is within the upper left triangular is of interest. + fd: a regular functional data object. + pred_points: points where ``fd`` is evaluated. + + Returns: + Design matrix. 
+ + """ + + matrix = np.array([ + _fem_inner_product_matrix(fem_basis, fd, limits=(0, t), y_val=t).T + for t in pred_points + ]) + + return np.swapaxes(matrix, 0, 1) From d5eeaade47bae30208dffdf7f75773bffd3f8f7f Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 7 Feb 2021 13:38:41 +0100 Subject: [PATCH 066/417] comienzo --- skfda/exploratory/visualization/representation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 30b1ed43f..9b8e88ff0 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -78,6 +78,9 @@ def _get_color_info(fdata, group, group_names, group_colors, legend, kwargs): return sample_colors, patches +class GraphPlot: + + def plot_graph(fdata, chart=None, *, fig=None, axes=None, n_rows=None, n_cols=None, n_points=None, From fa6be239a47c6f9fbfddb4fa3552f2432b87995a Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Sun, 7 Feb 2021 19:21:37 +0100 Subject: [PATCH 067/417] DDClassifier with polynomial --- docs/modules/ml/classification.rst | 3 +- setup.cfg | 5 +- skfda/_utils/_utils.py | 11 +- skfda/ml/classification/__init__.py | 6 +- .../classification/_centroid_classifiers.py | 82 +++++---- skfda/ml/classification/_depth_classifiers.py | 168 ++++++++++++++++-- .../feature_extraction/_ddg_transformer.py | 20 ++- 7 files changed, 231 insertions(+), 64 deletions(-) diff --git a/docs/modules/ml/classification.rst b/docs/modules/ml/classification.rst index 1fd1e63bd..d87071a81 100644 --- a/docs/modules/ml/classification.rst +++ b/docs/modules/ml/classification.rst @@ -23,5 +23,6 @@ it is explained the basic usage of these estimators. 
skfda.ml.classification.RadiusNeighborsClassifier skfda.ml.classification.NearestCentroid skfda.ml.classification.DTMClassifier - skfda.ml.classification.MaximumDepthClassifier + skfda.ml.classification.DDClassifier skfda.ml.classification.DDGClassifier + skfda.ml.classification.MaximumDepthClassifier diff --git a/setup.cfg b/setup.cfg index 3baa6db1d..2ce2039f9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -100,16 +100,17 @@ rst-roles = allowed-domain-names = data, obj, result, results, val, value, values, var # Needs to be tuned -max-imports = 20 max-arguments = 10 max-attributes = 10 +max-cognitive-score = 30 +max-expressions = 15 +max-imports = 20 max-line-complexity = 30 max-local-variables = 15 max-methods = 30 max-module-expressions = 15 max-module-members = 15 max-string-usages = 10 -max-cognitive-score = 30 ignore-decorators = (property)|(overload) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 49b322da5..ea43cd3e1 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -12,6 +12,7 @@ Optional, Sequence, Tuple, + TypeVar, Union, cast, ) @@ -38,6 +39,8 @@ from ..representation.basis import Basis from ..representation.grid import FDataGrid +T = TypeVar("T", contravariant=True) + class _FDataCallable(): @@ -581,8 +584,8 @@ def _classifier_get_distributions( classes: ndarray, X: FDataGrid, y_ind: ndarray, - depth_methods: Sequence[Depth], -) -> Sequence[Depth]: + depth_methods: Sequence[Depth[T]], +) -> Sequence[Depth[T]]: return [ clone(depth_method).fit(X[y_ind == cur_class]) for cur_class in range(classes.size) @@ -593,8 +596,8 @@ def _classifier_get_distributions( def _classifier_fit_distributions( X: FDataGrid, y: ndarray, - depth_methods: Sequence[Depth], -) -> Tuple[ndarray, Sequence[Depth]]: + depth_methods: Sequence[Depth[T]], +) -> Tuple[ndarray, Sequence[Depth[T]]]: classes_, y_ind = _classifier_get_classes(y) distributions_ = _classifier_get_distributions( diff --git a/skfda/ml/classification/__init__.py 
b/skfda/ml/classification/__init__.py index 5498de463..2e5689d8b 100644 --- a/skfda/ml/classification/__init__.py +++ b/skfda/ml/classification/__init__.py @@ -1,6 +1,10 @@ """Classification.""" from ._centroid_classifiers import DTMClassifier, NearestCentroid -from ._depth_classifiers import DDGClassifier, MaximumDepthClassifier +from ._depth_classifiers import ( + DDClassifier, + DDGClassifier, + MaximumDepthClassifier, +) from ._neighbors_classifiers import ( KNeighborsClassifier, RadiusNeighborsClassifier, diff --git a/skfda/ml/classification/_centroid_classifiers.py b/skfda/ml/classification/_centroid_classifiers.py index 2ab425042..3a1cd3d77 100644 --- a/skfda/ml/classification/_centroid_classifiers.py +++ b/skfda/ml/classification/_centroid_classifiers.py @@ -1,14 +1,20 @@ """Centroid-based models for supervised classification.""" +from __future__ import annotations -from typing import Callable +from typing import Callable, Optional, TypeVar, Union +from numpy import ndarray from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted +from skfda.representation.grid import FDataGrid + from ..._utils import _classifier_get_classes from ...exploratory.depth import Depth, ModifiedBandDepth from ...exploratory.stats import mean, trim_mean -from ...misc.metrics import PairwiseMetric, l2_distance +from ...misc.metrics import LpDistance, PairwiseMetric, l2_distance + +T = TypeVar("T", contravariant=True) class NearestCentroid(BaseEstimator, ClassifierMixin): @@ -18,13 +24,11 @@ class NearestCentroid(BaseEstimator, ClassifierMixin): the class with the nearest centroid. Parameters: - metric: callable, (default - :func:`l2_distance `) + metric: The metric to use when calculating distance between test samples and centroids. See the documentation of the metrics module - for a list of available metrics. Defaults used L2 distance. 
- centroid: callable, (default - :func:`mean `) + for a list of available metrics. L2 distance is used by default. + centroid: The centroids for the samples corresponding to each class is the point from which the sum of the distances (according to the metric) of all samples that belong to that particular class are minimized. @@ -59,22 +63,25 @@ class and return a :class:`FData` object with only one sample :class:`~skfda.ml.classification.DTMClassifier` """ - def __init__(self, metric=l2_distance, centroid=mean): + def __init__( + self, + metric: Union[str, LpDistance] = l2_distance, + centroid: Callable = mean, + ): self.metric = metric self.centroid = centroid - def fit(self, X, y): + def fit(self, X: Union[FDataGrid, ndarray], y: ndarray) -> NearestCentroid: """Fit the model using X as training data and y as target values. Args: - X (:class:`FDataGrid`, array_matrix): Training data. FDataGrid - with the training data or array matrix with shape + X: FDataGrid with the training data or array matrix with shape (n_samples, n_samples) if metric='precomputed'. - y (array-like or sparse matrix): Target values of + y: Target values of shape = (n_samples) or (n_samples, n_outputs). Returns: - self (object) + self """ classes_, y_ind = _classifier_get_classes(y) @@ -87,15 +94,15 @@ def fit(self, X, y): return self - def predict(self, X): + def predict(self, X: FDataGrid) -> ndarray: """Predict the class labels for the provided data. Args: - X (:class:`FDataGrid`): FDataGrid with the test samples. + X: FDataGrid with the test samples. Returns: - y (np.array): array of shape (n_samples) or - (n_samples, n_outputs) with class labels for each data sample. + ndarray: array of shape (n_samples) or + (n_samples, n_outputs) with class labels for each data sample. """ sklearn_check_is_fitted(self) @@ -113,19 +120,18 @@ class DTMClassifier(BaseEstimator, ClassifierMixin): the observation to the trimmed mean of the group. 
Parameters: - proportiontocut (float): indicates the percentage of functions to - remove. It is not easy to determine as it varies from dataset to + proportiontocut: + Indicates the percentage of functions to remove. + It is not easy to determine as it varies from dataset to dataset. - depth_method (Depth, default - :class:`ModifiedBandDepth `): + depth_method: The depth class used to order the data. See the documentation of the depths module for a list of available depths. By default it is ModifiedBandDepth. - metric (Callable, default - :func:`l2_distance `): + metric: Distance function between two functional objects. See the documentation of the metrics module for a list of available - metrics. + metrics. L2 distance is used by default. Examples: Firstly, we will import and split the Berkeley Growth Study dataset @@ -167,28 +173,26 @@ class DTMClassifier(BaseEstimator, ClassifierMixin): def __init__( self, proportiontocut: float, - depth_method: Depth = None, + depth_method: Optional[Depth[T]] = None, metric: Callable = l2_distance, ) -> None: self.proportiontocut = proportiontocut - - if depth_method is None: - self.depth_method = ModifiedBandDepth() - else: - self.depth_method = depth_method - + self.depth_method = depth_method self.metric = metric - def fit(self, X, y): + def fit(self, X: FDataGrid, y: ndarray) -> DTMClassifier: """Fit the model using X as training data and y as target values. Args: - X (:class:`FDataGrid`): FDataGrid with the training data. - y (array-like): Target values of shape = (n_samples). + X: FDataGrid with the training data. + y: Target values of shape = (n_samples). 
Returns: - self (object) + self """ + if self.depth_method is None: + self.depth_method = ModifiedBandDepth() + self._clf = NearestCentroid( metric=self.metric, centroid=lambda fdatagrid: trim_mean( @@ -201,14 +205,14 @@ def fit(self, X, y): return self - def predict(self, X): + def predict(self, X: FDataGrid) -> ndarray: """Predict the class labels for the provided data. Args: - X (:class:`FDataGrid`): FDataGrid with the test samples. + X: FDataGrid with the test samples. Returns: - y (np.array): array of shape (n_samples) or - (n_samples, n_outputs) with class labels for each data sample. + ndarray: array of shape (n_samples) or + (n_samples, n_outputs) with class labels for each data sample. """ return self._clf.predict(X) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index 8d85d5437..a048432d0 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -1,9 +1,14 @@ """Depth-based models for supervised classification.""" -from typing import Sequence, Union +from __future__ import annotations + +from itertools import combinations +from typing import Optional, Sequence, TypeVar, Union import numpy as np +import numpy.polynomial.polynomial as poly from numpy import ndarray from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.metrics import accuracy_score from sklearn.pipeline import make_pipeline from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted @@ -12,7 +17,7 @@ from ...preprocessing.dim_reduction.feature_extraction import DDGTransformer from ...representation.grid import FDataGrid -default_depth = ModifiedBandDepth() +T = TypeVar("T", contravariant=True) class MaximumDepthClassifier(BaseEstimator, ClassifierMixin): @@ -55,6 +60,7 @@ class MaximumDepthClassifier(BaseEstimator, ClassifierMixin): 0.875 See also: + :class:`~skfda.ml.classification.DDClassifier` 
:class:`~skfda.ml.classification.DDGClassifier` References: @@ -62,10 +68,10 @@ class MaximumDepthClassifier(BaseEstimator, ClassifierMixin): related classifiers. Scandinavian Journal of Statistics, 32, 327–350. """ - def __init__(self, depth_method: Depth = default_depth) -> None: + def __init__(self, depth_method: Optional[Depth[T]] = None) -> None: self.depth_method = depth_method - def fit(self, X: FDataGrid, y: ndarray) -> 'MaximumDepthClassifier': + def fit(self, X: FDataGrid, y: ndarray) -> MaximumDepthClassifier: """Fit the model using X as training data and y as target values. Args: @@ -73,8 +79,11 @@ def fit(self, X: FDataGrid, y: ndarray) -> 'MaximumDepthClassifier': y: Target values of shape = (n_samples). Returns: - self (object) + self """ + if self.depth_method is None: + self.depth_method = ModifiedBandDepth() + classes_, distributions_ = _classifier_fit_distributions( X, y, [self.depth_method], ) @@ -92,7 +101,7 @@ def predict(self, X: FDataGrid) -> ndarray: Returns: ndarray: array of shape (n_samples) with class labels - for each data sample. + for each data sample. """ sklearn_check_is_fitted(self) @@ -104,6 +113,144 @@ def predict(self, X: FDataGrid) -> ndarray: return self.classes_[np.argmax(depths, axis=0)] +class DDClassifier(BaseEstimator, ClassifierMixin): + """Depth-versus-depth (DD) classifier for functional data. + + Parameters: + degree: degree of the polynomial used to classify in the DD-plot + depth_method: + The depth class to use when calculating the depth of a test + sample in a class. See the documentation of the depths module + for a list of available depths. By default it is ModifiedBandDepth. + + Examples: + Firstly, we will import and split the Berkeley Growth Study dataset + + >>> from skfda.datasets import fetch_growth + >>> from sklearn.model_selection import train_test_split + >>> dataset = fetch_growth() + >>> fd = dataset['data'] + >>> y = dataset['target'] + >>> X_train, X_test, y_train, y_test = train_test_split( + ... 
fd, y, test_size=0.25, stratify=y, random_state=0) + + We will fit a DD-classifier + + >>> from skfda.ml.classification import DDClassifier + >>> clf = DDClassifier(degree=2) + >>> clf.fit(X_train, y_train) + DDClassifier(...) + + We can predict the class of new samples + + >>> clf.predict(X_test) + array([1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, + 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1]) + + Finally, we calculate the mean accuracy for the test data + + >>> clf.score(X_test, y_test) + 0.875 + + See also: + :class:`~skfda.ml.classification.DDGClassifier` + :class:`~skfda.ml.classification.MaximumDepthClassifier` + :class:`~skfda.preprocessing.dim_reduction.feature_extraction._ddg_transformer` + + References: + Li, J., Cuesta-Albertos, J. A., and Liu, R. Y. (2012). DD-classifier: + Nonparametric classification procedure based on DD-plot. Journal of + the American Statistical Association, 107(498):737-753. + """ + + def __init__( + self, + degree: int, + depth_method: Optional[Depth[T]] = None, + ) -> None: + self.depth_method = depth_method + self.degree = degree + + def fit(self, X: FDataGrid, y: ndarray) -> DDClassifier: + """Fit the model using X as training data and y as target values. + + Args: + X: FDataGrid with the training data. + y: Target values of shape = (n_samples). 
+ + Returns: + self + """ + if self.depth_method is None: + self.depth_method = ModifiedBandDepth() + + classes_, distributions_ = _classifier_fit_distributions( + X, y, [self.depth_method], + ) + + self.classes_ = classes_ + self.distributions_ = distributions_ + + dd_coordinates = [ + distribution.predict(X) + for distribution in self.distributions_ + ] + + polynomial_elements = combinations( + range(len(dd_coordinates[0])), + self.degree, + ) + + accuracy = -1 + + for elements in polynomial_elements: + x_coord = [0] + [dd_coordinates[0][e] for e in elements] + y_coord = [0] + [dd_coordinates[1][e] for e in elements] + + coefs = poly.polyfit( + x_coord, y_coord, self.degree, + ) + + polynomial = poly.Polynomial(coefs) + + predicted_values = polynomial(dd_coordinates[0]) + + y_pred = [ + self.classes_[0] if z - y > 0 else self.classes_[1] + for (z, y) in zip(predicted_values, dd_coordinates[1]) + ] + + new_accuracy = accuracy_score(y, y_pred) + + if (new_accuracy > accuracy): + accuracy = new_accuracy + self.polynomial = polynomial + + return self + + def predict(self, X: FDataGrid) -> ndarray: + """Predict the class labels for the provided data. + + Args: + X: FDataGrid with the test samples. + + Returns: + ndarray: array of shape (n_samples) with class labels + for each data sample. + """ + dd_coordinates = [ + distribution.predict(X) + for distribution in self.distributions_ + ] + + predicted_values = self.polynomial(dd_coordinates[0]) + + return np.array([ + self.classes_[0] if z - y > 0 else self.classes_[1] + for (z, y) in zip(predicted_values, dd_coordinates[1]) + ]) + + class DDGClassifier(BaseEstimator, ClassifierMixin): r"""Generalized depth-versus-depth (DD) classifer for functional data. 
@@ -147,7 +294,7 @@ class DDGClassifier(BaseEstimator, ClassifierMixin): >>> from sklearn.neighbors import KNeighborsClassifier - We will fit a Maximum depth classifier using KNN + We will fit a DDG-classifier using KNN >>> from skfda.ml.classification import DDGClassifier >>> clf = DDGClassifier(KNeighborsClassifier()) @@ -166,6 +313,7 @@ class DDGClassifier(BaseEstimator, ClassifierMixin): 0.875 See also: + :class:`~skfda.ml.classification.DDClassifier` :class:`~skfda.ml.classification.MaximumDepthClassifier` :class:`~skfda.preprocessing.dim_reduction.feature_extraction._ddg_transformer` @@ -181,12 +329,12 @@ class DDGClassifier(BaseEstimator, ClassifierMixin): def __init__( self, multivariate_classifier: ClassifierMixin = None, - depth_method: Union[Depth, Sequence[Depth]] = default_depth, + depth_method: Optional[Union[Depth[T], Sequence[Depth[T]]]] = None, ) -> None: self.multivariate_classifier = multivariate_classifier self.depth_method = depth_method - def fit(self, X: FDataGrid, y: ndarray) -> 'DDGClassifier': + def fit(self, X: FDataGrid, y: ndarray) -> DDGClassifier: """Fit the model using X as training data and y as target values. Args: @@ -194,7 +342,7 @@ def fit(self, X: FDataGrid, y: ndarray) -> 'DDGClassifier': y: Target values of shape = (n_samples). 
Returns: - self (object) + self """ self.pipeline = make_pipeline( DDGTransformer(self.depth_method), diff --git a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py index aea4e854a..54d4e735c 100644 --- a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py +++ b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py @@ -1,5 +1,7 @@ """Feature extraction transformers for dimensionality reduction.""" -from typing import Sequence, Union +from __future__ import annotations + +from typing import Optional, Sequence, TypeVar, Union import numpy as np from numpy import ndarray @@ -10,7 +12,7 @@ from ....exploratory.depth import Depth, ModifiedBandDepth from ....representation.grid import FDataGrid -default_depth = ModifiedBandDepth() +T = TypeVar("T", contravariant=True) class DDGTransformer(BaseEstimator, TransformerMixin): @@ -80,13 +82,11 @@ class DDGTransformer(BaseEstimator, TransformerMixin): def __init__( self, - depth_method: Union[Depth, Sequence[Depth]] = default_depth, + depth_method: Optional[Union[Depth[T], Sequence[Depth[T]]]] = None, ) -> None: - if isinstance(depth_method, Depth): - depth_method = [depth_method] self.depth_method = depth_method - def fit(self, X: FDataGrid, y: ndarray) -> 'DDGTransformer': + def fit(self, X: FDataGrid, y: ndarray) -> DDGTransformer: """Fit the model using X as training data and y as target values. Args: @@ -94,8 +94,14 @@ def fit(self, X: FDataGrid, y: ndarray) -> 'DDGTransformer': y: Target values of shape = (n_samples). 
Returns: - self (object) + self """ + if self.depth_method is None: + self.depth_method = ModifiedBandDepth() + + if isinstance(self.depth_method, Depth): + self.depth_method = [self.depth_method] + classes_, distributions_ = _classifier_fit_distributions( X, y, self.depth_method, ) From 0a35f0be2c5204b1dbb206ef8eb57b520d698e9b Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Sun, 7 Feb 2021 19:31:32 +0100 Subject: [PATCH 068/417] check_is_fitted --- skfda/ml/classification/_depth_classifiers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index a048432d0..a442bf953 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -238,6 +238,8 @@ def predict(self, X: FDataGrid) -> ndarray: ndarray: array of shape (n_samples) with class labels for each data sample. """ + sklearn_check_is_fitted(self) + dd_coordinates = [ distribution.predict(X) for distribution in self.distributions_ @@ -363,4 +365,6 @@ def predict(self, X: FDataGrid) -> ndarray: ndarray: array of shape (n_samples) with class labels for each data sample. 
""" + sklearn_check_is_fitted(self) + return self.pipeline.predict(X) From c45e2882c3d52e1c99b7a2c59fc9caef92edb187 Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Sun, 7 Feb 2021 19:33:46 +0100 Subject: [PATCH 069/417] isort --- skfda/ml/classification/_centroid_classifiers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/skfda/ml/classification/_centroid_classifiers.py b/skfda/ml/classification/_centroid_classifiers.py index 3a1cd3d77..5e4e9cb24 100644 --- a/skfda/ml/classification/_centroid_classifiers.py +++ b/skfda/ml/classification/_centroid_classifiers.py @@ -7,12 +7,11 @@ from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted -from skfda.representation.grid import FDataGrid - from ..._utils import _classifier_get_classes from ...exploratory.depth import Depth, ModifiedBandDepth from ...exploratory.stats import mean, trim_mean from ...misc.metrics import LpDistance, PairwiseMetric, l2_distance +from ...representation.grid import FDataGrid T = TypeVar("T", contravariant=True) From 6b40f5a3e7f18d187c1ebff2cc4548066ffd85ca Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Sun, 7 Feb 2021 19:42:02 +0100 Subject: [PATCH 070/417] DDclassifier doctsting --- skfda/ml/classification/_depth_classifiers.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index a442bf953..83554346a 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -116,6 +116,13 @@ def predict(self, X: FDataGrid) -> ndarray: class DDClassifier(BaseEstimator, ClassifierMixin): """Depth-versus-depth (DD) classifer for functional data. + Transforms the data into a DD-plot and then classifies using a polynomial + of a chosen degree. The polynomial passes through zero and maximizes the + accuracy of the classification on the train dataset. 
+ + If a point is below the polynomial in the DD-plot, it is classified to + the first class. Otherwise, the point is classified to the second class. + Parameters: degree: degree of the polynomial used to classify in the DD-plot depth_method: From 3123f7a9f17c7baa5fd9c9544cdbd66c9f8d2f41 Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Sun, 7 Feb 2021 19:44:26 +0100 Subject: [PATCH 071/417] Small bug --- skfda/ml/classification/_depth_classifiers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index 83554346a..a56fc013f 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -372,6 +372,4 @@ def predict(self, X: FDataGrid) -> ndarray: ndarray: array of shape (n_samples) with class labels for each data sample. """ - sklearn_check_is_fitted(self) - return self.pipeline.predict(X) From 5dab7a004df8ca8a4afaf5de0b81f31f07cfbea0 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 8 Feb 2021 13:22:57 +0100 Subject: [PATCH 072/417] class plotgraph --- .../visualization/representation.py | 142 +++++++++++++++++- 1 file changed, 137 insertions(+), 5 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 9b8e88ff0..c12a7d5d6 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -1,14 +1,22 @@ - +from matplotlib import colors import matplotlib.cm import matplotlib.patches import numpy as np +from ... 
import FDataGrid from ..._utils import _to_domain_range, constants +from matplotlib.axes import Axes +from matplotlib.figure import Figure from ._utils import ( _get_figure_and_axes, _set_figure_layout_for_fdata, _set_labels, ) +from typing import TypeVar, Optional, Any + +T = TypeVar('T', FDataGrid, np.ndarray) +S = TypeVar('S', int, tuple) +V = TypeVar('V', tuple, list[tuple]) def _get_label_colors(n_labels, group_colors=None): @@ -78,8 +86,132 @@ def _get_color_info(fdata, group, group_names, group_colors, legend, kwargs): return sample_colors, patches + class GraphPlot: - + + def __init__( + self, + fdata: T, + gradient_color_list: Optional[list[float]] = None, + max_grad: Optional[float] = None, + min_grad: Optional[float] = None, + ) -> None: + self.fdata = fdata + self.gradient_color_list = gradient_color_list + if self.gradient_color_list is not None: + if len(gradient_color_list) != fdata.n_samples: + raise ValueError( + "The length of the gradient color" + "list should be the same as the number" + "of samples in fdata") + if min_grad is None: + self.min_grad = min(gradient_color_list) + else: + self.min_grad = None + + if max_grad is None: + self.max_grad = max(gradient_color_list) + else: + self.max_grad = None + + self.gradient_list = ( + (gradient_color_list - min_grad) / (max_grad - min_grad) + ) + else: + self.gradient_list = None + + def plot( + self, + chart: Figure = None, + *, + fig: Figure = None, + axes: Axes = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + n_points: Optional[S] = None, + domain_range: Optional[V] = None, + group: list[int] = None, + group_colors: list[Any] = None, + group_names: list[str] = None, + colormap_name: str = 'autumn', + legend: bool = False, + **kwargs: Any, + ) -> Figure: + + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata(self.fdata, fig, axes, n_rows, n_cols) + + if domain_range is None: + domain_range = self.fdata.domain_range + else: + 
domain_range = _to_domain_range(domain_range) + + if self.gradient_list is None: + sample_colors, patches = _get_color_info( + self.fdata, group, group_names, group_colors, legend, kwargs) + else: + patches = None + colormap = matplotlib.cm.get_cmap(colormap_name) + colormap = colormap.reversed() + + sample_colors = [None] * self.fdata.n_samples + for i in range(self.fdata.n_samples): + sample_colors[i] = colormap.__call__(self.gradient_list[i]) + + + if self.fdata.dim_domain == 1: + + if n_points is None: + n_points = constants.N_POINTS_UNIDIMENSIONAL_PLOT_MESH + + # Evaluates the object in a linspace + eval_points = np.linspace(*domain_range[0], n_points) + mat = self.fdata(eval_points) + + color_dict = {} + + for i in range(self.fdata.dim_codomain): + for j in range(self.fdata.n_samples): + if sample_colors is not None: + color_dict["color"] = sample_colors[j] + + axes[i].plot(eval_points, mat[j, ..., i].T, + **color_dict, **kwargs) + + else: + + # Selects the number of points + if n_points is None: + n_points = 2 * (constants.N_POINTS_SURFACE_PLOT_AX,) + elif np.isscalar(n_points): + n_points = (n_points, n_points) + elif len(n_points) != 2: + raise ValueError(f"n_points should be a number or a tuple of " + f"length 2, and has length {len(n_points)}") + + # Axes where will be evaluated + x = np.linspace(*domain_range[0], n_points[0]) + y = np.linspace(*domain_range[1], n_points[1]) + + # Evaluation of the functional object + Z = self.fdata((x, y), grid=True) + + X, Y = np.meshgrid(x, y, indexing='ij') + + color_dict = {} + + for i in range(self.fdata.dim_codomain): + for j in range(self.fdata.n_samples): + + if sample_colors is not None: + color_dict["color"] = sample_colors[j] + + axes[i].plot_surface(X, Y, Z[j, ..., i], + **color_dict, **kwargs) + + _set_labels(self.fdata, fig, axes, patches) + + return fig def plot_graph(fdata, chart=None, *, fig=None, axes=None, @@ -376,13 +508,13 @@ def plot_color_gradient(fdata, chart=None, *, fig=None, axes=None, in 
which each of the instances will be plotted. The size max_grad: maximum value that the gradient_list can take, it will be used to normalize the gradient_color_list in order to get values that - can be used in the funcion colormap.__call__(). If not declared + can be used in the funcion colormap.__call__(). If not declared it will be initialized to the maximum value of gradient_list min_grad: minimum value that the gradient_list can take, it will be used to normalize the gradient_color_list in order to get values that - can be used in the funcion colormap.__call__(). If not declared + can be used in the funcion colormap.__call__(). If not declared it will be initialized to the minimum value of gradient_list - colormap_name: name of the colormap to be used. By default we will + colormap_name: name of the colormap to be used. By default we will use autumn. **kwargs: if dim_domain is 1, keyword arguments to be passed to the matplotlib.pyplot.plot function; if dim_domain is 2, From ed7d04d84e15446ad68c77f7f0f34cc45bc39218 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 8 Feb 2021 13:25:57 +0100 Subject: [PATCH 073/417] Compute points and triangles. --- skfda/_utils/_utils.py | 10 +- .../ml/regression/_historical_linear_model.py | 151 ++++++++++++++++-- 2 files changed, 146 insertions(+), 15 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index e498c2c8f..5f480c17f 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -186,7 +186,11 @@ def convert_row(row): return res -def _cartesian_product(axes, flatten=True, return_shape=False): +def _cartesian_product( + axes: Sequence[np.ndarray], + flatten: bool=True, + return_shape: bool=False, +) -> np.ndarray: """Computes the cartesian product of the axes. Computes the cartesian product of the axes and returns a numpy array of @@ -194,10 +198,10 @@ def _cartesian_product(axes, flatten=True, return_shape=False): dimensions. Args: - Axes (array_like): List with axes. 
+ Axes: List with axes. Return: - (np.ndarray): Numpy 2-D array with all the possible combinations. + Numpy 2-D array with all the possible combinations. The entry (i,j) represent the j-th coordinate of the i-th point. Examples: diff --git a/skfda/ml/regression/_historical_linear_model.py b/skfda/ml/regression/_historical_linear_model.py index e1f6d9666..97009adec 100644 --- a/skfda/ml/regression/_historical_linear_model.py +++ b/skfda/ml/regression/_historical_linear_model.py @@ -1,15 +1,19 @@ +from __future__ import annotations + +from math import ceil from typing import Tuple import numpy as np import scipy.integrate +from sklearn.base import BaseEstimator, RegressorMixin -from ..._utils import _pairwise_symmetric +from ..._utils import _cartesian_product, _pairwise_symmetric from ...representation import FDataBasis, FDataGrid -from ...representation.basis._finite_element import FiniteElement +from ...representation.basis import Basis, FiniteElement -def _fem_inner_product_matrix( - fem_basis: FiniteElement, +def _inner_product_matrix( + basis: Basis, fd: FDataGrid, limits: Tuple[float, float], y_val: float, @@ -20,7 +24,7 @@ def _fem_inner_product_matrix( integration uses Romberg integration with the trapezoidal rule. Arguments: - fem_basis: an FEM basis defined by a triangulation within a + basis: typically a FEM basis defined by a triangulation within a rectangular domain. It is assumed that only the part of the mesh that is within the upper left triangular is of interest. fd: a regular functional data object. 
@@ -30,13 +34,13 @@ def _fem_inner_product_matrix( """ - fem_basis_fd = fem_basis.to_basis() + basis_fd = basis.to_basis() grid = fd.grid_points[0] grid_index = (grid >= limits[0]) & (grid <= limits[1]) grid = grid[grid_index] def _pairwise_fem_inner_product( - fem_basis_fd: FDataBasis, + basis_fd: FDataBasis, fd: FDataGrid, ) -> np.ndarray: @@ -51,7 +55,7 @@ def _pairwise_fem_inner_product( axis=1, ) - eval_fem = fem_basis_fd(eval_grid_fem) + eval_fem = basis_fd(eval_grid_fem) eval_fd = fd(grid) # Only for scalar valued functions for now @@ -64,13 +68,13 @@ def _pairwise_fem_inner_product( return _pairwise_symmetric( _pairwise_fem_inner_product, - fem_basis_fd, + basis_fd, fd, ) def _design_matrix( - fem_basis: FiniteElement, + basis: Basis, fd: FDataGrid, pred_points: np.ndarray, ) -> np.ndarray: @@ -78,7 +82,7 @@ def _design_matrix( Computes the indefinite integrals of the curves over s up to each t-value. Arguments: - fem_basis: an FEM basis defined by a triangulation within a + basis: typically a FEM basis defined by a triangulation within a rectangular domain. It is assumed that only the part of the mesh that is within the upper left triangular is of interest. fd: a regular functional data object. 
@@ -90,8 +94,131 @@ def _design_matrix( """ matrix = np.array([ - _fem_inner_product_matrix(fem_basis, fd, limits=(0, t), y_val=t).T + _inner_product_matrix(basis, fd, limits=(0, t), y_val=t).T for t in pred_points ]) return np.swapaxes(matrix, 0, 1) + + +def _get_valid_points( + interval_len: float, + n_intervals: int, + lag: float, +) -> np.ndarray: + """Return the valid points as integer tuples.""" + interval_points = np.arange(n_intervals + 1) + full_grid_points = _cartesian_product((interval_points, interval_points)) + + past_points = full_grid_points[ + full_grid_points[:, 0] <= full_grid_points[:, 1] + ] + + discrete_lag = np.inf if lag == np.inf else ceil(lag / interval_len) + + valid_points = past_points[ + past_points[:, 1] - past_points[:, 0] <= discrete_lag + ] + + return valid_points + + +def _get_triangles( + n_intervals: int, + valid_points: np.ndarray, +) -> np.ndarray: + """Construct the triangle grid given the valid points.""" + # A matrix where the (integer) coords of a point match + # to its index or to -1 if it does not exist. 
+ indexes_matrix = np.full( + shape=(n_intervals + 1, n_intervals + 1), + fill_value=-1, + dtype=np.int_, + ) + + indexes_matrix[ + valid_points[:, 0], + valid_points[:, 1], + ] = np.arange(len(valid_points)) + + interval_without_end = np.arange(n_intervals) + + pts_coords = _cartesian_product( + (interval_without_end, interval_without_end), + ) + + down_triangles = np.stack( + ( + indexes_matrix[pts_coords[:, 0], pts_coords[:, 1]], + indexes_matrix[pts_coords[:, 0] + 1, pts_coords[:, 1]], + indexes_matrix[pts_coords[:, 0] + 1, pts_coords[:, 1] + 1], + ), + axis=1, + ) + + up_triangles = np.stack( + ( + indexes_matrix[pts_coords[:, 0], pts_coords[:, 1]], + indexes_matrix[pts_coords[:, 0], pts_coords[:, 1] + 1], + indexes_matrix[pts_coords[:, 0] + 1, pts_coords[:, 1] + 1], + ), + axis=1, + ) + + triangles = np.concatenate((down_triangles, up_triangles)) + has_wrong_index = np.any(triangles < 0, axis=1) + + triangles = triangles[~has_wrong_index] + + return triangles + + +def _create_fem_basis( + start: float, + stop: float, + n_intervals: int, + lag: float, +) -> FiniteElement: + + interval_len = (stop - start) / n_intervals + + valid_points = _get_valid_points( + interval_len=interval_len, + n_intervals=n_intervals, + lag=lag, + ) + + final_points = valid_points * interval_len + start + + triangles = _get_triangles( + n_intervals=n_intervals, + valid_points=valid_points, + ) + + return FiniteElement( + vertices=final_points, + cells=triangles, + domain_range=(start, stop), + ) + + +class HistoricalLinearRegression( + BaseEstimator, # type: ignore + RegressorMixin, # type: ignore +): + + def __init__(self) -> None: + pass + + def fit(self, X: FDataGrid, y: FDataGrid) -> HistoricalLinearRegression: + + design_matrix = _design_matrix( + fem_basis, + X, + pred_points=y.grid_points[0], + ) + + return self + + def predict(self) -> FDataGrid: + pass From 4d6ed8c200d849dbf5fa0116eb301fded7d69ae1 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 8 Feb 2021 15:15:51 +0100 
Subject: [PATCH 074/417] First working version of the historical linear model. --- .../ml/regression/_historical_linear_model.py | 67 +++++++++++++++++-- 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/skfda/ml/regression/_historical_linear_model.py b/skfda/ml/regression/_historical_linear_model.py index 97009adec..a94a6ee75 100644 --- a/skfda/ml/regression/_historical_linear_model.py +++ b/skfda/ml/regression/_historical_linear_model.py @@ -1,11 +1,13 @@ from __future__ import annotations +import math from math import ceil from typing import Tuple import numpy as np import scipy.integrate from sklearn.base import BaseEstimator, RegressorMixin +from sklearn.utils.validation import check_is_fitted from ..._utils import _cartesian_product, _pairwise_symmetric from ...representation import FDataBasis, FDataGrid @@ -207,18 +209,69 @@ class HistoricalLinearRegression( RegressorMixin, # type: ignore ): - def __init__(self) -> None: - pass + def __init__(self, *, n_intervals: int, lag: float=math.inf) -> None: + self.n_intervals = n_intervals + self.lag = lag - def fit(self, X: FDataGrid, y: FDataGrid) -> HistoricalLinearRegression: + def _fit_and_return_matrix(self, X: FDataGrid, y: FDataGrid) -> np.ndarray: + + self._pred_points = y.grid_points[0] + self._pred_domain_range = y.domain_range[0] + + self._basis = _create_fem_basis( + start=X.domain_range[0][0], + stop=X.domain_range[0][1], + n_intervals=self.n_intervals, + lag=self.lag, + ) design_matrix = _design_matrix( - fem_basis, + self._basis, X, - pred_points=y.grid_points[0], + pred_points=self._pred_points, + ) + design_matrix = design_matrix.reshape(-1, design_matrix.shape[-1]) + + self.discretized_coef_ = np.linalg.lstsq( + design_matrix, + y.data_matrix[:, ..., 0].ravel(), + rcond=None, + )[0] + + return design_matrix + + def _prediction_from_matrix(self, design_matrix: np.ndarray) -> FDataGrid: + + points = (design_matrix @ self.discretized_coef_).reshape( + -1, + len(self._pred_points), + ) + + 
return FDataGrid( + points, + grid_points=self._pred_points, + domain_range=self._pred_domain_range, ) + def fit(self, X: FDataGrid, y: FDataGrid) -> HistoricalLinearRegression: + + self._fit_and_return_matrix(X, y) return self - def predict(self) -> FDataGrid: - pass + def fit_predict(self, X: FDataGrid, y: FDataGrid) -> FDataGrid: + + design_matrix = self._fit_and_return_matrix(X, y) + return self._prediction_from_matrix(design_matrix) + + def predict(self, X: FDataGrid) -> FDataGrid: + + check_is_fitted(self) + + design_matrix = _design_matrix( + self._basis, + X, + pred_points=self._pred_points, + ) + design_matrix = design_matrix.reshape(-1, design_matrix.shape[-1]) + + return self._prediction_from_matrix(design_matrix) From c9d01f6cab47d1a4ea6133778234cdfcdf59e75d Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 8 Feb 2021 15:18:04 +0100 Subject: [PATCH 075/417] Export HistoricalLinearRegression. --- skfda/ml/regression/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skfda/ml/regression/__init__.py b/skfda/ml/regression/__init__.py index 7ba5a1ac9..485151c9b 100644 --- a/skfda/ml/regression/__init__.py +++ b/skfda/ml/regression/__init__.py @@ -1,4 +1,5 @@ """Regression.""" +from ._historical_linear_model import HistoricalLinearRegression from ._linear_regression import LinearRegression from ._neighbors_regression import ( KNeighborsRegressor, From 609d491946e1009b52d9adc46dc031a0c00dc96d Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 8 Feb 2021 17:32:20 +0100 Subject: [PATCH 076/417] changes --- .../visualization/representation.py | 61 +++++++++++++++---- skfda/representation/_functional_data.py | 4 +- 2 files changed, 51 insertions(+), 14 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index c12a7d5d6..37de7027d 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -12,11 +12,11 @@ 
_set_figure_layout_for_fdata, _set_labels, ) -from typing import TypeVar, Optional, Any +from typing import TypeVar, Optional, Any, List T = TypeVar('T', FDataGrid, np.ndarray) S = TypeVar('S', int, tuple) -V = TypeVar('V', tuple, list[tuple]) +V = TypeVar('V', tuple, list) def _get_label_colors(n_labels, group_colors=None): @@ -89,10 +89,33 @@ def _get_color_info(fdata, group, group_names, group_colors, legend, kwargs): class GraphPlot: + """Class used to plot the FDatGrid object graph as hypersurfaces. A list + of variables (probably depths) can be used as an argument to display the + functions wtih a gradient of colors. + + Args: + fdata: functional data set that we want to plot. + gradient_color_list: list of real values used to determine the color + in which each of the instances will be plotted. The size + max_grad: maximum value that the gradient_list can take, it will be + used to normalize the gradient_color_list in order to get values + thatcan be used in the funcion colormap.__call__(). If not + declared it will be initialized to the maximum value of + gradient_list + min_grad: minimum value that the gradient_list can take, it will be + used to normalize the gradient_color_list in order to get values + thatcan be used in the funcion colormap.__call__(). If not + declared it will be initialized to the minimum value of + gradient_list + Attributes: + gradient_list: normalization of the values from gradient color_list + that will be used to determine the intensity of the color + each function will have. 
+ """ def __init__( self, fdata: T, - gradient_color_list: Optional[list[float]] = None, + gradient_color_list: List[float] = None, max_grad: Optional[float] = None, min_grad: Optional[float] = None, ) -> None: @@ -104,18 +127,21 @@ def __init__( "The length of the gradient color" "list should be the same as the number" "of samples in fdata") - if min_grad is None: + + if min_grad is None: self.min_grad = min(gradient_color_list) else: - self.min_grad = None + self.min_grad = min_grad if max_grad is None: self.max_grad = max(gradient_color_list) else: - self.max_grad = None + self.max_grad = max_grad self.gradient_list = ( - (gradient_color_list - min_grad) / (max_grad - min_grad) + (gradient_color_list - self.min_grad) + / + (self.max_grad - self.min_grad) ) else: self.gradient_list = None @@ -130,16 +156,28 @@ def plot( n_cols: Optional[int] = None, n_points: Optional[S] = None, domain_range: Optional[V] = None, - group: list[int] = None, - group_colors: list[Any] = None, - group_names: list[str] = None, + group: List[int] = None, + group_colors: List[Any] = None, + group_names: List[str] = None, colormap_name: str = 'autumn', legend: bool = False, **kwargs: Any, ) -> Figure: + """Method used to plot the graph. Plots each coordinate separately. + If the :term:`domain` is one dimensional, the plots will be curves, + and if it is two dimensional, they will be surfaces. 
+ + There are two styles of visualizations, one that displays the + functions without any criteria choosing the colors and a new one + that displays the function with a gradient of colors depending + on the initial gradient_color_list (normalized in + gradient_list).""" + fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata(self.fdata, fig, axes, n_rows, n_cols) + fig, axes = _set_figure_layout_for_fdata( + self.fdata, fig, axes, n_rows, n_cols, + ) if domain_range is None: domain_range = self.fdata.domain_range @@ -158,7 +196,6 @@ def plot( for i in range(self.fdata.n_samples): sample_colors[i] = colormap.__call__(self.gradient_list[i]) - if self.fdata.dim_domain == 1: if n_points is None: diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index e5b70865b..f8fa4eaec 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -586,9 +586,9 @@ def plot(self, *args: Any, **kwargs: Any) -> Any: fig (figure object): figure object in which the graphs are plotted. 
""" - from ..exploratory.visualization.representation import plot_graph + from ..exploratory.visualization.representation import GraphPlot - return plot_graph(self, *args, **kwargs) + return GraphPlot(self).plot(*args, **kwargs) @abstractmethod def copy( From c8d577c4dcaa52c62d181b3c39f01c71e1a1f629 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 8 Feb 2021 19:00:39 +0100 Subject: [PATCH 077/417] repr --- .../visualization/representation.py | 99 +++++++++++++++---- 1 file changed, 80 insertions(+), 19 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 37de7027d..1716589dd 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -1,18 +1,19 @@ -from matplotlib import colors +from typing import Any, List, Optional, TypeVar + import matplotlib.cm import matplotlib.patches import numpy as np +from matplotlib import colors +from matplotlib.axes import Axes +from matplotlib.figure import Figure from ... import FDataGrid from ..._utils import _to_domain_range, constants -from matplotlib.axes import Axes -from matplotlib.figure import Figure from ._utils import ( _get_figure_and_axes, _set_figure_layout_for_fdata, _set_labels, ) -from typing import TypeVar, Optional, Any, List T = TypeVar('T', FDataGrid, np.ndarray) S = TypeVar('S', int, tuple) @@ -89,9 +90,11 @@ def _get_color_info(fdata, group, group_names, group_colors, legend, kwargs): class GraphPlot: - """Class used to plot the FDatGrid object graph as hypersurfaces. A list - of variables (probably depths) can be used as an argument to display the - functions wtih a gradient of colors. + """ + Class used to plot the FDatGrid object graph as hypersurfaces. + + A list of variables (probably depths) can be used as an argument to + display the functions wtih a gradient of colors. Args: fdata: functional data set that we want to plot. 
@@ -106,11 +109,13 @@ class GraphPlot: used to normalize the gradient_color_list in order to get values thatcan be used in the funcion colormap.__call__(). If not declared it will be initialized to the minimum value of - gradient_list + gradient_list. + Attributes: gradient_list: normalization of the values from gradient color_list that will be used to determine the intensity of the color each function will have. + """ def __init__( self, @@ -151,7 +156,7 @@ def plot( chart: Figure = None, *, fig: Figure = None, - axes: Axes = None, + axes: List[Axes] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, n_points: Optional[S] = None, @@ -163,16 +168,72 @@ def plot( legend: bool = False, **kwargs: Any, ) -> Figure: - - """Method used to plot the graph. Plots each coordinate separately. - If the :term:`domain` is one dimensional, the plots will be curves, - and if it is two dimensional, they will be surfaces. + """ + Plot the graph. + + Plots each coordinate separately. If the :term:`domain` is one + dimensional, the plots will be curves, and if it is two + dimensional, they will be surfaces. There are two styles of + visualizations, one that displays the functions without any + criteria choosing the colors and a new one that displays the + function with a gradient of colors depending on the initial + gradient_color_list (normalized in gradient_list). - There are two styles of visualizations, one that displays the - functions without any criteria choosing the colors and a new one - that displays the function with a gradient of colors depending - on the initial gradient_color_list (normalized in - gradient_list).""" + Args: + chart (figure object, axe or list of axes, optional): figure over + with the graphs are plotted or axis over where the graphs are + plotted. If None and ax is also None, the figure is + initialized. + fig (figure object, optional): figure over with the graphs are + plotted in case ax is not specified. 
If None and ax is also + None, the figure is initialized. + axes (list of axis objects, optional): axis over where the graphs are + plotted. If None, see param fig. + n_rows (int, optional): designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols(int, optional): designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. + n_points (int or tuple, optional): Number of points to evaluate in + the plot. In case of surfaces a tuple of length 2 can be pased + with the number of points to plot in each axis, otherwise the + same number of points will be used in the two axes. By default + in unidimensional plots will be used 501 points; in surfaces + will be used 30 points per axis, wich makes a grid with 900 + points. + domain_range (tuple or list of tuples, optional): Range where the + function will be plotted. In objects with unidimensional domain + the domain range should be a tuple with the bounds of the + interval; in the case of surfaces a list with 2 tuples with + the ranges for each dimension. Default uses the domain range + of the functional object. + group (list of int): contains integers from [0 to number of + labels) indicating to which group each sample belongs to. Then, + the samples with the same label are plotted in the same color. + If None, the default value, each sample is plotted in the color + assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. + group_colors (list of colors): colors in which groups are + represented, there must be one for each group. If None, each + group is shown with distict colors in the "Greys" colormap. + group_names (list of str): name of each of the groups which appear + in a legend, there must be one for each one. Defaults to None + and the legend is not shown. Implies `legend=True`. + colormap_name: name of the colormap to be used. 
By default we will + use autumn. + legend (bool): if `True`, show a legend with the groups. If + `group_names` is passed, it will be used for finding the names + to display in the legend. Otherwise, the values passed to + `group` will be used. + **kwargs: if dim_domain is 1, keyword arguments to be passed to + the matplotlib.pyplot.plot function; if dim_domain is 2, + keyword arguments to be passed to the + matplotlib.pyplot.plot_surface function. + + Returns: + fig (figure object): figure object in which the graphs are plotted. + + """ fig, axes = _get_figure_and_axes(chart, fig, axes) fig, axes = _set_figure_layout_for_fdata( @@ -642,4 +703,4 @@ def plot_color_gradient(fdata, chart=None, *, fig=None, axes=None, _set_labels(fdata, fig, axes) - return fig \ No newline at end of file + return fig From a2f5dec54fdf4279ace2b3a5d29edfc10a36cfa3 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 8 Feb 2021 19:29:57 +0100 Subject: [PATCH 078/417] classes created pending review flake8 --- .../visualization/representation.py | 248 ++++++++++++++---- 1 file changed, 196 insertions(+), 52 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 1716589dd..dcd470b48 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -180,55 +180,55 @@ def plot( gradient_color_list (normalized in gradient_list). Args: - chart (figure object, axe or list of axes, optional): figure over - with the graphs are plotted or axis over where the graphs are - plotted. If None and ax is also None, the figure is - initialized. - fig (figure object, optional): figure over with the graphs are - plotted in case ax is not specified. If None and ax is also - None, the figure is initialized. - axes (list of axis objects, optional): axis over where the graphs are - plotted. If None, see param fig. 
- n_rows (int, optional): designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols(int, optional): designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. - n_points (int or tuple, optional): Number of points to evaluate in - the plot. In case of surfaces a tuple of length 2 can be pased - with the number of points to plot in each axis, otherwise the - same number of points will be used in the two axes. By default - in unidimensional plots will be used 501 points; in surfaces - will be used 30 points per axis, wich makes a grid with 900 - points. - domain_range (tuple or list of tuples, optional): Range where the - function will be plotted. In objects with unidimensional domain - the domain range should be a tuple with the bounds of the - interval; in the case of surfaces a list with 2 tuples with - the ranges for each dimension. Default uses the domain range - of the functional object. - group (list of int): contains integers from [0 to number of - labels) indicating to which group each sample belongs to. Then, - the samples with the same label are plotted in the same color. - If None, the default value, each sample is plotted in the color - assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors (list of colors): colors in which groups are - represented, there must be one for each group. If None, each - group is shown with distict colors in the "Greys" colormap. - group_names (list of str): name of each of the groups which appear - in a legend, there must be one for each one. Defaults to None - and the legend is not shown. Implies `legend=True`. - colormap_name: name of the colormap to be used. By default we will - use autumn. - legend (bool): if `True`, show a legend with the groups. If - `group_names` is passed, it will be used for finding the names - to display in the legend. 
Otherwise, the values passed to - `group` will be used. - **kwargs: if dim_domain is 1, keyword arguments to be passed to - the matplotlib.pyplot.plot function; if dim_domain is 2, - keyword arguments to be passed to the - matplotlib.pyplot.plot_surface function. + chart (figure object, axe or list of axes, optional): figure over + with the graphs are plotted or axis over where the graphs are + plotted. If None and ax is also None, the figure is + initialized. + fig (figure object, optional): figure over with the graphs are + plotted in case ax is not specified. If None and ax is also + None, the figure is initialized. + axes (list of axis objects, optional): axis over where the graphs are + plotted. If None, see param fig. + n_rows (int, optional): designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols(int, optional): designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. + n_points (int or tuple, optional): Number of points to evaluate in + the plot. In case of surfaces a tuple of length 2 can be pased + with the number of points to plot in each axis, otherwise the + same number of points will be used in the two axes. By default + in unidimensional plots will be used 501 points; in surfaces + will be used 30 points per axis, wich makes a grid with 900 + points. + domain_range (tuple or list of tuples, optional): Range where the + function will be plotted. In objects with unidimensional domain + the domain range should be a tuple with the bounds of the + interval; in the case of surfaces a list with 2 tuples with + the ranges for each dimension. Default uses the domain range + of the functional object. + group (list of int): contains integers from [0 to number of + labels) indicating to which group each sample belongs to. Then, + the samples with the same label are plotted in the same color. 
+ If None, the default value, each sample is plotted in the color + assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. + group_colors (list of colors): colors in which groups are + represented, there must be one for each group. If None, each + group is shown with distict colors in the "Greys" colormap. + group_names (list of str): name of each of the groups which appear + in a legend, there must be one for each one. Defaults to None + and the legend is not shown. Implies `legend=True`. + colormap_name: name of the colormap to be used. By default we will + use autumn. + legend (bool): if `True`, show a legend with the groups. If + `group_names` is passed, it will be used for finding the names + to display in the legend. Otherwise, the values passed to + `group` will be used. + **kwargs: if dim_domain is 1, keyword arguments to be passed to + the matplotlib.pyplot.plot function; if dim_domain is 2, + keyword arguments to be passed to the + matplotlib.pyplot.plot_surface function. Returns: fig (figure object): figure object in which the graphs are plotted. @@ -445,13 +445,157 @@ def plot_graph(fdata, chart=None, *, fig=None, axes=None, return fig -def plot_scatter(fdata, chart=None, *, grid_points=None, - fig=None, axes=None, +class ScatterPlot: + + """ + Class used to scatter the FDataGrid object. + + Args: + fdata: functional data set that we want to plot. + grid_points (ndarray): points to plot. + + """ + def __init__( + self, + fdata: T, + grid_points: np.ndarray = None, + ) -> None: + self.fdata = fdata + self.grid_points = grid_points + + def plot( + self, + chart: Figure = None, + *, + fig: Figure = None, + axes: List[Axes] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + n_points: Optional[S] = None, + domain_range: Optional[V] = None, + group: List[int] = None, + group_colors: List[Any] = None, + group_names: List[str] = None, + legend: bool = False, + **kwargs: Any, + ) -> Figure: + """ + Scatter FDataGrid object. 
+ + Args: + chart (figure object, axe or list of axes, optional): figure over + with the graphs are plotted or axis over where the graphs are + plotted. If None and ax is also None, the figure is + initialized. + fig (figure object, optional): figure over with the graphs are + plotted in case ax is not specified. If None and ax is also + None, the figure is initialized. + axes (list of axis objects, optional): axis over where the graphs are + plotted. If None, see param fig. + n_rows (int, optional): designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols(int, optional): designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. + domain_range (tuple or list of tuples, optional): Range where the + function will be plotted. In objects with unidimensional domain + the domain range should be a tuple with the bounds of the + interval; in the case of surfaces a list with 2 tuples with + the ranges for each dimension. Default uses the domain range + of the functional object. + group (list of int): contains integers from [0 to number of + labels) indicating to which group each sample belongs to. Then, + the samples with the same label are plotted in the same color. + If None, the default value, each sample is plotted in the color + assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. + group_colors (list of colors): colors in which groups are + represented, there must be one for each group. If None, each + group is shown with distict colors in the "Greys" colormap. + group_names (list of str): name of each of the groups which appear + in a legend, there must be one for each one. Defaults to None + and the legend is not shown. Implies `legend=True`. + legend (bool): if `True`, show a legend with the groups. If + `group_names` is passed, it will be used for finding the names + to display in the legend. 
Otherwise, the values passed to + `group` will be used. + **kwargs: if dim_domain is 1, keyword arguments to be passed to + the matplotlib.pyplot.plot function; if dim_domain is 2, + keyword arguments to be passed to the + matplotlib.pyplot.plot_surface function. + + Returns: + fig (figure object): figure object in which the graphs are plotted. + + """ + + evaluated_points = None + + if self.grid_points is None: + # This can only be done for FDataGrid + grid_points = self.fdata.grid_points + evaluated_points = self.fdata.data_matrix + + if evaluated_points is None: + evaluated_points = self.fdata( + self.grid_points, grid=True) + + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata(self.fdata, fig, axes, n_rows, n_cols) + + if domain_range is None: + domain_range = self.fdata.domain_range + else: + domain_range = _to_domain_range(domain_range) + + sample_colors, patches = _get_color_info( + self.fdata, group, group_names, group_colors, legend, kwargs + ) + + if self.fdata.dim_domain == 1: + + color_dict = {} + + for i in range(self.fdata.dim_codomain): + for j in range(self.fdata.n_samples): + + if sample_colors is not None: + color_dict["color"] = sample_colors[j] + + axes[i].scatter(self.grid_points[0], + evaluated_points[j, ..., i].T, + **color_dict, **kwargs) + + else: + + X = self.fdata.grid_points[0] + Y = self.fdata.grid_points[1] + X, Y = np.meshgrid(X, Y) + + color_dict = {} + + for i in range(self.fdata.dim_codomain): + for j in range(self.fdata.n_samples): + + if sample_colors is not None: + color_dict["color"] = sample_colors[j] + + axes[i].scatter(X, Y, + evaluated_points[j, ..., i].T, + **color_dict, **kwargs) + + _set_labels(self.fdata, fig, axes, patches) + + return fig + + +def plot_scatter(fdata, chart=None, *, + fig=None, axes=None, grid_points = None, n_rows=None, n_cols=None, domain_range=None, group=None, group_colors=None, group_names=None, legend: bool = False, **kwargs): - """Plot the FDatGrid 
object. + """Plot the FDataGrid object. Args: chart (figure object, axe or list of axes, optional): figure over From 3d2835877c7bdf0f23aa6bb032cd763681e3d45c Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 8 Feb 2021 19:34:41 +0100 Subject: [PATCH 079/417] change call plot_scatter to new class --- skfda/representation/grid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 5475cda49..daf9b3d6c 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -804,9 +804,9 @@ def scatter(self, *args: Any, **kwargs: Any) -> Figure: """ - from ..exploratory.visualization.representation import plot_scatter + from ..exploratory.visualization.representation import ScatterPlot - return plot_scatter(self, *args, **kwargs) + return ScatterPlot(self).plot(*args, **kwargs) def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: """Return the basis representation of the object. From 999d0d62214434a08cf0a3c21e89b77656018e5d Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Mon, 8 Feb 2021 23:25:40 +0100 Subject: [PATCH 080/417] Lagrange polynomial --- skfda/_utils/__init__.py | 4 +- skfda/_utils/_utils.py | 12 +-- .../classification/_centroid_classifiers.py | 42 +++++--- skfda/ml/classification/_depth_classifiers.py | 100 ++++++++++-------- .../feature_extraction/_ddg_transformer.py | 30 +++--- 5 files changed, 105 insertions(+), 83 deletions(-) diff --git a/skfda/_utils/__init__.py b/skfda/_utils/__init__.py index 1856a7837..c8227ab93 100644 --- a/skfda/_utils/__init__.py +++ b/skfda/_utils/__init__.py @@ -4,9 +4,9 @@ _cartesian_product, _check_array_key, _check_estimator, - _classifier_fit_distributions, + _classifier_fit_depth_methods, _classifier_get_classes, - _classifier_get_distributions, + _classifier_get_depth_methods, _evaluate_grid, _FDataCallable, _int_to_real, diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index ea43cd3e1..289e8c535 
100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -580,7 +580,7 @@ def _classifier_get_classes(y: ndarray) -> Tuple[ndarray, ndarray]: return classes, y_ind -def _classifier_get_distributions( +def _classifier_get_depth_methods( classes: ndarray, X: FDataGrid, y_ind: ndarray, @@ -593,15 +593,15 @@ def _classifier_get_distributions( ] -def _classifier_fit_distributions( +def _classifier_fit_depth_methods( X: FDataGrid, y: ndarray, depth_methods: Sequence[Depth[T]], ) -> Tuple[ndarray, Sequence[Depth[T]]]: - classes_, y_ind = _classifier_get_classes(y) + classes, y_ind = _classifier_get_classes(y) - distributions_ = _classifier_get_distributions( - classes_, X, y_ind, depth_methods, + class_depth_methods_ = _classifier_get_depth_methods( + classes, X, y_ind, depth_methods, ) - return classes_, distributions_ + return classes, class_depth_methods_ diff --git a/skfda/ml/classification/_centroid_classifiers.py b/skfda/ml/classification/_centroid_classifiers.py index 5e4e9cb24..06296970f 100644 --- a/skfda/ml/classification/_centroid_classifiers.py +++ b/skfda/ml/classification/_centroid_classifiers.py @@ -1,7 +1,7 @@ """Centroid-based models for supervised classification.""" from __future__ import annotations -from typing import Callable, Optional, TypeVar, Union +from typing import Callable, Generic, Optional, TypeVar from numpy import ndarray from sklearn.base import BaseEstimator, ClassifierMixin @@ -10,13 +10,17 @@ from ..._utils import _classifier_get_classes from ...exploratory.depth import Depth, ModifiedBandDepth from ...exploratory.stats import mean, trim_mean -from ...misc.metrics import LpDistance, PairwiseMetric, l2_distance -from ...representation.grid import FDataGrid +from ...misc.metrics import Metric, PairwiseMetric, l2_distance +from ...representation import FData -T = TypeVar("T", contravariant=True) +T = TypeVar("T", bound=FData) -class NearestCentroid(BaseEstimator, ClassifierMixin): +class NearestCentroid( + BaseEstimator, # type: 
ignore + ClassifierMixin, # type: ignore + Generic[T], +): """Nearest centroid classifier for functional data. Each class is represented by its centroid, with test samples classified to @@ -64,13 +68,13 @@ class and return a :class:`FData` object with only one sample def __init__( self, - metric: Union[str, LpDistance] = l2_distance, - centroid: Callable = mean, + metric: Metric[T] = l2_distance, + centroid: Callable[[T], T] = mean, ): self.metric = metric self.centroid = centroid - def fit(self, X: Union[FDataGrid, ndarray], y: ndarray) -> NearestCentroid: + def fit(self, X: T, y: ndarray) -> NearestCentroid[T]: """Fit the model using X as training data and y as target values. Args: @@ -82,18 +86,18 @@ def fit(self, X: Union[FDataGrid, ndarray], y: ndarray) -> NearestCentroid: Returns: self """ - classes_, y_ind = _classifier_get_classes(y) + classes, y_ind = _classifier_get_classes(y) - self.classes_ = classes_ + self._classes = classes self.centroids_ = self.centroid(X[y_ind == 0]) - for cur_class in range(1, self.classes_.size): + for cur_class in range(1, self._classes.size): centroid = self.centroid(X[y_ind == cur_class]) self.centroids_ = self.centroids_.concatenate(centroid) return self - def predict(self, X: FDataGrid) -> ndarray: + def predict(self, X: T) -> ndarray: """Predict the class labels for the provided data. Args: @@ -105,14 +109,18 @@ def predict(self, X: FDataGrid) -> ndarray: """ sklearn_check_is_fitted(self) - return self.classes_[PairwiseMetric(self.metric)( + return self._classes[PairwiseMetric(self.metric)( X, self.centroids_, ).argmin(axis=1) ] -class DTMClassifier(BaseEstimator, ClassifierMixin): +class DTMClassifier( + BaseEstimator, # type: ignore + ClassifierMixin, # type: ignore + Generic[T], +): """Distance to trimmed means (DTM) classification. 
Test samples are classified to the class that minimizes the distance of @@ -173,13 +181,13 @@ def __init__( self, proportiontocut: float, depth_method: Optional[Depth[T]] = None, - metric: Callable = l2_distance, + metric: Metric[T] = l2_distance, ) -> None: self.proportiontocut = proportiontocut self.depth_method = depth_method self.metric = metric - def fit(self, X: FDataGrid, y: ndarray) -> DTMClassifier: + def fit(self, X: T, y: ndarray) -> DTMClassifier[T]: """Fit the model using X as training data and y as target values. Args: @@ -204,7 +212,7 @@ def fit(self, X: FDataGrid, y: ndarray) -> DTMClassifier: return self - def predict(self, X: FDataGrid) -> ndarray: + def predict(self, X: T) -> ndarray: """Predict the class labels for the provided data. Args: diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index a56fc013f..39ff57de7 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -2,25 +2,29 @@ from __future__ import annotations from itertools import combinations -from typing import Optional, Sequence, TypeVar, Union +from typing import Generic, Optional, Sequence, TypeVar, Union import numpy as np -import numpy.polynomial.polynomial as poly from numpy import ndarray +from scipy.interpolate import lagrange from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.metrics import accuracy_score from sklearn.pipeline import make_pipeline from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted -from ..._utils import _classifier_fit_distributions +from ..._utils import _classifier_fit_depth_methods from ...exploratory.depth import Depth, ModifiedBandDepth from ...preprocessing.dim_reduction.feature_extraction import DDGTransformer -from ...representation.grid import FDataGrid +from ...representation.grid import FData -T = TypeVar("T", contravariant=True) +T = TypeVar("T", bound=FData) -class 
MaximumDepthClassifier(BaseEstimator, ClassifierMixin): +class MaximumDepthClassifier( + BaseEstimator, # type: ignore + ClassifierMixin, # type: ignore + Generic[T], +): """Maximum depth classifier for functional data. Test samples are classified to the class where they are deeper. @@ -71,7 +75,7 @@ class MaximumDepthClassifier(BaseEstimator, ClassifierMixin): def __init__(self, depth_method: Optional[Depth[T]] = None) -> None: self.depth_method = depth_method - def fit(self, X: FDataGrid, y: ndarray) -> MaximumDepthClassifier: + def fit(self, X: T, y: ndarray) -> MaximumDepthClassifier[T]: """Fit the model using X as training data and y as target values. Args: @@ -84,16 +88,16 @@ def fit(self, X: FDataGrid, y: ndarray) -> MaximumDepthClassifier: if self.depth_method is None: self.depth_method = ModifiedBandDepth() - classes_, distributions_ = _classifier_fit_distributions( + classes, class_depth_methods = _classifier_fit_depth_methods( X, y, [self.depth_method], ) - self.classes_ = classes_ - self.distributions_ = distributions_ + self._classes = classes + self.class_depth_methods_ = class_depth_methods return self - def predict(self, X: FDataGrid) -> ndarray: + def predict(self, X: T) -> ndarray: """Predict the class labels for the provided data. Args: @@ -106,14 +110,18 @@ def predict(self, X: FDataGrid) -> ndarray: sklearn_check_is_fitted(self) depths = [ - distribution.predict(X) - for distribution in self.distributions_ + depth_method.predict(X) + for depth_method in self.class_depth_methods_ ] - return self.classes_[np.argmax(depths, axis=0)] + return self._classes[np.argmax(depths, axis=0)] -class DDClassifier(BaseEstimator, ClassifierMixin): +class DDClassifier( + BaseEstimator, # type: ignore + ClassifierMixin, # type: ignore + Generic[T], +): """Depth-versus-depth (DD) classifer for functional data. 
Transforms the data into a DD-plot and then classifies using a polynomial @@ -178,7 +186,7 @@ def __init__( self.depth_method = depth_method self.degree = degree - def fit(self, X: FDataGrid, y: ndarray) -> DDClassifier: + def fit(self, X: T, y: ndarray) -> DDClassifier[T]: """Fit the model using X as training data and y as target values. Args: @@ -191,16 +199,16 @@ def fit(self, X: FDataGrid, y: ndarray) -> DDClassifier: if self.depth_method is None: self.depth_method = ModifiedBandDepth() - classes_, distributions_ = _classifier_fit_distributions( + classes, class_depth_methods = _classifier_fit_depth_methods( X, y, [self.depth_method], ) - self.classes_ = classes_ - self.distributions_ = distributions_ + self._classes = classes + self.class_depth_methods_ = class_depth_methods dd_coordinates = [ - distribution.predict(X) - for distribution in self.distributions_ + depth_method.predict(X) + for depth_method in self.class_depth_methods_ ] polynomial_elements = combinations( @@ -208,34 +216,32 @@ def fit(self, X: FDataGrid, y: ndarray) -> DDClassifier: self.degree, ) - accuracy = -1 + accuracy = -1 # initialise accuracy for elements in polynomial_elements: - x_coord = [0] + [dd_coordinates[0][e] for e in elements] - y_coord = [0] + [dd_coordinates[1][e] for e in elements] + x_coord = np.append(dd_coordinates[0][list(elements)], 0) + y_coord = np.append(dd_coordinates[1][list(elements)], 0) - coefs = poly.polyfit( - x_coord, y_coord, self.degree, + poly = lagrange( + x_coord, y_coord, ) - polynomial = poly.Polynomial(coefs) + predicted_values = np.polyval(poly, dd_coordinates[0]) - predicted_values = polynomial(dd_coordinates[0]) - - y_pred = [ - self.classes_[0] if z - y > 0 else self.classes_[1] - for (z, y) in zip(predicted_values, dd_coordinates[1]) + y_pred = self._classes[( + dd_coordinates[1] > predicted_values + ).astype(int) ] new_accuracy = accuracy_score(y, y_pred) if (new_accuracy > accuracy): accuracy = new_accuracy - self.polynomial = polynomial + 
self.poly = poly return self - def predict(self, X: FDataGrid) -> ndarray: + def predict(self, X: T) -> ndarray: """Predict the class labels for the provided data. Args: @@ -248,19 +254,23 @@ def predict(self, X: FDataGrid) -> ndarray: sklearn_check_is_fitted(self) dd_coordinates = [ - distribution.predict(X) - for distribution in self.distributions_ + depth_method.predict(X) + for depth_method in self.class_depth_methods_ ] - predicted_values = self.polynomial(dd_coordinates[0]) + predicted_values = np.polyval(self.poly, dd_coordinates[0]) - return np.array([ - self.classes_[0] if z - y > 0 else self.classes_[1] - for (z, y) in zip(predicted_values, dd_coordinates[1]) - ]) + return self._classes[( + dd_coordinates[1] > predicted_values + ).astype(int) + ] -class DDGClassifier(BaseEstimator, ClassifierMixin): +class DDGClassifier( + BaseEstimator, # type: ignore + ClassifierMixin, # type: ignore + Generic[T], +): r"""Generalized depth-versus-depth (DD) classifer for functional data. This classifier builds an interface around the DDGTransfomer. @@ -338,12 +348,12 @@ class DDGClassifier(BaseEstimator, ClassifierMixin): def __init__( self, multivariate_classifier: ClassifierMixin = None, - depth_method: Optional[Union[Depth[T], Sequence[Depth[T]]]] = None, + depth_method: Union[Depth[T], Sequence[Depth[T]], None] = None, ) -> None: self.multivariate_classifier = multivariate_classifier self.depth_method = depth_method - def fit(self, X: FDataGrid, y: ndarray) -> DDGClassifier: + def fit(self, X: T, y: ndarray) -> DDGClassifier[T]: """Fit the model using X as training data and y as target values. Args: @@ -362,7 +372,7 @@ def fit(self, X: FDataGrid, y: ndarray) -> DDGClassifier: return self - def predict(self, X: FDataGrid) -> ndarray: + def predict(self, X: T) -> ndarray: """Predict the class labels for the provided data. 
Args: diff --git a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py index 54d4e735c..532888244 100644 --- a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py +++ b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py @@ -1,21 +1,25 @@ """Feature extraction transformers for dimensionality reduction.""" from __future__ import annotations -from typing import Optional, Sequence, TypeVar, Union +from typing import Generic, Sequence, TypeVar, Union import numpy as np from numpy import ndarray from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted -from ...._utils import _classifier_fit_distributions +from ...._utils import _classifier_fit_depth_methods from ....exploratory.depth import Depth, ModifiedBandDepth -from ....representation.grid import FDataGrid +from ....representation.grid import FData -T = TypeVar("T", contravariant=True) +T = TypeVar("T", bound=FData) -class DDGTransformer(BaseEstimator, TransformerMixin): +class DDGTransformer( + BaseEstimator, # type: ignore + TransformerMixin, # type: ignore + Generic[T], +): r"""Generalized depth-versus-depth (DD) transformer for functional data. This transformer takes a list of k depths and performs the following map: @@ -82,11 +86,11 @@ class DDGTransformer(BaseEstimator, TransformerMixin): def __init__( self, - depth_method: Optional[Union[Depth[T], Sequence[Depth[T]]]] = None, + depth_method: Union[Depth[T], Sequence[Depth[T]], None] = None, ) -> None: self.depth_method = depth_method - def fit(self, X: FDataGrid, y: ndarray) -> DDGTransformer: + def fit(self, X: T, y: ndarray) -> DDGTransformer[T]: """Fit the model using X as training data and y as target values. 
Args: @@ -102,16 +106,16 @@ def fit(self, X: FDataGrid, y: ndarray) -> DDGTransformer: if isinstance(self.depth_method, Depth): self.depth_method = [self.depth_method] - classes_, distributions_ = _classifier_fit_distributions( + classes, class_depth_methods = _classifier_fit_depth_methods( X, y, self.depth_method, ) - self.classes_ = classes_ - self.distributions_ = distributions_ + self._classes = classes + self.class_depth_methods_ = class_depth_methods return self - def transform(self, X: FDataGrid) -> ndarray: + def transform(self, X: T) -> ndarray: """Transform the provided data using the defined map. Args: @@ -123,6 +127,6 @@ def transform(self, X: FDataGrid) -> ndarray: sklearn_check_is_fitted(self) return np.transpose([ - distribution.predict(X) - for distribution in self.distributions_ + depth_method.predict(X) + for depth_method in self.class_depth_methods_ ]) From cb47d9767bcb3be81dc2aa0a87707c93b85daa9e Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 8 Feb 2021 23:33:57 +0100 Subject: [PATCH 081/417] c --- .../visualization/representation.py | 400 +----------------- 1 file changed, 3 insertions(+), 397 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index dcd470b48..f87f1a5fb 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -312,139 +312,6 @@ def plot( return fig -def plot_graph(fdata, chart=None, *, fig=None, axes=None, - n_rows=None, n_cols=None, n_points=None, - domain_range=None, - group=None, group_colors=None, group_names=None, - legend: bool = False, - **kwargs): - """Plot the FDatGrid object graph as hypersurfaces. - - Plots each coordinate separately. If the :term:`domain` is one dimensional, - the plots will be curves, and if it is two dimensional, they will be - surfaces. 
- - Args: - chart (figure object, axe or list of axes, optional): figure over - with the graphs are plotted or axis over where the graphs are - plotted. If None and ax is also None, the figure is - initialized. - fig (figure object, optional): figure over with the graphs are - plotted in case ax is not specified. If None and ax is also - None, the figure is initialized. - axes (list of axis objects, optional): axis over where the graphs are - plotted. If None, see param fig. - n_rows (int, optional): designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols(int, optional): designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. - n_points (int or tuple, optional): Number of points to evaluate in - the plot. In case of surfaces a tuple of length 2 can be pased - with the number of points to plot in each axis, otherwise the - same number of points will be used in the two axes. By default - in unidimensional plots will be used 501 points; in surfaces - will be used 30 points per axis, wich makes a grid with 900 - points. - domain_range (tuple or list of tuples, optional): Range where the - function will be plotted. In objects with unidimensional domain - the domain range should be a tuple with the bounds of the - interval; in the case of surfaces a list with 2 tuples with - the ranges for each dimension. Default uses the domain range - of the functional object. - group (list of int): contains integers from [0 to number of - labels) indicating to which group each sample belongs to. Then, - the samples with the same label are plotted in the same color. - If None, the default value, each sample is plotted in the color - assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors (list of colors): colors in which groups are - represented, there must be one for each group. 
If None, each - group is shown with distict colors in the "Greys" colormap. - group_names (list of str): name of each of the groups which appear - in a legend, there must be one for each one. Defaults to None - and the legend is not shown. Implies `legend=True`. - legend (bool): if `True`, show a legend with the groups. If - `group_names` is passed, it will be used for finding the names - to display in the legend. Otherwise, the values passed to - `group` will be used. - **kwargs: if dim_domain is 1, keyword arguments to be passed to - the matplotlib.pyplot.plot function; if dim_domain is 2, - keyword arguments to be passed to the - matplotlib.pyplot.plot_surface function. - - Returns: - fig (figure object): figure object in which the graphs are plotted. - - """ - - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata(fdata, fig, axes, n_rows, n_cols) - - if domain_range is None: - domain_range = fdata.domain_range - else: - domain_range = _to_domain_range(domain_range) - - sample_colors, patches = _get_color_info( - fdata, group, group_names, group_colors, legend, kwargs) - - if fdata.dim_domain == 1: - - if n_points is None: - n_points = constants.N_POINTS_UNIDIMENSIONAL_PLOT_MESH - - # Evaluates the object in a linspace - eval_points = np.linspace(*domain_range[0], n_points) - mat = fdata(eval_points) - - color_dict = {} - - for i in range(fdata.dim_codomain): - for j in range(fdata.n_samples): - - if sample_colors is not None: - color_dict["color"] = sample_colors[j] - - axes[i].plot(eval_points, mat[j, ..., i].T, - **color_dict, **kwargs) - - else: - - # Selects the number of points - if n_points is None: - n_points = 2 * (constants.N_POINTS_SURFACE_PLOT_AX,) - elif np.isscalar(n_points): - n_points = (n_points, n_points) - elif len(n_points) != 2: - raise ValueError(f"n_points should be a number or a tuple of " - f"length 2, and has length {len(n_points)}") - - # Axes where will be evaluated - x = 
np.linspace(*domain_range[0], n_points[0]) - y = np.linspace(*domain_range[1], n_points[1]) - - # Evaluation of the functional object - Z = fdata((x, y), grid=True) - - X, Y = np.meshgrid(x, y, indexing='ij') - - color_dict = {} - - for i in range(fdata.dim_codomain): - for j in range(fdata.n_samples): - - if sample_colors is not None: - color_dict["color"] = sample_colors[j] - - axes[i].plot_surface(X, Y, Z[j, ..., i], - **color_dict, **kwargs) - - _set_labels(fdata, fig, axes, patches) - - return fig - - class ScatterPlot: """ @@ -490,8 +357,8 @@ def plot( fig (figure object, optional): figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes (list of axis objects, optional): axis over where the graphs are - plotted. If None, see param fig. + axes (list of axis objects, optional): axis over where the graphs + are plotted. If None, see param fig. n_rows (int, optional): designates the number of rows of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. @@ -586,265 +453,4 @@ def plot( _set_labels(self.fdata, fig, axes, patches) - return fig - - -def plot_scatter(fdata, chart=None, *, - fig=None, axes=None, grid_points = None, - n_rows=None, n_cols=None, domain_range=None, - group=None, group_colors=None, group_names=None, - legend: bool = False, - **kwargs): - """Plot the FDataGrid object. - - Args: - chart (figure object, axe or list of axes, optional): figure over - with the graphs are plotted or axis over where the graphs are - plotted. If None and ax is also None, the figure is - initialized. - grid_points (ndarray): points to plot. - fig (figure object, optional): figure over with the graphs are - plotted in case ax is not specified. If None and ax is also - None, the figure is initialized. - axes (list of axis objects, optional): axis over where the graphs are - plotted. If None, see param fig. 
- n_rows (int, optional): designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols(int, optional): designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. - domain_range (tuple or list of tuples, optional): Range where the - function will be plotted. In objects with unidimensional domain - the domain range should be a tuple with the bounds of the - interval; in the case of surfaces a list with 2 tuples with - the ranges for each dimension. Default uses the domain range - of the functional object. - group (list of int): contains integers from [0 to number of - labels) indicating to which group each sample belongs to. Then, - the samples with the same label are plotted in the same color. - If None, the default value, each sample is plotted in the color - assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors (list of colors): colors in which groups are - represented, there must be one for each group. If None, each - group is shown with distict colors in the "Greys" colormap. - group_names (list of str): name of each of the groups which appear - in a legend, there must be one for each one. Defaults to None - and the legend is not shown. Implies `legend=True`. - legend (bool): if `True`, show a legend with the groups. If - `group_names` is passed, it will be used for finding the names - to display in the legend. Otherwise, the values passed to - `group` will be used. - **kwargs: if dim_domain is 1, keyword arguments to be passed to - the matplotlib.pyplot.plot function; if dim_domain is 2, - keyword arguments to be passed to the - matplotlib.pyplot.plot_surface function. - - Returns: - fig (figure object): figure object in which the graphs are plotted. 
- - """ - - evaluated_points = None - - if grid_points is None: - # This can only be done for FDataGrid - grid_points = fdata.grid_points - evaluated_points = fdata.data_matrix - - if evaluated_points is None: - evaluated_points = fdata( - grid_points, grid=True) - - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata(fdata, fig, axes, n_rows, n_cols) - - if domain_range is None: - domain_range = fdata.domain_range - else: - domain_range = _to_domain_range(domain_range) - - sample_colors, patches = _get_color_info( - fdata, group, group_names, group_colors, legend, kwargs) - - if fdata.dim_domain == 1: - - color_dict = {} - - for i in range(fdata.dim_codomain): - for j in range(fdata.n_samples): - - if sample_colors is not None: - color_dict["color"] = sample_colors[j] - - axes[i].scatter(grid_points[0], - evaluated_points[j, ..., i].T, - **color_dict, **kwargs) - - else: - - X = fdata.grid_points[0] - Y = fdata.grid_points[1] - X, Y = np.meshgrid(X, Y) - - color_dict = {} - - for i in range(fdata.dim_codomain): - for j in range(fdata.n_samples): - - if sample_colors is not None: - color_dict["color"] = sample_colors[j] - - axes[i].scatter(X, Y, - evaluated_points[j, ..., i].T, - **color_dict, **kwargs) - - _set_labels(fdata, fig, axes, patches) - - return fig - - -def plot_color_gradient(fdata, chart=None, *, fig=None, axes=None, - n_rows=None, n_cols=None, n_points=None, - domain_range=None, gradient_color_list, - max_grad = None, min_grad = None, - colormap_name = 'autumn', - **kwargs): - """Plot the FDatGrid object graph as hypersurfaces, representing each - instance depending on a color defined by the gradient_color_list. - - Plots each coordinate separately. If the domain is one dimensional, the - plots will be curves, and if it is two dimensional, they will be surfaces. - - Args: - fdata: functional data to be represented. 
- chart (figure object, axe or list of axes, optional): figure over - with the graphs are plotted or axis over where the graphs are - plotted. If None and ax is also None, the figure is - initialized. - fig (figure object, optional): figure over with the graphs are - plotted in case ax is not specified. If None and ax is also - None, the figure is initialized. - axes (list of axis objects, optional): axis over where the graphs are - plotted. If None, see param fig. - n_rows (int, optional): designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols(int, optional): designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. - n_points (int or tuple, optional): Number of points to evaluate in - the plot. In case of surfaces a tuple of length 2 can be pased - with the number of points to plot in each axis, otherwise the - same number of points will be used in the two axes. By default - in unidimensional plots will be used 501 points; in surfaces - will be used 30 points per axis, wich makes a grid with 900 - points. - domain_range (tuple or list of tuples, optional): Range where the - function will be plotted. In objects with unidimensional domain - the domain range should be a tuple with the bounds of the - interval; in the case of surfaces a list with 2 tuples with - the ranges for each dimension. Default uses the domain range - of the functional object. - gradient_color_list: list of real values used to determine the color - in which each of the instances will be plotted. The size - max_grad: maximum value that the gradient_list can take, it will be - used to normalize the gradient_color_list in order to get values that - can be used in the funcion colormap.__call__(). 
If not declared - it will be initialized to the maximum value of gradient_list - min_grad: minimum value that the gradient_list can take, it will be - used to normalize the gradient_color_list in order to get values that - can be used in the funcion colormap.__call__(). If not declared - it will be initialized to the minimum value of gradient_list - colormap_name: name of the colormap to be used. By default we will - use autumn. - **kwargs: if dim_domain is 1, keyword arguments to be passed to - the matplotlib.pyplot.plot function; if dim_domain is 2, - keyword arguments to be passed to the - matplotlib.pyplot.plot_surface function. - - Returns: - fig (figure object): figure object in which the graphs are plotted. - - """ - - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata(fdata, fig, axes, n_rows, n_cols) - - if domain_range is None: - domain_range = fdata.domain_range - else: - domain_range = _to_domain_range(domain_range) - - if len(gradient_color_list) != fdata.n_samples: - raise ValueError("The length of the gradient color" - "list should be the same as the number" - "of samples in fdata") - - colormap = matplotlib.cm.get_cmap(colormap_name) - colormap = colormap.reversed() - if min_grad is None: - min_grad = min(gradient_color_list) - - if max_grad is None: - max_grad = max(gradient_color_list) - - gradient_list = (gradient_color_list-min_grad)/(max_grad-min_grad) - - sample_colors = [None] * fdata.n_samples - for i in range(fdata.n_samples): - sample_colors[i] = colormap.__call__(gradient_list[i]) - - - if fdata.dim_domain == 1: - if n_points is None: - n_points = constants.N_POINTS_UNIDIMENSIONAL_PLOT_MESH - - # Evaluates the object in a linspace - eval_points = np.linspace(*domain_range[0], n_points) - mat = fdata(eval_points) - - color_dict = {} - - for i in range(fdata.dim_codomain): - for j in range(fdata.n_samples): - - if sample_colors is not None: - color_dict["color"] = sample_colors[j] - - 
axes[i].plot(eval_points, mat[j, ..., i].T, - **color_dict, **kwargs) - - else: - # Selects the number of points - if n_points is None: - n_points = 2 * (constants.N_POINTS_SURFACE_PLOT_AX,) - elif np.isscalar(n_points): - n_points = (n_points, n_points) - elif len(n_points) != 2: - raise ValueError(f"n_points should be a number or a tuple of " - f"length 2, and has length {len(n_points)}") - - # Axes where will be evaluated - x = np.linspace(*domain_range[0], n_points[0]) - y = np.linspace(*domain_range[1], n_points[1]) - - # Evaluation of the functional object - Z = fdata((x, y), grid=True) - - X, Y = np.meshgrid(x, y, indexing='ij') - - color_dict = {} - - for i in range(fdata.dim_codomain): - for j in range(fdata.n_samples): - - if sample_colors is not None: - color_dict["color"] = sample_colors[j] - - axes[i].plot_surface(X, Y, Z[j, ..., i], - **color_dict, **kwargs) - - _set_labels(fdata, fig, axes) - - return fig + return fig \ No newline at end of file From b3582e95df11505e02e618551267126435c9ec12 Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Mon, 8 Feb 2021 23:35:43 +0100 Subject: [PATCH 082/417] Type T --- skfda/_utils/_utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 289e8c535..48bad6b32 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -37,9 +37,8 @@ from ..exploratory.depth import Depth from ..representation import FData from ..representation.basis import Basis - from ..representation.grid import FDataGrid -T = TypeVar("T", contravariant=True) +T = TypeVar("T", bound=FData) class _FDataCallable(): @@ -582,7 +581,7 @@ def _classifier_get_classes(y: ndarray) -> Tuple[ndarray, ndarray]: def _classifier_get_depth_methods( classes: ndarray, - X: FDataGrid, + X: T, y_ind: ndarray, depth_methods: Sequence[Depth[T]], ) -> Sequence[Depth[T]]: @@ -594,7 +593,7 @@ def _classifier_get_depth_methods( def _classifier_fit_depth_methods( - X: FDataGrid, + X: T, 
y: ndarray, depth_methods: Sequence[Depth[T]], ) -> Tuple[ndarray, Sequence[Depth[T]]]: From 8878b13454a557ba763253e50d4f4892ec20afb4 Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Mon, 8 Feb 2021 23:43:48 +0100 Subject: [PATCH 083/417] Type T --- skfda/_utils/_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 48bad6b32..9c169dde4 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -37,8 +37,7 @@ from ..exploratory.depth import Depth from ..representation import FData from ..representation.basis import Basis - -T = TypeVar("T", bound=FData) + T = TypeVar("T", bound=FData) class _FDataCallable(): From 9ae385e417b9520ae9e4d1e2a894cf72b7267c63 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 9 Feb 2021 00:07:20 +0100 Subject: [PATCH 084/417] ch --- .../visualization/representation.py | 42 +++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index f87f1a5fb..71636ff0e 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -1,9 +1,9 @@ -from typing import Any, List, Optional, TypeVar +from typing import Any, List +from typing import Optional, TypeVar import matplotlib.cm import matplotlib.patches import numpy as np -from matplotlib import colors from matplotlib.axes import Axes from matplotlib.figure import Figure @@ -25,9 +25,11 @@ def _get_label_colors(n_labels, group_colors=None): if group_colors is not None: if len(group_colors) != n_labels: - raise ValueError("There must be a color in group_colors " - "for each of the labels that appear in " - "group.") + raise ValueError( + "There must be a color in group_colors " + "for each of the labels that appear in " + "group." 
+ ) else: colormap = matplotlib.cm.get_cmap() group_colors = colormap(np.arange(n_labels) / (n_labels - 1)) @@ -187,8 +189,8 @@ def plot( fig (figure object, optional): figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes (list of axis objects, optional): axis over where the graphs are - plotted. If None, see param fig. + axes (list of axis objects, optional): axis over where the graphs + are plotted. If None, see param fig. n_rows (int, optional): designates the number of rows of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. @@ -225,7 +227,7 @@ def plot( `group_names` is passed, it will be used for finding the names to display in the legend. Otherwise, the values passed to `group` will be used. - **kwargs: if dim_domain is 1, keyword arguments to be passed to + kwargs: if dim_domain is 1, keyword arguments to be passed to the matplotlib.pyplot.plot function; if dim_domain is 2, keyword arguments to be passed to the matplotlib.pyplot.plot_surface function. @@ -273,8 +275,9 @@ def plot( if sample_colors is not None: color_dict["color"] = sample_colors[j] - axes[i].plot(eval_points, mat[j, ..., i].T, - **color_dict, **kwargs) + axes[i].plot( + eval_points, mat[j, ..., i].T, **color_dict, **kwargs + ) else: @@ -284,8 +287,10 @@ def plot( elif np.isscalar(n_points): n_points = (n_points, n_points) elif len(n_points) != 2: - raise ValueError(f"n_points should be a number or a tuple of " - f"length 2, and has length {len(n_points)}") + raise ValueError( + f"n_points should be a number or a tuple of " + f"length 2, and has length {len(n_points)}" + ) # Axes where will be evaluated x = np.linspace(*domain_range[0], n_points[0]) @@ -386,7 +391,7 @@ def plot( `group_names` is passed, it will be used for finding the names to display in the legend. Otherwise, the values passed to `group` will be used. 
- **kwargs: if dim_domain is 1, keyword arguments to be passed to + kwargs: if dim_domain is 1, keyword arguments to be passed to the matplotlib.pyplot.plot function; if dim_domain is 2, keyword arguments to be passed to the matplotlib.pyplot.plot_surface function. @@ -400,15 +405,18 @@ def plot( if self.grid_points is None: # This can only be done for FDataGrid - grid_points = self.fdata.grid_points + self.grid_points = self.fdata.grid_points evaluated_points = self.fdata.data_matrix if evaluated_points is None: evaluated_points = self.fdata( - self.grid_points, grid=True) + self.grid_points, grid=True, + ) fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata(self.fdata, fig, axes, n_rows, n_cols) + fig, axes = _set_figure_layout_for_fdata( + self.fdata, fig, axes, n_rows, n_cols + ) if domain_range is None: domain_range = self.fdata.domain_range @@ -416,7 +424,7 @@ def plot( domain_range = _to_domain_range(domain_range) sample_colors, patches = _get_color_info( - self.fdata, group, group_names, group_colors, legend, kwargs + self.fdata, group, group_names, group_colors, legend, kwargs, ) if self.fdata.dim_domain == 1: From 86b8ad033b7bc36b99b0636445096d1cba0da126 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 9 Feb 2021 00:26:45 +0100 Subject: [PATCH 085/417] c --- .../visualization/representation.py | 81 ++++++++++--------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 71636ff0e..8b3be45b2 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -1,5 +1,4 @@ -from typing import Any, List -from typing import Optional, TypeVar +import typing as t import matplotlib.cm import matplotlib.patches @@ -15,9 +14,9 @@ _set_labels, ) -T = TypeVar('T', FDataGrid, np.ndarray) -S = TypeVar('S', int, tuple) -V = TypeVar('V', tuple, list) 
+T = t.TypeVar('T', FDataGrid, np.ndarray) +S = t.TypeVar('S', int, tuple) +V = t.TypeVar('V', tuple, list) def _get_label_colors(n_labels, group_colors=None): @@ -50,13 +49,15 @@ def _get_color_info(fdata, group, group_names, group_colors, legend, kwargs): if group_colors is not None: group_colors_array = np.array( - [group_colors[g] for g in group_unique]) + [group_colors[g] for g in group_unique] + ) else: prop_cycle = matplotlib.rcParams['axes.prop_cycle'] cycle_colors = prop_cycle.by_key()['color'] group_colors_array = np.take( - cycle_colors, np.arange(n_labels), mode='wrap') + cycle_colors, np.arange(n_labels), mode='wrap', + ) sample_colors = group_colors_array[group_indexes] @@ -64,13 +65,16 @@ def _get_color_info(fdata, group, group_names, group_colors, legend, kwargs): if group_names is not None: group_names_array = np.array( - [group_names[g] for g in group_unique]) + [group_names[g] for g in group_unique] + ) elif legend is True: group_names_array = group_unique if group_names_array is not None: - patches = [matplotlib.patches.Patch(color=c, label=l) - for c, l in zip(group_colors_array, group_names_array)] + patches = [ + matplotlib.patches.Patch(color=c, label=l) + for c, l in zip(group_colors_array, group_names_array) + ] else: # In this case, each curve has a different color unless specified @@ -90,6 +94,7 @@ def _get_color_info(fdata, group, group_names, group_colors, legend, kwargs): return sample_colors, patches + class GraphPlot: """ @@ -122,9 +127,9 @@ class GraphPlot: def __init__( self, fdata: T, - gradient_color_list: List[float] = None, - max_grad: Optional[float] = None, - min_grad: Optional[float] = None, + gradient_color_list: t.List[float] = None, + max_grad: t.Optional[float] = None, + min_grad: t.Optional[float] = None, ) -> None: self.fdata = fdata self.gradient_color_list = gradient_color_list @@ -133,10 +138,11 @@ def __init__( raise ValueError( "The length of the gradient color" "list should be the same as the number" - "of 
samples in fdata") + "of samples in fdata" + ) if min_grad is None: - self.min_grad = min(gradient_color_list) + self.min_grad = min(gradient_color_list) else: self.min_grad = min_grad @@ -146,8 +152,7 @@ def __init__( self.max_grad = max_grad self.gradient_list = ( - (gradient_color_list - self.min_grad) - / + (gradient_color_list - self.min_grad) / (self.max_grad - self.min_grad) ) else: @@ -158,17 +163,17 @@ def plot( chart: Figure = None, *, fig: Figure = None, - axes: List[Axes] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, - n_points: Optional[S] = None, - domain_range: Optional[V] = None, - group: List[int] = None, - group_colors: List[Any] = None, - group_names: List[str] = None, + axes: t.List[Axes] = None, + n_rows: t.Optional[int] = None, + n_cols: t.Optional[int] = None, + n_points: t.Optional[S] = None, + domain_range: t.Optional[V] = None, + group: t.List[int] = None, + group_colors: t.List[t.Any] = None, + group_names: t.List[str] = None, colormap_name: str = 'autumn', legend: bool = False, - **kwargs: Any, + **kwargs: t.Any, ) -> Figure: """ Plot the graph. 
@@ -309,8 +314,10 @@ def plot( if sample_colors is not None: color_dict["color"] = sample_colors[j] - axes[i].plot_surface(X, Y, Z[j, ..., i], - **color_dict, **kwargs) + axes[i].plot_surface( + X, Y, Z[j, ..., i], + **color_dict, **kwargs, + ) _set_labels(self.fdata, fig, axes, patches) @@ -340,16 +347,16 @@ def plot( chart: Figure = None, *, fig: Figure = None, - axes: List[Axes] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, - n_points: Optional[S] = None, - domain_range: Optional[V] = None, - group: List[int] = None, - group_colors: List[Any] = None, - group_names: List[str] = None, + axes: t.List[Axes] = None, + n_rows: t.Optional[int] = None, + n_cols: t.Optional[int] = None, + n_points: t.Optional[S] = None, + domain_range: t.Optional[V] = None, + group: t.List[int] = None, + group_colors: t.List[t.Any] = None, + group_names: t.List[str] = None, legend: bool = False, - **kwargs: Any, + **kwargs: t.Any, ) -> Figure: """ Scatter FDataGrid object. From ebdc94cf464d6e33024861c0ad54efba5dc34e57 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 9 Feb 2021 14:43:50 +0100 Subject: [PATCH 086/417] ch --- skfda/exploratory/visualization/representation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 8b3be45b2..3c621f533 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -94,12 +94,11 @@ def _get_color_info(fdata, group, group_names, group_colors, legend, kwargs): return sample_colors, patches - class GraphPlot: """ Class used to plot the FDatGrid object graph as hypersurfaces. - + A list of variables (probably depths) can be used as an argument to display the functions wtih a gradient of colors. 
@@ -151,13 +150,14 @@ def __init__( else: self.max_grad = max_grad + aux_list = gradient_color_list - self.min_grad + self.gradient_list = ( - (gradient_color_list - self.min_grad) / - (self.max_grad - self.min_grad) + aux_list / (self.max_grad - self.min_grad) ) else: self.gradient_list = None - + def plot( self, chart: Figure = None, @@ -177,7 +177,7 @@ def plot( ) -> Figure: """ Plot the graph. - + Plots each coordinate separately. If the :term:`domain` is one dimensional, the plots will be curves, and if it is two dimensional, they will be surfaces. There are two styles of From 45fd95684166168cd6105d27b37875bce8122e74 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 9 Feb 2021 14:51:58 +0100 Subject: [PATCH 087/417] ddplot perfect --- setup.cfg | 2 + skfda/exploratory/visualization/_ddplot.py | 139 +++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 skfda/exploratory/visualization/_ddplot.py diff --git a/setup.cfg b/setup.cfg index ee1ec96cc..9bec78fce 100644 --- a/setup.cfg +++ b/setup.cfg @@ -46,6 +46,8 @@ ignore = # We love f-strings WPS305, # Implicit string concatenation is useful for exception messages + WPS306, + # No base class needed WPS326, # We allow multiline conditions WPS337, diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py new file mode 100644 index 000000000..c7477014d --- /dev/null +++ b/skfda/exploratory/visualization/_ddplot.py @@ -0,0 +1,139 @@ +"""DD-Plot Module. + +This module contains the necessary functions to construct the DD-Plot. +To do this depth is calculated for the two chosen distributions, and then +a scatter plot is created of this two variables. 
+""" + +from typing import List, Optional, TypeVar + +from matplotlib.axes import Axes +from matplotlib.figure import Figure + +from ...exploratory.depth.multivariate import Depth +from ._utils import _get_figure_and_axes, _set_figure_layout_for_fdata + +T = TypeVar('T') +S = TypeVar('S', Figure, Axes, List[Axes]) + + +class DDPlot: + """ + DDPlot visualization. + + Plot the depth of our fdata elements in two + different distributions, one in each axis. It is useful to understand + how our data is more related with one subset of data / distribution + than another one. + Args: + fdata: functional data set that we want to examine. + dist1: functional data set that represents the first distribution that + we want to use to compute the depth (Depth X). + dist2: functional data set that represents the second distribution that + we want to use to compute the depth (Depth Y). + depth_method: method that will be used to compute the depths of the + data with respect to the distributions. + """ + + def __init__( + self, + fdata: T, + dist1: T, + dist2: T, + depth_method: Depth[T], + ) -> None: + self.fdata = fdata + self.dist1 = dist1 + self.dist2 = dist2 + self.depth_method = depth_method + self.depth_method.fit(fdata) + + def plot( + self, + chart: Optional[S] = None, + *, + fig: Optional[Figure] = None, + axes: List[Axes] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + **kwargs, + ) -> Figure: + """ + Plot DDPlot graph. + + Plot the depth of our fdata elements in the two different + distributions,one in each axis. It is useful to understand how + our data is more related with one subset of data / distribution + than another one. + Args: + chart (figure object, axe or list of axes, optional): figure over + with the graphs are plotted or axis over where the graphs are + plotted. If None and ax is also None, the figure is + initialized. + fig (figure object, optional): figure over with the graphs are + plotted in case ax is not specified. 
If None and ax is also + None, the figure is initialized. + axes (list of axis objects, optional): axis where the graphs + are plotted. If None, see param fig. + n_rows (int, optional): designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols(int, optional): designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. + kwargs: if dim_domain is 1, keyword arguments to be passed to the + matplotlib.pyplot.plot function; if dim_domain is 2, keyword + arguments to be passed to the matplotlib.pyplot.plot_surface + function. + Returns: + fig (figure object): figure object in which the depths will be + scattered. + """ + margin = 0.025 + width_aux_line = 0.35 + color_aux_line = "gray" + + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata( + self.fdata, fig, axes, n_rows, n_cols, + ) + + depth_dist1 = self.depth_method( + self.fdata, distribution=self.dist1, + ) + depth_dist2 = self.depth_method( + self.fdata, distribution=self.dist2, + ) + + if self.fdata.dim_domain == 1: + for i in range(self.fdata.dim_codomain): + axes[i].scatter( + depth_dist1, + depth_dist2, + **kwargs, + ) + + # Set labels of graph + fig.suptitle("DDPlot") + for axe in axes: + axe.set_xlabel("X depth") + axe.set_ylabel("Y depth") + axe.set_xlim( + [ + self.depth_method.min - margin, + self.depth_method.max + margin, + ], + ) + axe.set_ylim( + [ + self.depth_method.min - margin, + self.depth_method.max + margin, + ], + ) + axe.plot( + [0, 1], + linewidth=width_aux_line, + color=color_aux_line, + ) + + return fig From 2e80907f47ac4f1eea8684dee0061591ff30dc80 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 9 Feb 2021 14:56:06 +0100 Subject: [PATCH 088/417] corrrected error axes (line 56) --- skfda/exploratory/visualization/_ddplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index c7477014d..92ae52c03 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -53,7 +53,7 @@ def plot( chart: Optional[S] = None, *, fig: Optional[Figure] = None, - axes: List[Axes] = None, + axes: Optional[List[Axes]] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, **kwargs, From 822670d4188fcafefd6b173881ec96d439e40329 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 10 Feb 2021 10:57:34 +0100 Subject: [PATCH 089/417] changes --- skfda/exploratory/visualization/representation.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 3c621f533..a7e7e2c55 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -176,7 +176,7 @@ def plot( **kwargs: t.Any, ) -> Figure: """ - Plot the graph. + Plot the graph. Plots each coordinate separately. If the :term:`domain` is one dimensional, the plots will be curves, and if it is two @@ -185,7 +185,7 @@ def plot( criteria choosing the colors and a new one that displays the function with a gradient of colors depending on the initial gradient_color_list (normalized in gradient_list). 
- + Args: chart (figure object, axe or list of axes, optional): figure over with the graphs are plotted or axis over where the graphs are @@ -254,7 +254,8 @@ def plot( if self.gradient_list is None: sample_colors, patches = _get_color_info( - self.fdata, group, group_names, group_colors, legend, kwargs) + self.fdata, group, group_names, group_colors, legend, kwargs + ) else: patches = None colormap = matplotlib.cm.get_cmap(colormap_name) @@ -262,7 +263,7 @@ def plot( sample_colors = [None] * self.fdata.n_samples for i in range(self.fdata.n_samples): - sample_colors[i] = colormap.__call__(self.gradient_list[i]) + sample_colors[i] = colormap(self.gradient_list[i]) if self.fdata.dim_domain == 1: @@ -281,7 +282,7 @@ def plot( color_dict["color"] = sample_colors[j] axes[i].plot( - eval_points, mat[j, ..., i].T, **color_dict, **kwargs + eval_points, mat[j, ..., i].T, **color_dict, **kwargs, ) else: From 78f680852f640e843a857cb5f459e454e1969b54 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 10 Feb 2021 11:17:23 +0100 Subject: [PATCH 090/417] some issues solved --- .../visualization/representation.py | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index a7e7e2c55..f5d4cab49 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -16,7 +16,8 @@ T = t.TypeVar('T', FDataGrid, np.ndarray) S = t.TypeVar('S', int, tuple) -V = t.TypeVar('V', tuple, list) +V = t.TypeVar('V', tuple, t.List) +C = t.TypeVar('C', Figure, Axes, t.List[Axes]) def _get_label_colors(n_labels, group_colors=None): @@ -27,7 +28,7 @@ def _get_label_colors(n_labels, group_colors=None): raise ValueError( "There must be a color in group_colors " "for each of the labels that appear in " - "group." 
+ "group.", ) else: colormap = matplotlib.cm.get_cmap() @@ -49,7 +50,7 @@ def _get_color_info(fdata, group, group_names, group_colors, legend, kwargs): if group_colors is not None: group_colors_array = np.array( - [group_colors[g] for g in group_unique] + [group_colors[g] for g in group_unique], ) else: prop_cycle = matplotlib.rcParams['axes.prop_cycle'] @@ -65,7 +66,7 @@ def _get_color_info(fdata, group, group_names, group_colors, legend, kwargs): if group_names is not None: group_names_array = np.array( - [group_names[g] for g in group_unique] + [group_names[g] for g in group_unique], ) elif legend is True: group_names_array = group_unique @@ -95,7 +96,6 @@ def _get_color_info(fdata, group, group_names, group_colors, legend, kwargs): class GraphPlot: - """ Class used to plot the FDatGrid object graph as hypersurfaces. @@ -160,9 +160,9 @@ def __init__( def plot( self, - chart: Figure = None, + chart: t.Optional[C] = None, *, - fig: Figure = None, + fig: t.Optional[Figure] = None, axes: t.List[Axes] = None, n_rows: t.Optional[int] = None, n_cols: t.Optional[int] = None, @@ -254,7 +254,7 @@ def plot( if self.gradient_list is None: sample_colors, patches = _get_color_info( - self.fdata, group, group_names, group_colors, legend, kwargs + self.fdata, group, group_names, group_colors, legend, kwargs, ) else: patches = None @@ -295,7 +295,7 @@ def plot( elif len(n_points) != 2: raise ValueError( f"n_points should be a number or a tuple of " - f"length 2, and has length {len(n_points)}" + f"length 2, and has length {len(n_points)}", ) # Axes where will be evaluated @@ -326,7 +326,6 @@ def plot( class ScatterPlot: - """ Class used to scatter the FDataGrid object. @@ -335,6 +334,7 @@ class ScatterPlot: grid_points (ndarray): points to plot. 
""" + def __init__( self, fdata: T, @@ -345,13 +345,12 @@ def __init__( def plot( self, - chart: Figure = None, + chart: t.Optional[C] = None, *, - fig: Figure = None, - axes: t.List[Axes] = None, + fig: t.Optional[Figure] = None, + axes: t.Optional[C] = None, n_rows: t.Optional[int] = None, n_cols: t.Optional[int] = None, - n_points: t.Optional[S] = None, domain_range: t.Optional[V] = None, group: t.List[int] = None, group_colors: t.List[t.Any] = None, @@ -423,7 +422,7 @@ def plot( fig, axes = _get_figure_and_axes(chart, fig, axes) fig, axes = _set_figure_layout_for_fdata( - self.fdata, fig, axes, n_rows, n_cols + self.fdata, fig, axes, n_rows, n_cols, ) if domain_range is None: From 701e2cbbf273fee3f79773e7f0a0479ffef72ab5 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 11 Feb 2021 16:13:10 +0100 Subject: [PATCH 091/417] outliegream done --- skfda/exploratory/visualization/__init__.py | 1 + .../exploratory/visualization/_outliergram.py | 153 ++++++++++++++++++ 2 files changed, 154 insertions(+) create mode 100644 skfda/exploratory/visualization/_outliergram.py diff --git a/skfda/exploratory/visualization/__init__.py b/skfda/exploratory/visualization/__init__.py index 838c653f2..799fdc05e 100644 --- a/skfda/exploratory/visualization/__init__.py +++ b/skfda/exploratory/visualization/__init__.py @@ -1,4 +1,5 @@ from . import clustering, representation from ._boxplot import Boxplot, SurfaceBoxplot from ._magnitude_shape_plot import MagnitudeShapePlot +from ._outliergram import Outliergram from .fpca import plot_fpca_perturbation_graphs diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py new file mode 100644 index 000000000..579c770bf --- /dev/null +++ b/skfda/exploratory/visualization/_outliergram.py @@ -0,0 +1,153 @@ +"""Outliergram Module. + +This module contains the methods used to plot shapes in order to detect +shape outliers in our dataset. 
In order to do this, we plot the +Modified Band Depth and Modified Epigraph Index, that will help us detect +this outliers. The motivation of the method is that it is easy to find +magnitude outliers, but there is a necessity of capturing this other type. +""" + +from typing import List, Optional, TypeVar + +import scipy.integrate as integrate +from matplotlib.axes import Axes +from matplotlib.figure import Figure +from scipy.stats import rankdata + +from ..depth._depth import ModifiedBandDepth +from ._utils import _get_figure_and_axes, _set_figure_layout_for_fdata + +T = TypeVar('T') +S = TypeVar('S', Figure, Axes, List[Axes]) + + +class Outliergram: + """ + Outliergram method of visualization. + + Plots the Modified Band Depth (MBD) on the Y axis and the Modified + Epigraph Index (MEI) on the X axis. This points will create the form of + a parabola. The shape outliers will be the points that appear far from + this curve. + Args: + fdata: functional data set that we want to examine. + Attributes: + mbd: result of the calculation of the Modified Band Depth on our + dataset. Represents the mean time a curve stays between other pair + of curves, being a good measure of centrality. + mei: result of the calculation of the Modified Epigraph Index on our + dataset. Represents the mean time a curve stays below other curve. + References: + López-Pintado S., Romo J.. (2011). A half-region depth for functional + data, Computational Statistics & Data Analysis, volume 55 + (page 1679-1695). + Arribas-Gil A., Romo J.. 
Shape outlier detection and visualization for + functional data: the outliergram + https://academic.oup.com/biostatistics/article/15/4/603/266279 + """ + + def __init__( + self, + fdata: T, + ) -> None: + self.fdata = fdata + self.mbd = ModifiedBandDepth(self.fdata) + self.mei = self.modified_epigraph_index_list() + + def plot( + self, + chart: Optional[S] = None, + *, + fig: Optional[Figure] = None, + axes: Optional[List[Axes]] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + **kwargs, + ) -> Figure: + """ + Plot Outliergram. + + Plots the Modified Band Depth (MBD) on the Y axis and the Modified + Epigraph Index (MEI) on the X axis. This points will create the form of + a parabola. The shape outliers will be the points that appear far from + this curve. + Args: + chart (figure object, axe or list of axes, optional): figure over + with the graphs are plotted or axis over where the graphs are + plotted. If None and ax is also None, the figure is + initialized. + fig (figure object, optional): figure over with the graphs are + plotted in case ax is not specified. If None and ax is also + None, the figure is initialized. + axes (list of axis objects, optional): axis where the graphs + are plotted. If None, see param fig. + n_rows (int, optional): designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols(int, optional): designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. + kwargs: if dim_domain is 1, keyword arguments to be passed to the + matplotlib.pyplot.plot function; if dim_domain is 2, keyword + arguments to be passed to the matplotlib.pyplot.plot_surface + function. + Returns: + fig (figure object): figure object in which the depths will be + scattered. 
+ """ + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata( + self.fdata, fig, axes, n_rows, n_cols, + ) + + if self.fdata.dim_domain == 1: + + for i in range(self.fdata.dim_codomain): + + axes[i].scatter( + self.mei, + self.mbd, + **kwargs, + ) + + # Set labels of graph + fig.suptitle("Outliergram") + for axe in Axes: + axe.set_xlabel("MEI") + axe.set_ylabel("MBD") + axe.set_xlim([0, 1]) + axe.set_ylim([ + ModifiedBandDepth().min, + ModifiedBandDepth().max, + ]) + + return fig + + def modified_epigraph_index_list(self): + """ + Calculate the Modified Epigraph Index of a FData. + + The MEI represents the mean time a curve stays below other curve. + In this case we will calculate the MEI for each curve in relation + with all the other curves of our dataset. + """ + interval_len = ( + self.fdata.domain_range()[0][1] + - self.fdata.domain_range()[0][0] + ) + + function = rankdata( + -self.fdata.data_matrix, + method='max', + axis=0, + ) - 1 + + integrand = integrate.simps( + function, + x=self.fdata.grid_points[0], + axis=1, + ) + + integrand /= (interval_len * self.fdata.n_samples) + + return integrand From 4af8b68aa700eb0040be364778aab120f54e8b3e Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 11 Feb 2021 17:00:44 +0100 Subject: [PATCH 092/417] works --- skfda/exploratory/visualization/_outliergram.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 579c770bf..30a4c45c4 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -51,7 +51,9 @@ def __init__( fdata: T, ) -> None: self.fdata = fdata - self.mbd = ModifiedBandDepth(self.fdata) + self.depth = ModifiedBandDepth() + self.depth.fit(fdata) + self.mbd = self.depth(fdata) self.mei = self.modified_epigraph_index_list() def plot( @@ -112,13 +114,13 @@ def plot( # Set 
labels of graph fig.suptitle("Outliergram") - for axe in Axes: + for axe in axes: axe.set_xlabel("MEI") axe.set_ylabel("MBD") axe.set_xlim([0, 1]) axe.set_ylim([ - ModifiedBandDepth().min, - ModifiedBandDepth().max, + self.depth.min, + self.depth.max, ]) return fig @@ -132,8 +134,8 @@ def modified_epigraph_index_list(self): with all the other curves of our dataset. """ interval_len = ( - self.fdata.domain_range()[0][1] - - self.fdata.domain_range()[0][0] + self.fdata.domain_range[0][1] + - self.fdata.domain_range[0][0] ) function = rankdata( From 43c6360dbe87e213abdad1f22bf77eb2d518061f Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sat, 13 Feb 2021 15:18:36 +0100 Subject: [PATCH 093/417] phaseplaneplot --- skfda/exploratory/visualization/__init__.py | 1 + .../visualization/_phase_plane_plot.py | 79 +++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 skfda/exploratory/visualization/_phase_plane_plot.py diff --git a/skfda/exploratory/visualization/__init__.py b/skfda/exploratory/visualization/__init__.py index 838c653f2..acb79b6ab 100644 --- a/skfda/exploratory/visualization/__init__.py +++ b/skfda/exploratory/visualization/__init__.py @@ -1,4 +1,5 @@ from . 
import clustering, representation from ._boxplot import Boxplot, SurfaceBoxplot from ._magnitude_shape_plot import MagnitudeShapePlot +from ._phase_plane_plot import PhasePlanePlot from .fpca import plot_fpca_perturbation_graphs diff --git a/skfda/exploratory/visualization/_phase_plane_plot.py b/skfda/exploratory/visualization/_phase_plane_plot.py new file mode 100644 index 000000000..f6f9fb387 --- /dev/null +++ b/skfda/exploratory/visualization/_phase_plane_plot.py @@ -0,0 +1,79 @@ +from typing import List, Optional, TypeVar + +from matplotlib.axes import Axes +from matplotlib.figure import Figure + +from ._utils import ( + _get_figure_and_axes, + _set_figure_layout, + _set_figure_layout_for_fdata, +) + +T = TypeVar('T') +S = TypeVar('S', Figure, Axes, List[Axes]) + + +class PhasePlanePlot: + + def __init__( + self, + fdata1: T, + fdata2: Optional[T] = None, + ) -> None: + self.fdata1 = fdata1 + self.fdata2 = fdata2 + + def plot( + self, + chart: Optional[S] = None, + *, + fig: Optional[Figure] = None, + axes: Optional[List[Axes]] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + **kwargs, + ) -> Figure: + fig, axes = _get_figure_and_axes(chart, fig, axes) + + if ( + self.fdata1.dim_domain == 1 + and self.fdata1.dim_codomain == 2 + and self.fdata2 is None + ): + fig, axes = _set_figure_layout( + fig, + axes, + dim=self.fdata1.dim_domain + 1, + n_axes=1, + ) + axes[0].plot( + self.fdata1.data_matrix[0][0].tolist(), + self.fdata1.data_matrix[0][1].tolist(), + **kwargs, + ) + + elif ( + self.fdata1.dim_domain == self.fdata2.dim_domain + and self.fdata1.dim_codomain == self.fdata2.dim_codomain + and self.fdata1.dim_domain == 1 + and self.fdata1.dim_codomain == 1 + ): + fig, axes = _set_figure_layout_for_fdata( + self.fdata1, fig, axes, + ) + axes[0].plot( + self.fdata1.data_matrix[0].tolist(), + self.fdata2.data_matrix[0].tolist(), + **kwargs, + ) + + else: + raise ValueError( + "Error in data arguments", + ) + + fig.suptitle("Phase-Plane 
Plot") + axes[0].set_xlabel("Function 1") + axes[0].set_ylabel("Function 2") + + return fig \ No newline at end of file From 5177e63f5f71d9b724150270d1eda3e5f25240fb Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sat, 13 Feb 2021 15:19:50 +0100 Subject: [PATCH 094/417] correct errrors --- skfda/exploratory/visualization/_phase_plane_plot.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/skfda/exploratory/visualization/_phase_plane_plot.py b/skfda/exploratory/visualization/_phase_plane_plot.py index f6f9fb387..f62c6e05e 100644 --- a/skfda/exploratory/visualization/_phase_plane_plot.py +++ b/skfda/exploratory/visualization/_phase_plane_plot.py @@ -36,14 +36,14 @@ def plot( fig, axes = _get_figure_and_axes(chart, fig, axes) if ( - self.fdata1.dim_domain == 1 + self.fdata1.dim_domain == 1 and self.fdata1.dim_codomain == 2 and self.fdata2 is None ): fig, axes = _set_figure_layout( - fig, - axes, - dim=self.fdata1.dim_domain + 1, + fig, + axes, + dim=self.fdata1.dim_domain + 1, n_axes=1, ) axes[0].plot( @@ -76,4 +76,4 @@ def plot( axes[0].set_xlabel("Function 1") axes[0].set_ylabel("Function 2") - return fig \ No newline at end of file + return fig From f5716a93b1ab09d89117da30e12a244a2e4a9ffd Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sat, 13 Feb 2021 17:44:27 +0100 Subject: [PATCH 095/417] errors corrected --- skfda/exploratory/visualization/__init__.py | 1 + skfda/exploratory/visualization/_ddplot.py | 86 ++++++++++----------- 2 files changed, 40 insertions(+), 47 deletions(-) diff --git a/skfda/exploratory/visualization/__init__.py b/skfda/exploratory/visualization/__init__.py index 838c653f2..75c7749eb 100644 --- a/skfda/exploratory/visualization/__init__.py +++ b/skfda/exploratory/visualization/__init__.py @@ -1,4 +1,5 @@ from . 
import clustering, representation from ._boxplot import Boxplot, SurfaceBoxplot +from ._ddplot import DDPlot from ._magnitude_shape_plot import MagnitudeShapePlot from .fpca import plot_fpca_perturbation_graphs diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 92ae52c03..6d564b5c8 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -5,7 +5,7 @@ a scatter plot is created of this two variables. """ -from typing import List, Optional, TypeVar +from typing import List, Optional, TypeVar, Union from matplotlib.axes import Axes from matplotlib.figure import Figure @@ -14,7 +14,6 @@ from ._utils import _get_figure_and_axes, _set_figure_layout_for_fdata T = TypeVar('T') -S = TypeVar('S', Figure, Axes, List[Axes]) class DDPlot: @@ -33,6 +32,11 @@ class DDPlot: we want to use to compute the depth (Depth Y). depth_method: method that will be used to compute the depths of the data with respect to the distributions. + Attributes: + depth_dist1: result of the calculation of the depth_method into our + first distribution (dist1). + depth_dist2: result of the calculation of the depth_method into our + second distribution (dist2). """ def __init__( @@ -43,19 +47,21 @@ def __init__( depth_method: Depth[T], ) -> None: self.fdata = fdata - self.dist1 = dist1 - self.dist2 = dist2 self.depth_method = depth_method self.depth_method.fit(fdata) + self.depth_dist1 = self.depth_method( + self.fdata, distribution=dist1, + ) + self.depth_dist2 = self.depth_method( + self.fdata, distribution=dist2, + ) def plot( self, - chart: Optional[S] = None, + chart: Union[Figure, Axes, List[Axes]] = None, *, fig: Optional[Figure] = None, axes: Optional[List[Axes]] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, **kwargs, ) -> Figure: """ @@ -75,12 +81,6 @@ def plot( None, the figure is initialized. axes (list of axis objects, optional): axis where the graphs are plotted. 
If None, see param fig. - n_rows (int, optional): designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols(int, optional): designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. kwargs: if dim_domain is 1, keyword arguments to be passed to the matplotlib.pyplot.plot function; if dim_domain is 2, keyword arguments to be passed to the matplotlib.pyplot.plot_surface @@ -95,45 +95,37 @@ def plot( fig, axes = _get_figure_and_axes(chart, fig, axes) fig, axes = _set_figure_layout_for_fdata( - self.fdata, fig, axes, n_rows, n_cols, + self.fdata, fig, axes, ) - depth_dist1 = self.depth_method( - self.fdata, distribution=self.dist1, - ) - depth_dist2 = self.depth_method( - self.fdata, distribution=self.dist2, - ) + axe = axes[0] - if self.fdata.dim_domain == 1: - for i in range(self.fdata.dim_codomain): - axes[i].scatter( - depth_dist1, - depth_dist2, - **kwargs, - ) + axe.scatter( + self.depth_dist1, + self.depth_dist2, + **kwargs, + ) # Set labels of graph fig.suptitle("DDPlot") - for axe in axes: - axe.set_xlabel("X depth") - axe.set_ylabel("Y depth") - axe.set_xlim( - [ - self.depth_method.min - margin, - self.depth_method.max + margin, - ], - ) - axe.set_ylim( - [ - self.depth_method.min - margin, - self.depth_method.max + margin, - ], - ) - axe.plot( - [0, 1], - linewidth=width_aux_line, - color=color_aux_line, - ) + axe.set_xlabel("X depth") + axe.set_ylabel("Y depth") + axe.set_xlim( + [ + self.depth_method.min - margin, + self.depth_method.max + margin, + ], + ) + axe.set_ylim( + [ + self.depth_method.min - margin, + self.depth_method.max + margin, + ], + ) + axe.plot( + [0, 1], + linewidth=width_aux_line, + color=color_aux_line, + ) return fig From fc330601493daac56bdadd8bcc04cb166c5f06ab Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sat, 13 Feb 2021 17:52:35 +0100 Subject: [PATCH 096/417] 
corrections --- skfda/exploratory/visualization/__init__.py | 2 ++ skfda/exploratory/visualization/_ddplot.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/__init__.py b/skfda/exploratory/visualization/__init__.py index 75c7749eb..be67b3b80 100644 --- a/skfda/exploratory/visualization/__init__.py +++ b/skfda/exploratory/visualization/__init__.py @@ -1,3 +1,5 @@ +"""Initialization module of visualization folder.""" + from . import clustering, representation from ._boxplot import Boxplot, SurfaceBoxplot from ._ddplot import DDPlot diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 6d564b5c8..f739c4887 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -61,7 +61,7 @@ def plot( chart: Union[Figure, Axes, List[Axes]] = None, *, fig: Optional[Figure] = None, - axes: Optional[List[Axes]] = None, + axes: List[Axes] = None, **kwargs, ) -> Figure: """ From 1c9335162b800c0ed3d1b8f8e14685c5c605f8db Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sat, 13 Feb 2021 18:13:13 +0100 Subject: [PATCH 097/417] corrections --- .../exploratory/visualization/_outliergram.py | 35 +++++++++---------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 30a4c45c4..cd2ad1efc 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -14,10 +14,10 @@ from matplotlib.figure import Figure from scipy.stats import rankdata +from ... 
import FDataGrid from ..depth._depth import ModifiedBandDepth from ._utils import _get_figure_and_axes, _set_figure_layout_for_fdata -T = TypeVar('T') S = TypeVar('S', Figure, Axes, List[Axes]) @@ -48,7 +48,7 @@ class Outliergram: def __init__( self, - fdata: T, + fdata: FDataGrid, ) -> None: self.fdata = fdata self.depth = ModifiedBandDepth() @@ -102,30 +102,27 @@ def plot( self.fdata, fig, axes, n_rows, n_cols, ) - if self.fdata.dim_domain == 1: + axe = axes[0] - for i in range(self.fdata.dim_codomain): - - axes[i].scatter( - self.mei, - self.mbd, - **kwargs, - ) + axe.scatter( + self.mei, + self.mbd, + **kwargs, + ) # Set labels of graph fig.suptitle("Outliergram") - for axe in axes: - axe.set_xlabel("MEI") - axe.set_ylabel("MBD") - axe.set_xlim([0, 1]) - axe.set_ylim([ - self.depth.min, - self.depth.max, - ]) + axe.set_xlabel("MEI") + axe.set_ylabel("MBD") + axe.set_xlim([0, 1]) + axe.set_ylim([ + self.depth.min, + self.depth.max, + ]) return fig - def modified_epigraph_index_list(self): + def modified_epigraph_index_list(self) -> List[float]: """ Calculate the Modified Epigraph Index of a FData. 
From b482f6e7e2fc32f15599e1eb2a21a7ed89ffc3ad Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sat, 13 Feb 2021 18:37:28 +0100 Subject: [PATCH 098/417] functions called in _ddplot are typed --- skfda/exploratory/visualization/_utils.py | 27 +++++++++++++++++------ 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 021f11832..1fb6428fb 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -1,13 +1,16 @@ import io import math import re +from typing import List, Optional, Tuple, Union, TypeVar -import matplotlib.axes +import matplotlib.axes as a import matplotlib.backends.backend_svg -import matplotlib.figure - +import matplotlib.figure as f import matplotlib.pyplot as plt +T = TypeVar('T') + + non_close_text = '[^>]*?' svg_width_regex = re.compile( f'()') @@ -43,7 +46,11 @@ def _figure_to_svg(figure): return new_data -def _get_figure_and_axes(chart=None, fig=None, axes=None): +def _get_figure_and_axes( + chart: Union[f.Figure, a.Axes, List[a.Axes]] = None, + fig: Optional[f.Figure] = None, + axes: List[a.Axes] = None, +) -> Tuple[f.Figure, a.Axes]: """Obtain the figure and axes from the arguments.""" num_defined = sum(e is not None for e in (chart, fig, axes)) @@ -160,8 +167,13 @@ def _set_figure_layout(fig=None, axes=None, return fig, axes -def _set_figure_layout_for_fdata(fdata, fig=None, axes=None, - n_rows=None, n_cols=None): +def _set_figure_layout_for_fdata( + fdata: T, + fig: Optional[f.Figure] = None, + axes: List[a.Axes] = None, + n_rows: int = None, + n_cols: int = None, +) -> Tuple[f.Figure, a.Axes]: """Set the figure axes for plotting a :class:`~skfda.representation.FData` object. 
@@ -242,8 +254,9 @@ def _change_luminosity(color, amount=0.5): Note: Based on https://stackoverflow.com/a/49601444/2455333 """ - import matplotlib.colors as mc import colorsys + + import matplotlib.colors as mc try: c = mc.cnames[color] except TypeError: From 30664f991b8669eec105e949607f9c727ac0d093 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sat, 13 Feb 2021 18:45:35 +0100 Subject: [PATCH 099/417] correction --- skfda/exploratory/visualization/_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 1fb6428fb..197ffea0f 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -171,8 +171,8 @@ def _set_figure_layout_for_fdata( fdata: T, fig: Optional[f.Figure] = None, axes: List[a.Axes] = None, - n_rows: int = None, - n_cols: int = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, ) -> Tuple[f.Figure, a.Axes]: """Set the figure axes for plotting a :class:`~skfda.representation.FData` object. From 32d0b9bfba009b6ff5b8dbb7c8fb3e9455fb51a0 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Feb 2021 14:50:54 +0100 Subject: [PATCH 100/417] correctyion --- skfda/exploratory/visualization/_ddplot.py | 6 ++---- skfda/exploratory/visualization/_utils.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index f739c4887..bf3893c7c 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -61,7 +61,7 @@ def plot( chart: Union[Figure, Axes, List[Axes]] = None, *, fig: Optional[Figure] = None, - axes: List[Axes] = None, + axe: Optional[Axes] = None, **kwargs, ) -> Figure: """ @@ -79,7 +79,7 @@ def plot( fig (figure object, optional): figure over with the graphs are plotted in case ax is not specified. 
If None and ax is also None, the figure is initialized. - axes (list of axis objects, optional): axis where the graphs + axes (axis, optional): axis where the graphs are plotted. If None, see param fig. kwargs: if dim_domain is 1, keyword arguments to be passed to the matplotlib.pyplot.plot function; if dim_domain is 2, keyword @@ -98,8 +98,6 @@ def plot( self.fdata, fig, axes, ) - axe = axes[0] - axe.scatter( self.depth_dist1, self.depth_dist2, diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 197ffea0f..8bb8203bc 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -49,7 +49,7 @@ def _figure_to_svg(figure): def _get_figure_and_axes( chart: Union[f.Figure, a.Axes, List[a.Axes]] = None, fig: Optional[f.Figure] = None, - axes: List[a.Axes] = None, + axes: Union[a.Axes, List[a.Axes]] = None, ) -> Tuple[f.Figure, a.Axes]: """Obtain the figure and axes from the arguments.""" From 61e96863e22a8d3fc4ae9d8fcf8533ed24631cf2 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Feb 2021 14:52:20 +0100 Subject: [PATCH 101/417] corrected again --- skfda/exploratory/visualization/_ddplot.py | 6 +++--- skfda/exploratory/visualization/_utils.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index bf3893c7c..2a04cab9d 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -93,9 +93,9 @@ def plot( width_aux_line = 0.35 color_aux_line = "gray" - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata( - self.fdata, fig, axes, + fig, axe = _get_figure_and_axes(chart, fig, axe) + fig, axe = _set_figure_layout_for_fdata( + self.fdata, fig, axe, ) axe.scatter( diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 
8bb8203bc..67a230c2e 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -49,7 +49,7 @@ def _figure_to_svg(figure): def _get_figure_and_axes( chart: Union[f.Figure, a.Axes, List[a.Axes]] = None, fig: Optional[f.Figure] = None, - axes: Union[a.Axes, List[a.Axes]] = None, + axes: Union[a.Axes, List[a.Axes]] = None, ) -> Tuple[f.Figure, a.Axes]: """Obtain the figure and axes from the arguments.""" @@ -170,7 +170,7 @@ def _set_figure_layout(fig=None, axes=None, def _set_figure_layout_for_fdata( fdata: T, fig: Optional[f.Figure] = None, - axes: List[a.Axes] = None, + axes: Union[a.Axes, List[a.Axes]] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, ) -> Tuple[f.Figure, a.Axes]: From 6154749183bea402f3f1956a391b4a43abb2a9e9 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Feb 2021 15:02:05 +0100 Subject: [PATCH 102/417] corrections --- skfda/exploratory/visualization/_ddplot.py | 20 ++++++++--------- skfda/exploratory/visualization/_utils.py | 25 +++++++++++----------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 2a04cab9d..7ec4f3f5a 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -61,7 +61,7 @@ def plot( chart: Union[Figure, Axes, List[Axes]] = None, *, fig: Optional[Figure] = None, - axe: Optional[Axes] = None, + ax: Axes = None, **kwargs, ) -> Figure: """ @@ -93,12 +93,12 @@ def plot( width_aux_line = 0.35 color_aux_line = "gray" - fig, axe = _get_figure_and_axes(chart, fig, axe) - fig, axe = _set_figure_layout_for_fdata( - self.fdata, fig, axe, + fig, ax = _get_figure_and_axes(chart, fig, ax) + fig, ax = _set_figure_layout_for_fdata( + self.fdata, fig, ax, ) - axe.scatter( + ax.scatter( self.depth_dist1, self.depth_dist2, **kwargs, @@ -106,21 +106,21 @@ def plot( # Set labels of graph fig.suptitle("DDPlot") - 
axe.set_xlabel("X depth") - axe.set_ylabel("Y depth") - axe.set_xlim( + ax.set_xlabel("X depth") + ax.set_ylabel("Y depth") + ax.set_xlim( [ self.depth_method.min - margin, self.depth_method.max + margin, ], ) - axe.set_ylim( + ax.set_ylim( [ self.depth_method.min - margin, self.depth_method.max + margin, ], ) - axe.plot( + ax.plot( [0, 1], linewidth=width_aux_line, color=color_aux_line, diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 67a230c2e..56f5bc7a8 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -1,15 +1,14 @@ import io import math import re -from typing import List, Optional, Tuple, Union, TypeVar +from typing import List, Optional, Tuple, Union -import matplotlib.axes as a +import matplotlib.axes as axes import matplotlib.backends.backend_svg -import matplotlib.figure as f +import matplotlib.figure as figure import matplotlib.pyplot as plt -T = TypeVar('T') - +from ...representation._functional_data import FData non_close_text = '[^>]*?' 
svg_width_regex = re.compile( @@ -47,10 +46,10 @@ def _figure_to_svg(figure): def _get_figure_and_axes( - chart: Union[f.Figure, a.Axes, List[a.Axes]] = None, - fig: Optional[f.Figure] = None, - axes: Union[a.Axes, List[a.Axes]] = None, -) -> Tuple[f.Figure, a.Axes]: + chart: Union[figure.Figure, axes.Axes, List[axes.Axes]] = None, + fig: Optional[figure.Figure] = None, + axes: Union[axes.Axes, List[axes.Axes]] = None, +) -> Tuple[figure.Figure, axes.Axes]: """Obtain the figure and axes from the arguments.""" num_defined = sum(e is not None for e in (chart, fig, axes)) @@ -168,12 +167,12 @@ def _set_figure_layout(fig=None, axes=None, def _set_figure_layout_for_fdata( - fdata: T, - fig: Optional[f.Figure] = None, - axes: Union[a.Axes, List[a.Axes]] = None, + fdata: FData, + fig: Optional[figure.Figure] = None, + axes: Union[axes.Axes, List[axes.Axes]] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, -) -> Tuple[f.Figure, a.Axes]: +) -> Tuple[figure.Figure, axes.Axes]: """Set the figure axes for plotting a :class:`~skfda.representation.FData` object. 
From 584ab0e53c5860d9e53b0bdd676247b1e90ba2ef Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Feb 2021 15:18:19 +0100 Subject: [PATCH 103/417] changes --- skfda/exploratory/visualization/_ddplot.py | 10 ++++++---- skfda/exploratory/visualization/_utils.py | 8 ++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 7ec4f3f5a..bbb73d511 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -61,7 +61,7 @@ def plot( chart: Union[Figure, Axes, List[Axes]] = None, *, fig: Optional[Figure] = None, - ax: Axes = None, + axes: Optional[List[Axes]] = None, **kwargs, ) -> Figure: """ @@ -93,11 +93,13 @@ def plot( width_aux_line = 0.35 color_aux_line = "gray" - fig, ax = _get_figure_and_axes(chart, fig, ax) - fig, ax = _set_figure_layout_for_fdata( - self.fdata, fig, ax, + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata( + self.fdata, fig, axes, ) + ax = axes[0] + ax.scatter( self.depth_dist1, self.depth_dist2, diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 56f5bc7a8..8aa75b2cf 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -48,8 +48,8 @@ def _figure_to_svg(figure): def _get_figure_and_axes( chart: Union[figure.Figure, axes.Axes, List[axes.Axes]] = None, fig: Optional[figure.Figure] = None, - axes: Union[axes.Axes, List[axes.Axes]] = None, -) -> Tuple[figure.Figure, axes.Axes]: + axes: List[axes.Axes] = None, +) -> Tuple[figure.Figure, List[axes.Axes]]: """Obtain the figure and axes from the arguments.""" num_defined = sum(e is not None for e in (chart, fig, axes)) @@ -169,10 +169,10 @@ def _set_figure_layout(fig=None, axes=None, def _set_figure_layout_for_fdata( fdata: FData, fig: Optional[figure.Figure] = None, - axes: Union[axes.Axes, 
List[axes.Axes]] = None, + axes: List[axes.Axes] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, -) -> Tuple[figure.Figure, axes.Axes]: +) -> Tuple[figure.Figure, List[axes.Axes]]: """Set the figure axes for plotting a :class:`~skfda.representation.FData` object. From d59ef5720bd8c29ab0092b1d9a42bd4dc5e939de Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Feb 2021 15:32:15 +0100 Subject: [PATCH 104/417] cahgnes --- skfda/exploratory/visualization/_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 8aa75b2cf..32e7ee72b 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -48,7 +48,7 @@ def _figure_to_svg(figure): def _get_figure_and_axes( chart: Union[figure.Figure, axes.Axes, List[axes.Axes]] = None, fig: Optional[figure.Figure] = None, - axes: List[axes.Axes] = None, + axes: Optional[List[axes.Axes]] = None, ) -> Tuple[figure.Figure, List[axes.Axes]]: """Obtain the figure and axes from the arguments.""" @@ -169,7 +169,7 @@ def _set_figure_layout(fig=None, axes=None, def _set_figure_layout_for_fdata( fdata: FData, fig: Optional[figure.Figure] = None, - axes: List[axes.Axes] = None, + axes: Optional[List[axes.Axes]] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, ) -> Tuple[figure.Figure, List[axes.Axes]]: From d0f2baf91476cd3994431dcd3afb06f2a04000ea Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Feb 2021 15:43:01 +0100 Subject: [PATCH 105/417] changed wps442 --- skfda/exploratory/visualization/_utils.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 32e7ee72b..d3a117616 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -3,10 +3,10 @@ import re 
from typing import List, Optional, Tuple, Union -import matplotlib.axes as axes import matplotlib.backends.backend_svg -import matplotlib.figure as figure import matplotlib.pyplot as plt +from matplotlib.axes import Axes +from matplotlib.figure import Figure from ...representation._functional_data import FData @@ -46,10 +46,10 @@ def _figure_to_svg(figure): def _get_figure_and_axes( - chart: Union[figure.Figure, axes.Axes, List[axes.Axes]] = None, - fig: Optional[figure.Figure] = None, - axes: Optional[List[axes.Axes]] = None, -) -> Tuple[figure.Figure, List[axes.Axes]]: + chart: Union[Figure, Axes, List[Axes]] = None, + fig: Optional[Figure] = None, + axes: Optional[List[Axes]] = None, +) -> Tuple[Figure, List[Axes]]: """Obtain the figure and axes from the arguments.""" num_defined = sum(e is not None for e in (chart, fig, axes)) @@ -168,11 +168,11 @@ def _set_figure_layout(fig=None, axes=None, def _set_figure_layout_for_fdata( fdata: FData, - fig: Optional[figure.Figure] = None, - axes: Optional[List[axes.Axes]] = None, + fig: Optional[Figure] = None, + axes: Optional[List[Axes]] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, -) -> Tuple[figure.Figure, List[axes.Axes]]: +) -> Tuple[Figure, List[Axes]]: """Set the figure axes for plotting a :class:`~skfda.representation.FData` object. 
From 439d585a6c2732e36630fda1cc30e7dc35edbaa0 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Feb 2021 17:45:15 +0100 Subject: [PATCH 106/417] changes --- skfda/exploratory/visualization/_ddplot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index bbb73d511..9e26e74b0 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -13,7 +13,7 @@ from ...exploratory.depth.multivariate import Depth from ._utils import _get_figure_and_axes, _set_figure_layout_for_fdata -T = TypeVar('T') +T = TypeVar('T', bound="FData") class DDPlot: @@ -61,7 +61,7 @@ def plot( chart: Union[Figure, Axes, List[Axes]] = None, *, fig: Optional[Figure] = None, - axes: Optional[List[Axes]] = None, + axes: Optional[Sequence[Axes]] = None, **kwargs, ) -> Figure: """ @@ -92,7 +92,7 @@ def plot( margin = 0.025 width_aux_line = 0.35 color_aux_line = "gray" - + #List axes fig, axes = _get_figure_and_axes(chart, fig, axes) fig, axes = _set_figure_layout_for_fdata( self.fdata, fig, axes, From 0790cfdb6902fc7e22a06f798691bcbbd6c35ff8 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Feb 2021 20:31:43 +0100 Subject: [PATCH 107/417] phaseplane --- .../visualization/_phase_plane_plot.py | 52 ++++++++----------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/skfda/exploratory/visualization/_phase_plane_plot.py b/skfda/exploratory/visualization/_phase_plane_plot.py index f62c6e05e..4eca167bb 100644 --- a/skfda/exploratory/visualization/_phase_plane_plot.py +++ b/skfda/exploratory/visualization/_phase_plane_plot.py @@ -2,14 +2,13 @@ from matplotlib.axes import Axes from matplotlib.figure import Figure +from ...representation import FData from ._utils import ( _get_figure_and_axes, - _set_figure_layout, _set_figure_layout_for_fdata, ) -T = TypeVar('T') S = TypeVar('S', Figure, Axes, List[Axes]) @@ -17,8 +16,8 
@@ class PhasePlanePlot: def __init__( self, - fdata1: T, - fdata2: Optional[T] = None, + fdata1: FData, + fdata2: Optional[FData] = None, ) -> None: self.fdata1 = fdata1 self.fdata2 = fdata2 @@ -29,44 +28,39 @@ def plot( *, fig: Optional[Figure] = None, axes: Optional[List[Axes]] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, **kwargs, ) -> Figure: fig, axes = _get_figure_and_axes(chart, fig, axes) if ( - self.fdata1.dim_domain == 1 - and self.fdata1.dim_codomain == 2 - and self.fdata2 is None + self.fdata2 is not None ): - fig, axes = _set_figure_layout( - fig, - axes, - dim=self.fdata1.dim_domain + 1, - n_axes=1, - ) - axes[0].plot( - self.fdata1.data_matrix[0][0].tolist(), - self.fdata1.data_matrix[0][1].tolist(), - **kwargs, - ) + if ( + self.fdata1.dim_domain == self.fdata2.dim_domain + and self.fdata1.dim_codomain == self.fdata2.dim_codomain + and self.fdata1.dim_domain == 1 + and self.fdata1.dim_codomain == 1 + ): + fd = self.fdata1.concatenate() + else: + raise ValueError( + "Error in data arguments", + ) + else: + fd = self.fdata1 - elif ( - self.fdata1.dim_domain == self.fdata2.dim_domain - and self.fdata1.dim_codomain == self.fdata2.dim_codomain - and self.fdata1.dim_domain == 1 - and self.fdata1.dim_codomain == 1 + if ( + fd.dim_domain == 1 + and fd.dim_codomain == 2 ): fig, axes = _set_figure_layout_for_fdata( - self.fdata1, fig, axes, + fd, fig, axes, ) axes[0].plot( - self.fdata1.data_matrix[0].tolist(), - self.fdata2.data_matrix[0].tolist(), + fd.data_matrix[0][0].tolist(), + fd.data_matrix[0][1].tolist(), **kwargs, ) - else: raise ValueError( "Error in data arguments", From 2edc7bb8667bb847c2aeaaffb557fab95b3788a3 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Feb 2021 20:52:23 +0100 Subject: [PATCH 108/417] changes --- skfda/exploratory/visualization/_ddplot.py | 23 ++++++++++++---------- skfda/exploratory/visualization/_utils.py | 2 +- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git 
a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 9e26e74b0..56451126e 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -11,9 +11,10 @@ from matplotlib.figure import Figure from ...exploratory.depth.multivariate import Depth +from ...representation._functional_data import FData from ._utils import _get_figure_and_axes, _set_figure_layout_for_fdata -T = TypeVar('T', bound="FData") +T = TypeVar('T', bound=FData) class DDPlot: @@ -61,7 +62,7 @@ def plot( chart: Union[Figure, Axes, List[Axes]] = None, *, fig: Optional[Figure] = None, - axes: Optional[Sequence[Axes]] = None, + ax: Optional[Axes] = None, **kwargs, ) -> Figure: """ @@ -92,15 +93,17 @@ def plot( margin = 0.025 width_aux_line = 0.35 color_aux_line = "gray" - #List axes + + axes = [] + axes.append(ax) fig, axes = _get_figure_and_axes(chart, fig, axes) fig, axes = _set_figure_layout_for_fdata( self.fdata, fig, axes, ) - ax = axes[0] + ax_fig = axes[0] - ax.scatter( + ax_fig.scatter( self.depth_dist1, self.depth_dist2, **kwargs, @@ -108,21 +111,21 @@ def plot( # Set labels of graph fig.suptitle("DDPlot") - ax.set_xlabel("X depth") - ax.set_ylabel("Y depth") - ax.set_xlim( + ax_fig.set_xlabel("X depth") + ax_fig.set_ylabel("Y depth") + ax_fig.set_xlim( [ self.depth_method.min - margin, self.depth_method.max + margin, ], ) - ax.set_ylim( + ax_fig.set_ylim( [ self.depth_method.min - margin, self.depth_method.max + margin, ], ) - ax.plot( + ax_fig.plot( [0, 1], linewidth=width_aux_line, color=color_aux_line, diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index d3a117616..35122d50c 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -1,7 +1,7 @@ import io import math import re -from typing import List, Optional, Tuple, Union +from typing import Optional, List, Tuple, Union import 
matplotlib.backends.backend_svg import matplotlib.pyplot as plt From c0e80525e6ea3f1d7ea6daf2845decaa7321d59c Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Feb 2021 20:52:30 +0100 Subject: [PATCH 109/417] cahgnes2 --- skfda/exploratory/visualization/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 35122d50c..167116f59 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -47,7 +47,7 @@ def _figure_to_svg(figure): def _get_figure_and_axes( chart: Union[Figure, Axes, List[Axes]] = None, - fig: Optional[Figure] = None, + fig: Optional[Figure] = None, axes: Optional[List[Axes]] = None, ) -> Tuple[Figure, List[Axes]]: """Obtain the figure and axes from the arguments.""" From 3ec4006e0e495834dca1c90d20c2ef65954e5032 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 16 Feb 2021 13:12:17 +0100 Subject: [PATCH 110/417] changes --- skfda/exploratory/visualization/_ddplot.py | 20 +++++++++----------- skfda/exploratory/visualization/_utils.py | 14 +++++++------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 56451126e..ada5a3594 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -80,7 +80,7 @@ def plot( fig (figure object, optional): figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes (axis, optional): axis where the graphs + ax (axis, optional): axis where the graphs are plotted. If None, see param fig. 
kwargs: if dim_domain is 1, keyword arguments to be passed to the matplotlib.pyplot.plot function; if dim_domain is 2, keyword @@ -94,16 +94,14 @@ def plot( width_aux_line = 0.35 color_aux_line = "gray" - axes = [] - axes.append(ax) - fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _get_figure_and_axes(chart, fig, ax) fig, axes = _set_figure_layout_for_fdata( self.fdata, fig, axes, ) - ax_fig = axes[0] + ax = axes[0] - ax_fig.scatter( + ax.scatter( self.depth_dist1, self.depth_dist2, **kwargs, @@ -111,21 +109,21 @@ def plot( # Set labels of graph fig.suptitle("DDPlot") - ax_fig.set_xlabel("X depth") - ax_fig.set_ylabel("Y depth") - ax_fig.set_xlim( + ax.set_xlabel("X depth") + ax.set_ylabel("Y depth") + ax.set_xlim( [ self.depth_method.min - margin, self.depth_method.max + margin, ], ) - ax_fig.set_ylim( + ax.set_ylim( [ self.depth_method.min - margin, self.depth_method.max + margin, ], ) - ax_fig.plot( + ax.plot( [0, 1], linewidth=width_aux_line, color=color_aux_line, diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 167116f59..2097adebf 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -48,7 +48,7 @@ def _figure_to_svg(figure): def _get_figure_and_axes( chart: Union[Figure, Axes, List[Axes]] = None, fig: Optional[Figure] = None, - axes: Optional[List[Axes]] = None, + axes: Optional[Tuple[Axes, List[Axes]]] = None, ) -> Tuple[Figure, List[Axes]]: """Obtain the figure and axes from the arguments.""" @@ -62,22 +62,22 @@ def _get_figure_and_axes( if isinstance(chart, matplotlib.figure.Figure): fig = chart else: - axes = chart + axes_r = chart if fig is None and axes is None: fig = _create_figure() - axes = [] + axes_r = [] elif fig is not None: - axes = fig.axes + axes_r = fig.axes else: if isinstance(axes, matplotlib.axes.Axes): - axes = [axes] + axes_r = [axes] - fig = axes[0].figure + fig = axes_r[0].figure - return fig, axes + return 
fig, axes_r def _get_axes_shape(n_axes, n_rows=None, n_cols=None): From 750b5537edab85ab69740d52f5c61d3a9b540f6c Mon Sep 17 00:00:00 2001 From: mellamansanchez <38490771+mellamansanchez@users.noreply.github.com> Date: Tue, 16 Feb 2021 16:02:29 +0100 Subject: [PATCH 111/417] Update skfda/exploratory/visualization/_ddplot.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Ramos Carreño --- skfda/exploratory/visualization/_ddplot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index ada5a3594..1bde6180b 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -73,14 +73,14 @@ def plot( our data is more related with one subset of data / distribution than another one. Args: - chart (figure object, axe or list of axes, optional): figure over + chart: figure over with the graphs are plotted or axis over where the graphs are plotted. If None and ax is also None, the figure is initialized. - fig (figure object, optional): figure over with the graphs are + fig: figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - ax (axis, optional): axis where the graphs + ax: axis where the graphs are plotted. If None, see param fig. 
kwargs: if dim_domain is 1, keyword arguments to be passed to the matplotlib.pyplot.plot function; if dim_domain is 2, keyword From bfb88380497503f61e11343142ebe6297f8a69b8 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 16 Feb 2021 16:17:44 +0100 Subject: [PATCH 112/417] changed --- skfda/exploratory/visualization/_ddplot.py | 18 ++++++++-------- skfda/exploratory/visualization/_utils.py | 24 +++++++++++----------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 1bde6180b..7c0be7c02 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -5,7 +5,7 @@ a scatter plot is created of this two variables. """ -from typing import List, Optional, TypeVar, Union +from typing import Optional, TypeVar, Union from matplotlib.axes import Axes from matplotlib.figure import Figure @@ -59,7 +59,7 @@ def __init__( def plot( self, - chart: Union[Figure, Axes, List[Axes]] = None, + chart: Union[Figure, Axes, None] = None, *, fig: Optional[Figure] = None, ax: Optional[Axes] = None, @@ -73,15 +73,13 @@ def plot( our data is more related with one subset of data / distribution than another one. Args: - chart: figure over - with the graphs are plotted or axis over where the graphs are - plotted. If None and ax is also None, the figure is - initialized. - fig: figure over with the graphs are - plotted in case ax is not specified. If None and ax is also + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also None, the figure is initialized. - ax: axis where the graphs - are plotted. If None, see param fig. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. + ax: axis where the graphs are plotted. If None, see param fig. 
kwargs: if dim_domain is 1, keyword arguments to be passed to the matplotlib.pyplot.plot function; if dim_domain is 2, keyword arguments to be passed to the matplotlib.pyplot.plot_surface diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 2097adebf..f99f1ffa5 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -1,7 +1,7 @@ import io import math import re -from typing import Optional, List, Tuple, Union +from typing import Optional, Sequence, Tuple, Union import matplotlib.backends.backend_svg import matplotlib.pyplot as plt @@ -46,10 +46,10 @@ def _figure_to_svg(figure): def _get_figure_and_axes( - chart: Union[Figure, Axes, List[Axes]] = None, + chart: Union[Figure, Axes, Sequence[Axes], None] = None, fig: Optional[Figure] = None, - axes: Optional[Tuple[Axes, List[Axes]]] = None, -) -> Tuple[Figure, List[Axes]]: + axes: Union[Axes, Sequence[Axes], None] = None, +) -> Tuple[Figure, Sequence[Axes]]: """Obtain the figure and axes from the arguments.""" num_defined = sum(e is not None for e in (chart, fig, axes)) @@ -62,22 +62,22 @@ def _get_figure_and_axes( if isinstance(chart, matplotlib.figure.Figure): fig = chart else: - axes_r = chart + axes = chart if fig is None and axes is None: fig = _create_figure() - axes_r = [] + axes = [] elif fig is not None: - axes_r = fig.axes + axes = fig.axes else: if isinstance(axes, matplotlib.axes.Axes): - axes_r = [axes] + axes = [axes] - fig = axes_r[0].figure + fig = axes[0].figure - return fig, axes_r + return fig, axes def _get_axes_shape(n_axes, n_rows=None, n_cols=None): @@ -169,10 +169,10 @@ def _set_figure_layout(fig=None, axes=None, def _set_figure_layout_for_fdata( fdata: FData, fig: Optional[Figure] = None, - axes: Optional[List[Axes]] = None, + axes: Optional[Sequence[Axes]] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, -) -> Tuple[Figure, List[Axes]]: +) -> Tuple[Figure, Sequence[Axes]]: 
"""Set the figure axes for plotting a :class:`~skfda.representation.FData` object. From 6e8fe57e6191bb3d713f20c41adf195cef1cf128 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 22 Feb 2021 14:58:58 +0100 Subject: [PATCH 113/417] done --- .../visualization/_phase_plane_plot.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/skfda/exploratory/visualization/_phase_plane_plot.py b/skfda/exploratory/visualization/_phase_plane_plot.py index 4eca167bb..853f748f2 100644 --- a/skfda/exploratory/visualization/_phase_plane_plot.py +++ b/skfda/exploratory/visualization/_phase_plane_plot.py @@ -6,7 +6,7 @@ from ._utils import ( _get_figure_and_axes, - _set_figure_layout_for_fdata, + _set_figure_layout, ) S = TypeVar('S', Figure, Axes, List[Axes]) @@ -41,24 +41,26 @@ def plot( and self.fdata1.dim_domain == 1 and self.fdata1.dim_codomain == 1 ): - fd = self.fdata1.concatenate() + self.fd_final = self.fdata1.concatenate( + self.fdata2, as_coordinates=True + ) else: raise ValueError( "Error in data arguments", ) else: - fd = self.fdata1 + self.fd_final = self.fdata1 if ( - fd.dim_domain == 1 - and fd.dim_codomain == 2 + self.fd_final.dim_domain == 1 + and self.fd_final.dim_codomain == 2 ): - fig, axes = _set_figure_layout_for_fdata( - fd, fig, axes, + fig, axes = _set_figure_layout( + fig, axes, dim=2, n_axes=1, ) axes[0].plot( - fd.data_matrix[0][0].tolist(), - fd.data_matrix[0][1].tolist(), + self.fd_final.data_matrix[0][:,0].tolist(), + self.fd_final.data_matrix[0][:,1].tolist(), **kwargs, ) else: From 5a92721d799a3709c9aff0f64ba67986c7fc534c Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 22 Feb 2021 15:12:16 +0100 Subject: [PATCH 114/417] phase_plane --- .../visualization/_phase_plane_plot.py | 51 ++++++++++++++++--- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/skfda/exploratory/visualization/_phase_plane_plot.py b/skfda/exploratory/visualization/_phase_plane_plot.py index 853f748f2..7033c139d 100644 
--- a/skfda/exploratory/visualization/_phase_plane_plot.py +++ b/skfda/exploratory/visualization/_phase_plane_plot.py @@ -1,3 +1,11 @@ +"""Phase-Plane Plot Module. + +This module contains the functionality in charge of plotting +two different functions as coordinates, this can be done giving +one FData, with domain 1 and codomain 2, or giving two FData, both +of them with domain 1 and codomain 1. +""" + from typing import List, Optional, TypeVar from matplotlib.axes import Axes @@ -13,7 +21,18 @@ class PhasePlanePlot: + """Phase-Plane Plot visualization. + This class contains the functionality in charge of plotting + two different functions as coordinates, this can be done giving + one FData, with domain 1 and codomain 2, or giving two FData, both + of them with domain 1 and codomain 1. + Args: + fdata1: functional data set that we will use for the graph. If it has + a dim_codomain = 1, the fdata2 will be needed. + fdata2: optional functional data set, that will be needed if the fdata1 + has dim_codomain = 1. + """ def __init__( self, fdata1: FData, @@ -27,10 +46,28 @@ def plot( chart: Optional[S] = None, *, fig: Optional[Figure] = None, - axes: Optional[List[Axes]] = None, + ax: Optional[Axes] = None, **kwargs, ) -> Figure: - fig, axes = _get_figure_and_axes(chart, fig, axes) + """ + Plot Phase-Plane graph. + + Plot the functions as coordinates. If two functions are passed + it will concatenate both into one only FData. + Args: + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. + ax: axis where the graphs are plotted. If None, see param fig. + kwargs: optional arguments. + Returns: + fig (figure object): figure object in which the phase-plane + graph will be plotted. 
+ """ + fig, axes = _get_figure_and_axes(chart, fig, ax) if ( self.fdata2 is not None @@ -46,7 +83,8 @@ def plot( ) else: raise ValueError( - "Error in data arguments", + "Error in data arguments,", + "codomain or domain is not correct.", ) else: self.fd_final = self.fdata1 @@ -59,13 +97,14 @@ def plot( fig, axes, dim=2, n_axes=1, ) axes[0].plot( - self.fd_final.data_matrix[0][:,0].tolist(), - self.fd_final.data_matrix[0][:,1].tolist(), + self.fd_final.data_matrix[0][:, 0].tolist(), + self.fd_final.data_matrix[0][:, 1].tolist(), **kwargs, ) else: raise ValueError( - "Error in data arguments", + "Error in data arguments,", + "codomain or domain is not correct.", ) fig.suptitle("Phase-Plane Plot") From 7610d819a49b165408419b55c77b1c2b6b01db1d Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 24 Feb 2021 21:22:34 +0100 Subject: [PATCH 115/417] changes --- skfda/exploratory/visualization/_utils.py | 21 +++- .../visualization/representation.py | 107 ++++++++++-------- 2 files changed, 73 insertions(+), 55 deletions(-) diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index f99f1ffa5..2688da4fd 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -1,7 +1,7 @@ import io import math import re -from typing import Optional, Sequence, Tuple, Union +from typing import List, Optional, Sequence, Tuple, Union import matplotlib.backends.backend_svg import matplotlib.pyplot as plt @@ -100,10 +100,14 @@ def _get_axes_shape(n_axes, n_rows=None, n_cols=None): return n_rows, n_cols - -def _set_figure_layout(fig=None, axes=None, - dim=2, n_axes=1, - n_rows=None, n_cols=None): +def _set_figure_layout( + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + dim: int = 2, + n_axes: int = 1, + n_rows: int = None, + n_cols: int = None +) -> Tuple[Figure, Sequence[Axes]]: """Set the figure axes for plotting. 
Args: @@ -202,7 +206,12 @@ def _set_figure_layout_for_fdata( n_rows=n_rows, n_cols=n_cols) -def _set_labels(fdata, fig=None, axes=None, patches=None): +def _set_labels( + fdata: FData, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + patches = Optional[List[matplotlib.patches.Patch]], +): """Set labels if any. Args: diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index f5d4cab49..2f05f3b0e 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -1,4 +1,4 @@ -import typing as t +from typing import Any, Dict, List, Optional, Sequence, Tuple, TypeVar, Union import matplotlib.cm import matplotlib.patches @@ -8,19 +8,21 @@ from ... import FDataGrid from ..._utils import _to_domain_range, constants +from ...representation._functional_data import FData +from ...representation._typing import DomainRangeLike from ._utils import ( _get_figure_and_axes, _set_figure_layout_for_fdata, _set_labels, ) -T = t.TypeVar('T', FDataGrid, np.ndarray) -S = t.TypeVar('S', int, tuple) -V = t.TypeVar('V', tuple, t.List) -C = t.TypeVar('C', Figure, Axes, t.List[Axes]) +T = TypeVar('T', FDataGrid, np.ndarray) -def _get_label_colors(n_labels, group_colors=None): +def _get_label_colors( + n_labels: int, + group_colors: Union[Sequence[Any], None], +) -> np.ndarray: """Get the colors of each label""" if group_colors is not None: @@ -37,7 +39,14 @@ def _get_label_colors(n_labels, group_colors=None): return group_colors -def _get_color_info(fdata, group, group_names, group_colors, legend, kwargs): +def _get_color_info( + fdata: T, + group: Union[Sequence[int], None], + group_names: Union[Sequence[str], None], + group_colors: Union[Sequence[Any], None], + legend: bool, + kwargs: Any, +) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: patches = None @@ -125,15 +134,15 @@ class GraphPlot: """ def __init__( self, - fdata: T, - 
gradient_color_list: t.List[float] = None, - max_grad: t.Optional[float] = None, - min_grad: t.Optional[float] = None, + fdata: FData, + gradient_color_list: Union[Sequence[float], None], + max_grad: Optional[float] = None, + min_grad: Optional[float] = None, ) -> None: self.fdata = fdata self.gradient_color_list = gradient_color_list if self.gradient_color_list is not None: - if len(gradient_color_list) != fdata.n_samples: + if len(self.gradient_color_list) != fdata.n_samples: raise ValueError( "The length of the gradient color" "list should be the same as the number" @@ -141,16 +150,16 @@ def __init__( ) if min_grad is None: - self.min_grad = min(gradient_color_list) + self.min_grad = min(self.gradient_color_list) else: self.min_grad = min_grad if max_grad is None: - self.max_grad = max(gradient_color_list) + self.max_grad = max(self.gradient_color_list) else: self.max_grad = max_grad - aux_list = gradient_color_list - self.min_grad + aux_list = self.gradient_color_list - self.min_grad self.gradient_list = ( aux_list / (self.max_grad - self.min_grad) @@ -160,20 +169,20 @@ def __init__( def plot( self, - chart: t.Optional[C] = None, + chart: Union[Figure, Axes, None] = None, *, - fig: t.Optional[Figure] = None, - axes: t.List[Axes] = None, - n_rows: t.Optional[int] = None, - n_cols: t.Optional[int] = None, - n_points: t.Optional[S] = None, - domain_range: t.Optional[V] = None, - group: t.List[int] = None, - group_colors: t.List[t.Any] = None, - group_names: t.List[str] = None, + fig: Optional[Figure] = None, + axes: Optional[Axes] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + n_points: Union[int, Tuple[int, int], None], + domain_range: Union[Tuple[int, int], DomainRangeLike, None], + group: Union[Sequence[int], None], + group_colors: Union[Sequence[Any], None], + group_names: Union[Sequence[str], None], colormap_name: str = 'autumn', legend: bool = False, - **kwargs: t.Any, + **kwargs: Any, ) -> Figure: """ Plot the graph. 
@@ -185,7 +194,7 @@ def plot( criteria choosing the colors and a new one that displays the function with a gradient of colors depending on the initial gradient_color_list (normalized in gradient_list). - + Args: chart (figure object, axe or list of axes, optional): figure over with the graphs are plotted or axis over where the graphs are @@ -268,13 +277,13 @@ def plot( if self.fdata.dim_domain == 1: if n_points is None: - n_points = constants.N_POINTS_UNIDIMENSIONAL_PLOT_MESH + self.n_points = constants.N_POINTS_UNIDIMENSIONAL_PLOT_MESH # Evaluates the object in a linspace eval_points = np.linspace(*domain_range[0], n_points) mat = self.fdata(eval_points) - color_dict = {} + color_dict: Dict[str, Any] = {} for i in range(self.fdata.dim_codomain): for j in range(self.fdata.n_samples): @@ -289,9 +298,9 @@ def plot( # Selects the number of points if n_points is None: - n_points = 2 * (constants.N_POINTS_SURFACE_PLOT_AX,) - elif np.isscalar(n_points): - n_points = (n_points, n_points) + n_points_tuple = 2 * (constants.N_POINTS_SURFACE_PLOT_AX,) + elif isinstance(n_points, int): + n_points_tuple = (n_points, n_points) elif len(n_points) != 2: raise ValueError( f"n_points should be a number or a tuple of " @@ -299,8 +308,8 @@ def plot( ) # Axes where will be evaluated - x = np.linspace(*domain_range[0], n_points[0]) - y = np.linspace(*domain_range[1], n_points[1]) + x = np.linspace(*domain_range[0], n_points_tuple[0]) + y = np.linspace(*domain_range[1], n_points_tuple[1]) # Evaluation of the functional object Z = self.fdata((x, y), grid=True) @@ -337,30 +346,30 @@ class ScatterPlot: def __init__( self, - fdata: T, + fdata: FData, grid_points: np.ndarray = None, ) -> None: self.fdata = fdata self.grid_points = grid_points - + def plot( self, - chart: t.Optional[C] = None, + chart: Union[Figure, Axes, None] = None, *, - fig: t.Optional[Figure] = None, - axes: t.Optional[C] = None, - n_rows: t.Optional[int] = None, - n_cols: t.Optional[int] = None, - domain_range: 
t.Optional[V] = None, - group: t.List[int] = None, - group_colors: t.List[t.Any] = None, - group_names: t.List[str] = None, + fig: Optional[Figure] = None, + axes: Optional[Axes] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + domain_range: Union[Tuple[int, int], Sequence[Tuple[int, int]], None], + group: Union[Sequence[int], None], + group_colors: Union[Sequence[Any], None], + group_names: Union[Sequence[str], None], legend: bool = False, - **kwargs: t.Any, + **kwargs: Any, ) -> Figure: """ Scatter FDataGrid object. - + Args: chart (figure object, axe or list of axes, optional): figure over with the graphs are plotted or axis over where the graphs are @@ -369,7 +378,7 @@ def plot( fig (figure object, optional): figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes (list of axis objects, optional): axis over where the graphs + axes (list of axis objects, optional): axis over where the graphs are plotted. If None, see param fig. n_rows (int, optional): designates the number of rows of the figure to plot the different dimensions of the image. 
Only specified @@ -468,4 +477,4 @@ def plot( _set_labels(self.fdata, fig, axes, patches) - return fig \ No newline at end of file + return fig From d2b840b9315503d3b55740a34aba0f08eeb7cdfb Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 25 Feb 2021 00:46:57 +0100 Subject: [PATCH 116/417] no errors, all solved --- skfda/exploratory/visualization/_utils.py | 2 +- .../visualization/representation.py | 32 +++++++++++-------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 2688da4fd..12bc7c141 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -172,7 +172,7 @@ def _set_figure_layout( def _set_figure_layout_for_fdata( fdata: FData, - fig: Optional[Figure] = None, + fig: Optional[Figure] = None, axes: Optional[Sequence[Axes]] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 2f05f3b0e..69dfbbea2 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -159,20 +159,26 @@ def __init__( else: self.max_grad = max_grad - aux_list = self.gradient_color_list - self.min_grad + aux_list = [ + grad_color - self.min_grad + for grad_color in self.gradient_color_list + ] - self.gradient_list = ( - aux_list / (self.max_grad - self.min_grad) + self.gradient_list: Sequence[float] = ( + [ + aux / (self.max_grad - self.min_grad) + for aux in aux_list + ] ) else: - self.gradient_list = None + self.gradient_list = [] def plot( self, chart: Union[Figure, Axes, None] = None, *, fig: Optional[Figure] = None, - axes: Optional[Axes] = None, + ax: Optional[Axes] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, n_points: Union[int, Tuple[int, int], None], @@ -203,7 +209,7 @@ def plot( fig (figure object, 
optional): figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes (list of axis objects, optional): axis over where the graphs + ax (list of axis objects, optional): axis over where the graphs are plotted. If None, see param fig. n_rows (int, optional): designates the number of rows of the figure to plot the different dimensions of the image. Only specified @@ -251,7 +257,7 @@ def plot( """ - fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _get_figure_and_axes(chart, fig, ax) fig, axes = _set_figure_layout_for_fdata( self.fdata, fig, axes, n_rows, n_cols, ) @@ -261,7 +267,7 @@ def plot( else: domain_range = _to_domain_range(domain_range) - if self.gradient_list is None: + if not self.gradient_list: sample_colors, patches = _get_color_info( self.fdata, group, group_names, group_colors, legend, kwargs, ) @@ -357,7 +363,7 @@ def plot( chart: Union[Figure, Axes, None] = None, *, fig: Optional[Figure] = None, - axes: Optional[Axes] = None, + ax: Optional[Axes] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, domain_range: Union[Tuple[int, int], Sequence[Tuple[int, int]], None], @@ -378,7 +384,7 @@ def plot( fig (figure object, optional): figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes (list of axis objects, optional): axis over where the graphs + ax (list of axis objects, optional): axis over where the graphs are plotted. If None, see param fig. n_rows (int, optional): designates the number of rows of the figure to plot the different dimensions of the image. 
Only specified @@ -429,15 +435,15 @@ def plot( self.grid_points, grid=True, ) - fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _get_figure_and_axes(chart, fig, ax) fig, axes = _set_figure_layout_for_fdata( self.fdata, fig, axes, n_rows, n_cols, ) if domain_range is None: - domain_range = self.fdata.domain_range + self.domain_range = self.fdata.domain_range else: - domain_range = _to_domain_range(domain_range) + self.domain_range = _to_domain_range(domain_range) sample_colors, patches = _get_color_info( self.fdata, group, group_names, group_colors, legend, kwargs, From 0f8c906308ed5d3728097dd4aa5ac27916be2432 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 25 Feb 2021 00:50:14 +0100 Subject: [PATCH 117/417] all correct --- skfda/exploratory/visualization/_phase_plane_plot.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/skfda/exploratory/visualization/_phase_plane_plot.py b/skfda/exploratory/visualization/_phase_plane_plot.py index 7033c139d..46f474864 100644 --- a/skfda/exploratory/visualization/_phase_plane_plot.py +++ b/skfda/exploratory/visualization/_phase_plane_plot.py @@ -6,16 +6,13 @@ of them with domain 1 and codomain 1. """ -from typing import List, Optional, TypeVar +from typing import Any, List, Optional, TypeVar from matplotlib.axes import Axes from matplotlib.figure import Figure -from ...representation import FData -from ._utils import ( - _get_figure_and_axes, - _set_figure_layout, -) +from ...representation import FData +from ._utils import _get_figure_and_axes, _set_figure_layout S = TypeVar('S', Figure, Axes, List[Axes]) @@ -47,7 +44,7 @@ def plot( *, fig: Optional[Figure] = None, ax: Optional[Axes] = None, - **kwargs, + **kwargs: Any, ) -> Figure: """ Plot Phase-Plane graph. 
From 8e3c5f57730f94d8fa1f181803f697f6463b463e Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 25 Feb 2021 17:38:09 +0100 Subject: [PATCH 118/417] all corrected --- .vscode/settings.json | 3 +++ skfda/exploratory/visualization/_utils.py | 11 ++++++----- skfda/exploratory/visualization/representation.py | 11 +++++++---- skfda/representation/_functional_data.py | 2 +- 4 files changed, 17 insertions(+), 10 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..fafb871bf --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.pythonPath": "C:\\ProgramData\\Anaconda3\\python.exe" +} \ No newline at end of file diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 12bc7c141..7b04ccd14 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -100,13 +100,14 @@ def _get_axes_shape(n_axes, n_rows=None, n_cols=None): return n_rows, n_cols + def _set_figure_layout( fig: Optional[Figure] = None, axes: Union[Axes, Sequence[Axes], None] = None, dim: int = 2, n_axes: int = 1, - n_rows: int = None, - n_cols: int = None + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, ) -> Tuple[Figure, Sequence[Axes]]: """Set the figure axes for plotting. @@ -208,10 +209,10 @@ def _set_figure_layout_for_fdata( def _set_labels( fdata: FData, - fig: Optional[Figure] = None, + fig: Optional[Figure] = None, axes: Union[Axes, Sequence[Axes], None] = None, - patches = Optional[List[matplotlib.patches.Patch]], -): + patches: Optional[List[matplotlib.patches.Patch]] = None, +) -> None: """Set labels if any. 
Args: diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 69dfbbea2..95da5eb8e 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -267,7 +267,7 @@ def plot( else: domain_range = _to_domain_range(domain_range) - if not self.gradient_list: + if len(self.gradient_list) == 0: sample_colors, patches = _get_color_info( self.fdata, group, group_names, group_colors, legend, kwargs, ) @@ -459,9 +459,12 @@ def plot( if sample_colors is not None: color_dict["color"] = sample_colors[j] - axes[i].scatter(self.grid_points[0], - evaluated_points[j, ..., i].T, - **color_dict, **kwargs) + axes[i].scatter( + self.grid_points[0], + evaluated_points[j, ..., i].T, + **color_dict, + **kwargs + ) else: diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index f8fa4eaec..043732a47 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -588,7 +588,7 @@ def plot(self, *args: Any, **kwargs: Any) -> Any: """ from ..exploratory.visualization.representation import GraphPlot - return GraphPlot(self).plot(*args, **kwargs) + return GraphPlot(fdata=self).plot(*args, **kwargs) @abstractmethod def copy( From 014701c2d8f43207b5281765b65ed182a45d185a Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 25 Feb 2021 17:43:31 +0100 Subject: [PATCH 119/417] done --- skfda/exploratory/visualization/representation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 95da5eb8e..90e6cd224 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -135,7 +135,7 @@ class GraphPlot: def __init__( self, fdata: FData, - gradient_color_list: Union[Sequence[float], None], + gradient_color_list: 
Union[Sequence[float], None] = None, max_grad: Optional[float] = None, min_grad: Optional[float] = None, ) -> None: From f1226d8855cddf9da5bd8de6769f8563829521da Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 25 Feb 2021 17:49:42 +0100 Subject: [PATCH 120/417] done --- skfda/exploratory/visualization/representation.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 90e6cd224..ec965b10f 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -463,7 +463,7 @@ def plot( self.grid_points[0], evaluated_points[j, ..., i].T, **color_dict, - **kwargs + **kwargs, ) else: @@ -480,9 +480,13 @@ def plot( if sample_colors is not None: color_dict["color"] = sample_colors[j] - axes[i].scatter(X, Y, - evaluated_points[j, ..., i].T, - **color_dict, **kwargs) + axes[i].scatter( + X, + Y, + evaluated_points[j, ..., i].T, + **color_dict, + **kwargs, + ) _set_labels(self.fdata, fig, axes, patches) From 484d7ea24f7f978a4a2bfe15b055e03e78b4100f Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 25 Feb 2021 18:35:47 +0100 Subject: [PATCH 121/417] all solved --- .vscode/settings.json | 3 +++ setup.cfg | 2 ++ skfda/exploratory/visualization/_phase_plane_plot.py | 6 ++++-- 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..fafb871bf --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.pythonPath": "C:\\ProgramData\\Anaconda3\\python.exe" +} \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 69167fdbb..4a13decb8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,6 +41,8 @@ ignore = WPS115, # Trailing underscores are a scikit-learn convention WPS120, + # Access should have depth = 5 + WPS219, # The number of 
imported things may be large, especially for typing WPS235, # We like local imports, thanks diff --git a/skfda/exploratory/visualization/_phase_plane_plot.py b/skfda/exploratory/visualization/_phase_plane_plot.py index 46f474864..e260b537b 100644 --- a/skfda/exploratory/visualization/_phase_plane_plot.py +++ b/skfda/exploratory/visualization/_phase_plane_plot.py @@ -18,7 +18,8 @@ class PhasePlanePlot: - """Phase-Plane Plot visualization. + """ + Phase-Plane Plot visualization. This class contains the functionality in charge of plotting two different functions as coordinates, this can be done giving @@ -30,6 +31,7 @@ class PhasePlanePlot: fdata2: optional functional data set, that will be needed if the fdata1 has dim_codomain = 1. """ + def __init__( self, fdata1: FData, @@ -76,7 +78,7 @@ def plot( and self.fdata1.dim_codomain == 1 ): self.fd_final = self.fdata1.concatenate( - self.fdata2, as_coordinates=True + self.fdata2, as_coordinates=True, ) else: raise ValueError( From d8a9ca84ce81134e29d22b1f528f79c2127f7e11 Mon Sep 17 00:00:00 2001 From: lena123315 <32038332+lena123315@users.noreply.github.com> Date: Mon, 1 Mar 2021 13:06:46 +0100 Subject: [PATCH 122/417] Update kernel_smoothers.py --- skfda/preprocessing/smoothing/kernel_smoothers.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/skfda/preprocessing/smoothing/kernel_smoothers.py b/skfda/preprocessing/smoothing/kernel_smoothers.py index f68bbd2b5..df2ccde85 100644 --- a/skfda/preprocessing/smoothing/kernel_smoothers.py +++ b/skfda/preprocessing/smoothing/kernel_smoothers.py @@ -161,6 +161,9 @@ class NadarayaWatsonSmoother(_LinearKernelSmoother): [ 0.017, 0.053, 0.238, 0.346, 0.346], [ 0.006, 0.022, 0.163, 0.305, 0.503]]) + References: + Wasserman, L. (2006). Local Regression. + In *All of Nonparametric Statistics* (pp. 71). Springer. 
""" def _hat_matrix_function_not_normalized(self, *, delta_x, @@ -264,6 +267,9 @@ class LocalLinearRegressionSmoother(_LinearKernelSmoother): [-0.098, -0.202, -0.003, 0.651, 0.651], [-0.012, -0.032, -0.025, 0.154, 0.915]]) + References: + Wasserman, L. (2006). Local Regression. + In *All of Nonparametric Statistics* (pp. 77). Springer. """ def _hat_matrix_function_not_normalized(self, *, delta_x, @@ -373,6 +379,10 @@ class KNeighborsSmoother(_LinearKernelSmoother): [ 0. , 0. , 0. , 0.5 , 0.5 ], [ 0. , 0. , 0. , 0.5 , 0.5 ]]) + References: + Frederic Ferraty, Philippe Vieu (2006). kNN Estimator. + In *Nonparametric Functional Data Analysis: Theory and Practice* + (pp. 116). Springer. """ def __init__(self, *, smoothing_parameter=None, From 447b8cf04ac39304757dcad3bbae27dcf7b499be Mon Sep 17 00:00:00 2001 From: lena123315 <32038332+lena123315@users.noreply.github.com> Date: Mon, 1 Mar 2021 13:22:14 +0100 Subject: [PATCH 123/417] Update kernel_smoothers.py --- skfda/preprocessing/smoothing/kernel_smoothers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/skfda/preprocessing/smoothing/kernel_smoothers.py b/skfda/preprocessing/smoothing/kernel_smoothers.py index df2ccde85..5f099badc 100644 --- a/skfda/preprocessing/smoothing/kernel_smoothers.py +++ b/skfda/preprocessing/smoothing/kernel_smoothers.py @@ -191,8 +191,8 @@ class LocalLinearRegressionSmoother(_LinearKernelSmoother): \hat{H}_{i,j} = \frac{b_j(t_i')}{\sum_{k=1}^{n}b_k(t_i')} .. math:: - b_j(t') = K\left(\frac{t_j - t'}{h}\right) S_{n,2}(t') - (t_j - t')S_{n, - 1}(t') + b_j(t') = K\left(\frac{t_j - t'}{h}\right) S_{n,2}(t') - + (t_j - t')S_{n,1}(t') .. 
math:: S_{n,k}(t') = \sum_{j=1}^{n}K\left(\frac{t_j-t'}{h}\right)(t_j-t')^k @@ -299,8 +299,8 @@ class KNeighborsSmoother(_LinearKernelSmoother): H_{i,j} =\frac{K\left(\frac{t_j-t_i'}{h_{ik}}\right)}{\sum_{r=1}^n K\left(\frac{t_r-t_i'}{h_{ik}}\right)} - :math:`K(\cdot)` is a kernel function and :math:`h_{ik}` the is the distance - from :math:`t_i'` to the 𝑘-th nearest neighbor of :math:`t_i'`. + :math:`K(\cdot)` is a kernel function and :math:`h_{ik}` the is the + distance from :math:`t_i'` to the 𝑘-th nearest neighbor of :math:`t_i'`. Usually used with the uniform kernel, it takes the average of the closest k points to a given point. From b7815bffad238786df68f5ffb85f9de13dc64de8 Mon Sep 17 00:00:00 2001 From: pedrorponga Date: Mon, 1 Mar 2021 20:06:44 +0100 Subject: [PATCH 124/417] fix requested changes --- skfda/_utils/_utils.py | 4 ++-- .../classification/_centroid_classifiers.py | 4 ++-- skfda/ml/classification/_depth_classifiers.py | 22 +++++++++---------- .../feature_extraction/_ddg_transformer.py | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 9c169dde4..1ebdcc0c3 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -22,6 +22,8 @@ from numpy import ndarray from pandas.api.indexers import check_array_indexer from sklearn.base import clone +from sklearn.preprocessing import LabelEncoder +from sklearn.utils.multiclass import check_classification_targets from ..representation._typing import ( DomainRange, @@ -560,8 +562,6 @@ def _check_estimator(estimator): def _classifier_get_classes(y: ndarray) -> Tuple[ndarray, ndarray]: - from sklearn.preprocessing import LabelEncoder - from sklearn.utils.multiclass import check_classification_targets check_classification_targets(y) diff --git a/skfda/ml/classification/_centroid_classifiers.py b/skfda/ml/classification/_centroid_classifiers.py index 06296970f..db2df1920 100644 --- a/skfda/ml/classification/_centroid_classifiers.py +++ 
b/skfda/ml/classification/_centroid_classifiers.py @@ -104,7 +104,7 @@ def predict(self, X: T) -> ndarray: X: FDataGrid with the test samples. Returns: - ndarray: array of shape (n_samples) or + Array of shape (n_samples) or (n_samples, n_outputs) with class labels for each data sample. """ sklearn_check_is_fitted(self) @@ -219,7 +219,7 @@ def predict(self, X: T) -> ndarray: X: FDataGrid with the test samples. Returns: - ndarray: array of shape (n_samples) or + Array of shape (n_samples) or (n_samples, n_outputs) with class labels for each data sample. """ return self._clf.predict(X) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index 39ff57de7..1b67f7cfd 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -7,7 +7,7 @@ import numpy as np from numpy import ndarray from scipy.interpolate import lagrange -from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.base import BaseEstimator, ClassifierMixin, clone from sklearn.metrics import accuracy_score from sklearn.pipeline import make_pipeline from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted @@ -104,7 +104,7 @@ def predict(self, X: T) -> ndarray: X: FDataGrid with the test samples. Returns: - ndarray: array of shape (n_samples) with class labels + Array of shape (n_samples) with class labels for each data sample. """ sklearn_check_is_fitted(self) @@ -212,7 +212,7 @@ def fit(self, X: T, y: ndarray) -> DDClassifier[T]: ] polynomial_elements = combinations( - range(len(dd_coordinates[0])), + range(len(dd_coordinates[0])), # noqa: WPS518 self.degree, ) @@ -237,7 +237,7 @@ def fit(self, X: T, y: ndarray) -> DDClassifier[T]: if (new_accuracy > accuracy): accuracy = new_accuracy - self.poly = poly + self.poly_ = poly return self @@ -248,7 +248,7 @@ def predict(self, X: T) -> ndarray: X: FDataGrid with the test samples. 
Returns: - ndarray: array of shape (n_samples) with class labels + Array of shape (n_samples) with class labels for each data sample. """ sklearn_check_is_fitted(self) @@ -258,7 +258,7 @@ def predict(self, X: T) -> ndarray: for depth_method in self.class_depth_methods_ ] - predicted_values = np.polyval(self.poly, dd_coordinates[0]) + predicted_values = np.polyval(self.poly_, dd_coordinates[0]) return self._classes[( dd_coordinates[1] > predicted_values @@ -363,12 +363,12 @@ def fit(self, X: T, y: ndarray) -> DDGClassifier[T]: Returns: self """ - self.pipeline = make_pipeline( + self._pipeline = make_pipeline( DDGTransformer(self.depth_method), - self.multivariate_classifier, + clone(self.multivariate_classifier), ) - self.pipeline.fit(X, y) + self._pipeline.fit(X, y) return self @@ -379,7 +379,7 @@ def predict(self, X: T) -> ndarray: X: FDataGrid with the test samples. Returns: - ndarray: array of shape (n_samples) with class labels + Array of shape (n_samples) with class labels for each data sample. """ - return self.pipeline.predict(X) + return self._pipeline.predict(X) diff --git a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py index 532888244..ffe150f2d 100644 --- a/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py +++ b/skfda/preprocessing/dim_reduction/feature_extraction/_ddg_transformer.py @@ -122,7 +122,7 @@ def transform(self, X: T) -> ndarray: X: FDataGrid with the test samples. Returns: - ndarray: array of shape (n_samples, G). + Array of shape (n_samples, G). 
""" sklearn_check_is_fitted(self) From 707e67c8b7e364f3f6344e4e54cb381a330371a1 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 3 Mar 2021 14:26:42 +0100 Subject: [PATCH 125/417] solved --- .../exploratory/visualization/_outliergram.py | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index cd2ad1efc..2ac63733b 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -7,8 +7,9 @@ magnitude outliers, but there is a necessity of capturing this other type. """ -from typing import List, Optional, TypeVar +from typing import List, Optional, TypeVar, Union +import numpy as np import scipy.integrate as integrate from matplotlib.axes import Axes from matplotlib.figure import Figure @@ -58,10 +59,10 @@ def __init__( def plot( self, - chart: Optional[S] = None, + chart: Union[Figure, Axes, None] = None, *, fig: Optional[Figure] = None, - axes: Optional[List[Axes]] = None, + axes: Optional[Axes] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, **kwargs, @@ -97,14 +98,16 @@ def plot( fig (figure object): figure object in which the depths will be scattered. 
""" - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata( - self.fdata, fig, axes, n_rows, n_cols, + fig, axes_list = _get_figure_and_axes(chart, fig, axes) + fig, axes_list = _set_figure_layout_for_fdata( + self.fdata, fig, axes_list, n_rows, n_cols, ) + self.fig = fig + self.axes = axes_list - axe = axes[0] + ax = self.axes[0] - axe.scatter( + ax.scatter( self.mei, self.mbd, **kwargs, @@ -112,17 +115,17 @@ def plot( # Set labels of graph fig.suptitle("Outliergram") - axe.set_xlabel("MEI") - axe.set_ylabel("MBD") - axe.set_xlim([0, 1]) - axe.set_ylim([ + ax.set_xlabel("MEI") + ax.set_ylabel("MBD") + ax.set_xlim([0, 1]) + ax.set_ylim([ self.depth.min, self.depth.max, ]) return fig - def modified_epigraph_index_list(self) -> List[float]: + def modified_epigraph_index_list(self) -> np.ndarray: """ Calculate the Modified Epigraph Index of a FData. From 51a28e7324ce173e66fa37a8bb3d333205645769 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 3 Mar 2021 14:31:28 +0100 Subject: [PATCH 126/417] solved --- skfda/exploratory/visualization/_outliergram.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 2ac63733b..f65006356 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -34,10 +34,12 @@ class Outliergram: fdata: functional data set that we want to examine. Attributes: mbd: result of the calculation of the Modified Band Depth on our - dataset. Represents the mean time a curve stays between other pair - of curves, being a good measure of centrality. + dataset. Represents the mean time a curve stays between all the + possible pair of curves we have in our data set, being a good + measure of centrality. mei: result of the calculation of the Modified Epigraph Index on our - dataset. 
Represents the mean time a curve stays below other curve. + dataset. Represents the mean time a curve stays below each curve + in our dataset. References: López-Pintado S., Romo J.. (2011). A half-region depth for functional data, Computational Statistics & Data Analysis, volume 55 From c2bae8e8f2e0a4380aa88bf404c801537db2ab94 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 4 Mar 2021 00:45:26 +0100 Subject: [PATCH 127/417] Improve documentation. Compute intercept in historical linear model. --- docs/modules/ml/clustering.rst | 5 - docs/modules/ml/regression.rst | 4 +- docs/modules/representation.rst | 6 +- .../ml/regression/_historical_linear_model.py | 130 ++++++++++++++++-- 4 files changed, 127 insertions(+), 18 deletions(-) diff --git a/docs/modules/ml/clustering.rst b/docs/modules/ml/clustering.rst index 9101dd5a1..c6099bfdb 100644 --- a/docs/modules/ml/clustering.rst +++ b/docs/modules/ml/clustering.rst @@ -49,8 +49,3 @@ clusterings are supported: :toctree: autosummary skfda.ml.clustering.AgglomerativeClustering - -.. autosummary:: - :toctree: autosummary - - skfda.ml.clustering.Agglomerative clustering diff --git a/docs/modules/ml/regression.rst b/docs/modules/ml/regression.rst index ce416a58a..ea582013a 100644 --- a/docs/modules/ml/regression.rst +++ b/docs/modules/ml/regression.rst @@ -10,12 +10,14 @@ Linear regression A linear regression model is one in which the response variable can be expressed as a linear combination of the covariates (which could be -multivariate or functional). +multivariate or functional). The following linear models are available +in scikit-fda: .. 
autosummary:: :toctree: autosummary skfda.ml.regression.LinearRegression + skfda.ml.regression.HistoricalLinearRegression Nearest Neighbors ----------------- diff --git a/docs/modules/representation.rst b/docs/modules/representation.rst index a5f53408a..16e0a75d1 100644 --- a/docs/modules/representation.rst +++ b/docs/modules/representation.rst @@ -56,14 +56,14 @@ The following classes are used to define different basis for skfda.representation.basis.Monomial skfda.representation.basis.Constant -The following class, allows the construction of a basis for -:math:`\mathbb{R}^n \to \mathbb{R}` functions from -several :math:`\mathbb{R} \to \mathbb{R}` bases. +The following classes, allow the construction of a basis for +:math:`\mathbb{R}^n \to \mathbb{R}` functions. .. autosummary:: :toctree: autosummary skfda.representation.basis.Tensor + skfda.representation.basis.FiniteElement The following class, allows the construction of a basis for :math:`\mathbb{R}^n \to \mathbb{R}^m` functions from diff --git a/skfda/ml/regression/_historical_linear_model.py b/skfda/ml/regression/_historical_linear_model.py index a94a6ee75..dd433470f 100644 --- a/skfda/ml/regression/_historical_linear_model.py +++ b/skfda/ml/regression/_historical_linear_model.py @@ -5,10 +5,11 @@ from typing import Tuple import numpy as np -import scipy.integrate from sklearn.base import BaseEstimator, RegressorMixin from sklearn.utils.validation import check_is_fitted +import scipy.integrate + from ..._utils import _cartesian_product, _pairwise_symmetric from ...representation import FDataBasis, FDataGrid from ...representation.basis import Basis, FiniteElement @@ -208,26 +209,128 @@ class HistoricalLinearRegression( BaseEstimator, # type: ignore RegressorMixin, # type: ignore ): + r"""Historical functional linear regression. + + This is a linear regression method where the covariate and the response are + both functions :math:`\mathbb{R}` to :math:`\mathbb{R}` with the same + domain. 
In order to predict the value of the response function at point + :math:`t`, only the information of the covariate at points :math:`s < t` is + used. Is thus an "historical" model in the sense that, if the domain + represents time, only the data from the past, or historical data, is used + to predict a given point. + + The model assumed by this method is: + + .. math:: + y_i = \alpha(t) + \int_{s_0(t)}^t x_i(s) \beta(s, t) ds + + where :math:`s_0(t) = \max(0, t - \delta)` and :math:`\delta` is a + predefined time lag that can be specified so that points far in the past + do not affect the predicted value. + + Args: + n_intervals: Number of intervals used to create the basis of the + coefficients. This will be a bidimensional + :class:`~skfda.representation.basis.FiniteElement` basis, and + this parameter indirectly specifies the number of + elements of that basis, and thus the granularity. + fit_intercept: Whether to calculate the intercept for this + model. If set to False, no intercept will be used in calculations + (i.e. data is expected to be centered). + lag: The maximum time lag at which points in the past can still + influence the prediction. + + Attributes: + discretized_coef_: The discretized values of the fitted + coefficient function. + intercept_: Independent term in the linear model. Set to the constant + function 0 if `fit_intercept = False`. + + Examples: + + The following example test a case that conforms to this model. 
+ + >>> from skfda import FDataGrid + >>> from skfda.ml.regression import HistoricalLinearRegression + >>> import numpy as np + >>> import scipy.integrate + + >>> random_state = np.random.RandomState(0) + >>> data_matrix = random_state.choice(10, size=(8, 6)) + >>> data_matrix + array([[5, 0, 3, 3, 7, 9], + [3, 5, 2, 4, 7, 6], + [8, 8, 1, 6, 7, 7], + [8, 1, 5, 9, 8, 9], + [4, 3, 0, 3, 5, 0], + [2, 3, 8, 1, 3, 3], + [3, 7, 0, 1, 9, 9], + [0, 4, 7, 3, 2, 7]]) + >>> intercept = random_state.choice(10, size=(1, 6)) + >>> intercept + array([[2, 0, 0, 4, 5, 5]]) + >>> y_data = scipy.integrate.cumtrapz( + ... data_matrix, + ... initial=0, + ... axis=1, + ... ) + intercept + >>> y_data + array([[ 2. , 2.5, 4. , 11. , 17. , 25. ], + [ 2. , 4. , 7.5, 14.5, 21. , 27.5], + [ 2. , 8. , 12.5, 20. , 27.5, 34.5], + [ 2. , 4.5, 7.5, 18.5, 28. , 36.5], + [ 2. , 3.5, 5. , 10.5, 15.5, 18. ], + [ 2. , 2.5, 8. , 16.5, 19.5, 22.5], + [ 2. , 5. , 8.5, 13. , 19. , 28. ], + [ 2. , 2. , 7.5, 16.5, 20. , 24.5]]) + >>> X = FDataGrid(data_matrix) + >>> y = FDataGrid(y_data) + >>> hist = HistoricalLinearRegression(n_intervals=8) + >>> _ = hist.fit(X, y) + >>> hist.predict(X).data_matrix[..., 0].round(1) + array([[ 2. , 2.5, 4. , 11. , 17. , 25. ], + [ 2. , 4. , 7.5, 14.5, 21. , 27.5], + [ 2. , 8. , 12.5, 20. , 27.5, 34.5], + [ 2. , 4.5, 7.5, 18.5, 28. , 36.5], + [ 2. , 3.5, 5. , 10.5, 15.5, 18. ], + [ 2. , 2.5, 8. , 16.5, 19.5, 22.5], + [ 2. , 5. , 8.5, 13. , 19. , 28. ], + [ 2. , 2. , 7.5, 16.5, 20. , 24.5]]) + >>> hist.intercept_.data_matrix[..., 0].round() + array([[ 2., 0., 0., 4., 5., 5.]]) + + References: + Malfait, N., & Ramsay, J. O. (2003). The historical functional linear + model. Canadian Journal of Statistics, 31(2), 115-128. 
+ + """ - def __init__(self, *, n_intervals: int, lag: float=math.inf) -> None: + def __init__( + self, *, n_intervals: int, + fit_intercept: bool = True, + lag: float = math.inf, + ) -> None: self.n_intervals = n_intervals + self.fit_intercept = fit_intercept self.lag = lag def _fit_and_return_matrix(self, X: FDataGrid, y: FDataGrid) -> np.ndarray: + X_centered = X - X.mean() if self.fit_intercept else X + self._pred_points = y.grid_points[0] self._pred_domain_range = y.domain_range[0] self._basis = _create_fem_basis( - start=X.domain_range[0][0], - stop=X.domain_range[0][1], + start=X_centered.domain_range[0][0], + stop=X_centered.domain_range[0][1], n_intervals=self.n_intervals, lag=self.lag, ) design_matrix = _design_matrix( self._basis, - X, + X_centered, pred_points=self._pred_points, ) design_matrix = design_matrix.reshape(-1, design_matrix.shape[-1]) @@ -238,6 +341,11 @@ def _fit_and_return_matrix(self, X: FDataGrid, y: FDataGrid) -> np.ndarray: rcond=None, )[0] + if self.fit_intercept: + self.intercept_ = y.mean() - self._predict_no_intercept(X.mean()) + else: + self.intercept_ = y[0].copy() * 0 + return design_matrix def _prediction_from_matrix(self, design_matrix: np.ndarray) -> FDataGrid: @@ -261,11 +369,9 @@ def fit(self, X: FDataGrid, y: FDataGrid) -> HistoricalLinearRegression: def fit_predict(self, X: FDataGrid, y: FDataGrid) -> FDataGrid: design_matrix = self._fit_and_return_matrix(X, y) - return self._prediction_from_matrix(design_matrix) + return self._prediction_from_matrix(design_matrix) + self.intercept_ - def predict(self, X: FDataGrid) -> FDataGrid: - - check_is_fitted(self) + def _predict_no_intercept(self, X: FDataGrid) -> FDataGrid: design_matrix = _design_matrix( self._basis, @@ -275,3 +381,9 @@ def predict(self, X: FDataGrid) -> FDataGrid: design_matrix = design_matrix.reshape(-1, design_matrix.shape[-1]) return self._prediction_from_matrix(design_matrix) + + def predict(self, X: FDataGrid) -> FDataGrid: + + check_is_fitted(self) + + 
return self._predict_no_intercept(X) + self.intercept_ From d65137751e79b7aee09b59c016a796b2f07ecd3d Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 4 Mar 2021 11:13:58 +0100 Subject: [PATCH 128/417] Fix doctest in MacOS. --- skfda/ml/regression/_historical_linear_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/ml/regression/_historical_linear_model.py b/skfda/ml/regression/_historical_linear_model.py index dd433470f..acdff5002 100644 --- a/skfda/ml/regression/_historical_linear_model.py +++ b/skfda/ml/regression/_historical_linear_model.py @@ -296,7 +296,7 @@ class HistoricalLinearRegression( [ 2. , 2.5, 8. , 16.5, 19.5, 22.5], [ 2. , 5. , 8.5, 13. , 19. , 28. ], [ 2. , 2. , 7.5, 16.5, 20. , 24.5]]) - >>> hist.intercept_.data_matrix[..., 0].round() + >>> abs(hist.intercept_.data_matrix[..., 0].round()) array([[ 2., 0., 0., 4., 5., 5.]]) References: From 2b7ff110c8f2c7b136cb75e020142d940ab67e05 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Fri, 5 Mar 2021 00:11:04 +0100 Subject: [PATCH 129/417] Fix style errors. 
--- setup.cfg | 2 + .../ml/regression/_historical_linear_model.py | 127 ++++++++++-------- 2 files changed, 74 insertions(+), 55 deletions(-) diff --git a/setup.cfg b/setup.cfg index 9c80fea61..bd94edabe 100644 --- a/setup.cfg +++ b/setup.cfg @@ -68,6 +68,8 @@ ignore = WPS436, # Our private objects are fine to import WPS450, + # Numpy mixes bitwise and comparison operators + WPS465, # Explicit len compare is better than implicit WPS507, # Comparison with not is not the same as with equality diff --git a/skfda/ml/regression/_historical_linear_model.py b/skfda/ml/regression/_historical_linear_model.py index acdff5002..40e4eb30c 100644 --- a/skfda/ml/regression/_historical_linear_model.py +++ b/skfda/ml/regression/_historical_linear_model.py @@ -1,7 +1,6 @@ from __future__ import annotations import math -from math import ceil from typing import Tuple import numpy as np @@ -15,6 +14,36 @@ from ...representation.basis import Basis, FiniteElement +def _pairwise_fem_inner_product( + basis_fd: FDataBasis, + fd: FDataGrid, + y_val: float, + grid: np.ndarray, +) -> np.ndarray: + + eval_grid_fem = np.concatenate( + ( + grid[:, None], + np.full( + shape=(len(grid), 1), + fill_value=y_val, + ), + ), + axis=1, + ) + + eval_fem = basis_fd(eval_grid_fem) + eval_fd = fd(grid) + + # Only for scalar valued functions for now + assert eval_fem.shape[-1] == 1 + assert eval_fd.shape[-1] == 1 + + prod = eval_fem[..., 0] * eval_fd[..., 0] + + return scipy.integrate.simps(prod, grid, axis=1) + + def _inner_product_matrix( basis: Basis, fd: FDataGrid, @@ -22,7 +51,9 @@ def _inner_product_matrix( y_val: float, ) -> np.ndarray: """ - Computes the matrix of inner products of an FEM basis with a functional + Compute inner products with the FEM basis. + + Compute the matrix of inner products of an FEM basis with a functional data object over a range of x-values for a fixed y-value. The numerical integration uses Romberg integration with the trapezoidal rule. 
@@ -35,44 +66,21 @@ def _inner_product_matrix( (lower limit, upper limit) y_val: the fixed y value. - """ + Returns: + Matrix of inner products. + """ basis_fd = basis.to_basis() grid = fd.grid_points[0] grid_index = (grid >= limits[0]) & (grid <= limits[1]) grid = grid[grid_index] - def _pairwise_fem_inner_product( - basis_fd: FDataBasis, - fd: FDataGrid, - ) -> np.ndarray: - - eval_grid_fem = np.concatenate( - ( - grid[:, None], - np.full( - shape=(len(grid), 1), - fill_value=y_val, - ) - ), - axis=1, - ) - - eval_fem = basis_fd(eval_grid_fem) - eval_fd = fd(grid) - - # Only for scalar valued functions for now - assert eval_fem.shape[-1] == 1 - assert eval_fd.shape[-1] == 1 - - prod = eval_fem[..., 0] * eval_fd[..., 0] - - return scipy.integrate.simps(prod, grid, axis=1) - return _pairwise_symmetric( _pairwise_fem_inner_product, basis_fd, fd, + y_val=y_val, + grid=grid, ) @@ -82,7 +90,7 @@ def _design_matrix( pred_points: np.ndarray, ) -> np.ndarray: """ - Computes the indefinite integrals of the curves over s up to each t-value. + Compute the indefinite integrals of the curves over s up to each t-value. Arguments: basis: typically a FEM basis defined by a triangulation within a @@ -95,7 +103,6 @@ def _design_matrix( Design matrix. 
""" - matrix = np.array([ _inner_product_matrix(basis, fd, limits=(0, t), y_val=t).T for t in pred_points @@ -117,14 +124,12 @@ def _get_valid_points( full_grid_points[:, 0] <= full_grid_points[:, 1] ] - discrete_lag = np.inf if lag == np.inf else ceil(lag / interval_len) + discrete_lag = np.inf if lag == np.inf else math.ceil(lag / interval_len) - valid_points = past_points[ + return past_points[ past_points[:, 1] - past_points[:, 0] <= discrete_lag ] - return valid_points - def _get_triangles( n_intervals: int, @@ -150,20 +155,23 @@ def _get_triangles( (interval_without_end, interval_without_end), ) + pts_coords_x = pts_coords[:, 0] + pts_coords_y = pts_coords[:, 1] + down_triangles = np.stack( ( - indexes_matrix[pts_coords[:, 0], pts_coords[:, 1]], - indexes_matrix[pts_coords[:, 0] + 1, pts_coords[:, 1]], - indexes_matrix[pts_coords[:, 0] + 1, pts_coords[:, 1] + 1], + indexes_matrix[pts_coords_x, pts_coords_y], + indexes_matrix[pts_coords_x + 1, pts_coords_y], + indexes_matrix[pts_coords_x + 1, pts_coords_y + 1], ), axis=1, ) up_triangles = np.stack( ( - indexes_matrix[pts_coords[:, 0], pts_coords[:, 1]], - indexes_matrix[pts_coords[:, 0], pts_coords[:, 1] + 1], - indexes_matrix[pts_coords[:, 0] + 1, pts_coords[:, 1] + 1], + indexes_matrix[pts_coords_x, pts_coords_y], + indexes_matrix[pts_coords_x, pts_coords_y + 1], + indexes_matrix[pts_coords_x + 1, pts_coords_y + 1], ), axis=1, ) @@ -171,9 +179,7 @@ def _get_triangles( triangles = np.concatenate((down_triangles, up_triangles)) has_wrong_index = np.any(triangles < 0, axis=1) - triangles = triangles[~has_wrong_index] - - return triangles + return triangles[~has_wrong_index] def _create_fem_basis( @@ -206,8 +212,8 @@ def _create_fem_basis( class HistoricalLinearRegression( - BaseEstimator, # type: ignore - RegressorMixin, # type: ignore + BaseEstimator, # type: ignore + RegressorMixin, # type: ignore ): r"""Historical functional linear regression. 
@@ -241,13 +247,12 @@ class HistoricalLinearRegression( influence the prediction. Attributes: - discretized_coef_: The discretized values of the fitted + discretized_coef\_: The discretized values of the fitted coefficient function. - intercept_: Independent term in the linear model. Set to the constant + intercept\_: Independent term in the linear model. Set to the constant function 0 if `fit_intercept = False`. Examples: - The following example test a case that conforms to this model. >>> from skfda import FDataGrid @@ -306,7 +311,9 @@ class HistoricalLinearRegression( """ def __init__( - self, *, n_intervals: int, + self, + *, + n_intervals: int, fit_intercept: bool = True, lag: float = math.inf, ) -> None: @@ -344,7 +351,9 @@ def _fit_and_return_matrix(self, X: FDataGrid, y: FDataGrid) -> np.ndarray: if self.fit_intercept: self.intercept_ = y.mean() - self._predict_no_intercept(X.mean()) else: - self.intercept_ = y[0].copy() * 0 + self.intercept_ = y.copy( + data_matrix=np.zeros_like(y.data_matrix[0]), + ) return design_matrix @@ -361,12 +370,20 @@ def _prediction_from_matrix(self, design_matrix: np.ndarray) -> FDataGrid: domain_range=self._pred_domain_range, ) - def fit(self, X: FDataGrid, y: FDataGrid) -> HistoricalLinearRegression: + def fit( # noqa: D102 + self, + X: FDataGrid, + y: FDataGrid, + ) -> HistoricalLinearRegression: self._fit_and_return_matrix(X, y) return self - def fit_predict(self, X: FDataGrid, y: FDataGrid) -> FDataGrid: + def fit_predict( # noqa: D102 + self, + X: FDataGrid, + y: FDataGrid, + ) -> FDataGrid: design_matrix = self._fit_and_return_matrix(X, y) return self._prediction_from_matrix(design_matrix) + self.intercept_ @@ -382,7 +399,7 @@ def _predict_no_intercept(self, X: FDataGrid) -> FDataGrid: return self._prediction_from_matrix(design_matrix) - def predict(self, X: FDataGrid) -> FDataGrid: + def predict(self, X: FDataGrid) -> FDataGrid: # noqa: D102 check_is_fitted(self) From aa4e87d9c7b6f943bd16d7c2625772f087bd0780 Mon Sep 17 
00:00:00 2001 From: mellamansanchez Date: Mon, 8 Mar 2021 17:24:04 +0100 Subject: [PATCH 130/417] chagnes done --- skfda/exploratory/visualization/representation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index ec965b10f..2f012e015 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -146,7 +146,7 @@ def __init__( raise ValueError( "The length of the gradient color" "list should be the same as the number" - "of samples in fdata" + "of samples in fdata", ) if min_grad is None: From 25152c6c94d8f638e520b5bc2ac427bdd8f66f03 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 11 Mar 2021 18:06:10 +0100 Subject: [PATCH 131/417] Style changes. --- skfda/_utils/_utils.py | 33 +++++--- skfda/preprocessing/registration/base.py | 12 ++- .../preprocessing/registration/validation.py | 80 ++++++++++++------- 3 files changed, 81 insertions(+), 44 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 5f480c17f..0388818f7 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -17,9 +17,10 @@ ) import numpy as np -import scipy.integrate from pandas.api.indexers import check_array_indexer +import scipy.integrate + from ..representation._typing import ( DomainRange, DomainRangeLike, @@ -64,12 +65,11 @@ def new_function(*args, **kwargs): n_samples=new_nsamples) -def check_is_univariate(fd): - """Checks if an FData is univariate and raises an error +def check_is_univariate(fd: FData) -> None: + """Check if an FData is univariate and raises an error. Args: - fd (:class:`~skfda.FData`): Functional object to check if is - univariate. + fd: Functional object to check if is univariate. 
Raises: ValueError: If it is not univariate, i.e., `fd.dim_domain != 1` or @@ -77,13 +77,22 @@ def check_is_univariate(fd): """ if fd.dim_domain != 1 or fd.dim_codomain != 1: - raise ValueError(f"The functional data must be univariate, i.e., " + - f"with dim_domain=1 " + - (f"" if fd.dim_domain == 1 - else f"(currently is {fd.dim_domain}) ") + - f"and dim_codomain=1 " + - (f"" if fd.dim_codomain == 1 else - f"(currently is {fd.dim_codomain})")) + + domain_str = ( + "" if fd.dim_domain == 1 + else f"(currently is {fd.dim_domain}) " + ) + + codomain_str = ( + "" if fd.dim_codomain == 1 + else f"(currently is {fd.dim_codomain})" + ) + + raise ValueError( + f"The functional data must be univariate, i.e., " + f"with dim_domain=1 {domain_str}" + f"and dim_codomain=1 {codomain_str}", + ) def _to_grid(X, y, eval_points=None): diff --git a/skfda/preprocessing/registration/base.py b/skfda/preprocessing/registration/base.py index a705c52a0..28491a3fc 100644 --- a/skfda/preprocessing/registration/base.py +++ b/skfda/preprocessing/registration/base.py @@ -4,13 +4,21 @@ """ from abc import ABC + +import numpy as np from sklearn.base import BaseEstimator, TransformerMixin + from ... import FData -class RegistrationTransformer(ABC, BaseEstimator, TransformerMixin): + +class RegistrationTransformer( + ABC, + BaseEstimator, # type: ignore + TransformerMixin, # type: ignore +): """Base class for the registration methods.""" - def score(self, X: FData, y=None): + def score(self, X: FData, y: None=None) -> np.ndarray: r"""Returns the percentage of total variation removed. 
Computes the squared multiple correlation index of the proportion of diff --git a/skfda/preprocessing/registration/validation.py b/skfda/preprocessing/registration/validation.py index 49d9b8126..02cc5b75d 100644 --- a/skfda/preprocessing/registration/validation.py +++ b/skfda/preprocessing/registration/validation.py @@ -1,14 +1,15 @@ """Methods and classes for validation of the registration procedures""" -from typing import NamedTuple +from typing import NamedTuple, Optional import numpy as np from ..._utils import _to_grid, check_is_univariate +from ...representation import FData class RegistrationScorer(): - r"""Cross validation scoring for registration procedures. + """Cross validation scoring for registration procedures. It calculates the score of a registration procedure, used to perform model validation or parameter selection. @@ -80,6 +81,7 @@ class AmplitudePhaseDecompositionStats(NamedTuple): c_r (float): Constant :math:`C_R`. """ + r_squared: float mse_amp: float mse_pha: float @@ -183,7 +185,6 @@ class AmplitudePhaseDecomposition(RegistrationScorer): Springer. Examples: - Calculate the score of the shift registration of a sinusoidal process synthetically generated. @@ -227,8 +228,12 @@ class AmplitudePhaseDecomposition(RegistrationScorer): """ - def __init__(self, return_stats=False, eval_points=None): - """Initialize the transformer""" + def __init__( + self, + return_stats: bool = False, + eval_points: Optional[np.ndarray] = None, + ) -> None: + super().__init__(eval_points) self.return_stats = return_stats @@ -257,15 +262,22 @@ def __call__(self, estimator, X, y=None): else: return self.score_function(y, X_reg) - def score_function(self, X, y, *, warping=None): + def score_function( + self, + X: FData, + y: FData, + *, + warping: Optional[FData] = None, + ) -> float: """Compute the score of the transformation performed. Args: - X (FData): Original functional data. - y (FData): Functional data registered. + X: Original functional data. 
+ y: Functional data registered. + warping: Warping function used to register the functions. Returns: - float: Score of the transformation. + Score of the transformation. """ from scipy.integrate import simps @@ -274,13 +286,17 @@ def score_function(self, X, y, *, warping=None): check_is_univariate(y) if len(y) != len(X): - raise ValueError(f"the registered and unregistered curves must have " - f"the same number of samples ({len(y)})!=({len(X)})") + raise ValueError( + f"The registered and unregistered curves must have " + f"the same number of samples ({len(y)})!=({len(X)})", + ) if warping is not None and len(warping) != len(X): - raise ValueError(f"The registered curves and the warping functions " - f"must have the same number of samples " - f"({len(X)})!=({len(warping)})") + raise ValueError( + f"The registered curves and the warping functions " + f"must have the same number of samples " + f"({len(X)})!=({len(warping)})", + ) # Creates the mesh to discretize the functions if self.eval_points is None: @@ -288,25 +304,27 @@ def score_function(self, X, y, *, warping=None): eval_points = y.grid_points[0] except AttributeError: - nfine = max(y.basis.n_basis * 10 + 1, 201) - eval_points = np.linspace(*y.domain_range[0], nfine) + n_points = max(y.basis.n_basis * 10 + 1, 201) + eval_points = np.linspace(*y.domain_range[0], n_points) else: eval_points = np.asarray(self.eval_points) - x_fine = X.evaluate(eval_points)[..., 0] - y_fine = y.evaluate(eval_points)[..., 0] - mu_fine = x_fine.mean(axis=0) # Mean unregistered function - eta_fine = y_fine.mean(axis=0) # Mean registered function - mu_fine_sq = np.square(mu_fine) - eta_fine_sq = np.square(eta_fine) + x_eval = X.evaluate(eval_points)[..., 0] + y_eval = y.evaluate(eval_points)[..., 0] + + x_eval_mean = x_eval.mean(axis=0) + y_eval_mean = y_eval.mean(axis=0) + + x_eval_mean_sq = np.square(x_eval_mean) + y_eval_mean_sq = np.square(y_eval_mean) # Total mean square error of the original funtions # mse_total = 
scipy.integrate.simps( # np.mean(np.square(x_fine - mu_fine), axis=0), # eval_points) - cr = 1. # Constant related to the covariation between the deformation - # functions and y^2 + # Constant related to the covariation between the warpings and y^2 + cr = 1.0 # If the warping functions are not provided, are suppose independent if warping is not None: @@ -316,20 +334,22 @@ def score_function(self, X, y, *, warping=None): dh_fine_mean = dh_fine.mean(axis=0) dh_fine_center = dh_fine - dh_fine_mean - y_fine_sq = np.square(y_fine) # y^2 - y_fine_sq_center = np.subtract(y_fine_sq, eta_fine_sq) # y^2-E[y2] + y_fine_sq = np.square(y_eval) # y^2 + y_fine_sq_center = np.subtract(y_fine_sq, y_eval_mean_sq) covariate = np.inner(dh_fine_center.T, y_fine_sq_center.T) covariate = covariate.mean(axis=0) - cr += np.divide(simps(covariate, eval_points), - simps(eta_fine_sq, eval_points)) + cr += np.divide( + simps(covariate, eval_points), + simps(y_eval_mean_sq, eval_points), + ) # mse due to phase variation - mse_pha = simps(cr * eta_fine_sq - mu_fine_sq, eval_points) + mse_pha = simps(cr * y_eval_mean_sq - x_eval_mean_sq, eval_points) # mse due to amplitude variation # mse_amp = mse_total - mse_pha - y_fine_center = np.subtract(y_fine, eta_fine) + y_fine_center = np.subtract(y_eval, y_eval_mean) y_fine_center_sq = np.square(y_fine_center, out=y_fine_center) y_fine_center_sq_mean = y_fine_center_sq.mean(axis=0) From 0f8e599551edbd595d0c349dfe1b1f1426ff9d54 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 11 Mar 2021 20:05:34 +0100 Subject: [PATCH 132/417] Fix warnings. 
--- skfda/datasets/_real_datasets.py | 21 ++++++++++++--------- skfda/representation/_functional_data.py | 2 +- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index a1d027134..a13013554 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -807,15 +807,18 @@ def fetch_weather( else: feature_names = [curve_name] X = curves - meta = np.array(list(zip( - data["place"], - data["province"], - np.asarray(data["coordinates"])[:, 0], - np.asarray(data["coordinates"])[:, 1], - data["geogindex"], - np.asarray(data["monthlyTemp"]).T, - np.asarray(data["monthlyPrecip"]).T, - ))) + meta = np.concatenate( + ( + np.array(data["place"])[:, np.newaxis], + np.array(data["province"])[:, np.newaxis], + np.asarray(data["coordinates"]), + np.array(data["geogindex"])[:, np.newaxis], + np.asarray(data["monthlyTemp"]).T.tolist(), + np.asarray(data["monthlyPrecip"]).T.tolist(), + ), + axis=1, + dtype=np.object_ + ) meta_names = [ "place", "province", diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index e5b70865b..d8dc4489b 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -274,7 +274,7 @@ def _extrapolation_index(self, eval_points: np.ndarray) -> np.ndarray: should be applied. """ - index = np.zeros(eval_points.shape[:-1], dtype=np.bool) + index = np.zeros(eval_points.shape[:-1], dtype=np.bool_) # Checks bounds in each domain dimension for i, bounds in enumerate(self.domain_range): From ff10f62ecb3953338df5ba2bbb089e5653c710f8 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 13 Mar 2021 00:12:38 +0100 Subject: [PATCH 133/417] Fixed more validation methods. 
--- .../preprocessing/registration/validation.py | 254 +++++++----------- tests/test_elastic.py | 6 +- tests/test_registration.py | 84 +++--- 3 files changed, 147 insertions(+), 197 deletions(-) diff --git a/skfda/preprocessing/registration/validation.py b/skfda/preprocessing/registration/validation.py index 02cc5b75d..26009b127 100644 --- a/skfda/preprocessing/registration/validation.py +++ b/skfda/preprocessing/registration/validation.py @@ -1,14 +1,17 @@ """Methods and classes for validation of the registration procedures""" +from __future__ import annotations -from typing import NamedTuple, Optional +from abc import ABC, abstractmethod +from typing import NamedTuple, Optional, Union import numpy as np from ..._utils import _to_grid, check_is_univariate from ...representation import FData +from .base import RegistrationTransformer -class RegistrationScorer(): +class RegistrationScorer(ABC): """Cross validation scoring for registration procedures. It calculates the score of a registration procedure, used to perform @@ -41,22 +44,23 @@ class RegistrationScorer(): """ - def __init__(self, eval_points=None): - """Initialize the transformer""" - self.eval_points = eval_points - - def __call__(self, estimator, X, y=None): + def __call__( + self, + estimator: RegistrationTransformer, + X: FData, + y: Optional[FData] = None, + ) -> float: """Compute the score of the transformation. Args: - estimator (Estimator): Registration method estimator. The estimator + estimator: Registration method estimator. The estimator should be fitted. - X (:class:`FData `): Functional data to be registered. - y (:class:`FData `, optional): Functional data target. - If provided should be the same as `X` in general. + X: Functional data to be registered. + y: Functional data target. If provided should be the same as + `X` in general. Returns: - float: Cross validation score. + Cross validation score. 
""" if y is None: y = X @@ -66,6 +70,24 @@ def __call__(self, estimator, X, y=None): return self.score_function(y, X_reg) + @abstractmethod + def score_function( + self, + X: FData, + y: FData, + ) -> float: + """Compute the score of the transformation performed. + + Args: + X: Original functional data. + y: Functional data registered. + + Returns: + Score of the transformation. + + """ + pass + class AmplitudePhaseDecompositionStats(NamedTuple): r"""Named tuple to store the values of the amplitude-phase decomposition. @@ -75,20 +97,22 @@ class AmplitudePhaseDecompositionStats(NamedTuple): Args: r_square (float): Squared correlation index :math:`R^2`. - mse_amp (float): Mean square error of amplitude + mse_amplitude (float): Mean square error of amplitude :math:`\text{MSE}_{amp}`. - mse_pha (float): Mean square error of phase :math:`\text{MSE}_{pha}`. + mse_phase (float): Mean square error of phase :math:`\text{MSE}_{pha}`. c_r (float): Constant :math:`C_R`. """ r_squared: float - mse_amp: float - mse_pha: float + mse_amplitude: float + mse_phase: float c_r: float -class AmplitudePhaseDecomposition(RegistrationScorer): +class AmplitudePhaseDecomposition( + RegistrationScorer, +): r"""Compute mean square error measures for amplitude and phase variation. Once the registration has taken place, this function computes two mean @@ -113,15 +137,14 @@ class AmplitudePhaseDecomposition(RegistrationScorer): .. math:: \text{MSE}_{phase}= - \int \left [C_R \overline{y}^2(t) - \overline{x}^2(t) \right]dt + C_R \int \overline{y}^2(t) dt - \int \overline{x}^2(t) dt where the constant :math:`C_R` is defined as .. 
math:: - C_R = 1 + \frac{\frac{1}{N}\sum_{i}^{N}\int [Dh_i(t)-\overline{Dh}(t)] - [ y_i^2(t)- \overline{y^2}(t) ]dt} - {\frac{1}{N} \sum_{i}^{N} \int y_i^2(t)dt} + C_R = \frac{\frac{1}{N}\sum_{i=1}^{N}\int[x_i(t)-\overline x(t)]^2dt + }{\frac{1}{N}\sum_{i=1}^{N}\int[y_i(t)-\overline y(t)]^2dt} whose structure is related to the covariation between the deformation functions :math:`Dh_i(t)` and the squared registered functions @@ -205,20 +228,20 @@ class AmplitudePhaseDecomposition(RegistrationScorer): >>> scorer = AmplitudePhaseDecomposition() >>> score = scorer(shift_registration, X) >>> round(score, 3) - 0.972 + 0.971 Also it is possible to get all the values of the decomposition. >>> scorer = AmplitudePhaseDecomposition(return_stats=True) >>> stats = scorer(shift_registration, X) >>> round(stats.r_squared, 3) - 0.972 - >>> round(stats.mse_amp, 3) - 0.007 - >>> round(stats.mse_pha, 3) - 0.227 + 0.971 + >>> round(stats.mse_amplitude, 3) + 0.006 + >>> round(stats.mse_phase, 3) + 0.214 >>> round(stats.c_r, 3) - 1.0 + 0.976 See also: @@ -231,56 +254,33 @@ class AmplitudePhaseDecomposition(RegistrationScorer): def __init__( self, return_stats: bool = False, - eval_points: Optional[np.ndarray] = None, ) -> None: - - super().__init__(eval_points) self.return_stats = return_stats - def __call__(self, estimator, X, y=None): - """Compute the score of the transformation. - - Args: - estimator (Estimator): Registration method estimator. The estimator - should be fitted. - X (:class:`FData `): Functional data to be registered. - y (:class:`FData `, optional): Functional data target. - If provided should be the same as `X` in general. - - Returns: - float: Cross validation score. 
- """ - if y is None: - y = X - - # Register the data - X_reg = estimator.transform(X) - - # Pass the warpings if are generated in the transformer - if hasattr(estimator, 'warping_'): - return self.score_function(y, X_reg, warping=estimator.warping_) - else: - return self.score_function(y, X_reg) + def __call__( + self, + estimator: RegistrationTransformer, + X: FData, + y: Optional[FData] = None, + ) -> Union[float, AmplitudePhaseDecompositionStats]: + return super().__call__(estimator, X, y) def score_function( self, X: FData, y: FData, - *, - warping: Optional[FData] = None, - ) -> float: + ) -> Union[float, AmplitudePhaseDecompositionStats]: """Compute the score of the transformation performed. Args: X: Original functional data. y: Functional data registered. - warping: Warping function used to register the functions. Returns: Score of the transformation. """ - from scipy.integrate import simps + from ...misc.metrics import l2_distance, l2_norm check_is_univariate(X) check_is_univariate(y) @@ -291,84 +291,32 @@ def score_function( f"the same number of samples ({len(y)})!=({len(X)})", ) - if warping is not None and len(warping) != len(X): - raise ValueError( - f"The registered curves and the warping functions " - f"must have the same number of samples " - f"({len(X)})!=({len(warping)})", - ) + X_mean = X.mean() + y_mean = y.mean() - # Creates the mesh to discretize the functions - if self.eval_points is None: - try: - eval_points = y.grid_points[0] - - except AttributeError: - n_points = max(y.basis.n_basis * 10 + 1, 201) - eval_points = np.linspace(*y.domain_range[0], n_points) - else: - eval_points = np.asarray(self.eval_points) - - x_eval = X.evaluate(eval_points)[..., 0] - y_eval = y.evaluate(eval_points)[..., 0] - - x_eval_mean = x_eval.mean(axis=0) - y_eval_mean = y_eval.mean(axis=0) - - x_eval_mean_sq = np.square(x_eval_mean) - y_eval_mean_sq = np.square(y_eval_mean) - - # Total mean square error of the original funtions - # mse_total = 
scipy.integrate.simps( - # np.mean(np.square(x_fine - mu_fine), axis=0), - # eval_points) - - # Constant related to the covariation between the warpings and y^2 - cr = 1.0 - - # If the warping functions are not provided, are suppose independent - if warping is not None: - # Derivates warping functions - warping_deriv = warping.derivative() - dh_fine = warping_deriv(eval_points)[..., 0] - dh_fine_mean = dh_fine.mean(axis=0) - dh_fine_center = dh_fine - dh_fine_mean - - y_fine_sq = np.square(y_eval) # y^2 - y_fine_sq_center = np.subtract(y_fine_sq, y_eval_mean_sq) - - covariate = np.inner(dh_fine_center.T, y_fine_sq_center.T) - covariate = covariate.mean(axis=0) - cr += np.divide( - simps(covariate, eval_points), - simps(y_eval_mean_sq, eval_points), - ) + c_r = np.sum(l2_norm(X)**2) / np.sum(l2_norm(y)**2) - # mse due to phase variation - mse_pha = simps(cr * y_eval_mean_sq - x_eval_mean_sq, eval_points) + mse_amplitude = c_r * np.mean(l2_distance(y, y.mean())**2) + mse_phase = (c_r * l2_norm(y_mean)**2 - l2_norm(X_mean)**2).item() - # mse due to amplitude variation - # mse_amp = mse_total - mse_pha - y_fine_center = np.subtract(y_eval, y_eval_mean) - y_fine_center_sq = np.square(y_fine_center, out=y_fine_center) - y_fine_center_sq_mean = y_fine_center_sq.mean(axis=0) - - mse_amp = simps(y_fine_center_sq_mean, eval_points) - - # Total mean square error of the original funtions - mse_total = mse_pha + mse_amp + # Should be equal to np.mean(l2_distance(X, X_mean)**2) + mse_total = mse_amplitude + mse_phase # squared correlation measure of proportion of phase variation - rsq = mse_pha / (mse_total) + rsq = mse_phase / mse_total if self.return_stats is True: - stats = AmplitudePhaseDecompositionStats(rsq, mse_amp, mse_pha, cr) - return stats + return AmplitudePhaseDecompositionStats( + r_squared=rsq, + mse_amplitude=mse_amplitude, + mse_phase=mse_phase, + c_r=c_r, + ) - return rsq + return float(rsq) -class LeastSquares(AmplitudePhaseDecomposition): +class 
LeastSquares(RegistrationScorer): r"""Cross-validated measure of the registration procedure. Computes a cross-validated measure of the level of synchronization @@ -414,7 +362,6 @@ class LeastSquares(AmplitudePhaseDecomposition): (p. 18). arXiv:1103.3817v2. Examples: - Calculate the score of the shift registration of a sinusoidal process synthetically generated. @@ -434,7 +381,7 @@ class LeastSquares(AmplitudePhaseDecomposition): >>> scorer = LeastSquares() >>> score = scorer(shift_registration, X) >>> round(score, 3) - 0.796 + 0.953 See also: @@ -444,7 +391,7 @@ class LeastSquares(AmplitudePhaseDecomposition): """ - def score_function(self, X, y): + def score_function(self, X: FData, y: FData) -> float: """Compute the score of the transformation performed. Args: @@ -455,13 +402,11 @@ def score_function(self, X, y): float: Score of the transformation. """ - from ...misc.metrics import PairwiseMetric, l2_distance + from ...misc.metrics import l2_distance check_is_univariate(X) check_is_univariate(y) - X, y = _to_grid(X, y, eval_points=self.eval_points) - # Instead of compute f_i - 1/(N-1) sum(j!=i)f_j for each i = 1 ... N # It is used (1 + 1/(N-1))f_i - 1/(N-1) sum(j=1 ... N) f_j = # (1 + 1/(N-1))f_i - N/(N-1) mean(f) = @@ -476,14 +421,13 @@ def score_function(self, X, y): mean_y = C2 * y.mean() # Compute distance to mean - distance = PairwiseMetric(l2_distance) - ls_x = distance(X, mean_X).flatten() - ls_y = distance(y, mean_y).flatten() + ls_x = l2_distance(X, mean_X)**2 + ls_y = l2_distance(y, mean_y)**2 # Quotient of distance quotient = ls_y / ls_x - return 1 - 1. / N * quotient.sum() + return float(1 - np.mean(quotient)) class SobolevLeastSquares(RegistrationScorer): @@ -498,8 +442,8 @@ class SobolevLeastSquares(RegistrationScorer): {\sum_{i=1}^{N} \int\left(\dot{f}_{i}(t)-\frac{1}{N} \sum_{j=1}^{N} \dot{f}_{j}\right)^{2} dt} - where :math:`\dot f_i` and :math:`\dot \tilde f_i` are the derivatives of - the original and the registered data respectively. 
+ where :math:`\dot{f}_i` and :math:`\dot{\tilde{f}}_i` are the derivatives + of the original and the registered data respectively. This criterion measures the total cross-sectional variance of the derivatives of the aligned functions, relative to the original value. @@ -530,7 +474,6 @@ class SobolevLeastSquares(RegistrationScorer): (p. 18). arXiv:1103.3817v2. Examples: - Calculate the score of the shift registration of a sinusoidal process synthetically generated. @@ -550,7 +493,7 @@ class SobolevLeastSquares(RegistrationScorer): >>> scorer = SobolevLeastSquares() >>> score = scorer(shift_registration, X) >>> round(score, 3) - 0.761 + 0.924 See also: :class:`~AmplitudePhaseDecomposition` @@ -559,7 +502,7 @@ class SobolevLeastSquares(RegistrationScorer): """ - def score_function(self, X, y): + def score_function(self, X: FData, y: FData) -> float: """Compute the score of the transformation performed. Args: @@ -570,7 +513,7 @@ def score_function(self, X, y): float: Score of the transformation. """ - from ...misc.metrics import PairwiseMetric, l2_distance + from ...misc.metrics import l2_distance check_is_univariate(X) check_is_univariate(y) @@ -579,16 +522,11 @@ def score_function(self, X, y): X = X.derivative() y = y.derivative() - # Discretize if needed - X, y = _to_grid(X, y, eval_points=self.eval_points) - # L2 distance to mean - distance = PairwiseMetric(l2_distance) - - sls_x = distance(X, X.mean()) - sls_y = distance(y, y.mean()) + sls_x = l2_distance(X, X.mean())**2 + sls_y = l2_distance(y, y.mean())**2 - return 1 - sls_y.sum() / sls_x.sum() + return float(1 - sls_y.sum() / sls_x.sum()) class PairwiseCorrelation(RegistrationScorer): @@ -629,7 +567,6 @@ class PairwiseCorrelation(RegistrationScorer): (p. 18). arXiv:1103.3817v2. Examples: - Calculate the score of the shift registration of a sinusoidal process synthetically generated. 
@@ -658,7 +595,10 @@ class PairwiseCorrelation(RegistrationScorer): """ - def score_function(self, X, y): + def __init__(self, eval_points: Optional[np.ndarray] = None) -> None: + self.eval_points = eval_points + + def score_function(self, X: FData, y: FData) -> float: """Compute the score of the transformation performed. Args: @@ -679,9 +619,9 @@ def score_function(self, X, y): # corrcoefs computes the correlation between vector, without weights # due to the sample points X_corr = np.corrcoef(X.data_matrix[..., 0]) - np.fill_diagonal(X_corr, 0.) + np.fill_diagonal(X_corr, 0) y_corr = np.corrcoef(y.data_matrix[..., 0]) - np.fill_diagonal(y_corr, 0.) + np.fill_diagonal(y_corr, 0) - return y_corr.sum() / X_corr.sum() + return float(y_corr.sum() / X_corr.sum()) diff --git a/tests/test_elastic.py b/tests/test_elastic.py index b5d3dd3e6..71f92ff3d 100644 --- a/tests/test_elastic.py +++ b/tests/test_elastic.py @@ -184,12 +184,12 @@ def test_raises(self): with np.testing.assert_raises(ValueError): reg.transform(self.unimodal_samples[0]) - def test_score(self): - """Test score method of the transformer""" + def test_score(self) -> None: + """Test score method of the transformer.""" reg = ElasticRegistration() reg.fit(self.unimodal_samples) score = reg.score(self.unimodal_samples) - np.testing.assert_almost_equal(score, 0.9994225) + np.testing.assert_almost_equal(score, 0.999389) def test_warping_mean(self): warping = make_random_warping(start=-1, random_state=0) diff --git a/tests/test_registration.py b/tests/test_registration.py index 62781ee4b..789a91477 100644 --- a/tests/test_registration.py +++ b/tests/test_registration.py @@ -1,21 +1,33 @@ +import unittest + +import numpy as np +from sklearn.exceptions import NotFittedError + from skfda import FDataGrid from skfda._utils import _check_estimator -from skfda.datasets import (make_multimodal_samples, make_multimodal_landmarks, - make_sinusoidal_process) +from skfda.datasets import ( + make_multimodal_landmarks, + 
make_multimodal_samples, + make_sinusoidal_process, +) from skfda.exploratory.stats import mean from skfda.preprocessing.registration import ( - normalize_warping, invert_warping, landmark_shift_deltas, landmark_shift, - landmark_registration_warping, landmark_registration, ShiftRegistration) + ShiftRegistration, + invert_warping, + landmark_registration, + landmark_registration_warping, + landmark_shift, + landmark_shift_deltas, + normalize_warping, +) from skfda.preprocessing.registration.validation import ( - AmplitudePhaseDecomposition, LeastSquares, - SobolevLeastSquares, PairwiseCorrelation) + AmplitudePhaseDecomposition, + LeastSquares, + PairwiseCorrelation, + SobolevLeastSquares, +) from skfda.representation.basis import Fourier from skfda.representation.interpolation import SplineInterpolation -import unittest - -from sklearn.exceptions import NotFittedError - -import numpy as np class TestWarping(unittest.TestCase): @@ -327,51 +339,49 @@ def test_custom_output_points(self): class TestRegistrationValidation(unittest.TestCase): - """Test shift registration""" + """Test validation functions.""" - def setUp(self): - """Initialization of samples""" + def setUp(self) -> None: + """Initialize the samples.""" self.X = make_sinusoidal_process(error_std=0, random_state=0) self.shift_registration = ShiftRegistration().fit(self.X) - def test_amplitude_phase_score(self): + def test_amplitude_phase_score(self) -> None: + """Test basic usage of AmplitudePhaseDecomposition.""" scorer = AmplitudePhaseDecomposition() score = scorer(self.shift_registration, self.X) - np.testing.assert_allclose(score, 0.972095, rtol=1e-6) - - def test_amplitude_phase_score_with_output_points(self): - eval_points = self.X.grid_points[0] - scorer = AmplitudePhaseDecomposition(eval_points=eval_points) - score = scorer(self.shift_registration, self.X) - np.testing.assert_allclose(score, 0.972095, rtol=1e-6) + np.testing.assert_allclose(score, 0.971144, rtol=1e-6) - def 
test_amplitude_phase_score_with_basis(self): + def test_amplitude_phase_score_with_basis(self) -> None: + """Test the AmplitudePhaseDecomposition with FDataBasis.""" scorer = AmplitudePhaseDecomposition() X = self.X.to_basis(Fourier()) score = scorer(self.shift_registration, X) np.testing.assert_allclose(score, 0.995087, rtol=1e-6) - def test_default_score(self): - + def test_default_score(self) -> None: + """Test default score of a registration transformer.""" score = self.shift_registration.score(self.X) - np.testing.assert_allclose(score, 0.972095, rtol=1e-6) + np.testing.assert_allclose(score, 0.971144, rtol=1e-6) - def test_least_squares_score(self): + def test_least_squares_score(self) -> None: + """Test LeastSquares.""" scorer = LeastSquares() score = scorer(self.shift_registration, self.X) - np.testing.assert_allclose(score, 0.795933, rtol=1e-6) + np.testing.assert_allclose(score, 0.953355, rtol=1e-6) - def test_sobolev_least_squares_score(self): + def test_sobolev_least_squares_score(self) -> None: + """Test SobolevLeastSquares.""" scorer = SobolevLeastSquares() score = scorer(self.shift_registration, self.X) - np.testing.assert_allclose(score, 0.76124, rtol=1e-6) + np.testing.assert_allclose(score, 0.923962, rtol=1e-6) def test_pairwise_correlation(self): scorer = PairwiseCorrelation() score = scorer(self.shift_registration, self.X) np.testing.assert_allclose(score, 1.816228, rtol=1e-6) - def test_mse_decomposition(self): + def test_mse_decomposition(self) -> None: fd = make_multimodal_samples(n_samples=3, random_state=1) landmarks = make_multimodal_landmarks(n_samples=3, random_state=1) @@ -379,11 +389,11 @@ def test_mse_decomposition(self): warping = landmark_registration_warping(fd, landmarks) fd_registered = fd.compose(warping) scorer = AmplitudePhaseDecomposition(return_stats=True) - ret = scorer.score_function(fd, fd_registered, warping=warping) - np.testing.assert_allclose(ret.mse_amp, 0.0009866997121476962) - 
np.testing.assert_allclose(ret.mse_pha, 0.11576935495450151) - np.testing.assert_allclose(ret.r_squared, 0.9915489952877273) - np.testing.assert_allclose(ret.c_r, 0.999999, rtol=1e-6) + ret = scorer.score_function(fd, fd_registered) + np.testing.assert_allclose(ret.mse_amplitude, 0.0009465483) + np.testing.assert_allclose(ret.mse_phase, 0.1051769136) + np.testing.assert_allclose(ret.r_squared, 0.9910806875) + np.testing.assert_allclose(ret.c_r, 0.9593073773) def test_raises_amplitude_phase(self): scorer = AmplitudePhaseDecomposition() @@ -394,7 +404,7 @@ def test_raises_amplitude_phase(self): # Inconsistent number of functions registered with np.testing.assert_raises(ValueError): - scorer.score_function(self.X, self.X, warping=self.X[:2]) + scorer.score_function(self.X, self.X[:-1]) if __name__ == '__main__': From af62e2ff1de9466ee103c7901707e7552d0bf6bc Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 14 Mar 2021 17:33:28 +0100 Subject: [PATCH 134/417] Refactor registration scorers. --- skfda/_utils/_utils.py | 8 ++- .../preprocessing/registration/validation.py | 68 ++++++++++--------- tests/test_registration.py | 9 +-- 3 files changed, 46 insertions(+), 39 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 0388818f7..8a1787b40 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -32,7 +32,7 @@ RandomStateLike = Optional[Union[int, np.random.RandomState]] if TYPE_CHECKING: - from ..representation import FData + from ..representation import FData, FDataGrid from ..representation.basis import Basis @@ -95,7 +95,11 @@ def check_is_univariate(fd: FData) -> None: ) -def _to_grid(X, y, eval_points=None): +def _to_grid( + X: FData, + y: FData, + eval_points: Optional[np.ndarray] = None, +) -> Tuple[FDataGrid, FDataGrid]: """Transform a pair of FDatas in grids to perform calculations.""" from .. 
import FDataGrid diff --git a/skfda/preprocessing/registration/validation.py b/skfda/preprocessing/registration/validation.py index 26009b127..92bc99bbf 100644 --- a/skfda/preprocessing/registration/validation.py +++ b/skfda/preprocessing/registration/validation.py @@ -1,8 +1,9 @@ -"""Methods and classes for validation of the registration procedures""" +"""Methods and classes for validation of the registration procedures.""" from __future__ import annotations from abc import ABC, abstractmethod -from typing import NamedTuple, Optional, Union +from dataclasses import dataclass +from typing import Optional import numpy as np @@ -89,7 +90,8 @@ def score_function( pass -class AmplitudePhaseDecompositionStats(NamedTuple): +@dataclass +class AmplitudePhaseDecompositionStats(): r"""Named tuple to store the values of the amplitude-phase decomposition. Values of the amplitude phase decomposition computed in @@ -230,10 +232,10 @@ class AmplitudePhaseDecomposition( >>> round(score, 3) 0.971 - Also it is possible to get all the values of the decomposition. + Also it is possible to get all the values of the decomposition: - >>> scorer = AmplitudePhaseDecomposition(return_stats=True) - >>> stats = scorer(shift_registration, X) + >>> X_reg = shift_registration.transform(X) + >>> stats = scorer.stats(X, X_reg) >>> round(stats.r_squared, 3) 0.971 >>> round(stats.mse_amplitude, 3) @@ -251,34 +253,20 @@ class AmplitudePhaseDecomposition( """ - def __init__( - self, - return_stats: bool = False, - ) -> None: - self.return_stats = return_stats - - def __call__( - self, - estimator: RegistrationTransformer, - X: FData, - y: Optional[FData] = None, - ) -> Union[float, AmplitudePhaseDecompositionStats]: - return super().__call__(estimator, X, y) - - def score_function( + def stats( self, X: FData, y: FData, - ) -> Union[float, AmplitudePhaseDecompositionStats]: - """Compute the score of the transformation performed. 
+ ) -> AmplitudePhaseDecompositionStats: + """ + Compute the decomposition statistics. Args: X: Original functional data. y: Functional data registered. Returns: - Score of the transformation. - + The decomposition statistics. """ from ...misc.metrics import l2_distance, l2_norm @@ -305,15 +293,29 @@ def score_function( # squared correlation measure of proportion of phase variation rsq = mse_phase / mse_total - if self.return_stats is True: - return AmplitudePhaseDecompositionStats( - r_squared=rsq, - mse_amplitude=mse_amplitude, - mse_phase=mse_phase, - c_r=c_r, - ) + return AmplitudePhaseDecompositionStats( + r_squared=rsq, + mse_amplitude=mse_amplitude, + mse_phase=mse_phase, + c_r=c_r, + ) - return float(rsq) + def score_function( + self, + X: FData, + y: FData, + ) -> float: + """Compute the score of the transformation performed. + + Args: + X: Original functional data. + y: Functional data registered. + + Returns: + Score of the transformation. + + """ + return float(self.stats(X, y).r_squared) class LeastSquares(RegistrationScorer): diff --git a/tests/test_registration.py b/tests/test_registration.py index 789a91477..5585a445e 100644 --- a/tests/test_registration.py +++ b/tests/test_registration.py @@ -376,20 +376,21 @@ def test_sobolev_least_squares_score(self) -> None: score = scorer(self.shift_registration, self.X) np.testing.assert_allclose(score, 0.923962, rtol=1e-6) - def test_pairwise_correlation(self): + def test_pairwise_correlation(self) -> None: + """Test PairwiseCorrelation.""" scorer = PairwiseCorrelation() score = scorer(self.shift_registration, self.X) np.testing.assert_allclose(score, 1.816228, rtol=1e-6) def test_mse_decomposition(self) -> None: - + """Test obtaining all stats from AmplitudePhaseDecomposition.""" fd = make_multimodal_samples(n_samples=3, random_state=1) landmarks = make_multimodal_landmarks(n_samples=3, random_state=1) landmarks = landmarks.squeeze() warping = landmark_registration_warping(fd, landmarks) fd_registered = 
fd.compose(warping) - scorer = AmplitudePhaseDecomposition(return_stats=True) - ret = scorer.score_function(fd, fd_registered) + scorer = AmplitudePhaseDecomposition() + ret = scorer.stats(fd, fd_registered) np.testing.assert_allclose(ret.mse_amplitude, 0.0009465483) np.testing.assert_allclose(ret.mse_phase, 0.1051769136) np.testing.assert_allclose(ret.r_squared, 0.9910806875) From 3ffd96cc6a91992577285c7140f564ceeec950d3 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 14 Mar 2021 17:47:32 +0100 Subject: [PATCH 135/417] Fix style in base registration transformer. --- skfda/preprocessing/registration/base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/skfda/preprocessing/registration/base.py b/skfda/preprocessing/registration/base.py index 28491a3fc..8187cf670 100644 --- a/skfda/preprocessing/registration/base.py +++ b/skfda/preprocessing/registration/base.py @@ -1,11 +1,12 @@ # -*- coding: utf-8 -*- -"""Registration method. +"""Registration methods base class. + This module contains the abstract base class for all registration methods. + """ from abc import ABC -import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from ... import FData @@ -18,8 +19,8 @@ class RegistrationTransformer( ): """Base class for the registration methods.""" - def score(self, X: FData, y: None=None) -> np.ndarray: - r"""Returns the percentage of total variation removed. + def score(self, X: FData, y: None = None) -> float: + r"""Return the percentage of total variation removed. 
Computes the squared multiple correlation index of the proportion of the total variation due to phase, defined as: From 8e2b52a6a72efaf0971735dfe4f2a87347d425a5 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 14 Mar 2021 19:24:56 +0100 Subject: [PATCH 136/417] changes --- .vscode/settings.json | 3 --- skfda/exploratory/visualization/_phase_plane_plot.py | 6 ++---- 2 files changed, 2 insertions(+), 7 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index fafb871bf..000000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "python.pythonPath": "C:\\ProgramData\\Anaconda3\\python.exe" -} \ No newline at end of file diff --git a/skfda/exploratory/visualization/_phase_plane_plot.py b/skfda/exploratory/visualization/_phase_plane_plot.py index e260b537b..dd5e275f4 100644 --- a/skfda/exploratory/visualization/_phase_plane_plot.py +++ b/skfda/exploratory/visualization/_phase_plane_plot.py @@ -6,7 +6,7 @@ of them with domain 1 and codomain 1. 
""" -from typing import Any, List, Optional, TypeVar +from typing import Any, List, Optional, Union from matplotlib.axes import Axes from matplotlib.figure import Figure @@ -14,8 +14,6 @@ from ...representation import FData from ._utils import _get_figure_and_axes, _set_figure_layout -S = TypeVar('S', Figure, Axes, List[Axes]) - class PhasePlanePlot: """ @@ -42,7 +40,7 @@ def __init__( def plot( self, - chart: Optional[S] = None, + chart: Union[Figure, Axes, None] = None, *, fig: Optional[Figure] = None, ax: Optional[Axes] = None, From eaf0bf7d689e8ef048bbf7cb7b62d5c83e70aa4f Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 14 Mar 2021 19:33:35 +0100 Subject: [PATCH 137/417] changes --- setup.cfg | 2 -- .../visualization/_phase_plane_plot.py | 24 ++++++------------- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/setup.cfg b/setup.cfg index 4a13decb8..69167fdbb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,8 +41,6 @@ ignore = WPS115, # Trailing underscores are a scikit-learn convention WPS120, - # Access should have depth = 5 - WPS219, # The number of imported things may be large, especially for typing WPS235, # We like local imports, thanks diff --git a/skfda/exploratory/visualization/_phase_plane_plot.py b/skfda/exploratory/visualization/_phase_plane_plot.py index dd5e275f4..3e8b5f5b9 100644 --- a/skfda/exploratory/visualization/_phase_plane_plot.py +++ b/skfda/exploratory/visualization/_phase_plane_plot.py @@ -6,7 +6,7 @@ of them with domain 1 and codomain 1. 
""" -from typing import Any, List, Optional, Union +from typing import Any, Optional, Union from matplotlib.axes import Axes from matplotlib.figure import Figure @@ -69,20 +69,9 @@ def plot( if ( self.fdata2 is not None ): - if ( - self.fdata1.dim_domain == self.fdata2.dim_domain - and self.fdata1.dim_codomain == self.fdata2.dim_codomain - and self.fdata1.dim_domain == 1 - and self.fdata1.dim_codomain == 1 - ): - self.fd_final = self.fdata1.concatenate( - self.fdata2, as_coordinates=True, - ) - else: - raise ValueError( - "Error in data arguments,", - "codomain or domain is not correct.", - ) + self.fd_final = self.fdata1.concatenate( + self.fdata2, as_coordinates=True, + ) else: self.fd_final = self.fdata1 @@ -93,9 +82,10 @@ def plot( fig, axes = _set_figure_layout( fig, axes, dim=2, n_axes=1, ) + data_matrix = self.fd_final.data_matrix[0][:, 0] axes[0].plot( - self.fd_final.data_matrix[0][:, 0].tolist(), - self.fd_final.data_matrix[0][:, 1].tolist(), + data_matrix[:, 0].tolist(), + data_matrix[:, 1].tolist(), **kwargs, ) else: From a75014a9cb9f8298f0b16cb1508d1357c3077977 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 14 Mar 2021 19:37:46 +0100 Subject: [PATCH 138/417] done --- .vscode/settings.json | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index fafb871bf..000000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "python.pythonPath": "C:\\ProgramData\\Anaconda3\\python.exe" -} \ No newline at end of file From 4b976267f21a2f38710df0d9ed5ad02a089d6c1d Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Mar 2021 09:41:52 +0100 Subject: [PATCH 139/417] last correction --- skfda/exploratory/visualization/_phase_plane_plot.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/_phase_plane_plot.py b/skfda/exploratory/visualization/_phase_plane_plot.py index 
3e8b5f5b9..c5de0aa8e 100644 --- a/skfda/exploratory/visualization/_phase_plane_plot.py +++ b/skfda/exploratory/visualization/_phase_plane_plot.py @@ -66,9 +66,7 @@ def plot( """ fig, axes = _get_figure_and_axes(chart, fig, ax) - if ( - self.fdata2 is not None - ): + if self.fdata2 is not None: self.fd_final = self.fdata1.concatenate( self.fdata2, as_coordinates=True, ) From 29afb33dd4fa664a6d9e13b52555fdd28ae19fae Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Mar 2021 12:11:56 +0100 Subject: [PATCH 140/417] solved errors --- skfda/exploratory/visualization/_outliergram.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index f65006356..436b8df2d 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -19,8 +19,6 @@ from ..depth._depth import ModifiedBandDepth from ._utils import _get_figure_and_axes, _set_figure_layout_for_fdata -S = TypeVar('S', Figure, Axes, List[Axes]) - class Outliergram: """ @@ -77,19 +75,19 @@ def plot( a parabola. The shape outliers will be the points that appear far from this curve. Args: - chart (figure object, axe or list of axes, optional): figure over + chart: figure over with the graphs are plotted or axis over where the graphs are plotted. If None and ax is also None, the figure is initialized. - fig (figure object, optional): figure over with the graphs are + fig: figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes (list of axis objects, optional): axis where the graphs + axes: axis where the graphs are plotted. If None, see param fig. - n_rows (int, optional): designates the number of rows of the figure + n_rows: designates the number of rows of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. 
- n_cols(int, optional): designates the number of columns of the + n_cols: designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. kwargs: if dim_domain is 1, keyword arguments to be passed to the @@ -97,7 +95,7 @@ def plot( arguments to be passed to the matplotlib.pyplot.plot_surface function. Returns: - fig (figure object): figure object in which the depths will be + fig: figure object in which the depths will be scattered. """ fig, axes_list = _get_figure_and_axes(chart, fig, axes) From e9ddd5f2c1416e920bdc709f1f9d5545ce671289 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Mar 2021 12:12:30 +0100 Subject: [PATCH 141/417] changed --- skfda/exploratory/visualization/_outliergram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 436b8df2d..71a8dd42e 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -7,7 +7,7 @@ magnitude outliers, but there is a necessity of capturing this other type. 
""" -from typing import List, Optional, TypeVar, Union +from typing import Optional, Union import numpy as np import scipy.integrate as integrate From 8cc512a05dd4d5dc0526dd2fc1cb7764e3ba8ed1 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Mar 2021 12:27:23 +0100 Subject: [PATCH 142/417] changes --- skfda/exploratory/visualization/representation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 2f012e015..d963954f9 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -42,8 +42,8 @@ def _get_label_colors( def _get_color_info( fdata: T, group: Union[Sequence[int], None], - group_names: Union[Sequence[str], None], - group_colors: Union[Sequence[Any], None], + group_names: Union[Sequence[str], dict, None], + group_colors: Union[Sequence[Any], dict, None], legend: bool, kwargs: Any, ) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: From 03f6ab63e5ac6f9cd5252a0373bc65fe021f89be Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 15 Mar 2021 12:27:35 +0100 Subject: [PATCH 143/417] changes --- .../visualization/representation.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index d963954f9..76f0328e1 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Sequence, Tuple, TypeVar, Union +from typing import Any, Dict, List, Optional, Sequence, Tuple, TypedDict, TypeVar, Union import matplotlib.cm import matplotlib.patches @@ -41,9 +41,9 @@ def _get_label_colors( def _get_color_info( fdata: T, - group: Union[Sequence[int], None], - group_names: Union[Sequence[str], dict, None], - 
group_colors: Union[Sequence[Any], dict, None], + group: Union[Sequence[Any], None], + group_names: Union[Sequence[str], TypedDict, None], + group_colors: Union[Sequence[Any], TypedDict, None], legend: bool, kwargs: Any, ) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: @@ -183,9 +183,9 @@ def plot( n_cols: Optional[int] = None, n_points: Union[int, Tuple[int, int], None], domain_range: Union[Tuple[int, int], DomainRangeLike, None], - group: Union[Sequence[int], None], - group_colors: Union[Sequence[Any], None], - group_names: Union[Sequence[str], None], + group: Union[Sequence[Any], None], + group_colors: Union[Sequence[Any], TypedDict, None], + group_names: Union[Sequence[str], TypedDict, None], colormap_name: str = 'autumn', legend: bool = False, **kwargs: Any, @@ -367,9 +367,9 @@ def plot( n_rows: Optional[int] = None, n_cols: Optional[int] = None, domain_range: Union[Tuple[int, int], Sequence[Tuple[int, int]], None], - group: Union[Sequence[int], None], - group_colors: Union[Sequence[Any], None], - group_names: Union[Sequence[str], None], + group: Union[Sequence[Any], None], + group_colors: Union[Sequence[Any], TypedDict, None], + group_names: Union[Sequence[str], TypedDict, None], legend: bool = False, **kwargs: Any, ) -> Figure: From d936f2c0ef76108fc4f4533fe84016be8c00a8cb Mon Sep 17 00:00:00 2001 From: ElenaPetrunina <61758794+ElenaPetrunina@users.noreply.github.com> Date: Mon, 15 Mar 2021 18:24:43 +0100 Subject: [PATCH 144/417] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Ramos Carreño --- skfda/preprocessing/smoothing/kernel_smoothers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/preprocessing/smoothing/kernel_smoothers.py b/skfda/preprocessing/smoothing/kernel_smoothers.py index 5f099badc..2bf48ad03 100644 --- a/skfda/preprocessing/smoothing/kernel_smoothers.py +++ 
b/skfda/preprocessing/smoothing/kernel_smoothers.py @@ -85,7 +85,7 @@ class NadarayaWatsonSmoother(_LinearKernelSmoother): values :math:`\hat{X}` at the points :math:`(t_1', t_2', ..., t_m')` can be calculated as :math:`\hat{X} = \hat{H}X` where :math:`X` is the vector of observations at the points of discretisation - :math:`(t_1, t_2, ..., t_n)`. + :math:`(t_1, t_2, ..., t_n)` and .. math:: \hat{H}_{i,j} = \frac{K\left(\frac{t_j-t_i'}{h}\right)}{\sum_{k=1}^{ @@ -185,7 +185,7 @@ class LocalLinearRegressionSmoother(_LinearKernelSmoother): values :math:`\hat{X}` at the points :math:`(t_1', t_2', ..., t_m')` can be calculated as :math:`\hat{X} = \hat{H}X` where :math:`X` is the vector of observations at the points of discretisation - :math:`(t_1, t_2, ..., t_n)`. + :math:`(t_1, t_2, ..., t_n)` and .. math:: \hat{H}_{i,j} = \frac{b_j(t_i')}{\sum_{k=1}^{n}b_k(t_i')} @@ -292,7 +292,7 @@ class KNeighborsSmoother(_LinearKernelSmoother): The smoothed values :math:`\hat{X}` at the points :math:`(t_1', t_2', ..., t_m')` can be calculated as :math:`\hat{X} = \hat{H}X` where :math:`X` is the vector of observations - at the points of discretisation :math:`(t_1, t_2, ..., t_n)`. + at the points of discretisation :math:`(t_1, t_2, ..., t_n)` and .. math:: From 4da5e6411ae08c3b7f7b5e9d43e7f9ea8b693c52 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 18 Mar 2021 12:57:02 +0100 Subject: [PATCH 145/417] Typing synthetic datasets and grid. 
--- docs/conf.py | 5 + skfda/datasets/_samples_generators.py | 359 ++++++++++++++++---------- skfda/representation/_typing.py | 7 +- skfda/representation/grid.py | 95 +++---- 4 files changed, 284 insertions(+), 182 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 46bc37d2a..d2e2e6d63 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -244,6 +244,11 @@ autodoc_typehints = "description" napoleon_use_rtype = True +autodoc_type_aliases = { + "ArrayLike": "ArrayLike", + "GridPointsLike": "Union[ArrayLike, Sequence[ArrayLike]]", +} + # Napoleon fix for attributes # Taken from # https://michaelgoerz.net/notes/extending-sphinx-napoleon-docstring-sections.html diff --git a/skfda/datasets/_samples_generators.py b/skfda/datasets/_samples_generators.py index fd038c1ea..1fd21cc6b 100644 --- a/skfda/datasets/_samples_generators.py +++ b/skfda/datasets/_samples_generators.py @@ -1,51 +1,70 @@ -import scipy.integrate -from scipy.stats import multivariate_normal -import sklearn.utils +import itertools +from typing import Callable, Optional, Sequence, Union import numpy as np +import sklearn.utils + +import scipy.integrate +from scipy.stats import multivariate_normal from .. import FDataGrid -from .._utils import _cartesian_product +from .._utils import RandomStateLike, _cartesian_product, _to_grid_points from ..misc import covariances from ..preprocessing.registration import normalize_warping +from ..representation._typing import DomainRangeLike, GridPointsLike from ..representation.interpolation import SplineInterpolation +MeanCallable = Callable[[np.ndarray], np.ndarray] +CovarianceCallable = Callable[[np.ndarray, np.ndarray], np.ndarray] + +MeanLike = Union[float, np.ndarray, MeanCallable] +CovarianceLike = Union[None, np.ndarray, CovarianceCallable] -def make_gaussian(n_samples: int = 100, *, - grid_points, - domain_range=None, - mean=0, cov=None, noise: float = 0., - random_state=None): - """Generate Gaussian random fields. 
- - Args: - n_samples: The total number of trajectories. - grid_points: Sample points for the evaluation grid of the - Gaussian field. - mean: The mean function of the random field. Can be a callable - accepting a vector with the locations, or a vector with - appropriate size. - cov: The covariance function of the process. Can be a - callable accepting two vectors with the locations, or a - matrix with appropriate size. By default, - the Brownian covariance function is used. - noise: Standard deviation of Gaussian noise added to the data. - random_state: Random state. - - Returns: - :class:`FDataGrid` object comprising all the trajectories. - - See also: - :func:`make_gaussian_process`: Simpler function for generating - Gaussian processes. +def make_gaussian( + n_samples: int = 100, + *, + grid_points: GridPointsLike, + domain_range: Optional[DomainRangeLike] = None, + mean: MeanLike = 0, + cov: CovarianceLike = None, + noise: float = 0, + random_state: RandomStateLike = None, +) -> FDataGrid: """ + Generate Gaussian random fields. + Args: + n_samples: The total number of trajectories. + grid_points: Sample points for the evaluation grid of the + Gaussian field. + domain_range: The domain range of the returned functional + observations. + mean: The mean function of the random field. Can be a callable + accepting a vector with the locations, or a vector with + appropriate size. + cov: The covariance function of the process. Can be a + callable accepting two vectors with the locations, or a + matrix with appropriate size. By default, + the Brownian covariance function is used. + noise: Standard deviation of Gaussian noise added to the data. + random_state: Random state. + + Returns: + :class:`FDataGrid` object comprising all the trajectories. + + See also: + :func:`make_gaussian_process`: Simpler function for generating + Gaussian processes. 
+ + """ random_state = sklearn.utils.check_random_state(random_state) if cov is None: cov = covariances.Brownian() + grid_points = _to_grid_points(grid_points) + input_points = _cartesian_product(grid_points) covariance = covariances._execute_covariance( @@ -61,61 +80,85 @@ def make_gaussian(n_samples: int = 100, *, mu += np.ravel(mean) data_matrix = random_state.multivariate_normal( - mu.ravel(), covariance, n_samples) + mu.ravel(), + covariance, + n_samples, + ) data_matrix = data_matrix.reshape( - [n_samples] + [len(t) for t in grid_points] + [-1]) - - return FDataGrid(grid_points=grid_points, data_matrix=data_matrix, - domain_range=domain_range) + [n_samples] + [len(t) for t in grid_points] + [-1], + ) + + return FDataGrid( + grid_points=grid_points, + data_matrix=data_matrix, + domain_range=domain_range, + ) + + +def make_gaussian_process( + n_samples: int = 100, + n_features: int = 100, + *, + start: float = 0, + stop: float = 1, + mean: MeanLike = 0, + cov: CovarianceLike = None, + noise: float = 0, + random_state: RandomStateLike = None, +) -> FDataGrid: + """Generate Gaussian process trajectories. + Args: + n_samples: The total number of trajectories. + n_features: The total number of features (points of evaluation). + start: Starting point of the trajectories. + stop: Ending point of the trajectories. + mean: The mean function of the process. Can be a callable accepting + a vector with the locations, or a vector with length + ``n_features``. + cov: The covariance function of the process. Can be a + callable accepting two vectors with the locations, or a + matrix with size ``n_features`` x ``n_features``. By default, + the Brownian covariance function is used. + noise: Standard deviation of Gaussian noise added to the data. + random_state: Random state. 
-def make_gaussian_process(n_samples: int = 100, n_features: int = 100, *, - start: float = 0., stop: float = 1., - mean=0, cov=None, noise: float = 0., - random_state=None): - """Generate Gaussian process trajectories. + Returns: + :class:`FDataGrid` object comprising all the trajectories. - Args: - n_samples: The total number of trajectories. - n_features: The total number of features (points of evaluation). - start: Starting point of the trajectories. - stop: Ending point of the trajectories. - mean: The mean function of the process. Can be a callable accepting - a vector with the locations, or a vector with length - ``n_features``. - cov: The covariance function of the process. Can be a - callable accepting two vectors with the locations, or a - matrix with size ``n_features`` x ``n_features``. By default, - the Brownian covariance function is used. - noise: Standard deviation of Gaussian noise added to the data. - random_state: Random state. - - Returns: - :class:`FDataGrid` object comprising all the trajectories. - - See also: - :func:`make_gaussian`: More general function that allows to - select the points of evaluation and to - generate data in higer dimensions. + See also: + :func:`make_gaussian`: More general function that allows to + select the points of evaluation and to + generate data in higer dimensions. 
""" - t = np.linspace(start, stop, n_features) - return make_gaussian(n_samples=n_samples, - grid_points=[t], - mean=mean, cov=cov, - noise=noise, - random_state=random_state) - - -def make_sinusoidal_process(n_samples: int = 15, n_features: int = 100, *, - start: float = 0., stop: float = 1., - period: float = 1., phase_mean: float = 0., - phase_std: float = .6, amplitude_mean: float = 1., - amplitude_std: float = .05, error_std: float = .2, - random_state=None): + return make_gaussian( + n_samples=n_samples, + grid_points=[t], + mean=mean, + cov=cov, + noise=noise, + random_state=random_state, + ) + + +def make_sinusoidal_process( + n_samples: int = 15, + n_features: int = 100, + *, + start: float = 0, + stop: float = 1, + period: float = 1, + phase_mean: float = 0, + phase_std: float = 0.6, + amplitude_mean: float = 1, + amplitude_std: float = 0.05, + error_std: float = 0.2, + random_state: RandomStateLike = None, +) -> FDataGrid: r"""Generate sinusoidal proccess. Each sample :math:`x_i(t)` is generated as: @@ -146,16 +189,20 @@ def make_sinusoidal_process(n_samples: int = 15, n_features: int = 100, *, :class:`FDataGrid` object comprising all the samples. 
""" - random_state = sklearn.utils.check_random_state(random_state) t = np.linspace(start, stop, n_features) - alpha = np.diag(random_state.normal(amplitude_mean, amplitude_std, - n_samples)) + alpha = np.diag(random_state.normal( + amplitude_mean, + amplitude_std, + n_samples, + )) - phi = np.outer(random_state.normal(phase_mean, phase_std, n_samples), - np.ones(n_features)) + phi = np.outer( + random_state.normal(phase_mean, phase_std, n_samples), + np.ones(n_features), + ) error = random_state.normal(0, error_std, (n_samples, n_features)) @@ -164,10 +211,17 @@ def make_sinusoidal_process(n_samples: int = 15, n_features: int = 100, *, return FDataGrid(grid_points=t, data_matrix=y) -def make_multimodal_landmarks(n_samples: int = 15, *, n_modes: int = 1, - dim_domain: int = 1, dim_codomain: int = 1, - start: float = -1, stop: float = 1, - std: float = .05, random_state=None): +def make_multimodal_landmarks( + n_samples: int = 15, + *, + n_modes: int = 1, + dim_domain: int = 1, + dim_codomain: int = 1, + start: float = -1, + stop: float = 1, + std: float = 0.05, + random_state: RandomStateLike = None, +) -> np.ndarray: """Generate landmarks points. Used by :func:`make_multimodal_samples` to generate the location of the @@ -196,30 +250,43 @@ def make_multimodal_landmarks(n_samples: int = 15, *, n_modes: int = 1, :class:`np.ndarray` with the location of the modes, where the component (i,j,k) corresponds to the mode k of the image dimension j of the sample i. 
- """ + """ random_state = sklearn.utils.check_random_state(random_state) modes_location = np.linspace(start, stop, n_modes + 2)[1:-1] - modes_location = np.repeat(modes_location[:, np.newaxis], dim_domain, - axis=1) - - variation = random_state.multivariate_normal((0,) * dim_domain, - std * np.eye(dim_domain), - size=(n_samples, - dim_codomain, - n_modes)) + modes_location = np.repeat( + modes_location[:, np.newaxis], + dim_domain, + axis=1, + ) + + variation = random_state.multivariate_normal( + (0,) * dim_domain, + std * np.eye(dim_domain), + size=(n_samples, dim_codomain, n_modes), + ) return modes_location + variation -def make_multimodal_samples(n_samples: int = 15, *, n_modes: int = 1, - points_per_dim: int = 100, dim_domain: int = 1, - dim_codomain: int = 1, start: float = -1, - stop: float = 1., std: float = .05, - mode_std: float = .02, noise: float = .0, - modes_location=None, random_state=None): - r"""Generate multimodal samples. +def make_multimodal_samples( + n_samples: int = 15, + *, + n_modes: int = 1, + points_per_dim: int = 100, + dim_domain: int = 1, + dim_codomain: int = 1, + start: float = -1, + stop: float = 1, + std: float = 0.05, + mode_std: float = 0.02, + noise: float = 0, + modes_location: Optional[Sequence[float]] = None, + random_state: RandomStateLike = None, +) -> FDataGrid: + r""" + Generate multimodal samples. Each sample :math:`x_i(t)` is proportional to a gaussian mixture, generated as the sum of multiple pdf of multivariate normal distributions with @@ -256,26 +323,28 @@ def make_multimodal_samples(n_samples: int = 15, *, n_modes: int = 1, Returns: :class:`FDataGrid` object comprising all the samples. 
- """ + """ random_state = sklearn.utils.check_random_state(random_state) if modes_location is None: - location = make_multimodal_landmarks(n_samples=n_samples, - n_modes=n_modes, - dim_domain=dim_domain, - dim_codomain=dim_codomain, - start=start, - stop=stop, - std=std, - random_state=random_state) + location = make_multimodal_landmarks( + n_samples=n_samples, + n_modes=n_modes, + dim_domain=dim_domain, + dim_codomain=dim_codomain, + start=start, + stop=stop, + std=std, + random_state=random_state, + ) else: - location = np.asarray(modes_location) - shape = (n_samples, dim_codomain, n_modes, dim_domain) - location = location.reshape(shape) + location = np.asarray(modes_location).reshape( + (n_samples, dim_codomain, n_modes, dim_domain), + ) axis = np.linspace(start, stop, points_per_dim) @@ -299,13 +368,16 @@ def make_multimodal_samples(n_samples: int = 15, *, n_modes: int = 1, # Covariance matrix of the samples cov = mode_std * np.eye(dim_domain) - import itertools - for i, j, k in itertools.product(range(n_samples), - range(dim_codomain), - range(n_modes)): - data_matrix[i, ..., j] += multivariate_normal.pdf(evaluation_grid, - location[i, j, k], - cov) + for i, j, k in itertools.product( + range(n_samples), + range(dim_codomain), + range(n_modes), + ): + data_matrix[i, ..., j] += multivariate_normal.pdf( + evaluation_grid, + location[i, j, k], + cov, + ) # Constant to make modes value aprox. 
1 data_matrix *= (2 * np.pi * mode_std) ** (dim_domain / 2) @@ -315,16 +387,23 @@ def make_multimodal_samples(n_samples: int = 15, *, n_modes: int = 1, return FDataGrid(grid_points=grid_points, data_matrix=data_matrix) -def make_random_warping(n_samples: int = 15, n_features: int = 100, *, - start: float = 0., stop: float = 1., sigma: float = 1., - shape_parameter: float = 50, n_random: int = 4, - random_state=None): +def make_random_warping( + n_samples: int = 15, + n_features: int = 100, + *, + start: float = 0, + stop: float = 1, + sigma: float = 1, + shape_parameter: float = 50, + n_random: int = 4, + random_state: RandomStateLike = None, +) -> FDataGrid: r"""Generate random warping functions. - Let :math:`v(t)` be a randomly generated function defined in :math:`[0,1]` + Let :math:`v(t)` be a randomly generated function defined in :math:`[0,1]` - .. math:: - v(t) = \sum_{j=0}^{N} a_j \sin(\frac{2 \pi j}{K}t) + b_j + .. math:: + sv(t) = \sum_{j=0}^{N} a_j \sin(\frac{2 \pi j}{K}t) + b_j \cos(\frac{2 \pi j}{K}t) where :math:`a_j, b_j \sim N(0, \sigma)`. @@ -357,9 +436,9 @@ def make_random_warping(n_samples: int = 15, n_features: int = 100, *, random_state: Random state. Returns: - :class:`FDataGrid` object comprising all the samples. + Object comprising all the samples. - """ + """ # Based on the original implementation of J. D. Tucker in the # package python_fdasrsf . 
@@ -376,8 +455,10 @@ def make_random_warping(n_samples: int = 15, n_features: int = 100, *, time = np.outer(np.linspace(0, 1, n_features), np.ones(n_samples)) # Operates trasposed to broadcast dimensions - v = np.outer(np.ones(n_features), - random_state.normal(scale=sqrt_sigma, size=n_samples)) + v = np.outer( + np.ones(n_features), + random_state.normal(scale=sqrt_sigma, size=n_samples), + ) for j in range(2, 2 + n_random): alpha = random_state.normal(scale=sqrt_sigma, size=(2, n_samples)) @@ -397,11 +478,17 @@ def make_random_warping(n_samples: int = 15, n_features: int = 100, *, np.square(v, out=v) # Creation of FDataGrid in the corresponding domain - data_matrix = scipy.integrate.cumtrapz(v, dx=1. / n_features, initial=0, - axis=0) + data_matrix = scipy.integrate.cumtrapz( + v, + dx=1 / n_features, + initial=0, + axis=0, + ) warping = FDataGrid(data_matrix.T, grid_points=time[:, 0]) warping = normalize_warping(warping, domain_range=(start, stop)) - warping.interpolation = SplineInterpolation(interpolation_order=3, - monotone=True) + warping.interpolation = SplineInterpolation( + interpolation_order=3, + monotone=True, + ) return warping diff --git a/skfda/representation/_typing.py b/skfda/representation/_typing.py index 1c14ffbdb..c7228b3d6 100644 --- a/skfda/representation/_typing.py +++ b/skfda/representation/_typing.py @@ -4,6 +4,11 @@ import numpy as np from typing_extensions import Protocol +try: + from numpy.typing import ArrayLike +except ImportError: + ArrayLike = np.ndarray # type:ignore + VectorType = TypeVar("VectorType") DomainRange = Tuple[Tuple[float, float], ...] @@ -17,7 +22,7 @@ LabelTupleLike = Sequence[Optional[str]] GridPoints = Tuple[np.ndarray, ...] 
-GridPointsLike = Sequence[np.ndarray] +GridPointsLike = Union[ArrayLike, Sequence[ArrayLike]] class Vector(Protocol): diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 5475cda49..bec9b5e26 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -15,9 +15,10 @@ import findiff import numpy as np import pandas.api.extensions -import scipy.stats.mstats from matplotlib.figure import Figure +import scipy.stats.mstats + from .._utils import ( _check_array_key, _int_to_real, @@ -27,6 +28,7 @@ ) from ._functional_data import FData from ._typing import ( + ArrayLike, DomainRange, DomainRangeLike, GridPoints, @@ -122,7 +124,7 @@ class FDataGrid(FData): # noqa: WPS214 def __init__( # noqa: WPS211 self, - data_matrix: np.ndarray, + data_matrix: ArrayLike, grid_points: Optional[GridPointsLike] = None, *, sample_points: Optional[GridPointsLike] = None, @@ -149,7 +151,7 @@ def __init__( # noqa: WPS211 if grid_points is None: self.grid_points = _to_grid_points([ - np.linspace(0.0, 1.0, self.data_matrix.shape[i]) + np.linspace(0, 1, self.data_matrix.shape[i]) for i in range(1, self.data_matrix.ndim) ]) @@ -169,8 +171,8 @@ def __init__( # noqa: WPS211 f"points have shape {grid_points_shape}", ) - self._sample_range = np.array( - [(s[0], s[-1]) for s in self.grid_points], + self._sample_range = tuple( + (s[0], s[-1]) for s in self.grid_points ) if domain_range is None: @@ -527,7 +529,7 @@ def var(self: T) -> T: """ return self.copy( - data_matrix=[np.var(self.data_matrix, 0)], + data_matrix=np.array([np.var(self.data_matrix, 0)]), sample_names=("variance",), ) @@ -610,7 +612,7 @@ def equals(self, other: Any) -> bool: return True - def __eq__(self, other: Any) -> np.ndarray: + def __eq__(self, other: Any) -> np.ndarray: # type: ignore[override] """Elementwise equality of FDataGrid.""" if not isinstance(other, type(self)) or self.dtype != other.dtype: if other is pandas.NA: @@ -637,9 +639,9 @@ def __eq__(self, other: Any) -> 
np.ndarray: def _get_op_matrix( self, other: Union[T, np.ndarray, float], - ) -> Optional[np.ndarray]: + ) -> Union[None, float, np.ndarray]: if isinstance(other, numbers.Number): - return other + return float(other) elif isinstance(other, np.ndarray): if other.shape in {(), (1,)}: @@ -892,7 +894,7 @@ def copy( # noqa: WPS211 self: T, *, deep: bool = False, # For Pandas compatibility - data_matrix: Optional[np.ndarray] = None, + data_matrix: Optional[ArrayLike] = None, grid_points: Optional[GridPointsLike] = None, sample_points: Optional[GridPointsLike] = None, domain_range: Optional[DomainRangeLike] = None, @@ -968,7 +970,7 @@ def shift( *, restrict_domain: bool = False, extrapolation: Optional[Union[str, Evaluator]] = None, - eval_points: np.ndarray = None, + eval_points: Optional[np.ndarray] = None, ) -> T: """Perform a shift of the curves. @@ -993,45 +995,50 @@ def shift( :class:`FDataGrid` with the shifted data. """ - if np.isscalar(shifts): - shifts = [shifts] - - shifts = np.array(shifts) + arr_shifts = np.array([shifts] if np.isscalar(shifts) else shifts) # Case unidimensional treated as the multidimensional - if self.dim_domain == 1 and shifts.ndim == 1 and shifts.shape[0] != 1: - shifts = shifts[:, np.newaxis] + if ( + self.dim_domain == 1 + and arr_shifts.ndim == 1 + and arr_shifts.shape[0] != 1 + ): + arr_shifts = arr_shifts[:, np.newaxis] # Case same shift for all the curves - if shifts.shape[0] == self.dim_domain and shifts.ndim == 1: + if arr_shifts.shape[0] == self.dim_domain and arr_shifts.ndim == 1: # Column vector with shapes - shifts = np.atleast_2d(shifts).T + arr_shifts = np.atleast_2d(arr_shifts).T - grid_points = self.grid_points + shifts - domain_range = self.domain_range + shifts + grid_points = self.grid_points + arr_shifts + domain_range = self.domain_range + arr_shifts return self.copy( grid_points=grid_points, domain_range=domain_range, ) - if shifts.shape[0] != self.n_samples: + if arr_shifts.shape[0] != self.n_samples: raise 
ValueError( - f"shifts vector ({shifts.shape[0]}) must have the" + f"shifts vector ({arr_shifts.shape[0]}) must have the" f" same length than the number of samples " f"({self.n_samples})", ) - if eval_points is None: - eval_points = self.grid_points - else: - eval_points = np.atleast_2d(eval_points) + eval_points = ( + self.grid_points if eval_points is None + else np.atleast_2d(eval_points) + ) if restrict_domain: domain = np.asarray(self.domain_range) - a = domain[:, 0] - np.atleast_1d(np.min(np.min(shifts, axis=1), 0)) - b = domain[:, 1] - np.atleast_1d(np.max(np.max(shifts, axis=1), 0)) + a = domain[:, 0] - np.atleast_1d( + np.min(np.min(arr_shifts, axis=1), 0), + ) + b = domain[:, 1] - np.atleast_1d( + np.max(np.max(arr_shifts, axis=1), 0), + ) domain = np.vstack((a, b)).T @@ -1057,15 +1064,15 @@ def shift( # Solve problem with cartesian and matrix indexing if self.dim_domain > 1: - shifts[:, :2] = np.flip(shifts[:, :2], axis=1) + arr_shifts[:, :2] = np.flip(arr_shifts[:, :2], axis=1) - shifts = np.repeat( - shifts[..., np.newaxis], + arr_shifts = np.repeat( + arr_shifts[..., np.newaxis], eval_points.shape[1], axis=2, ) - eval_points_shifted = eval_points_repeat + shifts + eval_points_shifted = eval_points_repeat + arr_shifts data_matrix = self.evaluate( eval_points_shifted, @@ -1084,7 +1091,7 @@ def compose( self: T, fd: T, *, - eval_points: np.ndarray = None, + eval_points: Optional[GridPointsLike] = None, ) -> T: """Composition of functions. 
@@ -1172,14 +1179,14 @@ def __repr__(self) -> str: """Return repr(self).""" return ( f"FDataGrid(" # noqa: WPS221 - f"\n{repr(self.data_matrix)}," - f"\ngrid_points={repr(self.grid_points)}," - f"\ndomain_range={repr(self.domain_range)}," - f"\ndataset_name={repr(self.dataset_name)}," - f"\nargument_names={repr(self.argument_names)}," - f"\ncoordinate_names={repr(self.coordinate_names)}," - f"\nextrapolation={repr(self.extrapolation)}," - f"\ninterpolation={repr(self.interpolation)})" + f"\n{self.data_matrix!r}," + f"\ngrid_points={self.grid_points!r}," + f"\ndomain_range={self.domain_range!r}," + f"\ndataset_name={self.dataset_name!r}," + f"\nargument_names={self.argument_names!r}," + f"\ncoordinate_names={self.coordinate_names!r}," + f"\nextrapolation={self.extrapolation!r}," + f"\ninterpolation={self.interpolation!r})" ).replace( '\n', '\n ', @@ -1297,9 +1304,7 @@ def __init__( self.grid_points = tuple(tuple(s) for s in grid_points) if domain_range is None: - domain_range = np.array( - [(s[0], s[-1]) for s in self.grid_points], - ) + domain_range = tuple((s[0], s[-1]) for s in self.grid_points) self.domain_range = _to_domain_range(domain_range) self.dim_codomain = dim_codomain From 9ce70889c81deacb7f0877c4f0817f6eef421d57 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Fri, 19 Mar 2021 19:10:32 +0100 Subject: [PATCH 146/417] Typing for interpolation. 
--- skfda/_utils/_utils.py | 22 +- skfda/representation/evaluator.py | 13 +- skfda/representation/interpolation.py | 413 ++++++++++++++++---------- 3 files changed, 282 insertions(+), 166 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 5f480c17f..c8481b78e 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -17,10 +17,12 @@ ) import numpy as np -import scipy.integrate from pandas.api.indexers import check_array_indexer +import scipy.integrate + from ..representation._typing import ( + ArrayLike, DomainRange, DomainRangeLike, GridPoints, @@ -156,7 +158,11 @@ def _to_domain_range(sequence: DomainRangeLike) -> DomainRange: return cast(DomainRange, tuple_aux) -def _to_array_maybe_ragged(array, *, row_shape=None): +def _to_array_maybe_ragged( + array: Sequence[ArrayLike], + *, + row_shape: Optional[Sequence[int]] = None, +) -> np.ndarray: """ Convert to an array where each element may or may not be of equal length. @@ -164,7 +170,7 @@ def _to_array_maybe_ragged(array, *, row_shape=None): Otherwise it is a ragged array. """ - def convert_row(row): + def convert_row(row: ArrayLike) -> np.ndarray: r = np.array(row) if row_shape is not None: @@ -177,13 +183,13 @@ def convert_row(row): if all(s == shapes[0] for s in shapes): return np.array(array_list) - else: - res = np.empty(len(array_list), dtype=np.object_) - for i, a in enumerate(array_list): - res[i] = a + res = np.empty(len(array_list), dtype=np.object_) - return res + for i, a in enumerate(array_list): + res[i] = a + + return res def _cartesian_product( diff --git a/skfda/representation/evaluator.py b/skfda/representation/evaluator.py index 24ad1f1bc..a167c2d2c 100644 --- a/skfda/representation/evaluator.py +++ b/skfda/representation/evaluator.py @@ -5,12 +5,17 @@ evaluation of FDataGrids. 
""" +from __future__ import annotations + from abc import ABC, abstractmethod -from typing import Any, Callable +from typing import TYPE_CHECKING, Any import numpy as np from typing_extensions import Protocol +if TYPE_CHECKING: + from . import FData + class Evaluator(ABC): """ @@ -32,7 +37,7 @@ class Evaluator(ABC): @abstractmethod def evaluate( self, - fdata: Callable[[np.ndarray], np.ndarray], + fdata: FData, eval_points: np.ndarray, *, aligned: bool = True, @@ -76,7 +81,7 @@ class EvaluateFunction(Protocol): def __call__( self, - fdata: Callable[[np.ndarray], np.ndarray], + fdata: FData, eval_points: np.ndarray, *, aligned: bool = True, @@ -122,7 +127,7 @@ def __init__(self, evaluate_function: EvaluateFunction) -> None: def evaluate( # noqa: D102 self, - fdata: Callable[[np.ndarray], np.ndarray], + fdata: FData, eval_points: np.ndarray, *, aligned: bool = True, diff --git a/skfda/representation/interpolation.py b/skfda/representation/interpolation.py index 0384b5dd0..12a0e87a3 100644 --- a/skfda/representation/interpolation.py +++ b/skfda/representation/interpolation.py @@ -1,62 +1,107 @@ """ Module to interpolate functional data objects. """ - +from __future__ import annotations import abc - -from scipy.interpolate import (PchipInterpolator, UnivariateSpline, - RectBivariateSpline, RegularGridInterpolator) +from typing import TYPE_CHECKING, Any, Callable, Sequence, Tuple, Union import numpy as np +from scipy.interpolate import ( + PchipInterpolator, + RectBivariateSpline, + RegularGridInterpolator, + UnivariateSpline, +) + from .._utils import _to_array_maybe_ragged from .evaluator import Evaluator +if TYPE_CHECKING: + from . 
import FData + +SplineCallable = Callable[..., np.ndarray] + class _SplineList(abc.ABC): - r"""ABC for list of interpolations.""" + """ABC for list of interpolations.""" - def __init__(self, fdatagrid, - interpolation_order=1, - smoothness_parameter=0.): + def __init__( + self, + fdatagrid: FData, + interpolation_order: Union[int, Sequence[int]] = 1, + smoothness_parameter: float = 0, + ): super().__init__() self.fdatagrid = fdatagrid self.interpolation_order = interpolation_order self.smoothness_parameter = smoothness_parameter + self.splines: Sequence[Sequence[SplineCallable]] + + # @abc.abstractmethod + # @property + # def splines(self) -> Sequence[SplineCallable]: + # pass @abc.abstractmethod - def _evaluate_one(self, spl, t, derivative=0): - """Evaluates one spline of the list.""" + def _evaluate_one( + self, + spline: SplineCallable, + eval_points: np.ndarray, + ) -> np.ndarray: + """Evaluate one spline of the list.""" pass - def _evaluate_codomain(self, spl_m, t, derivative=0): - """Evaluator of multidimensional sample""" - return np.array([self._evaluate_one(spl, t, derivative) - for spl in spl_m]).T - - def evaluate(self, fdata, eval_points, *, derivative=0, aligned=True): + def _evaluate_codomain( + self, + spline_list: Sequence[SplineCallable], + eval_points: np.ndarray, + ) -> np.ndarray: + """Evaluate a multidimensional sample.""" + return np.array([ + self._evaluate_one(spl, eval_points) + for spl in spline_list + ]).T + + def evaluate( + self, + fdata: FData, + eval_points: np.ndarray, + *, + aligned: bool = True, + ) -> np.ndarray: + + res: np.ndarray if aligned: # Points evaluated inside the domain res = np.apply_along_axis( - self._evaluate_codomain, 1, - self.splines, eval_points, derivative) - res = res.reshape(fdata.n_samples, eval_points.shape[0], - fdata.dim_codomain) + self._evaluate_codomain, + 1, + self.splines, + eval_points, + ) + + res = res.reshape( + fdata.n_samples, + eval_points.shape[0], + fdata.dim_codomain, + ) else: - res = 
_to_array_maybe_ragged([self._evaluate_codomain( - s, e, derivative=derivative) - for s, e in zip(self.splines, eval_points)]) + res = _to_array_maybe_ragged([ + self._evaluate_codomain(s, e) + for s, e in zip(self.splines, eval_points) + ]) return res class _SplineList1D(_SplineList): - r"""List of interpolations for curves. + """List of interpolations for curves. List of interpolations for objects with domain dimension = 1. Calling internally during the creation of the @@ -92,33 +137,45 @@ class _SplineList1D(_SplineList): """ - def __init__(self, fdatagrid, - interpolation_order=1, - smoothness_parameter=0., - monotone=False): + def __init__( + self, + fdatagrid: FData, + interpolation_order: Union[int, Sequence[int]] = 1, + smoothness_parameter: float = 0, + monotone: bool = False, + ): super().__init__( fdatagrid=fdatagrid, interpolation_order=interpolation_order, - smoothness_parameter=smoothness_parameter) + smoothness_parameter=smoothness_parameter, + ) self.monotone = monotone - if self.interpolation_order > 5 or self.interpolation_order < 1: - raise ValueError(f"Invalid degree of interpolation " - f"({self.interpolation_order}). Must be " - f"an integer greater than 0 and lower or " - f"equal than 5.") + if ( + isinstance(self.interpolation_order, Sequence) + or not 1 <= self.interpolation_order <= 5 + ): + raise ValueError( + f"Invalid degree of interpolation " + f"({self.interpolation_order}). 
Must be " + f"an integer greater than 0 and lower or " + f"equal than 5.", + ) if self.monotone and self.smoothness_parameter != 0: - raise ValueError("Smoothing interpolation is not supported with " - "monotone interpolation") - - if self.monotone and (self.interpolation_order == 2 - or self.interpolation_order == 4): - raise ValueError(f"monotone interpolation of degree " - f"{self.interpolation_order}" - f"not supported.") + raise ValueError( + "Smoothing interpolation is not supported with " + "monotone interpolation", + ) + + if self.monotone and self.interpolation_order in {2, 4}: + raise ValueError( + f"monotone interpolation of degree " + f"{self.interpolation_order}" + f"not supported.", + ) # Monotone interpolation of degree 1 is performed with linear spline monotone = self.monotone @@ -128,31 +185,44 @@ def __init__(self, fdatagrid, grid_points = fdatagrid.grid_points[0] if monotone: - def constructor(data): - """Constructs an unidimensional cubic monotone interpolation""" + def constructor( # noqa: WPS430 + data: np.ndarray, + ) -> SplineCallable: + """Construct an unidimensional cubic monotone interpolation.""" return PchipInterpolator(grid_points, data) else: - def constructor(data): - """Constructs an unidimensional interpolation""" + def constructor( # noqa: WPS430, WPS440 + data: np.ndarray, + ) -> SplineCallable: + """Construct an unidimensional interpolation.""" return UnivariateSpline( - grid_points, data, + grid_points, + data, s=self.smoothness_parameter, - k=self.interpolation_order) + k=self.interpolation_order, + ) self.splines = np.apply_along_axis( - constructor, 1, fdatagrid.data_matrix) - - def _evaluate_one(self, spl, t, derivative=0): + constructor, + 1, + fdatagrid.data_matrix, + ) + + def _evaluate_one( + self, + spline: SplineCallable, + eval_points: np.ndarray, + ) -> np.ndarray: try: - return spl(t, derivative)[:, 0] + return spline(eval_points)[:, 0] except ValueError: - return np.zeros_like(t) + return 
np.zeros_like(eval_points) class _SplineList2D(_SplineList): - r"""List of interpolations for surfaces. + """List of interpolations for surfaces. List of interpolations for objects with domain dimension = 2. Calling internally during the creationg of the @@ -187,54 +257,70 @@ class _SplineList2D(_SplineList): """ - def __init__(self, fdatagrid, - interpolation_order=1, - smoothness_parameter=0.): + def __init__( + self, + fdatagrid: FData, + interpolation_order: Union[int, Sequence[int]] = 1, + smoothness_parameter: float = 0, + ): super().__init__( fdatagrid=fdatagrid, interpolation_order=interpolation_order, - smoothness_parameter=smoothness_parameter) + smoothness_parameter=smoothness_parameter, + ) - if np.isscalar(self.interpolation_order): - kx = ky = self.interpolation_order - elif len(self.interpolation_order) != 2: - raise ValueError("k should be numeric or a tuple of length 2.") - else: + if isinstance(self.interpolation_order, int): + kx = self.interpolation_order + ky = kx + elif len(self.interpolation_order) == 2: kx = self.interpolation_order[0] ky = self.interpolation_order[1] + else: + raise ValueError("k should be numeric or a tuple of length 2.") if kx > 5 or kx <= 0 or ky > 5 or ky <= 0: - raise ValueError(f"Invalid degree of interpolation ({kx},{ky}). " - f"Must be an integer greater than 0 and lower or " - f"equal than 5.") + raise ValueError( + f"Invalid degree of interpolation ({kx},{ky}). 
" + f"Must be an integer greater than 0 and lower or " + f"equal than 5.", + ) # Matrix of splines - self.splines = np.empty( - (fdatagrid.n_samples, fdatagrid.dim_codomain), dtype=object) + splines = np.empty( + (fdatagrid.n_samples, fdatagrid.dim_codomain), + dtype=object, + ) for i in range(fdatagrid.n_samples): for j in range(fdatagrid.dim_codomain): - self.splines[i, j] = RectBivariateSpline( + splines[i, j] = RectBivariateSpline( fdatagrid.grid_points[0], fdatagrid.grid_points[1], fdatagrid.data_matrix[i, :, :, j], - kx=kx, ky=ky, - s=self.smoothness_parameter) + kx=kx, + ky=ky, + s=self.smoothness_parameter, + ) + + self.splines = splines - def _evaluate_one(self, spl, t, derivative=0): - if np.isscalar(derivative): - derivative = 2 * [derivative] - elif len(derivative) != 2: - raise ValueError("derivative should be a numeric value " - "or a tuple of length 2 with (dx,dy).") + def _evaluate_one( + self, + spline: SplineCallable, + eval_points: np.ndarray, + ) -> np.ndarray: - return spl(t[:, 0], t[:, 1], dx=derivative[0], dy=derivative[1], - grid=False) + return spline( + eval_points[:, 0], + eval_points[:, 1], + grid=False, + ) class _SplineListND(_SplineList): - r"""List of interpolations. + """ + List of interpolations. List of interpolations for objects with domain dimension > 2. 
Calling internally during the creationg of the @@ -259,18 +345,23 @@ class _SplineListND(_SplineList): """ - def __init__(self, fdatagrid, - interpolation_order=1, - smoothness_parameter=0.): - + def __init__( + self, + fdatagrid: FData, + interpolation_order: Union[int, Sequence[int]] = 1, + smoothness_parameter: float = 0, + ) -> None: super().__init__( fdatagrid=fdatagrid, interpolation_order=interpolation_order, - smoothness_parameter=smoothness_parameter) + smoothness_parameter=smoothness_parameter, + ) if self.smoothness_parameter != 0: - raise ValueError("Smoothing interpolation is only supported with " - "domain dimension up to 2, s should be 0.") + raise ValueError( + "Smoothing interpolation is only supported with " + "domain dimension up to 2.", + ) # Parses method of interpolation if self.interpolation_order == 0: @@ -278,29 +369,38 @@ def __init__(self, fdatagrid, elif self.interpolation_order == 1: method = 'linear' else: - raise ValueError("interpolation order should be 0 (nearest) or 1 " - "(linear).") + raise ValueError( + "interpolation order should be 0 (nearest) or 1 (linear).", + ) - self.splines = np.empty( - (fdatagrid.n_samples, fdatagrid.dim_codomain), dtype=object) + splines = np.empty( + (fdatagrid.n_samples, fdatagrid.dim_codomain), + dtype=object, + ) for i in range(fdatagrid.n_samples): for j in range(fdatagrid.dim_codomain): - self.splines[i, j] = RegularGridInterpolator( - fdatagrid.grid_points, fdatagrid.data_matrix[i, ..., j], - method, False) + splines[i, j] = RegularGridInterpolator( + fdatagrid.grid_points, + fdatagrid.data_matrix[i, ..., j], + method=method, + bounds_error=False, + ) - def _evaluate_one(self, spl, t, derivative=0): + self.splines = splines - if derivative != 0: - raise ValueError("derivates not suported for functional data " - " with domain dimension greater than 2.") + def _evaluate_one( + self, + spline: SplineCallable, + eval_points: np.ndarray, + ) -> np.ndarray: - return spl(t) + return spline(eval_points) 
class SplineInterpolation(Evaluator): - r"""Spline interpolation of :class:`FDataGrid`. + """ + Spline interpolation. Spline interpolation of discretized functional objects. Implements different interpolation methods based in splines, using the sample @@ -326,88 +426,93 @@ class SplineInterpolation(Evaluator): """ - def __init__(self, interpolation_order=1, *, smoothness_parameter=0., - monotone=False): - r"""Constructor of the SplineInterpolation. - - Args: - interpolation_order (int, optional): Order of the interpolation, 1 - for linear interpolation, 2 for cuadratic, 3 for cubic and so - on. In case of curves and surfaces there is available - interpolation up to degree 5. For higher dimensional objects - only linear or nearest interpolation is available. Default - lineal interpolation. - smoothness_parameter (float, optional): Penalisation to perform - smoothness interpolation. Option only available for curves and - surfaces. If 0 the residuals of the interpolation will be 0. - Defaults 0. - monotone (boolean, optional): Performs monotone interpolation in - curves using a PCHIP interpolation. Only valid for curves - (domain dimension equal to 1) and interpolation order equal - to 1 or 3. - Defaults false. 
- - """ + def __init__( + self, + interpolation_order: Union[int, Sequence[int]] = 1, + *, + smoothness_parameter: float = 0, + monotone: bool = False, + ) -> None: self._interpolation_order = interpolation_order self._smoothness_parameter = smoothness_parameter self._monotone = monotone @property - def interpolation_order(self): - "Returns the interpolation order" - return self._interpolation_order + def interpolation_order(self) -> Union[int, Tuple[int, ...]]: + """Interpolation order.""" + + return ( + self._interpolation_order + if isinstance(self._interpolation_order, int) + else tuple(self._interpolation_order) + ) @property - def smoothness_parameter(self): - "Returns the smoothness parameter" + def smoothness_parameter(self) -> float: + """Smoothness parameter.""" return self._smoothness_parameter @property - def monotone(self): - "Returns flag to perform monotone interpolation" + def monotone(self) -> bool: + """Flag to perform monotone interpolation.""" return self._monotone - def _build_interpolator(self, fdatagrid): + def _build_interpolator( + self, + fdatagrid: FData, + ) -> _SplineList: if fdatagrid.dim_domain == 1: return _SplineList1D( fdatagrid=fdatagrid, interpolation_order=self.interpolation_order, smoothness_parameter=self.smoothness_parameter, - monotone=self.monotone) + monotone=self.monotone, + ) elif self.monotone: - raise ValueError("Monotone interpolation is only supported with " - "domain dimension equal to 1.") + raise ValueError( + "Monotone interpolation is only supported with " + "domain dimension equal to 1.", + ) elif fdatagrid.dim_domain == 2: return _SplineList2D( fdatagrid=fdatagrid, interpolation_order=self.interpolation_order, - smoothness_parameter=self.smoothness_parameter) - - else: - return _SplineListND( - fdatagrid=fdatagrid, - interpolation_order=self.interpolation_order, - smoothness_parameter=self.smoothness_parameter) + smoothness_parameter=self.smoothness_parameter, + ) - def evaluate(self, fdata, eval_points, *, 
aligned=True): + return _SplineListND( + fdatagrid=fdatagrid, + interpolation_order=self.interpolation_order, + smoothness_parameter=self.smoothness_parameter, + ) + + def evaluate( # noqa: D102 + self, + fdata: FData, + eval_points: np.ndarray, + *, + aligned: bool = True, + ) -> np.ndarray: spline_list = self._build_interpolator(fdata) return spline_list.evaluate(fdata, eval_points, aligned=aligned) - def __repr__(self): - """repr method of the interpolation""" - return (f"{type(self).__name__}(" - f"interpolation_order={self.interpolation_order}, " - f"smoothness_parameter={self.smoothness_parameter}, " - f"monotone={self.monotone})") - - def __eq__(self, other): - """Equality operator between SplineInterpolation""" - return (super().__eq__(other) and - self.interpolation_order == other.interpolation_order and - self.smoothness_parameter == other.smoothness_parameter and - self.monotone == other.monotone) + def __repr__(self) -> str: + return ( + f"{type(self).__name__}(" + f"interpolation_order={self.interpolation_order}, " + f"smoothness_parameter={self.smoothness_parameter}, " + f"monotone={self.monotone})" + ) + + def __eq__(self, other: Any) -> bool: + return ( + super().__eq__(other) + and self.interpolation_order == other.interpolation_order + and self.smoothness_parameter == other.smoothness_parameter + and self.monotone == other.monotone + ) From 9e3929e209decedf4486b6095678ca626ec09bf2 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 20 Mar 2021 13:09:13 +0100 Subject: [PATCH 147/417] Typing extrapolation. --- skfda/representation/extrapolation.py | 110 ++++++++++++++++---------- 1 file changed, 69 insertions(+), 41 deletions(-) diff --git a/skfda/representation/extrapolation.py b/skfda/representation/extrapolation.py index e9e761546..7cbb00324 100644 --- a/skfda/representation/extrapolation.py +++ b/skfda/representation/extrapolation.py @@ -3,19 +3,22 @@ Defines methods to evaluate points outside the :term:`domain` range. 
""" +from __future__ import annotations -from typing import Optional, Union +from typing import TYPE_CHECKING, Any, NoReturn, Optional, Union import numpy as np from .evaluator import Evaluator +if TYPE_CHECKING: + from . import FData + class PeriodicExtrapolation(Evaluator): - """Extends the :term:`domain` range periodically. + """Extend the :term:`domain` range periodically. Examples: - >>> from skfda.datasets import make_sinusoidal_process >>> from skfda.representation.extrapolation import ( ... PeriodicExtrapolation) @@ -44,7 +47,13 @@ class PeriodicExtrapolation(Evaluator): [-1.086]]]) """ - def evaluate(self, fdata, eval_points, *, aligned=True): + def evaluate( # noqa: D102 + self, + fdata: FData, + eval_points: np.ndarray, + *, + aligned: bool = True, + ) -> np.ndarray: domain_range = np.asarray(fdata.domain_range) @@ -53,16 +62,13 @@ def evaluate(self, fdata, eval_points, *, aligned=True): eval_points %= domain_range[:, 1] - domain_range[:, 0] eval_points += domain_range[:, 0] - res = fdata(eval_points, aligned=aligned) - - return res + return fdata(eval_points, aligned=aligned) class BoundaryExtrapolation(Evaluator): - """Extends the :term:`domain` range using the boundary values. + """Extend the :term:`domain` range using the boundary values. Examples: - >>> from skfda.datasets import make_sinusoidal_process >>> from skfda.representation.extrapolation import ( ... 
BoundaryExtrapolation) @@ -91,7 +97,13 @@ class BoundaryExtrapolation(Evaluator): [ 1.125]]]) """ - def evaluate(self, fdata, eval_points, *, aligned=True): + def evaluate( # noqa: D102 + self, + fdata: FData, + eval_points: np.ndarray, + *, + aligned: bool = True, + ) -> np.ndarray: domain_range = fdata.domain_range @@ -100,16 +112,13 @@ def evaluate(self, fdata, eval_points, *, aligned=True): eval_points[eval_points[..., i] < a, i] = a eval_points[eval_points[..., i] > b, i] = b - res = fdata(eval_points, aligned=aligned) - - return res + return fdata(eval_points, aligned=aligned) class ExceptionExtrapolation(Evaluator): - """Raise and exception. + """Raise an exception. Examples: - >>> from skfda.datasets import make_sinusoidal_process >>> from skfda.representation.extrapolation import ( ... ExceptionExtrapolation) @@ -135,12 +144,19 @@ class ExceptionExtrapolation(Evaluator): """ - def evaluate(self, fdata, eval_points, *, aligned=True): + def evaluate( # noqa: D102 + self, + fdata: FData, + eval_points: np.ndarray, + *, + aligned: bool = True, + ) -> NoReturn: n_points = eval_points.shape[-2] - raise ValueError(f"Attempt to evaluate {n_points} points outside the " - f"domain range.") + raise ValueError( + f"Attempt to evaluate {n_points} points outside the domain range.", + ) class FillExtrapolation(Evaluator): @@ -148,7 +164,6 @@ class FillExtrapolation(Evaluator): Values outside the :term:`domain` range will be filled with a fixed value. 
Examples: - >>> from skfda.datasets import make_sinusoidal_process >>> from skfda.representation.extrapolation import FillExtrapolation >>> fd = make_sinusoidal_process(n_samples=2, random_state=0) @@ -177,30 +192,43 @@ class FillExtrapolation(Evaluator): [ nan]]]) """ - def __init__(self, fill_value): + def __init__(self, fill_value: float) -> None: self.fill_value = fill_value - def _fill(self, fdata, eval_points): - shape = (fdata.n_samples, eval_points.shape[-2], - fdata.dim_codomain) + def _fill(self, fdata: FData, eval_points: np.ndarray) -> np.ndarray: + shape = ( + fdata.n_samples, + eval_points.shape[-2], + fdata.dim_codomain, + ) return np.full(shape, self.fill_value) - def evaluate(self, fdata, eval_points, *, aligned=True): + def evaluate( # noqa: D102 + self, + fdata: FData, + eval_points: np.ndarray, + *, + aligned: bool = True, + ) -> np.ndarray: return self._fill(fdata, eval_points) - def __repr__(self): - """repr method of FillExtrapolation""" - return (f"{type(self).__name__}(" - f"fill_value={self.fill_value})") + def __repr__(self) -> str: + return ( + f"{type(self).__name__}(" + f"fill_value={self.fill_value})" + ) - def __eq__(self, other): - """Equality operator bethween FillExtrapolation instances.""" - return (super().__eq__(other) and + def __eq__(self, other: Any) -> bool: + return ( + super().__eq__(other) + and ( self.fill_value == other.fill_value # NaNs compare unequal. Should we distinguish between # different NaN types and payloads? - or np.isnan(self.fill_value) and np.isnan(other.fill_value)) + or (np.isnan(self.fill_value) and np.isnan(other.fill_value)) + ) + ) def _parse_extrapolation( @@ -213,7 +241,6 @@ def _parse_extrapolation( Args: extrapolation (:class:´Extrapolator´, str or Callable): Argument extrapolation to be parsed. - fdata (:class:´FData´): Object with the default extrapolation. Returns: (:class:´Extrapolator´ or Callable): Extrapolation method. 
@@ -225,14 +252,15 @@ def _parse_extrapolation( elif isinstance(extrapolation, str): return extrapolation_methods[extrapolation.lower()] - else: - return extrapolation + return extrapolation #: Dictionary with the extrapolation methods. -extrapolation_methods = {"bounds": BoundaryExtrapolation(), - "exception": ExceptionExtrapolation(), - "nan": FillExtrapolation(np.nan), - "none": None, - "periodic": PeriodicExtrapolation(), - "zeros": FillExtrapolation(0)} +extrapolation_methods = { + "bounds": BoundaryExtrapolation(), + "exception": ExceptionExtrapolation(), + "nan": FillExtrapolation(np.nan), + "none": None, + "periodic": PeriodicExtrapolation(), + "zeros": FillExtrapolation(0), +} From 892936ef35a0826d3b1ce60a3d89090d353bd206 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sat, 20 Mar 2021 16:15:49 +0100 Subject: [PATCH 148/417] colros --- .../visualization/representation.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 76f0328e1..2a59a33dd 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -1,4 +1,5 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple, TypedDict, TypeVar, Union +from matplotlib import colors import matplotlib.cm import matplotlib.patches @@ -106,7 +107,7 @@ def _get_color_info( class GraphPlot: """ - Class used to plot the FDatGrid object graph as hypersurfaces. + Class used to plot the FDataGrid object graph as hypersurfaces. A list of variables (probably depths) can be used as an argument to display the functions wtih a gradient of colors. @@ -202,48 +203,47 @@ def plot( gradient_color_list (normalized in gradient_list). Args: - chart (figure object, axe or list of axes, optional): figure over + chart: figure over with the graphs are plotted or axis over where the graphs are plotted. 
If None and ax is also None, the figure is initialized. - fig (figure object, optional): figure over with the graphs are + fig : figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - ax (list of axis objects, optional): axis over where the graphs - are plotted. If None, see param fig. - n_rows (int, optional): designates the number of rows of the figure + ax: axis over where the graphs are plotted. If None, see param fig. + n_rows : designates the number of rows of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. - n_cols(int, optional): designates the number of columns of the + n_cols: designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. - n_points (int or tuple, optional): Number of points to evaluate in + n_points: Number of points to evaluate in the plot. In case of surfaces a tuple of length 2 can be pased with the number of points to plot in each axis, otherwise the same number of points will be used in the two axes. By default in unidimensional plots will be used 501 points; in surfaces will be used 30 points per axis, wich makes a grid with 900 points. - domain_range (tuple or list of tuples, optional): Range where the + domain_range: Range where the function will be plotted. In objects with unidimensional domain the domain range should be a tuple with the bounds of the interval; in the case of surfaces a list with 2 tuples with the ranges for each dimension. Default uses the domain range of the functional object. - group (list of int): contains integers from [0 to number of + group: contains integers from [0 to number of labels) indicating to which group each sample belongs to. Then, the samples with the same label are plotted in the same color. 
If None, the default value, each sample is plotted in the color assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors (list of colors): colors in which groups are + group_colors: colors in which groups are represented, there must be one for each group. If None, each group is shown with distict colors in the "Greys" colormap. - group_names (list of str): name of each of the groups which appear + group_names: name of each of the groups which appear in a legend, there must be one for each one. Defaults to None and the legend is not shown. Implies `legend=True`. colormap_name: name of the colormap to be used. By default we will use autumn. - legend (bool): if `True`, show a legend with the groups. If + legend: if `True`, show a legend with the groups. If `group_names` is passed, it will be used for finding the names to display in the legend. Otherwise, the values passed to `group` will be used. @@ -253,7 +253,7 @@ def plot( matplotlib.pyplot.plot_surface function. Returns: - fig (figure object): figure object in which the graphs are plotted. + fig: figure object in which the graphs are plotted. 
""" From 01d0dd967a7e77658d575cb8ffc0592eaa038eb9 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 22 Mar 2021 12:41:47 +0100 Subject: [PATCH 149/417] change name --- skfda/exploratory/visualization/__init__.py | 2 +- .../{_phase_plane_plot.py => _parametric_plot.py} | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) rename skfda/exploratory/visualization/{_phase_plane_plot.py => _parametric_plot.py} (93%) diff --git a/skfda/exploratory/visualization/__init__.py b/skfda/exploratory/visualization/__init__.py index 346068ca6..215b41c8d 100644 --- a/skfda/exploratory/visualization/__init__.py +++ b/skfda/exploratory/visualization/__init__.py @@ -4,5 +4,5 @@ from ._boxplot import Boxplot, SurfaceBoxplot from ._ddplot import DDPlot from ._magnitude_shape_plot import MagnitudeShapePlot -from ._phase_plane_plot import PhasePlanePlot +from ._parametric_plot import ParametricPlot from .fpca import plot_fpca_perturbation_graphs diff --git a/skfda/exploratory/visualization/_phase_plane_plot.py b/skfda/exploratory/visualization/_parametric_plot.py similarity index 93% rename from skfda/exploratory/visualization/_phase_plane_plot.py rename to skfda/exploratory/visualization/_parametric_plot.py index c5de0aa8e..bb70c845e 100644 --- a/skfda/exploratory/visualization/_phase_plane_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -15,9 +15,9 @@ from ._utils import _get_figure_and_axes, _set_figure_layout -class PhasePlanePlot: +class ParametricPlot: """ - Phase-Plane Plot visualization. + Parametric Plot visualization. This class contains the functionality in charge of plotting two different functions as coordinates, this can be done giving @@ -47,7 +47,7 @@ def plot( **kwargs: Any, ) -> Figure: """ - Plot Phase-Plane graph. + ParametricPlot graph. Plot the functions as coordinates. If two functions are passed it will concatenate both into one only FData. @@ -61,7 +61,7 @@ def plot( ax: axis where the graphs are plotted. 
If None, see param fig. kwargs: optional arguments. Returns: - fig (figure object): figure object in which the phase-plane + fig (figure object): figure object in which the ParametricPlot graph will be plotted. """ fig, axes = _get_figure_and_axes(chart, fig, ax) @@ -92,7 +92,7 @@ def plot( "codomain or domain is not correct.", ) - fig.suptitle("Phase-Plane Plot") + fig.suptitle("Parametric Plot") axes[0].set_xlabel("Function 1") axes[0].set_ylabel("Function 2") From fb87e264fdd67d95005539320dd494f30b165e4b Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 22 Mar 2021 13:19:14 +0100 Subject: [PATCH 150/417] parametric plot solved --- skfda/exploratory/visualization/_parametric_plot.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index bb70c845e..6efc2ab2b 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -80,7 +80,7 @@ def plot( fig, axes = _set_figure_layout( fig, axes, dim=2, n_axes=1, ) - data_matrix = self.fd_final.data_matrix[0][:, 0] + data_matrix = self.fd_final.data_matrix[0] axes[0].plot( data_matrix[:, 0].tolist(), data_matrix[:, 1].tolist(), @@ -93,7 +93,10 @@ def plot( ) fig.suptitle("Parametric Plot") - axes[0].set_xlabel("Function 1") - axes[0].set_ylabel("Function 2") + axes[0].set_xlabel(self.fdata1.coordinate_names[0]) + if self.fdata2 is None: + axes[0].set_ylabel(self.fdata1.coordinate_names[1]) + else: + axes[0].set_ylabel(self.fdata2.coordinate_names[0]) return fig From 3432537dde509f7634eeb957799d66e1365f2acd Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 22 Mar 2021 13:29:40 +0100 Subject: [PATCH 151/417] change --- tests/test_outliergram.py | 63 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 tests/test_outliergram.py diff --git a/tests/test_outliergram.py 
b/tests/test_outliergram.py new file mode 100644 index 000000000..071b226ce --- /dev/null +++ b/tests/test_outliergram.py @@ -0,0 +1,63 @@ +from skfda import FDataGrid +from skfda.datasets import fetch_weather +from skfda.exploratory.visualization import Outliergram +import unittest + +import numpy as np + + +class TestOutliergram(unittest.TestCase): + + def test_outliergram(self): + fd = fetch_weather()["data"] + fd_temperatures = fd.coordinates[0] + outliergram = Outliergram( + fd_temperatures) + np.testing.assert_allclose(outliergram.mei, + np.array([[0.2112587, 3.0322570], + [1.2823448, 0.8272850], + [0.8646544, 1.8619370], + [1.9862512, 5.5287354], + [0.7534918, 0.7203502], + [1.1325291, 0.2808455], + [-2.650529, 0.9702889], + [0.1434387, 0.9159834], + [-0.402844, 0.6413531], + [0.6354411, 0.6934311], + [0.5727553, 0.4628254], + [3.0524899, 8.8008899], + [2.7355803, 10.338497], + [3.1179374, 7.0686220], + [3.4944047, 11.479432], + [-0.402532, 0.5253690], + [0.5782190, 5.5400704], + [-0.839887, 0.7350041], + [-3.456470, 1.1156415], + [0.2260207, 1.5071672], + [-0.561562, 0.8836978], + [-1.690263, 0.6392155], + [-0.385394, 0.7401909], + [0.1467050, 0.9090058], + [7.1811993, 39.003407], + [6.8943132, 30.968126], + [6.6227164, 41.448548], + [0.0726709, 1.5960063], + [1.4450617, 8.7183435], + [-1.459836, 0.2719813], + [-2.824349, 4.5729382], + [-2.390462, 1.5464775], + [-5.869571, 5.3517279], + [-5.426019, 5.1817219], + [-16.34459, 0.9397117]]), rtol=1e-5) + + np.testing.assert_array_almost_equal(outliergram.mbd, + np.array( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, + 0, 0, 0, 0, 1])) + + +if __name__ == '__main__': + print() + unittest.main() From 575910a72c669371e7af36fa69c37a33716bc54e Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 22 Mar 2021 13:34:21 +0100 Subject: [PATCH 152/417] test --- .gitignore | 2 + tests/test_outliergram.py | 96 ++++++++++++++++++++++----------------- 2 files changed, 56 
insertions(+), 42 deletions(-) diff --git a/.gitignore b/.gitignore index 53f5bcc28..c9ab507ca 100644 --- a/.gitignore +++ b/.gitignore @@ -110,3 +110,5 @@ pip-wheel-metadata/ # macOS DS_Store .DS_Store +.gitignore +.vscode/settings.json diff --git a/tests/test_outliergram.py b/tests/test_outliergram.py index 071b226ce..84406f15f 100644 --- a/tests/test_outliergram.py +++ b/tests/test_outliergram.py @@ -13,49 +13,61 @@ def test_outliergram(self): fd_temperatures = fd.coordinates[0] outliergram = Outliergram( fd_temperatures) - np.testing.assert_allclose(outliergram.mei, - np.array([[0.2112587, 3.0322570], - [1.2823448, 0.8272850], - [0.8646544, 1.8619370], - [1.9862512, 5.5287354], - [0.7534918, 0.7203502], - [1.1325291, 0.2808455], - [-2.650529, 0.9702889], - [0.1434387, 0.9159834], - [-0.402844, 0.6413531], - [0.6354411, 0.6934311], - [0.5727553, 0.4628254], - [3.0524899, 8.8008899], - [2.7355803, 10.338497], - [3.1179374, 7.0686220], - [3.4944047, 11.479432], - [-0.402532, 0.5253690], - [0.5782190, 5.5400704], - [-0.839887, 0.7350041], - [-3.456470, 1.1156415], - [0.2260207, 1.5071672], - [-0.561562, 0.8836978], - [-1.690263, 0.6392155], - [-0.385394, 0.7401909], - [0.1467050, 0.9090058], - [7.1811993, 39.003407], - [6.8943132, 30.968126], - [6.6227164, 41.448548], - [0.0726709, 1.5960063], - [1.4450617, 8.7183435], - [-1.459836, 0.2719813], - [-2.824349, 4.5729382], - [-2.390462, 1.5464775], - [-5.869571, 5.3517279], - [-5.426019, 5.1817219], - [-16.34459, 0.9397117]]), rtol=1e-5) + np.testing.assert_allclose( + outliergram.mei, + np.array( + [[0.46272668], + [0.27840835], + [0.36268754], + [0.27908676], + [0.36112198], + [0.30802348], + [0.82969341], + [0.45904762], + [0.53907371], + [0.38799739], + [0.41283757], + [0.20420091], + [0.23564253], + [0.14737117], + [0.14379648], + [0.54035225], + [0.43459883], + [0.6378604 ], + [0.86964123], + [0.4421396 ], + [0.58906719], + [0.75561644], + [0.54982387], + [0.46095238], + [0.09969993], + [0.13166341], + 
[0.18776256], + [0.4831833 ], + [0.36816699], + [0.72962818], + [0.80313112], + [0.79934768], + [0.90643183], + [0.90139596], + [0.9685062 ]] + ), + rtol=1e-5) - np.testing.assert_array_almost_equal(outliergram.mbd, - np.array( - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, - 0, 0, 0, 0, 1])) + np.testing.assert_array_almost_equal( + outliergram.mbd, + np.array( + [ + 0.40685162, 0.42460381, 0.43088139, 0.35833775, 0.47847435, + 0.46825985, 0.29228349, 0.51299183, 0.5178558 , 0.49868539, + 0.52408733, 0.34457312, 0.36996431, 0.2973209 , 0.29107555, + 0.53304017, 0.44185565, 0.46346341, 0.23620736, 0.47652354, + 0.4814397 , 0.38233529, 0.51173171, 0.51164882, 0.21551437, + 0.23084916, 0.25650589, 0.46760447, 0.30787767, 0.40929051, + 0.31801082, 0.3234519 , 0.17015617, 0.17977514, 0.05769541 + ] + ) + ) if __name__ == '__main__': From 9c4aff6d4b595a9ac99e33fb333f2ba330db6498 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 22 Mar 2021 13:56:09 +0100 Subject: [PATCH 153/417] Typing fixes. 
--- skfda/_utils/_utils.py | 199 +++++++++++++++++++---- skfda/representation/_functional_data.py | 167 +++++++++++++++++-- skfda/representation/extrapolation.py | 20 ++- 3 files changed, 341 insertions(+), 45 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index c8481b78e..ba8237d8e 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -1,4 +1,4 @@ -"""Module with generic methods""" +"""Module with generic methods.""" from __future__ import annotations @@ -14,10 +14,12 @@ Tuple, Union, cast, + overload, ) import numpy as np from pandas.api.indexers import check_array_indexer +from typing_extensions import Literal, Protocol import scipy.integrate @@ -28,7 +30,7 @@ GridPoints, GridPointsLike, ) -from ..representation.evaluator import Evaluator +from ..representation.extrapolation import ExtrapolationLike RandomStateLike = Optional[Union[int, np.random.RandomState]] @@ -192,26 +194,53 @@ def convert_row(row: ArrayLike) -> np.ndarray: return res +@overload def _cartesian_product( axes: Sequence[np.ndarray], - flatten: bool=True, - return_shape: bool=False, + *, + flatten: bool = True, + return_shape: Literal[False] = False, ) -> np.ndarray: - """Computes the cartesian product of the axes. + pass + + +@overload +def _cartesian_product( + axes: Sequence[np.ndarray], + *, + flatten: bool = True, + return_shape: Literal[True], +) -> Tuple[np.ndarray, Tuple[int, ...]]: + pass + + +def _cartesian_product( # noqa: WPS234 + axes: Sequence[np.ndarray], + *, + flatten: bool = True, + return_shape: bool = False, +) -> Union[np.ndarray, Tuple[np.ndarray, Tuple[int, ...]]]: + """ + Compute the cartesian product of the axes. Computes the cartesian product of the axes and returns a numpy array of 1 dimension with all the possible combinations, for an arbitrary number of dimensions. Args: - Axes: List with axes. + axes: List with axes. + flatten: Whether to return the flatten array or keep one dimension per + axis. 
+ return_shape: If ``True`` return the shape of the array before + flattening. - Return: + Returns: Numpy 2-D array with all the possible combinations. The entry (i,j) represent the j-th coordinate of the i-th point. + If ``return_shape`` is ``True`` returns also the shape of the array + before flattening. Examples: - >>> from skfda._utils import _cartesian_product >>> axes = [[0,1],[2,3]] >>> _cartesian_product(axes) @@ -242,8 +271,8 @@ def _cartesian_product( if return_shape: return cartesian, shape - else: - return cartesian + + return cartesian def _same_domain(fd: Union[Basis, FData], fd2: Union[Basis, FData]) -> bool: @@ -251,9 +280,42 @@ def _same_domain(fd: Union[Basis, FData], fd2: Union[Basis, FData]) -> bool: return np.array_equal(fd.domain_range, fd2.domain_range) +@overload def _reshape_eval_points( eval_points: np.ndarray, *, + aligned: Literal[True], + n_samples: int, + dim_domain: int, +) -> np.ndarray: + pass + + +@overload +def _reshape_eval_points( + eval_points: Sequence[np.ndarray], + *, + aligned: Literal[True], + n_samples: int, + dim_domain: int, +) -> np.ndarray: + pass + + +@overload +def _reshape_eval_points( + eval_points: Union[np.ndarray, Sequence[np.ndarray]], + *, + aligned: bool, + n_samples: int, + dim_domain: int, +) -> np.ndarray: + pass + + +def _reshape_eval_points( + eval_points: Union[np.ndarray, Sequence[np.ndarray]], + *, aligned: bool, n_samples: int, dim_domain: int, @@ -305,7 +367,11 @@ def _reshape_eval_points( return eval_points -def _one_grid_to_points(axes, *, dim_domain): +def _one_grid_to_points( + axes: Sequence[np.ndarray], + *, + dim_domain: int, +) -> Tuple[np.ndarray, Tuple[int, ...]]: """ Convert a list of ndarrays, one per domain dimension, in the points. 
@@ -315,8 +381,9 @@ def _one_grid_to_points(axes, *, dim_domain): axes = _to_grid_points(axes) if len(axes) != dim_domain: - raise ValueError(f"Length of axes should be " - f"{dim_domain}") + raise ValueError( + f"Length of axes should be {dim_domain}", + ) cartesian, shape = _cartesian_product(axes, return_shape=True) @@ -326,17 +393,73 @@ def _one_grid_to_points(axes, *, dim_domain): return cartesian, shape +class EvaluateMethod(Protocol): + """Evaluation method.""" + + def __call__( + self, + __eval_points: np.ndarray, # noqa: WPS112 + extrapolation: Optional[ExtrapolationLike], + aligned: bool, + ) -> np.ndarray: + """Evaluate a function.""" + pass + + +@overload def _evaluate_grid( axes: Sequence[np.ndarray], *, - evaluate_method: Any, + evaluate_method: EvaluateMethod, + n_samples: int, + dim_domain: int, + dim_codomain: int, + extrapolation: Optional[ExtrapolationLike] = None, + aligned: Literal[True] = True, +) -> np.ndarray: + pass + + +@overload +def _evaluate_grid( + axes: Sequence[Sequence[np.ndarray]], + *, + evaluate_method: EvaluateMethod, + n_samples: int, + dim_domain: int, + dim_codomain: int, + extrapolation: Optional[ExtrapolationLike] = None, + aligned: Literal[False], +) -> np.ndarray: + pass + + +@overload +def _evaluate_grid( # noqa: WPS234 + axes: Sequence[Union[np.ndarray, Sequence[np.ndarray]]], + *, + evaluate_method: EvaluateMethod, n_samples: int, dim_domain: int, dim_codomain: int, - extrapolation: Optional[Union[str, Evaluator]] = None, + extrapolation: Optional[ExtrapolationLike] = None, aligned: bool = True, ) -> np.ndarray: - """Evaluate the functional object in the cartesian grid. 
+ pass + + +def _evaluate_grid( # noqa: WPS234 + axes: Sequence[Union[np.ndarray, Sequence[np.ndarray]]], + *, + evaluate_method: EvaluateMethod, + n_samples: int, + dim_domain: int, + dim_codomain: int, + extrapolation: Optional[ExtrapolationLike] = None, + aligned: bool = True, +) -> np.ndarray: + """ + Evaluate the functional object in the cartesian grid. This method is called internally by :meth:`evaluate` when the argument `grid` is True. @@ -361,6 +484,10 @@ def _evaluate_grid( Args: axes: List of axes to generated the grid where the object will be evaluated. + evaluate_method: Function used to evaluate the functional object. + n_samples: Number of samples. + dim_domain: Domain dimension. + dim_codomain: Codomain dimension. extrapolation: Controls the extrapolation mode for elements outside the domain range. By default it is used the mode defined during the instance of the @@ -377,41 +504,53 @@ def _evaluate_grid( dimension. """ - # Compute intersection points and resulting shapes if aligned: + axes = cast(Sequence[np.ndarray], axes) + eval_points, shape = _one_grid_to_points(axes, dim_domain=dim_domain) else: - axes = list(axes) + axes_per_sample = cast(Sequence[Sequence[np.ndarray]], axes) - if len(axes) != n_samples: - raise ValueError("Should be provided a list of axis per " - "sample") + axes_per_sample = list(axes_per_sample) - eval_points, shape = zip( - *[_one_grid_to_points(a, dim_domain=dim_domain) for a in axes]) + if len(axes) != n_samples: + raise ValueError( + "Should be provided a list of axis per sample", + ) + + eval_points_tuple, shape_tuple = zip( + *[ + _one_grid_to_points(a, dim_domain=dim_domain) + for a in axes_per_sample + ], + ) - eval_points = _to_array_maybe_ragged(eval_points) + eval_points = _to_array_maybe_ragged(eval_points_tuple) # Evaluate the points - res = evaluate_method(eval_points, - extrapolation=extrapolation, - aligned=aligned) + evaluated = evaluate_method( + eval_points, + extrapolation=extrapolation, + 
aligned=aligned, + ) # Reshape the result if aligned: - res = res.reshape([n_samples] + - list(shape) + [dim_codomain]) + res = evaluated.reshape( + [n_samples] + list(shape) + [dim_codomain], + ) else: res = _to_array_maybe_ragged([ r.reshape(list(s) + [dim_codomain]) - for r, s in zip(res, shape)]) + for r, s in zip(evaluated, shape_tuple) + ]) return res diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index d8dc4489b..47a00217e 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -18,15 +18,18 @@ Sequence, TypeVar, Union, + cast, + overload, ) import numpy as np import pandas.api.extensions +from typing_extensions import Literal from .._utils import _evaluate_grid, _reshape_eval_points from ._typing import DomainRange, LabelTuple, LabelTupleLike from .evaluator import Evaluator -from .extrapolation import _parse_extrapolation +from .extrapolation import ExtrapolationLike, _parse_extrapolation if TYPE_CHECKING: from . 
import FDataGrid, FDataBasis @@ -34,6 +37,12 @@ T = TypeVar('T', bound='FData') +EvalPointsType = Union[ + np.ndarray, + Sequence[np.ndarray], + Sequence[Sequence[np.ndarray]], +] + class FData( # noqa: WPS214 ABC, @@ -57,7 +66,7 @@ class FData( # noqa: WPS214 def __init__( self, *, - extrapolation: Optional[Union[str, Evaluator]] = None, + extrapolation: Optional[ExtrapolationLike] = None, dataset_name: Optional[str] = None, dataset_label: Optional[str] = None, axes_labels: Optional[LabelTupleLike] = None, @@ -245,7 +254,7 @@ def extrapolation(self) -> Optional[Evaluator]: return self._extrapolation @extrapolation.setter - def extrapolation(self, value: Optional[Union[str, Evaluator]]) -> None: + def extrapolation(self, value: Optional[ExtrapolationLike]) -> None: """Set the type of extrapolation.""" self._extrapolation = _parse_extrapolation(value) @@ -328,11 +337,29 @@ def _join_evaluation( return res - @abstractmethod + @overload def _evaluate( self, eval_points: np.ndarray, *, + aligned: Literal[True] = True, + ) -> np.ndarray: + pass + + @overload + def _evaluate( + self, + eval_points: Sequence[np.ndarray], + *, + aligned: Literal[False], + ) -> np.ndarray: + pass + + @abstractmethod + def _evaluate( + self, + eval_points: Union[np.ndarray, Sequence[np.ndarray]], + *, aligned: bool = True, ) -> np.ndarray: """Define the evaluation of the FData. 
@@ -358,12 +385,60 @@ def _evaluate( """ pass + @overload def evaluate( self, eval_points: np.ndarray, *, derivative: int = 0, - extrapolation: Optional[Union[str, Evaluator]] = None, + extrapolation: Optional[ExtrapolationLike] = None, + grid: Literal[False] = False, + aligned: Literal[True] = True, + ) -> np.ndarray: + pass + + @overload + def evaluate( + self, + eval_points: Sequence[np.ndarray], + *, + derivative: int = 0, + extrapolation: Optional[ExtrapolationLike] = None, + grid: Literal[False] = False, + aligned: Literal[False], + ) -> np.ndarray: + pass + + @overload + def evaluate( + self, + eval_points: Sequence[np.ndarray], + *, + derivative: int = 0, + extrapolation: Optional[ExtrapolationLike] = None, + grid: Literal[True], + aligned: Literal[True] = True, + ) -> np.ndarray: + pass + + @overload + def evaluate( + self, + eval_points: Sequence[Sequence[np.ndarray]], + *, + derivative: int = 0, + extrapolation: Optional[ExtrapolationLike] = None, + grid: Literal[True], + aligned: Literal[False], + ) -> np.ndarray: + pass + + def evaluate( + self, + eval_points: EvalPointsType, + *, + derivative: int = 0, + extrapolation: Optional[ExtrapolationLike] = None, grid: bool = False, aligned: bool = True, ) -> np.ndarray: @@ -409,6 +484,7 @@ def evaluate( ) if grid: # Evaluation of a grid performed in auxiliar function + return _evaluate_grid( eval_points, evaluate_method=self.evaluate, @@ -425,6 +501,11 @@ def evaluate( # Gets the function to perform extrapolation or None extrapolation = _parse_extrapolation(extrapolation) + eval_points = cast( + Union[np.ndarray, Sequence[np.ndarray]], + eval_points, + ) + # Convert to array and check dimensions of eval points eval_points = _reshape_eval_points( eval_points, @@ -480,12 +561,72 @@ def evaluate( aligned=aligned, ) + @overload def __call__( self, eval_points: np.ndarray, *, derivative: int = 0, - extrapolation: Optional[Union[str, Evaluator]] = None, + extrapolation: Optional[ExtrapolationLike] = None, + grid: 
Literal[False] = False, + aligned: Literal[True] = True, + ) -> np.ndarray: + pass + + @overload + def __call__( + self, + eval_points: Sequence[np.ndarray], + *, + derivative: int = 0, + extrapolation: Optional[ExtrapolationLike] = None, + grid: Literal[False] = False, + aligned: Literal[False], + ) -> np.ndarray: + pass + + @overload + def __call__( + self, + eval_points: Sequence[np.ndarray], + *, + derivative: int = 0, + extrapolation: Optional[ExtrapolationLike] = None, + grid: Literal[True], + aligned: Literal[True] = True, + ) -> np.ndarray: + pass + + @overload + def __call__( + self, + eval_points: Sequence[Sequence[np.ndarray]], + *, + derivative: int = 0, + extrapolation: Optional[ExtrapolationLike] = None, + grid: Literal[True], + aligned: Literal[False], + ) -> np.ndarray: + pass + + @overload + def __call__( + self, + eval_points: EvalPointsType, + *, + derivative: int = 0, + extrapolation: Optional[ExtrapolationLike] = None, + grid: bool = False, + aligned: bool = True, + ) -> np.ndarray: + pass + + def __call__( + self, + eval_points: EvalPointsType, + *, + derivative: int = 0, + extrapolation: Optional[ExtrapolationLike] = None, grid: bool = False, aligned: bool = True, ) -> np.ndarray: @@ -546,8 +687,8 @@ def shift( shifts: Union[float, np.ndarray], *, restrict_domain: bool = False, - extrapolation: Optional[Union[str, Evaluator]] = None, - eval_points: np.ndarray = None, + extrapolation: Optional[ExtrapolationLike] = None, + eval_points: Optional[np.ndarray] = None, ) -> T: """Perform a shift of the curves. 
@@ -599,7 +740,7 @@ def copy( argument_names: Optional[LabelTupleLike] = None, coordinate_names: Optional[LabelTupleLike] = None, sample_names: Optional[LabelTupleLike] = None, - extrapolation: Optional[Union[str, Evaluator]] = None, + extrapolation: Optional[ExtrapolationLike] = None, ) -> T: """Make a copy of the object.""" pass @@ -675,7 +816,7 @@ def mean( ) @abstractmethod - def to_grid(self, grid_points: np.ndarray = None) -> FDataGrid: + def to_grid(self, grid_points: Optional[np.ndarray] = None) -> FDataGrid: """Return the discrete representation of the object. Args: @@ -735,7 +876,7 @@ def compose( self: T, fd: T, *, - eval_points: np.ndarray = None, + eval_points: Optional[np.ndarray] = None, ) -> FData: """Composition of functions. @@ -766,10 +907,10 @@ def equals(self, other: Any) -> bool: ) @abstractmethod - def __eq__(self, other: Any) -> np.ndarray: + def __eq__(self, other: Any) -> np.ndarray: # type: ignore[override] pass - def __ne__(self, other: Any) -> np.ndarray: + def __ne__(self, other: Any) -> np.ndarray: # type: ignore[override] """Return for `self != other` (element-wise in-equality).""" result = self.__eq__(other) if result is NotImplemented: diff --git a/skfda/representation/extrapolation.py b/skfda/representation/extrapolation.py index 7cbb00324..12cc70e54 100644 --- a/skfda/representation/extrapolation.py +++ b/skfda/representation/extrapolation.py @@ -5,7 +5,7 @@ """ from __future__ import annotations -from typing import TYPE_CHECKING, Any, NoReturn, Optional, Union +from typing import TYPE_CHECKING, Any, NoReturn, Optional, Union, overload import numpy as np @@ -14,6 +14,8 @@ if TYPE_CHECKING: from . import FData +ExtrapolationLike = Union[str, Evaluator] + class PeriodicExtrapolation(Evaluator): """Extend the :term:`domain` range periodically. 
@@ -231,8 +233,22 @@ def __eq__(self, other: Any) -> bool: ) +@overload +def _parse_extrapolation( + extrapolation: None, +) -> None: + pass + + +@overload +def _parse_extrapolation( + extrapolation: ExtrapolationLike, +) -> Evaluator: + pass + + def _parse_extrapolation( - extrapolation: Optional[Union[str, Evaluator]], + extrapolation: Optional[ExtrapolationLike], ) -> Optional[Evaluator]: """Parse the argument `extrapolation` of `FData`. From eb6d4fb3a53258bb51fd6db3ec5af6fdd50a2569 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 22 Mar 2021 14:04:24 +0100 Subject: [PATCH 154/417] Fix real data bug. --- skfda/datasets/_real_datasets.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index a13013554..aa3b66bb5 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -809,15 +809,14 @@ def fetch_weather( X = curves meta = np.concatenate( ( - np.array(data["place"])[:, np.newaxis], - np.array(data["province"])[:, np.newaxis], - np.asarray(data["coordinates"]), - np.array(data["geogindex"])[:, np.newaxis], + np.array(data["place"], dtype=np.object_)[:, np.newaxis], + np.array(data["province"], dtype=np.object_)[:, np.newaxis], + np.asarray(data["coordinates"], dtype=np.object_), + np.array(data["geogindex"], dtype=np.object_)[:, np.newaxis], np.asarray(data["monthlyTemp"]).T.tolist(), np.asarray(data["monthlyPrecip"]).T.tolist(), ), axis=1, - dtype=np.object_ ) meta_names = [ "place", From 46dc3ef8132eb80f2c723c0c791cf16d04de652c Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 22 Mar 2021 15:02:54 +0100 Subject: [PATCH 155/417] out test corrected --- tests/test_outliergram.py | 130 ++++++++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 49 deletions(-) diff --git a/tests/test_outliergram.py b/tests/test_outliergram.py index 84406f15f..ccde8bcbb 100644 --- a/tests/test_outliergram.py +++ b/tests/test_outliergram.py 
@@ -1,10 +1,10 @@ -from skfda import FDataGrid -from skfda.datasets import fetch_weather -from skfda.exploratory.visualization import Outliergram import unittest import numpy as np +from skfda.datasets import fetch_weather +from skfda.exploratory.visualization import Outliergram + class TestOutliergram(unittest.TestCase): @@ -12,61 +12,93 @@ def test_outliergram(self): fd = fetch_weather()["data"] fd_temperatures = fd.coordinates[0] outliergram = Outliergram( - fd_temperatures) + fd_temperatures, + ) np.testing.assert_allclose( outliergram.mei, np.array( - [[0.46272668], - [0.27840835], - [0.36268754], - [0.27908676], - [0.36112198], - [0.30802348], - [0.82969341], - [0.45904762], - [0.53907371], - [0.38799739], - [0.41283757], - [0.20420091], - [0.23564253], - [0.14737117], - [0.14379648], - [0.54035225], - [0.43459883], - [0.6378604 ], - [0.86964123], - [0.4421396 ], - [0.58906719], - [0.75561644], - [0.54982387], - [0.46095238], - [0.09969993], - [0.13166341], - [0.18776256], - [0.4831833 ], - [0.36816699], - [0.72962818], - [0.80313112], - [0.79934768], - [0.90643183], - [0.90139596], - [0.9685062 ]] + [ + [0.46272668], + [0.27840835], + [0.36268754], + [0.27908676], + [0.36112198], + [0.30802348], + [0.82969341], + [0.45904762], + [0.53907371], + [0.38799739], + [0.41283757], + [0.20420091], + [0.23564253], + [0.14737117], + [0.14379648], + [0.54035225], + [0.43459883], + [0.6378604], + [0.86964123], + [0.4421396], + [0.58906719], + [0.75561644], + [0.54982387], + [0.46095238], + [0.09969993], + [0.13166341], + [0.18776256], + [0.4831833], + [0.36816699], + [0.72962818], + [0.80313112], + [0.79934768], + [0.90643183], + [0.90139596], + [0.9685062], + ], ), - rtol=1e-5) + rtol=1e-5, + ) np.testing.assert_array_almost_equal( outliergram.mbd, np.array( [ - 0.40685162, 0.42460381, 0.43088139, 0.35833775, 0.47847435, - 0.46825985, 0.29228349, 0.51299183, 0.5178558 , 0.49868539, - 0.52408733, 0.34457312, 0.36996431, 0.2973209 , 0.29107555, - 0.53304017, 0.44185565, 
0.46346341, 0.23620736, 0.47652354, - 0.4814397 , 0.38233529, 0.51173171, 0.51164882, 0.21551437, - 0.23084916, 0.25650589, 0.46760447, 0.30787767, 0.40929051, - 0.31801082, 0.3234519 , 0.17015617, 0.17977514, 0.05769541 - ] - ) + 0.40685162, + 0.42460381, + 0.43088139, + 0.35833775, + 0.47847435, + 0.46825985, + 0.29228349, + 0.51299183, + 0.5178558, + 0.49868539, + 0.52408733, + 0.34457312, + 0.36996431, + 0.2973209, + 0.29107555, + 0.53304017, + 0.44185565, + 0.46346341, + 0.23620736, + 0.47652354, + 0.4814397, + 0.38233529, + 0.51173171, + 0.51164882, + 0.21551437, + 0.23084916, + 0.25650589, + 0.46760447, + 0.30787767, + 0.40929051, + 0.31801082, + 0.3234519, + 0.17015617, + 0.17977514, + 0.05769541, + ], + ), ) From 0def2797d5edf65ce5da3a71f9d58190c4c24e55 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 22 Mar 2021 15:05:48 +0100 Subject: [PATCH 156/417] finished --- skfda/exploratory/visualization/_parametric_plot.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 6efc2ab2b..ed4fd7a2f 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -93,10 +93,7 @@ def plot( ) fig.suptitle("Parametric Plot") - axes[0].set_xlabel(self.fdata1.coordinate_names[0]) - if self.fdata2 is None: - axes[0].set_ylabel(self.fdata1.coordinate_names[1]) - else: - axes[0].set_ylabel(self.fdata2.coordinate_names[0]) + axes[0].set_xlabel(self.fd_final.coordinate_names[0]) + axes[0].set_ylabel(self.fd_final.coordinate_names[1]) return fig From 391451cb158a6609ef4429cacd918aba0b88474c Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 24 Mar 2021 19:52:29 +0100 Subject: [PATCH 157/417] now allows multiple functions --- .../visualization/_parametric_plot.py | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git 
a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index ed4fd7a2f..0ba9b10bb 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -67,11 +67,15 @@ def plot( fig, axes = _get_figure_and_axes(chart, fig, ax) if self.fdata2 is not None: - self.fd_final = self.fdata1.concatenate( - self.fdata2, as_coordinates=True, + self.fd_final = self.fdata2.concatenate( + self.fdata1, as_coordinates=True, ) + x_label = self.fd_final.coordinate_names[1] + y_label = self.fd_final.coordinate_names[0] else: self.fd_final = self.fdata1 + x_label = self.fd_final.coordinate_names[0] + y_label = self.fd_final.coordinate_names[1] if ( self.fd_final.dim_domain == 1 @@ -80,12 +84,13 @@ def plot( fig, axes = _set_figure_layout( fig, axes, dim=2, n_axes=1, ) - data_matrix = self.fd_final.data_matrix[0] - axes[0].plot( - data_matrix[:, 0].tolist(), - data_matrix[:, 1].tolist(), - **kwargs, - ) + #data_matrix = self.fd_final.data_matrix + for data_matrix in self.fd_final.data_matrix: + axes[0].plot( + data_matrix[:, 0].tolist(), + data_matrix[:, 1].tolist(), + **kwargs, + ) else: raise ValueError( "Error in data arguments,", @@ -93,7 +98,7 @@ def plot( ) fig.suptitle("Parametric Plot") - axes[0].set_xlabel(self.fd_final.coordinate_names[0]) - axes[0].set_ylabel(self.fd_final.coordinate_names[1]) + axes[0].set_xlabel(x_label) + axes[0].set_ylabel(y_label) return fig From a497bd75e7fc6fc855546bc0eec66e4847b30add Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 24 Mar 2021 20:53:07 +0100 Subject: [PATCH 158/417] comment correction --- skfda/exploratory/visualization/_parametric_plot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 0ba9b10bb..1bb98257d 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ 
b/skfda/exploratory/visualization/_parametric_plot.py @@ -84,7 +84,6 @@ def plot( fig, axes = _set_figure_layout( fig, axes, dim=2, n_axes=1, ) - #data_matrix = self.fd_final.data_matrix for data_matrix in self.fd_final.data_matrix: axes[0].plot( data_matrix[:, 0].tolist(), From 5876bfbb46d84020269bcf474da1e41d13a7982c Mon Sep 17 00:00:00 2001 From: mellamansanchez <38490771+mellamansanchez@users.noreply.github.com> Date: Wed, 24 Mar 2021 22:02:25 +0100 Subject: [PATCH 159/417] Update skfda/exploratory/visualization/_parametric_plot.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Ramos Carreño --- skfda/exploratory/visualization/_parametric_plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 1bb98257d..b169ebda4 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -50,7 +50,7 @@ def plot( ParametricPlot graph. Plot the functions as coordinates. If two functions are passed - it will concatenate both into one only FData. + it will concatenate both as coordinates of a vector-valued FData. Args: chart: figure over with the graphs are plotted or axis over where the graphs are plotted. If None and ax is also From 65c202ca6a362f914c274a1711544417e3d3617c Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 24 Mar 2021 22:08:26 +0100 Subject: [PATCH 160/417] changes --- skfda/exploratory/visualization/_parametric_plot.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 1bb98257d..56f9950c0 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -1,4 +1,4 @@ -"""Phase-Plane Plot Module. 
+"""Parametric Plot Module. This module contains the functionality in charge of plotting two different functions as coordinates, this can be done giving @@ -47,10 +47,10 @@ def plot( **kwargs: Any, ) -> Figure: """ - ParametricPlot graph. + Parametric Plot graph. Plot the functions as coordinates. If two functions are passed - it will concatenate both into one only FData. + it will concatenate both as coordinates of a vector-valued FData. Args: chart: figure over with the graphs are plotted or axis over where the graphs are plotted. If None and ax is also @@ -96,7 +96,10 @@ def plot( "codomain or domain is not correct.", ) - fig.suptitle("Parametric Plot") + if self.fd_final.dataset_name is not None: + fig.suptitle(self.fd_final.dataset_name) + else: + fig.suptitle("Graph") axes[0].set_xlabel(x_label) axes[0].set_ylabel(y_label) From 3b94bba9b9a4fefbf3e2becfa645429b6d386298 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 24 Mar 2021 23:27:51 +0100 Subject: [PATCH 161/417] solved --- skfda/exploratory/visualization/_parametric_plot.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 56f9950c0..565137c79 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -67,15 +67,11 @@ def plot( fig, axes = _get_figure_and_axes(chart, fig, ax) if self.fdata2 is not None: - self.fd_final = self.fdata2.concatenate( - self.fdata1, as_coordinates=True, + self.fd_final = self.fdata1.concatenate( + self.fdata2, as_coordinates=True, ) - x_label = self.fd_final.coordinate_names[1] - y_label = self.fd_final.coordinate_names[0] else: self.fd_final = self.fdata1 - x_label = self.fd_final.coordinate_names[0] - y_label = self.fd_final.coordinate_names[1] if ( self.fd_final.dim_domain == 1 @@ -100,7 +96,8 @@ def plot( fig.suptitle(self.fd_final.dataset_name) else: 
fig.suptitle("Graph") - axes[0].set_xlabel(x_label) - axes[0].set_ylabel(y_label) + + axes[0].set_xlabel(self.fd_final.coordinate_names[0]) + axes[0].set_ylabel(self.fd_final.coordinate_names[1]) return fig From 2869eb569279653416b17d5f8cdb0f4e8274e317 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 25 Mar 2021 00:34:09 +0100 Subject: [PATCH 162/417] chagned --- skfda/exploratory/visualization/_parametric_plot.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 565137c79..791ff0c95 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -97,7 +97,14 @@ def plot( else: fig.suptitle("Graph") - axes[0].set_xlabel(self.fd_final.coordinate_names[0]) + if self.fd_final.coordinate_names[0] is None: + axes[0].set_xlabel("Function 1") + else: + axes[0].set_xlabel(self.fd_final.coordinate_names[0]) + + if self.fd_final.coordinate_names[1] is None: + axes[0].set_ylabel("Function 2") + else: axes[0].set_ylabel(self.fd_final.coordinate_names[1]) return fig From 35b6aee6f65ad2c6f251d6e025d7b1cfa1bb827d Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 25 Mar 2021 00:34:28 +0100 Subject: [PATCH 163/417] solved --- skfda/exploratory/visualization/_parametric_plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 791ff0c95..832a8104e 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -105,6 +105,6 @@ def plot( if self.fd_final.coordinate_names[1] is None: axes[0].set_ylabel("Function 2") else: - axes[0].set_ylabel(self.fd_final.coordinate_names[1]) + axes[0].set_ylabel(self.fd_final.coordinate_names[1]) return fig From 8493b230317cea9e7c6ad4ebe0141e5bc697602d 
Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 25 Mar 2021 16:15:25 +0100 Subject: [PATCH 164/417] parametric_plot --- .../exploratory/visualization/_parametric_plot.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 832a8104e..4b3da19bc 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -80,8 +80,10 @@ def plot( fig, axes = _set_figure_layout( fig, axes, dim=2, n_axes=1, ) + ax = axes[0] + for data_matrix in self.fd_final.data_matrix: - axes[0].plot( + ax.plot( data_matrix[:, 0].tolist(), data_matrix[:, 1].tolist(), **kwargs, @@ -94,17 +96,15 @@ def plot( if self.fd_final.dataset_name is not None: fig.suptitle(self.fd_final.dataset_name) - else: - fig.suptitle("Graph") if self.fd_final.coordinate_names[0] is None: - axes[0].set_xlabel("Function 1") + ax.set_xlabel("Function 1") else: - axes[0].set_xlabel(self.fd_final.coordinate_names[0]) + ax.set_xlabel(self.fd_final.coordinate_names[0]) if self.fd_final.coordinate_names[1] is None: - axes[0].set_ylabel("Function 2") + ax.set_ylabel("Function 2") else: - axes[0].set_ylabel(self.fd_final.coordinate_names[1]) + ax.set_ylabel(self.fd_final.coordinate_names[1]) return fig From 2018945c29d4c4267ade0409df465477b91fe41d Mon Sep 17 00:00:00 2001 From: VNMabus Date: Fri, 26 Mar 2021 01:17:52 +0100 Subject: [PATCH 165/417] Evaluate typing. 
--- skfda/representation/basis/_fdatabasis.py | 21 ++++++++++----------- skfda/representation/evaluator.py | 3 --- skfda/representation/grid.py | 2 +- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 492c388b0..87dc56c8f 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -8,6 +8,8 @@ import numpy as np import pandas.api.extensions +from skfda._utils._utils import _to_array_maybe_ragged + from ..._utils import _check_array_key, _int_to_real, constants from .. import grid from .._functional_data import FData @@ -240,13 +242,15 @@ def domain_range(self) -> DomainRange: def _evaluate( self, - eval_points: np.ndarray, + eval_points: Union[np.ndarray, Sequence[np.ndarray]], *, aligned: bool = True, ) -> np.ndarray: if aligned: + assert isinstance(eval_points, np.ndarray) + # Each row contains the values of one element of the basis basis_values = self.basis.evaluate(eval_points) @@ -256,17 +260,12 @@ def _evaluate( (self.n_samples, len(eval_points), self.dim_codomain), ) - res_matrix = np.empty( - (self.n_samples, eval_points.shape[1], self.dim_codomain), - ) - - for i in range(self.n_samples): - basis_values = self.basis.evaluate(eval_points[i]) - - values = self.coefficients[i] * basis_values.T - np.sum(values.T, axis=0, out=res_matrix[i]) + res_list = [ + np.sum((c * self.basis.evaluate(p).T).T, axis=0) + for c, p in zip(self.coefficients, eval_points) + ] - return res_matrix + return _to_array_maybe_ragged(res_list) def shift( self: T, diff --git a/skfda/representation/evaluator.py b/skfda/representation/evaluator.py index a167c2d2c..d1cae879a 100644 --- a/skfda/representation/evaluator.py +++ b/skfda/representation/evaluator.py @@ -29,9 +29,6 @@ class Evaluator(ABC): The evaluator is called internally by :func:`evaluate`. - Should implement the methods :func:`evaluate` and - :func:`evaluate_composed`. 
- """ @abstractmethod diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index bec9b5e26..a51ef353b 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -385,7 +385,7 @@ def interpolation(self, new_interpolation: Optional[Evaluator]) -> None: def _evaluate( self, - eval_points: np.ndarray, + eval_points: Union[np.ndarray, Sequence[np.ndarray]], *, aligned: bool = True, ) -> np.ndarray: From 7cddfc15debc128e5bba3387a5a2c8c2d805a099 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Fri, 26 Mar 2021 14:28:06 +0100 Subject: [PATCH 166/417] Evaluation typing. --- skfda/_utils/_utils.py | 31 ++++------- skfda/representation/_functional_data.py | 35 ++++-------- skfda/representation/basis/_fdatabasis.py | 13 ++++- skfda/representation/evaluator.py | 53 +++++++++++++++--- skfda/representation/extrapolation.py | 66 ++++++++++++++++------- skfda/representation/grid.py | 15 ++++-- skfda/representation/interpolation.py | 19 +++++-- 7 files changed, 148 insertions(+), 84 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index ba8237d8e..d631bd078 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -8,6 +8,7 @@ TYPE_CHECKING, Any, Callable, + Iterable, List, Optional, Sequence, @@ -422,7 +423,7 @@ def _evaluate_grid( @overload def _evaluate_grid( - axes: Sequence[Sequence[np.ndarray]], + axes: Iterable[Sequence[np.ndarray]], *, evaluate_method: EvaluateMethod, n_samples: int, @@ -434,22 +435,8 @@ def _evaluate_grid( pass -@overload def _evaluate_grid( # noqa: WPS234 - axes: Sequence[Union[np.ndarray, Sequence[np.ndarray]]], - *, - evaluate_method: EvaluateMethod, - n_samples: int, - dim_domain: int, - dim_codomain: int, - extrapolation: Optional[ExtrapolationLike] = None, - aligned: bool = True, -) -> np.ndarray: - pass - - -def _evaluate_grid( # noqa: WPS234 - axes: Sequence[Union[np.ndarray, Sequence[np.ndarray]]], + axes: Union[Sequence[np.ndarray], Iterable[Sequence[np.ndarray]]], *, 
evaluate_method: EvaluateMethod, n_samples: int, @@ -513,15 +500,10 @@ def _evaluate_grid( # noqa: WPS234 else: - axes_per_sample = cast(Sequence[Sequence[np.ndarray]], axes) + axes_per_sample = cast(Iterable[Sequence[np.ndarray]], axes) axes_per_sample = list(axes_per_sample) - if len(axes) != n_samples: - raise ValueError( - "Should be provided a list of axis per sample", - ) - eval_points_tuple, shape_tuple = zip( *[ _one_grid_to_points(a, dim_domain=dim_domain) @@ -529,6 +511,11 @@ def _evaluate_grid( # noqa: WPS234 ], ) + if len(eval_points_tuple) != n_samples: + raise ValueError( + "Should be provided a list of axis per sample", + ) + eval_points = _to_array_maybe_ragged(eval_points_tuple) # Evaluate the points diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index 47a00217e..e93d72452 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -39,8 +39,9 @@ EvalPointsType = Union[ np.ndarray, + Iterable[np.ndarray], Sequence[np.ndarray], - Sequence[Sequence[np.ndarray]], + Iterable[Sequence[np.ndarray]], ] @@ -337,24 +338,6 @@ def _join_evaluation( return res - @overload - def _evaluate( - self, - eval_points: np.ndarray, - *, - aligned: Literal[True] = True, - ) -> np.ndarray: - pass - - @overload - def _evaluate( - self, - eval_points: Sequence[np.ndarray], - *, - aligned: Literal[False], - ) -> np.ndarray: - pass - @abstractmethod def _evaluate( self, @@ -400,7 +383,7 @@ def evaluate( @overload def evaluate( self, - eval_points: Sequence[np.ndarray], + eval_points: Iterable[np.ndarray], *, derivative: int = 0, extrapolation: Optional[ExtrapolationLike] = None, @@ -424,7 +407,7 @@ def evaluate( @overload def evaluate( self, - eval_points: Sequence[Sequence[np.ndarray]], + eval_points: Iterable[Sequence[np.ndarray]], *, derivative: int = 0, extrapolation: Optional[ExtrapolationLike] = None, @@ -485,7 +468,7 @@ def evaluate( if grid: # Evaluation of a grid performed 
in auxiliar function - return _evaluate_grid( + return _evaluate_grid( # type: ignore eval_points, evaluate_method=self.evaluate, n_samples=self.n_samples, @@ -542,7 +525,7 @@ def evaluate( aligned=aligned, ) - res_extrapolation = extrapolation.evaluate( + res_extrapolation = extrapolation( # type: ignore self, eval_points_extrapolation, aligned=aligned, @@ -576,7 +559,7 @@ def __call__( @overload def __call__( self, - eval_points: Sequence[np.ndarray], + eval_points: Iterable[np.ndarray], *, derivative: int = 0, extrapolation: Optional[ExtrapolationLike] = None, @@ -600,7 +583,7 @@ def __call__( @overload def __call__( self, - eval_points: Sequence[Sequence[np.ndarray]], + eval_points: Iterable[Sequence[np.ndarray]], *, derivative: int = 0, extrapolation: Optional[ExtrapolationLike] = None, @@ -660,7 +643,7 @@ def __call__( function at the values specified in eval_points. """ - return self.evaluate( + return self.evaluate( # type: ignore eval_points, derivative=derivative, extrapolation=extrapolation, diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 87dc56c8f..6d77ca2ce 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -3,7 +3,16 @@ import copy import warnings from builtins import isinstance -from typing import TYPE_CHECKING, Any, Optional, Sequence, Type, TypeVar, Union +from typing import ( + TYPE_CHECKING, + Any, + Iterable, + Optional, + Sequence, + Type, + TypeVar, + Union, +) import numpy as np import pandas.api.extensions @@ -242,7 +251,7 @@ def domain_range(self) -> DomainRange: def _evaluate( self, - eval_points: Union[np.ndarray, Sequence[np.ndarray]], + eval_points: Union[np.ndarray, Iterable[np.ndarray]], *, aligned: bool = True, ) -> np.ndarray: diff --git a/skfda/representation/evaluator.py b/skfda/representation/evaluator.py index d1cae879a..26a571cf3 100644 --- a/skfda/representation/evaluator.py +++ b/skfda/representation/evaluator.py @@ 
-8,10 +8,10 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Iterable, Union, overload import numpy as np -from typing_extensions import Protocol +from typing_extensions import Literal, Protocol if TYPE_CHECKING: from . import FData @@ -32,11 +32,46 @@ class Evaluator(ABC): """ @abstractmethod - def evaluate( + def _evaluate( + self, + fdata: FData, + eval_points: Union[np.ndarray, Iterable[np.ndarray]], + *, + aligned: bool = True, + ) -> np.ndarray: + """ + Evaluation method. + + Must be overriden in subclasses. + + """ + pass + + @overload + def __call__( self, fdata: FData, eval_points: np.ndarray, *, + aligned: Literal[True] = True, + ) -> np.ndarray: + pass + + @overload + def __call__( + self, + fdata: FData, + eval_points: Iterable[np.ndarray], + *, + aligned: Literal[False], + ) -> np.ndarray: + pass + + def __call__( + self, + fdata: FData, + eval_points: Union[np.ndarray, Iterable[np.ndarray]], + *, aligned: bool = True, ) -> np.ndarray: """ @@ -63,7 +98,11 @@ def evaluate( j-th evaluation point. 
""" - pass + return self._evaluate( + fdata=fdata, + eval_points=eval_points, + aligned=aligned, + ) def __repr__(self) -> str: return f"{type(self)}()" @@ -79,7 +118,7 @@ class EvaluateFunction(Protocol): def __call__( self, fdata: FData, - eval_points: np.ndarray, + eval_points: Union[np.ndarray, Iterable[np.ndarray]], *, aligned: bool = True, ) -> np.ndarray: @@ -122,10 +161,10 @@ class GenericEvaluator(Evaluator): def __init__(self, evaluate_function: EvaluateFunction) -> None: self.evaluate_function = evaluate_function - def evaluate( # noqa: D102 + def _evaluate( # noqa: D102 self, fdata: FData, - eval_points: np.ndarray, + eval_points: Union[np.ndarray, Iterable[np.ndarray]], *, aligned: bool = True, ) -> np.ndarray: diff --git a/skfda/representation/extrapolation.py b/skfda/representation/extrapolation.py index 12cc70e54..31718530b 100644 --- a/skfda/representation/extrapolation.py +++ b/skfda/representation/extrapolation.py @@ -5,16 +5,28 @@ """ from __future__ import annotations -from typing import TYPE_CHECKING, Any, NoReturn, Optional, Union, overload +from typing import ( + TYPE_CHECKING, + Any, + Iterable, + NoReturn, + Optional, + Union, + overload, +) import numpy as np +from typing_extensions import Literal from .evaluator import Evaluator if TYPE_CHECKING: from . 
import FData -ExtrapolationLike = Union[str, Evaluator] +ExtrapolationLike = Union[ + Evaluator, + Literal["bounds", "exception", "nan", "none", "periodic", "zeros"], +] class PeriodicExtrapolation(Evaluator): @@ -49,10 +61,10 @@ class PeriodicExtrapolation(Evaluator): [-1.086]]]) """ - def evaluate( # noqa: D102 + def _evaluate( # noqa: D102 self, fdata: FData, - eval_points: np.ndarray, + eval_points: Union[np.ndarray, Iterable[np.ndarray]], *, aligned: bool = True, ) -> np.ndarray: @@ -99,20 +111,29 @@ class BoundaryExtrapolation(Evaluator): [ 1.125]]]) """ - def evaluate( # noqa: D102 + def _evaluate( # noqa: D102 self, fdata: FData, - eval_points: np.ndarray, + eval_points: Union[np.ndarray, Iterable[np.ndarray]], *, aligned: bool = True, ) -> np.ndarray: domain_range = fdata.domain_range - for i in range(fdata.dim_domain): - a, b = domain_range[i] - eval_points[eval_points[..., i] < a, i] = a - eval_points[eval_points[..., i] > b, i] = b + if aligned: + assert isinstance(eval_points, np.ndarray) + + for i in range(fdata.dim_domain): + a, b = domain_range[i] + eval_points[eval_points[..., i] < a, i] = a + eval_points[eval_points[..., i] > b, i] = b + else: + for points_per_sample in eval_points: + for i in range(fdata.dim_domain): + a, b = domain_range[i] + points_per_sample[points_per_sample[..., i] < a, i] = a + points_per_sample[points_per_sample[..., i] > b, i] = b return fdata(eval_points, aligned=aligned) @@ -133,7 +154,7 @@ class ExceptionExtrapolation(Evaluator): ... fd([-.5, 0, 1.5]).round(3) ... except ValueError as e: ... print(e) - Attempt to evaluate 2 points outside the domain range. + Attempt to evaluate points outside the domain range. This extrapolator is equivalent to the string `"exception"`. @@ -142,22 +163,20 @@ class ExceptionExtrapolation(Evaluator): ... fd([-.5, 0, 1.5]).round(3) ... except ValueError as e: ... print(e) - Attempt to evaluate 2 points outside the domain range. + Attempt to evaluate points outside the domain range. 
""" - def evaluate( # noqa: D102 + def _evaluate( # noqa: D102 self, fdata: FData, - eval_points: np.ndarray, + eval_points: Union[np.ndarray, Iterable[np.ndarray]], *, aligned: bool = True, ) -> NoReturn: - n_points = eval_points.shape[-2] - raise ValueError( - f"Attempt to evaluate {n_points} points outside the domain range.", + "Attempt to evaluate points outside the domain range.", ) @@ -205,15 +224,22 @@ def _fill(self, fdata: FData, eval_points: np.ndarray) -> np.ndarray: ) return np.full(shape, self.fill_value) - def evaluate( # noqa: D102 + def _evaluate( # noqa: D102 self, fdata: FData, - eval_points: np.ndarray, + eval_points: Union[np.ndarray, Iterable[np.ndarray]], *, aligned: bool = True, ) -> np.ndarray: + from .._utils import _to_array_maybe_ragged + + if aligned: + assert isinstance(eval_points, np.ndarray) + return self._fill(fdata, eval_points) + + res_list = [self._fill(p) for p in eval_points] # type: ignore - return self._fill(fdata, eval_points) + return _to_array_maybe_ragged(res_list) def __repr__(self) -> str: return ( diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index a51ef353b..975146280 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -10,7 +10,16 @@ import copy import numbers import warnings -from typing import TYPE_CHECKING, Any, Optional, Sequence, Type, TypeVar, Union +from typing import ( + TYPE_CHECKING, + Any, + Iterable, + Optional, + Sequence, + Type, + TypeVar, + Union, +) import findiff import numpy as np @@ -385,12 +394,12 @@ def interpolation(self, new_interpolation: Optional[Evaluator]) -> None: def _evaluate( self, - eval_points: Union[np.ndarray, Sequence[np.ndarray]], + eval_points: Union[np.ndarray, Iterable[np.ndarray]], *, aligned: bool = True, ) -> np.ndarray: - return self.interpolation.evaluate( + return self.interpolation( # type: ignore self, eval_points, aligned=aligned, diff --git a/skfda/representation/interpolation.py 
b/skfda/representation/interpolation.py index 12a0e87a3..9005d1a02 100644 --- a/skfda/representation/interpolation.py +++ b/skfda/representation/interpolation.py @@ -4,7 +4,15 @@ from __future__ import annotations import abc -from typing import TYPE_CHECKING, Any, Callable, Sequence, Tuple, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Iterable, + Sequence, + Tuple, + Union, +) import numpy as np @@ -69,7 +77,7 @@ def _evaluate_codomain( def evaluate( self, fdata: FData, - eval_points: np.ndarray, + eval_points: Union[np.ndarray, Iterable[np.ndarray]], *, aligned: bool = True, ) -> np.ndarray: @@ -77,6 +85,9 @@ def evaluate( res: np.ndarray if aligned: + + assert isinstance(eval_points, np.ndarray) + # Points evaluated inside the domain res = np.apply_along_axis( self._evaluate_codomain, @@ -489,10 +500,10 @@ def _build_interpolator( smoothness_parameter=self.smoothness_parameter, ) - def evaluate( # noqa: D102 + def _evaluate( # noqa: D102 self, fdata: FData, - eval_points: np.ndarray, + eval_points: Union[np.ndarray, Iterable[np.ndarray]], *, aligned: bool = True, ) -> np.ndarray: From 3cd8ebeeaa14fe6bce7b1bd567a1b2fda7eae1de Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 27 Mar 2021 12:17:39 +0100 Subject: [PATCH 167/417] Improve typing. --- skfda/_utils/_utils.py | 11 ++++----- skfda/representation/_functional_data.py | 6 ++--- skfda/representation/basis/_fdatabasis.py | 27 ++++++++++++++-------- skfda/representation/grid.py | 28 +++++++++++++---------- 4 files changed, 40 insertions(+), 32 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 5265147e7..dbec0e6c3 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -674,11 +674,8 @@ def _int_to_real(array: np.ndarray) -> np.ndarray: return array + 0.0 -def _check_array_key(array, key): - """ - Checks a getitem key. 
- """ - +def _check_array_key(array: np.ndarray, key: Any) -> Any: + """Check a getitem key.""" key = check_array_indexer(array, key) if isinstance(key, numbers.Integral): # To accept also numpy ints @@ -686,8 +683,8 @@ def _check_array_key(array, key): key = range(len(array))[key] return slice(key, key + 1) - else: - return key + + return key def _check_estimator(estimator): diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index e93d72452..f9972b0d2 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -879,7 +879,7 @@ def __getitem__(self: T, key: Union[int, slice]) -> T: """Return self[key].""" pass - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: """Whole object equality.""" return ( isinstance(other, type(self)) # noqa: WPS222 @@ -890,10 +890,10 @@ def equals(self, other: Any) -> bool: ) @abstractmethod - def __eq__(self, other: Any) -> np.ndarray: # type: ignore[override] + def __eq__(self, other: object) -> np.ndarray: # type: ignore[override] pass - def __ne__(self, other: Any) -> np.ndarray: # type: ignore[override] + def __ne__(self, other: object) -> np.ndarray: # type: ignore[override] """Return for `self != other` (element-wise in-equality).""" result = self.__eq__(other) if result is NotImplemented: diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 6d77ca2ce..acf2c42e2 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -12,6 +12,7 @@ Type, TypeVar, Union, + cast, ) import numpy as np @@ -22,8 +23,9 @@ from ..._utils import _check_array_key, _int_to_real, constants from .. 
import grid from .._functional_data import FData -from .._typing import DomainRange, GridPointsLike, LabelTupleLike +from .._typing import ArrayLike, DomainRange, GridPointsLike, LabelTupleLike from ..evaluator import Evaluator +from ..extrapolation import ExtrapolationLike from . import Basis if TYPE_CHECKING: @@ -81,7 +83,7 @@ class FDataBasis(FData): # noqa: WPS214 def __init__( self, basis: Basis, - coefficients: np.ndarray, + coefficients: ArrayLike, *, dataset_label: Optional[str] = None, dataset_name: Optional[str] = None, @@ -89,7 +91,7 @@ def __init__( argument_names: Optional[LabelTupleLike] = None, coordinate_names: Optional[LabelTupleLike] = None, sample_names: Optional[LabelTupleLike] = None, - extrapolation: Optional[Union[str, Evaluator]] = None, + extrapolation: Optional[ExtrapolationLike] = None, ) -> None: """Construct a FDataBasis object.""" coefficients = _int_to_real(np.atleast_2d(coefficients)) @@ -281,11 +283,11 @@ def shift( shifts: np.ndarray, *, restrict_domain: bool = False, - extrapolation: Optional[Union[str, Evaluator]] = None, + extrapolation: Optional[ExtrapolationLike] = None, eval_points: Optional[np.ndarray] = None, **kwargs: Any, ) -> T: - r"""Perform a shift of the curves. + """Perform a shift of the curves. 
Args: shifts: List with the the shift @@ -697,16 +699,21 @@ def __str__(self) -> str: f"\ncoefficients={self.coefficients})" ).replace('\n', '\n ') - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: """Equality of FDataBasis.""" # TODO check all other params + + if not super().equals(other): + return False + + other = cast(grid.FDataGrid, other) + return ( - super().equals(other) - and self.basis == other.basis + self.basis == other.basis and np.array_equal(self.coefficients, other.coefficients) ) - def __eq__(self, other: Any) -> np.ndarray: + def __eq__(self, other: object) -> np.ndarray: # type: ignore[override] """Elementwise equality of FDataBasis.""" if not isinstance(other, type(self)) or self.dtype != other.dtype: if other is pandas.NA: @@ -775,7 +782,7 @@ def compose( self, fd: FData, *, - eval_points: np.ndarray = None, + eval_points: Optional[np.ndarray] = None, **kwargs: Any, ) -> FData: """ diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 975146280..2232f9fa6 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -19,6 +19,7 @@ Type, TypeVar, Union, + cast, ) import findiff @@ -46,6 +47,7 @@ ) from .basis import Basis from .evaluator import Evaluator +from .extrapolation import ExtrapolationLike from .interpolation import SplineInterpolation if TYPE_CHECKING: @@ -144,7 +146,7 @@ def __init__( # noqa: WPS211 coordinate_names: Optional[LabelTupleLike] = None, sample_names: Optional[LabelTupleLike] = None, axes_labels: Optional[LabelTupleLike] = None, - extrapolation: Optional[Union[str, Evaluator]] = None, + extrapolation: Optional[ExtrapolationLike] = None, interpolation: Optional[Evaluator] = None, ): """Construct a FDataGrid object.""" @@ -406,7 +408,7 @@ def _evaluate( ) def derivative(self: T, *, order: int = 1) -> T: - r"""Differentiate a FDataGrid object. + """Differentiate a FDataGrid object. It is calculated using central finite differences when possible. 
In the extremes, forward and backward finite differences with accuracy @@ -597,11 +599,13 @@ def gmean(self: T) -> T: sample_names=("geometric mean",), ) - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: """Comparison of FDataGrid objects.""" if not super().equals(other): return False + other = cast(FDataGrid, other) + if not np.array_equal(self.data_matrix, other.data_matrix): return False @@ -616,12 +620,9 @@ def equals(self, other: Any) -> bool: ): return False - if self.interpolation != other.interpolation: - return False - - return True + return self.interpolation == other.interpolation - def __eq__(self, other: Any) -> np.ndarray: # type: ignore[override] + def __eq__(self, other: object) -> np.ndarray: # type: ignore[override] """Elementwise equality of FDataGrid.""" if not isinstance(other, type(self)) or self.dtype != other.dtype: if other is pandas.NA: @@ -649,7 +650,7 @@ def _get_op_matrix( self, other: Union[T, np.ndarray, float], ) -> Union[None, float, np.ndarray]: - if isinstance(other, numbers.Number): + if isinstance(other, numbers.Real): return float(other) elif isinstance(other, np.ndarray): @@ -891,8 +892,11 @@ def to_grid( # noqa: D102 ) grid_points = sample_points - if grid_points is None: - grid_points = self.grid_points + grid_points = ( + self.grid_points + if grid_points is None + else _to_grid_points(grid_points) + ) return self.copy( data_matrix=self.evaluate(grid_points, grid=True), @@ -911,7 +915,7 @@ def copy( # noqa: WPS211 argument_names: Optional[LabelTupleLike] = None, coordinate_names: Optional[LabelTupleLike] = None, sample_names: Optional[LabelTupleLike] = None, - extrapolation: Optional[Union[str, Evaluator]] = None, + extrapolation: Optional[ExtrapolationLike] = None, interpolation: Optional[Evaluator] = None, ) -> T: """ From e49b27e58a0059d6f427db267d7e45c08480b43b Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 27 Mar 2021 12:33:19 +0100 Subject: [PATCH 168/417] Small fix. 
--- skfda/representation/grid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 2232f9fa6..f68e9bb7a 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -982,7 +982,7 @@ def shift( shifts: Union[np.ndarray, float], *, restrict_domain: bool = False, - extrapolation: Optional[Union[str, Evaluator]] = None, + extrapolation: Optional[ExtrapolationLike] = None, eval_points: Optional[np.ndarray] = None, ) -> T: """Perform a shift of the curves. From 4b221fc8cfcfee8e860d17c0bb92d04db944dc87 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 27 Mar 2021 21:05:42 +0100 Subject: [PATCH 169/417] Typing fixes. --- skfda/_utils/_utils.py | 54 +++++------ skfda/preprocessing/registration/__init__.py | 16 ++-- skfda/preprocessing/registration/_warping.py | 95 +++++++++++--------- skfda/preprocessing/registration/elastic.py | 10 +-- skfda/representation/_functional_data.py | 52 +++++------ skfda/representation/basis/_fdatabasis.py | 2 +- skfda/representation/evaluator.py | 29 +++--- skfda/representation/extrapolation.py | 31 ++++--- skfda/representation/grid.py | 2 +- skfda/representation/interpolation.py | 12 ++- 10 files changed, 165 insertions(+), 138 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index dbec0e6c3..3f95847d9 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -105,7 +105,6 @@ def _to_grid( eval_points: Optional[np.ndarray] = None, ) -> Tuple[FDataGrid, FDataGrid]: """Transform a pair of FDatas in grids to perform calculations.""" - from .. 
import FDataGrid x_is_grid = isinstance(X, FDataGrid) y_is_grid = isinstance(y, FDataGrid) @@ -139,14 +138,10 @@ def _to_grid_points(grid_points_like: GridPointsLike) -> GridPoints: """ unidimensional = False - try: - iter(grid_points_like) - except TypeError: + if not isinstance(grid_points_like, Iterable): grid_points_like = [grid_points_like] - try: - iter(grid_points_like[0]) - except TypeError: + if not isinstance(grid_points_like[0], Iterable): unidimensional = True if unidimensional: @@ -174,7 +169,7 @@ def _to_domain_range(sequence: DomainRangeLike) -> DomainRange: def _to_array_maybe_ragged( - array: Sequence[ArrayLike], + array: Iterable[ArrayLike], *, row_shape: Optional[Sequence[int]] = None, ) -> np.ndarray: @@ -327,14 +322,13 @@ def _reshape_eval_points( def _reshape_eval_points( - eval_points: Union[np.ndarray, Sequence[np.ndarray]], + eval_points: Union[ArrayLike, Iterable[ArrayLike]], *, aligned: bool, n_samples: int, dim_domain: int, ) -> np.ndarray: - """Convert and reshape the eval_points to ndarray with the - corresponding shape. + """Convert and reshape the eval_points to ndarray. Args: eval_points: Evaluation points to be reshaped. @@ -351,37 +345,45 @@ def _reshape_eval_points( x `dim_domain`. 
""" - if aligned: eval_points = np.asarray(eval_points) else: + eval_points = cast(Iterable[ArrayLike], eval_points) + eval_points = _to_array_maybe_ragged( - eval_points, row_shape=(-1, dim_domain)) + eval_points, + row_shape=(-1, dim_domain), + ) # Case evaluation of a single value, i.e., f(0) # Only allowed for aligned evaluation - if aligned and (eval_points.shape == (dim_domain,) - or (eval_points.ndim == 0 and dim_domain == 1)): + if aligned and ( + eval_points.shape == (dim_domain,) + or (eval_points.ndim == 0 and dim_domain == 1) + ): eval_points = np.array([eval_points]) if aligned: # Samples evaluated at same eval points - eval_points = eval_points.reshape((eval_points.shape[0], - dim_domain)) + eval_points = eval_points.reshape( + (eval_points.shape[0], dim_domain), + ) else: # Different eval_points for each sample if eval_points.shape[0] != n_samples: - raise ValueError(f"eval_points should be a list " - f"of length {n_samples} with the " - f"evaluation points for each sample.") + raise ValueError( + f"eval_points should be a list " + f"of length {n_samples} with the " + f"evaluation points for each sample.", + ) return eval_points def _one_grid_to_points( - axes: Sequence[np.ndarray], + axes: GridPointsLike, *, dim_domain: int, ) -> Tuple[np.ndarray, Tuple[int, ...]]: @@ -421,7 +423,7 @@ def __call__( @overload def _evaluate_grid( - axes: Sequence[np.ndarray], + axes: GridPointsLike, *, evaluate_method: EvaluateMethod, n_samples: int, @@ -435,7 +437,7 @@ def _evaluate_grid( @overload def _evaluate_grid( - axes: Iterable[Sequence[np.ndarray]], + axes: Iterable[GridPointsLike], *, evaluate_method: EvaluateMethod, n_samples: int, @@ -448,7 +450,7 @@ def _evaluate_grid( def _evaluate_grid( # noqa: WPS234 - axes: Union[Sequence[np.ndarray], Iterable[Sequence[np.ndarray]]], + axes: Union[GridPointsLike, Iterable[GridPointsLike]], *, evaluate_method: EvaluateMethod, n_samples: int, @@ -506,13 +508,13 @@ def _evaluate_grid( # noqa: WPS234 # Compute 
intersection points and resulting shapes if aligned: - axes = cast(Sequence[np.ndarray], axes) + axes = cast(GridPointsLike, axes) eval_points, shape = _one_grid_to_points(axes, dim_domain=dim_domain) else: - axes_per_sample = cast(Iterable[Sequence[np.ndarray]], axes) + axes_per_sample = cast(Iterable[GridPointsLike], axes) axes_per_sample = list(axes_per_sample) diff --git a/skfda/preprocessing/registration/__init__.py b/skfda/preprocessing/registration/__init__.py index ce4a52cae..1894f4761 100644 --- a/skfda/preprocessing/registration/__init__.py +++ b/skfda/preprocessing/registration/__init__.py @@ -4,15 +4,13 @@ functional data, in basis as well in discretized form. """ -from ._landmark_registration import (landmark_shift_deltas, - landmark_shift, - landmark_registration_warping, - landmark_registration) - +from . import elastic, validation +from ._landmark_registration import ( + landmark_registration, + landmark_registration_warping, + landmark_shift, + landmark_shift_deltas, +) from ._shift_registration import ShiftRegistration - from ._warping import invert_warping, normalize_warping - from .elastic import ElasticRegistration - -from . import validation, elastic diff --git a/skfda/preprocessing/registration/_warping.py b/skfda/preprocessing/registration/_warping.py index 35fc7ba86..00f8b3d75 100644 --- a/skfda/preprocessing/registration/_warping.py +++ b/skfda/preprocessing/registration/_warping.py @@ -2,21 +2,23 @@ This module contains routines related to the registration procedure. 
""" -import collections -import scipy.integrate -from scipy.interpolate import PchipInterpolator +from typing import Optional import numpy as np -from ..._utils import check_is_univariate - +from scipy.interpolate import PchipInterpolator -__author__ = "Pablo Marcos Manchón" -__email__ = "pablo.marcosm@estudiante.uam.es" +from ..._utils import _to_domain_range, check_is_univariate +from ...representation import FDataGrid +from ...representation._typing import ArrayLike, DomainRangeLike -def invert_warping(fdatagrid, *, output_points=None): +def invert_warping( + warping: FDataGrid, + *, + output_points: Optional[ArrayLike] = None, +) -> FDataGrid: r"""Compute the inverse of a diffeomorphism. Let :math:`\gamma : [a,b] \rightarrow [a,b]` be a function strictly @@ -27,20 +29,19 @@ def invert_warping(fdatagrid, *, output_points=None): Uses a PCHIP interpolator to compute approximately the inverse. Args: - fdatagrid (:class:`FDataGrid`): Functions to be inverted. - eval_points: (array_like, optional): Set of points where the + warping: Functions to be inverted. + output_points: Set of points where the functions are interpolated to obtain the inverse, by default uses the sample points of the fdatagrid. Returns: - :class:`FDataGrid`: Inverse of the original functions. + Inverse of the original functions. Raises: ValueError: If the functions are not strictly increasing or are multidimensional. Examples: - >>> import numpy as np >>> from skfda import FDataGrid >>> from skfda.preprocessing.registration import invert_warping @@ -71,34 +72,36 @@ def invert_warping(fdatagrid, *, output_points=None): [ 1. 
]]]) """ + check_is_univariate(warping) - check_is_univariate(fdatagrid) - - if output_points is None: - output_points = fdatagrid.grid_points[0] + output_points = ( + warping.grid_points[0] + if output_points is None + else np.asarray(output_points) + ) - y = fdatagrid(output_points)[..., 0] + y = warping(output_points)[..., 0] - data_matrix = np.empty((fdatagrid.n_samples, len(output_points))) + data_matrix = np.empty((warping.n_samples, len(output_points))) - for i in range(fdatagrid.n_samples): + for i in range(warping.n_samples): data_matrix[i] = PchipInterpolator(y[i], output_points)(output_points) - return fdatagrid.copy(data_matrix=data_matrix, grid_points=output_points) + return warping.copy(data_matrix=data_matrix, grid_points=output_points) -def _normalize_scale(t, a=0, b=1): +def _normalize_scale(t: np.ndarray, a: float = 0, b: float = 1) -> np.ndarray: """Perfoms an afine translation to normalize an interval. Args: - t (numpy.ndarray): Array of dim 1 or 2 with at least 2 values. - a (float): Starting point of the new interval. Defaults 0. - b (float): Stopping point of the new interval. Defaults 1. + t: Array of dim 1 or 2 with at least 2 values. + a: Starting point of the new interval. Defaults 0. + b: Stopping point of the new interval. Defaults 1. Returns: - (numpy.ndarray): Array with the transformed interval. - """ + Array with the transformed interval. + """ t = t.T # Broadcast to normalize multiple arrays t1 = (t - t[0]).astype(float) # Translation to [0, t[-1] - t[0]] t1 *= (b - a) / (t[-1] - t[0]) # Scale to [0, b-a] @@ -109,7 +112,10 @@ def _normalize_scale(t, a=0, b=1): return t1.T -def normalize_warping(warping, domain_range=None): +def normalize_warping( + warping: FDataGrid, + domain_range: Optional[DomainRangeLike] = None, +) -> FDataGrid: r"""Rescale a warping to normalize their :term:`domain`. 
Given a set of warpings :math:`\gamma_i:[a,b]\rightarrow [a,b]` it is @@ -118,19 +124,28 @@ def normalize_warping(warping, domain_range=None): [\tilde a, \tilde b]`. Args: - warping (:class:`FDatagrid`): Set of warpings to rescale. - domain_range (tuple, optional): New domain range of the warping. By + warping: Set of warpings to rescale. + domain_range: New domain range of the warping. By default it is used the same domain range. - Return: - (:class:`FDataGrid`): FDataGrid with the warpings normalized. - - """ - if domain_range is None: - domain_range = warping.domain_range[0] - - data_matrix = _normalize_scale(warping.data_matrix[..., 0], *domain_range) - grid_points = _normalize_scale(warping.grid_points[0], *domain_range) + Returns: + Normalized warpings. - return warping.copy(data_matrix=data_matrix, grid_points=grid_points, - domain_range=domain_range) + """ + domain_range_tuple = ( + warping.domain_range[0] + if domain_range is None + else _to_domain_range(domain_range)[0] + ) + + data_matrix = _normalize_scale( + warping.data_matrix[..., 0], + *domain_range_tuple, + ) + grid_points = _normalize_scale(warping.grid_points[0], *domain_range_tuple) + + return warping.copy( + data_matrix=data_matrix, + grid_points=grid_points, + domain_range=domain_range, + ) diff --git a/skfda/preprocessing/registration/elastic.py b/skfda/preprocessing/registration/elastic.py index 03e0de7a2..e10527ac2 100644 --- a/skfda/preprocessing/registration/elastic.py +++ b/skfda/preprocessing/registration/elastic.py @@ -1,19 +1,17 @@ -from fdasrsf.utility_functions import optimum_reparam -import scipy.integrate +import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils.validation import check_is_fitted -import numpy as np +import scipy.integrate +from fdasrsf.utility_functions import optimum_reparam -from . import invert_warping from ... 
import FDataGrid from ..._utils import check_is_univariate from ...representation.interpolation import SplineInterpolation -from ._warping import _normalize_scale +from ._warping import _normalize_scale, invert_warping from .base import RegistrationTransformer - __author__ = "Pablo Marcos Manchón" __email__ = "pablo.marcosm@estudiante.uam.es" diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index f9972b0d2..4fea3a0e2 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -27,7 +27,13 @@ from typing_extensions import Literal from .._utils import _evaluate_grid, _reshape_eval_points -from ._typing import DomainRange, LabelTuple, LabelTupleLike +from ._typing import ( + ArrayLike, + DomainRange, + GridPointsLike, + LabelTuple, + LabelTupleLike, +) from .evaluator import Evaluator from .extrapolation import ExtrapolationLike, _parse_extrapolation @@ -38,10 +44,10 @@ T = TypeVar('T', bound='FData') EvalPointsType = Union[ - np.ndarray, - Iterable[np.ndarray], - Sequence[np.ndarray], - Iterable[Sequence[np.ndarray]], + ArrayLike, + Iterable[ArrayLike], + GridPointsLike, + Iterable[GridPointsLike], ] @@ -341,7 +347,7 @@ def _join_evaluation( @abstractmethod def _evaluate( self, - eval_points: Union[np.ndarray, Sequence[np.ndarray]], + eval_points: Union[ArrayLike, Iterable[ArrayLike]], *, aligned: bool = True, ) -> np.ndarray: @@ -371,7 +377,7 @@ def _evaluate( @overload def evaluate( self, - eval_points: np.ndarray, + eval_points: ArrayLike, *, derivative: int = 0, extrapolation: Optional[ExtrapolationLike] = None, @@ -383,7 +389,7 @@ def evaluate( @overload def evaluate( self, - eval_points: Iterable[np.ndarray], + eval_points: Iterable[ArrayLike], *, derivative: int = 0, extrapolation: Optional[ExtrapolationLike] = None, @@ -395,7 +401,7 @@ def evaluate( @overload def evaluate( self, - eval_points: Sequence[np.ndarray], + eval_points: GridPointsLike, *, derivative: int = 0, 
extrapolation: Optional[ExtrapolationLike] = None, @@ -407,7 +413,7 @@ def evaluate( @overload def evaluate( self, - eval_points: Iterable[Sequence[np.ndarray]], + eval_points: Iterable[GridPointsLike], *, derivative: int = 0, extrapolation: Optional[ExtrapolationLike] = None, @@ -459,7 +465,7 @@ def evaluate( "derivative function instead.", DeprecationWarning, ) - return self.derivative(order=derivative)( + return self.derivative(order=derivative)( # type: ignore eval_points, extrapolation=extrapolation, grid=grid, @@ -478,6 +484,8 @@ def evaluate( aligned=aligned, ) + eval_points = cast(Union[ArrayLike, Iterable[ArrayLike]], eval_points) + if extrapolation is None: extrapolation = self.extrapolation else: @@ -485,7 +493,7 @@ def evaluate( extrapolation = _parse_extrapolation(extrapolation) eval_points = cast( - Union[np.ndarray, Sequence[np.ndarray]], + Union[ArrayLike, Sequence[ArrayLike]], eval_points, ) @@ -547,7 +555,7 @@ def evaluate( @overload def __call__( self, - eval_points: np.ndarray, + eval_points: ArrayLike, *, derivative: int = 0, extrapolation: Optional[ExtrapolationLike] = None, @@ -559,7 +567,7 @@ def __call__( @overload def __call__( self, - eval_points: Iterable[np.ndarray], + eval_points: Iterable[ArrayLike], *, derivative: int = 0, extrapolation: Optional[ExtrapolationLike] = None, @@ -571,7 +579,7 @@ def __call__( @overload def __call__( self, - eval_points: Sequence[np.ndarray], + eval_points: GridPointsLike, *, derivative: int = 0, extrapolation: Optional[ExtrapolationLike] = None, @@ -583,7 +591,7 @@ def __call__( @overload def __call__( self, - eval_points: Iterable[Sequence[np.ndarray]], + eval_points: Iterable[GridPointsLike], *, derivative: int = 0, extrapolation: Optional[ExtrapolationLike] = None, @@ -592,18 +600,6 @@ def __call__( ) -> np.ndarray: pass - @overload - def __call__( - self, - eval_points: EvalPointsType, - *, - derivative: int = 0, - extrapolation: Optional[ExtrapolationLike] = None, - grid: bool = False, - aligned: 
bool = True, - ) -> np.ndarray: - pass - def __call__( self, eval_points: EvalPointsType, diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index acf2c42e2..768624382 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -253,7 +253,7 @@ def domain_range(self) -> DomainRange: def _evaluate( self, - eval_points: Union[np.ndarray, Iterable[np.ndarray]], + eval_points: Union[ArrayLike, Iterable[ArrayLike]], *, aligned: bool = True, ) -> np.ndarray: diff --git a/skfda/representation/evaluator.py b/skfda/representation/evaluator.py index 26a571cf3..eb1fe9d94 100644 --- a/skfda/representation/evaluator.py +++ b/skfda/representation/evaluator.py @@ -13,6 +13,8 @@ import numpy as np from typing_extensions import Literal, Protocol +from ._typing import ArrayLike + if TYPE_CHECKING: from . import FData @@ -35,12 +37,12 @@ class Evaluator(ABC): def _evaluate( self, fdata: FData, - eval_points: Union[np.ndarray, Iterable[np.ndarray]], + eval_points: Union[ArrayLike, Iterable[ArrayLike]], *, aligned: bool = True, ) -> np.ndarray: """ - Evaluation method. + Evaluate the samples at evaluation points. Must be overriden in subclasses. 
@@ -51,7 +53,7 @@ def _evaluate( def __call__( self, fdata: FData, - eval_points: np.ndarray, + eval_points: ArrayLike, *, aligned: Literal[True] = True, ) -> np.ndarray: @@ -61,7 +63,7 @@ def __call__( def __call__( self, fdata: FData, - eval_points: Iterable[np.ndarray], + eval_points: Iterable[ArrayLike], *, aligned: Literal[False], ) -> np.ndarray: @@ -70,7 +72,7 @@ def __call__( def __call__( self, fdata: FData, - eval_points: Union[np.ndarray, Iterable[np.ndarray]], + eval_points: Union[ArrayLike, Iterable[ArrayLike]], *, aligned: bool = True, ) -> np.ndarray: @@ -118,7 +120,7 @@ class EvaluateFunction(Protocol): def __call__( self, fdata: FData, - eval_points: Union[np.ndarray, Iterable[np.ndarray]], + eval_points: Union[ArrayLike, Iterable[ArrayLike]], *, aligned: bool = True, ) -> np.ndarray: @@ -131,7 +133,7 @@ def __call__( Args: fdata: Object to evaluate. - eval_points (numpy.ndarray): Numpy array with shape + eval_points: Numpy array with shape ``(number_eval_points, dim_domain)`` with the evaluation points. aligned: Whether the input points are @@ -139,11 +141,11 @@ def __call__( passed. Returns: - (numpy.darray): Numpy 3d array with shape - ``(n_samples, number_eval_points, dim_codomain)`` with the - result of the evaluation. The entry ``(i,j,k)`` will contain - the value k-th image dimension of the i-th sample, at the - j-th evaluation point. + Numpy 3d array with shape + ``(n_samples, number_eval_points, dim_codomain)`` with the + result of the evaluation. The entry ``(i,j,k)`` will contain + the value k-th image dimension of the i-th sample, at the + j-th evaluation point. 
""" pass @@ -164,8 +166,9 @@ def __init__(self, evaluate_function: EvaluateFunction) -> None: def _evaluate( # noqa: D102 self, fdata: FData, - eval_points: Union[np.ndarray, Iterable[np.ndarray]], + eval_points: Union[ArrayLike, Iterable[ArrayLike]], *, aligned: bool = True, ) -> np.ndarray: + return self.evaluate_function(fdata, eval_points, aligned=aligned) diff --git a/skfda/representation/extrapolation.py b/skfda/representation/extrapolation.py index 31718530b..6217014a1 100644 --- a/skfda/representation/extrapolation.py +++ b/skfda/representation/extrapolation.py @@ -12,12 +12,14 @@ NoReturn, Optional, Union, + cast, overload, ) import numpy as np from typing_extensions import Literal +from ._typing import ArrayLike from .evaluator import Evaluator if TYPE_CHECKING: @@ -64,7 +66,7 @@ class PeriodicExtrapolation(Evaluator): def _evaluate( # noqa: D102 self, fdata: FData, - eval_points: Union[np.ndarray, Iterable[np.ndarray]], + eval_points: Union[ArrayLike, Iterable[ArrayLike]], *, aligned: bool = True, ) -> np.ndarray: @@ -76,7 +78,7 @@ def _evaluate( # noqa: D102 eval_points %= domain_range[:, 1] - domain_range[:, 0] eval_points += domain_range[:, 0] - return fdata(eval_points, aligned=aligned) + return fdata(eval_points, aligned=aligned) # type: ignore class BoundaryExtrapolation(Evaluator): @@ -114,7 +116,7 @@ class BoundaryExtrapolation(Evaluator): def _evaluate( # noqa: D102 self, fdata: FData, - eval_points: Union[np.ndarray, Iterable[np.ndarray]], + eval_points: Union[ArrayLike, Iterable[ArrayLike]], *, aligned: bool = True, ) -> np.ndarray: @@ -122,20 +124,25 @@ def _evaluate( # noqa: D102 domain_range = fdata.domain_range if aligned: - assert isinstance(eval_points, np.ndarray) + eval_points = np.asarray(eval_points) for i in range(fdata.dim_domain): a, b = domain_range[i] eval_points[eval_points[..., i] < a, i] = a eval_points[eval_points[..., i] > b, i] = b else: + eval_points = cast(Iterable[ArrayLike], eval_points) + for points_per_sample in 
eval_points: + + points_per_sample = np.asarray(points_per_sample) + for i in range(fdata.dim_domain): a, b = domain_range[i] points_per_sample[points_per_sample[..., i] < a, i] = a points_per_sample[points_per_sample[..., i] > b, i] = b - return fdata(eval_points, aligned=aligned) + return fdata(eval_points, aligned=aligned) # type: ignore class ExceptionExtrapolation(Evaluator): @@ -170,7 +177,7 @@ class ExceptionExtrapolation(Evaluator): def _evaluate( # noqa: D102 self, fdata: FData, - eval_points: Union[np.ndarray, Iterable[np.ndarray]], + eval_points: Union[ArrayLike, Iterable[ArrayLike]], *, aligned: bool = True, ) -> NoReturn: @@ -216,7 +223,9 @@ class FillExtrapolation(Evaluator): def __init__(self, fill_value: float) -> None: self.fill_value = fill_value - def _fill(self, fdata: FData, eval_points: np.ndarray) -> np.ndarray: + def _fill(self, fdata: FData, eval_points: ArrayLike) -> np.ndarray: + eval_points = np.asarray(eval_points) + shape = ( fdata.n_samples, eval_points.shape[-2], @@ -227,17 +236,19 @@ def _fill(self, fdata: FData, eval_points: np.ndarray) -> np.ndarray: def _evaluate( # noqa: D102 self, fdata: FData, - eval_points: Union[np.ndarray, Iterable[np.ndarray]], + eval_points: Union[ArrayLike, Iterable[ArrayLike]], *, aligned: bool = True, ) -> np.ndarray: from .._utils import _to_array_maybe_ragged if aligned: - assert isinstance(eval_points, np.ndarray) + eval_points = cast(ArrayLike, eval_points) return self._fill(fdata, eval_points) - res_list = [self._fill(p) for p in eval_points] # type: ignore + eval_points = cast(Iterable[ArrayLike], eval_points) + + res_list = [self._fill(fdata, p) for p in eval_points] return _to_array_maybe_ragged(res_list) diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index f68e9bb7a..41421af95 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -396,7 +396,7 @@ def interpolation(self, new_interpolation: Optional[Evaluator]) -> None: def _evaluate( self, - 
eval_points: Union[np.ndarray, Iterable[np.ndarray]], + eval_points: Union[ArrayLike, Iterable[ArrayLike]], *, aligned: bool = True, ) -> np.ndarray: diff --git a/skfda/representation/interpolation.py b/skfda/representation/interpolation.py index 9005d1a02..5ba146f8d 100644 --- a/skfda/representation/interpolation.py +++ b/skfda/representation/interpolation.py @@ -12,6 +12,7 @@ Sequence, Tuple, Union, + cast, ) import numpy as np @@ -24,6 +25,7 @@ ) from .._utils import _to_array_maybe_ragged +from ._typing import ArrayLike from .evaluator import Evaluator if TYPE_CHECKING: @@ -77,7 +79,7 @@ def _evaluate_codomain( def evaluate( self, fdata: FData, - eval_points: Union[np.ndarray, Iterable[np.ndarray]], + eval_points: Union[ArrayLike, Iterable[ArrayLike]], *, aligned: bool = True, ) -> np.ndarray: @@ -86,7 +88,7 @@ def evaluate( if aligned: - assert isinstance(eval_points, np.ndarray) + eval_points = np.asarray(eval_points) # Points evaluated inside the domain res = np.apply_along_axis( @@ -103,8 +105,10 @@ def evaluate( ) else: + eval_points = cast(Iterable[ArrayLike], eval_points) + res = _to_array_maybe_ragged([ - self._evaluate_codomain(s, e) + self._evaluate_codomain(s, np.asarray(e)) for s, e in zip(self.splines, eval_points) ]) @@ -503,7 +507,7 @@ def _build_interpolator( def _evaluate( # noqa: D102 self, fdata: FData, - eval_points: Union[np.ndarray, Iterable[np.ndarray]], + eval_points: Union[ArrayLike, Iterable[ArrayLike]], *, aligned: bool = True, ) -> np.ndarray: From 7baef21fa01be0e1870aab6dbf18dc63687a4178 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 29 Mar 2021 12:53:58 +0200 Subject: [PATCH 170/417] Fix shift. 
--- .../registration/_landmark_registration.py | 2 +- .../registration/_shift_registration.py | 113 +++++---- skfda/representation/_functional_data.py | 43 ++-- skfda/representation/basis/_fdatabasis.py | 59 +++-- skfda/representation/grid.py | 224 +++++++++++------- tests/test_registration.py | 2 +- 6 files changed, 266 insertions(+), 177 deletions(-) diff --git a/skfda/preprocessing/registration/_landmark_registration.py b/skfda/preprocessing/registration/_landmark_registration.py index 8f54466a3..32b269397 100644 --- a/skfda/preprocessing/registration/_landmark_registration.py +++ b/skfda/preprocessing/registration/_landmark_registration.py @@ -153,7 +153,7 @@ def landmark_shift(fd, landmarks, location=None, *, restrict_domain=False, return fd.shift(shifts, restrict_domain=restrict_domain, extrapolation=extrapolation, - eval_points=eval_points, **kwargs) + grid_points=eval_points, **kwargs) def landmark_registration_warping(fd, landmarks, *, location=None, diff --git a/skfda/preprocessing/registration/_shift_registration.py b/skfda/preprocessing/registration/_shift_registration.py index 19aae9f6c..c6300f624 100644 --- a/skfda/preprocessing/registration/_shift_registration.py +++ b/skfda/preprocessing/registration/_shift_registration.py @@ -1,16 +1,24 @@ """Class to apply Shift Registration to functional data""" +from __future__ import annotations -# Pablo Marcos Manchón -# pablo.marcosm@protonmail.com +from typing import Callable, Optional, Tuple, TypeVar, Union import numpy as np -from scipy.integrate import simps from sklearn.utils.validation import check_is_fitted +from typing_extensions import Literal + +from scipy.integrate import simps from ... 
import FData, FDataGrid from ..._utils import check_is_univariate, constants +from ...misc.metrics._lp_norms import l2_norm +from ...representation._typing import ArrayLike +from ...representation.extrapolation import ExtrapolationLike from .base import RegistrationTransformer +T = TypeVar("T", bound=FData) +TemplateFunction = Callable[[FData], FData] + class ShiftRegistration(RegistrationTransformer): r"""Register a functional dataset using shift alignment. @@ -82,7 +90,6 @@ class ShiftRegistration(RegistrationTransformer): the method. Examples: - >>> from skfda.preprocessing.registration import ShiftRegistration >>> from skfda.datasets import make_sinusoidal_process >>> from skfda.representation.basis import Fourier @@ -122,9 +129,17 @@ class ShiftRegistration(RegistrationTransformer): Data Analysis* (pp. 142-144). Springer. """ - def __init__(self, max_iter=5, tol=1e-2, template="mean", - extrapolation=None, step_size=1, restrict_domain=False, - initial="zeros", output_points=None): + def __init__( + self, + max_iter: int = 5, + tol: float = 1e-2, + template: Union[Literal["mean"], FData, TemplateFunction] = "mean", + extrapolation: Optional[ExtrapolationLike] = None, + step_size: float = 1, + restrict_domain: bool = False, + initial: Union[Literal["zeros"], ArrayLike] = "zeros", + output_points: Optional[ArrayLike] = None, + ) -> None: self.max_iter = max_iter self.tol = tol self.template = template @@ -134,50 +149,50 @@ def __init__(self, max_iter=5, tol=1e-2, template="mean", self.initial = initial self.output_points = output_points - def _compute_deltas(self, fd, template): - r"""Compute the shifts to perform the registration. + def _compute_deltas( + self, + fd: FData, + template: Union[Literal["mean"], FData, TemplateFunction], + ) -> Tuple[np.ndarray, FDataGrid]: + """Compute the shifts to perform the registration. Args: - fd (FData: Functional object to be registered. 
- template (str, FData or callable): Template to align the + fd: Functional object to be registered. + template: Template to align the the samples. "mean" to compute the mean iteratively as in the original paper, an FData with the templated calculated or a callable wich constructs the template. Returns: - tuple: A tuple with an array of deltas and an FDataGrid with the - template. + A tuple with an array of deltas and an FDataGrid with the template. """ check_is_univariate(fd) + if not isinstance(fd, FDataGrid): + fd = fd.to_grid() + domain_range = fd.domain_range[0] # Initial estimation of the shifts if self.initial == "zeros": delta = np.zeros(fd.n_samples) - - elif len(self.initial) != fd.n_samples: - raise ValueError(f"the initial shift ({len(self.initial)}) must " - f"have the same length than the number of samples" - f" ({fd.n_samples})") else: delta = np.asarray(self.initial) + if len(delta) != fd.n_samples: + raise ValueError( + f"The length of the initial shift ({len(delta)}) must " + f"be the same than the number of samples ({fd.n_samples})", + ) + # Fine equispaced mesh to evaluate the samples if self.output_points is None: - - try: - output_points = fd.grid_points[0] - nfine = len(output_points) - except AttributeError: - nfine = max(fd.n_basis * constants.BASIS_MIN_FACTOR + 1, - constants.N_POINTS_COARSE_MESH) - output_points = np.linspace(*domain_range, nfine) - + output_points = fd.grid_points[0] + nfine = len(output_points) else: - nfine = len(self.output_points) output_points = np.asarray(self.output_points) + nfine = len(output_points) # Auxiliar array to avoid multiple memory allocations delta_aux = np.empty(fd.n_samples) @@ -188,20 +203,21 @@ def _compute_deltas(self, fd, template): # Second term of the second derivate estimation of REGSSE. 
The # first term has been dropped to improve convergence (see references) - d2_regsse = simps(np.square(D1x), output_points, axis=1) + d2_regsse = l2_norm(fd_deriv)**2 max_diff = self.tol + 1 self.n_iter_ = 0 + template_fixed = False + # Case template fixed if isinstance(template, FData): - original_template = template tfine_aux = template.evaluate(output_points)[0, ..., 0] if self.restrict_domain: template_points_aux = tfine_aux - template = "fixed" + template_fixed = True else: tfine_aux = np.empty(nfine) @@ -238,15 +254,17 @@ def _compute_deltas(self, fd, template): output_points_rep = np.outer(ones, output_points) # Computes the new values shifted - x = fd(output_points_rep + np.atleast_2d(delta).T, - aligned=False, - extrapolation=self.extrapolation)[..., 0] + x = fd( + output_points_rep + np.atleast_2d(delta).T, + aligned=False, + extrapolation=self.extrapolation, + )[..., 0] if template == "mean": x.mean(axis=0, out=tfine_aux) - elif template == "fixed" and self.restrict_domain: + elif template_fixed and self.restrict_domain: tfine_aux = template_points_aux[domain] - elif callable(template): # Callable + elif not template_fixed and callable(template): # Callable fd_x = FDataGrid(x, grid_points=output_points) fd_tfine = template(fd_x) tfine_aux = fd_tfine.data_matrix.ravel() @@ -266,33 +284,28 @@ def _compute_deltas(self, fd, template): max_diff = np.abs(delta_aux, out=delta_aux).max() self.n_iter_ += 1 - if template == "fixed": - - # Stores the original template instead of building it again - template = original_template - else: - - # Stores the template in an FDataGrid + if template_fixed is False: + # Stores the template in an FDataGrid template = FDataGrid(tfine_aux, grid_points=output_points) return delta, template - def fit_transform(self, X: FData, y=None): + def fit_transform(self, X: T, y: None = None) -> T: """Fit the estimator and transform the data. Args: - X (FData): Functional dataset to be transformed. 
- y (ignored): not used, present for API consistency by convention. + X: Functional dataset to be transformed. + y: not used, present for API consistency by convention. Returns: - FData: Functional data registered. + Functional data registered. """ self.deltas_, self.template_ = self._compute_deltas(X, self.template) return X.shift(self.deltas_, restrict_domain=self.restrict_domain, extrapolation=self.extrapolation, - eval_points=self.output_points) + grid_points=self.output_points) def fit(self, X: FData, y=None): """Fit the estimator. @@ -359,7 +372,7 @@ def transform(self, X: FData, y=None): return X.shift(deltas, restrict_domain=self.restrict_domain, extrapolation=self.extrapolation, - eval_points=self.output_points) + grid_points=self.output_points) def inverse_transform(self, X: FData, y=None): """Applies the inverse transformation. @@ -403,4 +416,4 @@ def inverse_transform(self, X: FData, y=None): return X.shift(-self.deltas_, restrict_domain=self.restrict_domain, extrapolation=self.extrapolation, - eval_points=self.output_points) + grid_points=self.output_points) diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index 4fea3a0e2..232117805 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -667,30 +667,42 @@ def shift( *, restrict_domain: bool = False, extrapolation: Optional[ExtrapolationLike] = None, - eval_points: Optional[np.ndarray] = None, + grid_points: Optional[GridPointsLike] = None, ) -> T: - """Perform a shift of the curves. + r""" + Perform a shift of the curves. + + The i-th shifted function :math:`y_i` has the form + + .. math:: + y_i(t) = x_i(t + \delta_i) + + where :math:`x_i` is the i-th original function and :math:`delta_i` is + the shift performed for that function, that must be a vector in the + domain space. + + Note that a positive shift moves the graph of the function in the + negative direction and vice versa. 
Args: - shifts: List with the shift corresponding - for each sample or numeric with the shift to apply to all - samples. - restrict_domain: If True restricts the domain to - avoid evaluate points outside the domain using extrapolation. + shifts: List with the shifts + corresponding for each sample or numeric with the shift to + apply to all samples. + restrict_domain: If True restricts the domain to avoid the + evaluation of points outside the domain using extrapolation. Defaults uses extrapolation. extrapolation: Controls the extrapolation mode for elements outside the domain range. By default uses the method defined in fd. See extrapolation to more information. - eval_points: Set of points where + grid_points: Grid of points where the functions are evaluated to obtain the discrete - representation of the object to operate. If an empty list is - passed it calls np.linspace with bounds equal to the ones - defined in fd.domain_range and the number of points the maximum - between 201 and 10 times the number of basis plus 1. + representation of the object to operate. If ``None`` the + current grid_points are used to unificate the domain of the + shifted data. Returns: - :class:`FData` with the shifted functional data. + Shifted functions. """ pass @@ -795,7 +807,10 @@ def mean( ) @abstractmethod - def to_grid(self, grid_points: Optional[np.ndarray] = None) -> FDataGrid: + def to_grid( + self, + grid_points: Optional[GridPointsLike] = None, + ) -> FDataGrid: """Return the discrete representation of the object. 
Args: diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 768624382..677ee80bd 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -284,32 +284,43 @@ def shift( *, restrict_domain: bool = False, extrapolation: Optional[ExtrapolationLike] = None, - eval_points: Optional[np.ndarray] = None, + grid_points: Optional[GridPointsLike] = None, **kwargs: Any, ) -> T: - """Perform a shift of the curves. + """ + Perform a shift of the curves. + + The i-th shifted function :math:`y_i` has the form + + .. math:: + y_i(t) = x_i(t + \delta_i) + + where :math:`x_i` is the i-th original function and :math:`delta_i` is + the shift performed for that function, that must be a vector in the + domain space. + + Note that a positive shift moves the graph of the function in the + negative direction and vice versa. Args: - shifts: List with the the shift + shifts: List with the shifts corresponding for each sample or numeric with the shift to apply to all samples. - restrict_domain: If True restricts the domain to - avoid evaluate points outside the domain using extrapolation. + restrict_domain: If True restricts the domain to avoid the + evaluation of points outside the domain using extrapolation. Defaults uses extrapolation. extrapolation: Controls the extrapolation mode for elements outside the domain range. By default uses the method defined in fd. See extrapolation to more information. - eval_points: Set of points where + grid_points: Grid of points where the functions are evaluated to obtain the discrete - representation of the object to operate. If an empty list is - passed it calls numpy.linspace with bounds equal to the ones - defined in fd.domain_range and the number of points the maximum - between 201 and 10 times the number of basis plus 1. - kwargs: Keyword arguments to be passed to :meth:`from_data`. + representation of the object to operate. 
If ``None`` the + current grid_points are used to unificate the domain of the + shifted data. Returns: - :obj:`FDataBasis` with the shifted data. + Shifted functions. """ if self.dim_codomain > 1 or self.dim_domain > 1: @@ -317,11 +328,11 @@ def shift( domain_range = self.domain_range[0] - if eval_points is None: # Grid to discretize the function + if grid_points is None: # Grid to discretize the function nfine = max(self.n_basis * 10 + 1, constants.N_POINTS_COARSE_MESH) - eval_points = np.linspace(*domain_range, nfine) + grid_points = np.linspace(*domain_range, nfine) else: - eval_points = np.asarray(eval_points) + grid_points = np.asarray(grid_points) if np.isscalar(shifts): # Special case, all curves with same shift @@ -331,8 +342,8 @@ def shift( )) return FDataBasis.from_data( - self.evaluate(eval_points), - grid_points=eval_points + shifts, + self.evaluate(grid_points), + grid_points=grid_points + shifts, basis=basis, **kwargs, ) @@ -348,10 +359,10 @@ def shift( a = domain_range[0] - min(np.min(shifts), 0) b = domain_range[1] - max(np.max(shifts), 0) domain = (a, b) - eval_points = eval_points[ + grid_points = grid_points[ np.logical_and( - eval_points >= a, - eval_points <= b, + grid_points >= a, + grid_points <= b, ) ] else: @@ -359,7 +370,7 @@ def shift( points_shifted = np.outer( np.ones(self.n_samples), - eval_points, + grid_points, ) points_shifted += np.atleast_2d(shifts).T @@ -375,7 +386,7 @@ def shift( return FDataBasis.from_data( data_matrix, - grid_points=eval_points, + grid_points=grid_points, basis=basis, **kwargs, ) @@ -514,7 +525,7 @@ def to_grid( self, grid_points: Optional[GridPointsLike] = None, *, - sample_points: np.ndarray = None, + sample_points: Optional[GridPointsLike] = None, ) -> FDataGrid: """Return the discrete representation of the object. 
@@ -610,7 +621,7 @@ def copy( argument_names: Optional[LabelTupleLike] = None, coordinate_names: Optional[LabelTupleLike] = None, sample_names: Optional[LabelTupleLike] = None, - extrapolation: Optional[Union[str, Evaluator]] = None, + extrapolation: Optional[ExtrapolationLike] = None, ) -> T: """Copy the FDataBasis.""" if basis is None: diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 41421af95..f176eb366 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -26,6 +26,7 @@ import numpy as np import pandas.api.extensions from matplotlib.figure import Figure +from typing_extensions import Literal import scipy.stats.mstats @@ -196,15 +197,6 @@ def __init__( # noqa: WPS211 if len(self._domain_range) != self.dim_domain: raise ValueError("Incorrect shape of domain_range.") - for i in range(self.dim_domain): - if ( - self._domain_range[i][0] > self.grid_points[i][0] - or self._domain_range[i][-1] < self.grid_points[i][-1] - ): - raise ValueError( - "Sample points must be within the domain range.", - ) - # Adjust the data matrix if the dimension of the image is one if self.data_matrix.ndim == 1 + self.dim_domain: self.data_matrix = self.data_matrix[..., np.newaxis] @@ -977,127 +969,185 @@ def copy( # noqa: WPS211 interpolation=interpolation, ) + def restrict( + self: T, + domain_range: DomainRangeLike, + ) -> T: + """ + Restrict the functions to a new domain range. + + Args: + domain_range: New domain range. + + Returns: + Restricted function. + + """ + domain_range = _to_domain_range(domain_range) + assert all( + c <= a < b <= d # noqa: WPS228 + for ((a, b), (c, d)) in zip(domain_range, self.domain_range) + ) + + # We could in principle eliminate points outside the new range. 
+ + return self.copy(domain_range=domain_range) + def shift( self: T, - shifts: Union[np.ndarray, float], + shifts: Union[ArrayLike, float], *, restrict_domain: bool = False, extrapolation: Optional[ExtrapolationLike] = None, - eval_points: Optional[np.ndarray] = None, + grid_points: Optional[GridPointsLike] = None, ) -> T: - """Perform a shift of the curves. + r""" + Perform a shift of the curves. + + The i-th shifted function :math:`y_i` has the form + + .. math:: + y_i(t) = x_i(t + \delta_i) + + where :math:`x_i` is the i-th original function and :math:`delta_i` is + the shift performed for that function, that must be a vector in the + domain space. + + Note that a positive shift moves the graph of the function in the + negative direction and vice versa. Args: shifts: List with the shifts corresponding for each sample or numeric with the shift to apply to all samples. - restrict_domain: If True restricts the domain to - avoid evaluate points outside the domain using extrapolation. + restrict_domain: If True restricts the domain to avoid the + evaluation of points outside the domain using extrapolation. Defaults uses extrapolation. extrapolation: Controls the extrapolation mode for elements outside the domain range. By default uses the method defined in fd. See extrapolation to more information. - eval_points: Set of points where + grid_points: Grid of points where the functions are evaluated to obtain the discrete - representation of the object to operate. If an empty list the + representation of the object to operate. If ``None`` the current grid_points are used to unificate the domain of the shifted data. Returns: - :class:`FDataGrid` with the shifted data. + Shifted functions. + + Examples: + >>> import numpy as np + >>> import skfda + >>> + >>> t = np.linspace(0, 1, 6) + >>> x = np.array([t, t**2, t**3]) + >>> fd = FDataGrid(x, t) + >>> fd.domain_range[0] + (0.0, 1.0) + >>> fd.grid_points[0] + array([ 0. , 0.2, 0.4, 0.6, 0.8, 1. 
]) + >>> fd.data_matrix[..., 0] + array([[ 0. , 0.2 , 0.4 , 0.6 , 0.8 , 1. ], + [ 0. , 0.04 , 0.16 , 0.36 , 0.64 , 1. ], + [ 0. , 0.008, 0.064, 0.216, 0.512, 1. ]]) + + Shift all curves by the same amount: + + >>> shifted = fd.shift(0.2) + >>> shifted.domain_range[0] + (0.0, 1.0) + >>> shifted.grid_points[0] + array([ 0. , 0.2, 0.4, 0.6, 0.8, 1. ]) + >>> shifted.data_matrix[..., 0] + array([[ 0.2 , 0.4 , 0.6 , 0.8 , 1. , 1.2 ], + [ 0.04 , 0.16 , 0.36 , 0.64 , 1. , 1.36 ], + [ 0.008, 0.064, 0.216, 0.512, 1. , 1.488]]) + + + Different shift per curve: + + >>> shifted = fd.shift([-0.2, 0.0, 0.2]) + >>> shifted.domain_range[0] + (0.0, 1.0) + >>> shifted.grid_points[0] + array([ 0. , 0.2, 0.4, 0.6, 0.8, 1. ]) + >>> shifted.data_matrix[..., 0] + array([[-0.2 , 0. , 0.2 , 0.4 , 0.6 , 0.8 ], + [ 0. , 0.04 , 0.16 , 0.36 , 0.64 , 1. ], + [ 0.008, 0.064, 0.216, 0.512, 1. , 1.488]]) + + It is possible to restrict the domain to prevent the need for + extrapolations: + + >>> shifted = fd.shift([-0.3, 0.1, 0.2], restrict_domain=True) + >>> shifted.domain_range[0] + (0.3, 0.8) """ arr_shifts = np.array([shifts] if np.isscalar(shifts) else shifts) - # Case unidimensional treated as the multidimensional - if ( - self.dim_domain == 1 - and arr_shifts.ndim == 1 - and arr_shifts.shape[0] != 1 - ): - arr_shifts = arr_shifts[:, np.newaxis] - - # Case same shift for all the curves - if arr_shifts.shape[0] == self.dim_domain and arr_shifts.ndim == 1: - - # Column vector with shapes - arr_shifts = np.atleast_2d(arr_shifts).T - - grid_points = self.grid_points + arr_shifts - domain_range = self.domain_range + arr_shifts - - return self.copy( - grid_points=grid_points, - domain_range=domain_range, + # Accept unidimensional array when the domain dimension is one or when + # the shift is the same for each sample + if arr_shifts.ndim == 1: + arr_shifts = ( + arr_shifts[np.newaxis, :] # Same shift for each sample + if len(arr_shifts) == self.dim_domain + else arr_shifts[:, np.newaxis] ) - if 
arr_shifts.shape[0] != self.n_samples: + if len(arr_shifts) not in {1, self.n_samples}: raise ValueError( - f"shifts vector ({arr_shifts.shape[0]}) must have the" - f" same length than the number of samples " + f"The length of the shift vector ({len(arr_shifts)}) must " + f"have length equal to 1 or to the number of samples " f"({self.n_samples})", ) - eval_points = ( - self.grid_points if eval_points is None - else np.atleast_2d(eval_points) + grid_points = ( + self.grid_points if grid_points is None + else _to_grid_points(grid_points) ) + domain_range: DomainRangeLike if restrict_domain: domain = np.asarray(self.domain_range) - a = domain[:, 0] - np.atleast_1d( - np.min(np.min(arr_shifts, axis=1), 0), - ) - b = domain[:, 1] - np.atleast_1d( - np.max(np.max(arr_shifts, axis=1), 0), - ) - domain = np.vstack((a, b)).T + a = domain[:, 0] - np.min(np.min(arr_shifts, axis=0), 0) + b = domain[:, 1] - np.max(np.max(arr_shifts, axis=1), 0) - eval_points = [ - eval_points[i][ - np.logical_and( - eval_points[i] >= domain[i, 0], - eval_points[i] <= domain[i, 1], - )] - for i in range(self.dim_domain) - ] + domain = np.hstack((a, b)) + domain_range = tuple(domain) else: - domain = self.domain_range - - eval_points = np.asarray(eval_points) - - eval_points_repeat = np.repeat( - eval_points[np.newaxis, :], - self.n_samples, - axis=0, - ) - - # Solve problem with cartesian and matrix indexing - if self.dim_domain > 1: - arr_shifts[:, :2] = np.flip(arr_shifts[:, :2], axis=1) - - arr_shifts = np.repeat( - arr_shifts[..., np.newaxis], - eval_points.shape[1], - axis=2, - ) - - eval_points_shifted = eval_points_repeat + arr_shifts + domain_range = self.domain_range - data_matrix = self.evaluate( - eval_points_shifted, - extrapolation=extrapolation, - aligned=False, - grid=True, - ) + if len(arr_shifts) == 1: + shifted_grid_points = tuple( + g + s for g, s in zip(grid_points, arr_shifts[0]) + ) + data_matrix = self( + shifted_grid_points, + extrapolation=extrapolation, + aligned=True, 
+ grid=True, + ) + else: + shifted_grid_points_per_sample = ( + tuple( + g + s for g, s in zip(grid_points, shift) + ) for shift in arr_shifts + ) + data_matrix = self( + shifted_grid_points_per_sample, + extrapolation=extrapolation, + aligned=False, + grid=True, + ) return self.copy( data_matrix=data_matrix, - grid_points=eval_points, - domain_range=domain, + grid_points=grid_points, + domain_range=domain_range, ) def compose( diff --git a/tests/test_registration.py b/tests/test_registration.py index 5585a445e..bf3052e17 100644 --- a/tests/test_registration.py +++ b/tests/test_registration.py @@ -357,7 +357,7 @@ def test_amplitude_phase_score_with_basis(self) -> None: scorer = AmplitudePhaseDecomposition() X = self.X.to_basis(Fourier()) score = scorer(self.shift_registration, X) - np.testing.assert_allclose(score, 0.995087, rtol=1e-6) + np.testing.assert_allclose(score, 0.992519, rtol=1e-6) def test_default_score(self) -> None: """Test default score of a registration transformer.""" From ddf37f169992e1031cdffad7d1b795326d4823df Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 29 Mar 2021 13:39:48 +0200 Subject: [PATCH 171/417] Fixed shift also for FDataBasis. 
--- .../registration/_shift_registration.py | 8 +- skfda/representation/_functional_data.py | 72 ++++++++++++- skfda/representation/basis/_fdatabasis.py | 100 ++++-------------- skfda/representation/grid.py | 67 ++---------- 4 files changed, 101 insertions(+), 146 deletions(-) diff --git a/skfda/preprocessing/registration/_shift_registration.py b/skfda/preprocessing/registration/_shift_registration.py index c6300f624..fc61955ad 100644 --- a/skfda/preprocessing/registration/_shift_registration.py +++ b/skfda/preprocessing/registration/_shift_registration.py @@ -110,14 +110,16 @@ class ShiftRegistration(RegistrationTransformer): array([-0.128, 0.187, 0.027, 0.034, -0.106, 0.114, ..., -0.06 ]) - Registration and creation of a dataset in basis form using the - transformation previosly fitted: + Registration of a dataset in basis form using the + transformation previosly fitted. The result is a dataset in + discretized form, as it is not possible to express shifted functions + exactly as a basis expansion: >>> fd = make_sinusoidal_process(n_samples=2, error_std=0, ... random_state=2) >>> fd_basis = fd.to_basis(Fourier()) >>> reg.transform(fd_basis) - FDataBasis(...) + FDataGrid(...) 
References: diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index 232117805..916b1bbba 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -26,10 +26,11 @@ import pandas.api.extensions from typing_extensions import Literal -from .._utils import _evaluate_grid, _reshape_eval_points +from .._utils import _evaluate_grid, _reshape_eval_points, _to_grid_points from ._typing import ( ArrayLike, DomainRange, + DomainRangeLike, GridPointsLike, LabelTuple, LabelTupleLike, @@ -662,13 +663,13 @@ def derivative(self: T, *, order: int = 1) -> T: @abstractmethod def shift( - self: T, - shifts: Union[float, np.ndarray], + self, + shifts: Union[ArrayLike, float], *, restrict_domain: bool = False, extrapolation: Optional[ExtrapolationLike] = None, grid_points: Optional[GridPointsLike] = None, - ) -> T: + ) -> FDataGrid: r""" Perform a shift of the curves. @@ -705,7 +706,68 @@ def shift( Shifted functions. 
""" - pass + assert grid_points is not None + grid_points = _to_grid_points(grid_points) + + arr_shifts = np.array([shifts] if np.isscalar(shifts) else shifts) + + # Accept unidimensional array when the domain dimension is one or when + # the shift is the same for each sample + if arr_shifts.ndim == 1: + arr_shifts = ( + arr_shifts[np.newaxis, :] # Same shift for each sample + if len(arr_shifts) == self.dim_domain + else arr_shifts[:, np.newaxis] + ) + + if len(arr_shifts) not in {1, self.n_samples}: + raise ValueError( + f"The length of the shift vector ({len(arr_shifts)}) must " + f"have length equal to 1 or to the number of samples " + f"({self.n_samples})", + ) + + domain_range: DomainRangeLike + if restrict_domain: + domain = np.asarray(self.domain_range) + + a = domain[:, 0] - np.min(np.min(arr_shifts, axis=0), 0) + b = domain[:, 1] - np.max(np.max(arr_shifts, axis=1), 0) + + domain = np.hstack((a, b)) + domain_range = tuple(domain) + + else: + domain_range = self.domain_range + + if len(arr_shifts) == 1: + shifted_grid_points = tuple( + g + s for g, s in zip(grid_points, arr_shifts[0]) + ) + data_matrix = self( + shifted_grid_points, + extrapolation=extrapolation, + aligned=True, + grid=True, + ) + else: + shifted_grid_points_per_sample = ( + tuple( + g + s for g, s in zip(grid_points, shift) + ) for shift in arr_shifts + ) + data_matrix = self( + shifted_grid_points_per_sample, + extrapolation=extrapolation, + aligned=False, + grid=True, + ) + + return self.to_grid().copy( + data_matrix=data_matrix, + grid_points=grid_points, + domain_range=domain_range, + ) def plot(self, *args: Any, **kwargs: Any) -> Any: """Plot the FDatGrid object. diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 677ee80bd..385a13001 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -24,7 +24,6 @@ from .. 
import grid from .._functional_data import FData from .._typing import ArrayLike, DomainRange, GridPointsLike, LabelTupleLike -from ..evaluator import Evaluator from ..extrapolation import ExtrapolationLike from . import Basis @@ -260,7 +259,7 @@ def _evaluate( if aligned: - assert isinstance(eval_points, np.ndarray) + eval_points = np.asarray(eval_points) # Each row contains the values of one element of the basis basis_values = self.basis.evaluate(eval_points) @@ -271,23 +270,24 @@ def _evaluate( (self.n_samples, len(eval_points), self.dim_codomain), ) + eval_points = cast(Iterable[ArrayLike], eval_points) + res_list = [ - np.sum((c * self.basis.evaluate(p).T).T, axis=0) + np.sum((c * self.basis.evaluate(np.asarray(p)).T).T, axis=0) for c, p in zip(self.coefficients, eval_points) ] return _to_array_maybe_ragged(res_list) def shift( - self: T, - shifts: np.ndarray, + self, + shifts: Union[ArrayLike, float], *, restrict_domain: bool = False, extrapolation: Optional[ExtrapolationLike] = None, grid_points: Optional[GridPointsLike] = None, - **kwargs: Any, - ) -> T: - """ + ) -> FDataGrid: + r""" Perform a shift of the curves. The i-th shifted function :math:`y_i` has the form @@ -323,72 +323,16 @@ def shift( Shifted functions. 
""" - if self.dim_codomain > 1 or self.dim_domain > 1: - raise ValueError - - domain_range = self.domain_range[0] - - if grid_points is None: # Grid to discretize the function - nfine = max(self.n_basis * 10 + 1, constants.N_POINTS_COARSE_MESH) - grid_points = np.linspace(*domain_range, nfine) - else: - grid_points = np.asarray(grid_points) - - if np.isscalar(shifts): # Special case, all curves with same shift - - basis = self.basis.rescale(( - domain_range[0] + shifts, - domain_range[1] + shifts, - )) - - return FDataBasis.from_data( - self.evaluate(grid_points), - grid_points=grid_points + shifts, - basis=basis, - **kwargs, - ) - - elif len(shifts) != self.n_samples: - raise ValueError( - f"shifts vector ({len(shifts)}) must have the " - f"same length than the number of samples " - f"({self.n_samples})", - ) - - if restrict_domain: - a = domain_range[0] - min(np.min(shifts), 0) - b = domain_range[1] - max(np.max(shifts), 0) - domain = (a, b) - grid_points = grid_points[ - np.logical_and( - grid_points >= a, - grid_points <= b, - ) - ] - else: - domain = domain_range - - points_shifted = np.outer( - np.ones(self.n_samples), - grid_points, + grid_points = ( + self._default_grid_points() if grid_points is None + else grid_points ) - points_shifted += np.atleast_2d(shifts).T - - # Matrix of shifted values - data_matrix = self( - points_shifted, - aligned=False, + return super().shift( + shifts=shifts, + restrict_domain=restrict_domain, extrapolation=extrapolation, - )[..., 0] - - basis = self.basis.rescale(domain) - - return FDataBasis.from_data( - data_matrix, grid_points=grid_points, - basis=basis, - **kwargs, ) def derivative(self: T, *, order: int = 1) -> T: # noqa: D102 @@ -570,14 +514,7 @@ def to_grid( grid_points = sample_points if grid_points is None: - npoints = max( - constants.N_POINTS_FINE_MESH, - constants.BASIS_MIN_FACTOR * self.n_basis, - ) - grid_points = [ - np.linspace(*r, npoints) - for r in self.domain_range - ] + grid_points = 
self._default_grid_points() return grid.FDataGrid( self.evaluate(grid_points, grid=True), @@ -658,6 +595,13 @@ def copy( extrapolation=extrapolation, ) + def _default_grid_points(self) -> GridPointsLike: + npoints = constants.N_POINTS_FINE_MESH + return [ + np.linspace(*r, npoints) + for r in self.domain_range + ] + def _to_R(self) -> str: # noqa: N802 """Return the code to build the object on fda package on R.""" return ( diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index f176eb366..71244e9b5 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -994,13 +994,13 @@ def restrict( return self.copy(domain_range=domain_range) def shift( - self: T, + self, shifts: Union[ArrayLike, float], *, restrict_domain: bool = False, extrapolation: Optional[ExtrapolationLike] = None, grid_points: Optional[GridPointsLike] = None, - ) -> T: + ) -> FDataGrid: r""" Perform a shift of the curves. @@ -1085,69 +1085,16 @@ def shift( (0.3, 0.8) """ - arr_shifts = np.array([shifts] if np.isscalar(shifts) else shifts) - - # Accept unidimensional array when the domain dimension is one or when - # the shift is the same for each sample - if arr_shifts.ndim == 1: - arr_shifts = ( - arr_shifts[np.newaxis, :] # Same shift for each sample - if len(arr_shifts) == self.dim_domain - else arr_shifts[:, np.newaxis] - ) - - if len(arr_shifts) not in {1, self.n_samples}: - raise ValueError( - f"The length of the shift vector ({len(arr_shifts)}) must " - f"have length equal to 1 or to the number of samples " - f"({self.n_samples})", - ) - grid_points = ( self.grid_points if grid_points is None - else _to_grid_points(grid_points) + else grid_points ) - domain_range: DomainRangeLike - if restrict_domain: - domain = np.asarray(self.domain_range) - - a = domain[:, 0] - np.min(np.min(arr_shifts, axis=0), 0) - b = domain[:, 1] - np.max(np.max(arr_shifts, axis=1), 0) - - domain = np.hstack((a, b)) - domain_range = tuple(domain) - - else: - domain_range = 
self.domain_range - - if len(arr_shifts) == 1: - shifted_grid_points = tuple( - g + s for g, s in zip(grid_points, arr_shifts[0]) - ) - data_matrix = self( - shifted_grid_points, - extrapolation=extrapolation, - aligned=True, - grid=True, - ) - else: - shifted_grid_points_per_sample = ( - tuple( - g + s for g, s in zip(grid_points, shift) - ) for shift in arr_shifts - ) - data_matrix = self( - shifted_grid_points_per_sample, - extrapolation=extrapolation, - aligned=False, - grid=True, - ) - - return self.copy( - data_matrix=data_matrix, + return super().shift( + shifts=shifts, + restrict_domain=restrict_domain, + extrapolation=extrapolation, grid_points=grid_points, - domain_range=domain_range, ) def compose( From 597676d2fea220c0a6b2ec6ad2a07739ebfd3099 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 29 Mar 2021 22:04:24 +0200 Subject: [PATCH 172/417] Refactor _compute_deltas. --- setup.cfg | 2 + .../registration/_shift_registration.py | 129 ++++++------------ tests/test_registration.py | 8 +- 3 files changed, 49 insertions(+), 90 deletions(-) diff --git a/setup.cfg b/setup.cfg index 284f60d02..15dd3e8c9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -59,6 +59,8 @@ ignore = WPS338, # We need multine loops WPS352, + # Assign to a subcript slice is normal behaviour in numpy + WPS362, # All keywords are beautiful WPS420, # We use nested imports sometimes, and it is not THAT bad diff --git a/skfda/preprocessing/registration/_shift_registration.py b/skfda/preprocessing/registration/_shift_registration.py index fc61955ad..e78277da7 100644 --- a/skfda/preprocessing/registration/_shift_registration.py +++ b/skfda/preprocessing/registration/_shift_registration.py @@ -7,17 +7,16 @@ from sklearn.utils.validation import check_is_fitted from typing_extensions import Literal -from scipy.integrate import simps - from ... 
import FData, FDataGrid -from ..._utils import check_is_univariate, constants +from ..._utils import check_is_univariate +from ...misc._math import inner_product from ...misc.metrics._lp_norms import l2_norm -from ...representation._typing import ArrayLike +from ...representation._typing import ArrayLike, GridPointsLike from ...representation.extrapolation import ExtrapolationLike from .base import RegistrationTransformer T = TypeVar("T", bound=FData) -TemplateFunction = Callable[[FData], FData] +TemplateFunction = Callable[[FDataGrid], FDataGrid] class ShiftRegistration(RegistrationTransformer): @@ -70,7 +69,7 @@ class ShiftRegistration(RegistrationTransformer): and transformation must be done together. Defaults to False. initial (str or array_like, optional): Array with an initial estimation of shifts. Default uses a list of zeros for the initial shifts. - output_points (array_like, optional): Set of points where the + grid_points (array_like, optional): Set of points where the functions are evaluated to obtain the discrete representation of the object to integrate. 
If None is passed it calls numpy.linspace in FDataBasis and uses the @@ -107,7 +106,7 @@ class ShiftRegistration(RegistrationTransformer): Shifts applied during the transformation >>> reg.deltas_.round(3) - array([-0.128, 0.187, 0.027, 0.034, -0.106, 0.114, ..., -0.06 ]) + array([-0.131, 0.188, 0.026, 0.033, -0.109, 0.115, ..., -0.062]) Registration of a dataset in basis form using the @@ -140,7 +139,7 @@ def __init__( step_size: float = 1, restrict_domain: bool = False, initial: Union[Literal["zeros"], ArrayLike] = "zeros", - output_points: Optional[ArrayLike] = None, + grid_points: Optional[GridPointsLike] = None, ) -> None: self.max_iter = max_iter self.tol = tol @@ -149,7 +148,7 @@ def __init__( self.extrapolation = extrapolation self.step_size = step_size self.initial = initial - self.output_points = output_points + self.grid_points = grid_points def _compute_deltas( self, @@ -171,9 +170,6 @@ def _compute_deltas( """ check_is_univariate(fd) - if not isinstance(fd, FDataGrid): - fd = fd.to_grid() - domain_range = fd.domain_range[0] # Initial estimation of the shifts @@ -188,109 +184,70 @@ def _compute_deltas( f"be the same than the number of samples ({fd.n_samples})", ) - # Fine equispaced mesh to evaluate the samples - if self.output_points is None: - output_points = fd.grid_points[0] - nfine = len(output_points) - else: - output_points = np.asarray(self.output_points) - nfine = len(output_points) - # Auxiliar array to avoid multiple memory allocations delta_aux = np.empty(fd.n_samples) # Computes the derivate of originals curves in the mesh points - fd_deriv = fd.derivative(order=1) - D1x = fd_deriv(output_points)[..., 0] + fd_deriv = fd.derivative() # Second term of the second derivate estimation of REGSSE. 
The # first term has been dropped to improve convergence (see references) d2_regsse = l2_norm(fd_deriv)**2 + # We need the discretized derivative to compute the inner product later + fd_deriv = fd_deriv.to_grid(grid_points=self.grid_points) + max_diff = self.tol + 1 self.n_iter_ = 0 - template_fixed = False - - # Case template fixed - if isinstance(template, FData): - tfine_aux = template.evaluate(output_points)[0, ..., 0] - - if self.restrict_domain: - template_points_aux = tfine_aux - - template_fixed = True - else: - tfine_aux = np.empty(nfine) - - # Auxiliar array if the domain will be restricted - if self.restrict_domain: - D1x_tmp = D1x - tfine_tmp = output_points - tfine_aux_tmp = tfine_aux - domain = np.empty(nfine, dtype=np.dtype(bool)) - - ones = np.ones(fd.n_samples) - output_points_rep = np.outer(ones, output_points) - # Newton-Rhapson iteration while max_diff > self.tol and self.n_iter_ < self.max_iter: + # Computes the new values shifted + x = fd.shift(delta, grid_points=self.grid_points) + + if isinstance(template, str): + assert template == "mean" + template_iter = x.mean() + elif isinstance(template, FData): + template_iter = template.to_grid(grid_points=x.grid_points) + else: # Callable + template_iter = template(x) + # Updates the limits for non periodic functions ignoring the ends if self.restrict_domain: # Calculates the new limits a = domain_range[0] - min(np.min(delta), 0) b = domain_range[1] - max(np.max(delta), 0) - # New interval is (a,b) - np.logical_and(tfine_tmp >= a, tfine_tmp <= b, out=domain) - output_points = tfine_tmp[domain] - tfine_aux = tfine_aux_tmp[domain] - D1x = D1x_tmp[:, domain] - # Reescale the second derivate could be other approach - # d2_regsse = - # d2_regsse_original * ( 1 + (a - b) / (domain[1] - domain[0])) - d2_regsse = simps(np.square(D1x), output_points, axis=1) + restricted_domain = ( + max(a, template_iter.domain_range[0][0]), + min(b, template_iter.domain_range[0][1]), + ) - # Recompute base points for 
evaluation - output_points_rep = np.outer(ones, output_points) + template_iter = template_iter.restrict(restricted_domain) - # Computes the new values shifted - x = fd( - output_points_rep + np.atleast_2d(delta).T, - aligned=False, - extrapolation=self.extrapolation, - )[..., 0] - - if template == "mean": - x.mean(axis=0, out=tfine_aux) - elif template_fixed and self.restrict_domain: - tfine_aux = template_points_aux[domain] - elif not template_fixed and callable(template): # Callable - fd_x = FDataGrid(x, grid_points=output_points) - fd_tfine = template(fd_x) - tfine_aux = fd_tfine.data_matrix.ravel() + x = x.restrict(restricted_domain) + fd_deriv = fd_deriv.restrict(restricted_domain) + d2_regsse = l2_norm(fd_deriv)**2 # Calculates x - mean - np.subtract(x, tfine_aux, out=x) + x -= template_iter + + d1_regsse = inner_product(x, fd_deriv) - d1_regsse = simps(np.multiply(x, D1x, out=x), - output_points, axis=1) # Updates the shifts by the Newton-Rhapson iteration - # delta = delta - step_size * d1_regsse / d2_regsse - np.divide(d1_regsse, d2_regsse, out=delta_aux) - np.multiply(delta_aux, self.step_size, out=delta_aux) - np.subtract(delta, delta_aux, out=delta) + # Same as delta = delta - step_size * d1_regsse / d2_regsse + delta_aux[:] = d1_regsse + delta_aux[:] /= d2_regsse + delta_aux[:] *= self.step_size + delta[:] -= delta_aux # Updates convergence criterions max_diff = np.abs(delta_aux, out=delta_aux).max() self.n_iter_ += 1 - if template_fixed is False: - # Stores the template in an FDataGrid - template = FDataGrid(tfine_aux, grid_points=output_points) - - return delta, template + return delta, template_iter def fit_transform(self, X: T, y: None = None) -> T: """Fit the estimator and transform the data. 
@@ -307,7 +264,7 @@ def fit_transform(self, X: T, y: None = None) -> T: return X.shift(self.deltas_, restrict_domain=self.restrict_domain, extrapolation=self.extrapolation, - grid_points=self.output_points) + grid_points=self.grid_points) def fit(self, X: FData, y=None): """Fit the estimator. @@ -374,7 +331,7 @@ def transform(self, X: FData, y=None): return X.shift(deltas, restrict_domain=self.restrict_domain, extrapolation=self.extrapolation, - grid_points=self.output_points) + grid_points=self.grid_points) def inverse_transform(self, X: FData, y=None): """Applies the inverse transformation. @@ -418,4 +375,4 @@ def inverse_transform(self, X: FData, y=None): return X.shift(-self.deltas_, restrict_domain=self.restrict_domain, extrapolation=self.extrapolation, - grid_points=self.output_points) + grid_points=self.grid_points) diff --git a/tests/test_registration.py b/tests/test_registration.py index bf3052e17..f4fdb2636 100644 --- a/tests/test_registration.py +++ b/tests/test_registration.py @@ -311,7 +311,7 @@ def test_restrict_domain(self): fd_registered_1 = reg.fit_transform(self.fd) np.testing.assert_array_almost_equal( - np.array(fd_registered_1.domain_range).round(3), [[0.022, 0.969]]) + np.array(fd_registered_1.domain_range).round(3), [[0.022, 0.97]]) reg2 = ShiftRegistration(restrict_domain=True, template=reg.template_) fd_registered_2 = reg2.fit_transform(self.fd) @@ -333,8 +333,8 @@ def test_initial_estimation(self): # Only needed 1 iteration until convergence self.assertEqual(reg.n_iter_, 1) - def test_custom_output_points(self): - reg = ShiftRegistration(output_points=np.linspace(0, 1, 50)) + def test_custom_grid_points(self): + reg = ShiftRegistration(grid_points=np.linspace(0, 1, 50)) reg.fit_transform(self.fd) @@ -357,7 +357,7 @@ def test_amplitude_phase_score_with_basis(self) -> None: scorer = AmplitudePhaseDecomposition() X = self.X.to_basis(Fourier()) score = scorer(self.shift_registration, X) - np.testing.assert_allclose(score, 0.992519, rtol=1e-6) 
+ np.testing.assert_allclose(score, 0.995086, rtol=1e-6) def test_default_score(self) -> None: """Test default score of a registration transformer.""" From ad7bcfeb7af2ae48ef090d0cc232ffe420230890 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 31 Mar 2021 00:30:54 +0200 Subject: [PATCH 173/417] Finish refactoring shift registration. --- .../registration/_shift_registration.py | 132 ++++++++++-------- 1 file changed, 77 insertions(+), 55 deletions(-) diff --git a/skfda/preprocessing/registration/_shift_registration.py b/skfda/preprocessing/registration/_shift_registration.py index e78277da7..d298d6546 100644 --- a/skfda/preprocessing/registration/_shift_registration.py +++ b/skfda/preprocessing/registration/_shift_registration.py @@ -39,13 +39,13 @@ class ShiftRegistration(RegistrationTransformer): Method only implemented for univariate functional data. Args: - max_iter (int, optional): Maximun number of iterations. + max_iter: Maximun number of iterations. Defaults sets to 5. Generally 2 or 3 iterations are sufficient to obtain a good alignment. - tol (float, optional): Tolerance allowable. The process will stop if + tol: Tolerance allowable. The process will stop if :math:`\max_{i}|\delta_{i}^{(\nu)}-\delta_{i}^{(\nu-1)}| T: Functional data registered. """ - self.deltas_, self.template_ = self._compute_deltas(X, self.template) + deltas, template = self._compute_deltas(X, self.template) - return X.shift(self.deltas_, restrict_domain=self.restrict_domain, - extrapolation=self.extrapolation, - grid_points=self.grid_points) + self.deltas_ = deltas + self.template_ = template - def fit(self, X: FData, y=None): + return X.shift( + self.deltas_, + restrict_domain=self.restrict_domain, + extrapolation=self.extrapolation, + grid_points=self.grid_points, + ) + + def fit(self, X: FData, y: None = None) -> ShiftRegistration: """Fit the estimator. 
Args: - X (FData): Functional dataset used to construct the template for + X: Functional dataset used to construct the template for the alignment. - y (ignored): not used, present for API consistency by convention. + y: not used, present for API consistency by convention. Returns: - RegistrationTransformer: self + self Raises: AttributeError: If this method is call when restrict_domain=True. """ if self.restrict_domain: - raise AttributeError("fit and predict are not available when " - "restrict_domain=True, fitting and " - "transformation should be done together. Use " - "an extrapolation method with " - "restrict_domain=False or fit_predict") + raise AttributeError( + "fit and predict are not available when " + "restrict_domain=True, fitting and " + "transformation should be done together. Use " + "an extrapolation method with " + "restrict_domain=False or fit_predict", + ) # If the template is an FData, fit doesnt learn anything if isinstance(self.template, FData): self.template_ = self.template else: - _, self.template_ = self._compute_deltas(X, self.template) + _, template = self._compute_deltas(X, self.template) + + self.template_ = template return self - def transform(self, X: FData, y=None): + def transform(self, X: FData, y: None = None) -> FDataGrid: """Register the data. Transforms the data using the template previously learned during fitting. Args: - X (FData): Functional dataset to be transformed. - y (ignored): not used, present for API consistency by convention. + X: Functional dataset to be transformed. + y: not used, present for API consistency by convention. Returns: - FData: Functional data registered. + Functional data registered. Raises: AttributeError: If this method is call when restrict_domain=True. """ - if self.restrict_domain: - raise AttributeError("fit and predict are not available when " - "restrict_domain=True, fitting and " - "transformation should be done together. 
Use " - "an extrapolation method with " - "restrict_domain=False or fit_predict") + raise AttributeError( + "fit and predict are not available when " + "restrict_domain=True, fitting and " + "transformation should be done together. Use " + "an extrapolation method with " + "restrict_domain=False or fit_predict", + ) # Check is fitted - check_is_fitted(self, 'template_') + check_is_fitted(self) - deltas, template = self._compute_deltas(X, self.template_) - self.template_ = template + deltas, _ = self._compute_deltas(X, self.template_) self.deltas_ = deltas - return X.shift(deltas, restrict_domain=self.restrict_domain, - extrapolation=self.extrapolation, - grid_points=self.grid_points) + return X.shift( + deltas, + restrict_domain=self.restrict_domain, + extrapolation=self.extrapolation, + grid_points=self.grid_points, + ) - def inverse_transform(self, X: FData, y=None): + def inverse_transform(self, X: FData, y: None = None) -> FDataGrid: """Applies the inverse transformation. Applies the opossite shift used in the last call to `transform`. Args: - X (FData): Functional dataset to be transformed. - y (ignored): not used, present for API consistency by convention. + X: Functional dataset to be transformed. + y: not used, present for API consistency by convention. Returns: - FData: Functional data registered. + Functional data registered. Examples: @@ -366,13 +379,22 @@ def inverse_transform(self, X: FData, y=None): FDataGrid(...) 
""" - if not hasattr(self, "deltas_"): - raise AttributeError("Data must be previously transformed to learn" - " the inverse transformation") - elif len(X) != len(self.deltas_): - raise ValueError("Data must contain the same number of samples " - "than the dataset previously transformed") - - return X.shift(-self.deltas_, restrict_domain=self.restrict_domain, - extrapolation=self.extrapolation, - grid_points=self.grid_points) + deltas = getattr(self, "deltas_", None) + + if deltas is None: + raise AttributeError( + "Data must be previously transformed to learn" + " the inverse transformation", + ) + elif len(X) != len(deltas): + raise ValueError( + "Data must contain the same number of samples " + "than the dataset previously transformed", + ) + + return X.shift( + -deltas, + restrict_domain=self.restrict_domain, + extrapolation=self.extrapolation, + grid_points=self.grid_points, + ) From f7fa87147dd686d3bd026518e01157da59dcb321 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 31 Mar 2021 19:52:44 +0200 Subject: [PATCH 174/417] commit --- skfda/exploratory/visualization/representation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 2a59a33dd..4d8eab46e 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -43,8 +43,8 @@ def _get_label_colors( def _get_color_info( fdata: T, group: Union[Sequence[Any], None], - group_names: Union[Sequence[str], TypedDict, None], - group_colors: Union[Sequence[Any], TypedDict, None], + group_names: Union[Sequence[str], TypedDict[Any], None], + group_colors: Union[Sequence[Any], TypedDict[Any], None], legend: bool, kwargs: Any, ) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: From c05b5899aedceef9602e72a56db31e337e431e9f Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 31 Mar 2021 20:39:24 +0200 Subject: [PATCH 
175/417] changes --- .../visualization/representation.py | 122 ++++++++++++------ 1 file changed, 84 insertions(+), 38 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 4d8eab46e..62cabe49d 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -1,5 +1,23 @@ -from typing import Any, Dict, List, Optional, Sequence, Tuple, TypedDict, TypeVar, Union -from matplotlib import colors +"""Representation Module. + +This module contains the functionality related +with plotting and scattering our different datasets. +It allows multiple modes and colors, which could +be set manually or automatically depending on values +like depth measures. +""" + +from typing import ( + Any, + Dict, + List, + Optional, + Sequence, + Tuple, + TypedDict, + TypeVar, + Union, +) import matplotlib.cm import matplotlib.patches @@ -43,11 +61,21 @@ def _get_label_colors( def _get_color_info( fdata: T, group: Union[Sequence[Any], None], - group_names: Union[Sequence[str], TypedDict[Any], None], - group_colors: Union[Sequence[Any], TypedDict[Any], None], + group_names: Union + [ + Sequence[str], + TypedDict('group_names', {'ind': Any, 'str':str}), + None, + ], + group_colors: Union + [ + Sequence[str], + TypedDict('group_names', {'ind': Any, 'val':Any}), + None, + ], legend: bool, kwargs: Any, -) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: +) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: patches = None @@ -182,11 +210,21 @@ def plot( ax: Optional[Axes] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, - n_points: Union[int, Tuple[int, int], None], - domain_range: Union[Tuple[int, int], DomainRangeLike, None], - group: Union[Sequence[Any], None], - group_colors: Union[Sequence[Any], TypedDict, None], - group_names: Union[Sequence[str], TypedDict, None], + n_points: Union[int, Tuple[int, int], None] = None, + 
domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, + group: Union[Sequence[Any], None] = None, + group_colors: Union + [ + Sequence[Any], + TypedDict('group_names', {'ind': Any, 'val':Any}), + None, + ] = None, + group_names: Union + [ + Sequence[str], + TypedDict('group_names', {'ind': Any, 'str':str}), + None, + ] = None, colormap_name: str = 'autumn', legend: bool = False, **kwargs: Any, @@ -263,9 +301,9 @@ def plot( ) if domain_range is None: - domain_range = self.fdata.domain_range + self.domain_range = self.fdata.domain_range else: - domain_range = _to_domain_range(domain_range) + self.domain_range = _to_domain_range(domain_range) if len(self.gradient_list) == 0: sample_colors, patches = _get_color_info( @@ -286,7 +324,7 @@ def plot( self.n_points = constants.N_POINTS_UNIDIMENSIONAL_PLOT_MESH # Evaluates the object in a linspace - eval_points = np.linspace(*domain_range[0], n_points) + eval_points = np.linspace(*self.domain_range[0], self.n_points) mat = self.fdata(eval_points) color_dict: Dict[str, Any] = {} @@ -314,8 +352,8 @@ def plot( ) # Axes where will be evaluated - x = np.linspace(*domain_range[0], n_points_tuple[0]) - y = np.linspace(*domain_range[1], n_points_tuple[1]) + x = np.linspace(*self.domain_range[0], n_points_tuple[0]) + y = np.linspace(*self.domain_range[1], n_points_tuple[1]) # Evaluation of the functional object Z = self.fdata((x, y), grid=True) @@ -346,7 +384,7 @@ class ScatterPlot: Args: fdata: functional data set that we want to plot. - grid_points (ndarray): points to plot. + grid_points: points to plot. 
""" @@ -366,10 +404,20 @@ def plot( ax: Optional[Axes] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, - domain_range: Union[Tuple[int, int], Sequence[Tuple[int, int]], None], - group: Union[Sequence[Any], None], - group_colors: Union[Sequence[Any], TypedDict, None], - group_names: Union[Sequence[str], TypedDict, None], + domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, + group: Union[Sequence[Any], None] = None, + group_colors: Union + [ + Sequence[Any], + TypedDict('group_names', {'ind': Any, 'val':Any}), + None, + ] = None, + group_names: Union + [ + Sequence[str], + TypedDict('group_names', {'ind': Any, 'str':str}), + None, + ] = None, legend: bool = False, **kwargs: Any, ) -> Figure: @@ -377,39 +425,37 @@ def plot( Scatter FDataGrid object. Args: - chart (figure object, axe or list of axes, optional): figure over - with the graphs are plotted or axis over where the graphs are - plotted. If None and ax is also None, the figure is - initialized. - fig (figure object, optional): figure over with the graphs are - plotted in case ax is not specified. If None and ax is also + chart: figure over with the graphs are plotted or axis + over where the graphs are plotted. If None and ax + is also None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is initialized. - ax (list of axis objects, optional): axis over where the graphs - are plotted. If None, see param fig. - n_rows (int, optional): designates the number of rows of the figure + ax: axis over where the graphs are plotted. If None, see param fig. + n_rows: designates the number of rows of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. - n_cols(int, optional): designates the number of columns of the + n_cols: designates the number of columns of the figure to plot the different dimensions of the image. 
Only specified if fig and ax are None. - domain_range (tuple or list of tuples, optional): Range where the - function will be plotted. In objects with unidimensional domain + domain_range: Range where the function will be + plotted. In objects with unidimensional domain the domain range should be a tuple with the bounds of the interval; in the case of surfaces a list with 2 tuples with the ranges for each dimension. Default uses the domain range of the functional object. - group (list of int): contains integers from [0 to number of - labels) indicating to which group each sample belongs to. Then, + group: contains integers from [0 to number of labels) + indicating to which group each sample belongs to. Then, the samples with the same label are plotted in the same color. If None, the default value, each sample is plotted in the color assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors (list of colors): colors in which groups are - represented, there must be one for each group. If None, each + group_colors: colors in which groups are represented, + there must be one for each group. If None, each group is shown with distict colors in the "Greys" colormap. - group_names (list of str): name of each of the groups which appear + group_names: name of each of the groups which appear in a legend, there must be one for each one. Defaults to None and the legend is not shown. Implies `legend=True`. - legend (bool): if `True`, show a legend with the groups. If + legend: if `True`, show a legend with the groups. If `group_names` is passed, it will be used for finding the names to display in the legend. Otherwise, the values passed to `group` will be used. 
From de3538a68cf6bfeddf3696113bed4ea7f87d6525 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 1 Apr 2021 19:38:00 +0200 Subject: [PATCH 176/417] change --- .../visualization/representation.py | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 62cabe49d..ec740e20f 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -61,16 +61,14 @@ def _get_label_colors( def _get_color_info( fdata: T, group: Union[Sequence[Any], None], - group_names: Union - [ + group_names: Union[ Sequence[str], - TypedDict('group_names', {'ind': Any, 'str':str}), + TypedDict('group_names', {'ind': Any, 'str': str}), None, ], - group_colors: Union - [ + group_colors: Union[ Sequence[str], - TypedDict('group_names', {'ind': Any, 'val':Any}), + TypedDict('group_names', {'ind': Any, 'val': Any}), None, ], legend: bool, @@ -213,16 +211,14 @@ def plot( n_points: Union[int, Tuple[int, int], None] = None, domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, group: Union[Sequence[Any], None] = None, - group_colors: Union - [ + group_colors: Union[ Sequence[Any], - TypedDict('group_names', {'ind': Any, 'val':Any}), + TypedDict('group_names', {'ind': Any, 'val': Any}), None, ] = None, - group_names: Union - [ + group_names: Union[ Sequence[str], - TypedDict('group_names', {'ind': Any, 'str':str}), + TypedDict('group_names', {'ind': Any, 'str': str}), None, ] = None, colormap_name: str = 'autumn', @@ -406,16 +402,14 @@ def plot( n_cols: Optional[int] = None, domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, group: Union[Sequence[Any], None] = None, - group_colors: Union - [ + group_colors: Union[ Sequence[Any], - TypedDict('group_names', {'ind': Any, 'val':Any}), + TypedDict('group_names', {'ind': Any, 'val': Any}), None, ] = None, - group_names: Union - [ 
+ group_names: Union[ Sequence[str], - TypedDict('group_names', {'ind': Any, 'str':str}), + TypedDict('group_names', {'ind': Any, 'str': str}), None, ] = None, legend: bool = False, From 86711ce6245b30f25a6ad4cbb508d2e3c38b72dc Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 1 Apr 2021 19:47:02 +0200 Subject: [PATCH 177/417] typechagnes --- skfda/exploratory/visualization/representation.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index ec740e20f..9f4188ff7 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -14,7 +14,6 @@ Optional, Sequence, Tuple, - TypedDict, TypeVar, Union, ) @@ -63,12 +62,12 @@ def _get_color_info( group: Union[Sequence[Any], None], group_names: Union[ Sequence[str], - TypedDict('group_names', {'ind': Any, 'str': str}), + Dict[Any, str], None, ], group_colors: Union[ Sequence[str], - TypedDict('group_names', {'ind': Any, 'val': Any}), + Dict[Any, Any], None, ], legend: bool, @@ -213,12 +212,12 @@ def plot( group: Union[Sequence[Any], None] = None, group_colors: Union[ Sequence[Any], - TypedDict('group_names', {'ind': Any, 'val': Any}), + Dict[Any, Any], None, ] = None, group_names: Union[ Sequence[str], - TypedDict('group_names', {'ind': Any, 'str': str}), + Dict[Any, str], None, ] = None, colormap_name: str = 'autumn', @@ -404,12 +403,12 @@ def plot( group: Union[Sequence[Any], None] = None, group_colors: Union[ Sequence[Any], - TypedDict('group_names', {'ind': Any, 'val': Any}), + Dict[Any, Any], None, ] = None, group_names: Union[ Sequence[str], - TypedDict('group_names', {'ind': Any, 'str': str}), + Dict[Any, str], None, ] = None, legend: bool = False, From 68e206740e683014dd59e4010e58204fbb8407f2 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 1 Apr 2021 19:52:13 +0200 Subject: [PATCH 178/417] SOME STUFF SOLVED 
--- .../visualization/representation.py | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 9f4188ff7..886212958 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -61,15 +61,15 @@ def _get_color_info( fdata: T, group: Union[Sequence[Any], None], group_names: Union[ - Sequence[str], - Dict[Any, str], - None, - ], + Sequence[str], + Dict[Any, str], + None, + ], group_colors: Union[ - Sequence[str], - Dict[Any, Any], - None, - ], + Sequence[str], + Dict[Any, Any], + None, + ], legend: bool, kwargs: Any, ) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: @@ -211,15 +211,15 @@ def plot( domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, group: Union[Sequence[Any], None] = None, group_colors: Union[ - Sequence[Any], - Dict[Any, Any], - None, - ] = None, + Sequence[Any], + Dict[Any, Any], + None, + ] = None, group_names: Union[ - Sequence[str], - Dict[Any, str], - None, - ] = None, + Sequence[str], + Dict[Any, str], + None, + ] = None, colormap_name: str = 'autumn', legend: bool = False, **kwargs: Any, @@ -402,15 +402,15 @@ def plot( domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, group: Union[Sequence[Any], None] = None, group_colors: Union[ - Sequence[Any], - Dict[Any, Any], - None, - ] = None, + Sequence[Any], + Dict[Any, Any], + None, + ] = None, group_names: Union[ - Sequence[str], - Dict[Any, str], - None, - ] = None, + Sequence[str], + Dict[Any, str], + None, + ] = None, legend: bool = False, **kwargs: Any, ) -> Figure: @@ -418,7 +418,7 @@ def plot( Scatter FDataGrid object. Args: - chart: figure over with the graphs are plotted or axis + chart: figure over with the graphs are plotted or axis over where the graphs are plotted. If None and ax is also None, the figure is initialized. 
fig: figure over with the graphs are plotted in case ax is not @@ -431,13 +431,13 @@ def plot( n_cols: designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. - domain_range: Range where the function will be + domain_range: Range where the function will be plotted. In objects with unidimensional domain the domain range should be a tuple with the bounds of the interval; in the case of surfaces a list with 2 tuples with the ranges for each dimension. Default uses the domain range of the functional object. - group: contains integers from [0 to number of labels) + group: contains integers from [0 to number of labels) indicating to which group each sample belongs to. Then, the samples with the same label are plotted in the same color. If None, the default value, each sample is plotted in the color From 8af2a6bbf13558cfd9ad63c7416fdd620f4f2d8a Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 1 Apr 2021 20:03:40 +0200 Subject: [PATCH 179/417] DONE --- .../visualization/representation.py | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 886212958..4567406a8 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -7,16 +7,7 @@ like depth measures. 
""" -from typing import ( - Any, - Dict, - List, - Optional, - Sequence, - Tuple, - TypeVar, - Union, -) +from typing import Any, Dict, List, Optional, Sequence, Tuple, TypeVar, Union import matplotlib.cm import matplotlib.patches @@ -60,12 +51,14 @@ def _get_label_colors( def _get_color_info( fdata: T, group: Union[Sequence[Any], None], - group_names: Union[ + group_names: Union + [ Sequence[str], Dict[Any, str], None, ], - group_colors: Union[ + group_colors: Union + [ Sequence[str], Dict[Any, Any], None, @@ -210,12 +203,14 @@ def plot( n_points: Union[int, Tuple[int, int], None] = None, domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, group: Union[Sequence[Any], None] = None, - group_colors: Union[ + group_colors: Union + [ Sequence[Any], Dict[Any, Any], None, ] = None, - group_names: Union[ + group_names: Union + [ Sequence[str], Dict[Any, str], None, @@ -401,12 +396,14 @@ def plot( n_cols: Optional[int] = None, domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, group: Union[Sequence[Any], None] = None, - group_colors: Union[ + group_colors: Union + [ Sequence[Any], Dict[Any, Any], None, ] = None, - group_names: Union[ + group_names: Union + [ Sequence[str], Dict[Any, str], None, From 6e2fe4ff1d529be761f71c2963b291b8ceba4641 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 1 Apr 2021 20:25:13 +0200 Subject: [PATCH 180/417] correction --- skfda/exploratory/visualization/representation.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 4567406a8..301d13f74 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -492,8 +492,7 @@ def plot( for i in range(self.fdata.dim_codomain): for j in range(self.fdata.n_samples): - if sample_colors is not None: - color_dict["color"] = sample_colors[j] + 
self.set_color_dict(sample_colors, j, color_dict) axes[i].scatter( self.grid_points[0], @@ -513,8 +512,7 @@ def plot( for i in range(self.fdata.dim_codomain): for j in range(self.fdata.n_samples): - if sample_colors is not None: - color_dict["color"] = sample_colors[j] + self.set_color_dict(sample_colors, j, color_dict) axes[i].scatter( X, @@ -527,3 +525,12 @@ def plot( _set_labels(self.fdata, fig, axes, patches) return fig + + def set_color_dict( + sample_colors: Any, + ind: int, + color_dict: Dict[str, Any], + ) -> None: + if sample_colors is not None: + color_dict["color"] = sample_colors[ind] + From 602e6c5b63f2af3f5b0cf6e6fb416fe0eac6847d Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 1 Apr 2021 20:28:00 +0200 Subject: [PATCH 181/417] cc --- skfda/exploratory/visualization/representation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 301d13f74..9dacacca2 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -527,6 +527,7 @@ def plot( return fig def set_color_dict( + self, sample_colors: Any, ind: int, color_dict: Dict[str, Any], From 8ad143216a937d5646b53652f7d2fc7072e8a5ea Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 1 Apr 2021 20:31:34 +0200 Subject: [PATCH 182/417] change --- skfda/exploratory/visualization/representation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 9dacacca2..387592c34 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -534,4 +534,4 @@ def set_color_dict( ) -> None: if sample_colors is not None: color_dict["color"] = sample_colors[ind] - + \ No newline at end of file From d253218c84b9a63311fd610cd32915220ceaa139 Mon Sep 17 00:00:00 2001 
From: mellamansanchez Date: Thu, 1 Apr 2021 20:50:40 +0200 Subject: [PATCH 183/417] change --- .../visualization/representation.py | 44 +++---------------- 1 file changed, 7 insertions(+), 37 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 387592c34..f935bec81 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -51,21 +51,11 @@ def _get_label_colors( def _get_color_info( fdata: T, group: Union[Sequence[Any], None], - group_names: Union - [ - Sequence[str], - Dict[Any, str], - None, - ], - group_colors: Union - [ - Sequence[str], - Dict[Any, Any], - None, - ], + group_names: Union[Sequence[str], Dict[Any, str], None], + group_colors: Union[Sequence[Any], Dict[Any, Any], None], legend: bool, kwargs: Any, -) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: +) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: patches = None @@ -203,18 +193,8 @@ def plot( n_points: Union[int, Tuple[int, int], None] = None, domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, group: Union[Sequence[Any], None] = None, - group_colors: Union - [ - Sequence[Any], - Dict[Any, Any], - None, - ] = None, - group_names: Union - [ - Sequence[str], - Dict[Any, str], - None, - ] = None, + group_colors: Union[Sequence[Any], Dict[Any, Any], None] = None, + group_names: Union[Sequence[str], Dict[Any, str], None] = None, colormap_name: str = 'autumn', legend: bool = False, **kwargs: Any, @@ -396,18 +376,8 @@ def plot( n_cols: Optional[int] = None, domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, group: Union[Sequence[Any], None] = None, - group_colors: Union - [ - Sequence[Any], - Dict[Any, Any], - None, - ] = None, - group_names: Union - [ - Sequence[str], - Dict[Any, str], - None, - ] = None, + group_colors: Union[Sequence[Any], Dict[Any, Any], None] = None, + group_names: Union[Sequence[str], 
Dict[Any, str], None] = None, legend: bool = False, **kwargs: Any, ) -> Figure: From 4cf67cc940dff5d8668b0c9ccf18eae65eab1870 Mon Sep 17 00:00:00 2001 From: mellamansanchez <38490771+mellamansanchez@users.noreply.github.com> Date: Fri, 2 Apr 2021 20:57:06 +0200 Subject: [PATCH 184/417] Update skfda/exploratory/visualization/representation.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Ramos Carreño --- skfda/exploratory/visualization/representation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index f935bec81..275af27d5 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -191,7 +191,7 @@ def plot( n_rows: Optional[int] = None, n_cols: Optional[int] = None, n_points: Union[int, Tuple[int, int], None] = None, - domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, + domain_range: Optional[DomainRangeLike] = None, group: Union[Sequence[Any], None] = None, group_colors: Union[Sequence[Any], Dict[Any, Any], None] = None, group_names: Union[Sequence[str], Dict[Any, str], None] = None, @@ -504,4 +504,4 @@ def set_color_dict( ) -> None: if sample_colors is not None: color_dict["color"] = sample_colors[ind] - \ No newline at end of file + From 1b3a52abccf2318a0f35c6ebee6a502a599e5d45 Mon Sep 17 00:00:00 2001 From: mellamansanchez <38490771+mellamansanchez@users.noreply.github.com> Date: Fri, 2 Apr 2021 20:57:17 +0200 Subject: [PATCH 185/417] Update skfda/exploratory/visualization/representation.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Ramos Carreño --- skfda/exploratory/visualization/representation.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py 
b/skfda/exploratory/visualization/representation.py index 275af27d5..08e599fed 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -125,13 +125,11 @@ class GraphPlot: gradient_color_list: list of real values used to determine the color in which each of the instances will be plotted. The size max_grad: maximum value that the gradient_list can take, it will be - used to normalize the gradient_color_list in order to get values - thatcan be used in the funcion colormap.__call__(). If not + used to normalize the ``gradient_color_list``. If not declared it will be initialized to the maximum value of gradient_list min_grad: minimum value that the gradient_list can take, it will be - used to normalize the gradient_color_list in order to get values - thatcan be used in the funcion colormap.__call__(). If not + used to normalize the ``gradient_color_list``. If not declared it will be initialized to the minimum value of gradient_list. From c6db9d6fb0981c7c3a78e2fc203476244f9e034d Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Fri, 2 Apr 2021 20:58:24 +0200 Subject: [PATCH 186/417] change --- .../visualization/representation.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index f935bec81..76d52319b 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -32,8 +32,7 @@ def _get_label_colors( n_labels: int, group_colors: Union[Sequence[Any], None], ) -> np.ndarray: - """Get the colors of each label""" - + """Get the colors of each label.""" if group_colors is not None: if len(group_colors) != n_labels: raise ValueError( @@ -141,6 +140,7 @@ class GraphPlot: each function will have. """ + def __init__( self, fdata: FData, @@ -261,10 +261,9 @@ def plot( matplotlib.pyplot.plot_surface function. 
Returns: - fig: figure object in which the graphs are plotted. + fig: figure in which the graphs are plotted. """ - fig, axes = _get_figure_and_axes(chart, fig, ax) fig, axes = _set_figure_layout_for_fdata( self.fdata, fig, axes, n_rows, n_cols, @@ -332,14 +331,14 @@ def plot( color_dict = {} - for i in range(self.fdata.dim_codomain): - for j in range(self.fdata.n_samples): + for k in range(self.fdata.dim_codomain): + for l in range(self.fdata.n_samples): if sample_colors is not None: - color_dict["color"] = sample_colors[j] + color_dict["color"] = sample_colors[l] - axes[i].plot_surface( - X, Y, Z[j, ..., i], + axes[k].plot_surface( + X, Y, Z[l, ..., k], **color_dict, **kwargs, ) @@ -388,7 +387,7 @@ def plot( chart: figure over with the graphs are plotted or axis over where the graphs are plotted. If None and ax is also None, the figure is initialized. - fig: figure over with the graphs are plotted in case ax is not + fig: figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. ax: axis over where the graphs are plotted. If None, see param fig. @@ -428,7 +427,6 @@ def plot( fig (figure object): figure object in which the graphs are plotted. 
""" - evaluated_points = None if self.grid_points is None: @@ -502,6 +500,8 @@ def set_color_dict( ind: int, color_dict: Dict[str, Any], ) -> None: + """Auxiliary method that, sets the new color of the color + dict thanks to sample colors and index.""" if sample_colors is not None: color_dict["color"] = sample_colors[ind] - \ No newline at end of file + \ No newline at end of file From ea7302619eba4f94e91eaec31cc4d809c42e9812 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Fri, 2 Apr 2021 21:41:35 +0200 Subject: [PATCH 187/417] changes --- skfda/exploratory/visualization/_utils.py | 10 ++- .../visualization/representation.py | 84 ++++++++++--------- 2 files changed, 51 insertions(+), 43 deletions(-) diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 7b04ccd14..aa8569a6e 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -1,7 +1,7 @@ import io import math import re -from typing import List, Optional, Sequence, Tuple, Union +from typing import Optional, Sequence, Tuple, Union import matplotlib.backends.backend_svg import matplotlib.pyplot as plt @@ -12,10 +12,12 @@ non_close_text = '[^>]*?' svg_width_regex = re.compile( - f'()') + f'()' +) svg_width_replacement = r'\g<1>100%\g<2>' svg_height_regex = re.compile( - f'()') + f'()' +) svg_height_replacement = r'\g<1>\g<2>' @@ -211,7 +213,7 @@ def _set_labels( fdata: FData, fig: Optional[Figure] = None, axes: Union[Axes, Sequence[Axes], None] = None, - patches: Optional[List[matplotlib.patches.Patch]] = None, + patches: Optional[Sequence[matplotlib.patches.Patch]] = None, ) -> None: """Set labels if any. diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 549ff47b0..c14bb5220 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -7,7 +7,17 @@ like depth measures. 
""" -from typing import Any, Dict, List, Optional, Sequence, Tuple, TypeVar, Union +from typing import ( + Any, + Dict, + List, + Mapping, + Optional, + Sequence, + Tuple, + TypeVar, + Union, +) import matplotlib.cm import matplotlib.patches @@ -25,12 +35,13 @@ _set_labels, ) +K = TypeVar('K') T = TypeVar('T', FDataGrid, np.ndarray) def _get_label_colors( n_labels: int, - group_colors: Union[Sequence[Any], None], + group_colors: Union[Sequence[Any], Mapping[K, Any], None], ) -> np.ndarray: """Get the colors of each label.""" if group_colors is not None: @@ -50,8 +61,8 @@ def _get_label_colors( def _get_color_info( fdata: T, group: Union[Sequence[Any], None], - group_names: Union[Sequence[str], Dict[Any, str], None], - group_colors: Union[Sequence[Any], Dict[Any, Any], None], + group_names: Union[Sequence[str], Mapping[K, str], None], + group_colors: Union[Sequence[Any], Mapping[K, Any], None], legend: bool, kwargs: Any, ) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: @@ -116,13 +127,15 @@ class GraphPlot: """ Class used to plot the FDataGrid object graph as hypersurfaces. - A list of variables (probably depths) can be used as an argument to - display the functions wtih a gradient of colors. + When plotting functional data, we can either choose manually a color, + a group of colors for the representations. Besides, we can use a list of + variables (depths, scalar regression targets...) can be used as an + argument to display the functions wtih a gradient of colors. Args: fdata: functional data set that we want to plot. gradient_color_list: list of real values used to determine the color - in which each of the instances will be plotted. The size + in which each of the instances will be plotted. max_grad: maximum value that the gradient_list can take, it will be used to normalize the ``gradient_color_list``. 
If not declared it will be initialized to the maximum value of @@ -132,17 +145,12 @@ class GraphPlot: declared it will be initialized to the minimum value of gradient_list. - Attributes: - gradient_list: normalization of the values from gradient color_list - that will be used to determine the intensity of the color - each function will have. - """ def __init__( self, fdata: FData, - gradient_color_list: Union[Sequence[float], None] = None, + gradient_color_list: Optional[Sequence[float]] = None, max_grad: Optional[float] = None, min_grad: Optional[float] = None, ) -> None: @@ -191,8 +199,8 @@ def plot( n_points: Union[int, Tuple[int, int], None] = None, domain_range: Optional[DomainRangeLike] = None, group: Union[Sequence[Any], None] = None, - group_colors: Union[Sequence[Any], Dict[Any, Any], None] = None, - group_names: Union[Sequence[str], Dict[Any, str], None] = None, + group_colors: Union[Sequence[Any], Mapping[K, Any], None] = None, + group_names: Union[Sequence[str], Mapping[K, str], None] = None, colormap_name: str = 'autumn', legend: bool = False, **kwargs: Any, @@ -294,12 +302,12 @@ def plot( eval_points = np.linspace(*self.domain_range[0], self.n_points) mat = self.fdata(eval_points) - color_dict: Dict[str, Any] = {} + color_dict: Mapping[str, Any] = {} for i in range(self.fdata.dim_codomain): for j in range(self.fdata.n_samples): - if sample_colors is not None: - color_dict["color"] = sample_colors[j] + + set_color_dict(sample_colors, j, color_dict) axes[i].plot( eval_points, mat[j, ..., i].T, **color_dict, **kwargs, @@ -327,16 +335,15 @@ def plot( X, Y = np.meshgrid(x, y, indexing='ij') - color_dict = {} + color_dict: Mapping[str, Any] = {} for k in range(self.fdata.dim_codomain): - for l in range(self.fdata.n_samples): + for h in range(self.fdata.n_samples): - if sample_colors is not None: - color_dict["color"] = sample_colors[l] + set_color_dict(sample_colors, h, color_dict) axes[k].plot_surface( - X, Y, Z[l, ..., k], + X, Y, Z[h, ..., k], 
**color_dict, **kwargs, ) @@ -373,8 +380,8 @@ def plot( n_cols: Optional[int] = None, domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, group: Union[Sequence[Any], None] = None, - group_colors: Union[Sequence[Any], Dict[Any, Any], None] = None, - group_names: Union[Sequence[str], Dict[Any, str], None] = None, + group_colors: Union[Sequence[Any], Mapping[K, Any], None] = None, + group_names: Union[Sequence[str], Mapping[K, str], None] = None, legend: bool = False, **kwargs: Any, ) -> Figure: @@ -453,12 +460,12 @@ def plot( if self.fdata.dim_domain == 1: - color_dict = {} + color_dict: Mapping[str, Any] = {} for i in range(self.fdata.dim_codomain): for j in range(self.fdata.n_samples): - self.set_color_dict(sample_colors, j, color_dict) + set_color_dict(sample_colors, j, color_dict) axes[i].scatter( self.grid_points[0], @@ -473,12 +480,12 @@ def plot( Y = self.fdata.grid_points[1] X, Y = np.meshgrid(X, Y) - color_dict = {} + color_dict: Mapping[str, Any] = {} for i in range(self.fdata.dim_codomain): for j in range(self.fdata.n_samples): - self.set_color_dict(sample_colors, j, color_dict) + set_color_dict(sample_colors, j, color_dict) axes[i].scatter( X, @@ -492,14 +499,13 @@ def plot( return fig - def set_color_dict( - self, - sample_colors: Any, - ind: int, - color_dict: Dict[str, Any], - ) -> None: - """Auxiliary method that, sets the new color of the color - dict thanks to sample colors and index.""" - if sample_colors is not None: - color_dict["color"] = sample_colors[ind] +def set_color_dict( + sample_colors: Any, + ind: int, + color_dict: Mapping[str, Any], +) -> None: + """Auxiliary method that, sets the new color of the color + dict thanks to sample colors and index.""" + if sample_colors is not None: + color_dict["color"] = sample_colors[ind] From d646197855fb0c55a960703949b3811855b2f25a Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Fri, 2 Apr 2021 21:51:37 +0200 Subject: [PATCH 188/417] change --- setup.cfg | 2 ++ 
.../visualization/representation.py | 23 +++++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/setup.cfg b/setup.cfg index 284f60d02..0b7049cc2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,6 +41,8 @@ ignore = WPS115, # Trailing underscores are a scikit-learn convention WPS120, + # Cognitive complexity cannot be avoided at some modules + WPS232, # The number of imported things may be large, especially for typing WPS235, # We like local imports, thanks diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index c14bb5220..2c6cf342c 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -9,7 +9,6 @@ from typing import ( Any, - Dict, List, Mapping, Optional, @@ -128,7 +127,7 @@ class GraphPlot: Class used to plot the FDataGrid object graph as hypersurfaces. When plotting functional data, we can either choose manually a color, - a group of colors for the representations. Besides, we can use a list of + a group of colors for the representations. Besides, we can use a list of variables (depths, scalar regression targets...) can be used as an argument to display the functions wtih a gradient of colors. 
@@ -482,15 +481,15 @@ def plot( color_dict: Mapping[str, Any] = {} - for i in range(self.fdata.dim_codomain): - for j in range(self.fdata.n_samples): + for k in range(self.fdata.dim_codomain): + for h in range(self.fdata.n_samples): - set_color_dict(sample_colors, j, color_dict) + set_color_dict(sample_colors, h, color_dict) - axes[i].scatter( + axes[k].scatter( X, Y, - evaluated_points[j, ..., i].T, + evaluated_points[h, ..., k].T, **color_dict, **kwargs, ) @@ -499,13 +498,17 @@ def plot( return fig + def set_color_dict( sample_colors: Any, ind: int, color_dict: Mapping[str, Any], ) -> None: - """Auxiliary method that, sets the new color of the color - dict thanks to sample colors and index.""" + """ + Auxiliary method used to update color_dict. + + Sets the new color of the color + dict thanks to sample colors and index. + """ if sample_colors is not None: color_dict["color"] = sample_colors[ind] - From 1b07dabb375a62e10e72d9c8cd6a9af8bf0ce832 Mon Sep 17 00:00:00 2001 From: mellamansanchez <38490771+mellamansanchez@users.noreply.github.com> Date: Fri, 2 Apr 2021 21:53:57 +0200 Subject: [PATCH 189/417] Update skfda/exploratory/visualization/_outliergram.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Ramos Carreño --- skfda/exploratory/visualization/_outliergram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 71a8dd42e..c45f26069 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -3,7 +3,7 @@ This module contains the methods used to plot shapes in order to detect shape outliers in our dataset. In order to do this, we plot the Modified Band Depth and Modified Epigraph Index, that will help us detect -this outliers. The motivation of the method is that it is easy to find +these outliers. 
The motivation of the method is that it is easy to find magnitude outliers, but there is a necessity of capturing this other type. """ From 952490002602daf83df2cbfbf99744049ba70905 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Fri, 2 Apr 2021 22:15:10 +0200 Subject: [PATCH 190/417] test corrected --- .../exploratory/visualization/_outliergram.py | 5 +- tests/test_outliergram.py | 108 ++++++------------ 2 files changed, 37 insertions(+), 76 deletions(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 71a8dd42e..ed76a6d34 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -114,7 +114,8 @@ def plot( ) # Set labels of graph - fig.suptitle("Outliergram") + if self.fdata.dataset_name is not None: + fig.suptitle(self.fdata.dataset_name) ax.set_xlabel("MEI") ax.set_ylabel("MBD") ax.set_xlim([0, 1]) @@ -152,4 +153,4 @@ def modified_epigraph_index_list(self) -> np.ndarray: integrand /= (interval_len * self.fdata.n_samples) - return integrand + return integrand.flatten() diff --git a/tests/test_outliergram.py b/tests/test_outliergram.py index ccde8bcbb..d57d5a9ec 100644 --- a/tests/test_outliergram.py +++ b/tests/test_outliergram.py @@ -1,3 +1,8 @@ +"""Outliergram testing module. + +Module containing the test coverage of outliergram module. +""" + import unittest import numpy as np @@ -7,52 +12,35 @@ class TestOutliergram(unittest.TestCase): + """ + Outliergram testing class. + + Class containing the test coverage of outliergram module. + """ + + def test_outliergram(self) -> None: + """ + Outliergram testing method. - def test_outliergram(self): + Method containing the test coverage of outliergram module. 
+ """ fd = fetch_weather()["data"] fd_temperatures = fd.coordinates[0] outliergram = Outliergram( fd_temperatures, ) + # noqa: WPS317 np.testing.assert_allclose( outliergram.mei, np.array( - [ - [0.46272668], - [0.27840835], - [0.36268754], - [0.27908676], - [0.36112198], - [0.30802348], - [0.82969341], - [0.45904762], - [0.53907371], - [0.38799739], - [0.41283757], - [0.20420091], - [0.23564253], - [0.14737117], - [0.14379648], - [0.54035225], - [0.43459883], - [0.6378604], - [0.86964123], - [0.4421396], - [0.58906719], - [0.75561644], - [0.54982387], - [0.46095238], - [0.09969993], - [0.13166341], - [0.18776256], - [0.4831833], - [0.36816699], - [0.72962818], - [0.80313112], - [0.79934768], - [0.90643183], - [0.90139596], - [0.9685062], + [ # noqa: WPS317 + 0.46272668, 0.27840835, 0.36268754, 0.27908676, 0.36112198, + 0.30802348, 0.82969341, 0.45904762, 0.53907371, 0.38799739, + 0.41283757, 0.20420091, 0.23564253, 0.14737117, 0.14379648, + 0.54035225, 0.43459883, 0.6378604, 0.86964123, 0.4421396, + 0.58906719, 0.75561644, 0.54982387, 0.46095238, 0.09969993, + 0.13166341, 0.18776256, 0.4831833, 0.36816699, 0.72962818, + 0.80313112, 0.79934768, 0.90643183, 0.90139596, 0.9685062, ], ), rtol=1e-5, @@ -61,47 +49,19 @@ def test_outliergram(self): np.testing.assert_array_almost_equal( outliergram.mbd, np.array( - [ - 0.40685162, - 0.42460381, - 0.43088139, - 0.35833775, - 0.47847435, - 0.46825985, - 0.29228349, - 0.51299183, - 0.5178558, - 0.49868539, - 0.52408733, - 0.34457312, - 0.36996431, - 0.2973209, - 0.29107555, - 0.53304017, - 0.44185565, - 0.46346341, - 0.23620736, - 0.47652354, - 0.4814397, - 0.38233529, - 0.51173171, - 0.51164882, - 0.21551437, - 0.23084916, - 0.25650589, - 0.46760447, - 0.30787767, - 0.40929051, - 0.31801082, - 0.3234519, - 0.17015617, - 0.17977514, - 0.05769541, + [ # noqa: WPS317 + 0.40685162, 0.42460381, 0.43088139, 0.35833775, 0.47847435, + 0.46825985, 0.29228349, 0.51299183, 0.5178558, 0.49868539, + 0.52408733, 0.34457312, 0.36996431, 
0.2973209, 0.29107555, + 0.53304017, 0.44185565, 0.46346341, 0.23620736, 0.47652354, + 0.4814397, 0.38233529, 0.51173171, 0.51164882, 0.21551437, + 0.23084916, 0.25650589, 0.46760447, 0.30787767, 0.40929051, + 0.31801082, 0.3234519, 0.17015617, 0.17977514, 0.05769541, ], ), ) if __name__ == '__main__': - print() + print() # noqa: WPS421 unittest.main() From f867cd3ee403df919f180d075301c5c6ba6593c8 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sat, 3 Apr 2021 17:11:27 +0200 Subject: [PATCH 191/417] solved --- .../visualization/representation.py | 146 ++++++++++++++++-- 1 file changed, 134 insertions(+), 12 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 2c6cf342c..1d3f3af81 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -16,6 +16,7 @@ Tuple, TypeVar, Union, + overload, ) import matplotlib.cm @@ -56,6 +57,128 @@ def _get_label_colors( return group_colors +@overload +def _get_color_info( + fdata: T, + group: None, + group_names: None, + group_colors: None, + legend: bool, + kwargs: Any, +) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: + + patches = None + # In this case, each curve has a different color unless specified + # otherwise + + if 'color' in kwargs: + sample_colors = fdata.n_samples * [kwargs.get("color")] + kwargs.pop('color') + + elif 'c' in kwargs: + sample_colors = fdata.n_samples * [kwargs.get("c")] + kwargs.pop('c') + + else: + sample_colors = None + + return sample_colors, patches + +@overload +def _get_color_info( + fdata: T, + group: Sequence[int], + group_names: Union[Sequence[str], Mapping[K, str], None], + group_colors: Union[Sequence[Any], Mapping[K, Any], None], + legend: bool, + kwargs: Any, +) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: + + patches = None + # In this case, each curve has a label, and all curves with the same + # label should have the 
same color + + group_unique, group_indexes = np.unique(group, return_inverse=True) + n_labels = len(group_unique) + + if group_colors is not None: + group_colors_array = np.array( + [group_colors[g] for g in group_unique], + ) + else: + prop_cycle = matplotlib.rcParams['axes.prop_cycle'] + cycle_colors = prop_cycle.by_key()['color'] + + group_colors_array = np.take( + cycle_colors, np.arange(n_labels), mode='wrap', + ) + + sample_colors = group_colors_array[group_indexes] + + group_names_array = None + + if group_names is not None: + group_names_array = np.array( + [group_names[g] for g in group_unique], + ) + elif legend is True: + group_names_array = group_unique + + if group_names_array is not None: + patches = [ + matplotlib.patches.Patch(color=c, label=l) + for c, l in zip(group_colors_array, group_names_array) + ] + + return sample_colors, patches + +@overload +def _get_color_info( + fdata: T, + group: Sequence[K], + group_names: Optional[Mapping[K, str]], + group_colors: Optional[Mapping[K, Any]], + legend: bool, + kwargs: Any, +) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: + + patches = None + # In this case, each curve has a label, and all curves with the same + # label should have the same color + + group_unique, group_indexes = np.unique(group, return_inverse=True) + n_labels = len(group_unique) + + if group_colors is not None: + group_colors_array = np.array( + [group_colors[g] for g in group_unique], + ) + else: + prop_cycle = matplotlib.rcParams['axes.prop_cycle'] + cycle_colors = prop_cycle.by_key()['color'] + + group_colors_array = np.take( + cycle_colors, np.arange(n_labels), mode='wrap', + ) + + sample_colors = group_colors_array[group_indexes] + + group_names_array = None + + if group_names is not None: + group_names_array = np.array( + [group_names[g] for g in group_unique], + ) + elif legend is True: + group_names_array = group_unique + + if group_names_array is not None: + patches = [ + matplotlib.patches.Patch(color=c, label=l) + 
for c, l in zip(group_colors_array, group_names_array) + ] + + return sample_colors, patches def _get_color_info( fdata: T, @@ -266,7 +389,7 @@ def plot( matplotlib.pyplot.plot_surface function. Returns: - fig: figure in which the graphs are plotted. + fig: the figure in which the graphs are plotted. """ fig, axes = _get_figure_and_axes(chart, fig, ax) @@ -292,6 +415,8 @@ def plot( for i in range(self.fdata.n_samples): sample_colors[i] = colormap(self.gradient_list[i]) + color_dict: Mapping[str, Any] = {} + if self.fdata.dim_domain == 1: if n_points is None: @@ -301,8 +426,6 @@ def plot( eval_points = np.linspace(*self.domain_range[0], self.n_points) mat = self.fdata(eval_points) - color_dict: Mapping[str, Any] = {} - for i in range(self.fdata.dim_codomain): for j in range(self.fdata.n_samples): @@ -334,16 +457,17 @@ def plot( X, Y = np.meshgrid(x, y, indexing='ij') - color_dict: Mapping[str, Any] = {} - for k in range(self.fdata.dim_codomain): for h in range(self.fdata.n_samples): set_color_dict(sample_colors, h, color_dict) axes[k].plot_surface( - X, Y, Z[h, ..., k], - **color_dict, **kwargs, + X, + Y, + Z[h, ..., k], + **color_dict, + **kwargs, ) _set_labels(self.fdata, fig, axes, patches) @@ -457,9 +581,9 @@ def plot( self.fdata, group, group_names, group_colors, legend, kwargs, ) - if self.fdata.dim_domain == 1: + color_dict: Mapping[str, Any] = {} - color_dict: Mapping[str, Any] = {} + if self.fdata.dim_domain == 1: for i in range(self.fdata.dim_codomain): for j in range(self.fdata.n_samples): @@ -479,8 +603,6 @@ def plot( Y = self.fdata.grid_points[1] X, Y = np.meshgrid(X, Y) - color_dict: Mapping[str, Any] = {} - for k in range(self.fdata.dim_codomain): for h in range(self.fdata.n_samples): @@ -506,7 +628,7 @@ def set_color_dict( ) -> None: """ Auxiliary method used to update color_dict. - + Sets the new color of the color dict thanks to sample colors and index. 
""" From 6d4fa3bc8f8252a88cfe2a806d8c1918e800bc06 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sat, 3 Apr 2021 17:42:14 +0200 Subject: [PATCH 192/417] change --- .../visualization/representation.py | 67 +------------------ 1 file changed, 3 insertions(+), 64 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 1d3f3af81..b2b007819 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -57,6 +57,7 @@ def _get_label_colors( return group_colors + @overload def _get_color_info( fdata: T, @@ -84,6 +85,7 @@ def _get_color_info( return sample_colors, patches + @overload def _get_color_info( fdata: T, @@ -132,6 +134,7 @@ def _get_color_info( return sample_colors, patches + @overload def _get_color_info( fdata: T, @@ -180,70 +183,6 @@ def _get_color_info( return sample_colors, patches -def _get_color_info( - fdata: T, - group: Union[Sequence[Any], None], - group_names: Union[Sequence[str], Mapping[K, str], None], - group_colors: Union[Sequence[Any], Mapping[K, Any], None], - legend: bool, - kwargs: Any, -) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: - - patches = None - - if group is not None: - # In this case, each curve has a label, and all curves with the same - # label should have the same color - - group_unique, group_indexes = np.unique(group, return_inverse=True) - n_labels = len(group_unique) - - if group_colors is not None: - group_colors_array = np.array( - [group_colors[g] for g in group_unique], - ) - else: - prop_cycle = matplotlib.rcParams['axes.prop_cycle'] - cycle_colors = prop_cycle.by_key()['color'] - - group_colors_array = np.take( - cycle_colors, np.arange(n_labels), mode='wrap', - ) - - sample_colors = group_colors_array[group_indexes] - - group_names_array = None - - if group_names is not None: - group_names_array = np.array( - [group_names[g] for g in group_unique], - ) - elif legend 
is True: - group_names_array = group_unique - - if group_names_array is not None: - patches = [ - matplotlib.patches.Patch(color=c, label=l) - for c, l in zip(group_colors_array, group_names_array) - ] - - else: - # In this case, each curve has a different color unless specified - # otherwise - - if 'color' in kwargs: - sample_colors = fdata.n_samples * [kwargs.get("color")] - kwargs.pop('color') - - elif 'c' in kwargs: - sample_colors = fdata.n_samples * [kwargs.get("c")] - kwargs.pop('c') - - else: - sample_colors = None - - return sample_colors, patches - class GraphPlot: """ From 1a4d0e12441ee5c6db198b03e6387f46721188d1 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sat, 3 Apr 2021 22:40:23 +0200 Subject: [PATCH 193/417] change --- skfda/exploratory/visualization/representation.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index b2b007819..f7a08e155 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -57,7 +57,6 @@ def _get_label_colors( return group_colors - @overload def _get_color_info( fdata: T, @@ -85,7 +84,6 @@ def _get_color_info( return sample_colors, patches - @overload def _get_color_info( fdata: T, @@ -134,7 +132,6 @@ def _get_color_info( return sample_colors, patches - @overload def _get_color_info( fdata: T, From 1899508da0e528f4d3d8f9e50b5548863be2d2fb Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sat, 3 Apr 2021 23:42:50 +0200 Subject: [PATCH 194/417] solved --- skfda/exploratory/visualization/representation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index f7a08e155..05edc8df8 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -57,6 +57,7 @@ def 
_get_label_colors( return group_colors + @overload def _get_color_info( fdata: T, @@ -84,6 +85,7 @@ def _get_color_info( return sample_colors, patches + @overload def _get_color_info( fdata: T, @@ -132,6 +134,7 @@ def _get_color_info( return sample_colors, patches + @overload def _get_color_info( fdata: T, @@ -178,7 +181,7 @@ def _get_color_info( for c, l in zip(group_colors_array, group_names_array) ] - return sample_colors, patches + return sample_colors, patches class GraphPlot: @@ -380,7 +383,7 @@ def plot( n_points_tuple = (n_points, n_points) elif len(n_points) != 2: raise ValueError( - f"n_points should be a number or a tuple of " + "n_points should be a number or a tuple of " f"length 2, and has length {len(n_points)}", ) From 57726fc50bdcbb2c06141b79ab54232de5eb2213 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 4 Apr 2021 20:54:20 +0200 Subject: [PATCH 195/417] chgange --- .../visualization/representation.py | 139 +++++++----------- 1 file changed, 57 insertions(+), 82 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 05edc8df8..e5b677433 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -11,6 +11,7 @@ Any, List, Mapping, + Dict, Optional, Sequence, Tuple, @@ -68,23 +69,6 @@ def _get_color_info( kwargs: Any, ) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: - patches = None - # In this case, each curve has a different color unless specified - # otherwise - - if 'color' in kwargs: - sample_colors = fdata.n_samples * [kwargs.get("color")] - kwargs.pop('color') - - elif 'c' in kwargs: - sample_colors = fdata.n_samples * [kwargs.get("c")] - kwargs.pop('c') - - else: - sample_colors = None - - return sample_colors, patches - @overload def _get_color_info( @@ -96,46 +80,18 @@ def _get_color_info( kwargs: Any, ) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: - patches = None - # 
In this case, each curve has a label, and all curves with the same - # label should have the same color - - group_unique, group_indexes = np.unique(group, return_inverse=True) - n_labels = len(group_unique) - if group_colors is not None: - group_colors_array = np.array( - [group_colors[g] for g in group_unique], - ) - else: - prop_cycle = matplotlib.rcParams['axes.prop_cycle'] - cycle_colors = prop_cycle.by_key()['color'] - - group_colors_array = np.take( - cycle_colors, np.arange(n_labels), mode='wrap', - ) - - sample_colors = group_colors_array[group_indexes] - - group_names_array = None - - if group_names is not None: - group_names_array = np.array( - [group_names[g] for g in group_unique], - ) - elif legend is True: - group_names_array = group_unique - - if group_names_array is not None: - patches = [ - matplotlib.patches.Patch(color=c, label=l) - for c, l in zip(group_colors_array, group_names_array) - ] - - return sample_colors, patches +@overload +def _get_color_info( + fdata: T, + group: Sequence[K], + group_names: Optional[Mapping[K, str]], + group_colors: Optional[Mapping[K, Any]], + legend: bool, + kwargs: Any, +) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: -@overload def _get_color_info( fdata: T, group: Sequence[K], @@ -146,40 +102,57 @@ def _get_color_info( ) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: patches = None - # In this case, each curve has a label, and all curves with the same - # label should have the same color - group_unique, group_indexes = np.unique(group, return_inverse=True) - n_labels = len(group_unique) + if group is not None: + # In this case, each curve has a label, and all curves with the same + # label should have the same color - if group_colors is not None: - group_colors_array = np.array( - [group_colors[g] for g in group_unique], - ) - else: - prop_cycle = matplotlib.rcParams['axes.prop_cycle'] - cycle_colors = prop_cycle.by_key()['color'] + group_unique, group_indexes = np.unique(group, 
return_inverse=True) + n_labels = len(group_unique) - group_colors_array = np.take( - cycle_colors, np.arange(n_labels), mode='wrap', - ) + if group_colors is not None: + group_colors_array = np.array( + [group_colors[g] for g in group_unique], + ) + else: + prop_cycle = matplotlib.rcParams['axes.prop_cycle'] + cycle_colors = prop_cycle.by_key()['color'] - sample_colors = group_colors_array[group_indexes] + group_colors_array = np.take( + cycle_colors, np.arange(n_labels), mode='wrap', + ) - group_names_array = None + sample_colors = group_colors_array[group_indexes] - if group_names is not None: - group_names_array = np.array( - [group_names[g] for g in group_unique], - ) - elif legend is True: - group_names_array = group_unique + group_names_array = None + + if group_names is not None: + group_names_array = np.array( + [group_names[g] for g in group_unique], + ) + elif legend is True: + group_names_array = group_unique + + if group_names_array is not None: + patches = [ + matplotlib.patches.Patch(color=c, label=l) + for c, l in zip(group_colors_array, group_names_array) + ] + + else: + # In this case, each curve has a different color unless specified + # otherwise - if group_names_array is not None: - patches = [ - matplotlib.patches.Patch(color=c, label=l) - for c, l in zip(group_colors_array, group_names_array) - ] + if 'color' in kwargs: + sample_colors = fdata.n_samples * [kwargs.get("color")] + kwargs.pop('color') + + elif 'c' in kwargs: + sample_colors = fdata.n_samples * [kwargs.get("c")] + kwargs.pop('c') + + else: + sample_colors = None return sample_colors, patches @@ -353,6 +326,8 @@ def plot( sample_colors = [None] * self.fdata.n_samples for i in range(self.fdata.n_samples): sample_colors[i] = colormap(self.gradient_list[i]) + + self.sample_colors = sample_colors color_dict: Mapping[str, Any] = {} @@ -384,7 +359,7 @@ def plot( elif len(n_points) != 2: raise ValueError( "n_points should be a number or a tuple of " - f"length 2, and has length 
{len(n_points)}", + "length 2, and has length {}.".format(len(n_points)), ) # Axes where will be evaluated From babef4872ac133a6972e2647deb3ccfd67b0f034 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 4 Apr 2021 23:33:48 +0200 Subject: [PATCH 196/417] Add types and fix styling of landmark registration. --- .../registration/_landmark_registration.py | 237 +++++++++++------- 1 file changed, 140 insertions(+), 97 deletions(-) diff --git a/skfda/preprocessing/registration/_landmark_registration.py b/skfda/preprocessing/registration/_landmark_registration.py index 32b269397..5997ca610 100644 --- a/skfda/preprocessing/registration/_landmark_registration.py +++ b/skfda/preprocessing/registration/_landmark_registration.py @@ -2,18 +2,27 @@ This module contains methods to perform the landmark registration. """ +from __future__ import annotations + +from typing import Callable, Optional, Sequence, Union import numpy as np -from ... import FDataGrid +from ...representation import FData, FDataGrid +from ...representation._typing import ArrayLike, GridPointsLike +from ...representation.extrapolation import ExtrapolationLike from ...representation.interpolation import SplineInterpolation -__author__ = "Pablo Marcos Manchón" -__email__ = "pablo.marcosm@estudiante.uam.es" +_FixedLocation = Union[float, Sequence[float]] +_LocationCallable = Callable[[np.ndarray], _FixedLocation] -def landmark_shift_deltas(fd, landmarks, location=None): - r"""Returns the corresponding shifts to align the landmarks of the curves. +def landmark_shift_deltas( + fd: FData, + landmarks: ArrayLike, + location: Union[_FixedLocation, _LocationCallable, None] = None, +) -> np.ndarray: + r"""Return the corresponding shifts to align the landmarks of the curves. Let :math:`t^*` the time where the landmarks of the curves will be aligned, and :math:`t_i` the location of the landmarks for each curve. 
@@ -24,10 +33,10 @@ def landmark_shift_deltas(fd, landmarks, location=None): :term:`domain` and the :term:`codomain`. Args: - fd (:class:`FData`): Functional data object. - landmarks (array_like): List with the landmarks of the samples. - location (numeric or callable, optional): Defines where - the landmarks will be alligned. If a numer or list is passed the + fd: Functional data object. + landmarks: List with the landmarks of the samples. + location: Defines where + the landmarks will be alligned. If a number or list is passed the landmarks will be alligned to it. In case of a callable is passed the location will be the result of the the call, the function should be accept as an unique parameter a numpy array @@ -37,14 +46,13 @@ def landmark_shift_deltas(fd, landmarks, location=None): max shift. Returns: - :class:`numpy.ndarray`: Array containing the corresponding shifts. + Array containing the corresponding shifts. Raises: ValueError: If the list of landmarks does not match with the number of samples. Examples: - >>> from skfda.datasets import make_multimodal_landmarks >>> from skfda.datasets import make_multimodal_samples >>> from skfda.preprocessing.registration import landmark_shift_deltas @@ -67,32 +75,41 @@ def landmark_shift_deltas(fd, landmarks, location=None): FDataGrid(...) """ + landmarks = np.atleast_1d(landmarks) if len(landmarks) != fd.n_samples: - raise ValueError(f"landmark list ({len(landmarks)}) must have the same" - f" length than the number of samples ({fd.n_samples})") + raise ValueError( + f"landmark list ({len(landmarks)}) must have the same" + f" length than the number of samples ({fd.n_samples})", + ) - landmarks = np.atleast_1d(landmarks) + loc_array: Union[float, Sequence[float], np.ndarray] # Parses location if location is None: - p = (np.max(landmarks, axis=0) + np.min(landmarks, axis=0)) / 2. 
+ loc_array = ( + np.max(landmarks, axis=0) + + np.min(landmarks, axis=0) + ) / 2 elif callable(location): - p = location(landmarks) + loc_array = location(landmarks) else: - try: - p = np.atleast_1d(location) - except: - raise ValueError("Invalid location, must be None, a callable or a " - "number in the domain") + loc_array = location - shifts = landmarks - p + loc_array = np.atleast_1d(loc_array) - return shifts + return landmarks - loc_array -def landmark_shift(fd, landmarks, location=None, *, restrict_domain=False, - extrapolation=None, eval_points=None, **kwargs): +def landmark_shift( + fd: FData, + landmarks: ArrayLike, + location: Union[_FixedLocation, _LocationCallable, None] = None, + *, + restrict_domain: bool = False, + extrapolation: Optional[ExtrapolationLike] = None, + grid_points: Optional[GridPointsLike] = None, +) -> FDataGrid: r"""Perform a shift of the curves to align the landmarks. Let :math:`t^*` the time where the landmarks of the curves will be @@ -105,9 +122,9 @@ def landmark_shift(fd, landmarks, location=None, *, restrict_domain=False, x_i^*(t^*)=x_i(t^* + \delta_i)=x_i(t_i) Args: - fd (:class:`FData`): Functional data object. - landmarks (array_like): List with the landmarks of the samples. - location (numeric or callable, optional): Defines where + fd: Functional data object. + landmarks: List with the landmarks of the samples. + location: Defines where the landmarks will be alligned. If a numeric value is passed the landmarks will be alligned to it. In case of a callable is passed the location will be the result of the the call, the @@ -116,22 +133,20 @@ def landmark_shift(fd, landmarks, location=None, *, restrict_domain=False, By default it will be used as location :math:`\frac{1}{2}(max( \text{landmarks})+ min(\text{landmarks}))` wich minimizes the max shift. 
- restrict_domain (bool, optional): If True restricts the domain to + restrict_domain: If True restricts the domain to avoid evaluate points outside the domain using extrapolation. Defaults uses extrapolation. - extrapolation (str or Extrapolation, optional): Controls the + extrapolation: Controls the extrapolation mode for elements outside the domain range. By default uses the method defined in fd. See extrapolation to more information. - eval_points (array_like, optional): Set of points where + grid_points: Grid of points where the functions are evaluated in :func:`shift`. - **kwargs: Keyword arguments to be passed to :func:`shift`. Returns: - :class:`FData`: Functional data object with the registered samples. + Functional data object with the registered samples. Examples: - >>> from skfda.datasets import make_multimodal_landmarks >>> from skfda.datasets import make_multimodal_samples >>> from skfda.preprocessing.registration import landmark_shift @@ -148,16 +163,23 @@ def landmark_shift(fd, landmarks, location=None, *, restrict_domain=False, FDataGrid(...) """ - shifts = landmark_shift_deltas(fd, landmarks, location=location) - return fd.shift(shifts, restrict_domain=restrict_domain, - extrapolation=extrapolation, - grid_points=eval_points, **kwargs) - - -def landmark_registration_warping(fd, landmarks, *, location=None, - eval_points=None): + return fd.shift( + shifts, + restrict_domain=restrict_domain, + extrapolation=extrapolation, + grid_points=grid_points, + ) + + +def landmark_registration_warping( + fd: FData, + landmarks: ArrayLike, + *, + location: Optional[ArrayLike] = None, + grid_points: Optional[GridPointsLike] = None, +) -> FDataGrid: """Calculate the transformation used in landmark registration. Let :math:`t_{ij}` the time where the sample :math:`i` has the feature @@ -169,16 +191,17 @@ def landmark_registration_warping(fd, landmarks, *, location=None, See [RS05-7-3-1]_ for a detailed explanation. 
Args: - fd (:class:`FData`): Functional data object. - landmarks (array_like): List containing landmarks for each samples. - location (array_like, optional): Defines where + fd: Functional data object. + landmarks: List containing landmarks for each samples. + location: Defines where the landmarks will be alligned. By default it will be used as location the mean of the landmarks. - eval_points (array_like, optional): Set of points where + grid_points: Grid of points where the functions are evaluated to obtain a discrete representation of the object. + Returns: - :class:`FDataGrid`: FDataGrid with the warpings function needed to + FDataGrid with the warpings function needed to register the functional data object. Raises: @@ -187,12 +210,11 @@ def landmark_registration_warping(fd, landmarks, *, location=None, the number of samples. References: - - .. [RS05-7-3-1] Ramsay, J., Silverman, B. W. (2005). Feature or landmark - registration. In *Functional Data Analysis* (pp. 132-136). Springer. + .. [RS05-7-3-1] Ramsay, J., Silverman, B. W. (2005). Feature or + landmark registration. In *Functional Data Analysis* (pp. 132-136). + Springer. Examples: - >>> from skfda.datasets import make_multimodal_landmarks >>> from skfda.datasets import make_multimodal_samples >>> from skfda.preprocessing.registration import ( @@ -216,89 +238,107 @@ def landmark_registration_warping(fd, landmarks, *, location=None, >>> fd.compose(warping) FDataGrid(...) 
+ """ + landmarks = np.asarray(landmarks) if fd.dim_domain > 1: - raise NotImplementedError("Method only implemented for objects with" - "domain dimension up to 1.") + raise NotImplementedError( + "Method only implemented for objects with " + "domain dimension up to 1.", + ) if len(landmarks) != fd.n_samples: - raise ValueError("The number of list of landmarks should be equal to " - "the number of samples") + raise ValueError( + "The number of list of landmarks should be equal to " + "the number of samples", + ) + + landmarks = landmarks.reshape((fd.n_samples, -1)) - landmarks = np.asarray(landmarks).reshape((fd.n_samples, -1)) + location = ( + np.mean(landmarks, axis=0) + if location is None + else np.asarray(location) + ) + + assert isinstance(location, np.ndarray) n_landmarks = landmarks.shape[-1] data_matrix = np.empty((fd.n_samples, n_landmarks + 2)) - data_matrix[:, 0] = fd.domain_range[0][0] data_matrix[:, -1] = fd.domain_range[0][1] - data_matrix[:, 1:-1] = landmarks - if location is None: - grid_points = np.mean(data_matrix, axis=0) - - elif n_landmarks != len(location): + if n_landmarks == len(location): + if grid_points is None: + grid_points = np.empty(n_landmarks + 2) + grid_points[0] = fd.domain_range[0][0] + grid_points[-1] = fd.domain_range[0][1] + grid_points[1:-1] = location - raise ValueError(f"Number of landmark locations should be equal than " - f"the number of landmarks ({len(location)}) != " - f"({n_landmarks})") else: - grid_points = np.empty(n_landmarks + 2) - grid_points[0] = fd.domain_range[0][0] - grid_points[-1] = fd.domain_range[0][1] - grid_points[1:-1] = location + raise ValueError( + f"Number of landmark locations should be equal than " + f"the number of landmarks ({len(location)}) != ({n_landmarks})", + ) interpolation = SplineInterpolation(interpolation_order=3, monotone=True) - warping = FDataGrid(data_matrix=data_matrix, - grid_points=grid_points, - interpolation=interpolation, - extrapolation='bounds') + warping = FDataGrid( + 
data_matrix=data_matrix, + grid_points=grid_points, + interpolation=interpolation, + extrapolation='bounds', + ) try: warping_points = fd.grid_points except AttributeError: - warping_points = [np.linspace(*domain, 201) - for domain in fd.domain_range] + warping_points = None return warping.to_grid(warping_points) -def landmark_registration(fd, landmarks, *, location=None, eval_points=None): - """Perform landmark registration of the curves. +def landmark_registration( + fd: FData, + landmarks: ArrayLike, + *, + location: Optional[ArrayLike] = None, + grid_points: Optional[GridPointsLike] = None, +) -> FDataGrid: + """ + Perform landmark registration of the curves. - Let :math:`t_{ij}` the time where the sample :math:`i` has the feature - :math:`j` and :math:`t^*_j` the new time for the feature. - The registered samples will have their features aligned, i.e., - :math:`x^*_i(t^*_j)=x_i(t_{ij})`. + Let :math:`t_{ij}` the time where the sample :math:`i` has the feature + :math:`j` and :math:`t^*_j` the new time for the feature. + The registered samples will have their features aligned, i.e., + :math:`x^*_i(t^*_j)=x_i(t_{ij})`. - See [RS05-7-3]_ for a detailed explanation. + See [RS05-7-3]_ for a detailed explanation. Args: - fd (:class:`FData`): Functional data object. - landmarks (array_like): List containing landmarks for each samples. - location (array_like, optional): Defines where + fd: Functional data object. + landmarks: List containing landmarks for each samples. + location: Defines where the landmarks will be alligned. By default it will be used as location the mean of the landmarks. - eval_points (array_like, optional): Set of points where + grid_points: Grid of points where the functions are evaluated to obtain a discrete representation of the object. In case of objects with multidimensional :term:`domain` a list axis with points of evaluation for each dimension. Returns: - :class:`FData`: FData with the functional data object registered. 
+ FDataGrid with the functional data object registered. References: - - .. [RS05-7-3] Ramsay, J., Silverman, B. W. (2005). Feature or landmark - registration. In *Functional Data Analysis* (pp. 132-136). Springer. + .. [RS05-7-3] Ramsay, J., Silverman, B. W. (2005). Feature or landmark + registration. In *Functional Data Analysis* (pp. 132-136). + Springer. Examples: - >>> from skfda.datasets import make_multimodal_landmarks >>> from skfda.datasets import make_multimodal_samples >>> from skfda.preprocessing.registration import landmark_registration @@ -321,11 +361,14 @@ def landmark_registration(fd, landmarks, *, location=None, eval_points=None): >>> fd = fd.to_basis(BSpline(n_basis=12)) >>> landmark_registration(fd, landmarks) - FDataBasis(...) + FDataGrid(...) """ - - warping = landmark_registration_warping(fd, landmarks, location=location, - eval_points=eval_points) - - return fd.compose(warping) + warping = landmark_registration_warping( + fd, + landmarks, + location=location, + grid_points=grid_points, + ) + + return fd.to_grid(grid_points).compose(warping) From 61cf2fbcbc270c84ade972387ba5d5001f0a4b91 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 5 Apr 2021 17:39:18 +0200 Subject: [PATCH 197/417] protocol --- .../visualization/representation.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index e5b677433..6758b7db0 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -12,6 +12,8 @@ List, Mapping, Dict, + Generic, + Protocol, Optional, Sequence, Tuple, @@ -37,9 +39,14 @@ ) K = TypeVar('K') +V = TypeVar('V') T = TypeVar('T', FDataGrid, np.ndarray) +class Indexable(Protocol[K, V]): + def __getitem__(self, __key: K) -> V: + pass + def _get_label_colors( n_labels: int, group_colors: Union[Sequence[Any], Mapping[K, Any], None], @@ -58,7 +65,7 @@ 
def _get_label_colors( return group_colors - +""" @overload def _get_color_info( fdata: T, @@ -80,7 +87,6 @@ def _get_color_info( kwargs: Any, ) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: - @overload def _get_color_info( fdata: T, @@ -89,14 +95,14 @@ def _get_color_info( group_colors: Optional[Mapping[K, Any]], legend: bool, kwargs: Any, -) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: +) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]:""" def _get_color_info( fdata: T, - group: Sequence[K], - group_names: Optional[Mapping[K, str]], - group_colors: Optional[Mapping[K, Any]], + group: Optional[Sequence[K]], + group_names: Optional[Indexable[K, str]], + group_colors: Optional[Indexable[K, Any]], legend: bool, kwargs: Any, ) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: From 8783656fc81700f7163d9e0dcf8ddf10e96c16d6 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 5 Apr 2021 18:34:39 +0200 Subject: [PATCH 198/417] change --- skfda/exploratory/visualization/representation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 6758b7db0..bd3e2a847 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -11,8 +11,6 @@ Any, List, Mapping, - Dict, - Generic, Protocol, Optional, Sequence, From b8c898f625e4e8d2cb3089fff2ca1e5ab0efdae2 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 5 Apr 2021 18:37:47 +0200 Subject: [PATCH 199/417] contravariant --- .../visualization/representation.py | 38 +------------------ 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index bd3e2a847..192136e55 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -17,7 +17,6 @@ Tuple, 
TypeVar, Union, - overload, ) import matplotlib.cm @@ -36,8 +35,8 @@ _set_labels, ) -K = TypeVar('K') -V = TypeVar('V') +K = TypeVar('K', contravariant=True) +V = TypeVar('V', covariant=True) T = TypeVar('T', FDataGrid, np.ndarray) @@ -63,39 +62,6 @@ def _get_label_colors( return group_colors -""" -@overload -def _get_color_info( - fdata: T, - group: None, - group_names: None, - group_colors: None, - legend: bool, - kwargs: Any, -) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: - - -@overload -def _get_color_info( - fdata: T, - group: Sequence[int], - group_names: Union[Sequence[str], Mapping[K, str], None], - group_colors: Union[Sequence[Any], Mapping[K, Any], None], - legend: bool, - kwargs: Any, -) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: - -@overload -def _get_color_info( - fdata: T, - group: Sequence[K], - group_names: Optional[Mapping[K, str]], - group_colors: Optional[Mapping[K, Any]], - legend: bool, - kwargs: Any, -) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]:""" - - def _get_color_info( fdata: T, group: Optional[Sequence[K]], From d42a7e8f28ab2d13e19187afc94978863c6852fc Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 5 Apr 2021 18:39:42 +0200 Subject: [PATCH 200/417] change --- skfda/exploratory/visualization/representation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 192136e55..86ce1730e 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -44,6 +44,7 @@ class Indexable(Protocol[K, V]): def __getitem__(self, __key: K) -> V: pass + def _get_label_colors( n_labels: int, group_colors: Union[Sequence[Any], Mapping[K, Any], None], From 735b68835ff74091bf025845620286239181c054 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 5 Apr 2021 18:40:44 +0200 Subject: [PATCH 201/417] tpye --- 
skfda/exploratory/visualization/representation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 86ce1730e..ed5689ce6 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -41,6 +41,9 @@ class Indexable(Protocol[K, V]): + """ + Class Indexable used to type _get_color_info. + """ def __getitem__(self, __key: K) -> V: pass From 641e2ca426ce171bff8f1a8df280f6464f453e78 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 5 Apr 2021 18:44:32 +0200 Subject: [PATCH 202/417] change --- skfda/exploratory/visualization/representation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index ed5689ce6..c2c637905 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -11,8 +11,8 @@ Any, List, Mapping, - Protocol, Optional, + Protocol, Sequence, Tuple, TypeVar, @@ -300,7 +300,7 @@ def plot( sample_colors = [None] * self.fdata.n_samples for i in range(self.fdata.n_samples): sample_colors[i] = colormap(self.gradient_list[i]) - + self.sample_colors = sample_colors color_dict: Mapping[str, Any] = {} @@ -333,7 +333,7 @@ def plot( elif len(n_points) != 2: raise ValueError( "n_points should be a number or a tuple of " - "length 2, and has length {}.".format(len(n_points)), + "length 2, and has length {0}.".format(len(n_points)), ) # Axes where will be evaluated From ca0f81cd4bc2b41a7b51cbe0258939d5a52001f5 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 5 Apr 2021 18:48:31 +0200 Subject: [PATCH 203/417] solved protocol import --- skfda/exploratory/visualization/representation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py 
b/skfda/exploratory/visualization/representation.py index c2c637905..b0471dae9 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -12,12 +12,12 @@ List, Mapping, Optional, - Protocol, Sequence, Tuple, TypeVar, Union, ) +from typing_extensions import Protocol import matplotlib.cm import matplotlib.patches @@ -298,8 +298,8 @@ def plot( colormap = colormap.reversed() sample_colors = [None] * self.fdata.n_samples - for i in range(self.fdata.n_samples): - sample_colors[i] = colormap(self.gradient_list[i]) + for m in range(self.fdata.n_samples): + sample_colors[m] = colormap(self.gradient_list[m]) self.sample_colors = sample_colors From 3201891ae2439a7998704af3e73275181c6f8a6f Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 5 Apr 2021 18:49:39 +0200 Subject: [PATCH 204/417] imports --- skfda/exploratory/visualization/representation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index b0471dae9..54ac3ef77 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -17,13 +17,13 @@ TypeVar, Union, ) -from typing_extensions import Protocol import matplotlib.cm import matplotlib.patches import numpy as np from matplotlib.axes import Axes from matplotlib.figure import Figure +from typing_extensions import Protocol from ... 
import FDataGrid from ..._utils import _to_domain_range, constants From ad178abb376baff7d41ab134ed3b75e394f340f0 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 5 Apr 2021 18:55:02 +0200 Subject: [PATCH 205/417] changed --- skfda/exploratory/visualization/representation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 54ac3ef77..23918419e 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -41,9 +41,8 @@ class Indexable(Protocol[K, V]): - """ - Class Indexable used to type _get_color_info. - """ + """Class Indexable used to type _get_color_info.""" + def __getitem__(self, __key: K) -> V: pass @@ -66,6 +65,7 @@ def _get_label_colors( return group_colors + def _get_color_info( fdata: T, group: Optional[Sequence[K]], From 259683eafda505ece79a841ada0fc5bd5d76fceb Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 5 Apr 2021 19:57:46 +0200 Subject: [PATCH 206/417] --- skfda/exploratory/visualization/_utils.py | 6 ++++++ skfda/exploratory/visualization/representation.py | 9 +++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index aa8569a6e..77f590c0c 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -20,6 +20,12 @@ ) svg_height_replacement = r'\g<1>\g<2>' +ColorLike = Union[ + Tuple[float, float, float], + Tuple[float, float, float, float], + str, + Sequence[float], +] def _create_figure(): """Create figure using the default backend.""" diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 23918419e..025431640 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py 
@@ -33,6 +33,7 @@ _get_figure_and_axes, _set_figure_layout_for_fdata, _set_labels, + ColorLike, ) K = TypeVar('K', contravariant=True) @@ -49,7 +50,7 @@ def __getitem__(self, __key: K) -> V: def _get_label_colors( n_labels: int, - group_colors: Union[Sequence[Any], Mapping[K, Any], None], + group_colors: Union[Sequence[ColorLike], Mapping[K, ColorLike], None], ) -> np.ndarray: """Get the colors of each label.""" if group_colors is not None: @@ -70,7 +71,7 @@ def _get_color_info( fdata: T, group: Optional[Sequence[K]], group_names: Optional[Indexable[K, str]], - group_colors: Optional[Indexable[K, Any]], + group_colors: Optional[Indexable[K, ColorLike]], legend: bool, kwargs: Any, ) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: @@ -207,7 +208,7 @@ def plot( n_points: Union[int, Tuple[int, int], None] = None, domain_range: Optional[DomainRangeLike] = None, group: Union[Sequence[Any], None] = None, - group_colors: Union[Sequence[Any], Mapping[K, Any], None] = None, + group_colors: Union[Sequence[ColorLike], Mapping[K, ColorLike], None] = None, group_names: Union[Sequence[str], Mapping[K, str], None] = None, colormap_name: str = 'autumn', legend: bool = False, @@ -391,7 +392,7 @@ def plot( n_cols: Optional[int] = None, domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, group: Union[Sequence[Any], None] = None, - group_colors: Union[Sequence[Any], Mapping[K, Any], None] = None, + group_colors: Union[Sequence[ColorLike], Mapping[K, ColorLike], None] = None, group_names: Union[Sequence[str], Mapping[K, str], None] = None, legend: bool = False, **kwargs: Any, From 0f0997a1981e5545a0475f0318872686432c83f8 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 6 Apr 2021 14:38:32 +0200 Subject: [PATCH 207/417] Improve style of registration examples. 
--- examples/plot_landmark_registration.py | 37 ++++++++++++++-------- examples/plot_landmark_shift.py | 43 +++++++++++++++----------- examples/plot_shift_registration.py | 9 ++++-- setup.cfg | 3 ++ 4 files changed, 58 insertions(+), 34 deletions(-) diff --git a/examples/plot_landmark_registration.py b/examples/plot_landmark_registration.py index 2555d1922..dbcfc7d51 100644 --- a/examples/plot_landmark_registration.py +++ b/examples/plot_landmark_registration.py @@ -10,8 +10,8 @@ import matplotlib.pyplot as plt import numpy as np -import skfda +import skfda ############################################################################## # The simplest curve alignment procedure is landmark registration. This @@ -28,9 +28,14 @@ # We will use a dataset synthetically generated by # :func:`~skfda.datasets.make_multimodal_samples`, which in this case will # be used to generate bimodal curves. -# -fd = skfda.datasets.make_multimodal_samples(n_samples=4, n_modes=2, std=.002, - mode_std=.005, random_state=1) + +fd = skfda.datasets.make_multimodal_samples( + n_samples=4, + n_modes=2, + std=0.002, + mode_std=0.005, + random_state=1, +) fd.plot() ############################################################################## @@ -45,11 +50,13 @@ # # In general it will be necessary to use numerical or other methods to # determine the location of the landmarks. -# -landmarks = skfda.datasets.make_multimodal_landmarks(n_samples=4, n_modes=2, - std=.002, random_state=1 - ).squeeze() +landmarks = skfda.datasets.make_multimodal_landmarks( + n_samples=4, + n_modes=2, + std=0.002, + random_state=1, +).squeeze() print(landmarks) @@ -71,10 +78,12 @@ # the example of interpolation for more details). # # In this case we will place the landmarks at -0.5 and 0.5. 
-# warping = skfda.preprocessing.registration.landmark_registration_warping( - fd, landmarks, location=[-0.5, 0.5]) + fd, + landmarks, + location=[-0.5, 0.5], +) # Plots warping fig = warping.plot() @@ -107,7 +116,9 @@ # fd_registered = skfda.preprocessing.registration.landmark_registration( - fd, landmarks) + fd, + landmarks, +) fd_registered.plot() plt.scatter(np.mean(landmarks, axis=0), [1, 1]) @@ -119,5 +130,5 @@ # .. [RaSi2005] Ramsay, J., Silverman, B. W. (2005). Functional Data Analysis. # Springer. # -# .. [RaHoGr2009] Ramsay, J., Hooker, G. & Graves S. (2009). Functional Data Analysis -# with R and Matlab. Springer. +# .. [RaHoGr2009] Ramsay, J., Hooker, G. & Graves S. (2009). Functional Data +# Analysis with R and Matlab. Springer. diff --git a/examples/plot_landmark_shift.py b/examples/plot_landmark_shift.py index bc1d47f84..ec1722581 100644 --- a/examples/plot_landmark_shift.py +++ b/examples/plot_landmark_shift.py @@ -14,8 +14,8 @@ import matplotlib.pyplot as plt import numpy as np -import skfda +import skfda ############################################################################## # We will use an example dataset synthetically generated by @@ -23,7 +23,7 @@ # used to generate gaussian-like samples with a mode near to 0. # Each sample will be shifted to align their modes to a reference point using # the function :func:`~skfda.preprocessing.registration.landmark_shift`. -# + fd = skfda.datasets.make_multimodal_samples(random_state=1) fd.extrapolation = 'bounds' #  See extrapolation for a detailed explanation. @@ -46,7 +46,6 @@ # # In general it will be necessary to use numerical or other methods to # determine the location of the landmarks. 
-# landmarks = skfda.datasets.make_multimodal_landmarks(random_state=1).squeeze() @@ -57,17 +56,18 @@ ############################################################################## # Location of the landmarks: -# print(landmarks) ############################################################################## # The following figure shows the result of shifting the curves to align their # landmarks at 0. -# fd_registered = skfda.preprocessing.registration.landmark_shift( - fd, landmarks, location=0) + fd, + landmarks, + location=0, +) fig = fd_registered.plot() fig.axes[0].scatter(0, 1) @@ -79,15 +79,19 @@ # # If the location of the new reference point is not specified it is choosen # the point that minimizes the maximum amount of shift. -# # Curves aligned restricting the domain fd_restricted = skfda.preprocessing.registration.landmark_shift( - fd, landmarks, restrict_domain=True) + fd, + landmarks, + restrict_domain=True, +) # Curves aligned to default point without restrict domain fd_extrapolated = skfda.preprocessing.registration.landmark_shift( - fd, landmarks) + fd, + landmarks, +) fig = fd_extrapolated.plot(linestyle='dashed', label='Extrapolated samples') @@ -98,26 +102,30 @@ # without limitation of the domain or image dimension. As an example we are # going to create a datset with surfaces, in a similar way to the previous # case. -# -fd = skfda.datasets.make_multimodal_samples(n_samples=3, points_per_dim=30, - dim_domain=2, random_state=1) +fd = skfda.datasets.make_multimodal_samples( + n_samples=3, + points_per_dim=30, + dim_domain=2, + random_state=1, +) fd.plot() ############################################################################## # In this case the landmarks will be defined by tuples with 2 coordinates. 
-# landmarks = skfda.datasets.make_multimodal_landmarks( - n_samples=3, dim_domain=2, random_state=1).squeeze() + n_samples=3, + dim_domain=2, + random_state=1, +).squeeze() print(landmarks) ############################################################################## # As in the previous case, we can align the curves to a specific point, # or by default will be chosen the point that minimizes the maximum amount # of displacement. -# fd_registered = skfda.preprocessing.registration.landmark_shift(fd, landmarks) @@ -126,6 +134,5 @@ plt.show() ############################################################################### -# .. [RaSi2005-2] Ramsay, J., Silverman, B. W. (2005). Functional Data Analysis. -# Springer. -# +# .. [RaSi2005-2] Ramsay, J., Silverman, B. W. (2005). +# Functional Data Analysis. Springer. diff --git a/examples/plot_shift_registration.py b/examples/plot_shift_registration.py index e4838186f..4b6c128e6 100644 --- a/examples/plot_shift_registration.py +++ b/examples/plot_shift_registration.py @@ -63,8 +63,12 @@ # however, this effect is mitigated after the registration. # sinusoidal process without variation and noise -sine = make_sinusoidal_process(n_samples=1, phase_std=0, - amplitude_std=0, error_std=0) +sine = make_sinusoidal_process( + n_samples=1, + phase_std=0, + amplitude_std=0, + error_std=0, +) fig = fd_basis.mean().plot() fd_registered.mean().plot(fig) @@ -76,7 +80,6 @@ # The values of the shifts :math:`\delta_i`, stored in the attribute `deltas_` # may be relevant for further analysis, as they may be considered as nuisance # or random effects. 
-# print(shift_registration.deltas_) diff --git a/setup.cfg b/setup.cfg index 15dd3e8c9..5ea9e818b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -93,6 +93,9 @@ per-file-ignores = # Tests benefit from magic numbers and fixtures test_*.py: WPS432, WPS442 + + # Examples are allowed to have "commented code", call print and have magic numbers + plot_*.py: E800, WPS421, WPS432 rst-directives = # These are sorted alphabetically - but that does not matter From 6c13ff719fbfcab69a37c4755a223c53cedb6c52 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 7 Apr 2021 16:14:49 +0200 Subject: [PATCH 208/417] solved last issues --- skfda/exploratory/visualization/representation.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 025431640..3691f7128 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -30,10 +30,10 @@ from ...representation._functional_data import FData from ...representation._typing import DomainRangeLike from ._utils import ( + ColorLike, _get_figure_and_axes, _set_figure_layout_for_fdata, _set_labels, - ColorLike, ) K = TypeVar('K', contravariant=True) @@ -208,7 +208,11 @@ def plot( n_points: Union[int, Tuple[int, int], None] = None, domain_range: Optional[DomainRangeLike] = None, group: Union[Sequence[Any], None] = None, - group_colors: Union[Sequence[ColorLike], Mapping[K, ColorLike], None] = None, + group_colors: Union[ + Sequence[ColorLike], + Mapping[K, ColorLike], + None, + ] = None, group_names: Union[Sequence[str], Mapping[K, str], None] = None, colormap_name: str = 'autumn', legend: bool = False, @@ -392,7 +396,11 @@ def plot( n_cols: Optional[int] = None, domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, group: Union[Sequence[Any], None] = None, - group_colors: Union[Sequence[ColorLike], Mapping[K, ColorLike], None] = None, + 
group_colors: Union[ + Sequence[ColorLike], + Mapping[K, ColorLike], + None, + ] = None, group_names: Union[Sequence[str], Mapping[K, str], None] = None, legend: bool = False, **kwargs: Any, From 0792cb6591bd6058c6f0eaa37c7cd2eebb55cb71 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 7 Apr 2021 16:15:45 +0200 Subject: [PATCH 209/417] issues From 60853b393b957a0fd36200323c93c5f6e51ecfa0 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 7 Apr 2021 21:31:02 +0200 Subject: [PATCH 210/417] Add typing to elastic/SRVF functions. --- setup.cfg | 3 + skfda/preprocessing/registration/elastic.py | 826 +++++++++++--------- tests/test_elastic.py | 205 ++--- 3 files changed, 580 insertions(+), 454 deletions(-) diff --git a/setup.cfg b/setup.cfg index 5ea9e818b..83b84412d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -150,6 +150,9 @@ implicit_reexport = True [mypy-dcor.*] ignore_missing_imports = True +[mypy-fdasrsf.*] +ignore_missing_imports = True + [mypy-findiff.*] ignore_missing_imports = True diff --git a/skfda/preprocessing/registration/elastic.py b/skfda/preprocessing/registration/elastic.py index e10527ac2..6d189d29a 100644 --- a/skfda/preprocessing/registration/elastic.py +++ b/skfda/preprocessing/registration/elastic.py @@ -1,4 +1,8 @@ +from __future__ import annotations + +from typing import Callable, Optional, Union + import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils.validation import check_is_fitted @@ -8,21 +12,21 @@ from ... import FDataGrid from ..._utils import check_is_univariate +from ...representation._typing import ArrayLike from ...representation.interpolation import SplineInterpolation from ._warping import _normalize_scale, invert_warping from .base import RegistrationTransformer -__author__ = "Pablo Marcos Manchón" -__email__ = "pablo.marcosm@estudiante.uam.es" - ############################################################################### # Based on the original implementation of J. 
Derek Tucker in # # *fdasrsf_python* (https://github.com/jdtuck/fdasrsf_python) # # and *ElasticFDA.jl* (https://github.com/jdtuck/ElasticFDA.jl). # ############################################################################### +_MeanType = Callable[[FDataGrid], FDataGrid] + -class SRSF(BaseEstimator, TransformerMixin): +class SRSF(BaseEstimator, TransformerMixin): # type: ignore r"""Square-Root Slope Function (SRSF) transform. Let :math:`f : [a,b] \rightarrow \mathbb{R}` be an absolutely continuous @@ -47,11 +51,20 @@ class SRSF(BaseEstimator, TransformerMixin): which is dropped due to derivation. If it is applied the inverse transformation without fit the estimator it is assumed that :math:`f(a)=0`. + Args: + eval_points: (array_like, optional): Set of points where the + functions are evaluated, by default uses the sample points of + the :class:`FDataGrid ` transformed. + initial_value (float, optional): Initial value to apply in the + inverse transformation. If `None` there are stored the initial + values of the functions during the transformation to apply + during the inverse transformation. Defaults None. + Attributes: - eval_points (array_like, optional): Set of points where the - functions are evaluated, by default uses the sample points of the + eval_points: Set of points where the + functions are evaluated, by default uses the grid points of the fdatagrid. - initial_value (float, optional): Initial value to apply in the + initial_value: Initial value to apply in the inverse transformation. If `None` there are stored the initial values of the functions during the transformation to apply during the inverse transformation. Defaults None. @@ -67,7 +80,6 @@ class SRSF(BaseEstimator, TransformerMixin): Representation* (pp. 91-93). Springer. Examples: - Create a toy dataset and apply the transformation and its inverse. 
>>> from skfda.datasets import make_sinusoidal_process @@ -93,28 +105,21 @@ class SRSF(BaseEstimator, TransformerMixin): """ - def __init__(self, output_points=None, initial_value=None): - """Initializes the transformer. - - Args: - eval_points: (array_like, optional): Set of points where the - functions are evaluated, by default uses the sample points of - the :class:`FDataGrid ` transformed. - initial_value (float, optional): Initial value to apply in the - inverse transformation. If `None` there are stored the initial - values of the functions during the transformation to apply - during the inverse transformation. Defaults None. - - """ + def __init__( + self, + output_points: Optional[ArrayLike] = None, + initial_value: Optional[float] = None, + ) -> None: self.output_points = output_points self.initial_value = initial_value - def fit(self, X=None, y=None): - """This transformer do not need to be fitted. + def fit(self, X: FDataGrid, y: None = None) -> SRSF: + """ + Return self. This transformer does not need to be fitted. Args: - X (Ignored): Present for API conventions. - y (Ignored): Present for API conventions. + X: Present for API conventions. + y: Present for API conventions. Returns: (Estimator): self @@ -122,22 +127,22 @@ def fit(self, X=None, y=None): """ return self - def transform(self, X: FDataGrid, y=None): - r"""Computes the square-root slope function (SRSF) transform. + def transform(self, X: FDataGrid, y: None = None) -> FDataGrid: + r"""Compute the square-root slope function (SRSF) transform. - Let :math:`f : [a,b] \rightarrow \mathbb{R}` be an absolutely continuous - function, the SRSF transform is defined as [SK16-4-6-1]_: + Let :math:`f : [a,b] \rightarrow \mathbb{R}` be an absolutely + continuous function, the SRSF transform is defined as [SK16-4-6-1]_: .. math:: SRSF(f(t)) = sgn(f(t)) \sqrt{\dot f(t)|} = q(t) Args: - X (:class:`FDataGrid`): Functions to be transformed. - y (Ignored): Present for API conventions. 
+ X: Functions to be transformed. + y: Present for API conventions. Returns: - :class:`FDataGrid`: SRSF functions. + SRSF functions. Raises: ValueError: If functions are not univariate. @@ -153,7 +158,7 @@ def transform(self, X: FDataGrid, y=None): if self.output_points is None: output_points = X.grid_points[0] else: - output_points = self.output_points + output_points = np.asarray(self.output_points) g = X.derivative() @@ -173,8 +178,8 @@ def transform(self, X: FDataGrid, y=None): return X.copy(data_matrix=data_matrix, grid_points=output_points) - def inverse_transform(self, X: FDataGrid, y=None): - r"""Computes the inverse SRSF transform. + def inverse_transform(self, X: FDataGrid, y: None = None) -> FDataGrid: + r"""Compute the inverse SRSF transform. Given the srsf and the initial value the original function can be obtained as [SK16-4-6-2]_ : @@ -188,11 +193,11 @@ def inverse_transform(self, X: FDataGrid, y=None): estimator it is assumed that :math:`f(a)=0`. Args: - X (:class:`FDataGrid`): SRSF to be transformed. - y (Ignored): Present for API conventions. + X: SRSF to be transformed. + y: Present for API conventions. Returns: - :class:`FDataGrid`: Functions in the original space. + Functions in the original space. Raises: ValueError: If functions are multidimensional. @@ -205,26 +210,34 @@ def inverse_transform(self, X: FDataGrid, y=None): """ check_is_univariate(X) - if self.initial_value is None and not hasattr(self, 'initial_value_'): - raise AttributeError("When initial_value=None is expected a " - "previous transformation of the data to " - "store the initial values to apply in the " - "inverse transformation. 
Also it is possible " - "to fix these values setting the attribute" - "initial value without a previous " - "transformation.") + stored_initial_value = getattr(self, 'initial_value_', None) + + if self.initial_value is None and stored_initial_value is None: + raise AttributeError( + "When initial_value=None is expected a " + "previous transformation of the data to " + "store the initial values to apply in the " + "inverse transformation. Also it is possible " + "to fix these values setting the attribute" + "initial value without a previous " + "transformation.", + ) if self.output_points is None: output_points = X.grid_points[0] else: - output_points = self.output_points + output_points = np.asarray(self.output_points) data_matrix = X(output_points) data_matrix *= np.abs(data_matrix) - f_data_matrix = scipy.integrate.cumtrapz(data_matrix, x=output_points, - axis=1, initial=0) + f_data_matrix = scipy.integrate.cumtrapz( + data_matrix, + x=output_points, + axis=1, + initial=0, + ) # If the transformer was fitted, sum the initial value if self.initial_value is None: @@ -235,31 +248,343 @@ def inverse_transform(self, X: FDataGrid, y=None): return X.copy(data_matrix=f_data_matrix, grid_points=output_points) -def _elastic_alignment_array(template_data, q_data, - eval_points, penalty, grid_dim): - r"""Wrapper between the cython interface and python. +def _elastic_alignment_array( + template_data: np.ndarray, + q_data: np.ndarray, + eval_points: np.ndarray, + penalty: float, + grid_dim: int, +) -> np.ndarray: + """ + Wrap the :func:`optimum_reparam` function of fdasrsf. Selects the corresponding routine depending on the dimensions of the arrays. Args: - template_data (numpy.ndarray): Array with the srsf of the template. - q_data (numpy.ndarray): Array with the srsf of the curves - to be aligned. - eval_points (numpy.ndarray): Discretisation points of the functions. - penalty (float): Penalisation term. 
- grid_dim (int): Dimension of the grid used in the alignment algorithm. - - Return: - (numpy.ndarray): Array with the same shape than q_data with the srsf of + template_data: Array with the srsf of the template. + q_data: Array with the srsf of the curves + to be aligned. + eval_points: Discretisation points of the functions. + penalty: Penalisation term. + grid_dim: Dimension of the grid used in the alignment algorithm. + + Returns: + Array with the same shape than q_data with the srsf of the functions aligned to the template(s). + """ + return optimum_reparam( + np.ascontiguousarray(template_data.T), + np.ascontiguousarray(eval_points), + np.ascontiguousarray(q_data.T), + method="DP2", + lam=penalty, grid_dim=grid_dim, + ).T + + +def warping_mean( + warping: FDataGrid, + *, + max_iter: int = 100, + tol: float = 1e-6, + step_size: float = 0.3, +) -> FDataGrid: + r"""Compute the karcher mean of a set of warpings. + + Let :math:`\gamma_i i=1...n` be a set of warping functions + :math:`\gamma_i:[a,b] \rightarrow [a,b]` in :math:`\Gamma`, i.e., + monotone increasing and with the restriction :math:`\gamma_i(a)=a \, + \gamma_i(b)=b`. + + The karcher mean :math:`\bar \gamma` is defined as the warping that + minimises locally the sum of Fisher-Rao squared distances. + [SK16-8-3-2]_. + + .. math:: + \bar \gamma = argmin_{\gamma \in \Gamma} \sum_{i=1}^{n} + d_{FR}^2(\gamma, \gamma_i) + + The computation is performed using the structure of Hilbert Sphere obtained + after a transformation of the warpings, see [S11-3-3]_. - return optimum_reparam(np.ascontiguousarray(template_data.T), - np.ascontiguousarray(eval_points), - np.ascontiguousarray(q_data.T), - method="DP2", - lam=penalty, grid_dim=grid_dim).T + Args: + warping: Set of warpings. + max_iter: Maximum number of interations. Defaults to 100. 
+ tol: Convergence criterion, if the norm of the mean of the + shooting vectors, :math:`| \bar v | 1e-10: + vmean += theta / np.sin(theta) * (psi_i - np.cos(theta) * mu) + + # Mean of shooting vectors + vmean /= warping.n_samples + v_norm = np.sqrt(scipy.integrate.simps(np.square(vmean))) + + # Convergence criterion + if v_norm < tol: + break + + # Calculate exponential map of mu + a = np.cos(step_size * v_norm) + b = np.sin(step_size * v_norm) / v_norm + mu = a * mu + b * vmean + + # Recover mean in original gamma space + warping_mean = scipy.integrate.cumtrapz( + np.square(mu, out=mu)[0], + x=eval_points, + initial=0, + ) + + # Affine traslation to original scale + warping_mean = _normalize_scale( + warping_mean, + a=original_eval_points[0], + b=original_eval_points[-1], + ) + + monotone_interpolation = SplineInterpolation( + interpolation_order=3, + monotone=True, + ) + + return FDataGrid( + [warping_mean], + grid_points=original_eval_points, + interpolation=monotone_interpolation, + ) + + +def elastic_mean( + fdatagrid: FDataGrid, + *, + penalty: float = 0, + center: bool = True, + max_iter: int = 20, + tol: float = 1e-3, + initial: Optional[float] = None, + grid_dim: int = 7, + **kwargs, +) -> FDataGrid: + r"""Compute the karcher mean under the elastic metric. + + Calculates the karcher mean of a set of functional samples in the amplitude + space :math:`\mathcal{A}=\mathcal{F}/\Gamma`. + + Let :math:`q_i` the corresponding SRSF of the observation :math:`f_i`. + The space :math:`\mathcal{A}` is defined using the equivalence classes + :math:`[q_i]=\{ q_i \circ \gamma \| \gamma \in \Gamma \}`, where + :math:`\Gamma` denotes the space of warping functions. The karcher mean + in this space is defined as + + .. math:: + [\mu_q] = argmin_{[q] \in \mathcal{A}} \sum_{i=1}^n + d_{\lambda}^2([q],[q_i]) + + Once :math:`[\mu_q]` is obtained it is selected the element of the + equivalence class which makes the mean of the warpings employed be the + identity. 
+ + See [SK16-8-3-1]_ and [S11-3]_. + + Args: + fdatagrid: Set of functions to compute the + mean. + penalty: Penalisation term. Defaults to 0. + center: If ``True`` it is computed the mean of the warpings and + used to select a central mean. Defaults ``True``. + max_iter: Maximum number of iterations. Defaults to 20. + tol: Convergence criterion, the algorithm will stop if + :math:`|mu_{(\nu)} - mu_{(\nu - 1)}|_2 / | mu_{(\nu-1)} |_2 < tol`. + initial: Value of the mean at the starting point. By default + takes the average of the initial points of the samples. + grid_dim: Dimension of the grid used in the alignment + algorithm. Defaults 7. + kwargs: Named options to be pased to :func:`warping_mean`. + + Returns: + FDatagrid with the mean of the functions. + + Raises: + ValueError: If the object is multidimensional or the shape of the srsf + do not match with the fdatagrid. + + References: + .. [SK16-8-3-1] Srivastava, Anuj & Klassen, Eric P. (2016). Functional + and shape data analysis. In *Karcher Mean of Amplitudes* + (pp. 273-274). Springer. + + .. [S11-3] Srivastava, Anuj et. al. Registration of Functional Data + Using Fisher-Rao Metric (2011). In *Karcher Mean and Function + Alignment* (pp. 7-10). arXiv:1103.3817v2. 
+ + """ + check_is_univariate(fdatagrid) + + srsf_transformer = SRSF(initial_value=0) + fdatagrid_srsf = srsf_transformer.fit_transform(fdatagrid) + eval_points = fdatagrid.grid_points[0] + + eval_points_normalized = _normalize_scale(eval_points) + y_scale = eval_points[-1] - eval_points[0] + + interpolation = SplineInterpolation(interpolation_order=3, monotone=True) + + # Discretisation points + fdatagrid_normalized = FDataGrid( + fdatagrid(eval_points) / y_scale, + grid_points=eval_points_normalized, + ) + + srsf = fdatagrid_srsf(eval_points)[..., 0] + + # Initialize with function closest to the L2 mean with the L2 distance + centered = (srsf.T - srsf.mean(axis=0, keepdims=True).T).T + + distances = scipy.integrate.simps( + np.square(centered, out=centered), + eval_points_normalized, axis=1, + ) + + # Initialization of iteration + mu = srsf[np.argmin(distances)] + mu_aux = np.empty(mu.shape) + mu_1 = np.empty(mu.shape) + + # Main iteration + for _ in range(max_iter): + + gammas_matrix = _elastic_alignment_array( + mu, + srsf, + eval_points_normalized, + penalty, + grid_dim, + ) + + gammas = FDataGrid( + gammas_matrix, + grid_points=eval_points_normalized, + interpolation=interpolation, + ) + + fdatagrid_normalized = fdatagrid_normalized.compose(gammas) + srsf = srsf_transformer.transform( + fdatagrid_normalized, + ).data_matrix[..., 0] + + # Next iteration + mu_1 = srsf.mean(axis=0, out=mu_1) + + # Convergence criterion + mu_norm = np.sqrt( + scipy.integrate.simps( + np.square(mu, out=mu_aux), + eval_points_normalized, + ), + ) + + mu_diff = np.sqrt( + scipy.integrate.simps( + np.square(mu - mu_1, out=mu_aux), + eval_points_normalized, + ), + ) + + if mu_diff / mu_norm < tol: + break + + mu = mu_1 + + if initial is None: + initial = fdatagrid.data_matrix[:, 0].mean() + + srsf_transformer.set_params(initial_value=initial) + + # Karcher mean orbit in space L2/Gamma + karcher_mean = srsf_transformer.inverse_transform( + fdatagrid.copy( + data_matrix=[mu], + 
grid_points=eval_points, + sample_names=("Karcher mean",), + ), + ) + + if center: + # Gamma mean in Hilbert Sphere + mean_normalized = warping_mean(gammas, **kwargs) + + gamma_mean = FDataGrid( + _normalize_scale( + mean_normalized.data_matrix[..., 0], + a=eval_points[0], + b=eval_points[-1], + ), + grid_points=eval_points, + ) + + gamma_inverse = invert_warping(gamma_mean) + + karcher_mean = karcher_mean.compose(gamma_inverse) + + # Return center of the orbit + return karcher_mean class ElasticRegistration(RegistrationTransformer): @@ -314,9 +639,9 @@ class ElasticRegistration(RegistrationTransformer): alignment algorithm. Defaults 7. Attributes: - template_ (:class:`FDataGrid`): Template learned during fitting, + template\_: Template learned during fitting, used for alignment in :meth:`transform`. - warping_ (:class:`FDataGrid`): Warping applied during the last + warping\_: Warping applied during the last transformation. References: @@ -325,7 +650,6 @@ class ElasticRegistration(RegistrationTransformer): Registration* (pp. 73-122). Springer. Examples: - Elastic registration of with train/test sets. >>> from skfda.preprocessing.registration import \ @@ -348,38 +672,36 @@ class ElasticRegistration(RegistrationTransformer): """ - def __init__(self, template="elastic mean", penalty=0., output_points=None, - grid_dim=7): - """Initializes the registration transformer""" - + def __init__( + self, + template: Union[FDataGrid, _MeanType] = elastic_mean, + penalty: float = 0, + output_points: Optional[ArrayLike] = None, + grid_dim: int = 7, + ) -> None: self.template = template self.penalty = penalty self.output_points = output_points self.grid_dim = grid_dim - def fit(self, X: FDataGrid=None, y=None): + def fit(self, X: FDataGrid, y: None = None) -> RegistrationTransformer: """Fit the transformer. Learns the template used during the transformation. Args: - X (FDataGrid, optionl): Functional samples used as training - samples. 
If the template provided it is an FDataGrid this - samples are it is not need to construct the template from the - samples and this argument is ignored. - y (Ignored): Present for API conventions. + X: Functional observations used as training samples. If the + template provided is a FDataGrid this argument is ignored, as + it is not necessary to learn the template from the training + data. + y: Present for API conventions. Returns: - RegistrationTransformer: self. + self. """ if isinstance(self.template, FDataGrid): self.template_ = self.template # Template already constructed - elif X is None: - raise ValueError("Must be provided a dataset X to construct the " - "template.") - elif self.template == "elastic mean": - self.template_ = elastic_mean(X) else: self.template_ = self.template(X) @@ -389,26 +711,30 @@ def fit(self, X: FDataGrid=None, y=None): return self - def transform(self, X: FDataGrid, y=None): + def transform(self, X: FDataGrid, y: None = None) -> FDataGrid: """Apply elastic registration to the data. Args: - X (:class:`FDataGrid`): Functional data to be registered. - y (ignored): Present for API conventions. + X: Functional data to be registered. + y: Present for API conventions. Returns: - :class:`FDataGrid`: Registered samples. + Registered samples. 
""" check_is_fitted(self, '_template_srsf') check_is_univariate(X) - if (len(self._template_srsf) != 1 and - len(X) != len(self._template_srsf)): + if ( + len(self._template_srsf) != 1 + and len(X) != len(self._template_srsf) + ): - raise ValueError("The template should contain one sample to align " - "all the curves to the same function or the " - "same number of samples than X.") + raise ValueError( + "The template should contain one sample to align " + "all the curves to the same function or the " + "same number of samples than X.", + ) srsf = SRSF(output_points=self.output_points, initial_value=0) fdatagrid_srsf = srsf.fit_transform(X) @@ -430,24 +756,36 @@ def transform(self, X: FDataGrid, y=None): template_data = template_data[0] # Values of the warping - gamma = _elastic_alignment_array(template_data, q_data, - _normalize_scale(output_points), - self.penalty, self.grid_dim) + gamma = _elastic_alignment_array( + template_data, + q_data, + _normalize_scale(output_points), + self.penalty, + self.grid_dim, + ) # Normalize warping to original interval gamma = _normalize_scale( - gamma, a=output_points[0], b=output_points[-1]) + gamma, + a=output_points[0], + b=output_points[-1], + ) # Interpolation interpolation = SplineInterpolation( - interpolation_order=3, monotone=True) + interpolation_order=3, + monotone=True, + ) - self.warping_ = FDataGrid(gamma, output_points, - interpolation=interpolation) + self.warping_ = FDataGrid( + gamma, + output_points, + interpolation=interpolation, + ) return X.compose(self.warping_, eval_points=output_points) - def inverse_transform(self, X: FDataGrid, y=None): + def inverse_transform(self, X: FDataGrid, y: None = None) -> FDataGrid: r"""Reverse the registration procedure previosly applied. Let :math:`gamma(t)` the warping applied to construct a registered @@ -458,17 +796,18 @@ def inverse_transform(self, X: FDataGrid, y=None): :math:`f(t)=f^*(\gamma^{-1}(t))`. 
Args: - X (:class:`FDataGrid`): Functional data to apply the reverse + X: Functional data to apply the reverse transform. - y (Ignored): Present for API conventions. + y: Present for API conventions. Returns: - :class:`FDataGrid`: Functional data compose by the inverse warping. + Functional data compose by the inverse warping. Raises: ValueError: If the warpings :math:`\gamma` were not build via - :meth:`transform` or if the number of samples of `X` is different - than the number of samples of the dataset previosly transformed. + :meth:`transform` or if the number of samples of `X` is + different than the number of samples of the dataset + previously transformed. Examples: @@ -497,267 +836,20 @@ def inverse_transform(self, X: FDataGrid, y=None): :func:`invert_warping` """ - if not hasattr(self, 'warping_'): - raise ValueError("Data must be previosly transformed to apply the " - "inverse transform") - elif len(X) != len(self.warping_): - raise ValueError("Data must contain the same number of samples " - "than the dataset previously transformed") - - inverse_warping = invert_warping(self.warping_) - - return X.compose(inverse_warping, eval_points=self.output_points) - - -def warping_mean(warping, *, max_iter=100, tol=1e-6, step_size=.3): - r"""Compute the karcher mean of a set of warpings. - - Let :math:`\gamma_i i=1...n` be a set of warping functions - :math:`\gamma_i:[a,b] \rightarrow [a,b]` in :math:`\Gamma`, i.e., - monotone increasing and with the restriction :math:`\gamma_i(a)=a \, - \gamma_i(b)=b`. - - The karcher mean :math:`\bar \gamma` is defined as the warping that - minimises locally the sum of Fisher-Rao squared distances. - [SK16-8-3-2]_. - - .. math:: - \bar \gamma = argmin_{\gamma \in \Gamma} \sum_{i=1}^{n} - d_{FR}^2(\gamma, \gamma_i) - - The computation is performed using the structure of Hilbert Sphere obtained - after a transformation of the warpings, see [S11-3-3]_. - - Args: - warping (:class:`~skfda.FDataGrid`): Set of warpings. 
- max_iter (int): Maximum number of interations. Defaults to 100. - tol (float): Convergence criterion, if the norm of the mean of the - shooting vectors, :math:`| \bar v | 1e-10: - vmean += theta / np.sin(theta) * (psi_i - np.cos(theta) * mu) - - # Mean of shooting vectors - vmean /= warping.n_samples - v_norm = np.sqrt(scipy.integrate.simps(np.square(vmean))) - - # Convergence criterion - if v_norm < tol: - break - - # Calculate exponential map of mu - a = np.cos(step_size * v_norm) - b = np.sin(step_size * v_norm) / v_norm - mu = a * mu + b * vmean - - # Recover mean in original gamma space - warping_mean = scipy.integrate.cumtrapz(np.square(mu, out=mu)[0], - x=eval_points, initial=0) - - # Affine traslation to original scale - warping_mean = _normalize_scale(warping_mean, - a=original_eval_points[0], - b=original_eval_points[-1]) - - monotone_interpolation = SplineInterpolation(interpolation_order=3, - monotone=True) - - mean = FDataGrid([warping_mean], grid_points=original_eval_points, - interpolation=monotone_interpolation) - - return mean + warping = getattr(self, 'warping_', None) -def elastic_mean(fdatagrid, *, penalty=0., center=True, max_iter=20, tol=1e-3, - initial=None, grid_dim=7, **kwargs): - r"""Compute the karcher mean under the elastic metric. - - Calculates the karcher mean of a set of functional samples in the amplitude - space :math:`\mathcal{A}=\mathcal{F}/\Gamma`. - - Let :math:`q_i` the corresponding SRSF of the observation :math:`f_i`. - The space :math:`\mathcal{A}` is defined using the equivalence classes - :math:`[q_i]=\{ q_i \circ \gamma \| \gamma \in \Gamma \}`, where - :math:`\Gamma` denotes the space of warping functions. The karcher mean - in this space is defined as - - .. math:: - [\mu_q] = argmin_{[q] \in \mathcal{A}} \sum_{i=1}^n - d_{\lambda}^2([q],[q_i]) - - Once :math:`[\mu_q]` is obtained it is selected the element of the - equivalence class which makes the mean of the warpings employed be the - identity. 
- - See [SK16-8-3-1]_ and [S11-3]_. - - Args: - fdatagrid (:class:`~skfda.FDataGrid`): Set of functions to compute the - mean. - penalty (float): Penalisation term. Defaults to 0. - center (boolean): If true it is computed the mean of the warpings and - used to select a central mean. Defaults True. - max_iter (int): Maximum number of iterations. Defaults to 20. - tol (float): Convergence criterion, the algorithm will stop if - :math:`|mu_{(\nu)} - mu_{(\nu - 1)}|_2 / | mu_{(\nu-1)} |_2 < tol`. - initial (float): Value of the mean at the starting point. By default - takes the average of the initial points of the samples. - grid_dim (int, optional): Dimension of the grid used in the alignment - algorithm. Defaults 7. - ** kwargs : Named options to be pased to :func:`warping_mean`. - - Return: - :class:`~skfda.FDataGrid`: FDatagrid with the mean of the functions. - - Raises: - ValueError: If the object is multidimensional or the shape of the srsf - do not match with the fdatagrid. - - References: - .. [SK16-8-3-1] Srivastava, Anuj & Klassen, Eric P. (2016). Functional - and shape data analysis. In *Karcher Mean of Amplitudes* - (pp. 273-274). Springer. - - .. [S11-3] Srivastava, Anuj et. al. Registration of Functional Data - Using Fisher-Rao Metric (2011). In *Karcher Mean and Function - Alignment* (pp. 7-10). arXiv:1103.3817v2. 
- - """ - check_is_univariate(fdatagrid) - - srsf_transformer = SRSF(initial_value=0) - fdatagrid_srsf = srsf_transformer.fit_transform(fdatagrid) - eval_points = fdatagrid.grid_points[0] - - eval_points_normalized = _normalize_scale(eval_points) - y_scale = eval_points[-1] - eval_points[0] - - interpolation = SplineInterpolation(interpolation_order=3, monotone=True) - - # Discretisation points - fdatagrid_normalized = FDataGrid(fdatagrid(eval_points) / y_scale, - grid_points=eval_points_normalized) - - srsf = fdatagrid_srsf(eval_points)[..., 0] - - # Initialize with function closest to the L2 mean with the L2 distance - centered = (srsf.T - srsf.mean(axis=0, keepdims=True).T).T - - distances = scipy.integrate.simps(np.square(centered, out=centered), - eval_points_normalized, axis=1) - - # Initialization of iteration - mu = srsf[np.argmin(distances)] - mu_aux = np.empty(mu.shape) - mu_1 = np.empty(mu.shape) - - # Main iteration - for _ in range(max_iter): - - gammas = _elastic_alignment_array( - mu, srsf, eval_points_normalized, penalty, grid_dim) - gammas = FDataGrid(gammas, grid_points=eval_points_normalized, - interpolation=interpolation) - - fdatagrid_normalized = fdatagrid_normalized.compose(gammas) - srsf = srsf_transformer.transform( - fdatagrid_normalized).data_matrix[..., 0] - - # Next iteration - mu_1 = srsf.mean(axis=0, out=mu_1) - - # Convergence criterion - mu_norm = np.sqrt(scipy.integrate.simps(np.square(mu, out=mu_aux), - eval_points_normalized)) + if warping is None: + raise ValueError( + "Data must be previosly transformed to apply the " + "inverse transform", + ) + elif len(X) != len(warping): + raise ValueError( + "Data must contain the same number of samples " + "than the dataset previously transformed", + ) - mu_diff = np.sqrt(scipy.integrate.simps(np.square(mu - mu_1, - out=mu_aux), - eval_points_normalized)) + inverse_warping = invert_warping(warping) - if mu_diff / mu_norm < tol: - break - - mu = mu_1 - - if initial is None: - initial = 
fdatagrid.data_matrix[:, 0].mean() - - srsf_transformer.set_params(initial_value=initial) - - # Karcher mean orbit in space L2/Gamma - karcher_mean = srsf_transformer.inverse_transform( - fdatagrid.copy(data_matrix=[mu], grid_points=eval_points, - sample_names=("Karcher mean",))) - - if center: - # Gamma mean in Hilbert Sphere - mean_normalized = warping_mean(gammas, **kwargs) - - gamma_mean = FDataGrid(_normalize_scale( - mean_normalized.data_matrix[..., 0], - a=eval_points[0], - b=eval_points[-1]), - grid_points=eval_points) - - gamma_inverse = invert_warping(gamma_mean) - - karcher_mean = karcher_mean.compose(gamma_inverse) - - # Return center of the orbit - return karcher_mean + return X.compose(inverse_warping, eval_points=self.output_points) diff --git a/tests/test_elastic.py b/tests/test_elastic.py index 71f92ff3d..47ea176c5 100644 --- a/tests/test_elastic.py +++ b/tests/test_elastic.py @@ -1,3 +1,5 @@ +"""Tests for elastic registration and functions in the SRVF framework.""" + import unittest import numpy as np @@ -28,62 +30,74 @@ class TestElasticRegistration(unittest.TestCase): - """Test elastic registration""" + """Test elastic registration.""" - def setUp(self): - """Initialization of samples""" + def setUp(self) -> None: + """Initialize the samples.""" template = make_multimodal_samples(n_samples=1, std=0, random_state=1) self.template = template self.template_rep = template.concatenate( - template).concatenate(template) - self.unimodal_samples = make_multimodal_samples(n_samples=3, - random_state=1) + template, + ).concatenate(template) + self.unimodal_samples = make_multimodal_samples( + n_samples=3, + random_state=1, + ) t = np.linspace(-3, 3, 9) self.dummy_sample = FDataGrid([np.sin(t)], t) - def test_to_srsf(self): - """Test to srsf""" + def test_to_srsf(self) -> None: + """Test to srsf.""" # Checks SRSF conversion - srsf = SRSF().fit_transform(self.dummy_sample) - data_matrix = [[[-1.061897], [-0.75559027], [0.25355399], - [0.81547327], 
[0.95333713], [0.81547327], - [0.25355399], [-0.75559027], [-1.06189697]]] + data_matrix = [ + [ # noqa: WPS317 + [-1.061897], [-0.75559027], [0.25355399], + [0.81547327], [0.95333713], [0.81547327], + [0.25355399], [-0.75559027], [-1.06189697], + ], + ] np.testing.assert_almost_equal(data_matrix, srsf.data_matrix) - def test_from_srsf(self): - """Test from srsf""" - + def test_from_srsf(self) -> None: + """Test from srsf.""" # Checks SRSF conversion srsf = SRSF(initial_value=0).inverse_transform(self.dummy_sample) - data_matrix = [[[0.], [-0.23449228], [-0.83464009], - [-1.38200046], [-1.55623723], [-1.38200046], - [-0.83464009], [-0.23449228], [0.]]] + data_matrix = [ + [ # noqa: WPS317 + [0], [-0.23449228], [-0.83464009], + [-1.38200046], [-1.55623723], [-1.38200046], + [-0.83464009], [-0.23449228], [0], + ], + ] np.testing.assert_almost_equal(data_matrix, srsf.data_matrix) - def test_from_srsf_with_output_points(self): - """Test from srsf""" - + def test_from_srsf_with_output_points(self) -> None: + """Test from srsf.""" # Checks SRSF conversion srsf_transformer = SRSF( initial_value=0, - output_points=self.dummy_sample.grid_points[0]) + output_points=self.dummy_sample.grid_points[0], + ) srsf = srsf_transformer.inverse_transform(self.dummy_sample) - data_matrix = [[[0.], [-0.23449228], [-0.83464009], - [-1.38200046], [-1.55623723], [-1.38200046], - [-0.83464009], [-0.23449228], [0.]]] + data_matrix = [ + [ # noqa: WPS317 + [0], [-0.23449228], [-0.83464009], + [-1.38200046], [-1.55623723], [-1.38200046], + [-0.83464009], [-0.23449228], [0], + ], + ] np.testing.assert_almost_equal(data_matrix, srsf.data_matrix) - def test_srsf_conversion(self): - """Converts to srsf and pull backs""" - + def test_srsf_conversion(self) -> None: + """Converts to srsf and pull backs.""" srsf = SRSF() converted = srsf.fit_transform(self.unimodal_samples) @@ -94,24 +108,24 @@ def test_srsf_conversion(self): np.testing.assert_allclose(distances, 0, atol=8e-3) - def 
test_template_alignment(self): - """Test alignment to 1 template""" + def test_template_alignment(self) -> None: + """Test alignment to 1 template.""" reg = ElasticRegistration(template=self.template) register = reg.fit_transform(self.unimodal_samples) distances = metric(self.template, register) np.testing.assert_allclose(distances, 0, atol=12e-3) - def test_one_to_one_alignment(self): - """Test alignment to 1 sample to a template""" + def test_one_to_one_alignment(self) -> None: + """Test alignment to 1 sample to a template.""" reg = ElasticRegistration(template=self.template) register = reg.fit_transform(self.unimodal_samples[0]) distances = metric(self.template, register) np.testing.assert_allclose(distances, 0, atol=12e-3) - def test_set_alignment(self): - """Test alignment 3 curves to set with 3 templates""" + def test_set_alignment(self) -> None: + """Test alignment 3 curves to set with 3 templates.""" # Should give same result than test_template_alignment reg = ElasticRegistration(template=self.template_rep) register = reg.fit_transform(self.unimodal_samples) @@ -119,41 +133,51 @@ def test_set_alignment(self): np.testing.assert_allclose(distances, 0, atol=12e-3) - def test_default_alignment(self): - """Test alignment by default""" + def test_default_alignment(self) -> None: + """Test alignment by default.""" # Should give same result than test_template_alignment reg = ElasticRegistration() register = reg.fit_transform(self.unimodal_samples) - values = register([-.25, -.1, 0, .1, .25]) + values = register([-0.25, -0.1, 0, 0.1, 0.25]) - expected = [[[0.599058], [0.997427], [0.772248], - [0.412342], [0.064725]], - [[0.626875], [0.997155], [0.791649], - [0.382181], [0.050098]], - [[0.620992], [0.997369], [0.785886], - [0.376556], [0.048804]]] + expected = [ + [ + [0.599058], [0.997427], [0.772248], [0.412342], [0.064725], + ], + [ + [0.626875], [0.997155], [0.791649], [0.382181], [0.050098], + ], + [ + [0.620992], [0.997369], [0.785886], [0.376556], [0.048804], 
+ ], + ] np.testing.assert_allclose(values, expected, atol=1e-4) - def test_callable_alignment(self): - """Test alignment by default""" + def test_callable_alignment(self) -> None: + """Test alignment by default.""" # Should give same result than test_template_alignment reg = ElasticRegistration(template=elastic_mean) register = reg.fit_transform(self.unimodal_samples) - values = register([-.25, -.1, 0, .1, .25]) - expected = [[[0.599058], [0.997427], [0.772248], - [0.412342], [0.064725]], - [[0.626875], [0.997155], [0.791649], - [0.382181], [0.050098]], - [[0.620992], [0.997369], [0.785886], - [0.376556], [0.048804]]] + values = register([-0.25, -0.1, 0, 0.1, 0.25]) + expected = [ + [ + [0.599058], [0.997427], [0.772248], [0.412342], [0.064725], + ], + [ + [0.626875], [0.997155], [0.791649], [0.382181], [0.050098], + ], + [ + [0.620992], [0.997369], [0.785886], [0.376556], [0.048804], + ], + ] np.testing.assert_allclose(values, expected, atol=1e-4) - def test_simmetry_of_aligment(self): - """Check registration using inverse composition""" + def test_simmetry_of_aligment(self) -> None: + """Check registration using inverse composition.""" reg = ElasticRegistration(template=self.template) reg.fit_transform(self.unimodal_samples) warping = reg.warping_ @@ -163,13 +187,10 @@ def test_simmetry_of_aligment(self): np.testing.assert_allclose(distances, 0, atol=12e-3) - def test_raises(self): + def test_raises(self) -> None: + """Test that the assertions raise when appropriate.""" reg = ElasticRegistration() - # X not in fit, but template is not an FDataGrid - with np.testing.assert_raises(ValueError): - reg.fit() - # Inverse transform without previous transform with np.testing.assert_raises(ValueError): reg.inverse_transform(self.unimodal_samples) @@ -180,7 +201,9 @@ def test_raises(self): reg.inverse_transform(self.unimodal_samples[0]) # FDataGrid as template with n != 1 and n!= n_samples to transform - reg = ElasticRegistration(template=self.unimodal_samples).fit() + 
reg = ElasticRegistration(template=self.unimodal_samples).fit( + self.unimodal_samples[0], + ) with np.testing.assert_raises(ValueError): reg.transform(self.unimodal_samples[0]) @@ -191,20 +214,20 @@ def test_score(self) -> None: score = reg.score(self.unimodal_samples) np.testing.assert_almost_equal(score, 0.999389) - def test_warping_mean(self): + def test_warping_mean(self) -> None: + """Test the warping_mean function.""" warping = make_random_warping(start=-1, random_state=0) mean = warping_mean(warping) - values = mean([-1, -.5, 0, .5, 1]) - expected = [[[-1.], [-0.376241], [0.136193], [0.599291], [1.]]] + values = mean([-1, -0.5, 0, 0.5, 1]) + expected = [[[-1], [-0.376241], [0.136193], [0.599291], [1]]] np.testing.assert_array_almost_equal(values, expected) class TestElasticDistances(unittest.TestCase): - """Test elastic distances""" - - def test_fisher_rao(self): - """Test fisher rao distance""" + """Test elastic distances.""" + def test_fisher_rao(self) -> None: + """Test fisher rao distance.""" t = np.linspace(0, 1, 100) sample = FDataGrid([t, 1 - t], t) f = np.square(sample) @@ -215,9 +238,8 @@ def test_fisher_rao(self): np.testing.assert_almost_equal(res, distance, decimal=3) - def test_fisher_rao_invariance(self): - """Test invariance of fisher rao metric: d(f,g)= d(foh, goh)""" - + def test_fisher_rao_invariance(self) -> None: + """Test invariance of fisher rao metric: d(f,g)= d(foh, goh).""" t = np.linspace(0, np.pi, 1000) id = FDataGrid([t], t) cos = np.cos(id) @@ -228,21 +250,30 @@ def test_fisher_rao_invariance(self): distance_original = fisher_rao_distance(cos, sin) # Construction of 2 warpings - distance_warping = fisher_rao_distance(cos.compose(gamma), - sin.compose(gamma)) - distance_warping2 = fisher_rao_distance(cos.compose(gamma2), - sin.compose(gamma2)) + distance_warping = fisher_rao_distance( + cos.compose(gamma), + sin.compose(gamma), + ) + distance_warping2 = fisher_rao_distance( + cos.compose(gamma2), + sin.compose(gamma2), + ) # The 
error ~0.001 due to the derivation - np.testing.assert_allclose(distance_original, distance_warping, - atol=0.01) - - np.testing.assert_allclose(distance_original, distance_warping2, - atol=0.01) - - def test_amplitude_distance_limit(self): - """Test limit of amplitude distance penalty""" - + np.testing.assert_allclose( + distance_original, + distance_warping, + atol=0.01, + ) + + np.testing.assert_allclose( + distance_original, + distance_warping2, + atol=0.01, + ) + + def test_amplitude_distance_limit(self) -> None: + """Test limit of amplitude distance penalty.""" f = make_multimodal_samples(n_samples=1, random_state=1) g = make_multimodal_samples(n_samples=1, random_state=9999) @@ -251,16 +282,16 @@ def test_amplitude_distance_limit(self): np.testing.assert_almost_equal(amplitude_limit, fr_distance) - def test_phase_distance_id(self): - """Test of phase distance invariance""" + def test_phase_distance_id(self) -> None: + """Test of phase distance invariance.""" f = make_multimodal_samples(n_samples=1, random_state=1) phase = phase_distance(f, 2 * f) np.testing.assert_allclose(phase, 0, atol=1e-7) - def test_warping_distance(self): - """Test of warping distance""" + def test_warping_distance(self) -> None: + """Test of warping distance.""" t = np.linspace(0, 1, 1000) w1 = FDataGrid([t**5], t) w2 = FDataGrid([t**3], t) From 16c60c4c782127f942c843251a3423771f0d2067 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 7 Apr 2021 22:19:09 +0200 Subject: [PATCH 211/417] solved --- .../visualization/representation.py | 62 ++++++++----------- 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 3691f7128..f19e552bf 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -28,7 +28,7 @@ from ... 
import FDataGrid from ..._utils import _to_domain_range, constants from ...representation._functional_data import FData -from ...representation._typing import DomainRangeLike +from ...representation._typing import DomainRangeLike, GridPointsLike from ._utils import ( ColorLike, _get_figure_and_axes, @@ -50,19 +50,19 @@ def __getitem__(self, __key: K) -> V: def _get_label_colors( n_labels: int, - group_colors: Union[Sequence[ColorLike], Mapping[K, ColorLike], None], -) -> np.ndarray: + group_colors: Optional[Indexable[K, ColorLike]] = None, +) -> Union[np.ndarray, None]: """Get the colors of each label.""" - if group_colors is not None: + if group_colors is None: + colormap = matplotlib.cm.get_cmap() + group_colors = colormap(np.arange(n_labels) / (n_labels - 1)) + else: if len(group_colors) != n_labels: raise ValueError( "There must be a color in group_colors " "for each of the labels that appear in " "group.", ) - else: - colormap = matplotlib.cm.get_cmap() - group_colors = colormap(np.arange(n_labels) / (n_labels - 1)) return group_colors @@ -74,7 +74,7 @@ def _get_color_info( group_colors: Optional[Indexable[K, ColorLike]], legend: bool, kwargs: Any, -) -> Tuple[Any, Optional[List[matplotlib.patches.Patch]]]: +) -> Tuple[np.ndarray, Optional[List[matplotlib.patches.Patch]]]: patches = None @@ -143,14 +143,14 @@ class GraphPlot: Args: fdata: functional data set that we want to plot. - gradient_color_list: list of real values used to determine the color + gradient_values: list of real values used to determine the color in which each of the instances will be plotted. max_grad: maximum value that the gradient_list can take, it will be - used to normalize the ``gradient_color_list``. If not + used to normalize the ``gradient_values``. If not declared it will be initialized to the maximum value of gradient_list min_grad: minimum value that the gradient_list can take, it will be - used to normalize the ``gradient_color_list``. 
If not + used to normalize the ``gradient_values``. If not declared it will be initialized to the minimum value of gradient_list. @@ -159,14 +159,14 @@ class GraphPlot: def __init__( self, fdata: FData, - gradient_color_list: Optional[Sequence[float]] = None, + gradient_values: Optional[Sequence[float]] = None, max_grad: Optional[float] = None, min_grad: Optional[float] = None, ) -> None: self.fdata = fdata - self.gradient_color_list = gradient_color_list - if self.gradient_color_list is not None: - if len(self.gradient_color_list) != fdata.n_samples: + self.gradient_values = gradient_values + if self.gradient_values is not None: + if len(self.gradient_values) != fdata.n_samples: raise ValueError( "The length of the gradient color" "list should be the same as the number" @@ -174,21 +174,21 @@ def __init__( ) if min_grad is None: - self.min_grad = min(self.gradient_color_list) + self.min_grad = min(self.gradient_values) else: self.min_grad = min_grad if max_grad is None: - self.max_grad = max(self.gradient_color_list) + self.max_grad = max(self.gradient_values) else: self.max_grad = max_grad aux_list = [ grad_color - self.min_grad - for grad_color in self.gradient_color_list + for grad_color in self.gradient_values ] - self.gradient_list: Sequence[float] = ( + self.gradient_list = ( [ aux / (self.max_grad - self.min_grad) for aux in aux_list @@ -207,13 +207,9 @@ def plot( n_cols: Optional[int] = None, n_points: Union[int, Tuple[int, int], None] = None, domain_range: Optional[DomainRangeLike] = None, - group: Union[Sequence[Any], None] = None, - group_colors: Union[ - Sequence[ColorLike], - Mapping[K, ColorLike], - None, - ] = None, - group_names: Union[Sequence[str], Mapping[K, str], None] = None, + group: Optional[Sequence[K]] = None, + group_colors: Optional[Indexable[K, ColorLike]] = None, + group_names: Optional[Indexable[K, str]], colormap_name: str = 'autumn', legend: bool = False, **kwargs: Any, @@ -227,7 +223,7 @@ def plot( visualizations, one that displays 
the functions without any criteria choosing the colors and a new one that displays the function with a gradient of colors depending on the initial - gradient_color_list (normalized in gradient_list). + gradient_values (normalized in gradient_list). Args: chart: figure over @@ -381,7 +377,7 @@ class ScatterPlot: def __init__( self, fdata: FData, - grid_points: np.ndarray = None, + grid_points: Optional[GridPointsLike] = None, ) -> None: self.fdata = fdata self.grid_points = grid_points @@ -395,13 +391,9 @@ def plot( n_rows: Optional[int] = None, n_cols: Optional[int] = None, domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, - group: Union[Sequence[Any], None] = None, - group_colors: Union[ - Sequence[ColorLike], - Mapping[K, ColorLike], - None, - ] = None, - group_names: Union[Sequence[str], Mapping[K, str], None] = None, + group: Optional[Sequence[K]] = None, + group_colors: Optional[Indexable[K, ColorLike]] = None, + group_names: Optional[Indexable[K, str]], legend: bool = False, **kwargs: Any, ) -> Figure: From 9ea089bbae1f19c09170a120456211db125f6be8 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 7 Apr 2021 22:22:26 +0200 Subject: [PATCH 212/417] added __len__ --- skfda/exploratory/visualization/representation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index f19e552bf..5bd6d25b4 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -47,6 +47,9 @@ class Indexable(Protocol[K, V]): def __getitem__(self, __key: K) -> V: pass + def __len__(self) -> int: + pass + def _get_label_colors( n_labels: int, From de30c84f6e5a28e0cf0cacf10e50fa86c0d5b68a Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 7 Apr 2021 22:39:02 +0200 Subject: [PATCH 213/417] solved --- .../visualization/representation.py | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 
deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 5bd6d25b4..ec17fc059 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -59,24 +59,23 @@ def _get_label_colors( if group_colors is None: colormap = matplotlib.cm.get_cmap() group_colors = colormap(np.arange(n_labels) / (n_labels - 1)) - else: - if len(group_colors) != n_labels: - raise ValueError( - "There must be a color in group_colors " - "for each of the labels that appear in " - "group.", - ) + elif len(group_colors) != n_labels: + raise ValueError( + "There must be a color in group_colors " + "for each of the labels that appear in " + "group.", + ) return group_colors def _get_color_info( fdata: T, - group: Optional[Sequence[K]], - group_names: Optional[Indexable[K, str]], - group_colors: Optional[Indexable[K, ColorLike]], - legend: bool, - kwargs: Any, + group: Optional[Sequence[K]] = None, + group_names: Optional[Indexable[K, str]] = None, + group_colors: Optional[Indexable[K, ColorLike]] = None, + legend: bool = False, + kwargs: Any = None, ) -> Tuple[np.ndarray, Optional[List[matplotlib.patches.Patch]]]: patches = None @@ -212,7 +211,7 @@ def plot( domain_range: Optional[DomainRangeLike] = None, group: Optional[Sequence[K]] = None, group_colors: Optional[Indexable[K, ColorLike]] = None, - group_names: Optional[Indexable[K, str]], + group_names: Optional[Indexable[K, str]] = None, colormap_name: str = 'autumn', legend: bool = False, **kwargs: Any, @@ -396,7 +395,7 @@ def plot( domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, group: Optional[Sequence[K]] = None, group_colors: Optional[Indexable[K, ColorLike]] = None, - group_names: Optional[Indexable[K, str]], + group_names: Optional[Indexable[K, str]] = None, legend: bool = False, **kwargs: Any, ) -> Figure: From cdbfaab128d1c9b46d5264bcf06e8476e99ab568 Mon Sep 17 00:00:00 2001 From: 
VNMabus Date: Thu, 8 Apr 2021 13:44:37 +0200 Subject: [PATCH 214/417] First version of the tutorial. --- docs/.gitignore | 1 + docs/conf.py | 4 +- docs/index.rst | 5 ++ tutorial/README.txt | 4 ++ tutorial/__init__.py | 0 tutorial/plot_getting_data.py | 101 ++++++++++++++++++++++++++++++++++ 6 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 tutorial/README.txt create mode 100644 tutorial/__init__.py create mode 100644 tutorial/plot_getting_data.py diff --git a/docs/.gitignore b/docs/.gitignore index 1588679a9..6efaba914 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1,3 +1,4 @@ /auto_examples/ +/auto_tutorial/ /backreferences/ **/autosummary/ \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index d2e2e6d63..6e8437a8c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -229,9 +229,9 @@ sphinx_gallery_conf = { # path to your examples scripts - 'examples_dirs': '../examples', + 'examples_dirs': ['../examples', '../tutorial'], # path where to save gallery generated examples - 'gallery_dirs': 'auto_examples', + 'gallery_dirs': ['auto_examples', 'auto_tutorial'], 'reference_url': { # The module you locally document uses None 'skfda': None, diff --git a/docs/index.rst b/docs/index.rst index 272a0438c..dbf920de0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -23,6 +23,11 @@ Github you can find more information related to the development of the package. apilist glossary +.. toctree:: + :maxdepth: 2 + :titlesonly: + + auto_tutorial/index .. toctree:: :maxdepth: 1 diff --git a/tutorial/README.txt b/tutorial/README.txt new file mode 100644 index 000000000..57e93b2fa --- /dev/null +++ b/tutorial/README.txt @@ -0,0 +1,4 @@ +Tutorial +======== + +Step by step guide on how to use the package. 
\ No newline at end of file diff --git a/tutorial/__init__.py b/tutorial/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tutorial/plot_getting_data.py b/tutorial/plot_getting_data.py new file mode 100644 index 000000000..22f6056b5 --- /dev/null +++ b/tutorial/plot_getting_data.py @@ -0,0 +1,101 @@ +""" +Getting the data +================ + +How to get data to use in scikit-fda. + +""" + +# Author: Carlos Ramos Carreño +# License: MIT + +############################################################################## +# The FDataGrid class +# ------------------- +# +# In order to use scikit-fda, first we need functional data to analyze. +# A common case is to have each functional observation measured at the same +# points. +# This kind of functional data is easily representable in scikit-fda using +# the :class:`~skfda.representation.grid.FDataGrid` class. +# +# The :class:`~skfda.representation.grid.FDataGrid` has two important +# attributes: ``data_matrix`` and ``grid_points``. The attribute +# ``grid_points`` is a tuple with the same length as the number of domain +# dimensions (that is, one for curves, two for surfaces...). Each of its +# elements is a 1D numpy :class:`~numpy.ndarray` containing the measurement +# points for that particular dimension. The attribute ``data_matrix`` is a +# numpy :class:`~numpy.ndarray` containing the measured values of the +# functions in the grid spanned by the grid points. For functions +# :math:`\{f_i: \mathbb{R}^p \to \mathbb{R}^q\}_{i=1}^N` this is a tensor +# with dimensions :math:`N \times M_1 \times \ldots \times M_p \times q`, +# where :math:`M_i` is the number of measurement points for the domain +# dimension :math:`i`. + +############################################################################## +# In order to create a :class:`~skfda.representation.grid.FDataGrid`, these +# attributes may be provided. The attributes are converted to +# :class:`~numpy.ndarray` when necessary. 
+ +############################################################################## +# .. note:: +# +# The grid points can be omitted, +# and in that case their number is inferred from the dimensions of +# ``data_matrix`` and they are automatically assigned as equispaced points +# in the unitary cube in the domain set. +# +# In the common case of functions with domain dimension of 1, the list of +# grid points can be passed directly as ``grid_points``. +# +# If the codomain dimension is 1, the last dimension of ``data_matrix`` +# can be dropped. + +############################################################################## +# In order to better understand the FDataGrid structure, consider the +# following example: + +import skfda + +grid_points = [ + [0.2, 0.5, 0.7], # Measurement points in first domain dimension + [0, 1], # Measurement points in second domain dimension +] + +data_matrix = [ + [ # First observation + [ # 0.2 + [ # Value at (0.2, 0) + [1, 2, 3, 4], + ], + [ # Value at (0.2, 1) + [0, 1, -1.3, 2], + ], + ], + [ # 0.5 + [ # Value at (0.5, 0) + [-2, 0, 5.5, 7], + ], + [ # Value at (0.5, 1) + [2, 1.1, -1, -2], + ], + ], + [ # 0.7 + [ # Value at (0.7, 0) + [0, 0, 1, 1], + ], + [ # Value at (0.7, 1) + [-3, 5, -0.5, -2], + ], + ], + ], + # This example has only one observation. Next observations would be + # added here. +] + +fd = skfda.FDataGrid( + data_matrix=data_matrix, + grid_points=grid_points, +) + +fd.plot() From 021bb63962e402124d5ebafc2c8729155b3660c7 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 8 Apr 2021 15:37:27 +0200 Subject: [PATCH 215/417] Improve the tutorial. --- tutorial/plot_getting_data.py | 142 +++++++++++++++++++++++++++------- 1 file changed, 113 insertions(+), 29 deletions(-) diff --git a/tutorial/plot_getting_data.py b/tutorial/plot_getting_data.py index 22f6056b5..d66d46051 100644 --- a/tutorial/plot_getting_data.py +++ b/tutorial/plot_getting_data.py @@ -52,41 +52,64 @@ # can be dropped. 
############################################################################## -# In order to better understand the FDataGrid structure, consider the -# following example: +# The following example shows the creation of a +# :class:`~skfda.representation.grid.FDataGrid` with two functions (curves) +# :math:`\{f_i: \mathbb{R} \to \mathbb{R}\}, i=1,2` measured at the same +# (non-equispaced) points. import skfda -grid_points = [ +grid_points = [0, 0.2, 0.5, 0.9, 1] # Grid points of the curves +data_matrix = [ + [0, 0.2, 0.5, 0.9, 1], # First observation + [0, 0.04, 0.25, 0.81, 1], # Second observation +] + +fd = skfda.FDataGrid( + data_matrix=data_matrix, + grid_points=grid_points, +) + +fd.plot() + +############################################################################## +# Advanced example +# ^^^^^^^^^^^^^^^^ +# +# In order to better understand the FDataGrid structure, you can consider the +# following example, in which a :class:`~skfda.representation.grid.FDataGrid` +# object is created, containing just one function (vector-valued surface) +# :math:`\{f_i: \mathbb{R}^2 \to \mathbb{R}^4`. 
+ + +grid_points_surface = [ [0.2, 0.5, 0.7], # Measurement points in first domain dimension - [0, 1], # Measurement points in second domain dimension + [0, 1.5], # Measurement points in second domain dimension ] -data_matrix = [ - [ # First observation - [ # 0.2 - [ # Value at (0.2, 0) - [1, 2, 3, 4], - ], - [ # Value at (0.2, 1) - [0, 1, -1.3, 2], - ], +data_matrix_surface = [ + # First observation + [ + # 0.2 + [ + # Value at (0.2, 0) + [1, 2, 3, 4], + # Value at (0.2, 1.5) + [0, 1, -1.3, 2], ], - [ # 0.5 - [ # Value at (0.5, 0) - [-2, 0, 5.5, 7], - ], - [ # Value at (0.5, 1) - [2, 1.1, -1, -2], - ], + # 0.5 + [ + # Value at (0.5, 0) + [-2, 0, 5.5, 7], + # Value at (0.5, 1.5) + [2, 1.1, -1, -2], ], - [ # 0.7 - [ # Value at (0.7, 0) - [0, 0, 1, 1], - ], - [ # Value at (0.7, 1) - [-3, 5, -0.5, -2], - ], + # 0.7 + [ + # Value at (0.7, 0) + [0, 0, 1, 1], + # Value at (0.7, 1.5) + [-3, 5, -0.5, -2], ], ], # This example has only one observation. Next observations would be @@ -94,8 +117,69 @@ ] fd = skfda.FDataGrid( - data_matrix=data_matrix, - grid_points=grid_points, + data_matrix=data_matrix_surface, + grid_points=grid_points_surface, ) fd.plot() + +############################################################################## +# Importing data +# -------------- +# +# Usually one does not construct manually the functions, but instead uses +# measurements already formatted in a common format, such as comma-separated +# values (CSV), attribute-relation file format (ARFF) or Matlab and R formats. +# +# If your data is in one of these formats, you can import it into a numpy +# array using the IO functions available in +# `Numpy `_ (for simple +# text-based or binary formats, such as CSV) or in +# `Scipy `_ (for Matlab, +# Fortran or ARFF files). For importing data in the R format one can also +# use the package `RData `_ with is already a +# dependency of scikit-fda, as it is used to load the example datasets. 
+ +############################################################################## +# Common datasets +# --------------- +# +# scikit-fda can download and import for you several of the most popular +# datasets in the :term:`FDA` literature, such as the Berkeley Growth +# dataset (function :func:`~skfda.datasets.fetch_growth`) or the Canadian +# Weather dataset (function :func:`~skfda.datasets.fetch_weather`). + +X, y = skfda.datasets.fetch_growth(return_X_y=True) + +X.plot(group=y) + +############################################################################## +# Datasets from CRAN +# ^^^^^^^^^^^^^^^^^^ +# +# If you want to work with a dataset for which no fetching function exist, and +# you know that is available inside a R package in the CRAN repository, you +# can try using the function :func:`~skfda.datasets.fetch_cran`. This function +# will load the package, fetch the dataset and convert it to Python objects +# using the packages +# `scikit-datasets `_ and +# `RData `_. As datasets in CRAN follow no +# particular structure, you will need to know how it is structured internally +# in order to use it properly. + +############################################################################## +# .. note:: +# +# Functional data objects from some packages, such as +# `fda.usc `_ +# are automatically recognized as such and converted to +# :class:`~skfda.representation.grid.FDataGrid` instances. This +# behaviour can be disabled or customized to work with more packages. + +data = skfda.datasets.fetch_cran("poblenou", "fda.usc") +data["poblenou"]["nox"].plot() + +############################################################################## +# In order to know all the available functionalities to load existing and +# synthetic datasets it is recommended to look at the documentation of the +# :doc:`datasets ` module. 
From 6221bcadc3ba9089ca3b1b87dd3b173e9d1c87f3 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 8 Apr 2021 19:22:59 +0200 Subject: [PATCH 216/417] Tutorial Getting Data --- docs/Makefile | 1 + docs/index.rst | 2 - setup.cfg | 6 +- tutorial/plot_getting_data.py | 112 ++++++++++++++++++++++++++++++++-- 4 files changed, 112 insertions(+), 9 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index e5d3c5645..ad2c23326 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -51,6 +51,7 @@ help: clean: rm -rf $(BUILDDIR)/* rm -rf auto_examples + rm -rf auto_tutorial rm -rf modules/autosummary rm -rf modules/exploratory/visualization/autosummary rm -rf modules/exploratory/autosummary diff --git a/docs/index.rst b/docs/index.rst index dbf920de0..5fa9c798c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -24,8 +24,6 @@ Github you can find more information related to the development of the package. glossary .. toctree:: - :maxdepth: 2 - :titlesonly: auto_tutorial/index diff --git a/setup.cfg b/setup.cfg index 83b84412d..3401101ae 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,7 +4,7 @@ test=pytest [tool:pytest] addopts = --doctest-modules doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS -norecursedirs = '.*', 'build', 'dist' '*.egg' 'venv' .svn _build docs/auto_examples examples +norecursedirs = '.*', 'build', 'dist' '*.egg' 'venv' .svn _build docs/auto_examples examples docs/auto_tutorial tutorial [flake8] ignore = @@ -94,8 +94,8 @@ per-file-ignores = # Tests benefit from magic numbers and fixtures test_*.py: WPS432, WPS442 - # Examples are allowed to have "commented code", call print and have magic numbers - plot_*.py: E800, WPS421, WPS432 + # Examples are allowed to have imports in the middle, "commented code", call print and have magic numbers + plot_*.py: E402, E800, WPS421, WPS432 rst-directives = # These are sorted alphabetically - but that does not matter diff --git a/tutorial/plot_getting_data.py b/tutorial/plot_getting_data.py index d66d46051..9e092fc94 100644 
--- a/tutorial/plot_getting_data.py +++ b/tutorial/plot_getting_data.py @@ -4,10 +4,14 @@ How to get data to use in scikit-fda. +isort:skip_file + """ # Author: Carlos Ramos Carreño # License: MIT +# +# sphinx_gallery_thumbnail_number = 6 ############################################################################## # The FDataGrid class @@ -79,7 +83,7 @@ # In order to better understand the FDataGrid structure, you can consider the # following example, in which a :class:`~skfda.representation.grid.FDataGrid` # object is created, containing just one function (vector-valued surface) -# :math:`\{f_i: \mathbb{R}^2 \to \mathbb{R}^4`. +# :math:`f: \mathbb{R}^2 \to \mathbb{R}^4`. grid_points_surface = [ @@ -140,6 +144,40 @@ # use the package `RData `_ with is already a # dependency of scikit-fda, as it is used to load the example datasets. +############################################################################## +# Once your data has been introduced as a :class:`~numpy.ndarray` instance, +# you will need to give it the proper dimensions and use it to instantiate +# a functional data object. + +############################################################################## +# .. note:: +# +# :class:`Pandas DataFrames ` are also popular as +# datasets containers in the Python scientific ecosystem. If you have +# data in a Pandas DataFrame, you can extract its content as a Numpy +# array using the method :meth:`pandas.DataFrame.to_numpy` of the +# DataFrame. + +############################################################################## +# As an example, we will load the +# :func:`digits dataset ` of scikit-learn, which +# is a preprocessed subset of the MNIST dataset, containing digit images. The +# data is already a numpy array. As the data has been flattened into a 1D +# vector of pixels, we need to reshape the arrays to their original 8x8 shape. +# Then this array can be used to construct the digits as surfaces. 
+ +from sklearn.datasets import load_digits + +X, y = load_digits(return_X_y=True) +X = X.reshape(-1, 8, 8) + +fd = skfda.FDataGrid(X) + +# Plot the first 2 observations +fd[0].plot() +fd[1].plot() + + ############################################################################## # Common datasets # --------------- @@ -147,7 +185,10 @@ # scikit-fda can download and import for you several of the most popular # datasets in the :term:`FDA` literature, such as the Berkeley Growth # dataset (function :func:`~skfda.datasets.fetch_growth`) or the Canadian -# Weather dataset (function :func:`~skfda.datasets.fetch_weather`). +# Weather dataset (function :func:`~skfda.datasets.fetch_weather`). These +# datasets are often useful as benchmarks, in order to compare results +# between different algorithms, or simply as examples to use in teaching or +# research. X, y = skfda.datasets.fetch_growth(return_X_y=True) @@ -176,8 +217,71 @@ # :class:`~skfda.representation.grid.FDataGrid` instances. This # behaviour can be disabled or customized to work with more packages. -data = skfda.datasets.fetch_cran("poblenou", "fda.usc") -data["poblenou"]["nox"].plot() +data = skfda.datasets.fetch_cran("MCO", "fda.usc") + +data["MCO"]["intact"].plot() + +############################################################################## +# Datasets from the UEA & UCR Time Series Classification Repository +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# The `UEA & UCR Time Series Classification Repository +# `_ is a popular repository +# for classification problems involving time series data. The datasets used +# can be considered also as functional observations, where the functions +# involved have domain dimension of 1, and the grid points are +# equispaced. Thus, they have also been used in the :term:`FDA` literature. +# The original UCR datasets are univariate time series, while the new UEA +# datasets incorporate also vector-valued data. 
+# In scikit-fda, the function :func:`~skfda.datasets.fetch_ucr` can be used +# to obtain both kinds of datasets as +# :class:`~skfda.representation.grid.FDataGrid` instances. + +# Load ArrowHead dataset from UCR +dataset = skfda.datasets.fetch_ucr("ArrowHead") +dataset["data"].plot() + +############################################################################## + +# Load BasicMotions dataset from UEA +dataset = skfda.datasets.fetch_ucr("BasicMotions") +dataset["data"].plot() + +############################################################################## +# Synthetic data +# -------------- +# +# Sometimes it is not enough to have real-world data at your disposal. +# Perhaps the messy nature of real-world data makes it difficult to detect when +# a particular algorithm has a strange behaviour. Perhaps you want to see how +# it performs under a simplified model. Maybe you want to see what happens +# when your data has particular characteristics, for which no dataset is +# available. Or maybe you only want to illustrate a concept without having +# to introduce a particular set of data. +# +# In those cases, the ability to use generated data is desirable. To aid this +# use case, scikit-fda provides several functions that generate data +# according to some model. These functions are in the +# :doc:`datasets ` module and have the prefix ``make_``. +# Maybe the most useful of those are the functions +# :func:`skfda.datasets.make_gaussian_process` and +# :func:`skfda.datasets.make_gaussian` which can be used to generate Gaussian +# processes and Gaussian fields with different covariance functions.
+ +import numpy as np + +cov = skfda.misc.covariances.Exponential(length_scale=0.1) + +fd = skfda.datasets.make_gaussian_process( + start=0, + stop=4, + n_samples=5, + n_features=100, + mean=lambda t: np.power(t, 2), + cov=cov, +) + +fd.plot() ############################################################################## # In order to know all the available functionalities to load existing and From aba715fadacbaa5246d8ed68bfcf40b44ada0fad Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 8 Apr 2021 21:03:42 +0200 Subject: [PATCH 217/417] Disable isort. --- setup.cfg | 1 + tutorial/plot_getting_data.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 3401101ae..56aa08010 100644 --- a/setup.cfg +++ b/setup.cfg @@ -141,6 +141,7 @@ multi_line_output = 3 include_trailing_comma = true use_parentheses = true combine_as_imports = 1 +skip_glob = **/plot_*.py plot_*.py [mypy] strict = True diff --git a/tutorial/plot_getting_data.py b/tutorial/plot_getting_data.py index 9e092fc94..c5642dd6f 100644 --- a/tutorial/plot_getting_data.py +++ b/tutorial/plot_getting_data.py @@ -4,7 +4,8 @@ How to get data to use in scikit-fda. -isort:skip_file +.. Disable isort + isort:skip_file """ From 8432f7ee7672da77e314add007f2460436358fda Mon Sep 17 00:00:00 2001 From: VNMabus Date: Fri, 9 Apr 2021 14:50:52 +0200 Subject: [PATCH 218/417] Add scikit-fda/scikit-learn tutorial --- tutorial/plot_getting_data.py | 8 +- tutorial/plot_skfda_sklearn.py | 213 ++++++++++++++++++++++++++++++++ tutorial/plot_smoothing_data.py | 19 +++ 3 files changed, 239 insertions(+), 1 deletion(-) create mode 100644 tutorial/plot_skfda_sklearn.py create mode 100644 tutorial/plot_smoothing_data.py diff --git a/tutorial/plot_getting_data.py b/tutorial/plot_getting_data.py index c5642dd6f..7d8c92ef4 100644 --- a/tutorial/plot_getting_data.py +++ b/tutorial/plot_getting_data.py @@ -2,7 +2,13 @@ Getting the data ================ -How to get data to use in scikit-fda. 
+In this section of the tutorial, we will discuss how to get functional data to +use in scikit-fda. We will briefly describe the +:class:`~skfda.representation.grid.FDataGrid` class, which is the type that +scikit-fda uses for storing and working with functional data in discretized +form. We will discuss also how to import functional data from several sources +and show how to fetch and load existing datasets popular in the :term:`FDA` +literature. .. Disable isort isort:skip_file diff --git a/tutorial/plot_skfda_sklearn.py b/tutorial/plot_skfda_sklearn.py new file mode 100644 index 000000000..0aaf68c13 --- /dev/null +++ b/tutorial/plot_skfda_sklearn.py @@ -0,0 +1,213 @@ +""" +Scikit-fda and scikit-learn +=========================== + +In this section, we will explain how scikit-fda interacts with the popular +machine learning package scikit-learn. We will introduce briefly the main +concepts of scikit-learn and how scikit-fda reuses the same concepts extending +them to the :term:`functional data analysis` field. + +.. Disable isort + isort:skip_file + +""" + +# Author: Carlos Ramos Carreño +# License: MIT + +############################################################################## +# A brief summary of scikit-learn architecture +# -------------------------------------------- +# +# The library scikit-learn is probably the most well-known Python package +# for machine learning. This package focuses on machine learning using +# multivariate data, which should be stored in a numpy +# :class:`~numpy.ndarray` in order to process it. However, this library has +# defined a particular architecture that can be followed in order to provide +# new tools that work in situations not even imagined by the original authors, +# while remaining compatible with the tools already provided in scikit-learn. +# +# In scikit-fda, the same architecture is applied in order to work with +# functional data observations.
As a result, scikit-fda tools are +# largely compatible with scikit-learn tools, and it is possible to reuse +# objects such as :class:`pipelines ` or even +# hyperparameter selection methods such as +# :class:`grid search cross-validation ` +# in the functional data setting. +# +# We will introduce briefly the main concepts in scikit-learn, and explain how +# the tools in scikit-fda are related to them. This is not intended as a full +# explanation of scikit-learn architecture, and the reader is encouraged to +# look at the `scikit-learn tutorials +# `_ in order to achieve +# a deeper understanding of it. + +############################################################################## +# The Estimator object +# ^^^^^^^^^^^^^^^^^^^^ +# +# A central concept in scikit-learn (and scikit-fda) is what is called an +# estimator. An estimator in this context is an object that can learn from +# the data. Thus, classification, regression and clustering methods, as well +# as transformations with parameters learned from the training data are +# particular kinds of estimators. Estimators can also be instanced passing +# parameters, which can be tuned to the data using hyperparameter selection +# methods. +# +# Estimator objects have a ``fit`` method, which receives the training data +# and (if necessary) the training targets. This method uses the training data +# in order to learn some parameters of a model. When the learned parameters +# are part of the user-facing API, then by convention they are attributes of +# the estimator ending with the ``_`` character. + +############################################################################## +# As a concrete example of this, consider a nearest centroid classifier +# for functional data. The object +# :class:`~skfda.ml.classification.NearestCentroid` is a classifier, and +# thus an estimator.
As part of the training process the centroids of +# the classes are computed and available as the learned parameter +# ``centroids_``. +# +# .. note:: +# The function :func:`~sklearn.model_selection.train_test_split` is +# one of the functions originally from scikit-learn that can be +# directly reused in scikit-fda. + +import skfda +from sklearn.model_selection import train_test_split + +X, y = skfda.datasets.fetch_growth(return_X_y=True) + +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + +classifier = skfda.ml.classification.NearestCentroid() +classifier.fit(X_train, y_train) +classifier.centroids_.plot() + +############################################################################## +# Transformers +# ^^^^^^^^^^^^ +# +# :term:`Transformers ` are estimators which can convert +# data to a new form. Examples of them are preprocessing methods, such as +# smoothing, registration and dimensionality reduction methods. They always +# implement ``fit_transform`` for fitting and transforming the data in one +# step. The transformers may be :term:`sklearn:inductive`, which means that +# they can transform new data using the learned parameters. In that case they +# implement the ``transform`` method to transform new data. If the +# transformation is reversible, they usually also implement +# ``inverse_transform``. + +############################################################################## +# As an example consider the smoothing method +# :class:`skfda.preprocessing.smoothing.NadarayaWatson`. Smoothing methods +# attempt to remove noise from the data leveraging its continuous nature. +# As these methods discard information of the original data they usually are +# not reversible.
+ +import skfda.preprocessing.smoothing.kernel_smoothers as ks + +X, y = skfda.datasets.fetch_phoneme(return_X_y=True) + +# Keep the first 5 functions +X = X[:5] + +X.plot() + +smoother = ks.NadarayaWatsonSmoother() +X_smooth = smoother.fit_transform(X) + +X_smooth.plot() + +############################################################################## +# Predictors (classifiers, regressors, clusterers...) +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# :term:`Predictors ` in scikit-learn are estimators that +# can assign a certain target to a particular observation. This includes +# supervised methods such as classifiers (for which the target will be a class +# label), or regressors (for which the target is a real value, a vector, or, +# in functional data analysis, even a function!) and also unsupervised methods +# such as clusterers or outlier detection methods. +# +# Predictors should implement the ``fit_predict`` method for fitting the +# estimators and predicting the targets in one step and/or the ``predict`` +# method for predicting the targets of possibly not previously observed data. +# Usually :term:`sklearn:transductive` estimators implement only the former +# one, while :term:`sklearn:inductive` estimators implement the latter one (or +# both). +# +# Predictors can have additional non-mandatory methods, such as +# ``predict_proba`` for obtaining the probability of a particular prediction +# or ``score`` for evaluating the results of the prediction. + +############################################################################## +# As an example, we can look at the :class:`~skfda.ml.clustering.KMeans` +# clustering method for functional data. This method will try to separate +# the data into different clusters according to the distance between +# observations.
+ +X, y = skfda.datasets.fetch_weather(return_X_y=True) + +# Use only the first value (temperature) +X = X.coordinates[0] + +clusterer = skfda.ml.clustering.KMeans(n_clusters=3) +y_pred = clusterer.fit_predict(X) + +X.plot(group=y_pred) + +############################################################################## +# Metaestimators +# ^^^^^^^^^^^^^^ +# +# In scikit-learn jargon, a :term:`sklearn:metaestimator` is an estimator +# that takes other estimators as parameters. There are several reasons for +# doing that, which will be explained now. + +############################################################################## +# Composition metaestimators +# ++++++++++++++++++++++++++ +# +# It is very common in machine learning to apply one or more preprocessing +# steps one after the other, before applying a final predictor. For this +# purpose scikit-learn offers the :class:`~sklearn.pipeline.Pipeline`, which +# joins the steps together and uses the same estimator API for performing all +# steps in order (this is usually referred to as the composite pattern in +# software engineering). The :class:`~sklearn.pipeline.Pipeline` estimator +# can be used with the functional data estimators available in scikit-fda. +# Moreover, as transformers such as dimensionality reduction methods can +# convert functional data to multivariate data usable by scikit-learn methods +# it is possible to mix methods from scikit-fda and scikit-learn in the same +# pipeline. +# +# .. warning:: +# In addition, scikit-learn offers estimators that can join several +# transformations as new features of the same dataset ( +# :class:`~sklearn.pipeline.FeatureUnion`) or that can apply different +# transformers to different columns of the data +# :class:`~sklearn.compose.ColumnTransformer`. These transformers +# are not yet usable with functional data.
+ +############################################################################## +# As an example, we can construct a pipeline that registers the data using +# shift registration, then applies a variable selection method to +# transform each observation to a 3D vector and then uses a SVM classifier +# to classify the data. + +from skfda.preprocessing.dim_reduction import variable_selection as vs +from sklearn.pipeline import Pipeline +from sklearn.svm import SVC + +X, y = skfda.datasets.fetch_growth(return_X_y=True) + +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + +pipeline = Pipeline([ + ("registration", skfda.preprocessing.registration.ShiftRegistration()), + ("dim_reduction", vs.RKHSVariableSelection(n_features_to_select=3)), + ("classifier", SVC()), +]) + +pipeline.fit(X_train, y_train) +pipeline.score(X_test, y_test) diff --git a/tutorial/plot_smoothing_data.py b/tutorial/plot_smoothing_data.py new file mode 100644 index 000000000..b5d5757ca --- /dev/null +++ b/tutorial/plot_smoothing_data.py @@ -0,0 +1,19 @@ +""" +Smoothing the data +================== + +In this section, we will explain how it is possible to remove noise of the +data leveraging the continuous nature of functional observations. We will +introduce the smoothing procedures available in scikit-fda. + +.. Disable isort + isort:skip_file + +""" + +# Author: Carlos Ramos Carreño +# License: MIT + +############################################################################## +# The FDataGrid class +# ------------------- From 602d3958469f5ada6fc8a9c198f2484d59ff7f56 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Fri, 9 Apr 2021 19:15:49 +0200 Subject: [PATCH 219/417] Improve skfda-sklearn tutorial.
--- tutorial/plot_skfda_sklearn.py | 142 ++++++++++++++++++++++++++++++++- 1 file changed, 141 insertions(+), 1 deletion(-) diff --git a/tutorial/plot_skfda_sklearn.py b/tutorial/plot_skfda_sklearn.py index 0aaf68c13..e29008472 100644 --- a/tutorial/plot_skfda_sklearn.py +++ b/tutorial/plot_skfda_sklearn.py @@ -75,6 +75,7 @@ import skfda from sklearn.model_selection import train_test_split +import sklearn X, y = skfda.datasets.fetch_growth(return_X_y=True) @@ -186,7 +187,7 @@ # transformations as new features of the same dataset ( # :class:`~sklearn.pipeline.FeatureUnion`) or that can apply different # transformers to different columns of the data -# :class:`~sklearn.compose.ColumnTransformer`. These transformers +# (:class:`~sklearn.compose.ColumnTransformer`). These transformers # are not yet usable with functional data. ############################################################################## @@ -211,3 +212,142 @@ pipeline.fit(X_train, y_train) pipeline.score(X_test, y_test) + +############################################################################## +# Hyperparameter optimizers +# +++++++++++++++++++++++++ +# +# Some of the parameters used for the creation of an estimator need to be +# tuned to each particular dataset in order to improve the prediction accuracy +# and generalization. There are several techniques to do that already +# available in scikit-learn, such as grid search cross-validation +# (:class:`~sklearn.model_selection.GridSearchCV`) or randomized search +# (:class:`~sklearn.model_selection.RandomizedSearchCV`). As these +# hyperparameter optimizers only need to split the data and call ``score`` in +# the predictor, they can be directly used with the methods in scikit-fda. +# +# .. note:: +# In addition one could use any optimizer that understand the scikit-learn +# API such as those in `scikit-optimize +# `_. 
+ +############################################################################## +# As an example, we will use :class:`~sklearn.model_selection.GridSearchCV` +# to select the number of neighbors used in a +# :class:`~skfda.ml.classification.KNeighborsClassifier`. + +from sklearn.model_selection import GridSearchCV + +X, y = skfda.datasets.fetch_growth(return_X_y=True) + +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + +classifier = skfda.ml.classification.KNeighborsClassifier() + +grid_search = GridSearchCV( + estimator=classifier, + param_grid={"n_neighbors": range(1, 10, 2)}, +) + +grid_search.fit(X_train, y_train) +n_neighbors = grid_search.best_estimator_.n_neighbors +score = grid_search.score(X_test, y_test) + +print(n_neighbors, score) + +############################################################################## +# Ensemble methods +# ++++++++++++++++ +# +# The ensemble methods :class:`~sklearn.ensemble.VotingClassifier` and +# :class:`~sklearn.ensemble.VotingRegressor` in scikit-learn use several +# different estimators in order to predict the targets. As this is done +# by evaluating the passed estimators as black boxes, these predictors can +# also be combined with scikit-fda predictors. +# +# .. warning:: +# Other ensemble methods, such as +# :class:`~sklearn.ensemble.BaggingClassifier` or +# :class:`~sklearn.ensemble.AdaBoostClassifier` construct estimators +# that only use a subset of the features. As in :term:`FDA` the features +# are the evaluations of the functions at different points, these +# estimators cannot work with functional data unless it has been +# transformed to a multivariate dataset. + +############################################################################## +# As an example we will use a voting classifier to classify data using as +# classifiers a knn-classifier, a nearest centroid classifier and a +# maximum depth classifier. 
+ +from sklearn.ensemble import VotingClassifier + +X, y = skfda.datasets.fetch_growth(return_X_y=True) + +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + +knn = skfda.ml.classification.KNeighborsClassifier() +nearest_centroid = skfda.ml.classification.NearestCentroid() +mdc = skfda.ml.classification.MaximumDepthClassifier() + +voting = VotingClassifier([ + ("knn", knn), + ("nearest_centroid", nearest_centroid), + ("mdc", mdc), +]) + +voting.fit(X_train, y_train) +voting.score(X_test, y_test) + +############################################################################## +# Multiclass and multioutput classification utilities +# +++++++++++++++++++++++++++++++++++++++++++++++++++ +# +# The scikit-learn library also offers additional utilities that can convert +# a binary classifier into a multiclass classifier (such as +# :class:`~sklearn.multiclass.OneVsRestClassifier`) or to extend a single +# output classifier or regressor to accept also multioutput (vector-valued) +# targets. + +############################################################################## +# In this example we want to use as a classifier the combination of a +# dimensionality reduction method ( +# :class:`~skfda.preprocessing.dim_reduction.variable_selection.RKHSVariableSelection`) +# and a SVM classifier (:class:`~sklearn.svm.SVC`). As that particular +# dimensionality reduction method is only suitable for binary data, we use +# :class:`~sklearn.multiclass.OneVsRestClassifier` to classify in a +# multiclass dataset. 
+ +from sklearn.multiclass import OneVsRestClassifier + +X, y = skfda.datasets.fetch_phoneme(return_X_y=True) + +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + +pipeline = Pipeline([ + ("dim_reduction", vs.RKHSVariableSelection(n_features_to_select=3)), + ("classifier", SVC()), +]) + +multiclass = OneVsRestClassifier(pipeline) + +multiclass.fit(X_train, y_train) +multiclass.score(X_test, y_test) + +############################################################################## +# Other scikit-learn utilities +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# In addition to the aforementioned objects, there are plenty of objects in +# scikit-learn that can be applied directly to functional data. We have +# already seen in the examples the function +# :func:`~sklearn.model_selection.train_test_split`. Other objects and +# functions such as :class:`~sklearn.model_selection.KFold` can be directly +# applied to functional data in order to split it into folds. Scorers for +# classification or regression, such as `~sklearn.metrics.accuracy_score` can +# be directly applied to functional data problems. +# +# Moreover, there are plenty of libraries that aim to extend scikit-learn in +# several directions (take a look at the `list of related projects +# `_). You will +# probably see that a lot of the functionality can be applied to scikit-fda, +# as it uses the same API as scikit-learn. From b34a0e8bf32897b89ae870a6ec88aba74302dc1e Mon Sep 17 00:00:00 2001 From: VNMabus Date: Fri, 9 Apr 2021 19:32:00 +0200 Subject: [PATCH 220/417] Put tutorials in order. 
--- docs/conf.py | 30 ++++++++++++++++++++++++++++++ tutorial/plot_skfda_sklearn.py | 5 +++-- tutorial/plot_smoothing_data.py | 19 ------------------- 3 files changed, 33 insertions(+), 21 deletions(-) delete mode 100644 tutorial/plot_smoothing_data.py diff --git a/docs/conf.py b/docs/conf.py index 6e8437a8c..a171cb01b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -227,6 +227,35 @@ 'mpldatacursor': ('https://pypi.org/project/mpldatacursor/', None), } + +tutorial_list = [ + "plot_getting_data.py", + "plot_skfda_sklearn.py", +] + + +class SkfdaExplicitSubOrder(object): + """ + Class for use within the 'within_subsection_order' key. + + Inspired by Matplotlib gallery. + + """ + + def __init__(self, src_dir: str) -> None: + self.src_dir = src_dir # src_dir is unused here + self.ordered_list = tutorial_list + + def __call__(self, filename: str) -> str: + """Return a string determining the sort order.""" + if filename in self.ordered_list: + ind = self.ordered_list.index(filename) + return f"{ind:04d}" + + # ensure not explicitly listed items come last. + return f"zzz{filename}" + + sphinx_gallery_conf = { # path to your examples scripts 'examples_dirs': ['../examples', '../tutorial'], @@ -238,6 +267,7 @@ }, 'backreferences_dir': 'backreferences', 'doc_module': 'skfda', + 'within_subsection_order': SkfdaExplicitSubOrder, } autosummary_generate = True diff --git a/tutorial/plot_skfda_sklearn.py b/tutorial/plot_skfda_sklearn.py index e29008472..cc9dc7fd5 100644 --- a/tutorial/plot_skfda_sklearn.py +++ b/tutorial/plot_skfda_sklearn.py @@ -343,8 +343,9 @@ # :func:`~sklearn.model_selection.train_test_split`. Other objects and # functions such as :class:`~sklearn.model_selection.KFold` can be directly # applied to functional data in order to split it into folds. Scorers for -# classification or regression, such as `~sklearn.metrics.accuracy_score` can -# be directly applied to functional data problems. 
+# classification or regression, such as +# :func:`~sklearn.metrics.accuracy_score` can be directly applied to +# functional data problems. # # Moreover, there are plenty of libraries that aim to extend scikit-learn in # several directions (take a look at the `list of related projects diff --git a/tutorial/plot_smoothing_data.py b/tutorial/plot_smoothing_data.py deleted file mode 100644 index b5d5757ca..000000000 --- a/tutorial/plot_smoothing_data.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -Smoothing the data -================== - -In this section, we will explain how it is possible to remove noise of the -data leveraging the continuous nature of functional observations. We will -introduce the smoothing procedures available in scikit-fda. - -.. Disable isort - isort:skip_file - -""" - -# Author: Carlos Ramos Carreño -# License: MIT - -############################################################################## -# The FDataGrid class -# ------------------- From cd86f53746332d2f9bf371b2ba3cfb6d5ec02f30 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 11 Apr 2021 01:12:57 +0200 Subject: [PATCH 221/417] Improve basis tutorial. 
--- skfda/representation/basis/_finite_element.py | 6 +- tutorial/plot_basis_representation.py | 396 ++++++++++++++++++ 2 files changed, 399 insertions(+), 3 deletions(-) create mode 100644 tutorial/plot_basis_representation.py diff --git a/skfda/representation/basis/_finite_element.py b/skfda/representation/basis/_finite_element.py index 33412f6fd..0e860c06f 100644 --- a/skfda/representation/basis/_finite_element.py +++ b/skfda/representation/basis/_finite_element.py @@ -2,7 +2,7 @@ import numpy as np -from .._typing import DomainRangeLike +from .._typing import ArrayLike, DomainRangeLike from ._basis import Basis T = TypeVar("T", bound='FiniteElement') @@ -67,8 +67,8 @@ class FiniteElement(Basis): def __init__( self, - vertices: np.ndarray, - cells: np.ndarray, + vertices: ArrayLike, + cells: ArrayLike, domain_range: Optional[DomainRangeLike] = None, ) -> None: super().__init__( diff --git a/tutorial/plot_basis_representation.py b/tutorial/plot_basis_representation.py new file mode 100644 index 000000000..9467de99f --- /dev/null +++ b/tutorial/plot_basis_representation.py @@ -0,0 +1,396 @@ +""" +Basis representation +==================== + +In this section of the tutorial, we will introduce the basis representation of +functional data. This is a very useful representation for functions that +belong (or can be reasonably projected) to the space spanned by a finite set +of basis functions. + +.. Disable isort + isort:skip_file + +""" + +############################################################################## +# Functions and vector spaces +# --------------------------- +# +# Functions, which are the objects of study of :term:`FDA`, can be added and +# multiplied by scalars, and these operations verify the necessary properties +# to consider these functions as vectors in a vector space. +# +# The :class:`~skfda.representation.grid.FDataGrid` objects that are used to +# represent functional observations in scikit-fda also support these +# operations. 
+ +############################################################################## +# In order to show these operations, we create the first FDataGrid and plot +# it. + +import numpy as np +import skfda + +t = np.linspace(0, 1, 100) + +fd = skfda.FDataGrid( + data_matrix=[ + np.sin(6 * t), # First function + 2 * t, # Second function + ], + grid_points=t, +) + +fd.plot() + +############################################################################## +# Functions can be multiplied by a scalar. This only changes the scale of +# the functions, but not their shape. + +scalar_mul = 3 * fd + +scalar_mul.plot() + +############################################################################## +# We need two objects to show the sum. Thus we create a second FDataGrid and +# plot it. + +fd2 = skfda.FDataGrid( + data_matrix=[ + 3 * t**2, # First function + np.log(t), # Second function + ], + grid_points=t, +) + +fd2.plot() + +############################################################################## +# We can now plot the sum of both :class:`~skfda.representation.grid.FDataGrid` +# objects. + +fd_sum = fd + fd2 + +fd_sum.plot() + +############################################################################## +# Infinite (Schauder) basis +# ------------------------- +# +# Some functional topological vector spaces admit a Schauder basis. This is +# a sequence of functions :math:`\Phi = \{\phi_i\}_{i=1}^{\infty}` so that +# for every function :math:`x` in the space there exists a sequence of scalars +# :math:`\{a_i\}_{i=1}^{\infty}` such that +# +# .. math:: +# x(t) = \sum_{i=1}^{\infty} a_i \phi_i(t) +# +# where the convergence of this series is with respect to the vector space +# topology. +# +# If you know that your functions of interest belong to one of these vector +# spaces, it may be interesting to express your functions in a basis. +# As computers have limited memory and computation resources, it is not +# possible to obtain the infinite basis expansion.
Instead, one typically +# truncates the expansion to a few basis functions, which are enough to +# approximate your observations with a certain degree of accuracy. This +# truncation also has the effect of smoothing the data, as less important +# variations, such as noise, are eliminated in the process. Moreover, as bases +# are truncated, the vector space generated by the truncated set of basis +# functions is different to the original space, and also different between +# different basis families. Thus, the choice of basis matters, even if +# originally they would have generated the same space. +# +# In scikit-fda, functions expressed as a basis expansion can be represented +# using the class :class:`~skfda.representation.basis.FDataBasis`. The main +# attributes of objects of this class are ``basis``, an object representing a +# basis family of functions, and ``coefficients``, a matrix with the scalar +# coefficients of the functions in the basis. + +############################################################################## +# As an example, we can create the following function, which is expressed in +# a truncated monomial basis (and thus it is a polynomial): +# +# .. math:: +# x(t) = 3 + 2t - 4t^2 + t^3 + +basis = skfda.representation.basis.Monomial( + n_basis=4, + domain_range=(-10, 10), +) + +fd_basis = skfda.FDataBasis( + basis=basis, + coefficients=[ + [3, 2, -4, 1], # First (and unique) observation + ], +) + +fd_basis.plot() + +############################################################################## +# Conversion between FDataGrid and FDataBasis +# ------------------------------------------- +# +# It is possible to convert between functions in discretized form (class +# :class:`~skfda.representation.grid.FDataGrid`) and basis expansion form ( +# class :class:`~skfda.representation.basis.FDataBasis`).
In order to convert +# :class:`~skfda.representation.grid.FDataGrid` objects to a basis +# representation you will need to call the method ``to_basis``, passing the +# desired basis as an argument. The functions will then be projected to the +# functional basis, solving a least squares problem in order to find the +# optimal coefficients of the expansion. In order to convert a +# :class:`~skfda.representation.basis.FDataBasis` to a discretized +# representation you should call the method ``to_grid``. This method evaluates +# the functions in a grid that can be supplied as an argument in order to +# obtain the values of the discretized representation. + +############################################################################## +# We now can see how the number of basis functions affect the basis expansion +# representation of a few observations taken from a real-world dataset. You +# can see that as more basis functions are used, the basis representation +# provides a better representation of the real data. + +import matplotlib.pyplot as plt + +max_basis = 9 + +X, y = skfda.datasets.fetch_phoneme(return_X_y=True) + +# Select only the first 5 samples +X = X[:5] + +X.plot() + +fig, axes = plt.subplots(nrows=3, ncols=3) + +for n_basis in range(1, max_basis + 1): + basis = skfda.representation.basis.Monomial(n_basis=n_basis) + X_basis = X.to_basis(basis) + + ax = axes.ravel()[n_basis - 1] + fig = X_basis.plot(axes=ax) + ax.set_title(f"{n_basis} basis functions") + +fig.tight_layout() + +############################################################################## +# List of available basis functions +# --------------------------------- +# +# In this section we will provide a list of the available basis in scikit-fda. +# As explained before, the basis family is important when the basis expansion +# is truncated (which always happens in order to represent it in a computer). 
+# Thus, it is recommended to take a look at the available basis in order to +# pick one that provides the best representation of the original data. + +############################################################################## +# Monomial basis +# ^^^^^^^^^^^^^^ +# +# The monomial basis (class :class:`~skfda.representation.basis.Monomial`) is +# probably one of the simpler and more well-known basis +# of functions. Often Taylor and Maclaurin series are explained in the very +# first courses of Science and Engineering degrees, and students are familiar +# with polynomials since much before. Thus, the monomial basis is useful for +# teaching purposes (and that is why we have used it in the examples). It is +# also very useful for testing purposes, as it is easy to manually derive the +# expected results of operations involving this basis. +# +# As a basis for functional data analysis, however, it has several issues that +# usually make it preferable to use other bases instead. First, the usual basis +# :math:`\{1, x, x^2, x^3, \ldots\}` is not orthogonal under the standard +# inner product in :math:`L^2`, that is :math:`\langle x, y \rangle = +# \int_{\mathcal{T}} x(t) y(t) dt`. This inhibits some +# performance optimizations that are available for operations that require +# inner products. It is possible to find an orthogonal basis of polynomials, +# but it will not be as easy to understand, losing many of its advantages. +# Other problems with this basis are the necessity of a large +# number of basis functions to express local features, the bad behaviour at +# the extremes of the function and the fact that the derivatives of the basis +# expansion are not good approximations of the derivatives of the original +# data, as high order polynomials tend to have very large oscillations. + +############################################################################## +# Here we show the first five elements of the monomial basis.
+ +basis = skfda.representation.basis.Monomial(n_basis=5) +basis.plot() + +############################################################################## +# We now show how the previous observations are represented using the first +# five elements of this basis. + +X_basis = X.to_basis(basis) +X_basis.plot() + +############################################################################## +# Fourier basis +# ^^^^^^^^^^^^^^ +# +# Probably the second most well known series expansion for statisticians, +# engineers, physicists and mathematicians is the Fourier series. The Fourier +# basis (class :class:`~skfda.representation.basis.Fourier`) consists of a +# constant term plus sines and cosines of varying frequency, +# all of them normalized to unit (:math:`L^2`) norm. +# This basis is a good choice for periodic functions (as a function +# expressed in this basis has the same value at the beginning and at the end +# of its domain interval if it has the same length as the period +# :math:`\omega`). Moreover, in this case the functions are orthonormal (that +# is why the basis used are normalized). +# +# This basis is especially indicated for functions without strong local +# features and with almost the same order of curvature everywhere, as +# otherwise the expansion requires again a large number of basis to represent +# those details. + +############################################################################## +# Here we show the first five elements of a Fourier basis. + +basis = skfda.representation.basis.Fourier(n_basis=5) +basis.plot() + +############################################################################## +# We now show how the previous observations are represented using the first +# five elements of this basis.
+ +X_basis = X.to_basis(basis) +X_basis.plot() + +############################################################################## +# B-spline basis +# ^^^^^^^^^^^^^^ +# +# Splines are a family of functions that has taken importance with the advent +# of the modern computers, and nowadays are well known for a lot of engineers +# and designers. Essentially, they are piecewise polynomials that join smoothly +# at the separation points (usually called knots). Thus, both polynomials +# and piecewise linear functions are included in this family. Given a set of +# knots, a B-spline basis (class :class:`~skfda.representation.basis.BSpline`) +# of a given order can be used to express every spline of the same order that +# uses the same knots. +# +# This basis is a very powerful basis, as the knots can be adjusted to be able +# to express local features, and it is even possible to create points where +# the functions are not necessarily smooth or continuous by placing several +# knots together. Also the elements of the basis have the compact support +# property, which allows more efficient computations. Thus, this basis is +# indicated for non-periodic functions or functions with local features or with +# different orders of curvature along their domain. + +############################################################################## +# Here we show the first five elements of a B-spline basis. + +basis = skfda.representation.basis.BSpline(n_basis=5) +basis.plot() + +############################################################################## +# We now show how the previous observations are represented using the first +# five elements of this basis. + +X_basis = X.to_basis(basis) +X_basis.plot() + +############################################################################## +# Constant basis +# ^^^^^^^^^^^^^^ +# +# Sometimes it is useful to consider the basis whose only function is the +# constant one.
In particular, using this basis we can view scalar values +# as functional observations, which can be used to combine multivariate +# and functional data in the same model. + +############################################################################## +# Tensor product basis +# ^^^^^^^^^^^^^^^^^^^^ +# +# The previously explained bases are useful for data that comes in the form +# of curves, that is, functions :math:`\{f_i: \mathbb{R} \to +# \mathbb{R}\}_{i=1}^N`. However, scikit-fda allows also the representation +# of surfaces or functions in higher dimensions. In this case it is even more +# useful to be able to represent them using basis expansions, as the number +# of parameters in the discretized representation grows as the product of the +# grid points in each dimension of the domain. +# +# The tensor product basis (class :class:`~skfda.representation.basis.Tensor`) +# allows the construction of basis for these higher dimensional functions as +# tensor products of :math:`\mathbb{R} \to \mathbb{R}` basis. + +############################################################################## +# As an example, we can import the digits datasets of scikit-learn, which are +# surfaces, and convert it to a basis expansion. Note that we use different +# basis for the different continuous parameters of the function in order to +# show how it works, although it probably makes no sense in this particular +# example. 
+ +from sklearn.datasets import load_digits + +X, y = load_digits(return_X_y=True) +X = X.reshape(-1, 8, 8) + +fd = skfda.FDataGrid(X) + +basis = skfda.representation.basis.Tensor([ + skfda.representation.basis.Fourier(n_basis=5), # X axis + skfda.representation.basis.BSpline(n_basis=6), # Y axis +]) + +fd_basis = fd.to_basis(basis) + +# We only plot the first function +fd_basis[0].plot() + +############################################################################## +# Finite element basis +# ^^^^^^^^^^^^^^^^^^^^ +# +# A finite element basis (class +# :class:`~skfda.representation.basis.FiniteElement`) is a basis used in the +# finite element method (FEM). In order to instantiate a basis, it is +# necessary to pass a set of vertices and a set of simplices, or cells, that +# join them, conforming a grid. The basis elements are then functions that +# are one at exactly one of these vertices and zero in the rest of them. +# +# The advantage of this basis for higher dimensional functions is that one can +# have more control of the basis, placing more vertices in regions with +# interesting behaviour, such as local features and less elsewhere. + +############################################################################## +# Here we show an example where the + +vertices = [ + (0, 0), + (0, 1), + (1, 0), + (1, 1), + (0.25, 0.5), + (0.5, 0.25), + (0.5, 0.75), + (0.75, 0.5), + (0.5, 0.5), +] + +cells = [ + (0, 1, 4), + (0, 2, 5), + (1, 3, 6), + (2, 3, 7), + (0, 4, 5), + (1, 4, 6), + (2, 5, 7), + (3, 6, 7), + (4, 5, 8), + (4, 6, 8), + (5, 7, 8), + (6, 7, 8), +] + +basis = skfda.representation.basis.FiniteElement( + vertices=vertices, + cells=cells, +) + +fd_basis = fd.to_basis(basis) + +# We only plot the first function +fd_basis[0].plot() From e1578dc1c597b149dfe3d913b7619c9960108e96 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 11 Apr 2021 14:02:15 +0200 Subject: [PATCH 222/417] Add tutorial for basis representation. 
--- docs/conf.py | 1 + tutorial/plot_basis_representation.py | 69 ++++++++++++++++++++++++--- 2 files changed, 63 insertions(+), 7 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index a171cb01b..61a140902 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -230,6 +230,7 @@ tutorial_list = [ "plot_getting_data.py", + "plot_basis_representation.py", "plot_skfda_sklearn.py", ] diff --git a/tutorial/plot_basis_representation.py b/tutorial/plot_basis_representation.py index 9467de99f..24ef11372 100644 --- a/tutorial/plot_basis_representation.py +++ b/tutorial/plot_basis_representation.py @@ -331,8 +331,14 @@ fd = skfda.FDataGrid(X) basis = skfda.representation.basis.Tensor([ - skfda.representation.basis.Fourier(n_basis=5), # X axis - skfda.representation.basis.BSpline(n_basis=6), # Y axis + skfda.representation.basis.Fourier( # X axis + n_basis=5, + domain_range=fd.domain_range[0], + ), + skfda.representation.basis.BSpline( # Y axis + n_basis=6, + domain_range=fd.domain_range[1], + ), ]) fd_basis = fd.to_basis(basis) @@ -356,9 +362,11 @@ # interesting behaviour, such as local features and less elsewhere. ############################################################################## -# Here we show an example where the +# Here we show an example where the digits dataset of scikit-learn is +# expressed in the finite element basis. First we create the vertices and +# simplices that we will use and we plot them. -vertices = [ +vertices = np.array([ (0, 0), (0, 1), (1, 0), @@ -368,9 +376,9 @@ (0.5, 0.75), (0.75, 0.5), (0.5, 0.5), -] +]) -cells = [ +cells = np.array([ (0, 1, 4), (0, 2, 5), (1, 3, 6), @@ -383,7 +391,12 @@ (4, 6, 8), (5, 7, 8), (6, 7, 8), -] +]) + +plt.triplot(vertices[:, 0], vertices[:, 1], cells) + +############################################################################## +# We now represent the digits dataset in this basis. 
basis = skfda.representation.basis.FiniteElement( vertices=vertices, @@ -394,3 +407,45 @@ # We only plot the first function fd_basis[0].plot() + +############################################################################## +# Vector-valued basis +# ^^^^^^^^^^^^^^^^^^^ +# +# With the aforementioned bases, one could express +# :math:`\mathbb{R}^p \to \mathbb{R}` functions. In order to express vector +# valued functions as a basis expansion, one just need to express each +# coordinate function as a basis expansion and multiply it by the +# corresponding unitary vector in the coordinate direction, adding finally all +# of them together. +# +# The vector-valued basis (:class:`~skfda.representation.basis.VectorValued`) +# allows the representation of vector-valued functions doing just that. + +############################################################################## +# As an example, consider the Canadian Weather dataset, including both +# temperature and precipitation data as coordinate functions, and plotted +# below. + +X, y = skfda.datasets.fetch_weather(return_X_y=True) + +X.plot() + +############################################################################## +# We will express this dataset as a basis expansion. Temperatures +# are now expressed in a Fourier basis, while we express precipitations as +# B-splines. + +basis = skfda.representation.basis.VectorValued([ + skfda.representation.basis.Fourier( # First coordinate function + n_basis=5, + domain_range=X.domain_range, + ), + skfda.representation.basis.BSpline( # Second coordinate function + n_basis=10, + domain_range=X.domain_range, + ), +]) + +X_basis = X.to_basis(basis) +X_basis.plot() From 76416594dc28d8874d8ffb00646631cd15745bbc Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 11 Apr 2021 16:33:42 +0200 Subject: [PATCH 223/417] Improve index. 
--- docs/index.rst | 31 ++++++++++++++++++--------- tutorial/plot_basis_representation.py | 5 +++++ tutorial/plot_getting_data.py | 2 +- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 5fa9c798c..48bcb3d6c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,25 +14,28 @@ or clustering of functional data. In the `project page `_ hosted by Github you can find more information related to the development of the package. - .. toctree:: - :maxdepth: 2 - :caption: Contents: - :titlesonly: - - apilist - glossary - -.. toctree:: - + :caption: Using scikit-fda + :hidden: + auto_tutorial/index .. toctree:: :maxdepth: 1 :titlesonly: + :hidden: auto_examples/index +.. toctree:: + :maxdepth: 2 + :titlesonly: + :hidden: + :caption: More documentation + + apilist + glossary + An exhaustive list of all the contents of the package can be found in the :ref:`genindex`. @@ -61,6 +64,14 @@ In this type of installation make sure that your default Python version is currently supported, or change the python and pip commands by specifying a version, such as python3.6. +How do I start? +--------------- + +If you want a quick overview of the package, we recommend you to try the +new :doc:`tutorial `. For articles about specific +topics, feel free to explore the :doc:`examples `. Want +to check the documentation of a particular class or function? Try searching +for it in the :doc:`API list `. 
Contributions ------------- diff --git a/tutorial/plot_basis_representation.py b/tutorial/plot_basis_representation.py index 24ef11372..0fdf24d95 100644 --- a/tutorial/plot_basis_representation.py +++ b/tutorial/plot_basis_representation.py @@ -12,6 +12,11 @@ """ +# Author: Carlos Ramos Carreño +# License: MIT +# +# sphinx_gallery_thumbnail_number = 7 + ############################################################################## # Functions and vector spaces # --------------------------- diff --git a/tutorial/plot_getting_data.py b/tutorial/plot_getting_data.py index 7d8c92ef4..b2f2d6593 100644 --- a/tutorial/plot_getting_data.py +++ b/tutorial/plot_getting_data.py @@ -162,7 +162,7 @@ # :class:`Pandas DataFrames ` are also popular as # datasets containers in the Python scientific ecosystem. If you have # data in a Pandas DataFrame, you can extract its content as a Numpy -# array using the method :meth:`pandas.DataFrame.to_numpy` of the +# array using the method :meth:`~pandas.DataFrame.to_numpy` of the # DataFrame. ############################################################################## From 7eba020d95593915e50b7e43453ace8ed0f25dc6 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 11 Apr 2021 20:11:49 +0200 Subject: [PATCH 224/417] Add introduction. 
--- docs/conf.py | 1 + tutorial/plot_basis_representation.py | 2 +- tutorial/plot_getting_data.py | 2 +- tutorial/plot_introduction.py | 137 ++++++++++++++++++++++++++ tutorial/plot_skfda_sklearn.py | 6 +- 5 files changed, 143 insertions(+), 5 deletions(-) create mode 100644 tutorial/plot_introduction.py diff --git a/docs/conf.py b/docs/conf.py index 61a140902..bfdd98b4e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -229,6 +229,7 @@ tutorial_list = [ + "plot_introduction.py", "plot_getting_data.py", "plot_basis_representation.py", "plot_skfda_sklearn.py", diff --git a/tutorial/plot_basis_representation.py b/tutorial/plot_basis_representation.py index 0fdf24d95..bf3b626f4 100644 --- a/tutorial/plot_basis_representation.py +++ b/tutorial/plot_basis_representation.py @@ -2,7 +2,7 @@ Basis representation ==================== -In this section of the tutorial, we will introduce the basis representation of +In this section, we will introduce the basis representation of functional data. This is a very useful representation for functions that belong (or can be reasonably projected) to the space spanned by a finite set of basis functions. diff --git a/tutorial/plot_getting_data.py b/tutorial/plot_getting_data.py index b2f2d6593..ef0e48ff5 100644 --- a/tutorial/plot_getting_data.py +++ b/tutorial/plot_getting_data.py @@ -2,7 +2,7 @@ Getting the data ================ -In this section of the tutorial, we will dicuss how to get functional data to +In this section, we will dicuss how to get functional data to use in scikit-fda. 
We will briefly describe the :class:`~skfda.representation.grid.FDataGrid` class, which is the type that scikit-fda uses for storing and working with functional data in discretized diff --git a/tutorial/plot_introduction.py b/tutorial/plot_introduction.py new file mode 100644 index 000000000..51b0c2ad1 --- /dev/null +++ b/tutorial/plot_introduction.py @@ -0,0 +1,137 @@ +""" +Introduction +============ + +In this section, we will briefly explain what is +:term:`functional data analysis (FDA) <FDA>`, and we will introduce +scikit-fda, a library that provides FDA tools to statisticians, engineers or +machine learning practitioners in the Python scientific ecosystem. + +.. Disable isort + isort:skip_file + +""" + +# Author: Carlos Ramos Carreño +# License: MIT + +############################################################################## +# What is functional data analysis? +# --------------------------------- +# +# Traditional multivariate statistics focus on the simultaneous analysis of +# a finite number of variables. In this setting, we have several observations, +# each of them consisting of a vector of measured values. The variables, or +# coordinates of this vector, could be correlated, but otherwise they can be +# arbitrarily ordered inside the observed vector, provided that the order is +# the same for each observation. Usually these observations are considered +# to be instances of a random vector, and a big part of the analysis is +# centered in finding the distribution associated with it. +# +# In contrast, in functional data analysis each observation is a function of +# one or several variables, such as curves or surfaces. These functions are +# usually continuous and often they are smooth, and derivatives can be +# computed. The number of variables of these objects is then infinite, as each +# evaluation of the function at one point could be considered as one variable.
+# Moreover, now it is not possible to reorder the variables of the +# observations without altering substantially its structure. If the functions +# are continuous, nearby variables are highly correlated, a characteristic +# that makes some classical multivariate methods unsuitable to work with this +# data. +# +# In this setting observations can also be considered to be instances +# of a "functional random variable", usually called stochastic processes and +# random fields. However, some of the concepts that proved very useful to +# analyze multivariate data, such as density functions, are not applicable +# to :term:`functional data`, while new tools, such as taking derivatives, +# become available. +# +# As such, functional data can benefit of a separate analysis from +# multivariate statistics, but also adapting and extending multivariate +# techniques when possible. + +############################################################################## +# What is scikit-fda? +# ------------------- +# +# scikit-fda is a Python library containing classes and functions that allow +# you to perform functional data analysis tasks. Using it you can: +# +# - Represent functions as Python objects, both in a discretized fashion +# and as a basis expansion. +# - Apply preprocessing methods to functional data, including smoothing, +# registration and dimensionality reduction. +# - Perform a complete exploratory analysis of de data, summarizing its +# main properties, detecting possible outliers and visualizing the data +# in several ways. +# - Apply statistical inference tools developed for functional data, such +# as functional ANOVA. +# - Perform usual machine learning tasks, such as classification, +# regression or clustering, using functional observations. +# - Combine the tools offered by scikit-fda with other tools of the Python +# scientific ecosystem, such as those provided by the popular machine +# learning library `scikit-learn `_. 
+ + +############################################################################## +# Anatomy of a function +# --------------------- +# +# We would like to briefly remind the reader the basic concepts that are +# employed to talk about functions. Functions in math are a relation between +# two sets, the :term:`domain` and the :term:`codomain` in which each element +# of the :term:`domain` is restricted to be related to exactly one element of +# the :term:`codomain`. The intuition behind this is that a function +# represents some type of deterministic process, that takes elements of the +# :term:`domain` as inputs and produces elements of the :term:`codomain` as +# outputs. +# +# In :term:`FDA`, the inputs, or parameters, of a function are assumed to be +# continuous parameters, and so are the outputs, or values of the function. +# Thus, it is usual to restrict our functional observations to be functions +# :math:`\{f_i: \mathcal{T} \subseteq \mathbb{R}^p \to \mathbb{R}^q\}_{i=1}^N`. +# In this case both the domain and codomain are (subsets of) vector spaces of +# real numbers, and one could talk of the dimension of each of them as a +# vector space (in this case the domain dimension is :math:`p` and the +# codomain dimension is :math:`q`). +# +# The most common case of functional observation, and the one that has +# received more attention in the functional data literature, is the case of +# functions +# :math:`\{f_i: \mathcal{T} \subseteq \mathbb{R} \to \mathbb{R}\}_{i=1}^N` +# (curves or trajectories). + +############################################################################## +# As an example, the following code shows the Berkeley Growth dataset, one +# of the classical datasets used in :term:`FDA`. The curves are heights of +# several boys and girls measured at several points since their birth to +# their 18th birthday. Here the domain :math:`\mathcal{T}` is the interval +# :math:`[0, 18]` and both the domain and codomain have a dimension of one. 
+ +import skfda + +X, y = skfda.datasets.fetch_growth(return_X_y=True) + +X.plot() + +############################################################################## +# Functions where the domain dimension is greater than one ( +# such as surfaces or higher dimensional objects) are referred to as functions +# of several variables. Functions where the codomain dimension is greater than +# one are called vector-valued functions. + +############################################################################## +# As an example we show another popular dataset: Canadian Weather. Here each +# observation corresponds to data taken from a different weather station in +# Canada. For each day of the year we have two values: the average temperature +# on that day among several years and the average precipitation among the same +# years. Thus, here the domain :math:`\mathcal{T}` is the interval +# :math:`[0, 365)`, the domain dimension is one and the codomain dimension +# is two. We can see that by default each coordinate of the values of the +# function is plotted as a separate coordinate function. + +import skfda + +X, y = skfda.datasets.fetch_weather(return_X_y=True) + +X.plot() diff --git a/tutorial/plot_skfda_sklearn.py b/tutorial/plot_skfda_sklearn.py index cc9dc7fd5..d2f06a4fe 100644 --- a/tutorial/plot_skfda_sklearn.py +++ b/tutorial/plot_skfda_sklearn.py @@ -19,9 +19,9 @@ # A brief summary of scikit-learn architecture # -------------------------------------------- # -# The library scikit-learn is probably the most well-known Python package -# for machine learning. This package focuses in machine learning using -# multivariate data, which should be stored in a numpy +# The library `scikit-learn `_ is probably the most +# well-known Python package for machine learning. This package focuses in +# machine learning using multivariate data, which should be stored in a numpy # :class:`~numpy.ndarray` in order to process it.
However, this library has # defined a particular architecture that can be followed in order to provide # new tools that work in situations not even imagined by the original authors, From 3990872f371f1a8ceac62911ead0b38320f186ed Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 11 Apr 2021 23:43:17 +0200 Subject: [PATCH 225/417] Fix typos. --- tutorial/plot_introduction.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tutorial/plot_introduction.py b/tutorial/plot_introduction.py index 51b0c2ad1..bda6df5b7 100644 --- a/tutorial/plot_introduction.py +++ b/tutorial/plot_introduction.py @@ -39,9 +39,9 @@ # that makes some classical multivariate methods unsuitable to work with this # data. # -# In this setting observations can also be considered to be instances -# of a "functional random variable", usually called stochastic processes and -# random fields. However, some of the concepts that proved very useful to +# In this setting, observations can also be considered to be instances +# of a "functional random variable", usually called a stochastic process or +# a random field. However, some of the concepts that proved very useful to # analyze multivariate data, such as density functions, are not applicable # to :term:`functional data`, while new tools, such as taking derivatives, # become available. @@ -61,7 +61,7 @@ # and as a basis expansion. # - Apply preprocessing methods to functional data, including smoothing, # registration and dimensionality reduction. -# - Perform a complete exploratory analysis of de data, summarizing its +# - Perform a complete exploratory analysis of the data, summarizing its # main properties, detecting possible outliers and visualizing the data # in several ways. # - Apply statistical inference tools developed for functional data, such @@ -86,7 +86,7 @@ # :term:`domain` as inputs and produces elements of the :term:`codomain` as # outputs. 
# -# In :term:`FDA`, the inputs, or parameters, of a function are assumed to be +# In :term:`FDA`, the inputs or parameters of a function are assumed to be # continuous parameters, and so are the outputs, or values of the function. # Thus, it is usual to restrict our functional observations to be functions # :math:`\{f_i: \mathcal{T} \subseteq \mathbb{R}^p \to \mathbb{R}^q\}_{i=1}^N`. @@ -104,7 +104,7 @@ ############################################################################## # As an example, the following code shows the Berkeley Growth dataset, one # of the classical datasets used in :term:`FDA`. The curves are heights of -# several boys and girls measured at several points since their birth to +# 93 boys and girls measured at several points since their birth to # their 18th birthday. Here the domain :math:`\mathcal{T}` is the interval # :math:`[0, 18]` and both the domain and codomain have a dimension of one. From bca773d58671d39c2a90464368542f552b1d31a1 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 11 Apr 2021 23:47:03 +0200 Subject: [PATCH 226/417] Fix import. 
--- tutorial/plot_skfda_sklearn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tutorial/plot_skfda_sklearn.py b/tutorial/plot_skfda_sklearn.py index d2f06a4fe..82f5c9d30 100644 --- a/tutorial/plot_skfda_sklearn.py +++ b/tutorial/plot_skfda_sklearn.py @@ -75,7 +75,6 @@ import skfda from sklearn.model_selection import train_test_split -import sklearn X, y = skfda.datasets.fetch_growth(return_X_y=True) From 1ce1c679a0a3af5572e0025b9ef1a9de35ef78b2 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 12 Apr 2021 17:28:25 +0200 Subject: [PATCH 227/417] changes --- skfda/exploratory/visualization/representation.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index ec17fc059..cbf3cd2b9 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -54,7 +54,7 @@ def __len__(self) -> int: def _get_label_colors( n_labels: int, group_colors: Optional[Indexable[K, ColorLike]] = None, -) -> Union[np.ndarray, None]: +) -> np.ndarray: """Get the colors of each label.""" if group_colors is None: colormap = matplotlib.cm.get_cmap() @@ -76,7 +76,10 @@ def _get_color_info( group_colors: Optional[Indexable[K, ColorLike]] = None, legend: bool = False, kwargs: Any = None, -) -> Tuple[np.ndarray, Optional[List[matplotlib.patches.Patch]]]: +) -> Tuple[ + Union[ColorLike, str, None], + Optional[List[matplotlib.patches.Patch] + ]]: patches = None @@ -306,7 +309,7 @@ def plot( self.sample_colors = sample_colors - color_dict: Mapping[str, Any] = {} + color_dict: Mapping[str, Union[ColorLike, str, None]] = {} if self.fdata.dim_domain == 1: @@ -472,7 +475,7 @@ def plot( self.fdata, group, group_names, group_colors, legend, kwargs, ) - color_dict: Mapping[str, Any] = {} + color_dict: Mapping[str, Union[ColorLike, str, None]] = {} if self.fdata.dim_domain == 1: @@ -515,7 +518,7 @@ def plot( 
def set_color_dict( sample_colors: Any, ind: int, - color_dict: Mapping[str, Any], + color_dict: Mapping[str, Union[ColorLike, str, None]], ) -> None: """ Auxiliary method used to update color_dict. From d987473d388579c484c600ebedd2cd3281b7b4c6 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 12 Apr 2021 17:33:10 +0200 Subject: [PATCH 228/417] final changes, typed corrected and function deleted --- .../visualization/representation.py | 26 +++---------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index cbf3cd2b9..ed6d61d30 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -51,24 +51,6 @@ def __len__(self) -> int: pass -def _get_label_colors( - n_labels: int, - group_colors: Optional[Indexable[K, ColorLike]] = None, -) -> np.ndarray: - """Get the colors of each label.""" - if group_colors is None: - colormap = matplotlib.cm.get_cmap() - group_colors = colormap(np.arange(n_labels) / (n_labels - 1)) - elif len(group_colors) != n_labels: - raise ValueError( - "There must be a color in group_colors " - "for each of the labels that appear in " - "group.", - ) - - return group_colors - - def _get_color_info( fdata: T, group: Optional[Sequence[K]] = None, @@ -77,7 +59,7 @@ def _get_color_info( legend: bool = False, kwargs: Any = None, ) -> Tuple[ - Union[ColorLike, str, None], + Union[ColorLike, None], Optional[List[matplotlib.patches.Patch] ]]: @@ -309,7 +291,7 @@ def plot( self.sample_colors = sample_colors - color_dict: Mapping[str, Union[ColorLike, str, None]] = {} + color_dict: Mapping[str, Union[ColorLike, None]] = {} if self.fdata.dim_domain == 1: @@ -475,7 +457,7 @@ def plot( self.fdata, group, group_names, group_colors, legend, kwargs, ) - color_dict: Mapping[str, Union[ColorLike, str, None]] = {} + color_dict: Mapping[str, Union[ColorLike, None]] = {} if 
self.fdata.dim_domain == 1: @@ -518,7 +500,7 @@ def plot( def set_color_dict( sample_colors: Any, ind: int, - color_dict: Mapping[str, Union[ColorLike, str, None]], + color_dict: Mapping[str, Union[ColorLike, None]], ) -> None: """ Auxiliary method used to update color_dict. From 46fc6dc2a80f6dfb49a1db1ec899ebb4729be13d Mon Sep 17 00:00:00 2001 From: pedrorponga <32200195+pedrorponga@users.noreply.github.com> Date: Mon, 12 Apr 2021 19:18:11 +0200 Subject: [PATCH 229/417] Update _depth_classifiers.py --- skfda/ml/classification/_depth_classifiers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index 1b67f7cfd..61962e9cb 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -205,6 +205,10 @@ def fit(self, X: T, y: ndarray) -> DDClassifier[T]: self._classes = classes self.class_depth_methods_ = class_depth_methods + + if (self._classes != 2) { + raise ValueError + } dd_coordinates = [ depth_method.predict(X) From 6c9aee669910d3749b426d5b63ea8e05cc10283d Mon Sep 17 00:00:00 2001 From: pedrorponga <32200195+pedrorponga@users.noreply.github.com> Date: Mon, 12 Apr 2021 19:23:46 +0200 Subject: [PATCH 230/417] Update _depth_classifiers.py --- skfda/ml/classification/_depth_classifiers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index 61962e9cb..0fc0e4bb9 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -206,9 +206,8 @@ def fit(self, X: T, y: ndarray) -> DDClassifier[T]: self._classes = classes self.class_depth_methods_ = class_depth_methods - if (self._classes != 2) { + if (self._classes != 2): raise ValueError - } dd_coordinates = [ depth_method.predict(X) From 3f2f3538d6154a99de8b34467758e4d465456993 Mon Sep 17 00:00:00 2001 
From: pedrorponga <32200195+pedrorponga@users.noreply.github.com> Date: Mon, 12 Apr 2021 19:36:37 +0200 Subject: [PATCH 231/417] Update _depth_classifiers.py --- skfda/ml/classification/_depth_classifiers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index 0fc0e4bb9..aeae99c21 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -206,7 +206,7 @@ def fit(self, X: T, y: ndarray) -> DDClassifier[T]: self._classes = classes self.class_depth_methods_ = class_depth_methods - if (self._classes != 2): + if (len(self._classes) != 2): raise ValueError dd_coordinates = [ From 683c4248dc875a7cef2a7c7118fdb037b6fb5a7f Mon Sep 17 00:00:00 2001 From: pedrorponga <32200195+pedrorponga@users.noreply.github.com> Date: Mon, 12 Apr 2021 20:01:40 +0200 Subject: [PATCH 232/417] Update _depth_classifiers.py --- skfda/ml/classification/_depth_classifiers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index aeae99c21..9d3da972d 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -207,7 +207,7 @@ def fit(self, X: T, y: ndarray) -> DDClassifier[T]: self.class_depth_methods_ = class_depth_methods if (len(self._classes) != 2): - raise ValueError + raise ValueError("DDClassifier only accepts two classes.") dd_coordinates = [ depth_method.predict(X) From 27ed914636cca057fe7c83b3da9bae04eebd807d Mon Sep 17 00:00:00 2001 From: pedrorponga <32200195+pedrorponga@users.noreply.github.com> Date: Mon, 12 Apr 2021 20:44:45 +0200 Subject: [PATCH 233/417] Update _utils.py --- skfda/_utils/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 30449fb9c..fa6fd0fb9 100644 --- 
a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -523,7 +523,7 @@ def _evaluate_grid( # noqa: WPS234 axes_per_sample = cast(Iterable[GridPointsLike], axes) axes_per_sample = list(axes_per_sample) - + eval_points_tuple, shape_tuple = zip( *[ _one_grid_to_points(a, dim_domain=dim_domain) From 34d22fd90013f2af384a61d065eaadcc6d791ff2 Mon Sep 17 00:00:00 2001 From: pedrorponga <32200195+pedrorponga@users.noreply.github.com> Date: Mon, 12 Apr 2021 20:47:17 +0200 Subject: [PATCH 234/417] Update _depth_classifiers.py --- skfda/ml/classification/_depth_classifiers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/ml/classification/_depth_classifiers.py b/skfda/ml/classification/_depth_classifiers.py index 9d3da972d..acd85adf6 100644 --- a/skfda/ml/classification/_depth_classifiers.py +++ b/skfda/ml/classification/_depth_classifiers.py @@ -205,7 +205,7 @@ def fit(self, X: T, y: ndarray) -> DDClassifier[T]: self._classes = classes self.class_depth_methods_ = class_depth_methods - + if (len(self._classes) != 2): raise ValueError("DDClassifier only accepts two classes.") From b4b8f232d1ba2312883dc8fe80639f296682654e Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 13 Apr 2021 15:48:20 +0200 Subject: [PATCH 235/417] Fix domain restriction. 
--- skfda/representation/_functional_data.py | 12 +++---- skfda/representation/grid.py | 45 +++++++++++++++++++++--- tests/test_registration.py | 9 +++-- 3 files changed, 52 insertions(+), 14 deletions(-) diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index 916b1bbba..6776475cf 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -727,7 +727,6 @@ def shift( f"({self.n_samples})", ) - domain_range: DomainRangeLike if restrict_domain: domain = np.asarray(self.domain_range) @@ -737,9 +736,6 @@ def shift( domain = np.hstack((a, b)) domain_range = tuple(domain) - else: - domain_range = self.domain_range - if len(arr_shifts) == 1: shifted_grid_points = tuple( g + s for g, s in zip(grid_points, arr_shifts[0]) @@ -763,12 +759,16 @@ def shift( grid=True, ) - return self.to_grid().copy( + shifted = self.to_grid().copy( data_matrix=data_matrix, grid_points=grid_points, - domain_range=domain_range, ) + if restrict_domain: + shifted = shifted.restrict(domain_range) + + return shifted + def plot(self, *args: Any, **kwargs: Any) -> Any: """Plot the FDatGrid object. 
diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 71244e9b5..502bfa6e0 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -197,6 +197,18 @@ def __init__( # noqa: WPS211 if len(self._domain_range) != self.dim_domain: raise ValueError("Incorrect shape of domain_range.") + for domain_range, grid_points in zip( + self._domain_range, + self.grid_points, + ): + if ( + domain_range[0] > grid_points[0] + or domain_range[-1] < grid_points[-1] + ): + raise ValueError( + "Grid points must be within the domain range.", + ) + # Adjust the data matrix if the dimension of the image is one if self.data_matrix.ndim == 1 + self.dim_domain: self.data_matrix = self.data_matrix[..., np.newaxis] @@ -468,7 +480,7 @@ def _check_same_dimensions(self: T, other: T) -> None: if self.data_matrix.shape[1:-1] != other.data_matrix.shape[1:-1]: raise ValueError("Error in columns dimensions") if not np.array_equal(self.grid_points, other.grid_points): - raise ValueError("Sample points for both objects must be equal") + raise ValueError("Grid points for both objects must be equal") def sum( # noqa: WPS125 self: T, @@ -765,7 +777,7 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: ): raise ValueError( "All the FDataGrids must be sampled in the same " - "sample points.", + "grid points.", ) elif any(self.n_samples != other.n_samples for other in others): @@ -930,7 +942,7 @@ def copy( # noqa: WPS211 data_matrix = self.data_matrix if grid_points is None: - # Sample points won`t be writeable + # Grid points won`t be writeable grid_points = self.grid_points if domain_range is None: @@ -989,9 +1001,32 @@ def restrict( for ((a, b), (c, d)) in zip(domain_range, self.domain_range) ) - # We could in principle eliminate points outside the new range. + index_list = [] + new_grid_points = [] + + # Eliminate points outside the new range. 
+ for dr, grid_points in zip( + domain_range, + self.grid_points, + ): + keep_index = ( + (dr[0] <= grid_points) + & (grid_points <= dr[1]) + ) - return self.copy(domain_range=domain_range) + index_list.append(keep_index) + + new_grid_points.append( + grid_points[keep_index], + ) + + data_matrix = self.data_matrix[tuple([slice(None)] + index_list)] + + return self.copy( + domain_range=domain_range, + grid_points=new_grid_points, + data_matrix=data_matrix, + ) def shift( self, diff --git a/tests/test_registration.py b/tests/test_registration.py index f4fdb2636..0398899b3 100644 --- a/tests/test_registration.py +++ b/tests/test_registration.py @@ -306,14 +306,17 @@ def test_template(self): np.testing.assert_array_almost_equal(fd_registered_2.data_matrix, fd_registered_4.data_matrix) - def test_restrict_domain(self): + def test_restrict_domain(self) -> None: reg = ShiftRegistration(restrict_domain=True) fd_registered_1 = reg.fit_transform(self.fd) np.testing.assert_array_almost_equal( - np.array(fd_registered_1.domain_range).round(3), [[0.022, 0.97]]) + np.array(fd_registered_1.domain_range).round(3), [[0.022, 0.969]]) - reg2 = ShiftRegistration(restrict_domain=True, template=reg.template_) + reg2 = ShiftRegistration( + restrict_domain=True, + template=reg.template_.copy(domain_range=self.fd.domain_range), + ) fd_registered_2 = reg2.fit_transform(self.fd) np.testing.assert_array_almost_equal( From 8cba80d9f5d99e09fbc2baf5618eda8c4073d7ca Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 13 Apr 2021 20:30:48 +0200 Subject: [PATCH 236/417] Fix warning. --- tutorial/plot_skfda_sklearn.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tutorial/plot_skfda_sklearn.py b/tutorial/plot_skfda_sklearn.py index 82f5c9d30..1b51bf765 100644 --- a/tutorial/plot_skfda_sklearn.py +++ b/tutorial/plot_skfda_sklearn.py @@ -267,10 +267,8 @@ # .. 
warning:: # Other ensemble methods, such as # :class:`~sklearn.ensemble.BaggingClassifier` or -# :class:`~sklearn.ensemble.AdaBoostClassifier` construct estimators -# that only use a subset of the features. As in :term:`FDA` the features -# are the evaluations of the functions at different points, these -# estimators cannot work with functional data unless it has been +# :class:`~sklearn.ensemble.AdaBoostClassifier` cannot yet +# be used with functional data unless it has been # transformed to a multivariate dataset. ############################################################################## From 7b491476b4b135d21731668d2e9f312c13e2bceb Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 14 Apr 2021 01:05:48 +0200 Subject: [PATCH 237/417] Fix example. --- tutorial/plot_basis_representation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorial/plot_basis_representation.py b/tutorial/plot_basis_representation.py index bf3b626f4..357481d0f 100644 --- a/tutorial/plot_basis_representation.py +++ b/tutorial/plot_basis_representation.py @@ -174,7 +174,7 @@ X_basis = X.to_basis(basis) ax = axes.ravel()[n_basis - 1] - fig = X_basis.plot(axes=ax) + fig = X_basis.plot(ax=ax) ax.set_title(f"{n_basis} basis functions") fig.tight_layout() From aec9b182c23f619ce2758f7e9fb059f7df00dcec Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 14 Apr 2021 12:36:58 +0200 Subject: [PATCH 238/417] Modified tutorial. --- tutorial/plot_basis_representation.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tutorial/plot_basis_representation.py b/tutorial/plot_basis_representation.py index 357481d0f..5a73e2428 100644 --- a/tutorial/plot_basis_representation.py +++ b/tutorial/plot_basis_representation.py @@ -189,6 +189,14 @@ # Thus, it is recommended to take a look at the available basis in order to # pick one that provides the best representation of the original data. 
+############################################################################## +# First we will load a dataset to test the basis representations + +X, y = skfda.datasets.fetch_phoneme(return_X_y=True) + +# Select only the first 5 samples +X = X[:5] + ############################################################################## # Monomial basis # ^^^^^^^^^^^^^^ From 7df887a7c935cf930fd1633c7a14282aa3034695 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 14 Apr 2021 13:00:42 +0200 Subject: [PATCH 239/417] Plot the dataset. --- tutorial/plot_basis_representation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tutorial/plot_basis_representation.py b/tutorial/plot_basis_representation.py index 5a73e2428..1c02c6f16 100644 --- a/tutorial/plot_basis_representation.py +++ b/tutorial/plot_basis_representation.py @@ -190,10 +190,12 @@ # pick one that provides the best representation of the original data. ############################################################################## -# First we will load a dataset to test the basis representations +# First we will load a dataset to test the basis representations. X, y = skfda.datasets.fetch_phoneme(return_X_y=True) +X.plot() + # Select only the first 5 samples X = X[:5] From 6d1e8394230d79602d885c43225ecbfe740f6cfd Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 14 Apr 2021 13:41:31 +0200 Subject: [PATCH 240/417] Fix plot. 
--- tutorial/plot_basis_representation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tutorial/plot_basis_representation.py b/tutorial/plot_basis_representation.py index 1c02c6f16..ac85e8adf 100644 --- a/tutorial/plot_basis_representation.py +++ b/tutorial/plot_basis_representation.py @@ -194,11 +194,11 @@ X, y = skfda.datasets.fetch_phoneme(return_X_y=True) -X.plot() - # Select only the first 5 samples X = X[:5] +X.plot() + ############################################################################## # Monomial basis # ^^^^^^^^^^^^^^ From 67e19003a53092098188d86205dfa09187109655 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 14 Apr 2021 15:55:32 +0200 Subject: [PATCH 241/417] added base plot (not commented) --- skfda/exploratory/visualization/__init__.py | 1 + skfda/exploratory/visualization/_baseplot.py | 48 ++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 skfda/exploratory/visualization/_baseplot.py diff --git a/skfda/exploratory/visualization/__init__.py b/skfda/exploratory/visualization/__init__.py index 12901e183..3cf3414d5 100644 --- a/skfda/exploratory/visualization/__init__.py +++ b/skfda/exploratory/visualization/__init__.py @@ -1,6 +1,7 @@ """Initialization module of visualization folder.""" from . 
import clustering, representation +from ._baseplot import BasePlot from ._boxplot import Boxplot, SurfaceBoxplot from ._ddplot import DDPlot from ._magnitude_shape_plot import MagnitudeShapePlot diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py new file mode 100644 index 000000000..a784a4d75 --- /dev/null +++ b/skfda/exploratory/visualization/_baseplot.py @@ -0,0 +1,48 @@ +from abc import ABC, abstractmethod +from typing import List, Optional, Sequence, TypeVar, Union + +import matplotlib.pyplot as plt +from matplotlib.axes import Axes +from matplotlib.figure import Figure + +from ._utils import _figure_to_svg + +S = TypeVar('S', Figure, Axes, List[Axes]) + + +class BasePlot(ABC): + @abstractmethod + def __init__( + self, + ) -> None: + self.id_function = [] + + @abstractmethod + def plot( + self, + ) -> Figure: + pass + + @abstractmethod + def num_instances(self) -> int: + pass + + @abstractmethod + def set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + ) -> None: + pass + + def clear_ax(self) -> None: + for ax in self.axes: + ax.clear() + if len(self.id_function) != 0: + self.id_function = [] + + def _repr_svg_(self): + self.fig = self.plot() + plt.close(self.fig) + return _figure_to_svg(self.fig) From 9c88e25327a488a1369a746e7de2ccdf93a92912 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 14 Apr 2021 16:31:53 +0200 Subject: [PATCH 242/417] baseplot ended all commented --- skfda/exploratory/visualization/_baseplot.py | 58 ++++++++++++++++++-- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index a784a4d75..f39a7d6a6 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -1,5 +1,12 @@ +"""BasePlot Module. 
+ +This module contains the abstract class of which inherit all +the visualization modules, containing the basic functionality +common to all of them. +""" + from abc import ABC, abstractmethod -from typing import List, Optional, Sequence, TypeVar, Union +from typing import Optional, Sequence, Union import matplotlib.pyplot as plt from matplotlib.axes import Axes @@ -7,24 +14,43 @@ from ._utils import _figure_to_svg -S = TypeVar('S', Figure, Axes, List[Axes]) - class BasePlot(ABC): + """ + BasePlot class. + + Attributes: + id_function: list of PathCollection objects corresponding + to every instance of our plot. They will be used to modify + the visualization with interactivity and widgets. + fig: figure over with the graph is plotted. + axes: axis where the graph is plotted. + """ + @abstractmethod def __init__( self, ) -> None: self.id_function = [] + self.axes = None + self.fig = None @abstractmethod def plot( self, ) -> Figure: + """ + Abstract method used to plot the object and its data. + + Returns: + Figure: figure object in which the displays and + widgets will be plotted. + """ pass @abstractmethod def num_instances(self) -> int: + """Get the number of instances that will be used for interactivity.""" pass @abstractmethod @@ -34,15 +60,39 @@ def set_figure_and_axes( fig: Optional[Figure] = None, axes: Union[Axes, Sequence[Axes], None] = None, ) -> None: + """ + Initialize the axes and fig of the plot. + + Args: + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. + axes: axis where the graphs are plotted. If None, see param fig. + """ pass def clear_ax(self) -> None: + """ + Resets the basic attributes of the BasePlot. + + Clear the old axes of the BasePlot and reset the + id_function list. 
+ """ for ax in self.axes: ax.clear() if len(self.id_function) != 0: self.id_function = [] - def _repr_svg_(self): + def _repr_svg_(self) -> str: + """ + Automatically represents the object as an svg when calling it. + + Returns: + str: string containing the xml code used to get the svg. + """ self.fig = self.plot() plt.close(self.fig) return _figure_to_svg(self.fig) From 2ba18326de4f689f025c0bd6aed64bedfcf5c0ec Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 14 Apr 2021 16:34:55 +0200 Subject: [PATCH 243/417] corrected imperative --- skfda/exploratory/visualization/_baseplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index f39a7d6a6..1a60fddb3 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -76,7 +76,7 @@ def set_figure_and_axes( def clear_ax(self) -> None: """ - Resets the basic attributes of the BasePlot. + Reset the basic attributes of the BasePlot. Clear the old axes of the BasePlot and reset the id_function list. 
From 050693968de397c0f448d377b19ca3c80db016e7 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 14 Apr 2021 16:43:22 +0200 Subject: [PATCH 244/417] solved style errors --- skfda/exploratory/visualization/_baseplot.py | 18 +++++------------- skfda/exploratory/visualization/_utils.py | 2 +- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index 1a60fddb3..4f871a9ca 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -6,9 +6,10 @@ """ from abc import ABC, abstractmethod -from typing import Optional, Sequence, Union +from typing import List, Optional, Sequence, Union import matplotlib.pyplot as plt +from matplotlib.artist import Artist from matplotlib.axes import Axes from matplotlib.figure import Figure @@ -20,20 +21,16 @@ class BasePlot(ABC): BasePlot class. Attributes: - id_function: list of PathCollection objects corresponding + id_function: list of Artist objects corresponding to every instance of our plot. They will be used to modify the visualization with interactivity and widgets. - fig: figure over with the graph is plotted. - axes: axis where the graph is plotted. """ @abstractmethod def __init__( self, ) -> None: - self.id_function = [] - self.axes = None - self.fig = None + self.id_function: List[Artist] = [] @abstractmethod def plot( @@ -87,12 +84,7 @@ def clear_ax(self) -> None: self.id_function = [] def _repr_svg_(self) -> str: - """ - Automatically represents the object as an svg when calling it. - - Returns: - str: string containing the xml code used to get the svg. 
- """ + """Automatically represents the object as an svg when calling it.""" self.fig = self.plot() plt.close(self.fig) return _figure_to_svg(self.fig) diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 77f590c0c..72e20db9c 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -34,7 +34,7 @@ def _create_figure(): return fig -def _figure_to_svg(figure): +def _figure_to_svg(figure: Figure) -> str: """Return the SVG representation of a figure.""" old_canvas = figure.canvas From 18d975e6018c075eab418ab756037298a22fc4ce Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 14 Apr 2021 16:49:09 +0200 Subject: [PATCH 245/417] solved axes --- skfda/exploratory/visualization/_baseplot.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index 4f871a9ca..ed5441139 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -24,6 +24,8 @@ class BasePlot(ABC): id_function: list of Artist objects corresponding to every instance of our plot. They will be used to modify the visualization with interactivity and widgets. + fig: figure over with the graphs are plotted. + axes: sequence of axes where the graphs are plotted. 
""" @abstractmethod @@ -31,6 +33,8 @@ def __init__( self, ) -> None: self.id_function: List[Artist] = [] + self.fig: Figure = None + self.axes: Sequence[Axes] = [] @abstractmethod def plot( From 3246b8b77626f8afe209f8ac231ed18fcbb362a2 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Fri, 16 Apr 2021 13:16:13 +0200 Subject: [PATCH 246/417] changed optional --- skfda/exploratory/visualization/_baseplot.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index ed5441139..280d42365 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -21,11 +21,11 @@ class BasePlot(ABC): BasePlot class. Attributes: - id_function: list of Artist objects corresponding + id_function: List of Artist objects corresponding to every instance of our plot. They will be used to modify the visualization with interactivity and widgets. - fig: figure over with the graphs are plotted. - axes: sequence of axes where the graphs are plotted. + fig: Figure over with the graphs are plotted. + axes: Sequence of axes where the graphs are plotted. """ @abstractmethod @@ -33,7 +33,7 @@ def __init__( self, ) -> None: self.id_function: List[Artist] = [] - self.fig: Figure = None + self.fig: Optional[Figure] = None self.axes: Sequence[Axes] = [] @abstractmethod From d4b6c53c48a318cf25c6a2f35962cced71d58def Mon Sep 17 00:00:00 2001 From: VNMabus Date: Fri, 16 Apr 2021 14:35:40 +0200 Subject: [PATCH 247/417] Add overloads for anova. 
--- skfda/inference/anova/_anova_oneway.py | 98 ++++++++++++++++++-------- 1 file changed, 69 insertions(+), 29 deletions(-) diff --git a/skfda/inference/anova/_anova_oneway.py b/skfda/inference/anova/_anova_oneway.py index 41fe8f5d6..aa313843f 100644 --- a/skfda/inference/anova/_anova_oneway.py +++ b/skfda/inference/anova/_anova_oneway.py @@ -1,17 +1,23 @@ -from typing import List, Tuple, Union +from __future__ import annotations + +from typing import List, Tuple, Union, overload import numpy as np from sklearn.utils import check_random_state +from typing_extensions import Literal from ... import concatenate from ..._utils import RandomStateLike from ...datasets import make_gaussian_process from ...misc.metrics import lp_distance from ...representation import FData, FDataGrid +from ...representation._typing import ArrayLike -def v_sample_stat(fd: FData, weights: List[int], p: int = 2) -> float: +def v_sample_stat(fd: FData, weights: ArrayLike, p: int = 2) -> float: r""" + Compute sample statistic. + Calculates a statistic that measures the variability between groups of samples in a :class:`skfda.representation.FData` object. @@ -46,7 +52,6 @@ def v_sample_stat(fd: FData, weights: List[int], p: int = 2) -> float: ValueError Examples: - >>> from skfda.inference.anova import v_sample_stat >>> from skfda.representation.grid import FDataGrid >>> import numpy as np @@ -70,8 +75,8 @@ def v_sample_stat(fd: FData, weights: List[int], p: int = 2) -> float: [1] Antonio Cuevas, Manuel Febrero-Bande, and Ricardo Fraiman. "An anova test for functional data". 
*Computational Statistics Data Analysis*, 47:111-112, 02 2004 - """ + """ weights = np.asarray(weights) if not isinstance(fd, FData): raise ValueError("Argument type must inherit FData.") @@ -83,8 +88,10 @@ def v_sample_stat(fd: FData, weights: List[int], p: int = 2) -> float: return np.sum(coef * lp_distance(fd[t_ind[0]], fd[t_ind[1]], p=p) ** p) -def v_asymptotic_stat(fd: FData, weights: List[int], p: int = 2) -> float: +def v_asymptotic_stat(fd: FData, weights: ArrayLike, p: int = 2) -> float: r""" + Compute asymptitic statistic. + Calculates a statistic that measures the variability between groups of samples in a :class:`skfda.representation.FData` object. @@ -119,7 +126,6 @@ def v_asymptotic_stat(fd: FData, weights: List[int], p: int = 2) -> float: ValueError Examples: - >>> from skfda.inference.anova import v_asymptotic_stat >>> from skfda.representation.grid import FDataGrid >>> import numpy as np @@ -143,6 +149,7 @@ def v_asymptotic_stat(fd: FData, weights: List[int], p: int = 2) -> float: [1] Antonio Cuevas, Manuel Febrero-Bande, and Ricardo Fraiman. "An anova test for functional data". 
*Computational Statistics Data Analysis*, 47:111-112, 02 2004 + """ weights = np.asarray(weights) if not isinstance(fd, FData): @@ -173,8 +180,9 @@ def _anova_bootstrap( for fd in fd_grouped[1:]: if not np.array_equal(fd.domain_range, fd_grouped[0].domain_range): - raise ValueError("Domain range must match for every FData in " - "fd_grouped.") + raise ValueError( + "Domain range must match for every FData in fd_grouped.", + ) start, stop = fd_grouped[0].domain_range[0] @@ -197,10 +205,17 @@ def _anova_bootstrap( # Simulating n_reps observations for each of the n_groups gaussian # processes - sim = [make_gaussian_process(n_reps, n_features=n_features, start=start, - stop=stop, cov=k_est[i], - random_state=random_state) - for i in range(n_groups)] + sim = [ + make_gaussian_process( + n_reps, + n_features=n_features, + start=start, + stop=stop, + cov=k_est[i], + random_state=random_state, + ) + for i in range(n_groups) + ] v_samples = np.empty(n_reps) for i in range(n_reps): @@ -209,6 +224,30 @@ def _anova_bootstrap( return v_samples +@overload +def oneway_anova( + *args: FData, + n_reps: int = 2000, + return_dist: Literal[False] = False, + random_state: RandomStateLike = None, + p: int = 2, + equal_var: bool = True, +) -> Tuple[float, float]: + pass + + +@overload +def oneway_anova( + *args: FData, + n_reps: int = 2000, + return_dist: Literal[True], + random_state: RandomStateLike = None, + p: int = 2, + equal_var: bool = True, +) -> Tuple[float, float, np.ndarray]: + pass + + def oneway_anova( *args: FData, n_reps: int = 2000, @@ -218,7 +257,7 @@ def oneway_anova( equal_var: bool = True, ) -> Union[Tuple[float, float], Tuple[float, float, np.ndarray]]: r""" - Performs one-way functional ANOVA. + Perform one-way functional ANOVA. This function implements an asymptotic method to test the following null hypothesis: @@ -249,20 +288,15 @@ def oneway_anova( Args: args: The sample measurements for each each group. 
- n_reps: Number of simulations for the bootstrap procedure. Defaults to 2000 (This value may change in future versions). - return_dist: Flag to indicate if the function should return a numpy.array with the sampling distribution simulated. - random_state: Random state. - p: p of the lp norm. Must be greater or equal than 1. If p='inf' or p=np.inf it is used the L infinity metric. Defaults to 2. - equal_var: If True (default), perform a One-way ANOVA assuming the same covariance operator for all the groups, else considers an independent covariance operator for each group. @@ -293,8 +327,8 @@ def oneway_anova( [1] Antonio Cuevas, Manuel Febrero-Bande, and Ricardo Fraiman. "An anova test for functional data". *Computational Statistics Data Analysis*, 47:111-112, 02 2004 - """ + """ if len(args) < 2: raise ValueError("At least two groups must be passed as parameter.") if not all(isinstance(fd, FData) for fd in args): @@ -303,7 +337,7 @@ def oneway_anova( raise ValueError("Number of simulations must be positive.") fd_groups = args - if not all([isinstance(fd, type(fd_groups[0])) for fd in fd_groups[1:]]): + if not all(isinstance(fd, type(fd_groups[0])) for fd in fd_groups[1:]): raise TypeError('Found mixed FData types in arguments.') for fd in fd_groups[1:]: @@ -314,14 +348,16 @@ def oneway_anova( # Creating list with all the sample points list_sample = [fd.grid_points[0].tolist() for fd in fd_groups] # Checking that the all the entries in the list are the same - if not list_sample.count(list_sample[0]) == len(list_sample): - raise ValueError("All FDataGrid passed must have the same sample " - "points.") + if list_sample.count(list_sample[0]) != len(list_sample): + raise ValueError( + "All FDataGrid passed must have the same grid points.", + ) else: # If type is FDataBasis, check same basis list_basis = [fd.basis for fd in fd_groups] - if not list_basis.count(list_basis[0]) == len(list_basis): - raise NotImplementedError("Not implemented for FDataBasis with " - 
"different basis.") + if list_basis.count(list_basis[0]) != len(list_basis): + raise NotImplementedError( + "Not implemented for FDataBasis with different basis.", + ) # FData where each sample is the mean of each group fd_means = concatenate([fd.mean() for fd in fd_groups]) @@ -330,9 +366,13 @@ def oneway_anova( vn = v_sample_stat(fd_means, [fd.n_samples for fd in fd_groups], p=p) # Computing sampling distribution - simulation = _anova_bootstrap(fd_groups, n_reps, - random_state=random_state, p=p, - equal_var=equal_var) + simulation = _anova_bootstrap( + fd_groups, + n_reps, + random_state=random_state, + p=p, + equal_var=equal_var, + ) p_value = np.sum(simulation > vn) / len(simulation) From 65800d996b9ce746826d723ffac4fff13e412da1 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 18 Apr 2021 21:55:35 +0200 Subject: [PATCH 248/417] Typing and style for Hotelling tests. --- skfda/inference/hotelling/__init__.py | 6 +- .../hotelling/{hotelling.py => _hotelling.py} | 197 +++++++++++------- skfda/representation/_functional_data.py | 3 +- skfda/representation/basis/_fdatabasis.py | 2 +- skfda/representation/grid.py | 3 +- 5 files changed, 125 insertions(+), 86 deletions(-) rename skfda/inference/hotelling/{hotelling.py => _hotelling.py} (57%) diff --git a/skfda/inference/hotelling/__init__.py b/skfda/inference/hotelling/__init__.py index 6498f54bc..d637b2634 100644 --- a/skfda/inference/hotelling/__init__.py +++ b/skfda/inference/hotelling/__init__.py @@ -1,2 +1,4 @@ -from . import hotelling -from .hotelling import hotelling_t2, hotelling_test_ind +""" +Hotelling statistic and test. 
+""" +from ._hotelling import hotelling_t2, hotelling_test_ind diff --git a/skfda/inference/hotelling/hotelling.py b/skfda/inference/hotelling/_hotelling.py similarity index 57% rename from skfda/inference/hotelling/hotelling.py rename to skfda/inference/hotelling/_hotelling.py index f5fde264a..24258956a 100644 --- a/skfda/inference/hotelling/hotelling.py +++ b/skfda/inference/hotelling/_hotelling.py @@ -1,69 +1,79 @@ -from skfda.representation import FDataBasis, FData -import numpy as np import itertools -import scipy +from typing import Optional, Tuple, Union, overload + +import numpy as np from sklearn.utils import check_random_state +from typing_extensions import Literal +import scipy.special -def hotelling_t2(fd1, fd2): +from ..._utils import RandomStateLike +from ...representation import FData, FDataBasis + + +def hotelling_t2( + fd1: FData, + fd2: FData, +) -> float: r""" - Calculates Hotelling's :math:`T^2` over two samples in - :class:`skfda.representation.FData` objects with sizes :math:`n_1` - and :math:`n_2`. - - .. math:: - T^2 = n(\mathbf{m}_1 - \mathbf{m}_2)^\top \mathbf{W}^{1/2}( - \mathbf{W}^{1/2}\mathbf{K_{\operatorname{pooled}}} \mathbf{W}^{ - 1/2})^+ - \mathbf{W}^{1/2} (\mathbf{m}_1 - \mathbf{m}_2), - - where :math:`(\cdot)^{+}` indicates the Moore-Penrose pseudo-inverse - operator, :math:`n=n_1+n_2`, `W` is Gram matrix (identity in case of - discretized data), :math:`\mathbf{m}_1, \mathbf{m}_2` are the - means of each ample and :math:`\mathbf{K}_{\operatorname{pooled}}` - matrix is defined as - - .. math:: - \mathbf{K}_{\operatorname{pooled}} := - \cfrac{n_1 - 1}{n_1 + n_2 - 2} \mathbf{K}_{n_1} + - \cfrac{n_2 - 1}{n_1 + n_2 - 2} \mathbf{K}_{n_2}, - - where :math:`\mathbf{K}_{n_1}`, :math:`\mathbf{K}_{n_2}` are the sample - covariance matrices, computed with the basis coefficients or using - the discrete representation, depending on the input. - - This statistic is defined in Pini, Stamm and Vantini[1]. 
- - Args: - fd1 (FData): Object with the first sample. - fd2 (FData): Object containing second sample. - - Returns: - The value of the statistic. - - Raises: - TypeError. - - Examples: - - >>> from skfda.inference.hotelling import hotelling_t2 - >>> from skfda.representation import FDataGrid, basis - - >>> fd1 = FDataGrid([[1, 1, 1], [3, 3, 3]]) - >>> fd2 = FDataGrid([[3, 3, 3], [5, 5, 5]]) - >>> '%.2f' % hotelling_t2(fd1, fd2) - '2.00' - >>> fd1 = fd1.to_basis(basis.Fourier(n_basis=3)) - >>> fd2 = fd2.to_basis(basis.Fourier(n_basis=3)) - >>> '%.2f' % hotelling_t2(fd1, fd2) - '2.00' - - References: - [1] A. Pini, A. Stamm and S. Vantini, "Hotelling's t2 in - separable hilbert spaces", *Jounal of Multivariate Analysis*, - 167 (2018), pp.284-305. - - """ + Compute Hotelling's :math:`T^2` statistic. + + Calculates Hotelling's :math:`T^2` over two samples in + :class:`skfda.representation.FData` objects with sizes :math:`n_1` + and :math:`n_2`. + + .. math:: + T^2 = n(\mathbf{m}_1 - \mathbf{m}_2)^\top \mathbf{W}^{1/2}( + \mathbf{W}^{1/2}\mathbf{K_{\operatorname{pooled}}} \mathbf{W}^{ + 1/2})^+ + \mathbf{W}^{1/2} (\mathbf{m}_1 - \mathbf{m}_2), + + where :math:`(\cdot)^{+}` indicates the Moore-Penrose pseudo-inverse + operator, :math:`n=n_1+n_2`, `W` is Gram matrix (identity in case of + discretized data), :math:`\mathbf{m}_1, \mathbf{m}_2` are the + means of each ample and :math:`\mathbf{K}_{\operatorname{pooled}}` + matrix is defined as + + .. math:: + \mathbf{K}_{\operatorname{pooled}} := + \cfrac{n_1 - 1}{n_1 + n_2 - 2} \mathbf{K}_{n_1} + + \cfrac{n_2 - 1}{n_1 + n_2 - 2} \mathbf{K}_{n_2}, + + where :math:`\mathbf{K}_{n_1}`, :math:`\mathbf{K}_{n_2}` are the sample + covariance matrices, computed with the basis coefficients or using + the discrete representation, depending on the input. + + This statistic is defined in Pini, Stamm and Vantini[1]. + + Args: + fd1: Object with the first sample. + fd2: Object containing second sample. 
+ + Returns: + The value of the statistic. + + Raises: + TypeError. + + Examples: + >>> from skfda.inference.hotelling import hotelling_t2 + >>> from skfda.representation import FDataGrid, basis + + >>> fd1 = FDataGrid([[1, 1, 1], [3, 3, 3]]) + >>> fd2 = FDataGrid([[3, 3, 3], [5, 5, 5]]) + >>> '%.2f' % hotelling_t2(fd1, fd2) + '2.00' + >>> fd1 = fd1.to_basis(basis.Fourier(n_basis=3)) + >>> fd2 = fd2.to_basis(basis.Fourier(n_basis=3)) + >>> '%.2f' % hotelling_t2(fd1, fd2) + '2.00' + + References: + [1] A. Pini, A. Stamm and S. Vantini, "Hotelling's t2 in + separable hilbert spaces", *Jounal of Multivariate Analysis*, + 167 (2018), pp.284-305. + + """ if not isinstance(fd1, FData): raise TypeError("Argument type must inherit FData.") @@ -76,8 +86,9 @@ def hotelling_t2(fd1, fd2): if isinstance(fd1, FDataBasis): if fd1.basis != fd2.basis: - raise ValueError("Both FDataBasis objects must share the same " - "basis.") + raise ValueError( + "Both FDataBasis objects must share the same basis.", + ) # When working on basis representation we use the coefficients m = m.coefficients[0] k1 = np.cov(fd1.coefficients, rowvar=False) @@ -95,6 +106,8 @@ def hotelling_t2(fd1, fd2): k_pool = ((n1 - 1) * k1 + (n2 - 1) * k2) / (n - 2) # Combination of covs if isinstance(fd1, FDataBasis): + assert weights is not None + # Product of pooled covariance with the weights and Moore-Penrose inv. 
k_inv = np.linalg.pinv(np.linalg.multi_dot([weights, k_pool, weights])) k_inv = weights.dot(k_inv).dot(weights) @@ -102,12 +115,44 @@ def hotelling_t2(fd1, fd2): # If data is discrete no weights are needed k_inv = np.linalg.pinv(k_pool) - return n1 * n2 / n * m.T.dot(k_inv).dot(m)[0][0] - + return float(n1 * n2 / n * m.T.dot(k_inv).dot(m)[0][0]) + + +@overload +def hotelling_test_ind( + fd1: FData, + fd2: FData, + *, + n_reps: Optional[int] = None, + random_state: RandomStateLike = None, + return_dist: Literal[False] = False, +) -> Tuple[float, float]: + pass + + +@overload +def hotelling_test_ind( + fd1: FData, + fd2: FData, + *, + n_reps: Optional[int] = None, + random_state: RandomStateLike = None, + return_dist: Literal[True], +) -> Tuple[float, float, np.ndarray]: + pass + + +def hotelling_test_ind( + fd1: FData, + fd2: FData, + *, + n_reps: Optional[int] = None, + random_state: RandomStateLike = None, + return_dist: bool = False, +) -> Union[Tuple[float, float], Tuple[float, float, np.ndarray]]: + """ + Compute Hotelling :math:`T^2`-test. -def hotelling_test_ind(fd1, fd2, *, n_reps=None, random_state=None, - return_dist=False): - r""" Calculate the :math:`T^2`-test for the means of two independent samples of functional data. @@ -123,26 +168,20 @@ def hotelling_test_ind(fd1, fd2, *, n_reps=None, random_state=None, This procedure is from Pini, Stamm and Vantinni[1]. Args: - fd1,fd2 (FData): Samples of data. The FData objects must have the same + fd1: First sample of data. + fd2: Second sample of data. The data objects must have the same type. - - n_reps (int, optional): Maximum number of repetitions to compute + n_reps: Maximum number of repetitions to compute p-value. Default value is None. - - random_state (optional): Random state. - - return_dist (bool, optional): Flag to indicate if the function should + random_state: Random state. 
+ return_dist: Flag to indicate if the function should return a numpy.array with the values of the statistic computed over each permutation. - Returns: Value of the sample statistic, one tailed p-value and a collection of statistic values from permutations of the sample. - Return type: - (float, float, numpy.array) - Raises: TypeError: In case of bad arguments. diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index dd00b5cbc..f44236a11 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -30,7 +30,6 @@ from ._typing import ( ArrayLike, DomainRange, - DomainRangeLike, GridPointsLike, LabelTuple, LabelTupleLike, @@ -948,7 +947,7 @@ def compose( pass @abstractmethod - def __getitem__(self: T, key: Union[int, slice]) -> T: + def __getitem__(self: T, key: Union[int, slice, np.ndarray]) -> T: """Return self[key].""" pass diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 385a13001..d8d3e4574 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -767,7 +767,7 @@ def compose( return composition - def __getitem__(self: T, key: Union[int, slice]) -> T: + def __getitem__(self: T, key: Union[int, slice, np.ndarray]) -> T: """Return self[key].""" key = _check_array_key(self.coefficients, key) diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 7df386a2a..9622c1007 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -26,7 +26,6 @@ import numpy as np import pandas.api.extensions from matplotlib.figure import Figure -from typing_extensions import Literal import scipy.stats.mstats @@ -1237,7 +1236,7 @@ def __repr__(self) -> str: '\n ', ) - def __getitem__(self: T, key: Union[int, slice]) -> T: + def __getitem__(self: T, key: Union[int, slice, np.ndarray]) -> T: """Return self[key].""" key = _check_array_key(self.data_matrix, 
key) From 75e2d05b6b1901d8219d2564ab17adf97620834f Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 18 Apr 2021 23:45:43 +0200 Subject: [PATCH 249/417] Small fixes anova. --- skfda/inference/anova/_anova_oneway.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/skfda/inference/anova/_anova_oneway.py b/skfda/inference/anova/_anova_oneway.py index aa313843f..39ef66687 100644 --- a/skfda/inference/anova/_anova_oneway.py +++ b/skfda/inference/anova/_anova_oneway.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import List, Tuple, Union, overload +from typing import Tuple, Union, overload import numpy as np from sklearn.utils import check_random_state @@ -85,7 +85,13 @@ def v_sample_stat(fd: FData, weights: ArrayLike, p: int = 2) -> float: t_ind = np.tril_indices(fd.n_samples, -1) coef = weights[t_ind[1]] - return np.sum(coef * lp_distance(fd[t_ind[0]], fd[t_ind[1]], p=p) ** p) + return float(np.sum( + coef * lp_distance( + fd[t_ind[0]], + fd[t_ind[1]], + p=p, + ) ** p, + )) def v_asymptotic_stat(fd: FData, weights: ArrayLike, p: int = 2) -> float: @@ -163,7 +169,7 @@ def v_asymptotic_stat(fd: FData, weights: ArrayLike, p: int = 2) -> float: coef = np.sqrt(weights[t_ind[1]] / weights[t_ind[0]]) left_fd = fd[t_ind[1]] right_fd = fd[t_ind[0]] * coef - return np.sum(lp_distance(left_fd, right_fd, p=p) ** p) + return float(np.sum(lp_distance(left_fd, right_fd, p=p) ** p)) def _anova_bootstrap( From 01bd17f3ec807c06382ed2e0f75e335b8a3fa6e9 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 19 Apr 2021 20:26:35 +0200 Subject: [PATCH 250/417] Typing and style for RKHS-VS. 
--- .../dim_reduction/variable_selection/_rkvs.py | 144 +++++++++++------- 1 file changed, 87 insertions(+), 57 deletions(-) diff --git a/skfda/preprocessing/dim_reduction/variable_selection/_rkvs.py b/skfda/preprocessing/dim_reduction/variable_selection/_rkvs.py index afd9119cc..e85b0cf76 100644 --- a/skfda/preprocessing/dim_reduction/variable_selection/_rkvs.py +++ b/skfda/preprocessing/dim_reduction/variable_selection/_rkvs.py @@ -1,28 +1,37 @@ -import sklearn.utils.validation +from __future__ import annotations + +from typing import Tuple import numpy as np import numpy.linalg as linalg +import sklearn.utils.validation +from ...._utils import _classifier_get_classes from ....representation import FDataGrid -def _rkhs_vs(X, Y, n_features_to_select: int=1): - ''' - Parameters - ---------- - X - Matrix of trajectories - Y - Vector of class labels - n_features_to_select - Number of selected features - ''' +def _rkhs_vs( + X: np.ndarray, + Y: np.ndarray, + n_features_to_select: int = 1, +) -> Tuple[np.ndarray, np.ndarray]: + """ + RKHS-VS implementation. + + Parameters: + X: Matrix of trajectories + Y: Vector of class labels + n_features_to_select: Number of selected features + + Returns: + Selected features and vector of scores. 
+ """ X = np.atleast_2d(X) assert n_features_to_select >= 1 assert n_features_to_select <= X.shape[1] - Y = np.asarray(Y) + _, Y = _classifier_get_classes(Y) selected_features = np.zeros(n_features_to_select, dtype=int) score = np.zeros(n_features_to_select) @@ -32,8 +41,10 @@ def _rkhs_vs(X, Y, n_features_to_select: int=1): class_1_trajectories = X[Y.ravel() == 1] class_0_trajectories = X[Y.ravel() == 0] - means = (np.mean(class_1_trajectories, axis=0) - - np.mean(class_0_trajectories, axis=0)) + means = ( + np.mean(class_1_trajectories, axis=0) + - np.mean(class_0_trajectories, axis=0) + ) class_1_count = sum(Y) class_0_count = Y.shape[0] - class_1_count @@ -44,9 +55,12 @@ def _rkhs_vs(X, Y, n_features_to_select: int=1): # The result should be casted to 2D because of bug #11502 in numpy variances = ( class_1_proportion * np.atleast_2d( - np.cov(class_1_trajectories, rowvar=False, bias=True)) + - class_0_proportion * np.atleast_2d( - np.cov(class_0_trajectories, rowvar=False, bias=True))) + np.cov(class_1_trajectories, rowvar=False, bias=True), + ) + + class_0_proportion * np.atleast_2d( + np.cov(class_0_trajectories, rowvar=False, bias=True), + ) + ) # The first variable maximizes |mu(t)|/sigma(t) mu_sigma = np.abs(means) / np.sqrt(np.diag(variances)) @@ -59,14 +73,18 @@ def _rkhs_vs(X, Y, n_features_to_select: int=1): aux = np.zeros_like(indexes, dtype=np.float_) for j in range(0, indexes.shape[0]): - new_selection = np.concatenate([selected_features[0:i], - [indexes[j]]]) + new_selection = np.concatenate([ + selected_features[:i], + [indexes[j]], + ]) new_means = np.atleast_2d(means[new_selection]) lstsq_solution = linalg.lstsq( variances[new_selection[:, np.newaxis], new_selection], - new_means.T, rcond=None)[0] + new_means.T, + rcond=None, + )[0] aux[j] = new_means @ lstsq_solution @@ -78,9 +96,11 @@ def _rkhs_vs(X, Y, n_features_to_select: int=1): return selected_features, score -class RKHSVariableSelection(sklearn.base.BaseEstimator, - 
sklearn.base.TransformerMixin): - r''' +class RKHSVariableSelection( + sklearn.base.BaseEstimator, # type: ignore + sklearn.base.TransformerMixin, # type: ignore +): + r""" Reproducing kernel variable selection. This is a filter variable selection method for binary classification @@ -114,11 +134,9 @@ class RKHSVariableSelection(sklearn.base.BaseEstimator, a greedy approach, so this optimality is not always guaranteed. Parameters: - - n_features_to_select (int): number of features to select. + n_features_to_select: number of features to select. Examples: - >>> from skfda.preprocessing.dim_reduction import variable_selection >>> from skfda.datasets import make_gaussian_process >>> import skfda @@ -166,25 +184,30 @@ class RKHSVariableSelection(sklearn.base.BaseEstimator, (10000, 3) References: - .. [1] J. R. Berrendero, A. Cuevas, and J. L. Torrecilla, «On the Use of Reproducing Kernel Hilbert Spaces in Functional Classification», Journal of the American Statistical Association, vol. 113, no. 523, pp. 1210-1218, jul. 2018, doi: 10.1080/01621459.2017.1320287. 
- ''' + """ - def __init__(self, n_features_to_select: int=1): + def __init__(self, n_features_to_select: int = 1) -> None: self.n_features_to_select = n_features_to_select - def fit(self, X: FDataGrid, y): + def fit( # noqa: D102 + self, + X: FDataGrid, + y: np.ndarray, + ) -> RKHSVariableSelection: n_unique_labels = len(np.unique(y)) if n_unique_labels != 2: - raise ValueError(f"RK-VS can only be used when there are only " - f"two different labels, but there are " - f"{n_unique_labels}") + raise ValueError( + f"RK-VS can only be used when there are only " + f"two different labels, but there are " + f"{n_unique_labels}", + ) if X.dim_domain != 1 or X.dim_codomain != 1: raise ValueError("Domain and codomain dimensions must be 1") @@ -193,50 +216,57 @@ def fit(self, X: FDataGrid, y): self._features_shape_ = X.shape[1:] - self._features_, self._scores_ = _rkhs_vs( + features, scores = _rkhs_vs( X=X, Y=y, - n_features_to_select=self.n_features_to_select) + n_features_to_select=self.n_features_to_select, + ) + + self._features_ = features + self._scores_ = scores return self - def transform(self, X: FDataGrid, Y=None): + def transform( # noqa: D102 + self, + X: FDataGrid, + Y: None = None, + ) -> np.ndarray: sklearn.utils.validation.check_is_fitted(self) X_matrix = sklearn.utils.validation.check_array(X.data_matrix[..., 0]) if X_matrix.shape[1:] != self._features_shape_: - raise ValueError("The trajectories have a different number of " - "points than the ones fitted") + raise ValueError( + "The trajectories have a different number of " + "points than the ones fitted", + ) return X_matrix[:, self._features_] - def get_support(self, indices: bool=False): + def get_support(self, indices: bool = False) -> np.ndarray: """ - Get a mask, or integer index, of the features selected + Get a mask, or integer index, of the features selected. Parameters: - - indices : boolean (default False) - If True, the return value will be an array of integers, rather - than a boolean mask. 
+ indices: If True, the return value will be an array of integers, + rather than a boolean mask. Returns: - support : array - An index that selects the retained features from a `FDataGrid` - object. - If `indices` is False, this is a boolean array of shape - [# input features], in which an element is True iff its - corresponding feature is selected for retention. If `indices` - is True, this is an integer array of shape [# output features] - whose values are indices into the input feature vector. + An index that selects the retained features from a `FDataGrid` + object. + If `indices` is False, this is a boolean array of shape + [# input features], in which an element is True iff its + corresponding feature is selected for retention. If `indices` + is True, this is an integer array of shape [# output features] + whose values are indices into the input feature vector. """ features = self._features_ if indices: return features - else: - mask = np.zeros(self._features_shape_[0], dtype=bool) - mask[features] = True - return mask + + mask = np.zeros(self._features_shape_[0], dtype=bool) + mask[features] = True + return mask From ded15e6d46a734bae8e8af42bc06f697b98bcce0 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 19 Apr 2021 21:13:21 +0200 Subject: [PATCH 251/417] baseplot corrected as talked --- skfda/exploratory/visualization/_baseplot.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index 280d42365..71b687575 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -21,7 +21,7 @@ class BasePlot(ABC): BasePlot class. Attributes: - id_function: List of Artist objects corresponding + artists: List of Artist objects corresponding to every instance of our plot. They will be used to modify the visualization with interactivity and widgets. 
fig: Figure over with the graphs are plotted. @@ -32,7 +32,7 @@ class BasePlot(ABC): def __init__( self, ) -> None: - self.id_function: List[Artist] = [] + self.artists: List[Artist] = [] self.fig: Optional[Figure] = None self.axes: Sequence[Axes] = [] @@ -50,7 +50,7 @@ def plot( pass @abstractmethod - def num_instances(self) -> int: + def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" pass @@ -75,18 +75,6 @@ def set_figure_and_axes( """ pass - def clear_ax(self) -> None: - """ - Reset the basic attributes of the BasePlot. - - Clear the old axes of the BasePlot and reset the - id_function list. - """ - for ax in self.axes: - ax.clear() - if len(self.id_function) != 0: - self.id_function = [] - def _repr_svg_(self) -> str: """Automatically represents the object as an svg when calling it.""" self.fig = self.plot() From d12c0aec2fc63e641c19774b5e1e1bc623cb27ae Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 20 Apr 2021 18:10:46 +0200 Subject: [PATCH 252/417] change init --- skfda/exploratory/visualization/_baseplot.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index 71b687575..bd853ba50 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -31,10 +31,12 @@ class BasePlot(ABC): @abstractmethod def __init__( self, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, ) -> None: self.artists: List[Artist] = [] - self.fig: Optional[Figure] = None - self.axes: Sequence[Axes] = [] + self.fig = fig + self.axes = axes @abstractmethod def plot( From a6cce87a8e995980484ca3948f1ef69689318e9c Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 20 Apr 2021 18:58:53 +0200 Subject: [PATCH 253/417] numpy array artists --- skfda/exploratory/visualization/_baseplot.py | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index bd853ba50..4f9f40a51 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -9,6 +9,7 @@ from typing import List, Optional, Sequence, Union import matplotlib.pyplot as plt +import numpy as np from matplotlib.artist import Artist from matplotlib.axes import Axes from matplotlib.figure import Figure @@ -34,7 +35,7 @@ def __init__( fig: Optional[Figure] = None, axes: Union[Axes, Sequence[Axes], None] = None, ) -> None: - self.artists: List[Artist] = [] + self.artists: np.ndarray self.fig = fig self.axes = axes From 1d0a5a771e1263ddc574bfdc3fc8704739c858da Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 20 Apr 2021 19:02:20 +0200 Subject: [PATCH 254/417] coprrection --- skfda/exploratory/visualization/_baseplot.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index 4f9f40a51..5c34baf15 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -6,11 +6,10 @@ """ from abc import ABC, abstractmethod -from typing import List, Optional, Sequence, Union +from typing import Optional, Sequence, Union import matplotlib.pyplot as plt import numpy as np -from matplotlib.artist import Artist from matplotlib.axes import Axes from matplotlib.figure import Figure From cc27652bdd80124521ed9f30b51dd14b401ecd8e Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 20 Apr 2021 20:19:31 +0200 Subject: [PATCH 255/417] Add typing and style for Maxima Hunting. 
--- .../variable_selection/maxima_hunting.py | 98 ++++++++++++------- 1 file changed, 63 insertions(+), 35 deletions(-) diff --git a/skfda/preprocessing/dim_reduction/variable_selection/maxima_hunting.py b/skfda/preprocessing/dim_reduction/variable_selection/maxima_hunting.py index ff45b7e2d..bfd6377cb 100644 --- a/skfda/preprocessing/dim_reduction/variable_selection/maxima_hunting.py +++ b/skfda/preprocessing/dim_reduction/variable_selection/maxima_hunting.py @@ -1,20 +1,34 @@ -import dcor +"""Maxima Hunting dimensionality reduction and related methods.""" +from __future__ import annotations -import scipy.signal +from typing import Callable, Optional + +import numpy as np import sklearn.base import sklearn.utils -import numpy as np +import scipy.signal +from dcor import rowwise, u_distance_correlation_sqr from ....representation import FDataGrid +_DependenceMeasure = Callable[[np.ndarray, np.ndarray], np.ndarray] +_LocalMaximaSelector = Callable[[np.ndarray], np.ndarray] + + +def _compute_dependence( + X: np.ndarray, + y: np.ndarray, + *, + dependence_measure: _DependenceMeasure, +) -> np.ndarray: + """ + Compute dependence between points and target. -def _compute_dependence(X, y, *, dependence_measure): - ''' Computes the dependence of each point in each trajectory in X with the corresponding class label in Y. - ''' + """ # Move n_samples to the end # The shape is now input_shape + n_samples + n_output X = np.moveaxis(X, 0, -2) @@ -28,13 +42,13 @@ def _compute_dependence(X, y, *, dependence_measure): y = np.atleast_2d(y).T Y = np.array([y] * len(X)) - dependence_results = dcor.rowwise(dependence_measure, X, Y) + dependence_results = rowwise(dependence_measure, X, Y) return dependence_results.reshape(input_shape) -def select_local_maxima(X, *, order: int=1): - r''' +def select_local_maxima(X: np.ndarray, *, order: int = 1) -> np.ndarray: + r""" Compute local maxima of an array. Points near the boundary are considered maxima looking only at one side. 
@@ -43,13 +57,14 @@ def select_local_maxima(X, *, order: int=1): considered maxima. Parameters: - - X (numpy array): Where to compute the local maxima. - order (callable): How many points on each side to look, to check if + X: Where to compute the local maxima. + order: How many points on each side to look, to check if a point is a maximum in that interval. - Examples: + Returns: + Indexes of the local maxima. + Examples: >>> from skfda.preprocessing.dim_reduction.variable_selection.\ ... maxima_hunting import select_local_maxima >>> import numpy as np @@ -66,9 +81,12 @@ def select_local_maxima(X, *, order: int=1): >>> select_local_maxima(x, order=3).astype(np.int_) array([ 0, 5, 10]) - ''' + """ indexes = scipy.signal.argrelextrema( - X, comparator=np.greater_equal, order=order)[0] + X, + comparator=np.greater_equal, + order=order, + )[0] # Discard flat maxima = X[indexes] @@ -81,8 +99,11 @@ def select_local_maxima(X, *, order: int=1): return indexes[is_not_flat] -class MaximaHunting(sklearn.base.BaseEstimator, sklearn.base.TransformerMixin): - r''' +class MaximaHunting( + sklearn.base.BaseEstimator, # type: ignore + sklearn.base.TransformerMixin, # type: ignore +): + r""" Maxima Hunting variable selection. This is a filter variable selection method for problems with a target @@ -102,7 +123,6 @@ class MaximaHunting(sklearn.base.BaseEstimator, sklearn.base.TransformerMixin): original article [1]_. Parameters: - dependence_measure (callable): Dependence measure to use. By default, it uses the bias corrected squared distance correlation. local_maxima_selector (callable): Function to detect local maxima. The @@ -111,7 +131,6 @@ class MaximaHunting(sklearn.base.BaseEstimator, sklearn.base.TransformerMixin): different values of ``order``. Examples: - >>> from skfda.preprocessing.dim_reduction import variable_selection >>> from skfda.preprocessing.dim_reduction.variable_selection.\ ... 
maxima_hunting import select_local_maxima @@ -163,26 +182,29 @@ class MaximaHunting(sklearn.base.BaseEstimator, sklearn.base.TransformerMixin): (10000, 1) References: - .. [1] J. R. Berrendero, A. Cuevas, and J. L. Torrecilla, “Variable selection in functional data classification: a maxima-hunting proposal,” STAT SINICA, vol. 26, no. 2, pp. 619–638, 2016, doi: 10.5705/ss.202014.0014. - ''' + """ - def __init__(self, - dependence_measure=dcor.u_distance_correlation_sqr, - local_maxima_selector=select_local_maxima): + def __init__( + self, + dependence_measure: _DependenceMeasure = u_distance_correlation_sqr, + local_maxima_selector: _LocalMaximaSelector = select_local_maxima, + ) -> None: self.dependence_measure = dependence_measure self.local_maxima_selector = local_maxima_selector - def fit(self, X: FDataGrid, y): + def fit(self, X: FDataGrid, y: np.ndarray) -> MaximaHunting: # noqa: D102 self.features_shape_ = X.data_matrix.shape[1:] self.dependence_ = _compute_dependence( - X.data_matrix, y, - dependence_measure=self.dependence_measure) + X.data_matrix, + y, + dependence_measure=self.dependence_measure, + ) self.indexes_ = self.local_maxima_selector(self.dependence_) @@ -191,20 +213,26 @@ def fit(self, X: FDataGrid, y): return self - def get_support(self, indices: bool=False): + def get_support(self, indices: bool = False) -> np.ndarray: # noqa: D102 if indices: return self.indexes_ - else: - mask = np.zeros(self.features_shape_[0:-1], dtype=bool) - mask[self.indexes_] = True - return mask - def transform(self, X, y=None): + mask = np.zeros(self.features_shape_[:-1], dtype=bool) + mask[self.indexes_] = True + return mask + + def transform( # noqa: D102 + self, + X: FDataGrid, + y: Optional[np.ndarray] = None, + ) -> np.ndarray: sklearn.utils.validation.check_is_fitted(self) if X.data_matrix.shape[1:] != self.features_shape_: - raise ValueError("The trajectories have a different number of " - "points than the ones fitted") + raise ValueError( + "The trajectories 
have a different number of " + "points than the ones fitted", + ) return X.data_matrix[:, self.sorted_indexes_].reshape(X.n_samples, -1) From b1d06b40dec6676122ff2cffe7d285f4085c0981 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 21 Apr 2021 16:07:35 +0200 Subject: [PATCH 256/417] deleted method setfigureandaxes --- skfda/exploratory/visualization/_baseplot.py | 21 -------------------- 1 file changed, 21 deletions(-) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index 5c34baf15..5741afef1 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -56,27 +56,6 @@ def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" pass - @abstractmethod - def set_figure_and_axes( - self, - chart: Union[Figure, Axes, None] = None, - fig: Optional[Figure] = None, - axes: Union[Axes, Sequence[Axes], None] = None, - ) -> None: - """ - Initialize the axes and fig of the plot. - - Args: - chart: figure over with the graphs are plotted or axis over - where the graphs are plotted. If None and ax is also - None, the figure is initialized. - fig: figure over with the graphs are plotted in case ax is not - specified. If None and ax is also None, the figure is - initialized. - axes: axis where the graphs are plotted. If None, see param fig. 
- """ - pass - def _repr_svg_(self) -> str: """Automatically represents the object as an svg when calling it.""" self.fig = self.plot() From f24ad7ccdf556a1c99bfea92b56e1794450cdcc0 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 22 Apr 2021 10:57:34 +0200 Subject: [PATCH 257/417] done From 86abdbfea0824d5debb213cbd9f6081f7e8899b6 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 22 Apr 2021 10:59:40 +0200 Subject: [PATCH 258/417] new outliegram --- .../exploratory/visualization/_outliergram.py | 196 +++++++++++++----- 1 file changed, 143 insertions(+), 53 deletions(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 7eb4f80f8..0d854feb8 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -7,7 +7,7 @@ magnitude outliers, but there is a necessity of capturing this other type. """ -from typing import Optional, Union +from typing import Optional, Sequence, Union import numpy as np import scipy.integrate as integrate @@ -17,10 +17,11 @@ from ... import FDataGrid from ..depth._depth import ModifiedBandDepth +from ._baseplot import BasePlot from ._utils import _get_figure_and_axes, _set_figure_layout_for_fdata -class Outliergram: +class Outliergram(BasePlot): """ Outliergram method of visualization. @@ -30,14 +31,25 @@ class Outliergram: this curve. Args: fdata: functional data set that we want to examine. + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. + axes: axis where the graphs are plotted. If None, see param fig. + n_rows: designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. 
+ n_cols: designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. Attributes: mbd: result of the calculation of the Modified Band Depth on our - dataset. Represents the mean time a curve stays between all the - possible pair of curves we have in our data set, being a good - measure of centrality. + dataset. Represents the mean time a curve stays between other pair + of curves, being a good measure of centrality. mei: result of the calculation of the Modified Epigraph Index on our - dataset. Represents the mean time a curve stays below each curve - in our dataset. + dataset. Represents the mean time a curve stays below other curve. References: López-Pintado S., Romo J.. (2011). A half-region depth for functional data, Computational Statistics & Data Analysis, volume 55 @@ -50,22 +62,39 @@ class Outliergram: def __init__( self, fdata: FDataGrid, + *, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Optional[Sequence[Axes]] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + **kwargs, ) -> None: + BasePlot.__init__(self) self.fdata = fdata self.depth = ModifiedBandDepth() self.depth.fit(fdata) self.mbd = self.depth(fdata) self.mei = self.modified_epigraph_index_list() + if self.mbd.size != self.mei.size: + raise ValueError( + "The size of mbd and mei should be the same.", + ) + self.n = self.mbd.size + distances, parable = self.compute_distances() + self.distances = distances + mei_ordered = self.mei[:] + mei_ordered, parable = ( + list(el) for el in zip(*sorted(zip(mei_ordered, parable))) + ) + self.parable = parable + self.mei_ordered = mei_ordered + self.compute_outliergram() + + self.set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( self, - chart: Union[Figure, Axes, None] = None, - *, - fig: Optional[Figure] = None, - axes: Optional[Axes] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, - 
**kwargs, ) -> Figure: """ Plot Outliergram. @@ -74,57 +103,45 @@ def plot( Epigraph Index (MEI) on the X axis. This points will create the form of a parabola. The shape outliers will be the points that appear far from this curve. - Args: - chart: figure over - with the graphs are plotted or axis over where the graphs are - plotted. If None and ax is also None, the figure is - initialized. - fig: figure over with the graphs are - plotted in case ax is not specified. If None and ax is also - None, the figure is initialized. - axes: axis where the graphs - are plotted. If None, see param fig. - n_rows: designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols: designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. - kwargs: if dim_domain is 1, keyword arguments to be passed to the - matplotlib.pyplot.plot function; if dim_domain is 2, keyword - arguments to be passed to the matplotlib.pyplot.plot_surface - function. Returns: fig: figure object in which the depths will be scattered. 
""" - fig, axes_list = _get_figure_and_axes(chart, fig, axes) - fig, axes_list = _set_figure_layout_for_fdata( - self.fdata, fig, axes_list, n_rows, n_cols, + self.artists = [] + self.axScatter = self.axes[0] + + for i in range(self.mei.size): + self.artists.append(self.axScatter.scatter( + self.mei[i], + self.mbd[i], + picker=2, + )) + + self.axScatter.plot( + self.mei_ordered, + self.parable, ) - self.fig = fig - self.axes = axes_list - ax = self.axes[0] - - ax.scatter( - self.mei, - self.mbd, - **kwargs, + self.axScatter.plot( + self.mei_ordered, + self.shifted_parable, + linestyle='dashed', ) # Set labels of graph if self.fdata.dataset_name is not None: - fig.suptitle(self.fdata.dataset_name) - ax.set_xlabel("MEI") - ax.set_ylabel("MBD") - ax.set_xlim([0, 1]) - ax.set_ylim([ + self.axScatter.set_title(self.fdata.dataset_name) + else: + self.axScatter.set_title("Outliergram") + self.axScatter.set_xlabel("MEI") + self.axScatter.set_ylabel("MBD") + self.axScatter.set_xlim([0, 1]) + self.axScatter.set_ylim([ self.depth.min, self.depth.max, ]) - return fig + return self.fig def modified_epigraph_index_list(self) -> np.ndarray: """ @@ -139,14 +156,16 @@ def modified_epigraph_index_list(self) -> np.ndarray: - self.fdata.domain_range[0][0] ) - function = rankdata( + # Array containing at each point the number of curves + # are above it. + num_functions_above = rankdata( -self.fdata.data_matrix, method='max', axis=0, ) - 1 integrand = integrate.simps( - function, + num_functions_above, x=self.fdata.grid_points[0], axis=1, ) @@ -154,3 +173,74 @@ def modified_epigraph_index_list(self) -> np.ndarray: integrand /= (interval_len * self.fdata.n_samples) return integrand.flatten() + + def compute_distances(self) -> np.ndarray: + """ + Calculate the distances of each point towards the parabola. + + The distances can be calculated with function: + d_i = a_0 + a_1* mei_i + n^2* a_2* mei_i^2 - mb_i. 
+ """ + distances = [] + parable = [] + a_0 = -2 / (self.n * (self.n - 1)) + a_1 = (2 * (self.n + 1)) / (self.n - 1) + a_2 = a_0 + + for mbd_item, mei_item in zip(self.mbd, self.mei): + p_i = ( + a_0 + a_1 * mei_item + pow(self.n, 2) * a_2 * pow(mei_item, 2) + ) + distances.append(p_i - mbd_item) + parable.append(p_i) + return distances, parable + + def compute_outliergram(self): + """Compute the parabola under which the outliers lie.""" + percentile_25 = 25 + percentile_75 = 75 + first_quartile = np.percentile(self.distances, percentile_25) + third_quartile = np.percentile(self.distances, percentile_75) + iqr = third_quartile - first_quartile + self.shifted_parable = self.parable - (third_quartile + iqr) + + def n_samples(self) -> int: + """Get the number of instances that will be used for interactivity.""" + return self.fdata.n_samples + + def set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + ) -> None: + """ + Initialize the axes and fig of the plot. + + Args: + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. + axes: axis where the graphs are plotted. If None, see param fig. + n_rows: designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols: designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. 
+ """ + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata( + fdata=self.fdata, + fig=fig, + axes=axes, + n_rows=n_rows, + n_cols=n_cols, + ) + self.fig = fig + self.axes = axes From 2f5e5309dafd65723b036a9f36a16eec4481b95b Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 22 Apr 2021 11:20:17 +0200 Subject: [PATCH 259/417] outliergram corrected init --- skfda/exploratory/visualization/_outliergram.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 0d854feb8..146d85950 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -65,7 +65,7 @@ def __init__( *, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, - axes: Optional[Sequence[Axes]] = None, + axes: Optional[Axes] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, **kwargs, @@ -195,7 +195,7 @@ def compute_distances(self) -> np.ndarray: parable.append(p_i) return distances, parable - def compute_outliergram(self): + def compute_outliergram(self) -> None: """Compute the parabola under which the outliers lie.""" percentile_25 = 25 percentile_75 = 75 From 2f64c048c88bf4ad27b19a7dae18775631069ed8 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 22 Apr 2021 11:44:30 +0200 Subject: [PATCH 260/417] artist is np.array --- skfda/exploratory/visualization/_baseplot.py | 2 +- skfda/exploratory/visualization/_outliergram.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index 5741afef1..f7e68fc5e 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -34,7 +34,7 @@ def __init__( fig: Optional[Figure] = None, axes: Union[Axes, Sequence[Axes], None] = None, ) -> None: - 
self.artists: np.ndarray + self.artists: np.array self.fig = fig self.axes = axes diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 146d85950..bf19950d6 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -107,11 +107,11 @@ def plot( fig: figure object in which the depths will be scattered. """ - self.artists = [] + self.artists = np.array([]) self.axScatter = self.axes[0] for i in range(self.mei.size): - self.artists.append(self.axScatter.scatter( + self.artists = np.append(self.artists, self.axScatter.scatter( self.mei[i], self.mbd[i], picker=2, From 3617c8fde7cb03bf07185ebdadbf926fe51ea7d5 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 22 Apr 2021 12:09:16 +0200 Subject: [PATCH 261/417] ddplot ended --- skfda/exploratory/visualization/_ddplot.py | 87 ++++++++++++++-------- 1 file changed, 56 insertions(+), 31 deletions(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 7c0be7c02..246b31173 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -7,17 +7,19 @@ from typing import Optional, TypeVar, Union +import numpy as np from matplotlib.axes import Axes from matplotlib.figure import Figure from ...exploratory.depth.multivariate import Depth from ...representation._functional_data import FData +from ._baseplot import BasePlot from ._utils import _get_figure_and_axes, _set_figure_layout_for_fdata T = TypeVar('T', bound=FData) -class DDPlot: +class DDPlot(BasePlot): """ DDPlot visualization. @@ -33,6 +35,13 @@ class DDPlot: we want to use to compute the depth (Depth Y). depth_method: method that will be used to compute the depths of the data with respect to the distributions. + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. 
If None and ax is also + None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. + axes: axis where the graphs are plotted. If None, see param fig. Attributes: depth_dist1: result of the calculation of the depth_method into our first distribution (dist1). @@ -46,7 +55,11 @@ def __init__( dist1: T, dist2: T, depth_method: Depth[T], + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Optional[Axes] = None, ) -> None: + BasePlot.__init__(self) self.fdata = fdata self.depth_method = depth_method self.depth_method.fit(fdata) @@ -56,14 +69,10 @@ def __init__( self.depth_dist2 = self.depth_method( self.fdata, distribution=dist2, ) + self.set_figure_and_axes(chart, fig, axes) def plot( self, - chart: Union[Figure, Axes, None] = None, - *, - fig: Optional[Figure] = None, - ax: Optional[Axes] = None, - **kwargs, ) -> Figure: """ Plot DDPlot graph. @@ -72,41 +81,26 @@ def plot( distributions,one in each axis. It is useful to understand how our data is more related with one subset of data / distribution than another one. - Args: - chart: figure over with the graphs are plotted or axis over - where the graphs are plotted. If None and ax is also - None, the figure is initialized. - fig: figure over with the graphs are plotted in case ax is not - specified. If None and ax is also None, the figure is - initialized. - ax: axis where the graphs are plotted. If None, see param fig. - kwargs: if dim_domain is 1, keyword arguments to be passed to the - matplotlib.pyplot.plot function; if dim_domain is 2, keyword - arguments to be passed to the matplotlib.pyplot.plot_surface - function. Returns: fig (figure object): figure object in which the depths will be scattered. 
""" + self.artists = np.array([]) margin = 0.025 width_aux_line = 0.35 color_aux_line = "gray" - fig, axes = _get_figure_and_axes(chart, fig, ax) - fig, axes = _set_figure_layout_for_fdata( - self.fdata, fig, axes, - ) - - ax = axes[0] + ax = self.axes[0] - ax.scatter( - self.depth_dist1, - self.depth_dist2, - **kwargs, - ) + for d1, d2 in zip(self.depth_dist1, self.depth_dist2): + self.artists = np.append(self.artists, ax.scatter( + d1, + d2, + picker=2, + )) # Set labels of graph - fig.suptitle("DDPlot") + ax.set_title("DDPlot") ax.set_xlabel("X depth") ax.set_ylabel("Y depth") ax.set_xlim( @@ -127,4 +121,35 @@ def plot( color=color_aux_line, ) - return fig + return self.fig + + def n_samples(self) -> int: + """Get the number of instances that will be used for interactivity.""" + return self.fdata.n_samples + + def set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Optional[Axes] = None, + ) -> None: + """ + Initialize the axes and fig of the plot. + + Args: + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. + axes: axis where the graphs are plotted. If None, see param fig. 
+ """ + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata( + fdata=self.fdata, + fig=fig, + axes=axes, + ) + self.fig = fig + self.axes = axes From 9ebe55c3379250ab59fd526be63dfaaf8a2122e7 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 22 Apr 2021 12:10:52 +0200 Subject: [PATCH 262/417] final msplot --- .../visualization/_magnitude_shape_plot.py | 88 +++++++++++++------ 1 file changed, 60 insertions(+), 28 deletions(-) diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py index 7e7c53dc6..489d0c244 100644 --- a/skfda/exploratory/visualization/_magnitude_shape_plot.py +++ b/skfda/exploratory/visualization/_magnitude_shape_plot.py @@ -5,19 +5,24 @@ detection method is implemented. """ +from typing import Optional, Sequence, Union import matplotlib import matplotlib.pyplot as plt import numpy as np +from matplotlib.axes import Axes +from matplotlib.figure import Figure +from ... import FDataGrid from ..outliers import DirectionalOutlierDetector -from ._utils import _figure_to_svg, _get_figure_and_axes, _set_figure_layout +from ._baseplot import BasePlot +from ._utils import _get_figure_and_axes, _set_figure_layout __author__ = "Amanda Hernando Bernabé" __email__ = "amanda.hernando@estudiante.uam.es" -class MagnitudeShapePlot: +class MagnitudeShapePlot(BasePlot): r"""Implementation of the magnitude-shape plot This plot, which is based on the calculation of the :func:`directional @@ -158,14 +163,23 @@ class MagnitudeShapePlot: """ - def __init__(self, fdatagrid, **kwargs): + def __init__( + self, + fdatagrid: FDataGrid, + *, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Optional[Sequence[Axes]] = None, + **kwargs, + ): """Initialization of the MagnitudeShapePlot class. Args: fdatagrid (FDataGrid): Object containing the data. 
- multivariate_depth (:ref:`depth measure `, optional): - Method used to order the data. Defaults to :class:`projection - depth `. + multivariate_depth (:ref:`depth measure `, + optional): Method used to order the data. Defaults to + :class:`projection depth + `. pointwise_weights (array_like, optional): an array containing the weights of each points of discretisati on where values have been recorded. @@ -190,9 +204,16 @@ def __init__(self, fdatagrid, **kwargs): If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random. By default, it is 0. + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. + axes: axis where the graphs are plotted. If None, see param fig. """ - + BasePlot.__init__(self) if fdatagrid.dim_codomain > 1: raise NotImplementedError( "Only support 1 dimension on the codomain.") @@ -207,11 +228,13 @@ def __init__(self, fdatagrid, **kwargs): self._outliers = outliers self._colormap = plt.cm.get_cmap('seismic') self._color = 0.2 - self._outliercol = 0.8, + self._outliercol = 0.8 self.xlabel = 'MO' self.ylabel = 'VO' self.title = 'MS-Plot' + self.set_figure_and_axes(chart, fig, axes) + @property def fdatagrid(self): return self._fdatagrid @@ -270,34 +293,48 @@ def outliercol(self, value): "outcol must be a number between 0 and 1.") self._outliercol = value - def plot(self, chart=None, *, fig=None, axes=None,): + def plot(self): """Visualization of the magnitude shape plot of the fdatagrid. - Args: - ax (axes object, optional): axes over where the graph is plotted. - Defaults to matplotlib current axis. - Returns: fig (figure object): figure object in which the graph is plotted. 
""" - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout(fig, axes) - + self.artists = np.array([]) colors = np.zeros((self.fdatagrid.n_samples, 4)) colors[np.where(self.outliers == 1)] = self.colormap(self.outliercol) colors[np.where(self.outliers == 0)] = self.colormap(self.color) colors_rgba = [tuple(i) for i in colors] - axes[0].scatter(self.points[:, 0].ravel(), self.points[:, 1].ravel(), - color=colors_rgba) - - axes[0].set_xlabel(self.xlabel) - axes[0].set_ylabel(self.ylabel) - axes[0].set_title(self.title) - return fig + for i in range(len(self.points[:, 0].ravel())): + self.artists = np.append(self.artists, self.axes[0].scatter( + self.points[:, 0].ravel()[i], + self.points[:, 1].ravel()[i], + color=colors_rgba[i], + picker=2, + )) + + self.axes[0].set_xlabel(self.xlabel) + self.axes[0].set_ylabel(self.ylabel) + self.axes[0].set_title(self.title) + + return self.fig + + def n_samples(self) -> int: + return self.fdatagrid.n_samples + + def set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + ) -> None: + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout(fig, axes) + self.fig = fig + self.axes = axes def __repr__(self): """Return repr(self).""" @@ -314,8 +351,3 @@ def __repr__(self): f"\nxlabel={repr(self.xlabel)}," f"\nylabel={repr(self.ylabel)}," f"\ntitle={repr(self.title)})").replace('\n', '\n ') - - def _repr_svg_(self): - fig = self.plot() - plt.close(fig) - return _figure_to_svg(fig) From c4e36d3909f7d7b8c12b0ccd226adf69d78e6a8a Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 22 Apr 2021 12:12:28 +0200 Subject: [PATCH 263/417] representation ended --- .../visualization/representation.py | 333 +++++++++++------- 1 file changed, 211 insertions(+), 122 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py 
index ed6d61d30..4f50f5a77 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -29,6 +29,7 @@ from ..._utils import _to_domain_range, constants from ...representation._functional_data import FData from ...representation._typing import DomainRangeLike, GridPointsLike +from ._baseplot import BasePlot from ._utils import ( ColorLike, _get_figure_and_axes, @@ -59,9 +60,9 @@ def _get_color_info( legend: bool = False, kwargs: Any = None, ) -> Tuple[ - Union[ColorLike, None], - Optional[List[matplotlib.patches.Patch] - ]]: + Union[ColorLike, None], + Optional[List[matplotlib.patches.Patch]], +]: patches = None @@ -119,63 +120,89 @@ def _get_color_info( return sample_colors, patches -class GraphPlot: +class GraphPlot(BasePlot): """ - Class used to plot the FDataGrid object graph as hypersurfaces. + Class used to plot the FDatGrid object graph as hypersurfaces. When plotting functional data, we can either choose manually a color, a group of colors for the representations. Besides, we can use a list of variables (depths, scalar regression targets...) can be used as an argument to display the functions wtih a gradient of colors. - Args: fdata: functional data set that we want to plot. - gradient_values: list of real values used to determine the color - in which each of the instances will be plotted. + gradient_color_list: list of real values used to determine the color + in which each of the instances will be plotted. The size max_grad: maximum value that the gradient_list can take, it will be - used to normalize the ``gradient_values``. If not + used to normalize the gradient_color_list in order to get values + thatcan be used in the funcion colormap.__call__(). If not declared it will be initialized to the maximum value of gradient_list min_grad: minimum value that the gradient_list can take, it will be - used to normalize the ``gradient_values``. 
If not + used to normalize the gradient_color_list in order to get values + thatcan be used in the funcion colormap.__call__(). If not declared it will be initialized to the minimum value of gradient_list. - + chart (figure object, axe or list of axes, optional): figure over + with the graphs are plotted or axis over where the graphs are + plotted. If None and ax is also None, the figure is + initialized. + fig (figure object, optional): figure over with the graphs are + plotted in case ax is not specified. If None and ax is also + None, the figure is initialized. + axes (axis object, optional): axis over where the graphs + are plotted. If None, see param fig. + n_rows (int, optional): designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols(int, optional): designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. + Attributes: + gradient_list: normalization of the values from gradient color_list + that will be used to determine the intensity of the color + each function will have. 
""" def __init__( self, fdata: FData, - gradient_values: Optional[Sequence[float]] = None, + gradient_color_list: Union[Sequence[float], None] = None, max_grad: Optional[float] = None, min_grad: Optional[float] = None, + chart: Union[Figure, Axes, None] = None, + *, + fig: Optional[Figure] = None, + axes: Optional[Axes] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, ) -> None: + BasePlot.__init__(self) self.fdata = fdata - self.gradient_values = gradient_values - if self.gradient_values is not None: - if len(self.gradient_values) != fdata.n_samples: + self.gradient_color_list = gradient_color_list + if self.gradient_color_list is not None: + if len(self.gradient_color_list) != fdata.n_samples: raise ValueError( - "The length of the gradient color" - "list should be the same as the number" + "The length of the gradient color", + "list should be the same as the number", "of samples in fdata", ) if min_grad is None: - self.min_grad = min(self.gradient_values) + self.min_grad = min(self.gradient_color_list) else: self.min_grad = min_grad if max_grad is None: - self.max_grad = max(self.gradient_values) + self.max_grad = max(self.gradient_color_list) else: self.max_grad = max_grad aux_list = [ grad_color - self.min_grad - for grad_color in self.gradient_values + for grad_color in self.gradient_color_list ] - self.gradient_list = ( + self.gradient_list: Sequence[float] = ( [ aux / (self.max_grad - self.min_grad) for aux in aux_list @@ -183,15 +210,11 @@ def __init__( ) else: self.gradient_list = [] + self.set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( self, - chart: Union[Figure, Axes, None] = None, *, - fig: Optional[Figure] = None, - ax: Optional[Axes] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, n_points: Union[int, Tuple[int, int], None] = None, domain_range: Optional[DomainRangeLike] = None, group: Optional[Sequence[K]] = None, @@ -210,50 +233,35 @@ def plot( visualizations, one that displays the 
functions without any criteria choosing the colors and a new one that displays the function with a gradient of colors depending on the initial - gradient_values (normalized in gradient_list). - + gradient_color_list (normalized in gradient_list). Args: - chart: figure over - with the graphs are plotted or axis over where the graphs are - plotted. If None and ax is also None, the figure is - initialized. - fig : figure over with the graphs are - plotted in case ax is not specified. If None and ax is also - None, the figure is initialized. - ax: axis over where the graphs are plotted. If None, see param fig. - n_rows : designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols: designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. - n_points: Number of points to evaluate in + n_points (int or tuple, optional): Number of points to evaluate in the plot. In case of surfaces a tuple of length 2 can be pased with the number of points to plot in each axis, otherwise the same number of points will be used in the two axes. By default in unidimensional plots will be used 501 points; in surfaces will be used 30 points per axis, wich makes a grid with 900 points. - domain_range: Range where the + domain_range (tuple or list of tuples, optional): Range where the function will be plotted. In objects with unidimensional domain the domain range should be a tuple with the bounds of the interval; in the case of surfaces a list with 2 tuples with the ranges for each dimension. Default uses the domain range of the functional object. - group: contains integers from [0 to number of + group (list of int): contains integers from [0 to number of labels) indicating to which group each sample belongs to. Then, the samples with the same label are plotted in the same color. 
If None, the default value, each sample is plotted in the color assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors: colors in which groups are + group_colors (list of colors): colors in which groups are represented, there must be one for each group. If None, each group is shown with distict colors in the "Greys" colormap. - group_names: name of each of the groups which appear + group_names (list of str): name of each of the groups which appear in a legend, there must be one for each one. Defaults to None and the legend is not shown. Implies `legend=True`. colormap_name: name of the colormap to be used. By default we will use autumn. - legend: if `True`, show a legend with the groups. If + legend (bool): if `True`, show a legend with the groups. If `group_names` is passed, it will be used for finding the names to display in the legend. Otherwise, the values passed to `group` will be used. @@ -261,15 +269,10 @@ def plot( the matplotlib.pyplot.plot function; if dim_domain is 2, keyword arguments to be passed to the matplotlib.pyplot.plot_surface function. - Returns: - fig: the figure in which the graphs are plotted. - + fig (figure object): figure object in which the graphs are plotted. 
""" - fig, axes = _get_figure_and_axes(chart, fig, ax) - fig, axes = _set_figure_layout_for_fdata( - self.fdata, fig, axes, n_rows, n_cols, - ) + self.artists = np.array([]) if domain_range is None: self.domain_range = self.fdata.domain_range @@ -307,9 +310,12 @@ def plot( set_color_dict(sample_colors, j, color_dict) - axes[i].plot( - eval_points, mat[j, ..., i].T, **color_dict, **kwargs, - ) + self.artists = np.append(self.artists, self.axes[i].plot( + eval_points, + mat[j, ..., i].T, + **color_dict, + **kwargs, + )) else: @@ -338,45 +344,104 @@ def plot( set_color_dict(sample_colors, h, color_dict) - axes[k].plot_surface( + self.artists = np.append(self.artists, self.axes[k].plot_surface( X, Y, Z[h, ..., k], **color_dict, **kwargs, - ) + )) + + _set_labels(self.fdata, self.fig, self.axes, patches) + self.fig.suptitle("GraphPlot") - _set_labels(self.fdata, fig, axes, patches) + return self.fig - return fig + def n_samples(self) -> int: + """Get the number of instances that will be used for interactivity.""" + return self.fdata.n_samples + + def set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + ) -> None: + """ + Initialize the axes and fig of the plot. + + Args: + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. + axes: axis where the graphs are plotted. If None, see param fig. + n_rows: designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols: designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. 
+ """ + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata( + fdata=self.fdata, + fig=fig, + axes=axes, + n_rows=n_rows, + n_cols=n_cols, + ) + self.fig = fig + self.axes = axes -class ScatterPlot: +class ScatterPlot(BasePlot): """ Class used to scatter the FDataGrid object. Args: fdata: functional data set that we want to plot. - grid_points: points to plot. - + grid_points (ndarray): points to plot. + chart (figure object, axe or list of axes, optional): figure over + with the graphs are plotted or axis over where the graphs are + plotted. If None and ax is also None, the figure is + initialized. + fig (figure object, optional): figure over with the graphs are + plotted in case ax is not specified. If None and ax is also + None, the figure is initialized. + axes (axis, optional): axis over where the graphs + are plotted. If None, see param fig. + n_rows (int, optional): designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols(int, optional): designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. 
""" def __init__( self, fdata: FData, + chart: Union[Figure, Axes, None] = None, + *, + fig: Optional[Figure] = None, + axes: Optional[Axes] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, grid_points: Optional[GridPointsLike] = None, ) -> None: + BasePlot.__init__(self) self.fdata = fdata self.grid_points = grid_points + self.set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( self, - chart: Union[Figure, Axes, None] = None, *, - fig: Optional[Figure] = None, - ax: Optional[Axes] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, group: Optional[Sequence[K]] = None, group_colors: Optional[Indexable[K, ColorLike]] = None, @@ -388,49 +453,35 @@ def plot( Scatter FDataGrid object. Args: - chart: figure over with the graphs are plotted or axis - over where the graphs are plotted. If None and ax - is also None, the figure is initialized. - fig: figure over with the graphs are plotted in case ax is not - specified. If None and ax is also - None, the figure is initialized. - ax: axis over where the graphs are plotted. If None, see param fig. - n_rows: designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols: designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. - domain_range: Range where the function will be - plotted. In objects with unidimensional domain - the domain range should be a tuple with the bounds of the - interval; in the case of surfaces a list with 2 tuples with - the ranges for each dimension. Default uses the domain range - of the functional object. - group: contains integers from [0 to number of labels) - indicating to which group each sample belongs to. Then, - the samples with the same label are plotted in the same color. 
- If None, the default value, each sample is plotted in the color - assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors: colors in which groups are represented, - there must be one for each group. If None, each - group is shown with distict colors in the "Greys" colormap. - group_names: name of each of the groups which appear - in a legend, there must be one for each one. Defaults to None - and the legend is not shown. Implies `legend=True`. - legend: if `True`, show a legend with the groups. If - `group_names` is passed, it will be used for finding the names - to display in the legend. Otherwise, the values passed to - `group` will be used. - kwargs: if dim_domain is 1, keyword arguments to be passed to - the matplotlib.pyplot.plot function; if dim_domain is 2, - keyword arguments to be passed to the - matplotlib.pyplot.plot_surface function. - + domain_range: Range where the + function will be plotted. In objects with unidimensional domain + the domain range should be a tuple with the bounds of the + interval; in the case of surfaces a list with 2 tuples with + the ranges for each dimension. Default uses the domain range + of the functional object. + group: contains integers from [0 to number of + labels) indicating to which group each sample belongs to. Then, + the samples with the same label are plotted in the same color. + If None, the default value, each sample is plotted in the color + assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. + group_colors: colors in which groups are + represented, there must be one for each group. If None, each + group is shown with distict colors in the "Greys" colormap. + group_names: name of each of the groups which appear + in a legend, there must be one for each one. Defaults to None + and the legend is not shown. Implies `legend=True`. + legend: if `True`, show a legend with the groups. If + `group_names` is passed, it will be used for finding the names + to display in the legend. 
Otherwise, the values passed to + `group` will be used. + kwargs: if dim_domain is 1, keyword arguments to be passed to + the matplotlib.pyplot.plot function; if dim_domain is 2, + keyword arguments to be passed to the + matplotlib.pyplot.plot_surface function. Returns: - fig (figure object): figure object in which the graphs are plotted. - + fig: figure object in which the graphs are plotted. """ + self.artists = np.array([]) evaluated_points = None if self.grid_points is None: @@ -443,11 +494,6 @@ def plot( self.grid_points, grid=True, ) - fig, axes = _get_figure_and_axes(chart, fig, ax) - fig, axes = _set_figure_layout_for_fdata( - self.fdata, fig, axes, n_rows, n_cols, - ) - if domain_range is None: self.domain_range = self.fdata.domain_range else: @@ -466,12 +512,13 @@ def plot( set_color_dict(sample_colors, j, color_dict) - axes[i].scatter( + self.artists = np.append(self.artists, self.axes[i].scatter( self.grid_points[0], evaluated_points[j, ..., i].T, **color_dict, + picker=2, **kwargs, - ) + )) else: @@ -484,17 +531,59 @@ def plot( set_color_dict(sample_colors, h, color_dict) - axes[k].scatter( + self.artists = np.append(self.artists, self.axes[k].scatter( X, Y, evaluated_points[h, ..., k].T, **color_dict, + picker=2, **kwargs, - ) + )) + + _set_labels(self.fdata, self.fig, self.axes, patches) + + return self.fig - _set_labels(self.fdata, fig, axes, patches) + def n_samples(self) -> int: + """Get the number of instances that will be used for interactivity.""" + return self.fdata.n_samples + + def set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + ) -> None: + """ + Initialize the axes and fig of the plot. - return fig + Args: + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. 
+ fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. + axes: axis where the graphs are plotted. If None, see param fig. + n_rows: designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols: designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. + """ + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata( + fdata=self.fdata, + fig=fig, + axes=axes, + n_rows=n_rows, + n_cols=n_cols, + ) + self.fig = fig + self.axes = axes def set_color_dict( From 1b7c6ab0ce32355b39139173019d5084fc134502 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 22 Apr 2021 12:14:31 +0200 Subject: [PATCH 264/417] corrected too long lines --- .../visualization/representation.py | 50 +++++++++++-------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 4f50f5a77..d2e9c3457 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -344,13 +344,15 @@ def plot( set_color_dict(sample_colors, h, color_dict) - self.artists = np.append(self.artists, self.axes[k].plot_surface( - X, - Y, - Z[h, ..., k], - **color_dict, - **kwargs, - )) + self.artists = np.append( + self.artists, self.axes[k].plot_surface( + X, + Y, + Z[h, ..., k], + **color_dict, + **kwargs, + ), + ) _set_labels(self.fdata, self.fig, self.axes, patches) self.fig.suptitle("GraphPlot") @@ -512,13 +514,15 @@ def plot( set_color_dict(sample_colors, j, color_dict) - self.artists = np.append(self.artists, self.axes[i].scatter( - self.grid_points[0], - evaluated_points[j, ..., i].T, - **color_dict, - picker=2, - **kwargs, - )) + self.artists = np.append( + self.artists, 
self.axes[i].scatter( + self.grid_points[0], + evaluated_points[j, ..., i].T, + **color_dict, + picker=2, + **kwargs, + ), + ) else: @@ -531,14 +535,16 @@ def plot( set_color_dict(sample_colors, h, color_dict) - self.artists = np.append(self.artists, self.axes[k].scatter( - X, - Y, - evaluated_points[h, ..., k].T, - **color_dict, - picker=2, - **kwargs, - )) + self.artists = np.append( + self.artists, self.axes[k].scatter( + X, + Y, + evaluated_points[h, ..., k].T, + **color_dict, + picker=2, + **kwargs, + ), + ) _set_labels(self.fdata, self.fig, self.axes, patches) From f3691450e056d4ec4c8fcf53aeb926d0e98dc484 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 22 Apr 2021 12:15:26 +0200 Subject: [PATCH 265/417] boxplot ended --- skfda/exploratory/visualization/_boxplot.py | 94 ++++++++++++++------- 1 file changed, 62 insertions(+), 32 deletions(-) diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 07f69c25c..e019a8fe9 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -6,15 +6,19 @@ """ import math from abc import ABC, abstractmethod +from typing import Optional, Sequence, Union import matplotlib import matplotlib.pyplot as plt import numpy as np +from matplotlib.axes import Axes +from matplotlib.figure import Figure +from ... 
import FDataGrid from ..depth import ModifiedBandDepth from ..outliers import _envelopes +from ._baseplot import BasePlot from ._utils import ( - _figure_to_svg, _get_figure_and_axes, _set_figure_layout_for_fdata, _set_labels, @@ -74,19 +78,8 @@ def colormap(self, value): "matplotlib.colors.LinearSegmentedColormap") self._colormap = value - @abstractmethod - def plot(self, chart=None, *, fig=None, axes=None, - n_rows=None, n_cols=None): - pass - - def _repr_svg_(self): - fig = self.plot() - plt.close(fig) - return _figure_to_svg(fig) - - -class Boxplot(FDataBoxplot): +class Boxplot(FDataBoxplot, BasePlot): r"""Representation of the functional boxplot. Class implementing the functionl boxplot which is an informative @@ -249,8 +242,19 @@ class Boxplot(FDataBoxplot): """ - def __init__(self, fdatagrid, depth_method=ModifiedBandDepth(), prob=[0.5], - factor=1.5): + def __init__( + self, + fdatagrid: FDataGrid, + depth_method = ModifiedBandDepth(), + prob: Sequence[float] = [0.5], + factor: float = 1.5, + *, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Optional[Sequence[Axes]] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + ): """Initialization of the Boxplot class. 
Args: @@ -267,6 +271,7 @@ def __init__(self, fdatagrid, depth_method=ModifiedBandDepth(), prob=[0.5], """ FDataBoxplot.__init__(self, factor) + BasePlot.__init__(self) if fdatagrid.dim_domain != 1: raise ValueError( @@ -318,6 +323,8 @@ def __init__(self, fdatagrid, depth_method=ModifiedBandDepth(), prob=[0.5], self.mediancol = "black" self._show_full_outliers = False + self.set_figure_and_axes(chart, fig, axes, n_rows, n_cols) + @property def fdatagrid(self): return self._fdatagrid @@ -352,8 +359,29 @@ def show_full_outliers(self, boolean): raise ValueError("show_full_outliers must be boolean type") self._show_full_outliers = boolean - def plot(self, chart=None, *, fig=None, axes=None, - n_rows=None, n_cols=None): + def set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + ) -> None: + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata( + self.fdatagrid, + fig=fig, + axes=axes, + n_rows=n_rows, + n_cols=n_cols, + ) + self.fig = fig + self.axes = axes + + def n_samples(self) -> int: + return self.fdatagrid.n_samples + + def plot(self): """Visualization of the functional boxplot of the fdatagrid (dim_domain=1). 
@@ -375,9 +403,7 @@ def plot(self, chart=None, *, fig=None, axes=None, """ - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata( - self.fdatagrid, fig, axes, n_rows, n_cols) + self.artists = np.array([]) tones = np.linspace(0.1, 1.0, len(self._prob) + 1, endpoint=False)[1:] color = self.colormap(tones) @@ -387,32 +413,36 @@ def plot(self, chart=None, *, fig=None, axes=None, var_zorder = 4 outliers = self.fdatagrid[self.outliers] + index_outliers = np.where(self.outliers == True)[0] for m in range(self.fdatagrid.dim_codomain): # Outliers for o in outliers: - axes[m].plot(o.grid_points[0], - o.data_matrix[0, :, m], - color=self.outliercol, - linestyle='--', zorder=1) + self.axes[m].plot( + o.grid_points[0], + o.data_matrix[0, :, m], + color=self.outliercol, + linestyle='--', + zorder=1, + ) for i in range(len(self._prob)): # central regions - axes[m].fill_between(self.fdatagrid.grid_points[0], + self.axes[m].fill_between(self.fdatagrid.grid_points[0], self.envelopes[i][0][..., m], self.envelopes[i][1][..., m], facecolor=color[i], zorder=var_zorder) # outlying envelope - axes[m].plot(self.fdatagrid.grid_points[0], + self.axes[m].plot(self.fdatagrid.grid_points[0], self.non_outlying_envelope[0][..., m], self.fdatagrid.grid_points[0], self.non_outlying_envelope[1][..., m], color=self.barcol, zorder=4) # central envelope - axes[m].plot(self.fdatagrid.grid_points[0], + self.axes[m].plot(self.fdatagrid.grid_points[0], self.central_envelope[0][..., m], self.fdatagrid.grid_points[0], self.central_envelope[1][..., m], @@ -421,23 +451,23 @@ def plot(self, chart=None, *, fig=None, axes=None, # vertical lines index = math.ceil(len(self.fdatagrid.grid_points[0]) / 2) x = self.fdatagrid.grid_points[0][index] - axes[m].plot([x, x], + self.axes[m].plot([x, x], [self.non_outlying_envelope[0][..., m][index], self.central_envelope[0][..., m][index]], color=self.barcol, zorder=4) - axes[m].plot([x, x], + self.axes[m].plot([x, x], 
[self.non_outlying_envelope[1][..., m][index], self.central_envelope[1][..., m][index]], color=self.barcol, zorder=4) # median sample - axes[m].plot(self.fdatagrid.grid_points[0], self.median[..., m], + self.axes[m].plot(self.fdatagrid.grid_points[0], self.median[..., m], color=self.mediancol, zorder=5) - _set_labels(self.fdatagrid, fig, axes) + _set_labels(self.fdatagrid, self.fig, self.axes) - return fig + return self.fig def __repr__(self): """Return repr(self).""" From 71161d8bb4e43f4c612e57ffee61caae0ae75287 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 22 Apr 2021 12:16:50 +0200 Subject: [PATCH 266/417] parametric plot ended --- .../visualization/_parametric_plot.py | 90 +++++++++++++------ 1 file changed, 63 insertions(+), 27 deletions(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 4b3da19bc..b793874dc 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -6,16 +6,18 @@ of them with domain 1 and codomain 1. """ -from typing import Any, Optional, Union +from typing import Optional, Sequence, Union +import numpy as np from matplotlib.axes import Axes from matplotlib.figure import Figure from ...representation import FData +from ._baseplot import BasePlot from ._utils import _get_figure_and_axes, _set_figure_layout -class ParametricPlot: +class ParametricPlot(BasePlot): """ Parametric Plot visualization. @@ -28,23 +30,40 @@ class ParametricPlot: a dim_codomain = 1, the fdata2 will be needed. fdata2: optional functional data set, that will be needed if the fdata1 has dim_codomain = 1. + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. 
+ ax: axis where the graphs are plotted. If None, see param fig. """ def __init__( self, fdata1: FData, fdata2: Optional[FData] = None, + *, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Optional[Axes] = None, ) -> None: + BasePlot.__init__(self) self.fdata1 = fdata1 self.fdata2 = fdata2 + if self.fdata2 is not None: + self.fd_final = self.fdata1.concatenate( + self.fdata2, as_coordinates=True, + ) + else: + self.fd_final = self.fdata1 + + self.set_figure_and_axes(chart, fig, axes) + def plot( self, - chart: Union[Figure, Axes, None] = None, - *, - fig: Optional[Figure] = None, - ax: Optional[Axes] = None, - **kwargs: Any, + **kwargs, ) -> Figure: """ Parametric Plot graph. @@ -52,42 +71,30 @@ def plot( Plot the functions as coordinates. If two functions are passed it will concatenate both as coordinates of a vector-valued FData. Args: - chart: figure over with the graphs are plotted or axis over - where the graphs are plotted. If None and ax is also - None, the figure is initialized. - fig: figure over with the graphs are plotted in case ax is not - specified. If None and ax is also None, the figure is - initialized. - ax: axis where the graphs are plotted. If None, see param fig. kwargs: optional arguments. Returns: - fig (figure object): figure object in which the ParametricPlot + fig: figure object in which the ParametricPlot graph will be plotted. 
""" - fig, axes = _get_figure_and_axes(chart, fig, ax) - - if self.fdata2 is not None: - self.fd_final = self.fdata1.concatenate( - self.fdata2, as_coordinates=True, - ) - else: - self.fd_final = self.fdata1 + self.artists = np.array([]) if ( self.fd_final.dim_domain == 1 and self.fd_final.dim_codomain == 2 ): fig, axes = _set_figure_layout( - fig, axes, dim=2, n_axes=1, + self.fig, self.axes, dim=2, n_axes=1, ) - ax = axes[0] + self.fig = fig + self.axes = axes + ax = self.axes[0] for data_matrix in self.fd_final.data_matrix: - ax.plot( + self.artists = np.append(self.artists, ax.plot( data_matrix[:, 0].tolist(), data_matrix[:, 1].tolist(), **kwargs, - ) + )) else: raise ValueError( "Error in data arguments,", @@ -96,6 +103,8 @@ def plot( if self.fd_final.dataset_name is not None: fig.suptitle(self.fd_final.dataset_name) + else: + fig.suptitle("ParametricPlot") if self.fd_final.coordinate_names[0] is None: ax.set_xlabel("Function 1") @@ -108,3 +117,30 @@ def plot( ax.set_ylabel(self.fd_final.coordinate_names[1]) return fig + + def n_samples(self) -> int: + """Get the number of instances that will be used for interactivity.""" + return self.fd_final.n_samples + + def set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + ) -> None: + """ + Initialize the axes and fig of the plot. + + Args: + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. + axes: axis where the graphs are plotted. If None, see param fig. 
+ """ + fig, axes = _get_figure_and_axes(chart, fig, axes) + + self.fig = fig + self.axes = axes From d705923616e05163c6b13e79a9bc9a906b77e415 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 22 Apr 2021 12:18:19 +0200 Subject: [PATCH 267/417] clustering ended --- skfda/exploratory/visualization/clustering.py | 863 ++++++++++-------- 1 file changed, 491 insertions(+), 372 deletions(-) diff --git a/skfda/exploratory/visualization/clustering.py b/skfda/exploratory/visualization/clustering.py index 21af30129..9a87e0b75 100644 --- a/skfda/exploratory/visualization/clustering.py +++ b/skfda/exploratory/visualization/clustering.py @@ -1,40 +1,34 @@ """Clustering Plots Module.""" -from matplotlib.ticker import MaxNLocator -from mpldatacursor import datacursor -from sklearn.exceptions import NotFittedError +from typing import Optional, Sequence, Union -from sklearn.utils.validation import check_is_fitted import matplotlib.patches as mpatches import matplotlib.pyplot as plt import numpy as np +from matplotlib.axes import Axes +from matplotlib.figure import Figure +from matplotlib.ticker import MaxNLocator +from mpldatacursor import datacursor +from sklearn.exceptions import NotFittedError +from sklearn.utils.validation import check_is_fitted from ...ml.clustering import FuzzyCMeans -from ._utils import (_darken, - _get_figure_and_axes, _set_figure_layout_for_fdata, - _set_figure_layout, _set_labels) - +from ._baseplot import BasePlot +from ._utils import ( + _darken, + _get_figure_and_axes, + _set_figure_layout, + _set_figure_layout_for_fdata, + _set_labels, +) __author__ = "Amanda Hernando Bernabé" __email__ = "amanda.hernando@estudiante.uam.es" -def _check_if_estimator(estimator): - """Checks the argument *estimator* is actually an estimator that - implements the *fit* method. - - Args: - estimator (BaseEstimator object): estimator used to calculate the - clusters. 
- """ - msg = ("This %(name)s instance has no attribute \"fit\".") - if not hasattr(estimator, "fit"): - raise AttributeError(msg % {'name': type(estimator).__name__}) - - def _plot_clustering_checks(estimator, fdata, sample_colors, sample_labels, - cluster_colors, cluster_labels, - center_colors, center_labels): + cluster_colors, cluster_labels, + center_colors, center_labels): """Checks the arguments *sample_colors*, *sample_labels*, *cluster_colors*, *cluster_labels*, *center_colors*, *center_labels*, passed to the plot functions, have the correct dimensions. @@ -91,122 +85,54 @@ def _plot_clustering_checks(estimator, fdata, sample_colors, sample_labels, raise ValueError( "centers_labels must contain a label for each center.") - -def _plot_clusters(estimator, fdata, *, chart=None, fig=None, axes=None, - n_rows=None, n_cols=None, - labels, sample_labels, cluster_colors, cluster_labels, - center_colors, center_labels, center_width, colormap): - """Implementation of the plot of the FDataGrid samples by clusters. +def _check_if_estimator(estimator): + """Checks the argument *estimator* is actually an estimator that + implements the *fit* method. Args: estimator (BaseEstimator object): estimator used to calculate the clusters. - fdatagrid (FDataGrd object): contains the samples which are grouped - into different clusters. - fig (figure object): figure over which the graphs are plotted in - case ax is not specified. If None and ax is also None, the figure - is initialized. - axes (list of axes objects): axes over where the graphs are plotted. - If None, see param fig. - n_rows(int): designates the number of rows of the figure to plot the - different dimensions of the image. Only specified if fig and - ax are None. - n_cols(int): designates the number of columns of the figure to plot - the different dimensions of the image. Only specified if fig - and ax are None. 
- labels (numpy.ndarray, int: (n_samples, dim_codomain)): 2-dimensional - matrix where each row contains the number of cluster cluster - that observation belongs to. - sample_labels (list of str): contains in order the labels of each - sample of the fdatagrid. - cluster_colors (list of colors): contains in order the colors of each - cluster the samples of the fdatagrid are classified into. - cluster_labels (list of str): contains in order the names of each - cluster the samples of the fdatagrid are classified into. - center_colors (list of colors): contains in order the colors of each - centroid of the clusters the samples of the fdatagrid are - classified into. - center_labels list of colors): contains in order the labels of each - centroid of the clusters the samples of the fdatagrid are - classified into. - center_width (int): width of the centroids. - colormap(colormap): colormap from which the colors of the plot are - taken. + """ + msg = ("This %(name)s instance has no attribute \"fit\".") + if not hasattr(estimator, "fit"): + raise AttributeError(msg % {'name': type(estimator).__name__}) - Returns: - (tuple): tuple containing: +def _get_labels(x_label, y_label, title, xlabel_str): + """Sets the arguments *xlabel*, *ylabel*, *title* passed to the plot + functions :func:`plot_cluster_lines + ` and + :func:`plot_cluster_bars + `, + in case they are not set yet. - fig (figure object): figure object in which the graphs are plotted - in case ax is None. + Args: + xlabel (lstr): Label for the x-axes. + ylabel (str): Label for the y-axes. + title (str): Title for the figure where the clustering results are + ploted. + xlabel_str (str): In case xlabel is None, string to use for the labels + in the x-axes. - ax (axes object): axes in which the graphs are plotted. + Returns: + xlabel (str): Labels for the x-axes. + ylabel (str): Labels for the y-axes. + title (str): Title for the figure where the clustering results are + plotted. 
""" - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata(fdata, fig, axes, n_rows, n_cols) - - _plot_clustering_checks(estimator, fdata, None, sample_labels, - cluster_colors, cluster_labels, center_colors, - center_labels) - - if sample_labels is None: - sample_labels = [f'$SAMPLE: {i}$' for i in range(fdata.n_samples)] - - if cluster_colors is None: - cluster_colors = colormap( - np.arange(estimator.n_clusters) / (estimator.n_clusters - 1)) - - if cluster_labels is None: - cluster_labels = [ - f'$CLUSTER: {i}$' for i in range(estimator.n_clusters)] - - if center_colors is None: - center_colors = [_darken(c, 0.5) for c in cluster_colors] - - if center_labels is None: - center_labels = [ - f'$CENTER: {i}$' for i in range(estimator.n_clusters)] - - colors_by_cluster = cluster_colors[labels] - - patches = [] - for i in range(estimator.n_clusters): - patches.append( - mpatches.Patch(color=cluster_colors[i], - label=cluster_labels[i])) - - for j in range(fdata.dim_codomain): - for i in range(fdata.n_samples): - axes[j].plot(fdata.grid_points[0], - fdata.data_matrix[i, :, j], - c=colors_by_cluster[i], - label=sample_labels[i]) - for i in range(estimator.n_clusters): - axes[j].plot(fdata.grid_points[0], - estimator.cluster_centers_.data_matrix[i, :, j], - c=center_colors[i], - label=center_labels[i], - linewidth=center_width) - axes[j].legend(handles=patches) - datacursor(formatter='{label}'.format) - - _set_labels(fdata, fig, axes) + if x_label is None: + x_label = xlabel_str - return fig + if y_label is None: + y_label = "Degree of membership" + if title is None: + title = "Degrees of membership of the samples to each cluster" -def plot_clusters(estimator, X, chart=None, fig=None, axes=None, - n_rows=None, n_cols=None, - sample_labels=None, cluster_colors=None, - cluster_labels=None, center_colors=None, - center_labels=None, - center_width=3, - colormap=plt.cm.get_cmap('rainbow')): - """Plot of the FDataGrid samples by 
clusters. + return x_label, y_label, title - The clusters are calculated with the estimator passed as a parameter. If - the estimator is not fitted, the fit method is called. - Once each sample is assigned a label the plotting can be done. - Each group is assigned a color described in a leglend. +class ClusterPlot(BasePlot): + """ + ClusterPlot class. Args: estimator (BaseEstimator object): estimator used to calculate the @@ -239,85 +165,197 @@ def plot_clusters(estimator, X, chart=None, fig=None, axes=None, center_width (int): width of the centroid curves. colormap(colormap): colormap from which the colors of the plot are taken. Defaults to `rainbow`. + """ - Returns: - (tuple): tuple containing: + def __init__( + self, estimator, fdata, chart=None, fig=None, axes=None, + n_rows=None, n_cols=None, + sample_labels=None, cluster_colors=None, + cluster_labels=None, center_colors=None, + center_labels=None, + center_width=3, + colormap=plt.cm.get_cmap('rainbow'), + ) -> None: + BasePlot.__init__(self) + self.fdata = fdata + self.estimator = estimator + self.sample_labels = sample_labels + self.cluster_colors = cluster_colors + self.cluster_labels = cluster_labels + self.center_colors = center_colors + self.center_labels = center_labels + self.center_width = center_width + self.colormap = colormap + + self.set_figure_and_axes(chart, fig, axes, n_rows, n_cols) + + def set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + ) -> None: + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata( + fdata=self.fdata, + fig=fig, + axes=axes, + n_rows=n_rows, + n_cols=n_cols, + ) + + self.fig = fig + self.axes = axes + + def n_samples(self) -> int: + return self.fdata.n_samples + + def _plot_clusters(self): + """Implementation of the plot of the FDataGrid samples by clusters. 
+ + Args: + estimator (BaseEstimator object): estimator used to calculate the + clusters. + fdatagrid (FDataGrd object): contains the samples which are grouped + into different clusters. + fig (figure object): figure over which the graphs are plotted in + case ax is not specified. If None and ax is also None, the figure + is initialized. + axes (list of axes objects): axes over where the graphs are plotted. + If None, see param fig. + n_rows(int): designates the number of rows of the figure to plot the + different dimensions of the image. Only specified if fig and + ax are None. + n_cols(int): designates the number of columns of the figure to plot + the different dimensions of the image. Only specified if fig + and ax are None. + labels (numpy.ndarray, int: (n_samples, dim_codomain)): 2-dimensional + matrix where each row contains the number of cluster cluster + that observation belongs to. + sample_labels (list of str): contains in order the labels of each + sample of the fdatagrid. + cluster_colors (list of colors): contains in order the colors of each + cluster the samples of the fdatagrid are classified into. + cluster_labels (list of str): contains in order the names of each + cluster the samples of the fdatagrid are classified into. + center_colors (list of colors): contains in order the colors of each + centroid of the clusters the samples of the fdatagrid are + classified into. + center_labels list of colors): contains in order the labels of each + centroid of the clusters the samples of the fdatagrid are + classified into. + center_width (int): width of the centroids. + colormap(colormap): colormap from which the colors of the plot are + taken. + + Returns: + (tuple): tuple containing: fig (figure object): figure object in which the graphs are plotted - in case ax is None. + in case ax is None. ax (axes object): axes in which the graphs are plotted. 
- """ - _check_if_estimator(estimator) - try: - check_is_fitted(estimator) - estimator._check_test_data(X) - except NotFittedError: - estimator.fit(X) - - if isinstance(estimator, FuzzyCMeans): - labels = np.argmax(estimator.labels_, axis=1) - else: - labels = estimator.labels_ - - return _plot_clusters(estimator=estimator, fdata=X, - fig=fig, axes=axes, n_rows=n_rows, n_cols=n_cols, - labels=labels, sample_labels=sample_labels, - cluster_colors=cluster_colors, - cluster_labels=cluster_labels, - center_colors=center_colors, - center_labels=center_labels, - center_width=center_width, - colormap=colormap) - - -def _get_labels(x_label, y_label, title, xlabel_str): - """Sets the arguments *xlabel*, *ylabel*, *title* passed to the plot - functions :func:`plot_cluster_lines - ` and - :func:`plot_cluster_bars - `, - in case they are not set yet. - - Args: - xlabel (lstr): Label for the x-axes. - ylabel (str): Label for the y-axes. - title (str): Title for the figure where the clustering results are - ploted. - xlabel_str (str): In case xlabel is None, string to use for the labels - in the x-axes. - - Returns: - xlabel (str): Labels for the x-axes. - ylabel (str): Labels for the y-axes. - title (str): Title for the figure where the clustering results are - plotted. 
- """ - if x_label is None: - x_label = xlabel_str + """ + _plot_clustering_checks( + self.estimator, + self.fdata, None, + self.sample_labels, + self.cluster_colors, + self.cluster_labels, + self.center_colors, + self.center_labels, + ) + + if self.sample_labels is None: + self.sample_labels = [f'$SAMPLE: {i}$' for i in range(self.fdata.n_samples)] + + if self.cluster_colors is None: + self.cluster_colors = self.colormap( + np.arange(self.estimator.n_clusters) / (self.estimator.n_clusters - 1)) + + if self.cluster_labels is None: + cluster_labels = [ + f'$CLUSTER: {i}$' for i in range(self.estimator.n_clusters)] + + if self.center_colors is None: + self.center_colors = [_darken(c, 0.5) for c in self.cluster_colors] + + if self.center_labels is None: + self.center_labels = [ + f'$CENTER: {i}$' for i in range(self.estimator.n_clusters)] + + colors_by_cluster = self.cluster_colors[self.labels] + + patches = [] + for i in range(self.estimator.n_clusters): + patches.append( + mpatches.Patch(color=self.cluster_colors[i], + label=self.cluster_labels[i])) + + for j in range(self.fdata.dim_codomain): + for i in range(self.fdata.n_samples): + self.artists = np.append(self.artists, self.axes[j].plot( + self.fdata.grid_points[0], + self.fdata.data_matrix[i, :, j], + c=colors_by_cluster[i], + label=self.sample_labels[i] + )) + for i in range(self.estimator.n_clusters): + self.axes[j].plot(self.fdata.grid_points[0], + self.estimator.cluster_centers_.data_matrix[ + i, + :, + j, + ], + c=self.center_colors[i], + label=self.center_labels[i], + linewidth=self.center_width) + self.axes[j].legend(handles=patches) + datacursor(formatter='{label}'.format) + + _set_labels(self.fdata, self.fig, self.axes) + + return self.fig + + def plot(self): + """Plot of the FDataGrid samples by clusters. + + The clusters are calculated with the estimator passed as a parameter. If + the estimator is not fitted, the fit method is called. + Once each sample is assigned a label the plotting can be done. 
+ Each group is assigned a color described in a legend. + + Returns: + (tuple): tuple containing: + + fig (figure object): figure object in which the graphs are plotted + in case ax is None. - if y_label is None: - y_label = "Degree of membership" + ax (axes object): axes in which the graphs are plotted. + """ - if title is None: - title = "Degrees of membership of the samples to each cluster" + self.artists = np.array([]) - return x_label, y_label, title + _check_if_estimator(self.estimator) + try: + check_is_fitted(self.estimator) + self.estimator._check_test_data(self.fdata) + except NotFittedError: + self.estimator.fit(self.fdata) + if isinstance(self.estimator, FuzzyCMeans): + self.labels = np.argmax(self.estimator.labels_, axis=1) + else: + self.labels = self.estimator.labels_ -def plot_cluster_lines(estimator, X, chart=None, fig=None, axes=None, - sample_colors=None, sample_labels=None, - cluster_labels=None, - colormap=plt.cm.get_cmap('rainbow'), - x_label=None, y_label=None, title=None): - """Implementation of the plotting of the results of the - :func:`Fuzzy K-Means ` method. + return self._plot_clusters() - A kind of Parallel Coordinates plot is generated in this function with the - membership values obtained from the algorithm. A line is plotted for each - sample with the values for each cluster. See `Clustering Example - <../auto_examples/plot_clustering.html>`_. +class ClusterPlotLines(BasePlot): + """ + Class ClusterPlotLines. Args: estimator (BaseEstimator object): estimator used to calculate the @@ -343,184 +381,265 @@ def plot_cluster_lines(estimator, X, chart=None, fig=None, axes=None, title (str, optional): Title for the figure where the clustering results are ploted. Defaults to "Degrees of membership of the samples to each cluster". - - Returns: - (tuple): tuple containing: - - fig (figure object): figure object in which the graphs are plotted - in case ax is None. - - ax (axes object): axes in which the graphs are plotted. 
- - """ - fdata = X - _check_if_estimator(estimator) - - if not isinstance(estimator, FuzzyCMeans): - raise ValueError("The estimator must be a FuzzyCMeans object.") - - try: - check_is_fitted(estimator) - estimator._check_test_data(X) - except NotFittedError: - estimator.fit(X) - - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout(fig, axes) - - _plot_clustering_checks(estimator, fdata, sample_colors, sample_labels, - None, cluster_labels, None, None) - - x_label, y_label, title = _get_labels(x_label, y_label, title, "Cluster") - - if sample_colors is None: - cluster_colors = colormap(np.arange(estimator.n_clusters) / - (estimator.n_clusters - 1)) - labels_by_cluster = np.argmax(estimator.labels_, axis=1) - sample_colors = cluster_colors[labels_by_cluster] - - if sample_labels is None: - sample_labels = ['$SAMPLE: {}$'.format(i) for i in - range(fdata.n_samples)] - - if cluster_labels is None: - cluster_labels = ['${}$'.format(i) for i in - range(estimator.n_clusters)] - - axes[0].get_xaxis().set_major_locator(MaxNLocator(integer=True)) - for i in range(fdata.n_samples): - axes[0].plot(np.arange(estimator.n_clusters), - estimator.labels_[i], - label=sample_labels[i], - color=sample_colors[i]) - axes[0].set_xticks(np.arange(estimator.n_clusters)) - axes[0].set_xticklabels(cluster_labels) - axes[0].set_xlabel(x_label) - axes[0].set_ylabel(y_label) - datacursor(formatter='{label}'.format) - - fig.suptitle(title) - return fig - - -def plot_cluster_bars(estimator, X, chart=None, fig=None, axes=None, sort=-1, - sample_labels=None, cluster_colors=None, - cluster_labels=None, colormap=plt.cm.get_cmap('rainbow'), - x_label=None, y_label=None, title=None): - """Implementation of the plotting of the results of the - :func:`Fuzzy K-Means ` method. - - - A kind of barplot is generated in this function with the membership values - obtained from the algorithm. 
There is a bar for each sample whose height is - 1 (the sum of the membership values of a sample add to 1), and the part - proportional to each cluster is coloured with the corresponding color. See - `Clustering Example <../auto_examples/plot_clustering.html>`_. - - Args: - estimator (BaseEstimator object): estimator used to calculate the - clusters. - X (FDataGrd object): contains the samples which are grouped - into different clusters. - fig (figure object, optional): figure over which the graph is - plotted in case ax is not specified. If None and ax is also None, - the figure is initialized. - axes (axes object, optional): axes over where the graph is plotted. - If None, see param fig. - sort(int, optional): Number in the range [-1, n_clusters) designating - the cluster whose labels are sorted in a decrementing order. - Defaults to -1, in this case, no sorting is done. - sample_labels (list of str, optional): contains in order the labels - of each sample of the fdatagrid. - cluster_labels (list of str, optional): contains in order the names of - each cluster the samples of the fdatagrid are classified into. - cluster_colors (list of colors): contains in order the colors of each - cluster the samples of the fdatagrid are classified into. - colormap(colormap, optional): colormap from which the colors of the - plot are taken. - x_label (str): Label for the x-axis. Defaults to "Sample". - y_label (str): Label for the y-axis. Defaults to - "Degree of membership". - title (str): Title for the figure where the clustering results are - plotted. - Defaults to "Degrees of membership of the samples to each cluster". - - Returns: - (tuple): tuple containing: - - fig (figure object): figure object in which the graph is plotted - in case ax is None. - - ax (axis object): axis in which the graph is plotted. 
- """ - fdata = X - _check_if_estimator(estimator) - - if not isinstance(estimator, FuzzyCMeans): - raise ValueError("The estimator must be a FuzzyCMeans object.") - try: - check_is_fitted(estimator) - estimator._check_test_data(X) - except NotFittedError: - estimator.fit(X) + def __init__( + self, + estimator, fdata, chart=None, fig=None, axes=None, + sample_colors=None, sample_labels=None, + cluster_labels=None, + colormap=plt.cm.get_cmap('rainbow'), + x_label=None, y_label=None, title=None, + ) -> None: + BasePlot.__init__(self) + self.fdata = fdata + self.estimator = estimator + self.sample_labels = sample_labels + self.sample_colors = sample_colors + self.cluster_labels = cluster_labels + self.x_label = x_label + self.y_label = y_label + self.title = title + self.colormap = colormap + + self.set_figure_and_axes(chart, fig, axes) + + def set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + ) -> None: + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout(fig, axes) + + self.fig = fig + self.axes = axes + + def n_samples(self) -> int: + return self.fdata.n_samples + + def plot(self): + """Implementation of the plotting of the results of the + :func:`Fuzzy K-Means ` method. + + + A kind of Parallel Coordinates plot is generated in this function with the + membership values obtained from the algorithm. A line is plotted for each + sample with the values for each cluster. See `Clustering Example + <../auto_examples/plot_clustering.html>`_. + + Returns: + (tuple): tuple containing: + + fig (figure object): figure object in which the graphs are plotted + in case ax is None. + + ax (axes object): axes in which the graphs are plotted. 
+ + """ + self.artists = np.array([]) + + _check_if_estimator(self.estimator) + + if not isinstance(self.estimator, FuzzyCMeans): + raise ValueError("The estimator must be a FuzzyCMeans object.") + + try: + check_is_fitted(self.estimator) + self.estimator._check_test_data(self.fdata) + except NotFittedError: + self.estimator.fit(self.fdata) + + _plot_clustering_checks(self.estimator, self.fdata, self.sample_colors, self.sample_labels, + None, self.cluster_labels, None, None) + + self.x_label, self.y_label, self.title = _get_labels(self.x_label, self.y_label, self.title, "Cluster") + + if self.sample_colors is None: + self.cluster_colors = self.colormap(np.arange(self.estimator.n_clusters) / + (self.estimator.n_clusters - 1)) + labels_by_cluster = np.argmax(self.estimator.labels_, axis=1) + self.sample_colors = self.cluster_colors[labels_by_cluster] + + if self.sample_labels is None: + self.sample_labels = ['$SAMPLE: {}$'.format(i) for i in + range(self.fdata.n_samples)] + + if self.cluster_labels is None: + self.cluster_labels = ['${}$'.format(i) for i in + range(self.estimator.n_clusters)] + + self.axes[0].get_xaxis().set_major_locator(MaxNLocator(integer=True)) + for i in range(self.fdata.n_samples): + self.artists = np.append(self.artists, self.axes[0].plot( + np.arange(self.estimator.n_clusters), + self.estimator.labels_[i], + label=self.sample_labels[i], + color=self.sample_colors[i], + )) + self.axes[0].set_xticks(np.arange(self.estimator.n_clusters)) + self.axes[0].set_xticklabels(self.cluster_labels) + self.axes[0].set_xlabel(self.x_label) + self.axes[0].set_ylabel(self.y_label) + datacursor(formatter='{label}'.format) - if sort < -1 or sort >= estimator.n_clusters: - raise ValueError( - "The sorting number must belong to the interval [-1, n_clusters)") - - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout(fig, axes) - - _plot_clustering_checks(estimator, fdata, None, sample_labels, - cluster_colors, cluster_labels, None, 
None) - - x_label, y_label, title = _get_labels(x_label, y_label, title, "Sample") - - if sample_labels is None: - sample_labels = np.arange(fdata.n_samples) - - if cluster_colors is None: - cluster_colors = colormap( - np.arange(estimator.n_clusters) / (estimator.n_clusters - 1)) - - if cluster_labels is None: - cluster_labels = [f'$CLUSTER: {i}$' for i in - range(estimator.n_clusters)] - - patches = [] - for i in range(estimator.n_clusters): - patches.append( - mpatches.Patch(color=cluster_colors[i], label=cluster_labels[i])) - - if sort != -1: - sample_indices = np.argsort(-estimator.labels_[:, sort]) - sample_labels = np.copy(sample_labels[sample_indices]) - labels_dim = np.copy(estimator.labels_[sample_indices]) - - temp_labels = np.copy(labels_dim[:, 0]) - labels_dim[:, 0] = labels_dim[:, sort] - labels_dim[:, sort] = temp_labels - - temp_color = np.copy(cluster_colors[0]) - cluster_colors[0] = cluster_colors[sort] - cluster_colors[sort] = temp_color - else: - labels_dim = estimator.labels_ - - conc = np.zeros((fdata.n_samples, 1)) - labels_dim = np.concatenate((conc, labels_dim), axis=-1) - for i in range(estimator.n_clusters): - axes[0].bar(np.arange(fdata.n_samples), - labels_dim[:, i + 1], - bottom=np.sum(labels_dim[:, :(i + 1)], axis=1), - color=cluster_colors[i]) - axes[0].set_xticks(np.arange(fdata.n_samples)) - axes[0].set_xticklabels(sample_labels) - axes[0].set_xlabel(x_label) - axes[0].set_ylabel(y_label) - axes[0].legend(handles=patches) - - fig.suptitle(title) - return fig + self.fig.suptitle(self.title) + return self.fig + + +class ClusterPlotBars(BasePlot): + + def __init__( + self, + estimator, fdata, chart=None, fig=None, axes=None, sort=-1, + sample_labels=None, cluster_colors=None, + cluster_labels=None, colormap=plt.cm.get_cmap('rainbow'), + x_label=None, y_label=None, title=None, + ) -> None: + BasePlot.__init__(self) + self.fdata = fdata + self.estimator = estimator + self.sample_labels = sample_labels + self.cluster_colors = 
cluster_colors + self.cluster_labels = cluster_labels + self.x_label = x_label + self.y_label = y_label + self.title = title + self.colormap = colormap + self.sort = sort + + self.set_figure_and_axes(chart, fig, axes) + + def set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + ) -> None: + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout(fig, axes) + + self.fig = fig + self.axes = axes + + def n_samples(self) -> int: + return self.fdata.n_samples + + def plot(self): + """Implementation of the plotting of the results of the + :func:`Fuzzy K-Means ` method. + + + A kind of barplot is generated in this function with the membership values + obtained from the algorithm. There is a bar for each sample whose height is + 1 (the sum of the membership values of a sample add to 1), and the part + proportional to each cluster is coloured with the corresponding color. See + `Clustering Example <../auto_examples/plot_clustering.html>`_. + + Args: + estimator (BaseEstimator object): estimator used to calculate the + clusters. + X (FDataGrd object): contains the samples which are grouped + into different clusters. + fig (figure object, optional): figure over which the graph is + plotted in case ax is not specified. If None and ax is also None, + the figure is initialized. + axes (axes object, optional): axes over where the graph is plotted. + If None, see param fig. + sort(int, optional): Number in the range [-1, n_clusters) designating + the cluster whose labels are sorted in a decrementing order. + Defaults to -1, in this case, no sorting is done. + sample_labels (list of str, optional): contains in order the labels + of each sample of the fdatagrid. + cluster_labels (list of str, optional): contains in order the names of + each cluster the samples of the fdatagrid are classified into. 
+ cluster_colors (list of colors): contains in order the colors of each + cluster the samples of the fdatagrid are classified into. + colormap(colormap, optional): colormap from which the colors of the + plot are taken. + x_label (str): Label for the x-axis. Defaults to "Sample". + y_label (str): Label for the y-axis. Defaults to + "Degree of membership". + title (str): Title for the figure where the clustering results are + plotted. + Defaults to "Degrees of membership of the samples to each cluster". + + Returns: + (tuple): tuple containing: + + fig (figure object): figure object in which the graph is plotted + in case ax is None. + + ax (axis object): axis in which the graph is plotted. + + """ + self.artists = np.array([]) + + _check_if_estimator(self.estimator) + + if not isinstance(self.estimator, FuzzyCMeans): + raise ValueError("The estimator must be a FuzzyCMeans object.") + + try: + check_is_fitted(self.estimator) + self.estimator._check_test_data(self.fdata) + except NotFittedError: + self.estimator.fit(self.fdata) + + if self.sort < -1 or self.sort >= self.estimator.n_clusters: + raise ValueError( + "The sorting number must belong to the interval [-1, n_clusters)") + + _plot_clustering_checks(self.estimator, self.fdata, None, self.sample_labels, + self.cluster_colors, self.cluster_labels, None, None) + + self.x_label, self.y_label, self.title = _get_labels(self.x_label, self.y_label, self.title, "Sample") + + if self.sample_labels is None: + self.sample_labels = np.arange(self.fdata.n_samples) + + if self.cluster_colors is None: + self.cluster_colors = self.colormap( + np.arange(self.estimator.n_clusters) / (self.estimator.n_clusters - 1)) + + if self.cluster_labels is None: + self.cluster_labels = [f'$CLUSTER: {i}$' for i in + range(self.estimator.n_clusters)] + + patches = [] + for i in range(self.estimator.n_clusters): + patches.append( + mpatches.Patch(color=self.cluster_colors[i], label=self.cluster_labels[i])) + + if self.sort != -1: + 
sample_indices = np.argsort(-self.estimator.labels_[:, self.sort]) + self.sample_labels = np.copy(self.sample_labels[sample_indices]) + labels_dim = np.copy(self.estimator.labels_[sample_indices]) + + temp_labels = np.copy(labels_dim[:, 0]) + labels_dim[:, 0] = labels_dim[:, self.sort] + labels_dim[:, self.sort] = temp_labels + + temp_color = np.copy(self.cluster_colors[0]) + self.cluster_colors[0] = self.cluster_colors[self.sort] + self.cluster_colors[self.sort] = temp_color + else: + labels_dim = self.estimator.labels_ + + conc = np.zeros((self.fdata.n_samples, 1)) + labels_dim = np.concatenate((conc, labels_dim), axis=-1) + for i in range(self.estimator.n_clusters): + self.x = self.axes[0].bar(np.arange(self.fdata.n_samples), + labels_dim[:, i + 1], + bottom=np.sum(labels_dim[:, :(i + 1)], axis=1), + color=self.cluster_colors[i]) + + self.axes[0].set_xticks(np.arange(self.fdata.n_samples)) + self.axes[0].set_xticklabels(self.sample_labels) + self.axes[0].set_xlabel(self.x_label) + self.axes[0].set_ylabel(self.y_label) + self.axes[0].legend(handles=patches) + + self.fig.suptitle(self.title) + return self.fig From 3d3f50a2473f5dcf858a281bf65b742488a00529 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 22 Apr 2021 12:19:31 +0200 Subject: [PATCH 268/417] fpca ended --- skfda/exploratory/visualization/fpca.py | 137 +++++++++++++++--------- 1 file changed, 86 insertions(+), 51 deletions(-) diff --git a/skfda/exploratory/visualization/fpca.py b/skfda/exploratory/visualization/fpca.py index 5edbc7fa8..ee54e5bda 100644 --- a/skfda/exploratory/visualization/fpca.py +++ b/skfda/exploratory/visualization/fpca.py @@ -1,19 +1,19 @@ -from matplotlib import pyplot as plt -from skfda.representation import FDataGrid, FDataBasis, FData +from typing import Optional, Union + +import numpy as np +from matplotlib.axes import Axes +from matplotlib.figure import Figure + from skfda.exploratory.visualization._utils import _get_figure_and_axes +from 
skfda.exploratory.visualization.representation import GraphPlot +from skfda.representation import FData +from ._baseplot import BasePlot -def plot_fpca_perturbation_graphs(mean, components, multiple, - chart = None, - fig=None, - axes=None, - **kwargs): - """ Plots the perturbation graphs for the principal components. - The perturbations are defined as variations over the mean. Adding a multiple - of the principal component curve to the mean function results in the - positive perturbation and subtracting a multiple of the principal component - curve results in the negative perturbation. For each principal component - curve passed, a subplot with the mean and the perturbations is shown. + +class FPCAPlot(BasePlot): + """ + FPCAPlot visualization. Args: mean (FDataGrid or FDataBasis): @@ -29,51 +29,86 @@ def plot_fpca_perturbation_graphs(mean, components, multiple, be initialized axes (axes object, optional): axis over where the graph is plotted. If None, see param fig. - - Returns: - (FDataGrid or FDataBasis): this contains the mean function followed - by the positive perturbation and the negative perturbation. """ - if len(mean) > 1: - mean = mean.mean() + def __init__( + self, + mean, components, multiple, + chart=None, + fig=None, + axes=None, + ): + BasePlot.__init__(self) + self.mean = mean + self.components = components + self.multiple = multiple + + self.set_figure_and_axes(chart, fig, axes) - fig, axes = _get_figure_and_axes(chart, fig, axes) + def plot(self, **kwargs): + """ + Plots the perturbation graphs for the principal components. + The perturbations are defined as variations over the mean. Adding a multiple + of the principal component curve to the mean function results in the + positive perturbation and subtracting a multiple of the principal component + curve results in the negative perturbation. For each principal component + curve passed, a subplot with the mean and the perturbations is shown. 
- if not axes: - axes = fig.subplots(nrows=len(components)) + Returns: + (FDataGrid or FDataBasis): this contains the mean function followed + by the positive perturbation and the negative perturbation. + """ - for i in range(len(axes)): - aux = _get_component_perturbations(mean, components, i, multiple) - aux.plot(axes[i], **kwargs) - axes[i].set_title('Principal component ' + str(i + 1)) + if len(self.mean) > 1: + self.mean = self.mean.mean() - return fig + for i in range(len(self.axes)): + aux = self._get_component_perturbations(i) + gp = GraphPlot(fdata=aux, axes=self.axes[i]).plot(**kwargs) + self.artists = gp.artists + self.axes[i].set_title('Principal component ' + str(i + 1)) + return self.fig -def _get_component_perturbations(mean, components, index=0, multiple=30): - """ Computes the perturbations over the mean function of a principal - component at a certain index. + def n_samples(self) -> int: + return self.fdata.n_samples - Args: - X (FDataGrid or FDataBasis): - the functional data object from which we obtain the mean - index (int): - index of the component for which we want to compute the - perturbations - multiple (float): - multiple of the principal component curve to be added or - subtracted. + def set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Optional[Axes] = None, + ) -> None: + fig, axes = _get_figure_and_axes(chart, fig, axes) + if not axes: + axes = fig.subplots(nrows=len(self.components)) - Returns: - (FDataGrid or FDataBasis): this contains the mean function followed - by the positive perturbation and the negative perturbation. 
- """ - if not isinstance(mean, FData): - raise AttributeError("X must be a FData object") - perturbations = mean.copy() - perturbations = perturbations.concatenate( - perturbations[0] + multiple * components[index]) - perturbations = perturbations.concatenate( - perturbations[0] - multiple * components[index]) - return perturbations + self.fig = fig + self.axes = axes + + def _get_component_perturbations(self, index=0): + """ Computes the perturbations over the mean function of a principal + component at a certain index. + + Args: + X (FDataGrid or FDataBasis): + the functional data object from which we obtain the mean + index (int): + index of the component for which we want to compute the + perturbations + multiple (float): + multiple of the principal component curve to be added or + subtracted. + + Returns: + (FDataGrid or FDataBasis): this contains the mean function followed + by the positive perturbation and the negative perturbation. + """ + if not isinstance(self.mean, FData): + raise AttributeError("X must be a FData object") + perturbations = self.mean.copy() + perturbations = perturbations.concatenate( + perturbations[0] + self.multiple * self.components[index]) + perturbations = perturbations.concatenate( + perturbations[0] - self.multiple * self.components[index]) + return perturbations From 032659f2ec29b40ef4eb13f8f07f653845add678 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 22 Apr 2021 12:21:55 +0200 Subject: [PATCH 269/417] multiple display ended with all functions --- skfda/exploratory/visualization/__init__.py | 1 + .../visualization/_multiple_display.py | 532 ++++++++++++++++++ 2 files changed, 533 insertions(+) create mode 100644 skfda/exploratory/visualization/_multiple_display.py diff --git a/skfda/exploratory/visualization/__init__.py b/skfda/exploratory/visualization/__init__.py index 3cf3414d5..e5c6b4c6a 100644 --- a/skfda/exploratory/visualization/__init__.py +++ b/skfda/exploratory/visualization/__init__.py @@ -5,6 +5,7 @@ 
from ._boxplot import Boxplot, SurfaceBoxplot from ._ddplot import DDPlot from ._magnitude_shape_plot import MagnitudeShapePlot +from ._multiple_display import MultipleDisplay from ._outliergram import Outliergram from ._parametric_plot import ParametricPlot from .fpca import plot_fpca_perturbation_graphs diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py new file mode 100644 index 000000000..b4e836883 --- /dev/null +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -0,0 +1,532 @@ +import collections +import copy +from typing import List, Optional, Sequence, Union + +import numpy as np +from matplotlib.artist import Artist +from matplotlib.axes import Axes +from matplotlib.backend_bases import Event +from matplotlib.figure import Figure +from matplotlib.widgets import Slider, Widget + +from ._baseplot import BasePlot +from ._utils import _get_axes_shape, _get_figure_and_axes, _set_figure_layout + + +class MultipleDisplay: + """ + MultipleDisplay class used to combine and interact with plots. + + This module is used to combine different BasePlot objects that + represent the same curves or surfaces, and represent them + together in the same figure. Besides this, it includes + the functionality necessary to interact with the graphics + by clicking the points, hovering over them... Picking the points allow + us to see our selected function standing out among the others in all + the axes. It is also possible to add widgets to interact with the + plots. + Args: + displays: baseplot objects that will be plotted in the fig. + criteria: sequence of criteria used to order the points in the + slider widget. The size should be equal to sliders, as each + criterion is for one slider. + sliders: sequence of widgets that will be plotted. + label_sliders: label of each of the sliders. + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. 
If None and ax is also + None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. + axes: axis where the graphs are plotted. If None, see param fig. + Attributes: + point_clicked: artist object containing the last point clicked. + num_graphs: number of graphs that will be plotted. + length_data: number of instances or curves of the different displays. + clicked: boolean indicating whether a point has being clicked. + index_clicked: index of the function selected with the interactive + module or widgets. + tags: list of tags for each ax, that contain the information printed + while hovering. + previous_hovered: artist object containing of the last point hovered. + is_updating: boolean value that determines wheter a widget + is being updated. + """ + + def __init__( + self, + displays: Union[BasePlot, List[BasePlot]], + criteria: Union[ + Sequence[float], + Sequence[Sequence[float]], + None, + ] = None, + sliders: Union[Widget, Sequence[Widget], None] = None, + label_sliders: Union[ + str, + Sequence[str], + None, + ] = None, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Optional[Sequence[Axes]] = None, + ): + if isinstance(displays, BasePlot): + self.displays = [copy.copy(displays)] + else: + self.displays = [] + for d in displays: + self.displays.append(copy.copy(d)) + self.point_clicked: Artist = None + self.num_graphs = len(self.displays) + self.length_data = self.displays[0].n_samples() + self.sliders = [] + self.criteria = [] + self.clicked = False + self.index_clicked = -1 + self.tags = [] + self.previous_hovered = None + self.fig = fig + self.axes = axes + self.chart = chart + self.is_updating = False + + if criteria is not None and sliders is not None: + if isinstance(sliders, collections.Iterable): + if len(criteria) == len(sliders): + self.create_sliders(criteria, sliders, label_sliders) + else: + raise 
ValueError( + "Size of criteria, and sliders should be equal.", + ) + else: + self.create_sliders(criteria, sliders, label_sliders) + else: + self.init_axes() + + def plot( + self, + ): + """ + Plot Multiple Display method. + + Plot the different BasePlot objects and widgets selected. + Activates the interactivity functionality of clicking and + hovering points. When clicking a point, the rest will be + made partially transparent in all the corresponding graphs. + Returns: + fig: figure object in which the displays and + widgets will be plotted. + """ + if self.num_graphs > 1: + for d in self.displays[1:]: + if d.n_samples() != self.length_data: + raise ValueError( + "Length of some data sets are not equal ", + ) + + for disp, ax in zip(self.displays, self.axes): + ax.clear() + disp.set_figure_and_axes(axes=ax) + disp.plot() + self.tags.append( + ax.annotate( + "", + xy=(0, 0), + xytext=(20, 20), + textcoords="offset points", + bbox=dict(boxstyle="round", fc="w"), + arrowprops=dict(arrowstyle="->"), + ), + ) + + self.fig.canvas.mpl_connect('motion_notify_event', self.hover) + self.fig.canvas.mpl_connect('pick_event', self.pick) + + for i in range(self.num_graphs): + self.tags[i].set_visible(False) + + self.fig.suptitle("Multiple display") + self.fig.tight_layout() + + for slider in self.sliders: + slider.on_changed(self.value_updated) + + return self.fig + + def update_annot(self, index_ax: int, index_point: int): + """ + Auxiliary method used to update the hovering annotations. + + Method used to update the annotations that appear while + hovering a scattered point. The annotations indicate + the index and coordinates of the point hovered. + Args: + index_ax: index of the ax being hovered. + index_point: index of the point being hovered. 
+ """ + xdata_graph = self.previous_hovered.get_offsets()[0][0] + ydata_graph = self.previous_hovered.get_offsets()[0][1] + xdata_aprox = "{0:.2f}".format(xdata_graph) + ydata_aprox = "{0:.2f}".format(ydata_graph) + + current_tag = self.tags[index_ax] + current_tag.xy = (xdata_graph, ydata_graph) + current_tag.xy = (xdata_graph, ydata_graph) + text = "".join([ + str(index_point), + ": (", + str(xdata_aprox), + ", ", + str(ydata_aprox), + ")", + ]) + + x_axis = self.axes[index_ax].get_xlim() + self.x_axis = x_axis + self.xdata_graph = xdata_graph + if (xdata_graph - x_axis[0]) > (x_axis[1] - xdata_graph): + current_tag.set_position((-80, 20)) + else: + current_tag.set_position((20, 20)) + + current_tag.set_text(text) + current_tag.get_bbox_patch().set_facecolor(color='red') + intensity = 0.4 + current_tag.get_bbox_patch().set_alpha(intensity) + + def hover(self, event: Event): + """ + Activate the annotation when hovering a point. + + Callback method that activates the annotation when hovering + a specific point in a graph. The annotation is a description + of the point containing its coordinates. + Args: + event: event object containing the artist of the point + hovered. 
+ """ + index_axis = -1 + + for i in range(self.num_graphs): + if event.inaxes == self.axes[i]: + index_axis = i + + if len(self.displays[i].artists) == 0: + return + + for j in range(len(self.displays[i].artists)): + artist = self.displays[i].artists[j] + if isinstance(artist, List): + return + is_graph, ind = artist.contains(event) + if is_graph and self.previous_hovered == artist: + return + if is_graph: + self.previous_hovered = artist + index_point = j + break + break + + for k in range(self.num_graphs, len(self.axes)): + if event.inaxes == self.axes[k]: + self.widget_index = k - self.num_graphs + + if index_axis != -1 and is_graph: + self.update_annot(index_axis, index_point) + self.tags[index_axis].set_visible(True) + self.fig.canvas.draw_idle() + elif self.tags[index_axis].get_visible(): + self.previous_hovered = None + self.tags[index_axis].set_visible(False) + self.fig.canvas.draw_idle() + + def init_axes( + self, + extra: int = 0, + ) -> None: + """ + Initialize the axes and figure. + + Args: + extra: integer indicating the extra axes needed due to the + necessity for them to plot the sliders. + """ + widget_aspect = 1 / 4 + fig, axes = _get_figure_and_axes(self.chart, self.fig, self.axes) + if len(axes) != 0 and len(axes) != (self.num_graphs + extra): + raise ValueError("Invalid number of axes.") + + n_rows, n_cols = _get_axes_shape(self.num_graphs + extra) + + number_axes = n_rows * n_cols + fig, axes = _set_figure_layout( + fig=fig, axes=axes, n_axes=self.num_graphs + extra, + ) + + for i in range(self.num_graphs, number_axes): + if i >= self.num_graphs + extra: + axes[i].set_visible(False) + else: + axes[i].set_box_aspect(widget_aspect) + + self.fig = fig + self.axes = axes + + def pick(self, event: Event) -> None: + """ + Activate interactive functionality when picking a point. + + Callback method that is activated when a point is picked. 
+ If no point was clicked previously, all the points but the + one selected will be more transparent in all the graphs. + If a point was clicked already, this new point will be the + one highlighted among the rest. If the same point is clicked, + the initial state of the graphics is restored. + Args: + event: event object containing the artist of the point + picked. + """ + if self.clicked: + self.point_clicked = event.artist + self.change_points_intensity() + self.clicked = False + elif self.point_clicked is None: + self.point_clicked = event.artist + self.update_index_display_picked() + self.reduce_points_intensity() + elif self.point_clicked == event.artist: + self.restore_points_intensity() + else: + self.point_clicked = event.artist + self.change_points_intensity() + + def update_index_display_picked(self) -> None: + """Update the index corresponding to the display picked.""" + for i in range(self.num_graphs): + if self.axes[i] == self.point_clicked.axes: + self.index_clicked = np.where( + self.displays[i].artists == self.point_clicked, + )[0][0] + return + + def reduce_points_intensity(self) -> None: + """Reduce the transparency of all the points but the selected one.""" + for i in range(self.length_data): + if i != self.index_clicked: + for d in self.displays: + if len(d.artists) != 0: + if isinstance(d.artists[i], list): + d.artists[i][0].set_alpha(0.1) + else: + d.artists[i].set_alpha(0.1) + + self.is_updating = True + for j in range(len(self.sliders)): + val_widget = list(self.criteria[j]).index(self.index_clicked) + self.sliders[j].set_val(val_widget) + self.is_updating = False + + def restore_points_intensity(self) -> None: + """Restore the original transparency of all the points.""" + for i in range(self.length_data): + for d in self.displays: + if len(d.artists) != 0: + if isinstance(d.artists[i], list): + d.artists[i][0].set_alpha(1) + else: + d.artists[i].set_alpha(1) + self.point_clicked = None + self.index_clicked = -1 + + self.is_updating = True 
+ for j in range(len(self.sliders)): + self.sliders[j].set_val(0) + self.is_updating = False + + def change_points_intensity( + self, + old_index: Union[int, None] = None, + ) -> None: + """ + Change the intensity of the points. + + Changes the intensity of the points, the highlighted one now + will be the selected one and the one with old_index with have + its transparency increased. + Args: + old_index: index of the last point clicked, as it should + reduce its transparency. + """ + if old_index is None: + old_index = self.index_clicked + self.update_index_display_picked() + + if self.index_clicked == old_index: + self.restore_points_intensity() + return + + for i in range(self.length_data): + if i == self.index_clicked: + intensity = 1 + elif i == old_index: + intensity = 0.1 + else: + intensity = -1 + + if intensity != -1: + self.change_display_intensity(i, intensity) + + self.is_updating = True + for j in range(len(self.sliders)): + val_widget = list(self.criteria[j]).index(self.index_clicked) + self.sliders[j].set_val(val_widget) + self.is_updating = False + + def change_display_intensity(self, index: int, intensity: int) -> None: + """ + Change the intensity of the point selected by index in every display. + + Args: + index: index of the last point clicked, as it should + reduce its transparency. + intensity: new intensity of the points. + """ + for d in self.displays: + if len(d.artists) != 0: + if isinstance(d.artists[index], list): + d.artists[index][0].set_alpha(intensity) + else: + d.artists[index].set_alpha(intensity) + + def create_sliders( + self, + criteria: Union[Sequence[float], Sequence[Sequence[float]]], + sliders: Union[Widget, Sequence[Widget]], + label_sliders: Union[str, Sequence[str], None] = None, + ) -> None: + """ + Create the sliders with the criteria selected. + + Args: + criteria: different criterion for each of the sliders. + sliders: widget types. + label_sliders: sequence of the names of each slider. 
+ """ + if isinstance(criteria[0], collections.Iterable): + for c in criteria: + if len(c) != self.length_data: + raise ValueError( + "Slider criteria should be of the same size as data", + ) + + self.init_axes(extra=len(criteria)) + + if label_sliders is None: + for i in range(len(criteria)): + self.add_slider(i, criteria[i], sliders[i]) + elif isinstance(label_sliders, str): + raise ValueError( + "Incorrect length of slider labels.", + ) + elif len(label_sliders) == len(sliders): + for k in range(len(criteria)): + self.add_slider( + k, + criteria[k], + sliders[k], + label_sliders[k], + ) + else: + raise ValueError( + "Incorrect length of slider labels.", + ) + elif ( + len(criteria) == self.length_data + and (isinstance(label_sliders, str) or label_sliders is None) + ): + self.init_axes(extra=1) + self.add_slider(0, criteria, sliders, label_sliders) + else: + raise ValueError( + "Slider criteria should be of the same size as data", + ) + + def add_slider( + self, + ind_ax: int, + criterion: Sequence[float], + widget_func: Widget = Slider, + label_slider: Optional[str] = None, + ) -> None: + """ + Add the slider to the MultipleDisplay object. + + Args: + ind_ax: index of the selected ax for the widget. + criterion: criterion used for the slider. + widget_func: widget type. + label_slider: names of the slider. 
+ """ + if label_slider is None: + full_desc = "".join(["Filter (", str(ind_ax), ")"]) + else: + full_desc = label_slider + self.sliders.append( + widget_func( + self.fig.axes[self.num_graphs + ind_ax], + full_desc, + valmin=0, + valmax=self.length_data - 1, + valinit=0, + ), + ) + + self.fig.axes[self.num_graphs + ind_ax].annotate( + '0', + xy=(0, -0.5), + xycoords='axes fraction', + annotation_clip=False, + ) + self.fig.axes[self.num_graphs + ind_ax].annotate( + str(self.length_data - 1), + xy=(0.95, -0.5), + xycoords='axes fraction', + annotation_clip=False, + ) + + dic = dict(zip(criterion, range(self.length_data))) + order_dic = collections.OrderedDict(sorted(dic.items())) + self.criteria.append(order_dic.values()) + + def value_updated(self, value: int) -> None: + """ + Update the graphs when a widget is clicked. + + Args: + value: current value of the widget. + """ + # Used to avoid entering in an etern loop + if self.is_updating is True: + return + self.is_updating = True + + # Make the changes of the slider discrete + index = int(int(value / 0.5) * 0.5) + old_index = self.index_clicked + self.index_clicked = list(self.criteria[self.widget_index])[index] + self.sliders[self.widget_index].valtext.set_text('{}'.format(index)) + + # Update the other sliders values + for i in range(len(self.sliders)): + if i != self.widget_index: + val_widget = list(self.criteria[i]).index(self.index_clicked) + self.sliders[i].set_val(val_widget) + + self.is_updating = False + + self.clicked = True + if old_index == -1: + self.reduce_points_intensity() + else: + self.change_points_intensity(old_index=old_index) From 35342fa591c46a64d348464d903d295de25d9691 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 25 Apr 2021 19:40:23 +0200 Subject: [PATCH 270/417] solved errors --- skfda/exploratory/visualization/_outliergram.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/skfda/exploratory/visualization/_outliergram.py 
b/skfda/exploratory/visualization/_outliergram.py index bf19950d6..5c480b213 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -8,6 +8,7 @@ """ from typing import Optional, Sequence, Union +from matplotlib.artist import Artist import numpy as np import scipy.integrate as integrate @@ -107,15 +108,15 @@ def plot( fig: figure object in which the depths will be scattered. """ - self.artists = np.array([]) + self.artists = np.zeros(self.n_samples(), dtype=Artist) self.axScatter = self.axes[0] for i in range(self.mei.size): - self.artists = np.append(self.artists, self.axScatter.scatter( + self.artists[i] = self.axScatter.scatter( self.mei[i], self.mbd[i], picker=2, - )) + ) self.axScatter.plot( self.mei_ordered, @@ -131,8 +132,7 @@ def plot( # Set labels of graph if self.fdata.dataset_name is not None: self.axScatter.set_title(self.fdata.dataset_name) - else: - self.axScatter.set_title("Outliergram") + self.axScatter.set_xlabel("MEI") self.axScatter.set_ylabel("MBD") self.axScatter.set_xlim([0, 1]) From 69b76f349e53c5e6cb7adaf88daa1bb5986b5421 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 25 Apr 2021 19:41:04 +0200 Subject: [PATCH 271/417] np.ndarray is the type of artists --- skfda/exploratory/visualization/_baseplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index f7e68fc5e..5741afef1 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -34,7 +34,7 @@ def __init__( fig: Optional[Figure] = None, axes: Union[Axes, Sequence[Axes], None] = None, ) -> None: - self.artists: np.array + self.artists: np.ndarray self.fig = fig self.axes = axes From df0676faabd4f4fa68b2fa18f3b6956e82e9b9a7 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 25 Apr 2021 19:44:10 +0200 Subject: [PATCH 272/417] solved import order --- 
skfda/exploratory/visualization/_outliergram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 5c480b213..a5f96c26f 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -8,10 +8,10 @@ """ from typing import Optional, Sequence, Union -from matplotlib.artist import Artist import numpy as np import scipy.integrate as integrate +from matplotlib.artist import Artist from matplotlib.axes import Axes from matplotlib.figure import Figure from scipy.stats import rankdata From 22496f90c9f8348ee2526f2fcc9bb071a9b1e3fe Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 26 Apr 2021 14:40:44 +0200 Subject: [PATCH 273/417] errors solved, now vectorized and private --- .../exploratory/visualization/_outliergram.py | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index a5f96c26f..3502fbf97 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -82,7 +82,7 @@ def __init__( "The size of mbd and mei should be the same.", ) self.n = self.mbd.size - distances, parable = self.compute_distances() + distances, parable = self.__compute_distances() self.distances = distances mei_ordered = self.mei[:] mei_ordered, parable = ( @@ -90,9 +90,9 @@ def __init__( ) self.parable = parable self.mei_ordered = mei_ordered - self.compute_outliergram() + self.__compute_outliergram() - self.set_figure_and_axes(chart, fig, axes, n_rows, n_cols) + self.__set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( self, @@ -174,33 +174,28 @@ def modified_epigraph_index_list(self) -> np.ndarray: return integrand.flatten() - def compute_distances(self) -> np.ndarray: + def __compute_distances(self) -> np.ndarray: """ 
Calculate the distances of each point towards the parabola. The distances can be calculated with function: d_i = a_0 + a_1* mei_i + n^2* a_2* mei_i^2 - mb_i. """ - distances = [] - parable = [] a_0 = -2 / (self.n * (self.n - 1)) a_1 = (2 * (self.n + 1)) / (self.n - 1) a_2 = a_0 - for mbd_item, mei_item in zip(self.mbd, self.mei): - p_i = ( - a_0 + a_1 * mei_item + pow(self.n, 2) * a_2 * pow(mei_item, 2) - ) - distances.append(p_i - mbd_item) - parable.append(p_i) + parable = ( + a_0 + a_1 * self.mei + pow(self.n, 2) * a_2 * pow(self.mei, 2) + ) + distances = parable - self.mbd + return distances, parable - def compute_outliergram(self) -> None: + def __compute_outliergram(self) -> None: """Compute the parabola under which the outliers lie.""" - percentile_25 = 25 - percentile_75 = 75 - first_quartile = np.percentile(self.distances, percentile_25) - third_quartile = np.percentile(self.distances, percentile_75) + first_quartile = np.percentile(self.distances, 25) + third_quartile = np.percentile(self.distances, 75) iqr = third_quartile - first_quartile self.shifted_parable = self.parable - (third_quartile + iqr) @@ -208,7 +203,7 @@ def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" return self.fdata.n_samples - def set_figure_and_axes( + def __set_figure_and_axes( self, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, From 13cc6055768f673aa13edd1fbda78d028d6619ce Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 26 Apr 2021 14:48:26 +0200 Subject: [PATCH 274/417] solved errors --- skfda/exploratory/visualization/_ddplot.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 246b31173..d6648d4fa 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -8,6 +8,7 @@ from typing import Optional, TypeVar, Union import 
numpy as np +from matplotlib.artist import Artist from matplotlib.axes import Axes from matplotlib.figure import Figure @@ -85,22 +86,23 @@ def plot( fig (figure object): figure object in which the depths will be scattered. """ - self.artists = np.array([]) + self.artists = np.zeros(self.n_samples(), dtype=Artist) margin = 0.025 width_aux_line = 0.35 color_aux_line = "gray" ax = self.axes[0] - for d1, d2 in zip(self.depth_dist1, self.depth_dist2): - self.artists = np.append(self.artists, ax.scatter( - d1, - d2, + for i in range(len(self.depth_dist1)): + self.artists[i] = ax.scatter( + self.depth_dist1[i], + self.depth_dist1[i], picker=2, - )) + ) # Set labels of graph - ax.set_title("DDPlot") + if self.fdata.dataset_name is not None: + ax.set_title(self.fdata.dataset_name) ax.set_xlabel("X depth") ax.set_ylabel("Y depth") ax.set_xlim( From 29315eb1b538712ab4888e9c78af8acde688b9f9 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 26 Apr 2021 14:49:23 +0200 Subject: [PATCH 275/417] set_figure_and_axes is now private --- skfda/exploratory/visualization/_ddplot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index d6648d4fa..454c4b00c 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -70,7 +70,7 @@ def __init__( self.depth_dist2 = self.depth_method( self.fdata, distribution=dist2, ) - self.set_figure_and_axes(chart, fig, axes) + self.__set_figure_and_axes(chart, fig, axes) def plot( self, @@ -129,7 +129,7 @@ def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" return self.fdata.n_samples - def set_figure_and_axes( + def __set_figure_and_axes( self, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, From 4d3d45b51595501052997ef8b5fa016f6c14ada3 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 26 Apr 2021 21:48:54 +0200 
Subject: [PATCH 276/417] method private --- skfda/exploratory/visualization/_ddplot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 454c4b00c..603ab792f 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -70,7 +70,7 @@ def __init__( self.depth_dist2 = self.depth_method( self.fdata, distribution=dist2, ) - self.__set_figure_and_axes(chart, fig, axes) + self._set_figure_and_axes(chart, fig, axes) def plot( self, @@ -129,7 +129,7 @@ def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" return self.fdata.n_samples - def __set_figure_and_axes( + def _set_figure_and_axes( self, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, From c95718a8a78fab19d3a2a3e59c0b6ce2421f14c5 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 26 Apr 2021 22:49:49 +0200 Subject: [PATCH 277/417] corrected setfigureaxes --- skfda/exploratory/visualization/_outliergram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 3502fbf97..da4ded8c7 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -203,7 +203,7 @@ def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" return self.fdata.n_samples - def __set_figure_and_axes( + def _set_figure_and_axes( self, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, From 85bb90679d22cbce649ba1325d4a767f2c7ee059 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 26 Apr 2021 22:52:46 +0200 Subject: [PATCH 278/417] correction --- skfda/exploratory/visualization/_outliergram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index da4ded8c7..6138ca0f1 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -92,7 +92,7 @@ def __init__( self.mei_ordered = mei_ordered self.__compute_outliergram() - self.__set_figure_and_axes(chart, fig, axes, n_rows, n_cols) + self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( self, From d5774b0569511f6ad8a1ac8cccbd3c37df77813a Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 27 Apr 2021 12:58:48 +0200 Subject: [PATCH 279/417] methods now protected --- skfda/exploratory/visualization/_outliergram.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 6138ca0f1..ecd42ca04 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -63,8 +63,8 @@ class Outliergram(BasePlot): def __init__( self, fdata: FDataGrid, - *, chart: Union[Figure, Axes, None] = None, + *, fig: Optional[Figure] = None, axes: Optional[Axes] = None, n_rows: Optional[int] = None, @@ -82,7 +82,7 @@ def __init__( "The size of mbd and mei should be the same.", ) self.n = self.mbd.size - distances, parable = self.__compute_distances() + distances, parable = self._compute_distances() self.distances = distances mei_ordered = self.mei[:] mei_ordered, parable = ( @@ -90,7 +90,7 @@ def __init__( ) self.parable = parable self.mei_ordered = mei_ordered - self.__compute_outliergram() + self._compute_outliergram() self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) @@ -174,7 +174,7 @@ def modified_epigraph_index_list(self) -> np.ndarray: return integrand.flatten() - def __compute_distances(self) -> np.ndarray: + def _compute_distances(self) -> np.ndarray: """ Calculate the distances of each point towards the parabola. 
@@ -192,10 +192,10 @@ def __compute_distances(self) -> np.ndarray: return distances, parable - def __compute_outliergram(self) -> None: + def _compute_outliergram(self) -> None: """Compute the parabola under which the outliers lie.""" - first_quartile = np.percentile(self.distances, 25) - third_quartile = np.percentile(self.distances, 75) + first_quartile = np.percentile(self.distances, 25) # noqa: WPS432 + third_quartile = np.percentile(self.distances, 75) # noqa: WPS432 iqr = third_quartile - first_quartile self.shifted_parable = self.parable - (third_quartile + iqr) From c8679cf0096634a29921127b2a849b5448b9a82e Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 28 Apr 2021 21:49:48 +0200 Subject: [PATCH 280/417] outliergram corrected --- skfda/exploratory/visualization/_outliergram.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index ecd42ca04..9dabc517b 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -77,16 +77,15 @@ def __init__( self.depth.fit(fdata) self.mbd = self.depth(fdata) self.mei = self.modified_epigraph_index_list() - if self.mbd.size != self.mei.size: + if len(self.mbd) != len(self.mei): raise ValueError( "The size of mbd and mei should be the same.", ) self.n = self.mbd.size distances, parable = self._compute_distances() self.distances = distances - mei_ordered = self.mei[:] mei_ordered, parable = ( - list(el) for el in zip(*sorted(zip(mei_ordered, parable))) + list(el) for el in zip(*sorted(zip(self.mei, parable))) ) self.parable = parable self.mei_ordered = mei_ordered From f3fb6f5cbd22eee7466a4fc66492a41a65cef5e7 Mon Sep 17 00:00:00 2001 From: mellamansanchez <38490771+mellamansanchez@users.noreply.github.com> Date: Wed, 28 Apr 2021 21:52:17 +0200 Subject: [PATCH 281/417] Update skfda/exploratory/visualization/_ddplot.py MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Ramos Carreño --- skfda/exploratory/visualization/_ddplot.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 603ab792f..c81f52ac8 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -55,8 +55,9 @@ def __init__( fdata: T, dist1: T, dist2: T, - depth_method: Depth[T], chart: Union[Figure, Axes, None] = None, + *, + depth_method: Depth[T], fig: Optional[Figure] = None, axes: Optional[Axes] = None, ) -> None: From 1f6cf626eb97b4f960af5120096a003d8afda229 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 28 Apr 2021 22:00:08 +0200 Subject: [PATCH 282/417] msplot --- skfda/exploratory/visualization/_magnitude_shape_plot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py index 489d0c244..0ea65b516 100644 --- a/skfda/exploratory/visualization/_magnitude_shape_plot.py +++ b/skfda/exploratory/visualization/_magnitude_shape_plot.py @@ -166,8 +166,8 @@ class MagnitudeShapePlot(BasePlot): def __init__( self, fdatagrid: FDataGrid, - *, chart: Union[Figure, Axes, None] = None, + *, fig: Optional[Figure] = None, axes: Optional[Sequence[Axes]] = None, **kwargs, @@ -233,7 +233,7 @@ def __init__( self.ylabel = 'VO' self.title = 'MS-Plot' - self.set_figure_and_axes(chart, fig, axes) + self._set_figure_and_axes(chart, fig, axes) @property def fdatagrid(self): @@ -325,7 +325,7 @@ def plot(self): def n_samples(self) -> int: return self.fdatagrid.n_samples - def set_figure_and_axes( + def _set_figure_and_axes( self, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, From 109ffad0da9b19c8973eb2d428759bdadc493843 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: 
Wed, 28 Apr 2021 22:45:55 +0200 Subject: [PATCH 283/417] _set_figure_and_axes --- skfda/exploratory/visualization/clustering.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/skfda/exploratory/visualization/clustering.py b/skfda/exploratory/visualization/clustering.py index 9a87e0b75..1fa23a459 100644 --- a/skfda/exploratory/visualization/clustering.py +++ b/skfda/exploratory/visualization/clustering.py @@ -187,9 +187,9 @@ def __init__( self.center_width = center_width self.colormap = colormap - self.set_figure_and_axes(chart, fig, axes, n_rows, n_cols) + self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) - def set_figure_and_axes( + def _set_figure_and_axes( self, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, @@ -402,9 +402,9 @@ def __init__( self.title = title self.colormap = colormap - self.set_figure_and_axes(chart, fig, axes) + self._set_figure_and_axes(chart, fig, axes) - def set_figure_and_axes( + def _set_figure_and_axes( self, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, @@ -509,9 +509,9 @@ def __init__( self.colormap = colormap self.sort = sort - self.set_figure_and_axes(chart, fig, axes) + self._set_figure_and_axes(chart, fig, axes) - def set_figure_and_axes( + def _set_figure_and_axes( self, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, From 1861302d8d172226225560a0c4a063eb143108d8 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 28 Apr 2021 22:46:49 +0200 Subject: [PATCH 284/417] _set_figure_and_axes --- skfda/exploratory/visualization/_parametric_plot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index b793874dc..4fab77506 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -59,7 +59,7 @@ def __init__( else: self.fd_final = 
self.fdata1 - self.set_figure_and_axes(chart, fig, axes) + self._set_figure_and_axes(chart, fig, axes) def plot( self, @@ -122,7 +122,7 @@ def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" return self.fd_final.n_samples - def set_figure_and_axes( + def _set_figure_and_axes( self, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, From 5ca935dff4795a6924725d0c76098cbd098afde7 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 28 Apr 2021 22:47:23 +0200 Subject: [PATCH 285/417] _set_figure_and_axes --- skfda/exploratory/visualization/fpca.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/fpca.py b/skfda/exploratory/visualization/fpca.py index ee54e5bda..18166b590 100644 --- a/skfda/exploratory/visualization/fpca.py +++ b/skfda/exploratory/visualization/fpca.py @@ -43,7 +43,7 @@ def __init__( self.components = components self.multiple = multiple - self.set_figure_and_axes(chart, fig, axes) + self._set_figure_and_axes(chart, fig, axes) def plot(self, **kwargs): """ @@ -73,7 +73,7 @@ def plot(self, **kwargs): def n_samples(self) -> int: return self.fdata.n_samples - def set_figure_and_axes( + def _set_figure_and_axes( self, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, From 0f511d91f7f7c081f34efd968bea329704fac39b Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 28 Apr 2021 22:49:08 +0200 Subject: [PATCH 286/417] _set_figure_and_axes --- skfda/exploratory/visualization/_boxplot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index e019a8fe9..d4701443b 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -323,7 +323,7 @@ def __init__( self.mediancol = "black" self._show_full_outliers = False - self.set_figure_and_axes(chart, fig, axes, 
n_rows, n_cols) + self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) @property def fdatagrid(self): @@ -359,7 +359,7 @@ def show_full_outliers(self, boolean): raise ValueError("show_full_outliers must be boolean type") self._show_full_outliers = boolean - def set_figure_and_axes( + def _set_figure_and_axes( self, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, From 6770a7e3df39efbd8fb1d526b21fcfc3aea2d08b Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 28 Apr 2021 22:49:31 +0200 Subject: [PATCH 287/417] _set_figure_and_axes --- skfda/exploratory/visualization/representation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index d2e9c3457..168d904e9 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -210,7 +210,7 @@ def __init__( ) else: self.gradient_list = [] - self.set_figure_and_axes(chart, fig, axes, n_rows, n_cols) + self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( self, @@ -363,7 +363,7 @@ def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" return self.fdata.n_samples - def set_figure_and_axes( + def _set_figure_and_axes( self, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, @@ -439,7 +439,7 @@ def __init__( BasePlot.__init__(self) self.fdata = fdata self.grid_points = grid_points - self.set_figure_and_axes(chart, fig, axes, n_rows, n_cols) + self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( self, @@ -554,7 +554,7 @@ def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" return self.fdata.n_samples - def set_figure_and_axes( + def _set_figure_and_axes( self, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, From 
a631fbe7eb2e2ad8807cbc68064a85fcf5906d2a Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 28 Apr 2021 22:57:57 +0200 Subject: [PATCH 288/417] indexing done --- skfda/exploratory/visualization/_outliergram.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 9dabc517b..8fb88c3fb 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -84,11 +84,9 @@ def __init__( self.n = self.mbd.size distances, parable = self._compute_distances() self.distances = distances - mei_ordered, parable = ( - list(el) for el in zip(*sorted(zip(self.mei, parable))) - ) - self.parable = parable - self.mei_ordered = mei_ordered + indices = np.argsort(self.mei) + self.parable = parable[indices] + self.mei_ordered = self.mei[indices] self._compute_outliergram() self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) From f336bb30639105165f61543c917dbfae0d96e261 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 28 Apr 2021 23:05:43 +0200 Subject: [PATCH 289/417] whitespace --- skfda/exploratory/visualization/_outliergram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 8fb88c3fb..366ce196a 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -84,7 +84,7 @@ def __init__( self.n = self.mbd.size distances, parable = self._compute_distances() self.distances = distances - indices = np.argsort(self.mei) + indices = np.argsort(self.mei) self.parable = parable[indices] self.mei_ordered = self.mei[indices] self._compute_outliergram() From fcf0dfe9489dc0c474ab7630ba11557e0e3d217a Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 2 May 2021 18:56:16 +0200 Subject: [PATCH 290/417] solved n --- 
skfda/exploratory/visualization/_outliergram.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 366ce196a..f90a9cd0e 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -81,7 +81,6 @@ def __init__( raise ValueError( "The size of mbd and mei should be the same.", ) - self.n = self.mbd.size distances, parable = self._compute_distances() self.distances = distances indices = np.argsort(self.mei) @@ -178,12 +177,12 @@ def _compute_distances(self) -> np.ndarray: The distances can be calculated with function: d_i = a_0 + a_1* mei_i + n^2* a_2* mei_i^2 - mb_i. """ - a_0 = -2 / (self.n * (self.n - 1)) - a_1 = (2 * (self.n + 1)) / (self.n - 1) + a_0 = -2 / (self.n_samples() * (self.n_samples() - 1)) + a_1 = (2 * (self.n_samples() + 1)) / (self.n_samples() - 1) a_2 = a_0 parable = ( - a_0 + a_1 * self.mei + pow(self.n, 2) * a_2 * pow(self.mei, 2) + a_0 + a_1 * self.mei + pow(self.n_samples(), 2) * a_2 * pow(self.mei, 2) ) distances = parable - self.mbd From b089bff3f31e2059e5f92b6c0a2399d4816cdf1c Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 2 May 2021 18:58:55 +0200 Subject: [PATCH 291/417] solved too long line --- skfda/exploratory/visualization/_outliergram.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index f90a9cd0e..f1fabbb0d 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -182,7 +182,8 @@ def _compute_distances(self) -> np.ndarray: a_2 = a_0 parable = ( - a_0 + a_1 * self.mei + pow(self.n_samples(), 2) * a_2 * pow(self.mei, 2) + a_0 + a_1 * self.mei + + pow(self.n_samples(), 2) * a_2 * pow(self.mei, 2) ) distances = parable - self.mbd From 
08fad6468d6e506c79e826af0217736699b55af9 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 2 May 2021 19:07:18 +0200 Subject: [PATCH 292/417] solved error at scattering --- skfda/exploratory/visualization/_ddplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index c81f52ac8..4102cb1d7 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -97,7 +97,7 @@ def plot( for i in range(len(self.depth_dist1)): self.artists[i] = ax.scatter( self.depth_dist1[i], - self.depth_dist1[i], + self.depth_dist2[i], picker=2, ) From ff2359cd3e1c962929027ea88397d3e059457fb6 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 2 May 2021 19:33:30 +0200 Subject: [PATCH 293/417] using pickradius --- skfda/exploratory/visualization/_ddplot.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 4102cb1d7..740a9d74b 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -98,7 +98,8 @@ def plot( self.artists[i] = ax.scatter( self.depth_dist1[i], self.depth_dist2[i], - picker=2, + picker=True, + pickradius=2, ) # Set labels of graph From 7c768f3c1a55972071e06d06299a8fb62a4e1e6e Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 2 May 2021 19:38:58 +0200 Subject: [PATCH 294/417] using pickerradius --- skfda/exploratory/visualization/representation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 168d904e9..afb93f7e1 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -519,7 +519,8 @@ def plot( self.grid_points[0], evaluated_points[j, ..., i].T, 
**color_dict, - picker=2, + picker=True, + pickradius=2, **kwargs, ), ) @@ -541,7 +542,8 @@ def plot( Y, evaluated_points[h, ..., k].T, **color_dict, - picker=2, + picker=True, + pickradius=2, **kwargs, ), ) From 612a12457061b08d712e8e2d44a6ca15d5056fed Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 2 May 2021 19:42:53 +0200 Subject: [PATCH 295/417] pick radius corrected --- skfda/exploratory/visualization/_magnitude_shape_plot.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py index 0ea65b516..21b4475de 100644 --- a/skfda/exploratory/visualization/_magnitude_shape_plot.py +++ b/skfda/exploratory/visualization/_magnitude_shape_plot.py @@ -313,7 +313,8 @@ def plot(self): self.points[:, 0].ravel()[i], self.points[:, 1].ravel()[i], color=colors_rgba[i], - picker=2, + picker=True, + pickradius=2, )) self.axes[0].set_xlabel(self.xlabel) From 9cd208c1f54fe974fceea9c1facbe019ba57261d Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 2 May 2021 20:06:09 +0200 Subject: [PATCH 296/417] style corrections --- .../visualization/_multiple_display.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index b4e836883..d20204be0 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -7,6 +7,7 @@ from matplotlib.axes import Axes from matplotlib.backend_bases import Event from matplotlib.figure import Figure +from matplotlib.text import Annotation from matplotlib.widgets import Slider, Widget from ._baseplot import BasePlot @@ -49,7 +50,7 @@ class MultipleDisplay: tags: list of tags for each ax, that contain the information printed while hovering. previous_hovered: artist object containing of the last point hovered. 
- is_updating: boolean value that determines wheter a widget + is_updating: boolean value that determines whether a widget is being updated. """ @@ -80,11 +81,11 @@ def __init__( self.point_clicked: Artist = None self.num_graphs = len(self.displays) self.length_data = self.displays[0].n_samples() - self.sliders = [] - self.criteria = [] + self.sliders: Sequence[Widget] = [] + self.criteria: Sequence[List[int]] = [] self.clicked = False self.index_clicked = -1 - self.tags = [] + self.tags: Sequence[Annotation] = [] self.previous_hovered = None self.fig = fig self.axes = axes @@ -127,7 +128,7 @@ def plot( for disp, ax in zip(self.displays, self.axes): ax.clear() - disp.set_figure_and_axes(axes=ax) + disp._set_figure_and_axes(axes=ax) disp.plot() self.tags.append( ax.annotate( @@ -474,7 +475,7 @@ def add_slider( full_desc = label_slider self.sliders.append( widget_func( - self.fig.axes[self.num_graphs + ind_ax], + self.axes[self.num_graphs + ind_ax], full_desc, valmin=0, valmax=self.length_data - 1, @@ -482,13 +483,13 @@ def add_slider( ), ) - self.fig.axes[self.num_graphs + ind_ax].annotate( + self.axes[self.num_graphs + ind_ax].annotate( '0', xy=(0, -0.5), xycoords='axes fraction', annotation_clip=False, ) - self.fig.axes[self.num_graphs + ind_ax].annotate( + self.axes[self.num_graphs + ind_ax].annotate( str(self.length_data - 1), xy=(0.95, -0.5), xycoords='axes fraction', From 37f576174f5415fbb2165cee3c363d8e967fe164 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 2 May 2021 20:19:35 +0200 Subject: [PATCH 297/417] corrected list --- skfda/exploratory/visualization/_multiple_display.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index d20204be0..02b34041f 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -81,11 +81,11 @@ def __init__( 
self.point_clicked: Artist = None self.num_graphs = len(self.displays) self.length_data = self.displays[0].n_samples() - self.sliders: Sequence[Widget] = [] - self.criteria: Sequence[List[int]] = [] + self.sliders: List[Widget] = [] + self.criteria: List[List[int]] = [] self.clicked = False self.index_clicked = -1 - self.tags: Sequence[Annotation] = [] + self.tags: List[Annotation] = [] self.previous_hovered = None self.fig = fig self.axes = axes From a562618aca3be532e278e8ff95cab82c48100a99 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 2 May 2021 22:29:26 +0200 Subject: [PATCH 298/417] example and errors corrected --- .../visualization/_magnitude_shape_plot.py | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py index 21b4475de..f8880ee87 100644 --- a/skfda/exploratory/visualization/_magnitude_shape_plot.py +++ b/skfda/exploratory/visualization/_magnitude_shape_plot.py @@ -10,6 +10,7 @@ import matplotlib import matplotlib.pyplot as plt import numpy as np +from matplotlib.artist import Artist from matplotlib.axes import Axes from matplotlib.figure import Figure @@ -18,9 +19,6 @@ from ._baseplot import BasePlot from ._utils import _get_figure_and_axes, _set_figure_layout -__author__ = "Amanda Hernando Bernabé" -__email__ = "amanda.hernando@estudiante.uam.es" - class MagnitudeShapePlot(BasePlot): r"""Implementation of the magnitude-shape plot @@ -109,7 +107,7 @@ class MagnitudeShapePlot(BasePlot): ... [-0.5, -0.5, -0.5, -1, -1, -1]] >>> grid_points = [ 0., 2., 4., 6., 8., 10.] >>> fd = skfda.FDataGrid(data_matrix, grid_points) - >>> MagnitudeShapePlot(fd) + >>> print(MagnitudeShapePlot(fd).__repr__()) MagnitudeShapePlot( FDataGrid=FDataGrid( array([[[ 1. ], @@ -136,20 +134,24 @@ class MagnitudeShapePlot(BasePlot): [-1. ], [-1. ], [-1. 
]]]), - grid_points=(array([ 0., 2., 4., 6., 8., 10.]),), + grid_points=(array([ 0., 2., 4., 6., 8., 10.]),), domain_range=((0.0, 10.0),), - ...), + dataset_name=None, + argument_names=(None,), + coordinate_names=(None,), + extrapolation=None, + interpolation=SplineInterpolation(interpolation_order=1, smoothness_parameter=0, monotone=False)), multivariate_depth=None, pointwise_weights=None, alpha=0.993, points=array([[ 1.66666667, 0.12777778], - [ 0. , 0. ], - [-0.8 , 0.17666667], - [-1.74444444, 0.94395062]]), + [ 0. , 0. ], + [-0.8 , 0.17666667], + [-1.74444444, 0.94395062]]), outliers=array([False, False, False, False]), colormap=seismic, color=0.2, - outliercol=(0.8,), + outliercol=0.8, xlabel='MO', ylabel='VO', title='MS-Plot') @@ -301,7 +303,7 @@ def plot(self): """ - self.artists = np.array([]) + self.artists = np.zeros(self.n_samples(), dtype=Artist) colors = np.zeros((self.fdatagrid.n_samples, 4)) colors[np.where(self.outliers == 1)] = self.colormap(self.outliercol) colors[np.where(self.outliers == 0)] = self.colormap(self.color) @@ -309,13 +311,13 @@ def plot(self): colors_rgba = [tuple(i) for i in colors] for i in range(len(self.points[:, 0].ravel())): - self.artists = np.append(self.artists, self.axes[0].scatter( + self.artists[i] = self.axes[0].scatter( self.points[:, 0].ravel()[i], self.points[:, 1].ravel()[i], color=colors_rgba[i], picker=True, pickradius=2, - )) + ) self.axes[0].set_xlabel(self.xlabel) self.axes[0].set_ylabel(self.ylabel) From 658a4ec3d61c27e71ee2f81e73083d27c32daf2a Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 3 May 2021 01:22:15 +0200 Subject: [PATCH 299/417] solved example --- skfda/exploratory/visualization/_magnitude_shape_plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py index f8880ee87..a2170c7a6 100644 --- a/skfda/exploratory/visualization/_magnitude_shape_plot.py +++ 
b/skfda/exploratory/visualization/_magnitude_shape_plot.py @@ -107,7 +107,7 @@ class MagnitudeShapePlot(BasePlot): ... [-0.5, -0.5, -0.5, -1, -1, -1]] >>> grid_points = [ 0., 2., 4., 6., 8., 10.] >>> fd = skfda.FDataGrid(data_matrix, grid_points) - >>> print(MagnitudeShapePlot(fd).__repr__()) + >>> MagnitudeShapePlot(fd) MagnitudeShapePlot( FDataGrid=FDataGrid( array([[[ 1. ], From 682d27db02eff547c5c5652454c97878dcd3e927 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 3 May 2021 18:19:02 +0200 Subject: [PATCH 300/417] corrected example --- .../visualization/_magnitude_shape_plot.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py index a2170c7a6..07a9e0d62 100644 --- a/skfda/exploratory/visualization/_magnitude_shape_plot.py +++ b/skfda/exploratory/visualization/_magnitude_shape_plot.py @@ -136,18 +136,14 @@ class MagnitudeShapePlot(BasePlot): [-1. ]]]), grid_points=(array([ 0., 2., 4., 6., 8., 10.]),), domain_range=((0.0, 10.0),), - dataset_name=None, - argument_names=(None,), - coordinate_names=(None,), - extrapolation=None, - interpolation=SplineInterpolation(interpolation_order=1, smoothness_parameter=0, monotone=False)), + ...), multivariate_depth=None, pointwise_weights=None, alpha=0.993, points=array([[ 1.66666667, 0.12777778], - [ 0. , 0. ], - [-0.8 , 0.17666667], - [-1.74444444, 0.94395062]]), + [ 0. , 0. 
], + [-0.8 , 0.17666667], + [-1.74444444, 0.94395062]]), outliers=array([False, False, False, False]), colormap=seismic, color=0.2, From b188d96dd4a70281fb7deb2584c107bc0f793d33 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 3 May 2021 20:15:07 +0200 Subject: [PATCH 301/417] temp changes --- .../visualization/representation.py | 67 ++++++++++--------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index afb93f7e1..4ca76bdce 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -21,6 +21,7 @@ import matplotlib.cm import matplotlib.patches import numpy as np +from matplotlib.artist import Artist from matplotlib.axes import Axes from matplotlib.figure import Figure from typing_extensions import Protocol @@ -272,7 +273,7 @@ def plot( Returns: fig (figure object): figure object in which the graphs are plotted. 
""" - self.artists = np.array([]) + self.artists = np.zeros(self.n_samples(), dtype=Artist) if domain_range is None: self.domain_range = self.fdata.domain_range @@ -305,17 +306,19 @@ def plot( eval_points = np.linspace(*self.domain_range[0], self.n_points) mat = self.fdata(eval_points) + ind = 0 for i in range(self.fdata.dim_codomain): for j in range(self.fdata.n_samples): set_color_dict(sample_colors, j, color_dict) - self.artists = np.append(self.artists, self.axes[i].plot( + self.artists[ind] = self.axes[i].plot( eval_points, mat[j, ..., i].T, **color_dict, **kwargs, - )) + ) + ind += 1 else: @@ -339,20 +342,20 @@ def plot( X, Y = np.meshgrid(x, y, indexing='ij') + ind = 0 for k in range(self.fdata.dim_codomain): for h in range(self.fdata.n_samples): set_color_dict(sample_colors, h, color_dict) - self.artists = np.append( - self.artists, self.axes[k].plot_surface( - X, - Y, - Z[h, ..., k], - **color_dict, - **kwargs, - ), + self.artists[ind] = self.axes[k].plot_surface( + X, + Y, + Z[h, ..., k], + **color_dict, + **kwargs, ) + ind += 1 _set_labels(self.fdata, self.fig, self.axes, patches) self.fig.suptitle("GraphPlot") @@ -361,7 +364,7 @@ def plot( def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" - return self.fdata.n_samples + return self.fdata.n_samples * self.fdata.dim_codomain def _set_figure_and_axes( self, @@ -483,7 +486,7 @@ def plot( Returns: fig: figure object in which the graphs are plotted. 
""" - self.artists = np.array([]) + self.artists = np.zeros(self.n_samples(), dtype=Artist) evaluated_points = None if self.grid_points is None: @@ -509,21 +512,21 @@ def plot( if self.fdata.dim_domain == 1: + ind = 0 for i in range(self.fdata.dim_codomain): for j in range(self.fdata.n_samples): set_color_dict(sample_colors, j, color_dict) - self.artists = np.append( - self.artists, self.axes[i].scatter( - self.grid_points[0], - evaluated_points[j, ..., i].T, - **color_dict, - picker=True, - pickradius=2, - **kwargs, - ), + self.artists[ind] = self.axes[i].scatter( + self.grid_points[0], + evaluated_points[j, ..., i].T, + **color_dict, + picker=True, + pickradius=2, + **kwargs, ) + ind += 1 else: @@ -531,22 +534,22 @@ def plot( Y = self.fdata.grid_points[1] X, Y = np.meshgrid(X, Y) + ind = 0 for k in range(self.fdata.dim_codomain): for h in range(self.fdata.n_samples): set_color_dict(sample_colors, h, color_dict) - self.artists = np.append( - self.artists, self.axes[k].scatter( - X, - Y, - evaluated_points[h, ..., k].T, - **color_dict, - picker=True, - pickradius=2, - **kwargs, - ), + self.artists = self.axes[k].scatter( + X, + Y, + evaluated_points[h, ..., k].T, + **color_dict, + picker=True, + pickradius=2, + **kwargs, ) + ind += 1 _set_labels(self.fdata, self.fig, self.axes, patches) From faeb2cbb03c80b2b8b1e1967d49fcc6e294dddab Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 4 May 2021 15:45:47 +0200 Subject: [PATCH 302/417] solved multiple axes incompatibility --- .../visualization/_multiple_display.py | 116 ++++-- .../visualization/representation.py | 361 +++++++++++------- 2 files changed, 311 insertions(+), 166 deletions(-) diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index 02b34041f..5856aa8f0 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -79,7 +79,7 @@ def __init__( for d in displays: 
self.displays.append(copy.copy(d)) self.point_clicked: Artist = None - self.num_graphs = len(self.displays) + self.num_graphs = sum(len(d.axes) for d in self.displays) self.length_data = self.displays[0].n_samples() self.sliders: List[Widget] = [] self.criteria: List[List[int]] = [] @@ -126,12 +126,10 @@ def plot( "Length of some data sets are not equal ", ) - for disp, ax in zip(self.displays, self.axes): - ax.clear() - disp._set_figure_and_axes(axes=ax) - disp.plot() + for i in range(self.num_graphs): + self.axes[i].clear() self.tags.append( - ax.annotate( + self.axes[i].annotate( "", xy=(0, 0), xytext=(20, 20), @@ -141,6 +139,14 @@ def plot( ), ) + int_index = 0 + for disp in self.displays: + axes_needed = len(disp.axes) + end_index = axes_needed + int_index + disp._set_figure_and_axes(axes=self.axes[int_index:end_index]) + disp.plot() + int_index = end_index + self.fig.canvas.mpl_connect('motion_notify_event', self.hover) self.fig.canvas.mpl_connect('pick_event', self.pick) @@ -209,25 +215,37 @@ def hover(self, event: Event): """ index_axis = -1 - for i in range(self.num_graphs): - if event.inaxes == self.axes[i]: - index_axis = i - - if len(self.displays[i].artists) == 0: - return - - for j in range(len(self.displays[i].artists)): - artist = self.displays[i].artists[j] - if isinstance(artist, List): + index = 0 + for d in self.displays: + for i in range(len(d.axes)): + if event.inaxes == d.axes[i]: + index_axis = index + if len(d.artists) == 0: return - is_graph, ind = artist.contains(event) - if is_graph and self.previous_hovered == artist: + + elif ( + isinstance(d.artists[0], List) + or isinstance(d.artists[0][0], List) + ): return - if is_graph: - self.previous_hovered = artist - index_point = j - break - break + + elif isinstance(d.artists[0], Artist): + artists_array = d.artists + elif isinstance(d.artists[0], np.ndarray): + artists_array = d.artists[i] + for j in range(len(artists_array)): + artist = artists_array[j] + is_graph, ind = 
artist.contains(event) + if is_graph and self.previous_hovered == artist: + return + if is_graph: + self.previous_hovered = artist + index_point = j + break + break + + else: + index += 1 for k in range(self.num_graphs, len(self.axes)): if event.inaxes == self.axes[k]: @@ -304,12 +322,21 @@ def pick(self, event: Event) -> None: def update_index_display_picked(self) -> None: """Update the index corresponding to the display picked.""" - for i in range(self.num_graphs): - if self.axes[i] == self.point_clicked.axes: - self.index_clicked = np.where( - self.displays[i].artists == self.point_clicked, - )[0][0] - return + for d in self.displays: + if isinstance(d.artists[0], Artist): + if d.axes[0] == self.point_clicked.axes: + self.index_clicked = np.where( + d.artists == self.point_clicked, + )[0][0] + return + else: + for i in range(len(d.axes)): + self.x = 0 + if d.axes[i] == self.point_clicked.axes: + self.index_clicked = np.where( + d.artists[i] == self.point_clicked, + )[0][0] + return def reduce_points_intensity(self) -> None: """Reduce the transparency of all the points but the selected one.""" @@ -317,10 +344,16 @@ def reduce_points_intensity(self) -> None: if i != self.index_clicked: for d in self.displays: if len(d.artists) != 0: - if isinstance(d.artists[i], list): + if isinstance(d.artists[0], list): d.artists[i][0].set_alpha(0.1) - else: + elif isinstance(d.artists[0], Artist): d.artists[i].set_alpha(0.1) + else: + for a in d.artists: + if isinstance(a[0], list): + a[i][0].set_alpha(0.1) + elif isinstance(a[0], Artist): + a[i].set_alpha(0.1) self.is_updating = True for j in range(len(self.sliders)): @@ -333,10 +366,17 @@ def restore_points_intensity(self) -> None: for i in range(self.length_data): for d in self.displays: if len(d.artists) != 0: - if isinstance(d.artists[i], list): + if isinstance(d.artists[0], list): d.artists[i][0].set_alpha(1) - else: + elif isinstance(d.artists[0], Artist): d.artists[i].set_alpha(1) + else: + for a in d.artists: + if 
isinstance(a[0], list): + a[i][0].set_alpha(1) + elif isinstance(a[0], Artist): + a[i].set_alpha(1) + self.point_clicked = None self.index_clicked = -1 @@ -395,10 +435,16 @@ def change_display_intensity(self, index: int, intensity: int) -> None: """ for d in self.displays: if len(d.artists) != 0: - if isinstance(d.artists[index], list): + if isinstance(d.artists[0], list): d.artists[index][0].set_alpha(intensity) - else: + elif isinstance(d.artists[0], Artist): d.artists[index].set_alpha(intensity) + else: + for a in d.artists: + if isinstance(a[0], list): + a[index][0].set_alpha(intensity) + elif isinstance(a[0], Artist): + a[index].set_alpha(intensity) def create_sliders( self, diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index ed6d61d30..0d4be6a76 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -21,6 +21,7 @@ import matplotlib.cm import matplotlib.patches import numpy as np +from matplotlib.artist import Artist from matplotlib.axes import Axes from matplotlib.figure import Figure from typing_extensions import Protocol @@ -29,6 +30,7 @@ from ..._utils import _to_domain_range, constants from ...representation._functional_data import FData from ...representation._typing import DomainRangeLike, GridPointsLike +from ._baseplot import BasePlot from ._utils import ( ColorLike, _get_figure_and_axes, @@ -59,9 +61,9 @@ def _get_color_info( legend: bool = False, kwargs: Any = None, ) -> Tuple[ - Union[ColorLike, None], - Optional[List[matplotlib.patches.Patch] - ]]: + Union[ColorLike, None], + Optional[List[matplotlib.patches.Patch]], +]: patches = None @@ -119,63 +121,89 @@ def _get_color_info( return sample_colors, patches -class GraphPlot: +class GraphPlot(BasePlot): """ - Class used to plot the FDataGrid object graph as hypersurfaces. + Class used to plot the FDatGrid object graph as hypersurfaces. 
When plotting functional data, we can either choose manually a color, a group of colors for the representations. Besides, we can use a list of variables (depths, scalar regression targets...) can be used as an argument to display the functions wtih a gradient of colors. - Args: fdata: functional data set that we want to plot. - gradient_values: list of real values used to determine the color - in which each of the instances will be plotted. + gradient_color_list: list of real values used to determine the color + in which each of the instances will be plotted. The size max_grad: maximum value that the gradient_list can take, it will be - used to normalize the ``gradient_values``. If not + used to normalize the gradient_color_list in order to get values + thatcan be used in the funcion colormap.__call__(). If not declared it will be initialized to the maximum value of gradient_list min_grad: minimum value that the gradient_list can take, it will be - used to normalize the ``gradient_values``. If not + used to normalize the gradient_color_list in order to get values + thatcan be used in the funcion colormap.__call__(). If not declared it will be initialized to the minimum value of gradient_list. - + chart (figure object, axe or list of axes, optional): figure over + with the graphs are plotted or axis over where the graphs are + plotted. If None and ax is also None, the figure is + initialized. + fig (figure object, optional): figure over with the graphs are + plotted in case ax is not specified. If None and ax is also + None, the figure is initialized. + axes (axis object, optional): axis over where the graphs + are plotted. If None, see param fig. + n_rows (int, optional): designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols(int, optional): designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. 
+ Attributes: + gradient_list: normalization of the values from gradient color_list + that will be used to determine the intensity of the color + each function will have. """ def __init__( self, fdata: FData, - gradient_values: Optional[Sequence[float]] = None, + gradient_color_list: Union[Sequence[float], None] = None, max_grad: Optional[float] = None, min_grad: Optional[float] = None, + chart: Union[Figure, Axes, None] = None, + *, + fig: Optional[Figure] = None, + axes: Optional[Axes] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, ) -> None: + BasePlot.__init__(self) self.fdata = fdata - self.gradient_values = gradient_values - if self.gradient_values is not None: - if len(self.gradient_values) != fdata.n_samples: + self.gradient_color_list = gradient_color_list + if self.gradient_color_list is not None: + if len(self.gradient_color_list) != fdata.n_samples: raise ValueError( - "The length of the gradient color" - "list should be the same as the number" + "The length of the gradient color", + "list should be the same as the number", "of samples in fdata", ) if min_grad is None: - self.min_grad = min(self.gradient_values) + self.min_grad = min(self.gradient_color_list) else: self.min_grad = min_grad if max_grad is None: - self.max_grad = max(self.gradient_values) + self.max_grad = max(self.gradient_color_list) else: self.max_grad = max_grad aux_list = [ grad_color - self.min_grad - for grad_color in self.gradient_values + for grad_color in self.gradient_color_list ] - self.gradient_list = ( + self.gradient_list: Sequence[float] = ( [ aux / (self.max_grad - self.min_grad) for aux in aux_list @@ -183,15 +211,11 @@ def __init__( ) else: self.gradient_list = [] + self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( self, - chart: Union[Figure, Axes, None] = None, *, - fig: Optional[Figure] = None, - ax: Optional[Axes] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, n_points: Union[int, Tuple[int, int], 
None] = None, domain_range: Optional[DomainRangeLike] = None, group: Optional[Sequence[K]] = None, @@ -210,50 +234,35 @@ def plot( visualizations, one that displays the functions without any criteria choosing the colors and a new one that displays the function with a gradient of colors depending on the initial - gradient_values (normalized in gradient_list). - + gradient_color_list (normalized in gradient_list). Args: - chart: figure over - with the graphs are plotted or axis over where the graphs are - plotted. If None and ax is also None, the figure is - initialized. - fig : figure over with the graphs are - plotted in case ax is not specified. If None and ax is also - None, the figure is initialized. - ax: axis over where the graphs are plotted. If None, see param fig. - n_rows : designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols: designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. - n_points: Number of points to evaluate in + n_points (int or tuple, optional): Number of points to evaluate in the plot. In case of surfaces a tuple of length 2 can be pased with the number of points to plot in each axis, otherwise the same number of points will be used in the two axes. By default in unidimensional plots will be used 501 points; in surfaces will be used 30 points per axis, wich makes a grid with 900 points. - domain_range: Range where the + domain_range (tuple or list of tuples, optional): Range where the function will be plotted. In objects with unidimensional domain the domain range should be a tuple with the bounds of the interval; in the case of surfaces a list with 2 tuples with the ranges for each dimension. Default uses the domain range of the functional object. 
- group: contains integers from [0 to number of + group (list of int): contains integers from [0 to number of labels) indicating to which group each sample belongs to. Then, the samples with the same label are plotted in the same color. If None, the default value, each sample is plotted in the color assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors: colors in which groups are + group_colors (list of colors): colors in which groups are represented, there must be one for each group. If None, each group is shown with distict colors in the "Greys" colormap. - group_names: name of each of the groups which appear + group_names (list of str): name of each of the groups which appear in a legend, there must be one for each one. Defaults to None and the legend is not shown. Implies `legend=True`. colormap_name: name of the colormap to be used. By default we will use autumn. - legend: if `True`, show a legend with the groups. If + legend (bool): if `True`, show a legend with the groups. If `group_names` is passed, it will be used for finding the names to display in the legend. Otherwise, the values passed to `group` will be used. @@ -261,15 +270,10 @@ def plot( the matplotlib.pyplot.plot function; if dim_domain is 2, keyword arguments to be passed to the matplotlib.pyplot.plot_surface function. - Returns: - fig: the figure in which the graphs are plotted. - + fig (figure object): figure object in which the graphs are plotted. 
""" - fig, axes = _get_figure_and_axes(chart, fig, ax) - fig, axes = _set_figure_layout_for_fdata( - self.fdata, fig, axes, n_rows, n_cols, - ) + self.artists = np.zeros(len(self.axes), dtype=np.ndarray) if domain_range is None: self.domain_range = self.fdata.domain_range @@ -303,12 +307,16 @@ def plot( mat = self.fdata(eval_points) for i in range(self.fdata.dim_codomain): + self.artists[i] = np.zeros(self.n_samples(), dtype=Artist) for j in range(self.fdata.n_samples): set_color_dict(sample_colors, j, color_dict) - axes[i].plot( - eval_points, mat[j, ..., i].T, **color_dict, **kwargs, + self.artists[i][j] = self.axes[i].plot( + eval_points, + mat[j, ..., i].T, + **color_dict, + **kwargs, ) else: @@ -333,50 +341,124 @@ def plot( X, Y = np.meshgrid(x, y, indexing='ij') + ind = 0 for k in range(self.fdata.dim_codomain): + self.artists[k] = np.zeros(self.n_samples(), dtype=Artist) for h in range(self.fdata.n_samples): set_color_dict(sample_colors, h, color_dict) - axes[k].plot_surface( + self.artists[k][h] = self.axes[k].plot_surface( X, Y, Z[h, ..., k], **color_dict, **kwargs, ) + ind += 1 + + _set_labels(self.fdata, self.fig, self.axes, patches) + self.fig.suptitle("GraphPlot") + + return self.fig - _set_labels(self.fdata, fig, axes, patches) + def n_samples(self) -> int: + """Get the number of instances that will be used for interactivity.""" + return self.fdata.n_samples + + def _set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + ) -> None: + """ + Initialize the axes and fig of the plot. - return fig + Args: + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. 
+ axes: axis where the graphs are plotted. If None, see param fig. + n_rows: designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols: designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. + """ + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata( + fdata=self.fdata, + fig=fig, + axes=axes, + n_rows=n_rows, + n_cols=n_cols, + ) + self.fig = fig + self.axes = axes -class ScatterPlot: +class ScatterPlot(BasePlot): """ Class used to scatter the FDataGrid object. Args: fdata: functional data set that we want to plot. - grid_points: points to plot. - + grid_points (ndarray): points to plot. + chart (figure object, axe or list of axes, optional): figure over + with the graphs are plotted or axis over where the graphs are + plotted. If None and ax is also None, the figure is + initialized. + fig (figure object, optional): figure over with the graphs are + plotted in case ax is not specified. If None and ax is also + None, the figure is initialized. + axes (axis, optional): axis over where the graphs + are plotted. If None, see param fig. + n_rows (int, optional): designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols(int, optional): designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. 
""" def __init__( self, fdata: FData, + chart: Union[Figure, Axes, None] = None, + *, + fig: Optional[Figure] = None, + axes: Optional[Axes] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, grid_points: Optional[GridPointsLike] = None, ) -> None: + BasePlot.__init__(self) self.fdata = fdata self.grid_points = grid_points + self.evaluated_points = None + if self.grid_points is None: + # This can only be done for FDataGrid + self.grid_points = self.fdata.grid_points + self.evaluated_points = self.fdata.data_matrix + + if self.evaluated_points is None: + self.evaluated_points = self.fdata( + self.grid_points, grid=True, + ) + + self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) + def plot( self, - chart: Union[Figure, Axes, None] = None, *, - fig: Optional[Figure] = None, - ax: Optional[Axes] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, group: Optional[Sequence[K]] = None, group_colors: Optional[Indexable[K, ColorLike]] = None, @@ -388,65 +470,35 @@ def plot( Scatter FDataGrid object. Args: - chart: figure over with the graphs are plotted or axis - over where the graphs are plotted. If None and ax - is also None, the figure is initialized. - fig: figure over with the graphs are plotted in case ax is not - specified. If None and ax is also - None, the figure is initialized. - ax: axis over where the graphs are plotted. If None, see param fig. - n_rows: designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols: designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. - domain_range: Range where the function will be - plotted. 
In objects with unidimensional domain - the domain range should be a tuple with the bounds of the - interval; in the case of surfaces a list with 2 tuples with - the ranges for each dimension. Default uses the domain range - of the functional object. - group: contains integers from [0 to number of labels) - indicating to which group each sample belongs to. Then, - the samples with the same label are plotted in the same color. - If None, the default value, each sample is plotted in the color - assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors: colors in which groups are represented, - there must be one for each group. If None, each - group is shown with distict colors in the "Greys" colormap. - group_names: name of each of the groups which appear - in a legend, there must be one for each one. Defaults to None - and the legend is not shown. Implies `legend=True`. - legend: if `True`, show a legend with the groups. If - `group_names` is passed, it will be used for finding the names - to display in the legend. Otherwise, the values passed to - `group` will be used. - kwargs: if dim_domain is 1, keyword arguments to be passed to - the matplotlib.pyplot.plot function; if dim_domain is 2, - keyword arguments to be passed to the - matplotlib.pyplot.plot_surface function. - + domain_range: Range where the + function will be plotted. In objects with unidimensional domain + the domain range should be a tuple with the bounds of the + interval; in the case of surfaces a list with 2 tuples with + the ranges for each dimension. Default uses the domain range + of the functional object. + group: contains integers from [0 to number of + labels) indicating to which group each sample belongs to. Then, + the samples with the same label are plotted in the same color. + If None, the default value, each sample is plotted in the color + assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. 
+ group_colors: colors in which groups are + represented, there must be one for each group. If None, each + group is shown with distict colors in the "Greys" colormap. + group_names: name of each of the groups which appear + in a legend, there must be one for each one. Defaults to None + and the legend is not shown. Implies `legend=True`. + legend: if `True`, show a legend with the groups. If + `group_names` is passed, it will be used for finding the names + to display in the legend. Otherwise, the values passed to + `group` will be used. + kwargs: if dim_domain is 1, keyword arguments to be passed to + the matplotlib.pyplot.plot function; if dim_domain is 2, + keyword arguments to be passed to the + matplotlib.pyplot.plot_surface function. Returns: - fig (figure object): figure object in which the graphs are plotted. - + fig: figure object in which the graphs are plotted. """ - evaluated_points = None - - if self.grid_points is None: - # This can only be done for FDataGrid - self.grid_points = self.fdata.grid_points - evaluated_points = self.fdata.data_matrix - - if evaluated_points is None: - evaluated_points = self.fdata( - self.grid_points, grid=True, - ) - - fig, axes = _get_figure_and_axes(chart, fig, ax) - fig, axes = _set_figure_layout_for_fdata( - self.fdata, fig, axes, n_rows, n_cols, - ) + self.artists = np.zeros(len(self.axes), dtype=np.ndarray) if domain_range is None: self.domain_range = self.fdata.domain_range @@ -462,14 +514,17 @@ def plot( if self.fdata.dim_domain == 1: for i in range(self.fdata.dim_codomain): + self.artists[i] = np.zeros(self.n_samples(), dtype=Artist) for j in range(self.fdata.n_samples): set_color_dict(sample_colors, j, color_dict) - axes[i].scatter( + self.artists[i][j] = self.axes[i].scatter( self.grid_points[0], - evaluated_points[j, ..., i].T, + self.evaluated_points[j, ..., i].T, **color_dict, + picker=True, + pickradius=2, **kwargs, ) @@ -480,21 +535,65 @@ def plot( X, Y = np.meshgrid(X, Y) for k in 
range(self.fdata.dim_codomain): + self.artists[k] = np.zeros(self.n_samples(), dtype=Artist) for h in range(self.fdata.n_samples): set_color_dict(sample_colors, h, color_dict) - axes[k].scatter( + self.artists[k][h] = self.axes[k].scatter( X, Y, - evaluated_points[h, ..., k].T, + self.evaluated_points[h, ..., k].T, **color_dict, + picker=True, + pickradius=2, **kwargs, ) - _set_labels(self.fdata, fig, axes, patches) + _set_labels(self.fdata, self.fig, self.axes, patches) - return fig + return self.fig + + def n_samples(self) -> int: + """Get the number of instances that will be used for interactivity.""" + return self.fdata.n_samples + + def _set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + ) -> None: + """ + Initialize the axes and fig of the plot. + + Args: + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. + axes: axis where the graphs are plotted. If None, see param fig. + n_rows: designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols: designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. 
+ """ + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout_for_fdata( + fdata=self.fdata, + fig=fig, + axes=axes, + n_rows=n_rows, + n_cols=n_cols, + ) + self.fig = fig + self.axes = axes def set_color_dict( From ca45569594c12a984eda80848ace218ffcce0933 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 4 May 2021 16:20:35 +0200 Subject: [PATCH 303/417] changed plot arguments --- .../visualization/representation.py | 173 ++++++++++-------- skfda/representation/_functional_data.py | 2 +- skfda/representation/grid.py | 2 +- 3 files changed, 100 insertions(+), 77 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 4ca76bdce..20221c675 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -176,6 +176,13 @@ def __init__( axes: Optional[Axes] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, + n_points: Union[int, Tuple[int, int], None] = None, + domain_range: Optional[DomainRangeLike] = None, + group: Optional[Sequence[K]] = None, + group_colors: Optional[Indexable[K, ColorLike]] = None, + group_names: Optional[Indexable[K, str]] = None, + colormap_name: str = 'autumn', + legend: bool = False, ) -> None: BasePlot.__init__(self) self.fdata = fdata @@ -211,18 +218,19 @@ def __init__( ) else: self.gradient_list = [] + + self.n_points = n_points + self.domain_range = domain_range + self.group = group + self.group_colors = group_colors + self.group_names = group_names + self.legend = legend + self.colormap_name = colormap_name + self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( self, - *, - n_points: Union[int, Tuple[int, int], None] = None, - domain_range: Optional[DomainRangeLike] = None, - group: Optional[Sequence[K]] = None, - group_colors: Optional[Indexable[K, ColorLike]] = None, - group_names: Optional[Indexable[K, str]] = None, - 
colormap_name: str = 'autumn', - legend: bool = False, **kwargs: Any, ) -> Figure: """ @@ -273,20 +281,25 @@ def plot( Returns: fig (figure object): figure object in which the graphs are plotted. """ - self.artists = np.zeros(self.n_samples(), dtype=Artist) + self.artists = np.zeros(len(self.axes), dtype=np.ndarray) - if domain_range is None: + if self.domain_range is None: self.domain_range = self.fdata.domain_range else: - self.domain_range = _to_domain_range(domain_range) + self.domain_range = _to_domain_range(self.domain_range) if len(self.gradient_list) == 0: sample_colors, patches = _get_color_info( - self.fdata, group, group_names, group_colors, legend, kwargs, + self.fdata, + self.group, + self.group_names, + self.group_colors, + self.legend, + kwargs, ) else: patches = None - colormap = matplotlib.cm.get_cmap(colormap_name) + colormap = matplotlib.cm.get_cmap(self.colormap_name) colormap = colormap.reversed() sample_colors = [None] * self.fdata.n_samples @@ -299,38 +312,38 @@ def plot( if self.fdata.dim_domain == 1: - if n_points is None: + if self.n_points is None: self.n_points = constants.N_POINTS_UNIDIMENSIONAL_PLOT_MESH # Evaluates the object in a linspace eval_points = np.linspace(*self.domain_range[0], self.n_points) mat = self.fdata(eval_points) - ind = 0 for i in range(self.fdata.dim_codomain): + self.artists[i] = np.zeros(self.n_samples(), dtype=Artist) for j in range(self.fdata.n_samples): set_color_dict(sample_colors, j, color_dict) - self.artists[ind] = self.axes[i].plot( + self.artists[i][j] = self.axes[i].plot( eval_points, mat[j, ..., i].T, **color_dict, **kwargs, ) - ind += 1 else: # Selects the number of points - if n_points is None: + if self.n_points is None: n_points_tuple = 2 * (constants.N_POINTS_SURFACE_PLOT_AX,) - elif isinstance(n_points, int): - n_points_tuple = (n_points, n_points) - elif len(n_points) != 2: + elif isinstance(self.n_points, int): + n_points_tuple = (self.n_points, self.n_points) + elif len(self.n_points) != 2: 
raise ValueError( "n_points should be a number or a tuple of " - "length 2, and has length {0}.".format(len(n_points)), + "length 2, and has " + "length {0}.".format(len(self.n_points)), ) # Axes where will be evaluated @@ -344,11 +357,12 @@ def plot( ind = 0 for k in range(self.fdata.dim_codomain): + self.artists[k] = np.zeros(self.n_samples(), dtype=Artist) for h in range(self.fdata.n_samples): set_color_dict(sample_colors, h, color_dict) - self.artists[ind] = self.axes[k].plot_surface( + self.artists[k][h] = self.axes[k].plot_surface( X, Y, Z[h, ..., k], @@ -364,7 +378,7 @@ def plot( def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" - return self.fdata.n_samples * self.fdata.dim_codomain + return self.fdata.n_samples def _set_figure_and_axes( self, @@ -426,6 +440,27 @@ class ScatterPlot(BasePlot): n_cols(int, optional): designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. + domain_range: Range where the + function will be plotted. In objects with unidimensional domain + the domain range should be a tuple with the bounds of the + interval; in the case of surfaces a list with 2 tuples with + the ranges for each dimension. Default uses the domain range + of the functional object. + group: contains integers from [0 to number of + labels) indicating to which group each sample belongs to. Then, + the samples with the same label are plotted in the same color. + If None, the default value, each sample is plotted in the color + assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. + group_colors: colors in which groups are + represented, there must be one for each group. If None, each + group is shown with distict colors in the "Greys" colormap. + group_names: name of each of the groups which appear + in a legend, there must be one for each one. Defaults to None + and the legend is not shown. Implies `legend=True`. 
+ legend: if `True`, show a legend with the groups. If + `group_names` is passed, it will be used for finding the names + to display in the legend. Otherwise, the values passed to + `group` will be used. """ def __init__( @@ -438,47 +473,43 @@ def __init__( n_rows: Optional[int] = None, n_cols: Optional[int] = None, grid_points: Optional[GridPointsLike] = None, + domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, + group: Optional[Sequence[K]] = None, + group_colors: Optional[Indexable[K, ColorLike]] = None, + group_names: Optional[Indexable[K, str]] = None, + legend: bool = False, ) -> None: BasePlot.__init__(self) self.fdata = fdata self.grid_points = grid_points + + self.evaluated_points = None + if self.grid_points is None: + # This can only be done for FDataGrid + self.grid_points = self.fdata.grid_points + self.evaluated_points = self.fdata.data_matrix + + if self.evaluated_points is None: + self.evaluated_points = self.fdata( + self.grid_points, grid=True, + ) + + self.domain_range = domain_range + self.group = group + self.group_colors = group_colors + self.group_names = group_names + self.legend = legend + self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( - self, - *, - domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, - group: Optional[Sequence[K]] = None, - group_colors: Optional[Indexable[K, ColorLike]] = None, - group_names: Optional[Indexable[K, str]] = None, - legend: bool = False, + self, **kwargs: Any, ) -> Figure: """ Scatter FDataGrid object. Args: - domain_range: Range where the - function will be plotted. In objects with unidimensional domain - the domain range should be a tuple with the bounds of the - interval; in the case of surfaces a list with 2 tuples with - the ranges for each dimension. Default uses the domain range - of the functional object. - group: contains integers from [0 to number of - labels) indicating to which group each sample belongs to. 
Then, - the samples with the same label are plotted in the same color. - If None, the default value, each sample is plotted in the color - assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors: colors in which groups are - represented, there must be one for each group. If None, each - group is shown with distict colors in the "Greys" colormap. - group_names: name of each of the groups which appear - in a legend, there must be one for each one. Defaults to None - and the legend is not shown. Implies `legend=True`. - legend: if `True`, show a legend with the groups. If - `group_names` is passed, it will be used for finding the names - to display in the legend. Otherwise, the values passed to - `group` will be used. kwargs: if dim_domain is 1, keyword arguments to be passed to the matplotlib.pyplot.plot function; if dim_domain is 2, keyword arguments to be passed to the @@ -486,47 +517,40 @@ def plot( Returns: fig: figure object in which the graphs are plotted. """ - self.artists = np.zeros(self.n_samples(), dtype=Artist) - evaluated_points = None + self.artists = np.zeros(len(self.axes), dtype=np.ndarray) - if self.grid_points is None: - # This can only be done for FDataGrid - self.grid_points = self.fdata.grid_points - evaluated_points = self.fdata.data_matrix - - if evaluated_points is None: - evaluated_points = self.fdata( - self.grid_points, grid=True, - ) - - if domain_range is None: + if self.domain_range is None: self.domain_range = self.fdata.domain_range else: - self.domain_range = _to_domain_range(domain_range) + self.domain_range = _to_domain_range(self.domain_range) sample_colors, patches = _get_color_info( - self.fdata, group, group_names, group_colors, legend, kwargs, + self.fdata, + self.group, + self.group_names, + self.group_colors, + self.legend, + kwargs, ) color_dict: Mapping[str, Union[ColorLike, None]] = {} if self.fdata.dim_domain == 1: - ind = 0 for i in range(self.fdata.dim_codomain): + self.artists[i] = 
np.zeros(self.n_samples(), dtype=Artist) for j in range(self.fdata.n_samples): set_color_dict(sample_colors, j, color_dict) - self.artists[ind] = self.axes[i].scatter( + self.artists[i][j] = self.axes[i].scatter( self.grid_points[0], - evaluated_points[j, ..., i].T, + self.evaluated_points[j, ..., i].T, **color_dict, picker=True, pickradius=2, **kwargs, ) - ind += 1 else: @@ -534,22 +558,21 @@ def plot( Y = self.fdata.grid_points[1] X, Y = np.meshgrid(X, Y) - ind = 0 for k in range(self.fdata.dim_codomain): + self.artists[k] = np.zeros(self.n_samples(), dtype=Artist) for h in range(self.fdata.n_samples): set_color_dict(sample_colors, h, color_dict) - self.artists = self.axes[k].scatter( + self.artists[k][h] = self.axes[k].scatter( X, Y, - evaluated_points[h, ..., k].T, + self.evaluated_points[h, ..., k].T, **color_dict, picker=True, pickradius=2, **kwargs, ) - ind += 1 _set_labels(self.fdata, self.fig, self.axes, patches) diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index f44236a11..3ff844015 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -781,7 +781,7 @@ def plot(self, *args: Any, **kwargs: Any) -> Any: """ from ..exploratory.visualization.representation import GraphPlot - return GraphPlot(fdata=self).plot(*args, **kwargs) + return GraphPlot(fdata=self, *args).plot(**kwargs) @abstractmethod def copy( diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 9622c1007..21abbbff0 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -821,7 +821,7 @@ def scatter(self, *args: Any, **kwargs: Any) -> Figure: """ from ..exploratory.visualization.representation import ScatterPlot - return ScatterPlot(self).plot(*args, **kwargs) + return ScatterPlot(self, *args).plot(**kwargs) def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: """Return the basis representation of the object. 
From 056dc500872e253527942f7c6f2c6b64cca8ad8f Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 4 May 2021 16:24:17 +0200 Subject: [PATCH 304/417] solved whitespace --- skfda/exploratory/visualization/representation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 20221c675..0ba4781cd 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -503,7 +503,7 @@ def __init__( self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( - self, + self, **kwargs: Any, ) -> Figure: """ From 28196a7016095a8920e04936df5d5ee3ab70f6ad Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 4 May 2021 16:50:43 +0200 Subject: [PATCH 305/417] solved errrors --- examples/plot_boxplot.py | 2 +- skfda/exploratory/visualization/_boxplot.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/examples/plot_boxplot.py b/examples/plot_boxplot.py index 7e5832f1b..3fc1d1cb8 100644 --- a/examples/plot_boxplot.py +++ b/examples/plot_boxplot.py @@ -92,7 +92,7 @@ # previous one. fdBoxplot = Boxplot( - fd_temperatures, depth_method=ModifiedBandDepth(), factor=0.4) + fd_temperatures, depth_method=ModifiedBandDepth, factor=0.4) fdBoxplot.show_full_outliers = True fdBoxplot.plot() diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index d4701443b..59b8455d0 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -11,9 +11,12 @@ import matplotlib import matplotlib.pyplot as plt import numpy as np +from matplotlib.artist import Artist from matplotlib.axes import Axes from matplotlib.figure import Figure +from skfda.exploratory.depth.multivariate import Depth + from ... 
import FDataGrid from ..depth import ModifiedBandDepth from ..outliers import _envelopes @@ -245,11 +248,11 @@ class Boxplot(FDataBoxplot, BasePlot): def __init__( self, fdatagrid: FDataGrid, - depth_method = ModifiedBandDepth(), + depth_method: Depth = ModifiedBandDepth, prob: Sequence[float] = [0.5], factor: float = 1.5, - *, chart: Union[Figure, Axes, None] = None, + *, fig: Optional[Figure] = None, axes: Optional[Sequence[Axes]] = None, n_rows: Optional[int] = None, @@ -286,7 +289,8 @@ def __init__( self._envelopes = [None] * len(prob) - depth = depth_method(fdatagrid) + depth_func = depth_method() + depth = depth_func(fdatagrid) indices_descending_depth = (-depth).argsort(axis=0) # The median is the deepest curve @@ -403,7 +407,7 @@ def plot(self): """ - self.artists = np.array([]) + self.artists = np.zeros(self.n_samples(), dtype=Artist) tones = np.linspace(0.1, 1.0, len(self._prob) + 1, endpoint=False)[1:] color = self.colormap(tones) From 4e3f9cd12f25b7a8086daa7492f16161920ef88c Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 4 May 2021 17:00:22 +0200 Subject: [PATCH 306/417] changes --- examples/plot_boxplot.py | 2 +- skfda/exploratory/visualization/_boxplot.py | 32 ++++++++++----------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/examples/plot_boxplot.py b/examples/plot_boxplot.py index 3fc1d1cb8..6a0ecac06 100644 --- a/examples/plot_boxplot.py +++ b/examples/plot_boxplot.py @@ -106,7 +106,7 @@ # :func:`~skfda.exploratory.depth.IntegratedDepth` is used and the 25% and # 75% central regions are specified. 
-fdBoxplot = Boxplot(fd_temperatures, depth_method=IntegratedDepth(), +fdBoxplot = Boxplot(fd_temperatures, depth_method=IntegratedDepth, prob=[0.75, 0.5, 0.25]) fdBoxplot.plot() diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 59b8455d0..7985a051f 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -262,15 +262,28 @@ def __init__( Args: fdatagrid (FDataGrid): Object containing the data. - depth_method (:ref:`depth measure `, optional): - Method used to order the data. Defaults to :func:`modified - band depth + depth_method: Method used to order the data. + Defaults to :func:`modified band depth `. prob (list of float, optional): List with float numbers (in the range from 1 to 0) that indicate which central regions to represent. Defaults to [0.5] which represents the 50% central region. factor (double): Number used to calculate the outlying envelope. + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + fig (figure object, optional): figure over with the graphs are + plotted in case ax is not specified. If None and ax is also + None, the figure is initialized. + axes (list of axis objects, optional): axis over where the graphs + are plotted. If None, see param fig. + n_rows(int, optional): designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols(int, optional): designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. """ FDataBoxplot.__init__(self, factor) @@ -389,19 +402,6 @@ def plot(self): """Visualization of the functional boxplot of the fdatagrid (dim_domain=1). - Args: - fig (figure object, optional): figure over with the graphs are - plotted in case ax is not specified. 
If None and ax is also - None, the figure is initialized. - axes (list of axis objects, optional): axis over where the graphs - are plotted. If None, see param fig. - n_rows(int, optional): designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols(int, optional): designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. - Returns: fig (figure): figure object in which the graphs are plotted. From 9a4e428611fdd109b4f0f854f114924653ca0da3 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 4 May 2021 19:02:37 +0200 Subject: [PATCH 307/417] corrected parametric plot --- skfda/exploratory/visualization/_parametric_plot.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 4fab77506..394e636e6 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -9,6 +9,7 @@ from typing import Optional, Sequence, Union import numpy as np +from matplotlib.artist import Artist from matplotlib.axes import Axes from matplotlib.figure import Figure @@ -76,7 +77,7 @@ def plot( fig: figure object in which the ParametricPlot graph will be plotted. 
""" - self.artists = np.array([]) + self.artists = np.zeros(self.n_samples(), dtype=Artist) if ( self.fd_final.dim_domain == 1 @@ -89,12 +90,12 @@ def plot( self.axes = axes ax = self.axes[0] - for data_matrix in self.fd_final.data_matrix: - self.artists = np.append(self.artists, ax.plot( - data_matrix[:, 0].tolist(), - data_matrix[:, 1].tolist(), + for i in range(self.fd_final.n_samples): + self.artists[i] = ax.plot( + self.fd_final.data_matrix[i][:, 0].tolist(), + self.fd_final.data_matrix[i][:, 1].tolist(), **kwargs, - )) + ) else: raise ValueError( "Error in data arguments,", From 0fd3e7ae10bd751a9d3a5811870ad96df0ad34b8 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 4 May 2021 19:04:53 +0200 Subject: [PATCH 308/417] corrected chart --- skfda/exploratory/visualization/_parametric_plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 394e636e6..af1e9738f 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -44,8 +44,8 @@ def __init__( self, fdata1: FData, fdata2: Optional[FData] = None, - *, chart: Union[Figure, Axes, None] = None, + *, fig: Optional[Figure] = None, axes: Optional[Axes] = None, ) -> None: From b8c98ee927fe8bbbecc7a73858fd14155a16ef82 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 4 May 2021 19:15:18 +0200 Subject: [PATCH 309/417] Fpca plot corrected --- examples/plot_fpca.py | 12 +++++++----- skfda/exploratory/visualization/__init__.py | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/examples/plot_fpca.py b/examples/plot_fpca.py index 460a1db7c..b5fbe99fe 100644 --- a/examples/plot_fpca.py +++ b/examples/plot_fpca.py @@ -10,7 +10,7 @@ import skfda from skfda.datasets import fetch_growth -from skfda.exploratory.visualization import plot_fpca_perturbation_graphs +from skfda.exploratory.visualization 
import FPCAPlot from skfda.preprocessing.dim_reduction.projection import FPCA from skfda.representation.basis import BSpline, Fourier, Monomial @@ -75,10 +75,12 @@ # faster at an early age and boys tend to start puberty later, therefore, their # growth is more significant later. Girls also stop growing early -plot_fpca_perturbation_graphs(basis_fd.mean(), - fpca.components_, - 30, - fig=plt.figure(figsize=(6, 2 * 4))) +FPCAPlot( + basis_fd.mean(), + fpca.components_, + 30, + fig=plt.figure(figsize=(6, 2 * 4)), +).plot() ############################################################################## # We can also specify another basis for the principal components as argument diff --git a/skfda/exploratory/visualization/__init__.py b/skfda/exploratory/visualization/__init__.py index 3cf3414d5..87936849a 100644 --- a/skfda/exploratory/visualization/__init__.py +++ b/skfda/exploratory/visualization/__init__.py @@ -7,4 +7,4 @@ from ._magnitude_shape_plot import MagnitudeShapePlot from ._outliergram import Outliergram from ._parametric_plot import ParametricPlot -from .fpca import plot_fpca_perturbation_graphs +from .fpca import FPCAPlot From 7f3cfff13eb62919f835ca225db0f495e9ef1fd4 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 4 May 2021 19:34:47 +0200 Subject: [PATCH 310/417] solved issues --- .../exploratory/visualization/representation.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 0ba4781cd..1b62ca325 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -9,7 +9,6 @@ from typing import ( Any, - List, Mapping, Optional, Sequence, @@ -61,8 +60,8 @@ def _get_color_info( legend: bool = False, kwargs: Any = None, ) -> Tuple[ - Union[ColorLike, None], - Optional[List[matplotlib.patches.Patch]], + Optional[ColorLike], + 
Optional[Sequence[matplotlib.patches.Patch]], ]: patches = None @@ -167,7 +166,7 @@ class GraphPlot(BasePlot): def __init__( self, fdata: FData, - gradient_color_list: Union[Sequence[float], None] = None, + gradient_color_list: Optional[Sequence[float]] = None, max_grad: Optional[float] = None, min_grad: Optional[float] = None, chart: Union[Figure, Axes, None] = None, @@ -308,7 +307,7 @@ def plot( self.sample_colors = sample_colors - color_dict: Mapping[str, Union[ColorLike, None]] = {} + color_dict: Mapping[str, Optional[ColorLike]] = {} if self.fdata.dim_domain == 1: @@ -372,7 +371,6 @@ def plot( ind += 1 _set_labels(self.fdata, self.fig, self.axes, patches) - self.fig.suptitle("GraphPlot") return self.fig @@ -488,8 +486,7 @@ def __init__( # This can only be done for FDataGrid self.grid_points = self.fdata.grid_points self.evaluated_points = self.fdata.data_matrix - - if self.evaluated_points is None: + else: self.evaluated_points = self.fdata( self.grid_points, grid=True, ) @@ -533,7 +530,7 @@ def plot( kwargs, ) - color_dict: Mapping[str, Union[ColorLike, None]] = {} + color_dict: Mapping[str, Optional[ColorLike]] = {} if self.fdata.dim_domain == 1: @@ -623,7 +620,7 @@ def _set_figure_and_axes( def set_color_dict( sample_colors: Any, ind: int, - color_dict: Mapping[str, Union[ColorLike, None]], + color_dict: Mapping[str, Optional[ColorLike]], ) -> None: """ Auxiliary method used to update color_dict. 
From 150cffd36f4e48f5e0fa8897052336c0af692c72 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 4 May 2021 19:36:33 +0200 Subject: [PATCH 311/417] corrected condition --- skfda/exploratory/visualization/representation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 1b62ca325..be051e5d0 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -216,7 +216,7 @@ def __init__( ] ) else: - self.gradient_list = [] + self.gradient_list = None self.n_points = n_points self.domain_range = domain_range @@ -287,7 +287,7 @@ def plot( else: self.domain_range = _to_domain_range(self.domain_range) - if len(self.gradient_list) == 0: + if self.gradient_list is None: sample_colors, patches = _get_color_info( self.fdata, self.group, From c5a0b359215aa3e24637f9b5eb88a444933b7f86 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 4 May 2021 19:41:05 +0200 Subject: [PATCH 312/417] solved depth argument --- examples/plot_boxplot.py | 4 ++-- skfda/exploratory/visualization/_boxplot.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/examples/plot_boxplot.py b/examples/plot_boxplot.py index 6a0ecac06..c94d3b17a 100644 --- a/examples/plot_boxplot.py +++ b/examples/plot_boxplot.py @@ -92,7 +92,7 @@ # previous one. fdBoxplot = Boxplot( - fd_temperatures, depth_method=ModifiedBandDepth, factor=0.4) + fd_temperatures, depth_method=ModifiedBandDepth(), factor=0.4) fdBoxplot.show_full_outliers = True fdBoxplot.plot() @@ -106,7 +106,7 @@ # :func:`~skfda.exploratory.depth.IntegratedDepth` is used and the 25% and # 75% central regions are specified. 
-fdBoxplot = Boxplot(fd_temperatures, depth_method=IntegratedDepth, +fdBoxplot = Boxplot(fd_temperatures, depth_method=IntegratedDepth(), prob=[0.75, 0.5, 0.25]) fdBoxplot.plot() diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 7985a051f..5dc1961ae 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -248,7 +248,7 @@ class Boxplot(FDataBoxplot, BasePlot): def __init__( self, fdatagrid: FDataGrid, - depth_method: Depth = ModifiedBandDepth, + depth_method: Optional[Depth] = None, prob: Sequence[float] = [0.5], factor: float = 1.5, chart: Union[Figure, Axes, None] = None, @@ -262,7 +262,7 @@ def __init__( Args: fdatagrid (FDataGrid): Object containing the data. - depth_method: Method used to order the data. + depth_method: Method used to order the data. Defaults to :func:`modified band depth `. prob (list of float, optional): List with float numbers (in the @@ -302,8 +302,9 @@ def __init__( self._envelopes = [None] * len(prob) - depth_func = depth_method() - depth = depth_func(fdatagrid) + if depth_method is None: + depth_method = ModifiedBandDepth() + depth = depth_method(fdatagrid) indices_descending_depth = (-depth).argsort(axis=0) # The median is the deepest curve From bb88e856baaaf007438447b7ea16f35397c8a7f6 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 5 May 2021 17:11:34 +0200 Subject: [PATCH 313/417] solved else --- skfda/exploratory/visualization/_parametric_plot.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index af1e9738f..2ac060820 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -104,8 +104,6 @@ def plot( if self.fd_final.dataset_name is not None: fig.suptitle(self.fd_final.dataset_name) - else: - fig.suptitle("ParametricPlot") if 
self.fd_final.coordinate_names[0] is None: ax.set_xlabel("Function 1") From 33239efb5f3b1a6124cee0f89e6472e4d4028870 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 5 May 2021 17:13:57 +0200 Subject: [PATCH 314/417] corrected depth method --- skfda/exploratory/visualization/_boxplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 5dc1961ae..647109e99 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -248,7 +248,7 @@ class Boxplot(FDataBoxplot, BasePlot): def __init__( self, fdatagrid: FDataGrid, - depth_method: Optional[Depth] = None, + depth_method: Optional[Depth[FDataGrid]] = None, prob: Sequence[float] = [0.5], factor: float = 1.5, chart: Union[Figure, Axes, None] = None, From 4bed45a1494e2289fdb22ffd56eb21e5ac9c138c Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 5 May 2021 17:37:28 +0200 Subject: [PATCH 315/417] paramplot --- .../visualization/_parametric_plot.py | 39 +++++++++++++++---- skfda/exploratory/visualization/_utils.py | 15 ++++++- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 2ac060820..3ab9954be 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -6,7 +6,7 @@ of them with domain 1 and codomain 1. 
""" -from typing import Optional, Sequence, Union +from typing import Optional, Sequence, TypeVar, Union import numpy as np from matplotlib.artist import Artist @@ -15,7 +15,16 @@ from ...representation import FData from ._baseplot import BasePlot -from ._utils import _get_figure_and_axes, _set_figure_layout +from ._utils import ( + ColorLike, + _get_figure_and_axes, + _set_figure_layout, + _set_labels, +) +from .representation import Indexable, _get_color_info + +K = TypeVar('K', contravariant=True) +V = TypeVar('V', covariant=True) class ParametricPlot(BasePlot): @@ -48,6 +57,10 @@ def __init__( *, fig: Optional[Figure] = None, axes: Optional[Axes] = None, + group: Optional[Sequence[K]] = None, + group_colors: Optional[Indexable[K, ColorLike]] = None, + group_names: Optional[Indexable[K, str]] = None, + legend: bool = False, ) -> None: BasePlot.__init__(self) self.fdata1 = fdata1 @@ -60,25 +73,35 @@ def __init__( else: self.fd_final = self.fdata1 + self.group = group + self.group_names = group_names + self.group_colors = group_colors + self.legend = legend + self._set_figure_and_axes(chart, fig, axes) def plot( self, - **kwargs, ) -> Figure: """ Parametric Plot graph. Plot the functions as coordinates. If two functions are passed it will concatenate both as coordinates of a vector-valued FData. - Args: - kwargs: optional arguments. Returns: fig: figure object in which the ParametricPlot graph will be plotted. 
""" self.artists = np.zeros(self.n_samples(), dtype=Artist) + sample_colors, patches = _get_color_info( + self.fd_final, + self.group, + self.group_names, + self.group_colors, + self.legend, + ) + if ( self.fd_final.dim_domain == 1 and self.fd_final.dim_codomain == 2 @@ -94,7 +117,7 @@ def plot( self.artists[i] = ax.plot( self.fd_final.data_matrix[i][:, 0].tolist(), self.fd_final.data_matrix[i][:, 1].tolist(), - **kwargs, + **color_dict, ) else: raise ValueError( @@ -103,7 +126,7 @@ def plot( ) if self.fd_final.dataset_name is not None: - fig.suptitle(self.fd_final.dataset_name) + self.fig.suptitle(self.fd_final.dataset_name) if self.fd_final.coordinate_names[0] is None: ax.set_xlabel("Function 1") @@ -115,6 +138,8 @@ def plot( else: ax.set_ylabel(self.fd_final.coordinate_names[1]) + _set_labels(self.fdata, self.fig, self.axes, patches) + return fig def n_samples(self) -> int: diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 72e20db9c..b3bb9e6c8 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -1,7 +1,7 @@ import io import math import re -from typing import Optional, Sequence, Tuple, Union +from typing import Optional, Protocol, Sequence, Tuple, TypeVar, Union import matplotlib.backends.backend_svg import matplotlib.pyplot as plt @@ -27,6 +27,19 @@ Sequence[float], ] +K = TypeVar('K', contravariant=True) +V = TypeVar('V', covariant=True) + + +class Indexable(Protocol[K, V]): + """Class Indexable used to type _get_color_info.""" + + def __getitem__(self, __key: K) -> V: + pass + + def __len__(self) -> int: + pass + def _create_figure(): """Create figure using the default backend.""" fig = plt.figure() From 39e89f69c9edbfa4c9fe239dfca8343d1879c404 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 6 May 2021 01:07:00 +0200 Subject: [PATCH 316/417] picking solved, hover not rdy --- .../visualization/_multiple_display.py | 62 ++--- 
.../visualization/representation.py | 229 ++++++++++-------- 2 files changed, 142 insertions(+), 149 deletions(-) diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index 5856aa8f0..8331ba12e 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -47,7 +47,7 @@ class MultipleDisplay: clicked: boolean indicating whether a point has being clicked. index_clicked: index of the function selected with the interactive module or widgets. - tags: list of tags for each ax, that contain the information printed + tags: list of tags for each ax, that contain the information printedº while hovering. previous_hovered: artist object containing of the last point hovered. is_updating: boolean value that determines whether a widget @@ -323,37 +323,26 @@ def pick(self, event: Event) -> None: def update_index_display_picked(self) -> None: """Update the index corresponding to the display picked.""" for d in self.displays: - if isinstance(d.artists[0], Artist): - if d.axes[0] == self.point_clicked.axes: - self.index_clicked = np.where( - d.artists == self.point_clicked, - )[0][0] - return - else: - for i in range(len(d.axes)): - self.x = 0 - if d.axes[i] == self.point_clicked.axes: + for i in range(len(d.axes)): + if d.axes[i] == self.point_clicked.axes: + if len(d.axes) == 1: + self.x = 1 self.index_clicked = np.where( - d.artists[i] == self.point_clicked, + d.artists == self.point_clicked, )[0][0] - return + else: + self.index_clicked = np.where( + d.artists[:, i] == self.point_clicked, + )[0][0] + return def reduce_points_intensity(self) -> None: """Reduce the transparency of all the points but the selected one.""" for i in range(self.length_data): if i != self.index_clicked: for d in self.displays: - if len(d.artists) != 0: - if isinstance(d.artists[0], list): - d.artists[i][0].set_alpha(0.1) - elif isinstance(d.artists[0], Artist): - 
d.artists[i].set_alpha(0.1) - else: - for a in d.artists: - if isinstance(a[0], list): - a[i][0].set_alpha(0.1) - elif isinstance(a[0], Artist): - a[i].set_alpha(0.1) + for artist in np.ravel(d.artists[i]): + artist.set_alpha(0.1) self.is_updating = True for j in range(len(self.sliders)): @@ -365,17 +354,8 @@ def restore_points_intensity(self) -> None: """Restore the original transparency of all the points.""" for i in range(self.length_data): for d in self.displays: - if len(d.artists) != 0: - if isinstance(d.artists[0], list): - d.artists[i][0].set_alpha(1) - elif isinstance(d.artists[0], Artist): - d.artists[i].set_alpha(1) - else: - for a in d.artists: - if isinstance(a[0], list): - a[i][0].set_alpha(1) - elif isinstance(a[0], Artist): - a[i].set_alpha(1) + for artist in np.ravel(d.artists[i]): + artist.set_alpha(1) self.point_clicked = None self.index_clicked = -1 @@ -435,16 +415,8 @@ def change_display_intensity(self, index: int, intensity: int) -> None: """ for d in self.displays: if len(d.artists) != 0: - if isinstance(d.artists[0], list): - d.artists[index][0].set_alpha(intensity) - elif isinstance(d.artists[0], Artist): - d.artists[index].set_alpha(intensity) - else: - for a in d.artists: - if isinstance(a[0], list): - a[index][0].set_alpha(intensity) - elif isinstance(a[0], Artist): - a[index].set_alpha(intensity) + for artist in np.ravel(d.artists[index]): + artist.set_alpha(intensity) def create_sliders( self, diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 0d4be6a76..50689d531 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -9,7 +9,6 @@ from typing import ( Any, - List, Mapping, Optional, Sequence, @@ -61,8 +60,8 @@ def _get_color_info( legend: bool = False, kwargs: Any = None, ) -> Tuple[ - Union[ColorLike, None], - Optional[List[matplotlib.patches.Patch]], + Optional[ColorLike], + 
Optional[Sequence[matplotlib.patches.Patch]], ]: patches = None @@ -158,6 +157,36 @@ class GraphPlot(BasePlot): n_cols(int, optional): designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. + n_points (int or tuple, optional): Number of points to evaluate in + the plot. In case of surfaces a tuple of length 2 can be pased + with the number of points to plot in each axis, otherwise the + same number of points will be used in the two axes. By default + in unidimensional plots will be used 501 points; in surfaces + will be used 30 points per axis, wich makes a grid with 900 + points. + domain_range (tuple or list of tuples, optional): Range where the + function will be plotted. In objects with unidimensional domain + the domain range should be a tuple with the bounds of the + interval; in the case of surfaces a list with 2 tuples with + the ranges for each dimension. Default uses the domain range + of the functional object. + group (list of int): contains integers from [0 to number of + labels) indicating to which group each sample belongs to. Then, + the samples with the same label are plotted in the same color. + If None, the default value, each sample is plotted in the color + assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. + group_colors (list of colors): colors in which groups are + represented, there must be one for each group. If None, each + group is shown with distict colors in the "Greys" colormap. + group_names (list of str): name of each of the groups which appear + in a legend, there must be one for each one. Defaults to None + and the legend is not shown. Implies `legend=True`. + colormap_name: name of the colormap to be used. By default we will + use autumn. + legend (bool): if `True`, show a legend with the groups. If + `group_names` is passed, it will be used for finding the names + to display in the legend. Otherwise, the values passed to + `group` will be used. 
Attributes: gradient_list: normalization of the values from gradient color_list that will be used to determine the intensity of the color @@ -167,7 +196,7 @@ class GraphPlot(BasePlot): def __init__( self, fdata: FData, - gradient_color_list: Union[Sequence[float], None] = None, + gradient_color_list: Optional[Sequence[float]] = None, max_grad: Optional[float] = None, min_grad: Optional[float] = None, chart: Union[Figure, Axes, None] = None, @@ -176,6 +205,13 @@ def __init__( axes: Optional[Axes] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, + n_points: Union[int, Tuple[int, int], None] = None, + domain_range: Optional[DomainRangeLike] = None, + group: Optional[Sequence[K]] = None, + group_colors: Optional[Indexable[K, ColorLike]] = None, + group_names: Optional[Indexable[K, str]] = None, + colormap_name: str = 'autumn', + legend: bool = False, ) -> None: BasePlot.__init__(self) self.fdata = fdata @@ -210,19 +246,20 @@ def __init__( ] ) else: - self.gradient_list = [] + self.gradient_list = None + + self.n_points = n_points + self.domain_range = domain_range + self.group = group + self.group_colors = group_colors + self.group_names = group_names + self.legend = legend + self.colormap_name = colormap_name + self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( self, - *, - n_points: Union[int, Tuple[int, int], None] = None, - domain_range: Optional[DomainRangeLike] = None, - group: Optional[Sequence[K]] = None, - group_colors: Optional[Indexable[K, ColorLike]] = None, - group_names: Optional[Indexable[K, str]] = None, - colormap_name: str = 'autumn', - legend: bool = False, **kwargs: Any, ) -> Figure: """ @@ -236,36 +273,6 @@ def plot( function with a gradient of colors depending on the initial gradient_color_list (normalized in gradient_list). Args: - n_points (int or tuple, optional): Number of points to evaluate in - the plot. 
In case of surfaces a tuple of length 2 can be pased - with the number of points to plot in each axis, otherwise the - same number of points will be used in the two axes. By default - in unidimensional plots will be used 501 points; in surfaces - will be used 30 points per axis, wich makes a grid with 900 - points. - domain_range (tuple or list of tuples, optional): Range where the - function will be plotted. In objects with unidimensional domain - the domain range should be a tuple with the bounds of the - interval; in the case of surfaces a list with 2 tuples with - the ranges for each dimension. Default uses the domain range - of the functional object. - group (list of int): contains integers from [0 to number of - labels) indicating to which group each sample belongs to. Then, - the samples with the same label are plotted in the same color. - If None, the default value, each sample is plotted in the color - assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors (list of colors): colors in which groups are - represented, there must be one for each group. If None, each - group is shown with distict colors in the "Greys" colormap. - group_names (list of str): name of each of the groups which appear - in a legend, there must be one for each one. Defaults to None - and the legend is not shown. Implies `legend=True`. - colormap_name: name of the colormap to be used. By default we will - use autumn. - legend (bool): if `True`, show a legend with the groups. If - `group_names` is passed, it will be used for finding the names - to display in the legend. Otherwise, the values passed to - `group` will be used. kwargs: if dim_domain is 1, keyword arguments to be passed to the matplotlib.pyplot.plot function; if dim_domain is 2, keyword arguments to be passed to the @@ -273,20 +280,28 @@ def plot( Returns: fig (figure object): figure object in which the graphs are plotted. 
""" - self.artists = np.zeros(len(self.axes), dtype=np.ndarray) + self.artists = np.zeros( + (self.n_samples(), self.fdata.dim_codomain), + dtype=Artist, + ) - if domain_range is None: + if self.domain_range is None: self.domain_range = self.fdata.domain_range else: - self.domain_range = _to_domain_range(domain_range) + self.domain_range = _to_domain_range(self.domain_range) - if len(self.gradient_list) == 0: + if self.gradient_list is None: sample_colors, patches = _get_color_info( - self.fdata, group, group_names, group_colors, legend, kwargs, + self.fdata, + self.group, + self.group_names, + self.group_colors, + self.legend, + kwargs, ) else: patches = None - colormap = matplotlib.cm.get_cmap(colormap_name) + colormap = matplotlib.cm.get_cmap(self.colormap_name) colormap = colormap.reversed() sample_colors = [None] * self.fdata.n_samples @@ -295,11 +310,11 @@ def plot( self.sample_colors = sample_colors - color_dict: Mapping[str, Union[ColorLike, None]] = {} + color_dict: Mapping[str, Optional[ColorLike]] = {} if self.fdata.dim_domain == 1: - if n_points is None: + if self.n_points is None: self.n_points = constants.N_POINTS_UNIDIMENSIONAL_PLOT_MESH # Evaluates the object in a linspace @@ -307,29 +322,29 @@ def plot( mat = self.fdata(eval_points) for i in range(self.fdata.dim_codomain): - self.artists[i] = np.zeros(self.n_samples(), dtype=Artist) for j in range(self.fdata.n_samples): set_color_dict(sample_colors, j, color_dict) - self.artists[i][j] = self.axes[i].plot( + self.artists[j, i] = self.axes[i].plot( eval_points, mat[j, ..., i].T, **color_dict, **kwargs, - ) + )[0] else: # Selects the number of points - if n_points is None: + if self.n_points is None: n_points_tuple = 2 * (constants.N_POINTS_SURFACE_PLOT_AX,) - elif isinstance(n_points, int): - n_points_tuple = (n_points, n_points) - elif len(n_points) != 2: + elif isinstance(self.n_points, int): + n_points_tuple = (self.n_points, self.n_points) + elif len(self.n_points) != 2: raise ValueError( 
"n_points should be a number or a tuple of " - "length 2, and has length {0}.".format(len(n_points)), + "length 2, and has " + "length {0}.".format(len(self.n_points)), ) # Axes where will be evaluated @@ -341,24 +356,20 @@ def plot( X, Y = np.meshgrid(x, y, indexing='ij') - ind = 0 for k in range(self.fdata.dim_codomain): - self.artists[k] = np.zeros(self.n_samples(), dtype=Artist) for h in range(self.fdata.n_samples): set_color_dict(sample_colors, h, color_dict) - self.artists[k][h] = self.axes[k].plot_surface( + self.artists[h, k] = self.axes[k].plot_surface( X, Y, Z[h, ..., k], **color_dict, **kwargs, - ) - ind += 1 + )[0] _set_labels(self.fdata, self.fig, self.axes, patches) - self.fig.suptitle("GraphPlot") return self.fig @@ -426,6 +437,27 @@ class ScatterPlot(BasePlot): n_cols(int, optional): designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. + domain_range: Range where the + function will be plotted. In objects with unidimensional domain + the domain range should be a tuple with the bounds of the + interval; in the case of surfaces a list with 2 tuples with + the ranges for each dimension. Default uses the domain range + of the functional object. + group: contains integers from [0 to number of + labels) indicating to which group each sample belongs to. Then, + the samples with the same label are plotted in the same color. + If None, the default value, each sample is plotted in the color + assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. + group_colors: colors in which groups are + represented, there must be one for each group. If None, each + group is shown with distict colors in the "Greys" colormap. + group_names: name of each of the groups which appear + in a legend, there must be one for each one. Defaults to None + and the legend is not shown. Implies `legend=True`. + legend: if `True`, show a legend with the groups. 
If + `group_names` is passed, it will be used for finding the names + to display in the legend. Otherwise, the values passed to + `group` will be used. """ def __init__( @@ -438,6 +470,11 @@ def __init__( n_rows: Optional[int] = None, n_cols: Optional[int] = None, grid_points: Optional[GridPointsLike] = None, + domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, + group: Optional[Sequence[K]] = None, + group_colors: Optional[Indexable[K, ColorLike]] = None, + group_names: Optional[Indexable[K, str]] = None, + legend: bool = False, ) -> None: BasePlot.__init__(self) self.fdata = fdata @@ -448,49 +485,27 @@ def __init__( # This can only be done for FDataGrid self.grid_points = self.fdata.grid_points self.evaluated_points = self.fdata.data_matrix - - if self.evaluated_points is None: + else: self.evaluated_points = self.fdata( self.grid_points, grid=True, ) + self.domain_range = domain_range + self.group = group + self.group_colors = group_colors + self.group_names = group_names + self.legend = legend + self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( self, - *, - domain_range: Union[Tuple[int, int], DomainRangeLike, None] = None, - group: Optional[Sequence[K]] = None, - group_colors: Optional[Indexable[K, ColorLike]] = None, - group_names: Optional[Indexable[K, str]] = None, - legend: bool = False, **kwargs: Any, ) -> Figure: """ Scatter FDataGrid object. Args: - domain_range: Range where the - function will be plotted. In objects with unidimensional domain - the domain range should be a tuple with the bounds of the - interval; in the case of surfaces a list with 2 tuples with - the ranges for each dimension. Default uses the domain range - of the functional object. - group: contains integers from [0 to number of - labels) indicating to which group each sample belongs to. Then, - the samples with the same label are plotted in the same color. 
- If None, the default value, each sample is plotted in the color - assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors: colors in which groups are - represented, there must be one for each group. If None, each - group is shown with distict colors in the "Greys" colormap. - group_names: name of each of the groups which appear - in a legend, there must be one for each one. Defaults to None - and the legend is not shown. Implies `legend=True`. - legend: if `True`, show a legend with the groups. If - `group_names` is passed, it will be used for finding the names - to display in the legend. Otherwise, the values passed to - `group` will be used. kwargs: if dim_domain is 1, keyword arguments to be passed to the matplotlib.pyplot.plot function; if dim_domain is 2, keyword arguments to be passed to the @@ -498,28 +513,35 @@ def plot( Returns: fig: figure object in which the graphs are plotted. """ - self.artists = np.zeros(len(self.axes), dtype=np.ndarray) + self.artists = np.zeros( + (self.n_samples(), self.fdata.dim_codomain), + dtype=Artist, + ) - if domain_range is None: + if self.domain_range is None: self.domain_range = self.fdata.domain_range else: - self.domain_range = _to_domain_range(domain_range) + self.domain_range = _to_domain_range(self.domain_range) sample_colors, patches = _get_color_info( - self.fdata, group, group_names, group_colors, legend, kwargs, + self.fdata, + self.group, + self.group_names, + self.group_colors, + self.legend, + kwargs, ) - color_dict: Mapping[str, Union[ColorLike, None]] = {} + color_dict: Mapping[str, Optional[ColorLike]] = {} if self.fdata.dim_domain == 1: for i in range(self.fdata.dim_codomain): - self.artists[i] = np.zeros(self.n_samples(), dtype=Artist) for j in range(self.fdata.n_samples): set_color_dict(sample_colors, j, color_dict) - self.artists[i][j] = self.axes[i].scatter( + self.artists[j, i] = self.axes[i].scatter( self.grid_points[0], self.evaluated_points[j, ..., i].T, **color_dict, @@ -535,12 
+557,11 @@ def plot( X, Y = np.meshgrid(X, Y) for k in range(self.fdata.dim_codomain): - self.artists[k] = np.zeros(self.n_samples(), dtype=Artist) for h in range(self.fdata.n_samples): set_color_dict(sample_colors, h, color_dict) - self.artists[k][h] = self.axes[k].scatter( + self.artists[h, k] = self.axes[k].scatter( X, Y, self.evaluated_points[h, ..., k].T, @@ -599,7 +620,7 @@ def _set_figure_and_axes( def set_color_dict( sample_colors: Any, ind: int, - color_dict: Mapping[str, Union[ColorLike, None]], + color_dict: Mapping[str, Optional[ColorLike]], ) -> None: """ Auxiliary method used to update color_dict. From 3575fb325315af81d9ded93eedfc458755eafbb9 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 6 May 2021 19:53:30 +0200 Subject: [PATCH 317/417] everything works with the new multiple display --- skfda/exploratory/visualization/_ddplot.py | 7 +++++-- .../visualization/_magnitude_shape_plot.py | 7 +++++-- .../visualization/_multiple_display.py | 20 ++++++------------- .../exploratory/visualization/_outliergram.py | 7 +++++-- 4 files changed, 21 insertions(+), 20 deletions(-) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 740a9d74b..749111889 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -87,7 +87,10 @@ def plot( fig (figure object): figure object in which the depths will be scattered. 
""" - self.artists = np.zeros(self.n_samples(), dtype=Artist) + self.artists = np.zeros( + (self.n_samples(), 1), + dtype=Artist, + ) margin = 0.025 width_aux_line = 0.35 color_aux_line = "gray" @@ -95,7 +98,7 @@ def plot( ax = self.axes[0] for i in range(len(self.depth_dist1)): - self.artists[i] = ax.scatter( + self.artists[i, 0] = ax.scatter( self.depth_dist1[i], self.depth_dist2[i], picker=True, diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py index 07a9e0d62..e00324940 100644 --- a/skfda/exploratory/visualization/_magnitude_shape_plot.py +++ b/skfda/exploratory/visualization/_magnitude_shape_plot.py @@ -299,7 +299,10 @@ def plot(self): """ - self.artists = np.zeros(self.n_samples(), dtype=Artist) + self.artists = np.zeros( + (self.n_samples(), 1), + dtype=Artist, + ) colors = np.zeros((self.fdatagrid.n_samples, 4)) colors[np.where(self.outliers == 1)] = self.colormap(self.outliercol) colors[np.where(self.outliers == 0)] = self.colormap(self.color) @@ -307,7 +310,7 @@ def plot(self): colors_rgba = [tuple(i) for i in colors] for i in range(len(self.points[:, 0].ravel())): - self.artists[i] = self.axes[0].scatter( + self.artists[i, 0] = self.axes[0].scatter( self.points[:, 0].ravel()[i], self.points[:, 1].ravel()[i], color=colors_rgba[i], diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index 8331ba12e..b0479905d 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -6,6 +6,7 @@ from matplotlib.artist import Artist from matplotlib.axes import Axes from matplotlib.backend_bases import Event +from matplotlib.collections import PathCollection from matplotlib.figure import Figure from matplotlib.text import Annotation from matplotlib.widgets import Slider, Widget @@ -220,21 +221,13 @@ def hover(self, event: Event): for i in range(len(d.axes)): if 
event.inaxes == d.axes[i]: index_axis = index - if len(d.artists) == 0: - return - - elif ( - isinstance(d.artists[0], List) - or isinstance(d.artists[0][0], List) - ): - return - - elif isinstance(d.artists[0], Artist): - artists_array = d.artists - elif isinstance(d.artists[0], np.ndarray): - artists_array = d.artists[i] + + artists_array = d.artists[:, i] for j in range(len(artists_array)): artist = artists_array[j] + if not isinstance(artist, PathCollection): + return + is_graph, ind = artist.contains(event) if is_graph and self.previous_hovered == artist: return @@ -326,7 +319,6 @@ def update_index_display_picked(self) -> None: for i in range(len(d.axes)): if d.axes[i] == self.point_clicked.axes: if len(d.axes) == 1: - self.x = 1 self.index_clicked = np.where( d.artists == self.point_clicked, )[0][0] diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index f1fabbb0d..9eae6a332 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -104,11 +104,14 @@ def plot( fig: figure object in which the depths will be scattered. 
""" - self.artists = np.zeros(self.n_samples(), dtype=Artist) + self.artists = np.zeros( + (self.n_samples(), 1), + dtype=Artist, + ) self.axScatter = self.axes[0] for i in range(self.mei.size): - self.artists[i] = self.axScatter.scatter( + self.artists[i, 0] = self.axScatter.scatter( self.mei[i], self.mbd[i], picker=2, From f8e229bdc12b30be62e62af7abe4b5b5403ad22d Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 6 May 2021 19:56:07 +0200 Subject: [PATCH 318/417] corrected arguments and how the artists are created in a matrix, works with interactivity --- .../visualization/representation.py | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index be051e5d0..50689d531 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -157,6 +157,36 @@ class GraphPlot(BasePlot): n_cols(int, optional): designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. + n_points (int or tuple, optional): Number of points to evaluate in + the plot. In case of surfaces a tuple of length 2 can be pased + with the number of points to plot in each axis, otherwise the + same number of points will be used in the two axes. By default + in unidimensional plots will be used 501 points; in surfaces + will be used 30 points per axis, wich makes a grid with 900 + points. + domain_range (tuple or list of tuples, optional): Range where the + function will be plotted. In objects with unidimensional domain + the domain range should be a tuple with the bounds of the + interval; in the case of surfaces a list with 2 tuples with + the ranges for each dimension. Default uses the domain range + of the functional object. 
+ group (list of int): contains integers from [0 to number of + labels) indicating to which group each sample belongs to. Then, + the samples with the same label are plotted in the same color. + If None, the default value, each sample is plotted in the color + assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. + group_colors (list of colors): colors in which groups are + represented, there must be one for each group. If None, each + group is shown with distict colors in the "Greys" colormap. + group_names (list of str): name of each of the groups which appear + in a legend, there must be one for each one. Defaults to None + and the legend is not shown. Implies `legend=True`. + colormap_name: name of the colormap to be used. By default we will + use autumn. + legend (bool): if `True`, show a legend with the groups. If + `group_names` is passed, it will be used for finding the names + to display in the legend. Otherwise, the values passed to + `group` will be used. Attributes: gradient_list: normalization of the values from gradient color_list that will be used to determine the intensity of the color @@ -243,36 +273,6 @@ def plot( function with a gradient of colors depending on the initial gradient_color_list (normalized in gradient_list). Args: - n_points (int or tuple, optional): Number of points to evaluate in - the plot. In case of surfaces a tuple of length 2 can be pased - with the number of points to plot in each axis, otherwise the - same number of points will be used in the two axes. By default - in unidimensional plots will be used 501 points; in surfaces - will be used 30 points per axis, wich makes a grid with 900 - points. - domain_range (tuple or list of tuples, optional): Range where the - function will be plotted. In objects with unidimensional domain - the domain range should be a tuple with the bounds of the - interval; in the case of surfaces a list with 2 tuples with - the ranges for each dimension. 
Default uses the domain range - of the functional object. - group (list of int): contains integers from [0 to number of - labels) indicating to which group each sample belongs to. Then, - the samples with the same label are plotted in the same color. - If None, the default value, each sample is plotted in the color - assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors (list of colors): colors in which groups are - represented, there must be one for each group. If None, each - group is shown with distict colors in the "Greys" colormap. - group_names (list of str): name of each of the groups which appear - in a legend, there must be one for each one. Defaults to None - and the legend is not shown. Implies `legend=True`. - colormap_name: name of the colormap to be used. By default we will - use autumn. - legend (bool): if `True`, show a legend with the groups. If - `group_names` is passed, it will be used for finding the names - to display in the legend. Otherwise, the values passed to - `group` will be used. kwargs: if dim_domain is 1, keyword arguments to be passed to the matplotlib.pyplot.plot function; if dim_domain is 2, keyword arguments to be passed to the @@ -280,7 +280,10 @@ def plot( Returns: fig (figure object): figure object in which the graphs are plotted. 
""" - self.artists = np.zeros(len(self.axes), dtype=np.ndarray) + self.artists = np.zeros( + (self.n_samples(), self.fdata.dim_codomain), + dtype=Artist, + ) if self.domain_range is None: self.domain_range = self.fdata.domain_range @@ -319,17 +322,16 @@ def plot( mat = self.fdata(eval_points) for i in range(self.fdata.dim_codomain): - self.artists[i] = np.zeros(self.n_samples(), dtype=Artist) for j in range(self.fdata.n_samples): set_color_dict(sample_colors, j, color_dict) - self.artists[i][j] = self.axes[i].plot( + self.artists[j, i] = self.axes[i].plot( eval_points, mat[j, ..., i].T, **color_dict, **kwargs, - ) + )[0] else: @@ -354,21 +356,18 @@ def plot( X, Y = np.meshgrid(x, y, indexing='ij') - ind = 0 for k in range(self.fdata.dim_codomain): - self.artists[k] = np.zeros(self.n_samples(), dtype=Artist) for h in range(self.fdata.n_samples): set_color_dict(sample_colors, h, color_dict) - self.artists[k][h] = self.axes[k].plot_surface( + self.artists[h, k] = self.axes[k].plot_surface( X, Y, Z[h, ..., k], **color_dict, **kwargs, - ) - ind += 1 + )[0] _set_labels(self.fdata, self.fig, self.axes, patches) @@ -514,7 +513,10 @@ def plot( Returns: fig: figure object in which the graphs are plotted. 
""" - self.artists = np.zeros(len(self.axes), dtype=np.ndarray) + self.artists = np.zeros( + (self.n_samples(), self.fdata.dim_codomain), + dtype=Artist, + ) if self.domain_range is None: self.domain_range = self.fdata.domain_range @@ -535,12 +537,11 @@ def plot( if self.fdata.dim_domain == 1: for i in range(self.fdata.dim_codomain): - self.artists[i] = np.zeros(self.n_samples(), dtype=Artist) for j in range(self.fdata.n_samples): set_color_dict(sample_colors, j, color_dict) - self.artists[i][j] = self.axes[i].scatter( + self.artists[j, i] = self.axes[i].scatter( self.grid_points[0], self.evaluated_points[j, ..., i].T, **color_dict, @@ -556,12 +557,11 @@ def plot( X, Y = np.meshgrid(X, Y) for k in range(self.fdata.dim_codomain): - self.artists[k] = np.zeros(self.n_samples(), dtype=Artist) for h in range(self.fdata.n_samples): set_color_dict(sample_colors, h, color_dict) - self.artists[k][h] = self.axes[k].scatter( + self.artists[h, k] = self.axes[k].scatter( X, Y, self.evaluated_points[h, ..., k].T, From 240b3be2da8f80d5542992f84241c3cbead75fcd Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 6 May 2021 20:05:26 +0200 Subject: [PATCH 319/417] parametric plot including groups... --- .../visualization/_parametric_plot.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 3ab9954be..98a2a67fe 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -6,7 +6,7 @@ of them with domain 1 and codomain 1. """ -from typing import Optional, Sequence, TypeVar, Union +from typing import Any, Mapping, Optional, Sequence, TypeVar, Union import numpy as np from matplotlib.artist import Artist @@ -82,6 +82,7 @@ def __init__( def plot( self, + **kwargs: Any, ) -> Figure: """ Parametric Plot graph. 
@@ -92,7 +93,10 @@ def plot( fig: figure object in which the ParametricPlot graph will be plotted. """ - self.artists = np.zeros(self.n_samples(), dtype=Artist) + self.artists = np.zeros( + (self.n_samples(), 1), + dtype=Artist, + ) sample_colors, patches = _get_color_info( self.fd_final, @@ -100,8 +104,11 @@ def plot( self.group_names, self.group_colors, self.legend, + kwargs, ) + color_dict: Mapping[str, Union[ColorLike, None]] = {} + if ( self.fd_final.dim_domain == 1 and self.fd_final.dim_codomain == 2 @@ -114,10 +121,15 @@ def plot( ax = self.axes[0] for i in range(self.fd_final.n_samples): - self.artists[i] = ax.plot( + + if sample_colors is not None: + color_dict["color"] = sample_colors[i] + + self.artists[i, 0] = ax.plot( self.fd_final.data_matrix[i][:, 0].tolist(), self.fd_final.data_matrix[i][:, 1].tolist(), **color_dict, + **kwargs, ) else: raise ValueError( @@ -138,8 +150,6 @@ def plot( else: ax.set_ylabel(self.fd_final.coordinate_names[1]) - _set_labels(self.fdata, self.fig, self.axes, patches) - return fig def n_samples(self) -> int: From 616cd58589c132307d9a706087654ad358877620 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 6 May 2021 20:14:00 +0200 Subject: [PATCH 320/417] solved importing protocols --- skfda/exploratory/visualization/_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index b3bb9e6c8..40f7c3fe3 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -1,12 +1,13 @@ import io import math import re -from typing import Optional, Protocol, Sequence, Tuple, TypeVar, Union +from typing import Optional, Sequence, Tuple, TypeVar, Union import matplotlib.backends.backend_svg import matplotlib.pyplot as plt from matplotlib.axes import Axes from matplotlib.figure import Figure +from typing_extensions import Protocol from ...representation._functional_data import FData From 
9a0f342211b683d16df1e1d12600895f64e639ad Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Fri, 7 May 2021 00:06:02 +0200 Subject: [PATCH 321/417] solved some mypy --- skfda/exploratory/visualization/_parametric_plot.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 98a2a67fe..f5fd409fa 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -15,12 +15,7 @@ from ...representation import FData from ._baseplot import BasePlot -from ._utils import ( - ColorLike, - _get_figure_and_axes, - _set_figure_layout, - _set_labels, -) +from ._utils import ColorLike, _get_figure_and_axes, _set_figure_layout from .representation import Indexable, _get_color_info K = TypeVar('K', contravariant=True) @@ -93,10 +88,7 @@ def plot( fig: figure object in which the ParametricPlot graph will be plotted. 
""" - self.artists = np.zeros( - (self.n_samples(), 1), - dtype=Artist, - ) + self.artists = np.zeros((self.n_samples(), 1), dtype=Artist) sample_colors, patches = _get_color_info( self.fd_final, From dd3466f771738a49d76d31e46701c1d00a3825d5 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Fri, 7 May 2021 00:11:34 +0200 Subject: [PATCH 322/417] solved some issues --- skfda/exploratory/visualization/_boxplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 647109e99..7e31353e0 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -408,7 +408,7 @@ def plot(self): """ - self.artists = np.zeros(self.n_samples(), dtype=Artist) + self.artists = np.zeros((self.n_samples(), 1), dtype=Artist) tones = np.linspace(0.1, 1.0, len(self._prob) + 1, endpoint=False)[1:] color = self.colormap(tones) From 84af26312b6d014dc1a7e6a7143c95f221bfc057 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Fri, 7 May 2021 00:15:36 +0200 Subject: [PATCH 323/417] fpca --- skfda/exploratory/visualization/fpca.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/skfda/exploratory/visualization/fpca.py b/skfda/exploratory/visualization/fpca.py index 18166b590..27fec3585 100644 --- a/skfda/exploratory/visualization/fpca.py +++ b/skfda/exploratory/visualization/fpca.py @@ -1,4 +1,5 @@ -from typing import Optional, Union +from re import A +from typing import Any, Optional, Union import numpy as np from matplotlib.axes import Axes @@ -33,10 +34,13 @@ class FPCAPlot(BasePlot): def __init__( self, - mean, components, multiple, - chart=None, - fig=None, - axes=None, + mean: FData, + components: FData, + multiple: float, + chart: Union[Figure, Axes, None] = None, + *, + fig: Optional[Figure] = None, + axes: Optional[Axes] = None, ): BasePlot.__init__(self) self.mean = mean @@ -45,7 +49,7 @@ def 
__init__( self._set_figure_and_axes(chart, fig, axes) - def plot(self, **kwargs): + def plot(self, **kwargs: Any): """ Plots the perturbation graphs for the principal components. The perturbations are defined as variations over the mean. Adding a multiple @@ -86,7 +90,7 @@ def _set_figure_and_axes( self.fig = fig self.axes = axes - def _get_component_perturbations(self, index=0): + def _get_component_perturbations(self, index: int = 0): """ Computes the perturbations over the mean function of a principal component at a certain index. From 0b1bafd8bc4608177a313ea3d629a58cdc85ce9b Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Fri, 7 May 2021 00:16:02 +0200 Subject: [PATCH 324/417] deleted unused import --- skfda/exploratory/visualization/fpca.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/skfda/exploratory/visualization/fpca.py b/skfda/exploratory/visualization/fpca.py index 27fec3585..9f58fbcc2 100644 --- a/skfda/exploratory/visualization/fpca.py +++ b/skfda/exploratory/visualization/fpca.py @@ -1,7 +1,5 @@ -from re import A from typing import Any, Optional, Union -import numpy as np from matplotlib.axes import Axes from matplotlib.figure import Figure From ff0eb1b2c7fc945b42d1dc3f7501d14f8320ed48 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Fri, 7 May 2021 00:26:05 +0200 Subject: [PATCH 325/417] solved tuple --- skfda/exploratory/visualization/_boxplot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 7e31353e0..664afa7a4 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -6,7 +6,7 @@ """ import math from abc import ABC, abstractmethod -from typing import Optional, Sequence, Union +from typing import Optional, Sequence, Tuple, Union import matplotlib import matplotlib.pyplot as plt @@ -249,7 +249,7 @@ def __init__( self, fdatagrid: FDataGrid, depth_method: 
Optional[Depth[FDataGrid]] = None, - prob: Sequence[float] = [0.5], + prob: Tuple[float] = (0.5,), factor: float = 1.5, chart: Union[Figure, Axes, None] = None, *, @@ -293,7 +293,7 @@ def __init__( raise ValueError( "Function only supports FDataGrid with domain dimension 1.") - if sorted(prob, reverse=True) != prob: + if sorted(prob, reverse=True) != list(prob): raise ValueError( "Probabilities required to be in descending order.") From 3390894c7b0dbcb68189908795a835d7a64632ce Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 8 May 2021 14:51:53 +0200 Subject: [PATCH 326/417] Improve operators. Add typing and style. Get rid or _FDataCallable. Allow callables as weights for LinearDifferentialOperator, and simplify its logic. --- setup.py | 2 +- skfda/_utils/__init__.py | 1 - skfda/_utils/_utils.py | 34 +- skfda/misc/_math.py | 68 ++- skfda/misc/operators/__init__.py | 10 +- skfda/misc/operators/_identity.py | 24 +- skfda/misc/operators/_integral_transform.py | 45 +- .../_linear_differential_operator.py | 435 +++++++++--------- skfda/misc/operators/_operators.py | 61 ++- tests/test_linear_differential_operator.py | 91 ++-- 10 files changed, 427 insertions(+), 344 deletions(-) diff --git a/setup.py b/setup.py index 19493fdcc..7a8096766 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,7 @@ 'findiff', 'matplotlib', 'mpldatacursor', - 'multimethod>=1.2', + 'multimethod>=1.5', 'numpy>=1.16', 'pandas', 'rdata', diff --git a/skfda/_utils/__init__.py b/skfda/_utils/__init__.py index c8227ab93..191324177 100644 --- a/skfda/_utils/__init__.py +++ b/skfda/_utils/__init__.py @@ -8,7 +8,6 @@ _classifier_get_classes, _classifier_get_depth_methods, _evaluate_grid, - _FDataCallable, _int_to_real, _pairwise_symmetric, _reshape_eval_points, diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index fa6fd0fb9..01403252b 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -20,7 +20,6 @@ ) import numpy as np -import scipy.integrate from numpy import ndarray from 
pandas.api.indexers import check_array_indexer from sklearn.base import clone @@ -28,6 +27,8 @@ from sklearn.utils.multiclass import check_classification_targets from typing_extensions import Literal, Protocol +import scipy.integrate + from ..representation._typing import ( ArrayLike, DomainRange, @@ -46,36 +47,6 @@ T = TypeVar("T", bound=FData) -class _FDataCallable(): - - def __init__(self, function, *, domain_range, n_samples=1): - - self.function = function - self.domain_range = domain_range - self.n_samples = n_samples - - def __call__(self, *args, **kwargs): - - return self.function(*args, **kwargs) - - def __len__(self): - - return self.n_samples - - def __getitem__(self, key): - - def new_function(*args, **kwargs): - return self.function(*args, **kwargs)[key] - - tmp = np.empty(self.n_samples) - new_nsamples = len(tmp[key]) - - return _FDataCallable( - new_function, - domain_range=self.domain_range, - n_samples=new_nsamples, - ) - def check_is_univariate(fd: FData) -> None: """Check if an FData is univariate and raises an error. 
@@ -104,6 +75,7 @@ def check_is_univariate(fd: FData) -> None: f"and dim_codomain=1 {codomain_str}", ) + def _to_grid( X: FData, y: FData, diff --git a/skfda/misc/_math.py b/skfda/misc/_math.py index 594c607c3..8575aa704 100644 --- a/skfda/misc/_math.py +++ b/skfda/misc/_math.py @@ -6,17 +6,22 @@ """ import warnings from builtins import isinstance -from typing import Any, Optional, TypeVar, Union, cast +from typing import Any, Callable, Optional, TypeVar, Union, cast import multimethod import numpy as np + import scipy.integrate from .._utils import _same_domain, nquad_vec from ..representation import FData, FDataBasis, FDataGrid +from ..representation._typing import DomainRange from ..representation.basis import Basis -Vector = TypeVar("Vector") +Vector = TypeVar( + "Vector", + bound=Union[np.ndarray, Callable[[np.ndarray], np.ndarray]] +) def sqrt(fdatagrid: FDataGrid) -> FDataGrid: @@ -206,10 +211,11 @@ def cumsum(fdatagrid: FDataGrid) -> FDataGrid: @multimethod.multidispatch def inner_product( - arg1: Any, - arg2: Any, + arg1: Vector, + arg2: Vector, *, - _matrix=False, + _matrix: bool = False, + _domain_range: Optional[DomainRange] = None, ) -> np.ndarray: r"""Return the usual (:math:`L_2`) inner product. 
@@ -309,8 +315,13 @@ def inner_product( array([ 0.5 , 0.25]) """ - if callable(arg1): - return _inner_product_integrate(arg1, arg2, _matrix=_matrix) + if callable(arg1) and callable(arg2): + return _inner_product_integrate( + arg1, + arg2, + _matrix=_matrix, + _domain_range=_domain_range, + ) return ( np.einsum('n...,m...->nm...', arg1, arg2).sum(axis=-1) @@ -323,7 +334,7 @@ def _inner_product_fdatagrid( arg1: FDataGrid, arg2: FDataGrid, *, - _matrix=False, + _matrix: bool = False, ) -> np.ndarray: if not np.array_equal( @@ -376,9 +387,9 @@ def _inner_product_fdatabasis( arg1: Union[FDataBasis, Basis], arg2: Union[FDataBasis, Basis], *, - _matrix=False, - inner_product_matrix=None, - force_numerical=False, + _matrix: bool = False, + inner_product_matrix: Optional[np.ndarray] = None, + force_numerical: bool = False, ) -> np.ndarray: if not _same_domain(arg1, arg2): @@ -437,17 +448,34 @@ def _inner_product_fdatabasis( def _inner_product_integrate( - arg1: FData, - arg2: FData, + arg1: Callable[[np.ndarray], np.ndarray], + arg2: Callable[[np.ndarray], np.ndarray], *, _matrix: bool = False, + _domain_range: Optional[DomainRange] = None, ) -> np.ndarray: - if not np.array_equal( - arg1.domain_range, - arg2.domain_range, - ): - raise ValueError("Domain range for both objects must be equal") + domain_range: DomainRange + + if isinstance(arg1, FData) and isinstance(arg2, FData): + if not np.array_equal( + arg1.domain_range, + arg2.domain_range, + ): + raise ValueError("Domain range for both objects must be equal") + + domain_range = arg1.domain_range + len_arg1 = len(arg1) + len_arg2 = len(arg2) + else: + # If the arguments are callables, we need to pass the domain range + # explicitly. This is used internally for computing the gramian + # matrix of operators. 
+ assert _domain_range is not None + domain_range = _domain_range + left_domain = np.array(domain_range)[:, 0] + len_arg1 = len(arg1(left_domain)) + len_arg2 = len(arg2(left_domain)) def integrand(*args: np.ndarray) -> np.ndarray: # noqa: WPS430 f1 = arg1(args)[:, 0, :] @@ -461,13 +489,13 @@ def integrand(*args: np.ndarray) -> np.ndarray: # noqa: WPS430 integral = nquad_vec( integrand, - arg1.domain_range, + domain_range, ) summation = np.sum(integral, axis=-1) if _matrix: - summation = summation.reshape((len(arg1), len(arg2))) + summation = summation.reshape((len_arg1, len_arg2)) return summation diff --git a/skfda/misc/operators/__init__.py b/skfda/misc/operators/__init__.py index 62d78e994..7cac49f18 100644 --- a/skfda/misc/operators/__init__.py +++ b/skfda/misc/operators/__init__.py @@ -1,6 +1,10 @@ +"""Operators applicable to functional data.""" from ._identity import Identity from ._integral_transform import IntegralTransform from ._linear_differential_operator import LinearDifferentialOperator -from ._operators import (Operator, gramian_matrix, - gramian_matrix_optimization, - MatrixOperator) +from ._operators import ( + MatrixOperator, + Operator, + gramian_matrix, + gramian_matrix_optimization, +) diff --git a/skfda/misc/operators/_identity.py b/skfda/misc/operators/_identity.py index d2d3ac259..4ef00b66e 100644 --- a/skfda/misc/operators/_identity.py +++ b/skfda/misc/operators/_identity.py @@ -1,11 +1,18 @@ +from __future__ import annotations + +from typing import Any, TypeVar + import numpy as np from ...representation import FDataGrid +from ...representation._typing import Vector from ...representation.basis import Basis from ._operators import Operator, gramian_matrix_optimization +T = TypeVar("T", bound=Vector) -class Identity(Operator): + +class Identity(Operator[T, T]): """Identity operator. Linear operator that returns its input. 
@@ -17,22 +24,25 @@ class Identity(Operator): """ - def __call__(self, f): + def __call__(self, f: T) -> T: # noqa: D102 return f @gramian_matrix_optimization.register def basis_penalty_matrix_optimized( - linear_operator: Identity, - basis: Basis): - + linear_operator: Identity[Any], + basis: Basis, +) -> np.ndarray: + """Optimized version of the penalty matrix for Basis.""" return basis.gram_matrix() @gramian_matrix_optimization.register def fdatagrid_penalty_matrix_optimized( - linear_operator: Identity, - basis: FDataGrid): + linear_operator: Identity[Any], + basis: FDataGrid, +) -> np.ndarray: + """Optimized version of the penalty matrix for FDataGrid.""" from ..metrics import l2_norm return np.diag(l2_norm(basis)**2) diff --git a/skfda/misc/operators/_integral_transform.py b/skfda/misc/operators/_integral_transform.py index aab01d5ad..943718de8 100644 --- a/skfda/misc/operators/_integral_transform.py +++ b/skfda/misc/operators/_integral_transform.py @@ -1,33 +1,52 @@ +from __future__ import annotations + +from typing import Callable + +import numpy as np + import scipy.integrate +from ...representation import FData from ._operators import Operator -class IntegralTransform(Operator): +class IntegralTransform(Operator[FData, Callable[[np.ndarray], np.ndarray]]): """Integral operator. - - - Attributes: - kernel_function (callable): Kernel function corresponding to - the operator. + Parameters: + kernel_function: Kernel function corresponding to the operator. 
""" - def __init__(self, kernel_function): + def __init__( + self, + kernel_function: Callable[[np.ndarray, np.ndarray], np.ndarray], + ) -> None: self.kernel_function = kernel_function - def __call__(self, f): + def __call__( # noqa: D102 + self, + f: FData, + ) -> Callable[[np.ndarray], np.ndarray]: - def evaluate_covariance(points): + def evaluate_covariance( # noqa: WPS430 + points: np.ndarray, + ) -> np.ndarray: - def integral_body(integration_var): - return (f(integration_var) * - self.kernel_function(integration_var, points)) + def integral_body( # noqa: WPS430 + integration_var: np.ndarray, + ) -> np.ndarray: + return ( + f(integration_var) + * self.kernel_function(integration_var, points) + ) domain_range = f.domain_range[0] return scipy.integrate.quad_vec( - integral_body, domain_range[0], domain_range[1])[0] + integral_body, + domain_range[0], + domain_range[1], + )[0] return evaluate_covariance diff --git a/skfda/misc/operators/_linear_differential_operator.py b/skfda/misc/operators/_linear_differential_operator.py index 2597b6740..aaef7c1e5 100644 --- a/skfda/misc/operators/_linear_differential_operator.py +++ b/skfda/misc/operators/_linear_differential_operator.py @@ -1,35 +1,71 @@ +from __future__ import annotations + import numbers +from typing import Callable, Optional, Sequence, Tuple, Union, cast import numpy as np -import scipy.integrate from numpy import polyder, polyint, polymul, polyval + +import scipy.integrate from scipy.interpolate import PPoly -from ..._utils import _FDataCallable, _same_domain -from ...representation import FDataGrid -from ...representation.basis import BSpline, Constant, Fourier, Monomial +from ..._utils import _same_domain +from ...representation import FData, FDataGrid +from ...representation._typing import DomainRangeLike +from ...representation.basis import ( + BSpline, + Constant, + FDataBasis, + Fourier, + Monomial, +) from ._operators import Operator, gramian_matrix_optimization -__author__ = "Pablo Pérez Manso" 
-__email__ = "92manso@gmail.com" +Order = int + +WeightSequence = Sequence[ + Union[ + float, + Callable[[np.ndarray], np.ndarray], + ], +] -class LinearDifferentialOperator(Operator): - """Defines the structure of a linear differential operator function system +class LinearDifferentialOperator( + Operator[FData, Callable[[np.ndarray], np.ndarray]], +): + r""" + Defines the structure of a linear differential operator function system. .. math:: Lx(t) = b_0(t) x(t) + b_1(t) x'(x) + - \\dots + b_{n-1}(t) d^{n-1}(x(t)) + b_n(t) d^n(x(t)) + \dots + b_{n-1}(t) d^{n-1}(x(t)) + b_n(t) d^n(x(t)) Can only be applied to functional data, as multivariate data has no derivatives. - Attributes: + You have to provide either order or weights. + If both are provided, it will raise an error. + If a positional argument is supplied it will be considered the + order if it is an integral type and the weights otherwise. + + Parameters: + order (int, optional): the order of the operator. It's the highest + derivative order of the operator + + weights (list, optional): A FDataBasis objects list of length + order + 1 items + domain_range (tuple or list of tuples, optional): Definition + of the interval where the weight functions are + defined. If the functional weights are specified + and this is not, takes the domain range from them. + Otherwise, defaults to (0,1). + + Attributes: weights (list): A list of callables. 
Examples: - Create a linear differential operator that penalizes the second derivative (acceleration) @@ -39,19 +75,7 @@ class LinearDifferentialOperator(Operator): >>> >>> LinearDifferentialOperator(2) LinearDifferentialOperator( - weights=[ - FDataBasis( - basis=Constant(domain_range=((0, 1),), n_basis=1), - coefficients=[[ 0.]], - ...), - FDataBasis( - basis=Constant(domain_range=((0, 1),), n_basis=1), - coefficients=[[ 0.]], - ...), - FDataBasis( - basis=Constant(domain_range=((0, 1),), n_basis=1), - coefficients=[[ 1.]], - ...)] + weights=(0, 0, 1), ) Create a linear differential operator that penalizes three times @@ -59,19 +83,7 @@ class LinearDifferentialOperator(Operator): >>> LinearDifferentialOperator(weights=[0, 2, 3]) LinearDifferentialOperator( - weights=[ - FDataBasis( - basis=Constant(domain_range=((0, 1),), n_basis=1), - coefficients=[[ 0.]], - ...), - FDataBasis( - basis=Constant(domain_range=((0, 1),), n_basis=1), - coefficients=[[ 2.]], - ...), - FDataBasis( - basis=Constant(domain_range=((0, 1),), n_basis=1), - coefficients=[[ 3.]], - ...)] + weights=(0, 2, 3), ) Create a linear differential operator with non-constant weights. @@ -83,124 +95,95 @@ class LinearDifferentialOperator(Operator): ... FDataBasis(monomial, [1., 2., 3.])] >>> LinearDifferentialOperator(weights=fdlist) LinearDifferentialOperator( - weights=[ - FDataBasis( - basis=Constant(domain_range=((0, 1),), n_basis=1), - coefficients=[[ 0.]], - ...), - FDataBasis( - basis=Constant(domain_range=((0, 1),), n_basis=1), - coefficients=[[ 0.]], - ...), - FDataBasis( - basis=Monomial(domain_range=((0, 1),), n_basis=3), - coefficients=[[ 1. 2. 3.]], - ...)] + weights=(FDataBasis( + basis=Constant(domain_range=((0, 1),), n_basis=1), + coefficients=[[ 0.]], + ...), + FDataBasis( + basis=Constant(domain_range=((0, 1),), n_basis=1), + coefficients=[[ 0.]], + ...), + FDataBasis( + basis=Monomial(domain_range=((0, 1),), n_basis=3), + coefficients=[[ 1. 2. 
3.]], + ...)), ) """ def __init__( - self, order_or_weights=None, *, order=None, weights=None, - domain_range=None): - """Constructor. You have to provide either order or weights. - If both are provided, it will raise an error. - If a positional argument is supplied it will be considered the - order if it is an integral type and the weights otherwise. - - Args: - order (int, optional): the order of the operator. It's the highest - derivative order of the operator - - weights (list, optional): A FDataBasis objects list of length - order + 1 items - - domain_range (tuple or list of tuples, optional): Definition - of the interval where the weight functions are - defined. If the functional weights are specified - and this is not, takes the domain range from them. - Otherwise, defaults to (0,1). - - """ - - from ...representation.basis import FDataBasis + self, + order_or_weights: Union[Order, WeightSequence, None] = None, + *, + order: Optional[int] = None, + weights: Optional[WeightSequence] = None, + domain_range: Optional[DomainRangeLike] = None, + ) -> None: num_args = sum( - [a is not None for a in [order_or_weights, order, weights]]) + a is not None for a in (order_or_weights, order, weights) + ) if num_args > 1: - raise ValueError("You have to provide the order or the weights, " - "not both") - - real_domain_range = (domain_range if domain_range is not None - else (0, 1)) + raise ValueError( + "You have to provide the order or the weights, not both.", + ) if order_or_weights is not None: if isinstance(order_or_weights, numbers.Integral): - order = order_or_weights + order = int(order_or_weights) else: + assert isinstance(order_or_weights, Sequence) weights = order_or_weights if order is None and weights is None: - self.weights = (FDataBasis(Constant(real_domain_range), 0),) + weights = (0,) - elif weights is None: + elif order is not None: if order < 0: - raise ValueError("Order should be an non-negative integer") - - self.weights = [ - 
FDataBasis(Constant(real_domain_range), - 0 if (i < order) else 1) - for i in range(order + 1)] - - else: - if len(weights) == 0: - raise ValueError("You have to provide one weight at least") - - if all(isinstance(n, numbers.Real) for n in weights): - self.weights = list(FDataBasis(Constant(real_domain_range), - np.array(weights) - .reshape(-1, 1))) - - elif all(isinstance(n, FDataBasis) for n in weights): - if all([_same_domain(weights[0], x) - and x.n_samples == 1 for x in weights]): - self.weights = weights - - real_domain_range = weights[0].domain_range - if (domain_range is not None - and real_domain_range != domain_range): - raise ValueError("The domain range provided for the " - "linear operator does not match the " - "domain range of the weights") - - else: - raise ValueError("FDataBasis objects in the list have " - "not the same domain_range") - - else: - raise ValueError("The elements of the list are neither " - "integers or FDataBasis objects") - - self.domain_range = real_domain_range - - def __repr__(self): - """Representation of linear differential operator object.""" - - bwtliststr = "" - for w in self.weights: - bwtliststr = bwtliststr + "\n" + repr(w) + "," - - return (f"{self.__class__.__name__}(" - f"\nweights=[{bwtliststr[:-1]}]" - f"\n)").replace('\n', '\n ') - - def __eq__(self, other): - """Equality of linear differential operator objects""" - return (self.weights == other.weights) + raise ValueError("Order should be an non-negative integer.") + + weights = tuple( + 0 if (i < order) else 1 + for i in range(order + 1) + ) + + assert weights is not None + if len(weights) == 0: + raise ValueError("You have to provide one weight at least.") + + # Check domain ranges + for w in weights: + w_domain_range = getattr(w, "domain_range", None) + + if w_domain_range is not None: + if domain_range is None: + domain_range = w_domain_range + elif not np.array_equal(w_domain_range, domain_range): + raise ValueError( + "Weights with wrong domain range.", + ) 
+ self.weights = tuple(weights) + + def __repr__(self) -> str: + + return ( + f"{self.__class__.__name__}(\n" + f"\tweights={self.weights},\n" + f")" + ).replace('\n', '\n ') + + def __eq__(self, other: object) -> bool: + + return ( + isinstance(other, LinearDifferentialOperator) + and self.weights == other.weights + ) - def constant_weights(self): + def constant_weights(self) -> Optional[np.ndarray]: """ + Return constant weights. + Return the scalar weights of the linear differential operator if they are constant basis. Otherwise, return None. @@ -210,25 +193,26 @@ def constant_weights(self): for constant weights. """ - coefs = [w.coefficients[0, 0] if isinstance(w.basis, Constant) - else None - for w in self.weights] + weights = np.array(self.weights) - return np.array(coefs) if coefs.count(None) == 0 else None + return None if weights.dtype == np.object_ else weights - def __call__(self, f): + def __call__(self, f: FData) -> Callable[[np.ndarray], np.ndarray]: """Return the function that results of applying the operator.""" - function_derivatives = [ - f.derivative(order=i) for i, _ in enumerate(self.weights)] + f.derivative(order=i) for i, _ in enumerate(self.weights) + ] - def applied_linear_diff_op(t): - return sum(w(t) * function_derivatives[i](t) - for i, w in enumerate(self.weights)) + def applied_linear_diff_op( + t: np.ndarray, + ) -> np.ndarray: + return sum( + (w(t) if callable(w) else w) + * function_derivatives[i](t) + for i, w in enumerate(self.weights) + ) - return _FDataCallable(applied_linear_diff_op, - domain_range=f.domain_range, - n_samples=len(f)) + return applied_linear_diff_op ############################################################# @@ -240,29 +224,34 @@ def applied_linear_diff_op(t): @gramian_matrix_optimization.register def constant_penalty_matrix_optimized( - linear_operator: LinearDifferentialOperator, - basis: Constant): - + linear_operator: LinearDifferentialOperator, + basis: Constant, +) -> np.ndarray: + """Optimized version 
for Constant basis.""" coefs = linear_operator.constant_weights() if coefs is None: return NotImplemented - return np.array([[coefs[0] ** 2 * - (basis.domain_range[0][1] - - basis.domain_range[0][0])]]) - + return np.array([[ + coefs[0] ** 2 + * (basis.domain_range[0][1] - basis.domain_range[0][0]), + ]]) -def _monomial_evaluate_constant_linear_diff_op(basis, weights): - """ - Evaluate constant weights of a linear differential operator - over the basis functions. - """ +def _monomial_evaluate_constant_linear_diff_op( + basis: Monomial, + weights: np.ndarray, +) -> np.ndarray: + """Evaluate constant weights over the monomial basis.""" max_derivative = len(weights) - 1 seq = np.arange(basis.n_basis) - coef_mat = np.linspace(seq, seq - max_derivative + 1, - max_derivative, dtype=int) + coef_mat = np.linspace( + seq, + seq - max_derivative + 1, + max_derivative, + dtype=int, + ) # Compute coefficients for each derivative coefs = np.cumprod(coef_mat, axis=0) @@ -286,16 +275,21 @@ def _monomial_evaluate_constant_linear_diff_op(basis, weights): # The matrix is now triangular # refcheck is False to prevent exceptions while debugging weighted_coefs = np.copy(weighted_coefs.T) - weighted_coefs.resize(basis.n_basis, - basis.n_basis, refcheck=False) + weighted_coefs.resize( + basis.n_basis, + basis.n_basis, + refcheck=False, + ) weighted_coefs = weighted_coefs.T # Shift the coefficients so that they correspond to the right # exponent indexes = np.tril_indices(basis.n_basis) polynomials = np.zeros_like(weighted_coefs) - polynomials[indexes[0], indexes[1] - - indexes[0] - 1] = weighted_coefs[indexes] + polynomials[ + indexes[0], + indexes[1] - indexes[0] - 1, + ] = weighted_coefs[indexes] # At this point, each row of the matrix correspond to a polynomial # that is the result of applying the linear differential operator @@ -306,9 +300,10 @@ def _monomial_evaluate_constant_linear_diff_op(basis, weights): @gramian_matrix_optimization.register def monomial_penalty_matrix_optimized( 
- linear_operator: LinearDifferentialOperator, - basis: Monomial): - + linear_operator: LinearDifferentialOperator, + basis: Monomial, +) -> np.ndarray: + """Optimized version for Monomial basis.""" weights = linear_operator.constant_weights() if weights is None: return NotImplemented @@ -340,10 +335,14 @@ def monomial_penalty_matrix_optimized( integrand /= denom # Add column of zeros at the right to increase exponent - integrand = np.pad(integrand, - pad_width=((0, 0), - (0, 1)), - mode='constant') + integrand = np.pad( + integrand, + pad_width=( + (0, 0), + (0, 1), + ), + mode='constant', + ) # Now, apply Barrow's rule # polyval applies Horner method over the first dimension, @@ -364,19 +363,21 @@ def monomial_penalty_matrix_optimized( return penalty_matrix -def _fourier_penalty_matrix_optimized_orthonormal(basis, weights): - """ - Return the penalty when the basis is orthonormal. - """ - +def _fourier_penalty_matrix_optimized_orthonormal( + basis: Fourier, + weights: np.ndarray, +) -> np.ndarray: + """Return the penalty when the basis is orthonormal.""" signs = np.array([1, 1, -1, -1]) signs_expanded = np.tile(signs, len(weights) // 4 + 1) signs_odd = signs_expanded[:len(weights)] signs_even = signs_expanded[1:len(weights) + 1] - phases = (np.arange(1, (basis.n_basis - 1) // 2 + 1) * - 2 * np.pi / basis.period) + phases = ( + np.arange(1, (basis.n_basis - 1) // 2 + 1) + * 2 * np.pi / basis.period + ) # Compute increasing powers coefs_no_sign = np.vander(phases, len(weights), increasing=True) @@ -411,8 +412,14 @@ def _fourier_penalty_matrix_optimized_orthonormal(basis, weights): penalty_matrix = np.diag(main_diag) # Add row and column for the constant - penalty_matrix = np.pad(penalty_matrix, pad_width=((1, 0), (1, 0)), - mode='constant') + penalty_matrix = np.pad( + penalty_matrix, + pad_width=( + (1, 0), + (1, 0), + ), + mode='constant', + ) penalty_matrix[0, 0] = weights[0]**2 @@ -421,9 +428,10 @@ def _fourier_penalty_matrix_optimized_orthonormal(basis, 
weights): @gramian_matrix_optimization.register def fourier_penalty_matrix_optimized( - linear_operator: LinearDifferentialOperator, - basis: Fourier): - + linear_operator: LinearDifferentialOperator, + basis: Fourier, +) -> np.ndarray: + """Optimized version for Fourier basis.""" weights = linear_operator.constant_weights() if weights is None: return NotImplemented @@ -438,9 +446,10 @@ def fourier_penalty_matrix_optimized( @gramian_matrix_optimization.register def bspline_penalty_matrix_optimized( - linear_operator: LinearDifferentialOperator, - basis: BSpline): - + linear_operator: LinearDifferentialOperator, + basis: BSpline, +) -> np.ndarray: + """Optimized version for BSpline basis.""" coefs = linear_operator.constant_weights() if coefs is None: return NotImplemented @@ -480,7 +489,7 @@ def bspline_penalty_matrix_optimized( # representation of splines # Places m knots at the boundaries - knots = basis._evaluation_knots() + knots = np.array(basis._evaluation_knots()) # c is used the select which spline the function # PPoly.from_spline below computes @@ -516,10 +525,9 @@ def bspline_penalty_matrix_optimized( # Now for each pair of basis computes the inner product after # applying the linear differential operator penalty_matrix = np.zeros((basis.n_basis, basis.n_basis)) - for interval in range(len(no_0_intervals)): + for interval, _ in enumerate(no_0_intervals): for i in range(basis.n_basis): - poly_i = np.trim_zeros(ppoly_lst[i][:, - interval], 'f') + poly_i = np.trim_zeros(ppoly_lst[i][:, interval], 'f') if len(poly_i) <= derivative_degree: # if the order of the polynomial is lesser or # equal to the derivative the result of the @@ -531,13 +539,16 @@ def bspline_penalty_matrix_optimized( integral = polyint(square) # definite integral - penalty_matrix[i, i] += np.diff(polyval( - integral, basis.knots[interval: interval + 2] - - basis.knots[interval]))[0] + penalty_matrix[i, i] += np.diff( + polyval( + integral, + basis.knots[interval: interval + 2] + - 
basis.knots[interval], + ), + )[0] for j in range(i + 1, basis.n_basis): - poly_j = np.trim_zeros(ppoly_lst[j][:, - interval], 'f') + poly_j = np.trim_zeros(ppoly_lst[j][:, interval], 'f') if len(poly_j) <= derivative_degree: # if the order of the polynomial is lesser # or equal to the derivative the result of @@ -545,12 +556,18 @@ def bspline_penalty_matrix_optimized( continue # indefinite integral integral = polyint( - polymul(polyder(poly_i, derivative_degree), - polyder(poly_j, derivative_degree))) + polymul( + polyder(poly_i, derivative_degree), + polyder(poly_j, derivative_degree), + ), + ) # definite integral - penalty_matrix[i, j] += np.diff(polyval( - integral, basis.knots[interval: interval + 2] - - basis.knots[interval]) + penalty_matrix[i, j] += np.diff( + polyval( + integral, + basis.knots[interval: interval + 2] + - basis.knots[interval], + ), )[0] penalty_matrix[j, i] = penalty_matrix[i, j] return penalty_matrix @@ -558,13 +575,15 @@ def bspline_penalty_matrix_optimized( @gramian_matrix_optimization.register def fdatagrid_penalty_matrix_optimized( - linear_operator: LinearDifferentialOperator, - basis: FDataGrid): - + linear_operator: LinearDifferentialOperator, + basis: FDataGrid, +) -> np.ndarray: + """Optimized version for FDatagrid.""" evaluated_basis = sum( - w(basis.grid_points[0]) * - basis.derivative(order=i)(basis.grid_points[0]) - for i, w in enumerate(linear_operator.weights)) + w(basis.grid_points[0]) if callable(w) else w + * basis.derivative(order=i)(basis.grid_points[0]) + for i, w in enumerate(linear_operator.weights) + ) indices = np.triu_indices(basis.n_samples) product = evaluated_basis[indices[0]] * evaluated_basis[indices[1]] diff --git a/skfda/misc/operators/_operators.py b/skfda/misc/operators/_operators.py index 7781aaf46..1cd171b90 100644 --- a/skfda/misc/operators/_operators.py +++ b/skfda/misc/operators/_operators.py @@ -1,22 +1,43 @@ +from __future__ import annotations + import abc +from typing import Callable, Generic, 
TypeVar, Union import multimethod +import numpy as np +from ...representation import FData -class Operator(abc.ABC): - """ - Abstract class for :term:`operators`. +OperatorInput = TypeVar( + "OperatorInput", + bound=Union[np.ndarray, FData], + contravariant=True, +) - """ +OperatorOutput = TypeVar( + "OperatorOutput", + bound=Union[np.ndarray, Callable[[np.ndarray], np.ndarray]], + covariant=True, +) + + +class Operator(abc.ABC, Generic[OperatorInput, OperatorOutput]): + """Abstract class for :term:`operators`.""" @abc.abstractmethod - def __call__(self, vector): + def __call__(self, vector: OperatorInput) -> OperatorOutput: + """Evaluate the operator.""" pass @multimethod.multidispatch -def gramian_matrix_optimization(linear_operator, basis): - r""" +def gramian_matrix_optimization( + linear_operator: Callable[[OperatorInput], OperatorOutput], + basis: OperatorInput, +) -> np.ndarray: + """ + Efficient implementation of gramian_matrix. + Generic function that can be subclassed for different combinations of operator and basis in order to provide a more efficient implementation for the gramian matrix. @@ -24,8 +45,11 @@ def gramian_matrix_optimization(linear_operator, basis): return NotImplemented -def gramian_matrix_numerical(linear_operator, basis): - r""" +def gramian_matrix_numerical( + linear_operator: Callable[[OperatorInput], OperatorOutput], + basis: OperatorInput, +) -> np.ndarray: + """ Return the gramian matrix given a basis, computed numerically. This method should work for every linear operator. 
@@ -35,10 +59,15 @@ def gramian_matrix_numerical(linear_operator, basis): evaluated_basis = linear_operator(basis) - return inner_product_matrix(evaluated_basis) + domain_range = getattr(basis, "domain_range", None) + + return inner_product_matrix(evaluated_basis, _domain_range=domain_range) -def gramian_matrix(linear_operator, basis): +def gramian_matrix( + linear_operator: Callable[[OperatorInput], OperatorOutput], + basis: OperatorInput, +) -> np.ndarray: r""" Return the gramian matrix given a basis. @@ -55,7 +84,6 @@ def gramian_matrix(linear_operator, basis): falling back to a numerical computation otherwise. """ - # Try to use a more efficient implementation matrix = gramian_matrix_optimization(linear_operator, basis) if matrix is not NotImplemented: @@ -64,20 +92,19 @@ def gramian_matrix(linear_operator, basis): return gramian_matrix_numerical(linear_operator, basis) -class MatrixOperator(Operator): +class MatrixOperator(Operator[np.ndarray, np.ndarray]): """Linear operator for finite spaces. Between finite dimensional spaces, every linear operator can be expressed as a product by a matrix. Attributes: - matrix (array-like object): The matrix containing the linear - transformation. + matrix: The matrix containing the linear transformation. 
""" - def __init__(self, matrix): + def __init__(self, matrix: np.ndarray) -> None: self.matrix = matrix - def __call__(self, f): + def __call__(self, f: np.ndarray) -> np.ndarray: # noqa: D102 return self.matrix @ f diff --git a/tests/test_linear_differential_operator.py b/tests/test_linear_differential_operator.py index d871f70bc..e676de8b7 100644 --- a/tests/test_linear_differential_operator.py +++ b/tests/test_linear_differential_operator.py @@ -1,14 +1,25 @@ +"""Tests of the LinearDifferentialOperator.""" + import unittest +from typing import Callable, Sequence, Union import numpy as np from skfda.misc.operators import LinearDifferentialOperator from skfda.representation.basis import Constant, FDataBasis, Monomial +WeightCallable = Callable[[np.ndarray], np.ndarray] -class TestLinearDifferentialOperator(unittest.TestCase): - def _assert_equal_weights(self, weights, weights2, msg): +class TestLinearDifferentialOperator(unittest.TestCase): + """Tests of the linear differential operator.""" + + def _assert_equal_weights( + self, + weights: Sequence[Union[float, WeightCallable]], + weights2: Sequence[Union[float, WeightCallable]], + msg: str, + ) -> None: self.assertEqual(len(weights), len(weights2), msg) for w, w2 in zip(weights, weights2): @@ -20,59 +31,57 @@ def _assert_equal_weights(self, weights, weights2, msg): else: self.assertTrue(eq(w2), msg) - def test_init_default(self): + def test_init_default(self) -> None: """Tests default initialization (do not penalize).""" lfd = LinearDifferentialOperator() - weightfd = [FDataBasis(Constant(domain_range=(0, 1)), 0)] + weights = [0] self._assert_equal_weights( - lfd.weights, weightfd, - "Wrong list of weight functions of the linear operator") + lfd.weights, + weights, + "Wrong list of weight functions of the linear operator", + ) - def test_init_integer(self): + def test_init_integer(self) -> None: """Tests initializations which only specify the order.""" - # Checks for a zero order Lfd object lfd_0 = 
LinearDifferentialOperator(order=0) - weightfd = [FDataBasis(Constant(domain_range=(0, 1)), 1)] + weights = [1] self._assert_equal_weights( - lfd_0.weights, weightfd, - "Wrong list of weight functions of the linear operator") + lfd_0.weights, + weights, + "Wrong list of weight functions of the linear operator", + ) # Checks for a non zero order Lfd object lfd_3 = LinearDifferentialOperator(3) - consfd = FDataBasis( - Constant(domain_range=(0, 1)), - [[0], [0], [0], [1]], - ) - bwtlist3 = list(consfd) + weights = [0, 0, 0, 1] self._assert_equal_weights( - lfd_3.weights, bwtlist3, - "Wrong list of weight functions of the linear operator") + lfd_3.weights, + weights, + "Wrong list of weight functions of the linear operator", + ) # Negative order must fail with np.testing.assert_raises(ValueError): LinearDifferentialOperator(-1) - def test_init_list_int(self): + def test_init_list_int(self) -> None: """Tests initializations with integer weights.""" + weights = [1, 3, 4, 5, 6, 7] - coefficients = [1, 3, 4, 5, 6, 7] - - constant = Constant((0, 1)) - fd = FDataBasis(constant, np.array(coefficients).reshape(-1, 1)) - - lfd = LinearDifferentialOperator(weights=coefficients) + lfd = LinearDifferentialOperator(weights=weights) self._assert_equal_weights( - lfd.weights, list(fd), - "Wrong list of weight functions of the linear operator") + lfd.weights, + weights, + "Wrong list of weight functions of the linear operator", + ) - def test_init_list_fdatabasis(self): + def test_init_list_fdatabasis(self) -> None: """Test initialization with functional weights.""" - n_basis = 4 n_weights = 6 @@ -86,8 +95,10 @@ def test_init_list_fdatabasis(self): lfd = LinearDifferentialOperator(weights=fdlist) self._assert_equal_weights( - lfd.weights, list(fd), - "Wrong list of weight functions of the linear operator") + lfd.weights, + list(fd), + "Wrong list of weight functions of the linear operator", + ) # Check failure if intervals do not match constant = Constant(domain_range=(0, 2)) @@ 
-95,8 +106,8 @@ def test_init_list_fdatabasis(self): with np.testing.assert_raises(ValueError): LinearDifferentialOperator(weights=fdlist) - def test_init_wrong_params(self): - + def test_init_wrong_params(self) -> None: + """Check invalid parameters.""" # Check specifying both arguments fail with np.testing.assert_raises(ValueError): LinearDifferentialOperator(1, weights=[1, 1]) @@ -106,17 +117,11 @@ def test_init_wrong_params(self): fdlist = [FDataBasis(monomial, [1, 2, 3])] with np.testing.assert_raises(ValueError): - LinearDifferentialOperator(weights=fdlist, - domain_range=(0, 2)) - - # Check wrong types fail - with np.testing.assert_raises(ValueError): - LinearDifferentialOperator(weights=['a']) - - with np.testing.assert_raises(ValueError): - LinearDifferentialOperator(weights='a') + LinearDifferentialOperator( + weights=fdlist, + domain_range=(0, 2), + ) if __name__ == '__main__': - print() unittest.main() From 46f8350bcc31379893d313d3287d851b6632af64 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 8 May 2021 16:20:17 +0200 Subject: [PATCH 327/417] Typed regularization. 
--- setup.cfg | 2 +- skfda/misc/operators/_operators.py | 14 ++-- skfda/misc/regularization/__init__.py | 8 +- skfda/misc/regularization/_regularization.py | 79 ++++++++++++++------ 4 files changed, 70 insertions(+), 33 deletions(-) diff --git a/setup.cfg b/setup.cfg index 86da72b2d..4fa33edcf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -106,7 +106,7 @@ rst-directives = versionadded,versionchanged, rst-roles = - attr,class,func,meth,mod,obj,ref,term, + attr,class,doc,func,meth,mod,obj,ref,term, allowed-domain-names = data, obj, result, results, val, value, values, var diff --git a/skfda/misc/operators/_operators.py b/skfda/misc/operators/_operators.py index 1cd171b90..ffc5b5fa7 100644 --- a/skfda/misc/operators/_operators.py +++ b/skfda/misc/operators/_operators.py @@ -1,16 +1,18 @@ from __future__ import annotations import abc -from typing import Callable, Generic, TypeVar, Union +from typing import Any, Callable, TypeVar, Union import multimethod import numpy as np +from typing_extensions import Protocol from ...representation import FData +from ...representation.basis import Basis OperatorInput = TypeVar( "OperatorInput", - bound=Union[np.ndarray, FData], + bound=Union[np.ndarray, FData, Basis], contravariant=True, ) @@ -21,7 +23,7 @@ ) -class Operator(abc.ABC, Generic[OperatorInput, OperatorOutput]): +class Operator(Protocol[OperatorInput, OperatorOutput]): """Abstract class for :term:`operators`.""" @abc.abstractmethod @@ -32,7 +34,7 @@ def __call__(self, vector: OperatorInput) -> OperatorOutput: @multimethod.multidispatch def gramian_matrix_optimization( - linear_operator: Callable[[OperatorInput], OperatorOutput], + linear_operator: Any, basis: OperatorInput, ) -> np.ndarray: """ @@ -46,7 +48,7 @@ def gramian_matrix_optimization( def gramian_matrix_numerical( - linear_operator: Callable[[OperatorInput], OperatorOutput], + linear_operator: Operator[OperatorInput, OperatorOutput], basis: OperatorInput, ) -> np.ndarray: """ @@ -65,7 +67,7 @@ def 
gramian_matrix_numerical( def gramian_matrix( - linear_operator: Callable[[OperatorInput], OperatorOutput], + linear_operator: Operator[OperatorInput, OperatorOutput], basis: OperatorInput, ) -> np.ndarray: r""" diff --git a/skfda/misc/regularization/__init__.py b/skfda/misc/regularization/__init__.py index 01f89d797..769366cff 100644 --- a/skfda/misc/regularization/__init__.py +++ b/skfda/misc/regularization/__init__.py @@ -1,3 +1,5 @@ -from ._regularization import (TikhonovRegularization, - L2Regularization, - compute_penalty_matrix) +from ._regularization import ( + L2Regularization, + TikhonovRegularization, + compute_penalty_matrix, +) diff --git a/skfda/misc/regularization/_regularization.py b/skfda/misc/regularization/_regularization.py index 42a496ac6..4928cb1cd 100644 --- a/skfda/misc/regularization/_regularization.py +++ b/skfda/misc/regularization/_regularization.py @@ -1,14 +1,24 @@ -from collections.abc import Iterable +from __future__ import annotations + import itertools -from skfda.misc.operators import gramian_matrix, Identity +from typing import Any, Generic, Iterable, Union -import scipy.linalg +import numpy as np from sklearn.base import BaseEstimator -import numpy as np +import scipy.linalg +from skfda.misc.operators import Identity, gramian_matrix +from ...representation import FData +from ...representation.basis import Basis +from ..operators import Operator +from ..operators._operators import OperatorInput -class TikhonovRegularization(BaseEstimator): + +class TikhonovRegularization( + BaseEstimator, # type: ignore + Generic[OperatorInput], +): r""" Implements Tikhonov regularization. @@ -33,7 +43,6 @@ class TikhonovRegularization(BaseEstimator): penalization. Examples: - Construct a regularization that penalizes the second derivative, which is a measure of the curvature of the function. 
@@ -77,21 +86,29 @@ class TikhonovRegularization(BaseEstimator): """ - def __init__(self, linear_operator, - *, regularization_parameter=1): + def __init__( + self, + linear_operator: Operator[OperatorInput, Any], + *, + regularization_parameter: float = 1, + ) -> None: self.linear_operator = linear_operator self.regularization_parameter = regularization_parameter - def penalty_matrix(self, basis): - r""" - Return a penalty matrix for ordinary least squares. - - """ + def penalty_matrix( + self, + basis: OperatorInput, + ) -> np.ndarray: + """Return a penalty matrix for ordinary least squares.""" return self.regularization_parameter * gramian_matrix( - self.linear_operator, basis) + self.linear_operator, + basis, + ) -class L2Regularization(TikhonovRegularization): +class L2Regularization( + TikhonovRegularization[Union[np.ndarray, FData, Basis]], +): r""" Implements :math:`L_2` regularization. @@ -113,16 +130,28 @@ class L2Regularization(TikhonovRegularization): """ - def __init__(self, *, regularization_parameter=1): + def __init__( + self, + *, + regularization_parameter: float = 1, + ) -> None: return super().__init__( linear_operator=Identity(), - regularization_parameter=regularization_parameter) + regularization_parameter=regularization_parameter, + ) + + +BasisTypes = Union[np.ndarray, FData, Basis] +Regularization = TikhonovRegularization[Any] -def compute_penalty_matrix(basis_iterable, regularization_parameter, - regularization): +def compute_penalty_matrix( + basis_iterable: Iterable[BasisTypes], + regularization_parameter: Union[float, Iterable[float]], + regularization: Union[None, Regularization, Iterable[Regularization]], +) -> Union[float, np.ndarray]: """ - Computes the regularization matrix for a linear differential operator. + Compute the regularization matrix for a linear differential operator. X can be a list of mixed data. 
@@ -137,13 +166,17 @@ def compute_penalty_matrix(basis_iterable, regularization_parameter, if not isinstance(regularization_parameter, Iterable): regularization_parameter = itertools.repeat( - regularization_parameter) + regularization_parameter, + ) penalty_blocks = [ np.zeros((len(b), len(b))) if r is None else a * r.penalty_matrix(b) - for b, r, a in zip(basis_iterable, regularization, - regularization_parameter)] + for b, r, a in zip( + basis_iterable, + regularization, + regularization_parameter, + )] penalty_matrix = scipy.linalg.block_diag(*penalty_blocks) return penalty_matrix From 69631036117caf115387d799d38a8d85effd956b Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 10 May 2021 11:08:26 +0200 Subject: [PATCH 328/417] Improve style of regularization tests. --- skfda/misc/operators/_operators.py | 8 +- tests/test_regularization.py | 239 +++++++++++++++++++---------- 2 files changed, 162 insertions(+), 85 deletions(-) diff --git a/skfda/misc/operators/_operators.py b/skfda/misc/operators/_operators.py index ffc5b5fa7..c95e4d32d 100644 --- a/skfda/misc/operators/_operators.py +++ b/skfda/misc/operators/_operators.py @@ -16,9 +16,11 @@ contravariant=True, ) +OutputType = Union[np.ndarray, Callable[[np.ndarray], np.ndarray]] + OperatorOutput = TypeVar( "OperatorOutput", - bound=Union[np.ndarray, Callable[[np.ndarray], np.ndarray]], + bound=OutputType, covariant=True, ) @@ -48,7 +50,7 @@ def gramian_matrix_optimization( def gramian_matrix_numerical( - linear_operator: Operator[OperatorInput, OperatorOutput], + linear_operator: Operator[OperatorInput, OutputType], basis: OperatorInput, ) -> np.ndarray: """ @@ -67,7 +69,7 @@ def gramian_matrix_numerical( def gramian_matrix( - linear_operator: Operator[OperatorInput, OperatorOutput], + linear_operator: Operator[OperatorInput, OutputType], basis: OperatorInput, ) -> np.ndarray: r""" diff --git a/tests/test_regularization.py b/tests/test_regularization.py index f9b8b1db9..41f839829 100644 --- 
a/tests/test_regularization.py +++ b/tests/test_regularization.py @@ -1,26 +1,48 @@ -import skfda -from skfda.misc.operators import LinearDifferentialOperator, gramian_matrix -from skfda.misc.operators._linear_differential_operator import ( - _monomial_evaluate_constant_linear_diff_op) -from skfda.misc.operators._operators import gramian_matrix_numerical -from skfda.misc.regularization import TikhonovRegularization, L2Regularization -from skfda.ml.regression import LinearRegression -from skfda.representation.basis import Constant, Monomial, BSpline, Fourier +"""Test regularization methods.""" +from __future__ import annotations + import unittest import warnings +from typing import Callable, Optional, Sequence, Union +import numpy as np from sklearn.datasets import make_regression from sklearn.linear_model import Ridge from sklearn.model_selection._split import train_test_split -import numpy as np +import skfda +from skfda.misc.operators import LinearDifferentialOperator, gramian_matrix +from skfda.misc.operators._linear_differential_operator import ( + _monomial_evaluate_constant_linear_diff_op, +) +from skfda.misc.operators._operators import gramian_matrix_numerical +from skfda.misc.regularization import L2Regularization, TikhonovRegularization +from skfda.ml.regression import LinearRegression +from skfda.representation.basis import ( + Basis, + BSpline, + Constant, + Fourier, + Monomial, +) +LinearDifferentialOperatorInput = Union[ + int, + Sequence[Union[float, Callable[[np.ndarray], np.ndarray]]], + None, +] -class TestLinearDifferentialOperatorRegularization(unittest.TestCase): - # def setUp(self): could be defined for set up before any test +class TestLinearDifferentialOperatorRegularization(unittest.TestCase): + """Test linear differential operator penalty with different bases.""" - def _test_penalty(self, basis, linear_diff_op, atol=0, result=None): + def _test_penalty( + self, + basis: Basis, + linear_diff_op: LinearDifferentialOperatorInput, + atol: float 
= 0, + result: Optional[np.ndarray] = None, + ) -> None: operator = LinearDifferentialOperator(linear_diff_op) @@ -30,73 +52,93 @@ def _test_penalty(self, basis, linear_diff_op, atol=0, result=None): np.testing.assert_allclose( penalty, numerical_penalty, - atol=atol + atol=atol, ) if result is not None: np.testing.assert_allclose( penalty, result, - atol=atol + atol=atol, ) - def test_constant_penalty(self): + def test_constant_penalty(self) -> None: + """Test penalty for Constant basis.""" basis = Constant(domain_range=(0, 3)) res = np.array([[12]]) self._test_penalty(basis, linear_diff_op=[2, 3, 4], result=res) - def test_monomial_linear_diff_op(self): + def test_monomial_linear_diff_op(self) -> None: + """Test directly the penalty for Monomial basis.""" n_basis = 5 basis = Monomial(n_basis=n_basis) linear_diff_op = [3] - res = np.array([[0., 0., 0., 0., 3.], - [0., 0., 0., 3., 0.], - [0., 0., 3., 0., 0.], - [0., 3., 0., 0., 0.], - [3., 0., 0., 0., 0.]]) + res = np.array([ + [0, 0, 0, 0, 3], + [0, 0, 0, 3, 0], + [0, 0, 3, 0, 0], + [0, 3, 0, 0, 0], + [3, 0, 0, 0, 0], + ]) np.testing.assert_allclose( - _monomial_evaluate_constant_linear_diff_op(basis, linear_diff_op), - res + _monomial_evaluate_constant_linear_diff_op( + basis, + np.array(linear_diff_op), + ), + res, ) linear_diff_op = [3, 2] - res = np.array([[0., 0., 0., 0., 3.], - [0., 0., 0., 3., 2.], - [0., 0., 3., 4., 0.], - [0., 3., 6., 0., 0.], - [3., 8., 0., 0., 0.]]) + res = np.array([ + [0, 0, 0, 0, 3], + [0, 0, 0, 3, 2], + [0, 0, 3, 4, 0], + [0, 3, 6, 0, 0], + [3, 8, 0, 0, 0], + ]) np.testing.assert_allclose( - _monomial_evaluate_constant_linear_diff_op(basis, linear_diff_op), - res + _monomial_evaluate_constant_linear_diff_op( + basis, + np.array(linear_diff_op), + ), + res, ) linear_diff_op = [3, 0, 5] - res = np.array([[0., 0., 0., 0., 3.], - [0., 0., 0., 3., 0.], - [0., 0., 3., 0., 10.], - [0., 3., 0., 30., 0.], - [3., 0., 60., 0., 0.]]) + res = np.array([ + [0, 0, 0, 0, 3], + [0, 0, 0, 3, 0], + 
[0, 0, 3, 0, 10], + [0, 3, 0, 30, 0], + [3, 0, 60, 0, 0], + ]) np.testing.assert_allclose( - _monomial_evaluate_constant_linear_diff_op(basis, linear_diff_op), - res + _monomial_evaluate_constant_linear_diff_op( + basis, + np.array(linear_diff_op), + ), + res, ) - def test_monomial_penalty(self): + def test_monomial_penalty(self) -> None: + """Test penalty for Monomial basis.""" basis = Monomial(n_basis=5, domain_range=(0, 3)) # Theorethical result - res = np.array([[0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0.], - [0., 0., 12., 54., 216.], - [0., 0., 54., 324., 1458.], - [0., 0., 216., 1458., 6998.4]]) + res = np.array([ + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 12, 54, 216], + [0, 0, 54, 324, 1458], + [0, 0, 216, 1458, 6998.4], + ]) self._test_penalty(basis, linear_diff_op=2, result=res) @@ -108,14 +150,17 @@ def test_monomial_penalty(self): self._test_penalty(basis, linear_diff_op=1) self._test_penalty(basis, linear_diff_op=27) - def test_fourier_penalty(self): + def test_fourier_penalty(self) -> None: + """Test penalty for Fourier basis.""" basis = Fourier(n_basis=5) - res = np.array([[0., 0., 0., 0., 0.], - [0., 1558.55, 0., 0., 0.], - [0., 0., 1558.55, 0., 0.], - [0., 0., 0., 24936.73, 0.], - [0., 0., 0., 0., 24936.73]]) + res = np.array([ + [0, 0, 0, 0, 0], + [0, 1558.55, 0, 0, 0], + [0, 0, 1558.55, 0, 0], + [0, 0, 0, 24936.73, 0], + [0, 0, 0, 0, 24936.73], + ]) # Those comparisons require atol as there are zeros involved self._test_penalty(basis, linear_diff_op=2, atol=0.01, result=res) @@ -127,14 +172,17 @@ def test_fourier_penalty(self): self._test_penalty(basis, linear_diff_op=1, atol=1e-7) self._test_penalty(basis, linear_diff_op=3, atol=1e-7) - def test_bspline_penalty(self): + def test_bspline_penalty(self) -> None: + """Test penalty for BSpline basis.""" basis = BSpline(n_basis=5) - res = np.array([[96., -132., 24., 12., 0.], - [-132., 192., -48., -24., 12.], - [24., -48., 48., -48., 24.], - [12., -24., -48., 192., -132.], - [0., 12., 24., -132., 
96.]]) + res = np.array([ + [96, -132, 24, 12, 0], + [-132, 192, -48, -24, 12], + [24, -48, 48, -48, 24], + [12, -24, -48, 192, -132], + [0, 12, 24, -132, 96], + ]) self._test_penalty(basis, linear_diff_op=2, result=res) @@ -149,14 +197,17 @@ def test_bspline_penalty(self): basis = BSpline(n_basis=16, order=8) self._test_penalty(basis, linear_diff_op=0, atol=1e-7) - def test_bspline_penalty_special_case(self): + def test_bspline_penalty_special_case(self) -> None: + """Test for behavior like in issue #185.""" basis = BSpline(n_basis=5) - res = np.array([[1152., -2016., 1152., -288., 0.], - [-2016., 3600., -2304., 1008., -288.], - [1152., -2304., 2304., -2304., 1152.], - [-288., 1008., -2304., 3600., -2016.], - [0., -288., 1152., -2016., 1152.]]) + res = np.array([ + [1152, -2016, 1152, -288, 0], + [-2016, 3600, -2304, 1008, -288], + [1152, -2304, 2304, -2304, 1152], + [-288, 1008, -2304, 3600, -2016], + [0, -288, 1152, -2016, 1152], + ]) operator = LinearDifferentialOperator(basis.order - 1) penalty = gramian_matrix(operator, basis) @@ -164,63 +215,81 @@ def test_bspline_penalty_special_case(self): np.testing.assert_allclose( penalty, - res + res, ) np.testing.assert_allclose( numerical_penalty, - res + res, ) class TestEndpointsDifferenceRegularization(unittest.TestCase): + """Test regularization with a callable.""" - def test_basis_conversion(self): - + def test_basis_conversion(self) -> None: + """Test that in basis smoothing.""" data_matrix = np.linspace([0, 1, 2, 3], [1, 2, 3, 4], 100) fd = skfda.FDataGrid(data_matrix.T) smoother = skfda.preprocessing.smoothing.BasisSmoother( basis=skfda.representation.basis.BSpline( - n_basis=10, domain_range=fd.domain_range), + n_basis=10, + domain_range=fd.domain_range, + ), regularization=TikhonovRegularization( - lambda x: x(1)[:, 0] - x(0)[:, 0]), - smoothing_parameter=10000) + lambda x: x(1)[:, 0] - x(0)[:, 0], + ), + smoothing_parameter=10000, + ) fd_basis = smoother.fit_transform(fd) np.testing.assert_allclose( 
fd_basis(0), fd_basis(1), - atol=0.001 + atol=0.001, ) class TestL2Regularization(unittest.TestCase): + """Test the L2 regularization.""" - def test_multivariate(self): + def test_multivariate(self) -> None: + """Test that it works with multivariate inputs.""" - def ignore_scalar_warning(): + def ignore_scalar_warning() -> None: # noqa: WPS430 warnings.filterwarnings( - "ignore", category=UserWarning, - message="All the covariates are scalar.") + "ignore", + category=UserWarning, + message="All the covariates are scalar.", + ) - X, y = make_regression(n_samples=20, n_features=10, - random_state=1, bias=3.5) + X, y = make_regression( + n_samples=20, + n_features=10, + random_state=1, + bias=3.5, + ) X_train, X_test, y_train, _ = train_test_split( - X, y, random_state=2) + X, + y, + random_state=2, + ) - for regularization_parameter in [0, 1, 10, 100]: + for regularization_parameter in (0, 1, 10, 100): with self.subTest( - regularization_parameter=regularization_parameter): + regularization_parameter=regularization_parameter, + ): sklearn_l2 = Ridge(alpha=regularization_parameter) skfda_l2 = LinearRegression( regularization=L2Regularization( - regularization_parameter=regularization_parameter), + regularization_parameter=regularization_parameter, + ), ) sklearn_l2.fit(X_train, y_train) @@ -234,10 +303,16 @@ def ignore_scalar_warning(): skfda_y_pred = skfda_l2.predict(X_test) np.testing.assert_allclose( - sklearn_l2.coef_, skfda_l2.coef_[0]) + sklearn_l2.coef_, + skfda_l2.coef_[0], + ) np.testing.assert_allclose( - sklearn_l2.intercept_, skfda_l2.intercept_) + sklearn_l2.intercept_, + skfda_l2.intercept_, + ) np.testing.assert_allclose( - sklearn_y_pred, skfda_y_pred) + sklearn_y_pred, + skfda_y_pred, + ) From 292c1a0b32298609eb8009d1ef146cb37377837a Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Tue, 11 May 2021 16:48:24 +0200 Subject: [PATCH 329/417] clustering classes renamed and example corrected --- examples/plot_clustering.py | 40 ++++++++++--------- 
skfda/exploratory/visualization/clustering.py | 35 ++++++++++++++-- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/examples/plot_clustering.py b/examples/plot_clustering.py index f83ca5323..1e25062a1 100644 --- a/examples/plot_clustering.py +++ b/examples/plot_clustering.py @@ -12,14 +12,16 @@ # sphinx_gallery_thumbnail_number = 6 -from skfda import datasets -from skfda.exploratory.visualization.clustering import ( - plot_clusters, plot_cluster_lines, plot_cluster_bars) -from skfda.ml.clustering import KMeans, FuzzyCMeans - import matplotlib.pyplot as plt import numpy as np +from skfda import datasets +from skfda.exploratory.visualization.clustering import ( + ClusterMembershipLinesPlot, + ClusterMembershipPlot, + ClusterPlot, +) +from skfda.ml.clustering import FuzzyCMeans, KMeans ############################################################################## # First, the Canadian Weather dataset is downloaded from the package 'fda' in @@ -81,8 +83,8 @@ cluster_colors = climate_colors[np.array([0, 2, 1])] cluster_labels = climates.categories[np.array([0, 2, 1])] -plot_clusters(kmeans, fd, cluster_colors=cluster_colors, - cluster_labels=cluster_labels) +ClusterPlot(kmeans, fd, cluster_colors=cluster_colors, + cluster_labels=cluster_labels).plot() ############################################################################## # Other clustering algorithm implemented is the Fuzzy K-Means found in the @@ -106,8 +108,8 @@ # be used. It assigns each sample to the cluster whose membership value is the # greatest. 
-plot_clusters(fuzzy_kmeans, fd, cluster_colors=cluster_colors, - cluster_labels=cluster_labels) +ClusterPlot(fuzzy_kmeans, fd, cluster_colors=cluster_colors, + cluster_labels=cluster_labels).plot() ############################################################################## # Another plot implemented to show the results in the class @@ -119,8 +121,8 @@ colors_by_climate = colormap(climates.codes / (n_climates - 1)) -plot_cluster_lines(fuzzy_kmeans, fd, cluster_labels=cluster_labels, - sample_colors=colors_by_climate) +ClusterMembershipLinesPlot(fuzzy_kmeans, fd, cluster_labels=cluster_labels, + sample_colors=colors_by_climate).plot() ############################################################################## # Finally, the function @@ -128,8 +130,8 @@ # returns a barplot. Each sample is designated with a bar which is filled # proportionally to the membership values with the color of each cluster. -plot_cluster_bars(fuzzy_kmeans, fd, cluster_colors=cluster_colors, - cluster_labels=cluster_labels) +ClusterMembershipPlot(fuzzy_kmeans, fd, cluster_colors=cluster_colors, + cluster_labels=cluster_labels).plot() ############################################################################## # The possibility of sorting the bars according to a cluster is given @@ -137,15 +139,15 @@ # [0, n_clusters). 
# # We can order the data using the first cluster: -plot_cluster_bars(fuzzy_kmeans, fd, sort=0, cluster_colors=cluster_colors, - cluster_labels=cluster_labels) +ClusterMembershipPlot(fuzzy_kmeans, fd, sort=0, cluster_colors=cluster_colors, + cluster_labels=cluster_labels).plot() ############################################################################## # Using the second cluster: -plot_cluster_bars(fuzzy_kmeans, fd, sort=1, cluster_colors=cluster_colors, - cluster_labels=cluster_labels) +ClusterMembershipPlot(fuzzy_kmeans, fd, sort=1, cluster_colors=cluster_colors, + cluster_labels=cluster_labels).plot() ############################################################################## # And using the third cluster: -plot_cluster_bars(fuzzy_kmeans, fd, sort=2, cluster_colors=cluster_colors, - cluster_labels=cluster_labels) +ClusterMembershipPlot(fuzzy_kmeans, fd, sort=2, cluster_colors=cluster_colors, + cluster_labels=cluster_labels).plot() diff --git a/skfda/exploratory/visualization/clustering.py b/skfda/exploratory/visualization/clustering.py index 1fa23a459..f94fe24cb 100644 --- a/skfda/exploratory/visualization/clustering.py +++ b/skfda/exploratory/visualization/clustering.py @@ -353,9 +353,9 @@ def plot(self): return self._plot_clusters() -class ClusterPlotLines(BasePlot): +class ClusterMembershipLinesPlot(BasePlot): """ - Class ClusterPlotLines. + Class ClusterMembershipLinesPlot. Args: estimator (BaseEstimator object): estimator used to calculate the @@ -488,7 +488,36 @@ def plot(self): return self.fig -class ClusterPlotBars(BasePlot): +class ClusterMembershipPlot(BasePlot): + + """ + Class ClusterMembershipPlot. + + Args: + estimator (BaseEstimator object): estimator used to calculate the + clusters. + X (FDataGrd object): contains the samples which are grouped + into different clusters. + fig (figure object, optional): figure over which the graph is + plotted in case ax is not specified. If None and ax is also None, + the figure is initialized. 
+ axes (axes object, optional): axis over where the graph is plotted. + If None, see param fig. + sample_colors (list of colors, optional): contains in order the colors + of each sample of the fdatagrid. + sample_labels (list of str, optional): contains in order the labels + of each sample of the fdatagrid. + cluster_labels (list of str, optional): contains in order the names of + each cluster the samples of the fdatagrid are classified into. + colormap(colormap, optional): colormap from which the colors of the + plot are taken. + x_label (str): Label for the x-axis. Defaults to "Cluster". + y_label (str): Label for the y-axis. Defaults to + "Degree of membership". + title (str, optional): Title for the figure where the clustering + results are ploted. + Defaults to "Degrees of membership of the samples to each cluster". + """ def __init__( self, From 545883e501588f9ad193d92aad006d70cde33d97 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 12 May 2021 16:55:48 +0200 Subject: [PATCH 330/417] dataset handwrit --- skfda/datasets/__init__.py | 3 +- skfda/datasets/_real_datasets.py | 92 ++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 1 deletion(-) diff --git a/skfda/datasets/__init__.py b/skfda/datasets/__init__.py index 7fa549473..fe0b45366 100644 --- a/skfda/datasets/__init__.py +++ b/skfda/datasets/__init__.py @@ -3,7 +3,8 @@ fetch_phoneme, fetch_growth, fetch_tecator, fetch_medflies, fetch_weather, fetch_aemet, - fetch_octane, fetch_gait) + fetch_octane, fetch_gait, + fetch_handwrit) from ._samples_generators import (make_gaussian, make_gaussian_process, make_sinusoidal_process, diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index aa3b66bb5..cc1326108 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -1206,3 +1206,95 @@ def fetch_gait( if fetch_gait.__doc__ is not None: # docstrings can be stripped off fetch_gait.__doc__ += _gait_descr + _param_descr + 
+_handwrit_descr = """ + Data representing the X-Y coordinates of 20 writing the "fda". + + References: + Ramsay, James O., and Silverman, Bernard W. (2006), + Functional Data Analysis, 2nd ed. , Springer, New York. +""" + + +@overload +def fetch_handwrit( + *, + return_X_y: Literal[False] = False, + as_frame: bool = False, +) -> Bunch: + pass + + +@overload +def fetch_handwrit( + *, + return_X_y: Literal[True], + as_frame: Literal[False] = False, +) -> Tuple[FDataGrid, None]: + pass + + +@overload +def fetch_handwrit( + *, + return_X_y: Literal[True], + as_frame: Literal[True], +) -> Tuple[DataFrame, None]: + pass + +def fetch_handwrit( + return_X_y: bool = False, + as_frame: bool = False, +) -> Union[Bunch, Tuple[FDataGrid, None], Tuple[DataFrame, None]]: + """ + Load the HANDWRIT dataset. + + The data is obtained from the R package 'fda' from CRAN. + + """ + descr = _handwrit_descr + + raw_data = _fetch_fda("handwrit") + + data = raw_data["handwrit"] + + data_matrix = np.asarray(data) + data_matrix = np.transpose(data_matrix, axes=(1, 0, 2)) + grid_points = np.asarray(data.coords.get('dim_0'), np.float64) + sample_names = np.asarray(data.coords.get('dim_1')) + feature_name = 'handwrit' + + curves = FDataGrid( + data_matrix=data_matrix, + grid_points=grid_points, + dataset_name=feature_name, + sample_names=sample_names, + argument_names=("time",), + coordinate_names=( + "x coordinates", + "y coordinates", + ), + ) + + frame = None + + if as_frame: + curves = pd.DataFrame({feature_name: curves}) + frame = curves + + if return_X_y: + return curves, None + + return Bunch( + data=curves, + target=None, + frame=frame, + categories={}, + feature_names=[feature_name], + target_names=[], + DESCR=descr, + ) + + +if fetch_handwrit.__doc__ is not None: # docstrings can be stripped off + fetch_handwrit.__doc__ += _gait_descr + _param_descr \ No newline at end of file From 02c02de5e7753201c28dec8d5d577b47d1b405d5 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 12 
May 2021 18:29:38 +0200 Subject: [PATCH 331/417] solved kwargs in parameters --- .../visualization/representation.py | 111 +++++++++--------- skfda/representation/_functional_data.py | 2 +- skfda/representation/grid.py | 2 +- 3 files changed, 56 insertions(+), 59 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 50689d531..d3b049ed8 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -187,6 +187,10 @@ class GraphPlot(BasePlot): `group_names` is passed, it will be used for finding the names to display in the legend. Otherwise, the values passed to `group` will be used. + kwargs: if dim_domain is 1, keyword arguments to be passed to + the matplotlib.pyplot.plot function; if dim_domain is 2, + keyword arguments to be passed to the + matplotlib.pyplot.plot_surface function. Attributes: gradient_list: normalization of the values from gradient color_list that will be used to determine the intensity of the color @@ -212,6 +216,7 @@ def __init__( group_names: Optional[Indexable[K, str]] = None, colormap_name: str = 'autumn', legend: bool = False, + **kwargs: Any, ) -> None: BasePlot.__init__(self) self.fdata = fdata @@ -256,11 +261,31 @@ def __init__( self.legend = legend self.colormap_name = colormap_name + if self.gradient_list is None: + sample_colors, patches = _get_color_info( + self.fdata, + self.group, + self.group_names, + self.group_colors, + self.legend, + kwargs, + ) + else: + patches = None + colormap = matplotlib.cm.get_cmap(self.colormap_name) + colormap = colormap.reversed() + + sample_colors = [None] * self.fdata.n_samples + for m in range(self.fdata.n_samples): + sample_colors[m] = colormap(self.gradient_list[m]) + + self.sample_colors = sample_colors + self.patches = patches + self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( self, - **kwargs: Any, ) -> Figure: """ Plot the graph. 
@@ -271,12 +296,7 @@ def plot( visualizations, one that displays the functions without any criteria choosing the colors and a new one that displays the function with a gradient of colors depending on the initial - gradient_color_list (normalized in gradient_list). - Args: - kwargs: if dim_domain is 1, keyword arguments to be passed to - the matplotlib.pyplot.plot function; if dim_domain is 2, - keyword arguments to be passed to the - matplotlib.pyplot.plot_surface function. + gradient_color_list (normalized in gradient_list). Returns: fig (figure object): figure object in which the graphs are plotted. """ @@ -290,26 +310,6 @@ def plot( else: self.domain_range = _to_domain_range(self.domain_range) - if self.gradient_list is None: - sample_colors, patches = _get_color_info( - self.fdata, - self.group, - self.group_names, - self.group_colors, - self.legend, - kwargs, - ) - else: - patches = None - colormap = matplotlib.cm.get_cmap(self.colormap_name) - colormap = colormap.reversed() - - sample_colors = [None] * self.fdata.n_samples - for m in range(self.fdata.n_samples): - sample_colors[m] = colormap(self.gradient_list[m]) - - self.sample_colors = sample_colors - color_dict: Mapping[str, Optional[ColorLike]] = {} if self.fdata.dim_domain == 1: @@ -324,13 +324,12 @@ def plot( for i in range(self.fdata.dim_codomain): for j in range(self.fdata.n_samples): - set_color_dict(sample_colors, j, color_dict) + set_color_dict(self.sample_colors, j, color_dict) self.artists[j, i] = self.axes[i].plot( eval_points, mat[j, ..., i].T, **color_dict, - **kwargs, )[0] else: @@ -359,17 +358,16 @@ def plot( for k in range(self.fdata.dim_codomain): for h in range(self.fdata.n_samples): - set_color_dict(sample_colors, h, color_dict) + set_color_dict(self.sample_colors, h, color_dict) self.artists[h, k] = self.axes[k].plot_surface( X, Y, Z[h, ..., k], **color_dict, - **kwargs, )[0] - _set_labels(self.fdata, self.fig, self.axes, patches) + _set_labels(self.fdata, self.fig, self.axes, 
self.patches) return self.fig @@ -458,6 +456,10 @@ class ScatterPlot(BasePlot): `group_names` is passed, it will be used for finding the names to display in the legend. Otherwise, the values passed to `group` will be used. + kwargs: if dim_domain is 1, keyword arguments to be passed to + the matplotlib.pyplot.plot function; if dim_domain is 2, + keyword arguments to be passed to the + matplotlib.pyplot.plot_surface function. """ def __init__( @@ -475,6 +477,7 @@ def __init__( group_colors: Optional[Indexable[K, ColorLike]] = None, group_names: Optional[Indexable[K, str]] = None, legend: bool = False, + **kwargs: Any, ) -> None: BasePlot.__init__(self) self.fdata = fdata @@ -496,20 +499,30 @@ def __init__( self.group_names = group_names self.legend = legend + if self.domain_range is None: + self.domain_range = self.fdata.domain_range + else: + self.domain_range = _to_domain_range(self.domain_range) + + sample_colors, patches = _get_color_info( + self.fdata, + self.group, + self.group_names, + self.group_colors, + self.legend, + kwargs, + ) + self.sample_colors = sample_colors + self.patches = patches + self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( self, - **kwargs: Any, ) -> Figure: """ Scatter FDataGrid object. - Args: - kwargs: if dim_domain is 1, keyword arguments to be passed to - the matplotlib.pyplot.plot function; if dim_domain is 2, - keyword arguments to be passed to the - matplotlib.pyplot.plot_surface function. Returns: fig: figure object in which the graphs are plotted. 
""" @@ -518,20 +531,6 @@ def plot( dtype=Artist, ) - if self.domain_range is None: - self.domain_range = self.fdata.domain_range - else: - self.domain_range = _to_domain_range(self.domain_range) - - sample_colors, patches = _get_color_info( - self.fdata, - self.group, - self.group_names, - self.group_colors, - self.legend, - kwargs, - ) - color_dict: Mapping[str, Optional[ColorLike]] = {} if self.fdata.dim_domain == 1: @@ -539,7 +538,7 @@ def plot( for i in range(self.fdata.dim_codomain): for j in range(self.fdata.n_samples): - set_color_dict(sample_colors, j, color_dict) + set_color_dict(self.sample_colors, j, color_dict) self.artists[j, i] = self.axes[i].scatter( self.grid_points[0], @@ -547,7 +546,6 @@ def plot( **color_dict, picker=True, pickradius=2, - **kwargs, ) else: @@ -559,7 +557,7 @@ def plot( for k in range(self.fdata.dim_codomain): for h in range(self.fdata.n_samples): - set_color_dict(sample_colors, h, color_dict) + set_color_dict(self.sample_colors, h, color_dict) self.artists[h, k] = self.axes[k].scatter( X, @@ -568,10 +566,9 @@ def plot( **color_dict, picker=True, pickradius=2, - **kwargs, ) - _set_labels(self.fdata, self.fig, self.axes, patches) + _set_labels(self.fdata, self.fig, self.axes, self.patches) return self.fig diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index 3ff844015..73c0249dc 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -781,7 +781,7 @@ def plot(self, *args: Any, **kwargs: Any) -> Any: """ from ..exploratory.visualization.representation import GraphPlot - return GraphPlot(fdata=self, *args).plot(**kwargs) + return GraphPlot(fdata=self, *args, **kwargs).plot() @abstractmethod def copy( diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 21abbbff0..66ccdcabe 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -821,7 +821,7 @@ def scatter(self, *args: Any, **kwargs: Any) 
-> Figure: """ from ..exploratory.visualization.representation import ScatterPlot - return ScatterPlot(self, *args).plot(**kwargs) + return ScatterPlot(self, *args, **kwargs).plot() def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: """Return the basis representation of the object. From 99d191afda7fd2cabf5b74f999d205946fd23438 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Wed, 12 May 2021 22:26:04 +0200 Subject: [PATCH 332/417] kwargs --- skfda/exploratory/visualization/representation.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index d3b049ed8..0316ca263 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -286,6 +286,7 @@ def __init__( def plot( self, + **kwargs, ) -> Figure: """ Plot the graph. @@ -330,6 +331,7 @@ def plot( eval_points, mat[j, ..., i].T, **color_dict, + **kwargs, )[0] else: @@ -365,6 +367,7 @@ def plot( Y, Z[h, ..., k], **color_dict, + **kwargs, )[0] _set_labels(self.fdata, self.fig, self.axes, self.patches) @@ -519,6 +522,7 @@ def __init__( def plot( self, + **kwargs: Any, ) -> Figure: """ Scatter FDataGrid object. 
@@ -546,6 +550,7 @@ def plot( **color_dict, picker=True, pickradius=2, + **kwargs, ) else: @@ -566,6 +571,7 @@ def plot( **color_dict, picker=True, pickradius=2, + **kwargs, ) _set_labels(self.fdata, self.fig, self.axes, self.patches) From 5496e799a709ee28c52a4f20da8091527bc9c1e5 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 13 May 2021 11:53:31 +0200 Subject: [PATCH 333/417] handwrit data set --- skfda/datasets/_real_datasets.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index cc1326108..8c5319273 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -1207,8 +1207,11 @@ def fetch_gait( if fetch_gait.__doc__ is not None: # docstrings can be stripped off fetch_gait.__doc__ += _gait_descr + _param_descr -_handwrit_descr = """ - Data representing the X-Y coordinates of 20 writing the "fda". +_handwriting_descr = """ + Data representing the X-Y coordinates along time obtained while + writing the word "fda". The sample contains 20 instances measured over + 2.3 seconds that had been aligned for a better understanding. Each instance + is formed by 1401 coordinate values. References: Ramsay, James O., and Silverman, Bernard W. 
(2006), @@ -1217,7 +1220,7 @@ def fetch_gait( @overload -def fetch_handwrit( +def fetch_handwriting( *, return_X_y: Literal[False] = False, as_frame: bool = False, @@ -1226,7 +1229,7 @@ def fetch_handwrit( @overload -def fetch_handwrit( +def fetch_handwriting( *, return_X_y: Literal[True], as_frame: Literal[False] = False, @@ -1235,14 +1238,14 @@ def fetch_handwrit( @overload -def fetch_handwrit( +def fetch_handwriting( *, return_X_y: Literal[True], as_frame: Literal[True], ) -> Tuple[DataFrame, None]: pass -def fetch_handwrit( +def fetch_handwriting( return_X_y: bool = False, as_frame: bool = False, ) -> Union[Bunch, Tuple[FDataGrid, None], Tuple[DataFrame, None]]: @@ -1252,7 +1255,7 @@ def fetch_handwrit( The data is obtained from the R package 'fda' from CRAN. """ - descr = _handwrit_descr + descr = _handwriting_descr raw_data = _fetch_fda("handwrit") @@ -1296,5 +1299,5 @@ def fetch_handwrit( ) -if fetch_handwrit.__doc__ is not None: # docstrings can be stripped off - fetch_handwrit.__doc__ += _gait_descr + _param_descr \ No newline at end of file +if fetch_handwriting.__doc__ is not None: # docstrings can be stripped off + fetch_handwriting.__doc__ += _handwriting_descr + _param_descr From ff26d10ebeb6bc8812f82695588b592bda05f818 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 13 May 2021 11:58:23 +0200 Subject: [PATCH 334/417] corrected init --- skfda/datasets/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/datasets/__init__.py b/skfda/datasets/__init__.py index fe0b45366..c427cca35 100644 --- a/skfda/datasets/__init__.py +++ b/skfda/datasets/__init__.py @@ -4,7 +4,7 @@ fetch_tecator, fetch_medflies, fetch_weather, fetch_aemet, fetch_octane, fetch_gait, - fetch_handwrit) + fetch_handwriting) from ._samples_generators import (make_gaussian, make_gaussian_process, make_sinusoidal_process, From a1dd149495e25c3a45009de06734013f292092ef Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Thu, 13 May 2021 12:20:01 +0200 
Subject: [PATCH 335/417] solved warnings --- skfda/datasets/__init__.py | 35 ++++++++++++++++++++------------ skfda/datasets/_real_datasets.py | 4 ++-- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/skfda/datasets/__init__.py b/skfda/datasets/__init__.py index c427cca35..d5f6f0cd7 100644 --- a/skfda/datasets/__init__.py +++ b/skfda/datasets/__init__.py @@ -1,13 +1,22 @@ -from ._real_datasets import (fdata_constructor, fetch_cran, - fetch_ucr, - fetch_phoneme, fetch_growth, - fetch_tecator, fetch_medflies, - fetch_weather, fetch_aemet, - fetch_octane, fetch_gait, - fetch_handwriting) -from ._samples_generators import (make_gaussian, - make_gaussian_process, - make_sinusoidal_process, - make_multimodal_samples, - make_multimodal_landmarks, - make_random_warping) +from ._real_datasets import ( + fdata_constructor, + fetch_aemet, + fetch_cran, + fetch_gait, + fetch_growth, + fetch_handwriting, + fetch_medflies, + fetch_octane, + fetch_phoneme, + fetch_tecator, + fetch_ucr, + fetch_weather, +) +from ._samples_generators import ( + make_gaussian, + make_gaussian_process, + make_multimodal_landmarks, + make_multimodal_samples, + make_random_warping, + make_sinusoidal_process, +) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index 8c5319273..49d43c14c 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -3,13 +3,12 @@ import numpy as np import pandas as pd +import rdata from numpy import ndarray from pandas import DataFrame, Series from sklearn.utils import Bunch from typing_extensions import Literal -import rdata - from .. import FDataGrid @@ -1245,6 +1244,7 @@ def fetch_handwriting( ) -> Tuple[DataFrame, None]: pass + def fetch_handwriting( return_X_y: bool = False, as_frame: bool = False, From 4730d04ea4ffd903aae0a6afab424b98fb361a35 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Fri, 14 May 2021 17:49:25 +0200 Subject: [PATCH 336/417] Fix #324. 
Remove the change of dataset_name for FDataGrid derivative, and fix the examples that relied in the old behaviour. --- examples/plot_elastic_registration.py | 26 ++++++------ examples/plot_explore.py | 50 ++++++++++++++++------- skfda/representation/basis/_fdatabasis.py | 2 +- skfda/representation/grid.py | 6 --- 4 files changed, 50 insertions(+), 34 deletions(-) diff --git a/examples/plot_elastic_registration.py b/examples/plot_elastic_registration.py index c49be791f..e577eae37 100644 --- a/examples/plot_elastic_registration.py +++ b/examples/plot_elastic_registration.py @@ -10,14 +10,13 @@ # sphinx_gallery_thumbnail_number = 5 +import numpy as np + import skfda -from skfda.datasets import make_multimodal_samples, fetch_growth +from skfda.datasets import fetch_growth, make_multimodal_samples from skfda.preprocessing.registration import ElasticRegistration from skfda.preprocessing.registration.elastic import elastic_mean -import numpy as np - - ############################################################################## # In the example of pairwise alignment was shown the usage of # :class:`~skfda.preprocessing.registration.ElasticRegistration` to align @@ -31,7 +30,7 @@ # # We will create a synthetic dataset to show the basic usage of the # registration. 
-# + fd = make_multimodal_samples(n_modes=2, stop=4, random_state=1) fd.plot() @@ -78,15 +77,16 @@ # Obtain velocity curves fd.interpolation = skfda.representation.interpolation.SplineInterpolation(3) -fd = fd.to_grid(np.linspace(*fd.domain_range[0], 200)).derivative() -fd = fd.to_grid(np.linspace(*fd.domain_range[0], 50)) -fd.plot() +fd_derivative = fd.to_grid(np.linspace(*fd.domain_range[0], 200)).derivative() +fd_derivative = fd_derivative.to_grid(np.linspace(*fd.domain_range[0], 50)) +fd_derivative.dataset_name = f"{fd.dataset_name} - derivative" +fd_derivative.plot() ############################################################################## # We now show the aligned curves: -fd_align = elastic_registration.fit_transform(fd) -fd_align.dataset_name += " - aligned" +fd_align = elastic_registration.fit_transform(fd_derivative) +fd_align.dataset_name = f"{fd.dataset_name} - derivative aligned" fd_align.plot() @@ -94,10 +94,10 @@ # * Srivastava, Anuj & Klassen, Eric P. (2016). Functional and shape data # analysis. In *Functional Data and Elastic Registration* (pp. 73-122). # Springer. -# +# # * Tucker, J. D., Wu, W. and Srivastava, A. (2013). Generative Models for -# Functional Data using Phase and Amplitude Separation. Computational Statistics -# and Data Analysis, Vol. 61, 50-66. +# Functional Data using Phase and Amplitude Separation. +# Computational Statistics and Data Analysis, Vol. 61, 50-66. # # * J. S. Marron, James O. Ramsay, Laura M. Sangalli and Anuj Srivastava # (2015). Functional Data Analysis of Amplitude and Phase Variation. 
diff --git a/examples/plot_explore.py b/examples/plot_explore.py index dbc6b9a94..59a438c56 100644 --- a/examples/plot_explore.py +++ b/examples/plot_explore.py @@ -9,10 +9,9 @@ # Author: Miguel Carbajo Berrocal # License: MIT -import skfda - import numpy as np +import skfda ############################################################################## # In this example we are going to explore the functional properties of the @@ -33,11 +32,18 @@ low_fat = fat < 20 labels = np.full(fd.n_samples, 'high fat') labels[low_fat] = 'low fat' -colors = {'high fat': 'red', - 'low fat': 'blue'} - -fig = fd.plot(group=labels, group_colors=colors, - linewidth=0.5, alpha=0.7, legend=True) +colors = { + 'high fat': 'red', + 'low fat': 'blue', +} + +fig = fd.plot( + group=labels, + group_colors=colors, + linewidth=0.5, + alpha=0.7, + legend=True, +) ############################################################################## # The means of each group are the following ones. @@ -47,9 +53,13 @@ means = mean_high.concatenate(mean_low) -means.dataset_name = fd.dataset_name + ' - means' -means.plot(group=['high fat', 'low fat'], group_colors=colors, - linewidth=0.5, legend=True) +means.dataset_name = f"{fd.dataset_name} - means" +means.plot( + group=['high fat', 'low fat'], + group_colors=colors, + linewidth=0.5, + legend=True, +) ############################################################################## # In this dataset, the vertical shift in the original trajectories is not @@ -60,11 +70,23 @@ # The first derivative is shown below: fdd = fd.derivative() -fig = fdd.plot(group=labels, group_colors=colors, - linewidth=0.5, alpha=0.7, legend=True) +fdd.dataset_name = f"{fd.dataset_name} - derivative" +fig = fdd.plot( + group=labels, + group_colors=colors, + linewidth=0.5, + alpha=0.7, + legend=True, +) ############################################################################## # We now show the second derivative: fdd = fd.derivative(order=2) -fig = fdd.plot(group=labels, 
group_colors=colors, - linewidth=0.5, alpha=0.7, legend=True) +fdd.dataset_name = f"{fd.dataset_name} - second derivative" +fig = fdd.plot( + group=labels, + group_colors=colors, + linewidth=0.5, + alpha=0.7, + legend=True, +) diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index d8d3e4574..a9071e7f4 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -348,7 +348,7 @@ def derivative(self: T, *, order: int = 1) -> T: # noqa: D102 order, ) - return FDataBasis(basis, coefficients) + return self.copy(basis=basis, coefficients=coefficients) def sum( # noqa: WPS125 self: T, diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 9622c1007..f9389d902 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -465,14 +465,8 @@ def derivative(self: T, *, order: int = 1) -> T: ]) data_matrix = operator(self.data_matrix.astype(float)) - dataset_name = ( - f"{self.dataset_name} - {order} derivative" - if self.dataset_name else None - ) - return self.copy( data_matrix=data_matrix, - dataset_name=dataset_name, ) def _check_same_dimensions(self: T, other: T) -> None: From e7d2a553a80ca0a6c3bd3cf8a9834efa50519038 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Fri, 14 May 2021 22:35:21 +0200 Subject: [PATCH 337/417] corrections colormap and name gradient_criteria --- .../visualization/representation.py | 41 ++++++++----------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 0316ca263..d0401462c 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -7,21 +7,14 @@ like depth measures. 
""" -from typing import ( - Any, - Mapping, - Optional, - Sequence, - Tuple, - TypeVar, - Union, -) +from typing import Any, Mapping, Optional, Sequence, Tuple, TypeVar, Union import matplotlib.cm import matplotlib.patches import numpy as np from matplotlib.artist import Artist from matplotlib.axes import Axes +from matplotlib.colors import Colormap from matplotlib.figure import Figure from typing_extensions import Protocol @@ -130,15 +123,15 @@ class GraphPlot(BasePlot): argument to display the functions wtih a gradient of colors. Args: fdata: functional data set that we want to plot. - gradient_color_list: list of real values used to determine the color + gradient_criteria: list of real values used to determine the color in which each of the instances will be plotted. The size max_grad: maximum value that the gradient_list can take, it will be - used to normalize the gradient_color_list in order to get values + used to normalize the gradient_criteria in order to get values thatcan be used in the funcion colormap.__call__(). If not declared it will be initialized to the maximum value of gradient_list min_grad: minimum value that the gradient_list can take, it will be - used to normalize the gradient_color_list in order to get values + used to normalize the gradient_criteria in order to get values thatcan be used in the funcion colormap.__call__(). If not declared it will be initialized to the minimum value of gradient_list. @@ -181,7 +174,7 @@ class GraphPlot(BasePlot): group_names (list of str): name of each of the groups which appear in a legend, there must be one for each one. Defaults to None and the legend is not shown. Implies `legend=True`. - colormap_name: name of the colormap to be used. By default we will + colormap: name of the colormap to be used. By default we will use autumn. legend (bool): if `True`, show a legend with the groups. 
If `group_names` is passed, it will be used for finding the names @@ -200,7 +193,7 @@ class GraphPlot(BasePlot): def __init__( self, fdata: FData, - gradient_color_list: Optional[Sequence[float]] = None, + gradient_criteria: Optional[Sequence[float]] = None, max_grad: Optional[float] = None, min_grad: Optional[float] = None, chart: Union[Figure, Axes, None] = None, @@ -214,15 +207,15 @@ def __init__( group: Optional[Sequence[K]] = None, group_colors: Optional[Indexable[K, ColorLike]] = None, group_names: Optional[Indexable[K, str]] = None, - colormap_name: str = 'autumn', + colormap: Union[Colormap, str, None] = 'autumn', legend: bool = False, **kwargs: Any, ) -> None: BasePlot.__init__(self) self.fdata = fdata - self.gradient_color_list = gradient_color_list - if self.gradient_color_list is not None: - if len(self.gradient_color_list) != fdata.n_samples: + self.gradient_criteria = gradient_criteria + if self.gradient_criteria is not None: + if len(self.gradient_criteria) != fdata.n_samples: raise ValueError( "The length of the gradient color", "list should be the same as the number", @@ -230,18 +223,18 @@ def __init__( ) if min_grad is None: - self.min_grad = min(self.gradient_color_list) + self.min_grad = min(self.gradient_criteria) else: self.min_grad = min_grad if max_grad is None: - self.max_grad = max(self.gradient_color_list) + self.max_grad = max(self.gradient_criteria) else: self.max_grad = max_grad aux_list = [ grad_color - self.min_grad - for grad_color in self.gradient_color_list + for grad_color in self.gradient_criteria ] self.gradient_list: Sequence[float] = ( @@ -259,7 +252,7 @@ def __init__( self.group_colors = group_colors self.group_names = group_names self.legend = legend - self.colormap_name = colormap_name + self.colormap = colormap if self.gradient_list is None: sample_colors, patches = _get_color_info( @@ -272,7 +265,7 @@ def __init__( ) else: patches = None - colormap = matplotlib.cm.get_cmap(self.colormap_name) + colormap = 
matplotlib.cm.get_cmap(self.colormap) colormap = colormap.reversed() sample_colors = [None] * self.fdata.n_samples @@ -297,7 +290,7 @@ def plot( visualizations, one that displays the functions without any criteria choosing the colors and a new one that displays the function with a gradient of colors depending on the initial - gradient_color_list (normalized in gradient_list). + gradient_criteria (normalized in gradient_list). Returns: fig (figure object): figure object in which the graphs are plotted. """ From 1ccf3c60eb23645b44b9a80fe6f76c66a7b275d1 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Fri, 14 May 2021 22:38:33 +0200 Subject: [PATCH 338/417] whitespace --- skfda/exploratory/visualization/representation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index d0401462c..8e8fd0e50 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -290,7 +290,7 @@ def plot( visualizations, one that displays the functions without any criteria choosing the colors and a new one that displays the function with a gradient of colors depending on the initial - gradient_criteria (normalized in gradient_list). + gradient_criteria (normalized in gradient_list). Returns: fig (figure object): figure object in which the graphs are plotted. 
""" From cd71f013489bbdf59e44585def1503eda4b1edd6 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Fri, 14 May 2021 22:39:52 +0200 Subject: [PATCH 339/417] v deleted --- skfda/exploratory/visualization/_parametric_plot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index f5fd409fa..1cb142c52 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -19,7 +19,6 @@ from .representation import Indexable, _get_color_info K = TypeVar('K', contravariant=True) -V = TypeVar('V', covariant=True) class ParametricPlot(BasePlot): From 947626e178ae4c8f1fea335179b1a67b5649b2a6 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Fri, 14 May 2021 22:53:40 +0200 Subject: [PATCH 340/417] boxplot --- skfda/exploratory/visualization/_boxplot.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 664afa7a4..23e97ca97 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -17,6 +17,7 @@ from skfda.exploratory.depth.multivariate import Depth +from ... import FData from ... import FDataGrid from ..depth import ModifiedBandDepth from ..outliers import _envelopes @@ -247,14 +248,14 @@ class Boxplot(FDataBoxplot, BasePlot): def __init__( self, - fdatagrid: FDataGrid, + fdatagrid: FData, depth_method: Optional[Depth[FDataGrid]] = None, - prob: Tuple[float] = (0.5,), + prob: Tuple[float, ...] 
= (0.5,), factor: float = 1.5, chart: Union[Figure, Axes, None] = None, *, fig: Optional[Figure] = None, - axes: Optional[Sequence[Axes]] = None, + axes: Optional[Axes] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, ): From 84b917832b53539aadf675a6e1ba8a48032315be Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 17 May 2021 01:54:07 +0200 Subject: [PATCH 341/417] Fix basis tests. --- skfda/_utils/_utils.py | 6 +- skfda/misc/_math.py | 2 +- skfda/representation/basis/_basis.py | 19 +- tests/test_basis.py | 699 ++++++++++++++++++--------- 4 files changed, 491 insertions(+), 235 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 01403252b..eefb2e9ac 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -266,7 +266,7 @@ def _same_domain(fd: Union[Basis, FData], fd2: Union[Basis, FData]) -> bool: @overload def _reshape_eval_points( - eval_points: np.ndarray, + eval_points: ArrayLike, *, aligned: Literal[True], n_samples: int, @@ -277,7 +277,7 @@ def _reshape_eval_points( @overload def _reshape_eval_points( - eval_points: Sequence[np.ndarray], + eval_points: Sequence[ArrayLike], *, aligned: Literal[True], n_samples: int, @@ -288,7 +288,7 @@ def _reshape_eval_points( @overload def _reshape_eval_points( - eval_points: Union[np.ndarray, Sequence[np.ndarray]], + eval_points: Union[ArrayLike, Sequence[ArrayLike]], *, aligned: bool, n_samples: int, diff --git a/skfda/misc/_math.py b/skfda/misc/_math.py index 8575aa704..252b4f79e 100644 --- a/skfda/misc/_math.py +++ b/skfda/misc/_math.py @@ -20,7 +20,7 @@ Vector = TypeVar( "Vector", - bound=Union[np.ndarray, Callable[[np.ndarray], np.ndarray]] + bound=Union[np.ndarray, Basis, Callable[[np.ndarray], np.ndarray]], ) diff --git a/skfda/representation/basis/_basis.py b/skfda/representation/basis/_basis.py index 45fe9fd15..d5ba326b9 100644 --- a/skfda/representation/basis/_basis.py +++ b/skfda/representation/basis/_basis.py @@ -11,7 +11,7 @@ from matplotlib.figure 
import Figure from ..._utils import _reshape_eval_points, _same_domain, _to_domain_range -from .._typing import DomainRange, DomainRangeLike +from .._typing import ArrayLike, DomainRange, DomainRangeLike if TYPE_CHECKING: from . import FDataBasis @@ -52,7 +52,7 @@ def __init__( def __call__( self, - eval_points: np.ndarray, + eval_points: ArrayLike, *, derivative: int = 0, ) -> np.ndarray: @@ -126,7 +126,7 @@ def _evaluate( def evaluate( self, - eval_points: np.ndarray, + eval_points: ArrayLike, *, derivative: int = 0, ) -> np.ndarray: @@ -278,7 +278,7 @@ def coordinate_basis_and_coefs( ) def rescale(self: T, domain_range: Optional[DomainRangeLike] = None) -> T: - r""" + """ Return a copy of the basis with a new :term:`domain` range. Args: @@ -318,7 +318,10 @@ def to_basis(self) -> FDataBasis: def _to_R(self) -> str: # noqa: N802 raise NotImplementedError - def inner_product_matrix(self, other: Optional[Basis] = None) -> np.array: + def inner_product_matrix( + self, + other: Optional[Basis] = None, + ) -> np.ndarray: r""" Return the Inner Product Matrix of a pair of basis. @@ -348,13 +351,13 @@ def inner_product_matrix(self, other: Optional[Basis] = None) -> np.array: return inner_product_matrix(self, other) - def _gram_matrix_numerical(self) -> np.array: + def _gram_matrix_numerical(self) -> np.ndarray: """Compute the Gram matrix numerically.""" from ...misc import inner_product_matrix return inner_product_matrix(self, force_numerical=True) - def _gram_matrix(self) -> np.array: + def _gram_matrix(self) -> np.ndarray: """ Compute the Gram matrix. @@ -364,7 +367,7 @@ def _gram_matrix(self) -> np.array: """ return self._gram_matrix_numerical() - def gram_matrix(self) -> np.array: + def gram_matrix(self) -> np.ndarray: r""" Return the Gram Matrix of a basis. 
diff --git a/tests/test_basis.py b/tests/test_basis.py index 27c065868..faff95c2f 100644 --- a/tests/test_basis.py +++ b/tests/test_basis.py @@ -19,266 +19,480 @@ class TestBasis(unittest.TestCase): + """Tests of basis and FDataBasis.""" # def setUp(self): could be defined for set up before any test - def test_from_data_cholesky(self): + def test_from_data_cholesky(self) -> None: + """Test basis conversion using Cholesky method.""" t = np.linspace(0, 1, 5) x = np.sin(2 * np.pi * t) + np.cos(2 * np.pi * t) basis = BSpline((0, 1), n_basis=5) np.testing.assert_array_almost_equal( - FDataBasis.from_data(x, grid_points=t, basis=basis, - method='cholesky' - ).coefficients.round(2), - np.array([[1., 2.78, -3., -0.78, 1.]]) + FDataBasis.from_data( + x, + grid_points=t, + basis=basis, + method='cholesky', + ).coefficients.round(2), + np.array([[1.0, 2.78, -3.0, -0.78, 1.0]]), ) - def test_from_data_qr(self): + def test_from_data_qr(self) -> None: + """Test basis conversion using QR method.""" t = np.linspace(0, 1, 5) x = np.sin(2 * np.pi * t) + np.cos(2 * np.pi * t) basis = BSpline((0, 1), n_basis=5) np.testing.assert_array_almost_equal( - FDataBasis.from_data(x, grid_points=t, basis=basis, - method='qr' - ).coefficients.round(2), - np.array([[1., 2.78, -3., -0.78, 1.]]) + FDataBasis.from_data( + x, + grid_points=t, + basis=basis, + method='qr', + ).coefficients.round(2), + np.array([[1.0, 2.78, -3.0, -0.78, 1.0]]), ) - def test_basis_inner_matrix(self): + def test_basis_inner_matrix(self) -> None: + """Test the inner product matrix of FDataBasis objects.""" + basis = Monomial(n_basis=3) + np.testing.assert_array_almost_equal( - Monomial(n_basis=3).inner_product_matrix(), - [[1, 1 / 2, 1 / 3], [1 / 2, 1 / 3, 1 / 4], [1 / 3, 1 / 4, 1 / 5]]) + basis.inner_product_matrix(), + [ + [1, 1 / 2, 1 / 3], # noqa: WPS204 + [1 / 2, 1 / 3, 1 / 4], # noqa: WPS204 + [1 / 3, 1 / 4, 1 / 5], + ], + ) np.testing.assert_array_almost_equal( - 
Monomial(n_basis=3).inner_product_matrix(Monomial(n_basis=3)), - [[1, 1 / 2, 1 / 3], [1 / 2, 1 / 3, 1 / 4], [1 / 3, 1 / 4, 1 / 5]]) + basis.inner_product_matrix(basis), + [ + [1, 1 / 2, 1 / 3], + [1 / 2, 1 / 3, 1 / 4], + [1 / 3, 1 / 4, 1 / 5], + ], + ) np.testing.assert_array_almost_equal( - Monomial(n_basis=3).inner_product_matrix(Monomial(n_basis=4)), - [[1, 1 / 2, 1 / 3, 1 / 4], - [1 / 2, 1 / 3, 1 / 4, 1 / 5], - [1 / 3, 1 / 4, 1 / 5, 1 / 6]]) + basis.inner_product_matrix(Monomial(n_basis=4)), + [ + [1, 1 / 2, 1 / 3, 1 / 4], + [1 / 2, 1 / 3, 1 / 4, 1 / 5], + [1 / 3, 1 / 4, 1 / 5, 1 / 6], + ], + ) # TODO testing with other basis - def test_basis_gram_matrix_monomial(self): - + def test_basis_gram_matrix_monomial(self) -> None: + """Test the Gram matrix with monomial basis.""" basis = Monomial(n_basis=3) gram_matrix = basis.gram_matrix() - gram_matrix_numerical = basis._gram_matrix_numerical() - gram_matrix_res = np.array([[1, 1 / 2, 1 / 3], - [1 / 2, 1 / 3, 1 / 4], - [1 / 3, 1 / 4, 1 / 5]]) + gram_matrix_numerical = basis._gram_matrix_numerical() # noqa: WPS437 + gram_matrix_res = np.array([ + [1, 1 / 2, 1 / 3], + [1 / 2, 1 / 3, 1 / 4], + [1 / 3, 1 / 4, 1 / 5], + ]) np.testing.assert_allclose( - gram_matrix, gram_matrix_res) + gram_matrix, + gram_matrix_res, + ) np.testing.assert_allclose( - gram_matrix_numerical, gram_matrix_res) - - def test_basis_gram_matrix_fourier(self): + gram_matrix_numerical, + gram_matrix_res, + ) + def test_basis_gram_matrix_fourier(self) -> None: + """Test the Gram matrix with fourier basis.""" basis = Fourier(n_basis=3) gram_matrix = basis.gram_matrix() - gram_matrix_numerical = basis._gram_matrix_numerical() + gram_matrix_numerical = basis._gram_matrix_numerical() # noqa: WPS437 gram_matrix_res = np.identity(3) np.testing.assert_allclose( - gram_matrix, gram_matrix_res) + gram_matrix, + gram_matrix_res, + ) np.testing.assert_allclose( - gram_matrix_numerical, gram_matrix_res, atol=1e-15, rtol=1e-15) - - def 
test_basis_gram_matrix_bspline(self): + gram_matrix_numerical, + gram_matrix_res, + atol=1e-15, + rtol=1e-15, + ) + def test_basis_gram_matrix_bspline(self) -> None: + """Test the Gram matrix with B-spline basis.""" basis = BSpline(n_basis=6) gram_matrix = basis.gram_matrix() - gram_matrix_numerical = basis._gram_matrix_numerical() - gram_matrix_res = np.array( - [[0.04761905, 0.02916667, 0.00615079, - 0.00039683, 0., 0.], - [0.02916667, 0.07380952, 0.05208333, - 0.01145833, 0.00014881, 0.], - [0.00615079, 0.05208333, 0.10892857, 0.07098214, - 0.01145833, 0.00039683], - [0.00039683, 0.01145833, 0.07098214, 0.10892857, - 0.05208333, 0.00615079], - [0., 0.00014881, 0.01145833, 0.05208333, - 0.07380952, 0.02916667], - [0., 0., 0.00039683, 0.00615079, - 0.02916667, 0.04761905]]) + gram_matrix_numerical = basis._gram_matrix_numerical() # noqa: WPS437 + gram_matrix_res = np.array([ + [0.04761905, 0.02916667, 0.00615079, 0.00039683, 0, 0], + [0.02916667, 0.07380952, 0.05208333, 0.01145833, 0.00014881, 0], + [ # noqa: WPS317 + 0.00615079, 0.05208333, 0.10892857, + 0.07098214, 0.01145833, 0.00039683, + ], + [ # noqa: WPS317 + 0.00039683, 0.01145833, 0.07098214, + 0.10892857, 0.05208333, 0.00615079, + ], + [0, 0.00014881, 0.01145833, 0.05208333, 0.07380952, 0.02916667], + [0, 0, 0.00039683, 0.00615079, 0.02916667, 0.04761905], + ]) np.testing.assert_allclose( - gram_matrix, gram_matrix_res, rtol=1e-4) + gram_matrix, + gram_matrix_res, + rtol=1e-4, + ) np.testing.assert_allclose( - gram_matrix_numerical, gram_matrix_res, rtol=1e-4) + gram_matrix_numerical, + gram_matrix_res, + rtol=1e-4, + ) - def test_basis_basis_inprod(self): + def test_basis_basis_inprod(self) -> None: + """Test inner product between different basis.""" monomial = Monomial(n_basis=4) bspline = BSpline(n_basis=5, order=4) np.testing.assert_allclose( monomial.inner_product_matrix(bspline), - np.array( - [[0.12499983, 0.25000035, 0.24999965, 0.25000035, 0.12499983], - [0.01249991, 0.07500017, 0.12499983, 
0.17500017, 0.11249991], - [0.00208338, 0.02916658, 0.07083342, 0.12916658, 0.10208338], - [0.00044654, 0.01339264, 0.04375022, 0.09910693, 0.09330368] - ]), rtol=1e-3) + np.array([ + [0.12499983, 0.25000035, 0.24999965, 0.25000035, 0.12499983], + [0.01249991, 0.07500017, 0.12499983, 0.17500017, 0.11249991], + [0.00208338, 0.02916658, 0.07083342, 0.12916658, 0.10208338], + [0.00044654, 0.01339264, 0.04375022, 0.09910693, 0.09330368], + ]), + rtol=1e-3, + ) np.testing.assert_array_almost_equal( monomial.inner_product_matrix(bspline), - bspline.inner_product_matrix(monomial).T + bspline.inner_product_matrix(monomial).T, ) - def test_basis_fdatabasis_inprod(self): + def test_basis_fdatabasis_inprod(self) -> None: + """Test inner product between different basis expansions.""" monomial = Monomial(n_basis=4) bspline = BSpline(n_basis=5, order=3) bsplinefd = FDataBasis(bspline, np.arange(0, 15).reshape(3, 5)) np.testing.assert_allclose( - inner_product_matrix(monomial, bsplinefd), - np.array([[2., 7., 12.], - [1.29626206, 3.79626206, 6.29626206], - [0.96292873, 2.62959539, 4.29626206], - [0.7682873, 2.0182873, 3.2682873]]), rtol=1e-4) + inner_product_matrix(monomial.to_basis(), bsplinefd), + np.array([ + [2.0, 7.0, 12.0], + [1.29626206, 3.79626206, 6.29626206], + [0.96292873, 2.62959539, 4.29626206], + [0.7682873, 2.0182873, 3.2682873], + ]), + rtol=1e-4, + ) - def test_fdatabasis_fdatabasis_inprod(self): + def test_fdatabasis_fdatabasis_inprod(self) -> None: + """Test inner product between FDataBasis objects.""" monomial = Monomial(n_basis=4) - monomialfd = FDataBasis(monomial, [[5, 4, 1, 0], - [4, 2, 1, 0], - [4, 1, 6, 4], - [4, 5, 0, 1], - [5, 6, 2, 0]]) + monomialfd = FDataBasis( + monomial, + [ + [5, 4, 1, 0], + [4, 2, 1, 0], + [4, 1, 6, 4], + [4, 5, 0, 1], + [5, 6, 2, 0], + ], + ) bspline = BSpline(n_basis=5, order=3) bsplinefd = FDataBasis(bspline, np.arange(0, 15).reshape(3, 5)) np.testing.assert_allclose( inner_product_matrix(monomialfd, bsplinefd), - 
np.array([[16.14797697, 52.81464364, 89.4813103], - [11.55565285, 38.22211951, 64.88878618], - [18.14698361, 55.64698361, 93.14698361], - [15.2495976, 48.9995976, 82.7495976], - [19.70392982, 63.03676315, 106.37009648]]), - rtol=1e-4) - - def test_comutativity_inprod(self): + np.array([ + [16.14797697, 52.81464364, 89.4813103], + [11.55565285, 38.22211951, 64.88878618], + [18.14698361, 55.64698361, 93.14698361], + [15.2495976, 48.9995976, 82.7495976], + [19.70392982, 63.03676315, 106.37009648], + ]), + rtol=1e-4, + ) + + def test_comutativity_inprod(self) -> None: + """Test commutativity of the inner product.""" monomial = Monomial(n_basis=4) bspline = BSpline(n_basis=5, order=3) bsplinefd = FDataBasis(bspline, np.arange(0, 15).reshape(3, 5)) np.testing.assert_allclose( - inner_product_matrix(bsplinefd, monomial), - np.transpose(inner_product_matrix(monomial, bsplinefd)) + inner_product_matrix(bsplinefd, monomial.to_basis()), + np.transpose(inner_product_matrix(monomial.to_basis(), bsplinefd)), ) - def test_fdatabasis__add__(self): + def test_concatenate(self) -> None: + """Test concatenation of two FDataBasis.""" + sample1 = np.arange(0, 10) + sample2 = np.arange(10, 20) + fd1 = FDataGrid([sample1]).to_basis(Fourier(n_basis=5)) + fd2 = FDataGrid([sample2]).to_basis(Fourier(n_basis=5)) + + fd = concatenate([fd1, fd2]) + + np.testing.assert_equal(fd.n_samples, 2) + np.testing.assert_equal(fd.dim_codomain, 1) + np.testing.assert_equal(fd.dim_domain, 1) + np.testing.assert_array_equal( + fd.coefficients, + np.concatenate([fd1.coefficients, fd2.coefficients]), + ) + + +class TestFDataBasisOperations(unittest.TestCase): + """Test FDataBasis operations.""" + + def test_fdatabasis_add(self) -> None: + """Test addition of FDataBasis.""" monomial1 = FDataBasis(Monomial(n_basis=3), [1, 2, 3]) monomial2 = FDataBasis(Monomial(n_basis=3), [[1, 2, 3], [3, 4, 5]]) - self.assertTrue((monomial1 + monomial2).equals( - FDataBasis(Monomial(n_basis=3), - [[2, 4, 6], [4, 6, 8]]))) + 
self.assertTrue( + (monomial1 + monomial2).equals( + FDataBasis( + Monomial(n_basis=3), + [[2, 4, 6], [4, 6, 8]], + ), + ), + ) with np.testing.assert_raises(TypeError): - monomial2 + FDataBasis(Fourier(n_basis=3), - [[2, 2, 3], [5, 4, 5]]) + monomial2 + FDataBasis( # noqa: WPS428 + Fourier(n_basis=3), + [[2, 2, 3], [5, 4, 5]], + ) - def test_fdatabasis__sub__(self): + def test_fdatabasis_sub(self) -> None: + """Test subtraction of FDataBasis.""" monomial1 = FDataBasis(Monomial(n_basis=3), [1, 2, 3]) monomial2 = FDataBasis(Monomial(n_basis=3), [[1, 2, 3], [3, 4, 5]]) - self.assertTrue((monomial1 - monomial2).equals( - FDataBasis(Monomial(n_basis=3), - [[0, 0, 0], [-2, -2, -2]]))) + self.assertTrue( + (monomial1 - monomial2).equals( + FDataBasis( + Monomial(n_basis=3), + [[0, 0, 0], [-2, -2, -2]], + ), + ), + ) with np.testing.assert_raises(TypeError): - monomial2 - FDataBasis(Fourier(n_basis=3), - [[2, 2, 3], [5, 4, 5]]) + monomial2 - FDataBasis( # noqa: WPS428 + Fourier(n_basis=3), + [[2, 2, 3], [5, 4, 5]], + ) - def test_fdatabasis__mul__(self): - monomial1 = FDataBasis(Monomial(n_basis=3), [1, 2, 3]) - monomial2 = FDataBasis(Monomial(n_basis=3), [[1, 2, 3], [3, 4, 5]]) + def test_fdatabasis_mul(self) -> None: + """Test multiplication of FDataBasis.""" + basis = Monomial(n_basis=3) + + monomial1 = FDataBasis(basis, [1, 2, 3]) + monomial2 = FDataBasis(basis, [[1, 2, 3], [3, 4, 5]]) + + self.assertTrue( + (monomial1 * 2).equals( + FDataBasis( + basis, + [[2, 4, 6]], + ), + ), + ) + + self.assertTrue( + (3 * monomial2).equals( + FDataBasis( + basis, + [[3, 6, 9], [9, 12, 15]], + ), + ), + ) + + self.assertTrue( + (3 * monomial2).equals( + monomial2 * 3, + ), + ) + + self.assertTrue( + (monomial2 * [1, 2]).equals( + FDataBasis( + basis, + [[1, 2, 3], [6, 8, 10]], + ), + ), + ) - self.assertTrue((monomial1 * 2).equals( - FDataBasis(Monomial(n_basis=3), - [[2, 4, 6]]))) - self.assertTrue((3 * monomial2).equals( - FDataBasis(Monomial(n_basis=3), - [[3, 6, 9], [9, 12, 
15]]))) - self.assertTrue((3 * monomial2).equals( - monomial2 * 3)) - - self.assertTrue((monomial2 * [1, 2]).equals( - FDataBasis(Monomial(n_basis=3), - [[1, 2, 3], [6, 8, 10]]))) - self.assertTrue(([1, 2] * monomial2).equals( - FDataBasis(Monomial(n_basis=3), - [[1, 2, 3], [6, 8, 10]]))) + self.assertTrue( + ([1, 2] * monomial2).equals( + FDataBasis( + basis, + [[1, 2, 3], [6, 8, 10]], + ), + ), + ) with np.testing.assert_raises(TypeError): - monomial2 * FDataBasis(Fourier(n_basis=3), - [[2, 2, 3], [5, 4, 5]]) + monomial2 * FDataBasis( # noqa: WPS428 + Fourier(n_basis=3), + [[2, 2, 3], [5, 4, 5]], + ) with np.testing.assert_raises(TypeError): - monomial2 * monomial2 + monomial2 * monomial2 # noqa: WPS428 - def test_fdatabasis__div__(self): - monomial1 = FDataBasis(Monomial(n_basis=3), [1, 2, 3]) - monomial2 = FDataBasis(Monomial(n_basis=3), [[1, 2, 3], [3, 4, 5]]) + def test_fdatabasis_div(self) -> None: + """Test division of FDataBasis.""" + basis = Monomial(n_basis=3) + + monomial1 = FDataBasis(basis, [1, 2, 3]) + monomial2 = FDataBasis(basis, [[1, 2, 3], [3, 4, 5]]) self.assertTrue((monomial1 / 2).equals( - FDataBasis(Monomial(n_basis=3), - [[1 / 2, 1, 3 / 2]]))) - self.assertTrue((monomial2 / 2).equals( - FDataBasis(Monomial(n_basis=3), - [[1 / 2, 1, 3 / 2], [3 / 2, 2, 5 / 2]]))) - - self.assertTrue((monomial2 / [1, 2]).equals( - FDataBasis(Monomial(n_basis=3), - [[1, 2, 3], [3 / 2, 2, 5 / 2]]))) - - def test_fdatabasis_derivative_constant(self): - constant = FDataBasis(Constant(), - [[1], [2], [3], [4]]) - - self.assertTrue(constant.derivative().equals( - FDataBasis(Constant(), - [[0], [0], [0], [0]]))) - self.assertTrue(constant.derivative(order=0).equals( - FDataBasis(Constant(), - [[1], [2], [3], [4]]))) - - def test_fdatabasis_derivative_monomial(self): - monomial = FDataBasis(Monomial(n_basis=8), - [1, 5, 8, 9, 7, 8, 4, 5]) - monomial2 = FDataBasis(Monomial(n_basis=5), - [[4, 9, 7, 4, 3], - [1, 7, 9, 8, 5], - [4, 6, 6, 6, 8]]) - - 
self.assertTrue(monomial.derivative().equals( - FDataBasis(Monomial(n_basis=7), - [5, 16, 27, 28, 40, 24, 35]))) - self.assertTrue(monomial.derivative(order=0).equals(monomial)) - self.assertTrue(monomial.derivative(order=6).equals( - FDataBasis(Monomial(n_basis=2), - [2880, 25200]))) - self.assertTrue(monomial2.derivative().equals( - FDataBasis(Monomial(n_basis=4), - [[9, 14, 12, 12], + FDataBasis( + basis, + [[1 / 2, 1, 3 / 2]], + ), + )) + + self.assertTrue( + (monomial2 / 2).equals( + FDataBasis( + basis, + [[1 / 2, 1, 3 / 2], [3 / 2, 2, 5 / 2]], + ), + ), + ) + + self.assertTrue( + (monomial2 / [1, 2]).equals( + FDataBasis( + basis, + [[1.0, 2.0, 3.0], [3 / 2, 2, 5 / 2]], + ), + ), + ) + + +class TestFDataBasisDerivatives(unittest.TestCase): + """Test FDataBasis derivatives.""" + + def test_fdatabasis_derivative_constant(self) -> None: + """Test derivatives with a constant basis.""" + constant = FDataBasis( + Constant(), + [[1], [2], [3], [4]], + ) + + self.assertTrue( + constant.derivative().equals( + FDataBasis( + Constant(), + [[0], [0], [0], [0]], + ), + ), + ) + + self.assertTrue( + constant.derivative(order=0).equals( + FDataBasis( + Constant(), + [[1], [2], [3], [4]], + ), + ), + ) + + def test_fdatabasis_derivative_monomial(self) -> None: + """Test derivatives with a monomial basis.""" + monomial = FDataBasis( + Monomial(n_basis=8), + [1, 5, 8, 9, 7, 8, 4, 5], + ) + + monomial2 = FDataBasis( + Monomial(n_basis=5), + [ + [4, 9, 7, 4, 3], + [1, 7, 9, 8, 5], + [4, 6, 6, 6, 8], + ], + ) + + self.assertTrue( + monomial.derivative().equals( + FDataBasis( + Monomial(n_basis=7), + [5, 16, 27, 28, 40, 24, 35], + ), + ), + ) + + self.assertTrue( + monomial.derivative(order=0).equals(monomial), + ) + + self.assertTrue( + monomial.derivative(order=6).equals( + FDataBasis( + Monomial(n_basis=2), + [2880, 25200], + ), + ), + ) + + self.assertTrue( + monomial2.derivative().equals( + FDataBasis( + Monomial(n_basis=4), + [ + [9, 14, 12, 12], [7, 18, 24, 20], - [6, 12, 
18, 32]]))) - self.assertTrue(monomial2.derivative(order=0).equals(monomial2)) - self.assertTrue(monomial2.derivative(order=3).equals( - FDataBasis(Monomial(n_basis=2), - [[24, 72], + [6, 12, 18, 32], + ], + ), + ), + ) + + self.assertTrue( + monomial2.derivative(order=0).equals(monomial2), + ) + + self.assertTrue( + monomial2.derivative(order=3).equals( + FDataBasis( + Monomial(n_basis=2), + [ + [24, 72], [48, 120], - [36, 192]]))) + [36, 192], + ], + ), + ), + ) + + def test_fdatabasis_derivative_fourier(self) -> None: + """Test derivatives with a fourier basis.""" + fourier = FDataBasis( + Fourier(n_basis=7), + [1, 5, 8, 9, 8, 4, 5], + ) - def test_fdatabasis_derivative_fourier(self): - fourier = FDataBasis(Fourier(n_basis=7), - [1, 5, 8, 9, 8, 4, 5]) - fourier2 = FDataBasis(Fourier(n_basis=5), - [[4, 9, 7, 4, 3], - [1, 7, 9, 8, 5], - [4, 6, 6, 6, 8]]) + fourier2 = FDataBasis( + Fourier(n_basis=5), + [ + [4, 9, 7, 4, 3], + [1, 7, 9, 8, 5], + [4, 6, 6, 6, 8], + ], + ) fou0 = fourier.derivative(order=0) fou1 = fourier.derivative() @@ -287,16 +501,25 @@ def test_fdatabasis_derivative_fourier(self): np.testing.assert_equal(fou1.basis, fourier.basis) np.testing.assert_almost_equal( fou1.coefficients.round(5), - np.atleast_2d([0, -50.26548, 31.41593, - -100.53096, 113.09734, - -94.24778, 75.39822])) + np.atleast_2d( + [ # noqa: WPS317 + 0, -50.26548, 31.41593, -100.53096, + 113.09734, -94.24778, 75.39822, + ], + ), + ) + self.assertTrue(fou0.equals(fourier)) np.testing.assert_equal(fou2.basis, fourier.basis) np.testing.assert_almost_equal( fou2.coefficients.round(5), - np.atleast_2d([0, -197.39209, -315.82734, - -1421.22303, -1263.30936, - -1421.22303, -1776.52879])) + np.atleast_2d( + [ # noqa: WPS317 + 0, -197.39209, -315.82734, -1421.22303, + -1263.30936, -1421.22303, -1776.52879, + ], + ), + ) fou0 = fourier2.derivative(order=0) fou1 = fourier2.derivative() @@ -305,77 +528,106 @@ def test_fdatabasis_derivative_fourier(self): np.testing.assert_equal(fou1.basis, 
fourier2.basis) np.testing.assert_almost_equal( fou1.coefficients.round(5), - [[0, -43.98230, 56.54867, -37.69911, 50.26548], - [0, -56.54867, 43.98230, - - 62.83185, 100.53096], - [0, -37.69911, 37.69911, -100.53096, 75.39822]]) + [ + [0, -43.9823, 56.54867, -37.69911, 50.26548], + [0, -56.54867, 43.9823, -62.83185, 100.53096], + [0, -37.69911, 37.69911, -100.53096, 75.39822], + ], + ) + self.assertTrue(fou0.equals(fourier2)) np.testing.assert_equal(fou2.basis, fourier2.basis) np.testing.assert_almost_equal( fou2.coefficients.round(5), - [[0, -355.30576, -276.34892, -631.65468, -473.74101], - [0, -276.34892, -355.30576, - - 1263.30936, -789.56835], - [0, -236.87051, -236.87051, -947.48202, -1263.30936]]) - - def test_fdatabasis_derivative_bspline(self): - bspline = FDataBasis(BSpline(n_basis=8), - [1, 5, 8, 9, 7, 8, 4, 5]) - bspline2 = FDataBasis(BSpline(n_basis=5), - [[4, 9, 7, 4, 3], - [1, 7, 9, 8, 5], - [4, 6, 6, 6, 8]]) + [ + [0, -355.30576, -276.34892, -631.65468, -473.74101], + [0, -276.34892, -355.30576, -1263.30936, -789.56835], + [0, -236.87051, -236.87051, -947.48202, -1263.30936], + ], + ) + + def test_fdatabasis_derivative_bspline(self) -> None: + """Test derivatives with a B-spline basis.""" + bspline = FDataBasis( + BSpline(n_basis=8), + [1, 5, 8, 9, 7, 8, 4, 5], + ) + bspline2 = FDataBasis( + BSpline(n_basis=5), + [ + [4, 9, 7, 4, 3], + [1, 7, 9, 8, 5], + [4, 6, 6, 6, 8], + ], + ) bs0 = bspline.derivative(order=0) bs1 = bspline.derivative() bs2 = bspline.derivative(order=2) np.testing.assert_equal(bs1.basis, BSpline(n_basis=7, order=3)) - np.testing.assert_almost_equal(bs1.coefficients, - np.atleast_2d([60, 22.5, 5, - -10, 5, -30, 15])) + + np.testing.assert_almost_equal( + bs1.coefficients, + np.atleast_2d([60, 22.5, 5, -10, 5, -30, 15]), + ) + self.assertTrue(bs0.equals(bspline)) - np.testing.assert_equal(bs2.basis, BSpline(n_basis=6, order=2)) - np.testing.assert_almost_equal(bs2.coefficients, - np.atleast_2d([-375, -87.5, -75, - 75, -175, 450])) 
+ + np.testing.assert_equal( + bs2.basis, + BSpline(n_basis=6, order=2), + ) + + np.testing.assert_almost_equal( + bs2.coefficients, + np.atleast_2d([-375, -87.5, -75, 75, -175, 450]), + ) bs0 = bspline2.derivative(order=0) bs1 = bspline2.derivative() bs2 = bspline2.derivative(order=2) np.testing.assert_equal(bs1.basis, BSpline(n_basis=4, order=3)) - np.testing.assert_almost_equal(bs1.coefficients, - [[30, -6, -9, -6], - [36, 6, -3, -18], - [12, 0, 0, 12]]) + + np.testing.assert_almost_equal( + bs1.coefficients, + [ + [30, -6, -9, -6], + [36, 6, -3, -18], + [12, 0, 0, 12], + ], + ) + self.assertTrue(bs0.equals(bspline2)) - np.testing.assert_equal(bs2.basis, BSpline(n_basis=3, order=2)) - np.testing.assert_almost_equal(bs2.coefficients, - [[-144, -6, 12], - [-120, -18, -60], - [-48, 0, 48]]) - def test_concatenate(self): - sample1 = np.arange(0, 10) - sample2 = np.arange(10, 20) - fd1 = FDataGrid([sample1]).to_basis(Fourier(n_basis=5)) - fd2 = FDataGrid([sample2]).to_basis(Fourier(n_basis=5)) + np.testing.assert_equal( + bs2.basis, + BSpline(n_basis=3, order=2), + ) - fd = concatenate([fd1, fd2]) + np.testing.assert_almost_equal( + bs2.coefficients, + [ + [-144, -6, 12], + [-120, -18, -60], + [-48, 0, 48], + ], + ) - np.testing.assert_equal(fd.n_samples, 2) - np.testing.assert_equal(fd.dim_codomain, 1) - np.testing.assert_equal(fd.dim_domain, 1) - np.testing.assert_array_equal(fd.coefficients, np.concatenate( - [fd1.coefficients, fd2.coefficients])) - def test_vector_valued(self): +class TestVectorValuedBasis(unittest.TestCase): + """Tests for the vector valued basis.""" + + def test_vector_valued(self) -> None: + """Test vector valued basis.""" X, _ = skfda.datasets.fetch_weather(return_X_y=True) basis_dim = skfda.representation.basis.Fourier( - n_basis=7, domain_range=X.domain_range) + n_basis=7, + domain_range=X.domain_range, + ) basis = skfda.representation.basis.VectorValued( - [basis_dim] * 2 + [basis_dim] * 2, ) X_basis = X.to_basis(basis) @@ -385,12 +637,14 
@@ def test_vector_valued(self): self.assertEqual(X_basis.coordinates[0].basis, basis_dim) np.testing.assert_allclose( X_basis.coordinates[0].coefficients, - X.coordinates[0].to_basis(basis_dim).coefficients) + X.coordinates[0].to_basis(basis_dim).coefficients, + ) self.assertEqual(X_basis.coordinates[1].basis, basis_dim) np.testing.assert_allclose( X_basis.coordinates[1].coefficients, - X.coordinates[1].to_basis(basis_dim).coefficients) + X.coordinates[1].to_basis(basis_dim).coefficients, + ) class TestTensorBasis(unittest.TestCase): @@ -496,5 +750,4 @@ def test_tensor_gram_matrix(self) -> None: if __name__ == '__main__': - print() unittest.main() From 2fd1da73a47fd8b2cf25d0fe787653b1021c728f Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 23 May 2021 19:05:56 +0200 Subject: [PATCH 342/417] solved init problems and typos --- .../visualization/representation.py | 56 ++++++++++--------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 8e8fd0e50..c2dcf8d96 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -115,7 +115,7 @@ def _get_color_info( class GraphPlot(BasePlot): """ - Class used to plot the FDatGrid object graph as hypersurfaces. + Class used to plot the FDataGrid object graph as hypersurfaces. When plotting functional data, we can either choose manually a color, a group of colors for the representations. Besides, we can use a list of @@ -124,59 +124,59 @@ class GraphPlot(BasePlot): Args: fdata: functional data set that we want to plot. gradient_criteria: list of real values used to determine the color - in which each of the instances will be plotted. The size + in which each of the instances will be plotted. 
max_grad: maximum value that the gradient_list can take, it will be - used to normalize the gradient_criteria in order to get values - thatcan be used in the funcion colormap.__call__(). If not + used to normalize the ``gradient_criteria`` in order to get values + that can be used in the funcion colormap.__call__(). If not declared it will be initialized to the maximum value of - gradient_list + gradient_list. min_grad: minimum value that the gradient_list can take, it will be - used to normalize the gradient_criteria in order to get values + used to normalize the ``gradient_criteria`` in order to get values thatcan be used in the funcion colormap.__call__(). If not declared it will be initialized to the minimum value of gradient_list. - chart (figure object, axe or list of axes, optional): figure over + chart: figure over with the graphs are plotted or axis over where the graphs are plotted. If None and ax is also None, the figure is initialized. - fig (figure object, optional): figure over with the graphs are + fig: figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes (axis object, optional): axis over where the graphs + axes: axis over where the graphs are plotted. If None, see param fig. - n_rows (int, optional): designates the number of rows of the figure + n_rows: designates the number of rows of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. - n_cols(int, optional): designates the number of columns of the + n_cols: designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. - n_points (int or tuple, optional): Number of points to evaluate in + n_points: Number of points to evaluate in the plot. In case of surfaces a tuple of length 2 can be pased with the number of points to plot in each axis, otherwise the same number of points will be used in the two axes. 
By default in unidimensional plots will be used 501 points; in surfaces will be used 30 points per axis, wich makes a grid with 900 points. - domain_range (tuple or list of tuples, optional): Range where the + domain_range: Range where the function will be plotted. In objects with unidimensional domain the domain range should be a tuple with the bounds of the interval; in the case of surfaces a list with 2 tuples with the ranges for each dimension. Default uses the domain range of the functional object. - group (list of int): contains integers from [0 to number of + group: contains integers from [0 to number of labels) indicating to which group each sample belongs to. Then, the samples with the same label are plotted in the same color. If None, the default value, each sample is plotted in the color assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors (list of colors): colors in which groups are + group_colors: colors in which groups are represented, there must be one for each group. If None, each group is shown with distict colors in the "Greys" colormap. - group_names (list of str): name of each of the groups which appear + group_names: name of each of the groups which appear in a legend, there must be one for each one. Defaults to None and the legend is not shown. Implies `legend=True`. colormap: name of the colormap to be used. By default we will use autumn. - legend (bool): if `True`, show a legend with the groups. If + legend: if `True`, show a legend with the groups. If `group_names` is passed, it will be used for finding the names to display in the legend. Otherwise, the values passed to `group` will be used. 
@@ -247,13 +247,17 @@ def __init__( self.gradient_list = None self.n_points = n_points - self.domain_range = domain_range self.group = group self.group_colors = group_colors self.group_names = group_names self.legend = legend self.colormap = colormap + if domain_range is None: + self.domain_range = self.fdata.domain_range + else: + self.domain_range = _to_domain_range(domain_range) + if self.gradient_list is None: sample_colors, patches = _get_color_info( self.fdata, @@ -265,8 +269,11 @@ def __init__( ) else: patches = None - colormap = matplotlib.cm.get_cmap(self.colormap) - colormap = colormap.reversed() + if self.colormap == 'autumn': + colormap = matplotlib.cm.get_cmap(self.colormap) + colormap = colormap.reversed() + else: + colormap = matplotlib.cm.get_cmap(self.colormap) sample_colors = [None] * self.fdata.n_samples for m in range(self.fdata.n_samples): @@ -299,11 +306,6 @@ def plot( dtype=Artist, ) - if self.domain_range is None: - self.domain_range = self.fdata.domain_range - else: - self.domain_range = _to_domain_range(self.domain_range) - color_dict: Mapping[str, Optional[ColorLike]] = {} if self.fdata.dim_domain == 1: @@ -621,8 +623,8 @@ def set_color_dict( """ Auxiliary method used to update color_dict. - Sets the new color of the color - dict thanks to sample colors and index. + Sets the new color of the color_dict + thanks to sample colors and index. 
""" if sample_colors is not None: color_dict["color"] = sample_colors[ind] From 2e764e4226505c71a48bd02face0003ba8513643 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 24 May 2021 17:35:50 +0200 Subject: [PATCH 343/417] colormap autumn not inversed automatically --- skfda/exploratory/visualization/representation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index c2dcf8d96..de2600d11 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -207,7 +207,7 @@ def __init__( group: Optional[Sequence[K]] = None, group_colors: Optional[Indexable[K, ColorLike]] = None, group_names: Optional[Indexable[K, str]] = None, - colormap: Union[Colormap, str, None] = 'autumn', + colormap: Union[Colormap, str, None] = None, legend: bool = False, **kwargs: Any, ) -> None: @@ -269,8 +269,8 @@ def __init__( ) else: patches = None - if self.colormap == 'autumn': - colormap = matplotlib.cm.get_cmap(self.colormap) + if self.colormap is None: + colormap = matplotlib.cm.get_cmap("autumn") colormap = colormap.reversed() else: colormap = matplotlib.cm.get_cmap(self.colormap) From 18e6fc980eee70b2962aedae988261d635d5ae23 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 24 May 2021 19:50:49 +0200 Subject: [PATCH 344/417] Move computation of least squares to its own module. 
--- skfda/_utils/__init__.py | 2 +- skfda/_utils/lstsq.py | 98 +++++ skfda/misc/regularization/_regularization.py | 6 +- skfda/ml/regression/_linear_regression.py | 11 +- .../dim_reduction/projection/_fpca.py | 16 +- skfda/preprocessing/smoothing/_basis.py | 364 ++++++++++-------- 6 files changed, 320 insertions(+), 177 deletions(-) create mode 100644 skfda/_utils/lstsq.py diff --git a/skfda/_utils/__init__.py b/skfda/_utils/__init__.py index 191324177..5fc1e97df 100644 --- a/skfda/_utils/__init__.py +++ b/skfda/_utils/__init__.py @@ -1,4 +1,4 @@ -from . import constants +from . import constants, lstsq from ._utils import ( RandomStateLike, _cartesian_product, diff --git a/skfda/_utils/lstsq.py b/skfda/_utils/lstsq.py new file mode 100644 index 000000000..994a3bbbd --- /dev/null +++ b/skfda/_utils/lstsq.py @@ -0,0 +1,98 @@ +"""Methods to solve least squares problems.""" +from __future__ import annotations + +from typing import Callable, Optional, Union + +import numpy as np +from typing_extensions import Final, Literal + +import scipy.linalg + +LstsqMethodCallable = Callable[[np.ndarray, np.ndarray], np.ndarray] +LstsqMethodName = Literal["cholesky", "qr", "svd"] +LstsqMethod = Union[LstsqMethodCallable, LstsqMethodName] + + +def lstsq_method_cholesky( + coefs: np.ndarray, + result: np.ndarray, +) -> np.ndarray: + """Solve OLS problem using a Cholesky decomposition.""" + left = coefs.T @ coefs + right = coefs.T @ result + return scipy.linalg.solve(left, right, assume_a="pos") + + +def lstsq_method_qr( + coefs: np.ndarray, + result: np.ndarray, +) -> np.ndarray: + """Solve OLS problem using a QR decomposition.""" + return scipy.linalg.lstsq(coefs, result, lapack_driver="gelsy")[0] + + +def lstsq_method_svd( + coefs: np.ndarray, + result: np.ndarray, +) -> np.ndarray: + """Solve OLS problem using a SVD decomposition.""" + return scipy.linalg.lstsq(coefs, result, lapack_driver="gelsd")[0] + + +method_dict: Final = { + "cholesky": lstsq_method_cholesky, + "qr": 
lstsq_method_qr, + "svd": lstsq_method_svd, +} + + +def _get_lstsq_method( + method: LstsqMethod, +) -> LstsqMethodCallable: + """Convert method string to method if necessary.""" + return method if callable(method) else method_dict[method] + + +def solve_regularized_weighted_lstsq( + coefs: np.ndarray, + result: np.ndarray, + *, + weights: Optional[np.ndarray] = None, + penalty_matrix_coef: float = 1, + penalty_matrix: Optional[np.ndarray] = None, + lstsq_method: LstsqMethod = lstsq_method_cholesky, +) -> np.ndarray: + """ + Solve a regularized and weighted least squares problem. + + If the penalty matrix is not ``None`` and nonzero, there + is a closed solution. Otherwise the problem can be reduced + to a least squares problem. + + """ + lstsq_method = _get_lstsq_method(lstsq_method) + + if lstsq_method is not lstsq_method_cholesky and ( + penalty_matrix is None + or penalty_matrix_coef == 0 + ): + # Weighted least squares case + if weights is not None: + weights_chol = scipy.linalg.cholesky(weights) + coefs = weights_chol @ coefs + result = weights_chol @ result + + return lstsq_method(coefs, result) + + # Cholesky case (always used for the regularized case) + if weights is None: + left = coefs.T @ coefs + right = coefs.T @ result + else: + left = coefs.T @ weights @ coefs + right = coefs.T @ weights @ result + + if penalty_matrix is not None: + left += penalty_matrix_coef * penalty_matrix + + return scipy.linalg.solve(left, right, assume_a="pos") diff --git a/skfda/misc/regularization/_regularization.py b/skfda/misc/regularization/_regularization.py index 4928cb1cd..7098ad690 100644 --- a/skfda/misc/regularization/_regularization.py +++ b/skfda/misc/regularization/_regularization.py @@ -1,7 +1,7 @@ from __future__ import annotations import itertools -from typing import Any, Generic, Iterable, Union +from typing import Any, Generic, Iterable, Optional, Union import numpy as np from sklearn.base import BaseEstimator @@ -149,7 +149,7 @@ def compute_penalty_matrix( 
basis_iterable: Iterable[BasisTypes], regularization_parameter: Union[float, Iterable[float]], regularization: Union[None, Regularization, Iterable[Regularization]], -) -> Union[float, np.ndarray]: +) -> Optional[np.ndarray]: """ Compute the regularization matrix for a linear differential operator. @@ -158,7 +158,7 @@ def compute_penalty_matrix( """ # If there is no regularization, return 0 and rely on broadcasting if regularization_parameter == 0 or regularization is None: - return 0 + return None # Compute penalty matrix if not provided if not isinstance(regularization, Iterable): diff --git a/skfda/ml/regression/_linear_regression.py b/skfda/ml/regression/_linear_regression.py index 383987cf0..6e3032f9a 100644 --- a/skfda/ml/regression/_linear_regression.py +++ b/skfda/ml/regression/_linear_regression.py @@ -1,12 +1,11 @@ -from collections.abc import Iterable import itertools import warnings +from collections.abc import Iterable +import numpy as np from sklearn.base import BaseEstimator, RegressorMixin from sklearn.utils.validation import check_is_fitted -import numpy as np - from ...misc.regularization import compute_penalty_matrix from ...representation import FData from ._coefficients import coefficient_info_from_covariate @@ -154,11 +153,13 @@ def fit(self, X, y=None, sample_weight=None): regularization_parameter=1, regularization=regularization) - if self.fit_intercept and hasattr(penalty_matrix, "shape"): + if self.fit_intercept and penalty_matrix is not None: # Intercept is not penalized penalty_matrix[0, 0] = 0 - gram_inner_x_coef = inner_products.T @ inner_products + penalty_matrix + gram_inner_x_coef = inner_products.T @ inner_products + if penalty_matrix is not None: + gram_inner_x_coef += penalty_matrix inner_x_coef_y = inner_products.T @ y coef_lengths = np.array([i.shape[1] for i in inner_products_list]) diff --git a/skfda/preprocessing/dim_reduction/projection/_fpca.py b/skfda/preprocessing/dim_reduction/projection/_fpca.py index 
07b8a07df..cb40cb666 100644 --- a/skfda/preprocessing/dim_reduction/projection/_fpca.py +++ b/skfda/preprocessing/dim_reduction/projection/_fpca.py @@ -1,10 +1,10 @@ """Functional Principal Component Analysis Module.""" import numpy as np -from scipy.linalg import solve_triangular from sklearn.base import BaseEstimator, TransformerMixin from sklearn.decomposition import PCA +from scipy.linalg import solve_triangular from skfda.misc.regularization import compute_penalty_matrix from skfda.representation.basis import FDataBasis from skfda.representation.grid import FDataGrid @@ -171,7 +171,8 @@ def _fit_basis(self, X: FDataBasis, y=None): regularization=self.regularization) # apply regularization - g_matrix = (g_matrix + regularization_matrix) + if regularization_matrix is not None: + g_matrix = (g_matrix + regularization_matrix) # obtain triangulation using cholesky l_matrix = np.linalg.cholesky(g_matrix) @@ -310,9 +311,14 @@ def _fit_grid(self, X: FDataGrid, y=None): regularization_parameter=1, regularization=self.regularization) - fd_data = np.transpose(np.linalg.solve( - np.transpose(basis.data_matrix[..., 0] + regularization_matrix), - np.transpose(fd_data))) + basis_matrix = basis.data_matrix[..., 0] + if regularization_matrix is not None: + basis_matrix = basis_matrix + regularization_matrix + + fd_data = np.linalg.solve( + basis_matrix.T, + fd_data.T, + ).T # see docstring for more information final_matrix = fd_data @ np.sqrt(weights_matrix) / np.sqrt(n_samples) diff --git a/skfda/preprocessing/smoothing/_basis.py b/skfda/preprocessing/smoothing/_basis.py index a27662c8f..ef934034f 100644 --- a/skfda/preprocessing/smoothing/_basis.py +++ b/skfda/preprocessing/smoothing/_basis.py @@ -4,131 +4,129 @@ This module contains the class for the basis smoothing. 
""" -from enum import Enum -from typing import Union, Iterable +from __future__ import annotations -import scipy.linalg +from enum import Enum +from typing import Any, Callable, Iterable, Optional, Sequence, Union import numpy as np -from ... import FDataBasis -from ... import FDataGrid +import scipy.linalg + from ..._utils import _cartesian_product +from ..._utils.lstsq import ( + lstsq_method_cholesky, + lstsq_method_qr, + solve_regularized_weighted_lstsq, +) +from ...misc.regularization import TikhonovRegularization +from ...representation import FData, FDataBasis, FDataGrid +from ...representation.basis import Basis from ._linear import _LinearSmoother class _Cholesky(): - """Solve the linear equation using cholesky factorization""" - - def __call__(self, *, basis_values, weight_matrix, data_matrix, - penalty_matrix, **_): - - common_matrix = basis_values.T - - if weight_matrix is not None: - common_matrix @= weight_matrix - - right_matrix = common_matrix @ data_matrix - left_matrix = common_matrix @ basis_values - - # Adds the roughness penalty to the equation - if penalty_matrix is not None: - left_matrix += penalty_matrix - - coefficients = scipy.linalg.cho_solve(scipy.linalg.cho_factor( - left_matrix, lower=True), right_matrix) - - # The ith column is the coefficients of the ith basis for each - # sample - coefficients = coefficients.T - - return coefficients + """Solve the linear equation using cholesky factorization.""" + + def __call__( + self, + *, + basis_values: np.ndarray, + weight_matrix: Optional[np.ndarray], + data_matrix: np.ndarray, + penalty_matrix: Optional[np.ndarray], + **_: Any, + ) -> np.ndarray: + + return solve_regularized_weighted_lstsq( + coefs=basis_values, + result=data_matrix, + weights=weight_matrix, + penalty_matrix=penalty_matrix, + ).T class _QR(): - """Solve the linear equation using qr factorization""" - - def __call__(self, *, basis_values, weight_matrix, data_matrix, - penalty_matrix, **_): - - if weight_matrix is not None: 
- # Decompose W in U'U and calculate UW and Uy - upper = scipy.linalg.cholesky(weight_matrix) - basis_values = upper @ basis_values - data_matrix = upper @ data_matrix - - if not np.all(penalty_matrix == 0): - w, v = np.linalg.eigh(penalty_matrix) - - w = w[::-1] - v = v[:, ::-1] - - w = np.maximum(w, 0) - - penalty_matrix = v @ np.diag(np.sqrt(w)) - # Augment the basis matrix with the square root of the - # penalty matrix - basis_values = np.concatenate([ - basis_values, - penalty_matrix.T], - axis=0) - # Augment data matrix by n zeros - data_matrix = np.pad(data_matrix, - ((0, len(v)), - (0, 0)), - mode='constant') - - # Resolves the equation - # B.T @ B @ C = B.T @ D - # by means of the QR decomposition - - # B = Q @ R - q, r = np.linalg.qr(basis_values) - right_matrix = q.T @ data_matrix - - # R @ C = Q.T @ D - coefficients = np.linalg.solve(r, right_matrix) - # The ith column is the coefficients of the ith basis for each - # sample - coefficients = coefficients.T - - return coefficients + """Solve the linear equation using qr factorization.""" + + def __call__( + self, + *, + basis_values: np.ndarray, + weight_matrix: Optional[np.ndarray], + data_matrix: np.ndarray, + penalty_matrix: Optional[np.ndarray], + **_: Any, + ) -> np.ndarray: + + return solve_regularized_weighted_lstsq( + coefs=basis_values, + result=data_matrix, + weights=weight_matrix, + penalty_matrix=penalty_matrix, + lstsq_method=lstsq_method_qr, + ).T class _Matrix(): - """Solve the linear equation using matrix inversion""" - - def fit(self, estimator, X, y=None): + """Solve the linear equation using matrix inversion.""" + + def fit( + self, + estimator: BasisSmoother, + X: FDataGrid, + y: None = None, + ) -> BasisSmoother: if estimator.return_basis: estimator._cached_coef_matrix = estimator._coef_matrix( - estimator.input_points_) + estimator.input_points_, + ) else: # Force caching the hat matrix estimator.hat_matrix() - def fit_transform(self, estimator, X, y=None): + return estimator + + 
def fit_transform( + self, + estimator: BasisSmoother, + X: FDataGrid, + y: None = None, + ) -> FData: return estimator.fit(X, y).transform(X, y) def __call__(self, *, estimator, **_): pass - def transform(self, estimator, X, y=None): + def transform( + self, + estimator: BasisSmoother, + X: FDataGrid, + y: None = None, + ) -> FData: if estimator.return_basis: - coefficients = (X.data_matrix.reshape((X.n_samples, -1)) - @ estimator._cached_coef_matrix.T) + coefficients = ( + X.data_matrix.reshape((X.n_samples, -1)) + @ estimator._cached_coef_matrix.T + ) fdatabasis = FDataBasis( - basis=estimator.basis, coefficients=coefficients) + basis=estimator.basis, + coefficients=coefficients, + ) return fdatabasis else: # The matrix is cached - return X.copy(data_matrix=self.hat_matrix() @ X.data_matrix, - grid_points=estimator.output_points_) + return X.copy( + data_matrix=estimator.hat_matrix() @ X.data_matrix, + grid_points=estimator.output_points_, + ) class BasisSmoother(_LinearSmoother): - r"""Transform raw data to a smooth functional form. + r""" + Transform raw data to a smooth functional form. Takes functional data in a discrete form and makes an approximates it to the closest function that can be generated by the basis.a. @@ -190,7 +188,6 @@ class BasisSmoother(_LinearSmoother): a FDataBasis object. 
Examples: - By default, this smoother returns a FDataGrid, like the other smoothers: @@ -295,20 +292,23 @@ class BasisSmoother(_LinearSmoother): _required_parameters = ["basis"] class SolverMethod(Enum): + """Method used to solve the equations.""" + cholesky = _Cholesky() qr = _QR() matrix = _Matrix() - def __init__(self, - basis, - *, - smoothing_parameter: float = 1., - weights=None, - regularization: Union[int, Iterable[float], - 'LinearDifferentialOperator'] = None, - output_points=None, - method='cholesky', - return_basis=False): + def __init__( + self, + basis: Basis, + *, + smoothing_parameter: float = 1.0, + weights: Optional[np.ndarray] = None, + regularization: Optional[TikhonovRegularization[FDataGrid]] = None, + output_points: Optional[Sequence[np.ndarray]] = None, + method='cholesky', + return_basis: bool = False, + ) -> None: self.basis = basis self.smoothing_parameter = smoothing_parameter self.weights = weights @@ -317,8 +317,8 @@ def __init__(self, self.method = method self.return_basis = return_basis - def _method_function(self): - """ Return the method function""" + def _method_function(self) -> BasisSmoother.SolverMethod: + """Return the method function.""" method_function = self.method if not isinstance(method_function, self.SolverMethod): method_function = self.SolverMethod[ @@ -326,58 +326,73 @@ def _method_function(self): return method_function.value - def _coef_matrix(self, input_points): - """Get the matrix that gives the coefficients""" + def _coef_matrix(self, input_points: Sequence[np.ndarray]) -> np.ndarray: + """Get the matrix that gives the coefficients.""" from ...misc.regularization import compute_penalty_matrix basis_values_input = self.basis.evaluate( - _cartesian_product(input_points)).reshape( - (self.basis.n_basis, -1)).T + _cartesian_product(input_points), + ).reshape((self.basis.n_basis, -1)).T # If no weight matrix is given all the weights are one if self.weights is not None: - ols_matrix = (basis_values_input.T @ 
self.weights - @ basis_values_input) + ols_matrix = ( + basis_values_input.T @ self.weights + @ basis_values_input + ) else: ols_matrix = basis_values_input.T @ basis_values_input penalty_matrix = compute_penalty_matrix( basis_iterable=(self.basis,), regularization_parameter=self.smoothing_parameter, - regularization=self.regularization) + regularization=self.regularization, + ) - ols_matrix += penalty_matrix + if penalty_matrix is not None: + ols_matrix += penalty_matrix right_side = basis_values_input.T if self.weights is not None: right_side @= self.weights return np.linalg.solve( - ols_matrix, right_side) - - def _hat_matrix(self, input_points, output_points): - basis_values_output = self.basis.evaluate(_cartesian_product( - output_points)).reshape( - (self.basis.n_basis, -1)).T + ols_matrix, + right_side, + ) + + def _hat_matrix( + self, + input_points: Sequence[np.ndarray], + output_points: Sequence[np.ndarray], + ) -> np.ndarray: + basis_values_output = self.basis.evaluate( + _cartesian_product(output_points), + ).reshape((self.basis.n_basis, -1)).T return basis_values_output @ self._coef_matrix(input_points) - def fit(self, X: FDataGrid, y=None): + def fit( + self, + X: FDataGrid, + y: None = None, + ) -> BasisSmoother: """Compute the hat matrix for the desired output points. Args: - X (FDataGrid): - The data whose points are used to compute the matrix. - y : Ignored + X: The data whose points are used to compute the matrix. + y: Ignored. 
+ Returns: - self (object) + self """ - self.input_points_ = X.grid_points - self.output_points_ = (self.output_points - if self.output_points is not None - else self.input_points_) + self.output_points_ = ( + self.output_points + if self.output_points is not None + else self.input_points_ + ) method = self._method_function() method_fit = getattr(method, "fit", None) @@ -386,28 +401,36 @@ def fit(self, X: FDataGrid, y=None): return self - def fit_transform(self, X: FDataGrid, y=None): - """Compute the hat matrix for the desired output points. + def fit_transform( + self, + X: FDataGrid, + y: None = None, + ): + """ + Fit the estimator and smooth the data. Args: - X (FDataGrid): - The data whose points are used to compute the matrix. - y : Ignored + X: The data to smooth. + y: Ignored + Returns: - self (object) + Smoothed data. """ from ...misc.regularization import compute_penalty_matrix self.input_points_ = X.grid_points - self.output_points_ = (self.output_points - if self.output_points is not None - else self.input_points_) + self.output_points_ = ( + self.output_points + if self.output_points is not None + else self.input_points_ + ) penalty_matrix = compute_penalty_matrix( basis_iterable=(self.basis,), regularization_parameter=self.smoothing_parameter, - regularization=self.regularization) + regularization=self.regularization, + ) # n is the samples # m is the observations @@ -418,8 +441,8 @@ def fit_transform(self, X: FDataGrid, y=None): # Each basis in a column basis_values = self.basis.evaluate( - _cartesian_product(self.input_points_)).reshape( - (self.basis.n_basis, -1)).T + _cartesian_product(self.input_points_) + ).reshape((self.basis.n_basis, -1)).T # If no weight matrix is given all the weights are one weight_matrix = self.weights @@ -433,8 +456,10 @@ def fit_transform(self, X: FDataGrid, y=None): # C the coefficient matrix (the unknown) # Y is the data_matrix - if(data_matrix.shape[0] > self.basis.n_basis - or self.smoothing_parameter > 0): + if ( + 
data_matrix.shape[0] > self.basis.n_basis + or self.smoothing_parameter > 0 + ): method = self._method_function() @@ -444,11 +469,13 @@ def fit_transform(self, X: FDataGrid, y=None): return method_fit_transform(estimator=self, X=X, y=y) # Otherwise the method is used to compute the coefficients - coefficients = method(estimator=self, - basis_values=basis_values, - weight_matrix=weight_matrix, - data_matrix=data_matrix, - penalty_matrix=penalty_matrix) + coefficients = method( + estimator=self, + basis_values=basis_values, + weight_matrix=weight_matrix, + data_matrix=data_matrix, + penalty_matrix=penalty_matrix, + ) elif data_matrix.shape[0] == self.basis.n_basis: # If the number of basis equals the number of points and no @@ -456,46 +483,57 @@ def fit_transform(self, X: FDataGrid, y=None): coefficients = np.linalg.solve(basis_values, data_matrix).T else: # data_matrix.shape[0] < basis.n_basis - raise ValueError(f"The number of basis functions " - f"({self.basis.n_basis}) " - f"exceed the number of points to be smoothed " - f"({data_matrix.shape[0]}).") + raise ValueError( + f"The number of basis functions " + f"({self.basis.n_basis}) " + f"exceed the number of points to be smoothed " + f"({data_matrix.shape[0]}).", + ) fdatabasis = FDataBasis( - basis=self.basis, coefficients=coefficients, + basis=self.basis, + coefficients=coefficients, dataset_name=X.dataset_name, argument_names=X.argument_names, coordinate_names=X.coordinate_names, - sample_names=X.sample_names) + sample_names=X.sample_names, + ) if self.return_basis: return fdatabasis - else: - return fdatabasis.to_grid(grid_points=self.output_points_) - return self + return fdatabasis.to_grid(grid_points=self.output_points_) - def transform(self, X: FDataGrid, y=None): - """Apply the smoothing. + def transform( + self, + X: FDataGrid, + y: None = None, + ) -> FData: + """ + Smooth the data. Args: - X (FDataGrid): - The data to smooth. - y : Ignored + X: The data to smooth. 
+ y: Ignored + Returns: - self (object) + Smoothed data. """ - - assert all([all(i == s) - for i, s in zip(self.input_points_, X.grid_points)]) + assert all( + [all(i == s) for i, s in zip(self.input_points_, X.grid_points)], + ) method = self._method_function() # If the method provides the complete transformation use it method_transform = getattr(method, "transform", None) if method_transform is not None: - return method_transform(estimator=self, X=X, y=y) + return method_transform( + estimator=self, + X=X, + y=y, + ) # Otherwise use fit_transform over the data # Note that data leakage is not possible because the matrix only From 4dfb3347f6b8cfe4086a712ca2fb36ed5b8cced9 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 27 May 2021 14:31:36 +0200 Subject: [PATCH 345/417] More refactoring. --- skfda/_utils/lstsq.py | 20 +- skfda/preprocessing/smoothing/_basis.py | 355 ++++-------------- skfda/preprocessing/smoothing/_linear.py | 63 ++-- .../smoothing/kernel_smoothers.py | 5 +- tests/test_smoothing.py | 18 +- 5 files changed, 131 insertions(+), 330 deletions(-) diff --git a/skfda/_utils/lstsq.py b/skfda/_utils/lstsq.py index 994a3bbbd..a5f5f6815 100644 --- a/skfda/_utils/lstsq.py +++ b/skfda/_utils/lstsq.py @@ -13,7 +13,7 @@ LstsqMethod = Union[LstsqMethodCallable, LstsqMethodName] -def lstsq_method_cholesky( +def lstsq_cholesky( coefs: np.ndarray, result: np.ndarray, ) -> np.ndarray: @@ -23,7 +23,7 @@ def lstsq_method_cholesky( return scipy.linalg.solve(left, right, assume_a="pos") -def lstsq_method_qr( +def lstsq_qr( coefs: np.ndarray, result: np.ndarray, ) -> np.ndarray: @@ -31,7 +31,7 @@ def lstsq_method_qr( return scipy.linalg.lstsq(coefs, result, lapack_driver="gelsy")[0] -def lstsq_method_svd( +def lstsq_svd( coefs: np.ndarray, result: np.ndarray, ) -> np.ndarray: @@ -40,9 +40,9 @@ def lstsq_method_svd( method_dict: Final = { - "cholesky": lstsq_method_cholesky, - "qr": lstsq_method_qr, - "svd": lstsq_method_svd, + "cholesky": lstsq_cholesky, + "qr": lstsq_qr, 
+ "svd": lstsq_svd, } @@ -58,9 +58,8 @@ def solve_regularized_weighted_lstsq( result: np.ndarray, *, weights: Optional[np.ndarray] = None, - penalty_matrix_coef: float = 1, penalty_matrix: Optional[np.ndarray] = None, - lstsq_method: LstsqMethod = lstsq_method_cholesky, + lstsq_method: LstsqMethod = lstsq_cholesky, ) -> np.ndarray: """ Solve a regularized and weighted least squares problem. @@ -72,9 +71,8 @@ def solve_regularized_weighted_lstsq( """ lstsq_method = _get_lstsq_method(lstsq_method) - if lstsq_method is not lstsq_method_cholesky and ( + if lstsq_method is not lstsq_cholesky and ( penalty_matrix is None - or penalty_matrix_coef == 0 ): # Weighted least squares case if weights is not None: @@ -93,6 +91,6 @@ def solve_regularized_weighted_lstsq( right = coefs.T @ weights @ result if penalty_matrix is not None: - left += penalty_matrix_coef * penalty_matrix + left += penalty_matrix return scipy.linalg.solve(left, right, assume_a="pos") diff --git a/skfda/preprocessing/smoothing/_basis.py b/skfda/preprocessing/smoothing/_basis.py index ef934034f..f482e2ca6 100644 --- a/skfda/preprocessing/smoothing/_basis.py +++ b/skfda/preprocessing/smoothing/_basis.py @@ -6,124 +6,21 @@ """ from __future__ import annotations -from enum import Enum -from typing import Any, Callable, Iterable, Optional, Sequence, Union +from typing import Optional, Sequence import numpy as np +from typing_extensions import Final import scipy.linalg from ..._utils import _cartesian_product -from ..._utils.lstsq import ( - lstsq_method_cholesky, - lstsq_method_qr, - solve_regularized_weighted_lstsq, -) +from ..._utils.lstsq import LstsqMethod, solve_regularized_weighted_lstsq from ...misc.regularization import TikhonovRegularization from ...representation import FData, FDataBasis, FDataGrid from ...representation.basis import Basis from ._linear import _LinearSmoother -class _Cholesky(): - """Solve the linear equation using cholesky factorization.""" - - def __call__( - self, - *, - 
basis_values: np.ndarray, - weight_matrix: Optional[np.ndarray], - data_matrix: np.ndarray, - penalty_matrix: Optional[np.ndarray], - **_: Any, - ) -> np.ndarray: - - return solve_regularized_weighted_lstsq( - coefs=basis_values, - result=data_matrix, - weights=weight_matrix, - penalty_matrix=penalty_matrix, - ).T - - -class _QR(): - """Solve the linear equation using qr factorization.""" - - def __call__( - self, - *, - basis_values: np.ndarray, - weight_matrix: Optional[np.ndarray], - data_matrix: np.ndarray, - penalty_matrix: Optional[np.ndarray], - **_: Any, - ) -> np.ndarray: - - return solve_regularized_weighted_lstsq( - coefs=basis_values, - result=data_matrix, - weights=weight_matrix, - penalty_matrix=penalty_matrix, - lstsq_method=lstsq_method_qr, - ).T - - -class _Matrix(): - """Solve the linear equation using matrix inversion.""" - - def fit( - self, - estimator: BasisSmoother, - X: FDataGrid, - y: None = None, - ) -> BasisSmoother: - if estimator.return_basis: - estimator._cached_coef_matrix = estimator._coef_matrix( - estimator.input_points_, - ) - else: - # Force caching the hat matrix - estimator.hat_matrix() - - return estimator - - def fit_transform( - self, - estimator: BasisSmoother, - X: FDataGrid, - y: None = None, - ) -> FData: - return estimator.fit(X, y).transform(X, y) - - def __call__(self, *, estimator, **_): - pass - - def transform( - self, - estimator: BasisSmoother, - X: FDataGrid, - y: None = None, - ) -> FData: - if estimator.return_basis: - coefficients = ( - X.data_matrix.reshape((X.n_samples, -1)) - @ estimator._cached_coef_matrix.T - ) - - fdatabasis = FDataBasis( - basis=estimator.basis, - coefficients=coefficients, - ) - - return fdatabasis - else: - # The matrix is cached - return X.copy( - data_matrix=estimator.hat_matrix() @ X.data_matrix, - grid_points=estimator.output_points_, - ) - - class BasisSmoother(_LinearSmoother): r""" Transform raw data to a smooth functional form. 
@@ -200,8 +97,7 @@ class BasisSmoother(_LinearSmoother): >>> fd = skfda.FDataGrid(data_matrix=x, grid_points=t) >>> basis = skfda.representation.basis.Fourier((0, 1), n_basis=3) - >>> smoother = skfda.preprocessing.smoothing.BasisSmoother( - ... basis, method='cholesky') + >>> smoother = skfda.preprocessing.smoothing.BasisSmoother(basis) >>> fd_smooth = smoother.fit_transform(fd) >>> fd_smooth.data_matrix.round(2) array([[[ 3.], @@ -216,19 +112,28 @@ class BasisSmoother(_LinearSmoother): >>> fd = skfda.FDataGrid(data_matrix=x, grid_points=t) >>> basis = skfda.representation.basis.Fourier((0, 1), n_basis=3) >>> smoother = skfda.preprocessing.smoothing.BasisSmoother( - ... basis, method='cholesky', return_basis=True) + ... basis, + ... method='cholesky', + ... return_basis=True, + ... ) >>> fd_basis = smoother.fit_transform(fd) >>> fd_basis.coefficients.round(2) array([[ 2. , 0.71, 0.71]]) >>> smoother = skfda.preprocessing.smoothing.BasisSmoother( - ... basis, method='qr', return_basis=True) + ... basis, + ... method='qr', + ... return_basis=True, + ... ) >>> fd_basis = smoother.fit_transform(fd) >>> fd_basis.coefficients.round(2) array([[ 2. , 0.71, 0.71]]) >>> smoother = skfda.preprocessing.smoothing.BasisSmoother( - ... basis, method='matrix', return_basis=True) + ... basis, + ... method='svd', + ... return_basis=True, + ... ) >>> fd_basis = smoother.fit_transform(fd) >>> fd_basis.coefficients.round(2) array([[ 2. , 0.71, 0.71]]) @@ -248,10 +153,13 @@ class BasisSmoother(_LinearSmoother): >>> fd = skfda.FDataGrid(data_matrix=x, grid_points=t) >>> basis = skfda.representation.basis.Fourier((0, 1), n_basis=3) >>> smoother = skfda.preprocessing.smoothing.BasisSmoother( - ... basis, method='cholesky', - ... regularization=TikhonovRegularization( - ... LinearDifferentialOperator([0.1, 0.2])), - ... return_basis=True) + ... basis, + ... method='cholesky', + ... regularization=TikhonovRegularization( + ... LinearDifferentialOperator([0.1, 0.2]), + ... ), + ... 
return_basis=True, + ... ) >>> fd_basis = smoother.fit_transform(fd) >>> fd_basis.coefficients.round(2) array([[ 2.04, 0.51, 0.55]]) @@ -259,10 +167,13 @@ class BasisSmoother(_LinearSmoother): >>> fd = skfda.FDataGrid(data_matrix=x, grid_points=t) >>> basis = skfda.representation.basis.Fourier((0, 1), n_basis=3) >>> smoother = skfda.preprocessing.smoothing.BasisSmoother( - ... basis, method='qr', - ... regularization=TikhonovRegularization( - ... LinearDifferentialOperator([0.1, 0.2])), - ... return_basis=True) + ... basis, + ... method='qr', + ... regularization=TikhonovRegularization( + ... LinearDifferentialOperator([0.1, 0.2]), + ... ), + ... return_basis=True, + ... ) >>> fd_basis = smoother.fit_transform(fd) >>> fd_basis.coefficients.round(2) array([[ 2.04, 0.51, 0.55]]) @@ -270,10 +181,13 @@ class BasisSmoother(_LinearSmoother): >>> fd = skfda.FDataGrid(data_matrix=x, grid_points=t) >>> basis = skfda.representation.basis.Fourier((0, 1), n_basis=3) >>> smoother = skfda.preprocessing.smoothing.BasisSmoother( - ... basis, method='matrix', - ... regularization=TikhonovRegularization( - ... LinearDifferentialOperator([0.1, 0.2])), - ... return_basis=True) + ... basis, + ... method='svd', + ... regularization=TikhonovRegularization( + ... LinearDifferentialOperator([0.1, 0.2]), + ... ), + ... return_basis=True, + ... 
) >>> fd_basis = smoother.fit_transform(fd) >>> fd_basis.coefficients.round(2) array([[ 2.04, 0.51, 0.55]]) @@ -291,13 +205,6 @@ class BasisSmoother(_LinearSmoother): _required_parameters = ["basis"] - class SolverMethod(Enum): - """Method used to solve the equations.""" - - cholesky = _Cholesky() - qr = _QR() - matrix = _Matrix() - def __init__( self, basis: Basis, @@ -306,7 +213,7 @@ def __init__( weights: Optional[np.ndarray] = None, regularization: Optional[TikhonovRegularization[FDataGrid]] = None, output_points: Optional[Sequence[np.ndarray]] = None, - method='cholesky', + method: LstsqMethod = 'svd', return_basis: bool = False, ) -> None: self.basis = basis @@ -315,18 +222,14 @@ def __init__( self.regularization = regularization self.output_points = output_points self.method = method - self.return_basis = return_basis + self.return_basis: Final = return_basis - def _method_function(self) -> BasisSmoother.SolverMethod: - """Return the method function.""" - method_function = self.method - if not isinstance(method_function, self.SolverMethod): - method_function = self.SolverMethod[ - method_function.lower()] - - return method_function.value - - def _coef_matrix(self, input_points: Sequence[np.ndarray]) -> np.ndarray: + def _coef_matrix( + self, + input_points: Sequence[np.ndarray], + *, + data_matrix: Optional[np.ndarray] = None, + ) -> np.ndarray: """Get the matrix that gives the coefficients.""" from ...misc.regularization import compute_penalty_matrix @@ -334,31 +237,23 @@ def _coef_matrix(self, input_points: Sequence[np.ndarray]) -> np.ndarray: _cartesian_product(input_points), ).reshape((self.basis.n_basis, -1)).T - # If no weight matrix is given all the weights are one - if self.weights is not None: - ols_matrix = ( - basis_values_input.T @ self.weights - @ basis_values_input - ) - else: - ols_matrix = basis_values_input.T @ basis_values_input - penalty_matrix = compute_penalty_matrix( basis_iterable=(self.basis,), 
regularization_parameter=self.smoothing_parameter, regularization=self.regularization, ) - if penalty_matrix is not None: - ols_matrix += penalty_matrix - - right_side = basis_values_input.T - if self.weights is not None: - right_side @= self.weights + # Get the matrix for computing the coefficients if no + # data_matrix is passed + if data_matrix is None: + data_matrix = np.eye(basis_values_input.shape[0]) - return np.linalg.solve( - ols_matrix, - right_side, + return solve_regularized_weighted_lstsq( + coefs=basis_values_input, + result=data_matrix, + weights=self.weights, + penalty_matrix=penalty_matrix, + lstsq_method=self.method, ) def _hat_matrix( @@ -394,116 +289,11 @@ def fit( else self.input_points_ ) - method = self._method_function() - method_fit = getattr(method, "fit", None) - if method_fit is not None: - method_fit(estimator=self, X=X, y=y) + if not self.return_basis: + super().fit(X, y) return self - def fit_transform( - self, - X: FDataGrid, - y: None = None, - ): - """ - Fit the estimator and smooth the data. - - Args: - X: The data to smooth. - y: Ignored - - Returns: - Smoothed data. 
- - """ - from ...misc.regularization import compute_penalty_matrix - - self.input_points_ = X.grid_points - self.output_points_ = ( - self.output_points - if self.output_points is not None - else self.input_points_ - ) - - penalty_matrix = compute_penalty_matrix( - basis_iterable=(self.basis,), - regularization_parameter=self.smoothing_parameter, - regularization=self.regularization, - ) - - # n is the samples - # m is the observations - # k is the number of elements of the basis - - # Each sample in a column (m x n) - data_matrix = X.data_matrix.reshape((X.n_samples, -1)).T - - # Each basis in a column - basis_values = self.basis.evaluate( - _cartesian_product(self.input_points_) - ).reshape((self.basis.n_basis, -1)).T - - # If no weight matrix is given all the weights are one - weight_matrix = self.weights - - # We need to solve the equation - # (phi' W phi + lambda * R) C = phi' W Y - # where: - # phi is the basis_values - # W is the weight matrix - # lambda the smoothness parameter - # C the coefficient matrix (the unknown) - # Y is the data_matrix - - if ( - data_matrix.shape[0] > self.basis.n_basis - or self.smoothing_parameter > 0 - ): - - method = self._method_function() - - # If the method provides the complete transformation use it - method_fit_transform = getattr(method, "fit_transform", None) - if method_fit_transform is not None: - return method_fit_transform(estimator=self, X=X, y=y) - - # Otherwise the method is used to compute the coefficients - coefficients = method( - estimator=self, - basis_values=basis_values, - weight_matrix=weight_matrix, - data_matrix=data_matrix, - penalty_matrix=penalty_matrix, - ) - - elif data_matrix.shape[0] == self.basis.n_basis: - # If the number of basis equals the number of points and no - # smoothing is required - coefficients = np.linalg.solve(basis_values, data_matrix).T - - else: # data_matrix.shape[0] < basis.n_basis - raise ValueError( - f"The number of basis functions " - f"({self.basis.n_basis}) " - f"exceed 
the number of points to be smoothed " - f"({data_matrix.shape[0]}).", - ) - - fdatabasis = FDataBasis( - basis=self.basis, - coefficients=coefficients, - dataset_name=X.dataset_name, - argument_names=X.argument_names, - coordinate_names=X.coordinate_names, - sample_names=X.sample_names, - ) - - if self.return_basis: - return fdatabasis - - return fdatabasis.to_grid(grid_points=self.output_points_) - def transform( self, X: FDataGrid, @@ -521,21 +311,26 @@ def transform( """ assert all( - [all(i == s) for i, s in zip(self.input_points_, X.grid_points)], + np.array_equal(i, s) for i, s in zip( + self.input_points_, + X.grid_points, + ) ) - method = self._method_function() + if self.return_basis: + coefficients = self._coef_matrix( + input_points=X.grid_points, + data_matrix=X.data_matrix.reshape((X.n_samples, -1)).T, + ).T - # If the method provides the complete transformation use it - method_transform = getattr(method, "transform", None) - if method_transform is not None: - return method_transform( - estimator=self, - X=X, - y=y, + return FDataBasis( + basis=self.basis, + coefficients=coefficients, + dataset_name=X.dataset_name, + argument_names=X.argument_names, + coordinate_names=X.coordinate_names, + sample_names=X.sample_names, ) - # Otherwise use fit_transform over the data - # Note that data leakage is not possible because the matrix only - # depends on the input/output points - return self.fit_transform(X, y) + else: + return super().transform(X, y) diff --git a/skfda/preprocessing/smoothing/_linear.py b/skfda/preprocessing/smoothing/_linear.py index 0729290dd..04ac4f3a9 100644 --- a/skfda/preprocessing/smoothing/_linear.py +++ b/skfda/preprocessing/smoothing/_linear.py @@ -6,19 +6,18 @@ """ import abc -from sklearn.base import BaseEstimator, TransformerMixin - import numpy as np +from sklearn.base import BaseEstimator, TransformerMixin from ... 
import FDataGrid +from ..._utils import _to_grid_points -def _check_r_to_r(f): - if f.dim_domain != 1 or f.dim_codomain != 1: - raise NotImplementedError("Only accepts functions from R to R") - - -class _LinearSmoother(abc.ABC, BaseEstimator, TransformerMixin): +class _LinearSmoother( + abc.ABC, + BaseEstimator, # type: ignore + TransformerMixin, # type: ignore +): """Linear smoother. Abstract base class for all linear smoothers. The subclasses must override @@ -26,8 +25,11 @@ class _LinearSmoother(abc.ABC, BaseEstimator, TransformerMixin): """ - def __init__(self, *, - output_points=None): + def __init__( + self, + *, + output_points=None, + ): self.output_points = output_points def hat_matrix(self, input_points=None, output_points=None): @@ -47,16 +49,15 @@ def hat_matrix(self, input_points=None, output_points=None): if cached_hat_matrix is None: self.cached_hat_matrix = self._hat_matrix( input_points=self.input_points_, - output_points=self.output_points_ + output_points=self.output_points_, ) return self.cached_hat_matrix - else: - # We only cache the matrix for the fit points - return self._hat_matrix( - input_points=self.input_points_, - output_points=self.output_points_ - ) + # We only cache the matrix for the fit points + return self._hat_matrix( + input_points=self.input_points_, + output_points=self.output_points_, + ) @abc.abstractmethod def _hat_matrix(self, input_points, output_points): @@ -73,17 +74,18 @@ def fit(self, X: FDataGrid, y=None): Args: X (FDataGrid): The data whose points are used to compute the matrix. - y : Ignored + y : Ignored. 
+ Returns: self (object) """ - _check_r_to_r(X) - - self.input_points_ = X.grid_points[0] - self.output_points_ = (self.output_points - if self.output_points is not None - else self.input_points_) + self.input_points_ = X.grid_points + self.output_points_ = ( + _to_grid_points(self.output_points) + if self.output_points is not None + else self.input_points_ + ) # Force caching the hat matrix self.hat_matrix() @@ -102,11 +104,18 @@ def transform(self, X: FDataGrid, y=None): """ - assert all(self.input_points_ == X.grid_points[0]) + assert all( + np.array_equal(i, s) for i, s in zip( + self.input_points_, + X.grid_points, + ) + ) # The matrix is cached - return X.copy(data_matrix=self.hat_matrix() @ X.data_matrix, - grid_points=self.output_points_) + return X.copy( + data_matrix=self.hat_matrix() @ X.data_matrix, + grid_points=self.output_points_, + ) def score(self, X, y): """Returns the generalized cross validation (GCV) score. diff --git a/skfda/preprocessing/smoothing/kernel_smoothers.py b/skfda/preprocessing/smoothing/kernel_smoothers.py index 2bf48ad03..fe3b0a584 100644 --- a/skfda/preprocessing/smoothing/kernel_smoothers.py +++ b/skfda/preprocessing/smoothing/kernel_smoothers.py @@ -13,7 +13,6 @@ from ...misc import kernels from ._linear import _LinearSmoother - __author__ = "Miguel Carbajo Berrocal" __email__ = "miguel.carbajo@estudiante.uam.es" @@ -31,8 +30,8 @@ def __init__(self, *, smoothing_parameter=None, def _hat_matrix(self, input_points, output_points): return self._hat_matrix_function( - input_points=input_points, - output_points=output_points, + input_points=input_points[0], + output_points=output_points[0], smoothing_parameter=self.smoothing_parameter, kernel=self.kernel, weights=self.weights, diff --git a/tests/test_smoothing.py b/tests/test_smoothing.py index 1061bc02a..e3f9b0bb3 100644 --- a/tests/test_smoothing.py +++ b/tests/test_smoothing.py @@ -1,17 +1,17 @@ -import skfda -from skfda._utils import _check_estimator -from skfda.misc.operators 
import LinearDifferentialOperator -from skfda.misc.regularization import TikhonovRegularization -from skfda.representation.basis import BSpline, Monomial -from skfda.representation.grid import FDataGrid import unittest +import numpy as np import sklearn -import numpy as np +import skfda import skfda.preprocessing.smoothing as smoothing import skfda.preprocessing.smoothing.kernel_smoothers as kernel_smoothers import skfda.preprocessing.smoothing.validation as validation +from skfda._utils import _check_estimator +from skfda.misc.operators import LinearDifferentialOperator +from skfda.misc.regularization import TikhonovRegularization +from skfda.representation.basis import BSpline, Monomial +from skfda.representation.grid import FDataGrid class TestSklearnEstimators(unittest.TestCase): @@ -129,7 +129,7 @@ def test_monomial_smoothing(self): fd_basis.coefficients.round(2), np.array([[0.61, -0.88, 0.06, 0.02]])) - def test_vector_valued_smoothing(self): + def test_vector_valued_smoothing(self) -> None: X, _ = skfda.datasets.fetch_weather(return_X_y=True) basis_dim = skfda.representation.basis.Fourier( @@ -138,7 +138,7 @@ def test_vector_valued_smoothing(self): [basis_dim] * 2 ) - for method in smoothing.BasisSmoother.SolverMethod: + for method in ('cholesky', 'qr', 'svd'): with self.subTest(method=method): basis_smoother = smoothing.BasisSmoother( From 5cf7157d3a2ce0240203bc0492d9bdc5d3e89dc5 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 27 May 2021 19:05:58 +0200 Subject: [PATCH 346/417] SVD is the default method for solving least squares. 
--- skfda/_utils/__init__.py | 2 +- skfda/misc/__init__.py | 16 +++++++++++----- skfda/{_utils => misc}/lstsq.py | 0 skfda/preprocessing/smoothing/_basis.py | 2 +- tests/test_math.py | 6 +++--- 5 files changed, 16 insertions(+), 10 deletions(-) rename skfda/{_utils => misc}/lstsq.py (100%) diff --git a/skfda/_utils/__init__.py b/skfda/_utils/__init__.py index 5fc1e97df..191324177 100644 --- a/skfda/_utils/__init__.py +++ b/skfda/_utils/__init__.py @@ -1,4 +1,4 @@ -from . import constants, lstsq +from . import constants from ._utils import ( RandomStateLike, _cartesian_product, diff --git a/skfda/misc/__init__.py b/skfda/misc/__init__.py index f3ef87ad1..83431f624 100644 --- a/skfda/misc/__init__.py +++ b/skfda/misc/__init__.py @@ -1,5 +1,11 @@ -from . import covariances, kernels, metrics -from . import operators -from . import regularization -from ._math import (log, log2, log10, exp, sqrt, cumsum, - inner_product, inner_product_matrix) +from . import covariances, kernels, lstsq, metrics, operators, regularization +from ._math import ( + cumsum, + exp, + inner_product, + inner_product_matrix, + log, + log2, + log10, + sqrt, +) diff --git a/skfda/_utils/lstsq.py b/skfda/misc/lstsq.py similarity index 100% rename from skfda/_utils/lstsq.py rename to skfda/misc/lstsq.py diff --git a/skfda/preprocessing/smoothing/_basis.py b/skfda/preprocessing/smoothing/_basis.py index f482e2ca6..fbc300e42 100644 --- a/skfda/preprocessing/smoothing/_basis.py +++ b/skfda/preprocessing/smoothing/_basis.py @@ -14,7 +14,7 @@ import scipy.linalg from ..._utils import _cartesian_product -from ..._utils.lstsq import LstsqMethod, solve_regularized_weighted_lstsq +from ...misc.lstsq import LstsqMethod, solve_regularized_weighted_lstsq from ...misc.regularization import TikhonovRegularization from ...representation import FData, FDataBasis, FDataGrid from ...representation.basis import Basis diff --git a/tests/test_math.py b/tests/test_math.py index d2da4017c..dee31333f 100644 --- 
a/tests/test_math.py +++ b/tests/test_math.py @@ -19,7 +19,7 @@ def test_several_variables(self): def f(x, y, z): return x * y * z - t = np.linspace(0, 1, 100) + t = np.linspace(0, 1, 30) x2, y2, z2 = ndm(t, 2 * t, 3 * t) @@ -39,9 +39,9 @@ def f(x, y, z): res = 8 np.testing.assert_allclose( - skfda.misc.inner_product(fd, fd), res, rtol=1e-5) + skfda.misc.inner_product(fd, fd), res, rtol=1e-4) np.testing.assert_allclose( - skfda.misc.inner_product(fd_basis, fd_basis), res, rtol=1e-5) + skfda.misc.inner_product(fd_basis, fd_basis), res, rtol=1e-4) def test_vector_valued(self): From 3e19c88adfb623b003377882402bc58a786a08b3 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 27 May 2021 19:40:01 +0200 Subject: [PATCH 347/417] Make linear regression use lstsq module. --- skfda/misc/lstsq.py | 2 +- skfda/ml/regression/_linear_regression.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/skfda/misc/lstsq.py b/skfda/misc/lstsq.py index a5f5f6815..9d96cb03d 100644 --- a/skfda/misc/lstsq.py +++ b/skfda/misc/lstsq.py @@ -59,7 +59,7 @@ def solve_regularized_weighted_lstsq( *, weights: Optional[np.ndarray] = None, penalty_matrix: Optional[np.ndarray] = None, - lstsq_method: LstsqMethod = lstsq_cholesky, + lstsq_method: LstsqMethod = lstsq_svd, ) -> np.ndarray: """ Solve a regularized and weighted least squares problem. 
diff --git a/skfda/ml/regression/_linear_regression.py b/skfda/ml/regression/_linear_regression.py index 6e3032f9a..b198067d5 100644 --- a/skfda/ml/regression/_linear_regression.py +++ b/skfda/ml/regression/_linear_regression.py @@ -6,6 +6,7 @@ from sklearn.base import BaseEstimator, RegressorMixin from sklearn.utils.validation import check_is_fitted +from ...misc.lstsq import solve_regularized_weighted_lstsq from ...misc.regularization import compute_penalty_matrix from ...representation import FData from ._coefficients import coefficient_info_from_covariate @@ -157,15 +158,14 @@ def fit(self, X, y=None, sample_weight=None): # Intercept is not penalized penalty_matrix[0, 0] = 0 - gram_inner_x_coef = inner_products.T @ inner_products - if penalty_matrix is not None: - gram_inner_x_coef += penalty_matrix - inner_x_coef_y = inner_products.T @ y + basiscoefs = solve_regularized_weighted_lstsq( + coefs=inner_products, + result=y, + penalty_matrix=penalty_matrix, + ) coef_lengths = np.array([i.shape[1] for i in inner_products_list]) coef_start = np.cumsum(coef_lengths) - - basiscoefs = np.linalg.solve(gram_inner_x_coef, inner_x_coef_y) basiscoef_list = np.split(basiscoefs, coef_start) # Express the coefficients in functional form From 41adc5c7ba463278c790fa0a9da32b4df8116b18 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 29 May 2021 01:53:11 +0200 Subject: [PATCH 348/417] Simplify linear smoothers logic. --- skfda/preprocessing/smoothing/_linear.py | 70 ++++++++++++--------- skfda/preprocessing/smoothing/validation.py | 6 +- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/skfda/preprocessing/smoothing/_linear.py b/skfda/preprocessing/smoothing/_linear.py index 04ac4f3a9..20fade56a 100644 --- a/skfda/preprocessing/smoothing/_linear.py +++ b/skfda/preprocessing/smoothing/_linear.py @@ -4,13 +4,17 @@ This module contains the abstract base class for all linear smoothers. 
""" +from __future__ import annotations + import abc +from typing import Any, Mapping, Optional import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from ... import FDataGrid from ..._utils import _to_grid_points +from ...representation._typing import GridPointsLike class _LinearSmoother( @@ -28,47 +32,45 @@ class _LinearSmoother( def __init__( self, *, - output_points=None, + output_points: Optional[GridPointsLike] = None, ): self.output_points = output_points - def hat_matrix(self, input_points=None, output_points=None): - cached_input_points = getattr(self, "input_points_", None) - cached_output_points = getattr(self, "output_points_", None) + def hat_matrix( + self, + input_points: Optional[GridPointsLike] = None, + output_points: Optional[GridPointsLike] = None, + ) -> np.ndarray: # Use the fitted points if they are not provided if input_points is None: - input_points = cached_input_points + input_points = self.input_points_ if output_points is None: - output_points = cached_output_points - - if (cached_input_points is not None and - np.array_equal(input_points, cached_input_points) and - np.array_equal(output_points, cached_output_points)): - cached_hat_matrix = getattr(self, "_cached_hat_matrix", None) - if cached_hat_matrix is None: - self.cached_hat_matrix = self._hat_matrix( - input_points=self.input_points_, - output_points=self.output_points_, - ) - return self.cached_hat_matrix - - # We only cache the matrix for the fit points + output_points = self.output_points_ + return self._hat_matrix( input_points=self.input_points_, output_points=self.output_points_, ) @abc.abstractmethod - def _hat_matrix(self, input_points, output_points): + def _hat_matrix( + self, + input_points: GridPointsLike, + output_points: GridPointsLike, + ) -> np.ndarray: pass - def _more_tags(self): + def _more_tags(self) -> Mapping[str, Any]: return { - 'X_types': [] + 'X_types': [], } - def fit(self, X: FDataGrid, y=None): + def fit( + self, + X: FDataGrid, 
+ y: None = None, + ) -> _LinearSmoother: """Compute the hat matrix for the desired output points. Args: @@ -87,23 +89,26 @@ def fit(self, X: FDataGrid, y=None): else self.input_points_ ) - # Force caching the hat matrix - self.hat_matrix() + self.hat_matrix_ = self.hat_matrix() return self - def transform(self, X: FDataGrid, y=None): - """Multiplies the hat matrix for the functions values to smooth them. + def transform( + self, + X: FDataGrid, + y: None = None, + ) -> FDataGrid: + """Multiply the hat matrix with the function values to smooth them. Args: X (FDataGrid): The data to smooth. y : Ignored + Returns: FDataGrid: Functional data smoothed. """ - assert all( np.array_equal(i, s) for i, s in zip( self.input_points_, @@ -117,14 +122,19 @@ def transform(self, X: FDataGrid, y=None): grid_points=self.output_points_, ) - def score(self, X, y): - """Returns the generalized cross validation (GCV) score. + def score( + self, + X: FDataGrid, + y: FDataGrid, + ) -> float: + """Return the generalized cross validation (GCV) score. Args: X (FDataGrid): The data to smooth. y (FDataGrid): The target data. Typically the same as ``X``. + Returns: float: Generalized cross validation score. 
diff --git a/skfda/preprocessing/smoothing/validation.py b/skfda/preprocessing/smoothing/validation.py index a01080707..411f547d7 100644 --- a/skfda/preprocessing/smoothing/validation.py +++ b/skfda/preprocessing/smoothing/validation.py @@ -1,10 +1,8 @@ """Defines methods for the validation of the smoothing.""" +import numpy as np import sklearn from sklearn.model_selection import GridSearchCV -import numpy as np - - __author__ = "Miguel Carbajo Berrocal" __email__ = "miguel.carbajo@estudiante.uam.es" @@ -17,7 +15,7 @@ def _get_input_estimation_and_matrix(estimator, X): estimator.fit(X) y_est = estimator.transform(X) - hat_matrix = estimator.hat_matrix() + hat_matrix = estimator.hat_matrix_ return y_est, hat_matrix From 62f70c6d6f0f8bdecb94a558ffc455f80bd31c22 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 29 May 2021 16:01:19 +0200 Subject: [PATCH 349/417] Typing fixes. --- skfda/preprocessing/smoothing/_basis.py | 53 ++++++++++++------------ skfda/preprocessing/smoothing/_linear.py | 22 ++++------ 2 files changed, 36 insertions(+), 39 deletions(-) diff --git a/skfda/preprocessing/smoothing/_basis.py b/skfda/preprocessing/smoothing/_basis.py index fbc300e42..77d51191f 100644 --- a/skfda/preprocessing/smoothing/_basis.py +++ b/skfda/preprocessing/smoothing/_basis.py @@ -6,17 +6,18 @@ """ from __future__ import annotations -from typing import Optional, Sequence +from typing import Optional import numpy as np from typing_extensions import Final import scipy.linalg -from ..._utils import _cartesian_product +from ..._utils import _cartesian_product, _to_grid_points from ...misc.lstsq import LstsqMethod, solve_regularized_weighted_lstsq from ...misc.regularization import TikhonovRegularization from ...representation import FData, FDataBasis, FDataGrid +from ...representation._typing import GridPointsLike from ...representation.basis import Basis from ._linear import _LinearSmoother @@ -62,25 +63,24 @@ class BasisSmoother(_LinearSmoother): [RS05-5-2-8]_ Args: - basis: 
(Basis): Basis used. - weights (array_like, optional): Matrix to weight the - observations. Defaults to the identity matrix. - smoothing_parameter (int or float, optional): Smoothing - parameter. Trying with several factors in a logarithm scale is - suggested. If 0 no smoothing is performed. Defaults to 1. - regularization (int, iterable or :class:`Regularization`): - Regularization object. This allows the penalization of + basis: Basis used. + weights: Matrix to weight the observations. Defaults to the identity + matrix. + smoothing_parameter: Smoothing parameter. Trying with several + factors in a logarithm scale is suggested. If 0 no smoothing is + performed. Defaults to 1. + regularization: Regularization object. This allows the penalization of complicated models, which applies additional smoothing. By default is ``None`` meaning that no additional smoothing has to take place. - method (str): Algorithm used for calculating the coefficients using + method: Algorithm used for calculating the coefficients using the least squares method. The values admitted are 'cholesky', 'qr' - and 'matrix' for Cholesky and QR factorisation methods, and matrix - inversion respectively. The default is 'cholesky'. - output_points (ndarray, optional): The output points. If ommited, - the input points are used. If ``return_basis`` is ``True``, this - parameter is ignored. - return_basis (boolean): If ``False`` (the default) returns the smoothed + and 'svd' for Cholesky, QR and SVD factorisation methods + respectively, or a callable similar to the `lstsq` function. The + default is 'svd', which is the most robust but less performant one. + output_points: The output points. If ommited, the input points are + used. If ``return_basis`` is ``True``, this parameter is ignored. + return_basis: If ``False`` (the default) returns the smoothed data as an FDataGrid, like the other smoothers. If ``True`` returns a FDataBasis object. 
@@ -212,7 +212,7 @@ def __init__( smoothing_parameter: float = 1.0, weights: Optional[np.ndarray] = None, regularization: Optional[TikhonovRegularization[FDataGrid]] = None, - output_points: Optional[Sequence[np.ndarray]] = None, + output_points: Optional[GridPointsLike] = None, method: LstsqMethod = 'svd', return_basis: bool = False, ) -> None: @@ -226,7 +226,7 @@ def __init__( def _coef_matrix( self, - input_points: Sequence[np.ndarray], + input_points: GridPointsLike, *, data_matrix: Optional[np.ndarray] = None, ) -> np.ndarray: @@ -234,7 +234,7 @@ def _coef_matrix( from ...misc.regularization import compute_penalty_matrix basis_values_input = self.basis.evaluate( - _cartesian_product(input_points), + _cartesian_product(_to_grid_points(input_points)), ).reshape((self.basis.n_basis, -1)).T penalty_matrix = compute_penalty_matrix( @@ -258,11 +258,13 @@ def _coef_matrix( def _hat_matrix( self, - input_points: Sequence[np.ndarray], - output_points: Sequence[np.ndarray], + input_points: GridPointsLike, + output_points: GridPointsLike, ) -> np.ndarray: basis_values_output = self.basis.evaluate( - _cartesian_product(output_points), + _cartesian_product( + _to_grid_points(output_points), + ), ).reshape((self.basis.n_basis, -1)).T return basis_values_output @ self._coef_matrix(input_points) @@ -284,7 +286,7 @@ def fit( """ self.input_points_ = X.grid_points self.output_points_ = ( - self.output_points + _to_grid_points(self.output_points) if self.output_points is not None else self.input_points_ ) @@ -332,5 +334,4 @@ def transform( sample_names=X.sample_names, ) - else: - return super().transform(X, y) + return super().transform(X, y) diff --git a/skfda/preprocessing/smoothing/_linear.py b/skfda/preprocessing/smoothing/_linear.py index 20fade56a..ac87a4ce5 100644 --- a/skfda/preprocessing/smoothing/_linear.py +++ b/skfda/preprocessing/smoothing/_linear.py @@ -74,12 +74,11 @@ def fit( """Compute the hat matrix for the desired output points. 
Args: - X (FDataGrid): - The data whose points are used to compute the matrix. - y : Ignored. + X: The data whose points are used to compute the matrix. + y: Ignored. Returns: - self (object) + self """ self.input_points_ = X.grid_points @@ -101,12 +100,11 @@ def transform( """Multiply the hat matrix with the function values to smooth them. Args: - X (FDataGrid): - The data to smooth. - y : Ignored + X: The data to smooth. + y: Ignored Returns: - FDataGrid: Functional data smoothed. + Functional data smoothed. """ assert all( @@ -130,13 +128,11 @@ def score( """Return the generalized cross validation (GCV) score. Args: - X (FDataGrid): - The data to smooth. - y (FDataGrid): - The target data. Typically the same as ``X``. + X: The data to smooth. + y: The target data. Typically the same as ``X``. Returns: - float: Generalized cross validation score. + Generalized cross validation score. """ from .validation import LinearSmootherGeneralizedCVScorer From 2f3d7571aa607a534a879ad2c223df5bfe366710 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 31 May 2021 19:18:40 +0200 Subject: [PATCH 350/417] typo corrected --- skfda/exploratory/visualization/representation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index de2600d11..7e4d49bb3 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -127,12 +127,12 @@ class GraphPlot(BasePlot): in which each of the instances will be plotted. max_grad: maximum value that the gradient_list can take, it will be used to normalize the ``gradient_criteria`` in order to get values - that can be used in the funcion colormap.__call__(). If not + that can be used in the function colormap.__call__(). If not declared it will be initialized to the maximum value of gradient_list. 
min_grad: minimum value that the gradient_list can take, it will be used to normalize the ``gradient_criteria`` in order to get values - thatcan be used in the funcion colormap.__call__(). If not + that can be used in the function colormap.__call__(). If not declared it will be initialized to the minimum value of gradient_list. chart: figure over From 89fee4b75b5c4497cf74033f89665a03aea38c7d Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 31 May 2021 19:22:57 +0200 Subject: [PATCH 351/417] remove types in comments --- skfda/exploratory/visualization/_boxplot.py | 70 ++++++++++----------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 23e97ca97..66f0bf351 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -262,27 +262,27 @@ def __init__( """Initialization of the Boxplot class. Args: - fdatagrid (FDataGrid): Object containing the data. + fdatagrid: Object containing the data. depth_method: Method used to order the data. Defaults to :func:`modified band depth `. - prob (list of float, optional): List with float numbers (in the + prob: List with float numbers (in the range from 1 to 0) that indicate which central regions to represent. Defaults to [0.5] which represents the 50% central region. - factor (double): Number used to calculate the outlying envelope. + factor: Number used to calculate the outlying envelope. chart: figure over with the graphs are plotted or axis over where the graphs are plotted. If None and ax is also None, the figure is initialized. - fig (figure object, optional): figure over with the graphs are + fig: figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes (list of axis objects, optional): axis over where the graphs + axes: axis over where the graphs are plotted. If None, see param fig. 
- n_rows(int, optional): designates the number of rows of the figure + n_rows: designates the number of rows of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. - n_cols(int, optional): designates the number of columns of the + n_cols: designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. @@ -405,7 +405,7 @@ def plot(self): (dim_domain=1). Returns: - fig (figure): figure object in which the graphs are plotted. + fig: figure object in which the graphs are plotted. """ @@ -502,30 +502,28 @@ class SurfaceBoxplot(FDataBoxplot): Args: - fdatagrid (FDataGrid): Object containing the data. - method (:ref:`depth measure `, optional): Method + fdatagrid: Object containing the data. + method: Method used to order the data. Defaults to :class:`modified band depth `. - prob (list of float, optional): List with float numbers (in the + prob: List with float numbers (in the range from 1 to 0) that indicate which central regions to represent. Defaults to [0.5] which represents the 50% central region. - factor (double): Number used to calculate the outlying envelope. + factor: Number used to calculate the outlying envelope. Attributes: - fdatagrid (FDataGrid): Object containing the data. - median (array, (fdatagrid.dim_codomain, lx, ly)): contains + fdatagrid: Object containing the data. + median: contains the median/s. - central_envelope (array, (fdatagrid.dim_codomain, 2, lx, ly)): - contains the central envelope/s. - non_outlying_envelope (array,(fdatagrid.dim_codomain, 2, lx, ly)): - contains the non-outlying envelope/s. - colormap (matplotlib.colors.LinearSegmentedColormap): Colormap from + central_envelope: contains the central envelope/s. + non_outlying_envelope: contains the non-outlying envelope/s. + colormap: Colormap from which the colors to represent the central regions are selected. 
- boxcol (string): Color of the box, which includes median and central + boxcol: Color of the box, which includes median and central envelope. - outcol (string): Color of the outlying envelope. + outcol: Color of the outlying envelope. Examples: @@ -605,15 +603,13 @@ def __init__(self, fdatagrid, method=ModifiedBandDepth(), factor=1.5): """Initialization of the functional boxplot. Args: - fdatagrid (FDataGrid): Object containing the data. - method (:ref:`depth measure `, optional): Method - used to order the data. Defaults to :class:`modified band depth - `. - prob (list of float, optional): List with float numbers (in the - range from 1 to 0) that indicate which central regions to - represent. + fdatagrid: Object containing the data. + method: Method used to order the data. Defaults to :class:`modified + band depth `. + prob: List with float numbers (in the range from 1 to 0) + that indicate which central regions to represent. Defaults to [0.5] which represents the 50% central region. - factor (double): Number used to calculate the outlying envelope. + factor: Number used to calculate the outlying envelope. """ FDataBoxplot.__init__(self, factor) @@ -690,20 +686,20 @@ def plot(self, chart=None, *, fig=None, axes=None, """Visualization of the surface boxplot of the fdatagrid (dim_domain=2). Args: - fig (figure object, optional): figure over with the graphs are - plotted in case ax is not specified. If None and ax is also - None, the figure is initialized. - axes (list of axis objects, optional): axis over where the graphs - are plotted. If None, see param fig. - n_rows(int, optional): designates the number of rows of the figure + fig: figure over with the graphs are plotted in case ax + is not specified. If None and ax is also None, the figure + is initialized. + axes: axis over where the graphs are plotted. If None, + see param fig. + n_rows: designates the number of rows of the figure to plot the different dimensions of the image. 
Only specified if fig and ax are None. - n_cols(int, optional): designates the number of columns of the + n_cols: designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. Returns: - fig (figure): figure object in which the graphs are plotted. + fig: figure object in which the graphs are plotted. """ fig, axes = _get_figure_and_axes(chart, fig, axes) From 228d3e7be29fd63d9f3254ce43bf97c0673af431 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 31 May 2021 21:12:47 +0200 Subject: [PATCH 352/417] Linear regression typing. --- skfda/misc/regularization/_regularization.py | 10 +- skfda/ml/regression/_coefficients.py | 129 ++++++++++--- skfda/ml/regression/_linear_regression.py | 181 ++++++++++++++----- 3 files changed, 245 insertions(+), 75 deletions(-) diff --git a/skfda/misc/regularization/_regularization.py b/skfda/misc/regularization/_regularization.py index 7098ad690..ea920c583 100644 --- a/skfda/misc/regularization/_regularization.py +++ b/skfda/misc/regularization/_regularization.py @@ -143,12 +143,17 @@ def __init__( BasisTypes = Union[np.ndarray, FData, Basis] Regularization = TikhonovRegularization[Any] +RegularizationLike = Union[ + None, + Regularization, + Iterable[Optional[Regularization]], +] def compute_penalty_matrix( basis_iterable: Iterable[BasisTypes], regularization_parameter: Union[float, Iterable[float]], - regularization: Union[None, Regularization, Iterable[Regularization]], + regularization: RegularizationLike, ) -> Optional[np.ndarray]: """ Compute the regularization matrix for a linear differential operator. 
@@ -177,6 +182,5 @@ def compute_penalty_matrix( regularization, regularization_parameter, )] - penalty_matrix = scipy.linalg.block_diag(*penalty_blocks) - return penalty_matrix + return scipy.linalg.block_diag(*penalty_blocks) diff --git a/skfda/ml/regression/_coefficients.py b/skfda/ml/regression/_coefficients.py index 67f30ab16..3a2d8af07 100644 --- a/skfda/ml/regression/_coefficients.py +++ b/skfda/ml/regression/_coefficients.py @@ -1,12 +1,18 @@ +from __future__ import annotations + +import abc from functools import singledispatch +from typing import Any, Generic, TypeVar import numpy as np from ...misc._math import inner_product from ...representation.basis import Basis, FDataBasis +CovariateType = TypeVar("CovariateType") + -class CoefficientInfo(): +class CoefficientInfo(abc.ABC, Generic[CovariateType]): """ Information about an estimated coefficient. @@ -15,10 +21,18 @@ class CoefficientInfo(): """ - def __init__(self, basis): + def __init__( + self, + basis: CovariateType, + ) -> None: self.basis = basis - def regression_matrix(self, X, y): + @abc.abstractmethod + def regression_matrix( + self, + X: CovariateType, + y: np.ndarray, + ) -> np.ndarray: """ Return the constant coefficients matrix for regression. @@ -26,29 +40,72 @@ def regression_matrix(self, X, y): X: covariate data for regression. y: target data for regression. + Returns: + Coefficients matrix. + """ - return np.atleast_2d(X) + pass - def convert_from_constant_coefs(self, coefs): + @abc.abstractmethod + def convert_from_constant_coefs( + self, + coefs: np.ndarray, + ) -> CovariateType: """ Return the coefficients object from the constant coefs. Parameters: coefs: estimated constant coefficients. - """ - return coefs + Returns: + Coefficient. - def inner_product(self, coefs, X): """ + pass + + @abc.abstractmethod + def inner_product( + self, + coefs: CovariateType, + X: CovariateType, + ) -> np.ndarray: + """ + Inner product. 
+ Compute the inner product between the coefficient and the covariate. """ + pass + + +class CoefficientInfoNdarray(CoefficientInfo[np.ndarray]): + + def regression_matrix( # noqa: D102 + self, + X: np.ndarray, + y: np.ndarray, + ) -> np.ndarray: + + return np.atleast_2d(X) + + def convert_from_constant_coefs( # noqa: D102 + self, + coefs: np.ndarray, + ) -> np.ndarray: + + return coefs + + def inner_product( # noqa: D102 + self, + coefs: np.ndarray, + X: np.ndarray, + ) -> np.ndarray: + return inner_product(coefs, X) -class CoefficientInfoFDataBasis(CoefficientInfo): +class CoefficientInfoFDataBasis(CoefficientInfo[FDataBasis]): """ Information about a FDataBasis coefficient. @@ -57,7 +114,11 @@ class CoefficientInfoFDataBasis(CoefficientInfo): """ - def regression_matrix(self, X, y): + def regression_matrix( # noqa: D102 + self, + X: FDataBasis, + y: np.ndarray, + ) -> np.ndarray: # The matrix is the matrix of coefficients multiplied by # the matrix of inner products. @@ -65,32 +126,54 @@ def regression_matrix(self, X, y): self.inner_basis = X.basis.inner_product_matrix(self.basis) return xcoef @ self.inner_basis - def convert_from_constant_coefs(self, coefs): - return FDataBasis(self.basis, coefs.T) - - def inner_product(self, coefs, X): + def convert_from_constant_coefs( # noqa: D102 + self, + coefs: np.ndarray, + ) -> FDataBasis: + return FDataBasis(self.basis.basis, coefs.T) + + def inner_product( # noqa: D102 + self, + coefs: FDataBasis, + X: FDataBasis, + ) -> np.ndarray: # Efficient implementation of the inner product using the # inner product matrix previously computed return inner_product(coefs, X, inner_product_matrix=self.inner_basis.T) @singledispatch -def coefficient_info_from_covariate(X, y, **kwargs) -> CoefficientInfo: - """ - Make a coefficient info object from a covariate. 
+def coefficient_info_from_covariate( + X: CovariateType, + y: np.ndarray, + **_: Any, +) -> CoefficientInfo[CovariateType]: + """Make a coefficient info object from a covariate.""" + raise ValueError(f"Invalid type of covariate = {type(X)}.") - """ - return CoefficientInfo(basis=np.identity(X.shape[1], dtype=X.dtype)) + +@coefficient_info_from_covariate.register(np.ndarray) +def _coefficient_info_from_covariate_ndarray( + X: np.ndarray, + y: np.ndarray, + **_: Any, +) -> CoefficientInfo[np.ndarray]: + return CoefficientInfoNdarray(basis=np.identity(X.shape[1], dtype=X.dtype)) @coefficient_info_from_covariate.register(FDataBasis) -def coefficient_info_from_covariate_fdatabasis( - X: FDataBasis, y, **kwargs) -> CoefficientInfoFDataBasis: - basis = kwargs['basis'] +def _coefficient_info_from_covariate_fdatabasis( + X: FDataBasis, + y: np.ndarray, + *, + basis: Basis, + **_: Any, +) -> CoefficientInfoFDataBasis: + if basis is None: basis = X.basis if not isinstance(basis, Basis): raise TypeError(f"basis must be a Basis object, not {type(basis)}") - return CoefficientInfoFDataBasis(basis=basis) + return CoefficientInfoFDataBasis(basis=basis.to_basis()) diff --git a/skfda/ml/regression/_linear_regression.py b/skfda/ml/regression/_linear_regression.py index b198067d5..5de4d4eb1 100644 --- a/skfda/ml/regression/_linear_regression.py +++ b/skfda/ml/regression/_linear_regression.py @@ -1,18 +1,58 @@ +from __future__ import annotations + import itertools import warnings -from collections.abc import Iterable +from typing import Any, Iterable, List, Optional, Sequence, Tuple, Union import numpy as np from sklearn.base import BaseEstimator, RegressorMixin from sklearn.utils.validation import check_is_fitted from ...misc.lstsq import solve_regularized_weighted_lstsq -from ...misc.regularization import compute_penalty_matrix +from ...misc.regularization import ( + TikhonovRegularization, + compute_penalty_matrix, +) from ...representation import FData -from ._coefficients import 
coefficient_info_from_covariate - - -class LinearRegression(BaseEstimator, RegressorMixin): +from ...representation.basis import Basis +from ._coefficients import CoefficientInfo, coefficient_info_from_covariate + +RegularizationType = Union[ + TikhonovRegularization[Any], + Sequence[Optional[TikhonovRegularization[Any]]], + None, +] + +RegularizationIterableType = Union[ + TikhonovRegularization[Any], + Iterable[Optional[TikhonovRegularization[Any]]], + None, +] + +AcceptedDataType = Union[ + FData, + np.ndarray, +] + +AcceptedDataCoefsType = Union[ + CoefficientInfo[FData], + CoefficientInfo[np.ndarray], +] + +BasisCoefsType = Sequence[Optional[Basis]] + +ArgcheckResultType = Tuple[ + List[AcceptedDataType], + np.ndarray, + Optional[np.ndarray], + List[AcceptedDataCoefsType], +] + + +class LinearRegression( + BaseEstimator, # type: ignore + RegressorMixin, # type: ignore +): r"""Linear regression with multivariate response. This is a regression algorithm equivalent to multivariate linear @@ -38,7 +78,7 @@ class LinearRegression(BaseEstimator, RegressorMixin): for a functional covariate, the same basis is assumed. If this parameter is ``None`` (the default), it is assumed that ``None`` is provided for all covariates. - fit_intercept (bool): Whether to calculate the intercept for this + fit_intercept: Whether to calculate the intercept for this model. If set to False, no intercept will be used in calculations (i.e. data is expected to be centered). regularization (int, iterable or :class:`Regularization`): If it is @@ -55,14 +95,13 @@ class LinearRegression(BaseEstimator, RegressorMixin): ``None``. Attributes: - coef_ (iterable): A list containing the weight coefficient for each + coef\_: A list containing the weight coefficient for each covariate. For multivariate data, the covariate is a Numpy array. For functional data, the covariate is a FDataBasis object. - intercept_ (float): Independent term in the linear model. 
Set to 0.0 + intercept\_: Independent term in the linear model. Set to 0.0 if `fit_intercept = False`. Examples: - >>> from skfda.ml.regression import LinearRegression >>> from skfda.representation.basis import (FDataBasis, Monomial, ... Constant) @@ -116,22 +155,36 @@ class LinearRegression(BaseEstimator, RegressorMixin): """ - def __init__(self, *, coef_basis=None, fit_intercept=True, - regularization=None): + def __init__( + self, + *, + coef_basis: Optional[BasisCoefsType] = None, + fit_intercept: bool = True, + regularization: RegularizationType = None, + ) -> None: self.coef_basis = coef_basis self.fit_intercept = fit_intercept self.regularization = regularization - def fit(self, X, y=None, sample_weight=None): - - X, y, sample_weight, coef_info = self._argcheck_X_y( - X, y, sample_weight, self.coef_basis) + def fit( # noqa: D102 + self, + X: Union[AcceptedDataType, Sequence[AcceptedDataType]], + y: np.ndarray, + sample_weight: Optional[np.ndarray] = None, + ) -> LinearRegression: + + X_new, y, sample_weight, coef_info = self._argcheck_X_y( + X, + y, + sample_weight, + self.coef_basis, + ) - regularization = self.regularization + regularization: RegularizationIterableType = self.regularization if self.fit_intercept: new_x = np.ones((len(y), 1)) - X = [new_x] + X + X_new = [new_x] + X_new coef_info = [coefficient_info_from_covariate(new_x, y)] + coef_info if isinstance(regularization, Iterable): @@ -139,8 +192,10 @@ def fit(self, X, y=None, sample_weight=None): elif regularization is not None: regularization = (None, regularization) - inner_products_list = [c.regression_matrix(x, y) - for x, c in zip(X, coef_info)] + inner_products_list = [ + c.regression_matrix(x, y) + for x, c in zip(X_new, coef_info) + ] # This is C @ J inner_products = np.concatenate(inner_products_list, axis=1) @@ -152,7 +207,8 @@ def fit(self, X, y=None, sample_weight=None): penalty_matrix = compute_penalty_matrix( basis_iterable=(c.basis for c in coef_info), regularization_parameter=1, 
- regularization=regularization) + regularization=regularization, + ) if self.fit_intercept and penalty_matrix is not None: # Intercept is not penalized @@ -169,14 +225,16 @@ def fit(self, X, y=None, sample_weight=None): basiscoef_list = np.split(basiscoefs, coef_start) # Express the coefficients in functional form - coefs = [c.convert_from_constant_coefs(bcoefs) - for c, bcoefs in zip(coef_info, basiscoef_list)] + coefs = [ + c.convert_from_constant_coefs(bcoefs) + for c, bcoefs in zip(coef_info, basiscoef_list) + ] if self.fit_intercept: self.intercept_ = coefs[0] coefs = coefs[1:] else: - self.intercept_ = 0.0 + self.intercept_ = np.zeros(1) self.coef_ = coefs self._coef_info = coef_info @@ -184,15 +242,22 @@ def fit(self, X, y=None, sample_weight=None): return self - def predict(self, X): - from ...misc import inner_product + def predict( # noqa: D102 + self, + X: Union[AcceptedDataType, Sequence[AcceptedDataType]], + ) -> np.ndarray: check_is_fitted(self) X = self._argcheck_X(X) - result = np.sum([coef_info.inner_product(coef, x) - for coef, x, coef_info - in zip(self.coef_, X, self._coef_info)], axis=0) + result = np.sum( + [ + coef_info.inner_product(coef, x) + for coef, x, coef_info + in zip(self.coef_, X, self._coef_info) + ], + axis=0, + ) result += self.intercept_ @@ -201,8 +266,11 @@ def predict(self, X): return result - def _argcheck_X(self, X): - if isinstance(X, FData) or isinstance(X, np.ndarray): + def _argcheck_X( + self, + X: Union[AcceptedDataType, Sequence[AcceptedDataType]], + ) -> Sequence[AcceptedDataType]: + if isinstance(X, (FData, np.ndarray)): X = [X] X = [x if isinstance(x, FData) else np.asarray(x) for x in X] @@ -212,41 +280,56 @@ def _argcheck_X(self, X): return X - def _argcheck_X_y(self, X, y, sample_weight=None, coef_basis=None): - """Do some checks to types and shapes""" - + def _argcheck_X_y( + self, + X: Union[AcceptedDataType, Sequence[AcceptedDataType]], + y: np.ndarray, + sample_weight: Optional[np.ndarray] = None, + 
coef_basis: Optional[BasisCoefsType] = None, + ) -> ArgcheckResultType: + """Do some checks to types and shapes.""" # TODO: Add support for Dataframes - X = self._argcheck_X(X) + new_X = self._argcheck_X(X) if any(isinstance(i, FData) for i in y): raise ValueError( - "Some of the response variables are not scalar") + "Some of the response variables are not scalar", + ) y = np.asarray(y) if coef_basis is None: - coef_basis = [None] * len(X) + coef_basis = [None] * len(new_X) - if len(coef_basis) != len(X): - raise ValueError("Number of regression coefficients does " - "not match number of independent variables.") + if len(coef_basis) != len(new_X): + raise ValueError( + "Number of regression coefficients does " + "not match number of independent variables.", + ) - if any(len(y) != len(x) for x in X): - raise ValueError("The number of samples on independent and " - "dependent variables should be the same") + if any(len(y) != len(x) for x in new_X): + raise ValueError( + "The number of samples on independent and " + "dependent variables should be the same", + ) - coef_info = [coefficient_info_from_covariate(x, y, basis=b) - for x, b in zip(X, coef_basis)] + coef_info = [ + coefficient_info_from_covariate(x, y, basis=b) + for x, b in zip(new_X, coef_basis) + ] if sample_weight is not None: if len(sample_weight) != len(y): - raise ValueError("The number of sample weights should be " - "equal to the number of samples.") + raise ValueError( + "The number of sample weights should be " + "equal to the number of samples.", + ) if np.any(np.array(sample_weight) < 0): raise ValueError( - "The sample weights should be non negative values") + "The sample weights should be non negative values", + ) - return X, y, sample_weight, coef_info + return new_X, y, sample_weight, coef_info From e86d291bbf652e0e1c6fafed0ad4b0a4e83596c9 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Fri, 4 Jun 2021 17:19:35 +0200 Subject: [PATCH 353/417] Typing FPCA. 
--- .../dim_reduction/projection/_fpca.py | 366 ++++++++++-------- 1 file changed, 215 insertions(+), 151 deletions(-) diff --git a/skfda/preprocessing/dim_reduction/projection/_fpca.py b/skfda/preprocessing/dim_reduction/projection/_fpca.py index cb40cb666..f10077e4b 100644 --- a/skfda/preprocessing/dim_reduction/projection/_fpca.py +++ b/skfda/preprocessing/dim_reduction/projection/_fpca.py @@ -1,34 +1,48 @@ """Functional Principal Component Analysis Module.""" +from __future__ import annotations + +from typing import Optional, TypeVar + import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.decomposition import PCA from scipy.linalg import solve_triangular -from skfda.misc.regularization import compute_penalty_matrix -from skfda.representation.basis import FDataBasis +from skfda.misc.regularization import ( + TikhonovRegularization, + compute_penalty_matrix, +) +from skfda.representation import FData +from skfda.representation.basis import Basis, FDataBasis from skfda.representation.grid import FDataGrid -__author__ = "Yujian Hong" -__email__ = "yujian.hong@estudiante.uam.es" +from ....misc.lstsq import solve_regularized_weighted_lstsq + +Function = TypeVar("Function", bound=FData) -class FPCA(BaseEstimator, TransformerMixin): - """Class that implements functional principal component analysis for both +class FPCA( + BaseEstimator, # type: ignore + TransformerMixin, # type: ignore +): + r""" + Principal component analysis. + + Class that implements functional principal component analysis for both basis and grid representations of the data. Most parameters are shared - when fitting a FDataBasis or FDataGrid, except weights and components_basis. + when fitting a FDataBasis or FDataGrid, except weights and + ``components_basis``. Parameters: - n_components (int): number of principal components to obtain from + n_components: Number of principal components to keep from functional principal component analysis. Defaults to 3. 
- centering (bool): if True then calculate the mean of the functional data - object and center the data first. Defaults to True. If True the - passed FDataBasis object is modified. - regularization (Regularization): - Regularization object to be applied. - components_basis (Basis): the basis in which we want the principal - components. We can use a different basis than the basis contained in - the passed FDataBasis object. This parameter is only used when + centering: If ``True`` then calculate the mean of the functional + data object and center the data first. Defaults to ``True``. + regularization: Regularization object to be applied. + components_basis: The basis in which we want the principal + components. We can use a different basis than the basis contained + in the passed FDataBasis object. This parameter is only used when fitting a FDataBasis. weights (numpy.array or callable): the weights vector used for discrete integration. If none then the trapezoidal rule is used for @@ -38,19 +52,19 @@ class FPCA(BaseEstimator, TransformerMixin): This parameter is only used when fitting a FDataGrid. Attributes: - components_ (FData): this contains the principal components in a + components\_ (FData): this contains the principal components in a basis representation. - explained_variance_ (array_like): The amount of variance explained by + explained_variance\_ (array_like): The amount of variance explained by each of the selected components. - explained_variance_ratio_ (array_like): this contains the percentage of - variance explained by each principal component. - mean_ (FData): mean of the train data. + explained_variance_ratio\_ (array_like): this contains the percentage + of variance explained by each principal component. + mean\_ (FData): mean of the train data. Examples: - Construct an artificial FDataBasis object and run FPCA with this object. - The resulting principal components are not compared because there are - several equivalent possibilities. 
+ Construct an artificial FDataBasis object and run FPCA with this + object. The resulting principal components are not compared because + there are several equivalent possibilities. >>> import skfda >>> data_matrix = np.array([[1.0, 0.0], [0.0, 2.0]]) @@ -75,44 +89,52 @@ class FPCA(BaseEstimator, TransformerMixin): >>> fpca_grid = FPCA(2) >>> fpca_grid = fpca_grid.fit(fd) - """ - def __init__(self, - n_components=3, - centering=True, - regularization=None, - weights=None, - components_basis=None - ): + def __init__( + self, + n_components: int = 3, + centering: bool = True, + regularization: Optional[TikhonovRegularization[FData]] = None, + weights: Optional[np.ndarray] = None, + components_basis: Optional[Basis] = None, + ) -> None: self.n_components = n_components self.centering = centering self.regularization = regularization self.weights = weights self.components_basis = components_basis - def _center_if_necessary(self, X, *, learn_mean=True): + def _center_if_necessary( + self, + X: Function, + *, + learn_mean: bool = True, + ) -> Function: if learn_mean: self.mean_ = X.mean() return X - self.mean_ if self.centering else X - def _fit_basis(self, X: FDataBasis, y=None): - """Computes the first n_components principal components and saves them. + def _fit_basis( + self, + X: FDataBasis, + y: None = None, + ) -> FPCA: + """ + Compute the first n_components principal components and saves them. + The eigenvalues associated with these principal components are also saved. For more details about how it is implemented please view the referenced book. Args: - X (FDataBasis): - the functional data object to be analysed in basis - representation - y (None, not used): - only present for convention of a fit function + X: The functional data object to be analysed. + y: Ignored. Returns: - self (object) + self References: .. [RS05-8-4-2] Ramsay, J., Silverman, B. W. (2005). Basis function @@ -120,24 +142,30 @@ def _fit_basis(self, X: FDataBasis, y=None): (pp. 161-164). 
Springer. """ - # the maximum number of components is established by the target basis # if the target basis is available. - n_basis = (self.components_basis.n_basis if self.components_basis - else X.basis.n_basis) + n_basis = ( + self.components_basis.n_basis + if self.components_basis + else X.basis.n_basis + ) n_samples = X.n_samples # check that the number of components is smaller than the sample size if self.n_components > X.n_samples: - raise AttributeError("The sample size must be bigger than the " - "number of components") + raise AttributeError( + "The sample size must be bigger than the " + "number of components", + ) # check that we do not exceed limits for n_components as it should # be smaller than the number of attributes of the basis if self.n_components > n_basis: - raise AttributeError("The number of components should be " - "smaller than the number of attributes of " - "target principal components' basis.") + raise AttributeError( + "The number of components should be " + "smaller than the number of attributes of " + "target principal components' basis.", + ) # if centering is True then subtract the mean function to each function # in FDataBasis @@ -148,15 +176,16 @@ def _fit_basis(self, X: FDataBasis, y=None): if components_basis is not None: # First fix domain range if not already done components_basis = components_basis.copy( - domain_range=X.basis.domain_range) + domain_range=X.basis.domain_range, + ) g_matrix = components_basis.gram_matrix() - # the matrix that are in charge of changing the computed principal + # The matrix that are in charge of changing the computed principal # components to target matrix is essentially the inner product # of both basis. 
j_matrix = X.basis.inner_product_matrix(components_basis) else: - # if no other basis is specified we use the same basis as the passed - # FDataBasis Object + # If no other basis is specified we use the same basis as the + # passed FDataBasis object components_basis = X.basis.copy() g_matrix = components_basis.gram_matrix() j_matrix = g_matrix @@ -168,7 +197,8 @@ def _fit_basis(self, X: FDataBasis, y=None): regularization_matrix = compute_penalty_matrix( basis_iterable=(components_basis,), regularization_parameter=1, - regularization=self.regularization) + regularization=self.regularization, + ) # apply regularization if regularization_matrix is not None: @@ -181,12 +211,16 @@ def _fit_basis(self, X: FDataBasis, y=None): # using solve to get the multiplication result directly or just invert # the matrix. We choose solve because it is faster and more stable. # The following matrix is needed: L^{-1}*J^T - l_inv_j_t = solve_triangular(l_matrix, np.transpose(j_matrix), - lower=True) + l_inv_j_t = solve_triangular( + l_matrix, + np.transpose(j_matrix), + lower=True, + ) # the final matrix, C(L-1Jt)t for svd or (L-1Jt)-1CtC(L-1Jt)t for PCA - final_matrix = (X.coefficients @ np.transpose(l_inv_j_t) / - np.sqrt(n_samples)) + final_matrix = ( + X.coefficients @ np.transpose(l_inv_j_t) / np.sqrt(n_samples) + ) # initialize the pca module provided by scikit-learn pca = PCA(n_components=self.n_components) @@ -194,46 +228,56 @@ def _fit_basis(self, X: FDataBasis, y=None): # we choose solve to obtain the component coefficients for the # same reason: it is faster and more efficient - component_coefficients = solve_triangular(np.transpose(l_matrix), - np.transpose( - pca.components_), - lower=False) - - component_coefficients = np.transpose(component_coefficients) + component_coefficients = solve_triangular( + np.transpose(l_matrix), + np.transpose(pca.components_), + lower=False, + ) self.explained_variance_ratio_ = pca.explained_variance_ratio_ self.explained_variance_ = 
pca.explained_variance_ - self.components_ = X.copy(basis=components_basis, - coefficients=component_coefficients, - sample_names=(None,) * self.n_components) + self.components_ = X.copy( + basis=components_basis, + coefficients=component_coefficients.T, + sample_names=(None,) * self.n_components, + ) return self - def _transform_basis(self, X, y=None): - """Computes the n_components first principal components score and - returns them. + def _transform_basis( + self, + X: FDataBasis, + y: None = None, + ) -> np.ndarray: + """Compute the n_components first principal components score. Args: - X (FDataBasis): - the functional data object to be analysed - y (None, not used): - only present because of fit function convention + X: The functional data object to be analysed. + y: Ignored. Returns: - (array_like): the scores of the data with reference to the - principal components - """ + Principal component scores. + """ if X.basis != self._X_basis: - raise ValueError("The basis used in fit is different from " - "the basis used in transform.") + raise ValueError( + "The basis used in fit is different from " + "the basis used in transform.", + ) # in this case it is the inner product of our data with the components - return (X.coefficients @ self._j_matrix - @ self.components_.coefficients.T) + return ( + X.coefficients @ self._j_matrix + @ self.components_.coefficients.T + ) - def _fit_grid(self, X: FDataGrid, y=None): - r"""Computes the n_components first principal components and saves them. + def _fit_grid( + self, + X: FDataGrid, + y: None = None, + ) -> FPCA: + r""" + Compute the n_components first principal components and saves them. The eigenvalues associated with these principal components are also saved. For more details about how it is implemented @@ -247,31 +291,33 @@ def _fit_grid(self, X: FDataGrid, y=None): obtained using the trapezoidal rule. 
Args: - X (FDataGrid): - the functional data object to be analysed in basis - representation - y (None, not used): - only present for convention of a fit function + X: The functional data object to be analysed. + y: Ignored. Returns: - self (object) + self. References: .. [RS05-8-4-1] Ramsay, J., Silverman, B. W. (2005). Discretizing - the functions. In *Functional Data Analysis* (p. 161). Springer. - """ + the functions. In *Functional Data Analysis* (p. 161). + Springer. + """ # check that the number of components is smaller than the sample size if self.n_components > X.n_samples: - raise AttributeError("The sample size must be bigger than the " - "number of components") + raise AttributeError( + "The sample size must be bigger than the " + "number of components", + ) # check that we do not exceed limits for n_components as it should # be smaller than the number of attributes of the funcional data object if self.n_components > X.data_matrix.shape[1]: - raise AttributeError("The number of components should be " - "smaller than the number of discretization " - "points of the functional data object.") + raise AttributeError( + "The number of components should be " + "smaller than the number of discretization " + "points of the functional data object.", + ) # data matrix initialization fd_data = X.data_matrix.reshape(X.data_matrix.shape[:-1]) @@ -286,9 +332,9 @@ def _fit_grid(self, X: FDataGrid, y=None): # establish weights for each point of discretization if not self.weights: # grid_points is a list with one array in the 1D case - # in trapezoidal rule, suppose \deltax_k = x_k - x_{k-1}, the weight - # vector is as follows: [\deltax_1/2, \deltax_1/2 + \deltax_2/2, - # \deltax_2/2 + \deltax_3/2, ... , \deltax_n/2] + # in trapezoidal rule, suppose \deltax_k = x_k - x_{k-1}, the + # weight vector is as follows: [\deltax_1/2, \deltax_1/2 + + # \deltax_2/2, \deltax_2/2 + \deltax_3/2, ... 
, \deltax_n/2] differences = np.diff(X.grid_points[0]) differences = np.concatenate(((0,), differences, (0,))) self.weights = (differences[:-1] + differences[1:]) / 2 @@ -303,13 +349,14 @@ def _fit_grid(self, X: FDataGrid, y=None): basis = FDataGrid( data_matrix=np.identity(n_points_discretization), - grid_points=X.grid_points + grid_points=X.grid_points, ) regularization_matrix = compute_penalty_matrix( basis_iterable=(basis,), regularization_parameter=1, - regularization=self.regularization) + regularization=self.regularization, + ) basis_matrix = basis.data_matrix[..., 0] if regularization_matrix is not None: @@ -325,71 +372,86 @@ def _fit_grid(self, X: FDataGrid, y=None): pca = PCA(n_components=self.n_components) pca.fit(final_matrix) - self.components_ = X.copy(data_matrix=np.transpose( - np.linalg.solve(np.sqrt(weights_matrix), - np.transpose(pca.components_))), - sample_names=(None,) * self.n_components) + self.components_ = X.copy( + data_matrix=np.transpose( + np.linalg.solve( + np.sqrt(weights_matrix), + np.transpose(pca.components_), + ), + ), + sample_names=(None,) * self.n_components, + ) self.explained_variance_ratio_ = pca.explained_variance_ratio_ self.explained_variance_ = pca.explained_variance_ return self - def _transform_grid(self, X: FDataGrid, y=None): - """Computes the n_components first principal components score and - returns them. + def _transform_grid( + self, + X: FDataGrid, + y: None = None, + ) -> np.ndarray: + """ + Compute the ``n_components`` first principal components score. Args: - X (FDataGrid): - the functional data object to be analysed - y (None, not used): - only present because of fit function convention + X: The functional data object to be analysed. + y: Ignored. Returns: - (array_like): the scores of the data with reference to the - principal components - """ + Principal component scores. 
+ """ # in this case its the coefficient matrix multiplied by the principal # components as column vectors - return X.data_matrix.reshape( - X.data_matrix.shape[:-1]) @ np.transpose( - self.components_.data_matrix.reshape( - self.components_.data_matrix.shape[:-1])) + return ( + X.data_matrix.reshape(X.data_matrix.shape[:-1]) + @ np.transpose( + self.components_.data_matrix.reshape( + self.components_.data_matrix.shape[:-1], + ), + ) + ) - def fit(self, X, y=None): - """Computes the n_components first principal components and saves them - inside the FPCA object, both FDataGrid and FDataBasis are accepted + def fit( + self, + X: FData, + y: None = None, + ) -> FPCA: + """ + Compute the n_components first principal components and saves them. Args: - X (FDataGrid or FDataBasis): - the functional data object to be analysed - y (None, not used): - only present for convention of a fit function + X: The functional data object to be analysed. + y: Ignored. Returns: - self (object) + self + """ if isinstance(X, FDataGrid): return self._fit_grid(X, y) elif isinstance(X, FDataBasis): return self._fit_basis(X, y) - else: - raise AttributeError("X must be either FDataGrid or FDataBasis") - def transform(self, X, y=None): - """Computes the n_components first principal components score and - returns them. + raise AttributeError("X must be either FDataGrid or FDataBasis") + + def transform( + self, + X: FData, + y: None = None, + ) -> np.ndarray: + """ + Compute the ``n_components`` first principal components scores. Args: - X (FDataGrid or FDataBasis): - the functional data object to be analysed - y (None, not used): - only present because of fit function convention + X: The functional data object to be analysed. + y: Only present because of fit function convention Returns: - (array_like): the scores of the data with reference to the - principal components + Principal component scores. 
+ """ X = self._center_if_necessary(X, learn_mean=False) @@ -397,21 +459,23 @@ def transform(self, X, y=None): return self._transform_grid(X, y) elif isinstance(X, FDataBasis): return self._transform_basis(X, y) - else: - raise AttributeError("X must be either FDataGrid or FDataBasis") - def fit_transform(self, X, y=None, **fit_params): - """Computes the n_components first principal components and their scores - and returns them. + raise AttributeError("X must be either FDataGrid or FDataBasis") + + def fit_transform( + self, + X: FData, + y: None = None, + ) -> np.ndarray: + """ + Compute the n_components first principal components and their scores. + Args: - X (FDataGrid or FDataBasis): - the functional data object to be analysed - y (None, not used): - only present for convention of a fit function + X: The functional data object to be analysed. + y: Ignored Returns: - (array_like): the scores of the data with reference to the - principal components + Principal component scores. + """ - self.fit(X, y) - return self.transform(X, y) + return self.fit(X, y).transform(X, y) From d355430e84cf960937b557b32204a77ad04f4964 Mon Sep 17 00:00:00 2001 From: ElenaPetrunina Date: Tue, 8 Jun 2021 21:37:43 +0200 Subject: [PATCH 354/417] Bibtex references --- docs/conf.py | 5 +- docs/refs.bib | 377 ++++++++++++++++++ readthedocs-requirements.txt | 3 +- skfda/exploratory/stats/_stats.py | 7 +- skfda/exploratory/visualization/_boxplot.py | 15 +- .../visualization/_magnitude_shape_plot.py | 9 +- skfda/inference/anova/_anova_oneway.py | 18 +- skfda/inference/hotelling/_hotelling.py | 15 +- skfda/misc/metrics/_elastic_metrics.py | 29 +- .../classification/_centroid_classifiers.py | 7 +- .../variable_selection/maxima_hunting.py | 7 +- .../recursive_maxima_hunting.py | 17 +- .../registration/_landmark_registration.py | 14 +- .../registration/_shift_registration.py | 25 +- skfda/preprocessing/registration/elastic.py | 56 +-- 15 files changed, 465 insertions(+), 139 deletions(-) create 
mode 100644 docs/refs.bib diff --git a/docs/conf.py b/docs/conf.py index bfdd98b4e..dec8905cb 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -57,7 +57,10 @@ 'sphinx.ext.intersphinx', 'sphinx.ext.doctest', 'jupyter_sphinx', - 'sphinx.ext.autodoc.typehints'] + 'sphinx.ext.autodoc.typehints', + 'sphinxcontrib.bibtex'] + +bibtex_bibfiles = ['refs.bib'] autodoc_default_flags = ['members', 'inherited-members'] diff --git a/docs/refs.bib b/docs/refs.bib new file mode 100644 index 000000000..cdda1c8bc --- /dev/null +++ b/docs/refs.bib @@ -0,0 +1,377 @@ +@article{dai+genton_2018_visualization, + author = {Wenlin Dai and Marc G. Genton}, + title = {Multivariate Functional Data Visualization and Outlier Detection}, + journal = {Journal of Computational and Graphical Statistics}, + volume = {27}, + number = {4}, + pages = {923 -- 934}, + year = {2018}, + publisher = {Taylor & Francis}, + doi = {10.1080/10618600.2018.1473781}, + URL = {https://doi.org/10.1080/10618600.2018.1473781} +} + +@article{sun+genton_2011_boxplots, + author = {Ying Sun and Marc G. 
Genton}, + title = {Functional Boxplots}, + journal = {Journal of Computational and Graphical Statistics}, + volume = {20}, + number = {2}, + pages = {316 -- 334}, + year = {2011}, + publisher = {Taylor & Francis}, + doi = {10.1198/jcgs.2011.09224}, + URL = {https://doi.org/10.1198/jcgs.2011.09224} +} + +@article{gervini_2008_estimation, + author = {Gervini, Daniel}, + title = "{Robust functional estimation using the median and spherical principal components}", + journal = {Biometrika}, + volume = {95}, + number = {3}, + pages = {587 -- 600}, + year = {2008}, + month = {09}, + issn = {0006-3444}, + doi = {10.1093/biomet/asn031}, + url = {https://doi.org/10.1093/biomet/asn031} +} + +@inproceedings{breunig++_2000_outliers, + author = {Breunig, Markus and Kriegel, Hans-Peter and Ng, Raymond and Sander, Joerg}, + year = {2000}, + month = {06}, + pages = {93 -- 104}, + title = {LOF: Identifying Density-Based Local Outliers.}, + volume = {29}, + journal = {ACM Sigmod Record}, + doi = {10.1145/342009.335388} +} + +@article{cuevas++_2004_anova, + author = {Cuevas, Antonio and Febrero-Bande, Manuel and Fraiman, Ricardo}, + year = {2004}, + month = {02}, + pages = {111 -- 122}, + title = {An ANOVA test for functional data}, + volume = {47}, + journal = {Computational Statistics & Data Analysis}, + doi = {10.1016/j.csda.2003.10.021} +} + +@article{pini+stamm+vantini_2018_hotellings, + title = {Hotelling's T2 in separable Hilbert spaces}, + author = {Alessia Pini and Aymeric Stamm and Simone Vantini}, + journal = {Journal of Multivariate Analysis}, + year = {2018}, + month = {05}, + volume = {167}, + pages = {284 -- 305}, + doi = {10.1016/j.jmva.2018.05.007} +} + +@article{srivastava++_2011_ficher-rao, + author = {Srivastava, Anuj and Wu, Wei and Kurtek, Sebastian and Klassen, Eric and Marron, J.}, + year = {2011}, + journal={}, + title = {Registration of Functional Data Using Fisher-Rao Metric}, + pages = {5 -- 7}, + URL = {https://arxiv.org/abs/1103.3817v2} +} + 
+@inbook{srivastava+klassen_2016_analysis_amplitude, + author = {Srivastava, Anuj and Klassen, Eric}, + title = {Functional and Shape Data Analysis}, + chapter = {Functional Data and Elastic Registration}, + pages = {107 -- 109}, + publisher = {Springer-Verlag New York}, + year = {2016}, + isbn = {978-1-4939-4018-9}, + doi = {10.1007/978-1-4939-4020-2} +} + +@inbook{srivastava+klassen_2016_analysis_phase, + author = {Srivastava, Anuj and Klassen, Eric}, + title = {Functional and Shape Data Analysis}, + chapter = {Functional Data and Elastic Registration}, + pages = {109 -- 111}, + publisher = {Springer-Verlag New York}, + year = {2016}, + isbn = {978-1-4939-4018-9}, + doi = {10.1007/978-1-4939-4020-2} +} + +@inbook{srivastava+klassen_2016_analysis_probability, + author = {Srivastava, Anuj and Klassen, Eric}, + title = {Functional and Shape Data Analysis}, + chapter = {Functional Data and Elastic Registration}, + pages = {113 -- 117}, + publisher = {Springer-Verlag New York}, + year = {2016}, + isbn = {978-1-4939-4018-9}, + doi = {10.1007/978-1-4939-4020-2} +} + +@article{ghosh+chaudhuri_2005_depth, + author = {Ghosh, Anil and Chaudhuri, Probal}, + year = {2005}, + month = {02}, + pages = {327 -- 350}, + title = {On Maximum Depth and Related Classifiers}, + volume = {32}, + journal = {Scandinavian Journal of Statistics}, + doi = {10.1111/j.1467-9469.2005.00423.x} +} + +@article{fraiman+muniz_2001_trimmed, + author = {Fraiman, Ricardo and Muniz, Graciela}, + year = {2001}, + month = {02}, + pages = {419 -- 440}, + title = {Trimmed means for functional data}, + volume = {10}, + journal = {TEST: An Official Journal of the Spanish Society of Statistics and Operations Research}, + doi = {10.1007/BF02595706} +} + +@article{szekely+rizzo_2010_brownian, + author = {Gábor J. Székely and Maria L. 
Rizzo}, + title = {Brownian distance covariance}, + volume = {3}, + journal = {The Annals of Applied Statistics}, + number = {4}, + publisher = {Institute of Mathematical Statistics}, + pages = {1236 -- 1265}, + year = {2009}, + doi = {10.1214/09-AOAS312}, + URL = {https://doi.org/10.1214/09-AOAS312} +} + +@inproceedings{torrecilla+suarez_2016_hunting, + author = {Torrecilla, Jose L. and Su\'{a}rez, Alberto}, + title = {Feature Selection in Functional Data Classification with Recursive Maxima Hunting}, + year = {2016}, + volume = {29}, + publisher = {Curran Associates Inc.}, + booktitle = {Proceedings of the 30th International Conference on Neural Information Processing Systems}, + pages = {4835 -- 4843}, + series = {NIPS'16} +} + +@article{berrendero+cuevas+torrecilla_2016_hunting, + author = {Berrendero, J.R. and Cuevas, Antonio and Torrecilla, José}, + year = {2016}, + pages = {619 -- 638}, + title = {Variable selection in functional data classification: A maxima-hunting proposal}, + number = {2}, + volume = {26}, + journal = {Statistica Sinica}, + doi = {10.5705/ss.202014.0014} +} + +@article{berrendero+cuevas+torrecilla_2018_hilbert, + author = {José R. Berrendero and Antonio Cuevas and José L. Torrecilla}, + title = {On the Use of Reproducing Kernel Hilbert Spaces in Functional Classification}, + journal = {Journal of the American Statistical Association}, + volume = {113}, + number = {523}, + pages = {1210 -- 1218}, + year = {2018}, + publisher = {Taylor & Francis}, + doi = {10.1080/01621459.2017.1320287}, + URL = {https://doi.org/10.1080/01621459.2017.1320287} +} + +@inbook{ramsay+silverman_2005_functional_basis, + author = {James Ramsay and B. W. 
Silverman}, + title = {Functional Data Analysis}, + chapter = {Principal components analysis for functional data}, + pages = {161 -- 164}, + publisher = {Springer-Verlag New York}, + year = {2005}, + isbn = {978-0-387-40080-8}, + doi = {10.1007/b98888} +} + +@inbook{ramsay+silverman_2005_functional_discretizing, + author = {James Ramsay and B. W. Silverman}, + title = {Functional Data Analysis}, + chapter = {Principal components analysis for functional data}, + pages = {161}, + publisher = {Springer-Verlag New York}, + year = {2005}, + isbn = {978-0-387-40080-8}, + doi = {10.1007/b98888} +} + +@article{cuesta-albertos++_2015_ddg, + title = {The DDG-classifier in the functional setting}, + author = {J. A. Cuesta-Albertos and M. Febrero-Bande and M. Oviedo de la Fuente}, + journal = {TEST}, + year = {2015}, + volume = {26}, + pages = {119 -- 142} +} + +@inbook{ramsay+silverman_2005_functional_landmark, + author = {James Ramsay and B. W. Silverman}, + title = {Functional Data Analysis}, + chapter = {The registration and display of functional data}, + pages = {132 -- 136}, + publisher = {Springer-Verlag New York}, + year = {2005}, + isbn = {978-0-387-40080-8}, + doi = {10.1007/b98888} +} + +@inbook{ramsay+silverman_2005_functional_shift, + author = {James Ramsay and B. W. Silverman}, + title = {Functional Data Analysis}, + chapter = {The registration and display of functional data}, + pages = {129 -- 132}, + publisher = {Springer-Verlag New York}, + year = {2005}, + isbn = {978-0-387-40080-8}, + doi = {10.1007/b98888} +} + +@inbook{ramsay+silverman_2005_functional_newton-raphson, + author = {James Ramsay and B. W. 
Silverman}, + title = {Functional Data Analysis}, + chapter = {The registration and display of functional data}, + pages = {142 -- 144}, + publisher = {Springer-Verlag New York}, + year = {2005}, + isbn = {978-0-387-40080-8}, + doi = {10.1007/b98888} +} + +@inbook{srivastava+klassen_2016_analysis_elastic, + author = {Srivastava, Anuj and Klassen, Eric}, + title = {Functional and Shape Data Analysis}, + chapter = {Functional Data and Elastic Registration}, + pages = {73 -- 122}, + publisher = {Springer-Verlag New York}, + year = {2016}, + isbn = {978-1-4939-4018-9}, + doi = {10.1007/978-1-4939-4020-2} +} + +@inbook{srivastava+klassen_2016_analysis_square, + author = {Srivastava, Anuj and Klassen, Eric}, + title = {Functional and Shape Data Analysis}, + chapter = {Functional Data and Elastic Registration}, + pages = {91 -- 93}, + publisher = {Springer-Verlag New York}, + year = {2016}, + isbn = {978-1-4939-4018-9}, + doi = {10.1007/978-1-4939-4020-2} +} + +@inbook{srivastava+klassen_2016_analysis_orbit, + author = {Srivastava, Anuj and Klassen, Eric}, + title = {Functional and Shape Data Analysis}, + chapter = {Statistical Modeling of Functional Data}, + pages = {274 -- 277}, + publisher = {Springer-Verlag New York}, + year = {2016}, + isbn = {978-1-4939-4018-9}, + doi = {10.1007/978-1-4939-4020-2} +} + +@inbook{srivastava+klassen_2016_analysis_karcher, + author = {Srivastava, Anuj and Klassen, Eric}, + title = {Functional and Shape Data Analysis}, + chapter = {Statistical Modeling of Functional Data}, + pages = {273 -- 274}, + publisher = {Springer-Verlag New York}, + year = {2016}, + isbn = {978-1-4939-4018-9}, + doi = {10.1007/978-1-4939-4020-2} +} + +@article{srivastava++_2011_ficher-rao_orbit, + author = {Srivastava, Anuj and Wu, Wei and Kurtek, Sebastian and Klassen, Eric and Marron, J.}, + year = {2011}, + journal={}, + title = {Registration of Functional Data Using Fisher-Rao Metric}, + pages = {9 -- 10}, + URL = {https://arxiv.org/abs/1103.3817v2} +} + 
+@article{srivastava++_2011_ficher-rao_karcher, + author = {Srivastava, Anuj and Wu, Wei and Kurtek, Sebastian and Klassen, Eric and Marron, J.}, + year = {2011}, + journal={}, + title = {Registration of Functional Data Using Fisher-Rao Metric}, + pages = {7 -- 10}, + URL = {https://arxiv.org/abs/1103.3817v2} +} + + +@inbook{ramsay+silverman_2005_functional_spline, + author = {James Ramsay and B. W. Silverman}, + title = {Functional Data Analysis}, + chapter = {Smoothing functional data with a roughness penalty}, + pages = {86 -- 87}, + publisher = {Springer-Verlag New York}, + year = {2005}, + isbn = {978-0-387-40080-8}, + doi = {10.1007/b98888} +} + +@inbook{ramsay+silverman_2005_functional_spline_squares, + author = {James Ramsay and B. W. Silverman}, + title = {Functional Data Analysis}, + chapter = {Smoothing functional data with a roughness penalty}, + pages = {89 -- 90}, + publisher = {Springer-Verlag New York}, + year = {2005}, + isbn = {978-0-387-40080-8}, + doi = {10.1007/b98888} +} + +@inbook{wasserman_2006_nonparametric_nw, + author = {Larry Wasserman}, + title = {All of Nonparametric Statistics}, + chapter = {Nonparametric Regression}, + pages = {71}, + publisher = {Springer-Verlag New York}, + year = {2006}, + isbn = {978-0-387-25145-5}, + doi = {10.1007/0-387-30623-4} +} + +@inbook{wasserman_2006_nonparametric_llr, + author = {Larry Wasserman}, + title = {All of Nonparametric Statistics}, + chapter = {Nonparametric Regression}, + pages = {77}, + publisher = {Springer-Verlag New York}, + year = {2006}, + isbn = {978-0-387-25145-5}, + doi = {10.1007/0-387-30623-4} +} + +@inbook{ferraty+vieu_2006_nonparametric_knn, + author = {Frédéric Ferraty and Philippe Vieu}, + title = {Nonparametric Functional Data Analysis. 
Theory and Practice}, + chapter = {Functional Nonparametric Supervised Classification}, + pages = {116}, + publisher = {Springer-Verlag New York}, + year = {2006}, + isbn = {978-0-387-30369-7}, + doi = {10.1007/0-387-36620-2} +} + +@inbook{ramsay+silverman_2005_functional_bspline, + author = {James Ramsay and B. W. Silverman}, + title = {Functional Data Analysis}, + chapter = {From functional data to smooth functions}, + pages = {50 -- 51}, + publisher = {Springer-Verlag New York}, + year = {2005}, + isbn = {978-0-387-40080-8}, + doi = {10.1007/b98888} +} \ No newline at end of file diff --git a/readthedocs-requirements.txt b/readthedocs-requirements.txt index a3cb14b8f..382761815 100644 --- a/readthedocs-requirements.txt +++ b/readthedocs-requirements.txt @@ -13,4 +13,5 @@ setuptools>=41.2 multimethod>=1.2 findiff jupyter-sphinx -pytest \ No newline at end of file +pytest +sphinxcontrib.bibtex \ No newline at end of file diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 6aaac3871..930b5ddf9 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -126,7 +126,8 @@ def geometric_median( r"""Compute the geometric median. The sample geometric median is the point that minimizes the :math:`L_1` - norm of the vector of distances to all observations: + norm of the vector of distances to all observations + :footcite:`gervini_2008_estimation`: .. math:: @@ -158,9 +159,7 @@ def geometric_median( :func:`depth_based_median` References: - Gervini, D. (2008). Robust functional estimation using the median and - spherical principal components. Biometrika, 95(3), 587–600. - https://doi.org/10.1093/biomet/asn031 + .. 
footbibliography:: """ weights = np.full(len(X), 1 / len(X)) diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 07f69c25c..127908bfd 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -99,7 +99,8 @@ class Boxplot(FDataBoxplot): functional boxplot are: the envelope of the 50% central region, the median curve,and the maximum non-outlying envelope. In addition, outliers can be detected in a functional boxplot by the 1.5 times the 50% central region - empirical rule, analogous to the rule for classical boxplots. + empirical rule, analogous to the rule for classical boxplots + :footcite:`sun+genton_2011_boxplots`. Args: @@ -241,10 +242,7 @@ class Boxplot(FDataBoxplot): outliers=array([ True, False, False, True])) References: - - Sun, Y., & Genton, M. G. (2011). Functional Boxplots. Journal of - Computational and Graphical Statistics, 20(2), 316-334. - https://doi.org/10.1198/jcgs.2011.09224 + .. footbibliography:: """ @@ -462,7 +460,7 @@ class SurfaceBoxplot(FDataBoxplot): :ref:`depth measure ` for functional data, it represents the envelope of the 50% central region, the median curve, and the maximum non-outlying - envelope. + envelope :footcite:`sun+genton_2011_boxplots`. Args: @@ -558,10 +556,7 @@ class SurfaceBoxplot(FDataBoxplot): [ 5. ]]]))) References: - - Sun, Y., & Genton, M. G. (2011). Functional Boxplots. Journal of - Computational and Graphical Statistics, 20(2), 316-334. - https://doi.org/10.1198/jcgs.2011.09224 + .. 
footbibliography:: """ diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py index 07a9e0d62..211a84fc6 100644 --- a/skfda/exploratory/visualization/_magnitude_shape_plot.py +++ b/skfda/exploratory/visualization/_magnitude_shape_plot.py @@ -26,7 +26,8 @@ class MagnitudeShapePlot(BasePlot): This plot, which is based on the calculation of the :func:`directional outlyingness ` of each of the samples, serves as a visualization tool for the centrality - of curves. Furthermore, an outlier detection procedure is included. + of curves. Furthermore, an outlier detection procedure is included + :footcite:`dai+genton_2018_visualization`. The norm of the mean of the directional outlyingness (:math:`\lVert \mathbf{MO}\rVert`) is plotted in the x-axis, and the variation of the @@ -153,11 +154,7 @@ class MagnitudeShapePlot(BasePlot): title='MS-Plot') References: - - Dai, W., & Genton, M. G. (2018). Multivariate Functional Data - Visualization and Outlier Detection. Journal of Computational - and Graphical Statistics, 27(4), 923-934. - https://doi.org/10.1080/10618600.2018.1473781 + .. footbibliography:: """ diff --git a/skfda/inference/anova/_anova_oneway.py b/skfda/inference/anova/_anova_oneway.py index 39ef66687..6b38d53f2 100644 --- a/skfda/inference/anova/_anova_oneway.py +++ b/skfda/inference/anova/_anova_oneway.py @@ -33,7 +33,7 @@ def v_sample_stat(fd: FData, weights: ArrayLike, p: int = 2) -> float: .. math:: V_n = \sum_{i float: 0.01649448843348894 References: - [1] Antonio Cuevas, Manuel Febrero-Bande, and Ricardo Fraiman. "An - anova test for functional data". *Computational Statistics Data - Analysis*, 47:111-112, 02 2004 + .. footbibliography:: """ weights = np.asarray(weights) @@ -113,7 +111,7 @@ def v_asymptotic_stat(fd: FData, weights: ArrayLike, p: int = 2) -> float: .. math:: \sum_{i float: 0.0018159320335885969 References: - [1] Antonio Cuevas, Manuel Febrero-Bande, and Ricardo Fraiman. 
"An - anova test for functional data". *Computational Statistics Data - Analysis*, 47:111-112, 02 2004 + .. footbibliography:: """ weights = np.asarray(weights) @@ -290,7 +286,7 @@ def oneway_anova( calculated. This procedure is repeated `n_reps` times, creating a sampling distribution of the statistic. - This procedure is from Cuevas[1]. + This procedure is from Cuevas :footcite:`cuevas++_2004_anova`. Args: args: The sample measurements for each each group. @@ -330,9 +326,7 @@ def oneway_anova( [ 184.0698 212.7395 195.3663] References: - [1] Antonio Cuevas, Manuel Febrero-Bande, and Ricardo Fraiman. "An - anova test for functional data". *Computational Statistics Data - Analysis*, 47:111-112, 02 2004 + .. footbibliography:: """ if len(args) < 2: diff --git a/skfda/inference/hotelling/_hotelling.py b/skfda/inference/hotelling/_hotelling.py index 24258956a..854617790 100644 --- a/skfda/inference/hotelling/_hotelling.py +++ b/skfda/inference/hotelling/_hotelling.py @@ -43,7 +43,8 @@ def hotelling_t2( covariance matrices, computed with the basis coefficients or using the discrete representation, depending on the input. - This statistic is defined in Pini, Stamm and Vantini[1]. + This statistic is defined in Pini, Stamm and Vantini + :footcite:`pini+stamm+vantini_2018_hotellings`. Args: fd1: Object with the first sample. @@ -69,9 +70,7 @@ def hotelling_t2( '2.00' References: - [1] A. Pini, A. Stamm and S. Vantini, "Hotelling's t2 in - separable hilbert spaces", *Jounal of Multivariate Analysis*, - 167 (2018), pp.284-305. + .. footbibliography:: """ if not isinstance(fd1, FData): @@ -165,7 +164,8 @@ def hotelling_test_ind( number of repetitions of the algorithm is provided then the permutations tested are generated randomly. - This procedure is from Pini, Stamm and Vantinni[1]. + This procedure is from Pini, Stamm and Vantinni + :footcite:`pini+stamm+vantini_2018_hotellings`. Args: fd1: First sample of data. @@ -202,9 +202,8 @@ def hotelling_test_ind( [ 2. 2. 0. 0. 2. 
2.] References: - [1] A. Pini, A. Stamm and S. Vantini, "Hotelling's t2 in - separable hilbert spaces", *Jounal of Multivariate Analysis*, - 167 (2018), pp.284-305. + .. footbibliography:: + """ if not isinstance(fd1, FData): raise TypeError("Argument type must inherit FData.") diff --git a/skfda/misc/metrics/_elastic_metrics.py b/skfda/misc/metrics/_elastic_metrics.py index 871bac8bd..8025d4d4e 100644 --- a/skfda/misc/metrics/_elastic_metrics.py +++ b/skfda/misc/metrics/_elastic_metrics.py @@ -38,7 +38,7 @@ def fisher_rao_distance( If the observations are distributions of random variables the distance will match with the usual fisher-rao distance in non-parametric form for - probability distributions [S11-2]_. + probability distributions :footcite:`srivastava++_2011_ficher-rao`. If the observations are defined in a :term:`domain` different than (0,1) their domains are normalized to this interval with an affine @@ -56,9 +56,7 @@ def fisher_rao_distance( ValueError: If the objects are not unidimensional. References: - .. [S11-2] Srivastava, Anuj et. al. Registration of Functional Data - Using Fisher-Rao Metric (2011). In *Function Representation and - Metric* (pp. 5-7). arXiv:1103.3817v2. + .. footbibliography:: """ fdata1, fdata2 = _cast_to_grid( @@ -121,7 +119,8 @@ def amplitude_distance( .. math:: \mathcal{R}(\gamma) = \|\sqrt{\dot{\gamma}}- 1 \|_{\mathbb{L}^2}^2 - See [SK16-4-10-1]_ for a detailed explanation. + See :footcite:`srivastava+klassen_2016_analysis_amplitude` for a + detailed explanation. If the observations are defined in a :term:`domain` different than (0,1) their domains are normalized to this interval with an affine @@ -142,9 +141,8 @@ def amplitude_distance( ValueError: If the objects are not unidimensional. References: - .. [SK16-4-10-1] Srivastava, Anuj & Klassen, Eric P. (2016). - Functional and shape data analysis. In *Amplitude Space and a - Metric Structure* (pp. 107-109). Springer. + .. 
footbibliography:: + """ fdata1, fdata2 = _cast_to_grid( fdata1, @@ -214,7 +212,8 @@ def phase_distance( d_{P}(f_i, f_j) = d_{FR}(\gamma_{ij}, \gamma_{id}) = arcos \left ( \int_0^1 \sqrt {\dot \gamma_{ij}(t)} dt \right ) - See [SK16-4-10-2]_ for a detailed explanation. + See :footcite:`srivastava+klassen_2016_analysis_phase` for a detailed + explanation. If the observations are defined in a :term:`domain` different than (0,1) their domains are normalized to this interval with an affine @@ -233,9 +232,8 @@ def phase_distance( ValueError: If the objects are not unidimensional. References: - .. [SK16-4-10-2] Srivastava, Anuj & Klassen, Eric P. (2016). - Functional and shape data analysis. In *Phase Space and a Metric - Structure* (pp. 109-111). Springer. + .. footbibliography:: + """ fdata1, fdata2 = _cast_to_grid( fdata1, @@ -295,7 +293,8 @@ def warping_distance( d_{\Gamma}(\gamma_i, \gamma_j) = cos^{-1} \left ( \int_0^1 \sqrt{\dot \gamma_i(t)\dot \gamma_j(t)}dt \right ) - See [SK16-4-11-2]_ for a detailed explanation. + See :footcite:`srivastava+klassen_2016_analysis_probability` for a detailed + explanation. If the warpings are not defined in [0,1], an affine transformation is maked to change the :term:`domain`. @@ -312,9 +311,7 @@ def warping_distance( ValueError: If the objects are not unidimensional. References: - .. [SK16-4-11-2] Srivastava, Anuj & Klassen, Eric P. (2016). - Functional and shape data analysis. In *Probability Density - Functions* (pp. 113-117). Springer. + .. footbibliography:: """ warping1, warping2 = _cast_to_grid( diff --git a/skfda/ml/classification/_centroid_classifiers.py b/skfda/ml/classification/_centroid_classifiers.py index db2df1920..33a7c9b38 100644 --- a/skfda/ml/classification/_centroid_classifiers.py +++ b/skfda/ml/classification/_centroid_classifiers.py @@ -124,7 +124,8 @@ class DTMClassifier( """Distance to trimmed means (DTM) classification. 
Test samples are classified to the class that minimizes the distance of - the observation to the trimmed mean of the group. + the observation to the trimmed mean of the group + :footcite:`fraiman+muniz_2001_trimmed`. Parameters: proportiontocut: @@ -173,8 +174,8 @@ class DTMClassifier( :class:`~skfda.ml.classification.NearestCentroid` References: - Fraiman, R. and Muniz, G. (2001). Trimmed means for functional - data. Test, 10, 419-440. + .. footbibliography:: + """ def __init__( diff --git a/skfda/preprocessing/dim_reduction/variable_selection/maxima_hunting.py b/skfda/preprocessing/dim_reduction/variable_selection/maxima_hunting.py index bfd6377cb..74f2c35f4 100644 --- a/skfda/preprocessing/dim_reduction/variable_selection/maxima_hunting.py +++ b/skfda/preprocessing/dim_reduction/variable_selection/maxima_hunting.py @@ -120,7 +120,7 @@ class MaximaHunting( For a longer explanation about the method, and comparison with other functional variable selection methods, we refer the reader to the - original article [1]_. + original article :footcite:`berrendero+cuevas+torrecilla_2016_hunting`. Parameters: dependence_measure (callable): Dependence measure to use. By default, @@ -182,10 +182,7 @@ class MaximaHunting( (10000, 1) References: - .. [1] J. R. Berrendero, A. Cuevas, and J. L. Torrecilla, “Variable - selection in functional data classification: a maxima-hunting - proposal,” STAT SINICA, vol. 26, no. 2, pp. 619–638, 2016, - doi: 10.5705/ss.202014.0014. + .. 
footbibliography:: """ diff --git a/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py b/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py index be3b5f22d..b4e4798ca 100644 --- a/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py +++ b/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py @@ -522,7 +522,7 @@ class AsymptoticIndependenceTestStop(StoppingCondition): Stop when the selected point is independent from the target. It uses an asymptotic test based on the chi-squared distribution described - in [1]_. The test rejects independence if + in :footcite:`szekely+rizzo_2010_brownian`. The test rejects independence if .. math:: @@ -542,11 +542,7 @@ class AsymptoticIndependenceTestStop(StoppingCondition): default is 0.01 (1%). References: - - .. [1] G. J. Székely and M. L. Rizzo, “Brownian distance covariance,” - Ann. Appl. Stat., vol. 3, no. 4, pp. 1236–1265, Dec. 2009, - doi: 10.1214/09-AOAS312. - + .. footbibliography:: """ @@ -811,7 +807,8 @@ class RecursiveMaximaHunting( relevant once other points are selected. Those points would not be selected by :class:`MaximaHunting` alone. - This method was originally described in a special case in article [1]_. + This method was originally described in a special case in article + :footcite:`torrecilla+suarez_2016_hunting`. Additional information about the usage of this method can be found in :doc:`/modules/preprocessing/dim_reduction/recursive_maxima_hunting`. @@ -880,11 +877,7 @@ class RecursiveMaximaHunting( (1000, 3) References: - - .. [1] J. L. Torrecilla and A. Suárez, “Feature selection in - functional data classification with recursive maxima hunting,” - in Advances in Neural Information Processing Systems 29, - Curran Associates, Inc., 2016, pp. 4835–4843. + .. 
footbibliography:: """ diff --git a/skfda/preprocessing/registration/_landmark_registration.py b/skfda/preprocessing/registration/_landmark_registration.py index 5997ca610..2bdafaaa6 100644 --- a/skfda/preprocessing/registration/_landmark_registration.py +++ b/skfda/preprocessing/registration/_landmark_registration.py @@ -188,7 +188,8 @@ def landmark_registration_warping( :math:`h_i(t^*_j)=t_{ij}`. The registered samples can be obtained as :math:`x^*_i(t)=x_i(h_i(t))`. - See [RS05-7-3-1]_ for a detailed explanation. + See :footcite:`ramsay+silverman_2005_functional_landmark` + for a detailed explanation. Args: fd: Functional data object. @@ -210,9 +211,7 @@ def landmark_registration_warping( the number of samples. References: - .. [RS05-7-3-1] Ramsay, J., Silverman, B. W. (2005). Feature or - landmark registration. In *Functional Data Analysis* (pp. 132-136). - Springer. + .. footbibliography:: Examples: >>> from skfda.datasets import make_multimodal_landmarks @@ -316,7 +315,8 @@ def landmark_registration( The registered samples will have their features aligned, i.e., :math:`x^*_i(t^*_j)=x_i(t_{ij})`. - See [RS05-7-3]_ for a detailed explanation. + See :footcite:`ramsay+silverman_2005_functional_landmark` + for a detailed explanation. Args: fd: Functional data object. @@ -334,9 +334,7 @@ def landmark_registration( FDataGrid with the functional data object registered. References: - .. [RS05-7-3] Ramsay, J., Silverman, B. W. (2005). Feature or landmark - registration. In *Functional Data Analysis* (pp. 132-136). - Springer. + .. 
footbibliography:: Examples: >>> from skfda.datasets import make_multimodal_landmarks diff --git a/skfda/preprocessing/registration/_shift_registration.py b/skfda/preprocessing/registration/_shift_registration.py index d298d6546..0416e2e49 100644 --- a/skfda/preprocessing/registration/_shift_registration.py +++ b/skfda/preprocessing/registration/_shift_registration.py @@ -23,9 +23,10 @@ class ShiftRegistration(RegistrationTransformer): r"""Register a functional dataset using shift alignment. Realizes the registration of a set of curves using a shift aligment - [RaSi2005-7-2]_. Let :math:`\{x_i(t)\}_{i=1}^{N}` be a functional dataset, - calculates :math:`\delta_{i}` for each sample such that - :math:`x_i(t + \delta_{i})` minimizes the least squares criterion: + :footcite:`ramsay+silverman_2005_functional_shift`. + Let :math:`\{x_i(t)\}_{i=1}^{N}` be a functional dataset, calculates + :math:`\delta_{i}` for each sample such that :math:`x_i(t + \delta_{i})` + minimizes the least squares criterion: .. math:: \text{REGSSE} = \sum_{i=1}^{N} \int_{\mathcal{T}} @@ -34,7 +35,7 @@ class ShiftRegistration(RegistrationTransformer): Estimates each shift parameter :math:`\delta_i` iteratively by using a modified Newton-Raphson algorithm, updating the template :math:`\mu` in each iteration as is described in detail in - [RaSi2005-7-9-1]_. + :footcite:`ramsay+silverman_2005_functional_newton-raphson`. Method only implemented for univariate functional data. @@ -54,14 +55,16 @@ class ShiftRegistration(RegistrationTransformer): If the template is an FData is used directly as the final template to the registration, if it is a callable or "mean" the template is computed iteratively constructing a temporal template - in each iteration. In [RaSi2005-7-9-1]_ is described in detail this - procedure. Defaults to "mean". + in each iteration. + In :footcite:`ramsay+silverman_2005_functional_newton-raphson` + is described in detail this procedure. Defaults to "mean". 
extrapolation: Controls the extrapolation mode for points outside the :term:`domain` range. By default uses the method defined in the data to be transformed. See the `extrapolation` documentation to obtain more information. step_size: Parameter to adjust the rate of - convergence in the Newton-Raphson algorithm, see [RaSi2005-7-9-1]_. + convergence in the Newton-Raphson algorithm, see + :footcite:`ramsay+silverman_2005_functional_newton-raphson`. Defaults to 1. restrict_domain: If True restricts the :term:`domain` to avoid the need of using extrapolation, in which @@ -122,12 +125,8 @@ class ShiftRegistration(RegistrationTransformer): References: - .. [RaSi2005-7-2] Ramsay, J., Silverman, B. W. (2005). Shift - registration. In *Functional Data Analysis* (pp. 129-132). - Springer. - .. [RaSi2005-7-9-1] Ramsay, J., Silverman, B. W. (2005). Shift - registration by the Newton-Raphson algorithm. In *Functional - Data Analysis* (pp. 142-144). Springer. + .. footbibliography:: + """ def __init__( diff --git a/skfda/preprocessing/registration/elastic.py b/skfda/preprocessing/registration/elastic.py index 6d189d29a..c90280c85 100644 --- a/skfda/preprocessing/registration/elastic.py +++ b/skfda/preprocessing/registration/elastic.py @@ -38,7 +38,7 @@ class SRSF(BaseEstimator, TransformerMixin): # type: ignore This representation it is used to compute the extended non-parametric Fisher-Rao distance between functions, wich under the SRSF representation becomes the usual :math:`\mathbb{L}^2` distance between functions. - See [SK16-4-6]_ . + See :footcite:`srivastava+klassen_2016_analysis_square`. The inverse SRSF transform is defined as @@ -75,9 +75,7 @@ class SRSF(BaseEstimator, TransformerMixin): # type: ignore in order to achieve good results. References: - .. [SK16-4-6] Srivastava, Anuj & Klassen, Eric P. (2016). Functional - and shape data analysis. In *Square-Root Slope Function - Representation* (pp. 91-93). Springer. + .. 
footbibliography:: Examples: Create a toy dataset and apply the transformation and its inverse. @@ -131,7 +129,8 @@ def transform(self, X: FDataGrid, y: None = None) -> FDataGrid: r"""Compute the square-root slope function (SRSF) transform. Let :math:`f : [a,b] \rightarrow \mathbb{R}` be an absolutely - continuous function, the SRSF transform is defined as [SK16-4-6-1]_: + continuous function, the SRSF transform is defined as + :footcite:`srivastava+klassen_2016_analysis_square`: .. math:: @@ -147,11 +146,6 @@ def transform(self, X: FDataGrid, y: None = None) -> FDataGrid: Raises: ValueError: If functions are not univariate. - References: - .. [SK16-4-6-1] Srivastava, Anuj & Klassen, Eric P. (2016). - Functional and shape data analysis. In *Square-Root Slope - Function Representation* (pp. 91-93). Springer. - """ check_is_univariate(X) @@ -182,7 +176,7 @@ def inverse_transform(self, X: FDataGrid, y: None = None) -> FDataGrid: r"""Compute the inverse SRSF transform. Given the srsf and the initial value the original function can be - obtained as [SK16-4-6-2]_ : + obtained as :footcite:`srivastava+klassen_2016_analysis_square`: .. math:: f(t) = f(a) + \int_{a}^t q(t)|q(t)|dt @@ -201,12 +195,6 @@ def inverse_transform(self, X: FDataGrid, y: None = None) -> FDataGrid: Raises: ValueError: If functions are multidimensional. - - References: - .. [SK16-4-6-2] Srivastava, Anuj & Klassen, Eric P. (2016). - Functional and shape data analysis. In *Square-Root Slope - Function Representation* (pp. 91-93). Springer. - """ check_is_univariate(X) @@ -298,15 +286,16 @@ def warping_mean( \gamma_i(b)=b`. The karcher mean :math:`\bar \gamma` is defined as the warping that - minimises locally the sum of Fisher-Rao squared distances. - [SK16-8-3-2]_. + minimises locally the sum of Fisher-Rao squared distances + :footcite:`srivastava+klassen_2016_analysis_orbit`. .. 
math:: \bar \gamma = argmin_{\gamma \in \Gamma} \sum_{i=1}^{n} d_{FR}^2(\gamma, \gamma_i) The computation is performed using the structure of Hilbert Sphere obtained - after a transformation of the warpings, see [S11-3-3]_. + after a transformation of the warpings, see + :footcite:`srivastava++_2011_ficher-rao_orbit`. Args: warping: Set of warpings. @@ -321,13 +310,7 @@ def warping_mean( the mean. References: - .. [SK16-8-3-2] Srivastava, Anuj & Klassen, Eric P. (2016). Functional - and shape data analysis. In *Template: Center of the Mean Orbit* - (pp. 274-277). Springer. - - .. [S11-3-3] Srivastava, Anuj et. al. Registration of Functional Data - Using Fisher-Rao Metric (2011). In *Center of an Orbit* (pp. 9-10). - arXiv:1103.3817v2. + .. footbibliography:: """ eval_points = warping.grid_points[0] @@ -441,7 +424,8 @@ def elastic_mean( equivalence class which makes the mean of the warpings employed be the identity. - See [SK16-8-3-1]_ and [S11-3]_. + See :footcite:`srivastava+klassen_2016_analysis_karcher` and + :footcite:`srivastava++_2011_ficher-rao_karcher`. Args: fdatagrid: Set of functions to compute the @@ -466,13 +450,7 @@ def elastic_mean( do not match with the fdatagrid. References: - .. [SK16-8-3-1] Srivastava, Anuj & Klassen, Eric P. (2016). Functional - and shape data analysis. In *Karcher Mean of Amplitudes* - (pp. 273-274). Springer. - - .. [S11-3] Srivastava, Anuj et. al. Registration of Functional Data - Using Fisher-Rao Metric (2011). In *Karcher Mean and Function - Alignment* (pp. 7-10). arXiv:1103.3817v2. + .. footbibliography:: """ check_is_univariate(fdatagrid) @@ -622,8 +600,8 @@ class ElasticRegistration(RegistrationTransformer): `elastic mean`, wich is the local minimum of the sum of squares of elastic distances. See :func:`~elastic_mean`. - In [SK16-4-2]_ are described extensively the algorithms employed and - the SRSF framework. 
+ In :footcite:`srivastava+klassen_2016_analysis_elastic` are described + extensively the algorithms employed and the SRSF framework. Args: template (str, :class:`FDataGrid` or callable, optional): Template to @@ -645,9 +623,7 @@ class ElasticRegistration(RegistrationTransformer): transformation. References: - .. [SK16-4-2] Srivastava, Anuj & Klassen, Eric P. (2016). Functional - and shape data analysis. In *Functional Data and Elastic - Registration* (pp. 73-122). Springer. + .. footbibliography:: Examples: Elastic registration of with train/test sets. From 142beb6250c3f86c92923de72834b4af6553bbef Mon Sep 17 00:00:00 2001 From: VNMabus Date: Fri, 11 Jun 2021 13:57:10 +0200 Subject: [PATCH 355/417] Use cached hat matrix. --- skfda/preprocessing/smoothing/_linear.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/preprocessing/smoothing/_linear.py b/skfda/preprocessing/smoothing/_linear.py index ac87a4ce5..f9d787d73 100644 --- a/skfda/preprocessing/smoothing/_linear.py +++ b/skfda/preprocessing/smoothing/_linear.py @@ -116,7 +116,7 @@ def transform( # The matrix is cached return X.copy( - data_matrix=self.hat_matrix() @ X.data_matrix, + data_matrix=self.hat_matrix_ @ X.data_matrix, grid_points=self.output_points_, ) From 7c666a5ee4789046bfef543be9d39c4bb88a7852 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 13 Jun 2021 17:01:36 +0200 Subject: [PATCH 356/417] changes --- skfda/datasets/_real_datasets.py | 4 ++-- skfda/exploratory/visualization/representation.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index aa3b66bb5..db7103e37 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -437,7 +437,7 @@ def fetch_growth( target_name = "sex" target_categories = ["male", "female"] frame = None - + if as_frame: sex = pd.Categorical.from_codes(sex, categories=target_categories) frame = pd.DataFrame({ @@ -449,7 
+449,7 @@ def fetch_growth( if return_X_y: return curves, sex - + return Bunch( data=curves, target=sex, diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 7e4d49bb3..d18df5240 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -363,7 +363,7 @@ def plot( Z[h, ..., k], **color_dict, **kwargs, - )[0] + ) _set_labels(self.fdata, self.fig, self.axes, self.patches) From 9619df545c40fde3fda23014040efda651f1682a Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 13 Jun 2021 17:39:37 +0200 Subject: [PATCH 357/417] bug correct3ed while picking --- skfda/exploratory/visualization/_multiple_display.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index b0479905d..d442e23d2 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -541,3 +541,5 @@ def value_updated(self, value: int) -> None: self.reduce_points_intensity() else: self.change_points_intensity(old_index=old_index) + if self.index_clicked == old_index: + self.clicked = False From fd2a3fe2eb10cf0577a796cfea9c66e84dff8001 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Sun, 13 Jun 2021 17:54:00 +0200 Subject: [PATCH 358/417] correction of bug v2 --- skfda/exploratory/visualization/_multiple_display.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index d442e23d2..c7e31b17c 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -540,6 +540,6 @@ def value_updated(self, value: int) -> None: if old_index == -1: self.reduce_points_intensity() else: - 
self.change_points_intensity(old_index=old_index) if self.index_clicked == old_index: self.clicked = False + self.change_points_intensity(old_index=old_index) From b0b1935734398e273a01b288ef734dae5fefa445 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 14 Jun 2021 17:22:42 +0200 Subject: [PATCH 359/417] solved change --- skfda/exploratory/visualization/_boxplot.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 66f0bf351..89bb68ad1 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -250,7 +250,7 @@ def __init__( self, fdatagrid: FData, depth_method: Optional[Depth[FDataGrid]] = None, - prob: Tuple[float, ...] = (0.5,), + prob: Sequence[float] = [0.5], factor: float = 1.5, chart: Union[Figure, Axes, None] = None, *, @@ -294,7 +294,7 @@ def __init__( raise ValueError( "Function only supports FDataGrid with domain dimension 1.") - if sorted(prob, reverse=True) != list(prob): + if sorted(prob, reverse=True) != prob: raise ValueError( "Probabilities required to be in descending order.") @@ -606,9 +606,6 @@ def __init__(self, fdatagrid, method=ModifiedBandDepth(), factor=1.5): fdatagrid: Object containing the data. method: Method used to order the data. Defaults to :class:`modified band depth `. - prob: List with float numbers (in the range from 1 to 0) - that indicate which central regions to represent. - Defaults to [0.5] which represents the 50% central region. factor: Number used to calculate the outlying envelope. """ @@ -686,7 +683,7 @@ def plot(self, chart=None, *, fig=None, axes=None, """Visualization of the surface boxplot of the fdatagrid (dim_domain=2). Args: - fig: figure over with the graphs are plotted in case ax + fig: figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. 
axes: axis over where the graphs are plotted. If None, From cf71d0d1baf897f89e0f3221226af457f5f42a2f Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 14 Jun 2021 17:23:39 +0200 Subject: [PATCH 360/417] boxplot --- skfda/exploratory/visualization/_boxplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 89bb68ad1..8f08c219e 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -6,7 +6,7 @@ """ import math from abc import ABC, abstractmethod -from typing import Optional, Sequence, Tuple, Union +from typing import Optional, Sequence, Union import matplotlib import matplotlib.pyplot as plt From 300948d843fb9f88adabaceac64129c1c6ae2eb0 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 14 Jun 2021 17:26:20 +0200 Subject: [PATCH 361/417] change --- skfda/exploratory/visualization/fpca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/fpca.py b/skfda/exploratory/visualization/fpca.py index 9f58fbcc2..447a90ddc 100644 --- a/skfda/exploratory/visualization/fpca.py +++ b/skfda/exploratory/visualization/fpca.py @@ -47,7 +47,7 @@ def __init__( self._set_figure_and_axes(chart, fig, axes) - def plot(self, **kwargs: Any): + def plot(self, **kwargs: Any) -> Figure: """ Plots the perturbation graphs for the principal components. The perturbations are defined as variations over the mean. 
Adding a multiple From fca87a4e05ff1e6eacb74b670be40ba5d827d282 Mon Sep 17 00:00:00 2001 From: mellamansanchez Date: Mon, 14 Jun 2021 17:42:18 +0200 Subject: [PATCH 362/417] repr --- .../visualization/representation.py | 198 +++++++++--------- 1 file changed, 98 insertions(+), 100 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 50689d531..4a13fd22f 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -7,21 +7,14 @@ like depth measures. """ -from typing import ( - Any, - Mapping, - Optional, - Sequence, - Tuple, - TypeVar, - Union, -) +from typing import Any, Mapping, Optional, Sequence, Tuple, TypeVar, Union import matplotlib.cm import matplotlib.patches import numpy as np from matplotlib.artist import Artist from matplotlib.axes import Axes +from matplotlib.colors import Colormap from matplotlib.figure import Figure from typing_extensions import Protocol @@ -122,7 +115,7 @@ def _get_color_info( class GraphPlot(BasePlot): """ - Class used to plot the FDatGrid object graph as hypersurfaces. + Class used to plot the FDataGrid object graph as hypersurfaces. When plotting functional data, we can either choose manually a color, a group of colors for the representations. Besides, we can use a list of @@ -130,63 +123,67 @@ class GraphPlot(BasePlot): argument to display the functions wtih a gradient of colors. Args: fdata: functional data set that we want to plot. - gradient_color_list: list of real values used to determine the color - in which each of the instances will be plotted. The size + gradient_criteria: list of real values used to determine the color + in which each of the instances will be plotted. max_grad: maximum value that the gradient_list can take, it will be - used to normalize the gradient_color_list in order to get values - thatcan be used in the funcion colormap.__call__(). 
If not + used to normalize the ``gradient_criteria`` in order to get values + that can be used in the function colormap.__call__(). If not declared it will be initialized to the maximum value of - gradient_list + gradient_list. min_grad: minimum value that the gradient_list can take, it will be - used to normalize the gradient_color_list in order to get values - thatcan be used in the funcion colormap.__call__(). If not + used to normalize the ``gradient_criteria`` in order to get values + that can be used in the function colormap.__call__(). If not declared it will be initialized to the minimum value of gradient_list. - chart (figure object, axe or list of axes, optional): figure over + chart: figure over with the graphs are plotted or axis over where the graphs are plotted. If None and ax is also None, the figure is initialized. - fig (figure object, optional): figure over with the graphs are + fig: figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes (axis object, optional): axis over where the graphs + axes: axis over where the graphs are plotted. If None, see param fig. - n_rows (int, optional): designates the number of rows of the figure + n_rows: designates the number of rows of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. - n_cols(int, optional): designates the number of columns of the + n_cols: designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. - n_points (int or tuple, optional): Number of points to evaluate in + n_points: Number of points to evaluate in the plot. In case of surfaces a tuple of length 2 can be pased with the number of points to plot in each axis, otherwise the same number of points will be used in the two axes. 
By default in unidimensional plots will be used 501 points; in surfaces will be used 30 points per axis, wich makes a grid with 900 points. - domain_range (tuple or list of tuples, optional): Range where the + domain_range: Range where the function will be plotted. In objects with unidimensional domain the domain range should be a tuple with the bounds of the interval; in the case of surfaces a list with 2 tuples with the ranges for each dimension. Default uses the domain range of the functional object. - group (list of int): contains integers from [0 to number of + group: contains integers from [0 to number of labels) indicating to which group each sample belongs to. Then, the samples with the same label are plotted in the same color. If None, the default value, each sample is plotted in the color assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - group_colors (list of colors): colors in which groups are + group_colors: colors in which groups are represented, there must be one for each group. If None, each group is shown with distict colors in the "Greys" colormap. - group_names (list of str): name of each of the groups which appear + group_names: name of each of the groups which appear in a legend, there must be one for each one. Defaults to None and the legend is not shown. Implies `legend=True`. - colormap_name: name of the colormap to be used. By default we will + colormap: name of the colormap to be used. By default we will use autumn. - legend (bool): if `True`, show a legend with the groups. If + legend: if `True`, show a legend with the groups. If `group_names` is passed, it will be used for finding the names to display in the legend. Otherwise, the values passed to `group` will be used. + kwargs: if dim_domain is 1, keyword arguments to be passed to + the matplotlib.pyplot.plot function; if dim_domain is 2, + keyword arguments to be passed to the + matplotlib.pyplot.plot_surface function. 
Attributes: gradient_list: normalization of the values from gradient color_list that will be used to determine the intensity of the color @@ -196,7 +193,7 @@ class GraphPlot(BasePlot): def __init__( self, fdata: FData, - gradient_color_list: Optional[Sequence[float]] = None, + gradient_criteria: Optional[Sequence[float]] = None, max_grad: Optional[float] = None, min_grad: Optional[float] = None, chart: Union[Figure, Axes, None] = None, @@ -210,14 +207,15 @@ def __init__( group: Optional[Sequence[K]] = None, group_colors: Optional[Indexable[K, ColorLike]] = None, group_names: Optional[Indexable[K, str]] = None, - colormap_name: str = 'autumn', + colormap: Union[Colormap, str, None] = None, legend: bool = False, + **kwargs: Any, ) -> None: BasePlot.__init__(self) self.fdata = fdata - self.gradient_color_list = gradient_color_list - if self.gradient_color_list is not None: - if len(self.gradient_color_list) != fdata.n_samples: + self.gradient_criteria = gradient_criteria + if self.gradient_criteria is not None: + if len(self.gradient_criteria) != fdata.n_samples: raise ValueError( "The length of the gradient color", "list should be the same as the number", @@ -225,18 +223,18 @@ def __init__( ) if min_grad is None: - self.min_grad = min(self.gradient_color_list) + self.min_grad = min(self.gradient_criteria) else: self.min_grad = min_grad if max_grad is None: - self.max_grad = max(self.gradient_color_list) + self.max_grad = max(self.gradient_criteria) else: self.max_grad = max_grad aux_list = [ grad_color - self.min_grad - for grad_color in self.gradient_color_list + for grad_color in self.gradient_criteria ] self.gradient_list: Sequence[float] = ( @@ -249,18 +247,46 @@ def __init__( self.gradient_list = None self.n_points = n_points - self.domain_range = domain_range self.group = group self.group_colors = group_colors self.group_names = group_names self.legend = legend - self.colormap_name = colormap_name + self.colormap = colormap + + if domain_range is None: + 
self.domain_range = self.fdata.domain_range + else: + self.domain_range = _to_domain_range(domain_range) + + if self.gradient_list is None: + sample_colors, patches = _get_color_info( + self.fdata, + self.group, + self.group_names, + self.group_colors, + self.legend, + kwargs, + ) + else: + patches = None + if self.colormap is None: + colormap = matplotlib.cm.get_cmap("autumn") + colormap = colormap.reversed() + else: + colormap = matplotlib.cm.get_cmap(self.colormap) + + sample_colors = [None] * self.fdata.n_samples + for m in range(self.fdata.n_samples): + sample_colors[m] = colormap(self.gradient_list[m]) + + self.sample_colors = sample_colors + self.patches = patches self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( self, - **kwargs: Any, + **kwargs, ) -> Figure: """ Plot the graph. @@ -271,12 +297,7 @@ def plot( visualizations, one that displays the functions without any criteria choosing the colors and a new one that displays the function with a gradient of colors depending on the initial - gradient_color_list (normalized in gradient_list). - Args: - kwargs: if dim_domain is 1, keyword arguments to be passed to - the matplotlib.pyplot.plot function; if dim_domain is 2, - keyword arguments to be passed to the - matplotlib.pyplot.plot_surface function. + gradient_criteria (normalized in gradient_list). Returns: fig (figure object): figure object in which the graphs are plotted. 
""" @@ -285,31 +306,6 @@ def plot( dtype=Artist, ) - if self.domain_range is None: - self.domain_range = self.fdata.domain_range - else: - self.domain_range = _to_domain_range(self.domain_range) - - if self.gradient_list is None: - sample_colors, patches = _get_color_info( - self.fdata, - self.group, - self.group_names, - self.group_colors, - self.legend, - kwargs, - ) - else: - patches = None - colormap = matplotlib.cm.get_cmap(self.colormap_name) - colormap = colormap.reversed() - - sample_colors = [None] * self.fdata.n_samples - for m in range(self.fdata.n_samples): - sample_colors[m] = colormap(self.gradient_list[m]) - - self.sample_colors = sample_colors - color_dict: Mapping[str, Optional[ColorLike]] = {} if self.fdata.dim_domain == 1: @@ -324,7 +320,7 @@ def plot( for i in range(self.fdata.dim_codomain): for j in range(self.fdata.n_samples): - set_color_dict(sample_colors, j, color_dict) + set_color_dict(self.sample_colors, j, color_dict) self.artists[j, i] = self.axes[i].plot( eval_points, @@ -359,7 +355,7 @@ def plot( for k in range(self.fdata.dim_codomain): for h in range(self.fdata.n_samples): - set_color_dict(sample_colors, h, color_dict) + set_color_dict(self.sample_colors, h, color_dict) self.artists[h, k] = self.axes[k].plot_surface( X, @@ -367,9 +363,9 @@ def plot( Z[h, ..., k], **color_dict, **kwargs, - )[0] + ) - _set_labels(self.fdata, self.fig, self.axes, patches) + _set_labels(self.fdata, self.fig, self.axes, self.patches) return self.fig @@ -458,6 +454,10 @@ class ScatterPlot(BasePlot): `group_names` is passed, it will be used for finding the names to display in the legend. Otherwise, the values passed to `group` will be used. + kwargs: if dim_domain is 1, keyword arguments to be passed to + the matplotlib.pyplot.plot function; if dim_domain is 2, + keyword arguments to be passed to the + matplotlib.pyplot.plot_surface function. 
""" def __init__( @@ -475,6 +475,7 @@ def __init__( group_colors: Optional[Indexable[K, ColorLike]] = None, group_names: Optional[Indexable[K, str]] = None, legend: bool = False, + **kwargs: Any, ) -> None: BasePlot.__init__(self) self.fdata = fdata @@ -496,6 +497,22 @@ def __init__( self.group_names = group_names self.legend = legend + if self.domain_range is None: + self.domain_range = self.fdata.domain_range + else: + self.domain_range = _to_domain_range(self.domain_range) + + sample_colors, patches = _get_color_info( + self.fdata, + self.group, + self.group_names, + self.group_colors, + self.legend, + kwargs, + ) + self.sample_colors = sample_colors + self.patches = patches + self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( @@ -505,11 +522,6 @@ def plot( """ Scatter FDataGrid object. - Args: - kwargs: if dim_domain is 1, keyword arguments to be passed to - the matplotlib.pyplot.plot function; if dim_domain is 2, - keyword arguments to be passed to the - matplotlib.pyplot.plot_surface function. Returns: fig: figure object in which the graphs are plotted. 
""" @@ -518,20 +530,6 @@ def plot( dtype=Artist, ) - if self.domain_range is None: - self.domain_range = self.fdata.domain_range - else: - self.domain_range = _to_domain_range(self.domain_range) - - sample_colors, patches = _get_color_info( - self.fdata, - self.group, - self.group_names, - self.group_colors, - self.legend, - kwargs, - ) - color_dict: Mapping[str, Optional[ColorLike]] = {} if self.fdata.dim_domain == 1: @@ -539,7 +537,7 @@ def plot( for i in range(self.fdata.dim_codomain): for j in range(self.fdata.n_samples): - set_color_dict(sample_colors, j, color_dict) + set_color_dict(self.sample_colors, j, color_dict) self.artists[j, i] = self.axes[i].scatter( self.grid_points[0], @@ -559,7 +557,7 @@ def plot( for k in range(self.fdata.dim_codomain): for h in range(self.fdata.n_samples): - set_color_dict(sample_colors, h, color_dict) + set_color_dict(self.sample_colors, h, color_dict) self.artists[h, k] = self.axes[k].scatter( X, @@ -571,7 +569,7 @@ def plot( **kwargs, ) - _set_labels(self.fdata, self.fig, self.axes, patches) + _set_labels(self.fdata, self.fig, self.axes, self.patches) return self.fig @@ -625,8 +623,8 @@ def set_color_dict( """ Auxiliary method used to update color_dict. - Sets the new color of the color - dict thanks to sample colors and index. + Sets the new color of the color_dict + thanks to sample colors and index. """ if sample_colors is not None: - color_dict["color"] = sample_colors[ind] + color_dict["color"] = sample_colors[ind] \ No newline at end of file From 15b58f96d37fd27d875efaf46adf8544864a7f10 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 16 Jun 2021 13:16:52 +0200 Subject: [PATCH 363/417] Deprecate projection in favour of feature_extraction. 
--- examples/plot_fpca.py | 9 ++++----- skfda/preprocessing/dim_reduction/__init__.py | 13 ++++++++++++- .../dim_reduction/feature_extraction/__init__.py | 1 + .../{projection => feature_extraction}/_fpca.py | 0 .../dim_reduction/projection/__init__.py | 9 ++++++++- tests/test_fpca.py | 11 ++++++----- 6 files changed, 31 insertions(+), 12 deletions(-) rename skfda/preprocessing/dim_reduction/{projection => feature_extraction}/_fpca.py (100%) diff --git a/examples/plot_fpca.py b/examples/plot_fpca.py index 460a1db7c..ada5600b0 100644 --- a/examples/plot_fpca.py +++ b/examples/plot_fpca.py @@ -8,16 +8,15 @@ # Author: Yujian Hong # License: MIT +import matplotlib.pyplot as plt +import numpy as np + import skfda from skfda.datasets import fetch_growth from skfda.exploratory.visualization import plot_fpca_perturbation_graphs -from skfda.preprocessing.dim_reduction.projection import FPCA +from skfda.preprocessing.dim_reduction.feature_extraction import FPCA from skfda.representation.basis import BSpline, Fourier, Monomial -import matplotlib.pyplot as plt -import numpy as np - - ############################################################################## # In this example we are going to use functional principal component analysis to # explore datasets and obtain conclusions about said dataset using this diff --git a/skfda/preprocessing/dim_reduction/__init__.py b/skfda/preprocessing/dim_reduction/__init__.py index 3dec4569f..765694079 100644 --- a/skfda/preprocessing/dim_reduction/__init__.py +++ b/skfda/preprocessing/dim_reduction/__init__.py @@ -1,2 +1,13 @@ """Dim reduction.""" -from . import feature_extraction, projection, variable_selection +from __future__ import annotations + +import importlib +from typing import Any + +from . 
import feature_extraction, variable_selection + + +def __getattr__(name: str) -> Any: + if name == "projection": + return importlib.import_module(f".{name}", __name__) + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/skfda/preprocessing/dim_reduction/feature_extraction/__init__.py b/skfda/preprocessing/dim_reduction/feature_extraction/__init__.py index a288b52a6..16355e236 100644 --- a/skfda/preprocessing/dim_reduction/feature_extraction/__init__.py +++ b/skfda/preprocessing/dim_reduction/feature_extraction/__init__.py @@ -1,2 +1,3 @@ """Feature extraction.""" from ._ddg_transformer import DDGTransformer +from ._fpca import FPCA diff --git a/skfda/preprocessing/dim_reduction/projection/_fpca.py b/skfda/preprocessing/dim_reduction/feature_extraction/_fpca.py similarity index 100% rename from skfda/preprocessing/dim_reduction/projection/_fpca.py rename to skfda/preprocessing/dim_reduction/feature_extraction/_fpca.py diff --git a/skfda/preprocessing/dim_reduction/projection/__init__.py b/skfda/preprocessing/dim_reduction/projection/__init__.py index 4b6cf980c..b6b3116cb 100644 --- a/skfda/preprocessing/dim_reduction/projection/__init__.py +++ b/skfda/preprocessing/dim_reduction/projection/__init__.py @@ -1 +1,8 @@ -from ._fpca import FPCA +import warnings + +from ..feature_extraction import FPCA + +warnings.warn( + 'The module "projection" is deprecated. 
Please use "feature_extraction"', + category=DeprecationWarning, +) diff --git a/tests/test_fpca.py b/tests/test_fpca.py index a5d69b287..cf979fa73 100644 --- a/tests/test_fpca.py +++ b/tests/test_fpca.py @@ -1,12 +1,13 @@ -from skfda import FDataGrid, FDataBasis +import unittest + +import numpy as np + +from skfda import FDataBasis, FDataGrid from skfda.datasets import fetch_weather from skfda.misc.operators import LinearDifferentialOperator from skfda.misc.regularization import TikhonovRegularization -from skfda.preprocessing.dim_reduction.projection import FPCA +from skfda.preprocessing.dim_reduction.feature_extraction import FPCA from skfda.representation.basis import Fourier -import unittest - -import numpy as np class FPCATestCase(unittest.TestCase): From 0daf58eb687b65ec4e87d804755232bf23827867 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 16 Jun 2021 14:08:14 +0200 Subject: [PATCH 364/417] Fix plot. --- skfda/representation/_functional_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index 73c0249dc..22ac35a8a 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -781,7 +781,7 @@ def plot(self, *args: Any, **kwargs: Any) -> Any: """ from ..exploratory.visualization.representation import GraphPlot - return GraphPlot(fdata=self, *args, **kwargs).plot() + return GraphPlot(self, *args, **kwargs).plot() @abstractmethod def copy( From 1fecb7f81d0a33b0bc193cea2fd092b2ffc091eb Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 16 Jun 2021 14:16:46 +0200 Subject: [PATCH 365/417] Fix GraphPlot. 
--- skfda/exploratory/visualization/representation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index d18df5240..33c8fa8c0 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -193,9 +193,6 @@ class GraphPlot(BasePlot): def __init__( self, fdata: FData, - gradient_criteria: Optional[Sequence[float]] = None, - max_grad: Optional[float] = None, - min_grad: Optional[float] = None, chart: Union[Figure, Axes, None] = None, *, fig: Optional[Figure] = None, @@ -207,6 +204,9 @@ def __init__( group: Optional[Sequence[K]] = None, group_colors: Optional[Indexable[K, ColorLike]] = None, group_names: Optional[Indexable[K, str]] = None, + gradient_criteria: Optional[Sequence[float]] = None, + max_grad: Optional[float] = None, + min_grad: Optional[float] = None, colormap: Union[Colormap, str, None] = None, legend: bool = False, **kwargs: Any, From cc32b1ceb58b9fff354a27cbdd4032f8c416cbe8 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 17 Jun 2021 17:08:24 +0200 Subject: [PATCH 366/417] Fix FPCA tests. 
--- .../dim_reduction/feature_extraction/_fpca.py | 19 +- tests/test_fpca.py | 634 +++++++++--------- 2 files changed, 345 insertions(+), 308 deletions(-) diff --git a/skfda/preprocessing/dim_reduction/feature_extraction/_fpca.py b/skfda/preprocessing/dim_reduction/feature_extraction/_fpca.py index f10077e4b..f551fc0e0 100644 --- a/skfda/preprocessing/dim_reduction/feature_extraction/_fpca.py +++ b/skfda/preprocessing/dim_reduction/feature_extraction/_fpca.py @@ -2,24 +2,25 @@ from __future__ import annotations -from typing import Optional, TypeVar +from typing import Callable, Optional, TypeVar, Union import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.decomposition import PCA from scipy.linalg import solve_triangular -from skfda.misc.regularization import ( + +from ....misc.regularization import ( TikhonovRegularization, compute_penalty_matrix, ) -from skfda.representation import FData -from skfda.representation.basis import Basis, FDataBasis -from skfda.representation.grid import FDataGrid - -from ....misc.lstsq import solve_regularized_weighted_lstsq +from ....representation import FData +from ....representation._typing import ArrayLike +from ....representation.basis import Basis, FDataBasis +from ....representation.grid import FDataGrid Function = TypeVar("Function", bound=FData) +WeightsCallable = Callable[[np.ndarray], np.ndarray] class FPCA( @@ -44,7 +45,7 @@ class FPCA( components. We can use a different basis than the basis contained in the passed FDataBasis object. This parameter is only used when fitting a FDataBasis. - weights (numpy.array or callable): the weights vector used for + weights: the weights vector used for discrete integration. If none then the trapezoidal rule is used for computing the weights. 
If a callable object is passed, then the weight vector will be obtained by evaluating the object at the @@ -96,7 +97,7 @@ def __init__( n_components: int = 3, centering: bool = True, regularization: Optional[TikhonovRegularization[FData]] = None, - weights: Optional[np.ndarray] = None, + weights: Optional[Union[ArrayLike, WeightsCallable]] = None, components_basis: Optional[Basis] = None, ) -> None: self.n_components = n_components diff --git a/tests/test_fpca.py b/tests/test_fpca.py index cf979fa73..ef0db4fcb 100644 --- a/tests/test_fpca.py +++ b/tests/test_fpca.py @@ -1,3 +1,4 @@ +"""Tests for FPCA.""" import unittest import numpy as np @@ -11,179 +12,209 @@ class FPCATestCase(unittest.TestCase): + """Tests for principal component analysis.""" - def test_basis_fpca_fit_attributes(self): + def test_basis_fpca_fit_exceptions(self) -> None: + """Check that invalid arguments in fit raise exception for basis.""" fpca = FPCA() with self.assertRaises(AttributeError): - fpca.fit(None) + fpca.fit(None) # type: ignore basis = Fourier(n_basis=1) - # check that if n_components is bigger than the number of samples then + # Check that if n_components is bigger than the number of samples then # an exception should be thrown fd = FDataBasis(basis, [[0.9]]) with self.assertRaises(AttributeError): fpca.fit(fd) - # check that n_components must be smaller than the number of elements + # Check that n_components must be smaller than the number of elements # of target basis fd = FDataBasis(basis, [[0.9], [0.7], [0.5]]) with self.assertRaises(AttributeError): fpca.fit(fd) - def test_discretized_fpca_fit_attributes(self): + def test_discretized_fpca_fit_exceptions(self) -> None: + """Check that invalid arguments in fit raise exception for grid.""" fpca = FPCA() with self.assertRaises(AttributeError): - fpca.fit(None) + fpca.fit(None) # type: ignore - # check that if n_components is bigger than the number of samples then + # Check that if n_components is bigger than the number of samples 
then # an exception should be thrown fd = FDataGrid([[0.5], [0.1]], grid_points=[0]) with self.assertRaises(AttributeError): fpca.fit(fd) - # check that n_components must be smaller than the number of attributes + # Check that n_components must be smaller than the number of attributes # in the FDataGrid object fd = FDataGrid([[0.9], [0.7], [0.5]], grid_points=[0]) with self.assertRaises(AttributeError): fpca.fit(fd) - def test_basis_fpca_fit_result(self): - + def test_basis_fpca_fit_result(self) -> None: + """Compare the components in basis against the fda package.""" n_basis = 9 n_components = 3 fd_data = fetch_weather()['data'].coordinates[0] - fd_data = FDataGrid(np.squeeze(fd_data.data_matrix), - np.arange(0.5, 365, 1)) - # initialize basis data + # Initialize basis data basis = Fourier(n_basis=n_basis, domain_range=(0, 365)) fd_basis = fd_data.to_basis(basis) - fpca = FPCA(n_components=n_components, - regularization=TikhonovRegularization( - LinearDifferentialOperator(2), - regularization_parameter=1e5)) + fpca = FPCA( + n_components=n_components, + regularization=TikhonovRegularization( + LinearDifferentialOperator(2), + regularization_parameter=1e5, + ), + ) fpca.fit(fd_basis) - # results obtained using Ramsay's R package - results = [[0.92407552, 0.13544888, 0.35399023, 0.00805966, - -0.02148108, - -0.01709549, -0.00208469, -0.00297439, -0.00308224], - [-0.33314436, -0.05116842, 0.89443418, 0.14673902, - 0.21559073, - 0.02046924, 0.02203431, -0.00787185, 0.00247492], - [-0.14241092, 0.92131899, 0.00514715, 0.23391411, - -0.19497613, - 0.09800817, 0.01754439, -0.00205874, 0.01438185]] - results = np.array(results) - - # compare results obtained using this library. 
There are slight + # Results obtained using Ramsay's R package + results = np.array([ + [ # noqa: WPS317 + 0.92407552, 0.13544888, 0.35399023, + 0.00805966, -0.02148108, -0.01709549, + -0.00208469, -0.00297439, -0.00308224, + ], + [ # noqa: WPS317 + -0.33314436, -0.05116842, 0.89443418, + 0.14673902, 0.21559073, 0.02046924, + 0.02203431, -0.00787185, 0.00247492, + ], + [ # noqa: WPS317 + -0.14241092, 0.92131899, 0.00514715, + 0.23391411, -0.19497613, 0.09800817, + 0.01754439, -0.00205874, 0.01438185, + ], + ]) + + # Compare results obtained using this library. There are slight # variations due to the fact that we are in two different packages - for i in range(n_components): - if np.sign(fpca.components_.coefficients[i][0]) != np.sign( - results[i][0]): - results[i, :] *= -1 - np.testing.assert_allclose(fpca.components_.coefficients, results, - atol=1e-7) + # If the sign of the components is not the same the component is + # reflected. + results *= ( + np.sign(fpca.components_.coefficients[:, 0]) + * np.sign(results[:, 0]) + )[:, np.newaxis] - def test_basis_fpca_transform_result(self): + np.testing.assert_allclose( + fpca.components_.coefficients, + results, + atol=1e-7, + ) + def test_basis_fpca_transform_result(self) -> None: + """Compare the scores in basis against the fda package.""" n_basis = 9 n_components = 3 fd_data = fetch_weather()['data'].coordinates[0] - fd_data = FDataGrid(np.squeeze(fd_data.data_matrix), - np.arange(0.5, 365, 1)) - # initialize basis data + # Initialize basis data basis = Fourier(n_basis=n_basis, domain_range=(0, 365)) fd_basis = fd_data.to_basis(basis) - fpca = FPCA(n_components=n_components, - regularization=TikhonovRegularization( - LinearDifferentialOperator(2), - regularization_parameter=1e5)) + fpca = FPCA( + n_components=n_components, + regularization=TikhonovRegularization( + LinearDifferentialOperator(2), + regularization_parameter=1e5, + ), + ) fpca.fit(fd_basis) scores = fpca.transform(fd_basis) - # results obtained using 
Ramsay's R package - results = [[-7.68307641e+01, 5.69034443e+01, -1.22440149e+01], - [-9.02873996e+01, 1.46262257e+01, -1.78574536e+01], - [-8.21155683e+01, 3.19159491e+01, -2.56212328e+01], - [-1.14163637e+02, 3.66425562e+01, -1.00810836e+01], - [-6.97263223e+01, 1.22817168e+01, -2.39417618e+01], - [-6.41886364e+01, -1.07261045e+01, -1.10587407e+01], - [1.35824412e+02, 2.03484658e+01, -9.04815324e+00], - [-1.46816399e+01, -2.66867491e+01, -1.20233465e+01], - [1.02507511e+00, -2.29840736e+01, -9.06081296e+00], - [-3.62936903e+01, -2.09520442e+01, -1.14799951e+01], - [-4.20649313e+01, -1.13618094e+01, -6.24909009e+00], - [-7.38115985e+01, -3.18423866e+01, -1.50298626e+01], - [-6.69822456e+01, -3.35518632e+01, -1.25167352e+01], - [-1.03534763e+02, -1.29513941e+01, -1.49103879e+01], - [-1.04542036e+02, -1.36794907e+01, -1.41555965e+01], - [-7.35863347e+00, -1.41171956e+01, -2.97562788e+00], - [7.28804530e+00, -5.34421830e+01, -3.39823418e+00], - [5.59974094e+01, -4.02154080e+01, 3.78800103e-01], - [1.80778702e+02, 1.87798201e+01, -1.99043247e+01], - [-3.69700617e+00, -4.19441020e+01, 6.45820740e+00], - [3.76527216e+01, -4.23056953e+01, 1.04221757e+01], - [1.23850646e+02, -4.24648130e+01, -2.22336786e-01], - [-7.23588457e+00, -1.20579536e+01, 2.07502089e+01], - [-4.96871011e+01, 8.88483448e+00, 2.02882768e+01], - [-1.36726355e+02, -1.86472599e+01, 1.89076217e+01], - [-1.83878661e+02, 4.12118550e+01, 1.78960356e+01], - [-1.81568820e+02, 5.20817910e+01, 2.01078870e+01], - [-5.08775852e+01, 1.34600555e+01, 3.18602712e+01], - [-1.37633866e+02, 7.50809631e+01, 2.42320782e+01], - [4.98276375e+01, 1.33401270e+00, 3.50611066e+01], - [1.51149934e+02, -5.47417776e+01, 3.97592325e+01], - [1.58366096e+02, -3.80762686e+01, -5.62415023e+00], - [2.17139548e+02, 6.34055987e+01, -1.98853635e+01], - [2.33615480e+02, -7.90787574e-02, 2.69069525e+00], - [3.45371437e+02, 9.58703622e+01, 8.47570770e+00]] - results = np.array(results) - - # compare results + # Results obtained using 
Ramsay's R package + results = np.array([ + [-7.68307641e1, 5.69034443e1, -1.22440149e1], + [-9.02873996e1, 1.46262257e1, -1.78574536e1], + [-8.21155683e1, 3.19159491e1, -2.56212328e1], + [-1.14163637e2, 3.66425562e1, -1.00810836e1], + [-6.97263223e1, 1.22817168e1, -2.39417618e1], + [-6.41886364e1, -1.07261045e1, -1.10587407e1], + [1.35824412e2, 2.03484658e1, -9.04815324e0], + [-1.46816399e1, -2.66867491e1, -1.20233465e1], + [1.02507511e0, -2.29840736e1, -9.06081296e0], + [-3.62936903e1, -2.09520442e1, -1.14799951e1], + [-4.20649313e1, -1.13618094e1, -6.24909009e0], + [-7.38115985e1, -3.18423866e1, -1.50298626e1], + [-6.69822456e1, -3.35518632e1, -1.25167352e1], + [-1.03534763e2, -1.29513941e1, -1.49103879e1], + [-1.04542036e2, -1.36794907e1, -1.41555965e1], + [-7.35863347e0, -1.41171956e1, -2.97562788e0], + [7.28804530e0, -5.34421830e1, -3.39823418e0], + [5.59974094e1, -4.02154080e1, 3.78800103e-1], + [1.80778702e2, 1.87798201e1, -1.99043247e1], + [-3.69700617e0, -4.19441020e1, 6.45820740e0], + [3.76527216e1, -4.23056953e1, 1.04221757e1], + [1.23850646e2, -4.24648130e1, -2.22336786e-1], + [-7.23588457e0, -1.20579536e1, 2.07502089e1], + [-4.96871011e1, 8.88483448e0, 2.02882768e1], + [-1.36726355e2, -1.86472599e1, 1.89076217e1], + [-1.83878661e2, 4.12118550e1, 1.78960356e1], + [-1.81568820e2, 5.20817910e1, 2.01078870e1], + [-5.08775852e1, 1.34600555e1, 3.18602712e1], + [-1.37633866e2, 7.50809631e1, 2.42320782e1], + [4.98276375e1, 1.33401270e0, 3.50611066e1], + [1.51149934e2, -5.47417776e1, 3.97592325e1], + [1.58366096e2, -3.80762686e1, -5.62415023e0], + [2.17139548e2, 6.34055987e1, -1.98853635e1], + [2.33615480e2, -7.90787574e-2, 2.69069525e0], + [3.45371437e2, 9.58703622e1, 8.47570770e0], + ]) + + # Compare results np.testing.assert_allclose(scores, results, atol=1e-7) - def test_basis_fpca_regularization_fit_result(self): - + def test_basis_fpca_noregularization_fit_result(self) -> None: + """Compare the components in basis against the fda package.""" n_basis = 9 
n_components = 3 fd_data = fetch_weather()['data'].coordinates[0] - fd_data = FDataGrid(np.squeeze(fd_data.data_matrix), - np.arange(0.5, 365, 1)) - # initialize basis data + # Initialize basis data basis = Fourier(n_basis=n_basis, domain_range=(0, 365)) fd_basis = fd_data.to_basis(basis) fpca = FPCA(n_components=n_components) fpca.fit(fd_basis) - # results obtained using Ramsay's R package - results = [[0.9231551, 0.1364966, 0.3569451, 0.0092012, -0.0244525, - -0.02923873, -0.003566887, -0.009654571, -0.0100063], - [-0.3315211, -0.0508643, 0.89218521, 0.1669182, 0.2453900, - 0.03548997, 0.037938051, -0.025777507, 0.008416904], - [-0.1379108, 0.9125089, 0.00142045, 0.2657423, -0.2146497, - 0.16833314, 0.031509179, -0.006768189, 0.047306718]] - results = np.array(results) - - # compare results obtained using this library. There are slight + # Results obtained using Ramsay's R package + results = np.array([ + [ # noqa: WPS317 + 0.9231551, 0.1364966, 0.3569451, 0.0092012, -0.0244525, + -0.02923873, -0.003566887, -0.009654571, -0.0100063, + ], + [ # noqa: WPS317 + -0.3315211, -0.0508643, 0.89218521, 0.1669182, 0.24539, + 0.03548997, 0.037938051, -0.025777507, 0.008416904, + ], + [ # noqa: WPS317 + -0.1379108, 0.9125089, 0.00142045, 0.2657423, -0.2146497, + 0.16833314, 0.031509179, -0.006768189, 0.047306718, + ], + ]) + + # Compare results obtained using this library. There are slight # variations due to the fact that we are in two different packages - for i in range(n_components): - if np.sign(fpca.components_.coefficients[i][0]) != np.sign( - results[i][0]): - results[i, :] *= -1 - np.testing.assert_allclose(fpca.components_.coefficients, results, - atol=1e-7) + # If the sign of the components is not the same the component is + # reflected. 
+ results *= ( + np.sign(fpca.components_.coefficients[:, 0]) + * np.sign(results[:, 0]) + )[:, np.newaxis] - def test_grid_fpca_fit_result(self): + np.testing.assert_allclose( + fpca.components_.coefficients, + results, + atol=1e-7, + ) + def test_grid_fpca_fit_result(self) -> None: + """Compare the components in grid against the fda.usc package.""" n_components = 1 fd_data = fetch_weather()['data'].coordinates[0] @@ -191,98 +222,100 @@ def test_grid_fpca_fit_result(self): fpca = FPCA(n_components=n_components, weights=[1] * 365) fpca.fit(fd_data) - # results obtained using fda.usc for the first component - results = [ - [-0.06958281, -0.07015412, -0.07095115, -0.07185632, -0.07128256, - -0.07124209, -0.07364828, -0.07297663, -0.07235438, -0.07307498, - -0.07293423, -0.07449293, -0.07647909, -0.07796823, -0.07582476, - -0.07263243, -0.07241871, -0.0718136, -0.07015477, -0.07132331, - -0.0711527, -0.07435933, -0.07602666, -0.0769783, -0.07707199, - -0.07503802, -0.0770302, -0.07705581, -0.07633515, -0.07624817, - -0.07631568, -0.07619913, -0.07568, -0.07595155, -0.07506939, - -0.07181941, -0.06907624, -0.06735476, -0.06853985, -0.06902363, - -0.07098882, -0.07479412, -0.07425241, -0.07555835, -0.0765903, - -0.07651853, -0.07682536, -0.07458996, -0.07631711, -0.07726509, - -0.07641246, -0.0744066, -0.07501397, -0.07302722, -0.07045571, - -0.06912529, -0.06792186, -0.06830739, -0.06898433, -0.07000192, - -0.07014513, -0.06994886, -0.07115909, -0.073999, -0.07292669, - -0.07139879, -0.07226865, -0.07187915, -0.07122995, -0.06975022, - -0.06800613, -0.06900793, -0.07186378, -0.07114479, -0.07015252, - -0.06944782, -0.068291, -0.06905348, -0.06925773, -0.06834624, - -0.06837319, -0.06824067, -0.06644614, -0.06637313, -0.06626312, - -0.06470209, -0.0645058, -0.06477729, -0.06411049, -0.06158499, - -0.06305197, -0.06398006, -0.06277579, -0.06282124, -0.06317684, - -0.0614125, -0.05961922, -0.05875443, -0.05845781, -0.05828608, - -0.05666474, -0.05495706, -0.05446301, 
-0.05468254, -0.05478609, - -0.05440798, -0.05312339, -0.05102368, -0.05160285, -0.05077954, - -0.04979648, -0.04890853, -0.04745462, -0.04496763, -0.0448713, - -0.04599596, -0.04688998, -0.04488872, -0.04404507, -0.04420729, - -0.04368153, -0.04254381, -0.0411764, -0.04022811, -0.03999746, - -0.03963634, -0.03832502, -0.0383956, -0.04015374, -0.0387544, - -0.03777315, -0.03830728, -0.03768616, -0.03714081, -0.03781918, - -0.03739374, -0.03659894, -0.03563342, -0.03658407, -0.03686991, - -0.03543746, -0.03518799, -0.03361226, -0.0321534, -0.03050438, - -0.02958411, -0.02855023, -0.02913402, -0.02992464, -0.02899548, - -0.02891629, -0.02809554, -0.02702642, -0.02672194, -0.02678648, - -0.02698471, -0.02628085, -0.02674285, -0.02658515, -0.02604447, - -0.0245711, -0.02413174, -0.02342496, -0.022898, -0.02216152, - -0.02272283, -0.02199741, -0.02305362, -0.02371371, -0.02320865, - -0.02234777, -0.0225018, -0.02104359, -0.02203346, -0.02052545, - -0.01987457, -0.01947911, -0.01986949, -0.02012196, -0.01958515, - -0.01906753, -0.01857869, -0.01874101, -0.01827973, -0.017752, - -0.01702056, -0.01759611, -0.01888485, -0.01988159, -0.01951675, - -0.01872967, -0.01866667, -0.0183576, -0.01909758, -0.018599, - -0.01910036, -0.01930315, -0.01958856, -0.02129936, -0.0216614, - -0.0204397, -0.02002368, -0.02058828, -0.02149915, -0.02167326, - -0.02238569, -0.02211907, -0.02168336, -0.02124387, -0.02131655, - -0.02130508, -0.02181227, -0.02230632, -0.02223732, -0.0228216, - -0.02355137, -0.02275145, -0.02286893, -0.02437776, -0.02523897, - -0.0248354, -0.02319174, -0.02335831, -0.02405789, -0.02483273, - -0.02428119, -0.02395295, -0.02437185, -0.02476434, -0.02347973, - -0.02385957, -0.02451257, -0.02414586, -0.02439035, -0.02357782, - -0.02417295, -0.02504764, -0.02682569, -0.02807111, -0.02886335, - -0.02943406, -0.02956806, -0.02893096, -0.02903812, -0.02999862, - -0.029421, -0.03016203, -0.03118823, -0.03076205, -0.03005985, - -0.03079187, -0.03215188, -0.03271075, 
-0.03146124, -0.03040965, - -0.03008436, -0.03085897, -0.03015341, -0.03014661, -0.03110255, - -0.03271278, -0.03217399, -0.0331721, -0.03459221, -0.03572073, - -0.03560707, -0.03531492, -0.03687657, -0.03800143, -0.0373808, - -0.03729927, -0.03748666, -0.03754171, -0.03790408, -0.03963726, - -0.03992153, -0.03812243, -0.0373844, -0.0385394, -0.03849716, - -0.03826345, -0.03743958, -0.0380861, -0.03857622, -0.04099357, - -0.04102509, -0.04170207, -0.04283573, -0.04320618, -0.04269438, - -0.04467527, -0.04470603, -0.04496092, -0.04796417, -0.04796633, - -0.047863, -0.04883668, -0.0505939, -0.05112441, -0.04960962, - -0.05000041, -0.04962112, -0.05087008, -0.0521671, -0.05369792, - -0.05478139, -0.05559221, -0.05669698, -0.05654505, -0.05731113, - -0.05783543, -0.05766056, -0.05754354, -0.05724272, -0.05831026, - -0.05847512, -0.05804533, -0.05875046, -0.06021703, -0.06147975, - -0.06213918, -0.0645805, -0.06500849, -0.06361716, -0.06315227, - -0.06306436, -0.06425743, -0.06626847, -0.06615213, -0.06881004, - -0.06942296, -0.06889225, -0.06868663, -0.0678667, -0.06720133, - -0.06771172, -0.06885042, -0.06896979, -0.06961627, -0.07211988, - -0.07252956, -0.07265559, -0.07264195, -0.07306334, -0.07282035, - -0.07196505, -0.07210595, -0.07203942, -0.07105821, -0.06920599, - -0.06892264, -0.06699939, -0.06537829, -0.06543323, -0.06913186, - -0.07210039, -0.07219987, -0.07124228, -0.07065497, -0.06996833, - -0.0674457, -0.06800847, -0.06784175, -0.06592871, -0.06723401]] - - results = np.array(results) - - # compare results obtained using this library. 
There are slight + # Results obtained using fda.usc for the first component + results = np.array([ # noqa: WPS317 + -0.06958281, -0.07015412, -0.07095115, -0.07185632, -0.07128256, + -0.07124209, -0.07364828, -0.07297663, -0.07235438, -0.07307498, + -0.07293423, -0.07449293, -0.07647909, -0.07796823, -0.07582476, + -0.07263243, -0.07241871, -0.0718136, -0.07015477, -0.07132331, + -0.0711527, -0.07435933, -0.07602666, -0.0769783, -0.07707199, + -0.07503802, -0.0770302, -0.07705581, -0.07633515, -0.07624817, + -0.07631568, -0.07619913, -0.07568, -0.07595155, -0.07506939, + -0.07181941, -0.06907624, -0.06735476, -0.06853985, -0.06902363, + -0.07098882, -0.07479412, -0.07425241, -0.07555835, -0.0765903, + -0.07651853, -0.07682536, -0.07458996, -0.07631711, -0.07726509, + -0.07641246, -0.0744066, -0.07501397, -0.07302722, -0.07045571, + -0.06912529, -0.06792186, -0.06830739, -0.06898433, -0.07000192, + -0.07014513, -0.06994886, -0.07115909, -0.073999, -0.07292669, + -0.07139879, -0.07226865, -0.07187915, -0.07122995, -0.06975022, + -0.06800613, -0.06900793, -0.07186378, -0.07114479, -0.07015252, + -0.06944782, -0.068291, -0.06905348, -0.06925773, -0.06834624, + -0.06837319, -0.06824067, -0.06644614, -0.06637313, -0.06626312, + -0.06470209, -0.0645058, -0.06477729, -0.06411049, -0.06158499, + -0.06305197, -0.06398006, -0.06277579, -0.06282124, -0.06317684, + -0.0614125, -0.05961922, -0.05875443, -0.05845781, -0.05828608, + -0.05666474, -0.05495706, -0.05446301, -0.05468254, -0.05478609, + -0.05440798, -0.05312339, -0.05102368, -0.05160285, -0.05077954, + -0.04979648, -0.04890853, -0.04745462, -0.04496763, -0.0448713, + -0.04599596, -0.04688998, -0.04488872, -0.04404507, -0.04420729, + -0.04368153, -0.04254381, -0.0411764, -0.04022811, -0.03999746, + -0.03963634, -0.03832502, -0.0383956, -0.04015374, -0.0387544, + -0.03777315, -0.03830728, -0.03768616, -0.03714081, -0.03781918, + -0.03739374, -0.03659894, -0.03563342, -0.03658407, -0.03686991, + -0.03543746, -0.03518799, 
-0.03361226, -0.0321534, -0.03050438, + -0.02958411, -0.02855023, -0.02913402, -0.02992464, -0.02899548, + -0.02891629, -0.02809554, -0.02702642, -0.02672194, -0.02678648, + -0.02698471, -0.02628085, -0.02674285, -0.02658515, -0.02604447, + -0.0245711, -0.02413174, -0.02342496, -0.022898, -0.02216152, + -0.02272283, -0.02199741, -0.02305362, -0.02371371, -0.02320865, + -0.02234777, -0.0225018, -0.02104359, -0.02203346, -0.02052545, + -0.01987457, -0.01947911, -0.01986949, -0.02012196, -0.01958515, + -0.01906753, -0.01857869, -0.01874101, -0.01827973, -0.017752, + -0.01702056, -0.01759611, -0.01888485, -0.01988159, -0.01951675, + -0.01872967, -0.01866667, -0.0183576, -0.01909758, -0.018599, + -0.01910036, -0.01930315, -0.01958856, -0.02129936, -0.0216614, + -0.0204397, -0.02002368, -0.02058828, -0.02149915, -0.02167326, + -0.02238569, -0.02211907, -0.02168336, -0.02124387, -0.02131655, + -0.02130508, -0.02181227, -0.02230632, -0.02223732, -0.0228216, + -0.02355137, -0.02275145, -0.02286893, -0.02437776, -0.02523897, + -0.0248354, -0.02319174, -0.02335831, -0.02405789, -0.02483273, + -0.02428119, -0.02395295, -0.02437185, -0.02476434, -0.02347973, + -0.02385957, -0.02451257, -0.02414586, -0.02439035, -0.02357782, + -0.02417295, -0.02504764, -0.02682569, -0.02807111, -0.02886335, + -0.02943406, -0.02956806, -0.02893096, -0.02903812, -0.02999862, + -0.029421, -0.03016203, -0.03118823, -0.03076205, -0.03005985, + -0.03079187, -0.03215188, -0.03271075, -0.03146124, -0.03040965, + -0.03008436, -0.03085897, -0.03015341, -0.03014661, -0.03110255, + -0.03271278, -0.03217399, -0.0331721, -0.03459221, -0.03572073, + -0.03560707, -0.03531492, -0.03687657, -0.03800143, -0.0373808, + -0.03729927, -0.03748666, -0.03754171, -0.03790408, -0.03963726, + -0.03992153, -0.03812243, -0.0373844, -0.0385394, -0.03849716, + -0.03826345, -0.03743958, -0.0380861, -0.03857622, -0.04099357, + -0.04102509, -0.04170207, -0.04283573, -0.04320618, -0.04269438, + -0.04467527, -0.04470603, 
-0.04496092, -0.04796417, -0.04796633, + -0.047863, -0.04883668, -0.0505939, -0.05112441, -0.04960962, + -0.05000041, -0.04962112, -0.05087008, -0.0521671, -0.05369792, + -0.05478139, -0.05559221, -0.05669698, -0.05654505, -0.05731113, + -0.05783543, -0.05766056, -0.05754354, -0.05724272, -0.05831026, + -0.05847512, -0.05804533, -0.05875046, -0.06021703, -0.06147975, + -0.06213918, -0.0645805, -0.06500849, -0.06361716, -0.06315227, + -0.06306436, -0.06425743, -0.06626847, -0.06615213, -0.06881004, + -0.06942296, -0.06889225, -0.06868663, -0.0678667, -0.06720133, + -0.06771172, -0.06885042, -0.06896979, -0.06961627, -0.07211988, + -0.07252956, -0.07265559, -0.07264195, -0.07306334, -0.07282035, + -0.07196505, -0.07210595, -0.07203942, -0.07105821, -0.06920599, + -0.06892264, -0.06699939, -0.06537829, -0.06543323, -0.06913186, + -0.07210039, -0.07219987, -0.07124228, -0.07065497, -0.06996833, + -0.0674457, -0.06800847, -0.06784175, -0.06592871, -0.06723401, + ]) + + # Compare results obtained using this library. There are slight # variations due to the fact that we are in two different packages - for i in range(n_components): - if np.sign(fpca.components_.data_matrix[i][0]) != np.sign( - results[i][0]): - results[i, :] *= -1 + # If the sign of the components is not the same the component is + # reflected. 
+ results *= ( + np.sign(fpca.components_.data_matrix.ravel()[0]) + * np.sign(results[0]) + ) + np.testing.assert_allclose( - fpca.components_.data_matrix.reshape( - fpca.components_.data_matrix.shape[:-1]), + fpca.components_.data_matrix.ravel(), results, - rtol=1e-6) - - def test_grid_fpca_transform_result(self): + rtol=1e-6, + ) + def test_grid_fpca_transform_result(self) -> None: + """Compare the scores in grid against the fda.usc package.""" n_components = 1 fd_data = fetch_weather()['data'].coordinates[0] @@ -292,126 +325,129 @@ def test_grid_fpca_transform_result(self): scores = fpca.transform(fd_data) # results obtained - results = [[-77.05020176], [-90.56072204], [-82.39565947], - [-114.45375934], [-69.99735931], [-64.44894047], - [135.58336775], [-14.93460852], [0.75024737], - [-36.4781038], [-42.35637749], [-73.98910492], - [-67.11253749], [-103.68269798], [-104.65948079], - [-7.42817782], [7.48125036], [56.29792942], - [181.00258791], [-3.53294736], [37.94673912], - [124.43819913], [-7.04274676], [-49.61134859], - [-136.86256785], [-184.03502398], [-181.72835749], - [-51.06323208], [-137.85606731], [50.10941466], - [151.68118097], [159.01360046], [217.17981302], - [234.40195237], [345.39374006]] - results = np.array(results) + results = np.array([ # noqa: WPS317 + [-77.05020176], [-90.56072204], [-82.39565947], + [-114.45375934], [-69.99735931], [-64.44894047], + [135.58336775], [-14.93460852], [0.75024737], + [-36.4781038], [-42.35637749], [-73.98910492], + [-67.11253749], [-103.68269798], [-104.65948079], + [-7.42817782], [7.48125036], [56.29792942], + [181.00258791], [-3.53294736], [37.94673912], + [124.43819913], [-7.04274676], [-49.61134859], + [-136.86256785], [-184.03502398], [-181.72835749], + [-51.06323208], [-137.85606731], [50.10941466], + [151.68118097], [159.01360046], [217.17981302], + [234.40195237], [345.39374006], + ]) np.testing.assert_allclose(scores, results, rtol=1e-6) - def test_grid_fpca_regularization_fit_result(self): - + def 
test_grid_fpca_regularization_fit_result(self) -> None: + """Compare the components in grid against the fda.usc package.""" n_components = 1 fd_data = fetch_weather()['data'].coordinates[0] - fd_data = FDataGrid(np.squeeze(fd_data.data_matrix), - np.arange(0.5, 365, 1)) - fpca = FPCA( - n_components=n_components, weights=[1] * 365, + n_components=n_components, + weights=[1] * 365, regularization=TikhonovRegularization( - LinearDifferentialOperator(2))) + LinearDifferentialOperator(2), + ), + ) fpca.fit(fd_data) - # results obtained using fda.usc for the first component - results = [ - [-0.06961236, -0.07027042, -0.07090496, -0.07138247, -0.07162215, - -0.07202264, -0.07264893, -0.07279174, -0.07274672, -0.07300075, - -0.07365471, -0.07489002, -0.07617455, -0.07658708, -0.07551923, - -0.07375128, -0.0723776, -0.07138373, -0.07080555, -0.07111745, - -0.0721514, -0.07395427, -0.07558341, -0.07650959, -0.0766541, - -0.07641352, -0.07660864, -0.07669081, -0.0765396, -0.07640671, - -0.07634668, -0.07626304, -0.07603638, -0.07549114, -0.07410347, - -0.07181791, -0.06955356, -0.06824034, -0.06834077, -0.06944125, - -0.07133598, -0.07341109, -0.07471501, -0.07568844, -0.07631904, - -0.07647264, -0.07629453, -0.07598431, -0.07628157, -0.07654062, - -0.07616026, -0.07527189, -0.07426683, -0.07267961, -0.07079998, - -0.06927394, -0.068412, -0.06838534, -0.06888439, -0.0695309, - -0.07005508, -0.07066637, -0.07167196, -0.07266978, -0.07275299, - -0.07235183, -0.07207819, -0.07159814, -0.07077697, -0.06977026, - -0.0691952, -0.06965756, -0.07058327, -0.07075751, -0.07025415, - -0.06954233, -0.06899785, -0.06891026, -0.06887079, -0.06862183, - -0.06830082, -0.06777765, -0.06700202, -0.06639394, -0.06582435, - -0.06514987, -0.06467236, -0.06425272, -0.06359187, -0.062922, - -0.06300068, -0.06325494, -0.06316979, -0.06296254, -0.06246343, - -0.06136836, -0.0600936, -0.05910688, -0.05840872, -0.0576547, - -0.05655684, -0.05546518, -0.05484433, -0.05465746, -0.05449286, - 
-0.05397004, -0.05300742, -0.05196686, -0.05133129, -0.05064617, - -0.04973418, -0.04855687, -0.04714356, -0.04588103, -0.04547284, - -0.04571493, -0.04580704, -0.04523509, -0.04457293, -0.04405309, - -0.04338468, -0.04243512, -0.04137278, -0.04047946, -0.03984531, - -0.03931376, -0.0388847, -0.03888507, -0.03908662, -0.03877577, - -0.03830952, -0.03802713, -0.03773521, -0.03752388, -0.03743759, - -0.03714113, -0.03668387, -0.0363703, -0.03642288, -0.03633051, - -0.03574618, -0.03486536, -0.03357797, -0.03209969, -0.0306837, - -0.02963987, -0.029102, -0.0291513, -0.02932013, -0.02912619, - -0.02869407, -0.02801974, -0.02732363, -0.02690451, -0.02676622, - -0.0267323, -0.02664896, -0.02661708, -0.02637166, -0.02577496, - -0.02490428, -0.02410813, -0.02340367, -0.02283356, -0.02246305, - -0.0224229, -0.0225435, -0.02295603, -0.02324663, -0.02310005, - -0.02266893, -0.02221522, -0.02168056, -0.02129419, -0.02064909, - -0.02007801, -0.01979083, -0.01979541, -0.01978879, -0.01954269, - -0.0191623, -0.01879572, -0.01849678, -0.01810297, -0.01769666, - -0.01753802, -0.01794351, -0.01871307, -0.01930005, -0.01933, - -0.01901017, -0.01873486, -0.01861838, -0.01870777, -0.01879, - -0.01904219, -0.01945078, -0.0200607, -0.02076936, -0.02100213, - -0.02071439, -0.02052113, -0.02076313, -0.02128468, -0.02175631, - -0.02206387, -0.02201054, -0.02172142, -0.02143092, -0.02133647, - -0.02144956, -0.02176286, -0.02212579, -0.02243861, -0.02278316, - -0.02304113, -0.02313356, -0.02349275, -0.02417028, -0.0245954, - -0.0244062, -0.02388557, -0.02374682, -0.02401071, -0.02431126, - -0.02433125, -0.02427656, -0.02430442, -0.02424977, -0.02401619, - -0.02402294, -0.02415424, -0.02413262, -0.02404076, -0.02397651, - -0.0243893, -0.0253322, -0.02664395, -0.0278802, -0.02877936, - -0.02927182, -0.02937318, -0.02926277, -0.02931632, -0.02957945, - -0.02982133, -0.03023224, -0.03060406, -0.03066011, -0.03070932, - -0.03116429, -0.03179009, -0.03198094, -0.03149462, -0.03082037, - 
-0.03041594, -0.0303307, -0.03028465, -0.03052841, -0.0311837, - -0.03199307, -0.03262025, -0.03345083, -0.03442665, -0.03521313, - -0.0356433, -0.03606037, -0.03677406, -0.03735165, -0.03746578, - -0.03744154, -0.03752143, -0.03780898, -0.03837639, -0.03903232, - -0.03911629, -0.03857567, -0.03816592, -0.03819285, -0.03818405, - -0.03801684, -0.03788493, -0.03823232, -0.03906142, -0.04023251, - -0.04112434, -0.04188011, -0.04254759, -0.043, -0.04340181, - -0.04412687, -0.04484482, -0.04577669, -0.04700832, -0.04781373, - -0.04842662, -0.04923723, -0.05007637, -0.05037817, -0.05009794, - -0.04994083, -0.05012712, -0.05094001, -0.05216065, -0.05350458, - -0.05469781, -0.05566309, -0.05641011, -0.05688106, -0.05730818, - -0.05759156, -0.05763771, -0.05760073, -0.05766117, -0.05794587, - -0.05816696, -0.0584046, -0.05905105, -0.06014331, -0.06142231, - -0.06270788, -0.06388225, -0.06426245, -0.06386721, -0.0634656, - -0.06358049, -0.06442514, -0.06570047, -0.06694328, -0.0682621, - -0.06897846, -0.06896583, -0.06854621, -0.06797142, -0.06763755, - -0.06784024, -0.06844314, -0.06918567, -0.07021928, -0.07148473, - -0.07232504, -0.07272276, -0.07287021, -0.07289836, -0.07271531, - -0.07239956, -0.07214086, -0.07170078, -0.07081195, -0.06955202, - -0.06825156, -0.06690167, -0.06617102, -0.06683291, -0.06887539, - -0.07089424, -0.07174837, -0.07150888, -0.07070378, -0.06960066, - -0.06842496, -0.06777666, -0.06728403, -0.06681262, -0.06679066]] - - results = np.array(results) - - # compare results obtained using this library. 
There are slight + # Results obtained using fda.usc for the first component + results = np.array([ # noqa: WPS317 + -0.06961236, -0.07027042, -0.07090496, -0.07138247, -0.07162215, + -0.07202264, -0.07264893, -0.07279174, -0.07274672, -0.07300075, + -0.07365471, -0.07489002, -0.07617455, -0.07658708, -0.07551923, + -0.07375128, -0.0723776, -0.07138373, -0.07080555, -0.07111745, + -0.0721514, -0.07395427, -0.07558341, -0.07650959, -0.0766541, + -0.07641352, -0.07660864, -0.07669081, -0.0765396, -0.07640671, + -0.07634668, -0.07626304, -0.07603638, -0.07549114, -0.07410347, + -0.07181791, -0.06955356, -0.06824034, -0.06834077, -0.06944125, + -0.07133598, -0.07341109, -0.07471501, -0.07568844, -0.07631904, + -0.07647264, -0.07629453, -0.07598431, -0.07628157, -0.07654062, + -0.07616026, -0.07527189, -0.07426683, -0.07267961, -0.07079998, + -0.06927394, -0.068412, -0.06838534, -0.06888439, -0.0695309, + -0.07005508, -0.07066637, -0.07167196, -0.07266978, -0.07275299, + -0.07235183, -0.07207819, -0.07159814, -0.07077697, -0.06977026, + -0.0691952, -0.06965756, -0.07058327, -0.07075751, -0.07025415, + -0.06954233, -0.06899785, -0.06891026, -0.06887079, -0.06862183, + -0.06830082, -0.06777765, -0.06700202, -0.06639394, -0.06582435, + -0.06514987, -0.06467236, -0.06425272, -0.06359187, -0.062922, + -0.06300068, -0.06325494, -0.06316979, -0.06296254, -0.06246343, + -0.06136836, -0.0600936, -0.05910688, -0.05840872, -0.0576547, + -0.05655684, -0.05546518, -0.05484433, -0.05465746, -0.05449286, + -0.05397004, -0.05300742, -0.05196686, -0.05133129, -0.05064617, + -0.04973418, -0.04855687, -0.04714356, -0.04588103, -0.04547284, + -0.04571493, -0.04580704, -0.04523509, -0.04457293, -0.04405309, + -0.04338468, -0.04243512, -0.04137278, -0.04047946, -0.03984531, + -0.03931376, -0.0388847, -0.03888507, -0.03908662, -0.03877577, + -0.03830952, -0.03802713, -0.03773521, -0.03752388, -0.03743759, + -0.03714113, -0.03668387, -0.0363703, -0.03642288, -0.03633051, + -0.03574618, 
-0.03486536, -0.03357797, -0.03209969, -0.0306837, + -0.02963987, -0.029102, -0.0291513, -0.02932013, -0.02912619, + -0.02869407, -0.02801974, -0.02732363, -0.02690451, -0.02676622, + -0.0267323, -0.02664896, -0.02661708, -0.02637166, -0.02577496, + -0.02490428, -0.02410813, -0.02340367, -0.02283356, -0.02246305, + -0.0224229, -0.0225435, -0.02295603, -0.02324663, -0.02310005, + -0.02266893, -0.02221522, -0.02168056, -0.02129419, -0.02064909, + -0.02007801, -0.01979083, -0.01979541, -0.01978879, -0.01954269, + -0.0191623, -0.01879572, -0.01849678, -0.01810297, -0.01769666, + -0.01753802, -0.01794351, -0.01871307, -0.01930005, -0.01933, + -0.01901017, -0.01873486, -0.01861838, -0.01870777, -0.01879, + -0.01904219, -0.01945078, -0.0200607, -0.02076936, -0.02100213, + -0.02071439, -0.02052113, -0.02076313, -0.02128468, -0.02175631, + -0.02206387, -0.02201054, -0.02172142, -0.02143092, -0.02133647, + -0.02144956, -0.02176286, -0.02212579, -0.02243861, -0.02278316, + -0.02304113, -0.02313356, -0.02349275, -0.02417028, -0.0245954, + -0.0244062, -0.02388557, -0.02374682, -0.02401071, -0.02431126, + -0.02433125, -0.02427656, -0.02430442, -0.02424977, -0.02401619, + -0.02402294, -0.02415424, -0.02413262, -0.02404076, -0.02397651, + -0.0243893, -0.0253322, -0.02664395, -0.0278802, -0.02877936, + -0.02927182, -0.02937318, -0.02926277, -0.02931632, -0.02957945, + -0.02982133, -0.03023224, -0.03060406, -0.03066011, -0.03070932, + -0.03116429, -0.03179009, -0.03198094, -0.03149462, -0.03082037, + -0.03041594, -0.0303307, -0.03028465, -0.03052841, -0.0311837, + -0.03199307, -0.03262025, -0.03345083, -0.03442665, -0.03521313, + -0.0356433, -0.03606037, -0.03677406, -0.03735165, -0.03746578, + -0.03744154, -0.03752143, -0.03780898, -0.03837639, -0.03903232, + -0.03911629, -0.03857567, -0.03816592, -0.03819285, -0.03818405, + -0.03801684, -0.03788493, -0.03823232, -0.03906142, -0.04023251, + -0.04112434, -0.04188011, -0.04254759, -0.043, -0.04340181, + -0.04412687, -0.04484482, 
-0.04577669, -0.04700832, -0.04781373, + -0.04842662, -0.04923723, -0.05007637, -0.05037817, -0.05009794, + -0.04994083, -0.05012712, -0.05094001, -0.05216065, -0.05350458, + -0.05469781, -0.05566309, -0.05641011, -0.05688106, -0.05730818, + -0.05759156, -0.05763771, -0.05760073, -0.05766117, -0.05794587, + -0.05816696, -0.0584046, -0.05905105, -0.06014331, -0.06142231, + -0.06270788, -0.06388225, -0.06426245, -0.06386721, -0.0634656, + -0.06358049, -0.06442514, -0.06570047, -0.06694328, -0.0682621, + -0.06897846, -0.06896583, -0.06854621, -0.06797142, -0.06763755, + -0.06784024, -0.06844314, -0.06918567, -0.07021928, -0.07148473, + -0.07232504, -0.07272276, -0.07287021, -0.07289836, -0.07271531, + -0.07239956, -0.07214086, -0.07170078, -0.07081195, -0.06955202, + -0.06825156, -0.06690167, -0.06617102, -0.06683291, -0.06887539, + -0.07089424, -0.07174837, -0.07150888, -0.07070378, -0.06960066, + -0.06842496, -0.06777666, -0.06728403, -0.06681262, -0.06679066, + ]) + + # Compare results obtained using this library. There are slight # variations due to the fact that we are in two different packages - for i in range(n_components): - if np.sign(fpca.components_.data_matrix[i][0]) != np.sign( - results[i][0]): - results[i, :] *= -1 + # If the sign of the components is not the same the component is + # reflected. + results *= ( + np.sign(fpca.components_.data_matrix.ravel()[0]) + * np.sign(results[0]) + ) + np.testing.assert_allclose( - fpca.components_.data_matrix.reshape( - fpca.components_.data_matrix.shape[:-1]), + fpca.components_.data_matrix.ravel(), results, - rtol=1e-2) + rtol=1e-2, + ) if __name__ == '__main__': From a5339f6e20483c5ad3a1341336f08956fac1a651 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 19 Jun 2021 17:54:24 +0200 Subject: [PATCH 367/417] Fix Lp distance example. 
--- skfda/misc/metrics/_lp_distances.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/skfda/misc/metrics/_lp_distances.py b/skfda/misc/metrics/_lp_distances.py index 0c6e2d6f1..badf3f76c 100644 --- a/skfda/misc/metrics/_lp_distances.py +++ b/skfda/misc/metrics/_lp_distances.py @@ -1,3 +1,4 @@ + """Implementation of Lp distances.""" import math @@ -179,18 +180,19 @@ def lp_distance( Computes the distances between an object containing functional data corresponding to the functions y = 1 and y = x defined over the interval [0, 1] and another ones containing data of the functions y - = 0 and y = x/2. The result then is an array 2x2 with the computed - l2 distance between every pair of functions. + = 0 and y = x/2. The result then is an array of size 2 with the + computed l2 distance between the functions in the same position in + both. >>> import skfda >>> import numpy as np >>> >>> x = np.linspace(0, 1, 1001) - >>> fd = skfda.FDataGrid([np.ones(len(x))], x) - >>> fd2 = skfda.FDataGrid([np.zeros(len(x))], x) + >>> fd = skfda.FDataGrid([np.ones(len(x)), x], x) + >>> fd2 = skfda.FDataGrid([np.zeros(len(x)), x/2], x) >>> >>> skfda.misc.metrics.lp_distance(fd, fd2, p=2).round(2) - array([ 1.]) + array([ 1. , 0.29]) If the functional data are defined over a different set of points of discretisation the functions returns an exception. From 5e78ce63bd4e9f377a52573ffe98b74cfd19694c Mon Sep 17 00:00:00 2001 From: ElenaPetrunina Date: Wed, 23 Jun 2021 09:18:37 +0200 Subject: [PATCH 368/417] Update refs.bib --- docs/refs.bib | 393 +++++++++++++++++++++++++------------------------- 1 file changed, 196 insertions(+), 197 deletions(-) diff --git a/docs/refs.bib b/docs/refs.bib index cdda1c8bc..50e64219d 100644 --- a/docs/refs.bib +++ b/docs/refs.bib @@ -1,41 +1,25 @@ -@article{dai+genton_2018_visualization, - author = {Wenlin Dai and Marc G. 
Genton}, - title = {Multivariate Functional Data Visualization and Outlier Detection}, - journal = {Journal of Computational and Graphical Statistics}, - volume = {27}, - number = {4}, - pages = {923 -- 934}, - year = {2018}, - publisher = {Taylor & Francis}, - doi = {10.1080/10618600.2018.1473781}, - URL = {https://doi.org/10.1080/10618600.2018.1473781} -} - -@article{sun+genton_2011_boxplots, - author = {Ying Sun and Marc G. Genton}, - title = {Functional Boxplots}, - journal = {Journal of Computational and Graphical Statistics}, - volume = {20}, +@article{berrendero+cuevas+torrecilla_2016_hunting, + author = {Berrendero, J.R. and Cuevas, Antonio and Torrecilla, José}, + year = {2016}, + pages = {619 -- 638}, + title = {Variable selection in functional data classification: A maxima-hunting proposal}, number = {2}, - pages = {316 -- 334}, - year = {2011}, - publisher = {Taylor & Francis}, - doi = {10.1198/jcgs.2011.09224}, - URL = {https://doi.org/10.1198/jcgs.2011.09224} + volume = {26}, + journal = {Statistica Sinica}, + doi = {10.5705/ss.202014.0014} } -@article{gervini_2008_estimation, - author = {Gervini, Daniel}, - title = "{Robust functional estimation using the median and spherical principal components}", - journal = {Biometrika}, - volume = {95}, - number = {3}, - pages = {587 -- 600}, - year = {2008}, - month = {09}, - issn = {0006-3444}, - doi = {10.1093/biomet/asn031}, - url = {https://doi.org/10.1093/biomet/asn031} +@article{berrendero+cuevas+torrecilla_2018_hilbert, + author = {José R. Berrendero and Antonio Cuevas and José L. 
Torrecilla}, + title = {On the Use of Reproducing Kernel Hilbert Spaces in Functional Classification}, + journal = {Journal of the American Statistical Association}, + volume = {113}, + number = {523}, + pages = {1210 -- 1218}, + year = {2018}, + publisher = {Taylor & Francis}, + doi = {10.1080/01621459.2017.1320287}, + URL = {https://doi.org/10.1080/01621459.2017.1320287} } @inproceedings{breunig++_2000_outliers, @@ -49,6 +33,15 @@ @inproceedings{breunig++_2000_outliers doi = {10.1145/342009.335388} } +@article{cuesta-albertos++_2015_ddg, + title = {The DDG-classifier in the functional setting}, + author = {J. A. Cuesta-Albertos and M. Febrero-Bande and M. Oviedo de la Fuente}, + journal = {TEST}, + year = {2015}, + volume = {26}, + pages = {119 -- 142} +} + @article{cuevas++_2004_anova author = {Cuevas, Antonio and Febrero-Bande, Manuel and Fraiman, Ricardo}, year = {2004}, @@ -60,68 +53,28 @@ @article{cuevas++_2004_anova doi = {10.1016/j.csda.2003.10.021} } -@article{pini+stamm+vantini_2018_hotellings, - title = {Hotelling's T2 in separable Hilbert spaces}, - author = {Alessia Pini and Aymeric Stamm and Simone Vantini}, - journal = {Journal of Multivariate Analysis}, - year = {2018}, - month = {05}, - volume = {167}, - pages = {284 -- 305}, - doi = {10.1016/j.jmva.2018.05.007} -} - -@article{srivastava++_2011_ficher-rao, - author = {Srivastava, Anuj and Wu, Wei and Kurtek, Sebastian and Klassen, Eric and Marron, J.}, - year = {2011}, - journal={}, - title = {Registration of Functional Data Using Fisher-Rao Metric}, - pages = {5 -- 7}, - URL = {https://arxiv.org/abs/1103.3817v2} -} - -@inbook{srivastava+klassen_2016_analysis_amplitude, - author = {Srivastava, Anuj and Klassen, Eric}, - title = {Functional and Shape Data Analysis}, - chapter = {Functional Data and Elastic Registration}, - pages = {107 -- 109}, - publisher = {Springer-Verlag New York}, - year = {2016}, - isbn = {978-1-4939-4018-9}, - doi = {10.1007/978-1-4939-4020-2} -} - 
-@inbook{srivastava+klassen_2016_analysis_phase, - author = {Srivastava, Anuj and Klassen, Eric}, - title = {Functional and Shape Data Analysis}, - chapter = {Functional Data and Elastic Registration}, - pages = {109 -- 111}, - publisher = {Springer-Verlag New York}, - year = {2016}, - isbn = {978-1-4939-4018-9}, - doi = {10.1007/978-1-4939-4020-2} +@article{dai+genton_2018_visualization, + author = {Wenlin Dai and Marc G. Genton}, + title = {Multivariate Functional Data Visualization and Outlier Detection}, + journal = {Journal of Computational and Graphical Statistics}, + volume = {27}, + number = {4}, + pages = {923 -- 934}, + year = {2018}, + publisher = {Taylor & Francis}, + doi = {10.1080/10618600.2018.1473781}, + URL = {https://doi.org/10.1080/10618600.2018.1473781} } -@inbook{srivastava+klassen_2016_analysis_probability, - author = {Srivastava, Anuj and Klassen, Eric}, - title = {Functional and Shape Data Analysis}, - chapter = {Functional Data and Elastic Registration}, - pages = {113 -- 117}, +@inbook{ferraty+vieu_2006_nonparametric_knn, + author = {Frédéric Ferraty and Philippe Vieu}, + title = {Nonparametric Functional Data Analysis. Theory and Practice}, + chapter = {Functional Nonparametric Supervised Classification}, + pages = {116}, publisher = {Springer-Verlag New York}, - year = {2016}, - isbn = {978-1-4939-4018-9}, - doi = {10.1007/978-1-4939-4020-2} -} - -@article{ghosh+chaudhuri_2005_depth, - author = {Ghosh, Anil and Chaudhuri, Probal}, - year = {2005}, - month = {02}, - pages = {327 -- 350}, - title = {On Maximum Depth and Related Classifiers}, - volume = {32}, - journal = {Scandinavian Journal of Statistics}, - doi = {10.1111/j.1467-9469.2005.00423.x} + year = {2006}, + isbn = {978-0-387-30369-7}, + doi = {10.1007/0-387-36620-2} } @article{fraiman+muniz_2001_trimmed, @@ -135,83 +88,84 @@ @article{fraiman+muniz_2001_trimmed doi = {10.1007/BF02595706} } -@article{szekely+rizzo_2010_brownian, - author = {Gábor J. Székely and Maria L. 
Rizzo}, - title = {Brownian distance covariance}, - volume = {3}, - journal = {The Annals of Applied Statistics}, - number = {4}, - publisher = {Institute of Mathematical Statistics}, - pages = {1236 -- 1265}, - year = {2009}, - doi = {10.1214/09-AOAS312}, - URL = {https://doi.org/10.1214/09-AOAS312} +@article{gervini_2008_estimation, + author = {Gervini, Daniel}, + title = "{Robust functional estimation using the median and spherical principal components}", + journal = {Biometrika}, + volume = {95}, + number = {3}, + pages = {587 -- 600}, + year = {2008}, + month = {09}, + issn = {0006-3444}, + doi = {10.1093/biomet/asn031}, + url = {https://doi.org/10.1093/biomet/asn031} } -@inproceedings{torrecilla+suarez_2016_hunting, - author = {Torrecilla, Jose L. and Su\'{a}rez, Alberto}, - title = {Feature Selection in Functional Data Classification with Recursive Maxima Hunting}, - year = {2016}, - volume = {29}, - publisher = {Curran Associates Inc.}, - booktitle = {Proceedings of the 30th International Conference on Neural Information Processing Systems}, - pages = {4835 -- 4843}, - series = {NIPS'16} +@article{ghosh+chaudhuri_2005_depth, + author = {Ghosh, Anil and Chaudhuri, Probal}, + year = {2005}, + month = {02}, + pages = {327 -- 350}, + title = {On Maximum Depth and Related Classifiers}, + volume = {32}, + journal = {Scandinavian Journal of Statistics}, + doi = {10.1111/j.1467-9469.2005.00423.x} } -@article{berrendero+cuevas+torrecilla_2016_hunting, - author = {Berrendero, J.R. 
and Cuevas, Antonio and Torrecilla, José}, - year = {2016}, - pages = {619 -- 638}, - title = {Variable selection in functional data classification: A maxima-hunting proposal}, - number = {2}, - volume = {26}, - journal = {Statistica Sinica}, - doi = {10.5705/ss.202014.0014} +@article{pini+stamm+vantini_2018_hotellings, + title = {Hotelling's T2 in separable Hilbert spaces}, + author = {Alessia Pini and Aymeric Stamm and Simone Vantini}, + journal = {Journal of Multivariate Analysis}, + year = {2018}, + month = {05}, + volume = {167}, + pages = {284 -- 305}, + doi = {10.1016/j.jmva.2018.05.007} } -@article{berrendero+cuevas+torrecilla_2018_hilbert, - author = {José R. Berrendero and Antonio Cuevas and José L. Torrecilla}, - title = {On the Use of Reproducing Kernel Hilbert Spaces in Functional Classification}, - journal = {Journal of the American Statistical Association}, - volume = {113}, - number = {523}, - pages = {1210 -- 1218}, - year = {2018}, - publisher = {Taylor & Francis}, - doi = {10.1080/01621459.2017.1320287}, - URL = {https://doi.org/10.1080/01621459.2017.1320287} +@inbook{ramsay+silverman_2005_functional_bspline, + author = {James Ramsay and B. W. Silverman}, + title = {Functional Data Analysis}, + chapter = {From functional data to smooth functions}, + pages = {50 -- 51}, + publisher = {Springer-Verlag New York}, + year = {2005}, + isbn = {978-0-387-40080-8}, + doi = {110.1007/b98888} } -@inbook{ramsay+silverman_2005_functional_basis, +@inbook{ramsay+silverman_2005_functional_spline, author = {James Ramsay and B. W. 
Silverman}, title = {Functional Data Analysis}, - chapter = {Principal components analysis for functional data}, - pages = {161 -- 164}, + chapter = {Smoothing functional data with a roughness penalty}, + pages = {86 -- 87}, publisher = {Springer-Verlag New York}, year = {2005}, isbn = {978-0-387-40080-8}, doi = {110.1007/b98888} } -@inbook{ramsay+silverman_2005_functional_discretizing, +@inbook{ramsay+silverman_2005_functional_spline_squares, author = {James Ramsay and B. W. Silverman}, title = {Functional Data Analysis}, - chapter = {Principal components analysis for functional data}, - pages = {161}, + chapter = {Smoothing functional data with a roughness penalty}, + pages = {89 -- 90}, publisher = {Springer-Verlag New York}, year = {2005}, isbn = {978-0-387-40080-8}, doi = {110.1007/b98888} } -@article{cuesta-albertos++_2015_ddg, - title = {The DDG-classifier in the functional setting}, - author = {J. A. Cuesta-Albertos and M. Febrero-Bande and M. Oviedo de la Fuente}, - journal = {TEST}, - year = {2015}, - volume = {26}, - pages = {119 -- 142} +@inbook{ramsay+silverman_2005_functional_shift, + author = {James Ramsay and B. W. Silverman}, + title = {Functional Data Analysis}, + chapter = {The registration and display of functional data}, + pages = {129 -- 132}, + publisher = {Springer-Verlag New York}, + year = {2005}, + isbn = {978-0-387-40080-8}, + doi = {110.1007/b98888} } @inbook{ramsay+silverman_2005_functional_landmark, @@ -225,22 +179,33 @@ @inbook{ramsay+silverman_2005_functional_landmark doi = {110.1007/b98888} } -@inbook{ramsay+silverman_2005_functional_shift, +@inbook{ramsay+silverman_2005_functional_newton-raphson, author = {James Ramsay and B. W. 
Silverman}, title = {Functional Data Analysis}, chapter = {The registration and display of functional data}, - pages = {129 -- 132}, + pages = {142 -- 144}, publisher = {Springer-Verlag New York}, year = {2005}, isbn = {978-0-387-40080-8}, doi = {110.1007/b98888} } -@inbook{ramsay+silverman_2005_functional_newton-raphson, +@inbook{ramsay+silverman_2005_functional_discretizing, author = {James Ramsay and B. W. Silverman}, title = {Functional Data Analysis}, - chapter = {The registration and display of functional data}, - pages = {142 -- 144}, + chapter = {Principal components analysis for functional data}, + pages = {161}, + publisher = {Springer-Verlag New York}, + year = {2005}, + isbn = {978-0-387-40080-8}, + doi = {110.1007/b98888} +} + +@inbook{ramsay+silverman_2005_functional_basis, + author = {James Ramsay and B. W. Silverman}, + title = {Functional Data Analysis}, + chapter = {Principal components analysis for functional data}, + pages = {161 -- 164}, publisher = {Springer-Verlag New York}, year = {2005}, isbn = {978-0-387-40080-8}, @@ -269,11 +234,33 @@ @inbook{srivastava+klassen_2016_analysis_square doi = {10.1007/978-1-4939-4020-2} } -@inbook{srivastava+klassen_2016_analysis_orbit, +@inbook{srivastava+klassen_2016_analysis_amplitude, author = {Srivastava, Anuj and Klassen, Eric}, title = {Functional and Shape Data Analysis}, - chapter = {Statistical Modeling of Functional Data}, - pages = {274 -- 277}, + chapter = {Functional Data and Elastic Registration}, + pages = {107 -- 109}, + publisher = {Springer-Verlag New York}, + year = {2016}, + isbn = {978-1-4939-4018-9}, + doi = {10.1007/978-1-4939-4020-2} +} + +@inbook{srivastava+klassen_2016_analysis_phase, + author = {Srivastava, Anuj and Klassen, Eric}, + title = {Functional and Shape Data Analysis}, + chapter = {Functional Data and Elastic Registration}, + pages = {109 -- 111}, + publisher = {Springer-Verlag New York}, + year = {2016}, + isbn = {978-1-4939-4018-9}, + doi = {10.1007/978-1-4939-4020-2} +} 
+ +@inbook{srivastava+klassen_2016_analysis_probability, + author = {Srivastava, Anuj and Klassen, Eric}, + title = {Functional and Shape Data Analysis}, + chapter = {Functional Data and Elastic Registration}, + pages = {113 -- 117}, publisher = {Springer-Verlag New York}, year = {2016}, isbn = {978-1-4939-4018-9}, @@ -291,12 +278,23 @@ @inbook{srivastava+klassen_2016_analysis_karcher doi = {10.1007/978-1-4939-4020-2} } -@article{srivastava++_2011_ficher-rao_orbit, +@inbook{srivastava+klassen_2016_analysis_orbit, + author = {Srivastava, Anuj and Klassen, Eric}, + title = {Functional and Shape Data Analysis}, + chapter = {Statistical Modeling of Functional Data}, + pages = {274 -- 277}, + publisher = {Springer-Verlag New York}, + year = {2016}, + isbn = {978-1-4939-4018-9}, + doi = {10.1007/978-1-4939-4020-2} +} + +@article{srivastava++_2011_ficher-rao, author = {Srivastava, Anuj and Wu, Wei and Kurtek, Sebastian and Klassen, Eric and Marron, J.}, year = {2011}, journal={}, title = {Registration of Functional Data Using Fisher-Rao Metric}, - pages = {9 -- 10}, + pages = {5 -- 7}, URL = {https://arxiv.org/abs/1103.3817v2} } @@ -309,27 +307,50 @@ @article{srivastava++_2011_ficher-rao_karcher URL = {https://arxiv.org/abs/1103.3817v2} } +@article{srivastava++_2011_ficher-rao_orbit, + author = {Srivastava, Anuj and Wu, Wei and Kurtek, Sebastian and Klassen, Eric and Marron, J.}, + year = {2011}, + journal={}, + title = {Registration of Functional Data Using Fisher-Rao Metric}, + pages = {9 -- 10}, + URL = {https://arxiv.org/abs/1103.3817v2} +} -@inbook{ramsay+silverman_2005_functional_spline, - author = {James Ramsay and B. W. Silverman}, - title = {Functional Data Analysis}, - chapter = {Smoothing functional data with a roughness penalty}, - pages = {86 -- 87}, - publisher = {Springer-Verlag New York}, - year = {2005}, - isbn = {978-0-387-40080-8}, - doi = {110.1007/b98888} +@article{sun+genton_2011_boxplots, + author = {Ying Sun and Marc G. 
Genton}, + title = {Functional Boxplots}, + journal = {Journal of Computational and Graphical Statistics}, + volume = {20}, + number = {2}, + pages = {316 -- 334}, + year = {2011}, + publisher = {Taylor & Francis}, + doi = {10.1198/jcgs.2011.09224}, + URL = {https://doi.org/10.1198/jcgs.2011.09224} } -@inbook{ramsay+silverman_2005_functional_spline_squares, - author = {James Ramsay and B. W. Silverman}, - title = {Functional Data Analysis}, - chapter = {Smoothing functional data with a roughness penalty}, - pages = {89 -- 90}, - publisher = {Springer-Verlag New York}, - year = {2005}, - isbn = {978-0-387-40080-8}, - doi = {110.1007/b98888} +@article{szekely+rizzo_2010_brownian, + author = {Gábor J. Székely and Maria L. Rizzo}, + title = {Brownian distance covariance}, + volume = {3}, + journal = {The Annals of Applied Statistics}, + number = {4}, + publisher = {Institute of Mathematical Statistics}, + pages = {1236 -- 1265}, + year = {2009}, + doi = {10.1214/09-AOAS312}, + URL = {https://doi.org/10.1214/09-AOAS312} +} + +@inproceedings{torrecilla+suarez_2016_hunting, + author = {Torrecilla, Jose L. and Su\'{a}rez, Alberto}, + title = {Feature Selection in Functional Data Classification with Recursive Maxima Hunting}, + year = {2016}, + volume = {29}, + publisher = {Curran Associates Inc.}, + booktitle = {Proceedings of the 30th International Conference on Neural Information Processing Systems}, + pages = {4835 -- 4843}, + series = {NIPS'16} } @inbook{wasserman_2006_nonparametric_nw, @@ -352,26 +373,4 @@ @inbook{wasserman_2006_nonparametric_llr year = {2006}, isbn = {978-0-387-25145-5}, doi = {10.1007/0-387-30623-4} -} - -@inbook{ferraty+vieu_2006_nonparametric_knn, - author = {Frédéric Ferraty and Philippe Vieu}, - title = {Nonparametric Functional Data Analysis. 
Theory and Practice}, - chapter = {Functional Nonparametric Supervised Classification}, - pages = {116}, - publisher = {Springer-Verlag New York}, - year = {2006}, - isbn = {978-0-387-30369-7}, - doi = {10.1007/0-387-36620-2} -} - -@inbook{ramsay+silverman_2005_functional_bspline, - author = {James Ramsay and B. W. Silverman}, - title = {Functional Data Analysis}, - chapter = {From functional data to smooth functions}, - pages = {50 -- 51}, - publisher = {Springer-Verlag New York}, - year = {2005}, - isbn = {978-0-387-40080-8}, - doi = {110.1007/b98888} } \ No newline at end of file From 40b034ef8094352314a8df54b150a0ad5ce78fb8 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 24 Jun 2021 19:21:23 +0200 Subject: [PATCH 369/417] Typing evaluation transformer. --- .../representation/_evaluation_trasformer.py | 75 +++++++++++++++---- 1 file changed, 61 insertions(+), 14 deletions(-) diff --git a/skfda/representation/_evaluation_trasformer.py b/skfda/representation/_evaluation_trasformer.py index b91444c31..f470057a1 100644 --- a/skfda/representation/_evaluation_trasformer.py +++ b/skfda/representation/_evaluation_trasformer.py @@ -1,10 +1,22 @@ +from __future__ import annotations + +from typing import Optional, Union, overload + +import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils.validation import check_is_fitted +from typing_extensions import Literal + from ._functional_data import FData +from ._typing import ArrayLike, GridPointsLike +from .extrapolation import ExtrapolationLike from .grid import FDataGrid -class EvaluationTransformer(BaseEstimator, TransformerMixin): +class EvaluationTransformer( + BaseEstimator, # type:ignore + TransformerMixin, # type:ignore +): r""" Transformer returning the evaluations of FData objects as a matrix. @@ -25,10 +37,9 @@ class EvaluationTransformer(BaseEstimator, TransformerMixin): the parameter has no efect. Defaults to False. 
Attributes: - shape_ (tuple): original shape of coefficients per sample. + shape\_ (tuple): original shape of coefficients per sample. Examples: - >>> from skfda.representation import (FDataGrid, FDataBasis, ... EvaluationTransformer) >>> from skfda.representation.basis import Monomial @@ -82,32 +93,68 @@ class EvaluationTransformer(BaseEstimator, TransformerMixin): """ - def __init__(self, eval_points=None, *, - extrapolation=None, grid=False): + @overload + def __init__( + self, + eval_points: ArrayLike, + *, + extrapolation: Optional[ExtrapolationLike] = None, + grid: Literal[False] = False, + ) -> None: + pass + + @overload + def __init__( + self, + eval_points: GridPointsLike, + *, + extrapolation: Optional[ExtrapolationLike] = None, + grid: Literal[True], + ) -> None: + pass + + def __init__( + self, + eval_points: Union[ArrayLike, GridPointsLike, None] = None, + *, + extrapolation: Optional[ExtrapolationLike] = None, + grid: bool = False, + ): self.eval_points = eval_points self.extrapolation = extrapolation self.grid = grid - def fit(self, X: FData, y=None): + def fit( # noqa: D102 + self, + X: FData, + y: None = None, + ) -> EvaluationTransformer: if self.eval_points is None and not isinstance(X, FDataGrid): - raise ValueError("If no eval_points are passed, the functions " - "should be FDataGrid objects.") + raise ValueError( + "If no eval_points are passed, the functions " + "should be FDataGrid objects.", + ) self._is_fitted = True return self - def transform(self, X, y=None): + def transform( # noqa: D102 + self, + X: FData, + y: None = None, + ) -> np.ndarray: check_is_fitted(self, '_is_fitted') if self.eval_points is None: evaluation = X.data_matrix.copy() else: - evaluation = X(self.eval_points, - extrapolation=self.extrapolation, grid=self.grid) - - evaluation = evaluation.reshape((X.n_samples, -1)) + evaluation = X( # type: ignore + self.eval_points, + extrapolation=self.extrapolation, + grid=self.grid, + ) - return evaluation + return 
evaluation.reshape((X.n_samples, -1)) From 05c8a80afd625344ce8117a69cf5363b5db4bea1 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 30 Jun 2021 01:25:39 +0200 Subject: [PATCH 370/417] Typing CoefficientsTransformer. --- .../basis/_coefficients_transformer.py | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/skfda/representation/basis/_coefficients_transformer.py b/skfda/representation/basis/_coefficients_transformer.py index 073c2eb63..1f5f42db6 100644 --- a/skfda/representation/basis/_coefficients_transformer.py +++ b/skfda/representation/basis/_coefficients_transformer.py @@ -1,15 +1,21 @@ +from __future__ import annotations + +import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils.validation import check_is_fitted from ._fdatabasis import FDataBasis -class CoefficientsTransformer(BaseEstimator, TransformerMixin): - """ +class CoefficientsTransformer( + BaseEstimator, # type:ignore + TransformerMixin, # type:ignore +): + r""" Transformer returning the coefficients of FDataBasis objects as a matrix. Attributes: - shape_ (tuple): original shape of coefficients per sample. + basis\_ (tuple): Basis used. 
Examples: >>> from skfda.representation.basis import (FDataBasis, Monomial, @@ -26,19 +32,24 @@ class CoefficientsTransformer(BaseEstimator, TransformerMixin): """ - def fit(self, X: FDataBasis, y=None): + def fit( # noqa: D102 + self, + X: FDataBasis, + y: None = None, + ) -> CoefficientsTransformer: - self.shape_ = X.coefficients.shape[1:] + self.basis_ = X.basis return self - def transform(self, X, y=None): + def transform( # noqa: D102 + self, + X: FDataBasis, + y: None = None, + ) -> np.ndarray: check_is_fitted(self) - assert X.coefficients.shape[1:] == self.shape_ - - coefficients = X.coefficients.copy() - coefficients = coefficients.reshape((X.n_samples, -1)) + assert X.basis == self.basis_ - return coefficients + return X.coefficients.copy() From c4d8143c1d3a9c9ccb481fda8465506881c12463 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 10 Jul 2021 22:30:38 +0200 Subject: [PATCH 371/417] Fix Pandas indexing. Workaround around https://github.com/pandas-dev/pandas/issues/42430. --- skfda/_utils/_utils.py | 5 +++++ skfda/exploratory/stats/_stats.py | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index eefb2e9ac..95012a406 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -645,6 +645,11 @@ def _int_to_real(array: np.ndarray) -> np.ndarray: def _check_array_key(array: np.ndarray, key: Any) -> Any: """Check a getitem key.""" key = check_array_indexer(array, key) + if isinstance(key, tuple): + non_ellipsis = [i for i in key if i is not Ellipsis] + if len(non_ellipsis) > 1: + raise KeyError(key) + key = non_ellipsis[0] if isinstance(key, numbers.Integral): # To accept also numpy ints key = int(key) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 6aaac3871..069e1e138 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -5,7 +5,7 @@ import numpy as np -from ...misc.metrics import l2_distance, l2_norm 
+from ...misc.metrics import Metric, l2_distance, l2_norm from ...representation import FData, FDataGrid from ..depth import Depth, ModifiedBandDepth @@ -121,7 +121,7 @@ def geometric_median( X: T, *, tol: float = 1.e-8, - metric: Callable[[T, T], np.ndarray] = l2_distance, + metric: Metric[T] = l2_distance, ) -> T: r"""Compute the geometric median. From db68183028804586491d0550a7df97caecfbc859 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 11 Jul 2021 14:44:14 +0200 Subject: [PATCH 372/417] Skip pandas errors for now. --- tests/test_pandas_fdatabasis.py | 5 +++++ tests/test_pandas_fdatagrid.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/tests/test_pandas_fdatabasis.py b/tests/test_pandas_fdatabasis.py index c01f41ff5..ea578939a 100644 --- a/tests/test_pandas_fdatabasis.py +++ b/tests/test_pandas_fdatabasis.py @@ -345,6 +345,11 @@ class TestArithmeticOps(base.BaseArithmeticOpsTests): series_scalar_exc = None + # Bug introduced by https://github.com/pandas-dev/pandas/pull/37132 + @pytest.mark.skip(reason="Unsupported") + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + pass + # FDatabasis does not implement division by non constant @pytest.mark.skip(reason="Unsupported") def test_divmod_series_array(self, dtype): diff --git a/tests/test_pandas_fdatagrid.py b/tests/test_pandas_fdatagrid.py index 48cc14f09..f81d9e9b6 100644 --- a/tests/test_pandas_fdatagrid.py +++ b/tests/test_pandas_fdatagrid.py @@ -353,6 +353,11 @@ class TestArithmeticOps(base.BaseArithmeticOpsTests): series_scalar_exc = None + # Bug introduced by https://github.com/pandas-dev/pandas/pull/37132 + @pytest.mark.skip(reason="Unsupported") + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + pass + # Does not convert properly a list of FData to a FData @pytest.mark.skip(reason="Unsupported") def test_arith_series_with_array(self, dtype): From 18e02d0a6d61d19e4c027713bd70416af59b7f2c Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 11 Jul 
2021 15:52:23 +0200 Subject: [PATCH 373/417] Improve typing test pandas fdatagrid. --- tests/test_pandas_fdatagrid.py | 226 +++++++++++++++++++++------------ 1 file changed, 142 insertions(+), 84 deletions(-) diff --git a/tests/test_pandas_fdatagrid.py b/tests/test_pandas_fdatagrid.py index f81d9e9b6..b7b9ad27d 100644 --- a/tests/test_pandas_fdatagrid.py +++ b/tests/test_pandas_fdatagrid.py @@ -1,74 +1,89 @@ -import operator +from __future__ import annotations + +from typing import Any, Callable, Generator, NoReturn, Union import numpy as np import pandas import pytest from pandas import Series +from pandas.api.extensions import ExtensionArray, ExtensionDtype from pandas.tests.extension import base import skfda +from skfda.representation.grid import FDataGrid ############################################################################## # Fixtures ############################################################################## @pytest.fixture -def dtype(): - """A fixture providing the ExtensionDtype to validate.""" +def dtype() -> ExtensionDtype: + """Return the ExtensionDtype to validate.""" return skfda.representation.grid.FDataGridDType( grid_points=[ np.arange(10), - np.arange(10) / 10], - dim_codomain=3 + np.arange(10) / 10, + ], + dim_codomain=3, ) @pytest.fixture -def data(): +def data() -> ExtensionArray: """ + Return data. + Length-100 array for this type. 
* data[0] and data[1] should both be non missing * data[0] and data[1] should not be equal - """ + """ data_matrix = np.arange(1, 100 * 10 * 10 * 3 + 1).reshape(100, 10, 10, 3) grid_points = [ np.arange(10), - np.arange(10) / 10] + np.arange(10) / 10, + ] return skfda.FDataGrid(data_matrix, grid_points=grid_points) @pytest.fixture -def data_for_twos(): - """Length-100 array in which all the elements are two.""" - +def data_for_twos() -> ExtensionArray: + """Return a length-100 array in which all the elements are two.""" data_matrix = np.full( - 100 * 10 * 10 * 3, fill_value=2).reshape(100, 10, 10, 3) + 100 * 10 * 10 * 3, fill_value=2, + ).reshape(100, 10, 10, 3) grid_points = [ np.arange(10), - np.arange(10) / 10] + np.arange(10) / 10, + ] return skfda.FDataGrid(data_matrix, grid_points=grid_points) @pytest.fixture -def data_missing(): - """Length-2 array with [NA, Valid]""" - +def data_missing() -> ExtensionArray: + """Return a length-2 array with [NA, Valid].""" data_matrix = np.arange( - 2 * 10 * 10 * 3, dtype=np.float_).reshape(2, 10, 10, 3) + 2 * 10 * 10 * 3, + dtype=np.float_, + ).reshape(2, 10, 10, 3) data_matrix[0, ...] = np.NaN grid_points = [ np.arange(10), - np.arange(10) / 10] + np.arange(10) / 10, + ] return skfda.FDataGrid(data_matrix, grid_points=grid_points) @pytest.fixture(params=["data", "data_missing"]) -def all_data(request, data, data_missing): - """Parametrized fixture giving 'data' and 'data_missing'""" +def all_data( + request, + data: ExtensionArray, + data_missing: ExtensionArray, +) -> ExtensionArray: + """Return 'data' or 'data_missing'.""" if request.param == "data": return data elif request.param == "data_missing": @@ -76,30 +91,34 @@ def all_data(request, data, data_missing): @pytest.fixture -def data_repeated(data): +def data_repeated( + data: ExtensionArray, +) -> Callable[[int], Generator[ExtensionArray, None, None]]: """ Generate many datasets. 
- Parameters - ---------- - data : fixture implementing `data` - Returns - ------- - Callable[[int], Generator]: - A callable that takes a `count` argument and - returns a generator yielding `count` datasets. + + Args: + data : Fixture implementing `data` + + Returns: + Callable[[int], Generator]: + A callable that takes a `count` argument and + returns a generator yielding `count` datasets. """ - def gen(count): - for _ in range(count): - yield data + def gen(count: int) -> Generator[ExtensionArray, None, None]: + yield from ( + data for _ in range(count) + ) return gen @pytest.fixture -def data_for_sorting(): +def data_for_sorting() -> NoReturn: """ - Length-3 array with a known sort order. + Return length-3 array with a known sort order. + This should be three items [B, C, A] with A < B < C """ @@ -107,9 +126,9 @@ def data_for_sorting(): @pytest.fixture -def data_missing_for_sorting(): +def data_missing_for_sorting() -> NoReturn: """ - Length-3 array with a known sort order. + Return length-3 array with a known sort order. This should be three items [B, NA, A] with A < B and NA missing. """ @@ -117,30 +136,37 @@ def data_missing_for_sorting(): @pytest.fixture -def na_cmp(): +def na_cmp() -> Callable[..., bool]: """ Binary operator for comparing NA values. + Should return a function of two arguments that returns True if both arguments are (scalar) NA for your type. By default, uses ``operator.is_`` """ - def isna(x, y): - return ((x is pandas.NA or all(x.isna())) - and (y is pandas.NA or all(y.isna()))) + def isna( + x: Union[pandas.NA, FDataGrid], + y: Union[pandas.NA, FDataGrid], + ) -> bool: + return ( + (x is pandas.NA or all(x.isna())) + and (y is pandas.NA or all(y.isna())) + ) return isna @pytest.fixture -def na_value(): - """The scalar missing value for this type. Default 'None'""" +def na_value() -> pandas.NA: + """Return the scalar missing value for this type. 
Default 'None'.""" return pandas.NA @pytest.fixture -def data_for_grouping(): +def data_for_grouping() -> NoReturn: """ - Data for factorization, grouping, and unique tests. + Return data for factorization, grouping, and unique tests. + Expected to be like [B, B, NA, NA, A, A, B, C] Where A < B < C and NA is missing """ @@ -148,8 +174,8 @@ def data_for_grouping(): @pytest.fixture(params=[True, False]) -def box_in_series(request): - """Whether to box the data in a Series""" +def box_in_series(request) -> bool: + """Whether to box the data in a Series.""" return request.param @@ -162,32 +188,28 @@ def box_in_series(request): ], ids=["scalar", "list", "series", "object"], ) -def groupby_apply_op(request): - """ - Functions to test groupby.apply(). - """ +def groupby_apply_op(request) -> Callable[[FDataGrid], Any]: + """Functions to test groupby.apply().""" return request.param @pytest.fixture(params=[True, False]) -def as_frame(request): - """ - Boolean fixture to support Series and Series.to_frame() comparison testing. - """ +def as_frame(request) -> bool: + """Whether to support Series and Series.to_frame() comparison testing.""" return request.param @pytest.fixture(params=[True, False]) -def as_series(request): - """ - Boolean fixture to support arr and Series(arr) comparison testing. - """ +def as_series(request) -> bool: + """Boolean fixture to support arr and Series(arr) comparison testing.""" return request.param @pytest.fixture(params=[True, False]) -def use_numpy(request): +def use_numpy(request) -> bool: """ + Compare ExtensionDtype and numpy. + Boolean fixture to support comparison testing of ExtensionDtype array and numpy array. """ @@ -195,8 +217,10 @@ def use_numpy(request): @pytest.fixture(params=["ffill", "bfill"]) -def fillna_method(request): +def fillna_method(request) -> str: """ + Series.fillna parameter fixture. + Parametrized fixture giving method parameters 'ffill' and 'bfill' for Series.fillna(method=) testing. 
""" @@ -204,10 +228,8 @@ def fillna_method(request): @pytest.fixture(params=[True, False]) -def as_array(request): - """ - Boolean fixture to support ExtensionDtype _from_sequence method testing. - """ +def as_array(request) -> bool: + """Whether to support ExtensionDtype _from_sequence method testing.""" return request.param @@ -230,7 +252,7 @@ def as_array(request): @pytest.fixture(params=_all_arithmetic_operators) -def all_arithmetic_operators(request): +def all_arithmetic_operators(request) -> Callable[..., Any]: """ Fixture for dunder names for common arithmetic operations. """ @@ -240,7 +262,7 @@ def all_arithmetic_operators(request): @pytest.fixture(params=["__eq__", "__ne__", # "__le__", "__lt__", "__ge__", "__gt__" ]) -def all_compare_operators(request): +def all_compare_operators(request) -> Callable[..., Any]: """ Fixture for dunder names for common compare operations """ @@ -277,12 +299,12 @@ class TestCasting(base.BaseCastingTests): # Tries to construct dtype from string @pytest.mark.skip(reason="Unsupported") - def test_astype_str(self): + def test_astype_str(self) -> None: pass # Tries to construct dtype from string @pytest.mark.skip(reason="Unsupported") - def test_astype_string(self): + def test_astype_string(self) -> None: pass @@ -290,12 +312,12 @@ class TestConstructors(base.BaseConstructorsTests): # Does not support scalars which are also ExtensionArrays @pytest.mark.skip(reason="Unsupported") - def test_series_constructor_scalar_with_index(self): + def test_series_constructor_scalar_with_index(self) -> None: pass # Tries to construct dtype from string @pytest.mark.skip(reason="Unsupported") - def test_from_dtype(self): + def test_from_dtype(self) -> None: pass @@ -303,22 +325,25 @@ class TestDtype(base.BaseDtypeTests): # Tries to construct dtype from string @pytest.mark.skip(reason="Unsupported") - def test_construct_from_string_own_name(self): + def test_construct_from_string_own_name(self) -> None: pass # Tries to construct dtype from string 
@pytest.mark.skip(reason="Unsupported") - def test_is_dtype_from_name(self): + def test_is_dtype_from_name(self) -> None: pass # Tries to construct dtype from string @pytest.mark.skip(reason="Unsupported") - def test_eq_with_str(self): + def test_eq_with_str(self) -> None: pass # Tries to construct dtype from string @pytest.mark.skip(reason="Unsupported") - def test_construct_from_string(self, dtype): + def test_construct_from_string( + self, + dtype: ExtensionDtype, + ) -> None: pass @@ -330,22 +355,32 @@ class TestInterface(base.BaseInterfaceTests): # Does not support scalars which are also array_like @pytest.mark.skip(reason="Unsupported") - def test_array_interface(self): + def test_array_interface(self) -> None: pass # We do not implement setitem @pytest.mark.skip(reason="Unsupported") - def test_copy(self, dtype): + def test_copy( + self, + dtype: ExtensionDtype, + ) -> None: pass # We do not implement setitem @pytest.mark.skip(reason="Unsupported") - def test_view(self, dtype): + def test_view( + self, + dtype: ExtensionDtype, + ) -> None: pass # Pending https://github.com/pandas-dev/pandas/issues/38812 resolution @pytest.mark.skip(reason="Bugged") - def test_contains(self, data, data_missing): + def test_contains( + self, + data: ExtensionArray, + data_missing: ExtensionArray, + ) -> None: pass @@ -355,17 +390,27 @@ class TestArithmeticOps(base.BaseArithmeticOpsTests): # Bug introduced by https://github.com/pandas-dev/pandas/pull/37132 @pytest.mark.skip(reason="Unsupported") - def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + def test_arith_frame_with_scalar( + self, + data: ExtensionArray, + all_arithmetic_operators: Callable[..., Any], + ) -> None: pass # Does not convert properly a list of FData to a FData @pytest.mark.skip(reason="Unsupported") - def test_arith_series_with_array(self, dtype): + def test_arith_series_with_array( + self, + dtype: ExtensionDtype, + ) -> None: pass # Does not error on operations 
@pytest.mark.skip(reason="Unsupported") - def test_error(self, dtype): + def test_error( + self, + dtype: ExtensionDtype, + ) -> None: pass @@ -373,17 +418,30 @@ class TestComparisonOps(base.BaseComparisonOpsTests): # Cannot be compared with 0 @pytest.mark.skip(reason="Unsupported") - def test_compare_scalar(self, data, all_compare_operators): + def test_compare_scalar( + self, + data: ExtensionArray, + all_compare_operators: Callable[..., Any], + ) -> None: pass # Not sure how to pass it. Should it be reimplemented? @pytest.mark.skip(reason="Unsupported") - def test_compare_array(self, data, all_compare_operators): + def test_compare_array( + self, + data: ExtensionArray, + all_compare_operators: Callable[..., Any], + ) -> None: pass class TestNumericReduce(base.BaseNumericReduceTests): - def check_reduce(self, s, op_name, skipna): + def check_reduce( + self, + s: FDataGrid, + op_name: str, + skipna: bool, + ) -> None: result = getattr(s, op_name)(skipna=skipna) assert result.n_samples == 1 From 952e5fe5ab0388a21e047c212554498ef5279c01 Mon Sep 17 00:00:00 2001 From: ElenaPetrunina Date: Mon, 12 Jul 2021 12:08:56 +0200 Subject: [PATCH 374/417] bibtex refs --- readthedocs-requirements.txt | 2 +- skfda/exploratory/stats/_stats.py | 7 ++++--- skfda/exploratory/visualization/_boxplot.py | 7 ++++--- skfda/exploratory/visualization/_magnitude_shape_plot.py | 7 ++++--- .../variable_selection/recursive_maxima_hunting.py | 3 ++- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/readthedocs-requirements.txt b/readthedocs-requirements.txt index 382761815..d279be0c5 100644 --- a/readthedocs-requirements.txt +++ b/readthedocs-requirements.txt @@ -14,4 +14,4 @@ multimethod>=1.2 findiff jupyter-sphinx pytest -sphinxcontrib.bibtex \ No newline at end of file +sphinxcontrib-bibtex \ No newline at end of file diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 930b5ddf9..4daf3893e 100644 --- a/skfda/exploratory/stats/_stats.py +++ 
b/skfda/exploratory/stats/_stats.py @@ -126,8 +126,7 @@ def geometric_median( r"""Compute the geometric median. The sample geometric median is the point that minimizes the :math:`L_1` - norm of the vector of distances to all observations - :footcite:`gervini_2008_estimation`: + norm of the vector of distances to all observations: .. math:: @@ -159,7 +158,9 @@ def geometric_median( :func:`depth_based_median` References: - .. footbibliography:: + .. bibliography:: + + gervini_2008_estimation """ weights = np.full(len(X), 1 / len(X)) diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 127908bfd..987a63b8f 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -99,8 +99,7 @@ class Boxplot(FDataBoxplot): functional boxplot are: the envelope of the 50% central region, the median curve,and the maximum non-outlying envelope. In addition, outliers can be detected in a functional boxplot by the 1.5 times the 50% central region - empirical rule, analogous to the rule for classical boxplots - :footcite:`sun+genton_2011_boxplots`. + empirical rule, analogous to the rule for classical boxplots. Args: @@ -242,7 +241,9 @@ class Boxplot(FDataBoxplot): outliers=array([ True, False, False, True])) References: - .. footbibliography:: + .. bibliography:: + + sun+genton_2011_boxplots """ diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py index 211a84fc6..9ce3f38a9 100644 --- a/skfda/exploratory/visualization/_magnitude_shape_plot.py +++ b/skfda/exploratory/visualization/_magnitude_shape_plot.py @@ -26,8 +26,7 @@ class MagnitudeShapePlot(BasePlot): This plot, which is based on the calculation of the :func:`directional outlyingness ` of each of the samples, serves as a visualization tool for the centrality - of curves. 
Furthermore, an outlier detection procedure is included - :footcite:`dai+genton_2018_visualization`. + of curves. Furthermore, an outlier detection procedure is included. The norm of the mean of the directional outlyingness (:math:`\lVert \mathbf{MO}\rVert`) is plotted in the x-axis, and the variation of the @@ -154,7 +153,9 @@ class MagnitudeShapePlot(BasePlot): title='MS-Plot') References: - .. footbibliography:: + .. bibliography:: + + dai+genton_2018_visualization """ diff --git a/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py b/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py index b4e4798ca..3c100e859 100644 --- a/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py +++ b/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py @@ -522,7 +522,8 @@ class AsymptoticIndependenceTestStop(StoppingCondition): Stop when the selected point is independent from the target. It uses an asymptotic test based on the chi-squared distribution described - in :footcite:`szekely+rizzo_2010_brownian`. The test rejects independence if + in :footcite:`szekely+rizzo_2010_brownian`. The test rejects independence + if .. math:: From 769deac78bf9ecf6cde300ba8e9cfdbe2e4e37af Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 14 Jul 2021 15:16:28 +0200 Subject: [PATCH 375/417] Fix import. 
--- examples/plot_fpca.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/plot_fpca.py b/examples/plot_fpca.py index 5e9e7cf16..f0e7a3045 100644 --- a/examples/plot_fpca.py +++ b/examples/plot_fpca.py @@ -9,17 +9,16 @@ # License: MIT import matplotlib.pyplot as plt -import numpy as np import skfda from skfda.datasets import fetch_growth -from skfda.exploratory.visualization import plot_fpca_perturbation_graphs +from skfda.exploratory.visualization import FPCAPlot from skfda.preprocessing.dim_reduction.feature_extraction import FPCA from skfda.representation.basis import BSpline, Fourier, Monomial ############################################################################## -# In this example we are going to use functional principal component analysis to -# explore datasets and obtain conclusions about said dataset using this +# In this example we are going to use functional principal component analysis +# to explore datasets and obtain conclusions about said dataset using this # technique. # # First we are going to fetch the Berkeley Growth Study data. This dataset From e71870bdf617d6db0ef34f490d6c467ac46be2d3 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 20 Jul 2021 21:14:20 +0200 Subject: [PATCH 376/417] Move mei to stats. 
--- setup.cfg | 2 +- skfda/exploratory/stats/__init__.py | 12 +- skfda/exploratory/stats/_stats.py | 44 +++- .../exploratory/visualization/_outliergram.py | 38 +--- tests/test_outliergram.py | 67 ------ tests/test_stats.py | 201 ++++++++++-------- 6 files changed, 172 insertions(+), 192 deletions(-) delete mode 100644 tests/test_outliergram.py diff --git a/setup.cfg b/setup.cfg index 4fa33edcf..fa5d2b311 100644 --- a/setup.cfg +++ b/setup.cfg @@ -102,7 +102,7 @@ per-file-ignores = rst-directives = # These are sorted alphabetically - but that does not matter autosummary,data,currentmodule,deprecated, - glossary,moduleauthor,plot,testcode, + footbibliography,glossary,moduleauthor,plot,testcode, versionadded,versionchanged, rst-roles = diff --git a/skfda/exploratory/stats/__init__.py b/skfda/exploratory/stats/__init__.py index e795b513c..175abf8b3 100644 --- a/skfda/exploratory/stats/__init__.py +++ b/skfda/exploratory/stats/__init__.py @@ -1,2 +1,10 @@ -from ._stats import (mean, var, gmean, cov, - depth_based_median, trim_mean, geometric_median) +from ._stats import ( + cov, + depth_based_median, + geometric_median, + gmean, + mean, + modified_epigraph_index, + trim_mean, + var, +) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 0e56692e2..0c19282a7 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -1,10 +1,13 @@ """Functional data descriptive statistics.""" from builtins import isinstance -from typing import Callable, Optional, TypeVar, Union +from typing import Optional, TypeVar, Union import numpy as np +from scipy import integrate +from scipy.stats import rankdata + from ...misc.metrics import Metric, l2_distance, l2_norm from ...representation import FData, FDataGrid from ..depth import Depth, ModifiedBandDepth @@ -72,6 +75,39 @@ def cov(X: FData) -> FDataGrid: return X.cov() +def modified_epigraph_index(X: FDataGrid) -> np.ndarray: + """ + Calculate the Modified Epigraph Index of 
a FDataGrid. + + The MEI represents the mean time a curve stays below other curve. + In this case we will calculate the MEI for each curve in relation + with all the other curves of our dataset. + + """ + interval_len = ( + X.domain_range[0][1] + - X.domain_range[0][0] + ) + + # Array containing at each point the number of curves + # are above it. + num_functions_above = rankdata( + -X.data_matrix, + method='max', + axis=0, + ) - 1 + + integrand = integrate.simps( + num_functions_above, + x=X.grid_points[0], + axis=1, + ) + + integrand /= (interval_len * X.n_samples) + + return integrand.flatten() + + def depth_based_median( X: FDataGrid, depth_method: Optional[Depth] = None, @@ -158,7 +194,7 @@ def geometric_median( :func:`depth_based_median` References: - .. bibliography:: + .. footbibliography:: gervini_2008_estimation @@ -186,10 +222,10 @@ def geometric_median( def trim_mean( - X: FDataGrid, + X: F, proportiontocut: float, *, - depth_method: Optional[Depth] = None, + depth_method: Optional[Depth[F]] = None, ) -> FDataGrid: """Compute the trimmed means based on a depth measure. diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index f1fabbb0d..622249490 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -10,14 +10,13 @@ from typing import Optional, Sequence, Union import numpy as np -import scipy.integrate as integrate from matplotlib.artist import Artist from matplotlib.axes import Axes from matplotlib.figure import Figure -from scipy.stats import rankdata from ... 
import FDataGrid from ..depth._depth import ModifiedBandDepth +from ..stats import modified_epigraph_index from ._baseplot import BasePlot from ._utils import _get_figure_and_axes, _set_figure_layout_for_fdata @@ -75,8 +74,8 @@ def __init__( self.fdata = fdata self.depth = ModifiedBandDepth() self.depth.fit(fdata) - self.mbd = self.depth(fdata) - self.mei = self.modified_epigraph_index_list() + self.mbd = self.depth.predict(fdata) + self.mei = modified_epigraph_index(fdata) if len(self.mbd) != len(self.mei): raise ValueError( "The size of mbd and mei should be the same.", @@ -139,37 +138,6 @@ def plot( return self.fig - def modified_epigraph_index_list(self) -> np.ndarray: - """ - Calculate the Modified Epigraph Index of a FData. - - The MEI represents the mean time a curve stays below other curve. - In this case we will calculate the MEI for each curve in relation - with all the other curves of our dataset. - """ - interval_len = ( - self.fdata.domain_range[0][1] - - self.fdata.domain_range[0][0] - ) - - # Array containing at each point the number of curves - # are above it. - num_functions_above = rankdata( - -self.fdata.data_matrix, - method='max', - axis=0, - ) - 1 - - integrand = integrate.simps( - num_functions_above, - x=self.fdata.grid_points[0], - axis=1, - ) - - integrand /= (interval_len * self.fdata.n_samples) - - return integrand.flatten() - def _compute_distances(self) -> np.ndarray: """ Calculate the distances of each point towards the parabola. diff --git a/tests/test_outliergram.py b/tests/test_outliergram.py deleted file mode 100644 index d57d5a9ec..000000000 --- a/tests/test_outliergram.py +++ /dev/null @@ -1,67 +0,0 @@ -"""Outliergram testing module. - -Module containing the test coverage of outliergram module. -""" - -import unittest - -import numpy as np - -from skfda.datasets import fetch_weather -from skfda.exploratory.visualization import Outliergram - - -class TestOutliergram(unittest.TestCase): - """ - Outliergram testing class. 
- - Class containing the test coverage of outliergram module. - """ - - def test_outliergram(self) -> None: - """ - Outliergram testing method. - - Method containing the test coverage of outliergram module. - """ - fd = fetch_weather()["data"] - fd_temperatures = fd.coordinates[0] - outliergram = Outliergram( - fd_temperatures, - ) - # noqa: WPS317 - np.testing.assert_allclose( - outliergram.mei, - np.array( - [ # noqa: WPS317 - 0.46272668, 0.27840835, 0.36268754, 0.27908676, 0.36112198, - 0.30802348, 0.82969341, 0.45904762, 0.53907371, 0.38799739, - 0.41283757, 0.20420091, 0.23564253, 0.14737117, 0.14379648, - 0.54035225, 0.43459883, 0.6378604, 0.86964123, 0.4421396, - 0.58906719, 0.75561644, 0.54982387, 0.46095238, 0.09969993, - 0.13166341, 0.18776256, 0.4831833, 0.36816699, 0.72962818, - 0.80313112, 0.79934768, 0.90643183, 0.90139596, 0.9685062, - ], - ), - rtol=1e-5, - ) - - np.testing.assert_array_almost_equal( - outliergram.mbd, - np.array( - [ # noqa: WPS317 - 0.40685162, 0.42460381, 0.43088139, 0.35833775, 0.47847435, - 0.46825985, 0.29228349, 0.51299183, 0.5178558, 0.49868539, - 0.52408733, 0.34457312, 0.36996431, 0.2973209, 0.29107555, - 0.53304017, 0.44185565, 0.46346341, 0.23620736, 0.47652354, - 0.4814397, 0.38233529, 0.51173171, 0.51164882, 0.21551437, - 0.23084916, 0.25650589, 0.46760447, 0.30787767, 0.40929051, - 0.31801082, 0.3234519, 0.17015617, 0.17977514, 0.05769541, - ], - ), - ) - - -if __name__ == '__main__': - print() # noqa: WPS421 - unittest.main() diff --git a/tests/test_stats.py b/tests/test_stats.py index 234aa909c..7c0d8e465 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -1,111 +1,146 @@ -import skfda -from skfda.exploratory.stats import geometric_median import unittest + import numpy as np +import skfda +from skfda.exploratory.stats import geometric_median, modified_epigraph_index + class TestGeometricMedian(unittest.TestCase): + """Test the behavior of the geometric median.""" - def test_R_comparison(self): + def 
test_R_comparison(self) -> None: """ - Compare the results obtained using a real-world dataset with those in - R (Gmedian package). + Compare the results with real-world dataset with those in R. - """ + The R package used is the Gmedian package. + """ X, _ = skfda.datasets.fetch_tecator(return_X_y=True) - r_res = [2.74083, 2.742715, 2.744627, 2.74659, 2.748656, - 2.750879, 2.753307, 2.755984, 2.758927, 2.762182, - 2.765724, 2.76957, 2.773756, 2.778333, 2.783346, - 2.788818, 2.794758, 2.801225, 2.808233, 2.815714, - 2.82351, 2.831355, 2.838997, 2.846298, 2.853295, - 2.860186, 2.867332, 2.875107, 2.883778, 2.893419, - 2.903851, 2.914717, 2.925698, 2.936765, 2.948293, - 2.960908, 2.97526, 2.991206, 3.008222, 3.02552, - 3.042172, 3.057356, 3.070666, 3.082351, 3.093396, - 3.105338, 3.119946, 3.139307, 3.164418, 3.196014, - 3.234248, 3.278306, 3.326051, 3.374015, 3.418148, - 3.455051, 3.483095, 3.502789, 3.515961, 3.524557, - 3.530135, 3.53364, 3.535369, 3.535305, 3.533326, - 3.529343, 3.523357, 3.51548, 3.5059, 3.494807, - 3.482358, 3.468695, 3.453939, 3.438202, 3.421574, - 3.404169, 3.386148, 3.367751, 3.349166, 3.330441, - 3.311532, 3.292318, 3.272683, 3.252482, 3.23157, - 3.2099, 3.187632, 3.165129, 3.14282, 3.121008, - 3.099793, 3.079092, 3.058772, 3.038755, 3.019038, - 2.99963, 2.980476, 2.961467, 2.94252, 2.923682] + r_res = [ # noqa: WPS317 + 2.74083, 2.742715, 2.744627, 2.74659, 2.748656, + 2.750879, 2.753307, 2.755984, 2.758927, 2.762182, + 2.765724, 2.76957, 2.773756, 2.778333, 2.783346, + 2.788818, 2.794758, 2.801225, 2.808233, 2.815714, + 2.82351, 2.831355, 2.838997, 2.846298, 2.853295, + 2.860186, 2.867332, 2.875107, 2.883778, 2.893419, + 2.903851, 2.914717, 2.925698, 2.936765, 2.948293, + 2.960908, 2.97526, 2.991206, 3.008222, 3.02552, + 3.042172, 3.057356, 3.070666, 3.082351, 3.093396, + 3.105338, 3.119946, 3.139307, 3.164418, 3.196014, + 3.234248, 3.278306, 3.326051, 3.374015, 3.418148, + 3.455051, 3.483095, 3.502789, 3.515961, 3.524557, + 3.530135, 
3.53364, 3.535369, 3.535305, 3.533326, + 3.529343, 3.523357, 3.51548, 3.5059, 3.494807, + 3.482358, 3.468695, 3.453939, 3.438202, 3.421574, + 3.404169, 3.386148, 3.367751, 3.349166, 3.330441, + 3.311532, 3.292318, 3.272683, 3.252482, 3.23157, + 3.2099, 3.187632, 3.165129, 3.14282, 3.121008, + 3.099793, 3.079092, 3.058772, 3.038755, 3.019038, + 2.99963, 2.980476, 2.961467, 2.94252, 2.923682, + ] median_multivariate = geometric_median(X.data_matrix[..., 0]) median = geometric_median(X) np.testing.assert_allclose( - median.data_matrix[0, :, 0], median_multivariate, rtol=1e-4) + median.data_matrix[0, :, 0], + median_multivariate, + rtol=1e-4, + ) np.testing.assert_allclose(median_multivariate, r_res, rtol=1e-6) - def test_big(self): - + def test_big(self) -> None: + """Test a bigger dataset.""" X, _ = skfda.datasets.fetch_phoneme(return_X_y=True) - res = np.array( - [10.87814495, 12.10539654, 15.19841961, 16.29929599, 15.52206033, - 15.35123923, 16.44119775, 16.92255038, 16.70263134, 16.62235371, - 16.76616863, 16.80691414, 16.67460045, 16.64628944, 16.60898231, - 16.64735698, 16.7749517, 16.84533289, 16.8134475, 16.69540395, - 16.56083649, 16.3716527, 16.13744993, 15.95246457, 15.78934047, - 15.64383354, 15.55120344, 15.4363593, 15.36998848, 15.35300094, - 15.23606121, 15.16001392, 15.07326127, 14.92863818, 14.77405828, - 14.63772985, 14.4496911, 14.22752646, 14.07162908, 13.90989422, - 13.68979176, 13.53664058, 13.45465055, 13.40192835, 13.39111557, - 13.32592256, 13.26068118, 13.2314264, 13.29364741, 13.30700552, - 13.30579737, 13.35277966, 13.36572257, 13.45244228, 13.50615096, - 13.54872786, 13.65412519, 13.74737364, 13.79203753, 13.87827636, - 13.97728725, 14.06989886, 14.09950082, 14.13697733, 14.18414727, - 14.1914785, 14.17973283, 14.19655855, 14.20551814, 14.23059727, - 14.23195262, 14.21091905, 14.22234481, 14.17687285, 14.1732165, - 14.13488535, 14.11564007, 14.0296303, 13.99540104, 13.9383672, - 13.85056848, 13.73195466, 13.66840843, 13.64387247, 
13.52972191, - 13.43092629, 13.37470213, 13.31847522, 13.21687255, 13.15170299, - 13.15372387, 13.1059763, 13.09445287, 13.09041529, 13.11710243, - 13.14386673, 13.22359963, 13.27466107, 13.31319886, 13.34650331, - 13.45574711, 13.50415149, 13.53131719, 13.58150982, 13.65962685, - 13.63699657, 13.61248827, 13.60584663, 13.61072488, 13.54361538, - 13.48274699, 13.39589291, 13.33557961, 13.27237689, 13.15525989, - 13.0201153, 12.92930916, 12.81669859, 12.67134652, 12.58933066, - 12.48431933, 12.35395795, 12.23358723, 12.1604567, 12.02565859, - 11.92888167, 11.81510299, 11.74115444, 11.62986853, 11.51119027, - 11.41922977, 11.32781545, 11.23709771, 11.1553455, 11.06238304, - 10.97654662, 10.89217886, 10.837813, 10.76259305, 10.74123747, - 10.63519376, 10.58236217, 10.50270085, 10.43664285, 10.36198002, - 10.29128265, 10.27590625, 10.21337539, 10.14368936, 10.11450364, - 10.12276595, 10.0811153, 10.03603621, 10.00381717, 9.94299925, - 9.91830306, 9.90583771, 9.87254886, 9.84294024, 9.85472138, - 9.82047669, 9.8222713, 9.82272407, 9.78949033, 9.78038714, - 9.78720474, 9.81027704, 9.77565195, 9.80675363, 9.77084177, - 9.75289156, 9.75404079, 9.72316608, 9.7325137, 9.70562447, - 9.74528393, 9.70416261, 9.67298074, 9.6888954, 9.6765554, - 9.62346413, 9.65547732, 9.59897653, 9.64655533, 9.57719677, - 9.52660027, 9.54591084, 9.5389796, 9.53577489, 9.50843709, - 9.4889757, 9.46656255, 9.46875593, 9.48179707, 9.44946697, - 9.4798432, 9.46992684, 9.47672347, 9.50141949, 9.45946886, - 9.48043777, 9.49121177, 9.48771047, 9.51135703, 9.5309805, - 9.52914508, 9.54184114, 9.49902134, 9.5184432, 9.48091512, - 9.4951481, 9.51101019, 9.49815911, 9.48404411, 9.45754481, - 9.43717866, 9.38444679, 9.39625792, 9.38149371, 9.40279467, - 9.37378114, 9.31453485, 9.29494997, 9.30214391, 9.24839539, - 9.25834154, 9.24655115, 9.25298293, 9.22182526, 9.18142295, - 9.16692765, 9.1253291, 9.17396507, 9.11561516, 9.13792622, - 9.14151424, 9.10477211, 9.13132802, 9.10557653, 9.10442614, - 9.09571574, 
9.13986784, 9.08555206, 9.11363748, 9.14300157, - 9.13020252, 9.15901185, 9.15329127, 9.19107506, 9.19507704, - 9.16421159, 9.18975673, 9.14399055, 9.15376256, 9.17409705, - 8.50360777]) + res = np.array([ # noqa: WPS317 + 10.87814495, 12.10539654, 15.19841961, 16.29929599, 15.52206033, + 15.35123923, 16.44119775, 16.92255038, 16.70263134, 16.62235371, + 16.76616863, 16.80691414, 16.67460045, 16.64628944, 16.60898231, + 16.64735698, 16.7749517, 16.84533289, 16.8134475, 16.69540395, + 16.56083649, 16.3716527, 16.13744993, 15.95246457, 15.78934047, + 15.64383354, 15.55120344, 15.4363593, 15.36998848, 15.35300094, + 15.23606121, 15.16001392, 15.07326127, 14.92863818, 14.77405828, + 14.63772985, 14.4496911, 14.22752646, 14.07162908, 13.90989422, + 13.68979176, 13.53664058, 13.45465055, 13.40192835, 13.39111557, + 13.32592256, 13.26068118, 13.2314264, 13.29364741, 13.30700552, + 13.30579737, 13.35277966, 13.36572257, 13.45244228, 13.50615096, + 13.54872786, 13.65412519, 13.74737364, 13.79203753, 13.87827636, + 13.97728725, 14.06989886, 14.09950082, 14.13697733, 14.18414727, + 14.1914785, 14.17973283, 14.19655855, 14.20551814, 14.23059727, + 14.23195262, 14.21091905, 14.22234481, 14.17687285, 14.1732165, + 14.13488535, 14.11564007, 14.0296303, 13.99540104, 13.9383672, + 13.85056848, 13.73195466, 13.66840843, 13.64387247, 13.52972191, + 13.43092629, 13.37470213, 13.31847522, 13.21687255, 13.15170299, + 13.15372387, 13.1059763, 13.09445287, 13.09041529, 13.11710243, + 13.14386673, 13.22359963, 13.27466107, 13.31319886, 13.34650331, + 13.45574711, 13.50415149, 13.53131719, 13.58150982, 13.65962685, + 13.63699657, 13.61248827, 13.60584663, 13.61072488, 13.54361538, + 13.48274699, 13.39589291, 13.33557961, 13.27237689, 13.15525989, + 13.0201153, 12.92930916, 12.81669859, 12.67134652, 12.58933066, + 12.48431933, 12.35395795, 12.23358723, 12.1604567, 12.02565859, + 11.92888167, 11.81510299, 11.74115444, 11.62986853, 11.51119027, + 11.41922977, 11.32781545, 11.23709771, 
11.1553455, 11.06238304, + 10.97654662, 10.89217886, 10.837813, 10.76259305, 10.74123747, + 10.63519376, 10.58236217, 10.50270085, 10.43664285, 10.36198002, + 10.29128265, 10.27590625, 10.21337539, 10.14368936, 10.11450364, + 10.12276595, 10.0811153, 10.03603621, 10.00381717, 9.94299925, + 9.91830306, 9.90583771, 9.87254886, 9.84294024, 9.85472138, + 9.82047669, 9.8222713, 9.82272407, 9.78949033, 9.78038714, + 9.78720474, 9.81027704, 9.77565195, 9.80675363, 9.77084177, + 9.75289156, 9.75404079, 9.72316608, 9.7325137, 9.70562447, + 9.74528393, 9.70416261, 9.67298074, 9.6888954, 9.6765554, + 9.62346413, 9.65547732, 9.59897653, 9.64655533, 9.57719677, + 9.52660027, 9.54591084, 9.5389796, 9.53577489, 9.50843709, + 9.4889757, 9.46656255, 9.46875593, 9.48179707, 9.44946697, + 9.4798432, 9.46992684, 9.47672347, 9.50141949, 9.45946886, + 9.48043777, 9.49121177, 9.48771047, 9.51135703, 9.5309805, + 9.52914508, 9.54184114, 9.49902134, 9.5184432, 9.48091512, + 9.4951481, 9.51101019, 9.49815911, 9.48404411, 9.45754481, + 9.43717866, 9.38444679, 9.39625792, 9.38149371, 9.40279467, + 9.37378114, 9.31453485, 9.29494997, 9.30214391, 9.24839539, + 9.25834154, 9.24655115, 9.25298293, 9.22182526, 9.18142295, + 9.16692765, 9.1253291, 9.17396507, 9.11561516, 9.13792622, + 9.14151424, 9.10477211, 9.13132802, 9.10557653, 9.10442614, + 9.09571574, 9.13986784, 9.08555206, 9.11363748, 9.14300157, + 9.13020252, 9.15901185, 9.15329127, 9.19107506, 9.19507704, + 9.16421159, 9.18975673, 9.14399055, 9.15376256, 9.17409705, + 8.50360777, + ]) median_multivariate = geometric_median(X.data_matrix[..., 0]) median = geometric_median(X) np.testing.assert_allclose( - median.data_matrix[0, :, 0], median_multivariate, rtol=1e-2) + median.data_matrix[0, :, 0], + median_multivariate, + rtol=1e-2, + ) np.testing.assert_allclose(median_multivariate, res, rtol=1e-6) + + +class TestMEI(unittest.TestCase): + """Test modified epigraph index.""" + + def test_mei(self) -> None: + """Test modified epigraph 
index.""" + fd, _ = skfda.datasets.fetch_weather(return_X_y=True) + fd_temperatures = fd.coordinates[0] + mei = modified_epigraph_index(fd_temperatures) + np.testing.assert_allclose( + mei, + np.array([ # noqa: WPS317 + 0.46272668, 0.27840835, 0.36268754, 0.27908676, 0.36112198, + 0.30802348, 0.82969341, 0.45904762, 0.53907371, 0.38799739, + 0.41283757, 0.20420091, 0.23564253, 0.14737117, 0.14379648, + 0.54035225, 0.43459883, 0.6378604, 0.86964123, 0.4421396, + 0.58906719, 0.75561644, 0.54982387, 0.46095238, 0.09969993, + 0.13166341, 0.18776256, 0.4831833, 0.36816699, 0.72962818, + 0.80313112, 0.79934768, 0.90643183, 0.90139596, 0.9685062, + ]), + rtol=1e-5, + ) From 5ca158f25b4dde94f5bbe7652326729947e094ea Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 21 Jul 2021 00:07:40 +0200 Subject: [PATCH 377/417] Add outliergram outlier detector. --- skfda/exploratory/depth/_depth.py | 11 ++- skfda/exploratory/outliers/__init__.py | 1 + skfda/exploratory/outliers/_outliergram.py | 82 +++++++++++++++++++ .../exploratory/visualization/_outliergram.py | 72 +++++----------- 4 files changed, 114 insertions(+), 52 deletions(-) create mode 100644 skfda/exploratory/outliers/_outliergram.py diff --git a/skfda/exploratory/depth/_depth.py b/skfda/exploratory/depth/_depth.py index 30947e36f..62950b765 100644 --- a/skfda/exploratory/depth/_depth.py +++ b/skfda/exploratory/depth/_depth.py @@ -11,6 +11,7 @@ from typing import Optional import numpy as np + import scipy.integrate from ... 
import FDataGrid @@ -98,11 +99,17 @@ def predict(self, X: FDataGrid) -> np.ndarray: # noqa: D102 @property # noqa: WPS125 def max(self) -> float: # noqa: WPS125 - return self.multivariate_depth_.max + if self.multivariate_depth is None: + return 1 + + return self.multivariate_depth.max @property # noqa: WPS125 def min(self) -> float: # noqa: WPS125 - return self.multivariate_depth_.min + if self.multivariate_depth is None: + return 1 / 2 + + return self.multivariate_depth.min class ModifiedBandDepth(IntegratedDepth): diff --git a/skfda/exploratory/outliers/__init__.py b/skfda/exploratory/outliers/__init__.py index 862e66fd9..760c34b32 100644 --- a/skfda/exploratory/outliers/__init__.py +++ b/skfda/exploratory/outliers/__init__.py @@ -3,4 +3,5 @@ directional_outlyingness_stats, ) from ._iqr import IQROutlierDetector +from ._outliergram import OutliergramOutlierDetector from .neighbors_outlier import LocalOutlierFactor diff --git a/skfda/exploratory/outliers/_outliergram.py b/skfda/exploratory/outliers/_outliergram.py new file mode 100644 index 000000000..f8544469a --- /dev/null +++ b/skfda/exploratory/outliers/_outliergram.py @@ -0,0 +1,82 @@ +from __future__ import annotations + +import numpy as np +from sklearn.base import BaseEstimator, OutlierMixin + +from ...representation import FDataGrid +from ..depth._depth import ModifiedBandDepth +from ..stats import modified_epigraph_index + + +class OutliergramOutlierDetector( + BaseEstimator, # type: ignore + OutlierMixin, # type: ignore +): + r"""Outlier detector using the relation between MEI and MBD. + + Detects as outliers functions that have one or more points outside + ``factor`` times the interquartile range plus or minus the central + envelope, given a functional depth measure. This corresponds to the + points selected as outliers by the functional boxplot. + + Parameters: + depth_method (Callable): The functional depth measure used. + factor (float): The number of times the IQR is multiplied. 
+ + Example: + Function :math:`f : \mathbb{R}\longmapsto\mathbb{R}`. + + >>> import skfda + >>> data_matrix = [[1, 1, 2, 3, 2.5, 2], + ... [0.5, 0.5, 1, 2, 1.5, 1], + ... [-1, -1, -0.5, 1, 1, 0.5], + ... [-0.5, -0.5, -0.5, -1, -1, -1]] + >>> grid_points = [0, 2, 4, 6, 8, 10] + >>> fd = skfda.FDataGrid(data_matrix, grid_points) + >>> out_detector = IQROutlierDetector() + >>> out_detector.fit_predict(fd) + array([-1, 1, 1, -1]) + + """ + + def __init__(self, *, factor: float = 1.5) -> None: + self.factor = factor + + def _compute_parabola(self, X: FDataGrid) -> np.ndarray: + """Compute the parabola in which pairs (mei, mbd) should lie.""" + a_0 = -2 / (X.n_samples * (X.n_samples - 1)) + a_1 = (2 * (X.n_samples + 1)) / (X.n_samples - 1) + a_2 = a_0 + + return ( + a_0 + a_1 * self.mei_ + + X.n_samples**2 * a_2 * self.mei_**2 + ) + + def _compute_maximum_inlier_distance(self, distances: np.ndarray) -> float: + """Compute the distance above which data are considered outliers.""" + first_quartile = np.percentile(distances, 25) # noqa: WPS432 + third_quartile = np.percentile(distances, 75) # noqa: WPS432 + iqr = third_quartile - first_quartile + return third_quartile + self.factor * iqr + + def fit(self, X: FDataGrid, y: None = None) -> OutliergramOutlierDetector: + self.mbd_ = ModifiedBandDepth()(X) + self.mei_ = modified_epigraph_index(X) + self.parabola_ = self._compute_parabola(X) + self.distances_ = self.parabola_ - self.mbd_ + self.max_inlier_distance_ = self._compute_maximum_inlier_distance( + self.distances_, + ) + + return self + + def fit_predict(self, X: FDataGrid, y: None = None) -> np.ndarray: + self.fit(X, y) + + outliers = self.distances_ > self.max_inlier_distance_ + + # Predict as scikit-learn outlier detectors + predicted = ~outliers + outliers * -1 + + return predicted diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 622249490..228f45014 100644 --- 
a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -16,6 +16,7 @@ from ... import FDataGrid from ..depth._depth import ModifiedBandDepth +from ..outliers import OutliergramOutlierDetector from ..stats import modified_epigraph_index from ._baseplot import BasePlot from ._utils import _get_figure_and_axes, _set_figure_layout_for_fdata @@ -72,21 +73,11 @@ def __init__( ) -> None: BasePlot.__init__(self) self.fdata = fdata - self.depth = ModifiedBandDepth() - self.depth.fit(fdata) - self.mbd = self.depth.predict(fdata) - self.mei = modified_epigraph_index(fdata) - if len(self.mbd) != len(self.mei): - raise ValueError( - "The size of mbd and mei should be the same.", - ) - distances, parable = self._compute_distances() - self.distances = distances - indices = np.argsort(self.mei) - self.parable = parable[indices] - self.mei_ordered = self.mei[indices] - self._compute_outliergram() - + self.outlier_detector = OutliergramOutlierDetector() + self.outlier_detector.fit(fdata) + indices = np.argsort(self.outlier_detector.mei_) + self._parabola_ordered = self.outlier_detector.parabola_[indices] + self._mei_ordered = self.outlier_detector.mei_[indices] self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) def plot( @@ -106,21 +97,28 @@ def plot( self.artists = np.zeros(self.n_samples(), dtype=Artist) self.axScatter = self.axes[0] - for i in range(self.mei.size): + for i, (mei, mbd) in enumerate( + zip(self.outlier_detector.mei_, self.outlier_detector.mbd_), + ): self.artists[i] = self.axScatter.scatter( - self.mei[i], - self.mbd[i], + mei, + mbd, picker=2, ) self.axScatter.plot( - self.mei_ordered, - self.parable, + self._mei_ordered, + self._parabola_ordered, + ) + + shifted_parabola = ( + self._parabola_ordered + - self.outlier_detector.max_inlier_distance_ ) self.axScatter.plot( - self.mei_ordered, - self.shifted_parable, + self._mei_ordered, + shifted_parabola, linestyle='dashed', ) @@ -132,38 +130,12 @@ def 
plot( self.axScatter.set_ylabel("MBD") self.axScatter.set_xlim([0, 1]) self.axScatter.set_ylim([ - self.depth.min, - self.depth.max, + 0, # Minimum MBD + 1, # Maximum MBD ]) return self.fig - def _compute_distances(self) -> np.ndarray: - """ - Calculate the distances of each point towards the parabola. - - The distances can be calculated with function: - d_i = a_0 + a_1* mei_i + n^2* a_2* mei_i^2 - mb_i. - """ - a_0 = -2 / (self.n_samples() * (self.n_samples() - 1)) - a_1 = (2 * (self.n_samples() + 1)) / (self.n_samples() - 1) - a_2 = a_0 - - parable = ( - a_0 + a_1 * self.mei - + pow(self.n_samples(), 2) * a_2 * pow(self.mei, 2) - ) - distances = parable - self.mbd - - return distances, parable - - def _compute_outliergram(self) -> None: - """Compute the parabola under which the outliers lie.""" - first_quartile = np.percentile(self.distances, 25) # noqa: WPS432 - third_quartile = np.percentile(self.distances, 75) # noqa: WPS432 - iqr = third_quartile - first_quartile - self.shifted_parable = self.parable - (third_quartile + iqr) - def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" return self.fdata.n_samples From 7f124f68f6c0af9bba3db164df5b8bdf5ffa9ed4 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 21 Jul 2021 01:33:05 +0200 Subject: [PATCH 378/417] Fix docstring. --- skfda/exploratory/outliers/_outliergram.py | 26 +++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/skfda/exploratory/outliers/_outliergram.py b/skfda/exploratory/outliers/_outliergram.py index f8544469a..a8dc80275 100644 --- a/skfda/exploratory/outliers/_outliergram.py +++ b/skfda/exploratory/outliers/_outliergram.py @@ -14,13 +14,12 @@ class OutliergramOutlierDetector( ): r"""Outlier detector using the relation between MEI and MBD. - Detects as outliers functions that have one or more points outside - ``factor`` times the interquartile range plus or minus the central - envelope, given a functional depth measure. 
This corresponds to the - points selected as outliers by the functional boxplot. + Detects as outliers functions that have the vertical distance to the + outliergram parabola greater than ``factor`` times the interquartile + range (IQR) of those distances plus the third quartile. This corresponds + to the points selected as outliers by the outliergram. Parameters: - depth_method (Callable): The functional depth measure used. factor (float): The number of times the IQR is multiplied. Example: @@ -28,14 +27,25 @@ class OutliergramOutlierDetector( >>> import skfda >>> data_matrix = [[1, 1, 2, 3, 2.5, 2], + ... [0.5, 1, -1, 3, 2, 1], ... [0.5, 0.5, 1, 2, 1.5, 1], - ... [-1, -1, -0.5, 1, 1, 0.5], + ... [-1, -1, -0.5, 5, 5, 0.5], ... [-0.5, -0.5, -0.5, -1, -1, -1]] + >>> data_matrix = [[0, 0, 0, 0, 0, 0], + ... [1, 1, 1, 1, 1, 1], + ... [2, 2, 2, 2, 2, 2], + ... [3, 3, 3, 3, 3, 3], + ... [9, 9, 9, -1, -1, -1], + ... [4, 4, 4, 4, 4, 4], + ... [5, 5, 5, 5, 5, 5], + ... [6, 6, 6, 6, 6, 6], + ... [7, 7, 7, 7, 7, 7], + ... 
[8, 8, 8, 8, 8, 8]] >>> grid_points = [0, 2, 4, 6, 8, 10] >>> fd = skfda.FDataGrid(data_matrix, grid_points) - >>> out_detector = IQROutlierDetector() + >>> out_detector = OutliergramOutlierDetector() >>> out_detector.fit_predict(fd) - array([-1, 1, 1, -1]) + array([ 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]) """ From 972c6622e403a720f30fbddf8be0faaf8a747035 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 22 Jul 2021 13:06:52 +0200 Subject: [PATCH 379/417] Typing IQROutlierDetector --- skfda/exploratory/outliers/_envelopes.py | 48 ++++++++++++++++-------- skfda/exploratory/outliers/_iqr.py | 36 ++++++++++++++---- 2 files changed, 62 insertions(+), 22 deletions(-) diff --git a/skfda/exploratory/outliers/_envelopes.py b/skfda/exploratory/outliers/_envelopes.py index 68c691618..d233579fe 100644 --- a/skfda/exploratory/outliers/_envelopes.py +++ b/skfda/exploratory/outliers/_envelopes.py @@ -1,24 +1,35 @@ +from __future__ import annotations + import math +from typing import Tuple import numpy as np +from ...representation import FDataGrid + -def _compute_region(fdatagrid, - indices_descending_depth, - prob): +def _compute_region( + fdatagrid: FDataGrid, + indices_descending_depth: np.ndarray, + prob: float, +) -> FDataGrid: indices_samples = indices_descending_depth[ - :math.ceil(fdatagrid.n_samples * prob)] + :math.ceil(fdatagrid.n_samples * prob) + ] return fdatagrid[indices_samples] -def _compute_envelope(region): +def _compute_envelope(region: FDataGrid) -> Tuple[np.ndarray, np.ndarray]: max_envelope = np.max(region.data_matrix, axis=0) min_envelope = np.min(region.data_matrix, axis=0) return min_envelope, max_envelope -def _predict_outliers(fdatagrid, non_outlying_threshold): +def _predict_outliers( + fdatagrid: FDataGrid, + non_outlying_threshold: Tuple[np.ndarray, np.ndarray], +) -> np.ndarray: # A functional datum is considered an outlier if it has ANY point # in ANY dimension outside the envelope for inliers @@ -26,19 +37,26 @@ def _predict_outliers(fdatagrid, 
non_outlying_threshold): or_axes = tuple(i for i in range(1, fdatagrid.data_matrix.ndim)) - below_outliers = np.any(fdatagrid.data_matrix < - min_threshold, axis=or_axes) - above_outliers = np.any(fdatagrid.data_matrix > - max_threshold, axis=or_axes) + below_outliers = np.any( + fdatagrid.data_matrix < min_threshold, + axis=or_axes, + ) + above_outliers = np.any( + fdatagrid.data_matrix > max_threshold, + axis=or_axes, + ) return below_outliers | above_outliers -def _non_outlying_threshold(central_envelope, factor): +def _non_outlying_threshold( + central_envelope: Tuple[np.ndarray, np.ndarray], + factor: float, +) -> Tuple[np.ndarray, np.ndarray]: iqr = central_envelope[1] - central_envelope[0] non_outlying_threshold_max = central_envelope[1] + iqr * factor non_outlying_threshold_min = central_envelope[0] - iqr * factor - non_outlying_threshold = (non_outlying_threshold_min, - non_outlying_threshold_max) - - return non_outlying_threshold + return ( + non_outlying_threshold_min, + non_outlying_threshold_max, + ) diff --git a/skfda/exploratory/outliers/_iqr.py b/skfda/exploratory/outliers/_iqr.py index 98e74f43a..f54773c29 100644 --- a/skfda/exploratory/outliers/_iqr.py +++ b/skfda/exploratory/outliers/_iqr.py @@ -1,10 +1,19 @@ +from __future__ import annotations + +from typing import Optional + +import numpy as np from sklearn.base import BaseEstimator, OutlierMixin +from ...representation import FDataGrid +from ..depth import Depth, ModifiedBandDepth from . import _envelopes -from ..depth import ModifiedBandDepth -class IQROutlierDetector(BaseEstimator, OutlierMixin): +class IQROutlierDetector( + BaseEstimator, # type: ignore + OutlierMixin, # type: ignore +): r"""Outlier detector using the interquartile range. 
Detects as outliers functions that have one or more points outside @@ -32,12 +41,23 @@ class IQROutlierDetector(BaseEstimator, OutlierMixin): """ - def __init__(self, *, depth_method=ModifiedBandDepth(), factor=1.5): + def __init__( + self, + *, + depth_method: Optional[Depth[FDataGrid]] = None, + factor: float = 1.5, + ) -> None: self.depth_method = depth_method self.factor = factor - def fit(self, X, y=None): - depth = self.depth_method(X) + def fit(self, X: FDataGrid, y: None = None) -> IQROutlierDetector: + + depth_method = ( + self.depth_method + if self.depth_method is not None + else ModifiedBandDepth() + ) + depth = depth_method(X) indices_descending_depth = (-depth).argsort(axis=0) # Central region and envelope must be computed for outlier detection @@ -51,9 +71,11 @@ def fit(self, X, y=None): return self - def predict(self, X): + def predict(self, X: FDataGrid) -> np.ndarray: outliers = _envelopes._predict_outliers( - X, self.non_outlying_threshold_) + X, + self.non_outlying_threshold_, + ) # Predict as scikit-learn outlier detectors predicted = ~outliers + outliers * -1 From 1f6b14d16575e9c78a489d738a4629f53b15a430 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 22 Jul 2021 17:53:33 +0200 Subject: [PATCH 380/417] Add factor parameter to the outliergram. 
--- skfda/exploratory/visualization/_outliergram.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 228f45014..b4f411a2e 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -69,11 +69,13 @@ def __init__( axes: Optional[Axes] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, + factor: float = 1.5, **kwargs, ) -> None: BasePlot.__init__(self) self.fdata = fdata - self.outlier_detector = OutliergramOutlierDetector() + self.factor = factor + self.outlier_detector = OutliergramOutlierDetector(factor=factor) self.outlier_detector.fit(fdata) indices = np.argsort(self.outlier_detector.mei_) self._parabola_ordered = self.outlier_detector.parabola_[indices] From 16308865d2847f42ba58e398af327325a49fe0fd Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 15 Aug 2021 13:15:09 +0200 Subject: [PATCH 381/417] Add tests for historical linear regression. 
--- .../ml/regression/_historical_linear_model.py | 25 ++++-- tests/test_regression.py | 84 +++++++++++++++++-- 2 files changed, 96 insertions(+), 13 deletions(-) diff --git a/skfda/ml/regression/_historical_linear_model.py b/skfda/ml/regression/_historical_linear_model.py index 40e4eb30c..77e70d9b9 100644 --- a/skfda/ml/regression/_historical_linear_model.py +++ b/skfda/ml/regression/_historical_linear_model.py @@ -4,11 +4,10 @@ from typing import Tuple import numpy as np +import scipy.integrate from sklearn.base import BaseEstimator, RegressorMixin from sklearn.utils.validation import check_is_fitted -import scipy.integrate - from ..._utils import _cartesian_product, _pairwise_symmetric from ...representation import FDataBasis, FDataGrid from ...representation.basis import Basis, FiniteElement @@ -207,7 +206,7 @@ def _create_fem_basis( return FiniteElement( vertices=final_points, cells=triangles, - domain_range=(start, stop), + domain_range=((start, stop),) * 2, ) @@ -324,9 +323,10 @@ def __init__( def _fit_and_return_matrix(self, X: FDataGrid, y: FDataGrid) -> np.ndarray: X_centered = X - X.mean() if self.fit_intercept else X + y_centered = y - y.mean() if self.fit_intercept else y - self._pred_points = y.grid_points[0] - self._pred_domain_range = y.domain_range[0] + self._pred_points = y_centered.grid_points[0] + self._pred_domain_range = y_centered.domain_range[0] self._basis = _create_fem_basis( start=X_centered.domain_range[0][0], @@ -342,12 +342,21 @@ def _fit_and_return_matrix(self, X: FDataGrid, y: FDataGrid) -> np.ndarray: ) design_matrix = design_matrix.reshape(-1, design_matrix.shape[-1]) - self.discretized_coef_ = np.linalg.lstsq( + self._coef_coefs = np.linalg.lstsq( design_matrix, - y.data_matrix[:, ..., 0].ravel(), + y_centered.data_matrix[:, ..., 0].ravel(), rcond=None, )[0] + self.basis_coef_ = FDataBasis( + basis=self._basis, + coefficients=self._coef_coefs, + ) + + self.coef_ = self.basis_coef_.to_grid( + grid_points=[X.grid_points[0]] * 2, + ) 
+ if self.fit_intercept: self.intercept_ = y.mean() - self._predict_no_intercept(X.mean()) else: @@ -359,7 +368,7 @@ def _fit_and_return_matrix(self, X: FDataGrid, y: FDataGrid) -> np.ndarray: def _prediction_from_matrix(self, design_matrix: np.ndarray) -> FDataGrid: - points = (design_matrix @ self.discretized_coef_).reshape( + points = (design_matrix @ self._coef_coefs).reshape( -1, len(self._pred_points), ) diff --git a/tests/test_regression.py b/tests/test_regression.py index edd661582..a23c4f45e 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -1,11 +1,15 @@ -from skfda.misc.operators import LinearDifferentialOperator -from skfda.misc.regularization import TikhonovRegularization -from skfda.ml.regression import LinearRegression -from skfda.representation.basis import (FDataBasis, Monomial, - Fourier, BSpline) import unittest import numpy as np +from scipy.integrate import cumtrapz + +from skfda.datasets import make_gaussian, make_gaussian_process +from skfda.misc.covariances import Gaussian +from skfda.misc.operators import LinearDifferentialOperator +from skfda.misc.regularization import TikhonovRegularization +from skfda.ml.regression import HistoricalLinearRegression, LinearRegression +from skfda.representation.basis import BSpline, FDataBasis, Fourier, Monomial +from skfda.representation.grid import FDataGrid class TestScalarLinearRegression(unittest.TestCase): @@ -316,6 +320,76 @@ def test_error_weights_negative(self): scalar.fit([x_fd], y, weights) +class TestHistoricalLinearRegression(unittest.TestCase): + """Tests for historical linear regression.""" + + def setUp(self) -> None: + """Generate data according to the model.""" + self.random = np.random.RandomState(1) + + self.n_samples = 50 + self.n_features = 20 + self.intercept = make_gaussian_process( + n_samples=1, + n_features=self.n_features, + cov=Gaussian(length_scale=0.4), + random_state=self.random, + ) + self.X = make_gaussian_process( + n_samples=self.n_samples, + 
n_features=self.n_features, + cov=Gaussian(length_scale=0.4), + random_state=self.random, + ) + self.coefficients = make_gaussian( + n_samples=1, + grid_points=[np.linspace(0, 1, self.n_features)] * 2, + cov=Gaussian(length_scale=1), + random_state=self.random, + ) + + self.create_model() + + def create_model(self) -> None: + """Create a functional response according to historical model.""" + integral_body = ( + self.X.data_matrix[..., 0, np.newaxis] + * self.coefficients.data_matrix[..., 0] + ) + integral_matrix = cumtrapz( + integral_body, + x=self.X.grid_points[0], + initial=0, + axis=1, + ) + integral = np.diagonal(integral_matrix, axis1=1, axis2=2) + self.y = FDataGrid(self.intercept.data_matrix[..., 0] + integral) + + def test_historical(self) -> None: + """Test historical regression with data following the model.""" + regression = HistoricalLinearRegression(n_intervals=6) + regression.fit(self.X, self.y) + np.testing.assert_allclose( + regression.predict(self.X).data_matrix, + self.y.data_matrix, + atol=1e-1, + rtol=0, + ) + + np.testing.assert_allclose( + regression.intercept_.data_matrix, + self.intercept.data_matrix, + rtol=1e-2, + ) + + np.testing.assert_allclose( + regression.coef_.data_matrix[0, ..., 0], + np.triu(self.coefficients.data_matrix[0, ..., 0]), + atol=0.2, + rtol=0, + ) + + if __name__ == '__main__': print() unittest.main() From 09800be7a21f27727697f21691f6ccdf4dfa925a Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 15 Aug 2021 16:57:19 +0200 Subject: [PATCH 382/417] Test if multivariate_normal results change between machines. 
--- tests/test_regression.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_regression.py b/tests/test_regression.py index a23c4f45e..ac385b3b9 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -335,6 +335,17 @@ def setUp(self) -> None: cov=Gaussian(length_scale=0.4), random_state=self.random, ) + + np.testing.assert_almost_equal( + self.intercept.data_matrix[..., 0], + np.array([[ + -0.44419728, -0.56909477, -0.68783434, -0.80186766, -0.91540068, + -1.03397827, -1.16239266, -1.30246822, -1.45134619, -1.60079727, + -1.73785278, -1.84672707, -1.9116814, -1.92023053, -1.86597929, + -1.75042757, -1.58329321, -1.38122881, -1.16517441, -0.95690171, + ]]) + ) + self.X = make_gaussian_process( n_samples=self.n_samples, n_features=self.n_features, From 07dc190fa3268e19bd72eb9beccf5597bb6f14ec Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 15 Aug 2021 17:15:56 +0200 Subject: [PATCH 383/417] Try test `gaussian`. --- tests/test_regression.py | 146 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/tests/test_regression.py b/tests/test_regression.py index ac385b3b9..4cc8057d6 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -359,6 +359,152 @@ def setUp(self) -> None: random_state=self.random, ) + np.testing.assert_almost_equal( + self.coefficients.data_matrix[0, ..., 0], + np.array([ + [4.93663563e-01, 4.78010146e-01, 4.63788522e-01, + 4.50928237e-01, 4.39314629e-01, 4.28798261e-01, + 4.19192312e-01, 4.10280148e-01, 4.01813089e-01, + 3.93515764e-01, 3.85089077e-01, 3.76209900e-01, + 3.66535004e-01, 3.55705856e-01, 3.43348716e-01, + 3.29083247e-01, 3.12524641e-01, 2.93292495e-01, + 2.71019709e-01, 2.45359953e-01], + [5.31535010e-01, 5.17720140e-01, 5.04907227e-01, + 4.93003500e-01, 4.81882668e-01, 4.71390399e-01, + 4.61342759e-01, 4.51534164e-01, 4.41733815e-01, + 4.31691405e-01, 4.21136776e-01, 4.09782265e-01, + 3.97324811e-01, 3.83448921e-01, 3.67826456e-01, + 
3.50123751e-01, 3.30004632e-01, 3.07136237e-01, + 2.81197769e-01, 2.51887492e-01], + [5.61268157e-01, 5.49413977e-01, 5.38156769e-01, + 5.27383042e-01, 5.16954232e-01, 5.06709032e-01, + 4.96466435e-01, 4.86028192e-01, 4.75180833e-01, + 4.63695845e-01, 4.51331706e-01, 4.37835185e-01, + 4.22940282e-01, 4.06372937e-01, 3.87849145e-01, + 3.67081529e-01, 3.43779743e-01, 3.17658698e-01, + 2.88441954e-01, 2.55873009e-01], + [5.82214282e-01, 5.72443820e-01, 5.62899224e-01, + 5.53445283e-01, 5.43927740e-01, 5.34178785e-01, + 5.24016376e-01, 5.13250432e-01, 5.01678997e-01, + 4.89095545e-01, 4.75283435e-01, 4.60020810e-01, + 4.43078173e-01, 4.24219504e-01, 4.03203927e-01, + 3.79785535e-01, 3.53720408e-01, 3.24767458e-01, + 2.92695286e-01, 2.57289404e-01], + [5.93890073e-01, 5.86327635e-01, 5.78658665e-01, + 5.70723768e-01, 5.62353524e-01, 5.53368146e-01, + 5.43585005e-01, 5.32816681e-01, 5.20874443e-01, + 5.07566358e-01, 4.92700607e-01, 4.76083195e-01, + 4.57516183e-01, 4.36799910e-01, 4.13732435e-01, + 3.88109758e-01, 3.59729121e-01, 3.28392005e-01, + 2.93909255e-01, 2.56107758e-01], + [5.95979512e-01, 5.90749572e-01, 5.85121442e-01, + 5.78911820e-01, 5.71931019e-01, 5.63990198e-01, + 5.54899635e-01, 5.44473706e-01, 5.32530642e-01, + 5.18894359e-01, 5.03391058e-01, 4.85850840e-01, + 4.66105368e-01, 4.43987537e-01, 4.19330529e-01, + 3.91967867e-01, 3.61737266e-01, 3.28478056e-01, + 2.92041731e-01, 2.52293795e-01], + [5.88331159e-01, 5.85553166e-01, 5.82129597e-01, + 5.77850567e-01, 5.72507449e-01, 5.65895238e-01, + 5.57817817e-01, 5.48087161e-01, 5.36525668e-01, + 5.22967184e-01, 5.07253137e-01, 4.89235017e-01, + 4.68768294e-01, 4.45715658e-01, 4.19940279e-01, + 3.91310913e-01, 3.59699908e-01, 3.24984279e-01, + 2.87052173e-01, 2.45804580e-01], + [5.70945355e-01, 5.70732608e-01, 5.69672008e-01, + 5.67526575e-01, 5.64064529e-01, 5.59064647e-01, + 5.52320762e-01, 5.43637511e-01, 5.32839843e-01, + 5.19766631e-01, 5.04271253e-01, 4.86220453e-01, + 4.65490596e-01, 4.41967813e-01, 
4.15543684e-01, + 3.86116938e-01, 3.53590594e-01, 3.17876843e-01, + 2.78895404e-01, 2.36583668e-01], + [5.43965927e-01, 5.46421980e-01, 5.47873568e-01, + 5.48054218e-01, 5.46709103e-01, 5.43597425e-01, + 5.38497473e-01, 5.31207983e-01, 5.21548846e-01, + 5.09360115e-01, 4.94503082e-01, 4.76855471e-01, + 4.56310745e-01, 4.32772489e-01, 4.06157107e-01, + 3.76387702e-01, 3.43396592e-01, 3.07124177e-01, + 2.67522260e-01, 2.24557489e-01], + [5.07663154e-01, 5.12878825e-01, 5.16978529e-01, + 5.19665633e-01, 5.20659115e-01, 5.19696420e-01, + 5.16538243e-01, 5.10971363e-01, 5.02808400e-01, + 4.91887178e-01, 4.78071524e-01, 4.61245444e-01, + 4.41312805e-01, 4.18194086e-01, 3.91822409e-01, + 3.62143509e-01, 3.29112722e-01, 2.92696484e-01, + 2.52874094e-01, 2.09640776e-01], + [4.62417185e-01, 4.70469373e-01, 4.77336768e-01, + 4.82693233e-01, 4.86227716e-01, 4.87654065e-01, + 4.86713876e-01, 4.83176946e-01, 4.76844995e-01, + 4.67549900e-01, 4.55151684e-01, 4.39538412e-01, + 4.20619758e-01, 3.98325863e-01, 3.72603904e-01, + 3.43417253e-01, 3.10739459e-01, 2.74560952e-01, + 2.34883811e-01, 1.91729340e-01], + [4.08701273e-01, 4.19648971e-01, 4.29385830e-01, + 4.37551868e-01, 4.43806630e-01, 4.47838242e-01, + 4.49364099e-01, 4.48135656e-01, 4.43940027e-01, + 4.36597506e-01, 4.25962501e-01, 4.11919873e-01, + 3.94380971e-01, 3.73282426e-01, 3.48580043e-01, + 3.20249019e-01, 2.88280822e-01, 2.52682487e-01, + 2.13477046e-01, 1.70708405e-01], + [3.47064514e-01, 3.60948529e-01, 3.73633162e-01, + 3.84724533e-01, 3.93852096e-01, 4.00674175e-01, + 4.04883237e-01, 4.06209018e-01, 4.04418871e-01, + 3.99319584e-01, 3.90754458e-01, 3.78600793e-01, + 3.62768890e-01, 3.43194737e-01, 3.19839536e-01, + 2.92687753e-01, 2.61741537e-01, 2.27022588e-01, + 1.88571803e-01, 1.46452555e-01], + [2.78115147e-01, 2.94954322e-01, 3.10640282e-01, + 3.24745523e-01, 3.36866989e-01, 3.46632733e-01, + 3.53706943e-01, 3.57793827e-01, 3.58640294e-01, + 3.56033094e-01, 3.49802162e-01, 3.39813697e-01, + 3.25970385e-01, 
3.08204815e-01, 2.86481038e-01, + 2.60784909e-01, 2.31127858e-01, 1.97542609e-01, + 1.60085349e-01, 1.18834923e-01], + [2.02505870e-01, 2.22295616e-01, 2.41010390e-01, + 2.58188053e-01, 2.73391887e-01, 2.86217460e-01, + 2.96299516e-01, 3.03314572e-01, 3.06984085e-01, + 3.07075180e-01, 3.03397257e-01, 2.95802774e-01, + 2.84183751e-01, 2.68466045e-01, 2.48607812e-01, + 2.24598277e-01, 1.96451275e-01, 1.64208453e-01, + 1.27936293e-01, 8.77289872e-02], + [1.20919279e-01, 1.43631393e-01, 1.65374150e-01, + 1.85650125e-01, 2.03989120e-01, 2.19952959e-01, + 2.33145175e-01, 2.43211654e-01, 2.49848567e-01, + 2.52796354e-01, 2.51843737e-01, 2.46825811e-01, + 2.37618011e-01, 2.24136868e-01, 2.06333636e-01, + 1.84192432e-01, 1.57730629e-01, 1.26992827e-01, + 9.20536943e-02, 5.30199705e-02], + [3.40581354e-02, 5.96384941e-02, 8.43785197e-02, + 1.07746837e-01, 1.29237742e-01, 1.48379055e-01, + 1.64741413e-01, 1.77940510e-01, 1.87641263e-01, + 1.93558170e-01, 1.95456087e-01, 1.93147244e-01, + 1.86491192e-01, 1.75388591e-01, 1.59780096e-01, + 1.39645886e-01, 1.14997927e-01, 8.58838546e-02, + 5.23841084e-02, 1.46146099e-02], + [-5.73636876e-02, -2.89952105e-02, -1.31777107e-03, + 2.51021740e-02, 4.97245150e-02, 7.20431501e-02, + 9.15945992e-02, 1.07961645e-01, 1.20778708e-01, + 1.29730922e-01, 1.34556570e-01, 1.35044553e-01, + 1.31031788e-01, 1.22403703e-01, 1.09086975e-01, + 9.10515217e-02, 6.83044338e-02, 4.08921161e-02, + 8.89870469e-03, -2.75528458e-02], + [-1.52626004e-01, -1.21575121e-01, -9.10514034e-02, + -6.16531260e-02, -3.39560582e-02, -8.50001123e-03, + 1.42172550e-02, 3.37460153e-02, 4.96871596e-02, + 6.16958027e-02, 6.94813559e-02, 7.28063083e-02, + 7.14858605e-02, 6.53839428e-02, 5.44120992e-02, + 3.85273779e-02, 1.77287890e-02, -7.93995107e-03, + -3.83950945e-02, -7.35085564e-02], + [-2.51007663e-01, -2.17405703e-01, -1.84154945e-01, + -1.51885874e-01, -1.21206316e-01, -9.26905561e-02, + -6.68699310e-02, -4.42281214e-02, -2.51976265e-02, + -1.01544938e-02, 
5.80976532e-04, 6.74184827e-03, + 8.11965928e-03, 4.55584966e-03, -4.05706483e-03, + -1.77762916e-02, -3.66129300e-02, -6.05304857e-02, + -8.94449365e-02, -1.23227367e-01], + ]), + ) + self.create_model() def create_model(self) -> None: From 9654327d9b25e1d6cac0bba5c2295c57dbfebbba Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 15 Aug 2021 17:49:46 +0200 Subject: [PATCH 384/417] Trying to debug weird conditions. --- tests/test_regression.py | 146 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/tests/test_regression.py b/tests/test_regression.py index 4cc8057d6..24533b226 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -539,6 +539,152 @@ def test_historical(self) -> None: rtol=1e-2, ) + np.testing.assert_almost_equal( + self.coefficients.data_matrix[0, ..., 0], + np.array([ + [4.93663563e-01, 4.78010146e-01, 4.63788522e-01, + 4.50928237e-01, 4.39314629e-01, 4.28798261e-01, + 4.19192312e-01, 4.10280148e-01, 4.01813089e-01, + 3.93515764e-01, 3.85089077e-01, 3.76209900e-01, + 3.66535004e-01, 3.55705856e-01, 3.43348716e-01, + 3.29083247e-01, 3.12524641e-01, 2.93292495e-01, + 2.71019709e-01, 2.45359953e-01], + [5.31535010e-01, 5.17720140e-01, 5.04907227e-01, + 4.93003500e-01, 4.81882668e-01, 4.71390399e-01, + 4.61342759e-01, 4.51534164e-01, 4.41733815e-01, + 4.31691405e-01, 4.21136776e-01, 4.09782265e-01, + 3.97324811e-01, 3.83448921e-01, 3.67826456e-01, + 3.50123751e-01, 3.30004632e-01, 3.07136237e-01, + 2.81197769e-01, 2.51887492e-01], + [5.61268157e-01, 5.49413977e-01, 5.38156769e-01, + 5.27383042e-01, 5.16954232e-01, 5.06709032e-01, + 4.96466435e-01, 4.86028192e-01, 4.75180833e-01, + 4.63695845e-01, 4.51331706e-01, 4.37835185e-01, + 4.22940282e-01, 4.06372937e-01, 3.87849145e-01, + 3.67081529e-01, 3.43779743e-01, 3.17658698e-01, + 2.88441954e-01, 2.55873009e-01], + [5.82214282e-01, 5.72443820e-01, 5.62899224e-01, + 5.53445283e-01, 5.43927740e-01, 5.34178785e-01, + 5.24016376e-01, 5.13250432e-01, 
5.01678997e-01, + 4.89095545e-01, 4.75283435e-01, 4.60020810e-01, + 4.43078173e-01, 4.24219504e-01, 4.03203927e-01, + 3.79785535e-01, 3.53720408e-01, 3.24767458e-01, + 2.92695286e-01, 2.57289404e-01], + [5.93890073e-01, 5.86327635e-01, 5.78658665e-01, + 5.70723768e-01, 5.62353524e-01, 5.53368146e-01, + 5.43585005e-01, 5.32816681e-01, 5.20874443e-01, + 5.07566358e-01, 4.92700607e-01, 4.76083195e-01, + 4.57516183e-01, 4.36799910e-01, 4.13732435e-01, + 3.88109758e-01, 3.59729121e-01, 3.28392005e-01, + 2.93909255e-01, 2.56107758e-01], + [5.95979512e-01, 5.90749572e-01, 5.85121442e-01, + 5.78911820e-01, 5.71931019e-01, 5.63990198e-01, + 5.54899635e-01, 5.44473706e-01, 5.32530642e-01, + 5.18894359e-01, 5.03391058e-01, 4.85850840e-01, + 4.66105368e-01, 4.43987537e-01, 4.19330529e-01, + 3.91967867e-01, 3.61737266e-01, 3.28478056e-01, + 2.92041731e-01, 2.52293795e-01], + [5.88331159e-01, 5.85553166e-01, 5.82129597e-01, + 5.77850567e-01, 5.72507449e-01, 5.65895238e-01, + 5.57817817e-01, 5.48087161e-01, 5.36525668e-01, + 5.22967184e-01, 5.07253137e-01, 4.89235017e-01, + 4.68768294e-01, 4.45715658e-01, 4.19940279e-01, + 3.91310913e-01, 3.59699908e-01, 3.24984279e-01, + 2.87052173e-01, 2.45804580e-01], + [5.70945355e-01, 5.70732608e-01, 5.69672008e-01, + 5.67526575e-01, 5.64064529e-01, 5.59064647e-01, + 5.52320762e-01, 5.43637511e-01, 5.32839843e-01, + 5.19766631e-01, 5.04271253e-01, 4.86220453e-01, + 4.65490596e-01, 4.41967813e-01, 4.15543684e-01, + 3.86116938e-01, 3.53590594e-01, 3.17876843e-01, + 2.78895404e-01, 2.36583668e-01], + [5.43965927e-01, 5.46421980e-01, 5.47873568e-01, + 5.48054218e-01, 5.46709103e-01, 5.43597425e-01, + 5.38497473e-01, 5.31207983e-01, 5.21548846e-01, + 5.09360115e-01, 4.94503082e-01, 4.76855471e-01, + 4.56310745e-01, 4.32772489e-01, 4.06157107e-01, + 3.76387702e-01, 3.43396592e-01, 3.07124177e-01, + 2.67522260e-01, 2.24557489e-01], + [5.07663154e-01, 5.12878825e-01, 5.16978529e-01, + 5.19665633e-01, 5.20659115e-01, 5.19696420e-01, + 5.16538243e-01, 
5.10971363e-01, 5.02808400e-01, + 4.91887178e-01, 4.78071524e-01, 4.61245444e-01, + 4.41312805e-01, 4.18194086e-01, 3.91822409e-01, + 3.62143509e-01, 3.29112722e-01, 2.92696484e-01, + 2.52874094e-01, 2.09640776e-01], + [4.62417185e-01, 4.70469373e-01, 4.77336768e-01, + 4.82693233e-01, 4.86227716e-01, 4.87654065e-01, + 4.86713876e-01, 4.83176946e-01, 4.76844995e-01, + 4.67549900e-01, 4.55151684e-01, 4.39538412e-01, + 4.20619758e-01, 3.98325863e-01, 3.72603904e-01, + 3.43417253e-01, 3.10739459e-01, 2.74560952e-01, + 2.34883811e-01, 1.91729340e-01], + [4.08701273e-01, 4.19648971e-01, 4.29385830e-01, + 4.37551868e-01, 4.43806630e-01, 4.47838242e-01, + 4.49364099e-01, 4.48135656e-01, 4.43940027e-01, + 4.36597506e-01, 4.25962501e-01, 4.11919873e-01, + 3.94380971e-01, 3.73282426e-01, 3.48580043e-01, + 3.20249019e-01, 2.88280822e-01, 2.52682487e-01, + 2.13477046e-01, 1.70708405e-01], + [3.47064514e-01, 3.60948529e-01, 3.73633162e-01, + 3.84724533e-01, 3.93852096e-01, 4.00674175e-01, + 4.04883237e-01, 4.06209018e-01, 4.04418871e-01, + 3.99319584e-01, 3.90754458e-01, 3.78600793e-01, + 3.62768890e-01, 3.43194737e-01, 3.19839536e-01, + 2.92687753e-01, 2.61741537e-01, 2.27022588e-01, + 1.88571803e-01, 1.46452555e-01], + [2.78115147e-01, 2.94954322e-01, 3.10640282e-01, + 3.24745523e-01, 3.36866989e-01, 3.46632733e-01, + 3.53706943e-01, 3.57793827e-01, 3.58640294e-01, + 3.56033094e-01, 3.49802162e-01, 3.39813697e-01, + 3.25970385e-01, 3.08204815e-01, 2.86481038e-01, + 2.60784909e-01, 2.31127858e-01, 1.97542609e-01, + 1.60085349e-01, 1.18834923e-01], + [2.02505870e-01, 2.22295616e-01, 2.41010390e-01, + 2.58188053e-01, 2.73391887e-01, 2.86217460e-01, + 2.96299516e-01, 3.03314572e-01, 3.06984085e-01, + 3.07075180e-01, 3.03397257e-01, 2.95802774e-01, + 2.84183751e-01, 2.68466045e-01, 2.48607812e-01, + 2.24598277e-01, 1.96451275e-01, 1.64208453e-01, + 1.27936293e-01, 8.77289872e-02], + [1.20919279e-01, 1.43631393e-01, 1.65374150e-01, + 1.85650125e-01, 2.03989120e-01, 2.19952959e-01, + 
2.33145175e-01, 2.43211654e-01, 2.49848567e-01, + 2.52796354e-01, 2.51843737e-01, 2.46825811e-01, + 2.37618011e-01, 2.24136868e-01, 2.06333636e-01, + 1.84192432e-01, 1.57730629e-01, 1.26992827e-01, + 9.20536943e-02, 5.30199705e-02], + [3.40581354e-02, 5.96384941e-02, 8.43785197e-02, + 1.07746837e-01, 1.29237742e-01, 1.48379055e-01, + 1.64741413e-01, 1.77940510e-01, 1.87641263e-01, + 1.93558170e-01, 1.95456087e-01, 1.93147244e-01, + 1.86491192e-01, 1.75388591e-01, 1.59780096e-01, + 1.39645886e-01, 1.14997927e-01, 8.58838546e-02, + 5.23841084e-02, 1.46146099e-02], + [-5.73636876e-02, -2.89952105e-02, -1.31777107e-03, + 2.51021740e-02, 4.97245150e-02, 7.20431501e-02, + 9.15945992e-02, 1.07961645e-01, 1.20778708e-01, + 1.29730922e-01, 1.34556570e-01, 1.35044553e-01, + 1.31031788e-01, 1.22403703e-01, 1.09086975e-01, + 9.10515217e-02, 6.83044338e-02, 4.08921161e-02, + 8.89870469e-03, -2.75528458e-02], + [-1.52626004e-01, -1.21575121e-01, -9.10514034e-02, + -6.16531260e-02, -3.39560582e-02, -8.50001123e-03, + 1.42172550e-02, 3.37460153e-02, 4.96871596e-02, + 6.16958027e-02, 6.94813559e-02, 7.28063083e-02, + 7.14858605e-02, 6.53839428e-02, 5.44120992e-02, + 3.85273779e-02, 1.77287890e-02, -7.93995107e-03, + -3.83950945e-02, -7.35085564e-02], + [-2.51007663e-01, -2.17405703e-01, -1.84154945e-01, + -1.51885874e-01, -1.21206316e-01, -9.26905561e-02, + -6.68699310e-02, -4.42281214e-02, -2.51976265e-02, + -1.01544938e-02, 5.80976532e-04, 6.74184827e-03, + 8.11965928e-03, 4.55584966e-03, -4.05706483e-03, + -1.77762916e-02, -3.66129300e-02, -6.05304857e-02, + -8.94449365e-02, -1.23227367e-01], + ]), + ) + np.testing.assert_allclose( regression.coef_.data_matrix[0, ..., 0], np.triu(self.coefficients.data_matrix[0, ..., 0]), From 817a76725daa5540f1ffa644db60e0953b998637 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 15 Aug 2021 18:05:55 +0200 Subject: [PATCH 385/417] Continue debugging. 
--- tests/test_regression.py | 154 ++------------------------------------- 1 file changed, 8 insertions(+), 146 deletions(-) diff --git a/tests/test_regression.py b/tests/test_regression.py index 24533b226..7ac287f45 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -359,6 +359,14 @@ def setUp(self) -> None: random_state=self.random, ) + test = self.random.random(size=10) + np.testing.assert_almost_equal( + test, + np.array([0.39564827, 0.77171848, 0.25120318, 0.65576077, + 0.96039715, 0.00355476, + 0.72336104, 0.10899503, 0.54378749, 0.62691602]) + ) + np.testing.assert_almost_equal( self.coefficients.data_matrix[0, ..., 0], np.array([ @@ -539,152 +547,6 @@ def test_historical(self) -> None: rtol=1e-2, ) - np.testing.assert_almost_equal( - self.coefficients.data_matrix[0, ..., 0], - np.array([ - [4.93663563e-01, 4.78010146e-01, 4.63788522e-01, - 4.50928237e-01, 4.39314629e-01, 4.28798261e-01, - 4.19192312e-01, 4.10280148e-01, 4.01813089e-01, - 3.93515764e-01, 3.85089077e-01, 3.76209900e-01, - 3.66535004e-01, 3.55705856e-01, 3.43348716e-01, - 3.29083247e-01, 3.12524641e-01, 2.93292495e-01, - 2.71019709e-01, 2.45359953e-01], - [5.31535010e-01, 5.17720140e-01, 5.04907227e-01, - 4.93003500e-01, 4.81882668e-01, 4.71390399e-01, - 4.61342759e-01, 4.51534164e-01, 4.41733815e-01, - 4.31691405e-01, 4.21136776e-01, 4.09782265e-01, - 3.97324811e-01, 3.83448921e-01, 3.67826456e-01, - 3.50123751e-01, 3.30004632e-01, 3.07136237e-01, - 2.81197769e-01, 2.51887492e-01], - [5.61268157e-01, 5.49413977e-01, 5.38156769e-01, - 5.27383042e-01, 5.16954232e-01, 5.06709032e-01, - 4.96466435e-01, 4.86028192e-01, 4.75180833e-01, - 4.63695845e-01, 4.51331706e-01, 4.37835185e-01, - 4.22940282e-01, 4.06372937e-01, 3.87849145e-01, - 3.67081529e-01, 3.43779743e-01, 3.17658698e-01, - 2.88441954e-01, 2.55873009e-01], - [5.82214282e-01, 5.72443820e-01, 5.62899224e-01, - 5.53445283e-01, 5.43927740e-01, 5.34178785e-01, - 5.24016376e-01, 5.13250432e-01, 5.01678997e-01, - 
4.89095545e-01, 4.75283435e-01, 4.60020810e-01, - 4.43078173e-01, 4.24219504e-01, 4.03203927e-01, - 3.79785535e-01, 3.53720408e-01, 3.24767458e-01, - 2.92695286e-01, 2.57289404e-01], - [5.93890073e-01, 5.86327635e-01, 5.78658665e-01, - 5.70723768e-01, 5.62353524e-01, 5.53368146e-01, - 5.43585005e-01, 5.32816681e-01, 5.20874443e-01, - 5.07566358e-01, 4.92700607e-01, 4.76083195e-01, - 4.57516183e-01, 4.36799910e-01, 4.13732435e-01, - 3.88109758e-01, 3.59729121e-01, 3.28392005e-01, - 2.93909255e-01, 2.56107758e-01], - [5.95979512e-01, 5.90749572e-01, 5.85121442e-01, - 5.78911820e-01, 5.71931019e-01, 5.63990198e-01, - 5.54899635e-01, 5.44473706e-01, 5.32530642e-01, - 5.18894359e-01, 5.03391058e-01, 4.85850840e-01, - 4.66105368e-01, 4.43987537e-01, 4.19330529e-01, - 3.91967867e-01, 3.61737266e-01, 3.28478056e-01, - 2.92041731e-01, 2.52293795e-01], - [5.88331159e-01, 5.85553166e-01, 5.82129597e-01, - 5.77850567e-01, 5.72507449e-01, 5.65895238e-01, - 5.57817817e-01, 5.48087161e-01, 5.36525668e-01, - 5.22967184e-01, 5.07253137e-01, 4.89235017e-01, - 4.68768294e-01, 4.45715658e-01, 4.19940279e-01, - 3.91310913e-01, 3.59699908e-01, 3.24984279e-01, - 2.87052173e-01, 2.45804580e-01], - [5.70945355e-01, 5.70732608e-01, 5.69672008e-01, - 5.67526575e-01, 5.64064529e-01, 5.59064647e-01, - 5.52320762e-01, 5.43637511e-01, 5.32839843e-01, - 5.19766631e-01, 5.04271253e-01, 4.86220453e-01, - 4.65490596e-01, 4.41967813e-01, 4.15543684e-01, - 3.86116938e-01, 3.53590594e-01, 3.17876843e-01, - 2.78895404e-01, 2.36583668e-01], - [5.43965927e-01, 5.46421980e-01, 5.47873568e-01, - 5.48054218e-01, 5.46709103e-01, 5.43597425e-01, - 5.38497473e-01, 5.31207983e-01, 5.21548846e-01, - 5.09360115e-01, 4.94503082e-01, 4.76855471e-01, - 4.56310745e-01, 4.32772489e-01, 4.06157107e-01, - 3.76387702e-01, 3.43396592e-01, 3.07124177e-01, - 2.67522260e-01, 2.24557489e-01], - [5.07663154e-01, 5.12878825e-01, 5.16978529e-01, - 5.19665633e-01, 5.20659115e-01, 5.19696420e-01, - 5.16538243e-01, 5.10971363e-01, 
5.02808400e-01, - 4.91887178e-01, 4.78071524e-01, 4.61245444e-01, - 4.41312805e-01, 4.18194086e-01, 3.91822409e-01, - 3.62143509e-01, 3.29112722e-01, 2.92696484e-01, - 2.52874094e-01, 2.09640776e-01], - [4.62417185e-01, 4.70469373e-01, 4.77336768e-01, - 4.82693233e-01, 4.86227716e-01, 4.87654065e-01, - 4.86713876e-01, 4.83176946e-01, 4.76844995e-01, - 4.67549900e-01, 4.55151684e-01, 4.39538412e-01, - 4.20619758e-01, 3.98325863e-01, 3.72603904e-01, - 3.43417253e-01, 3.10739459e-01, 2.74560952e-01, - 2.34883811e-01, 1.91729340e-01], - [4.08701273e-01, 4.19648971e-01, 4.29385830e-01, - 4.37551868e-01, 4.43806630e-01, 4.47838242e-01, - 4.49364099e-01, 4.48135656e-01, 4.43940027e-01, - 4.36597506e-01, 4.25962501e-01, 4.11919873e-01, - 3.94380971e-01, 3.73282426e-01, 3.48580043e-01, - 3.20249019e-01, 2.88280822e-01, 2.52682487e-01, - 2.13477046e-01, 1.70708405e-01], - [3.47064514e-01, 3.60948529e-01, 3.73633162e-01, - 3.84724533e-01, 3.93852096e-01, 4.00674175e-01, - 4.04883237e-01, 4.06209018e-01, 4.04418871e-01, - 3.99319584e-01, 3.90754458e-01, 3.78600793e-01, - 3.62768890e-01, 3.43194737e-01, 3.19839536e-01, - 2.92687753e-01, 2.61741537e-01, 2.27022588e-01, - 1.88571803e-01, 1.46452555e-01], - [2.78115147e-01, 2.94954322e-01, 3.10640282e-01, - 3.24745523e-01, 3.36866989e-01, 3.46632733e-01, - 3.53706943e-01, 3.57793827e-01, 3.58640294e-01, - 3.56033094e-01, 3.49802162e-01, 3.39813697e-01, - 3.25970385e-01, 3.08204815e-01, 2.86481038e-01, - 2.60784909e-01, 2.31127858e-01, 1.97542609e-01, - 1.60085349e-01, 1.18834923e-01], - [2.02505870e-01, 2.22295616e-01, 2.41010390e-01, - 2.58188053e-01, 2.73391887e-01, 2.86217460e-01, - 2.96299516e-01, 3.03314572e-01, 3.06984085e-01, - 3.07075180e-01, 3.03397257e-01, 2.95802774e-01, - 2.84183751e-01, 2.68466045e-01, 2.48607812e-01, - 2.24598277e-01, 1.96451275e-01, 1.64208453e-01, - 1.27936293e-01, 8.77289872e-02], - [1.20919279e-01, 1.43631393e-01, 1.65374150e-01, - 1.85650125e-01, 2.03989120e-01, 2.19952959e-01, - 2.33145175e-01, 
2.43211654e-01, 2.49848567e-01, - 2.52796354e-01, 2.51843737e-01, 2.46825811e-01, - 2.37618011e-01, 2.24136868e-01, 2.06333636e-01, - 1.84192432e-01, 1.57730629e-01, 1.26992827e-01, - 9.20536943e-02, 5.30199705e-02], - [3.40581354e-02, 5.96384941e-02, 8.43785197e-02, - 1.07746837e-01, 1.29237742e-01, 1.48379055e-01, - 1.64741413e-01, 1.77940510e-01, 1.87641263e-01, - 1.93558170e-01, 1.95456087e-01, 1.93147244e-01, - 1.86491192e-01, 1.75388591e-01, 1.59780096e-01, - 1.39645886e-01, 1.14997927e-01, 8.58838546e-02, - 5.23841084e-02, 1.46146099e-02], - [-5.73636876e-02, -2.89952105e-02, -1.31777107e-03, - 2.51021740e-02, 4.97245150e-02, 7.20431501e-02, - 9.15945992e-02, 1.07961645e-01, 1.20778708e-01, - 1.29730922e-01, 1.34556570e-01, 1.35044553e-01, - 1.31031788e-01, 1.22403703e-01, 1.09086975e-01, - 9.10515217e-02, 6.83044338e-02, 4.08921161e-02, - 8.89870469e-03, -2.75528458e-02], - [-1.52626004e-01, -1.21575121e-01, -9.10514034e-02, - -6.16531260e-02, -3.39560582e-02, -8.50001123e-03, - 1.42172550e-02, 3.37460153e-02, 4.96871596e-02, - 6.16958027e-02, 6.94813559e-02, 7.28063083e-02, - 7.14858605e-02, 6.53839428e-02, 5.44120992e-02, - 3.85273779e-02, 1.77287890e-02, -7.93995107e-03, - -3.83950945e-02, -7.35085564e-02], - [-2.51007663e-01, -2.17405703e-01, -1.84154945e-01, - -1.51885874e-01, -1.21206316e-01, -9.26905561e-02, - -6.68699310e-02, -4.42281214e-02, -2.51976265e-02, - -1.01544938e-02, 5.80976532e-04, 6.74184827e-03, - 8.11965928e-03, 4.55584966e-03, -4.05706483e-03, - -1.77762916e-02, -3.66129300e-02, -6.05304857e-02, - -8.94449365e-02, -1.23227367e-01], - ]), - ) - np.testing.assert_allclose( regression.coef_.data_matrix[0, ..., 0], np.triu(self.coefficients.data_matrix[0, ..., 0]), From 7333b497ef194e5b5c527a062226f6c83fdb292c Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 15 Aug 2021 19:04:02 +0200 Subject: [PATCH 386/417] Continue debugging. 
--- tests/test_regression.py | 241 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) diff --git a/tests/test_regression.py b/tests/test_regression.py index 7ac287f45..15c84b687 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -4,6 +4,7 @@ from scipy.integrate import cumtrapz from skfda.datasets import make_gaussian, make_gaussian_process +from skfda.misc import covariances from skfda.misc.covariances import Gaussian from skfda.misc.operators import LinearDifferentialOperator from skfda.misc.regularization import TikhonovRegularization @@ -359,6 +360,246 @@ def setUp(self) -> None: random_state=self.random, ) + covariance = covariances._execute_covariance( + Gaussian(length_scale=1), np.linspace(0, 1, self.n_features), np.linspace(0, 1, self.n_features)) + + kk = np.linalg.svd(covariance) + np.testing.assert_almost_equal( + kk[0], + np.array([[-2.07745796e-01, -3.53499224e-01, -4.24704118e-01, + 4.38526581e-01, 4.10421088e-01, -3.54696800e-01, + -2.85306097e-01, 2.14317960e-01, 1.50480799e-01, + -9.72724699e-02, 5.48931421e-02, 2.77218995e-02, + -2.90447444e-03, -8.63720916e-03, 1.47985877e-02, + -6.38392730e-03, 3.19088026e-04, -2.31445976e-03, + -2.35200464e-02, 2.87085300e-02], + [-2.12536586e-01, -3.23842468e-01, -3.02798180e-01, + 1.78544537e-01, -1.05573598e-03, 1.83142307e-01, + 3.23551473e-01, -3.97680953e-01, -4.03394435e-01, + 3.49349496e-01, -2.47364593e-01, -1.62706573e-01, + -2.83079777e-03, 7.75399053e-02, -7.82703189e-02, + 2.34672884e-02, -3.68317337e-03, 3.93668278e-03, + 1.21931948e-01, -1.73155334e-01], + [-2.16887425e-01, -2.91801208e-01, -1.89267880e-01, + -1.80827215e-02, -2.19966326e-01, 3.20250225e-01, + 2.77298225e-01, -1.15530338e-01, 9.53154240e-02, + -2.68609660e-01, 2.98125851e-01, 3.50326598e-01, + 9.00339737e-02, -2.63133363e-01, 1.38529161e-01, + -4.31304302e-02, 2.53294158e-02, 9.78557308e-03, + -2.09207371e-01, 3.93358354e-01], + [-2.20767163e-01, -2.57579395e-01, -8.58072050e-02, + 
-1.54324281e-01, -2.90281478e-01, 2.30663263e-01, + 1.76438603e-02, 2.10689264e-01, 3.15510651e-01, + -2.38136937e-01, 1.50956053e-01, -2.85577754e-01, + -2.94853489e-01, 3.99842395e-01, -3.37528337e-02, + 1.47721647e-01, -9.97750355e-02, 7.44328024e-04, + 6.67073910e-02, -3.56073311e-01], + [-2.24147659e-01, -2.21408067e-01, 6.02231879e-03, + -2.34681396e-01, -2.55173841e-01, 4.99470070e-02, + -2.02045126e-01, 2.87033345e-01, 1.36398452e-01, + 1.20139691e-01, -3.85416779e-01, -4.40485786e-03, + 4.14894610e-01, -1.60147670e-01, -1.83390205e-01, + -3.86382356e-01, 2.20367250e-01, -1.51073821e-01, + 1.43736598e-01, -2.11892050e-02], + [-2.27004115e-01, -1.83542799e-01, 8.48212284e-02, + -2.65079765e-01, -1.55333564e-01, -1.25459870e-01, + -2.75613302e-01, 1.35724448e-01, -1.49214888e-01, + 2.65575744e-01, -1.58539640e-01, -1.87699105e-02, + -2.55533556e-01, -3.00732826e-01, 2.36617690e-01, + 3.33864157e-01, -2.61895075e-01, 4.00904477e-01, + -1.86367907e-02, 1.78696543e-01], + [-2.29315378e-01, -1.44260722e-01, 1.49378970e-01, + -2.52723472e-01, -2.73898866e-02, -2.36707090e-01, + -2.00638882e-01, -9.19522570e-02, -2.74885183e-01, + 1.51099303e-01, 2.39776405e-01, 3.04597673e-01, + 1.86320951e-02, 3.99089908e-01, -2.39067808e-01, + 2.29630093e-01, 1.24076421e-01, -3.99092262e-01, + -1.65297964e-01, 1.21518710e-01], + [-2.31064204e-01, -1.03857162e-01, 1.98697349e-01, + -2.05914208e-01, 9.75040092e-02, -2.58925295e-01, + -3.59443114e-02, -2.44719467e-01, -1.69553167e-01, + -1.26621797e-01, 3.21190077e-01, -2.94533855e-01, + 7.93326575e-02, -1.31451985e-01, 4.54774940e-01, + -3.94313255e-01, 7.96670732e-02, -2.03962509e-02, + -8.55100650e-02, -2.85599119e-01], + [-2.32237467e-01, -6.26419518e-02, 2.32010013e-01, + -1.33840444e-01, 1.94871188e-01, -1.95773016e-01, + 1.36754789e-01, -2.43059362e-01, 6.03941812e-02, + -3.19348016e-01, -6.13381573e-02, 2.13481419e-01, + -1.50059406e-01, -3.59484931e-02, -4.50925459e-01, + -1.90977474e-01, -2.41653937e-01, 2.62422246e-01, + 
4.11692342e-01, 3.13373010e-02], + [-2.32826323e-01, -2.09354746e-02, 2.48797507e-01, + -4.63422021e-02, 2.47852118e-01, -7.24487156e-02, + 2.44283673e-01, -9.97691246e-02, 2.38867772e-01, + -1.53579703e-01, -2.65935064e-01, -3.27866179e-01, + 1.97443808e-01, 4.49207015e-02, -2.95001149e-02, + 4.00764862e-01, 4.16592291e-01, 1.43047831e-01, + -2.36535841e-01, 1.83548419e-01], + [-2.32826323e-01, 2.09354746e-02, 2.48797507e-01, + 4.63422021e-02, 2.47852118e-01, 7.24487156e-02, + 2.44283659e-01, 9.97723947e-02, 2.38948800e-01, + 1.69312090e-01, -2.77706716e-01, 1.90938295e-01, + -4.25421400e-02, -4.20876679e-03, 1.81273932e-01, + -7.63459762e-02, -5.05022021e-01, -3.69985498e-01, + -3.05044017e-01, -1.41953737e-01], + [-2.32237467e-01, 6.26419518e-02, 2.32010013e-01, + 1.33840444e-01, 1.94871188e-01, 1.95773016e-01, + 1.36754789e-01, 2.43058569e-01, 6.06388818e-02, + 3.03285458e-01, 2.41002897e-01, 3.31393398e-02, + -2.80376542e-01, -1.01206815e-01, 1.73106262e-01, + 1.26376371e-02, 3.43527029e-01, -1.76747497e-01, + 5.23487628e-01, 1.38599341e-01], + [-2.31064204e-01, 1.03857162e-01, 1.98697349e-01, + 2.05914208e-01, 9.75040092e-02, 2.58925295e-01, + -3.59443091e-02, 2.44718136e-01, -1.69579812e-01, + 1.20147970e-01, 2.68572454e-01, 1.50156666e-01, + 4.84609644e-01, 2.08189327e-01, -5.76586328e-02, + 2.38638657e-02, -1.13711093e-01, 4.86469999e-01, + -6.43567836e-02, -2.06237772e-01], + [-2.29315378e-01, 1.44260722e-01, 1.49378970e-01, + 2.52723472e-01, -2.73898866e-02, 2.36707090e-01, + -2.00638871e-01, 9.19501311e-02, -2.75145016e-01, + -1.39165267e-01, 6.41684963e-02, -3.14309507e-01, + -2.86193935e-01, -3.30301420e-01, -4.42319843e-01, + -1.13890489e-01, 2.07765480e-03, -1.38702569e-01, + -3.41647725e-01, 3.68680186e-02], + [-2.27004115e-01, 1.83542799e-01, 8.48212284e-02, + 2.65079765e-01, -1.55333564e-01, 1.25459870e-01, + -2.75613309e-01, -1.35725436e-01, -1.49146759e-01, + -2.85417286e-01, -3.99169708e-01, 1.74679517e-01, + -1.71186515e-01, 3.97511534e-01, 
3.62671811e-01, + -1.88009057e-01, 1.27850559e-01, 6.76436291e-02, + 3.36667289e-02, 1.82362079e-01], + [-2.24147659e-01, 2.21408067e-01, 6.02231879e-03, + 2.34681396e-01, -2.55173841e-01, -4.99470070e-02, + -2.02045125e-01, -2.87034840e-01, 1.36413298e-01, + -9.30394919e-02, 9.58631470e-02, -1.74628694e-01, + 3.69298879e-01, -1.70606523e-01, 5.39650821e-02, + 3.54541478e-01, -2.97125313e-01, -2.95501871e-01, + 3.42692461e-01, 2.66855373e-02], + [-2.20767163e-01, 2.57579395e-01, -8.58072050e-02, + 1.54324281e-01, -2.90281478e-01, -2.30663263e-01, + 1.76438437e-02, -2.10684387e-01, 3.15670812e-01, + 2.28027102e-01, 2.13532625e-02, 3.33791939e-01, + -1.87199097e-01, -1.81648876e-01, -8.92280091e-02, + 2.27687519e-02, 3.10876464e-01, 1.60642602e-01, + -1.88142728e-01, -4.16752375e-01], + [-2.16887425e-01, 2.91801208e-01, -1.89267880e-01, + 1.80827215e-02, -2.19966326e-01, -3.20250225e-01, + 2.77298249e-01, 1.15532864e-01, 9.51399665e-02, + 2.60388460e-01, 1.34926522e-01, -2.98464216e-01, + -7.93229609e-03, 2.64476169e-01, -7.05477960e-02, + -3.01971583e-01, -1.70197579e-01, 1.06442936e-01, + -7.30550644e-02, 4.41051543e-01], + [-2.12536586e-01, 3.23842468e-01, -3.02798180e-01, + -1.78544537e-01, -1.05573598e-03, -1.83142306e-01, + 3.23551463e-01, 3.97674954e-01, -4.03335658e-01, + -3.41670271e-01, -1.25538235e-01, 1.21419250e-01, + 3.62611830e-02, -1.24987052e-01, 7.74852949e-02, + 1.89687833e-01, 4.78371828e-02, -1.16123068e-01, + 8.72297239e-02, -1.94400231e-01], + [-2.07745796e-01, 3.53499224e-01, -4.24704118e-01, + -4.38526581e-01, 4.10421088e-01, 3.54696800e-01, + -2.85306096e-01, -2.14315901e-01, 1.50476117e-01, + 9.55355874e-02, 3.01805869e-02, -1.89910515e-02, + -8.89460114e-03, 2.14410599e-02, -1.85617415e-02, + -3.75430663e-02, -5.45720255e-03, 2.78969926e-02, + -2.01904240e-02, 3.26267090e-02]]) + ) + np.testing.assert_almost_equal( + kk[1], + np.array([1.83679678e+01, 1.57226237e+00, 5.83837377e-02, 1.36268266e-03, + 2.30991559e-05, 3.04177680e-07, 
3.23145016e-09, 2.83381200e-11, + 2.08149707e-13, 1.27735148e-15, 2.50546185e-16, 1.67741842e-16, + 1.40236481e-16, 9.78305508e-17, 6.32294021e-17, 4.19375304e-17, + 3.74066368e-17, 3.12159770e-17, 1.98017174e-17, 1.38376694e-17]) + ) + + np.testing.assert_almost_equal( + kk[2], + np.array([[-0.2077458, -0.21253659, -0.21688742, -0.22076716, -0.22414766, + -0.22700411, -0.22931538, -0.2310642, -0.23223747, -0.23282632, + -0.23282632, -0.23223747, -0.2310642, -0.22931538, -0.22700411, + -0.22414766, -0.22076716, -0.21688742, -0.21253659, -0.2077458], + [-0.35349922, -0.32384247, -0.29180121, -0.25757939, -0.22140807, + -0.1835428, -0.14426072, -0.10385716, -0.06264195, -0.02093547, + 0.02093547, 0.06264195, 0.10385716, 0.14426072, 0.1835428, + 0.22140807, 0.25757939, 0.29180121, 0.32384247, 0.35349922], + [-0.42470412, -0.30279818, -0.18926788, -0.0858072, 0.00602232, + 0.08482123, 0.14937897, 0.19869735, 0.23201001, 0.24879751, + 0.24879751, 0.23201001, 0.19869735, 0.14937897, 0.08482123, + 0.00602232, -0.0858072, -0.18926788, -0.30279818, -0.42470412], + [0.43852658, 0.17854454, -0.01808272, -0.15432428, -0.2346814, + -0.26507976, -0.25272347, -0.20591421, -0.13384044, -0.0463422, + 0.0463422, 0.13384044, 0.20591421, 0.25272347, 0.26507976, + 0.2346814, 0.15432428, 0.01808272, -0.17854454, -0.43852658], + [0.41042109, -0.00105574, -0.21996633, -0.29028148, -0.25517384, + -0.15533356, -0.02738989, 0.09750401, 0.19487119, 0.24785212, + 0.24785212, 0.19487119, 0.09750401, -0.02738989, -0.15533356, + -0.25517384, -0.29028148, -0.21996633, -0.00105574, 0.41042109], + [-0.3546968, 0.18314231, 0.32025023, 0.23066326, 0.04994701, + -0.12545987, -0.23670709, -0.25892529, -0.19577302, -0.07244872, + 0.07244872, 0.19577302, 0.25892529, 0.23670709, 0.12545987, + -0.04994701, -0.23066326, -0.32025023, -0.18314231, 0.3546968], + [-0.2853061, 0.32355148, 0.27729823, 0.01764386, -0.20204514, + -0.2756133, -0.20063892, -0.03594428, 0.13675481, 0.24428368, + 0.24428365, 0.1367548, 
-0.03594434, -0.20063891, -0.27561327, + -0.20204509, 0.01764384, 0.27729823, 0.32355145, -0.28530609], + [0.21431629, -0.39767568, -0.11553366, 0.21068629, 0.2870346, + 0.13572467, -0.09194787, -0.24472108, -0.2430603, -0.09977042, + 0.09977179, 0.24305657, 0.24472172, 0.09195302, -0.13572777, + -0.28703635, -0.21068478, 0.11553353, 0.39767582, -0.21431641], + [0.15050979, -0.40352422, 0.09550264, 0.3154507, 0.13640236, + -0.14941301, -0.27463812, -0.16962604, 0.06040535, 0.23887409, + 0.23899712, 0.06040722, -0.16950586, -0.27484944, -0.14938464, + 0.13641897, 0.31563022, 0.09525851, -0.40340336, 0.15048796], + [-0.09823207, 0.35503381, -0.28129892, -0.22469036, 0.10896079, + 0.28518116, 0.12260137, -0.10726981, -0.32147545, -0.15808811, + 0.16075458, 0.32795726, 0.11424999, -0.16857865, -0.25750771, + -0.10087266, 0.23727235, 0.24460211, -0.3322095, 0.09360981], + [0.05032393, -0.23010173, 0.29017025, 0.09760863, -0.28077051, + -0.24398518, 0.30061428, 0.26489603, -0.06452987, -0.2101119, + -0.24567425, 0.10810049, 0.31202781, 0.15029365, -0.44866164, + 0.0956641, -0.0481157, 0.24811557, -0.18832063, 0.04245667], + [0.01549762, -0.10841756, 0.29727625, -0.39335616, 0.27040749, + -0.20776369, 0.32141654, -0.27802659, 0.13642451, -0.17648692, + 0.18449089, -0.16592692, 0.36208749, -0.40197757, 0.09225227, + 0.06170084, 0.07464316, -0.14404867, 0.07210019, -0.01229315], + [0.0216311, -0.09586963, 0.09305442, 0.17599598, -0.39185062, + 0.10796132, 0.24254513, -0.21991255, 0.2646336, -0.36661166, + 0.04009641, 0.31406494, -0.23194085, 0.02228008, 0.14332905, + -0.32948443, 0.37141779, -0.21893078, 0.06553256, -0.00794184], + [0.01125609, -0.08861691, 0.26711699, -0.35215627, 0.08304963, + 0.29312265, -0.31155358, 0.11212051, -0.02198142, 0.00202485, + -0.04121941, 0.08438469, -0.05096357, 0.20759584, -0.43944241, + 0.19004198, 0.31269486, -0.41733714, 0.19228711, -0.03242449], + [-0.01883737, 0.07467897, -0.05861398, -0.1107113, 0.15443992, + -0.00505078, 0.19761474, 
-0.60373465, 0.39000866, 0.15994591, + -0.17344901, -0.02705277, -0.17525876, 0.41131052, -0.27773873, + 0.10067809, -0.13446971, 0.16923336, -0.09077617, 0.01778304], + [0.00306016, -0.01955881, 0.01034007, 0.17493436, -0.48093734, + 0.38491632, 0.16019839, -0.24461942, -0.18951878, 0.07325417, + 0.41656003, -0.35216484, 0.0874835, -0.0410465, -0.14441806, + 0.32046984, -0.15171097, -0.0621303, 0.07121258, -0.01632441], + [-0.0025309, 0.01491356, -0.02571007, -0.01631912, 0.07365214, + 0.06062228, -0.30240844, 0.14247689, 0.31275391, -0.40762651, + 0.35492964, -0.45712523, 0.15402126, 0.2955034, -0.06288251, + -0.29563434, 0.07642346, 0.21028462, -0.15953874, 0.0341947], + [0.03302216, -0.19153524, 0.40688843, -0.31933408, -0.05277152, + 0.09866682, 0.15790266, -0.01634339, -0.38131874, 0.32361413, + 0.07763813, -0.1368291, -0.21047218, 0.24730162, 0.21768207, + -0.40471486, 0.05619326, 0.20536906, -0.13904006, 0.02808083], + [0.01330707, -0.07434674, 0.1640741, -0.17601905, 0.09966413, + -0.05496571, 0.00477538, 0.12104608, -0.01966449, -0.38902541, + 0.36282006, 0.288918, -0.4972182, 0.02834714, 0.06560865, + 0.32273737, -0.4027942, 0.15208733, -0.00212404, -0.00722747], + [0.01750219, -0.10485528, 0.23013136, -0.16200192, -0.19656005, + 0.4842784, -0.33731099, -0.08931874, 0.29052167, -0.00818688, + -0.31158999, 0.15565469, 0.18959906, -0.28819379, 0.19323482, + 0.01392894, -0.24431804, 0.27410116, -0.12994371, 0.02332712]]) + ) + test = self.random.random(size=10) np.testing.assert_almost_equal( test, From c5db7c695151c10ca7d3d0417521e9fe0e942bbe Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 15 Aug 2021 19:36:54 +0200 Subject: [PATCH 387/417] Fixed test. 
--- tests/test_regression.py | 407 +-------------------------------------- 1 file changed, 1 insertion(+), 406 deletions(-) diff --git a/tests/test_regression.py b/tests/test_regression.py index 15c84b687..1d2f92429 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -4,7 +4,6 @@ from scipy.integrate import cumtrapz from skfda.datasets import make_gaussian, make_gaussian_process -from skfda.misc import covariances from skfda.misc.covariances import Gaussian from skfda.misc.operators import LinearDifferentialOperator from skfda.misc.regularization import TikhonovRegularization @@ -337,16 +336,6 @@ def setUp(self) -> None: random_state=self.random, ) - np.testing.assert_almost_equal( - self.intercept.data_matrix[..., 0], - np.array([[ - -0.44419728, -0.56909477, -0.68783434, -0.80186766, -0.91540068, - -1.03397827, -1.16239266, -1.30246822, -1.45134619, -1.60079727, - -1.73785278, -1.84672707, -1.9116814, -1.92023053, -1.86597929, - -1.75042757, -1.58329321, -1.38122881, -1.16517441, -0.95690171, - ]]) - ) - self.X = make_gaussian_process( n_samples=self.n_samples, n_features=self.n_features, @@ -360,400 +349,6 @@ def setUp(self) -> None: random_state=self.random, ) - covariance = covariances._execute_covariance( - Gaussian(length_scale=1), np.linspace(0, 1, self.n_features), np.linspace(0, 1, self.n_features)) - - kk = np.linalg.svd(covariance) - np.testing.assert_almost_equal( - kk[0], - np.array([[-2.07745796e-01, -3.53499224e-01, -4.24704118e-01, - 4.38526581e-01, 4.10421088e-01, -3.54696800e-01, - -2.85306097e-01, 2.14317960e-01, 1.50480799e-01, - -9.72724699e-02, 5.48931421e-02, 2.77218995e-02, - -2.90447444e-03, -8.63720916e-03, 1.47985877e-02, - -6.38392730e-03, 3.19088026e-04, -2.31445976e-03, - -2.35200464e-02, 2.87085300e-02], - [-2.12536586e-01, -3.23842468e-01, -3.02798180e-01, - 1.78544537e-01, -1.05573598e-03, 1.83142307e-01, - 3.23551473e-01, -3.97680953e-01, -4.03394435e-01, - 3.49349496e-01, -2.47364593e-01, -1.62706573e-01, - 
-2.83079777e-03, 7.75399053e-02, -7.82703189e-02, - 2.34672884e-02, -3.68317337e-03, 3.93668278e-03, - 1.21931948e-01, -1.73155334e-01], - [-2.16887425e-01, -2.91801208e-01, -1.89267880e-01, - -1.80827215e-02, -2.19966326e-01, 3.20250225e-01, - 2.77298225e-01, -1.15530338e-01, 9.53154240e-02, - -2.68609660e-01, 2.98125851e-01, 3.50326598e-01, - 9.00339737e-02, -2.63133363e-01, 1.38529161e-01, - -4.31304302e-02, 2.53294158e-02, 9.78557308e-03, - -2.09207371e-01, 3.93358354e-01], - [-2.20767163e-01, -2.57579395e-01, -8.58072050e-02, - -1.54324281e-01, -2.90281478e-01, 2.30663263e-01, - 1.76438603e-02, 2.10689264e-01, 3.15510651e-01, - -2.38136937e-01, 1.50956053e-01, -2.85577754e-01, - -2.94853489e-01, 3.99842395e-01, -3.37528337e-02, - 1.47721647e-01, -9.97750355e-02, 7.44328024e-04, - 6.67073910e-02, -3.56073311e-01], - [-2.24147659e-01, -2.21408067e-01, 6.02231879e-03, - -2.34681396e-01, -2.55173841e-01, 4.99470070e-02, - -2.02045126e-01, 2.87033345e-01, 1.36398452e-01, - 1.20139691e-01, -3.85416779e-01, -4.40485786e-03, - 4.14894610e-01, -1.60147670e-01, -1.83390205e-01, - -3.86382356e-01, 2.20367250e-01, -1.51073821e-01, - 1.43736598e-01, -2.11892050e-02], - [-2.27004115e-01, -1.83542799e-01, 8.48212284e-02, - -2.65079765e-01, -1.55333564e-01, -1.25459870e-01, - -2.75613302e-01, 1.35724448e-01, -1.49214888e-01, - 2.65575744e-01, -1.58539640e-01, -1.87699105e-02, - -2.55533556e-01, -3.00732826e-01, 2.36617690e-01, - 3.33864157e-01, -2.61895075e-01, 4.00904477e-01, - -1.86367907e-02, 1.78696543e-01], - [-2.29315378e-01, -1.44260722e-01, 1.49378970e-01, - -2.52723472e-01, -2.73898866e-02, -2.36707090e-01, - -2.00638882e-01, -9.19522570e-02, -2.74885183e-01, - 1.51099303e-01, 2.39776405e-01, 3.04597673e-01, - 1.86320951e-02, 3.99089908e-01, -2.39067808e-01, - 2.29630093e-01, 1.24076421e-01, -3.99092262e-01, - -1.65297964e-01, 1.21518710e-01], - [-2.31064204e-01, -1.03857162e-01, 1.98697349e-01, - -2.05914208e-01, 9.75040092e-02, -2.58925295e-01, - -3.59443114e-02, 
-2.44719467e-01, -1.69553167e-01, - -1.26621797e-01, 3.21190077e-01, -2.94533855e-01, - 7.93326575e-02, -1.31451985e-01, 4.54774940e-01, - -3.94313255e-01, 7.96670732e-02, -2.03962509e-02, - -8.55100650e-02, -2.85599119e-01], - [-2.32237467e-01, -6.26419518e-02, 2.32010013e-01, - -1.33840444e-01, 1.94871188e-01, -1.95773016e-01, - 1.36754789e-01, -2.43059362e-01, 6.03941812e-02, - -3.19348016e-01, -6.13381573e-02, 2.13481419e-01, - -1.50059406e-01, -3.59484931e-02, -4.50925459e-01, - -1.90977474e-01, -2.41653937e-01, 2.62422246e-01, - 4.11692342e-01, 3.13373010e-02], - [-2.32826323e-01, -2.09354746e-02, 2.48797507e-01, - -4.63422021e-02, 2.47852118e-01, -7.24487156e-02, - 2.44283673e-01, -9.97691246e-02, 2.38867772e-01, - -1.53579703e-01, -2.65935064e-01, -3.27866179e-01, - 1.97443808e-01, 4.49207015e-02, -2.95001149e-02, - 4.00764862e-01, 4.16592291e-01, 1.43047831e-01, - -2.36535841e-01, 1.83548419e-01], - [-2.32826323e-01, 2.09354746e-02, 2.48797507e-01, - 4.63422021e-02, 2.47852118e-01, 7.24487156e-02, - 2.44283659e-01, 9.97723947e-02, 2.38948800e-01, - 1.69312090e-01, -2.77706716e-01, 1.90938295e-01, - -4.25421400e-02, -4.20876679e-03, 1.81273932e-01, - -7.63459762e-02, -5.05022021e-01, -3.69985498e-01, - -3.05044017e-01, -1.41953737e-01], - [-2.32237467e-01, 6.26419518e-02, 2.32010013e-01, - 1.33840444e-01, 1.94871188e-01, 1.95773016e-01, - 1.36754789e-01, 2.43058569e-01, 6.06388818e-02, - 3.03285458e-01, 2.41002897e-01, 3.31393398e-02, - -2.80376542e-01, -1.01206815e-01, 1.73106262e-01, - 1.26376371e-02, 3.43527029e-01, -1.76747497e-01, - 5.23487628e-01, 1.38599341e-01], - [-2.31064204e-01, 1.03857162e-01, 1.98697349e-01, - 2.05914208e-01, 9.75040092e-02, 2.58925295e-01, - -3.59443091e-02, 2.44718136e-01, -1.69579812e-01, - 1.20147970e-01, 2.68572454e-01, 1.50156666e-01, - 4.84609644e-01, 2.08189327e-01, -5.76586328e-02, - 2.38638657e-02, -1.13711093e-01, 4.86469999e-01, - -6.43567836e-02, -2.06237772e-01], - [-2.29315378e-01, 1.44260722e-01, 1.49378970e-01, 
- 2.52723472e-01, -2.73898866e-02, 2.36707090e-01, - -2.00638871e-01, 9.19501311e-02, -2.75145016e-01, - -1.39165267e-01, 6.41684963e-02, -3.14309507e-01, - -2.86193935e-01, -3.30301420e-01, -4.42319843e-01, - -1.13890489e-01, 2.07765480e-03, -1.38702569e-01, - -3.41647725e-01, 3.68680186e-02], - [-2.27004115e-01, 1.83542799e-01, 8.48212284e-02, - 2.65079765e-01, -1.55333564e-01, 1.25459870e-01, - -2.75613309e-01, -1.35725436e-01, -1.49146759e-01, - -2.85417286e-01, -3.99169708e-01, 1.74679517e-01, - -1.71186515e-01, 3.97511534e-01, 3.62671811e-01, - -1.88009057e-01, 1.27850559e-01, 6.76436291e-02, - 3.36667289e-02, 1.82362079e-01], - [-2.24147659e-01, 2.21408067e-01, 6.02231879e-03, - 2.34681396e-01, -2.55173841e-01, -4.99470070e-02, - -2.02045125e-01, -2.87034840e-01, 1.36413298e-01, - -9.30394919e-02, 9.58631470e-02, -1.74628694e-01, - 3.69298879e-01, -1.70606523e-01, 5.39650821e-02, - 3.54541478e-01, -2.97125313e-01, -2.95501871e-01, - 3.42692461e-01, 2.66855373e-02], - [-2.20767163e-01, 2.57579395e-01, -8.58072050e-02, - 1.54324281e-01, -2.90281478e-01, -2.30663263e-01, - 1.76438437e-02, -2.10684387e-01, 3.15670812e-01, - 2.28027102e-01, 2.13532625e-02, 3.33791939e-01, - -1.87199097e-01, -1.81648876e-01, -8.92280091e-02, - 2.27687519e-02, 3.10876464e-01, 1.60642602e-01, - -1.88142728e-01, -4.16752375e-01], - [-2.16887425e-01, 2.91801208e-01, -1.89267880e-01, - 1.80827215e-02, -2.19966326e-01, -3.20250225e-01, - 2.77298249e-01, 1.15532864e-01, 9.51399665e-02, - 2.60388460e-01, 1.34926522e-01, -2.98464216e-01, - -7.93229609e-03, 2.64476169e-01, -7.05477960e-02, - -3.01971583e-01, -1.70197579e-01, 1.06442936e-01, - -7.30550644e-02, 4.41051543e-01], - [-2.12536586e-01, 3.23842468e-01, -3.02798180e-01, - -1.78544537e-01, -1.05573598e-03, -1.83142306e-01, - 3.23551463e-01, 3.97674954e-01, -4.03335658e-01, - -3.41670271e-01, -1.25538235e-01, 1.21419250e-01, - 3.62611830e-02, -1.24987052e-01, 7.74852949e-02, - 1.89687833e-01, 4.78371828e-02, -1.16123068e-01, - 
8.72297239e-02, -1.94400231e-01], - [-2.07745796e-01, 3.53499224e-01, -4.24704118e-01, - -4.38526581e-01, 4.10421088e-01, 3.54696800e-01, - -2.85306096e-01, -2.14315901e-01, 1.50476117e-01, - 9.55355874e-02, 3.01805869e-02, -1.89910515e-02, - -8.89460114e-03, 2.14410599e-02, -1.85617415e-02, - -3.75430663e-02, -5.45720255e-03, 2.78969926e-02, - -2.01904240e-02, 3.26267090e-02]]) - ) - np.testing.assert_almost_equal( - kk[1], - np.array([1.83679678e+01, 1.57226237e+00, 5.83837377e-02, 1.36268266e-03, - 2.30991559e-05, 3.04177680e-07, 3.23145016e-09, 2.83381200e-11, - 2.08149707e-13, 1.27735148e-15, 2.50546185e-16, 1.67741842e-16, - 1.40236481e-16, 9.78305508e-17, 6.32294021e-17, 4.19375304e-17, - 3.74066368e-17, 3.12159770e-17, 1.98017174e-17, 1.38376694e-17]) - ) - - np.testing.assert_almost_equal( - kk[2], - np.array([[-0.2077458, -0.21253659, -0.21688742, -0.22076716, -0.22414766, - -0.22700411, -0.22931538, -0.2310642, -0.23223747, -0.23282632, - -0.23282632, -0.23223747, -0.2310642, -0.22931538, -0.22700411, - -0.22414766, -0.22076716, -0.21688742, -0.21253659, -0.2077458], - [-0.35349922, -0.32384247, -0.29180121, -0.25757939, -0.22140807, - -0.1835428, -0.14426072, -0.10385716, -0.06264195, -0.02093547, - 0.02093547, 0.06264195, 0.10385716, 0.14426072, 0.1835428, - 0.22140807, 0.25757939, 0.29180121, 0.32384247, 0.35349922], - [-0.42470412, -0.30279818, -0.18926788, -0.0858072, 0.00602232, - 0.08482123, 0.14937897, 0.19869735, 0.23201001, 0.24879751, - 0.24879751, 0.23201001, 0.19869735, 0.14937897, 0.08482123, - 0.00602232, -0.0858072, -0.18926788, -0.30279818, -0.42470412], - [0.43852658, 0.17854454, -0.01808272, -0.15432428, -0.2346814, - -0.26507976, -0.25272347, -0.20591421, -0.13384044, -0.0463422, - 0.0463422, 0.13384044, 0.20591421, 0.25272347, 0.26507976, - 0.2346814, 0.15432428, 0.01808272, -0.17854454, -0.43852658], - [0.41042109, -0.00105574, -0.21996633, -0.29028148, -0.25517384, - -0.15533356, -0.02738989, 0.09750401, 0.19487119, 0.24785212, - 
0.24785212, 0.19487119, 0.09750401, -0.02738989, -0.15533356, - -0.25517384, -0.29028148, -0.21996633, -0.00105574, 0.41042109], - [-0.3546968, 0.18314231, 0.32025023, 0.23066326, 0.04994701, - -0.12545987, -0.23670709, -0.25892529, -0.19577302, -0.07244872, - 0.07244872, 0.19577302, 0.25892529, 0.23670709, 0.12545987, - -0.04994701, -0.23066326, -0.32025023, -0.18314231, 0.3546968], - [-0.2853061, 0.32355148, 0.27729823, 0.01764386, -0.20204514, - -0.2756133, -0.20063892, -0.03594428, 0.13675481, 0.24428368, - 0.24428365, 0.1367548, -0.03594434, -0.20063891, -0.27561327, - -0.20204509, 0.01764384, 0.27729823, 0.32355145, -0.28530609], - [0.21431629, -0.39767568, -0.11553366, 0.21068629, 0.2870346, - 0.13572467, -0.09194787, -0.24472108, -0.2430603, -0.09977042, - 0.09977179, 0.24305657, 0.24472172, 0.09195302, -0.13572777, - -0.28703635, -0.21068478, 0.11553353, 0.39767582, -0.21431641], - [0.15050979, -0.40352422, 0.09550264, 0.3154507, 0.13640236, - -0.14941301, -0.27463812, -0.16962604, 0.06040535, 0.23887409, - 0.23899712, 0.06040722, -0.16950586, -0.27484944, -0.14938464, - 0.13641897, 0.31563022, 0.09525851, -0.40340336, 0.15048796], - [-0.09823207, 0.35503381, -0.28129892, -0.22469036, 0.10896079, - 0.28518116, 0.12260137, -0.10726981, -0.32147545, -0.15808811, - 0.16075458, 0.32795726, 0.11424999, -0.16857865, -0.25750771, - -0.10087266, 0.23727235, 0.24460211, -0.3322095, 0.09360981], - [0.05032393, -0.23010173, 0.29017025, 0.09760863, -0.28077051, - -0.24398518, 0.30061428, 0.26489603, -0.06452987, -0.2101119, - -0.24567425, 0.10810049, 0.31202781, 0.15029365, -0.44866164, - 0.0956641, -0.0481157, 0.24811557, -0.18832063, 0.04245667], - [0.01549762, -0.10841756, 0.29727625, -0.39335616, 0.27040749, - -0.20776369, 0.32141654, -0.27802659, 0.13642451, -0.17648692, - 0.18449089, -0.16592692, 0.36208749, -0.40197757, 0.09225227, - 0.06170084, 0.07464316, -0.14404867, 0.07210019, -0.01229315], - [0.0216311, -0.09586963, 0.09305442, 0.17599598, -0.39185062, - 
0.10796132, 0.24254513, -0.21991255, 0.2646336, -0.36661166, - 0.04009641, 0.31406494, -0.23194085, 0.02228008, 0.14332905, - -0.32948443, 0.37141779, -0.21893078, 0.06553256, -0.00794184], - [0.01125609, -0.08861691, 0.26711699, -0.35215627, 0.08304963, - 0.29312265, -0.31155358, 0.11212051, -0.02198142, 0.00202485, - -0.04121941, 0.08438469, -0.05096357, 0.20759584, -0.43944241, - 0.19004198, 0.31269486, -0.41733714, 0.19228711, -0.03242449], - [-0.01883737, 0.07467897, -0.05861398, -0.1107113, 0.15443992, - -0.00505078, 0.19761474, -0.60373465, 0.39000866, 0.15994591, - -0.17344901, -0.02705277, -0.17525876, 0.41131052, -0.27773873, - 0.10067809, -0.13446971, 0.16923336, -0.09077617, 0.01778304], - [0.00306016, -0.01955881, 0.01034007, 0.17493436, -0.48093734, - 0.38491632, 0.16019839, -0.24461942, -0.18951878, 0.07325417, - 0.41656003, -0.35216484, 0.0874835, -0.0410465, -0.14441806, - 0.32046984, -0.15171097, -0.0621303, 0.07121258, -0.01632441], - [-0.0025309, 0.01491356, -0.02571007, -0.01631912, 0.07365214, - 0.06062228, -0.30240844, 0.14247689, 0.31275391, -0.40762651, - 0.35492964, -0.45712523, 0.15402126, 0.2955034, -0.06288251, - -0.29563434, 0.07642346, 0.21028462, -0.15953874, 0.0341947], - [0.03302216, -0.19153524, 0.40688843, -0.31933408, -0.05277152, - 0.09866682, 0.15790266, -0.01634339, -0.38131874, 0.32361413, - 0.07763813, -0.1368291, -0.21047218, 0.24730162, 0.21768207, - -0.40471486, 0.05619326, 0.20536906, -0.13904006, 0.02808083], - [0.01330707, -0.07434674, 0.1640741, -0.17601905, 0.09966413, - -0.05496571, 0.00477538, 0.12104608, -0.01966449, -0.38902541, - 0.36282006, 0.288918, -0.4972182, 0.02834714, 0.06560865, - 0.32273737, -0.4027942, 0.15208733, -0.00212404, -0.00722747], - [0.01750219, -0.10485528, 0.23013136, -0.16200192, -0.19656005, - 0.4842784, -0.33731099, -0.08931874, 0.29052167, -0.00818688, - -0.31158999, 0.15565469, 0.18959906, -0.28819379, 0.19323482, - 0.01392894, -0.24431804, 0.27410116, -0.12994371, 0.02332712]]) - ) - 
- test = self.random.random(size=10) - np.testing.assert_almost_equal( - test, - np.array([0.39564827, 0.77171848, 0.25120318, 0.65576077, - 0.96039715, 0.00355476, - 0.72336104, 0.10899503, 0.54378749, 0.62691602]) - ) - - np.testing.assert_almost_equal( - self.coefficients.data_matrix[0, ..., 0], - np.array([ - [4.93663563e-01, 4.78010146e-01, 4.63788522e-01, - 4.50928237e-01, 4.39314629e-01, 4.28798261e-01, - 4.19192312e-01, 4.10280148e-01, 4.01813089e-01, - 3.93515764e-01, 3.85089077e-01, 3.76209900e-01, - 3.66535004e-01, 3.55705856e-01, 3.43348716e-01, - 3.29083247e-01, 3.12524641e-01, 2.93292495e-01, - 2.71019709e-01, 2.45359953e-01], - [5.31535010e-01, 5.17720140e-01, 5.04907227e-01, - 4.93003500e-01, 4.81882668e-01, 4.71390399e-01, - 4.61342759e-01, 4.51534164e-01, 4.41733815e-01, - 4.31691405e-01, 4.21136776e-01, 4.09782265e-01, - 3.97324811e-01, 3.83448921e-01, 3.67826456e-01, - 3.50123751e-01, 3.30004632e-01, 3.07136237e-01, - 2.81197769e-01, 2.51887492e-01], - [5.61268157e-01, 5.49413977e-01, 5.38156769e-01, - 5.27383042e-01, 5.16954232e-01, 5.06709032e-01, - 4.96466435e-01, 4.86028192e-01, 4.75180833e-01, - 4.63695845e-01, 4.51331706e-01, 4.37835185e-01, - 4.22940282e-01, 4.06372937e-01, 3.87849145e-01, - 3.67081529e-01, 3.43779743e-01, 3.17658698e-01, - 2.88441954e-01, 2.55873009e-01], - [5.82214282e-01, 5.72443820e-01, 5.62899224e-01, - 5.53445283e-01, 5.43927740e-01, 5.34178785e-01, - 5.24016376e-01, 5.13250432e-01, 5.01678997e-01, - 4.89095545e-01, 4.75283435e-01, 4.60020810e-01, - 4.43078173e-01, 4.24219504e-01, 4.03203927e-01, - 3.79785535e-01, 3.53720408e-01, 3.24767458e-01, - 2.92695286e-01, 2.57289404e-01], - [5.93890073e-01, 5.86327635e-01, 5.78658665e-01, - 5.70723768e-01, 5.62353524e-01, 5.53368146e-01, - 5.43585005e-01, 5.32816681e-01, 5.20874443e-01, - 5.07566358e-01, 4.92700607e-01, 4.76083195e-01, - 4.57516183e-01, 4.36799910e-01, 4.13732435e-01, - 3.88109758e-01, 3.59729121e-01, 3.28392005e-01, - 2.93909255e-01, 2.56107758e-01], - 
[5.95979512e-01, 5.90749572e-01, 5.85121442e-01, - 5.78911820e-01, 5.71931019e-01, 5.63990198e-01, - 5.54899635e-01, 5.44473706e-01, 5.32530642e-01, - 5.18894359e-01, 5.03391058e-01, 4.85850840e-01, - 4.66105368e-01, 4.43987537e-01, 4.19330529e-01, - 3.91967867e-01, 3.61737266e-01, 3.28478056e-01, - 2.92041731e-01, 2.52293795e-01], - [5.88331159e-01, 5.85553166e-01, 5.82129597e-01, - 5.77850567e-01, 5.72507449e-01, 5.65895238e-01, - 5.57817817e-01, 5.48087161e-01, 5.36525668e-01, - 5.22967184e-01, 5.07253137e-01, 4.89235017e-01, - 4.68768294e-01, 4.45715658e-01, 4.19940279e-01, - 3.91310913e-01, 3.59699908e-01, 3.24984279e-01, - 2.87052173e-01, 2.45804580e-01], - [5.70945355e-01, 5.70732608e-01, 5.69672008e-01, - 5.67526575e-01, 5.64064529e-01, 5.59064647e-01, - 5.52320762e-01, 5.43637511e-01, 5.32839843e-01, - 5.19766631e-01, 5.04271253e-01, 4.86220453e-01, - 4.65490596e-01, 4.41967813e-01, 4.15543684e-01, - 3.86116938e-01, 3.53590594e-01, 3.17876843e-01, - 2.78895404e-01, 2.36583668e-01], - [5.43965927e-01, 5.46421980e-01, 5.47873568e-01, - 5.48054218e-01, 5.46709103e-01, 5.43597425e-01, - 5.38497473e-01, 5.31207983e-01, 5.21548846e-01, - 5.09360115e-01, 4.94503082e-01, 4.76855471e-01, - 4.56310745e-01, 4.32772489e-01, 4.06157107e-01, - 3.76387702e-01, 3.43396592e-01, 3.07124177e-01, - 2.67522260e-01, 2.24557489e-01], - [5.07663154e-01, 5.12878825e-01, 5.16978529e-01, - 5.19665633e-01, 5.20659115e-01, 5.19696420e-01, - 5.16538243e-01, 5.10971363e-01, 5.02808400e-01, - 4.91887178e-01, 4.78071524e-01, 4.61245444e-01, - 4.41312805e-01, 4.18194086e-01, 3.91822409e-01, - 3.62143509e-01, 3.29112722e-01, 2.92696484e-01, - 2.52874094e-01, 2.09640776e-01], - [4.62417185e-01, 4.70469373e-01, 4.77336768e-01, - 4.82693233e-01, 4.86227716e-01, 4.87654065e-01, - 4.86713876e-01, 4.83176946e-01, 4.76844995e-01, - 4.67549900e-01, 4.55151684e-01, 4.39538412e-01, - 4.20619758e-01, 3.98325863e-01, 3.72603904e-01, - 3.43417253e-01, 3.10739459e-01, 2.74560952e-01, - 2.34883811e-01, 
1.91729340e-01], - [4.08701273e-01, 4.19648971e-01, 4.29385830e-01, - 4.37551868e-01, 4.43806630e-01, 4.47838242e-01, - 4.49364099e-01, 4.48135656e-01, 4.43940027e-01, - 4.36597506e-01, 4.25962501e-01, 4.11919873e-01, - 3.94380971e-01, 3.73282426e-01, 3.48580043e-01, - 3.20249019e-01, 2.88280822e-01, 2.52682487e-01, - 2.13477046e-01, 1.70708405e-01], - [3.47064514e-01, 3.60948529e-01, 3.73633162e-01, - 3.84724533e-01, 3.93852096e-01, 4.00674175e-01, - 4.04883237e-01, 4.06209018e-01, 4.04418871e-01, - 3.99319584e-01, 3.90754458e-01, 3.78600793e-01, - 3.62768890e-01, 3.43194737e-01, 3.19839536e-01, - 2.92687753e-01, 2.61741537e-01, 2.27022588e-01, - 1.88571803e-01, 1.46452555e-01], - [2.78115147e-01, 2.94954322e-01, 3.10640282e-01, - 3.24745523e-01, 3.36866989e-01, 3.46632733e-01, - 3.53706943e-01, 3.57793827e-01, 3.58640294e-01, - 3.56033094e-01, 3.49802162e-01, 3.39813697e-01, - 3.25970385e-01, 3.08204815e-01, 2.86481038e-01, - 2.60784909e-01, 2.31127858e-01, 1.97542609e-01, - 1.60085349e-01, 1.18834923e-01], - [2.02505870e-01, 2.22295616e-01, 2.41010390e-01, - 2.58188053e-01, 2.73391887e-01, 2.86217460e-01, - 2.96299516e-01, 3.03314572e-01, 3.06984085e-01, - 3.07075180e-01, 3.03397257e-01, 2.95802774e-01, - 2.84183751e-01, 2.68466045e-01, 2.48607812e-01, - 2.24598277e-01, 1.96451275e-01, 1.64208453e-01, - 1.27936293e-01, 8.77289872e-02], - [1.20919279e-01, 1.43631393e-01, 1.65374150e-01, - 1.85650125e-01, 2.03989120e-01, 2.19952959e-01, - 2.33145175e-01, 2.43211654e-01, 2.49848567e-01, - 2.52796354e-01, 2.51843737e-01, 2.46825811e-01, - 2.37618011e-01, 2.24136868e-01, 2.06333636e-01, - 1.84192432e-01, 1.57730629e-01, 1.26992827e-01, - 9.20536943e-02, 5.30199705e-02], - [3.40581354e-02, 5.96384941e-02, 8.43785197e-02, - 1.07746837e-01, 1.29237742e-01, 1.48379055e-01, - 1.64741413e-01, 1.77940510e-01, 1.87641263e-01, - 1.93558170e-01, 1.95456087e-01, 1.93147244e-01, - 1.86491192e-01, 1.75388591e-01, 1.59780096e-01, - 1.39645886e-01, 1.14997927e-01, 8.58838546e-02, - 
5.23841084e-02, 1.46146099e-02], - [-5.73636876e-02, -2.89952105e-02, -1.31777107e-03, - 2.51021740e-02, 4.97245150e-02, 7.20431501e-02, - 9.15945992e-02, 1.07961645e-01, 1.20778708e-01, - 1.29730922e-01, 1.34556570e-01, 1.35044553e-01, - 1.31031788e-01, 1.22403703e-01, 1.09086975e-01, - 9.10515217e-02, 6.83044338e-02, 4.08921161e-02, - 8.89870469e-03, -2.75528458e-02], - [-1.52626004e-01, -1.21575121e-01, -9.10514034e-02, - -6.16531260e-02, -3.39560582e-02, -8.50001123e-03, - 1.42172550e-02, 3.37460153e-02, 4.96871596e-02, - 6.16958027e-02, 6.94813559e-02, 7.28063083e-02, - 7.14858605e-02, 6.53839428e-02, 5.44120992e-02, - 3.85273779e-02, 1.77287890e-02, -7.93995107e-03, - -3.83950945e-02, -7.35085564e-02], - [-2.51007663e-01, -2.17405703e-01, -1.84154945e-01, - -1.51885874e-01, -1.21206316e-01, -9.26905561e-02, - -6.68699310e-02, -4.42281214e-02, -2.51976265e-02, - -1.01544938e-02, 5.80976532e-04, 6.74184827e-03, - 8.11965928e-03, 4.55584966e-03, -4.05706483e-03, - -1.77762916e-02, -3.66129300e-02, -6.05304857e-02, - -8.94449365e-02, -1.23227367e-01], - ]), - ) - self.create_model() def create_model(self) -> None: @@ -791,7 +386,7 @@ def test_historical(self) -> None: np.testing.assert_allclose( regression.coef_.data_matrix[0, ..., 0], np.triu(self.coefficients.data_matrix[0, ..., 0]), - atol=0.2, + atol=0.3, rtol=0, ) From 3190be9aa6436745f29019585cc4113c6b52d12e Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 15 Aug 2021 21:14:09 +0200 Subject: [PATCH 388/417] Center in predict. 
--- .../ml/regression/_historical_linear_model.py | 32 ++++++++++++++++--- tests/test_regression.py | 5 ++- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/skfda/ml/regression/_historical_linear_model.py b/skfda/ml/regression/_historical_linear_model.py index 77e70d9b9..c99b3e354 100644 --- a/skfda/ml/regression/_historical_linear_model.py +++ b/skfda/ml/regression/_historical_linear_model.py @@ -1,7 +1,7 @@ from __future__ import annotations import math -from typing import Tuple +from typing import Tuple, Union import numpy as np import scipy.integrate @@ -320,10 +320,28 @@ def __init__( self.fit_intercept = fit_intercept self.lag = lag + def _center_X_y( + self, + X: FDataGrid, + y: FDataGrid, + ) -> Tuple[FDataGrid, FDataGrid, FDataGrid, FDataGrid]: + + X_mean: Union[FDataGrid, float] = ( + X.mean() if self.fit_intercept else 0 + ) + X_centered = X - X_mean + y_mean: Union[FDataGrid, float] = ( + y.mean() if self.fit_intercept else 0 + ) + y_centered = y - y_mean + + return X_centered, y_centered, X_mean, y_mean + def _fit_and_return_matrix(self, X: FDataGrid, y: FDataGrid) -> np.ndarray: - X_centered = X - X.mean() if self.fit_intercept else X - y_centered = y - y.mean() if self.fit_intercept else y + X_centered, y_centered, X_mean, y_mean = self._center_X_y(X, y) + + self._X_mean = X_mean self._pred_points = y_centered.grid_points[0] self._pred_domain_range = y_centered.domain_range[0] @@ -358,7 +376,9 @@ def _fit_and_return_matrix(self, X: FDataGrid, y: FDataGrid) -> np.ndarray: ) if self.fit_intercept: - self.intercept_ = y.mean() - self._predict_no_intercept(X.mean()) + self.intercept_ = ( + y_mean - self._predict_no_intercept(X_mean) + ) else: self.intercept_ = y.copy( data_matrix=np.zeros_like(y.data_matrix[0]), @@ -410,6 +430,8 @@ def _predict_no_intercept(self, X: FDataGrid) -> FDataGrid: def predict(self, X: FDataGrid) -> FDataGrid: # noqa: D102 + X_centered = X - self._X_mean + check_is_fitted(self) - return 
self._predict_no_intercept(X) + self.intercept_ + return self._predict_no_intercept(X_centered) + self.intercept_ diff --git a/tests/test_regression.py b/tests/test_regression.py index 1d2f92429..967434aa8 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -373,14 +373,13 @@ def test_historical(self) -> None: np.testing.assert_allclose( regression.predict(self.X).data_matrix, self.y.data_matrix, - atol=1e-1, - rtol=0, + rtol=1e-1, ) np.testing.assert_allclose( regression.intercept_.data_matrix, self.intercept.data_matrix, - rtol=1e-2, + rtol=1e-4, ) np.testing.assert_allclose( From 6d4da3d02bd3fde2042e6b3ab3dbf27274e7e022 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 15 Aug 2021 21:54:11 +0200 Subject: [PATCH 389/417] Fix historical linear model tests. --- .../ml/regression/_historical_linear_model.py | 28 +++++++++++-------- tests/test_regression.py | 11 ++++++-- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/skfda/ml/regression/_historical_linear_model.py b/skfda/ml/regression/_historical_linear_model.py index c99b3e354..2a3d4dbed 100644 --- a/skfda/ml/regression/_historical_linear_model.py +++ b/skfda/ml/regression/_historical_linear_model.py @@ -12,6 +12,8 @@ from ...representation import FDataBasis, FDataGrid from ...representation.basis import Basis, FiniteElement +_MeanType = Union[FDataGrid, float] + def _pairwise_fem_inner_product( basis_fd: FDataBasis, @@ -324,7 +326,7 @@ def _center_X_y( self, X: FDataGrid, y: FDataGrid, - ) -> Tuple[FDataGrid, FDataGrid, FDataGrid, FDataGrid]: + ) -> Tuple[FDataGrid, FDataGrid, _MeanType, _MeanType]: X_mean: Union[FDataGrid, float] = ( X.mean() if self.fit_intercept else 0 @@ -337,12 +339,14 @@ def _center_X_y( return X_centered, y_centered, X_mean, y_mean - def _fit_and_return_matrix(self, X: FDataGrid, y: FDataGrid) -> np.ndarray: + def _fit_and_return_centered_matrix( + self, + X: FDataGrid, + y: FDataGrid, + ) -> Tuple[np.ndarray, _MeanType]: X_centered, y_centered, X_mean, 
y_mean = self._center_X_y(X, y) - self._X_mean = X_mean - self._pred_points = y_centered.grid_points[0] self._pred_domain_range = y_centered.domain_range[0] @@ -376,6 +380,7 @@ def _fit_and_return_matrix(self, X: FDataGrid, y: FDataGrid) -> np.ndarray: ) if self.fit_intercept: + assert isinstance(X_mean, FDataGrid) self.intercept_ = ( y_mean - self._predict_no_intercept(X_mean) ) @@ -384,7 +389,7 @@ def _fit_and_return_matrix(self, X: FDataGrid, y: FDataGrid) -> np.ndarray: data_matrix=np.zeros_like(y.data_matrix[0]), ) - return design_matrix + return design_matrix, y_mean def _prediction_from_matrix(self, design_matrix: np.ndarray) -> FDataGrid: @@ -405,7 +410,7 @@ def fit( # noqa: D102 y: FDataGrid, ) -> HistoricalLinearRegression: - self._fit_and_return_matrix(X, y) + self._fit_and_return_centered_matrix(X, y) return self def fit_predict( # noqa: D102 @@ -414,8 +419,11 @@ def fit_predict( # noqa: D102 y: FDataGrid, ) -> FDataGrid: - design_matrix = self._fit_and_return_matrix(X, y) - return self._prediction_from_matrix(design_matrix) + self.intercept_ + design_matrix, y_mean = self._fit_and_return_centered_matrix(X, y) + return ( + self._prediction_from_matrix(design_matrix) + + y_mean + ) def _predict_no_intercept(self, X: FDataGrid) -> FDataGrid: @@ -430,8 +438,6 @@ def _predict_no_intercept(self, X: FDataGrid) -> FDataGrid: def predict(self, X: FDataGrid) -> FDataGrid: # noqa: D102 - X_centered = X - self._X_mean - check_is_fitted(self) - return self._predict_no_intercept(X_centered) + self.intercept_ + return self._predict_no_intercept(X) + self.intercept_ diff --git a/tests/test_regression.py b/tests/test_regression.py index 967434aa8..e36c268e8 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -369,9 +369,16 @@ def create_model(self) -> None: def test_historical(self) -> None: """Test historical regression with data following the model.""" regression = HistoricalLinearRegression(n_intervals=6) - regression.fit(self.X, self.y) + 
fit_predict_result = regression.fit_predict(self.X, self.y) + predict_result = regression.predict(self.X) + + np.testing.assert_allclose( + predict_result.data_matrix, + fit_predict_result.data_matrix, + ) + np.testing.assert_allclose( - regression.predict(self.X).data_matrix, + predict_result.data_matrix, self.y.data_matrix, rtol=1e-1, ) From 95098ddbb7d8c43c432fb389357a208311bb5dfa Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 15 Aug 2021 23:39:21 +0200 Subject: [PATCH 390/417] Fix too small tolerancy in test. --- tests/test_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_regression.py b/tests/test_regression.py index e36c268e8..94a4b0e45 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -386,7 +386,7 @@ def test_historical(self) -> None: np.testing.assert_allclose( regression.intercept_.data_matrix, self.intercept.data_matrix, - rtol=1e-4, + rtol=1e-3, ) np.testing.assert_allclose( From 1198e56cabe76d7d99c6459a1a77667adfc5f3c0 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 16 Aug 2021 01:48:57 +0200 Subject: [PATCH 391/417] First version of vectorial model. 
--- .../ml/regression/_historical_linear_model.py | 18 ++--- tests/test_regression.py | 79 +++++++++++++++++-- 2 files changed, 83 insertions(+), 14 deletions(-) diff --git a/skfda/ml/regression/_historical_linear_model.py b/skfda/ml/regression/_historical_linear_model.py index 2a3d4dbed..4f1745577 100644 --- a/skfda/ml/regression/_historical_linear_model.py +++ b/skfda/ml/regression/_historical_linear_model.py @@ -10,7 +10,7 @@ from ..._utils import _cartesian_product, _pairwise_symmetric from ...representation import FDataBasis, FDataGrid -from ...representation.basis import Basis, FiniteElement +from ...representation.basis import Basis, FiniteElement, VectorValued _MeanType = Union[FDataGrid, float] @@ -36,13 +36,9 @@ def _pairwise_fem_inner_product( eval_fem = basis_fd(eval_grid_fem) eval_fd = fd(grid) - # Only for scalar valued functions for now - assert eval_fem.shape[-1] == 1 - assert eval_fd.shape[-1] == 1 - - prod = eval_fem[..., 0] * eval_fd[..., 0] - - return scipy.integrate.simps(prod, grid, axis=1) + prod = eval_fem * eval_fd + integral = scipy.integrate.simps(prod, grid, axis=1) + return np.sum(integral, axis=-1) def _inner_product_matrix( @@ -350,13 +346,17 @@ def _fit_and_return_centered_matrix( self._pred_points = y_centered.grid_points[0] self._pred_domain_range = y_centered.domain_range[0] - self._basis = _create_fem_basis( + fem_basis = _create_fem_basis( start=X_centered.domain_range[0][0], stop=X_centered.domain_range[0][1], n_intervals=self.n_intervals, lag=self.lag, ) + self._basis = VectorValued( + [fem_basis] * X_centered.dim_codomain + ) + design_matrix = _design_matrix( self._basis, X_centered, diff --git a/tests/test_regression.py b/tests/test_regression.py index 94a4b0e45..9b17891b8 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -342,6 +342,7 @@ def setUp(self) -> None: cov=Gaussian(length_scale=0.4), random_state=self.random, ) + self.coefficients = make_gaussian( n_samples=1, grid_points=[np.linspace(0, 1, 
self.n_features)] * 2, @@ -349,22 +350,61 @@ def setUp(self) -> None: random_state=self.random, ) + self.X2 = make_gaussian_process( + n_samples=self.n_samples, + n_features=self.n_features, + cov=Gaussian(length_scale=0.4), + random_state=self.random, + ) + + self.coefficients2 = make_gaussian( + n_samples=1, + grid_points=[np.linspace(0, 1, self.n_features)] * 2, + cov=Gaussian(length_scale=1), + random_state=self.random, + ) + self.create_model() + self.create_vectorial_model() - def create_model(self) -> None: + def create_model_no_intercept( + self, + X: FDataGrid, + coefficients: FDataGrid, + ) -> FDataGrid: """Create a functional response according to historical model.""" integral_body = ( - self.X.data_matrix[..., 0, np.newaxis] - * self.coefficients.data_matrix[..., 0] + X.data_matrix[..., 0, np.newaxis] + * coefficients.data_matrix[..., 0] ) integral_matrix = cumtrapz( integral_body, - x=self.X.grid_points[0], + x=X.grid_points[0], initial=0, axis=1, ) integral = np.diagonal(integral_matrix, axis1=1, axis2=2) - self.y = FDataGrid(self.intercept.data_matrix[..., 0] + integral) + return X.copy(data_matrix=integral) + + def create_model(self) -> None: + """Create a functional response according to historical model.""" + model_no_intercept = self.create_model_no_intercept( + X=self.X, + coefficients=self.coefficients, + ) + self.y = model_no_intercept + self.intercept + + def create_vectorial_model(self) -> None: + """Create a functional response according to historical model.""" + model_no_intercept = self.create_model_no_intercept( + X=self.X, + coefficients=self.coefficients, + ) + model_no_intercept2 = self.create_model_no_intercept( + X=self.X2, + coefficients=self.coefficients2, + ) + self.y2 = model_no_intercept + model_no_intercept2 + self.intercept def test_historical(self) -> None: """Test historical regression with data following the model.""" @@ -396,6 +436,35 @@ def test_historical(self) -> None: rtol=0, ) + def test_historical_vectorial(self) -> 
None: + """Test historical regression with data following the vector model.""" + X = self.X.concatenate(self.X2, as_coordinates=True) + + regression = HistoricalLinearRegression(n_intervals=10) + fit_predict_result = regression.fit_predict(X, self.y2) + predict_result = regression.predict(X) + + np.testing.assert_allclose( + predict_result.data_matrix, + fit_predict_result.data_matrix, + ) + + np.testing.assert_allclose( + predict_result.data_matrix, + self.y2.data_matrix, + atol=1e-1, + rtol=0, + ) + + np.testing.assert_allclose( + regression.intercept_.data_matrix, + self.intercept.data_matrix, + rtol=1e-2, + ) + + # Coefficient matrix not tested as it is probably + # an ill-posed problem + if __name__ == '__main__': print() From fc0129138e33817cd052309b1864c5c86dae7038 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 16 Aug 2021 02:06:06 +0200 Subject: [PATCH 392/417] Update docs. --- skfda/ml/regression/_historical_linear_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/ml/regression/_historical_linear_model.py b/skfda/ml/regression/_historical_linear_model.py index 4f1745577..b6d81d2e8 100644 --- a/skfda/ml/regression/_historical_linear_model.py +++ b/skfda/ml/regression/_historical_linear_model.py @@ -244,8 +244,8 @@ class HistoricalLinearRegression( influence the prediction. Attributes: - discretized_coef\_: The discretized values of the fitted - coefficient function. + basis_coef\_: The fitted coefficient function as a FDataBasis. + coef\_: The fitted coefficient function as a FDataGrid. intercept\_: Independent term in the linear model. Set to the constant function 0 if `fit_intercept = False`. From 85322de5986b520d85e7d2132c97918b95a06aa4 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 23 Aug 2021 03:50:24 +0200 Subject: [PATCH 393/417] Improve typing in cluster visualization. 
--- skfda/exploratory/visualization/_baseplot.py | 6 +- skfda/exploratory/visualization/_utils.py | 11 +- skfda/exploratory/visualization/clustering.py | 640 +++++++++--------- 3 files changed, 325 insertions(+), 332 deletions(-) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index 5741afef1..42a384c3f 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -13,7 +13,7 @@ from matplotlib.axes import Axes from matplotlib.figure import Figure -from ._utils import _figure_to_svg +from ._utils import _figure_to_svg, _get_figure_and_axes, _set_figure_layout class BasePlot(ABC): @@ -31,10 +31,14 @@ class BasePlot(ABC): @abstractmethod def __init__( self, + chart: Union[Figure, Axes, None] = None, + *, fig: Optional[Figure] = None, axes: Union[Axes, Sequence[Axes], None] = None, ) -> None: self.artists: np.ndarray + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout(fig, axes) self.fig = fig self.axes = axes diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 40f7c3fe3..807d57eac 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -41,6 +41,7 @@ def __getitem__(self, __key: K) -> V: def __len__(self) -> int: pass + def _create_figure(): """Create figure using the default backend.""" fig = plt.figure() @@ -94,7 +95,7 @@ def _get_figure_and_axes( axes = fig.axes else: - if isinstance(axes, matplotlib.axes.Axes): + if isinstance(axes, Axes): axes = [axes] fig = axes[0].figure @@ -277,9 +278,9 @@ def _set_labels( axes[i].set_ylabel(fdata.coordinate_names[i]) -def _change_luminosity(color, amount=0.5): +def _change_luminosity(color: ColorLike, amount: float = 0.5) -> ColorLike: """ - Changes the given color luminosity by the given amount. + Change the given color luminosity by the given amount. 
Input can be matplotlib color string, hex string, or RGB tuple. Note: @@ -307,9 +308,9 @@ def _change_luminosity(color, amount=0.5): return colorsys.hls_to_rgb(c[0], new_lightness, c[2]) -def _darken(color, amount=0): +def _darken(color: ColorLike, amount: float = 0) -> ColorLike: return _change_luminosity(color, 0.5 - amount / 2) -def _lighten(color, amount=0): +def _lighten(color: ColorLike, amount: float = 0) -> ColorLike: return _change_luminosity(color, 0.5 + amount / 2) diff --git a/skfda/exploratory/visualization/clustering.py b/skfda/exploratory/visualization/clustering.py index f94fe24cb..401dd0044 100644 --- a/skfda/exploratory/visualization/clustering.py +++ b/skfda/exploratory/visualization/clustering.py @@ -1,7 +1,8 @@ """Clustering Plots Module.""" -from typing import Optional, Sequence, Union +from typing import Optional, Sequence, Tuple, Union +import matplotlib import matplotlib.patches as mpatches import matplotlib.pyplot as plt import numpy as np @@ -9,12 +10,15 @@ from matplotlib.figure import Figure from matplotlib.ticker import MaxNLocator from mpldatacursor import datacursor +from sklearn.base import BaseEstimator from sklearn.exceptions import NotFittedError from sklearn.utils.validation import check_is_fitted from ...ml.clustering import FuzzyCMeans +from ...representation import FData, FDataGrid from ._baseplot import BasePlot from ._utils import ( + ColorLike, _darken, _get_figure_and_axes, _set_figure_layout, @@ -22,83 +26,96 @@ _set_labels, ) -__author__ = "Amanda Hernando Bernabé" -__email__ = "amanda.hernando@estudiante.uam.es" - -def _plot_clustering_checks(estimator, fdata, sample_colors, sample_labels, - cluster_colors, cluster_labels, - center_colors, center_labels): - """Checks the arguments *sample_colors*, *sample_labels*, *cluster_colors*, - *cluster_labels*, *center_colors*, *center_labels*, passed to the plot - functions, have the correct dimensions. 
- - Args: - estimator (BaseEstimator object): estimator used to calculate the - clusters. - fdata (FData object): contains the samples which are grouped - into different clusters. - sample_colors (list of colors): contains in order the colors of each - sample of the fdatagrid. - sample_labels (list of str): contains in order the labels of each - sample of the fdatagrid. - cluster_colors (list of colors): contains in order the colors of each - cluster the samples of the fdatagrid are classified into. - cluster_labels (list of str): contains in order the names of each - cluster the samples of the fdatagrid are classified into. - center_colors (list of colors): contains in order the colors of each - centroid of the clusters the samples of the fdatagrid are - classified into. - center_labels list of colors): contains in order the labels of each - centroid of the clusters the samples of the fdatagrid are - classified into. - - """ - - if sample_colors is not None and len( - sample_colors) != fdata.n_samples: +def _plot_clustering_checks( + estimator: BaseEstimator, + fdata: FData, + sample_colors: Optional[Sequence[ColorLike]], + sample_labels: Optional[Sequence[str]], + cluster_colors: Optional[Sequence[ColorLike]], + cluster_labels: Optional[Sequence[str]], + center_colors: Optional[Sequence[ColorLike]], + center_labels: Optional[Sequence[str]], +) -> None: + """Check the arguments.""" + if ( + sample_colors is not None + and len(sample_colors) != fdata.n_samples + ): raise ValueError( - "sample_colors must contain a color for each sample.") + "sample_colors must contain a color for each sample.", + ) - if sample_labels is not None and len( - sample_labels) != fdata.n_samples: + if ( + sample_labels is not None + and len(sample_labels) != fdata.n_samples + ): raise ValueError( - "sample_labels must contain a label for each sample.") + "sample_labels must contain a label for each sample.", + ) - if cluster_colors is not None and len( - cluster_colors) != 
estimator.n_clusters: + if ( + cluster_colors is not None + and len(cluster_colors) != estimator.n_clusters + ): raise ValueError( - "cluster_colors must contain a color for each cluster.") + "cluster_colors must contain a color for each cluster.", + ) - if cluster_labels is not None and len( - cluster_labels) != estimator.n_clusters: + if ( + cluster_labels is not None + and len(cluster_labels) != estimator.n_clusters + ): raise ValueError( - "cluster_labels must contain a label for each cluster.") + "cluster_labels must contain a label for each cluster.", + ) - if center_colors is not None and len( - center_colors) != estimator.n_clusters: + if ( + center_colors is not None + and len(center_colors) != estimator.n_clusters + ): raise ValueError( - "center_colors must contain a color for each center.") + "center_colors must contain a color for each center.", + ) - if center_labels is not None and len( - center_labels) != estimator.n_clusters: + if ( + center_labels is not None + and len(center_labels) != estimator.n_clusters + ): raise ValueError( - "centers_labels must contain a label for each center.") + "centers_labels must contain a label for each center.", + ) + + +def _check_if_estimator( + estimator: BaseEstimator, +) -> None: + """ + Check if the argument is an estimator. -def _check_if_estimator(estimator): - """Checks the argument *estimator* is actually an estimator that + Checks the argument *estimator* is actually an estimator that implements the *fit* method. Args: - estimator (BaseEstimator object): estimator used to calculate the + estimator: estimator used to calculate the clusters. 
+ """ msg = ("This %(name)s instance has no attribute \"fit\".") if not hasattr(estimator, "fit"): raise AttributeError(msg % {'name': type(estimator).__name__}) -def _get_labels(x_label, y_label, title, xlabel_str): - """Sets the arguments *xlabel*, *ylabel*, *title* passed to the plot + +def _get_labels( + x_label: Optional[str], + y_label: Optional[str], + title: Optional[str], + xlabel_str: str, +) -> Tuple[str, str, str]: + """ + Get the axes labels. + + Set the arguments *xlabel*, *ylabel*, *title* passed to the plot functions :func:`plot_cluster_lines ` and :func:`plot_cluster_bars @@ -106,17 +123,17 @@ def _get_labels(x_label, y_label, title, xlabel_str): in case they are not set yet. Args: - xlabel (lstr): Label for the x-axes. - ylabel (str): Label for the y-axes. - title (str): Title for the figure where the clustering results are + x_label: Label for the x-axes. + y_label: Label for the y-axes. + title: Title for the figure where the clustering results are ploted. - xlabel_str (str): In case xlabel is None, string to use for the labels + xlabel_str: In case xlabel is None, string to use for the labels in the x-axes. Returns: - xlabel (str): Labels for the x-axes. - ylabel (str): Labels for the y-axes. - title (str): Title for the figure where the clustering results are + xlabel: Labels for the x-axes. + ylabel: Labels for the y-axes. + title: Title for the figure where the clustering results are plotted. """ if x_label is None: @@ -130,188 +147,151 @@ def _get_labels(x_label, y_label, title, xlabel_str): return x_label, y_label, title + class ClusterPlot(BasePlot): """ ClusterPlot class. Args: - estimator (BaseEstimator object): estimator used to calculate the + estimator: estimator used to calculate the clusters. - X (FDataGrd object): contains the samples which are grouped + X: contains the samples which are grouped into different clusters. 
- fig (figure object): figure over which the graphs are plotted in + fig: figure over which the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes (list of axis objects): axis over where the graphs are plotted. + axes: axis over where the graphs are plotted. If None, see param fig. - n_rows (int): designates the number of rows of the figure to plot the + n_rows: designates the number of rows of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. - n_cols (int): designates the number of columns of the figure to plot + n_cols: designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. - sample_labels (list of str): contains in order the labels of each + sample_labels: contains in order the labels of each sample of the fdatagrid. - cluster_colors (list of colors): contains in order the colors of each + cluster_colors: contains in order the colors of each cluster the samples of the fdatagrid are classified into. - cluster_labels (list of str): contains in order the names of each + cluster_labels: contains in order the names of each cluster the samples of the fdatagrid are classified into. - center_colors (list of colors): contains in order the colors of each + center_colors: contains in order the colors of each centroid of the clusters the samples of the fdatagrid are classified into. - center_labels (list of colors): contains in order the labels of each + center_labels: contains in order the labels of each centroid of the clusters the samples of the fdatagrid are classified into. - center_width (int): width of the centroid curves. - colormap(colormap): colormap from which the colors of the plot are + center_width: width of the centroid curves. + colormap: colormap from which the colors of the plot are taken. Defaults to `rainbow`. 
""" def __init__( - self, estimator, fdata, chart=None, fig=None, axes=None, - n_rows=None, n_cols=None, - sample_labels=None, cluster_colors=None, - cluster_labels=None, center_colors=None, - center_labels=None, - center_width=3, - colormap=plt.cm.get_cmap('rainbow'), - ) -> None: - BasePlot.__init__(self) - self.fdata = fdata - self.estimator = estimator - self.sample_labels = sample_labels - self.cluster_colors = cluster_colors - self.cluster_labels = cluster_labels - self.center_colors = center_colors - self.center_labels = center_labels - self.center_width = center_width - self.colormap = colormap - - self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) - - def _set_figure_and_axes( self, + estimator: BaseEstimator, + fdata: FDataGrid, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, axes: Union[Axes, Sequence[Axes], None] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, + sample_labels: Optional[Sequence[str]] = None, + cluster_colors: Optional[Sequence[ColorLike]] = None, + cluster_labels: Optional[Sequence[str]] = None, + center_colors: Optional[Sequence[ColorLike]] = None, + center_labels: Optional[Sequence[str]] = None, + center_width: int = 3, + colormap: matplotlib.colors.Colormap = None, ) -> None: - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata( - fdata=self.fdata, + + if colormap is None: + colormap = plt.cm.get_cmap('rainbow') + + BasePlot.__init__( + self, + chart, fig=fig, axes=axes, - n_rows=n_rows, - n_cols=n_cols, ) - - self.fig = fig - self.axes = axes + self.fdata = fdata + self.estimator = estimator + self.sample_labels = sample_labels + self.cluster_colors = cluster_colors + self.cluster_labels = cluster_labels + self.center_colors = center_colors + self.center_labels = center_labels + self.center_width = center_width + self.colormap = colormap def n_samples(self) -> int: return self.fdata.n_samples - def _plot_clusters(self): - """Implementation 
of the plot of the FDataGrid samples by clusters. - - Args: - estimator (BaseEstimator object): estimator used to calculate the - clusters. - fdatagrid (FDataGrd object): contains the samples which are grouped - into different clusters. - fig (figure object): figure over which the graphs are plotted in - case ax is not specified. If None and ax is also None, the figure - is initialized. - axes (list of axes objects): axes over where the graphs are plotted. - If None, see param fig. - n_rows(int): designates the number of rows of the figure to plot the - different dimensions of the image. Only specified if fig and - ax are None. - n_cols(int): designates the number of columns of the figure to plot - the different dimensions of the image. Only specified if fig - and ax are None. - labels (numpy.ndarray, int: (n_samples, dim_codomain)): 2-dimensional - matrix where each row contains the number of cluster cluster - that observation belongs to. - sample_labels (list of str): contains in order the labels of each - sample of the fdatagrid. - cluster_colors (list of colors): contains in order the colors of each - cluster the samples of the fdatagrid are classified into. - cluster_labels (list of str): contains in order the names of each - cluster the samples of the fdatagrid are classified into. - center_colors (list of colors): contains in order the colors of each - centroid of the clusters the samples of the fdatagrid are - classified into. - center_labels list of colors): contains in order the labels of each - centroid of the clusters the samples of the fdatagrid are - classified into. - center_width (int): width of the centroids. - colormap(colormap): colormap from which the colors of the plot are - taken. - - Returns: - (tuple): tuple containing: - - fig (figure object): figure object in which the graphs are plotted - in case ax is None. - - ax (axes object): axes in which the graphs are plotted. 
- """ + def _plot_clusters(self) -> Figure: + """Implement the plot of the FDataGrid samples by clusters.""" _plot_clustering_checks( - self.estimator, - self.fdata, None, - self.sample_labels, - self.cluster_colors, - self.cluster_labels, - self.center_colors, - self.center_labels, + estimator=self.estimator, + fdata=self.fdata, + sample_colors=None, + sample_labels=self.sample_labels, + cluster_colors=self.cluster_colors, + cluster_labels=self.cluster_labels, + center_colors=self.center_colors, + center_labels=self.center_labels, ) if self.sample_labels is None: - self.sample_labels = [f'$SAMPLE: {i}$' for i in range(self.fdata.n_samples)] + self.sample_labels = [ + f'$SAMPLE: {i}$' for i in range(self.fdata.n_samples) + ] if self.cluster_colors is None: self.cluster_colors = self.colormap( - np.arange(self.estimator.n_clusters) / (self.estimator.n_clusters - 1)) + np.arange(self.estimator.n_clusters) + / (self.estimator.n_clusters - 1), + ) if self.cluster_labels is None: - cluster_labels = [ - f'$CLUSTER: {i}$' for i in range(self.estimator.n_clusters)] + self.cluster_labels = [ + f'$CLUSTER: {i}$' for i in range(self.estimator.n_clusters) + ] if self.center_colors is None: self.center_colors = [_darken(c, 0.5) for c in self.cluster_colors] if self.center_labels is None: self.center_labels = [ - f'$CENTER: {i}$' for i in range(self.estimator.n_clusters)] + f'$CENTER: {i}$' for i in range(self.estimator.n_clusters) + ] colors_by_cluster = self.cluster_colors[self.labels] patches = [] for i in range(self.estimator.n_clusters): patches.append( - mpatches.Patch(color=self.cluster_colors[i], - label=self.cluster_labels[i])) + mpatches.Patch( + color=self.cluster_colors[i], + label=self.cluster_labels[i], + ), + ) for j in range(self.fdata.dim_codomain): for i in range(self.fdata.n_samples): - self.artists = np.append(self.artists, self.axes[j].plot( - self.fdata.grid_points[0], - self.fdata.data_matrix[i, :, j], - c=colors_by_cluster[i], - 
label=self.sample_labels[i] - )) + self.artists = np.append( + self.artists, + self.axes[j].plot( + self.fdata.grid_points[0], + self.fdata.data_matrix[i, :, j], + c=colors_by_cluster[i], + label=self.sample_labels[i], + ), + ) for i in range(self.estimator.n_clusters): - self.axes[j].plot(self.fdata.grid_points[0], - self.estimator.cluster_centers_.data_matrix[ - i, - :, - j, - ], - c=self.center_colors[i], - label=self.center_labels[i], - linewidth=self.center_width) + self.axes[j].plot( + self.fdata.grid_points[0], + self.estimator.cluster_centers_.data_matrix[i, :, j], + c=self.center_colors[i], + label=self.center_labels[i], + linewidth=self.center_width, + ) self.axes[j].legend(handles=patches) datacursor(formatter='{label}'.format) @@ -319,23 +299,19 @@ def _plot_clusters(self): return self.fig - def plot(self): - """Plot of the FDataGrid samples by clusters. + def plot(self) -> Figure: + """ + Plot of the FDataGrid samples by clusters. - The clusters are calculated with the estimator passed as a parameter. If - the estimator is not fitted, the fit method is called. + The clusters are calculated with the estimator passed as a parameter. + If the estimator is not fitted, the fit method is called. Once each sample is assigned a label the plotting can be done. Each group is assigned a color described in a legend. Returns: - (tuple): tuple containing: - - fig (figure object): figure object in which the graphs are plotted - in case ax is None. + Plotted figure. - ax (axes object): axes in which the graphs are plotted. """ - self.artists = np.array([]) _check_if_estimator(self.estimator) @@ -358,40 +334,56 @@ class ClusterMembershipLinesPlot(BasePlot): Class ClusterMembershipLinesPlot. Args: - estimator (BaseEstimator object): estimator used to calculate the + estimator: estimator used to calculate the clusters. - X (FDataGrd object): contains the samples which are grouped + X: contains the samples which are grouped into different clusters. 
- fig (figure object, optional): figure over which the graph is + fig: figure over which the graph is plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes (axes object, optional): axis over where the graph is plotted. + axes: axis over where the graph is plotted. If None, see param fig. - sample_colors (list of colors, optional): contains in order the colors + sample_colors: contains in order the colors of each sample of the fdatagrid. - sample_labels (list of str, optional): contains in order the labels + sample_labels: contains in order the labels of each sample of the fdatagrid. - cluster_labels (list of str, optional): contains in order the names of + cluster_labels: contains in order the names of each cluster the samples of the fdatagrid are classified into. - colormap(colormap, optional): colormap from which the colors of the + colormap: colormap from which the colors of the plot are taken. - x_label (str): Label for the x-axis. Defaults to "Cluster". - y_label (str): Label for the y-axis. Defaults to + x_label: Label for the x-axis. Defaults to "Cluster". + y_label: Label for the y-axis. Defaults to "Degree of membership". - title (str, optional): Title for the figure where the clustering + title: Title for the figure where the clustering results are ploted. Defaults to "Degrees of membership of the samples to each cluster". 
""" def __init__( self, - estimator, fdata, chart=None, fig=None, axes=None, - sample_colors=None, sample_labels=None, + estimator: BaseEstimator, + fdata: FDataGrid, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + sample_colors=None, + sample_labels=None, cluster_labels=None, - colormap=plt.cm.get_cmap('rainbow'), - x_label=None, y_label=None, title=None, + colormap: matplotlib.colors.Colormap = None, + x_label: Optional[str] = None, + y_label: Optional[str] = None, + title: Optional[str] = None, ) -> None: - BasePlot.__init__(self) + + if colormap is None: + colormap = plt.cm.get_cmap('rainbow') + + BasePlot.__init__( + self, + chart, + fig=fig, + axes=axes, + ) self.fdata = fdata self.estimator = estimator self.sample_labels = sample_labels @@ -402,40 +394,21 @@ def __init__( self.title = title self.colormap = colormap - self._set_figure_and_axes(chart, fig, axes) - - def _set_figure_and_axes( - self, - chart: Union[Figure, Axes, None] = None, - fig: Optional[Figure] = None, - axes: Union[Axes, Sequence[Axes], None] = None, - ) -> None: - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout(fig, axes) - - self.fig = fig - self.axes = axes - def n_samples(self) -> int: return self.fdata.n_samples - def plot(self): + def plot(self) -> Figure: """Implementation of the plotting of the results of the :func:`Fuzzy K-Means ` method. - A kind of Parallel Coordinates plot is generated in this function with the - membership values obtained from the algorithm. A line is plotted for each - sample with the values for each cluster. See `Clustering Example - <../auto_examples/plot_clustering.html>`_. + A kind of Parallel Coordinates plot is generated in this function with + the membership values obtained from the algorithm. A line is plotted + for each sample with the values for each cluster. See + `Clustering Example <../auto_examples/plot_clustering.html>`_. 
Returns: - (tuple): tuple containing: - - fig (figure object): figure object in which the graphs are plotted - in case ax is None. - - ax (axes object): axes in which the graphs are plotted. + Plotted figure. """ self.artists = np.array([]) @@ -451,24 +424,43 @@ def plot(self): except NotFittedError: self.estimator.fit(self.fdata) - _plot_clustering_checks(self.estimator, self.fdata, self.sample_colors, self.sample_labels, - None, self.cluster_labels, None, None) + _plot_clustering_checks( + estimator=self.estimator, + fdata=self.fdata, + sample_colors=self.sample_colors, + sample_labels=self.sample_labels, + cluster_colors=None, + cluster_labels=self.cluster_labels, + center_colors=None, + center_labels=None, + ) - self.x_label, self.y_label, self.title = _get_labels(self.x_label, self.y_label, self.title, "Cluster") + self.x_label, self.y_label, self.title = _get_labels( + self.x_label, + self.y_label, + self.title, + "Cluster", + ) if self.sample_colors is None: - self.cluster_colors = self.colormap(np.arange(self.estimator.n_clusters) / - (self.estimator.n_clusters - 1)) + self.cluster_colors = self.colormap( + np.arange(self.estimator.n_clusters) + / (self.estimator.n_clusters - 1), + ) labels_by_cluster = np.argmax(self.estimator.labels_, axis=1) self.sample_colors = self.cluster_colors[labels_by_cluster] if self.sample_labels is None: - self.sample_labels = ['$SAMPLE: {}$'.format(i) for i in - range(self.fdata.n_samples)] + self.sample_labels = [ + f'$SAMPLE: {i}$' + for i in range(self.fdata.n_samples) + ] if self.cluster_labels is None: - self.cluster_labels = ['${}$'.format(i) for i in - range(self.estimator.n_clusters)] + self.cluster_labels = [ + f'${i}$' + for i in range(self.estimator.n_clusters) + ] self.axes[0].get_xaxis().set_major_locator(MaxNLocator(integer=True)) for i in range(self.fdata.n_samples): @@ -489,44 +481,61 @@ def plot(self): class ClusterMembershipPlot(BasePlot): - """ Class ClusterMembershipPlot. 
Args: - estimator (BaseEstimator object): estimator used to calculate the + estimator: estimator used to calculate the clusters. - X (FDataGrd object): contains the samples which are grouped + X: contains the samples which are grouped into different clusters. - fig (figure object, optional): figure over which the graph is + fig: figure over which the graph is plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes (axes object, optional): axis over where the graph is plotted. + axes: axis over where the graph is plotted. If None, see param fig. - sample_colors (list of colors, optional): contains in order the colors + sample_colors: contains in order the colors of each sample of the fdatagrid. - sample_labels (list of str, optional): contains in order the labels + sample_labels: contains in order the labels of each sample of the fdatagrid. - cluster_labels (list of str, optional): contains in order the names of + cluster_labels: contains in order the names of each cluster the samples of the fdatagrid are classified into. - colormap(colormap, optional): colormap from which the colors of the + colormap: colormap from which the colors of the plot are taken. - x_label (str): Label for the x-axis. Defaults to "Cluster". - y_label (str): Label for the y-axis. Defaults to + x_label: Label for the x-axis. Defaults to "Cluster". + y_label: Label for the y-axis. Defaults to "Degree of membership". - title (str, optional): Title for the figure where the clustering + title: Title for the figure where the clustering results are ploted. Defaults to "Degrees of membership of the samples to each cluster". 
""" def __init__( self, - estimator, fdata, chart=None, fig=None, axes=None, sort=-1, - sample_labels=None, cluster_colors=None, - cluster_labels=None, colormap=plt.cm.get_cmap('rainbow'), - x_label=None, y_label=None, title=None, + estimator: BaseEstimator, + fdata: FData, + chart: Union[Figure, Axes, None] = None, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + sort: int = -1, + sample_labels: Optional[Sequence[str]] = None, + cluster_colors: Optional[Sequence[ColorLike]] = None, + cluster_labels: Optional[Sequence[str]] = None, + colormap: matplotlib.colors.Colormap = None, + x_label: Optional[str] = None, + y_label: Optional[str] = None, + title: Optional[str] = None, ) -> None: - BasePlot.__init__(self) + + if colormap is None: + colormap = plt.cm.get_cmap('rainbow') + + BasePlot.__init__( + self, + chart, + fig=fig, + axes=axes, + ) self.fdata = fdata self.estimator = estimator self.sample_labels = sample_labels @@ -538,69 +547,23 @@ def __init__( self.colormap = colormap self.sort = sort - self._set_figure_and_axes(chart, fig, axes) - - def _set_figure_and_axes( - self, - chart: Union[Figure, Axes, None] = None, - fig: Optional[Figure] = None, - axes: Union[Axes, Sequence[Axes], None] = None, - ) -> None: - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout(fig, axes) - - self.fig = fig - self.axes = axes - def n_samples(self) -> int: return self.fdata.n_samples - def plot(self): + def plot(self) -> Figure: """Implementation of the plotting of the results of the :func:`Fuzzy K-Means ` method. - A kind of barplot is generated in this function with the membership values - obtained from the algorithm. There is a bar for each sample whose height is - 1 (the sum of the membership values of a sample add to 1), and the part - proportional to each cluster is coloured with the corresponding color. 
See + A kind of barplot is generated in this function with the membership + values obtained from the algorithm. There is a bar for each sample + whose height is 1 (the sum of the membership values of a sample add + to 1), and the part proportional to each cluster is coloured with + the corresponding color. See `Clustering Example <../auto_examples/plot_clustering.html>`_. - Args: - estimator (BaseEstimator object): estimator used to calculate the - clusters. - X (FDataGrd object): contains the samples which are grouped - into different clusters. - fig (figure object, optional): figure over which the graph is - plotted in case ax is not specified. If None and ax is also None, - the figure is initialized. - axes (axes object, optional): axes over where the graph is plotted. - If None, see param fig. - sort(int, optional): Number in the range [-1, n_clusters) designating - the cluster whose labels are sorted in a decrementing order. - Defaults to -1, in this case, no sorting is done. - sample_labels (list of str, optional): contains in order the labels - of each sample of the fdatagrid. - cluster_labels (list of str, optional): contains in order the names of - each cluster the samples of the fdatagrid are classified into. - cluster_colors (list of colors): contains in order the colors of each - cluster the samples of the fdatagrid are classified into. - colormap(colormap, optional): colormap from which the colors of the - plot are taken. - x_label (str): Label for the x-axis. Defaults to "Sample". - y_label (str): Label for the y-axis. Defaults to - "Degree of membership". - title (str): Title for the figure where the clustering results are - plotted. - Defaults to "Degrees of membership of the samples to each cluster". - Returns: - (tuple): tuple containing: - - fig (figure object): figure object in which the graph is plotted - in case ax is None. - - ax (axis object): axis in which the graph is plotted. + Plotted figure. 
""" self.artists = np.array([]) @@ -618,30 +581,55 @@ def plot(self): if self.sort < -1 or self.sort >= self.estimator.n_clusters: raise ValueError( - "The sorting number must belong to the interval [-1, n_clusters)") + "The sorting number must belong to " + "the interval [-1, n_clusters)", + ) - _plot_clustering_checks(self.estimator, self.fdata, None, self.sample_labels, - self.cluster_colors, self.cluster_labels, None, None) + _plot_clustering_checks( + estimator=self.estimator, + fdata=self.fdata, + sample_colors=None, + sample_labels=self.sample_labels, + cluster_colors=self.cluster_colors, + cluster_labels=self.cluster_labels, + center_colors=None, + center_labels=None, + ) - self.x_label, self.y_label, self.title = _get_labels(self.x_label, self.y_label, self.title, "Sample") + self.x_label, self.y_label, self.title = _get_labels( + self.x_label, + self.y_label, + self.title, + "Sample", + ) if self.sample_labels is None: self.sample_labels = np.arange(self.fdata.n_samples) if self.cluster_colors is None: self.cluster_colors = self.colormap( - np.arange(self.estimator.n_clusters) / (self.estimator.n_clusters - 1)) + np.arange(self.estimator.n_clusters) + / (self.estimator.n_clusters - 1), + ) if self.cluster_labels is None: - self.cluster_labels = [f'$CLUSTER: {i}$' for i in - range(self.estimator.n_clusters)] + self.cluster_labels = [ + f'$CLUSTER: {i}$' + for i in range(self.estimator.n_clusters) + ] patches = [] for i in range(self.estimator.n_clusters): patches.append( - mpatches.Patch(color=self.cluster_colors[i], label=self.cluster_labels[i])) + mpatches.Patch( + color=self.cluster_colors[i], + label=self.cluster_labels[i], + ), + ) if self.sort != -1: + labels_dim = self.estimator.labels_ + else: sample_indices = np.argsort(-self.estimator.labels_[:, self.sort]) self.sample_labels = np.copy(self.sample_labels[sample_indices]) labels_dim = np.copy(self.estimator.labels_[sample_indices]) @@ -653,17 +641,17 @@ def plot(self): temp_color = 
np.copy(self.cluster_colors[0]) self.cluster_colors[0] = self.cluster_colors[self.sort] self.cluster_colors[self.sort] = temp_color - else: - labels_dim = self.estimator.labels_ conc = np.zeros((self.fdata.n_samples, 1)) labels_dim = np.concatenate((conc, labels_dim), axis=-1) for i in range(self.estimator.n_clusters): - self.x = self.axes[0].bar(np.arange(self.fdata.n_samples), - labels_dim[:, i + 1], - bottom=np.sum(labels_dim[:, :(i + 1)], axis=1), - color=self.cluster_colors[i]) - + self.x = self.axes[0].bar( + np.arange(self.fdata.n_samples), + labels_dim[:, i + 1], + bottom=np.sum(labels_dim[:, :(i + 1)], axis=1), + color=self.cluster_colors[i], + ) + self.axes[0].set_xticks(np.arange(self.fdata.n_samples)) self.axes[0].set_xticklabels(self.sample_labels) self.axes[0].set_xlabel(self.x_label) From f69dd98815c44ab97b87b6390056087f57bb234a Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 23 Aug 2021 20:02:20 +0200 Subject: [PATCH 394/417] Typing K-Means. --- setup.cfg | 1 + skfda/_utils/__init__.py | 1 + skfda/_utils/_utils.py | 20 +- skfda/exploratory/visualization/clustering.py | 52 +- skfda/ml/clustering/_kmeans.py | 593 ++++++++++-------- skfda/representation/_typing.py | 13 +- 6 files changed, 391 insertions(+), 289 deletions(-) diff --git a/setup.cfg b/setup.cfg index fa5d2b311..c9bffddde 100644 --- a/setup.cfg +++ b/setup.cfg @@ -122,6 +122,7 @@ max-methods = 30 max-module-expressions = 15 max-module-members = 15 max-string-usages = 10 +max-try-body-length = 4 ignore-decorators = (property)|(overload) diff --git a/skfda/_utils/__init__.py b/skfda/_utils/__init__.py index 191324177..61c9714ae 100644 --- a/skfda/_utils/__init__.py +++ b/skfda/_utils/__init__.py @@ -3,6 +3,7 @@ RandomStateLike, _cartesian_product, _check_array_key, + _check_compatible_fdata, _check_estimator, _classifier_fit_depth_methods, _classifier_get_classes, diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 95012a406..b0b40e7bd 100644 --- a/skfda/_utils/_utils.py +++ 
b/skfda/_utils/_utils.py @@ -20,6 +20,7 @@ ) import numpy as np +import scipy.integrate from numpy import ndarray from pandas.api.indexers import check_array_indexer from sklearn.base import clone @@ -27,8 +28,6 @@ from sklearn.utils.multiclass import check_classification_targets from typing_extensions import Literal, Protocol -import scipy.integrate - from ..representation._typing import ( ArrayLike, DomainRange, @@ -76,6 +75,23 @@ def check_is_univariate(fd: FData) -> None: ) +def _check_compatible_fdata(fdata1: FData, fdata2: FData) -> None: + """ + Check that fdata is compatible. + """ + if (fdata1.dim_domain != fdata2.dim_domain): + raise ValueError( + f"Functional data has incompatible domain dimensions: " + f"{fdata1.dim_domain} != {fdata2.dim_domain}" + ) + + if (fdata1.dim_codomain != fdata2.dim_codomain): + raise ValueError( + f"Functional data has incompatible codomain dimensions: " + f"{fdata1.dim_codomain} != {fdata2.dim_codomain}" + ) + + def _to_grid( X: FData, y: FData, diff --git a/skfda/exploratory/visualization/clustering.py b/skfda/exploratory/visualization/clustering.py index 401dd0044..edc7a8d35 100644 --- a/skfda/exploratory/visualization/clustering.py +++ b/skfda/exploratory/visualization/clustering.py @@ -9,22 +9,15 @@ from matplotlib.axes import Axes from matplotlib.figure import Figure from matplotlib.ticker import MaxNLocator -from mpldatacursor import datacursor from sklearn.base import BaseEstimator from sklearn.exceptions import NotFittedError from sklearn.utils.validation import check_is_fitted +from ..._utils import _check_compatible_fdata from ...ml.clustering import FuzzyCMeans from ...representation import FData, FDataGrid from ._baseplot import BasePlot -from ._utils import ( - ColorLike, - _darken, - _get_figure_and_axes, - _set_figure_layout, - _set_figure_layout_for_fdata, - _set_labels, -) +from ._utils import ColorLike, _darken, _set_labels def _plot_clustering_checks( @@ -273,17 +266,21 @@ def _plot_clusters(self) -> 
Figure: ), ) + artists = [ + self.axes[j].plot( + self.fdata.grid_points[0], + self.fdata.data_matrix[i, :, j], + c=colors_by_cluster[i], + label=self.sample_labels[i], + ) + for j in range(self.fdata.dim_codomain) + for i in range(self.fdata.n_samples) + ] + + self.artists = np.array(artists) + for j in range(self.fdata.dim_codomain): - for i in range(self.fdata.n_samples): - self.artists = np.append( - self.artists, - self.axes[j].plot( - self.fdata.grid_points[0], - self.fdata.data_matrix[i, :, j], - c=colors_by_cluster[i], - label=self.sample_labels[i], - ), - ) + for i in range(self.estimator.n_clusters): self.axes[j].plot( self.fdata.grid_points[0], @@ -293,7 +290,6 @@ def _plot_clusters(self) -> Figure: linewidth=self.center_width, ) self.axes[j].legend(handles=patches) - datacursor(formatter='{label}'.format) _set_labels(self.fdata, self.fig, self.axes) @@ -317,7 +313,10 @@ def plot(self) -> Figure: _check_if_estimator(self.estimator) try: check_is_fitted(self.estimator) - self.estimator._check_test_data(self.fdata) + _check_compatible_fdata( + self.estimator.cluster_centers_, + self.fdata, + ) except NotFittedError: self.estimator.fit(self.fdata) @@ -420,7 +419,10 @@ def plot(self) -> Figure: try: check_is_fitted(self.estimator) - self.estimator._check_test_data(self.fdata) + _check_compatible_fdata( + self.estimator.cluster_centers_, + self.fdata, + ) except NotFittedError: self.estimator.fit(self.fdata) @@ -474,7 +476,6 @@ def plot(self) -> Figure: self.axes[0].set_xticklabels(self.cluster_labels) self.axes[0].set_xlabel(self.x_label) self.axes[0].set_ylabel(self.y_label) - datacursor(formatter='{label}'.format) self.fig.suptitle(self.title) return self.fig @@ -575,7 +576,10 @@ def plot(self) -> Figure: try: check_is_fitted(self.estimator) - self.estimator._check_test_data(self.fdata) + _check_compatible_fdata( + self.estimator.cluster_centers_, + self.fdata, + ) except NotFittedError: self.estimator.fit(self.fdata) diff --git 
a/skfda/ml/clustering/_kmeans.py b/skfda/ml/clustering/_kmeans.py index 09e644b22..c144f6114 100644 --- a/skfda/ml/clustering/_kmeans.py +++ b/skfda/ml/clustering/_kmeans.py @@ -1,17 +1,31 @@ """K-Means Algorithms Module.""" +from __future__ import annotations + import warnings from abc import abstractmethod +from typing import Any, Generic, Optional, Tuple, TypeVar import numpy as np from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin from sklearn.utils import check_random_state from sklearn.utils.validation import check_is_fitted -from ...misc.metrics import PairwiseMetric, l2_distance +from ..._utils import RandomStateLike, _check_compatible_fdata +from ...misc.metrics import Metric, PairwiseMetric, l2_distance +from ...representation import FDataGrid +from ...representation._typing import NDArrayAny, NDArrayFloat, NDArrayInt + +SelfType = TypeVar("SelfType", bound="BaseKMeans[Any]") +MembershipType = TypeVar("MembershipType", bound=NDArrayAny) -class BaseKMeans(BaseEstimator, ClusterMixin, TransformerMixin): +class BaseKMeans( + BaseEstimator, # type: ignore + ClusterMixin, # type: ignore + TransformerMixin, # type: ignore + Generic[MembershipType], +): """Base class to implement K-Means clustering algorithms. Class from which both :class:`K-Means @@ -20,32 +34,41 @@ class BaseKMeans(BaseEstimator, ClusterMixin, TransformerMixin): classes inherit. """ - def __init__(self, n_clusters, init, metric, n_init, max_iter, tol, - random_state): - """Initialization of the BaseKMeans class. + def __init__( + self, + *, + n_clusters: int = 2, + init: Optional[FDataGrid] = None, + metric: Metric[FDataGrid] = l2_distance, + n_init: int = 1, + max_iter: int = 100, + tol: float = 1e-4, + random_state: RandomStateLike = 0, + ): + """Initialize the BaseKMeans class. Args: - n_clusters (int, optional): Number of groups into which the samples + n_clusters: Number of groups into which the samples are classified. Defaults to 2. 
- init (FDataGrid, optional): Contains the initial centers of the + init: Contains the initial centers of the different clusters the algorithm starts with. Its data_marix must be of the shape (n_clusters, fdatagrid.ncol, fdatagrid.dim_codomain). Defaults to None, and the centers are initialized randomly. - metric (optional): functional data metric. Defaults to + metric: functional data metric. Defaults to *l2_distance*. - n_init (int, optional): Number of time the k-means algorithm will + n_init: Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia. - max_iter (int, optional): Maximum number of iterations of the + max_iter: Maximum number of iterations of the clustering algorithm for a single run. Defaults to 100. - tol (float, optional): tolerance used to compare the centroids + tol: tolerance used to compare the centroids calculated with the previous ones in every single run of the algorithm. - random_state (int, RandomState instance or None, optional): + random_state: Determines random number generation for centroid - initialization. ç Use an int to make the randomness + initialization. Use an int to make the randomness deterministic. Defaults to 0. See :term:`Glossary `. """ @@ -57,131 +80,166 @@ def __init__(self, n_clusters, init, metric, n_init, max_iter, tol, self.tol = tol self.random_state = random_state - def _check_clustering(self, fdata): - """Checks the arguments used in the - :func:`fit method `. + def _check_clustering(self, fdata: FDataGrid) -> FDataGrid: + """Check the arguments used in fit. Args: - fdata (FDataGrid object): Object whose samples + fdata: Object whose samples are classified into different groups. - """ + Returns: + Validated input. 
+ + """ if fdata.dim_domain > 1: raise NotImplementedError( - "Only support 1 dimension on the domain.") + "Only support 1 dimension on the domain.", + ) if fdata.n_samples < 2: raise ValueError( - "The number of observations must be greater than 1.") + "The number of observations must be greater than 1.", + ) if self.n_clusters < 2: raise ValueError( - "The number of clusters must be greater than 1.") + "The number of clusters must be greater than 1.", + ) if self.n_init < 1: raise ValueError( - "The number of iterations must be greater than 0.") + "The number of iterations must be greater than 0.", + ) if self.init is not None and self.n_init != 1: self.n_init = 1 - warnings.warn("Warning: The number of iterations is ignored " - "because the init parameter is set.") - - if self.init is not None and self.init.data_matrix.shape != ( - self.n_clusters,) + fdata.data_matrix.shape[1:]: - raise ValueError("The init FDataGrid data_matrix should be of " - "shape (n_clusters, n_features, dim_codomain) " - "and gives the initial centers.") + warnings.warn( + "Warning: The number of iterations is ignored " + "because the init parameter is set.", + ) + + if ( + self.init is not None + and self.init.data_matrix.shape != ( + (self.n_clusters,) + fdata.data_matrix.shape[1:] + ) + ): + raise ValueError( + "The init FDataGrid data_matrix should be of " + "shape (n_clusters, n_features, dim_codomain) " + "and gives the initial centers.", + ) if self.max_iter < 1: raise ValueError( - "The number of maximum iterations must be greater than 0.") + "The number of maximum iterations must be greater than 0.", + ) if self.tol < 0: raise ValueError("The tolerance must be positive.") return fdata - def _tolerance(self, fdata): + def _tolerance(self, fdata: FDataGrid) -> float: variance = fdata.var() mean_variance = np.mean(variance[0].data_matrix) return mean_variance * self.tol - def _init_centroids(self, fdatagrid, random_state): - """Compute the initial centroids + def _init_centroids( + 
self, + fdatagrid: FDataGrid, + random_state: np.random.RandomState, + ) -> FDataGrid: + """ + Compute the initial centroids. Args: - data_matrix (ndarray): matrix with the data only of the - dimension of the image of the fdatagrid the algorithm is - classifying. - fdatagrid (FDataGrid object): Object whose samples are - classified into different groups. - random_state (RandomState object): random number generation for - centroid initialization. + fdatagrid: Object whose samples are classified into different + groups. + random_state: Random number generation for centroid initialization. Returns: - centroids (ndarray): initial centroids - """ + Initial centroids. + """ if self.init is None: - _, idx = np.unique(fdatagrid.data_matrix, - axis=0, return_index=True) + _, idx = np.unique( + fdatagrid.data_matrix, + axis=0, + return_index=True, + ) unique_data = fdatagrid[np.sort(idx)] if len(unique_data) < self.n_clusters: - return ValueError("Not enough unique data points to " - "initialize the requested number of " - "clusters") + raise ValueError( + "Not enough unique data points to " + "initialize the requested number of " + "clusters", + ) indices = random_state.permutation(len(unique_data))[ - :self.n_clusters] + :self.n_clusters + ] centroids = unique_data[indices] return centroids.copy() - else: - return self.init.copy() - def _check_params(self): + return self.init.copy() + + def _check_params(self) -> None: pass @abstractmethod - def _create_membership(self, n_samples): + def _create_membership(self, n_samples: int) -> MembershipType: pass @abstractmethod - def _update(self, fdata, membership_matrix, distances_to_centroids, - centroids): + def _update( + self, + fdata: FDataGrid, + membership_matrix: MembershipType, + distances_to_centroids: NDArrayFloat, + centroids: FDataGrid, + ) -> None: pass - def _algorithm(self, fdata, random_state): - """ Implementation of the Fuzzy K-Means algorithm for FDataGrid objects + def _algorithm( + self, + fdata: FDataGrid, + 
random_state: np.random.RandomState, + ) -> Tuple[NDArrayFloat, FDataGrid, NDArrayFloat, int]: + """ + Fuzzy K-Means algorithm. + + Implementation of the Fuzzy K-Means algorithm for FDataGrid objects of any dimension. Args: - fdata (FDataGrid object): Object whose samples are clustered, + fdata: Object whose samples are clustered, classified into different groups. - random_state (RandomState object): random number generation for + random_state: random number generation for centroid initialization. Returns: - (tuple): tuple containing: + Tuple containing: - membership values (numpy.ndarray): + membership values: membership value that observation has to each cluster. - centroids (numpy.ndarray: (n_clusters, ncol, dim_codomain)): + centroids: centroids for each cluster. - distances_to_centroids (numpy.ndarray: (n_samples, - n_clusters)): distances of each sample to each cluster. + distances_to_centroids: distances of each sample to each + cluster. - repetitions(int): number of iterations the algorithm was run. + repetitions: number of iterations the algorithm was run. """ repetitions = 0 centroids_old_matrix = np.zeros( - (self.n_clusters,) + fdata.data_matrix.shape[1:]) + (self.n_clusters,) + fdata.data_matrix.shape[1:], + ) membership_matrix = self._create_membership(fdata.n_samples) centroids = self._init_centroids(fdata, random_state) @@ -191,9 +249,13 @@ def _algorithm(self, fdata, random_state): tolerance = self._tolerance(fdata) - while (repetitions == 0 or - (not np.all(self.metric(centroids, centroids_old) < tolerance) - and repetitions < self.max_iter)): + while ( + repetitions == 0 + or ( + not np.all(self.metric(centroids, centroids_old) < tolerance) + and repetitions < self.max_iter + ) + ): centroids_old.data_matrix[...] 
= centroids.data_matrix @@ -203,50 +265,74 @@ def _algorithm(self, fdata, random_state): fdata=fdata, membership_matrix=membership_matrix, distances_to_centroids=distances_to_centroids, - centroids=centroids) + centroids=centroids, + ) repetitions += 1 - return (membership_matrix, centroids, - distances_to_centroids, repetitions) + return ( + membership_matrix, + centroids, + distances_to_centroids, + repetitions, + ) @abstractmethod - def _compute_inertia(self, membership, centroids, - distances_to_centroids): + def _compute_inertia( + self, + membership: MembershipType, + centroids: FDataGrid, + distances_to_centroids: NDArrayFloat, + ) -> float: pass - def fit(self, X, y=None, sample_weight=None): - """ Computes Fuzzy K-Means clustering calculating the attributes - *labels_*, *cluster_centers_*, *inertia_* and *n_iter_*. + def fit( + self: SelfType, + X: FDataGrid, + y: None = None, + sample_weight: None = None, + ) -> SelfType: + """ + Fit the model. Args: - X (FDataGrid object): Object whose samples are clusered, + X: Object whose samples are clustered, classified into different groups. - y (Ignored): present here for API consistency by convention. - sample_weight (Ignored): present here for API consistency by + y: present here for API consistency by convention. + sample_weight: present here for API consistency by convention. + + Returns: + Fitted model. 
+ """ fdata = self._check_clustering(X) random_state = check_random_state(self.random_state) self._check_params() - best_inertia = None - best_membership = None - best_centroids = None - best_distances_to_centroids = None - best_n_iter = None + best_inertia = np.inf for _ in range(self.n_init): - (membership, centroids, - distances_to_centroids, n_iter) = ( - self._algorithm(fdata=fdata, - random_state=random_state)) - - inertia = self._compute_inertia(membership, centroids, - distances_to_centroids) - - if best_inertia is None or inertia < best_inertia: + ( + membership, + centroids, + distances_to_centroids, + n_iter, + ) = ( + self._algorithm( + fdata=fdata, + random_state=random_state, + ) + ) + + inertia = self._compute_inertia( + membership, + centroids, + distances_to_centroids, + ) + + if inertia < best_inertia: best_inertia = inertia best_membership = membership best_centroids = centroids @@ -261,30 +347,23 @@ def fit(self, X, y=None, sample_weight=None): return self - def _check_test_data(self, fdatagrid): - """Checks that the FDataGrid object and the calculated centroids have - compatible shapes. - """ - if (fdatagrid.data_matrix.shape[1:3] - != self.cluster_centers_.data_matrix.shape[1:3]): - raise ValueError("The fdatagrid shape is not the one expected for " - "the calculated cluster_centers_.") - - def predict(self, X, sample_weight=None): + def predict( + self, + X: FDataGrid, + sample_weight: None = None, + ) -> NDArrayInt: """Predict the closest cluster each sample in X belongs to. Args: - X (FDataGrid object): Object whose samples are classified into - different groups. - y (Ignored): present here for API consistency by convention. - sample_weight (Ignored): present here for API consistency by - convention. + X: Object whose samples are classified into different groups. + sample_weight: present here for API consistency by convention. Returns: Label of each sample. 
+ """ check_is_fitted(self) - self._check_test_data(X) + _check_compatible_fdata(self.cluster_centers_, X) membership_matrix = self._create_membership(X.n_samples) centroids = self.cluster_centers_.copy() @@ -297,68 +376,73 @@ def predict(self, X, sample_weight=None): fdata=X, membership_matrix=membership_matrix, distances_to_centroids=distances_to_centroids, - centroids=centroids) + centroids=centroids, + ) return membership_matrix - def transform(self, X): + def transform(self, X: FDataGrid) -> NDArrayFloat: """Transform X to a cluster-distance space. Args: - X (FDataGrid object): Object whose samples are classified into + X: Object whose samples are classified into different groups. - y (Ignored): present here for API consistency by convention. - sample_weight (Ignored): present here for API consistency by - convention. Returns: - distances_to_centers (numpy.ndarray: (n_samples, n_clusters)): + distances_to_centers: distances of each sample to each cluster. + """ check_is_fitted(self) - self._check_test_data(X) + _check_compatible_fdata(self.cluster_centers_, X) return self._distances_to_centers - def fit_transform(self, X, y=None, sample_weight=None): + def fit_transform( + self, + X: FDataGrid, + y: None = None, + sample_weight: None = None, + ) -> NDArrayFloat: """Compute clustering and transform X to cluster-distance space. Args: - X (FDataGrid object): Object whose samples are classified into - different groups. - y (Ignored): present here for API consistency by convention. - sample_weight (Ignored): present here for API consistency by - convention. + X: Object whose samples are classified into different groups. + y: present here for API consistency by convention. + sample_weight: present here for API consistency by convention. Returns: - distances_to_centers (numpy.ndarray: (n_samples, n_clusters)): - distances of each sample to each cluster. + Distances of each sample to each cluster. 
+ """ self.fit(X) return self._distances_to_centers - def score(self, X, y=None, sample_weight=None): + def score( + self, + X: FDataGrid, + y: None = None, + sample_weight: None = None, + ) -> float: """Opposite of the value of X on the K-means objective. Args: - X (FDataGrid object): Object whose samples are classified into + X: Object whose samples are classified into different groups. - y (Ignored): present here for API consistency by convention. - sample_weight (Ignored): present here for API consistency by + y: present here for API consistency by convention. + sample_weight: present here for API consistency by convention. Returns: - score (numpy.array: (fdatagrid.dim_codomain)): negative *inertia_* - attribute. + Negative ``inertia_`` attribute. """ check_is_fitted(self) - self._check_test_data(X) + _check_compatible_fdata(self.cluster_centers_, X) return -self.inertia_ -class KMeans(BaseKMeans): - r"""Representation and implementation of the K-Means algorithm - for the FdataGrid object. +class KMeans(BaseKMeans[NDArrayInt]): + r"""K-Means algorithm for functional data. Let :math:`\mathbf{X = \left\{ x_{1}, x_{2}, ..., x_{n}\right\}}` be a given dataset to be analyzed, and :math:`\mathbf{V = \left\{ v_{1}, v_{2}, @@ -410,39 +494,37 @@ class KMeans(BaseKMeans): object. Args: - n_clusters (int, optional): Number of groups into which the samples are + n_clusters: Number of groups into which the samples are classified. Defaults to 2. - init (FDataGrid, optional): Contains the initial centers of the + init: Contains the initial centers of the different clusters the algorithm starts with. Its data_marix must be of the shape (n_clusters, fdatagrid.ncol, fdatagrid.dim_codomain). Defaults to None, and the centers are initialized randomly. - metric (optional): functional data metric. Defaults to + metric: functional data metric. Defaults to *l2_distance*. 
- n_init (int, optional): Number of time the k-means algorithm will be + n_init: Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia. - max_iter (int, optional): Maximum number of iterations of the + max_iter: Maximum number of iterations of the clustering algorithm for a single run. Defaults to 100. - tol (float, optional): tolerance used to compare the centroids + tol: Tolerance used to compare the centroids calculated with the previous ones in every single run of the algorithm. - random_state (int, RandomState instance or None, optional): + random_state: Determines random number generation for centroid initialization. Use an int to make the randomness deterministic. Defaults to 0. See :term:`Glossary `. Attributes: - labels_ (numpy.ndarray: n_samples): vector in which each entry contains - the cluster each observation belongs to. - cluster_centers_ (FDataGrid object): data_matrix of shape - (n_clusters, ncol, dim_codomain) and contains the centroids for - each cluster. - inertia_ (numpy.ndarray, (fdatagrid.dim_codomain)): Sum of squared - distances of samples to their closest cluster center for each + labels\_: Vector in which each entry contains the cluster each + observation belongs to. + cluster_centers\_: data_matrix of shape (n_clusters, ncol, + dim_codomain) and contains the centroids for each cluster. + inertia\_: Sum of squared distances of samples to their closest + cluster center for each dimension. + n_iter\_: number of iterations the algorithm was run for each dimension. - n_iter_ (numpy.ndarray, (fdatagrid.dim_codomain)): number of iterations - the algorithm was run for each dimension. Example: @@ -472,67 +554,46 @@ class KMeans(BaseKMeans): """ - def __init__(self, n_clusters=2, init=None, - metric=l2_distance, - n_init=1, max_iter=100, tol=1e-4, random_state=0): - """Initialization of the KMeans class. 
- - Args: - n_clusters (int, optional): Number of groups into which the samples - are classified. Defaults to 2. - init (FDataGrid, optional): Contains the initial centers of the - different clusters the algorithm starts with. Its data_marix - must be of the shape (n_clusters, fdatagrid.ncol, - fdatagrid.dim_codomain). Defaults to None, and the centers are - initialized randomly. - metric (optional): functional data metric. Defaults to - *l2_distance*. - n_init (int, optional): Number of time the k-means algorithm will - be run with different centroid seeds. The final results will - be the best output of n_init consecutive runs in terms - of inertia. - max_iter (int, optional): Maximum number of iterations of the - clustering algorithm for a single run. Defaults to 100. - tol (float, optional): tolerance used to compare the centroids - calculated with the previous ones in every single run of the - algorithm. - random_state (int, RandomState instance or None, optional): - Determines random number generation for centroid - initialization. Use an int to make the randomness - deterministic. - Defaults to 0. 
- """ - super().__init__(n_clusters=n_clusters, init=init, metric=metric, - n_init=n_init, max_iter=max_iter, tol=tol, - random_state=random_state) - - def _compute_inertia(self, membership, centroids, - distances_to_centroids): - distances_to_their_center = np.choose(membership, - distances_to_centroids.T) + def _compute_inertia( + self, + membership: NDArrayInt, + centroids: FDataGrid, + distances_to_centroids: NDArrayFloat, + ) -> float: + distances_to_their_center = np.choose( + membership, + distances_to_centroids.T, + ) return np.sum(distances_to_their_center**2) - def _create_membership(self, n_samples): + def _create_membership(self, n_samples: int) -> NDArrayInt: return np.empty(n_samples, dtype=int) - def _update(self, fdata, membership_matrix, distances_to_centroids, - centroids): + def _update( + self, + fdata: FDataGrid, + membership_matrix: NDArrayInt, + distances_to_centroids: NDArrayFloat, + centroids: FDataGrid, + ) -> None: membership_matrix[:] = np.argmin(distances_to_centroids, axis=1) for i in range(self.n_clusters): - indices, = np.where(membership_matrix == i) + indices = np.where(membership_matrix == i)[0] if len(indices) != 0: centroids.data_matrix[i] = np.average( - fdata.data_matrix[indices, ...], axis=0) + fdata.data_matrix[indices, ...], + axis=0, + ) -class FuzzyCMeans(BaseKMeans): - r""" Representation and implementation of the Fuzzy c-Means clustering - algorithm for the FDataGrid object. +class FuzzyCMeans(BaseKMeans[NDArrayFloat]): + r""" + Fuzzy c-Means clustering for functional data. Let :math:`\mathbf{X = \left\{ x_{1}, x_{2}, ..., x_{n}\right\}}` be a given dataset to be analyzed, and :math:`\mathbf{V = \left\{ v_{1}, v_{2}, @@ -590,41 +651,41 @@ class FuzzyCMeans(BaseKMeans): object. Args: - n_clusters (int, optional): Number of groups into which the samples are + n_clusters: Number of groups into which the samples are classified. Defaults to 2. 
- init (FDataGrid, optional): Contains the initial centers of the + init: Contains the initial centers of the different clusters the algorithm starts with. Its data_marix must be of the shape (n_clusters, fdatagrid.ncol, fdatagrid.dim_codomain). Defaults to None, and the centers are initialized randomly. - metric (optional): functional data metric. Defaults to + metric: functional data metric. Defaults to *l2_distance*. - n_init (int, optional): Number of time the k-means algorithm will be + n_init: Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia. - max_iter (int, optional): Maximum number of iterations of the + max_iter: Maximum number of iterations of the clustering algorithm for a single run. Defaults to 100. - tol (float, optional): tolerance used to compare the centroids + tol: tolerance used to compare the centroids calculated with the previous ones in every single run of the algorithm. - random_state (int, RandomState instance or None, optional): + random_state: Determines random number generation for centroid initialization. Use an int to make the randomness deterministic. Defaults to 0. See :term:`Glossary `. - fuzzifier (int, optional): Scalar parameter used to specify the + fuzzifier: Scalar parameter used to specify the degree of fuzziness in the fuzzy algorithm. Defaults to 2. Attributes: - labels_ (numpy.ndarray: (n_samples, n_clusters)): 2-dimensional + labels\_: (n_samples, n_clusters)): 2-dimensional matrix in which each row contains the cluster that observation belongs to. - cluster_centers_ (FDataGrid object): data_matrix of shape + cluster_centers\_: data_matrix of shape (n_clusters, ncol, dim_codomain) and contains the centroids for each cluster. - inertia_ (numpy.ndarray, (fdatagrid.dim_codomain)): Sum of squared + inertia\_: Sum of squared distances of samples to their closest cluster center for each dimension. 
- n_iter_ (numpy.ndarray, (fdatagrid.dim_codomain)): number of iterations + n_iter\_: number of iterations the algorithm was run for each dimension. @@ -652,79 +713,87 @@ class FuzzyCMeans(BaseKMeans): """ - def __init__(self, n_clusters=2, init=None, - metric=l2_distance, n_init=1, max_iter=100, - tol=1e-4, random_state=0, fuzzifier=2): - """Initialization of the FuzzyKMeans class. - - Args: - n_clusters (int, optional): Number of groups into which the samples - are classified. Defaults to 2. - init (FDataGrid, optional): Contains the initial centers of the - different clusters the algorithm starts with. Its data_marix - must be of the shape (n_clusters, fdatagrid.ncol, - fdatagrid.dim_codomain). - Defaults to None, and the centers are initialized randomly. - metric (optional): functional data metric. Defaults to - *l2_distance*. - n_init (int, optional): Number of time the k-means algorithm will - be run with different centroid seeds. The final results will be - the best output of n_init consecutive runs in terms of inertia. - max_iter (int, optional): Maximum number of iterations of the - clustering algorithm for a single run. Defaults to 100. - tol (float, optional): tolerance used to compare the centroids - calculated with the previous ones in every single run of the - algorithm. - random_state (int, RandomState instance or None, optional): - Determines random number generation for centroid - initialization. Use an int to make the randomness - deterministic. Defaults to 0. - fuzzifier (int, optional): Scalar parameter used to specify the - degree of fuzziness in the fuzzy algorithm. Defaults to 2. 
- - """ - super().__init__(n_clusters=n_clusters, init=init, metric=metric, - n_init=n_init, - max_iter=max_iter, tol=tol, random_state=random_state) + def __init__( + self, + *, + n_clusters: int = 2, + init: Optional[FDataGrid] = None, + metric: Metric[FDataGrid] = l2_distance, + n_init: int = 1, + max_iter: int = 100, + tol: float = 1e-4, + random_state: RandomStateLike = 0, + fuzzifier: float = 2, + ) -> None: + super().__init__( + n_clusters=n_clusters, + init=init, + metric=metric, + n_init=n_init, + max_iter=max_iter, + tol=tol, + random_state=random_state, + ) self.fuzzifier = fuzzifier - def _check_params(self): + def _check_params(self) -> None: if self.fuzzifier <= 1: raise ValueError("The fuzzifier parameter must be greater than 1.") - def _compute_inertia(self, membership, centroids, - distances_to_centroids): + def _compute_inertia( + self, + membership: NDArrayFloat, + centroids: FDataGrid, + distances_to_centroids: NDArrayFloat, + ) -> float: return np.sum( membership**self.fuzzifier * distances_to_centroids**2, ) - def _create_membership(self, n_samples): + def _create_membership(self, n_samples: int) -> NDArrayFloat: return np.empty((n_samples, self.n_clusters)) - def _update(self, fdata, membership_matrix, distances_to_centroids, - centroids): + def _update( + self, + fdata: FDataGrid, + membership_matrix: NDArrayFloat, + distances_to_centroids: NDArrayFloat, + centroids: FDataGrid, + ) -> None: # Divisions by zero allowed with np.errstate(divide='ignore'): - distances_to_centers_raised = (distances_to_centroids**( - 2 / (1 - self.fuzzifier))) + distances_to_centers_raised = ( + distances_to_centroids**(2 / (1 - self.fuzzifier)) + ) # Divisions infinity by infinity allowed with np.errstate(invalid='ignore'): - membership_matrix[:, :] = (distances_to_centers_raised - / np.sum( - distances_to_centers_raised, - axis=1, keepdims=True)) + membership_matrix[:, :] = ( + distances_to_centers_raised + / np.sum( + distances_to_centers_raised, + axis=1, + 
keepdims=True, + ) + ) # inf / inf divisions should be 1 in this context membership_matrix[np.isnan(membership_matrix)] = 1 membership_matrix_raised = np.power( - membership_matrix, self.fuzzifier) + membership_matrix, + self.fuzzifier, + ) - slice_denominator = ((slice(None),) + (np.newaxis,) * - (fdata.data_matrix.ndim - 1)) + slice_denominator = ( + (slice(None),) + (np.newaxis,) * (fdata.data_matrix.ndim - 1) + ) centroids.data_matrix[:] = ( - np.einsum('ij,i...->j...', membership_matrix_raised, - fdata.data_matrix) - / np.sum(membership_matrix_raised, axis=0)[slice_denominator]) + np.einsum( + 'ij,i...->j...', + membership_matrix_raised, + fdata.data_matrix, + ) + / np.sum(membership_matrix_raised, axis=0)[slice_denominator] + ) diff --git a/skfda/representation/_typing.py b/skfda/representation/_typing.py index c7228b3d6..b246901d0 100644 --- a/skfda/representation/_typing.py +++ b/skfda/representation/_typing.py @@ -1,5 +1,5 @@ """Common types.""" -from typing import Optional, Sequence, Tuple, TypeVar, Union +from typing import Any, Optional, Sequence, Tuple, TypeVar, Union import numpy as np from typing_extensions import Protocol @@ -9,6 +9,17 @@ except ImportError: ArrayLike = np.ndarray # type:ignore +try: + from numpy.typing import NDArray + NDArrayAny = NDArray[Any] + NDArrayInt = NDArray[np.int_] + NDArrayFloat = NDArray[np.float_] +except ImportError: + NDArray = np.ndarray # type:ignore + NDArrayAny = np.ndarray # type:ignore + NDArrayInt = np.ndarray # type:ignore + NDArrayFloat = np.ndarray # type:ignore + VectorType = TypeVar("VectorType") DomainRange = Tuple[Tuple[float, float], ...] 
From fc33fce3c5574da46f95b6a9cc7eac81a8cbc824 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 23 Aug 2021 23:42:37 +0200 Subject: [PATCH 395/417] Change FuzzyKMeans to use predict_proba --- examples/plot_clustering.py | 20 +-- skfda/_utils/_utils.py | 8 +- skfda/exploratory/visualization/_baseplot.py | 17 +- skfda/exploratory/visualization/clustering.py | 155 +++++++++--------- skfda/ml/clustering/_kmeans.py | 74 ++++++++- tests/test_clustering.py | 155 +++++++----------- 6 files changed, 228 insertions(+), 201 deletions(-) diff --git a/examples/plot_clustering.py b/examples/plot_clustering.py index 1e25062a1..e4c968f11 100644 --- a/examples/plot_clustering.py +++ b/examples/plot_clustering.py @@ -84,7 +84,7 @@ cluster_labels = climates.categories[np.array([0, 2, 1])] ClusterPlot(kmeans, fd, cluster_colors=cluster_colors, - cluster_labels=cluster_labels).plot() + cluster_labels=cluster_labels).plot() ############################################################################## # Other clustering algorithm implemented is the Fuzzy K-Means found in the @@ -92,15 +92,15 @@ # above procedure, an object of this type is instantiated with the desired # data and then, the # :func:`~skfda.ml.clustering.FuzzyCMeans.fit` method is called. -# Internally, the attribute ``labels_`` is calculated, which contains +# Internally, the attribute ``membership_degree_`` is calculated, which contains # ´n_clusters´ elements for each sample and dimension, denoting the degree of # membership of each sample to each cluster. They are obtained calling the -# method :func:`~skfda.ml.clustering.FuzzyCMeans.predict`. Also, the centroids +# method :func:`~skfda.ml.clustering.FuzzyCMeans.predict_proba`. Also, the centroids # of each cluster are obtained. 
fuzzy_kmeans = FuzzyCMeans(n_clusters=n_clusters, random_state=seed) fuzzy_kmeans.fit(fd) -print(fuzzy_kmeans.predict(fd)) +print(fuzzy_kmeans.predict_proba(fd)) ############################################################################## # To see the information in a graphic way, the method @@ -109,7 +109,7 @@ # greatest. ClusterPlot(fuzzy_kmeans, fd, cluster_colors=cluster_colors, - cluster_labels=cluster_labels).plot() + cluster_labels=cluster_labels).plot() ############################################################################## # Another plot implemented to show the results in the class @@ -122,7 +122,7 @@ colors_by_climate = colormap(climates.codes / (n_climates - 1)) ClusterMembershipLinesPlot(fuzzy_kmeans, fd, cluster_labels=cluster_labels, - sample_colors=colors_by_climate).plot() + sample_colors=colors_by_climate).plot() ############################################################################## # Finally, the function @@ -131,7 +131,7 @@ # proportionally to the membership values with the color of each cluster. 
ClusterMembershipPlot(fuzzy_kmeans, fd, cluster_colors=cluster_colors, - cluster_labels=cluster_labels).plot() + cluster_labels=cluster_labels).plot() ############################################################################## # The possibility of sorting the bars according to a cluster is given @@ -140,14 +140,14 @@ # # We can order the data using the first cluster: ClusterMembershipPlot(fuzzy_kmeans, fd, sort=0, cluster_colors=cluster_colors, - cluster_labels=cluster_labels).plot() + cluster_labels=cluster_labels).plot() ############################################################################## # Using the second cluster: ClusterMembershipPlot(fuzzy_kmeans, fd, sort=1, cluster_colors=cluster_colors, - cluster_labels=cluster_labels).plot() + cluster_labels=cluster_labels).plot() ############################################################################## # And using the third cluster: ClusterMembershipPlot(fuzzy_kmeans, fd, sort=2, cluster_colors=cluster_colors, - cluster_labels=cluster_labels).plot() + cluster_labels=cluster_labels).plot() diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index b0b40e7bd..b901099eb 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -76,19 +76,17 @@ def check_is_univariate(fd: FData) -> None: def _check_compatible_fdata(fdata1: FData, fdata2: FData) -> None: - """ - Check that fdata is compatible. 
- """ + """Check that fdata is compatible.""" if (fdata1.dim_domain != fdata2.dim_domain): raise ValueError( f"Functional data has incompatible domain dimensions: " - f"{fdata1.dim_domain} != {fdata2.dim_domain}" + f"{fdata1.dim_domain} != {fdata2.dim_domain}", ) if (fdata1.dim_codomain != fdata2.dim_codomain): raise ValueError( f"Functional data has incompatible codomain dimensions: " - f"{fdata1.dim_codomain} != {fdata2.dim_codomain}" + f"{fdata1.dim_codomain} != {fdata2.dim_codomain}", ) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index 42a384c3f..803f7ede3 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -37,10 +37,6 @@ def __init__( axes: Union[Axes, Sequence[Axes], None] = None, ) -> None: self.artists: np.ndarray - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout(fig, axes) - self.fig = fig - self.axes = axes @abstractmethod def plot( @@ -60,6 +56,19 @@ def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" pass + def _set_figure_and_axes( + self, + chart: Union[Figure, Axes, None] = None, + *, + fig: Optional[Figure] = None, + axes: Union[Axes, Sequence[Axes], None] = None, + ) -> None: + fig, axes = _get_figure_and_axes(chart, fig, axes) + fig, axes = _set_figure_layout(fig, axes) + + self.fig = fig + self.axes = axes + def _repr_svg_(self) -> str: """Automatically represents the object as an svg when calling it.""" self.fig = self.plot() diff --git a/skfda/exploratory/visualization/clustering.py b/skfda/exploratory/visualization/clustering.py index edc7a8d35..61ba1241e 100644 --- a/skfda/exploratory/visualization/clustering.py +++ b/skfda/exploratory/visualization/clustering.py @@ -1,5 +1,7 @@ """Clustering Plots Module.""" +from __future__ import annotations + from typing import Optional, Sequence, Tuple, Union import matplotlib @@ -9,19 +11,46 @@ from 
matplotlib.axes import Axes from matplotlib.figure import Figure from matplotlib.ticker import MaxNLocator -from sklearn.base import BaseEstimator from sklearn.exceptions import NotFittedError from sklearn.utils.validation import check_is_fitted +from typing_extensions import Protocol from ..._utils import _check_compatible_fdata -from ...ml.clustering import FuzzyCMeans from ...representation import FData, FDataGrid +from ...representation._typing import NDArrayFloat, NDArrayInt from ._baseplot import BasePlot from ._utils import ColorLike, _darken, _set_labels +class ClusteringEstimator(Protocol): + + @property + def n_clusters(self) -> int: + pass + + @property + def cluster_centers_(self) -> FDataGrid: + pass + + @property + def labels_(self) -> NDArrayInt: + pass + + def fit(self, X: FDataGrid) -> ClusteringEstimator: + pass + + def predict(self, X: FDataGrid) -> NDArrayInt: + pass + + +class FuzzyClusteringEstimator(ClusteringEstimator, Protocol): + + def predict_proba(self, X: FDataGrid) -> NDArrayFloat: + pass + + def _plot_clustering_checks( - estimator: BaseEstimator, + estimator: ClusteringEstimator, fdata: FData, sample_colors: Optional[Sequence[ColorLike]], sample_labels: Optional[Sequence[str]], @@ -80,25 +109,6 @@ def _plot_clustering_checks( ) -def _check_if_estimator( - estimator: BaseEstimator, -) -> None: - """ - Check if the argument is an estimator. - - Checks the argument *estimator* is actually an estimator that - implements the *fit* method. - - Args: - estimator: estimator used to calculate the - clusters. 
- - """ - msg = ("This %(name)s instance has no attribute \"fit\".") - if not hasattr(estimator, "fit"): - raise AttributeError(msg % {'name': type(estimator).__name__}) - - def _get_labels( x_label: Optional[str], y_label: Optional[str], @@ -180,7 +190,7 @@ class ClusterPlot(BasePlot): def __init__( self, - estimator: BaseEstimator, + estimator: ClusteringEstimator, fdata: FDataGrid, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, @@ -199,8 +209,7 @@ def __init__( if colormap is None: colormap = plt.cm.get_cmap('rainbow') - BasePlot.__init__( - self, + super().__init__( chart, fig=fig, axes=axes, @@ -214,6 +223,7 @@ def __init__( self.center_labels = center_labels self.center_width = center_width self.colormap = colormap + self._set_figure_and_axes(chart, fig=fig, axes=axes) def n_samples(self) -> int: return self.fdata.n_samples @@ -257,14 +267,13 @@ def _plot_clusters(self) -> Figure: colors_by_cluster = self.cluster_colors[self.labels] - patches = [] - for i in range(self.estimator.n_clusters): - patches.append( - mpatches.Patch( - color=self.cluster_colors[i], - label=self.cluster_labels[i], - ), + patches = [ + mpatches.Patch( + color=self.cluster_colors[i], + label=self.cluster_labels[i], ) + for i in range(self.estimator.n_clusters) + ] artists = [ self.axes[j].plot( @@ -310,7 +319,6 @@ def plot(self) -> Figure: """ self.artists = np.array([]) - _check_if_estimator(self.estimator) try: check_is_fitted(self.estimator) _check_compatible_fdata( @@ -320,10 +328,7 @@ def plot(self) -> Figure: except NotFittedError: self.estimator.fit(self.fdata) - if isinstance(self.estimator, FuzzyCMeans): - self.labels = np.argmax(self.estimator.labels_, axis=1) - else: - self.labels = self.estimator.labels_ + self.labels = self.estimator.labels_ return self._plot_clusters() @@ -360,14 +365,14 @@ class ClusterMembershipLinesPlot(BasePlot): def __init__( self, - estimator: BaseEstimator, + estimator: FuzzyClusteringEstimator, fdata: FDataGrid, chart: 
Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, axes: Union[Axes, Sequence[Axes], None] = None, - sample_colors=None, - sample_labels=None, - cluster_labels=None, + sample_colors: Optional[Sequence[ColorLike]] = None, + sample_labels: Optional[Sequence[str]] = None, + cluster_labels: Optional[Sequence[str]] = None, colormap: matplotlib.colors.Colormap = None, x_label: Optional[str] = None, y_label: Optional[str] = None, @@ -377,8 +382,7 @@ def __init__( if colormap is None: colormap = plt.cm.get_cmap('rainbow') - BasePlot.__init__( - self, + super().__init__( chart, fig=fig, axes=axes, @@ -392,14 +396,14 @@ def __init__( self.y_label = y_label self.title = title self.colormap = colormap + self._set_figure_and_axes(chart, fig=fig, axes=axes) def n_samples(self) -> int: return self.fdata.n_samples def plot(self) -> Figure: - """Implementation of the plotting of the results of the - :func:`Fuzzy K-Means ` method. - + """ + Plot cluster membership. A kind of Parallel Coordinates plot is generated in this function with the membership values obtained from the algorithm. 
A line is plotted @@ -412,11 +416,6 @@ def plot(self) -> Figure: """ self.artists = np.array([]) - _check_if_estimator(self.estimator) - - if not isinstance(self.estimator, FuzzyCMeans): - raise ValueError("The estimator must be a FuzzyCMeans object.") - try: check_is_fitted(self.estimator) _check_compatible_fdata( @@ -426,6 +425,8 @@ def plot(self) -> Figure: except NotFittedError: self.estimator.fit(self.fdata) + membership = self.estimator.predict_proba(self.fdata) + _plot_clustering_checks( estimator=self.estimator, fdata=self.fdata, @@ -449,7 +450,7 @@ def plot(self) -> Figure: np.arange(self.estimator.n_clusters) / (self.estimator.n_clusters - 1), ) - labels_by_cluster = np.argmax(self.estimator.labels_, axis=1) + labels_by_cluster = self.estimator.labels_ self.sample_colors = self.cluster_colors[labels_by_cluster] if self.sample_labels is None: @@ -465,13 +466,16 @@ def plot(self) -> Figure: ] self.axes[0].get_xaxis().set_major_locator(MaxNLocator(integer=True)) - for i in range(self.fdata.n_samples): - self.artists = np.append(self.artists, self.axes[0].plot( + self.artists = np.array([ + self.axes[0].plot( np.arange(self.estimator.n_clusters), - self.estimator.labels_[i], + membership[i], label=self.sample_labels[i], color=self.sample_colors[i], - )) + ) + for i in range(self.fdata.n_samples) + ]) + self.axes[0].set_xticks(np.arange(self.estimator.n_clusters)) self.axes[0].set_xticklabels(self.cluster_labels) self.axes[0].set_xlabel(self.x_label) @@ -513,7 +517,7 @@ class ClusterMembershipPlot(BasePlot): def __init__( self, - estimator: BaseEstimator, + estimator: FuzzyClusteringEstimator, fdata: FData, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, @@ -531,8 +535,7 @@ def __init__( if colormap is None: colormap = plt.cm.get_cmap('rainbow') - BasePlot.__init__( - self, + super().__init__( chart, fig=fig, axes=axes, @@ -547,14 +550,14 @@ def __init__( self.title = title self.colormap = colormap self.sort = sort + 
self._set_figure_and_axes(chart, fig=fig, axes=axes) def n_samples(self) -> int: return self.fdata.n_samples def plot(self) -> Figure: - """Implementation of the plotting of the results of the - :func:`Fuzzy K-Means ` method. - + """ + Plot cluster membership. A kind of barplot is generated in this function with the membership values obtained from the algorithm. There is a bar for each sample @@ -569,11 +572,6 @@ def plot(self) -> Figure: """ self.artists = np.array([]) - _check_if_estimator(self.estimator) - - if not isinstance(self.estimator, FuzzyCMeans): - raise ValueError("The estimator must be a FuzzyCMeans object.") - try: check_is_fitted(self.estimator) _check_compatible_fdata( @@ -583,6 +581,8 @@ def plot(self) -> Figure: except NotFittedError: self.estimator.fit(self.fdata) + membership = self.estimator.predict_proba(self.fdata) + if self.sort < -1 or self.sort >= self.estimator.n_clusters: raise ValueError( "The sorting number must belong to " @@ -622,21 +622,20 @@ def plot(self) -> Figure: for i in range(self.estimator.n_clusters) ] - patches = [] - for i in range(self.estimator.n_clusters): - patches.append( - mpatches.Patch( - color=self.cluster_colors[i], - label=self.cluster_labels[i], - ), + patches = [ + mpatches.Patch( + color=self.cluster_colors[i], + label=self.cluster_labels[i], ) + for i in range(self.estimator.n_clusters) + ] - if self.sort != -1: - labels_dim = self.estimator.labels_ + if self.sort == -1: + labels_dim = membership else: - sample_indices = np.argsort(-self.estimator.labels_[:, self.sort]) + sample_indices = np.argsort(-membership[:, self.sort]) self.sample_labels = np.copy(self.sample_labels[sample_indices]) - labels_dim = np.copy(self.estimator.labels_[sample_indices]) + labels_dim = np.copy(membership[sample_indices]) temp_labels = np.copy(labels_dim[:, 0]) labels_dim[:, 0] = labels_dim[:, self.sort] diff --git a/skfda/ml/clustering/_kmeans.py b/skfda/ml/clustering/_kmeans.py index c144f6114..d5138c034 100644 --- 
a/skfda/ml/clustering/_kmeans.py +++ b/skfda/ml/clustering/_kmeans.py @@ -339,7 +339,8 @@ def fit( best_distances_to_centroids = distances_to_centroids best_n_iter = n_iter - self.labels_ = best_membership + self._best_membership = best_membership + self.labels_ = self._prediction_from_membership(best_membership) self.cluster_centers_ = best_centroids self._distances_to_centers = best_distances_to_centroids self.inertia_ = best_inertia @@ -347,11 +348,11 @@ def fit( return self - def predict( + def _predict_membership( self, X: FDataGrid, sample_weight: None = None, - ) -> NDArrayInt: + ) -> MembershipType: """Predict the closest cluster each sample in X belongs to. Args: @@ -381,6 +382,32 @@ def predict( return membership_matrix + @abstractmethod + def _prediction_from_membership( + self, + membership_matrix: MembershipType, + ) -> NDArrayInt: + pass + + def predict( + self, + X: FDataGrid, + sample_weight: None = None, + ) -> NDArrayInt: + """Predict the closest cluster each sample in X belongs to. + + Args: + X: Object whose samples are classified into different groups. + sample_weight: present here for API consistency by convention. + + Returns: + Label of each sample. + + """ + return self._prediction_from_membership( + self._predict_membership(X, sample_weight), + ) + def transform(self, X: FDataGrid) -> NDArrayFloat: """Transform X to a cluster-distance space. @@ -570,6 +597,12 @@ def _compute_inertia( def _create_membership(self, n_samples: int) -> NDArrayInt: return np.empty(n_samples, dtype=int) + def _prediction_from_membership( + self, + membership_matrix: NDArrayInt, + ) -> NDArrayInt: + return membership_matrix + def _update( self, fdata: FDataGrid, @@ -676,9 +709,11 @@ class FuzzyCMeans(BaseKMeans[NDArrayFloat]): degree of fuzziness in the fuzzy algorithm. Defaults to 2. Attributes: - labels\_: (n_samples, n_clusters)): 2-dimensional - matrix in which each row contains the cluster that observation - belongs to. 
+ membership_degree\_: Matrix in which each entry contains the + probability of belonging to each group. + labels\_: Vector in which each entry contains the cluster each + observation belongs to (the one with the maximum membership + degree). cluster_centers\_: data_matrix of shape (n_clusters, ncol, dim_codomain) and contains the centroids for each cluster. @@ -737,6 +772,10 @@ def __init__( self.fuzzifier = fuzzifier + @property + def membership_degree_(self) -> NDArrayFloat: + return self._best_membership + def _check_params(self) -> None: if self.fuzzifier <= 1: raise ValueError("The fuzzifier parameter must be greater than 1.") @@ -754,6 +793,12 @@ def _compute_inertia( def _create_membership(self, n_samples: int) -> NDArrayFloat: return np.empty((n_samples, self.n_clusters)) + def _prediction_from_membership( + self, + membership_matrix: NDArrayFloat, + ) -> NDArrayInt: + return np.argmax(membership_matrix, axis=1) + def _update( self, fdata: FDataGrid, @@ -797,3 +842,20 @@ def _update( ) / np.sum(membership_matrix_raised, axis=0)[slice_denominator] ) + + def predict_proba( + self, + X: FDataGrid, + sample_weight: None = None, + ) -> NDArrayFloat: + """Predict the probability of belonging to each cluster. + + Args: + X: Object whose samples are classified into different groups. + sample_weight: present here for API consistency by convention. + + Returns: + Probability of belonging to each cluster for each sample. 
+ + """ + return self._predict_membership(X, sample_weight) diff --git a/tests/test_clustering.py b/tests/test_clustering.py index b29c06d2b..66f2024f0 100644 --- a/tests/test_clustering.py +++ b/tests/test_clustering.py @@ -1,18 +1,22 @@ -from skfda.ml.clustering import KMeans, FuzzyCMeans -from skfda.representation.grid import FDataGrid import unittest import numpy as np +from skfda.ml.clustering import FuzzyCMeans, KMeans +from skfda.representation.grid import FDataGrid + class TestClustering(unittest.TestCase): # def setUp(self): could be defined for set up before any test - def test_kmeans_univariate(self): - data_matrix = [[1, 1, 2, 3, 2.5, 2], [0.5, 0.5, 1, 2, 1.5, 1], - [-1, -1, -0.5, 1, 1, 0.5], - [-0.5, -0.5, -0.5, -1, -1, -1]] + def test_kmeans_univariate(self) -> None: + data_matrix = [ + [1, 1, 2, 3, 2.5, 2], + [0.5, 0.5, 1, 2, 1.5, 1], + [-1, -1, -0.5, 1, 1, 0.5], + [-0.5, -0.5, -0.5, -1, -1, -1], + ] grid_points = [0, 2, 4, 6, 8, 10] fd = FDataGrid(data_matrix, grid_points) init = np.array([[0, 0, 0, 0, 0, 0], [2, 1, -1, 0.5, 0, -0.5]]) @@ -28,113 +32,68 @@ def test_kmeans_univariate(self): [6.49679408, 0.0], ]), ) - np.testing.assert_array_equal(kmeans.predict(fd), - np.array([0, 0, 0, 1])) - np.testing.assert_allclose(kmeans.transform(fd), - np.array([[2.98142397, 9.23534876], - [0.68718427, 6.50960828], - [3.31243449, 4.39222798], - [6.49679408, 0.]])) - centers = FDataGrid(data_matrix=np.array( - [[0.16666667, 0.16666667, 0.83333333, 2., 1.66666667, 1.16666667], - [-0.5, -0.5, -0.5, -1., -1., -1.]]), + np.testing.assert_array_equal( + kmeans.predict(fd), + np.array([0, 0, 0, 1]), + ) + np.testing.assert_allclose( + kmeans.transform(fd), + np.array([[2.98142397, 9.23534876], + [0.68718427, 6.50960828], + [3.31243449, 4.39222798], + [6.49679408, 0.]]), + ) + centers = FDataGrid( + data_matrix=np.array([ + [0.16666667, 0.16666667, 0.83333333, 2., 1.66666667, 1.16666667], + [-0.5, -0.5, -0.5, -1., -1., -1.], + ]), grid_points=grid_points) 
np.testing.assert_array_almost_equal( kmeans.cluster_centers_.data_matrix, - centers.data_matrix) + centers.data_matrix, + ) np.testing.assert_allclose(kmeans.score(fd), np.array([-20.33333333])) np.testing.assert_array_equal(kmeans.n_iter_, np.array([3.])) - # def test_kmeans_multivariate(self): - # data_matrix = [[[1, 0.3], [2, 0.4], [3, 0.5], [4, 0.6]], - # [[2, 0.5], [3, 0.6], [4, 0.7], [5, 0.7]], - # [[3, 0.2], [4, 0.3], [5, 0.4], [6, 0.5]]] - # grid_points = [2, 4, 6, 8] - # fd = FDataGrid(data_matrix, grid_points) - # kmeans = KMeans() - # kmeans.fit(fd) - # np.testing.assert_array_equal(kmeans.predict(fd), - # np.array([[1, 1], - # [1, 1], - # [0, 0]])) - # np.testing.assert_allclose(kmeans.transform(fd), - # np.array([[[4.89897949, 0.24494897], - # [1.22474487, 0.23184046]], - # [[2.44948974, 0.70592729], - # [1.22474487, 0.23184046]], - # [[0., 0.], - # [3.67423461, 0.47478065]]])) - # centers = FDataGrid(data_matrix=np.array( - # [[[3, 0.2], [4, 0.3], [5, 0.4], [6, 0.5]], - # [[1.5, 0.4], [2.5, 0.5], [3.5, 0.6], [4.5, 0.65]]]), - # grid_points=grid_points) - # np.testing.assert_allclose(kmeans.cluster_centers_.data_matrix, - # centers.data_matrix) - # np.testing.assert_allclose(kmeans.score(fd), np.array([-3., -0.1075])) - # np.testing.assert_array_equal(kmeans.n_iter_, np.array([2., 2.])) - - def test_fuzzy_kmeans_univariate(self): - data_matrix = [[1, 1, 2, 3, 2.5, 2], [0.5, 0.5, 1, 2, 1.5, 1], - [-1, -1, -0.5, 1, 1, 0.5], - [-0.5, -0.5, -0.5, -1, -1, -1]] + def test_fuzzy_kmeans_univariate(self) -> None: + data_matrix = [ + [1, 1, 2, 3, 2.5, 2], + [0.5, 0.5, 1, 2, 1.5, 1], + [-1, -1, -0.5, 1, 1, 0.5], + [-0.5, -0.5, -0.5, -1, -1, -1], + ] grid_points = [0, 2, 4, 6, 8, 10] fd = FDataGrid(data_matrix, grid_points) fuzzy_kmeans = FuzzyCMeans() fuzzy_kmeans.fit(fd) - np.testing.assert_array_equal(fuzzy_kmeans.predict(fd).round(3), - np.array([[0.965, 0.035], - [0.94, 0.06], - [0.227, 0.773], - [0.049, 0.951]])) - 
np.testing.assert_allclose(fuzzy_kmeans.transform(fd).round(3), - np.array([[1.492, 7.879], - [1.294, 5.127], - [4.856, 2.633], - [7.775, 1.759]])) - centers = np.array([[0.707, 0.707, 1.455, 2.467, 1.981, 1.482], - [-0.695, -0.695, -0.494, -0.197, -0.199, -0.398]]) + np.testing.assert_array_equal( + fuzzy_kmeans.predict_proba(fd).round(3), + np.array([[0.965, 0.035], + [0.94, 0.06], + [0.227, 0.773], + [0.049, 0.951]]), + ) + np.testing.assert_allclose( + fuzzy_kmeans.transform(fd).round(3), + np.array([[1.492, 7.879], + [1.294, 5.127], + [4.856, 2.633], + [7.775, 1.759]]), + ) + centers = np.array([ + [0.707, 0.707, 1.455, 2.467, 1.981, 1.482], + [-0.695, -0.695, -0.494, -0.197, -0.199, -0.398], + ]) np.testing.assert_allclose( fuzzy_kmeans.cluster_centers_.data_matrix[..., 0].round(3), centers) - np.testing.assert_allclose(fuzzy_kmeans.score(fd), - np.array([-12.025179])) + np.testing.assert_allclose( + fuzzy_kmeans.score(fd), + np.array([-12.025179]), + ) self.assertEqual(fuzzy_kmeans.n_iter_, 19) - # def test_fuzzy_kmeans_multivariate(self): - # data_matrix = [[[1, 0.3], [2, 0.4], [3, 0.5], [4, 0.6]], - # [[2, 0.5], [3, 0.6], [4, 0.7], [5, 0.7]], - # [[3, 0.2], [4, 0.3], [5, 0.4], [6, 0.5]]] - # grid_points = [2, 4, 6, 8] - # fd = FDataGrid(data_matrix, grid_points) - # init = np.array([[[3, 0], [5, 0], [2, 0], [4, 0]], - # [[0, 0], [0, 1], [0, 0], [0, 1]]]) - # init_fd = FDataGrid(init, grid_points) - # fuzzy_kmeans = FuzzyKMeans(init=init_fd) - # fuzzy_kmeans.fit(fd) - # np.testing.assert_array_equal(fuzzy_kmeans.predict(fd), - # np.array([[[0., 1.], - # [0.5, 0.5]], - # [[1., 0.], - # [0.5, 0.5]], - # [[0.8, 0.2], - # [0.5, 0.5]]])) - # np.testing.assert_allclose(fuzzy_kmeans.transform(fd), - # np.array([[[25., 1.26333333], - # [126.33333333, 1.26333333]], - # [[25., 2.45833333], - # [126.33333333, 2.45833333]], - # [[6., 0.78333333], - # [24., 0.78333333]]])) - # centers = FDataGrid(data_matrix=np.array( - # [[[2, 0], [3, 0], [4, 0], [5, 0]], - # [[1, 0], 
[2, 0], [3, 0], [4, 0]]]), grid_points=grid_points) - # np.testing.assert_allclose(fuzzy_kmeans.cluster_centers_.data_matrix, - # centers.data_matrix) - # np.testing.assert_allclose(fuzzy_kmeans.score(fd), np.array( - # [-1.66211111e+04, -8.25302500e+00])) - # np.testing.assert_array_equal(fuzzy_kmeans.n_iter_, - # np.array([2., 2.])) - if __name__ == '__main__': print() From e1200d245f3e9414efd68e860d4daa30c0cb4250 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 25 Aug 2021 00:15:42 +0200 Subject: [PATCH 396/417] Typing visualization. --- .../visualization/_multiple_display.py | 173 +++++++------- skfda/exploratory/visualization/_utils.py | 223 ++++++++++-------- 2 files changed, 215 insertions(+), 181 deletions(-) diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index c7e31b17c..4389ec223 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -28,57 +28,51 @@ class MultipleDisplay: the axes. It is also possible to add widgets to interact with the plots. Args: - displays: baseplot objects that will be plotted in the fig. - criteria: sequence of criteria used to order the points in the + displays: Baseplot objects that will be plotted in the fig. + criteria: Sequence of criteria used to order the points in the slider widget. The size should be equal to sliders, as each criterion is for one slider. - sliders: sequence of widgets that will be plotted. - label_sliders: label of each of the sliders. - chart: figure over with the graphs are plotted or axis over + sliders: Sequence of widgets that will be plotted. + label_sliders: Label of each of the sliders. + chart: Figure over with the graphs are plotted or axis over where the graphs are plotted. If None and ax is also None, the figure is initialized. 
- fig: figure over with the graphs are plotted in case ax is not + fig: Figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes: axis where the graphs are plotted. If None, see param fig. + axes: Axis where the graphs are plotted. If None, see param fig. Attributes: - point_clicked: artist object containing the last point clicked. - num_graphs: number of graphs that will be plotted. - length_data: number of instances or curves of the different displays. - clicked: boolean indicating whether a point has being clicked. - index_clicked: index of the function selected with the interactive + point_clicked: Artist object containing the last point clicked. + num_graphs: Number of graphs that will be plotted. + length_data: Number of instances or curves of the different displays. + clicked: Boolean indicating whether a point has being clicked. + index_clicked: Index of the function selected with the interactive module or widgets. - tags: list of tags for each ax, that contain the information printedº + tags: List of tags for each ax, that contain the information printedº while hovering. - previous_hovered: artist object containing of the last point hovered. - is_updating: boolean value that determines whether a widget + previous_hovered: Artist object containing of the last point hovered. + is_updating: Boolean value that determines whether a widget is being updated. 
""" def __init__( self, - displays: Union[BasePlot, List[BasePlot]], + displays: Union[BasePlot, Sequence[BasePlot]], criteria: Union[ Sequence[float], Sequence[Sequence[float]], None, ] = None, sliders: Union[Widget, Sequence[Widget], None] = None, - label_sliders: Union[ - str, - Sequence[str], - None, - ] = None, + label_sliders: Union[str, Sequence[str], None] = None, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, axes: Optional[Sequence[Axes]] = None, ): if isinstance(displays, BasePlot): - self.displays = [copy.copy(displays)] - else: - self.displays = [] - for d in displays: - self.displays.append(copy.copy(d)) + displays = (displays,) + + self.displays = [copy.copy(d) for d in displays] self.point_clicked: Artist = None self.num_graphs = sum(len(d.axes) for d in self.displays) self.length_data = self.displays[0].n_samples() @@ -88,27 +82,28 @@ def __init__( self.index_clicked = -1 self.tags: List[Annotation] = [] self.previous_hovered = None - self.fig = fig - self.axes = axes - self.chart = chart self.is_updating = False if criteria is not None and sliders is not None: - if isinstance(sliders, collections.Iterable): - if len(criteria) == len(sliders): - self.create_sliders(criteria, sliders, label_sliders) - else: + if isinstance(sliders, Sequence): + if len(criteria) != len(sliders): raise ValueError( "Size of criteria, and sliders should be equal.", ) - else: - self.create_sliders(criteria, sliders, label_sliders) + + self.create_sliders( + chart, + fig=fig, + axes=axes, + criteria=criteria, + sliders=sliders, + label_sliders=label_sliders, + ) + else: - self.init_axes() + self.init_axes(chart, fig=fig, axes=axes) - def plot( - self, - ): + def plot(self) -> Figure: """ Plot Multiple Display method. 
@@ -127,16 +122,21 @@ def plot( "Length of some data sets are not equal ", ) - for i in range(self.num_graphs): - self.axes[i].clear() + for ax in self.axes: + ax.clear() self.tags.append( - self.axes[i].annotate( + ax.annotate( "", xy=(0, 0), xytext=(20, 20), textcoords="offset points", - bbox=dict(boxstyle="round", fc="w"), - arrowprops=dict(arrowstyle="->"), + bbox={ + "boxstyle": "round", + "fc": "w", + }, + arrowprops={ + "arrowstyle": "->", + }, ), ) @@ -162,7 +162,7 @@ def plot( return self.fig - def update_annot(self, index_ax: int, index_point: int): + def update_annot(self, index_ax: int, index_point: int) -> None: """ Auxiliary method used to update the hovering annotations. @@ -173,22 +173,12 @@ def update_annot(self, index_ax: int, index_point: int): index_ax: index of the ax being hovered. index_point: index of the point being hovered. """ - xdata_graph = self.previous_hovered.get_offsets()[0][0] - ydata_graph = self.previous_hovered.get_offsets()[0][1] - xdata_aprox = "{0:.2f}".format(xdata_graph) - ydata_aprox = "{0:.2f}".format(ydata_graph) + xdata_graph, ydata_graph = self.previous_hovered.get_offsets()[0] current_tag = self.tags[index_ax] current_tag.xy = (xdata_graph, ydata_graph) current_tag.xy = (xdata_graph, ydata_graph) - text = "".join([ - str(index_point), - ": (", - str(xdata_aprox), - ", ", - str(ydata_aprox), - ")", - ]) + text = f"{index_point}: ({xdata_graph:.2f}, {ydata_graph:.2f})" x_axis = self.axes[index_ax].get_xlim() self.x_axis = x_axis @@ -203,7 +193,7 @@ def update_annot(self, index_ax: int, index_point: int): intensity = 0.4 current_tag.get_bbox_patch().set_alpha(intensity) - def hover(self, event: Event): + def hover(self, event: Event) -> None: """ Activate the annotation when hovering a point. 
@@ -218,13 +208,12 @@ def hover(self, event: Event): index = 0 for d in self.displays: - for i in range(len(d.axes)): - if event.inaxes == d.axes[i]: + for i, ax in enumerate(d.axes): + if event.inaxes == ax: index_axis = index artists_array = d.artists[:, i] - for j in range(len(artists_array)): - artist = artists_array[j] + for j, artist in enumerate(artists_array): if not isinstance(artist, PathCollection): return @@ -255,6 +244,10 @@ def hover(self, event: Event): def init_axes( self, + chart: Union[Figure, Axes, None] = None, + *, + fig: Optional[Figure] = None, + axes: Optional[Sequence[Axes]] = None, extra: int = 0, ) -> None: """ @@ -265,8 +258,8 @@ def init_axes( necessity for them to plot the sliders. """ widget_aspect = 1 / 4 - fig, axes = _get_figure_and_axes(self.chart, self.fig, self.axes) - if len(axes) != 0 and len(axes) != (self.num_graphs + extra): + fig, axes = _get_figure_and_axes(chart, fig, axes) + if len(axes) not in {0, self.num_graphs + extra}: raise ValueError("Invalid number of axes.") n_rows, n_cols = _get_axes_shape(self.num_graphs + extra) @@ -316,8 +309,8 @@ def pick(self, event: Event) -> None: def update_index_display_picked(self) -> None: """Update the index corresponding to the display picked.""" for d in self.displays: - for i in range(len(d.axes)): - if d.axes[i] == self.point_clicked.axes: + for i, a in enumerate(d.axes): + if a == self.point_clicked.axes: if len(d.axes) == 1: self.index_clicked = np.where( d.artists == self.point_clicked, @@ -337,9 +330,9 @@ def reduce_points_intensity(self) -> None: artist.set_alpha(0.1) self.is_updating = True - for j in range(len(self.sliders)): - val_widget = list(self.criteria[j]).index(self.index_clicked) - self.sliders[j].set_val(val_widget) + for criterium, slider in zip(self.criteria, self.sliders): + val_widget = list(criterium).index(self.index_clicked) + slider.set_val(val_widget) self.is_updating = False def restore_points_intensity(self) -> None: @@ -381,7 +374,7 @@ def 
change_points_intensity( for i in range(self.length_data): if i == self.index_clicked: - intensity = 1 + intensity = 1.0 elif i == old_index: intensity = 0.1 else: @@ -391,12 +384,12 @@ def change_points_intensity( self.change_display_intensity(i, intensity) self.is_updating = True - for j in range(len(self.sliders)): - val_widget = list(self.criteria[j]).index(self.index_clicked) - self.sliders[j].set_val(val_widget) + for criterium, slider in zip(self.criteria, self.sliders): + val_widget = list(criterium).index(self.index_clicked) + slider.set_val(val_widget) self.is_updating = False - def change_display_intensity(self, index: int, intensity: int) -> None: + def change_display_intensity(self, index: int, intensity: float) -> None: """ Change the intensity of the point selected by index in every display. @@ -412,6 +405,10 @@ def change_display_intensity(self, index: int, intensity: int) -> None: def create_sliders( self, + chart: Union[Figure, Axes, None] = None, + *, + fig: Optional[Figure] = None, + axes: Optional[Sequence[Axes]] = None, criteria: Union[Sequence[float], Sequence[Sequence[float]]], sliders: Union[Widget, Sequence[Widget]], label_sliders: Union[str, Sequence[str], None] = None, @@ -424,14 +421,19 @@ def create_sliders( sliders: widget types. label_sliders: sequence of the names of each slider. 
""" - if isinstance(criteria[0], collections.Iterable): + if isinstance(criteria[0], Sequence): for c in criteria: if len(c) != self.length_data: raise ValueError( "Slider criteria should be of the same size as data", ) - self.init_axes(extra=len(criteria)) + self.init_axes( + chart, + fig=fig, + axes=axes, + extra=len(criteria), + ) if label_sliders is None: for i in range(len(criteria)): @@ -441,10 +443,10 @@ def create_sliders( "Incorrect length of slider labels.", ) elif len(label_sliders) == len(sliders): - for k in range(len(criteria)): + for k, criterium in enumerate(criteria): self.add_slider( k, - criteria[k], + criterium, sliders[k], label_sliders[k], ) @@ -456,7 +458,12 @@ def create_sliders( len(criteria) == self.length_data and (isinstance(label_sliders, str) or label_sliders is None) ): - self.init_axes(extra=1) + self.init_axes( + chart, + fig=fig, + axes=axes, + extra=1, + ) self.add_slider(0, criteria, sliders, label_sliders) else: raise ValueError( @@ -526,13 +533,13 @@ def value_updated(self, value: int) -> None: index = int(int(value / 0.5) * 0.5) old_index = self.index_clicked self.index_clicked = list(self.criteria[self.widget_index])[index] - self.sliders[self.widget_index].valtext.set_text('{}'.format(index)) + self.sliders[self.widget_index].valtext.set_text(f'{index}') # Update the other sliders values - for i in range(len(self.sliders)): + for i, (c, s) in enumerate(zip(self.criteria, self.sliders)): if i != self.widget_index: - val_widget = list(self.criteria[i]).index(self.index_clicked) - self.sliders[i].set_val(val_widget) + val_widget = list(c).index(self.index_clicked) + s.set_val(val_widget) self.is_updating = False diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 807d57eac..b1cc32fc5 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -13,11 +13,11 @@ non_close_text = '[^>]*?' 
svg_width_regex = re.compile( - f'()' + f'()', ) svg_width_replacement = r'\g<1>100%\g<2>' svg_height_regex = re.compile( - f'()' + f'()', ) svg_height_replacement = r'\g<1>\g<2>' @@ -42,16 +42,13 @@ def __len__(self) -> int: pass -def _create_figure(): +def _create_figure() -> Figure: """Create figure using the default backend.""" - fig = plt.figure() - - return fig + return plt.figure() def _figure_to_svg(figure: Figure) -> str: """Return the SVG representation of a figure.""" - old_canvas = figure.canvas matplotlib.backends.backend_svg.FigureCanvas(figure) output = io.BytesIO() @@ -61,11 +58,16 @@ def _figure_to_svg(figure: Figure) -> str: decoded_data = data.decode('utf-8') new_data = svg_width_regex.sub( - svg_width_replacement, decoded_data, count=1) - new_data = svg_height_regex.sub( - svg_height_replacement, new_data, count=1) + svg_width_replacement, + decoded_data, + count=1, + ) - return new_data + return svg_height_regex.sub( + svg_height_replacement, + new_data, + count=1, + ) def _get_figure_and_axes( @@ -74,11 +76,12 @@ def _get_figure_and_axes( axes: Union[Axes, Sequence[Axes], None] = None, ) -> Tuple[Figure, Sequence[Axes]]: """Obtain the figure and axes from the arguments.""" - num_defined = sum(e is not None for e in (chart, fig, axes)) if num_defined > 1: - raise ValueError("Only one of chart, fig and axes parameters" - "can be passed as an argument.") + raise ValueError( + "Only one of chart, fig and axes parameters" + "can be passed as an argument.", + ) # Parse chart argument if chart is not None: @@ -88,66 +91,77 @@ def _get_figure_and_axes( axes = chart if fig is None and axes is None: - fig = _create_figure() - axes = [] + new_fig = _create_figure() + new_axes = [] elif fig is not None: - axes = fig.axes + new_fig = fig + new_axes = fig.axes else: + assert axes is not None if isinstance(axes, Axes): axes = [axes] - fig = axes[0].figure - - return fig, axes + new_fig = axes[0].figure + new_axes = axes + return new_fig, new_axes -def 
_get_axes_shape(n_axes, n_rows=None, n_cols=None): - """Get the number of rows and columns of the subplots""" - if ((n_rows is not None and n_cols is not None) - and ((n_rows * n_cols) < n_axes)): - raise ValueError(f"The number of rows ({n_rows}) multiplied by " - f"the number of columns ({n_cols}) " - f"is less than the number of required " - f"axes ({n_axes})") +def _get_axes_shape( + n_axes: int, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, +) -> Tuple[int, int]: + """Get the number of rows and columns of the subplots.""" + if ( + (n_rows is not None and n_cols is not None) + and ((n_rows * n_cols) < n_axes) + ): + raise ValueError( + f"The number of rows ({n_rows}) multiplied by " + f"the number of columns ({n_cols}) " + f"is less than the number of required " + f"axes ({n_axes})", + ) if n_rows is None and n_cols is None: - n_cols = int(math.ceil(math.sqrt(n_axes))) - n_rows = int(math.ceil(n_axes / n_cols)) + new_n_cols = int(math.ceil(math.sqrt(n_axes))) + new_n_rows = int(math.ceil(n_axes / new_n_cols)) elif n_rows is None and n_cols is not None: - n_rows = int(math.ceil(n_axes / n_cols)) + new_n_cols = n_cols + new_n_rows = int(math.ceil(n_axes / n_cols)) elif n_cols is None and n_rows is not None: - n_cols = int(math.ceil(n_axes / n_rows)) + new_n_cols = int(math.ceil(n_axes / n_rows)) + new_n_rows = n_rows - return n_rows, n_cols + return new_n_rows, new_n_cols def _set_figure_layout( - fig: Optional[Figure] = None, - axes: Union[Axes, Sequence[Axes], None] = None, + fig: Figure, + axes: Sequence[Axes], dim: int = 2, n_axes: int = 1, n_rows: Optional[int] = None, n_cols: Optional[int] = None, ) -> Tuple[Figure, Sequence[Axes]]: - """Set the figure axes for plotting. + """ + Set the figure axes for plotting. Args: - dim (int): dimension of the plot. Either 2 for a 2D plot or - 3 for a 3D plot. - n_axes (int): Number of subplots. - fig (figure object): figure over with the graphs are - plotted in case ax is not specified. 
- ax (list of axis objects): axis over where the graphs are - plotted. - n_rows (int, optional): designates the number of rows of the figure - to plot the different dimensions of the image. Can only be passed - if no axes are specified. - n_cols (int, optional): designates the number of columns of the - figure to plot the different dimensions of the image. Can only be - passed if no axes are specified. + fig: Figure over with the graphs are plotted in case ax is not + specified. + axes: Axis over where the graphs are plotted. + dim: Dimension of the plot. Either 2 for a 2D plot or 3 for a 3D plot. + n_axes: Number of subplots. + n_rows: Designates the number of rows of the figure to plot the + different dimensions of the image. Can only be passed if no axes + are specified. + n_cols: Designates the number of columns of the figure to plot the + different dimensions of the image. Can only be passed if no axes + are specified. Returns: (tuple): tuple containing: @@ -157,19 +171,24 @@ def _set_figure_layout( """ if not (1 < dim < 4): - raise NotImplementedError("Only bidimensional or tridimensional " - "plots are supported.") + raise NotImplementedError( + "Only bidimensional or tridimensional plots are supported.", + ) - if len(axes) != 0 and len(axes) != n_axes: - raise ValueError(f"The number of axes must be 0 (to create them) or " - f"equal to the number of axes needed " - f"({n_axes} in this case).") + if len(axes) not in {0, n_axes}: + raise ValueError( + f"The number of axes must be 0 (to create them) or " + f"equal to the number of axes needed " + f"({n_axes} in this case).", + ) if len(axes) != 0 and (n_rows is not None or n_cols is not None): - raise ValueError("The number of columns and/or number of rows of " - "the figure, in which each dimension of the " - "image is plotted, can only be customized in case " - "that no axes are provided.") + raise ValueError( + "The number of columns and/or number of rows of " + "the figure, in which each dimension of the " 
+ "image is plotted, can only be customized in case " + "that no axes are provided.", + ) if dim == 2: projection = 'rectilinear' @@ -180,77 +199,82 @@ def _set_figure_layout( # Create the axes n_rows, n_cols = _get_axes_shape(n_axes, n_rows, n_cols) - fig.subplots(nrows=n_rows, ncols=n_cols, - subplot_kw={"projection": projection}) + fig.subplots( + nrows=n_rows, + ncols=n_cols, + subplot_kw={"projection": projection}, + ) axes = fig.axes else: # Check that the projections are right if not all(a.name == projection for a in axes): - raise ValueError(f"The projection of the axes should be " - f"{projection}") + raise ValueError( + f"The projection of the axes should be {projection}", + ) return fig, axes def _set_figure_layout_for_fdata( fdata: FData, - fig: Optional[Figure] = None, - axes: Optional[Sequence[Axes]] = None, + fig: Figure, + axes: Sequence[Axes], n_rows: Optional[int] = None, n_cols: Optional[int] = None, ) -> Tuple[Figure, Sequence[Axes]]: - """Set the figure axes for plotting a - :class:`~skfda.representation.FData` object. + """ + Set the figure axes for plotting a FData object. Args: - fdata (FData): functional data object. - fig (figure object): figure over with the graphs are - plotted in case ax is not specified. - ax (list of axis objects): axis over where the graphs are - plotted. - n_rows (int, optional): designates the number of rows of the figure - to plot the different dimensions of the image. Can only be passed + fdata: functional data object. + fig: figure over with the graphs are plotted in case ax is not + specified. + axes: axis over where the graphs are plotted. + n_rows: designates the number of rows of the figure to plot the + different dimensions of the image. Can only be passed if no axes are specified. - n_cols (int, optional): designates the number of columns of the - figure to plot the different dimensions of the image. Can only be - passed if no axes are specified. 
+ n_cols: designates the number of columns of the figure to plot + the different dimensions of the image. Can only be passed if + no axes are specified. Returns: - (tuple): tuple containing: + Tuple containing: - * fig (figure): figure object in which the graphs are plotted. - * axes (list): axes in which the graphs are plotted. + * fig: figure object in which the graphs are plotted. + * axes: axes in which the graphs are plotted. """ - return _set_figure_layout(fig, axes, - dim=fdata.dim_domain + 1, - n_axes=fdata.dim_codomain, - n_rows=n_rows, n_cols=n_cols) + return _set_figure_layout( + fig, + axes, + dim=fdata.dim_domain + 1, + n_axes=fdata.dim_codomain, + n_rows=n_rows, + n_cols=n_cols, + ) def _set_labels( fdata: FData, - fig: Optional[Figure] = None, - axes: Union[Axes, Sequence[Axes], None] = None, + fig: Figure, + axes: Sequence[Axes], patches: Optional[Sequence[matplotlib.patches.Patch]] = None, ) -> None: """Set labels if any. Args: - fdata (FData): functional data object. - fig (figure object): figure object containing the axes that - implement set_xlabel and set_ylabel, and set_zlabel in case + fdata: functional data object. + fig: figure object containing the axes that implement + set_xlabel and set_ylabel, and set_zlabel in case of a 3d projection. - axes (list of axes): axes objects that implement set_xlabel and - set_ylabel, and set_zlabel in case of a 3d projection; used if + axes: axes objects that implement set_xlabel and set_ylabel, + and set_zlabel in case of a 3d projection; used if fig is None. - patches (list of mpatches.Patch); objects used to generate each - entry in the legend. + patches: objects used to generate each entry in the legend. 
""" - # Dataset name if fdata.dataset_name is not None: fig.suptitle(fdata.dataset_name) @@ -261,15 +285,17 @@ def _set_labels( elif patches is not None: axes[0].legend(handles=patches) + assert len(axes) == fdata.dim_codomain + # Axis labels if axes[0].name == '3d': - for i in range(fdata.dim_codomain): + for i, a in enumerate(axes): if fdata.argument_names[0] is not None: - axes[i].set_xlabel(fdata.argument_names[0]) + a.set_xlabel(fdata.argument_names[0]) if fdata.argument_names[1] is not None: - axes[i].set_ylabel(fdata.argument_names[1]) + a.set_ylabel(fdata.argument_names[1]) if fdata.coordinate_names[i] is not None: - axes[i].set_zlabel(fdata.coordinate_names[i]) + a.set_zlabel(fdata.coordinate_names[i]) else: for i in range(fdata.dim_codomain): if fdata.argument_names[0] is not None: @@ -281,6 +307,7 @@ def _set_labels( def _change_luminosity(color: ColorLike, amount: float = 0.5) -> ColorLike: """ Change the given color luminosity by the given amount. + Input can be matplotlib color string, hex string, or RGB tuple. Note: From 6725d3ec07b9ebefbaa596a6e60ef9529a5a0b16 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 25 Aug 2021 03:03:50 +0200 Subject: [PATCH 397/417] Use only one annotation for all the samples in MultipleDisplay. 
--- .../visualization/_multiple_display.py | 130 ++++++++++-------- 1 file changed, 73 insertions(+), 57 deletions(-) diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index 4389ec223..01858e377 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -5,7 +5,7 @@ import numpy as np from matplotlib.artist import Artist from matplotlib.axes import Axes -from matplotlib.backend_bases import Event +from matplotlib.backend_bases import Event, MouseEvent from matplotlib.collections import PathCollection from matplotlib.figure import Figure from matplotlib.text import Annotation @@ -48,8 +48,6 @@ class MultipleDisplay: clicked: Boolean indicating whether a point has being clicked. index_clicked: Index of the function selected with the interactive module or widgets. - tags: List of tags for each ax, that contain the information printedº - while hovering. previous_hovered: Artist object containing of the last point hovered. is_updating: Boolean value that determines whether a widget is being updated. 
@@ -80,7 +78,7 @@ def __init__( self.criteria: List[List[int]] = [] self.clicked = False self.index_clicked = -1 - self.tags: List[Annotation] = [] + self._tag = self._create_annotation() self.previous_hovered = None self.is_updating = False @@ -103,6 +101,67 @@ def __init__( else: self.init_axes(chart, fig=fig, axes=axes) + def _create_annotation(self) -> Annotation: + tag = Annotation( + "", + xy=(0, 0), + xytext=(20, 20), + textcoords="offset points", + bbox={ + "boxstyle": "round", + "fc": "w", + }, + arrowprops={ + "arrowstyle": "->", + }, + ) + + tag.get_bbox_patch().set_facecolor(color='khaki') + intensity = 0.8 + tag.get_bbox_patch().set_alpha(intensity) + + return tag + + def _update_annotation( + self, + tag: Annotation, + axes: Axes, + index_point: int, + ) -> None: + """ + Auxiliary method used to update the hovering annotations. + + Method used to update the annotations that appear while + hovering a scattered point. The annotations indicate + the index and coordinates of the point hovered. + Args: + index_ax: index of the ax being hovered. + index_point: index of the point being hovered. + """ + xdata_graph, ydata_graph = self.previous_hovered.get_offsets()[0] + + tag.xy = (xdata_graph, ydata_graph) + text = f"{index_point}: ({xdata_graph:.2f}, {ydata_graph:.2f})" + tag.set_text(text) + + x_axis = axes.get_xlim() + y_axis = axes.get_ylim() + + label_xpos = 20 + label_ypos = 20 + if (xdata_graph - x_axis[0]) > (x_axis[1] - xdata_graph): + label_xpos = -80 + + if (ydata_graph - y_axis[0]) > (y_axis[1] - ydata_graph): + label_ypos = -20 + + if tag.figure: + tag.remove() + tag.figure = None + axes.add_artist(tag) + tag.set_transform(axes.transData) + tag.set_position((label_xpos, label_ypos)) + def plot(self) -> Figure: """ Plot Multiple Display method. 
@@ -124,21 +183,6 @@ def plot(self) -> Figure: for ax in self.axes: ax.clear() - self.tags.append( - ax.annotate( - "", - xy=(0, 0), - xytext=(20, 20), - textcoords="offset points", - bbox={ - "boxstyle": "round", - "fc": "w", - }, - arrowprops={ - "arrowstyle": "->", - }, - ), - ) int_index = 0 for disp in self.displays: @@ -151,8 +195,7 @@ def plot(self) -> Figure: self.fig.canvas.mpl_connect('motion_notify_event', self.hover) self.fig.canvas.mpl_connect('pick_event', self.pick) - for i in range(self.num_graphs): - self.tags[i].set_visible(False) + self._tag.set_visible(False) self.fig.suptitle("Multiple display") self.fig.tight_layout() @@ -162,38 +205,7 @@ def plot(self) -> Figure: return self.fig - def update_annot(self, index_ax: int, index_point: int) -> None: - """ - Auxiliary method used to update the hovering annotations. - - Method used to update the annotations that appear while - hovering a scattered point. The annotations indicate - the index and coordinates of the point hovered. - Args: - index_ax: index of the ax being hovered. - index_point: index of the point being hovered. - """ - xdata_graph, ydata_graph = self.previous_hovered.get_offsets()[0] - - current_tag = self.tags[index_ax] - current_tag.xy = (xdata_graph, ydata_graph) - current_tag.xy = (xdata_graph, ydata_graph) - text = f"{index_point}: ({xdata_graph:.2f}, {ydata_graph:.2f})" - - x_axis = self.axes[index_ax].get_xlim() - self.x_axis = x_axis - self.xdata_graph = xdata_graph - if (xdata_graph - x_axis[0]) > (x_axis[1] - xdata_graph): - current_tag.set_position((-80, 20)) - else: - current_tag.set_position((20, 20)) - - current_tag.set_text(text) - current_tag.get_bbox_patch().set_facecolor(color='red') - intensity = 0.4 - current_tag.get_bbox_patch().set_alpha(intensity) - - def hover(self, event: Event) -> None: + def hover(self, event: MouseEvent) -> None: """ Activate the annotation when hovering a point. 
@@ -234,12 +246,16 @@ def hover(self, event: Event) -> None: self.widget_index = k - self.num_graphs if index_axis != -1 and is_graph: - self.update_annot(index_axis, index_point) - self.tags[index_axis].set_visible(True) + self._update_annotation( + self._tag, + self.axes[index_axis], + index_point, + ) + self._tag.set_visible(True) self.fig.canvas.draw_idle() - elif self.tags[index_axis].get_visible(): + elif self._tag.get_visible(): self.previous_hovered = None - self.tags[index_axis].set_visible(False) + self._tag.set_visible(False) self.fig.canvas.draw_idle() def init_axes( From 7af537f35d07b13e58dd85a38eb6ed3d25d41d69 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 25 Aug 2021 03:49:22 +0200 Subject: [PATCH 398/417] Simplify hover logic. --- .../visualization/_multiple_display.py | 50 ++++++++----------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index 01858e377..6644d3d6b 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -48,7 +48,6 @@ class MultipleDisplay: clicked: Boolean indicating whether a point has being clicked. index_clicked: Index of the function selected with the interactive module or widgets. - previous_hovered: Artist object containing of the last point hovered. is_updating: Boolean value that determines whether a widget is being updated. """ @@ -79,7 +78,7 @@ def __init__( self.clicked = False self.index_clicked = -1 self._tag = self._create_annotation() - self.previous_hovered = None + self._previous_hovered: Optional[Artist] = None self.is_updating = False if criteria is not None and sliders is not None: @@ -138,7 +137,7 @@ def _update_annotation( index_ax: index of the ax being hovered. index_point: index of the point being hovered. 
""" - xdata_graph, ydata_graph = self.previous_hovered.get_offsets()[0] + xdata_graph, ydata_graph = self._previous_hovered.get_offsets()[0] tag.xy = (xdata_graph, ydata_graph) text = f"{index_point}: ({xdata_graph:.2f}, {ydata_graph:.2f})" @@ -215,46 +214,41 @@ def hover(self, event: MouseEvent) -> None: Args: event: event object containing the artist of the point hovered. - """ - index_axis = -1 - index = 0 + """ for d in self.displays: - for i, ax in enumerate(d.axes): - if event.inaxes == ax: - index_axis = index - - artists_array = d.artists[:, i] - for j, artist in enumerate(artists_array): - if not isinstance(artist, PathCollection): - return - - is_graph, ind = artist.contains(event) - if is_graph and self.previous_hovered == artist: - return - if is_graph: - self.previous_hovered = artist - index_point = j - break - break + try: + i = d.axes.index(event.inaxes) + except ValueError: + continue - else: - index += 1 + for j, artist in enumerate(d.artists[:, i]): + if not isinstance(artist, PathCollection): + return + + is_graph, _ = artist.contains(event) + if is_graph and self._previous_hovered == artist: + return + elif is_graph: + self._previous_hovered = artist + index_point = j + break + break for k in range(self.num_graphs, len(self.axes)): if event.inaxes == self.axes[k]: self.widget_index = k - self.num_graphs - if index_axis != -1 and is_graph: + if event.inaxes is not None and is_graph: self._update_annotation( self._tag, - self.axes[index_axis], + event.inaxes, index_point, ) self._tag.set_visible(True) self.fig.canvas.draw_idle() elif self._tag.get_visible(): - self.previous_hovered = None + self._previous_hovered = None self._tag.set_visible(False) self.fig.canvas.draw_idle() From 378634ee678c83be5982ccd15f8d5f95c9b7882b Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 25 Aug 2021 18:46:20 +0200 Subject: [PATCH 399/417] Simplify hover more. 
--- .../visualization/_multiple_display.py | 102 +++++++++--------- 1 file changed, 54 insertions(+), 48 deletions(-) diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index 6644d3d6b..9710914df 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -1,11 +1,11 @@ import collections import copy -from typing import List, Optional, Sequence, Union +from typing import List, Optional, Sequence, Tuple, Union import numpy as np from matplotlib.artist import Artist from matplotlib.axes import Axes -from matplotlib.backend_bases import Event, MouseEvent +from matplotlib.backend_bases import Event, LocationEvent, MouseEvent from matplotlib.collections import PathCollection from matplotlib.figure import Figure from matplotlib.text import Annotation @@ -43,7 +43,6 @@ class MultipleDisplay: axes: Axis where the graphs are plotted. If None, see param fig. Attributes: point_clicked: Artist object containing the last point clicked. - num_graphs: Number of graphs that will be plotted. length_data: Number of instances or curves of the different displays. clicked: Boolean indicating whether a point has being clicked. 
index_clicked: Index of the function selected with the interactive @@ -71,22 +70,20 @@ def __init__( self.displays = [copy.copy(d) for d in displays] self.point_clicked: Artist = None - self.num_graphs = sum(len(d.axes) for d in self.displays) + self._n_graphs = sum(len(d.axes) for d in self.displays) self.length_data = self.displays[0].n_samples() self.sliders: List[Widget] = [] self.criteria: List[List[int]] = [] self.clicked = False self.index_clicked = -1 self._tag = self._create_annotation() - self._previous_hovered: Optional[Artist] = None self.is_updating = False if criteria is not None and sliders is not None: - if isinstance(sliders, Sequence): - if len(criteria) != len(sliders): - raise ValueError( - "Size of criteria, and sliders should be equal.", - ) + if isinstance(sliders, Sequence) and len(criteria) != len(sliders): + raise ValueError( + "Size of criteria, and sliders should be equal.", + ) self.create_sliders( chart, @@ -124,8 +121,10 @@ def _create_annotation(self) -> Annotation: def _update_annotation( self, tag: Annotation, + *, axes: Axes, - index_point: int, + sample_number: int, + position: Tuple[float, float], ) -> None: """ Auxiliary method used to update the hovering annotations. @@ -134,13 +133,14 @@ def _update_annotation( hovering a scattered point. The annotations indicate the index and coordinates of the point hovered. Args: - index_ax: index of the ax being hovered. - index_point: index of the point being hovered. + tag: Annotation to update. + axes: Axes were the annotation belongs. + sample_number: Number of the current sample. 
""" - xdata_graph, ydata_graph = self._previous_hovered.get_offsets()[0] + xdata_graph, ydata_graph = position tag.xy = (xdata_graph, ydata_graph) - text = f"{index_point}: ({xdata_graph:.2f}, {ydata_graph:.2f})" + text = f"{sample_number}: ({xdata_graph:.2f}, {ydata_graph:.2f})" tag.set_text(text) x_axis = axes.get_xlim() @@ -173,7 +173,7 @@ def plot(self) -> Figure: fig: figure object in which the displays and widgets will be plotted. """ - if self.num_graphs > 1: + if self._n_graphs > 1: for d in self.displays[1:]: if d.n_samples() != self.length_data: raise ValueError( @@ -204,6 +204,25 @@ def plot(self) -> Figure: return self.fig + def _sample_artist_from_event( + self, + event: LocationEvent, + ) -> Optional[Tuple[int, Artist]]: + for d in self.displays: + try: + i = d.axes.index(event.inaxes) + except ValueError: + continue + + for j, artist in enumerate(d.artists[:, i]): + if not isinstance(artist, PathCollection): + return None + + if artist.contains(event)[0]: + return j, artist + + return None + def hover(self, event: MouseEvent) -> None: """ Activate the annotation when hovering a point. @@ -216,39 +235,24 @@ def hover(self, event: MouseEvent) -> None: hovered. 
""" - for d in self.displays: - try: - i = d.axes.index(event.inaxes) - except ValueError: - continue + found_artist = self._sample_artist_from_event(event) - for j, artist in enumerate(d.artists[:, i]): - if not isinstance(artist, PathCollection): - return - - is_graph, _ = artist.contains(event) - if is_graph and self._previous_hovered == artist: - return - elif is_graph: - self._previous_hovered = artist - index_point = j - break - break - - for k in range(self.num_graphs, len(self.axes)): + for k in range(self._n_graphs, len(self.axes)): if event.inaxes == self.axes[k]: - self.widget_index = k - self.num_graphs + self.widget_index = k - self._n_graphs + + if event.inaxes is not None and found_artist is not None: + sample_number, artist = found_artist - if event.inaxes is not None and is_graph: self._update_annotation( self._tag, - event.inaxes, - index_point, + axes=event.inaxes, + sample_number=sample_number, + position=artist.get_offsets()[0], ) self._tag.set_visible(True) self.fig.canvas.draw_idle() elif self._tag.get_visible(): - self._previous_hovered = None self._tag.set_visible(False) self.fig.canvas.draw_idle() @@ -269,18 +273,20 @@ def init_axes( """ widget_aspect = 1 / 4 fig, axes = _get_figure_and_axes(chart, fig, axes) - if len(axes) not in {0, self.num_graphs + extra}: + if len(axes) not in {0, self._n_graphs + extra}: raise ValueError("Invalid number of axes.") - n_rows, n_cols = _get_axes_shape(self.num_graphs + extra) + n_rows, n_cols = _get_axes_shape(self._n_graphs + extra) number_axes = n_rows * n_cols fig, axes = _set_figure_layout( - fig=fig, axes=axes, n_axes=self.num_graphs + extra, + fig=fig, + axes=axes, + n_axes=self._n_graphs + extra, ) - for i in range(self.num_graphs, number_axes): - if i >= self.num_graphs + extra: + for i in range(self._n_graphs, number_axes): + if i >= self._n_graphs + extra: axes[i].set_visible(False) else: axes[i].set_box_aspect(widget_aspect) @@ -502,7 +508,7 @@ def add_slider( full_desc = label_slider 
self.sliders.append( widget_func( - self.axes[self.num_graphs + ind_ax], + self.axes[self._n_graphs + ind_ax], full_desc, valmin=0, valmax=self.length_data - 1, @@ -510,13 +516,13 @@ def add_slider( ), ) - self.axes[self.num_graphs + ind_ax].annotate( + self.axes[self._n_graphs + ind_ax].annotate( '0', xy=(0, -0.5), xycoords='axes fraction', annotation_clip=False, ) - self.axes[self.num_graphs + ind_ax].annotate( + self.axes[self._n_graphs + ind_ax].annotate( str(self.length_data - 1), xy=(0.95, -0.5), xycoords='axes fraction', From 74e197183be26e8196d0721982203b045774d141 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 25 Aug 2021 20:58:05 +0200 Subject: [PATCH 400/417] Simplify initialization code for MultipleDisplay. --- .../visualization/_multiple_display.py | 232 ++++++++---------- 1 file changed, 107 insertions(+), 125 deletions(-) diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index 9710914df..2a1c4e4b4 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -1,6 +1,6 @@ import collections import copy -from typing import List, Optional, Sequence, Tuple, Union +from typing import List, Optional, Sequence, Tuple, Type, Union, cast import numpy as np from matplotlib.artist import Artist @@ -54,12 +54,8 @@ class MultipleDisplay: def __init__( self, displays: Union[BasePlot, Sequence[BasePlot]], - criteria: Union[ - Sequence[float], - Sequence[Sequence[float]], - None, - ] = None, - sliders: Union[Widget, Sequence[Widget], None] = None, + criteria: Union[Sequence[float], Sequence[Sequence[float]]] = (), + sliders: Union[Type[Widget], Sequence[Type[Widget]]] = (), label_sliders: Union[str, Sequence[str], None] = None, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, @@ -79,23 +75,113 @@ def __init__( self._tag = self._create_annotation() self.is_updating = False - if criteria is not None and 
sliders is not None: - if isinstance(sliders, Sequence) and len(criteria) != len(sliders): + if len(criteria) != 0 and not isinstance(criteria[0], Sequence): + criteria = (criteria,) + + criteria = cast(Sequence[Sequence[float]], criteria) + + if not isinstance(sliders, Sequence): + sliders = (sliders,) + + if isinstance(label_sliders, str): + label_sliders = (label_sliders,) + + if len(criteria) != len(sliders): + raise ValueError( + f"Size of criteria, and sliders should be equal " + f"(have {len(criteria)} and {len(sliders)}).", + ) + + self._init_axes( + chart, + fig=fig, + axes=axes, + extra=len(criteria), + ) + + self._create_sliders( + criteria=criteria, + sliders=sliders, + label_sliders=label_sliders, + ) + + def _init_axes( + self, + chart: Union[Figure, Axes, None] = None, + *, + fig: Optional[Figure] = None, + axes: Optional[Sequence[Axes]] = None, + extra: int = 0, + ) -> None: + """ + Initialize the axes and figure. + + Args: + chart: Figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + fig: Figure over with the graphs are plotted in case ax is not + specified. If None and ax is also None, the figure is + initialized. + axes: Axis where the graphs are plotted. If None, see param fig. + extra: integer indicating the extra axes needed due to the + necessity for them to plot the sliders. 
+ + """ + widget_aspect = 1 / 4 + fig, axes = _get_figure_and_axes(chart, fig, axes) + if len(axes) not in {0, self._n_graphs + extra}: + raise ValueError("Invalid number of axes.") + + n_rows, n_cols = _get_axes_shape(self._n_graphs + extra) + + number_axes = n_rows * n_cols + fig, axes = _set_figure_layout( + fig=fig, + axes=axes, + n_axes=self._n_graphs + extra, + ) + + for i in range(self._n_graphs, number_axes): + if i >= self._n_graphs + extra: + axes[i].set_visible(False) + else: + axes[i].set_box_aspect(widget_aspect) + + self.fig = fig + self.axes = axes + + def _create_sliders( + self, + *, + criteria: Sequence[Sequence[float]], + sliders: Sequence[Type[Widget]], + label_sliders: Optional[Sequence[str]] = None, + ) -> None: + """ + Create the sliders with the criteria selected. + + Args: + criteria: Different criterion for each of the sliders. + sliders: Widget types. + label_sliders: Sequence of the names of each slider. + + """ + for c in criteria: + if len(c) != self.length_data: raise ValueError( - "Size of criteria, and sliders should be equal.", + "Slider criteria should be of the same size as data", ) - self.create_sliders( - chart, - fig=fig, - axes=axes, - criteria=criteria, - sliders=sliders, - label_sliders=label_sliders, - ) + for k, criterium in enumerate(criteria): + label = label_sliders[k] if label_sliders else None - else: - self.init_axes(chart, fig=fig, axes=axes) + self.add_slider( + k, + criterium, + sliders[k], + label, + ) def _create_annotation(self) -> Annotation: tag = Annotation( @@ -208,6 +294,7 @@ def _sample_artist_from_event( self, event: LocationEvent, ) -> Optional[Tuple[int, Artist]]: + """Get the number of sample and artist under a location event.""" for d in self.displays: try: i = d.axes.index(event.inaxes) @@ -256,44 +343,6 @@ def hover(self, event: MouseEvent) -> None: self._tag.set_visible(False) self.fig.canvas.draw_idle() - def init_axes( - self, - chart: Union[Figure, Axes, None] = None, - *, - fig: 
Optional[Figure] = None, - axes: Optional[Sequence[Axes]] = None, - extra: int = 0, - ) -> None: - """ - Initialize the axes and figure. - - Args: - extra: integer indicating the extra axes needed due to the - necessity for them to plot the sliders. - """ - widget_aspect = 1 / 4 - fig, axes = _get_figure_and_axes(chart, fig, axes) - if len(axes) not in {0, self._n_graphs + extra}: - raise ValueError("Invalid number of axes.") - - n_rows, n_cols = _get_axes_shape(self._n_graphs + extra) - - number_axes = n_rows * n_cols - fig, axes = _set_figure_layout( - fig=fig, - axes=axes, - n_axes=self._n_graphs + extra, - ) - - for i in range(self._n_graphs, number_axes): - if i >= self._n_graphs + extra: - axes[i].set_visible(False) - else: - axes[i].set_box_aspect(widget_aspect) - - self.fig = fig - self.axes = axes - def pick(self, event: Event) -> None: """ Activate interactive functionality when picking a point. @@ -419,73 +468,6 @@ def change_display_intensity(self, index: int, intensity: float) -> None: for artist in np.ravel(d.artists[index]): artist.set_alpha(intensity) - def create_sliders( - self, - chart: Union[Figure, Axes, None] = None, - *, - fig: Optional[Figure] = None, - axes: Optional[Sequence[Axes]] = None, - criteria: Union[Sequence[float], Sequence[Sequence[float]]], - sliders: Union[Widget, Sequence[Widget]], - label_sliders: Union[str, Sequence[str], None] = None, - ) -> None: - """ - Create the sliders with the criteria selected. - - Args: - criteria: different criterion for each of the sliders. - sliders: widget types. - label_sliders: sequence of the names of each slider. 
- """ - if isinstance(criteria[0], Sequence): - for c in criteria: - if len(c) != self.length_data: - raise ValueError( - "Slider criteria should be of the same size as data", - ) - - self.init_axes( - chart, - fig=fig, - axes=axes, - extra=len(criteria), - ) - - if label_sliders is None: - for i in range(len(criteria)): - self.add_slider(i, criteria[i], sliders[i]) - elif isinstance(label_sliders, str): - raise ValueError( - "Incorrect length of slider labels.", - ) - elif len(label_sliders) == len(sliders): - for k, criterium in enumerate(criteria): - self.add_slider( - k, - criterium, - sliders[k], - label_sliders[k], - ) - else: - raise ValueError( - "Incorrect length of slider labels.", - ) - elif ( - len(criteria) == self.length_data - and (isinstance(label_sliders, str) or label_sliders is None) - ): - self.init_axes( - chart, - fig=fig, - axes=axes, - extra=1, - ) - self.add_slider(0, criteria, sliders, label_sliders) - else: - raise ValueError( - "Slider criteria should be of the same size as data", - ) - def add_slider( self, ind_ax: int, From 4faa7c4a04ccfb2ce492421ae9c97c3c10409075 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 26 Aug 2021 02:04:52 +0200 Subject: [PATCH 401/417] Fix sliders. 
--- .../visualization/_multiple_display.py | 108 +++++++++--------- 1 file changed, 57 insertions(+), 51 deletions(-) diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index 2a1c4e4b4..fbc414f59 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -11,10 +11,19 @@ from matplotlib.text import Annotation from matplotlib.widgets import Slider, Widget +from _functools import partial + from ._baseplot import BasePlot from ._utils import _get_axes_shape, _get_figure_and_axes, _set_figure_layout +def _set_val_noevents(widget: Widget, val: float) -> None: + e = widget.eventson + widget.eventson = False + widget.set_val(val) + widget.eventson = e + + class MultipleDisplay: """ MultipleDisplay class used to combine and interact with plots. @@ -47,8 +56,6 @@ class MultipleDisplay: clicked: Boolean indicating whether a point has being clicked. index_clicked: Index of the function selected with the interactive module or widgets. - is_updating: Boolean value that determines whether a widget - is being updated. """ def __init__( @@ -73,7 +80,6 @@ def __init__( self.clicked = False self.index_clicked = -1 self._tag = self._create_annotation() - self.is_updating = False if len(criteria) != 0 and not isinstance(criteria[0], Sequence): criteria = (criteria,) @@ -128,7 +134,7 @@ def _init_axes( necessity for them to plot the sliders. 
""" - widget_aspect = 1 / 4 + widget_aspect = 1 / 8 fig, axes = _get_figure_and_axes(chart, fig, axes) if len(axes) not in {0, self._n_graphs + extra}: raise ValueError("Invalid number of axes.") @@ -266,7 +272,7 @@ def plot(self) -> Figure: "Length of some data sets are not equal ", ) - for ax in self.axes: + for ax in self.axes[:self._n_graphs]: ax.clear() int_index = 0 @@ -285,9 +291,6 @@ def plot(self) -> Figure: self.fig.suptitle("Multiple display") self.fig.tight_layout() - for slider in self.sliders: - slider.on_changed(self.value_updated) - return self.fig def _sample_artist_from_event( @@ -324,10 +327,6 @@ def hover(self, event: MouseEvent) -> None: """ found_artist = self._sample_artist_from_event(event) - for k in range(self._n_graphs, len(self.axes)): - if event.inaxes == self.axes[k]: - self.widget_index = k - self._n_graphs - if event.inaxes is not None and found_artist is not None: sample_number, artist = found_artist @@ -394,11 +393,9 @@ def reduce_points_intensity(self) -> None: for artist in np.ravel(d.artists[i]): artist.set_alpha(0.1) - self.is_updating = True for criterium, slider in zip(self.criteria, self.sliders): val_widget = list(criterium).index(self.index_clicked) - slider.set_val(val_widget) - self.is_updating = False + _set_val_noevents(slider, val_widget) def restore_points_intensity(self) -> None: """Restore the original transparency of all the points.""" @@ -410,10 +407,8 @@ def restore_points_intensity(self) -> None: self.point_clicked = None self.index_clicked = -1 - self.is_updating = True for j in range(len(self.sliders)): - self.sliders[j].set_val(0) - self.is_updating = False + _set_val_noevents(self.sliders[j], 0) def change_points_intensity( self, @@ -448,11 +443,9 @@ def change_points_intensity( if intensity != -1: self.change_display_intensity(i, intensity) - self.is_updating = True for criterium, slider in zip(self.criteria, self.sliders): val_widget = list(criterium).index(self.index_clicked) - slider.set_val(val_widget) 
- self.is_updating = False + _set_val_noevents(slider, val_widget) def change_display_intensity(self, index: int, intensity: float) -> None: """ @@ -472,7 +465,7 @@ def add_slider( self, ind_ax: int, criterion: Sequence[float], - widget_func: Widget = Slider, + widget_func: Type[Widget] = Slider, label_slider: Optional[str] = None, ) -> None: """ @@ -484,26 +477,27 @@ def add_slider( widget_func: widget type. label_slider: names of the slider. """ - if label_slider is None: - full_desc = "".join(["Filter (", str(ind_ax), ")"]) - else: - full_desc = label_slider - self.sliders.append( - widget_func( - self.axes[self._n_graphs + ind_ax], - full_desc, - valmin=0, - valmax=self.length_data - 1, - valinit=0, - ), + full_desc = ( + f"Filter ({ind_ax}))" if label_slider is None else label_slider + ) + + widget = widget_func( + ax=self.axes[self._n_graphs + ind_ax], + label=full_desc, + valmin=0, + valmax=self.length_data - 1, + valinit=0, ) + self.sliders.append(widget) + self.axes[self._n_graphs + ind_ax].annotate( '0', xy=(0, -0.5), xycoords='axes fraction', annotation_clip=False, ) + self.axes[self._n_graphs + ind_ax].annotate( str(self.length_data - 1), xy=(0.95, -0.5), @@ -511,35 +505,47 @@ def add_slider( annotation_clip=False, ) - dic = dict(zip(criterion, range(self.length_data))) - order_dic = collections.OrderedDict(sorted(dic.items())) - self.criteria.append(order_dic.values()) + criterion_sample_indexes = [ + x for _, x in sorted(zip(criterion, range(self.length_data))) + ] - def value_updated(self, value: int) -> None: + self.criteria.append(criterion_sample_indexes) + + on_changed_function = partial( + self.value_updated, + widget=widget, + criterion_sample_indexes=criterion_sample_indexes, + ) + + widget.on_changed(on_changed_function) + + def value_updated( + self, + value: float, + widget: Widget, + criterion_sample_indexes: Sequence[int], + ) -> None: """ Update the graphs when a widget is clicked. Args: - value: current value of the widget. 
- """ - # Used to avoid entering in an etern loop - if self.is_updating is True: - return - self.is_updating = True + value: Current value of the widget. + widget: Current widget. + criterion_sample_indexes: Sample numbers ordered using the + criterion. + """ # Make the changes of the slider discrete index = int(int(value / 0.5) * 0.5) old_index = self.index_clicked - self.index_clicked = list(self.criteria[self.widget_index])[index] - self.sliders[self.widget_index].valtext.set_text(f'{index}') + self.index_clicked = criterion_sample_indexes[index] + widget.valtext.set_text(f'{index}') # Update the other sliders values - for i, (c, s) in enumerate(zip(self.criteria, self.sliders)): - if i != self.widget_index: + for c, s in zip(self.criteria, self.sliders): + if s is not widget: val_widget = list(c).index(self.index_clicked) - s.set_val(val_widget) - - self.is_updating = False + _set_val_noevents(s, val_widget) self.clicked = True if old_index == -1: From 7452dca574ce6699bfa296895de6aa112725d437 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 26 Aug 2021 04:10:43 +0200 Subject: [PATCH 402/417] Simplified code for MultipleDisplay. 
--- .../visualization/_multiple_display.py | 211 ++++++------------ 1 file changed, 67 insertions(+), 144 deletions(-) diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index fbc414f59..6586ec073 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -1,6 +1,15 @@ -import collections import copy -from typing import List, Optional, Sequence, Tuple, Type, Union, cast +from functools import partial +from typing import ( + Generator, + List, + Optional, + Sequence, + Tuple, + Type, + Union, + cast, +) import numpy as np from matplotlib.artist import Artist @@ -11,8 +20,6 @@ from matplotlib.text import Annotation from matplotlib.widgets import Slider, Widget -from _functools import partial - from ._baseplot import BasePlot from ._utils import _get_axes_shape, _get_figure_and_axes, _set_figure_layout @@ -51,10 +58,9 @@ class MultipleDisplay: initialized. axes: Axis where the graphs are plotted. If None, see param fig. Attributes: - point_clicked: Artist object containing the last point clicked. length_data: Number of instances or curves of the different displays. clicked: Boolean indicating whether a point has being clicked. - index_clicked: Index of the function selected with the interactive + selected_sample: Index of the function selected with the interactive module or widgets. 
""" @@ -72,13 +78,11 @@ def __init__( displays = (displays,) self.displays = [copy.copy(d) for d in displays] - self.point_clicked: Artist = None self._n_graphs = sum(len(d.axes) for d in self.displays) self.length_data = self.displays[0].n_samples() self.sliders: List[Widget] = [] self.criteria: List[List[int]] = [] - self.clicked = False - self.index_clicked = -1 + self.selected_sample: Optional[int] = None self._tag = self._create_annotation() if len(criteria) != 0 and not isinstance(criteria[0], Sequence): @@ -179,14 +183,14 @@ def _create_sliders( "Slider criteria should be of the same size as data", ) - for k, criterium in enumerate(criteria): + for k, criterion in enumerate(criteria): label = label_sliders[k] if label_sliders else None self.add_slider( - k, - criterium, - sliders[k], - label, + axes=self.axes[self._n_graphs + k], + criterion=criterion, + widget_class=sliders[k], + label=label, ) def _create_annotation(self) -> Annotation: @@ -356,149 +360,88 @@ def pick(self, event: Event) -> None: event: event object containing the artist of the point picked. 
""" - if self.clicked: - self.point_clicked = event.artist - self.change_points_intensity() - self.clicked = False - elif self.point_clicked is None: - self.point_clicked = event.artist - self.update_index_display_picked() - self.reduce_points_intensity() - elif self.point_clicked == event.artist: - self.restore_points_intensity() - else: - self.point_clicked = event.artist - self.change_points_intensity() - - def update_index_display_picked(self) -> None: - """Update the index corresponding to the display picked.""" + selected_sample = self._sample_from_artist(event.artist) + + if selected_sample is not None: + if self.selected_sample == selected_sample: + self._deselect_samples() + else: + self._select_sample(selected_sample) + + def _sample_from_artist(self, artist: Artist) -> Optional[int]: + """Return the sample corresponding to an artist.""" for d in self.displays: for i, a in enumerate(d.axes): - if a == self.point_clicked.axes: + if a == artist.axes: if len(d.axes) == 1: - self.index_clicked = np.where( - d.artists == self.point_clicked, - )[0][0] + return np.where(d.artists == artist)[0][0] else: - self.index_clicked = np.where( - d.artists[:, i] == self.point_clicked, - )[0][0] - return + return np.where(d.artists[:, i] == artist)[0][0] - def reduce_points_intensity(self) -> None: - """Reduce the transparency of all the points but the selected one.""" - for i in range(self.length_data): - if i != self.index_clicked: - for d in self.displays: - for artist in np.ravel(d.artists[i]): - artist.set_alpha(0.1) - - for criterium, slider in zip(self.criteria, self.sliders): - val_widget = list(criterium).index(self.index_clicked) - _set_val_noevents(slider, val_widget) + return None - def restore_points_intensity(self) -> None: - """Restore the original transparency of all the points.""" + def _visit_artists(self) -> Generator[Tuple[int, Artist], None, None]: for i in range(self.length_data): for d in self.displays: - for artist in np.ravel(d.artists[i]): - 
artist.set_alpha(1) - - self.point_clicked = None - self.index_clicked = -1 - - for j in range(len(self.sliders)): - _set_val_noevents(self.sliders[j], 0) - - def change_points_intensity( - self, - old_index: Union[int, None] = None, - ) -> None: - """ - Change the intensity of the points. + yield from ((i, artist) for artist in np.ravel(d.artists[i])) - Changes the intensity of the points, the highlighted one now - will be the selected one and the one with old_index with have - its transparency increased. - Args: - old_index: index of the last point clicked, as it should - reduce its transparency. - """ - if old_index is None: - old_index = self.index_clicked - self.update_index_display_picked() - - if self.index_clicked == old_index: - self.restore_points_intensity() - return - - for i in range(self.length_data): - if i == self.index_clicked: - intensity = 1.0 - elif i == old_index: - intensity = 0.1 - else: - intensity = -1 - - if intensity != -1: - self.change_display_intensity(i, intensity) + def _select_sample(self, selected_sample: int) -> None: + """Reduce the transparency of all the points but the selected one.""" + for i, artist in self._visit_artists(): + artist.set_alpha(1.0 if i == selected_sample else 0.1) - for criterium, slider in zip(self.criteria, self.sliders): - val_widget = list(criterium).index(self.index_clicked) + for criterion, slider in zip(self.criteria, self.sliders): + val_widget = criterion.index(selected_sample) _set_val_noevents(slider, val_widget) - def change_display_intensity(self, index: int, intensity: float) -> None: - """ - Change the intensity of the point selected by index in every display. + self.selected_sample = selected_sample + self.fig.canvas.draw_idle() - Args: - index: index of the last point clicked, as it should - reduce its transparency. - intensity: new intensity of the points. 
- """ - for d in self.displays: - if len(d.artists) != 0: - for artist in np.ravel(d.artists[index]): - artist.set_alpha(intensity) + def _deselect_samples(self) -> None: + """Restore the original transparency of all the points.""" + for _, artist in self._visit_artists(): + artist.set_alpha(1) + + self.selected_sample = None + self.fig.canvas.draw_idle() def add_slider( self, - ind_ax: int, + axes: Axes, criterion: Sequence[float], - widget_func: Type[Widget] = Slider, - label_slider: Optional[str] = None, + widget_class: Type[Widget] = Slider, + label: Optional[str] = None, ) -> None: """ Add the slider to the MultipleDisplay object. Args: - ind_ax: index of the selected ax for the widget. - criterion: criterion used for the slider. - widget_func: widget type. - label_slider: names of the slider. + axes: Axes for the widget. + criterion: Criterion used for the slider. + widget_class: Widget type. + label: Name of the slider. """ - full_desc = ( - f"Filter ({ind_ax}))" if label_slider is None else label_slider - ) + full_desc = "" if label is None else label - widget = widget_func( - ax=self.axes[self._n_graphs + ind_ax], + widget = widget_class( + ax=axes, label=full_desc, valmin=0, valmax=self.length_data - 1, valinit=0, + valstep=1, ) self.sliders.append(widget) - self.axes[self._n_graphs + ind_ax].annotate( + axes.annotate( '0', xy=(0, -0.5), xycoords='axes fraction', annotation_clip=False, ) - self.axes[self._n_graphs + ind_ax].annotate( + axes.annotate( str(self.length_data - 1), xy=(0.95, -0.5), xycoords='axes fraction', @@ -512,17 +455,15 @@ def add_slider( self.criteria.append(criterion_sample_indexes) on_changed_function = partial( - self.value_updated, - widget=widget, + self._value_updated, criterion_sample_indexes=criterion_sample_indexes, ) widget.on_changed(on_changed_function) - def value_updated( + def _value_updated( self, - value: float, - widget: Widget, + value: int, criterion_sample_indexes: Sequence[int], ) -> None: """ @@ -530,27 +471,9 @@ 
def value_updated( Args: value: Current value of the widget. - widget: Current widget. criterion_sample_indexes: Sample numbers ordered using the criterion. """ - # Make the changes of the slider discrete - index = int(int(value / 0.5) * 0.5) - old_index = self.index_clicked - self.index_clicked = criterion_sample_indexes[index] - widget.valtext.set_text(f'{index}') - - # Update the other sliders values - for c, s in zip(self.criteria, self.sliders): - if s is not widget: - val_widget = list(c).index(self.index_clicked) - _set_val_noevents(s, val_widget) - - self.clicked = True - if old_index == -1: - self.reduce_points_intensity() - else: - if self.index_clicked == old_index: - self.clicked = False - self.change_points_intensity(old_index=old_index) + self.selected_sample = criterion_sample_indexes[value] + self._select_sample(self.selected_sample) From c7a650cf150f1154f107260d27b9005242ec9c08 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 26 Aug 2021 12:13:13 +0200 Subject: [PATCH 403/417] Remove mpldatacursor dependency. 
--- README.rst | 1 - docs/conf.py | 1 - readthedocs-requirements.txt | 1 - requirements.txt | 1 - setup.py | 1 - 5 files changed, 5 deletions(-) diff --git a/README.rst b/README.rst index 774935851..c641b7691 100644 --- a/README.rst +++ b/README.rst @@ -65,7 +65,6 @@ Requirements * `fdasrsf `_ - SRSF framework * `findiff `_ - Finite differences * `matplotlib `_ - Plotting with Python -* `mpldatacursor `_ - Interactive data cursors for matplotlib * `multimethod `_ - Multiple dispatch * `numpy `_ - The fundamental package for scientific computing with Python * `pandas `_ - Powerful Python data analysis toolkit diff --git a/docs/conf.py b/docs/conf.py index dec8905cb..a59de0212 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -227,7 +227,6 @@ 'sklearn': ('https://scikit-learn.org/stable', None), 'matplotlib': ('https://matplotlib.org/', None), 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), - 'mpldatacursor': ('https://pypi.org/project/mpldatacursor/', None), } diff --git a/readthedocs-requirements.txt b/readthedocs-requirements.txt index d279be0c5..3b1d1b9d5 100644 --- a/readthedocs-requirements.txt +++ b/readthedocs-requirements.txt @@ -8,7 +8,6 @@ sphinx_rtd_theme sphinx-gallery pillow matplotlib -mpldatacursor setuptools>=41.2 multimethod>=1.2 findiff diff --git a/requirements.txt b/requirements.txt index 29588726f..78c981489 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,5 @@ scipy setuptools Cython sklearn -mpldatacursor multimethod>=1.2 findiff diff --git a/setup.py b/setup.py index 7a8096766..6aa5b1489 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,6 @@ 'fdasrsf>=2.2.0', 'findiff', 'matplotlib', - 'mpldatacursor', 'multimethod>=1.5', 'numpy>=1.16', 'pandas', From b5b33b2b06e1a975a86d7b679674c79f00486e16 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 26 Aug 2021 13:37:18 +0200 Subject: [PATCH 404/417] Add py.typed. 
--- MANIFEST.in | 1 + skfda/py.typed | 0 2 files changed, 1 insertion(+) create mode 100644 skfda/py.typed diff --git a/MANIFEST.in b/MANIFEST.in index 156bbd1f6..9ebce7b6f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,7 @@ include README.rst include MANIFEST.in include VERSION +include skfda/py.typed include pyproject.toml include *.txt recursive-include deps * diff --git a/skfda/py.typed b/skfda/py.typed new file mode 100644 index 000000000..e69de29bb From 28de0713a11e2edb7d6a3c1037c17a54275f78ab Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 26 Aug 2021 13:38:58 +0200 Subject: [PATCH 405/417] Add typed TROVE classifier. --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 6aa5b1489..cba429bfb 100644 --- a/setup.py +++ b/setup.py @@ -57,6 +57,7 @@ 'Programming Language :: Python :: 3.8', 'Topic :: Scientific/Engineering :: Mathematics', 'Topic :: Software Development :: Libraries :: Python Modules', + 'Typing :: Typed', ], install_requires=[ 'cython', From ae8a25ae38188f92b323f2bde39fc743b8f897d0 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Fri, 27 Aug 2021 01:44:44 +0200 Subject: [PATCH 406/417] Improve boxplot. 
--- setup.cfg | 6 +- skfda/exploratory/outliers/_envelopes.py | 30 +- skfda/exploratory/outliers/_iqr.py | 8 +- skfda/exploratory/visualization/_baseplot.py | 5 + skfda/exploratory/visualization/_boxplot.py | 612 ++++++++++++------- skfda/representation/_typing.py | 2 + 6 files changed, 409 insertions(+), 254 deletions(-) diff --git a/setup.cfg b/setup.cfg index c9bffddde..dcd639462 100644 --- a/setup.cfg +++ b/setup.cfg @@ -102,11 +102,13 @@ per-file-ignores = rst-directives = # These are sorted alphabetically - but that does not matter autosummary,data,currentmodule,deprecated, - footbibliography,glossary,moduleauthor,plot,testcode, + footbibliography,glossary, + jupyter-execute, + moduleauthor,plot,testcode, versionadded,versionchanged, rst-roles = - attr,class,doc,func,meth,mod,obj,ref,term, + attr,class,doc,footcite,func,meth,mod,obj,ref,term, allowed-domain-names = data, obj, result, results, val, value, values, var diff --git a/skfda/exploratory/outliers/_envelopes.py b/skfda/exploratory/outliers/_envelopes.py index d233579fe..22f8a3a46 100644 --- a/skfda/exploratory/outliers/_envelopes.py +++ b/skfda/exploratory/outliers/_envelopes.py @@ -6,33 +6,40 @@ import numpy as np from ...representation import FDataGrid +from ...representation._typing import NDArrayBool, NDArrayFloat, NDArrayInt -def _compute_region( +def compute_region( fdatagrid: FDataGrid, - indices_descending_depth: np.ndarray, + indices_descending_depth: NDArrayInt, prob: float, ) -> FDataGrid: + """Compute central region of a given quantile.""" indices_samples = indices_descending_depth[ :math.ceil(fdatagrid.n_samples * prob) ] return fdatagrid[indices_samples] -def _compute_envelope(region: FDataGrid) -> Tuple[np.ndarray, np.ndarray]: +def compute_envelope(region: FDataGrid) -> Tuple[NDArrayFloat, NDArrayFloat]: + """Compute curves comprising a region.""" max_envelope = np.max(region.data_matrix, axis=0) min_envelope = np.min(region.data_matrix, axis=0) return min_envelope, max_envelope -def 
_predict_outliers( +def predict_outliers( fdatagrid: FDataGrid, - non_outlying_threshold: Tuple[np.ndarray, np.ndarray], -) -> np.ndarray: - # A functional datum is considered an outlier if it has ANY point - # in ANY dimension outside the envelope for inliers + non_outlying_threshold: Tuple[NDArrayFloat, NDArrayFloat], +) -> NDArrayBool: + """ + Predict outliers given a threshold. + A functional datum is considered an outlier if it has ANY point + in ANY dimension outside the envelope for inliers. + + """ min_threshold, max_threshold = non_outlying_threshold or_axes = tuple(i for i in range(1, fdatagrid.data_matrix.ndim)) @@ -49,10 +56,11 @@ def _predict_outliers( return below_outliers | above_outliers -def _non_outlying_threshold( - central_envelope: Tuple[np.ndarray, np.ndarray], +def non_outlying_threshold( + central_envelope: Tuple[NDArrayFloat, NDArrayFloat], factor: float, -) -> Tuple[np.ndarray, np.ndarray]: +) -> Tuple[NDArrayFloat, NDArrayFloat]: + """Compute a non outlying threshold.""" iqr = central_envelope[1] - central_envelope[0] non_outlying_threshold_max = central_envelope[1] + iqr * factor non_outlying_threshold_min = central_envelope[0] - iqr * factor diff --git a/skfda/exploratory/outliers/_iqr.py b/skfda/exploratory/outliers/_iqr.py index f54773c29..5f13518a1 100644 --- a/skfda/exploratory/outliers/_iqr.py +++ b/skfda/exploratory/outliers/_iqr.py @@ -61,18 +61,18 @@ def fit(self, X: FDataGrid, y: None = None) -> IQROutlierDetector: indices_descending_depth = (-depth).argsort(axis=0) # Central region and envelope must be computed for outlier detection - central_region = _envelopes._compute_region( + central_region = _envelopes.compute_region( X, indices_descending_depth, 0.5) - self._central_envelope = _envelopes._compute_envelope(central_region) + self._central_envelope = _envelopes.compute_envelope(central_region) # Non-outlying envelope - self.non_outlying_threshold_ = _envelopes._non_outlying_threshold( + self.non_outlying_threshold_ = 
_envelopes.non_outlying_threshold( self._central_envelope, self.factor) return self def predict(self, X: FDataGrid) -> np.ndarray: - outliers = _envelopes._predict_outliers( + outliers = _envelopes.predict_outliers( X, self.non_outlying_threshold_, ) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index 803f7ede3..620d4bf41 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -51,6 +51,11 @@ def plot( """ pass + @property + def n_subplots(self) -> int: + """Get the number of subplots that this plot uses.""" + return 1 + @abstractmethod def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 3a1d71628..240eb9eee 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -4,21 +4,24 @@ visualize it. """ +from __future__ import annotations + import math from abc import ABC, abstractmethod -from typing import Optional, Sequence, Union +from typing import Optional, Sequence, Tuple, Union import matplotlib import matplotlib.pyplot as plt import numpy as np from matplotlib.artist import Artist from matplotlib.axes import Axes +from matplotlib.colors import Colormap from matplotlib.figure import Figure from skfda.exploratory.depth.multivariate import Depth -from ... import FData -from ... import FDataGrid +from ... import FData, FDataGrid +from ...representation._typing import NDArrayBool, NDArrayFloat from ..depth import ModifiedBandDepth from ..outliers import _envelopes from ._baseplot import BasePlot @@ -28,12 +31,10 @@ _set_labels, ) -__author__ = "Amanda Hernando Bernabé" -__email__ = "amanda.hernando@estudiante.uam.es" - class FDataBoxplot(ABC): - """Abstract class inherited by the Boxplot and SurfaceBoxplot classes. 
+ """ + Abstract class inherited by the Boxplot and SurfaceBoxplot classes. It the data of the functional boxplot or surface boxplot of a FDataGrid object, depending on the dimensions of the :term:`domain`, 1 or 2 @@ -44,47 +45,53 @@ class FDataBoxplot(ABC): graphical representation, obtained calling the plot method. """ + @abstractmethod - def __init__(self, factor=1.5): + def __init__(self, factor: float = 1.5) -> None: if factor < 0: - raise ValueError("The number used to calculate the " - "outlying envelope must be positive.") + raise ValueError( + "The number used to calculate the " + "outlying envelope must be positive.", + ) self._factor = factor @property - def factor(self): + def factor(self) -> float: return self._factor @property - def fdatagrid(self): + def fdatagrid(self) -> FDataGrid: pass @property - def median(self): + def median(self) -> NDArrayFloat: pass @property - def central_envelope(self): + def central_envelope(self) -> Tuple[NDArrayFloat, NDArrayFloat]: pass @property - def non_outlying_envelope(self): + def non_outlying_envelope(self) -> Tuple[NDArrayFloat, NDArrayFloat]: pass @property - def colormap(self): + def colormap(self) -> Colormap: return self._colormap @colormap.setter - def colormap(self, value): + def colormap(self, value: Colormap) -> None: if not isinstance(value, matplotlib.colors.LinearSegmentedColormap): - raise ValueError("colormap must be of type " - "matplotlib.colors.LinearSegmentedColormap") + raise ValueError( + "colormap must be of type " + "matplotlib.colors.LinearSegmentedColormap", + ) self._colormap = value class Boxplot(FDataBoxplot, BasePlot): - r"""Representation of the functional boxplot. + r""" + Representation of the functional boxplot. 
Class implementing the functionl boxplot which is an informative exploratory tool for visualizing functional data, as well as its @@ -98,39 +105,44 @@ class Boxplot(FDataBoxplot, BasePlot): detected in a functional boxplot by the 1.5 times the 50% central region empirical rule, analogous to the rule for classical boxplots. - Args: + For more information see :footcite:ts:`sun+genton_2011_boxplots`. - fdatagrid (FDataGrid): Object containing the data. - depth_method (:ref:`depth measure `, optional): - Method used to order the data. Defaults to :func:`modified - band depth - `. - prob (list of float, optional): List with float numbers (in the - range from 1 to 0) that indicate which central regions to - represent. - Defaults to [0.5] which represents the 50% central region. - factor (double): Number used to calculate the outlying envelope. + Args: + fdatagrid: Object containing the data. + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + depth_method: Method used to order the data. Defaults to + :func:`~skfda.exploratory.depth.ModifiedBandDepth`. + prob: List with float numbers (in the range from 1 to 0) that + indicate which central regions to represent. + Defaults to (0.5,) which represents the 50% central region. + factor: Number used to calculate the outlying envelope. + fig: figure over with the graphs are + plotted in case ax is not specified. If None and ax is also + None, the figure is initialized. + axes: axis over where the graphs + are plotted. If None, see param fig. + n_rows: designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols: designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. Attributes: - - fdatagrid (FDataGrid): Object containing the data. 
- median (array, (fdatagrid.dim_codomain, ngrid_points)): contains - the median/s. - central_envelope (array, (fdatagrid.dim_codomain, 2, ngrid_points)): - contains the central envelope/s. - non_outlying_envelope (array, (fdatagrid.dim_codomain, 2, - ngrid_points)): - contains the non-outlying envelope/s. - colormap (matplotlib.colors.LinearSegmentedColormap): Colormap from - which the colors to represent the central regions are selected. - envelopes (array, (fdatagrid.dim_codomain * ncentral_regions, 2, - ngrid_points)): contains the region envelopes. - outliers (array, (fdatagrid.dim_codomain, fdatagrid.n_samples)): - contains the outliers. - barcol (string): Color of the envelopes and vertical lines. - outliercol (string): Color of the ouliers. - mediancol (string): Color of the median. - show_full_outliers (boolean): If False (the default) then only the part + fdatagrid: Object containing the data. + median: Contains the median/s. + central_envelope: Contains the central envelope/s. + non_outlying_envelope: Contains the non-outlying envelope/s. + colormap: Colormap from which the colors to represent the + central regions are selected. + envelopes: Contains the region envelopes. + outliers: Contains the outliers. + barcol: Color of the envelopes and vertical lines. + outliercol: Color of the ouliers. + mediancol: Color of the median. + show_full_outliers: If False (the default) then only the part outside the box is plotted. If True, complete outling curves are plotted. @@ -149,7 +161,6 @@ class Boxplot(FDataBoxplot, BasePlot): Examples: - Function :math:`f : \mathbb{R}\longmapsto\mathbb{R}`. >>> from skfda import FDataGrid @@ -238,27 +249,24 @@ class Boxplot(FDataBoxplot, BasePlot): outliers=array([ True, False, False, True])) References: - .. bibliography:: - - sun+genton_2011_boxplots - + .. 
footbibliography:: """ def __init__( self, fdatagrid: FData, - depth_method: Optional[Depth[FDataGrid]] = None, - prob: Sequence[float] = [0.5], - factor: float = 1.5, chart: Union[Figure, Axes, None] = None, *, + depth_method: Optional[Depth[FDataGrid]] = None, + prob: Sequence[float] = (0.5,), + factor: float = 1.5, fig: Optional[Figure] = None, axes: Optional[Axes] = None, n_rows: Optional[int] = None, n_cols: Optional[int] = None, ): - """Initialization of the Boxplot class. + """Initialize the Boxplot class. Args: fdatagrid: Object containing the data. @@ -291,47 +299,62 @@ def __init__( if fdatagrid.dim_domain != 1: raise ValueError( - "Function only supports FDataGrid with domain dimension 1.") + "Function only supports FDataGrid with domain dimension 1.", + ) - if sorted(prob, reverse=True) != prob: + if sorted(prob, reverse=True) != list(prob): raise ValueError( - "Probabilities required to be in descending order.") + "Probabilities required to be in descending order.", + ) if min(prob) < 0 or max(prob) > 1: raise ValueError("Probabilities must be between 0 and 1.") - self._envelopes = [None] * len(prob) - if depth_method is None: depth_method = ModifiedBandDepth() depth = depth_method(fdatagrid) indices_descending_depth = (-depth).argsort(axis=0) # The median is the deepest curve - self._median = fdatagrid[indices_descending_depth[0] - ].data_matrix[0, ...] + median_fdata = fdatagrid[indices_descending_depth[0]] + self._median = median_fdata.data_matrix[0, ...] 
# Central region and envelope must be computed for outlier detection - central_region = _envelopes._compute_region( - fdatagrid, indices_descending_depth, 0.5) - self._central_envelope = _envelopes._compute_envelope(central_region) + central_region = _envelopes.compute_region( + fdatagrid, + indices_descending_depth, + 0.5, + ) + self._central_envelope = _envelopes.compute_envelope(central_region) # Non-outlying envelope - non_outlying_threshold = _envelopes._non_outlying_threshold( - self._central_envelope, factor) - predicted_outliers = _envelopes._predict_outliers( - fdatagrid, non_outlying_threshold) + non_outlying_threshold = _envelopes.non_outlying_threshold( + self._central_envelope, + factor, + ) + predicted_outliers = _envelopes.predict_outliers( + fdatagrid, + non_outlying_threshold, + ) inliers = fdatagrid[predicted_outliers == 0] - self._non_outlying_envelope = _envelopes._compute_envelope(inliers) + self._non_outlying_envelope = _envelopes.compute_envelope(inliers) # Outliers - self._outliers = _envelopes._predict_outliers( - fdatagrid, self._non_outlying_envelope) + self._outliers = _envelopes.predict_outliers( + fdatagrid, + self._non_outlying_envelope, + ) - for i, p in enumerate(prob): - region = _envelopes._compute_region( - fdatagrid, indices_descending_depth, p) - self._envelopes[i] = _envelopes._compute_envelope(region) + self._envelopes = [ + _envelopes.compute_envelope( + _envelopes.compute_region( + fdatagrid, + indices_descending_depth, + p, + ), + ) + for p in prob + ] self._fdatagrid = fdatagrid self._prob = prob @@ -344,35 +367,35 @@ def __init__( self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) @property - def fdatagrid(self): + def fdatagrid(self) -> FDataGrid: return self._fdatagrid @property - def median(self): + def median(self) -> NDArrayFloat: return self._median @property - def central_envelope(self): + def central_envelope(self) -> Tuple[NDArrayFloat, NDArrayFloat]: return self._central_envelope @property - def 
non_outlying_envelope(self): + def non_outlying_envelope(self) -> Tuple[NDArrayFloat, NDArrayFloat]: return self._non_outlying_envelope @property - def envelopes(self): + def envelopes(self) -> Sequence[Tuple[NDArrayFloat, NDArrayFloat]]: return self._envelopes @property - def outliers(self): + def outliers(self) -> NDArrayBool: return self._outliers @property - def show_full_outliers(self): + def show_full_outliers(self) -> bool: return self._show_full_outliers @show_full_outliers.setter - def show_full_outliers(self, boolean): + def show_full_outliers(self, boolean: bool) -> None: if not isinstance(boolean, bool): raise ValueError("show_full_outliers must be boolean type") self._show_full_outliers = boolean @@ -396,18 +419,21 @@ def _set_figure_and_axes( self.fig = fig self.axes = axes + @property + def n_subplots(self) -> int: + return self.fdatagrid.dim_codomain + def n_samples(self) -> int: return self.fdatagrid.n_samples - def plot(self): - """Visualization of the functional boxplot of the fdatagrid - (dim_domain=1). + def plot(self) -> Figure: + """ + Visualize the functional boxplot of the fdatagrid (dim_domain=1). Returns: - fig: figure object in which the graphs are plotted. + Figure object in which the graphs are plotted. 
""" - self.artists = np.zeros((self.n_samples(), 1), dtype=Artist) tones = np.linspace(0.1, 1.0, len(self._prob) + 1, endpoint=False)[1:] color = self.colormap(tones) @@ -418,75 +444,101 @@ def plot(self): var_zorder = 4 outliers = self.fdatagrid[self.outliers] - index_outliers = np.where(self.outliers == True)[0] - for m in range(self.fdatagrid.dim_codomain): + grid_points = self.fdatagrid.grid_points[0] + + for m, axes in enumerate(self.axes): # Outliers for o in outliers: - self.axes[m].plot( - o.grid_points[0], + axes.plot( + grid_points, o.data_matrix[0, :, m], color=self.outliercol, linestyle='--', zorder=1, ) - for i in range(len(self._prob)): + for envelop, col in zip(self.envelopes, color): # central regions - self.axes[m].fill_between(self.fdatagrid.grid_points[0], - self.envelopes[i][0][..., m], - self.envelopes[i][1][..., m], - facecolor=color[i], zorder=var_zorder) + axes.fill_between( + grid_points, + envelop[0][..., m], + envelop[1][..., m], + facecolor=col, + zorder=var_zorder, + ) # outlying envelope - self.axes[m].plot(self.fdatagrid.grid_points[0], - self.non_outlying_envelope[0][..., m], - self.fdatagrid.grid_points[0], - self.non_outlying_envelope[1][..., m], - color=self.barcol, zorder=4) + axes.plot( + grid_points, + self.non_outlying_envelope[0][..., m], + grid_points, + self.non_outlying_envelope[1][..., m], + color=self.barcol, + zorder=4, + ) # central envelope - self.axes[m].plot(self.fdatagrid.grid_points[0], - self.central_envelope[0][..., m], - self.fdatagrid.grid_points[0], - self.central_envelope[1][..., m], - color=self.barcol, zorder=4) + axes.plot( + grid_points, + self.central_envelope[0][..., m], + grid_points, + self.central_envelope[1][..., m], + color=self.barcol, + zorder=4, + ) # vertical lines - index = math.ceil(len(self.fdatagrid.grid_points[0]) / 2) - x = self.fdatagrid.grid_points[0][index] - self.axes[m].plot([x, x], - [self.non_outlying_envelope[0][..., m][index], - self.central_envelope[0][..., m][index]], - 
color=self.barcol, - zorder=4) - self.axes[m].plot([x, x], - [self.non_outlying_envelope[1][..., m][index], - self.central_envelope[1][..., m][index]], - color=self.barcol, zorder=4) + index = math.ceil(len(grid_points) / 2) + x = grid_points[index] + axes.plot( + [x, x], + [ + self.non_outlying_envelope[0][..., m][index], + self.central_envelope[0][..., m][index], + ], + color=self.barcol, + zorder=4, + ) + axes.plot( + [x, x], + [ + self.non_outlying_envelope[1][..., m][index], + self.central_envelope[1][..., m][index], + ], + color=self.barcol, + zorder=4, + ) # median sample - self.axes[m].plot(self.fdatagrid.grid_points[0], self.median[..., m], - color=self.mediancol, zorder=5) + axes.plot( + grid_points, + self.median[..., m], + color=self.mediancol, + zorder=5, + ) _set_labels(self.fdatagrid, self.fig, self.axes) return self.fig - def __repr__(self): + def __repr__(self) -> str: """Return repr(self).""" - return (f"Boxplot(" - f"\nFDataGrid={repr(self.fdatagrid)}," - f"\nmedian={repr(self.median)}," - f"\ncentral envelope={repr(self.central_envelope)}," - f"\nnon-outlying envelope={repr(self.non_outlying_envelope)}," - f"\nenvelopes={repr(self.envelopes)}," - f"\noutliers={repr(self.outliers)})").replace('\n', '\n ') + return ( + f"Boxplot(" + f"\nFDataGrid={repr(self.fdatagrid)}," + f"\nmedian={repr(self.median)}," + f"\ncentral envelope={repr(self.central_envelope)}," + f"\nnon-outlying envelope={repr(self.non_outlying_envelope)}," + f"\nenvelopes={repr(self.envelopes)}," + f"\noutliers={repr(self.outliers)})" + ).replace('\n', '\n ') class SurfaceBoxplot(FDataBoxplot): - r"""Representation of the surface boxplot. + r""" + Representation of the surface boxplot. Class implementing the surface boxplot. Analogously to the functional boxplot, it is an informative exploratory tool for visualizing functional @@ -500,7 +552,6 @@ class SurfaceBoxplot(FDataBoxplot): envelope :footcite:`sun+genton_2011_boxplots`. Args: - fdatagrid: Object containing the data. 
method: Method used to order the data. Defaults to :class:`modified band depth @@ -512,7 +563,6 @@ class SurfaceBoxplot(FDataBoxplot): factor: Number used to calculate the outlying envelope. Attributes: - fdatagrid: Object containing the data. median: contains the median/s. @@ -525,7 +575,6 @@ class SurfaceBoxplot(FDataBoxplot): outcol: Color of the outlying envelope. Examples: - Function :math:`f : \mathbb{R^2}\longmapsto\mathbb{R}`. >>> from skfda import FDataGrid @@ -595,13 +644,19 @@ class SurfaceBoxplot(FDataBoxplot): """ - def __init__(self, fdatagrid, method=ModifiedBandDepth(), factor=1.5): - """Initialization of the functional boxplot. + def __init__( + self, + fdatagrid: FDataGrid, + depth_method: Optional[Depth[FDataGrid]] = None, + factor: float = 1.5, + ) -> None: + """ + Initialize the functional boxplot. Args: fdatagrid: Object containing the data. - method: Method used to order the data. Defaults to :class:`modified - band depth `. + depth_method: Method used to order the data. Defaults to + :class:`~skfda.exploratory.depth.ModifiedBandDepth`. factor: Number used to calculate the outlying envelope. 
""" @@ -609,26 +664,37 @@ def __init__(self, fdatagrid, method=ModifiedBandDepth(), factor=1.5): if fdatagrid.dim_domain != 2: raise ValueError( - "Class only supports FDataGrid with domain dimension 2.") + "Class only supports FDataGrid with domain dimension 2.", + ) - depth = method(fdatagrid) + if depth_method is None: + depth_method = ModifiedBandDepth() + + depth = depth_method(fdatagrid) indices_descending_depth = (-depth).argsort(axis=0) # The mean is the deepest curve self._median = fdatagrid.data_matrix[indices_descending_depth[0]] # Central region and envelope must be computed for outlier detection - central_region = _envelopes._compute_region( - fdatagrid, indices_descending_depth, 0.5) - self._central_envelope = _envelopes._compute_envelope(central_region) + central_region = _envelopes.compute_region( + fdatagrid, + indices_descending_depth, + 0.5, + ) + self._central_envelope = _envelopes.compute_envelope(central_region) # Non-outlying envelope - non_outlying_threshold = _envelopes._non_outlying_threshold( - self._central_envelope, factor) - predicted_outliers = _envelopes._predict_outliers( - fdatagrid, non_outlying_threshold) + non_outlying_threshold = _envelopes.non_outlying_threshold( + self._central_envelope, + factor, + ) + predicted_outliers = _envelopes.predict_outliers( + fdatagrid, + non_outlying_threshold, + ) inliers = fdatagrid[predicted_outliers == 0] - self._non_outlying_envelope = _envelopes._compute_envelope(inliers) + self._non_outlying_envelope = _envelopes.compute_envelope(inliers) self._fdatagrid = fdatagrid self.colormap = plt.cm.get_cmap('Greys') @@ -636,68 +702,82 @@ def __init__(self, fdatagrid, method=ModifiedBandDepth(), factor=1.5): self._outcol = 0.7 @property - def fdatagrid(self): + def fdatagrid(self) -> FDataGrid: return self._fdatagrid @property - def median(self): + def median(self) -> NDArrayFloat: return self._median @property - def central_envelope(self): + def central_envelope(self) -> Tuple[NDArrayFloat, 
NDArrayFloat]: return self._central_envelope @property - def non_outlying_envelope(self): + def non_outlying_envelope(self) -> Tuple[NDArrayFloat, NDArrayFloat]: return self._non_outlying_envelope @property - def boxcol(self): + def boxcol(self) -> float: return self._boxcol @boxcol.setter - def boxcol(self, value): + def boxcol(self, value: float) -> None: if value < 0 or value > 1: - raise ValueError( - "boxcol must be a number between 0 and 1.") + raise ValueError("boxcol must be a number between 0 and 1.") self._boxcol = value @property - def outcol(self): + def outcol(self) -> float: return self._outcol @outcol.setter - def outcol(self, value): + def outcol(self, value: float) -> None: if value < 0 or value > 1: - raise ValueError( - "outcol must be a number between 0 and 1.") + raise ValueError("outcol must be a number between 0 and 1.") self._outcol = value - def plot(self, chart=None, *, fig=None, axes=None, - n_rows=None, n_cols=None): - """Visualization of the surface boxplot of the fdatagrid (dim_domain=2). + def plot( + self, + chart: Union[Figure, Axes, None] = None, + *, + fig: Optional[Figure] = None, + axes: Optional[Axes] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + ) -> Figure: + """ + Visualization of the surface boxplot of the fdatagrid (dim_domain=2). - Args: - fig: figure over with the graphs are plotted in case ax + Args: + chart: figure over with the graphs are plotted or axis over + where the graphs are plotted. If None and ax is also + None, the figure is initialized. + fig: figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes: axis over where the graphs are plotted. If None, + axes: axis over where the graphs are plotted. If None, see param fig. - n_rows: designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. 
- n_cols: designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. + n_rows: designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols: designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. Returns: - fig: figure object in which the graphs are plotted. + Figure object in which the graphs are plotted. """ fig, axes = _get_figure_and_axes(chart, fig, axes) fig, axes = _set_figure_layout_for_fdata( - self.fdatagrid, fig, axes, n_rows, n_cols) + self.fdatagrid, + fig, + axes, + n_rows, + n_cols, + ) x = self.fdatagrid.grid_points[0] lx = len(x) @@ -705,90 +785,148 @@ def plot(self, chart=None, *, fig=None, axes=None, ly = len(y) X, Y = np.meshgrid(x, y) - for m in range(self.fdatagrid.dim_codomain): + for m, ax in enumerate(axes): # mean sample - axes[m].plot_wireframe(X, Y, np.squeeze(self.median[..., m]).T, - rstride=ly, cstride=lx, - color=self.colormap(self.boxcol)) - axes[m].plot_surface(X, Y, np.squeeze(self.median[..., m]).T, - color=self.colormap(self.boxcol), alpha=0.8) + ax.plot_wireframe( + X, + Y, + np.squeeze(self.median[..., m]).T, + rstride=ly, + cstride=lx, + color=self.colormap(self.boxcol), + ) + ax.plot_surface( + X, + Y, + np.squeeze(self.median[..., m]).T, + color=self.colormap(self.boxcol), + alpha=0.8, + ) # central envelope - axes[m].plot_surface( - X, Y, np.squeeze(self.central_envelope[0][..., m]).T, - color=self.colormap(self.boxcol), alpha=0.5) - axes[m].plot_wireframe( - X, Y, np.squeeze(self.central_envelope[0][..., m]).T, - rstride=ly, cstride=lx, - color=self.colormap(self.boxcol)) - axes[m].plot_surface( - X, Y, np.squeeze(self.central_envelope[1][..., m]).T, - color=self.colormap(self.boxcol), alpha=0.5) - axes[m].plot_wireframe( - X, Y, np.squeeze(self.central_envelope[1][..., m]).T, - 
rstride=ly, cstride=lx, - color=self.colormap(self.boxcol)) + ax.plot_surface( + X, + Y, + np.squeeze(self.central_envelope[0][..., m]).T, + color=self.colormap(self.boxcol), + alpha=0.5, + ) + ax.plot_wireframe( + X, + Y, + np.squeeze(self.central_envelope[0][..., m]).T, + rstride=ly, + cstride=lx, + color=self.colormap(self.boxcol), + ) + ax.plot_surface( + X, + Y, + np.squeeze(self.central_envelope[1][..., m]).T, + color=self.colormap(self.boxcol), + alpha=0.5, + ) + ax.plot_wireframe( + X, + Y, + np.squeeze(self.central_envelope[1][..., m]).T, + rstride=ly, + cstride=lx, + color=self.colormap(self.boxcol), + ) # box vertical lines - for indices in [(0, 0), (0, ly - 1), (lx - 1, 0), - (lx - 1, ly - 1)]: + for indices in ( + (0, 0), + (0, ly - 1), + (lx - 1, 0), + (lx - 1, ly - 1), + ): x_corner = x[indices[0]] y_corner = y[indices[1]] - axes[m].plot( - [x_corner, x_corner], [y_corner, y_corner], + ax.plot( + [x_corner, x_corner], + [y_corner, y_corner], [ - self.central_envelope[1][..., m][indices[0], - indices[1]], - self.central_envelope[0][..., m][indices[0], - indices[1]]], - color=self.colormap(self.boxcol)) + self.central_envelope[1][..., m][ + indices[0], + indices[1], + ], + self.central_envelope[0][..., m][ + indices[0], + indices[1], + ], + ], + color=self.colormap(self.boxcol), + ) # outlying envelope - axes[m].plot_surface( - X, Y, + ax.plot_surface( + X, + Y, np.squeeze(self.non_outlying_envelope[0][..., m]).T, - color=self.colormap(self.outcol), alpha=0.3) - axes[m].plot_wireframe( - X, Y, + color=self.colormap(self.outcol), + alpha=0.3, + ) + ax.plot_wireframe( + X, + Y, np.squeeze(self.non_outlying_envelope[0][..., m]).T, - rstride=ly, cstride=lx, - color=self.colormap(self.outcol)) - axes[m].plot_surface( - X, Y, + rstride=ly, + cstride=lx, + color=self.colormap(self.outcol), + ) + ax.plot_surface( + X, + Y, np.squeeze(self.non_outlying_envelope[1][..., m]).T, - color=self.colormap(self.outcol), alpha=0.3) - axes[m].plot_wireframe( - X, Y, + 
color=self.colormap(self.outcol), + alpha=0.3, + ) + ax.plot_wireframe( + X, + Y, np.squeeze(self.non_outlying_envelope[1][..., m]).T, - rstride=ly, cstride=lx, - color=self.colormap(self.outcol)) + rstride=ly, + cstride=lx, + color=self.colormap(self.outcol), + ) # vertical lines from central to outlying envelope x_index = math.floor(lx / 2) x_central = x[x_index] y_index = math.floor(ly / 2) y_central = y[y_index] - axes[m].plot( - [x_central, x_central], [y_central, y_central], - [self.non_outlying_envelope[1][..., m][x_index, y_index], - self.central_envelope[1][..., m][x_index, y_index]], - color=self.colormap(self.boxcol)) - axes[m].plot( - [x_central, x_central], [y_central, y_central], - [self.non_outlying_envelope[0][..., m][x_index, y_index], - self.central_envelope[0][..., m][x_index, y_index]], - color=self.colormap(self.boxcol)) + ax.plot( + [x_central, x_central], + [y_central, y_central], + [ + self.non_outlying_envelope[1][..., m][x_index, y_index], + self.central_envelope[1][..., m][x_index, y_index], + ], + color=self.colormap(self.boxcol), + ) + ax.plot( + [x_central, x_central], + [y_central, y_central], + [ + self.non_outlying_envelope[0][..., m][x_index, y_index], + self.central_envelope[0][..., m][x_index, y_index], + ], + color=self.colormap(self.boxcol), + ) _set_labels(self.fdatagrid, fig, axes) return fig - def __repr__(self): + def __repr__(self) -> str: """Return repr(self).""" - return ((f"SurfaceBoxplot(" - f"\nFDataGrid={repr(self.fdatagrid)}," - f"\nmedian={repr(self.median)}," - f"\ncentral envelope={repr(self.central_envelope)}," - f"\noutlying envelope={repr(self.non_outlying_envelope)})") - .replace('\n', '\n ')) + return ( + f"SurfaceBoxplot(" + f"\nFDataGrid={repr(self.fdatagrid)}," + f"\nmedian={repr(self.median)}," + f"\ncentral envelope={repr(self.central_envelope)}," + f"\noutlying envelope={repr(self.non_outlying_envelope)})" + ).replace('\n', '\n ') diff --git a/skfda/representation/_typing.py 
b/skfda/representation/_typing.py index b246901d0..881ef6530 100644 --- a/skfda/representation/_typing.py +++ b/skfda/representation/_typing.py @@ -14,11 +14,13 @@ NDArrayAny = NDArray[Any] NDArrayInt = NDArray[np.int_] NDArrayFloat = NDArray[np.float_] + NDArrayBool = NDArray[np.bool_] except ImportError: NDArray = np.ndarray # type:ignore NDArrayAny = np.ndarray # type:ignore NDArrayInt = np.ndarray # type:ignore NDArrayFloat = np.ndarray # type:ignore + NDArrayBool = np.ndarray # type:ignore VectorType = TypeVar("VectorType") From cf71fad434d49d06b34308705dd78e48ce1c96c6 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 30 Aug 2021 03:15:50 +0200 Subject: [PATCH 407/417] Typing magnitude shape plot. --- .../outliers/_directional_outlyingness.py | 212 ++++++++++++------ .../visualization/_magnitude_shape_plot.py | 106 +++++---- 2 files changed, 195 insertions(+), 123 deletions(-) diff --git a/skfda/exploratory/outliers/_directional_outlyingness.py b/skfda/exploratory/outliers/_directional_outlyingness.py index 91f917fe1..3276f9d4b 100644 --- a/skfda/exploratory/outliers/_directional_outlyingness.py +++ b/skfda/exploratory/outliers/_directional_outlyingness.py @@ -1,30 +1,36 @@ -import typing +from __future__ import annotations + +from typing import NamedTuple, Optional, Tuple import numpy as np import scipy.integrate import scipy.stats from numpy import linalg as la -from scipy.stats import f from sklearn.base import BaseEstimator, OutlierMixin from sklearn.covariance import MinCovDet -from skfda.exploratory.depth.multivariate import ProjectionDepth +from skfda.exploratory.depth.multivariate import Depth, ProjectionDepth from ... 
import FDataGrid +from ..._utils import RandomStateLike +from ...representation._typing import NDArrayFloat, NDArrayInt -class DirectionalOutlyingnessStats(typing.NamedTuple): - directional_outlyingness: np.ndarray - functional_directional_outlyingness: np.ndarray - mean_directional_outlyingness: np.ndarray - variation_directional_outlyingness: np.ndarray +class DirectionalOutlyingnessStats(NamedTuple): + directional_outlyingness: NDArrayFloat + functional_directional_outlyingness: NDArrayFloat + mean_directional_outlyingness: NDArrayFloat + variation_directional_outlyingness: NDArrayFloat def directional_outlyingness_stats( - fdatagrid: FDataGrid, *, - multivariate_depth=ProjectionDepth(), - pointwise_weights=None) -> DirectionalOutlyingnessStats: - r"""Computes the directional outlyingness of the functional data. + fdatagrid: FDataGrid, + *, + multivariate_depth: Optional[Depth[NDArrayFloat]] = None, + pointwise_weights: Optional[NDArrayFloat] = None, +) -> DirectionalOutlyingnessStats: + r""" + Compute the directional outlyingness of the functional data. 
Furthermore, it calculates functional, mean and the variational directional outlyingness of the samples in the data set, which are also @@ -150,17 +156,27 @@ def directional_outlyingness_stats( if fdatagrid.dim_domain > 1: raise NotImplementedError("Only support 1 dimension on the domain.") - if (pointwise_weights is not None and - (len(pointwise_weights) != len(fdatagrid.grid_points[0]) or - pointwise_weights.sum() != 1)): + if multivariate_depth is None: + multivariate_depth = ProjectionDepth() + + if ( + pointwise_weights is not None + and ( + len(pointwise_weights) != len(fdatagrid.grid_points[0]) + or pointwise_weights.sum() != 1 + ) + ): raise ValueError( "There must be a weight in pointwise_weights for each recorded " - "time point and altogether must integrate to 1.") + "time point and altogether must integrate to 1.", + ) if pointwise_weights is None: pointwise_weights = np.ones( - len(fdatagrid.grid_points[0])) / ( - fdatagrid.domain_range[0][1] - fdatagrid.domain_range[0][0]) + len(fdatagrid.grid_points[0]), + ) / ( + fdatagrid.domain_range[0][1] - fdatagrid.domain_range[0][0] + ) depth_pointwise = multivariate_depth(fdatagrid.data_matrix) assert depth_pointwise.shape == fdatagrid.data_matrix.shape[:-1] @@ -169,7 +185,9 @@ def directional_outlyingness_stats( # v(t) = {X(t) − Z(t)}/|| X(t) − Z(t) || median_index = np.argmax(depth_pointwise, axis=0) pointwise_median = fdatagrid.data_matrix[ - median_index, range(fdatagrid.data_matrix.shape[1])] + median_index, + range(fdatagrid.data_matrix.shape[1]), + ] assert pointwise_median.shape == fdatagrid.data_matrix.shape[1:] v = fdatagrid.data_matrix - pointwise_median assert v.shape == fdatagrid.data_matrix.shape @@ -185,37 +203,53 @@ def directional_outlyingness_stats( dir_outlyingness = (1 / depth_pointwise[..., np.newaxis] - 1) * v_unitary # Calculation mean directional outlyingness - weighted_dir_outlyingness = (dir_outlyingness - * pointwise_weights[:, np.newaxis]) + weighted_dir_outlyingness = ( + 
dir_outlyingness * pointwise_weights[:, np.newaxis] + ) assert weighted_dir_outlyingness.shape == dir_outlyingness.shape - mean_dir_outlyingness = scipy.integrate.simps(weighted_dir_outlyingness, - fdatagrid.grid_points[0], - axis=1) + mean_dir_outlyingness = scipy.integrate.simps( + weighted_dir_outlyingness, + fdatagrid.grid_points[0], + axis=1, + ) assert mean_dir_outlyingness.shape == ( - fdatagrid.n_samples, fdatagrid.dim_codomain) + fdatagrid.n_samples, + fdatagrid.dim_codomain, + ) # Calculation variation directional outlyingness - norm = np.square(la.norm(dir_outlyingness - - mean_dir_outlyingness[:, np.newaxis, :], axis=-1)) + norm = np.square(la.norm( + dir_outlyingness + - mean_dir_outlyingness[:, np.newaxis, :], + axis=-1, + )) weighted_norm = norm * pointwise_weights variation_dir_outlyingness = scipy.integrate.simps( - weighted_norm, fdatagrid.grid_points[0], - axis=1) + weighted_norm, + fdatagrid.grid_points[0], + axis=1, + ) assert variation_dir_outlyingness.shape == (fdatagrid.n_samples,) - functional_dir_outlyingness = (np.square(la.norm(mean_dir_outlyingness)) - + variation_dir_outlyingness) + functional_dir_outlyingness = ( + np.square(la.norm(mean_dir_outlyingness)) + + variation_dir_outlyingness + ) assert functional_dir_outlyingness.shape == (fdatagrid.n_samples,) return DirectionalOutlyingnessStats( directional_outlyingness=dir_outlyingness, functional_directional_outlyingness=functional_dir_outlyingness, mean_directional_outlyingness=mean_dir_outlyingness, - variation_directional_outlyingness=variation_dir_outlyingness) + variation_directional_outlyingness=variation_dir_outlyingness, + ) -class DirectionalOutlierDetector(BaseEstimator, OutlierMixin): +class DirectionalOutlierDetector( + BaseEstimator, # type: ignore + OutlierMixin, # type: ignore +): r"""Outlier detector using directional outlyingness. 
Considering :math:`\mathbf{Y} = \left(\mathbf{MO}^T, VO\right)^T`, the @@ -303,16 +337,17 @@ class DirectionalOutlierDetector(BaseEstimator, OutlierMixin): """ def __init__( - self, - *, - multivariate_depth=None, - pointwise_weights=None, - assume_centered=False, - support_fraction=None, - num_resamples=1000, - random_state=0, - alpha=0.993, - _force_asymptotic=False): + self, + *, + multivariate_depth: Optional[Depth[NDArrayFloat]] = None, + pointwise_weights: Optional[NDArrayFloat] = None, + assume_centered: bool = False, + support_fraction: Optional[float] = None, + num_resamples: int = 1000, + random_state: RandomStateLike = 0, + alpha: float = 0.993, + _force_asymptotic: bool = False, + ) -> None: self.multivariate_depth = multivariate_depth self.pointwise_weights = pointwise_weights self.assume_centered = assume_centered @@ -322,7 +357,7 @@ def __init__( self.alpha = alpha self._force_asymptotic = _force_asymptotic - def _compute_points(self, X): + def _compute_points(self, X: FDataGrid) -> NDArrayFloat: multivariate_depth = self.multivariate_depth if multivariate_depth is None: multivariate_depth = ProjectionDepth() @@ -333,14 +368,19 @@ def _compute_points(self, X): multivariate_depth=multivariate_depth, pointwise_weights=self.pointwise_weights) - points = np.concatenate((mean_dir_outl, - variation_dir_outl[:, np.newaxis]), axis=1) + points = np.concatenate( + (mean_dir_outl, variation_dir_outl[:, np.newaxis]), + axis=1, + ) return points - def _parameters_asymptotic(self, sample_size, dimension): + def _parameters_asymptotic( + self, + sample_size: int, + dimension: int, + ) -> Tuple[float, float]: """Return the scaling and cutoff parameters via asymptotic formula.""" - n = sample_size p = dimension @@ -348,10 +388,16 @@ def _parameters_asymptotic(self, sample_size, dimension): # c estimation xi_left = scipy.stats.chi2.rvs( - size=self.num_resamples, df=p + 2, random_state=self.random_state_) + size=self.num_resamples, + df=p + 2, + 
random_state=self.random_state_, + ) xi_right = scipy.stats.ncx2.rvs( - size=self.num_resamples, df=p, nc=h / n, - random_state=self.random_state_) + size=self.num_resamples, + df=p, + nc=h / n, + random_state=self.random_state_, + ) c_numerator = np.sum(xi_left < xi_right) / self.num_resamples c_denominator = h / n @@ -370,20 +416,28 @@ def _parameters_asymptotic(self, sample_size, dimension): c4 = 3 * c3 b1 = (c_alpha * (c3 - c4)) / (1 - alpha) - b2 = (0.5 + c_alpha / (1 - alpha) * - (c3 - q_alpha / p * (c2 + (1 - alpha) / 2))) - - v1 = ((1 - alpha) * b1**2 * (alpha * ( - c_alpha * q_alpha / p - 1) ** 2 - 1) - - 2 * c3 * c_alpha**2 * (3 * (b1 - p * b2)**2 - + (p + 2) * b2 * (2 * b1 - p * b2))) + b2 = ( + 0.5 + c_alpha / (1 - alpha) + * (c3 - q_alpha / p * (c2 + (1 - alpha) / 2)) + ) + + v1 = ( + (1 - alpha) * b1**2 + * (alpha * (c_alpha * q_alpha / p - 1) ** 2 - 1) + - 2 * c3 * c_alpha**2 + * ( + 3 * (b1 - p * b2)**2 + + (p + 2) * b2 * (2 * b1 - p * b2) + ) + ) v2 = n * (b1 * (b1 - p * b2) * (1 - alpha))**2 * c_alpha**2 v = v1 / v2 m_asympt = 2 / (c_alpha**2 * v) - estimated_m = (m_asympt * - np.exp(0.725 - 0.00663 * p - 0.0780 * np.log(n))) + estimated_m = ( + m_asympt * np.exp(0.725 - 0.00663 * p - 0.0780 * np.log(n)) + ) dfn = p dfd = estimated_m - p + 1 @@ -391,11 +445,15 @@ def _parameters_asymptotic(self, sample_size, dimension): # Calculation of the cutoff value and scaling factor to identify # outliers. scaling = estimated_c * dfd / estimated_m / dfn - cutoff_value = f.ppf(self.alpha, dfn, dfd, loc=0, scale=1) + cutoff_value = scipy.stats.f.ppf(self.alpha, dfn, dfd, loc=0, scale=1) return scaling, cutoff_value - def _parameters_numeric(self, sample_size, dimension): + def _parameters_numeric( + self, + sample_size: int, + dimension: int, + ) -> Tuple[float, float]: from . 
import \ _directional_outlyingness_experiment_results as experiments @@ -419,10 +477,10 @@ def _parameters_numeric(self, sample_size, dimension): if use_asympt: return self._parameters_asymptotic(sample_size, dimension) - else: - return scaling_list[key], cutoff_list[key] - def fit_predict(self, X, y=None): + return scaling_list[key], cutoff_list[key] + + def fit_predict(self, X: FDataGrid, y: None = None) -> NDArrayInt: try: self.random_state_ = np.random.RandomState(self.random_state) @@ -433,11 +491,12 @@ def fit_predict(self, X, y=None): # The square mahalanobis distances of the samples are # calulated using MCD. - self.cov_ = MinCovDet(store_precision=False, - assume_centered=self.assume_centered, - support_fraction=self.support_fraction, - random_state=self.random_state_).fit( - self.points_) + self.cov_ = MinCovDet( + store_precision=False, + assume_centered=self.assume_centered, + support_fraction=self.support_fraction, + random_state=self.random_state_, + ).fit(self.points_) # Calculation of the degrees of freedom of the F-distribution # (approximation of the tail of the distance distribution). 
@@ -445,13 +504,18 @@ def fit_predict(self, X, y=None): # One per dimension (mean dir out) plus one (variational dir out) dimension = X.dim_codomain + 1 if self._force_asymptotic: - self.scaling_, self.cutoff_value_ = self._parameters_asymptotic( + scaling, cutoff_value = self._parameters_asymptotic( sample_size=X.n_samples, - dimension=dimension) + dimension=dimension, + ) else: - self.scaling_, self.cutoff_value_ = self._parameters_numeric( + scaling, cutoff_value = self._parameters_numeric( sample_size=X.n_samples, - dimension=dimension) + dimension=dimension, + ) + + self.scaling_ = scaling + self.cutoff_value_ = cutoff_value rmd_2 = self.cov_.mahalanobis(self.points_) diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py index 54f5e4a55..712330373 100644 --- a/skfda/exploratory/visualization/_magnitude_shape_plot.py +++ b/skfda/exploratory/visualization/_magnitude_shape_plot.py @@ -5,6 +5,8 @@ detection method is implemented. """ +from __future__ import annotations + from typing import Optional, Sequence, Union import matplotlib @@ -12,16 +14,20 @@ import numpy as np from matplotlib.artist import Artist from matplotlib.axes import Axes +from matplotlib.colors import Colormap from matplotlib.figure import Figure from ... import FDataGrid +from ...representation._typing import NDArrayFloat, NDArrayInt +from ..depth import Depth from ..outliers import DirectionalOutlierDetector from ._baseplot import BasePlot from ._utils import _get_figure_and_axes, _set_figure_layout class MagnitudeShapePlot(BasePlot): - r"""Implementation of the magnitude-shape plot + r""" + Implementation of the magnitude-shape plot. This plot, which is based on the calculation of the :func:`directional outlyingness ` @@ -35,6 +41,8 @@ class MagnitudeShapePlot(BasePlot): The outliers are detected using an instance of :class:`DirectionalOutlierDetector`. 
+ For more information see :footcite:ts:`dai+genton_2018_visualization`. + Args: fdatagrid (FDataGrid): Object containing the data. @@ -153,9 +161,7 @@ class MagnitudeShapePlot(BasePlot): title='MS-Plot') References: - .. bibliography:: - - dai+genton_2018_visualization + .. footbibliography:: """ @@ -167,39 +173,38 @@ def __init__( fig: Optional[Figure] = None, axes: Optional[Sequence[Axes]] = None, **kwargs, - ): + ) -> None: """Initialization of the MagnitudeShapePlot class. Args: - fdatagrid (FDataGrid): Object containing the data. + fdatagrid: Object containing the data. multivariate_depth (:ref:`depth measure `, optional): Method used to order the data. Defaults to :class:`projection depth `. - pointwise_weights (array_like, optional): an array containing the + pointwise_weights: an array containing the weights of each points of discretisati on where values have been recorded. - alpha (float, optional): Denotes the quantile to choose the cutoff + alpha: Denotes the quantile to choose the cutoff value for detecting outliers Defaults to 0.993, which is used in the classical boxplot. - assume_centered (boolean, optional): If True, the support of the + assume_centered: If True, the support of the robust location and the covariance estimates is computed, and a covariance estimate is recomputed from it, without centering the data. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, default value, the robust location and covariance are directly computed with the FastMCD algorithm without additional treatment. - support_fraction (float, 0 < support_fraction < 1, optional): The - proportion of points to be included in the support of the - raw MCD estimate. + support_fraction: The proportion of points to be included in the + support of the raw MCD estimate. 
Default is None, which implies that the minimum value of support_fraction will be used within the algorithm: [n_sample + n_features + 1] / 2 - random_state (int, RandomState instance or None, optional): If int, - random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number - generator; If None, the random number generator is the - RandomState instance used by np.random. By default, it is 0. + random_state: If int, random_state is the seed used by the random + number generator; If RandomState instance, random_state is + the random number generator; If None, the random number + generator is the RandomState instance used by np.random. + By default, it is 0. chart: figure over with the graphs are plotted or axis over where the graphs are plotted. If None and ax is also None, the figure is initialized. @@ -232,46 +237,48 @@ def __init__( self._set_figure_and_axes(chart, fig, axes) @property - def fdatagrid(self): + def fdatagrid(self) -> FDataGrid: return self._fdatagrid @property - def multivariate_depth(self): + def multivariate_depth(self) -> Optional[Depth[NDArrayFloat]]: return self.outlier_detector.multivariate_depth @property - def pointwise_weights(self): + def pointwise_weights(self) -> Optional[NDArrayFloat]: return self.outlier_detector.pointwise_weights @property - def alpha(self): + def alpha(self) -> float: return self.outlier_detector.alpha @property - def points(self): + def points(self) -> NDArrayFloat: return self.outlier_detector.points_ @property - def outliers(self): + def outliers(self) -> NDArrayInt: return self._outliers @property - def colormap(self): + def colormap(self) -> Colormap: return self._colormap @colormap.setter - def colormap(self, value): - if not isinstance(value, matplotlib.colors.LinearSegmentedColormap): - raise ValueError("colormap must be of type " - "matplotlib.colors.LinearSegmentedColormap") + def colormap(self, value: Colormap) -> None: + if not 
isinstance(value, matplotlib.colors.Colormap): + raise ValueError( + "colormap must be of type " + "matplotlib.colors.Colormap", + ) self._colormap = value @property - def color(self): + def color(self) -> float: return self._color @color.setter - def color(self, value): + def color(self, value: float) -> None: if value < 0 or value > 1: raise ValueError( "color must be a number between 0 and 1.") @@ -279,24 +286,23 @@ def color(self, value): self._color = value @property - def outliercol(self): + def outliercol(self) -> float: return self._outliercol @outliercol.setter - def outliercol(self, value): + def outliercol(self, value: float) -> None: if value < 0 or value > 1: raise ValueError( "outcol must be a number between 0 and 1.") self._outliercol = value - def plot(self): + def plot(self) -> Figure: """Visualization of the magnitude shape plot of the fdatagrid. Returns: - fig (figure object): figure object in which the graph is plotted. + Figure object in which the graph is plotted. """ - self.artists = np.zeros( (self.n_samples(), 1), dtype=Artist, @@ -336,18 +342,20 @@ def _set_figure_and_axes( self.fig = fig self.axes = axes - def __repr__(self): + def __repr__(self) -> str: """Return repr(self).""" - return (f"MagnitudeShapePlot(" - f"\nFDataGrid={repr(self.fdatagrid)}," - f"\nmultivariate_depth={self.multivariate_depth}," - f"\npointwise_weights={repr(self.pointwise_weights)}," - f"\nalpha={repr(self.alpha)}," - f"\npoints={repr(self.points)}," - f"\noutliers={repr(self.outliers)}," - f"\ncolormap={self.colormap.name}," - f"\ncolor={repr(self.color)}," - f"\noutliercol={repr(self.outliercol)}," - f"\nxlabel={repr(self.xlabel)}," - f"\nylabel={repr(self.ylabel)}," - f"\ntitle={repr(self.title)})").replace('\n', '\n ') + return ( + f"MagnitudeShapePlot(" + f"\nFDataGrid={repr(self.fdatagrid)}," + f"\nmultivariate_depth={self.multivariate_depth}," + f"\npointwise_weights={repr(self.pointwise_weights)}," + f"\nalpha={repr(self.alpha)}," + 
f"\npoints={repr(self.points)}," + f"\noutliers={repr(self.outliers)}," + f"\ncolormap={self.colormap.name}," + f"\ncolor={repr(self.color)}," + f"\noutliercol={repr(self.outliercol)}," + f"\nxlabel={repr(self.xlabel)}," + f"\nylabel={repr(self.ylabel)}," + f"\ntitle={repr(self.title)})" + ).replace('\n', '\n ') From 81fe9803c8cac6d7343272d6ac5af53e9a7fae10 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 1 Sep 2021 19:40:26 +0200 Subject: [PATCH 408/417] Fix plot typing. --- .../exploratory/visualization/clustering.rst | 8 +-- .../exploratory/visualization/fpca.rst | 4 +- skfda/exploratory/visualization/_ddplot.py | 6 +- .../visualization/_magnitude_shape_plot.py | 3 - .../exploratory/visualization/_outliergram.py | 2 - skfda/exploratory/visualization/clustering.py | 6 +- skfda/exploratory/visualization/fpca.py | 56 +++++++++---------- 7 files changed, 40 insertions(+), 45 deletions(-) diff --git a/docs/modules/exploratory/visualization/clustering.rst b/docs/modules/exploratory/visualization/clustering.rst index 86848f9ae..e8bb23d86 100644 --- a/docs/modules/exploratory/visualization/clustering.rst +++ b/docs/modules/exploratory/visualization/clustering.rst @@ -2,14 +2,14 @@ Clustering Plots ================ In order to show the results of the cluster algorithms in a visual way, :mod:`this module ` is -implemented. It contains the following methods: +implemented. It contains the following classes: .. autosummary:: :toctree: autosummary - skfda.exploratory.visualization.clustering.plot_clusters - skfda.exploratory.visualization.clustering.plot_cluster_lines - skfda.exploratory.visualization.clustering.plot_cluster_bars + skfda.exploratory.visualization.clustering.ClusterPlot + skfda.exploratory.visualization.clustering.ClusterMembershipLinesPlot + skfda.exploratory.visualization.clustering.ClusterMembershipPlot In the first one, the samples of the FDataGrid are divided by clusters which are assigned different colors. 
The following functions, are only valid for the diff --git a/docs/modules/exploratory/visualization/fpca.rst b/docs/modules/exploratory/visualization/fpca.rst index 8f22e884e..141769ef4 100644 --- a/docs/modules/exploratory/visualization/fpca.rst +++ b/docs/modules/exploratory/visualization/fpca.rst @@ -1,12 +1,12 @@ Functional Principal Component Analysis plots ============================================= In order to show the modes of variation that the principal components represent, -the following function is implemented +the following class is implemented .. autosummary:: :toctree: autosummary - skfda.exploratory.visualization.fpca.plot_fpca_perturbation_graphs + skfda.exploratory.visualization.fpca.FPCAPlot See the example :ref:`sphx_glr_auto_examples_plot_fpca.py` for detailed explanation. diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 749111889..43a6d9e5a 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -97,10 +97,10 @@ def plot( ax = self.axes[0] - for i in range(len(self.depth_dist1)): + for i, d1, d2 in enumerate(zip(self.depth_dist1, self.depth_dist2)): self.artists[i, 0] = ax.scatter( - self.depth_dist1[i], - self.depth_dist2[i], + d1, + d2, picker=True, pickradius=2, ) diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py index 712330373..0de836a3b 100644 --- a/skfda/exploratory/visualization/_magnitude_shape_plot.py +++ b/skfda/exploratory/visualization/_magnitude_shape_plot.py @@ -44,7 +44,6 @@ class MagnitudeShapePlot(BasePlot): For more information see :footcite:ts:`dai+genton_2018_visualization`. Args: - fdatagrid (FDataGrid): Object containing the data. multivariate_depth (:ref:`depth measure `, optional): Method used to order the data. 
Defaults to :class:`projection @@ -75,7 +74,6 @@ class MagnitudeShapePlot(BasePlot): RandomState instance used by np.random. By default, it is 0. Attributes: - points(numpy.ndarray): 2-dimensional matrix where each row contains the points plotted in the graph. outliers (1-D array, (fdatagrid.n_samples,)): Contains 1 or 0 to denote @@ -107,7 +105,6 @@ class MagnitudeShapePlot(BasePlot): MagnitudeShapePlot(fd) Example: - >>> import skfda >>> data_matrix = [[1, 1, 2, 3, 2.5, 2], ... [0.5, 0.5, 1, 2, 1.5, 1], diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index ee6ebf30d..b7bbd5c41 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -15,9 +15,7 @@ from matplotlib.figure import Figure from ... import FDataGrid -from ..depth._depth import ModifiedBandDepth from ..outliers import OutliergramOutlierDetector -from ..stats import modified_epigraph_index from ._baseplot import BasePlot from ._utils import _get_figure_and_axes, _set_figure_layout_for_fdata diff --git a/skfda/exploratory/visualization/clustering.py b/skfda/exploratory/visualization/clustering.py index 61ba1241e..00221f613 100644 --- a/skfda/exploratory/visualization/clustering.py +++ b/skfda/exploratory/visualization/clustering.py @@ -600,13 +600,17 @@ def plot(self) -> Figure: center_labels=None, ) - self.x_label, self.y_label, self.title = _get_labels( + x_label, y_label, title = _get_labels( self.x_label, self.y_label, self.title, "Sample", ) + self.x_label = x_label + self.y_label = y_label + self.title = title + if self.sample_labels is None: self.sample_labels = np.arange(self.fdata.n_samples) diff --git a/skfda/exploratory/visualization/fpca.py b/skfda/exploratory/visualization/fpca.py index 447a90ddc..9d3fc73de 100644 --- a/skfda/exploratory/visualization/fpca.py +++ b/skfda/exploratory/visualization/fpca.py @@ -40,35 +40,36 @@ def __init__( fig: Optional[Figure] = None, 
axes: Optional[Axes] = None, ): - BasePlot.__init__(self) + super().__init__(self) self.mean = mean self.components = components self.multiple = multiple - + self._set_figure_and_axes(chart, fig, axes) def plot(self, **kwargs: Any) -> Figure: - """ + """ Plots the perturbation graphs for the principal components. - The perturbations are defined as variations over the mean. Adding a multiple - of the principal component curve to the mean function results in the - positive perturbation and subtracting a multiple of the principal component - curve results in the negative perturbation. For each principal component - curve passed, a subplot with the mean and the perturbations is shown. + + The perturbations are defined as variations over the mean. Adding a + multiple of the principal component curve to the mean function results + in the positive perturbation and subtracting a multiple of the + principal component curve results in the negative perturbation. For + each principal component curve passed, a subplot with the mean and + the perturbations is shown. Returns: - (FDataGrid or FDataBasis): this contains the mean function followed - by the positive perturbation and the negative perturbation. - """ + The plotted figure. + """ if len(self.mean) > 1: self.mean = self.mean.mean() - for i in range(len(self.axes)): + for i, axes in enumerate(self.axes): aux = self._get_component_perturbations(i) - gp = GraphPlot(fdata=aux, axes=self.axes[i]).plot(**kwargs) + gp = GraphPlot(fdata=aux, axes=axes).plot(**kwargs) self.artists = gp.artists - self.axes[i].set_title('Principal component ' + str(i + 1)) + axes.set_title(f"Principal component {i + 1}") return self.fig @@ -88,29 +89,24 @@ def _set_figure_and_axes( self.fig = fig self.axes = axes - def _get_component_perturbations(self, index: int = 0): - """ Computes the perturbations over the mean function of a principal - component at a certain index. 
+ def _get_component_perturbations(self, index: int = 0) -> FData: + """ + Compute the perturbations over the mean of a principal component. Args: - X (FDataGrid or FDataBasis): - the functional data object from which we obtain the mean - index (int): - index of the component for which we want to compute the + index: Index of the component for which we want to compute the perturbations - multiple (float): - multiple of the principal component curve to be added or - subtracted. Returns: - (FDataGrid or FDataBasis): this contains the mean function followed - by the positive perturbation and the negative perturbation. + The mean function followed by the positive perturbation and + the negative perturbation. """ if not isinstance(self.mean, FData): raise AttributeError("X must be a FData object") perturbations = self.mean.copy() perturbations = perturbations.concatenate( - perturbations[0] + self.multiple * self.components[index]) - perturbations = perturbations.concatenate( - perturbations[0] - self.multiple * self.components[index]) - return perturbations + perturbations[0] + self.multiple * self.components[index], + ) + return perturbations.concatenate( + perturbations[0] - self.multiple * self.components[index], + ) From 79f6d1dfd30672bb11ad5d7e65a5318115af16cb Mon Sep 17 00:00:00 2001 From: VNMabus Date: Fri, 10 Sep 2021 19:24:03 +0200 Subject: [PATCH 409/417] Simplify plots. - Reduce code complexity moving common parts to the common ancestor class. - Improve compatibility with MultipleDisplay. 
--- setup.cfg | 2 +- skfda/exploratory/visualization/_baseplot.py | 59 +++++- skfda/exploratory/visualization/_boxplot.py | 62 ++---- skfda/exploratory/visualization/_ddplot.py | 54 ++--- .../visualization/_magnitude_shape_plot.py | 44 ++-- .../visualization/_multiple_display.py | 31 ++- .../exploratory/visualization/_outliergram.py | 86 ++------ .../visualization/_parametric_plot.py | 69 ++----- skfda/exploratory/visualization/_utils.py | 4 +- skfda/exploratory/visualization/clustering.py | 160 ++++++++------- skfda/exploratory/visualization/fpca.py | 69 +++---- .../visualization/representation.py | 191 ++++++------------ 12 files changed, 335 insertions(+), 496 deletions(-) diff --git a/setup.cfg b/setup.cfg index dcd639462..d8a7354cd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -108,7 +108,7 @@ rst-directives = versionadded,versionchanged, rst-roles = - attr,class,doc,footcite,func,meth,mod,obj,ref,term, + attr,class,doc,footcite,footcite:ts,func,meth,mod,obj,ref,term, allowed-domain-names = data, obj, result, results, val, value, values, var diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index 620d4bf41..40a4c429c 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -6,7 +6,7 @@ """ from abc import ABC, abstractmethod -from typing import Optional, Sequence, Union +from typing import Optional, Sequence, Tuple, Union import matplotlib.pyplot as plt import numpy as np @@ -35,10 +35,23 @@ def __init__( *, fig: Optional[Figure] = None, axes: Union[Axes, Sequence[Axes], None] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, ) -> None: - self.artists: np.ndarray + self.artists: Optional[np.ndarray] = None + self.chart = chart + self.fig = fig + self.axes = axes + self.n_rows = n_rows + self.n_cols = n_cols + + def _plot( + self, + fig: Figure, + axes: Sequence[Axes], + ) -> None: + pass - @abstractmethod def plot( self, ) -> Figure: 
@@ -49,17 +62,32 @@ def plot( Figure: figure object in which the displays and widgets will be plotted. """ - pass + fig = getattr(self, "fig_", None) + axes = getattr(self, "axes_", None) + + if fig is None: + fig, axes = self._set_figure_and_axes( + self.chart, + fig=self.fig, + axes=self.axes, + ) + + self._plot(fig, axes) + return fig + + @property + def dim(self) -> int: + """Get the number of dimensions for this plot.""" + return 2 @property def n_subplots(self) -> int: """Get the number of subplots that this plot uses.""" return 1 - @abstractmethod - def n_samples(self) -> int: + def n_samples(self) -> Optional[int]: """Get the number of instances that will be used for interactivity.""" - pass + return None def _set_figure_and_axes( self, @@ -67,12 +95,21 @@ def _set_figure_and_axes( *, fig: Optional[Figure] = None, axes: Union[Axes, Sequence[Axes], None] = None, - ) -> None: + ) -> Tuple[Figure, Sequence[Axes]]: fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout(fig, axes) + fig, axes = _set_figure_layout( + fig=fig, + axes=axes, + dim=self.dim, + n_axes=self.n_subplots, + n_rows=self.n_rows, + n_cols=self.n_cols, + ) - self.fig = fig - self.axes = axes + self.fig_ = fig + self.axes_ = axes + + return fig, axes def _repr_svg_(self) -> str: """Automatically represents the object as an svg when calling it.""" diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 240eb9eee..d9ef3f143 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -295,7 +295,14 @@ def __init__( """ FDataBoxplot.__init__(self, factor) - BasePlot.__init__(self) + BasePlot.__init__( + self, + chart, + fig=fig, + axes=axes, + n_rows=n_rows, + n_cols=n_cols, + ) if fdatagrid.dim_domain != 1: raise ValueError( @@ -364,8 +371,6 @@ def __init__( self.mediancol = "black" self._show_full_outliers = False - self._set_figure_and_axes(chart, fig, axes, 
n_rows, n_cols) - @property def fdatagrid(self) -> FDataGrid: return self._fdatagrid @@ -400,25 +405,6 @@ def show_full_outliers(self, boolean: bool) -> None: raise ValueError("show_full_outliers must be boolean type") self._show_full_outliers = boolean - def _set_figure_and_axes( - self, - chart: Union[Figure, Axes, None] = None, - fig: Optional[Figure] = None, - axes: Union[Axes, Sequence[Axes], None] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, - ) -> None: - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata( - self.fdatagrid, - fig=fig, - axes=axes, - n_rows=n_rows, - n_cols=n_cols, - ) - self.fig = fig - self.axes = axes - @property def n_subplots(self) -> int: return self.fdatagrid.dim_codomain @@ -426,14 +412,12 @@ def n_subplots(self) -> int: def n_samples(self) -> int: return self.fdatagrid.n_samples - def plot(self) -> Figure: - """ - Visualize the functional boxplot of the fdatagrid (dim_domain=1). - - Returns: - Figure object in which the graphs are plotted. 
+ def _plot( + self, + fig: Figure, + axes: Sequence[Axes], + ) -> None: - """ self.artists = np.zeros((self.n_samples(), 1), dtype=Artist) tones = np.linspace(0.1, 1.0, len(self._prob) + 1, endpoint=False)[1:] color = self.colormap(tones) @@ -447,11 +431,11 @@ def plot(self) -> Figure: grid_points = self.fdatagrid.grid_points[0] - for m, axes in enumerate(self.axes): + for m, ax in enumerate(axes): # Outliers for o in outliers: - axes.plot( + ax.plot( grid_points, o.data_matrix[0, :, m], color=self.outliercol, @@ -461,7 +445,7 @@ def plot(self) -> Figure: for envelop, col in zip(self.envelopes, color): # central regions - axes.fill_between( + ax.fill_between( grid_points, envelop[0][..., m], envelop[1][..., m], @@ -470,7 +454,7 @@ def plot(self) -> Figure: ) # outlying envelope - axes.plot( + ax.plot( grid_points, self.non_outlying_envelope[0][..., m], grid_points, @@ -480,7 +464,7 @@ def plot(self) -> Figure: ) # central envelope - axes.plot( + ax.plot( grid_points, self.central_envelope[0][..., m], grid_points, @@ -492,7 +476,7 @@ def plot(self) -> Figure: # vertical lines index = math.ceil(len(grid_points) / 2) x = grid_points[index] - axes.plot( + ax.plot( [x, x], [ self.non_outlying_envelope[0][..., m][index], @@ -501,7 +485,7 @@ def plot(self) -> Figure: color=self.barcol, zorder=4, ) - axes.plot( + ax.plot( [x, x], [ self.non_outlying_envelope[1][..., m][index], @@ -512,16 +496,14 @@ def plot(self) -> Figure: ) # median sample - axes.plot( + ax.plot( grid_points, self.median[..., m], color=self.mediancol, zorder=5, ) - _set_labels(self.fdatagrid, self.fig, self.axes) - - return self.fig + _set_labels(self.fdatagrid, fig, axes) def __repr__(self) -> str: """Return repr(self).""" diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 43a6d9e5a..68b1a80e7 100644 --- a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -15,7 +15,6 @@ from 
...exploratory.depth.multivariate import Depth from ...representation._functional_data import FData from ._baseplot import BasePlot -from ._utils import _get_figure_and_axes, _set_figure_layout_for_fdata T = TypeVar('T', bound=FData) @@ -61,21 +60,29 @@ def __init__( fig: Optional[Figure] = None, axes: Optional[Axes] = None, ) -> None: - BasePlot.__init__(self) + BasePlot.__init__( + self, + chart, + fig=fig, + axes=axes, + ) self.fdata = fdata self.depth_method = depth_method self.depth_method.fit(fdata) self.depth_dist1 = self.depth_method( - self.fdata, distribution=dist1, + self.fdata, + distribution=dist1, ) self.depth_dist2 = self.depth_method( - self.fdata, distribution=dist2, + self.fdata, + distribution=dist2, ) - self._set_figure_and_axes(chart, fig, axes) - def plot( + def _plot( self, - ) -> Figure: + fig: Figure, + axes: Axes, + ) -> None: """ Plot DDPlot graph. @@ -95,9 +102,9 @@ def plot( width_aux_line = 0.35 color_aux_line = "gray" - ax = self.axes[0] + ax = axes[0] - for i, d1, d2 in enumerate(zip(self.depth_dist1, self.depth_dist2)): + for i, (d1, d2) in enumerate(zip(self.depth_dist1, self.depth_dist2)): self.artists[i, 0] = ax.scatter( d1, d2, @@ -128,35 +135,6 @@ def plot( color=color_aux_line, ) - return self.fig - def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" return self.fdata.n_samples - - def _set_figure_and_axes( - self, - chart: Union[Figure, Axes, None] = None, - fig: Optional[Figure] = None, - axes: Optional[Axes] = None, - ) -> None: - """ - Initialize the axes and fig of the plot. - - Args: - chart: figure over with the graphs are plotted or axis over - where the graphs are plotted. If None and ax is also - None, the figure is initialized. - fig: figure over with the graphs are plotted in case ax is not - specified. If None and ax is also None, the figure is - initialized. - axes: axis where the graphs are plotted. If None, see param fig. 
- """ - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata( - fdata=self.fdata, - fig=fig, - axes=axes, - ) - self.fig = fig - self.axes = axes diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py index 0de836a3b..bf40f9777 100644 --- a/skfda/exploratory/visualization/_magnitude_shape_plot.py +++ b/skfda/exploratory/visualization/_magnitude_shape_plot.py @@ -22,7 +22,6 @@ from ..depth import Depth from ..outliers import DirectionalOutlierDetector from ._baseplot import BasePlot -from ._utils import _get_figure_and_axes, _set_figure_layout class MagnitudeShapePlot(BasePlot): @@ -211,7 +210,12 @@ def __init__( axes: axis where the graphs are plotted. If None, see param fig. """ - BasePlot.__init__(self) + BasePlot.__init__( + self, + chart, + fig=fig, + axes=axes, + ) if fdatagrid.dim_codomain > 1: raise NotImplementedError( "Only support 1 dimension on the codomain.") @@ -231,8 +235,6 @@ def __init__( self.ylabel = 'VO' self.title = 'MS-Plot' - self._set_figure_and_axes(chart, fig, axes) - @property def fdatagrid(self) -> FDataGrid: return self._fdatagrid @@ -293,13 +295,12 @@ def outliercol(self, value: float) -> None: "outcol must be a number between 0 and 1.") self._outliercol = value - def plot(self) -> Figure: - """Visualization of the magnitude shape plot of the fdatagrid. - - Returns: - Figure object in which the graph is plotted. 
+ def _plot( + self, + fig: Figure, + axes: Axes, + ) -> None: - """ self.artists = np.zeros( (self.n_samples(), 1), dtype=Artist, @@ -310,8 +311,8 @@ def plot(self) -> Figure: colors_rgba = [tuple(i) for i in colors] - for i in range(len(self.points[:, 0].ravel())): - self.artists[i, 0] = self.axes[0].scatter( + for i, _ in enumerate(self.points[:, 0].ravel()): + self.artists[i, 0] = axes[0].scatter( self.points[:, 0].ravel()[i], self.points[:, 1].ravel()[i], color=colors_rgba[i], @@ -319,26 +320,13 @@ def plot(self) -> Figure: pickradius=2, ) - self.axes[0].set_xlabel(self.xlabel) - self.axes[0].set_ylabel(self.ylabel) - self.axes[0].set_title(self.title) - - return self.fig + axes[0].set_xlabel(self.xlabel) + axes[0].set_ylabel(self.ylabel) + axes[0].set_title(self.title) def n_samples(self) -> int: return self.fdatagrid.n_samples - def _set_figure_and_axes( - self, - chart: Union[Figure, Axes, None] = None, - fig: Optional[Figure] = None, - axes: Union[Axes, Sequence[Axes], None] = None, - ) -> None: - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout(fig, axes) - self.fig = fig - self.axes = axes - def __repr__(self) -> str: """Return repr(self).""" return ( diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index 6586ec073..727ade0de 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -78,8 +78,12 @@ def __init__( displays = (displays,) self.displays = [copy.copy(d) for d in displays] - self._n_graphs = sum(len(d.axes) for d in self.displays) - self.length_data = self.displays[0].n_samples() + self._n_graphs = sum(d.n_subplots for d in self.displays) + self.length_data = next( + d.n_samples() + for d in self.displays + if d.n_samples() is not None + ) self.sliders: List[Widget] = [] self.criteria: List[List[int]] = [] self.selected_sample: Optional[int] = None @@ -271,7 +275,10 @@ 
def plot(self) -> Figure: """ if self._n_graphs > 1: for d in self.displays[1:]: - if d.n_samples() != self.length_data: + if ( + d.n_samples() is not None + and d.n_samples() != self.length_data + ): raise ValueError( "Length of some data sets are not equal ", ) @@ -281,7 +288,7 @@ def plot(self) -> Figure: int_index = 0 for disp in self.displays: - axes_needed = len(disp.axes) + axes_needed = disp.n_subplots end_index = axes_needed + int_index disp._set_figure_and_axes(axes=self.axes[int_index:end_index]) disp.plot() @@ -303,8 +310,11 @@ def _sample_artist_from_event( ) -> Optional[Tuple[int, Artist]]: """Get the number of sample and artist under a location event.""" for d in self.displays: + if d.artists is None: + continue + try: - i = d.axes.index(event.inaxes) + i = d.axes_.index(event.inaxes) except ValueError: continue @@ -371,9 +381,13 @@ def pick(self, event: Event) -> None: def _sample_from_artist(self, artist: Artist) -> Optional[int]: """Return the sample corresponding to an artist.""" for d in self.displays: - for i, a in enumerate(d.axes): + + if d.artists is None: + continue + + for i, a in enumerate(d.axes_): if a == artist.axes: - if len(d.axes) == 1: + if len(d.axes_) == 1: return np.where(d.artists == artist)[0][0] else: return np.where(d.artists[:, i] == artist)[0][0] @@ -383,6 +397,9 @@ def _sample_from_artist(self, artist: Artist) -> Optional[int]: def _visit_artists(self) -> Generator[Tuple[int, Artist], None, None]: for i in range(self.length_data): for d in self.displays: + if d.artists is None: + continue + yield from ((i, artist) for artist in np.ravel(d.artists[i])) def _select_sample(self, selected_sample: int) -> None: diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index b7bbd5c41..3ba7ad2ed 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -17,7 +17,6 @@ from ... 
import FDataGrid from ..outliers import OutliergramOutlierDetector from ._baseplot import BasePlot -from ._utils import _get_figure_and_axes, _set_figure_layout_for_fdata class Outliergram(BasePlot): @@ -65,12 +64,14 @@ def __init__( *, fig: Optional[Figure] = None, axes: Optional[Axes] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, factor: float = 1.5, - **kwargs, ) -> None: - BasePlot.__init__(self) + BasePlot.__init__( + self, + chart, + fig=fig, + axes=axes, + ) self.fdata = fdata self.factor = factor self.outlier_detector = OutliergramOutlierDetector(factor=factor) @@ -78,38 +79,28 @@ def __init__( indices = np.argsort(self.outlier_detector.mei_) self._parabola_ordered = self.outlier_detector.parabola_[indices] self._mei_ordered = self.outlier_detector.mei_[indices] - self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) - def plot( + def _plot( self, - ) -> Figure: - """ - Plot Outliergram. - - Plots the Modified Band Depth (MBD) on the Y axis and the Modified - Epigraph Index (MEI) on the X axis. This points will create the form of - a parabola. The shape outliers will be the points that appear far from - this curve. - Returns: - fig: figure object in which the depths will be - scattered. 
- """ + fig: Figure, + axes: Axes, + ) -> None: + self.artists = np.zeros( (self.n_samples(), 1), dtype=Artist, ) - self.axScatter = self.axes[0] for i, (mei, mbd) in enumerate( zip(self.outlier_detector.mei_, self.outlier_detector.mbd_), ): - self.artists[i, 0] = self.axScatter.scatter( + self.artists[i, 0] = axes[0].scatter( mei, mbd, picker=2, ) - self.axScatter.plot( + axes[0].plot( self._mei_ordered, self._parabola_ordered, ) @@ -119,7 +110,7 @@ def plot( - self.outlier_detector.max_inlier_distance_ ) - self.axScatter.plot( + axes[0].plot( self._mei_ordered, shifted_parabola, linestyle='dashed', @@ -127,55 +118,16 @@ def plot( # Set labels of graph if self.fdata.dataset_name is not None: - self.axScatter.set_title(self.fdata.dataset_name) + axes[0].set_title(self.fdata.dataset_name) - self.axScatter.set_xlabel("MEI") - self.axScatter.set_ylabel("MBD") - self.axScatter.set_xlim([0, 1]) - self.axScatter.set_ylim([ + axes[0].set_xlabel("MEI") + axes[0].set_ylabel("MBD") + axes[0].set_xlim([0, 1]) + axes[0].set_ylim([ 0, # Minimum MBD 1, # Maximum MBD ]) - return self.fig - def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" return self.fdata.n_samples - - def _set_figure_and_axes( - self, - chart: Union[Figure, Axes, None] = None, - fig: Optional[Figure] = None, - axes: Union[Axes, Sequence[Axes], None] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, - ) -> None: - """ - Initialize the axes and fig of the plot. - - Args: - chart: figure over with the graphs are plotted or axis over - where the graphs are plotted. If None and ax is also - None, the figure is initialized. - fig: figure over with the graphs are plotted in case ax is not - specified. If None and ax is also None, the figure is - initialized. - axes: axis where the graphs are plotted. If None, see param fig. - n_rows: designates the number of rows of the figure - to plot the different dimensions of the image. 
Only specified - if fig and ax are None. - n_cols: designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. - """ - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata( - fdata=self.fdata, - fig=fig, - axes=axes, - n_rows=n_rows, - n_cols=n_cols, - ) - self.fig = fig - self.axes = axes diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 1cb142c52..7b749e162 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -6,7 +6,7 @@ of them with domain 1 and codomain 1. """ -from typing import Any, Mapping, Optional, Sequence, TypeVar, Union +from typing import Mapping, Optional, Sequence, TypeVar, Union import numpy as np from matplotlib.artist import Artist @@ -15,7 +15,7 @@ from ...representation import FData from ._baseplot import BasePlot -from ._utils import ColorLike, _get_figure_and_axes, _set_figure_layout +from ._utils import ColorLike from .representation import Indexable, _get_color_info K = TypeVar('K', contravariant=True) @@ -56,7 +56,12 @@ def __init__( group_names: Optional[Indexable[K, str]] = None, legend: bool = False, ) -> None: - BasePlot.__init__(self) + BasePlot.__init__( + self, + chart, + fig=fig, + axes=axes, + ) self.fdata1 = fdata1 self.fdata2 = fdata2 @@ -72,21 +77,12 @@ def __init__( self.group_colors = group_colors self.legend = legend - self._set_figure_and_axes(chart, fig, axes) - - def plot( + def _plot( self, - **kwargs: Any, - ) -> Figure: - """ - Parametric Plot graph. - - Plot the functions as coordinates. If two functions are passed - it will concatenate both as coordinates of a vector-valued FData. - Returns: - fig: figure object in which the ParametricPlot - graph will be plotted. 
- """ + fig: Figure, + axes: Axes, + ) -> None: + self.artists = np.zeros((self.n_samples(), 1), dtype=Artist) sample_colors, patches = _get_color_info( @@ -95,7 +91,6 @@ def plot( self.group_names, self.group_colors, self.legend, - kwargs, ) color_dict: Mapping[str, Union[ColorLike, None]] = {} @@ -104,12 +99,7 @@ def plot( self.fd_final.dim_domain == 1 and self.fd_final.dim_codomain == 2 ): - fig, axes = _set_figure_layout( - self.fig, self.axes, dim=2, n_axes=1, - ) - self.fig = fig - self.axes = axes - ax = self.axes[0] + ax = axes[0] for i in range(self.fd_final.n_samples): @@ -120,8 +110,8 @@ def plot( self.fd_final.data_matrix[i][:, 0].tolist(), self.fd_final.data_matrix[i][:, 1].tolist(), **color_dict, - **kwargs, - ) + )[0] + else: raise ValueError( "Error in data arguments,", @@ -129,7 +119,7 @@ def plot( ) if self.fd_final.dataset_name is not None: - self.fig.suptitle(self.fd_final.dataset_name) + fig.suptitle(self.fd_final.dataset_name) if self.fd_final.coordinate_names[0] is None: ax.set_xlabel("Function 1") @@ -141,31 +131,6 @@ def plot( else: ax.set_ylabel(self.fd_final.coordinate_names[1]) - return fig - def n_samples(self) -> int: """Get the number of instances that will be used for interactivity.""" return self.fd_final.n_samples - - def _set_figure_and_axes( - self, - chart: Union[Figure, Axes, None] = None, - fig: Optional[Figure] = None, - axes: Union[Axes, Sequence[Axes], None] = None, - ) -> None: - """ - Initialize the axes and fig of the plot. - - Args: - chart: figure over with the graphs are plotted or axis over - where the graphs are plotted. If None and ax is also - None, the figure is initialized. - fig: figure over with the graphs are plotted in case ax is not - specified. If None and ax is also None, the figure is - initialized. - axes: axis where the graphs are plotted. If None, see param fig. 
- """ - fig, axes = _get_figure_and_axes(chart, fig, axes) - - self.fig = fig - self.axes = axes diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index b1cc32fc5..5bb7c551f 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -177,8 +177,8 @@ def _set_figure_layout( if len(axes) not in {0, n_axes}: raise ValueError( - f"The number of axes must be 0 (to create them) or " - f"equal to the number of axes needed " + f"The number of axes ({len(axes)}) must be 0 (to create them)" + f" or equal to the number of axes needed " f"({n_axes} in this case).", ) diff --git a/skfda/exploratory/visualization/clustering.py b/skfda/exploratory/visualization/clustering.py index 00221f613..6a8a94418 100644 --- a/skfda/exploratory/visualization/clustering.py +++ b/skfda/exploratory/visualization/clustering.py @@ -8,8 +8,11 @@ import matplotlib.patches as mpatches import matplotlib.pyplot as plt import numpy as np +from matplotlib.artist import Artist from matplotlib.axes import Axes +from matplotlib.collections import PatchCollection from matplotlib.figure import Figure +from matplotlib.patches import Rectangle from matplotlib.ticker import MaxNLocator from sklearn.exceptions import NotFittedError from sklearn.utils.validation import check_is_fitted @@ -213,6 +216,8 @@ def __init__( chart, fig=fig, axes=axes, + n_rows=n_rows, + n_cols=n_cols, ) self.fdata = fdata self.estimator = estimator @@ -223,12 +228,19 @@ def __init__( self.center_labels = center_labels self.center_width = center_width self.colormap = colormap - self._set_figure_and_axes(chart, fig=fig, axes=axes) + + @property + def n_subplots(self) -> int: + return self.fdata.dim_codomain def n_samples(self) -> int: return self.fdata.n_samples - def _plot_clusters(self) -> Figure: + def _plot_clusters( + self, + fig: Figure, + axes: Sequence[Axes], + ) -> None: """Implement the plot of the FDataGrid samples by clusters.""" 
_plot_clustering_checks( estimator=self.estimator, @@ -276,7 +288,7 @@ def _plot_clusters(self) -> Figure: ] artists = [ - self.axes[j].plot( + axes[j].plot( self.fdata.grid_points[0], self.fdata.data_matrix[i, :, j], c=colors_by_cluster[i], @@ -286,38 +298,29 @@ def _plot_clusters(self) -> Figure: for i in range(self.fdata.n_samples) ] - self.artists = np.array(artists) + self.artists = np.array(artists).reshape( + (self.n_subplots, self.n_samples()), + ).T for j in range(self.fdata.dim_codomain): for i in range(self.estimator.n_clusters): - self.axes[j].plot( + axes[j].plot( self.fdata.grid_points[0], self.estimator.cluster_centers_.data_matrix[i, :, j], c=self.center_colors[i], label=self.center_labels[i], linewidth=self.center_width, ) - self.axes[j].legend(handles=patches) - - _set_labels(self.fdata, self.fig, self.axes) - - return self.fig - - def plot(self) -> Figure: - """ - Plot of the FDataGrid samples by clusters. - - The clusters are calculated with the estimator passed as a parameter. - If the estimator is not fitted, the fit method is called. - Once each sample is assigned a label the plotting can be done. - Each group is assigned a color described in a legend. + axes[j].legend(handles=patches) - Returns: - Plotted figure. 
+ _set_labels(self.fdata, fig, axes) - """ - self.artists = np.array([]) + def _plot( + self, + fig: Figure, + axes: Sequence[Axes], + ) -> None: try: check_is_fitted(self.estimator) @@ -330,7 +333,7 @@ def plot(self) -> Figure: self.labels = self.estimator.labels_ - return self._plot_clusters() + self._plot_clusters(fig=fig, axes=axes) class ClusterMembershipLinesPlot(BasePlot): @@ -367,6 +370,7 @@ def __init__( self, estimator: FuzzyClusteringEstimator, fdata: FDataGrid, + *, chart: Union[Figure, Axes, None] = None, fig: Optional[Figure] = None, axes: Union[Axes, Sequence[Axes], None] = None, @@ -396,25 +400,15 @@ def __init__( self.y_label = y_label self.title = title self.colormap = colormap - self._set_figure_and_axes(chart, fig=fig, axes=axes) def n_samples(self) -> int: return self.fdata.n_samples - def plot(self) -> Figure: - """ - Plot cluster membership. - - A kind of Parallel Coordinates plot is generated in this function with - the membership values obtained from the algorithm. A line is plotted - for each sample with the values for each cluster. See - `Clustering Example <../auto_examples/plot_clustering.html>`_. - - Returns: - Plotted figure. 
- - """ - self.artists = np.array([]) + def _plot( + self, + fig: Figure, + axes: Sequence[Axes], + ) -> None: try: check_is_fitted(self.estimator) @@ -438,7 +432,7 @@ def plot(self) -> Figure: center_labels=None, ) - self.x_label, self.y_label, self.title = _get_labels( + x_label, y_label, title = _get_labels( self.x_label, self.y_label, self.title, @@ -465,9 +459,9 @@ def plot(self) -> Figure: for i in range(self.estimator.n_clusters) ] - self.axes[0].get_xaxis().set_major_locator(MaxNLocator(integer=True)) + axes[0].get_xaxis().set_major_locator(MaxNLocator(integer=True)) self.artists = np.array([ - self.axes[0].plot( + axes[0].plot( np.arange(self.estimator.n_clusters), membership[i], label=self.sample_labels[i], @@ -476,13 +470,12 @@ def plot(self) -> Figure: for i in range(self.fdata.n_samples) ]) - self.axes[0].set_xticks(np.arange(self.estimator.n_clusters)) - self.axes[0].set_xticklabels(self.cluster_labels) - self.axes[0].set_xlabel(self.x_label) - self.axes[0].set_ylabel(self.y_label) + axes[0].set_xticks(np.arange(self.estimator.n_clusters)) + axes[0].set_xticklabels(self.cluster_labels) + axes[0].set_xlabel(x_label) + axes[0].set_ylabel(y_label) - self.fig.suptitle(self.title) - return self.fig + fig.suptitle(title) class ClusterMembershipPlot(BasePlot): @@ -520,6 +513,7 @@ def __init__( estimator: FuzzyClusteringEstimator, fdata: FData, chart: Union[Figure, Axes, None] = None, + *, fig: Optional[Figure] = None, axes: Union[Axes, Sequence[Axes], None] = None, sort: int = -1, @@ -550,27 +544,21 @@ def __init__( self.title = title self.colormap = colormap self.sort = sort - self._set_figure_and_axes(chart, fig=fig, axes=axes) def n_samples(self) -> int: return self.fdata.n_samples - def plot(self) -> Figure: - """ - Plot cluster membership. - - A kind of barplot is generated in this function with the membership - values obtained from the algorithm. 
There is a bar for each sample - whose height is 1 (the sum of the membership values of a sample add - to 1), and the part proportional to each cluster is coloured with - the corresponding color. See - `Clustering Example <../auto_examples/plot_clustering.html>`_. - - Returns: - Plotted figure. + def _plot( + self, + fig: Figure, + axes: Sequence[Axes], + ) -> None: - """ - self.artists = np.array([]) + self.artists = np.full( + (self.n_samples(), self.n_subplots), + None, + dtype=Artist, + ) try: check_is_fitted(self.estimator) @@ -607,10 +595,6 @@ def plot(self) -> Figure: "Sample", ) - self.x_label = x_label - self.y_label = y_label - self.title = title - if self.sample_labels is None: self.sample_labels = np.arange(self.fdata.n_samples) @@ -651,19 +635,41 @@ def plot(self) -> Figure: conc = np.zeros((self.fdata.n_samples, 1)) labels_dim = np.concatenate((conc, labels_dim), axis=-1) - for i in range(self.estimator.n_clusters): - self.x = self.axes[0].bar( + bars = [ + axes[0].bar( np.arange(self.fdata.n_samples), labels_dim[:, i + 1], bottom=np.sum(labels_dim[:, :(i + 1)], axis=1), color=self.cluster_colors[i], ) + for i in range(self.estimator.n_clusters) + ] + + for b in bars: + b.remove() + b.figure = None + + for i in range(self.n_samples()): + collection = PatchCollection( + [ + Rectangle( + bar.patches[i].get_xy(), + bar.patches[i].get_width(), + bar.patches[i].get_height(), + color=bar.patches[i].get_facecolor(), + ) for bar in bars + ], + match_original=True, + ) + axes[0].add_collection(collection) + self.artists[i, 0] = collection + + fig.canvas.draw() - self.axes[0].set_xticks(np.arange(self.fdata.n_samples)) - self.axes[0].set_xticklabels(self.sample_labels) - self.axes[0].set_xlabel(self.x_label) - self.axes[0].set_ylabel(self.y_label) - self.axes[0].legend(handles=patches) + axes[0].set_xticks(np.arange(self.fdata.n_samples)) + axes[0].set_xticklabels(self.sample_labels) + axes[0].set_xlabel(x_label) + axes[0].set_ylabel(y_label) + 
axes[0].legend(handles=patches) - self.fig.suptitle(self.title) - return self.fig + fig.suptitle(title) diff --git a/skfda/exploratory/visualization/fpca.py b/skfda/exploratory/visualization/fpca.py index 9d3fc73de..77c9e1c5a 100644 --- a/skfda/exploratory/visualization/fpca.py +++ b/skfda/exploratory/visualization/fpca.py @@ -1,9 +1,8 @@ -from typing import Any, Optional, Union +from typing import Optional, Sequence, Union from matplotlib.axes import Axes from matplotlib.figure import Figure -from skfda.exploratory.visualization._utils import _get_figure_and_axes from skfda.exploratory.visualization.representation import GraphPlot from skfda.representation import FData @@ -15,18 +14,18 @@ class FPCAPlot(BasePlot): FPCAPlot visualization. Args: - mean (FDataGrid or FDataBasis): + mean: the functional data object containing the mean function. If len(mean) > 1, the mean is computed. - components (FDataGrid or FDataBasis): + components: the principal components - multiple (float): + multiple: multiple of the principal component curve to be added or subtracted. - fig (figure object, optional): + fig: figure over which the graph is plotted. If not specified it will be initialized - axes (axes object, optional): axis over where the graph is plotted. + axes: axis over where the graph is plotted. If None, see param fig. """ @@ -40,54 +39,32 @@ def __init__( fig: Optional[Figure] = None, axes: Optional[Axes] = None, ): - super().__init__(self) + super().__init__( + chart, + fig=fig, + axes=axes, + ) self.mean = mean self.components = components self.multiple = multiple - self._set_figure_and_axes(chart, fig, axes) - - def plot(self, **kwargs: Any) -> Figure: - """ - Plots the perturbation graphs for the principal components. + @property + def n_subplots(self) -> int: + return self.components.dim_codomain - The perturbations are defined as variations over the mean. 
Adding a - multiple of the principal component curve to the mean function results - in the positive perturbation and subtracting a multiple of the - principal component curve results in the negative perturbation. For - each principal component curve passed, a subplot with the mean and - the perturbations is shown. - - Returns: - The plotted figure. + def _plot( + self, + fig: Figure, + axes: Sequence[Axes], + ) -> None: - """ if len(self.mean) > 1: self.mean = self.mean.mean() - for i, axes in enumerate(self.axes): - aux = self._get_component_perturbations(i) - gp = GraphPlot(fdata=aux, axes=axes).plot(**kwargs) - self.artists = gp.artists - axes.set_title(f"Principal component {i + 1}") - - return self.fig - - def n_samples(self) -> int: - return self.fdata.n_samples - - def _set_figure_and_axes( - self, - chart: Union[Figure, Axes, None] = None, - fig: Optional[Figure] = None, - axes: Optional[Axes] = None, - ) -> None: - fig, axes = _get_figure_and_axes(chart, fig, axes) - if not axes: - axes = fig.subplots(nrows=len(self.components)) - - self.fig = fig - self.axes = axes + for i, ax in enumerate(axes): + perturbations = self._get_component_perturbations(i) + GraphPlot(fdata=perturbations, axes=axes).plot() + ax.set_title(f"Principal component {i + 1}") def _get_component_perturbations(self, index: int = 0) -> FData: """ diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 6b13e98bd..a86903ec7 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -51,12 +51,15 @@ def _get_color_info( group_names: Optional[Indexable[K, str]] = None, group_colors: Optional[Indexable[K, ColorLike]] = None, legend: bool = False, - kwargs: Any = None, + kwargs: Optional[Mapping[str, Any]] = None, ) -> Tuple[ Optional[ColorLike], Optional[Sequence[matplotlib.patches.Patch]], ]: + if kwargs is None: + kwargs = {} + patches = None if group is not None: 
@@ -211,7 +214,14 @@ def __init__( legend: bool = False, **kwargs: Any, ) -> None: - BasePlot.__init__(self) + BasePlot.__init__( + self, + chart, + fig=fig, + axes=axes, + n_rows=n_rows, + n_cols=n_cols, + ) self.fdata = fdata self.gradient_criteria = gradient_criteria if self.gradient_criteria is not None: @@ -282,25 +292,24 @@ def __init__( self.sample_colors = sample_colors self.patches = patches - self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) + @property + def dim(self) -> int: + return self.fdata.dim_domain + 1 - def plot( + @property + def n_subplots(self) -> int: + return self.fdata.dim_codomain + + def n_samples(self) -> int: + """Get the number of instances that will be used for interactivity.""" + return self.fdata.n_samples + + def _plot( self, - **kwargs, - ) -> Figure: - """ - Plot the graph. - - Plots each coordinate separately. If the :term:`domain` is one - dimensional, the plots will be curves, and if it is two - dimensional, they will be surfaces. There are two styles of - visualizations, one that displays the functions without any - criteria choosing the colors and a new one that displays the - function with a gradient of colors depending on the initial - gradient_criteria (normalized in gradient_list). - Returns: - fig (figure object): figure object in which the graphs are plotted. 
- """ + fig: Figure, + axes: Sequence[Axes], + ) -> None: + self.artists = np.zeros( (self.n_samples(), self.fdata.dim_codomain), dtype=Artist, @@ -322,11 +331,10 @@ def plot( set_color_dict(self.sample_colors, j, color_dict) - self.artists[j, i] = self.axes[i].plot( + self.artists[j, i] = axes[i].plot( eval_points, mat[j, ..., i].T, **color_dict, - **kwargs, )[0] else: @@ -357,58 +365,14 @@ def plot( set_color_dict(self.sample_colors, h, color_dict) - self.artists[h, k] = self.axes[k].plot_surface( + self.artists[h, k] = axes[k].plot_surface( X, Y, Z[h, ..., k], **color_dict, - **kwargs, ) - _set_labels(self.fdata, self.fig, self.axes, self.patches) - - return self.fig - - def n_samples(self) -> int: - """Get the number of instances that will be used for interactivity.""" - return self.fdata.n_samples - - def _set_figure_and_axes( - self, - chart: Union[Figure, Axes, None] = None, - fig: Optional[Figure] = None, - axes: Union[Axes, Sequence[Axes], None] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, - ) -> None: - """ - Initialize the axes and fig of the plot. - - Args: - chart: figure over with the graphs are plotted or axis over - where the graphs are plotted. If None and ax is also - None, the figure is initialized. - fig: figure over with the graphs are plotted in case ax is not - specified. If None and ax is also None, the figure is - initialized. - axes: axis where the graphs are plotted. If None, see param fig. - n_rows: designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols: designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. 
- """ - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata( - fdata=self.fdata, - fig=fig, - axes=axes, - n_rows=n_rows, - n_cols=n_cols, - ) - self.fig = fig - self.axes = axes + _set_labels(self.fdata, fig, axes, self.patches) class ScatterPlot(BasePlot): @@ -417,20 +381,20 @@ class ScatterPlot(BasePlot): Args: fdata: functional data set that we want to plot. - grid_points (ndarray): points to plot. - chart (figure object, axe or list of axes, optional): figure over + grid_points: points to plot. + chart: figure over with the graphs are plotted or axis over where the graphs are plotted. If None and ax is also None, the figure is initialized. - fig (figure object, optional): figure over with the graphs are + fig: figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes (axis, optional): axis over where the graphs + axes: axis over where the graphs are plotted. If None, see param fig. - n_rows (int, optional): designates the number of rows of the figure + n_rows: designates the number of rows of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. - n_cols(int, optional): designates the number of columns of the + n_cols: designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. 
domain_range: Range where the @@ -477,7 +441,14 @@ def __init__( legend: bool = False, **kwargs: Any, ) -> None: - BasePlot.__init__(self) + BasePlot.__init__( + self, + chart, + fig=fig, + axes=axes, + n_rows=n_rows, + n_cols=n_cols, + ) self.fdata = fdata self.grid_points = grid_points @@ -513,12 +484,23 @@ def __init__( self.sample_colors = sample_colors self.patches = patches - self._set_figure_and_axes(chart, fig, axes, n_rows, n_cols) + @property + def dim(self) -> int: + return self.fdata.dim_domain + 1 + + @property + def n_subplots(self) -> int: + return self.fdata.dim_codomain + + def n_samples(self) -> int: + """Get the number of instances that will be used for interactivity.""" + return self.fdata.n_samples - def plot( + def _plot( self, - **kwargs: Any, - ) -> Figure: + fig: Figure, + axes: Sequence[Axes], + ) -> None: """ Scatter FDataGrid object. @@ -539,13 +521,12 @@ def plot( set_color_dict(self.sample_colors, j, color_dict) - self.artists[j, i] = self.axes[i].scatter( + self.artists[j, i] = axes[i].scatter( self.grid_points[0], self.evaluated_points[j, ..., i].T, **color_dict, picker=True, pickradius=2, - **kwargs, ) else: @@ -559,60 +540,16 @@ def plot( set_color_dict(self.sample_colors, h, color_dict) - self.artists[h, k] = self.axes[k].scatter( + self.artists[h, k] = axes[k].scatter( X, Y, self.evaluated_points[h, ..., k].T, **color_dict, picker=True, pickradius=2, - **kwargs, ) - _set_labels(self.fdata, self.fig, self.axes, self.patches) - - return self.fig - - def n_samples(self) -> int: - """Get the number of instances that will be used for interactivity.""" - return self.fdata.n_samples - - def _set_figure_and_axes( - self, - chart: Union[Figure, Axes, None] = None, - fig: Optional[Figure] = None, - axes: Union[Axes, Sequence[Axes], None] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, - ) -> None: - """ - Initialize the axes and fig of the plot. 
- - Args: - chart: figure over with the graphs are plotted or axis over - where the graphs are plotted. If None and ax is also - None, the figure is initialized. - fig: figure over with the graphs are plotted in case ax is not - specified. If None and ax is also None, the figure is - initialized. - axes: axis where the graphs are plotted. If None, see param fig. - n_rows: designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols: designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. - """ - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata( - fdata=self.fdata, - fig=fig, - axes=axes, - n_rows=n_rows, - n_cols=n_cols, - ) - self.fig = fig - self.axes = axes + _set_labels(self.fdata, fig, axes, self.patches) def set_color_dict( @@ -627,4 +564,4 @@ def set_color_dict( thanks to sample colors and index. """ if sample_colors is not None: - color_dict["color"] = sample_colors[ind] \ No newline at end of file + color_dict["color"] = sample_colors[ind] From acd426093380d86825c9bf4906f71cb5381116da Mon Sep 17 00:00:00 2001 From: VNMabus Date: Fri, 10 Sep 2021 23:55:39 +0200 Subject: [PATCH 410/417] Change `n_samples` to a property. 
--- skfda/exploratory/visualization/_baseplot.py | 1 + skfda/exploratory/visualization/_boxplot.py | 3 ++- skfda/exploratory/visualization/_ddplot.py | 10 +++++----- .../exploratory/visualization/_magnitude_shape_plot.py | 9 +++++---- skfda/exploratory/visualization/_multiple_display.py | 8 ++++---- skfda/exploratory/visualization/_outliergram.py | 10 +++++----- skfda/exploratory/visualization/_parametric_plot.py | 10 +++++----- skfda/exploratory/visualization/clustering.py | 9 ++++++--- skfda/exploratory/visualization/representation.py | 8 ++++---- 9 files changed, 37 insertions(+), 31 deletions(-) diff --git a/skfda/exploratory/visualization/_baseplot.py b/skfda/exploratory/visualization/_baseplot.py index 40a4c429c..56eeab59b 100644 --- a/skfda/exploratory/visualization/_baseplot.py +++ b/skfda/exploratory/visualization/_baseplot.py @@ -85,6 +85,7 @@ def n_subplots(self) -> int: """Get the number of subplots that this plot uses.""" return 1 + @property def n_samples(self) -> Optional[int]: """Get the number of instances that will be used for interactivity.""" return None diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index d9ef3f143..ea7b70a37 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -409,6 +409,7 @@ def show_full_outliers(self, boolean: bool) -> None: def n_subplots(self) -> int: return self.fdatagrid.dim_codomain + @property def n_samples(self) -> int: return self.fdatagrid.n_samples @@ -418,7 +419,7 @@ def _plot( axes: Sequence[Axes], ) -> None: - self.artists = np.zeros((self.n_samples(), 1), dtype=Artist) + self.artists = np.zeros((self.n_samples, 1), dtype=Artist) tones = np.linspace(0.1, 1.0, len(self._prob) + 1, endpoint=False)[1:] color = self.colormap(tones) diff --git a/skfda/exploratory/visualization/_ddplot.py b/skfda/exploratory/visualization/_ddplot.py index 68b1a80e7..bfa6707cd 100644 --- 
a/skfda/exploratory/visualization/_ddplot.py +++ b/skfda/exploratory/visualization/_ddplot.py @@ -78,6 +78,10 @@ def __init__( distribution=dist2, ) + @property + def n_samples(self) -> int: + return self.fdata.n_samples + def _plot( self, fig: Figure, @@ -95,7 +99,7 @@ def _plot( scattered. """ self.artists = np.zeros( - (self.n_samples(), 1), + (self.n_samples, 1), dtype=Artist, ) margin = 0.025 @@ -134,7 +138,3 @@ def _plot( linewidth=width_aux_line, color=color_aux_line, ) - - def n_samples(self) -> int: - """Get the number of instances that will be used for interactivity.""" - return self.fdata.n_samples diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py index bf40f9777..54ce674e8 100644 --- a/skfda/exploratory/visualization/_magnitude_shape_plot.py +++ b/skfda/exploratory/visualization/_magnitude_shape_plot.py @@ -295,6 +295,10 @@ def outliercol(self, value: float) -> None: "outcol must be a number between 0 and 1.") self._outliercol = value + @property + def n_samples(self) -> int: + return self.fdatagrid.n_samples + def _plot( self, fig: Figure, @@ -302,7 +306,7 @@ def _plot( ) -> None: self.artists = np.zeros( - (self.n_samples(), 1), + (self.n_samples, 1), dtype=Artist, ) colors = np.zeros((self.fdatagrid.n_samples, 4)) @@ -324,9 +328,6 @@ def _plot( axes[0].set_ylabel(self.ylabel) axes[0].set_title(self.title) - def n_samples(self) -> int: - return self.fdatagrid.n_samples - def __repr__(self) -> str: """Return repr(self).""" return ( diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index 727ade0de..47a926872 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -80,9 +80,9 @@ def __init__( self.displays = [copy.copy(d) for d in displays] self._n_graphs = sum(d.n_subplots for d in self.displays) self.length_data = next( - 
d.n_samples() + d.n_samples for d in self.displays - if d.n_samples() is not None + if d.n_samples is not None ) self.sliders: List[Widget] = [] self.criteria: List[List[int]] = [] @@ -276,8 +276,8 @@ def plot(self) -> Figure: if self._n_graphs > 1: for d in self.displays[1:]: if ( - d.n_samples() is not None - and d.n_samples() != self.length_data + d.n_samples is not None + and d.n_samples != self.length_data ): raise ValueError( "Length of some data sets are not equal ", diff --git a/skfda/exploratory/visualization/_outliergram.py b/skfda/exploratory/visualization/_outliergram.py index 3ba7ad2ed..d7410f41f 100644 --- a/skfda/exploratory/visualization/_outliergram.py +++ b/skfda/exploratory/visualization/_outliergram.py @@ -80,6 +80,10 @@ def __init__( self._parabola_ordered = self.outlier_detector.parabola_[indices] self._mei_ordered = self.outlier_detector.mei_[indices] + @property + def n_samples(self) -> int: + return self.fdata.n_samples + def _plot( self, fig: Figure, @@ -87,7 +91,7 @@ def _plot( ) -> None: self.artists = np.zeros( - (self.n_samples(), 1), + (self.n_samples, 1), dtype=Artist, ) @@ -127,7 +131,3 @@ def _plot( 0, # Minimum MBD 1, # Maximum MBD ]) - - def n_samples(self) -> int: - """Get the number of instances that will be used for interactivity.""" - return self.fdata.n_samples diff --git a/skfda/exploratory/visualization/_parametric_plot.py b/skfda/exploratory/visualization/_parametric_plot.py index 7b749e162..6ba990bf8 100644 --- a/skfda/exploratory/visualization/_parametric_plot.py +++ b/skfda/exploratory/visualization/_parametric_plot.py @@ -77,13 +77,17 @@ def __init__( self.group_colors = group_colors self.legend = legend + @property + def n_samples(self) -> int: + return self.fd_final.n_samples + def _plot( self, fig: Figure, axes: Axes, ) -> None: - self.artists = np.zeros((self.n_samples(), 1), dtype=Artist) + self.artists = np.zeros((self.n_samples, 1), dtype=Artist) sample_colors, patches = _get_color_info( self.fd_final, @@ 
-130,7 +134,3 @@ def _plot( ax.set_ylabel("Function 2") else: ax.set_ylabel(self.fd_final.coordinate_names[1]) - - def n_samples(self) -> int: - """Get the number of instances that will be used for interactivity.""" - return self.fd_final.n_samples diff --git a/skfda/exploratory/visualization/clustering.py b/skfda/exploratory/visualization/clustering.py index 6a8a94418..603bbd7c5 100644 --- a/skfda/exploratory/visualization/clustering.py +++ b/skfda/exploratory/visualization/clustering.py @@ -233,6 +233,7 @@ def __init__( def n_subplots(self) -> int: return self.fdata.dim_codomain + @property def n_samples(self) -> int: return self.fdata.n_samples @@ -299,7 +300,7 @@ def _plot_clusters( ] self.artists = np.array(artists).reshape( - (self.n_subplots, self.n_samples()), + (self.n_subplots, self.n_samples), ).T for j in range(self.fdata.dim_codomain): @@ -401,6 +402,7 @@ def __init__( self.title = title self.colormap = colormap + @property def n_samples(self) -> int: return self.fdata.n_samples @@ -545,6 +547,7 @@ def __init__( self.colormap = colormap self.sort = sort + @property def n_samples(self) -> int: return self.fdata.n_samples @@ -555,7 +558,7 @@ def _plot( ) -> None: self.artists = np.full( - (self.n_samples(), self.n_subplots), + (self.n_samples, self.n_subplots), None, dtype=Artist, ) @@ -649,7 +652,7 @@ def _plot( b.remove() b.figure = None - for i in range(self.n_samples()): + for i in range(self.n_samples): collection = PatchCollection( [ Rectangle( diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index a86903ec7..1b7484dd0 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -300,8 +300,8 @@ def dim(self) -> int: def n_subplots(self) -> int: return self.fdata.dim_codomain + @property def n_samples(self) -> int: - """Get the number of instances that will be used for interactivity.""" return self.fdata.n_samples def 
_plot( @@ -311,7 +311,7 @@ def _plot( ) -> None: self.artists = np.zeros( - (self.n_samples(), self.fdata.dim_codomain), + (self.n_samples, self.fdata.dim_codomain), dtype=Artist, ) @@ -492,8 +492,8 @@ def dim(self) -> int: def n_subplots(self) -> int: return self.fdata.dim_codomain + @property def n_samples(self) -> int: - """Get the number of instances that will be used for interactivity.""" return self.fdata.n_samples def _plot( @@ -508,7 +508,7 @@ def _plot( fig: figure object in which the graphs are plotted. """ self.artists = np.zeros( - (self.n_samples(), self.fdata.dim_codomain), + (self.n_samples, self.fdata.dim_codomain), dtype=Artist, ) From 9554da2a9a308af521a04c6b4475a2aa692ae5e3 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 11 Sep 2021 14:17:52 +0200 Subject: [PATCH 411/417] Extend mei to functions of several variables. --- skfda/exploratory/stats/_stats.py | 18 +++++++++++------- .../visualization/representation.py | 7 +------ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 0c19282a7..bc946703a 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -4,7 +4,6 @@ from typing import Optional, TypeVar, Union import numpy as np - from scipy import integrate from scipy.stats import rankdata @@ -97,13 +96,18 @@ def modified_epigraph_index(X: FDataGrid) -> np.ndarray: axis=0, ) - 1 - integrand = integrate.simps( - num_functions_above, - x=X.grid_points[0], - axis=1, - ) + integrand = num_functions_above + + for d, s in zip(X.domain_range, X.grid_points): + integrand = integrate.simps( + integrand, + x=s, + axis=1, + ) + interval_len = d[1] - d[0] + integrand /= interval_len - integrand /= (interval_len * X.n_samples) + integrand /= X.n_samples return integrand.flatten() diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 1b7484dd0..bde4576e9 100644 --- 
a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -23,12 +23,7 @@ from ...representation._functional_data import FData from ...representation._typing import DomainRangeLike, GridPointsLike from ._baseplot import BasePlot -from ._utils import ( - ColorLike, - _get_figure_and_axes, - _set_figure_layout_for_fdata, - _set_labels, -) +from ._utils import ColorLike, _set_labels K = TypeVar('K', contravariant=True) V = TypeVar('V', covariant=True) From d1ffcfbc5916da04708bb7b5ff4af5f4d1addbf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Ramos=20Carre=C3=B1o?= Date: Sat, 11 Sep 2021 16:39:23 +0200 Subject: [PATCH 412/417] Update list of authors. --- THANKS.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/THANKS.txt b/THANKS.txt index e5d3abfe0..8530edd4b 100644 --- a/THANKS.txt +++ b/THANKS.txt @@ -5,4 +5,10 @@ Carlos Ramos Carreño for the design, reviews and supervision, and for contribut Pablo Marcos Manchón for the registration functions, including integration with fdasrsf. Amanda Hernando Bernabé for visualization and clustering functions. Pablo Pérez Manso for regression and related utilities. -Sergio Ruiz Lozano for the design of the logo. \ No newline at end of file +Yujian Hong for the Principal Component Analysis functionalities. +David García Fernandez for implementing Anova and Hotelling tests. +Pedro Martín Rodríguez-Ponga Eyriès for implementing several classification methods. +Álvaro Sánchez Romero for improving the visualization methods and adding interactive visualizations. +Elena Petrunina for improving the documentation, and regression functions. +Luis Alberto Rodriguez Ramirez for providing mathematical support. +Sergio Ruiz Lozano for the design of the logo. From e62ce44f10d8c8968595afba2ee30f3445af8c33 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 11 Sep 2021 18:50:10 +0200 Subject: [PATCH 413/417] Improve support for 3d interactive plots. 
--- .../visualization/_multiple_display.py | 9 +++ skfda/exploratory/visualization/_utils.py | 61 +++++++++++++------ 2 files changed, 50 insertions(+), 20 deletions(-) diff --git a/skfda/exploratory/visualization/_multiple_display.py b/skfda/exploratory/visualization/_multiple_display.py index 47a926872..79316a1d8 100644 --- a/skfda/exploratory/visualization/_multiple_display.py +++ b/skfda/exploratory/visualization/_multiple_display.py @@ -1,4 +1,5 @@ import copy +import itertools from functools import partial from typing import ( Generator, @@ -149,11 +150,19 @@ def _init_axes( n_rows, n_cols = _get_axes_shape(self._n_graphs + extra) + dim = list( + itertools.chain.from_iterable( + [d.dim] * d.n_subplots + for d in self.displays + ), + ) + [2] * extra + number_axes = n_rows * n_cols fig, axes = _set_figure_layout( fig=fig, axes=axes, n_axes=self._n_graphs + extra, + dim=dim, ) for i in range(self._n_graphs, number_axes): diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 5bb7c551f..0dbb24a1b 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -1,6 +1,7 @@ import io import math import re +from itertools import repeat from typing import Optional, Sequence, Tuple, TypeVar, Union import matplotlib.backends.backend_svg @@ -139,10 +140,22 @@ def _get_axes_shape( return new_n_rows, new_n_cols +def _projection_from_dim(dim: int) -> str: + + if dim == 2: + return 'rectilinear' + elif dim == 3: + return '3d' + + raise NotImplementedError( + "Only bidimensional or tridimensional plots are supported.", + ) + + def _set_figure_layout( fig: Figure, axes: Sequence[Axes], - dim: int = 2, + dim: Union[int, Sequence[int]] = 2, n_axes: int = 1, n_rows: Optional[int] = None, n_cols: Optional[int] = None, @@ -170,11 +183,6 @@ def _set_figure_layout( * axes (list): axes in which the graphs are plotted. 
""" - if not (1 < dim < 4): - raise NotImplementedError( - "Only bidimensional or tridimensional plots are supported.", - ) - if len(axes) not in {0, n_axes}: raise ValueError( f"The number of axes ({len(axes)}) must be 0 (to create them)" @@ -190,29 +198,42 @@ def _set_figure_layout( "that no axes are provided.", ) - if dim == 2: - projection = 'rectilinear' - else: - projection = '3d' - if len(axes) == 0: # Create the axes n_rows, n_cols = _get_axes_shape(n_axes, n_rows, n_cols) - fig.subplots( - nrows=n_rows, - ncols=n_cols, - subplot_kw={"projection": projection}, - ) + + for i in range(n_rows): + for j in range(n_cols): + subplot_index = i * n_cols + j + if subplot_index < n_axes: + plot_dim = ( + dim if isinstance(dim, int) else dim[subplot_index] + ) + + fig.add_subplot( + n_rows, + n_cols, + subplot_index + 1, + projection=_projection_from_dim(plot_dim), + ) + axes = fig.axes else: # Check that the projections are right + projections = ( + repeat(_projection_from_dim(dim)) + if isinstance(dim, int) + else (_projection_from_dim(d) for d in dim) + ) - if not all(a.name == projection for a in axes): - raise ValueError( - f"The projection of the axes should be {projection}", - ) + for a, proj in zip(axes, projections): + if a.name != proj: + raise ValueError( + f"The projection of the axes is {a.name} " + f"but should be {proj}", + ) return fig, axes From dd18b3adbb90ea32d10b9529092dc41da36bec08 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 11 Sep 2021 20:08:00 +0200 Subject: [PATCH 414/417] Fix boxplot support of MultipleDisplay. 
--- skfda/exploratory/visualization/_boxplot.py | 127 +++++++++----------- 1 file changed, 60 insertions(+), 67 deletions(-) diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index ea7b70a37..20f474d63 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -7,7 +7,7 @@ from __future__ import annotations import math -from abc import ABC, abstractmethod +from abc import abstractmethod from typing import Optional, Sequence, Tuple, Union import matplotlib @@ -32,7 +32,7 @@ ) -class FDataBoxplot(ABC): +class FDataBoxplot(BasePlot): """ Abstract class inherited by the Boxplot and SurfaceBoxplot classes. @@ -47,12 +47,29 @@ class FDataBoxplot(ABC): """ @abstractmethod - def __init__(self, factor: float = 1.5) -> None: + def __init__( + self, + chart: Union[Figure, Axes, None] = None, + *, + factor: float = 1.5, + fig: Optional[Figure] = None, + axes: Optional[Axes] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, + ) -> None: if factor < 0: raise ValueError( "The number used to calculate the " "outlying envelope must be positive.", ) + + super().__init__( + chart, + fig=fig, + axes=axes, + n_rows=n_rows, + n_cols=n_cols, + ) self._factor = factor @property @@ -89,7 +106,7 @@ def colormap(self, value: Colormap) -> None: self._colormap = value -class Boxplot(FDataBoxplot, BasePlot): +class Boxplot(FDataBoxplot): r""" Representation of the functional boxplot. @@ -118,15 +135,15 @@ class Boxplot(FDataBoxplot, BasePlot): indicate which central regions to represent. Defaults to (0.5,) which represents the 50% central region. factor: Number used to calculate the outlying envelope. - fig: figure over with the graphs are + fig: Figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. - axes: axis over where the graphs + axes: Axis over where the graphs are plotted. 
If None, see param fig. - n_rows: designates the number of rows of the figure + n_rows: Designates the number of rows of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. - n_cols: designates the number of columns of the + n_cols: Designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. @@ -294,14 +311,13 @@ def __init__( specified if fig and ax are None. """ - FDataBoxplot.__init__(self, factor) - BasePlot.__init__( - self, + super().__init__( chart, fig=fig, axes=axes, n_rows=n_rows, n_cols=n_cols, + factor=factor, ) if fdatagrid.dim_domain != 1: @@ -409,17 +425,12 @@ def show_full_outliers(self, boolean: bool) -> None: def n_subplots(self) -> int: return self.fdatagrid.dim_codomain - @property - def n_samples(self) -> int: - return self.fdatagrid.n_samples - def _plot( self, fig: Figure, axes: Sequence[Axes], ) -> None: - self.artists = np.zeros((self.n_samples, 1), dtype=Artist) tones = np.linspace(0.1, 1.0, len(self._prob) + 1, endpoint=False)[1:] color = self.colormap(tones) @@ -556,6 +567,17 @@ class SurfaceBoxplot(FDataBoxplot): boxcol: Color of the box, which includes median and central envelope. outcol: Color of the outlying envelope. + fig: Figure over with the graphs are + plotted in case ax is not specified. If None and ax is also + None, the figure is initialized. + axes: Axis over where the graphs + are plotted. If None, see param fig. + n_rows: Designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols: Designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. Examples: Function :math:`f : \mathbb{R^2}\longmapsto\mathbb{R}`. 
@@ -630,20 +652,24 @@ class SurfaceBoxplot(FDataBoxplot): def __init__( self, fdatagrid: FDataGrid, + chart: Union[Figure, Axes, None] = None, + *, depth_method: Optional[Depth[FDataGrid]] = None, factor: float = 1.5, + fig: Optional[Figure] = None, + axes: Optional[Axes] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, ) -> None: - """ - Initialize the functional boxplot. - - Args: - fdatagrid: Object containing the data. - depth_method: Method used to order the data. Defaults to - :class:`~skfda.exploratory.depth.ModifiedBandDepth`. - factor: Number used to calculate the outlying envelope. - """ - FDataBoxplot.__init__(self, factor) + super().__init__( + chart, + fig=fig, + axes=axes, + n_rows=n_rows, + n_cols=n_cols, + factor=factor, + ) if fdatagrid.dim_domain != 2: raise ValueError( @@ -721,46 +747,15 @@ def outcol(self, value: float) -> None: raise ValueError("outcol must be a number between 0 and 1.") self._outcol = value - def plot( - self, - chart: Union[Figure, Axes, None] = None, - *, - fig: Optional[Figure] = None, - axes: Optional[Axes] = None, - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, - ) -> Figure: - """ - Visualization of the surface boxplot of the fdatagrid (dim_domain=2). - - Args: - chart: figure over with the graphs are plotted or axis over - where the graphs are plotted. If None and ax is also - None, the figure is initialized. - fig: figure over with the graphs are plotted in case ax - is not specified. If None and ax is also None, the figure - is initialized. - axes: axis over where the graphs are plotted. If None, - see param fig. - n_rows: designates the number of rows of the figure - to plot the different dimensions of the image. Only specified - if fig and ax are None. - n_cols: designates the number of columns of the - figure to plot the different dimensions of the image. Only - specified if fig and ax are None. - - Returns: - Figure object in which the graphs are plotted. 
+ @property + def dim(self) -> int: + return 3 - """ - fig, axes = _get_figure_and_axes(chart, fig, axes) - fig, axes = _set_figure_layout_for_fdata( - self.fdatagrid, - fig, - axes, - n_rows, - n_cols, - ) + def _plot( + self, + fig: Figure, + axes: Sequence[Axes], + ) -> None: x = self.fdatagrid.grid_points[0] lx = len(x) @@ -902,8 +897,6 @@ def plot( _set_labels(self.fdatagrid, fig, axes) - return fig - def __repr__(self) -> str: """Return repr(self).""" return ( From 79671520ee55a3a53c3da25f69f8f8469c690161 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sat, 11 Sep 2021 20:27:48 +0200 Subject: [PATCH 415/417] Remove unused code. --- skfda/exploratory/visualization/_boxplot.py | 6 +--- skfda/exploratory/visualization/_utils.py | 39 --------------------- 2 files changed, 1 insertion(+), 44 deletions(-) diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py index 20f474d63..73c6161ad 100644 --- a/skfda/exploratory/visualization/_boxplot.py +++ b/skfda/exploratory/visualization/_boxplot.py @@ -25,11 +25,7 @@ from ..depth import ModifiedBandDepth from ..outliers import _envelopes from ._baseplot import BasePlot -from ._utils import ( - _get_figure_and_axes, - _set_figure_layout_for_fdata, - _set_labels, -) +from ._utils import _set_labels class FDataBoxplot(BasePlot): diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py index 0dbb24a1b..e9aa766cc 100644 --- a/skfda/exploratory/visualization/_utils.py +++ b/skfda/exploratory/visualization/_utils.py @@ -238,45 +238,6 @@ def _set_figure_layout( return fig, axes -def _set_figure_layout_for_fdata( - fdata: FData, - fig: Figure, - axes: Sequence[Axes], - n_rows: Optional[int] = None, - n_cols: Optional[int] = None, -) -> Tuple[Figure, Sequence[Axes]]: - """ - Set the figure axes for plotting a FData object. - - Args: - fdata: functional data object. - fig: figure over with the graphs are plotted in case ax is not - specified. 
- axes: axis over where the graphs are plotted. - n_rows: designates the number of rows of the figure to plot the - different dimensions of the image. Can only be passed - if no axes are specified. - n_cols: designates the number of columns of the figure to plot - the different dimensions of the image. Can only be passed if - no axes are specified. - - Returns: - Tuple containing: - - * fig: figure object in which the graphs are plotted. - * axes: axes in which the graphs are plotted. - - """ - return _set_figure_layout( - fig, - axes, - dim=fdata.dim_domain + 1, - n_axes=fdata.dim_codomain, - n_rows=n_rows, - n_cols=n_cols, - ) - - def _set_labels( fdata: FData, fig: Figure, From 82bd8aacabab756dfbce8dfea916073e406c02d2 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 12 Sep 2021 19:23:15 +0200 Subject: [PATCH 416/417] Fix FPCA plot. --- examples/plot_fpca.py | 1 + skfda/exploratory/visualization/fpca.py | 26 +++++++++++++------------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/examples/plot_fpca.py b/examples/plot_fpca.py index f0e7a3045..f3f78bfdd 100644 --- a/examples/plot_fpca.py +++ b/examples/plot_fpca.py @@ -78,6 +78,7 @@ fpca.components_, 30, fig=plt.figure(figsize=(6, 2 * 4)), + n_rows=2, ).plot() ############################################################################## diff --git a/skfda/exploratory/visualization/fpca.py b/skfda/exploratory/visualization/fpca.py index 77c9e1c5a..8da05b16b 100644 --- a/skfda/exploratory/visualization/fpca.py +++ b/skfda/exploratory/visualization/fpca.py @@ -14,19 +14,17 @@ class FPCAPlot(BasePlot): FPCAPlot visualization. Args: - mean: - the functional data object containing the mean function. + mean: The functional data object containing the mean function. If len(mean) > 1, the mean is computed. 
- components: - the principal components - multiple: - multiple of the principal component curve to be added or + components: The principal components + multiple: Multiple of the principal component curve to be added or subtracted. - fig: - figure over which the graph is plotted. If not specified it will + fig: Figure over which the graph is plotted. If not specified it will be initialized - axes: axis over where the graph is plotted. - If None, see param fig. + axes: Axes over where the graph is plotted. + If ``None``, see param fig. + n_rows: Designates the number of rows of the figure. + n_cols: Designates the number of columns of the figure. """ def __init__( @@ -38,11 +36,15 @@ def __init__( *, fig: Optional[Figure] = None, axes: Optional[Axes] = None, + n_rows: Optional[int] = None, + n_cols: Optional[int] = None, ): super().__init__( chart, fig=fig, axes=axes, + n_rows=n_rows, + n_cols=n_cols, ) self.mean = mean self.components = components @@ -50,7 +52,7 @@ def __init__( @property def n_subplots(self) -> int: - return self.components.dim_codomain + return len(self.components) def _plot( self, @@ -63,7 +65,7 @@ def _plot( for i, ax in enumerate(axes): perturbations = self._get_component_perturbations(i) - GraphPlot(fdata=perturbations, axes=axes).plot() + GraphPlot(fdata=perturbations, axes=ax).plot() ax.set_title(f"Principal component {i + 1}") def _get_component_perturbations(self, index: int = 0) -> FData: From 115b482272cfc59f8f8cbab3ec0b0392cd4e049b Mon Sep 17 00:00:00 2001 From: VNMabus Date: Sun, 12 Sep 2021 20:09:04 +0200 Subject: [PATCH 417/417] Bump version. --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 2eb3c4fe4..5a2a5806d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.5 +0.6