From 434340e008e5775b33b378cabfaf1e4f2671fe57 Mon Sep 17 00:00:00 2001 From: Auguste Baum Date: Tue, 21 Jan 2025 12:07:07 +0100 Subject: [PATCH] docs: Ensure EstimatorReport and CrossValidationReport instances are called "report" (#1176) Closes #1119 --- .../model_evaluation/plot_estimator_report.py | 52 +++--- skore/src/skore/sklearn/_base.py | 18 +- .../_cross_validation/metrics_accessor.py | 118 ++++++------- .../skore/sklearn/_cross_validation/report.py | 18 +- .../sklearn/_estimator/metrics_accessor.py | 166 +++++++++--------- skore/src/skore/sklearn/_estimator/report.py | 16 +- .../sklearn/_plot/precision_recall_curve.py | 8 +- .../skore/sklearn/_plot/prediction_error.py | 8 +- skore/src/skore/sklearn/_plot/roc_curve.py | 8 +- skore/tests/unit/sklearn/test_base.py | 10 +- .../unit/sklearn/test_cross_validation.py | 6 +- skore/tests/unit/sklearn/test_estimator.py | 16 +- sphinx/api.rst | 8 +- 13 files changed, 226 insertions(+), 226 deletions(-) diff --git a/examples/model_evaluation/plot_estimator_report.py b/examples/model_evaluation/plot_estimator_report.py index 044ec1f85..8d3eb84ca 100644 --- a/examples/model_evaluation/plot_estimator_report.py +++ b/examples/model_evaluation/plot_estimator_report.py @@ -67,27 +67,27 @@ # detect that our estimator is already fitted and will not fit it again. from skore import EstimatorReport -reporter = EstimatorReport( +report = EstimatorReport( estimator, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test ) -reporter +report # %% # -# Once the reporter is created, we get some information regarding the available tools +# Once the report is created, we get some information regarding the available tools # allowing us to get some insights from our specific model on the specific task. # # You can get a similar information if you call the :meth:`~skore.EstimatorReport.help` # method. -reporter.help() +report.help() # %% # # Be aware that you can access the help for each individual sub-accessor. For instance: -reporter.metrics.help() +report.metrics.help() # %% -reporter.metrics.plot.help() +report.metrics.plot.help() # %% # @@ -102,7 +102,7 @@ import time start = time.time() -metric_report = reporter.metrics.report_metrics(pos_label=pos_label) +metric_report = report.metrics.report_metrics(pos_label=pos_label) end = time.time() metric_report @@ -114,13 +114,13 @@ # An interesting feature provided by the :class:`skore.EstimatorReport` is the # the caching mechanism. Indeed, when we have a large enough dataset, computing the # predictions for a model is not cheap anymore. For instance, on our smallish dataset, -# it took a couple of seconds to compute the metrics. The reporter will cache the +# it took a couple of seconds to compute the metrics. The report will cache the # predictions and if you are interested in computing a metric again or an alternative # metric that requires the same predictions, it will be faster. Let's check by # requesting the same metrics report again. start = time.time() -metric_report = reporter.metrics.report_metrics(pos_label=pos_label) +metric_report = report.metrics.report_metrics(pos_label=pos_label) end = time.time() metric_report @@ -143,7 +143,7 @@ # and reload them if available. So for instance, let's compute the log loss. start = time.time() -log_loss = reporter.metrics.log_loss() +log_loss = report.metrics.log_loss() end = time.time() log_loss @@ -154,10 +154,10 @@ # # We can show that without initial cache, it would have taken more time to compute # the log loss. -reporter.clear_cache() +report.clear_cache() start = time.time() -log_loss = reporter.metrics.log_loss() +log_loss = report.metrics.log_loss() end = time.time() log_loss @@ -169,7 +169,7 @@ # By default, the metrics are computed on the test set. However, if a training set # is provided, we can also compute the metrics by specifying the `data_source` # parameter. -reporter.metrics.log_loss(data_source="train") +report.metrics.log_loss(data_source="train") # %% # @@ -178,7 +178,7 @@ # a `X` and `y` parameters. start = time.time() -metric_report = reporter.metrics.report_metrics( +metric_report = report.metrics.report_metrics( data_source="X_y", X=X_test, y=y_test, pos_label=pos_label ) end = time.time() @@ -195,7 +195,7 @@ # %% start = time.time() -metric_report = reporter.metrics.report_metrics( +metric_report = report.metrics.report_metrics( data_source="X_y", X=X_test, y=y_test, pos_label=pos_label ) end = time.time() @@ -245,13 +245,13 @@ def operational_decision_cost(y_true, y_pred, amount): # # Let's make sure that a function called the `predict` method and cached the result. # We compute the accuracy metric to make sure that the `predict` method is called. -reporter.metrics.accuracy() +report.metrics.accuracy() # %% # # We can now compute the cost of our operational decision. start = time.time() -cost = reporter.metrics.custom_metric( +cost = report.metrics.custom_metric( metric_function=operational_decision_cost, metric_name="Operational Decision Cost", response_method="predict", @@ -266,11 +266,11 @@ def operational_decision_cost(y_true, y_pred, amount): # %% # # Let's now clean the cache and see if it is faster. -reporter.clear_cache() +report.clear_cache() # %% start = time.time() -cost = reporter.metrics.custom_metric( +cost = report.metrics.custom_metric( metric_function=operational_decision_cost, metric_name="Operational Decision Cost", response_method="predict", @@ -287,7 +287,7 @@ def operational_decision_cost(y_true, y_pred, amount): # We observe that caching is working as expected. It is really handy because it means # that you can compute some additional metrics without having to recompute the # the predictions. -reporter.metrics.report_metrics( +report.metrics.report_metrics( scoring=["precision", "recall", operational_decision_cost], pos_label=pos_label, scoring_kwargs={ @@ -314,7 +314,7 @@ def operational_decision_cost(y_true, y_pred, amount): metric_name="Operational Decision Cost", amount=amount, ) -reporter.metrics.report_metrics(scoring=[f1_scorer, operational_decision_cost_scorer]) +report.metrics.report_metrics(scoring=[f1_scorer, operational_decision_cost_scorer]) # %% # @@ -324,12 +324,12 @@ def operational_decision_cost(y_true, y_pred, amount): # The :class:`skore.EstimatorReport` class also provides a plotting interface that # allows to plot *defacto* the most common plots. As for the the metrics, we only # provide the meaningful set of plots for the provided estimator. -reporter.metrics.plot.help() +report.metrics.plot.help() # %% # # Let's start by plotting the ROC curve for our binary classification task. -display = reporter.metrics.plot.roc(pos_label=pos_label) +display = report.metrics.plot.roc(pos_label=pos_label) plt.tight_layout() # %% @@ -357,7 +357,7 @@ def operational_decision_cost(y_true, y_pred, amount): # performance gain we can get. start = time.time() # we already trigger the computation of the predictions in a previous call -reporter.metrics.plot.roc(pos_label=pos_label) +report.metrics.plot.roc(pos_label=pos_label) plt.tight_layout() end = time.time() @@ -367,11 +367,11 @@ def operational_decision_cost(y_true, y_pred, amount): # %% # # Now, let's clean the cache and check if we get a slowdown. -reporter.clear_cache() +report.clear_cache() # %% start = time.time() -reporter.metrics.plot.roc(pos_label=pos_label) +report.metrics.plot.roc(pos_label=pos_label) plt.tight_layout() end = time.time() diff --git a/skore/src/skore/sklearn/_base.py b/skore/src/skore/sklearn/_base.py index e54620713..d62963cb9 100644 --- a/skore/src/skore/sklearn/_base.py +++ b/skore/src/skore/sklearn/_base.py @@ -124,7 +124,7 @@ def _get_attributes_for_help(self): def _create_help_tree(self): """Create a rich Tree with the available tools and accessor methods.""" - tree = Tree("reporter") + tree = Tree("report") # Add accessor methods first for accessor_attr, config in self._ACCESSOR_CONFIG.items(): @@ -205,8 +205,8 @@ def _get_X_y_and_data_source_hash(self, *, data_source, X=None, y=None): data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. X : array-like of shape (n_samples, n_features) or None, default=None @@ -237,8 +237,8 @@ def _get_X_y_and_data_source_hash(self, *, data_source, X=None, y=None): missing_data = "X_test" if is_cluster else "X_test and y_test" raise ValueError( f"No {data_source} data (i.e. {missing_data}) were provided " - f"when creating the reporter. Please provide the {data_source} " - "data either when creating the reporter or by setting data_source " + f"when creating the report. Please provide the {data_source} " + "data either when creating the report or by setting data_source " "to 'X_y' and providing X and y." ) return self._parent._X_test, self._parent._y_test, None @@ -251,8 +251,8 @@ def _get_X_y_and_data_source_hash(self, *, data_source, X=None, y=None): missing_data = "X_train" if is_cluster else "X_train and y_train" raise ValueError( f"No {data_source} data (i.e. {missing_data}) were provided " - f"when creating the reporter. Please provide the {data_source} " - "data either when creating the reporter or by setting data_source " + f"when creating the report. Please provide the {data_source} " + "data either when creating the report or by setting data_source " "to 'X_y' and providing X and y." ) return self._parent._X_train, self._parent._y_train, None @@ -307,8 +307,8 @@ def _get_cached_response_values( data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. data_source_hash : int or None diff --git a/skore/src/skore/sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/sklearn/_cross_validation/metrics_accessor.py index a5aa56799..ed47e48e8 100644 --- a/skore/src/skore/sklearn/_cross_validation/metrics_accessor.py +++ b/skore/src/skore/sklearn/_cross_validation/metrics_accessor.py @@ -59,12 +59,12 @@ def report_metrics( data_source : {"test", "train"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. scoring : list of str, callable, or scorer, default=None The metrics to report. You can get the possible list of string by calling - `reporter.metrics.help()`. When passing a callable, it should take as + `report.metrics.help()`. When passing a callable, it should take as arguments `y_true`, `y_pred` as the two first arguments. Additional arguments can be passed as keyword arguments and will be forwarded with `scoring_kwargs`. If the callable API is too restrictive (e.g. need to pass @@ -96,9 +96,9 @@ def report_metrics( >>> from skore import CrossValidationReport >>> X, y = load_breast_cancer(return_X_y=True) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> reporter.metrics.report_metrics( + >>> report.metrics.report_metrics( ... scoring=["precision", "recall"], pos_label=1, aggregate=["mean", "std"] ... ) Compute metric for each split ... @@ -192,8 +192,8 @@ def accuracy(self, *, data_source="test", aggregate=None): data_source : {"test", "train"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. aggregate : {"mean", "std"} or list of such str, default=None Function to aggregate the scores across the cross-validation splits. @@ -210,9 +210,9 @@ def accuracy(self, *, data_source="test", aggregate=None): >>> from skore import CrossValidationReport >>> X, y = load_breast_cancer(return_X_y=True) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> reporter.metrics.accuracy() + >>> report.metrics.accuracy() Compute metric for each split ... Metric Accuracy (↗︎) LogisticRegression Split #0 0.94... @@ -244,8 +244,8 @@ def precision( data_source : {"test", "train"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. average : {"binary","macro", "micro", "weighted", "samples"} or None, \ default=None @@ -290,9 +290,9 @@ def precision( >>> from skore import CrossValidationReport >>> X, y = load_breast_cancer(return_X_y=True) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> reporter.metrics.precision() + >>> report.metrics.precision() Compute metric for each split ... Metric Precision (↗︎) Class label 0 1 @@ -327,8 +327,8 @@ def recall( data_source : {"test", "train"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. average : {"binary","macro", "micro", "weighted", "samples"} or None, \ default=None @@ -374,9 +374,9 @@ def recall( >>> from skore import CrossValidationReport >>> X, y = load_breast_cancer(return_X_y=True) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> reporter.metrics.recall() + >>> report.metrics.recall() Compute metric for each split ... Metric Recall (↗︎) Class label 0 1 @@ -402,8 +402,8 @@ def brier_score(self, *, data_source="test", aggregate=None): data_source : {"test", "train"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. aggregate : {"mean", "std"} or list of such str, default=None Function to aggregate the scores across the cross-validation splits. @@ -420,9 +420,9 @@ def brier_score(self, *, data_source="test", aggregate=None): >>> from skore import CrossValidationReport >>> X, y = load_breast_cancer(return_X_y=True) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> reporter.metrics.brier_score() + >>> report.metrics.brier_score() Compute metric for each split ... Metric Brier score (↘︎) LogisticRegression Split #0 0.04... @@ -454,8 +454,8 @@ def roc_auc( data_source : {"test", "train"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. average : {"auto", "macro", "micro", "weighted", "samples"}, \ default=None @@ -506,9 +506,9 @@ def roc_auc( >>> from skore import CrossValidationReport >>> X, y = load_breast_cancer(return_X_y=True) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> reporter.metrics.roc_auc() + >>> report.metrics.roc_auc() Compute metric for each split ... Metric ROC AUC (↗︎) LogisticRegression Split #0 0.99... @@ -535,8 +535,8 @@ def log_loss(self, *, data_source="test", aggregate=None): data_source : {"test", "train"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. aggregate : {"mean", "std"} or list of such str, default=None Function to aggregate the scores across the cross-validation splits. @@ -553,9 +553,9 @@ def log_loss(self, *, data_source="test", aggregate=None): >>> from skore import CrossValidationReport >>> X, y = load_breast_cancer(return_X_y=True) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> reporter.metrics.log_loss() + >>> report.metrics.log_loss() Compute metric for each split ... Metric Log loss (↘︎) LogisticRegression Split #0 0.1... @@ -582,8 +582,8 @@ def r2( data_source : {"test", "train"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. multioutput : {"raw_values", "uniform_average"} or array-like of shape \ (n_outputs,), default="raw_values" @@ -610,9 +610,9 @@ def r2( >>> from skore import CrossValidationReport >>> X, y = load_diabetes(return_X_y=True) >>> regressor = Ridge() - >>> reporter = CrossValidationReport(regressor, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(regressor, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> reporter.metrics.r2() + >>> report.metrics.r2() Compute metric for each split ... Metric R² (↗︎) Ridge Split #0 0.36... @@ -640,8 +640,8 @@ def rmse( data_source : {"test", "train"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. multioutput : {"raw_values", "uniform_average"} or array-like of shape \ (n_outputs,), default="raw_values" @@ -668,9 +668,9 @@ def rmse( >>> from skore import CrossValidationReport >>> X, y = load_diabetes(return_X_y=True) >>> regressor = Ridge() - >>> reporter = CrossValidationReport(regressor, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(regressor, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> reporter.metrics.rmse() + >>> report.metrics.rmse() Compute metric for each split ... Metric RMSE (↘︎) Ridge Split #0 59.9... @@ -722,8 +722,8 @@ def custom_metric( data_source : {"test", "train"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. aggregate : {"mean", "std"} or list of such str, default=None Function to aggregate the scores across the cross-validation splits. @@ -744,9 +744,9 @@ def custom_metric( >>> from skore import CrossValidationReport >>> X, y = load_diabetes(return_X_y=True) >>> regressor = Ridge() - >>> reporter = CrossValidationReport(regressor, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(regressor, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> reporter.metrics.custom_metric( + >>> report.metrics.custom_metric( ... metric_function=mean_absolute_error, ... response_method="predict", ... metric_name="MAE (↗︎)", @@ -832,13 +832,13 @@ def _get_help_legend(self): ) def _get_help_tree_title(self): - return "[bold cyan]reporter.metrics[/bold cyan]" + return "[bold cyan]report.metrics[/bold cyan]" def __repr__(self): """Return a string representation using rich.""" return self._rich_repr( class_name="skore.CrossValidationReport.metrics", - help_method_name="reporter.metrics.help()", + help_method_name="report.metrics.help()", ) @@ -873,8 +873,8 @@ def _get_display( data_source : {"test", "train"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. response_method : str The response method. @@ -953,8 +953,8 @@ def roc(self, *, data_source="test", pos_label=None, ax=None): data_source : {"test", "train"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. pos_label : int, float, bool or str, default=None The positive class. @@ -974,9 +974,9 @@ def roc(self, *, data_source="test", pos_label=None, ax=None): >>> from skore import CrossValidationReport >>> X, y = load_breast_cancer(return_X_y=True) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> display = reporter.metrics.plot.roc() + >>> display = report.metrics.plot.roc() Computing predictions for display ... >>> display.plot(roc_curve_kwargs={"color": "tab:red"}) """ @@ -1004,8 +1004,8 @@ def precision_recall(self, *, data_source="test", pos_label=None, ax=None): data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. pos_label : int, float, bool or str, default=None The positive class. @@ -1025,9 +1025,9 @@ def precision_recall(self, *, data_source="test", pos_label=None, ax=None): >>> from skore import CrossValidationReport >>> X, y = load_breast_cancer(return_X_y=True) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> display = reporter.metrics.plot.precision_recall() + >>> display = report.metrics.plot.precision_recall() Computing predictions for display ... >>> display.plot() """ @@ -1061,8 +1061,8 @@ def prediction_error( data_source : {"test", "train"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. ax : matplotlib axes, default=None Axes object to plot on. If `None`, a new figure and axes is @@ -1100,9 +1100,9 @@ def prediction_error( >>> from skore import CrossValidationReport >>> X, y = load_diabetes(return_X_y=True) >>> regressor = Ridge() - >>> reporter = CrossValidationReport(regressor, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(regressor, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> display = reporter.metrics.plot.prediction_error( + >>> display = report.metrics.plot.prediction_error( ... kind="actual_vs_predicted" ... ) Computing predictions for display ... @@ -1122,11 +1122,11 @@ def _get_help_panel_title(self): return "[bold cyan]Available plot methods[/bold cyan]" def _get_help_tree_title(self): - return "[bold cyan]reporter.metrics.plot[/bold cyan]" + return "[bold cyan]report.metrics.plot[/bold cyan]" def __repr__(self): """Return a string representation using rich.""" return self._rich_repr( class_name="skore.CrossValidationReport.metrics.plot", - help_method_name="reporter.metrics.plot.help()", + help_method_name="report.metrics.plot.help()", ) diff --git a/skore/src/skore/sklearn/_cross_validation/report.py b/skore/src/skore/sklearn/_cross_validation/report.py index 0ab630c4f..7e01a56cc 100644 --- a/skore/src/skore/sklearn/_cross_validation/report.py +++ b/skore/src/skore/sklearn/_cross_validation/report.py @@ -26,7 +26,7 @@ def _generate_estimator_report(estimator, X, y, train_indices, test_indices): class CrossValidationReport(_BaseReport, DirNamesMixin): - """Reporter for cross-validation results. + """Report for cross-validation results. Parameters ---------- @@ -180,14 +180,14 @@ def clear_cache(self): >>> from skore import CrossValidationReport >>> X, y = load_breast_cancer(return_X_y=True) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> reporter.cache_predictions() + >>> report.cache_predictions() Cross-validation predictions ... Caching predictions ... Caching predictions ... - >>> reporter.clear_cache() - >>> reporter._cache + >>> report.clear_cache() + >>> report._cache {} """ for report in self.estimator_reports_: @@ -215,13 +215,13 @@ def cache_predictions(self, response_methods="auto", n_jobs=None): >>> from skore import CrossValidationReport >>> X, y = load_breast_cancer(return_X_y=True) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) + >>> report = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2) Processing cross-validation ... - >>> reporter.cache_predictions() + >>> report.cache_predictions() Cross-validation predictions ... Caching predictions ... Caching predictions ... - >>> reporter._cache + >>> report._cache {...} """ if n_jobs is None: @@ -300,5 +300,5 @@ def _get_help_legend(self): def __repr__(self): """Return a string representation using rich.""" return self._rich_repr( - class_name="skore.CrossValidationReport", help_method_name="reporter.help()" + class_name="skore.CrossValidationReport", help_method_name="help()" ) diff --git a/skore/src/skore/sklearn/_estimator/metrics_accessor.py b/skore/src/skore/sklearn/_estimator/metrics_accessor.py index 72016b8a6..b2e5f1f6e 100644 --- a/skore/src/skore/sklearn/_estimator/metrics_accessor.py +++ b/skore/src/skore/sklearn/_estimator/metrics_accessor.py @@ -63,21 +63,21 @@ def report_metrics( data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. X : array-like of shape (n_samples, n_features), default=None New data on which to compute the metric. By default, we use the validation - set provided when creating the reporter. + set provided when creating the report. y : array-like of shape (n_samples,), default=None New target on which to compute the metric. By default, we use the target - provided when creating the reporter. + provided when creating the report. scoring : list of str, callable, or scorer, default=None The metrics to report. You can get the possible list of string by calling - `reporter.metrics.help()`. When passing a callable, it should take as + `report.metrics.help()`. When passing a callable, it should take as arguments `y_true`, `y_pred` as the two first arguments. Additional arguments can be passed as keyword arguments and will be forwarded with `scoring_kwargs`. If the callable API is too restrictive (e.g. need to pass @@ -109,14 +109,14 @@ def report_metrics( ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> reporter.metrics.report_metrics(pos_label=1) + >>> report.metrics.report_metrics(pos_label=1) Metric Precision (↗︎) Recall (↗︎) ROC AUC (↗︎) Brier score (↘︎) LogisticRegression 0.98... 0.93... 0.99... 0.03... """ @@ -374,17 +374,17 @@ def accuracy(self, *, data_source="test", X=None, y=None): data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. X : array-like of shape (n_samples, n_features), default=None New data on which to compute the metric. By default, we use the validation - set provided when creating the reporter. + set provided when creating the report. y : array-like of shape (n_samples,), default=None New target on which to compute the metric. By default, we use the target - provided when creating the reporter. + provided when creating the report. Returns ------- @@ -401,14 +401,14 @@ def accuracy(self, *, data_source="test", X=None, y=None): ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> reporter.metrics.accuracy() + >>> report.metrics.accuracy() Metric Accuracy (↗︎) LogisticRegression 0.95... """ @@ -459,17 +459,17 @@ def precision( data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. X : array-like of shape (n_samples, n_features), default=None New data on which to compute the metric. By default, we use the validation - set provided when creating the reporter. + set provided when creating the report. y : array-like of shape (n_samples,), default=None New target on which to compute the metric. By default, we use the target - provided when creating the reporter. + provided when creating the report. average : {"binary","macro", "micro", "weighted", "samples"} or None, \ default=None @@ -514,14 +514,14 @@ def precision( ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> reporter.metrics.precision(pos_label=1) + >>> report.metrics.precision(pos_label=1) Metric Precision (↗︎) LogisticRegression 0.98... """ @@ -588,17 +588,17 @@ def recall( data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. X : array-like of shape (n_samples, n_features), default=None New data on which to compute the metric. By default, we use the validation - set provided when creating the reporter. + set provided when creating the report. y : array-like of shape (n_samples,), default=None New target on which to compute the metric. By default, we use the target - provided when creating the reporter. + provided when creating the report. average : {"binary","macro", "micro", "weighted", "samples"} or None, \ default=None @@ -644,14 +644,14 @@ def recall( ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> reporter.metrics.recall(pos_label=1) + >>> report.metrics.recall(pos_label=1) Metric Recall (↗︎) LogisticRegression 0.93... """ @@ -714,17 +714,17 @@ def brier_score(self, *, data_source="test", X=None, y=None): data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. X : array-like of shape (n_samples, n_features), default=None New data on which to compute the metric. By default, we use the validation - set provided when creating the reporter. + set provided when creating the report. y : array-like of shape (n_samples,), default=None New target on which to compute the metric. By default, we use the target - provided when creating the reporter. + provided when creating the report. Returns ------- @@ -741,14 +741,14 @@ def brier_score(self, *, data_source="test", X=None, y=None): ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> reporter.metrics.brier_score() + >>> report.metrics.brier_score() Metric Brier score (↘︎) LogisticRegression 0.03... """ @@ -809,17 +809,17 @@ def roc_auc( data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. X : array-like of shape (n_samples, n_features), default=None New data on which to compute the metric. By default, we use the validation - set provided when creating the reporter. + set provided when creating the report. y : array-like of shape (n_samples,), default=None New target on which to compute the metric. By default, we use the target - provided when creating the reporter. + provided when creating the report. average : {"auto", "macro", "micro", "weighted", "samples"}, \ default=None @@ -870,14 +870,14 @@ def roc_auc( ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> reporter.metrics.roc_auc() + >>> report.metrics.roc_auc() Metric ROC AUC (↗︎) LogisticRegression 0.99... """ @@ -936,11 +936,11 @@ def log_loss(self, *, data_source="test", X=None, y=None): ---------- X : array-like of shape (n_samples, n_features), default=None New data on which to compute the metric. By default, we use the validation - set provided when creating the reporter. + set provided when creating the report. y : array-like of shape (n_samples,), default=None New target on which to compute the metric. By default, we use the target - provided when creating the reporter. + provided when creating the report. Returns ------- @@ -957,14 +957,14 @@ def log_loss(self, *, data_source="test", X=None, y=None): ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> reporter.metrics.log_loss() + >>> report.metrics.log_loss() Metric Log loss (↘︎) LogisticRegression 0.10... """ @@ -1014,17 +1014,17 @@ def r2(self, *, data_source="test", X=None, y=None, multioutput="raw_values"): data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. X : array-like of shape (n_samples, n_features), default=None New data on which to compute the metric. By default, we use the validation - set provided when creating the reporter. + set provided when creating the report. y : array-like of shape (n_samples,), default=None New target on which to compute the metric. By default, we use the target - provided when creating the reporter. + provided when creating the report. multioutput : {"raw_values", "uniform_average"} or array-like of shape \ (n_outputs,), default="raw_values" @@ -1051,14 +1051,14 @@ def r2(self, *, data_source="test", X=None, y=None, multioutput="raw_values"): ... *load_diabetes(return_X_y=True), random_state=0 ... ) >>> regressor = Ridge() - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... regressor, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> reporter.metrics.r2() + >>> report.metrics.r2() Metric R² (↗︎) Ridge 0.35... """ @@ -1111,17 +1111,17 @@ def rmse(self, *, data_source="test", X=None, y=None, multioutput="raw_values"): data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. X : array-like of shape (n_samples, n_features), default=None New data on which to compute the metric. By default, we use the validation - set provided when creating the reporter. + set provided when creating the report. y : array-like of shape (n_samples,), default=None New target on which to compute the metric. By default, we use the target - provided when creating the reporter. + provided when creating the report. multioutput : {"raw_values", "uniform_average"} or array-like of shape \ (n_outputs,), default="raw_values" @@ -1148,14 +1148,14 @@ def rmse(self, *, data_source="test", X=None, y=None, multioutput="raw_values"): ... *load_diabetes(return_X_y=True), random_state=0 ... ) >>> regressor = Ridge() - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... regressor, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> reporter.metrics.rmse() + >>> report.metrics.rmse() Metric RMSE (↘︎) Ridge 56.5... """ @@ -1239,17 +1239,17 @@ def custom_metric( data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. X : array-like of shape (n_samples, n_features), default=None New data on which to compute the metric. By default, we use the validation - set provided when creating the reporter. + set provided when creating the report. y : array-like of shape (n_samples,), default=None New target on which to compute the metric. By default, we use the target - provided when creating the reporter. + provided when creating the report. **kwargs : dict Any additional keyword arguments to be passed to the metric function. @@ -1270,14 +1270,14 @@ def custom_metric( ... *load_diabetes(return_X_y=True), random_state=0 ... ) >>> regressor = Ridge() - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... regressor, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> reporter.metrics.custom_metric( + >>> report.metrics.custom_metric( ... metric_function=mean_absolute_error, ... response_method="predict", ... metric_name="MAE (↗︎)", @@ -1391,13 +1391,13 @@ def _get_help_legend(self): ) def _get_help_tree_title(self): - return "[bold cyan]reporter.metrics[/bold cyan]" + return "[bold cyan]report.metrics[/bold cyan]" def __repr__(self): """Return a string representation using rich.""" return self._rich_repr( class_name="skore.EstimatorReport.metrics", - help_method_name="reporter.metrics.help()", + help_method_name="report.metrics.help()", ) @@ -1438,8 +1438,8 @@ def _get_display( data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. response_method : str @@ -1509,17 +1509,17 @@ def roc(self, *, data_source="test", X=None, y=None, pos_label=None, ax=None): data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. X : array-like of shape (n_samples, n_features), default=None New data on which to compute the metric. By default, we use the validation - set provided when creating the reporter. + set provided when creating the report. y : array-like of shape (n_samples,), default=None New target on which to compute the metric. By default, we use the target - provided when creating the reporter. + provided when creating the report. pos_label : int, float, bool or str, default=None The positive class. @@ -1542,14 +1542,14 @@ def roc(self, *, data_source="test", X=None, y=None, pos_label=None, ax=None): ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> display = reporter.metrics.plot.roc() + >>> display = report.metrics.plot.roc() >>> display.plot(roc_curve_kwargs={"color": "tab:red"}) """ response_method = ("predict_proba", "decision_function") @@ -1586,17 +1586,17 @@ def precision_recall( data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. X : array-like of shape (n_samples, n_features), default=None New data on which to compute the metric. By default, we use the validation - set provided when creating the reporter. + set provided when creating the report. y : array-like of shape (n_samples,), default=None New target on which to compute the metric. By default, we use the target - provided when creating the reporter. + provided when creating the report. pos_label : int, float, bool or str, default=None The positive class. @@ -1619,14 +1619,14 @@ def precision_recall( ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> display = reporter.metrics.plot.precision_recall() + >>> display = report.metrics.plot.precision_recall() >>> display.plot(pr_curve_kwargs={"color": "tab:red"}) """ response_method = ("predict_proba", "decision_function") @@ -1663,17 +1663,17 @@ def prediction_error( data_source : {"test", "train", "X_y"}, default="test" The data source to use. - - "test" : use the test set provided when creating the reporter. - - "train" : use the train set provided when creating the reporter. + - "test" : use the test set provided when creating the report. + - "train" : use the train set provided when creating the report. - "X_y" : use the provided `X` and `y` to compute the metric. X : array-like of shape (n_samples, n_features), default=None New data on which to compute the metric. By default, we use the validation - set provided when creating the reporter. + set provided when creating the report. y : array-like of shape (n_samples,), default=None New target on which to compute the metric. By default, we use the target - provided when creating the reporter. + provided when creating the report. ax : matplotlib axes, default=None Axes object to plot on. If `None`, a new figure and axes is @@ -1714,14 +1714,14 @@ def prediction_error( ... *load_diabetes(return_X_y=True), random_state=0 ... ) >>> regressor = Ridge() - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... regressor, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> display = reporter.metrics.plot.prediction_error( + >>> display = report.metrics.plot.prediction_error( ... kind="actual_vs_predicted" ... ) >>> display.plot(line_kwargs={"color": "tab:red"}) @@ -1742,11 +1742,11 @@ def _get_help_panel_title(self): return "[bold cyan]Available plot methods[/bold cyan]" def _get_help_tree_title(self): - return "[bold cyan]reporter.metrics.plot[/bold cyan]" + return "[bold cyan]report.metrics.plot[/bold cyan]" def __repr__(self): """Return a string representation using rich.""" return self._rich_repr( class_name="skore.EstimatorReport.metrics.plot", - help_method_name="reporter.metrics.plot.help()", + help_method_name="report.metrics.plot.help()", ) diff --git a/skore/src/skore/sklearn/_estimator/report.py b/skore/src/skore/sklearn/_estimator/report.py index 3254dc4a3..72ff44fc3 100644 --- a/skore/src/skore/sklearn/_estimator/report.py +++ b/skore/src/skore/sklearn/_estimator/report.py @@ -158,17 +158,17 @@ def clear_cache(self): ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> reporter.cache_predictions() + >>> report.cache_predictions() Caching predictions ... - >>> reporter.clear_cache() - >>> reporter._cache + >>> report.clear_cache() + >>> report._cache {} """ self._cache = {} @@ -199,16 +199,16 @@ def cache_predictions(self, response_methods="auto", n_jobs=None): ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> reporter.cache_predictions() + >>> report.cache_predictions() Caching predictions ... - >>> reporter._cache + >>> report._cache {...} """ if self._ml_task in ("binary-classification", "multiclass-classification"): @@ -330,5 +330,5 @@ def _get_help_legend(self): def __repr__(self): """Return a string representation using rich.""" return self._rich_repr( - class_name="skore.EstimatorReport", help_method_name="reporter.help()" + class_name="skore.EstimatorReport", help_method_name="help()" ) diff --git a/skore/src/skore/sklearn/_plot/precision_recall_curve.py b/skore/src/skore/sklearn/_plot/precision_recall_curve.py index 96ce28a5b..a785ef060 100644 --- a/skore/src/skore/sklearn/_plot/precision_recall_curve.py +++ b/skore/src/skore/sklearn/_plot/precision_recall_curve.py @@ -90,14 +90,14 @@ class PrecisionRecallCurveDisplay(HelpDisplayMixin, _ClassifierCurveDisplayMixin ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> display = reporter.metrics.plot.precision_recall() + >>> display = report.metrics.plot.precision_recall() >>> display.plot(pr_curve_kwargs={"color": "tab:red"}) """ @@ -173,14 +173,14 @@ def plot( ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> display = reporter.metrics.plot.precision_recall() + >>> display = report.metrics.plot.precision_recall() >>> display.plot(pr_curve_kwargs={"color": "tab:red"}) """ self.ax_, self.figure_, estimator_name = self._validate_plot_params( diff --git a/skore/src/skore/sklearn/_plot/prediction_error.py b/skore/src/skore/sklearn/_plot/prediction_error.py index d5b9836f6..dea17a45b 100644 --- a/skore/src/skore/sklearn/_plot/prediction_error.py +++ b/skore/src/skore/sklearn/_plot/prediction_error.py @@ -67,14 +67,14 @@ class PredictionErrorDisplay(HelpDisplayMixin): ... *load_diabetes(return_X_y=True), random_state=0 ... ) >>> classifier = Ridge() - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> display = reporter.metrics.plot.prediction_error() + >>> display = report.metrics.plot.prediction_error() >>> display.plot(kind="actual_vs_predicted") """ @@ -144,14 +144,14 @@ def plot( ... *load_diabetes(return_X_y=True), random_state=0 ... ) >>> classifier = Ridge() - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> display = reporter.metrics.plot.prediction_error() + >>> display = report.metrics.plot.prediction_error() >>> display.plot(kind="actual_vs_predicted") """ expected_kind = ("actual_vs_predicted", "residual_vs_predicted") diff --git a/skore/src/skore/sklearn/_plot/roc_curve.py b/skore/src/skore/sklearn/_plot/roc_curve.py index 256d44d31..577046d32 100644 --- a/skore/src/skore/sklearn/_plot/roc_curve.py +++ b/skore/src/skore/sklearn/_plot/roc_curve.py @@ -91,14 +91,14 @@ class RocCurveDisplay(HelpDisplayMixin, _ClassifierCurveDisplayMixin): ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> display = reporter.metrics.plot.roc() + >>> display = report.metrics.plot.roc() >>> display.plot(roc_curve_kwargs={"color": "tab:red"}) """ @@ -172,14 +172,14 @@ def plot( ... *load_breast_cancer(return_X_y=True), random_state=0 ... ) >>> classifier = LogisticRegression(max_iter=10_000) - >>> reporter = EstimatorReport( + >>> report = EstimatorReport( ... classifier, ... X_train=X_train, ... y_train=y_train, ... X_test=X_test, ... y_test=y_test, ... ) - >>> display = reporter.metrics.plot.roc() + >>> display = report.metrics.plot.roc() >>> display.plot(roc_curve_kwargs={"color": "tab:red"}) """ self.ax_, self.figure_, estimator_name = self._validate_plot_params( diff --git a/skore/tests/unit/sklearn/test_base.py b/skore/tests/unit/sklearn/test_base.py index 53b4b84fe..937e4ae92 100644 --- a/skore/tests/unit/sklearn/test_base.py +++ b/skore/tests/unit/sklearn/test_base.py @@ -178,7 +178,7 @@ def test_get_cached_response_values_different_data_source_hash( class MockReport: def __init__(self, estimator, X_train=None, y_train=None, X_test=None, y_test=None): - """Mock a reporter with the minimal required attributes.""" + """Mock a report with the minimal required attributes.""" self._estimator = estimator self._X_train = X_train self._y_train = y_train @@ -224,8 +224,8 @@ def test_base_accessor_get_X_y_and_data_source_hash_error(): for data_source in ("train", "test"): err_msg = re.escape( f"No {data_source} data (i.e. X_{data_source} and y_{data_source}) were " - f"provided when creating the reporter. Please provide the {data_source} " - "data either when creating the reporter or by setting data_source to " + f"provided when creating the report. Please provide the {data_source} " + "data either when creating the report or by setting data_source to " "'X_y' and providing X and y." ) with pytest.raises(ValueError, match=err_msg): @@ -261,8 +261,8 @@ def test_base_accessor_get_X_y_and_data_source_hash_error(): for data_source in ("train", "test"): err_msg = re.escape( f"No {data_source} data (i.e. X_{data_source}) were provided when " - f"creating the reporter. Please provide the {data_source} data either " - f"when creating the reporter or by setting data_source to 'X_y' and " + f"creating the report. Please provide the {data_source} data either " + f"when creating the report or by setting data_source to 'X_y' and " f"providing X and y." ) with pytest.raises(ValueError, match=err_msg): diff --git a/skore/tests/unit/sklearn/test_cross_validation.py b/skore/tests/unit/sklearn/test_cross_validation.py index 34f8a7480..ad845db22 100644 --- a/skore/tests/unit/sklearn/test_cross_validation.py +++ b/skore/tests/unit/sklearn/test_cross_validation.py @@ -170,7 +170,7 @@ def test_cross_validation_report_repr(binary_classification_data): repr_str = repr(report) assert "skore.CrossValidationReport" in repr_str - assert "reporter.help()" in repr_str + assert "help()" in repr_str @pytest.mark.parametrize( @@ -237,7 +237,7 @@ def test_cross_validation_report_plot_repr(binary_classification_data): repr_str = repr(report.metrics.plot) assert "skore.CrossValidationReport.metrics.plot" in repr_str - assert "reporter.metrics.plot.help()" in repr_str + assert "report.metrics.plot.help()" in repr_str def test_cross_validation_report_plot_roc(binary_classification_data): @@ -295,7 +295,7 @@ def test_cross_validation_report_metrics_repr(binary_classification_data): repr_str = repr(report.metrics) assert "skore.CrossValidationReport.metrics" in repr_str - assert "reporter.metrics.help()" in repr_str + assert "report.metrics.help()" in repr_str def _normalize_metric_name(column): diff --git a/skore/tests/unit/sklearn/test_estimator.py b/skore/tests/unit/sklearn/test_estimator.py index 77c791b2a..61c1e3571 100644 --- a/skore/tests/unit/sklearn/test_estimator.py +++ b/skore/tests/unit/sklearn/test_estimator.py @@ -291,7 +291,7 @@ def test_estimator_report_repr(binary_classification_data): repr_str = repr(report) assert "skore.EstimatorReport" in repr_str - assert "reporter.help()" in repr_str + assert "help()" in repr_str @pytest.mark.parametrize( @@ -364,7 +364,7 @@ def test_estimator_report_plot_repr(binary_classification_data): repr_str = repr(report.metrics.plot) assert "skore.EstimatorReport.metrics.plot" in repr_str - assert "reporter.metrics.plot.help()" in repr_str + assert "report.metrics.plot.help()" in repr_str def test_estimator_report_plot_roc(binary_classification_data): @@ -504,7 +504,7 @@ def test_estimator_report_metrics_repr(binary_classification_data): repr_str = repr(report.metrics) assert "skore.EstimatorReport.metrics" in repr_str - assert "reporter.metrics.help()" in repr_str + assert "report.metrics.help()" in repr_str @pytest.mark.parametrize( @@ -946,7 +946,7 @@ def test_estimator_report_custom_metric_compatible_estimator( class CompatibleEstimator: """Estimator exposing only a predict method but it should be enough for the - reporters. + reports. """ def fit(self, X, y): @@ -1060,8 +1060,8 @@ def test_estimator_report_get_X_y_and_data_source_hash_error(): for data_source in ("train", "test"): err_msg = re.escape( f"No {data_source} data (i.e. X_{data_source} and y_{data_source}) were " - f"provided when creating the reporter. Please provide the {data_source} " - "data either when creating the reporter or by setting data_source to " + f"provided when creating the report. Please provide the {data_source} " + "data either when creating the report or by setting data_source to " "'X_y' and providing X and y." ) with pytest.raises(ValueError, match=err_msg): @@ -1094,8 +1094,8 @@ def test_estimator_report_get_X_y_and_data_source_hash_error(): for data_source in ("train", "test"): err_msg = re.escape( f"No {data_source} data (i.e. X_{data_source}) were provided when " - f"creating the reporter. Please provide the {data_source} data either " - f"when creating the reporter or by setting data_source to 'X_y' and " + f"creating the report. Please provide the {data_source} data either " + f"when creating the report or by setting data_source to 'X_y' and " f"providing X and y." ) with pytest.raises(ValueError, match=err_msg): diff --git a/sphinx/api.rst b/sphinx/api.rst index 80f394342..45a2707ca 100644 --- a/sphinx/api.rst +++ b/sphinx/api.rst @@ -40,9 +40,9 @@ These functions and classes enhance scikit-learn's ones. Report for a single estimator ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The class :class:`EstimatorReport` provides a reporter allowing to inspect and +The class :class:`EstimatorReport` provides a report allowing to inspect and evaluate a scikit-learn estimator in an interactive way. The functionalities of the -reporter are accessible through accessors. +report are accessible through accessors. .. autosummary:: :toctree: generated/ @@ -95,9 +95,9 @@ performance metric representations. Cross-validation report for an estimator ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The class :class:`CrossValidationReport` provides a reporter allowing to inspect and +The class :class:`CrossValidationReport` provides a report allowing to inspect and evaluate a scikit-learn estimator through cross-validation in an interactive way. The -functionalities of the reporter are accessible through accessors. +functionalities of the report are accessible through accessors. .. autosummary:: :toctree: generated/