Skip to content

Commit

Permalink
docs: Ensure EstimatorReport and CrossValidationReport instances are …
Browse files Browse the repository at this point in the history
…called "report" (#1176)

Closes #1119
  • Loading branch information
auguste-probabl authored Jan 21, 2025
1 parent 3ce26b1 commit 434340e
Show file tree
Hide file tree
Showing 13 changed files with 226 additions and 226 deletions.
52 changes: 26 additions & 26 deletions examples/model_evaluation/plot_estimator_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,27 +67,27 @@
# detect that our estimator is already fitted and will not fit it again.
from skore import EstimatorReport

reporter = EstimatorReport(
report = EstimatorReport(
estimator, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test
)
reporter
report

# %%
#
# Once the reporter is created, we get some information regarding the available tools
# Once the report is created, we get some information regarding the available tools
# allowing us to get some insights from our specific model on the specific task.
#
# You can get similar information if you call the :meth:`~skore.EstimatorReport.help`
# method.
reporter.help()
report.help()

# %%
#
# Be aware that you can access the help for each individual sub-accessor. For instance:
reporter.metrics.help()
report.metrics.help()

# %%
reporter.metrics.plot.help()
report.metrics.plot.help()

# %%
#
Expand All @@ -102,7 +102,7 @@
import time

start = time.time()
metric_report = reporter.metrics.report_metrics(pos_label=pos_label)
metric_report = report.metrics.report_metrics(pos_label=pos_label)
end = time.time()
metric_report

Expand All @@ -114,13 +114,13 @@
# An interesting feature provided by the :class:`skore.EstimatorReport` is the
# caching mechanism. Indeed, when we have a large enough dataset, computing the
# predictions for a model is not cheap anymore. For instance, on our smallish dataset,
# it took a couple of seconds to compute the metrics. The reporter will cache the
# it took a couple of seconds to compute the metrics. The report will cache the
# predictions and if you are interested in computing a metric again or an alternative
# metric that requires the same predictions, it will be faster. Let's check by
# requesting the same metrics report again.

start = time.time()
metric_report = reporter.metrics.report_metrics(pos_label=pos_label)
metric_report = report.metrics.report_metrics(pos_label=pos_label)
end = time.time()
metric_report

Expand All @@ -143,7 +143,7 @@
# and reload them if available. So for instance, let's compute the log loss.

start = time.time()
log_loss = reporter.metrics.log_loss()
log_loss = report.metrics.log_loss()
end = time.time()
log_loss

Expand All @@ -154,10 +154,10 @@
#
# We can show that without initial cache, it would have taken more time to compute
# the log loss.
reporter.clear_cache()
report.clear_cache()

start = time.time()
log_loss = reporter.metrics.log_loss()
log_loss = report.metrics.log_loss()
end = time.time()
log_loss

Expand All @@ -169,7 +169,7 @@
# By default, the metrics are computed on the test set. However, if a training set
# is provided, we can also compute the metrics by specifying the `data_source`
# parameter.
reporter.metrics.log_loss(data_source="train")
report.metrics.log_loss(data_source="train")

# %%
#
Expand All @@ -178,7 +178,7 @@
# the `X` and `y` parameters.

start = time.time()
metric_report = reporter.metrics.report_metrics(
metric_report = report.metrics.report_metrics(
data_source="X_y", X=X_test, y=y_test, pos_label=pos_label
)
end = time.time()
Expand All @@ -195,7 +195,7 @@

# %%
start = time.time()
metric_report = reporter.metrics.report_metrics(
metric_report = report.metrics.report_metrics(
data_source="X_y", X=X_test, y=y_test, pos_label=pos_label
)
end = time.time()
Expand Down Expand Up @@ -245,13 +245,13 @@ def operational_decision_cost(y_true, y_pred, amount):
#
# Let's make sure that a function called the `predict` method and cached the result.
# We compute the accuracy metric to make sure that the `predict` method is called.
reporter.metrics.accuracy()
report.metrics.accuracy()

# %%
#
# We can now compute the cost of our operational decision.
start = time.time()
cost = reporter.metrics.custom_metric(
cost = report.metrics.custom_metric(
metric_function=operational_decision_cost,
metric_name="Operational Decision Cost",
response_method="predict",
Expand All @@ -266,11 +266,11 @@ def operational_decision_cost(y_true, y_pred, amount):
# %%
#
# Let's now clean the cache and see if it is faster.
reporter.clear_cache()
report.clear_cache()

# %%
start = time.time()
cost = reporter.metrics.custom_metric(
cost = report.metrics.custom_metric(
metric_function=operational_decision_cost,
metric_name="Operational Decision Cost",
response_method="predict",
Expand All @@ -287,7 +287,7 @@ def operational_decision_cost(y_true, y_pred, amount):
# We observe that caching is working as expected. It is really handy because it means
# that you can compute some additional metrics without having to recompute the
# predictions.
reporter.metrics.report_metrics(
report.metrics.report_metrics(
scoring=["precision", "recall", operational_decision_cost],
pos_label=pos_label,
scoring_kwargs={
Expand All @@ -314,7 +314,7 @@ def operational_decision_cost(y_true, y_pred, amount):
metric_name="Operational Decision Cost",
amount=amount,
)
reporter.metrics.report_metrics(scoring=[f1_scorer, operational_decision_cost_scorer])
report.metrics.report_metrics(scoring=[f1_scorer, operational_decision_cost_scorer])

# %%
#
Expand All @@ -324,12 +324,12 @@ def operational_decision_cost(y_true, y_pred, amount):
# The :class:`skore.EstimatorReport` class also provides a plotting interface that
# allows plotting *de facto* the most common plots. As for the metrics, we only
# provide the meaningful set of plots for the provided estimator.
reporter.metrics.plot.help()
report.metrics.plot.help()

# %%
#
# Let's start by plotting the ROC curve for our binary classification task.
display = reporter.metrics.plot.roc(pos_label=pos_label)
display = report.metrics.plot.roc(pos_label=pos_label)
plt.tight_layout()

# %%
Expand Down Expand Up @@ -357,7 +357,7 @@ def operational_decision_cost(y_true, y_pred, amount):
# performance gain we can get.
start = time.time()
# we already trigger the computation of the predictions in a previous call
reporter.metrics.plot.roc(pos_label=pos_label)
report.metrics.plot.roc(pos_label=pos_label)
plt.tight_layout()
end = time.time()

Expand All @@ -367,11 +367,11 @@ def operational_decision_cost(y_true, y_pred, amount):
# %%
#
# Now, let's clean the cache and check if we get a slowdown.
reporter.clear_cache()
report.clear_cache()

# %%
start = time.time()
reporter.metrics.plot.roc(pos_label=pos_label)
report.metrics.plot.roc(pos_label=pos_label)
plt.tight_layout()
end = time.time()

Expand Down
18 changes: 9 additions & 9 deletions skore/src/skore/sklearn/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _get_attributes_for_help(self):

def _create_help_tree(self):
"""Create a rich Tree with the available tools and accessor methods."""
tree = Tree("reporter")
tree = Tree("report")

# Add accessor methods first
for accessor_attr, config in self._ACCESSOR_CONFIG.items():
Expand Down Expand Up @@ -205,8 +205,8 @@ def _get_X_y_and_data_source_hash(self, *, data_source, X=None, y=None):
data_source : {"test", "train", "X_y"}, default="test"
The data source to use.
- "test" : use the test set provided when creating the reporter.
- "train" : use the train set provided when creating the reporter.
- "test" : use the test set provided when creating the report.
- "train" : use the train set provided when creating the report.
- "X_y" : use the provided `X` and `y` to compute the metric.
X : array-like of shape (n_samples, n_features) or None, default=None
Expand Down Expand Up @@ -237,8 +237,8 @@ def _get_X_y_and_data_source_hash(self, *, data_source, X=None, y=None):
missing_data = "X_test" if is_cluster else "X_test and y_test"
raise ValueError(
f"No {data_source} data (i.e. {missing_data}) were provided "
f"when creating the reporter. Please provide the {data_source} "
"data either when creating the reporter or by setting data_source "
f"when creating the report. Please provide the {data_source} "
"data either when creating the report or by setting data_source "
"to 'X_y' and providing X and y."
)
return self._parent._X_test, self._parent._y_test, None
Expand All @@ -251,8 +251,8 @@ def _get_X_y_and_data_source_hash(self, *, data_source, X=None, y=None):
missing_data = "X_train" if is_cluster else "X_train and y_train"
raise ValueError(
f"No {data_source} data (i.e. {missing_data}) were provided "
f"when creating the reporter. Please provide the {data_source} "
"data either when creating the reporter or by setting data_source "
f"when creating the report. Please provide the {data_source} "
"data either when creating the report or by setting data_source "
"to 'X_y' and providing X and y."
)
return self._parent._X_train, self._parent._y_train, None
Expand Down Expand Up @@ -307,8 +307,8 @@ def _get_cached_response_values(
data_source : {"test", "train", "X_y"}, default="test"
The data source to use.
- "test" : use the test set provided when creating the reporter.
- "train" : use the train set provided when creating the reporter.
- "test" : use the test set provided when creating the report.
- "train" : use the train set provided when creating the report.
- "X_y" : use the provided `X` and `y` to compute the metric.
data_source_hash : int or None
Expand Down
Loading

0 comments on commit 434340e

Please sign in to comment.