Skip to content

Commit

Permalink
docs: Ensure EstimatorReport and CrossValidationReport instances are …
Browse files Browse the repository at this point in the history
…called "report" (#1176)

Closes #1119
  • Loading branch information
auguste-probabl authored Jan 21, 2025
1 parent 3ce26b1 commit 434340e
Show file tree
Hide file tree
Showing 13 changed files with 226 additions and 226 deletions.
52 changes: 26 additions & 26 deletions examples/model_evaluation/plot_estimator_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,27 +67,27 @@
# detect that our estimator is already fitted and will not fit it again.
from skore import EstimatorReport

reporter = EstimatorReport(
report = EstimatorReport(
estimator, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test
)
reporter
report

# %%
#
# Once the reporter is created, we get some information regarding the available tools
# Once the report is created, we get some information regarding the available tools
# allowing us to get some insights from our specific model on the specific task.
#
# You can get similar information if you call the :meth:`~skore.EstimatorReport.help`
# method.
reporter.help()
report.help()

# %%
#
# Be aware that you can access the help for each individual sub-accessor. For instance:
reporter.metrics.help()
report.metrics.help()

# %%
reporter.metrics.plot.help()
report.metrics.plot.help()

# %%
#
Expand All @@ -102,7 +102,7 @@
import time

start = time.time()
metric_report = reporter.metrics.report_metrics(pos_label=pos_label)
metric_report = report.metrics.report_metrics(pos_label=pos_label)
end = time.time()
metric_report

Expand All @@ -114,13 +114,13 @@
# An interesting feature provided by the :class:`skore.EstimatorReport` is the
# caching mechanism. Indeed, when we have a large enough dataset, computing the
# predictions for a model is not cheap anymore. For instance, on our smallish dataset,
# it took a couple of seconds to compute the metrics. The reporter will cache the
# it took a couple of seconds to compute the metrics. The report will cache the
# predictions and if you are interested in computing a metric again or an alternative
# metric that requires the same predictions, it will be faster. Let's check by
# requesting the same metrics report again.

start = time.time()
metric_report = reporter.metrics.report_metrics(pos_label=pos_label)
metric_report = report.metrics.report_metrics(pos_label=pos_label)
end = time.time()
metric_report

Expand All @@ -143,7 +143,7 @@
# and reload them if available. So for instance, let's compute the log loss.

start = time.time()
log_loss = reporter.metrics.log_loss()
log_loss = report.metrics.log_loss()
end = time.time()
log_loss

Expand All @@ -154,10 +154,10 @@
#
# We can show that without initial cache, it would have taken more time to compute
# the log loss.
reporter.clear_cache()
report.clear_cache()

start = time.time()
log_loss = reporter.metrics.log_loss()
log_loss = report.metrics.log_loss()
end = time.time()
log_loss

Expand All @@ -169,7 +169,7 @@
# By default, the metrics are computed on the test set. However, if a training set
# is provided, we can also compute the metrics by specifying the `data_source`
# parameter.
reporter.metrics.log_loss(data_source="train")
report.metrics.log_loss(data_source="train")

# %%
#
Expand All @@ -178,7 +178,7 @@
# the `X` and `y` parameters.

start = time.time()
metric_report = reporter.metrics.report_metrics(
metric_report = report.metrics.report_metrics(
data_source="X_y", X=X_test, y=y_test, pos_label=pos_label
)
end = time.time()
Expand All @@ -195,7 +195,7 @@

# %%
start = time.time()
metric_report = reporter.metrics.report_metrics(
metric_report = report.metrics.report_metrics(
data_source="X_y", X=X_test, y=y_test, pos_label=pos_label
)
end = time.time()
Expand Down Expand Up @@ -245,13 +245,13 @@ def operational_decision_cost(y_true, y_pred, amount):
#
# Let's make sure that a function called the `predict` method and cached the result.
# We compute the accuracy metric to make sure that the `predict` method is called.
reporter.metrics.accuracy()
report.metrics.accuracy()

# %%
#
# We can now compute the cost of our operational decision.
start = time.time()
cost = reporter.metrics.custom_metric(
cost = report.metrics.custom_metric(
metric_function=operational_decision_cost,
metric_name="Operational Decision Cost",
response_method="predict",
Expand All @@ -266,11 +266,11 @@ def operational_decision_cost(y_true, y_pred, amount):
# %%
#
# Let's now clean the cache and see if it is faster.
reporter.clear_cache()
report.clear_cache()

# %%
start = time.time()
cost = reporter.metrics.custom_metric(
cost = report.metrics.custom_metric(
metric_function=operational_decision_cost,
metric_name="Operational Decision Cost",
response_method="predict",
Expand All @@ -287,7 +287,7 @@ def operational_decision_cost(y_true, y_pred, amount):
# We observe that caching is working as expected. It is really handy because it means
# that you can compute some additional metrics without having to recompute the
# predictions.
reporter.metrics.report_metrics(
report.metrics.report_metrics(
scoring=["precision", "recall", operational_decision_cost],
pos_label=pos_label,
scoring_kwargs={
Expand All @@ -314,7 +314,7 @@ def operational_decision_cost(y_true, y_pred, amount):
metric_name="Operational Decision Cost",
amount=amount,
)
reporter.metrics.report_metrics(scoring=[f1_scorer, operational_decision_cost_scorer])
report.metrics.report_metrics(scoring=[f1_scorer, operational_decision_cost_scorer])

# %%
#
Expand All @@ -324,12 +324,12 @@ def operational_decision_cost(y_true, y_pred, amount):
# The :class:`skore.EstimatorReport` class also provides a plotting interface that
# allows plotting *de facto* the most common plots. As for the metrics, we only
# provide the meaningful set of plots for the provided estimator.
reporter.metrics.plot.help()
report.metrics.plot.help()

# %%
#
# Let's start by plotting the ROC curve for our binary classification task.
display = reporter.metrics.plot.roc(pos_label=pos_label)
display = report.metrics.plot.roc(pos_label=pos_label)
plt.tight_layout()

# %%
Expand Down Expand Up @@ -357,7 +357,7 @@ def operational_decision_cost(y_true, y_pred, amount):
# performance gain we can get.
start = time.time()
# we already trigger the computation of the predictions in a previous call
reporter.metrics.plot.roc(pos_label=pos_label)
report.metrics.plot.roc(pos_label=pos_label)
plt.tight_layout()
end = time.time()

Expand All @@ -367,11 +367,11 @@ def operational_decision_cost(y_true, y_pred, amount):
# %%
#
# Now, let's clean the cache and check if we get a slowdown.
reporter.clear_cache()
report.clear_cache()

# %%
start = time.time()
reporter.metrics.plot.roc(pos_label=pos_label)
report.metrics.plot.roc(pos_label=pos_label)
plt.tight_layout()
end = time.time()

Expand Down
18 changes: 9 additions & 9 deletions skore/src/skore/sklearn/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _get_attributes_for_help(self):

def _create_help_tree(self):
"""Create a rich Tree with the available tools and accessor methods."""
tree = Tree("reporter")
tree = Tree("report")

# Add accessor methods first
for accessor_attr, config in self._ACCESSOR_CONFIG.items():
Expand Down Expand Up @@ -205,8 +205,8 @@ def _get_X_y_and_data_source_hash(self, *, data_source, X=None, y=None):
data_source : {"test", "train", "X_y"}, default="test"
The data source to use.
- "test" : use the test set provided when creating the reporter.
- "train" : use the train set provided when creating the reporter.
- "test" : use the test set provided when creating the report.
- "train" : use the train set provided when creating the report.
- "X_y" : use the provided `X` and `y` to compute the metric.
X : array-like of shape (n_samples, n_features) or None, default=None
Expand Down Expand Up @@ -237,8 +237,8 @@ def _get_X_y_and_data_source_hash(self, *, data_source, X=None, y=None):
missing_data = "X_test" if is_cluster else "X_test and y_test"
raise ValueError(
f"No {data_source} data (i.e. {missing_data}) were provided "
f"when creating the reporter. Please provide the {data_source} "
"data either when creating the reporter or by setting data_source "
f"when creating the report. Please provide the {data_source} "
"data either when creating the report or by setting data_source "
"to 'X_y' and providing X and y."
)
return self._parent._X_test, self._parent._y_test, None
Expand All @@ -251,8 +251,8 @@ def _get_X_y_and_data_source_hash(self, *, data_source, X=None, y=None):
missing_data = "X_train" if is_cluster else "X_train and y_train"
raise ValueError(
f"No {data_source} data (i.e. {missing_data}) were provided "
f"when creating the reporter. Please provide the {data_source} "
"data either when creating the reporter or by setting data_source "
f"when creating the report. Please provide the {data_source} "
"data either when creating the report or by setting data_source "
"to 'X_y' and providing X and y."
)
return self._parent._X_train, self._parent._y_train, None
Expand Down Expand Up @@ -307,8 +307,8 @@ def _get_cached_response_values(
data_source : {"test", "train", "X_y"}, default="test"
The data source to use.
- "test" : use the test set provided when creating the reporter.
- "train" : use the train set provided when creating the reporter.
- "test" : use the test set provided when creating the report.
- "train" : use the train set provided when creating the report.
- "X_y" : use the provided `X` and `y` to compute the metric.
data_source_hash : int or None
Expand Down
Loading

0 comments on commit 434340e

Please sign in to comment.