docs: Update documentation now that we have the EstimatorReport to showcase and no UI (#1195)

Fix #1185
probabl-ai · Jan 22, 2025 · e4d87ec · e4d87ec
1 parent 10e1c0f
commit e4d87ec
Show file tree

Hide file tree

Showing 4 changed files with 148 additions and 60 deletions.
diff --git a/README.md b/README.md
@@ -61,27 +61,39 @@ You can find information on the latest version [here](https://anaconda.org/conda
 
 2. Evaluate your model using `skore.CrossValidationReporter`:
     ```python
-    from sklearn.datasets import load_iris
-    from sklearn.pipeline import Pipeline
-    from sklearn.preprocessing import StandardScaler
+    from sklearn.datasets import make_classification
     from sklearn.linear_model import LogisticRegression
 
-    X, y = load_iris(return_X_y=True)
-    clf_pipeline = Pipeline([
-        ('scaler', StandardScaler()),
-        ('clf', LogisticRegression())
-    ])
+    from skore import CrossValidationReport
 
-    reporter = skore.CrossValidationReporter(clf_pipeline, X, y, cv=5)
+    X, y = make_classification(n_classes=2, n_samples=100_000, n_informative=4)
+    clf = LogisticRegression()
 
-    # Store the results in the project
-    my_project.put("cv_reporter", reporter)
+    cv_report = CrossValidationReport(clf, X, y)
 
-    # Display a plot result in your notebook
-    reporter.plots.scores
+    # Display the help tree to see all the insights that are available to you
+    cv_report.help()
     ```
 
-Also check out `skore.train_test_split()` that enhances scikit-learn. Learn more in our [documentation](https://skore.probabl.ai).
+    ```python
+    # Display the report metrics that were computed for you:
+    df_cv_report_metrics = cv_report.metrics.report_metrics()
+    df_cv_report_metrics
+    ```
+
+    ```python
+    # Display the ROC curve that was generated for you:
+    roc_plot = cv_report.metrics.plot.roc()
+    roc_plot
+    ```
+
+3. Store the results in the skore project for safe-keeping:
+    ```python
+    my_project.put("df_cv_report_metrics", df_cv_report_metrics)
+    my_project.put("roc_plot", roc_plot)
+    ```
+
+Learn more in our [documentation](https://skore.probabl.ai).
 
 
 ## Contributing

diff --git a/examples/getting_started/plot_quick_start.py b/examples/getting_started/plot_quick_start.py
@@ -20,32 +20,51 @@
 # same path (which you might not want to do that depending on your use case).
 
 # %%
-# Evaluate your model using skore's :class:`~skore.CrossValidationReporter`:
+# Evaluate your model using skore's :class:`~skore.CrossValidationReport`:
 
 # %%
-from sklearn.datasets import load_iris
-from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import StandardScaler
+from sklearn.datasets import make_classification
 from sklearn.linear_model import LogisticRegression
 
-X, y = load_iris(return_X_y=True)
-clf_pipeline = Pipeline([("scaler", StandardScaler()), ("clf", LogisticRegression())])
+from skore import CrossValidationReport
 
-reporter = skore.CrossValidationReporter(clf_pipeline, X, y, cv=5)
+X, y = make_classification(n_classes=2, n_samples=100_000, n_informative=4)
+clf = LogisticRegression()
+
+cv_report = CrossValidationReport(clf, X, y)
+
+# %%
+# Display the help tree to see all the insights that are available to you given that
+# you are doing binary classification:
+
+# %%
+cv_report.help()
 
 # %%
-# Store the results in the skore project:
+# Display the report metrics that were computed for you:
 
 # %%
-my_project.put("cv_reporter", reporter)
+df_cv_report_metrics = cv_report.metrics.report_metrics()
+df_cv_report_metrics
+
+# %%
+# Display the ROC curve that was generated for you:
+
+# %%
+import matplotlib.pyplot as plt
+
+roc_plot = cv_report.metrics.plot.roc()
+roc_plot
+plt.tight_layout()
 
 # %%
-# Display some results in your notebook:
+# Store the results in the skore project for safe-keeping:
 
 # %%
-reporter.plots.timing
+my_project.put("df_cv_report_metrics", df_cv_report_metrics)
+my_project.put("roc_plot", roc_plot)
 
 # %%
 # .. admonition:: What's next?
 #
-#    For a more in-depth guide, see our :ref:`example_skore_product_tour` page!
+#    For a more in-depth guide, see our :ref:`example_skore_getting_started` page!
diff --git a/...etting_started/plot_skore_product_tour.py → ...ing_started/plot_skore_getting_started.py b/...etting_started/plot_skore_product_tour.py → ...ing_started/plot_skore_getting_started.py
@@ -1,16 +1,11 @@
 """
-.. _example_skore_product_tour:
+.. _example_skore_getting_started:
 
-==================
-Skore product tour
-==================
+======================
+Skore: getting started
+======================
 """
 
-# %%
-# .. admonition:: Where to start?
-#
-#    See our :ref:`example_quick_start` page!
-
 # %%
 # This getting started guide illustrates how to use skore and why:
 #
@@ -20,7 +15,7 @@
 # #.    Machine learning diagnostics: get assistance when developing your ML/DS
 #       projects to avoid common pitfalls and follow recommended practices.
 #
-#       * Enhancing key scikit-learn features with :class:`skore.CrossValidationReporter`
+#       * Enhancing key scikit-learn features with :class:`skore.CrossValidationReport`
 #         and :func:`skore.train_test_split`.
 
 # %%
@@ -153,19 +148,16 @@
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 # %%
-# Suppose we store several integer values for a same item called ``my_int``, each storage
-# being separated by 0.1 second:
+# Suppose we store several integer values for the same item called ``my_int``:
 #
 # .. code-block:: python
 #
 #     import time
 #
 #     my_project.put("my_int", 4)
 #
-#     time.sleep(0.1)
 #     my_project.put("my_int", 9)
 #
-#     time.sleep(0.1)
 #     my_project.put("my_int", 16)
 #
 # Skore does not overwrite items with the same name (key value), instead it stores
@@ -182,44 +174,107 @@
 #   see :ref:`example_tracking_items`.
 
 # %%
-# Machine learning diagnostics: enhancing scikit-learn functions
-# ==============================================================
+# Machine learning diagnostics and evaluation
+# ===========================================
 #
-# Skore wraps some key scikit-learn functions to automatically provide
-# diagnostics and checks when using them, as a way to facilitate good practices
+# Skore re-implements or wraps some key scikit-learn classes/functions to automatically
+# provide diagnostics and checks when using them, as a way to facilitate good practices
 # and avoid common pitfalls.
 
 # %%
-# Cross-validation with skore
+# Model evaluation with skore
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
 # In order to assist its users when programming, skore has implemented a
-# :class:`skore.CrossValidationReporter` function that wraps scikit-learn's
-# :func:`sklearn.model_selection.cross_validate`.
+# :class:`skore.EstimatorReport` class.
 #
-# On the same previous data and a Ridge regressor (with default ``alpha`` value),
-# let us create a ``CrossValidationReporter``.
+# Let us load some synthetic data and get the estimator report for a
+# :class:`~sklearn.linear_model.LogisticRegression`:
+
+# %%
+from sklearn.datasets import make_classification
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+
+from skore import EstimatorReport
+
+X, y = make_classification(n_classes=2, n_samples=100_000, n_informative=4)
+X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+
+clf = LogisticRegression()
+
+est_report = EstimatorReport(
+    clf, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test
+)
 
 # %%
-from skore import CrossValidationReporter
+# Now, we can display the help tree to see all the insights that are available to us
+# given that we are doing binary classification:
 
-cv_reporter = CrossValidationReporter(Ridge(), X, y, cv=5)
-my_project.put("cv_reporter", cv_reporter)
-cv_reporter.plots.scores
+# %%
+est_report.help()
 
 # %%
-# Hence:
+# We can get the report metrics that were computed for us:
+
+# %%
+df_est_report_metrics = est_report.metrics.report_metrics()
+df_est_report_metrics
+
+# %%
+# We can also plot the ROC curve that was generated for us:
+
+# %%
+import matplotlib.pyplot as plt
+
+roc_plot = est_report.metrics.plot.roc()
+roc_plot
+plt.tight_layout()
+# %%
+# .. seealso::
 #
-# * we can automatically observe some key visualizations and get insights on our
-#   cross-validation,
-# * and some well-chosen metrics are automatically computed for us, without the need to
-#   manually set them.
+#   For more information about the motivation and usage of
+#   :class:`skore.EstimatorReport`, see :ref:`example_estimator_report`.
+
+
+# %%
+# Cross-validation with skore
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
+# skore has also implemented a :class:`skore.CrossValidationReport` class that contains
+# several :class:`skore.EstimatorReport`, one for each fold.
+
+# %%
+from skore import CrossValidationReport
+
+cv_report = CrossValidationReport(clf, X, y, cv_splitter=5)
+
+# %%
+# We display the cross-validation report helper:
+
+# %%
+cv_report.help()
+
+# %%
+# We display the metrics for each fold:
+
+# %%
+df_cv_report_metrics = cv_report.metrics.report_metrics()
+df_cv_report_metrics
+
+# %%
+# We display the ROC curves for each fold:
+
+# %%
+roc_plot = cv_report.metrics.plot.roc()
+roc_plot
+plt.tight_layout()
+
+# %%
 # .. seealso::
 #
-#   More features exist for cross-validation.
 #   For more information about the motivation and usage of
-#   :class:`skore.CrossValidationReporter`, see :ref:`example_cross_validate`.
+#   :class:`skore.CrossValidationReport`, see :ref:`example_cross_validate`.
 
 # %%
 # Train-test split with skore

diff --git a/examples/model_evaluation/plot_estimator_report.py b/examples/model_evaluation/plot_estimator_report.py
@@ -1,4 +1,6 @@
 """
+.. _example_estimator_report:
+
 ============================================
 Get insights from any scikit-learn estimator
 ============================================