From 821d6cad4566488e210a6de91205e81cc15a34f8 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 14 Aug 2024 21:27:09 +0200 Subject: [PATCH 01/28] add notebook on the cause of miscalibration --- python_files/causes_miscalibration.py | 61 +++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 python_files/causes_miscalibration.py diff --git a/python_files/causes_miscalibration.py b/python_files/causes_miscalibration.py new file mode 100644 index 0000000..c2f5c41 --- /dev/null +++ b/python_files/causes_miscalibration.py @@ -0,0 +1,61 @@ +# %% +import numpy as np + + +def xor_generator(n_samples=1_000, seed=0): + rng = np.random.default_rng(seed) + X = rng.uniform(low=-3, high=3, size=(n_samples, 2)) + y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0) + return X, y + + +# %% +import matplotlib.pyplot as plt + +X, y = xor_generator(seed=0) +_, ax = plt.subplots() +ax.scatter(*X.T, c=y, cmap="coolwarm", alpha=0.5) +ax.set( + xlim=(-3, 3), + ylim=(-3, 3), + xlabel="Feature 1", + ylabel="Feature 2", + title="XOR problem", + aspect="equal", +) + +# %% +from sklearn.preprocessing import SplineTransformer, PolynomialFeatures +from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import make_pipeline + +model = make_pipeline( + SplineTransformer(n_knots=20), + PolynomialFeatures(degree=2, interaction_only=True), + LogisticRegression(), +) +model.fit(X, y) + +# %% +from sklearn.inspection import DecisionBoundaryDisplay + +_, ax = plt.subplots() +DecisionBoundaryDisplay.from_estimator( + model, X, ax=ax, cmap="coolwarm", response_method="predict_proba" +) +ax.scatter(*X.T, c=y, cmap="coolwarm", alpha=0.5) +ax.set( + xlim=(-3, 3), + ylim=(-3, 3), + xlabel="Feature 1", + ylabel="Feature 2", + title="XOR problem", + aspect="equal", +) + +# %% +from sklearn.calibration import CalibrationDisplay + +CalibrationDisplay.from_estimator(model, X, y, strategy="quantile", n_bins=10) + +# %% From cc7bc4ab5404581abd9e701718b80e2ba4b25c96 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 15 Aug 2024 11:51:21 +0200 Subject: [PATCH 02/28] test --- .github/workflows/testing.yml | 1 + book/_toc.yml | 1 + pixi.toml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 38e004c..95e3615 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -28,6 +28,7 @@ jobs: pixi run check-generated-predictions pixi run build-calibration-curve pixi run different-calibration-curves + pixi run cause-miscalibration - name: Test if we can build the documentation if: matrix.os == 'ubuntu-latest' diff --git a/book/_toc.yml b/book/_toc.yml index ff42a1c..ab95c9e 100644 --- a/book/_toc.yml +++ b/book/_toc.yml @@ -6,3 +6,4 @@ root: intro chapters: - file: content/notebooks/build_calibration_curve - file: content/notebooks/different_calibration_curves +- file: content/notebooks/cause_miscalibration diff --git a/pixi.toml b/pixi.toml index 4c16afb..a98604c 100644 --- a/pixi.toml +++ b/pixi.toml @@ -9,6 +9,7 @@ platforms = ["win-64", "linux-64", "osx-64", "osx-arm64"] [tasks] generate-predictions = { cmd = "python _generate_predictions.py", cwd = "content/python_files" } build-calibration-curve = { cmd = "ipython build_calibration_curve.py", cwd = "content/python_files" } +cause-miscalibration = { cmd = "ipython cause_miscalibration.py", cwd = "content/python_files" } check-generated-predictions = { cmd = "python check_generated_predictions.py", cwd = "tests" } different-calibration-curves = { cmd 
= "ipython different_calibration_curves.py", cwd = "content/python_files" } From e3d5edb75d7a25dd412d1ebd8de942c62bf27024 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 15 Aug 2024 11:56:06 +0200 Subject: [PATCH 03/28] iter --- .github/workflows/testing.yml | 2 +- book/_toc.yml | 2 +- pixi.toml | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 95e3615..48381df 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -28,7 +28,7 @@ jobs: pixi run check-generated-predictions pixi run build-calibration-curve pixi run different-calibration-curves - pixi run cause-miscalibration + pixi run causes-miscalibration - name: Test if we can build the documentation if: matrix.os == 'ubuntu-latest' diff --git a/book/_toc.yml b/book/_toc.yml index ab95c9e..924c2ca 100644 --- a/book/_toc.yml +++ b/book/_toc.yml @@ -6,4 +6,4 @@ root: intro chapters: - file: content/notebooks/build_calibration_curve - file: content/notebooks/different_calibration_curves -- file: content/notebooks/cause_miscalibration +- file: content/notebooks/causes_miscalibration diff --git a/pixi.toml b/pixi.toml index a98604c..20c1df5 100644 --- a/pixi.toml +++ b/pixi.toml @@ -9,9 +9,10 @@ platforms = ["win-64", "linux-64", "osx-64", "osx-arm64"] [tasks] generate-predictions = { cmd = "python _generate_predictions.py", cwd = "content/python_files" } build-calibration-curve = { cmd = "ipython build_calibration_curve.py", cwd = "content/python_files" } -cause-miscalibration = { cmd = "ipython cause_miscalibration.py", cwd = "content/python_files" } check-generated-predictions = { cmd = "python check_generated_predictions.py", cwd = "tests" } different-calibration-curves = { cmd = "ipython different_calibration_curves.py", cwd = "content/python_files" } +causes-miscalibration = { cmd = "ipython causes_miscalibration.py", cwd = "content/python_files" } + [dependencies] jupyterlab = ">=4.2.4,<5" From 9e816d3eb8898fb5a493ba76d571185b9676a3e1 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 15 Aug 2024 12:36:31 +0200 Subject: [PATCH 04/28] show effect of resampling --- content/python_files/causes_miscalibration.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index c2f5c41..7946a05 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -59,3 +59,72 @@ def xor_generator(n_samples=1_000, seed=0): CalibrationDisplay.from_estimator(model, X, y, strategy="quantile", n_bins=10) # %% +from sklearn.datasets import make_classification +from sklearn.model_selection import train_test_split + +X, y = make_classification( + n_samples=20_000, + n_features=2, + n_redundant=0, + weights=[0.1, 0.9], + class_sep=1, + random_state=1, +) +X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=0) + +# %% +model = LogisticRegression() +model.fit(X_train, y_train) + +# %% +from sklearn.metrics import classification_report + +print(classification_report(y_test, model.predict(X_test))) + +# %% +_, ax = plt.subplots() +DecisionBoundaryDisplay.from_estimator( + model, + X_test, + ax=ax, + cmap="coolwarm", + response_method="predict", + alpha=0.5, +) +ax.scatter(*X_test.T, c=y_test, cmap="coolwarm", alpha=0.5) +ax.set(xlabel="Feature 1", ylabel="Feature 2") + +# %% +model.set_params(class_weight="balanced").fit(X_train, y_train) + 
+# %% +from sklearn.metrics import classification_report + +print(classification_report(y_test, model.predict(X_test))) + +# %% +_, ax = plt.subplots() +DecisionBoundaryDisplay.from_estimator( + model, + X_test, + ax=ax, + cmap="coolwarm", + response_method="predict", + alpha=0.5, +) +ax.scatter(*X_test.T, c=y_test, cmap="coolwarm", alpha=0.5) +ax.set(xlabel="Feature 1", ylabel="Feature 2") + +# %% +model_vanilla = LogisticRegression().fit(X_train, y_train) +model_reweighted = LogisticRegression(class_weight="balanced").fit(X_train, y_train) + +# %% +disp = CalibrationDisplay.from_estimator( + model_vanilla, X_test, y_test, strategy="quantile" +) +CalibrationDisplay.from_estimator( + model_reweighted, X_test, y_test, strategy="quantile", ax=disp.ax_ +) + +# %% From a83221799e2f0d5a46328def9ad782088a6b55f7 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 19 Aug 2024 11:40:38 +0200 Subject: [PATCH 05/28] iter --- content/python_files/causes_miscalibration.py | 134 +++++++++++++++--- 1 file changed, 114 insertions(+), 20 deletions(-) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index 7946a05..f5739a5 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -1,21 +1,40 @@ +# %% [markdown] +# +# # The causes of miscalibration +# +# ## Effect of under-fitting and over-fitting on model calibration +# +# In this section, we look at the effect of under-fitting and over-fitting on the +# calibration of a model. +# +# Let's start by defining our classification problem: we use the so-called XOR problem. +# The function `xor_generator` generates a dataset with two features and the target +# variable following the XOR logic. We add some noise to the generative process. + # %% import numpy as np -def xor_generator(n_samples=1_000, seed=0): +def xor_generator(n_samples=1_000, seed=None): rng = np.random.default_rng(seed) X = rng.uniform(low=-3, high=3, size=(n_samples, 2)) - y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0) + unobserved = rng.normal(loc=0, scale=0.5, size=(n_samples, 2)) + y = np.logical_xor(X[:, 0] + unobserved[:, 0] > 0, X[:, 1] + unobserved[:, 1] > 0) return X, y +# %% [markdown] +# +# We can now generate a dataset and visualize it. + + # %% import matplotlib.pyplot as plt -X, y = xor_generator(seed=0) +X_train, y_train = xor_generator(seed=0) _, ax = plt.subplots() -ax.scatter(*X.T, c=y, cmap="coolwarm", alpha=0.5) -ax.set( +ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", alpha=0.5) +_ = ax.set( xlim=(-3, 3), ylim=(-3, 3), xlabel="Feature 1", @@ -24,39 +43,114 @@ def xor_generator(n_samples=1_000, seed=0): aspect="equal", ) +# %% [markdown] +# +# The XOR problem exhibits a non-linear decision link between the features and the +# the target variable. Therefore, a linear model will not be able to separate the +# classes correctly. Let's confirm this intuition by fitting a logistic regression +# model to such dataset. + # %% -from sklearn.preprocessing import SplineTransformer, PolynomialFeatures from sklearn.linear_model import LogisticRegression -from sklearn.pipeline import make_pipeline -model = make_pipeline( - SplineTransformer(n_knots=20), - PolynomialFeatures(degree=2, interaction_only=True), - LogisticRegression(), -) -model.fit(X, y) +model = LogisticRegression() +model.fit(X_train, y_train) + +# %% [markdown] +# +# To check the decision boundary of the model, we will use an independent test set. 
# %% from sklearn.inspection import DecisionBoundaryDisplay -_, ax = plt.subplots() -DecisionBoundaryDisplay.from_estimator( - model, X, ax=ax, cmap="coolwarm", response_method="predict_proba" +X_test, y_test = xor_generator(n_samples=1_000, seed=1) + +fig, ax = plt.subplots() +params = { + "cmap": "coolwarm", + "response_method": "predict_proba", + "plot_method": "pcolormesh", + # make sure to have a range of 0 to 1 for the probability + "vmin": 0, + "vmax": 1, +} +disp = DecisionBoundaryDisplay.from_estimator(model, X_test, ax=ax, **params) +ax.scatter(*X_test.T, c=y_test, cmap=params["cmap"], alpha=0.5) +fig.colorbar(disp.surface_, ax=ax, label="Probability estimate") +_ = ax.set( + xlim=(-3, 3), + ylim=(-3, 3), + xlabel="Feature 1", + ylabel="Feature 2", + title="Soft decision boundary of a logistic regression", + aspect="equal", ) -ax.scatter(*X.T, c=y, cmap="coolwarm", alpha=0.5) -ax.set( + +# %% [markdown] +# +# We see that the probability estimates is almost constant and the model is really +# uncertain with an estimated probability of 0.5 for all samples in the test set. +# +# We therefore need a more expressive model to capture the non-linear relationship +# between the features and the target variable. Crafting a pre-processing step to +# transform the features into a higher-dimensional space could help. We create a +# pipeline that includes a spline transformation and a polynomial transformation before +# to train our logistic regression model. + +# %% +from sklearn.preprocessing import SplineTransformer, PolynomialFeatures +from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import make_pipeline + +model = make_pipeline(SplineTransformer(), PolynomialFeatures(), LogisticRegression()) +model.fit(X_train, y_train) + +# %% [markdown] +# +# Let's check the decision boundary of the model on the test set. + +# %% +fig, ax = plt.subplots() +disp = DecisionBoundaryDisplay.from_estimator(model, X_test, ax=ax, **params) +ax.scatter(*X_test.T, c=y_test, cmap=params["cmap"], alpha=0.5) +fig.colorbar(disp.surface_, ax=ax, label="Probability estimate") +_ = ax.set( xlim=(-3, 3), ylim=(-3, 3), xlabel="Feature 1", ylabel="Feature 2", - title="XOR problem", + title="Soft decision boundary of a logistic regression\n with pre-processing", aspect="equal", ) +# %% [markdown] +# +# We see that our model is capable of capturing the non-linear relationship between +# the features and the target variable. The probability estimates are now varying +# across the samples. We could check the calibration of our model using the calibration +# curve. + # %% from sklearn.calibration import CalibrationDisplay -CalibrationDisplay.from_estimator(model, X, y, strategy="quantile", n_bins=10) +CalibrationDisplay.from_estimator( + model, + X_test, + y_test, + strategy="quantile", + n_bins=10, + estimator="LogisticRegression", +) + +# %% [markdown] +# +# We observe that the calibration of the model is not perfect. So is there a way to +# improve the calibration of our model? 
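# %% [markdown]
#
# Before answering this question, we can quantify the miscalibration with proper
# scoring rules. The cell below is a minimal, illustrative check: it computes the
# Brier score and the log loss of the pipeline on the test set, where lower values
# indicate probability estimates that are both better calibrated and more refined.

# %%
from sklearn.metrics import brier_score_loss, log_loss

# Probability estimates of the positive class on the independent test set
y_proba = model.predict_proba(X_test)[:, 1]
print(f"Brier score: {brier_score_loss(y_test, y_proba):.4f}")
print(f"Log loss: {log_loss(y_test, y_proba):.4f}")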
+ + +# %% +# +# ## Effect of resampling on model calibration # %% from sklearn.datasets import make_classification From b921b2591ba23a81c7c1412fb92dc07aefb46c56 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 19 Aug 2024 12:18:57 +0200 Subject: [PATCH 06/28] iter --- content/python_files/causes_miscalibration.py | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index f5739a5..e0aca8e 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -146,6 +146,54 @@ def xor_generator(n_samples=1_000, seed=None): # # We observe that the calibration of the model is not perfect. So is there a way to # improve the calibration of our model? +# +# As an exercise, you could try to: +# - modify the parameter `n_knots` of the `SplineTransformer`, +# - modify the parameter `degree` of the `PolynomialFeatures`, +# - modify the parameter `interaction_only` of the `PolynomialFeatures`, +# - modify the parameter `C` of the `LogisticRegression`. +# +# The idea is to observe the effect in terms of under-/over-fitting by looking at the +# decision boundary display and the effect on the model calibration on the calibration +# curve. + +# %% +import pprint +from sklearn.model_selection import ParameterGrid + +param_grid = ParameterGrid({ + "splinetransformer__n_knots": [5, 10, 20], + "polynomialfeatures__degree": [2, 5, 10], + "polynomialfeatures__interaction_only": [True, False], + "logisticregression__C": np.logspace(-3, 3, 10), +}) + +pp = pprint.PrettyPrinter(indent=4, width=1) +for model_params in param_grid: + # Fit a model + model.set_params(**model_params).fit(X_train, y_train) + # Display the results + fig, (ax_1, ax_2) = plt.subplots(ncols=2, figsize=(10, 8)) + disp = DecisionBoundaryDisplay.from_estimator(model, X_test, ax=ax_1, **params) + ax_1.scatter(*X_test.T, c=y_test, cmap=params["cmap"], edgecolor="black", alpha=0.5) + ax_1.set( + xlim=(-3, 3), + ylim=(-3, 3), + xlabel="Feature 1", + ylabel="Feature 2", + aspect="equal", + ) + CalibrationDisplay.from_estimator( + model, + X_test, + y_test, + strategy="quantile", + n_bins=10, + ax=ax_2, + ) + ax_2.set(aspect="equal") + fig.suptitle(f"Parameters:\n {pp.pformat(model_params)}", y=0.85) + # %% From ec1faf8ae2ea1ba0028d067b69f5249880ebaa5e Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 19 Aug 2024 12:28:05 +0200 Subject: [PATCH 07/28] iter --- content/python_files/causes_miscalibration.py | 1 - 1 file changed, 1 deletion(-) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index e0aca8e..d50b7b6 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -139,7 +139,6 @@ def xor_generator(n_samples=1_000, seed=None): y_test, strategy="quantile", n_bins=10, - estimator="LogisticRegression", ) # %% [markdown] From fcca66f9a7729991d02ebecec4b7b0f275b0233a Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 19 Aug 2024 21:42:02 +0200 Subject: [PATCH 08/28] iter --- content/python_files/causes_miscalibration.py | 68 ++++++++++++------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index d50b7b6..a823756 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -33,7 +33,7 @@ def 
xor_generator(n_samples=1_000, seed=None): X_train, y_train = xor_generator(seed=0) _, ax = plt.subplots() -ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", alpha=0.5) +ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black", alpha=0.5) _ = ax.set( xlim=(-3, 3), ylim=(-3, 3), @@ -75,7 +75,7 @@ def xor_generator(n_samples=1_000, seed=None): "vmax": 1, } disp = DecisionBoundaryDisplay.from_estimator(model, X_test, ax=ax, **params) -ax.scatter(*X_test.T, c=y_test, cmap=params["cmap"], alpha=0.5) +ax.scatter(*X_test.T, c=y_test, cmap=params["cmap"], edgecolors="black", alpha=0.5) fig.colorbar(disp.surface_, ax=ax, label="Probability estimate") _ = ax.set( xlim=(-3, 3), @@ -102,7 +102,11 @@ def xor_generator(n_samples=1_000, seed=None): from sklearn.linear_model import LogisticRegression from sklearn.pipeline import make_pipeline -model = make_pipeline(SplineTransformer(), PolynomialFeatures(), LogisticRegression()) +model = make_pipeline( + SplineTransformer(), + PolynomialFeatures(), + LogisticRegression(), +) model.fit(X_train, y_train) # %% [markdown] @@ -112,7 +116,7 @@ def xor_generator(n_samples=1_000, seed=None): # %% fig, ax = plt.subplots() disp = DecisionBoundaryDisplay.from_estimator(model, X_test, ax=ax, **params) -ax.scatter(*X_test.T, c=y_test, cmap=params["cmap"], alpha=0.5) +ax.scatter(*X_test.T, c=y_test, cmap=params["cmap"], edgecolors="black", alpha=0.5) fig.colorbar(disp.surface_, ax=ax, label="Probability estimate") _ = ax.set( xlim=(-3, 3), @@ -133,13 +137,14 @@ def xor_generator(n_samples=1_000, seed=None): # %% from sklearn.calibration import CalibrationDisplay -CalibrationDisplay.from_estimator( +disp = CalibrationDisplay.from_estimator( model, X_test, y_test, strategy="quantile", n_bins=10, ) +_ = disp.ax_.set(aspect="equal") # %% [markdown] # @@ -160,40 +165,53 @@ def xor_generator(n_samples=1_000, seed=None): import pprint from sklearn.model_selection import ParameterGrid -param_grid = ParameterGrid({ - "splinetransformer__n_knots": [5, 10, 20], - "polynomialfeatures__degree": [2, 5, 10], - "polynomialfeatures__interaction_only": [True, False], - "logisticregression__C": np.logspace(-3, 3, 10), -}) +param_grid = list( + ParameterGrid( + { + "logisticregression__C": np.logspace(-3, 3, 7), + "splinetransformer__n_knots": [5, 10], + "polynomialfeatures__degree": [2, 3], + "polynomialfeatures__interaction_only": [True, False], + } + ) +) -pp = pprint.PrettyPrinter(indent=4, width=1) -for model_params in param_grid: - # Fit a model +boundary_figure, boundary_axes = plt.subplots( + nrows=8, ncols=7, figsize=(50, 60), sharex=True, sharey=True +) +calibration_figure, calibration_axes = plt.subplots( + nrows=8, ncols=7, figsize=(50, 60), sharex=True, sharey=True +) +params["plot_method"] = "contourf" + +pp = pprint.PrettyPrinter(indent=1, width=1) +for idx, (model_params, ax_boundary, ax_calibration) in enumerate(zip( + param_grid, boundary_axes.ravel(), calibration_axes.ravel() +)): model.set_params(**model_params).fit(X_train, y_train) # Display the results - fig, (ax_1, ax_2) = plt.subplots(ncols=2, figsize=(10, 8)) - disp = DecisionBoundaryDisplay.from_estimator(model, X_test, ax=ax_1, **params) - ax_1.scatter(*X_test.T, c=y_test, cmap=params["cmap"], edgecolor="black", alpha=0.5) - ax_1.set( + disp = DecisionBoundaryDisplay.from_estimator( + model, X_test, ax=ax_boundary, **params + ) + ax_boundary.scatter( + *X_test.T, c=y_test, cmap=params["cmap"], edgecolor="black", alpha=0.5 + ) + ax_boundary.set( xlim=(-3, 3), ylim=(-3, 3), - xlabel="Feature 1", - 
ylabel="Feature 2", aspect="equal", + title=f"{pp.pformat(model_params)}", ) + CalibrationDisplay.from_estimator( model, X_test, y_test, strategy="quantile", n_bins=10, - ax=ax_2, + ax=ax_calibration, ) - ax_2.set(aspect="equal") - fig.suptitle(f"Parameters:\n {pp.pformat(model_params)}", y=0.85) - - + ax_calibration.set(aspect="equal", title=f"{pp.pformat(model_params)}") # %% # From b66d04ed636a35b3053a87777ccfcfcbbd7ec91c Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 20 Aug 2024 17:15:19 +0200 Subject: [PATCH 09/28] iter --- content/python_files/causes_miscalibration.py | 110 ++++++++++++++---- 1 file changed, 88 insertions(+), 22 deletions(-) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index a823756..994df9d 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -63,19 +63,19 @@ def xor_generator(n_samples=1_000, seed=None): # %% from sklearn.inspection import DecisionBoundaryDisplay -X_test, y_test = xor_generator(n_samples=1_000, seed=1) +X_test, y_test = xor_generator(n_samples=10_000, seed=1) fig, ax = plt.subplots() params = { "cmap": "coolwarm", "response_method": "predict_proba", - "plot_method": "pcolormesh", + "plot_method": "contourf", # make sure to have a range of 0 to 1 for the probability "vmin": 0, "vmax": 1, } disp = DecisionBoundaryDisplay.from_estimator(model, X_test, ax=ax, **params) -ax.scatter(*X_test.T, c=y_test, cmap=params["cmap"], edgecolors="black", alpha=0.5) +ax.scatter(*X_train.T, c=y_train, cmap=params["cmap"], edgecolors="black", alpha=0.5) fig.colorbar(disp.surface_, ax=ax, label="Probability estimate") _ = ax.set( xlim=(-3, 3), @@ -104,8 +104,10 @@ def xor_generator(n_samples=1_000, seed=None): model = make_pipeline( SplineTransformer(), - PolynomialFeatures(), - LogisticRegression(), + # Only add interaction terms to avoid blowing up the number of features + PolynomialFeatures(interaction_only=True), + # Increase the number of iterations to ensure convergence + LogisticRegression(max_iter=10_000), ) model.fit(X_train, y_train) @@ -116,7 +118,7 @@ def xor_generator(n_samples=1_000, seed=None): # %% fig, ax = plt.subplots() disp = DecisionBoundaryDisplay.from_estimator(model, X_test, ax=ax, **params) -ax.scatter(*X_test.T, c=y_test, cmap=params["cmap"], edgecolors="black", alpha=0.5) +ax.scatter(*X_train.T, c=y_train, cmap=params["cmap"], edgecolors="black", alpha=0.5) fig.colorbar(disp.surface_, ax=ax, label="Probability estimate") _ = ax.set( xlim=(-3, 3), @@ -151,6 +153,61 @@ def xor_generator(n_samples=1_000, seed=None): # We observe that the calibration of the model is not perfect. So is there a way to # improve the calibration of our model? # +# As an exercise, let's try to three different hyperparameters configurations: +# - one configuration with 5 knots (i.e. `n_knots`) for the spline transformation and a +# regularization parameter `C` of 1e-4 for the logistic regression, +# - one configuration with 7 knots for the spline transformation and a regularization +# parameter `C` of 1e1 for the logistic regression, +# - one configuration with 15 knots for the spline transformation and a regularization +# parameter `C` of 1e4 for the logistic regression. +# +# For each configuration, plot the decision boundary and the calibration curve. What +# can you observe in terms of under-/over-fitting and calibration? 
+ +# %% + +param_configs = [ + {"splinetransformer__n_knots": 5, "logisticregression__C": 1e-4}, + {"splinetransformer__n_knots": 7, "logisticregression__C": 1e1}, + {"splinetransformer__n_knots": 15, "logisticregression__C": 1e4}, +] + +for model_params in param_configs: + model.set_params(**model_params) + model.fit(X_train, y_train) + + fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 5)) + + disp = DecisionBoundaryDisplay.from_estimator(model, X_test, ax=ax[0], **params) + ax[0].scatter( + *X_train.T, c=y_train, cmap=params["cmap"], edgecolors="black", alpha=0.5 + ) + + _ = ax[0].set( + xlim=(-3, 3), + ylim=(-3, 3), + xlabel="Feature 1", + ylabel="Feature 2", + aspect="equal", + ) + + CalibrationDisplay.from_estimator( + model, + X_test, + y_test, + strategy="quantile", + n_bins=10, + ax=ax[1], + ) + _ = ax[1].set(aspect="equal") + + fig.suptitle( + f"Number of knots: {model_params['splinetransformer__n_knots']}, " + f"Regularization 'C': {model_params['logisticregression__C']}" + ) + +# %% +# # As an exercise, you could try to: # - modify the parameter `n_knots` of the `SplineTransformer`, # - modify the parameter `degree` of the `PolynomialFeatures`, @@ -162,45 +219,47 @@ def xor_generator(n_samples=1_000, seed=None): # curve. # %% -import pprint from sklearn.model_selection import ParameterGrid param_grid = list( ParameterGrid( { - "logisticregression__C": np.logspace(-3, 3, 7), - "splinetransformer__n_knots": [5, 10], - "polynomialfeatures__degree": [2, 3], - "polynomialfeatures__interaction_only": [True, False], + "logisticregression__C": np.logspace(-1, 3, 5), + "splinetransformer__n_knots": [5, 10, 15], + "polynomialfeatures": [None, PolynomialFeatures(interaction_only=True)], } ) ) boundary_figure, boundary_axes = plt.subplots( - nrows=8, ncols=7, figsize=(50, 60), sharex=True, sharey=True + nrows=5, ncols=6, figsize=(40, 35), sharex=True, sharey=True ) calibration_figure, calibration_axes = plt.subplots( - nrows=8, ncols=7, figsize=(50, 60), sharex=True, sharey=True + nrows=5, ncols=6, figsize=(40, 35), sharex=True, sharey=True ) -params["plot_method"] = "contourf" -pp = pprint.PrettyPrinter(indent=1, width=1) -for idx, (model_params, ax_boundary, ax_calibration) in enumerate(zip( - param_grid, boundary_axes.ravel(), calibration_axes.ravel() -)): +for idx, (model_params, ax_boundary, ax_calibration) in enumerate( + zip(param_grid, boundary_axes.ravel(), calibration_axes.ravel()) +): model.set_params(**model_params).fit(X_train, y_train) + # Create a title + title = ( + f"Number of knots: {model_params['splinetransformer__n_knots']},\n" + f"With interaction: {model_params['polynomialfeatures'] is not None},\n" + f"Regularization 'C': {model_params['logisticregression__C']}" + ) # Display the results disp = DecisionBoundaryDisplay.from_estimator( model, X_test, ax=ax_boundary, **params ) ax_boundary.scatter( - *X_test.T, c=y_test, cmap=params["cmap"], edgecolor="black", alpha=0.5 + *X_train.T, c=y_train, cmap=params["cmap"], edgecolor="black", alpha=0.5 ) ax_boundary.set( xlim=(-3, 3), ylim=(-3, 3), aspect="equal", - title=f"{pp.pformat(model_params)}", + title=title, ) CalibrationDisplay.from_estimator( @@ -211,7 +270,7 @@ def xor_generator(n_samples=1_000, seed=None): n_bins=10, ax=ax_calibration, ) - ax_calibration.set(aspect="equal", title=f"{pp.pformat(model_params)}") + ax_calibration.set(aspect="equal", title=title) # %% # @@ -225,7 +284,7 @@ def xor_generator(n_samples=1_000, seed=None): n_samples=20_000, n_features=2, n_redundant=0, - weights=[0.1, 0.9], + 
weights=[0.9, 0.1], class_sep=1, random_state=1, ) @@ -287,3 +346,10 @@ def xor_generator(n_samples=1_000, seed=None): ) # %% +from sklearn.metrics import RocCurveDisplay + +fig, ax = plt.subplots() +roc_display = RocCurveDisplay.from_estimator(model_vanilla, X_test, y_test, ax=ax) +roc_display.plot(ax=roc_display.ax_) + +# %% From 6f365bc4ca9ace7ea143f4771a18f059382ca4f7 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 20 Aug 2024 20:18:22 +0200 Subject: [PATCH 10/28] iter --- content/python_files/causes_miscalibration.py | 95 +++++++++++++++---- 1 file changed, 74 insertions(+), 21 deletions(-) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index 994df9d..d84cd7f 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -206,19 +206,32 @@ def xor_generator(n_samples=1_000, seed=None): f"Regularization 'C': {model_params['logisticregression__C']}" ) -# %% +# %% [markdown] # -# As an exercise, you could try to: -# - modify the parameter `n_knots` of the `SplineTransformer`, -# - modify the parameter `degree` of the `PolynomialFeatures`, -# - modify the parameter `interaction_only` of the `PolynomialFeatures`, -# - modify the parameter `C` of the `LogisticRegression`. +# From the previous exercise, we observe that whether we have an under-fitting or +# over-fitting model impact its calibration. With a high regularization (i.e. `C=1e-4`), +# we see that the model undefits since it does not discriminate between the two classes. +# It translates into obtaining a vertical calibration curve meaning that our model +# predicts the same probability for all fraction of positive samples. # -# The idea is to observe the effect in terms of under-/over-fitting by looking at the -# decision boundary display and the effect on the model calibration on the calibration -# curve. +# On the other hand, if we have a low regularization (i.e. `C=1e4`), and allows the +# the model to be flexible by having a large number of knots, we see that the model +# overfits since it is able to isolate noisy samples in the feature space. It translates +# into a calibration curve where we observe that our model is over-confident. +# +# Finally, there is a sweet spot where the model does not underfit nor overfit. In this +# case, we also get a calibrated model. +# +# We can push the analysis further by looking at a wider range of hyperparameters: +# +# - the impact of `n_knots` of the `SplineTransformer`, +# - whether or not to compute interaction terms using a `PolynomialFeatures`, +# - the impact of the parameter `C` of the `LogisticRegression`. +# +# We can plot the full grid of hyperparameters to see the effect on the decision +# boundary and the calibration curve. 
-# %% +## %% from sklearn.model_selection import ParameterGrid param_grid = list( @@ -231,23 +244,24 @@ def xor_generator(n_samples=1_000, seed=None): ) ) -boundary_figure, boundary_axes = plt.subplots( - nrows=5, ncols=6, figsize=(40, 35), sharex=True, sharey=True -) -calibration_figure, calibration_axes = plt.subplots( - nrows=5, ncols=6, figsize=(40, 35), sharex=True, sharey=True -) +fig_params = { + "nrows": 5, + "ncols": 6, + "figsize": (40, 35), + "sharex": True, + "sharey": True, +} +boundary_figure, boundary_axes = plt.subplots(**fig_params) +calibration_figure, calibration_axes = plt.subplots(**fig_params) for idx, (model_params, ax_boundary, ax_calibration) in enumerate( zip(param_grid, boundary_axes.ravel(), calibration_axes.ravel()) ): model.set_params(**model_params).fit(X_train, y_train) # Create a title - title = ( - f"Number of knots: {model_params['splinetransformer__n_knots']},\n" - f"With interaction: {model_params['polynomialfeatures'] is not None},\n" - f"Regularization 'C': {model_params['logisticregression__C']}" - ) + title = f"{model_params['splinetransformer__n_knots']} knots" + title += " with " if model_params["polynomialfeatures"] else " without " + title += "interaction terms" # Display the results disp = DecisionBoundaryDisplay.from_estimator( model, X_test, ax=ax_boundary, **params @@ -272,6 +286,45 @@ def xor_generator(n_samples=1_000, seed=None): ) ax_calibration.set(aspect="equal", title=title) + if idx % fig_params["ncols"] == 0: + for ax in (ax_boundary, ax_calibration): + ylabel = f"Regularization 'C': {model_params['logisticregression__C']}" + ylabel += f"\n\n\n{ax.get_ylabel()}" if ax.get_ylabel() else "" + ax.set(ylabel=ylabel) + +# %% [markdown] +# +# An obvious observation is that without explicitly creating the interaction terms, +# our model is mis-specified and the model cannot capture the non-linear relationship, +# whatever the other hyperparameters values. +# +# A larger number of knots in the spline transformation increases the flexibility of the +# decision boundary since it can vary at more locations into the feature space. +# Therefore, if we use a too large number of knots, then the model is able isolate noisy +# samples in this feature space, depending of the subsequent regularization parameter +# `C`. +# +# Indeed, the parameter `C` controls the loss function that is minimized during the +# training: a small value of `C` enforces to minimize the norm of the model coefficients +# and thus discard, more or less, the training error (i.e. the mean squared error). A +# large value of `C` enforces to prioritize minimizing the training error without +# constraining, more or less, the norm of the coefficients. +# +# Understanding the previous principles, it allows us to understand that we have an +# interaction between the number of knots and the regularization parameter `C`. Since a +# model with a larger number of knots is more flexible and thus more prone to +# overfitting, the value of the parameter `C` should be smaller (i.e. more +# regularization) than a model with a smaller number of knots. +# +# For instance, setting `C=100` with `n_knots=5` leads to a model with a similar +# calibration curve as setting `C=10` with `n_knots=15`. + +# %% [markdown] +# +# ### Hyperparameter tuning while considering calibration +# +# TODO: Add a section on how to tune the hyperparameters using a proper scoring rule. 
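# %% [markdown]
#
# As a minimal sketch of what such a section could look like (assuming we only tune
# the regularization parameter `C` of the pipeline), a grid search scored with the
# log loss selects the model with the best probability estimates rather than the best
# hard predictions.

# %%
from sklearn.model_selection import GridSearchCV

# Tune only `C`, scoring candidates with a proper scoring rule (the log loss)
search = GridSearchCV(
    model,
    param_grid={"logisticregression__C": np.logspace(-2, 2, 5)},
    scoring="neg_log_loss",
)
search.fit(X_train, y_train)
search.best_params_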
+ # %% # # ## Effect of resampling on model calibration From ec1978053f27f64c26ad86cf800dcfcceb78c89b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 20 Aug 2024 21:12:27 +0200 Subject: [PATCH 11/28] iter --- content/python_files/causes_miscalibration.py | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index d84cd7f..07451db 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -319,6 +319,67 @@ def xor_generator(n_samples=1_000, seed=None): # For instance, setting `C=100` with `n_knots=5` leads to a model with a similar # calibration curve as setting `C=10` with `n_knots=15`. +# %% [markdown] +# +# TODO: add discussion regarding the HGBDT model. + +# %% +from sklearn.ensemble import HistGradientBoostingClassifier + +model = HistGradientBoostingClassifier() + +param_grid = list( + ParameterGrid( + {"max_leaf_nodes": [5, 10, 30, 50], "learning_rate": [0.01, 0.1, 1]} + ) +) + +fig_params = { + "nrows": 3, + "ncols": 4, + "figsize": (20, 16), + "sharex": True, + "sharey": True, +} +boundary_figure, boundary_axes = plt.subplots(**fig_params) +calibration_figure, calibration_axes = plt.subplots(**fig_params) + +for idx, (model_params, ax_boundary, ax_calibration) in enumerate( + zip(param_grid, boundary_axes.ravel(), calibration_axes.ravel()) +): + model.set_params(**model_params).fit(X_train, y_train) + # Create a title + title = f"Maximum number of leaf nodes: {model_params['max_leaf_nodes']}" + # Display the results + disp = DecisionBoundaryDisplay.from_estimator( + model, X_test, ax=ax_boundary, **params + ) + ax_boundary.scatter( + *X_train.T, c=y_train, cmap=params["cmap"], edgecolor="black", alpha=0.5 + ) + ax_boundary.set( + xlim=(-3, 3), + ylim=(-3, 3), + aspect="equal", + title=title, + ) + + CalibrationDisplay.from_estimator( + model, + X_test, + y_test, + strategy="quantile", + n_bins=10, + ax=ax_calibration, + ) + ax_calibration.set(aspect="equal", title=title) + + if idx % fig_params["ncols"] == 0: + for ax in (ax_boundary, ax_calibration): + ylabel = f"Learning rate: {model_params['learning_rate']}" + ylabel += f"\n\n\n{ax.get_ylabel()}" if ax.get_ylabel() else "" + ax.set(ylabel=ylabel) + # %% [markdown] # # ### Hyperparameter tuning while considering calibration From a2dd59197d0137a53cc696bef30f682b2c3bf7ed Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 21 Aug 2024 09:15:37 +0200 Subject: [PATCH 12/28] iter --- content/python_files/causes_miscalibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index 07451db..a5d90a8 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -231,7 +231,7 @@ def xor_generator(n_samples=1_000, seed=None): # We can plot the full grid of hyperparameters to see the effect on the decision # boundary and the calibration curve. 
-## %% +# %% from sklearn.model_selection import ParameterGrid param_grid = list( From b2f04b84f5afcedeea4edcc1c56d5bca8f4a35b9 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 21 Aug 2024 10:08:39 +0200 Subject: [PATCH 13/28] iter --- content/python_files/causes_miscalibration.py | 92 ++++++++++++++++++- 1 file changed, 87 insertions(+), 5 deletions(-) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index a5d90a8..2e19259 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -321,7 +321,14 @@ def xor_generator(n_samples=1_000, seed=None): # %% [markdown] # -# TODO: add discussion regarding the HGBDT model. +# ### Is it true for other models? +# +# In this section, we want to show that the previous findings are not specific to the +# a linear model that relies on a pre-processing step. Here, we use a gradient-boosting +# model that naturally captures non-linear relationships of the XOR problem. +# +# We check that the calibration of the model by changing the hyperparameters +# `max_leaf_nodes` and `learning_rate` that are known to impact the model complexity. # %% from sklearn.ensemble import HistGradientBoostingClassifier @@ -330,14 +337,14 @@ def xor_generator(n_samples=1_000, seed=None): param_grid = list( ParameterGrid( - {"max_leaf_nodes": [5, 10, 30, 50], "learning_rate": [0.01, 0.1, 1]} + {"max_leaf_nodes": [5, 10, 30], "learning_rate": [0.01, 0.1, 1]} ) ) fig_params = { "nrows": 3, - "ncols": 4, - "figsize": (20, 16), + "ncols": 3, + "figsize": (16, 16), "sharex": True, "sharey": True, } @@ -380,11 +387,86 @@ def xor_generator(n_samples=1_000, seed=None): ylabel += f"\n\n\n{ax.get_ylabel()}" if ax.get_ylabel() else "" ax.set(ylabel=ylabel) +# %% [markdown] +# +# From the boundary decision plots, we observe that the model, whatever the +# hyperparameters, is capable of capturing the link between the features and the target. +# However, if we look at the probability estimates, we still observe the same effect of +# under-fitting and over-fitting as for the logistic regression model. It also means +# that tuning the parameter `max_leaf_nodes` on this specific dataset is not worth it +# since for a single decision tree, the perfect decision boundary is achieved with +# 4 leaf nodes. +# +# However, the learning rate is the parameter that controls the model to under-fit or +# over-fit. A too low learning rate leads to an under-fitting model and the model is +# under-confident with probability estimates that are too low. On the other hand, a too +# high learning rate leads to an over-fitting model and the model is over-confident with +# probability estimates that are too high. + # %% [markdown] # # ### Hyperparameter tuning while considering calibration # -# TODO: Add a section on how to tune the hyperparameters using a proper scoring rule. +# From the previous sections, we saw that the hyperparameters of a model while impacting +# its complexity also impact its calibration. It therefore becomes crucial to tune the +# hyperparameters of a model while considering if its calibration. While scikit-learn +# offers tools to tune hyperparameters such as `GridSearchCV` or `RandomizedSearchCV`, +# there is a caveat: the default metric used to select the best model is not necessarily +# the one leading to a well-calibrated model. +# +# To illustrate this point, we use the previous logistic regression model with the +# preprocessing step. 
From the previous experiment, we draw the conclusion that we +# need to have some regularization to avoid overfitting induced by the number of knots. +# Therefore, we plot the validate curve for different values of the regularization +# parameter `C`. In addition, since we want to see the impact of the metric used to +# tuned the hyperparameters, we plot different validation curves for different metrics: +# - the negative log-likelihood that is a proper scoring rule, +# - the ROC AUC that is a ranking metric, +# - the accuracy that is a thresholded metric. + +# %% +from sklearn.model_selection import ShuffleSplit, ValidationCurveDisplay + +model = make_pipeline( + SplineTransformer(n_knots=15), + PolynomialFeatures(interaction_only=True), + LogisticRegression(max_iter=10_000), +) + +_, axes = plt.subplots(ncols=3, figsize=(15, 5)) +for metric_name, ax in zip(["neg_log_loss", "roc_auc", "accuracy"], axes): + disp = ValidationCurveDisplay.from_estimator( + model, + X_train, + y_train, + param_name="logisticregression__C", + param_range=np.logspace(-6, 6, 25), + scoring=metric_name, + ax=ax, + cv=ShuffleSplit(n_splits=10, test_size=0.2, random_state=0) + ) + ax.set( + xlabel="Regularization C", + xscale="log", + title=f"Validation curve on {disp.ax_.get_ylabel()}", + ) + +# %% [markdown] +# +# From the previous plots, there are three important observations. +# +# First, the proper scoring rule (i.e. the negative log-likelihood) depicts a more +# distinct bump in comparison to the ranking metric (i.e. the ROC AUC) and the +# thresholded metric (i.e. the accuracy). The bump is still present for the ROC AUC but +# it is less pronounced. The accuracy does not show any bump. +# +# Then, the proper scoring rule is the only one showing a significant decrease when +# the regularization is too low. The intuition is that the model becomes over-confident +# and thus not well-calibrated while the hard predictions will not be impacted. +# +# Lastly, the proper scoring rule is the metric showing the least variance across the +# different splits near of the optimal value. It therefore makes it a more robust metric +# to select the best model. # %% # From 0f393f340b7122eb53d96adf20eb85412d1ad7e1 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 21 Aug 2024 10:22:14 +0200 Subject: [PATCH 14/28] random search --- content/python_files/causes_miscalibration.py | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index 2e19259..6a6f019 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -467,6 +467,66 @@ def xor_generator(n_samples=1_000, seed=None): # Lastly, the proper scoring rule is the metric showing the least variance across the # different splits near of the optimal value. It therefore makes it a more robust metric # to select the best model. +# +# We therefore recommend to always use a proper scoring rule when tuning the +# hyperparemeters. Below, we show the methodology to pursue when using a proper scoring +# together with a `RandomizedSearchCV`. We therefore needs to set specifically +# `scoring` to `neg_log_loss` in the `RandomizedSearchCV`. 
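# %% [markdown]
#
# Before running the search, a quick numeric aside on why the log loss is a proper
# scoring rule: for a Bernoulli target with an assumed true probability `p`, the
# expected log loss is minimized exactly when the predicted probability `q` equals
# `p`. The sketch below checks this claim on a grid of candidate values.

# %%
p = 0.8  # assumed true probability of the positive class
q = np.linspace(0.01, 0.99, 99)  # candidate predicted probabilities
# Expected value of -y log(q) - (1 - y) log(1 - q) when y ~ Bernoulli(p)
expected_log_loss = -(p * np.log(q) + (1 - p) * np.log(1 - q))
print(f"Expected log loss is minimized at q = {q[np.argmin(expected_log_loss)]:.2f}")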
+ +# %% +from scipy.stats import loguniform +from sklearn.model_selection import RandomizedSearchCV + +param_distributions = { + "splinetransformer__n_knots": [5, 10, 15], + "logisticregression__C": loguniform(1e-6, 1e6), +} + +tuned_model = RandomizedSearchCV( + model, + param_distributions=param_distributions, + n_iter=50, + scoring="neg_log_loss", + cv=ShuffleSplit(n_splits=10, test_size=0.2, random_state=0), + random_state=0, +) +tuned_model.fit(X_train, y_train) + +# %% [markdown] +# +# Now that we train the model, we check if it is well-calibrated on the left-out +# test set. + +# %% +fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 5)) + +disp = DecisionBoundaryDisplay.from_estimator(tuned_model, X_test, ax=ax[0], **params) +ax[0].scatter( + *X_train.T, c=y_train, cmap=params["cmap"], edgecolors="black", alpha=0.5 +) + +_ = ax[0].set( + xlim=(-3, 3), + ylim=(-3, 3), + xlabel="Feature 1", + ylabel="Feature 2", + aspect="equal", +) + +CalibrationDisplay.from_estimator( + tuned_model, + X_test, + y_test, + strategy="quantile", + n_bins=10, + ax=ax[1], +) +_ = ax[1].set(aspect="equal") + +fig.suptitle( + f"Number of knots: {tuned_model.best_params_['splinetransformer__n_knots']}, " + f"Regularization 'C': {tuned_model.best_params_['logisticregression__C']}" +) # %% # From d4df538f9cf8e73f49953e06364d0127e0eacc8a Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 21 Aug 2024 10:23:57 +0200 Subject: [PATCH 15/28] stop using future tense --- content/python_files/causes_miscalibration.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index 6a6f019..c472721 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -46,7 +46,7 @@ def xor_generator(n_samples=1_000, seed=None): # %% [markdown] # # The XOR problem exhibits a non-linear decision link between the features and the -# the target variable. Therefore, a linear model will not be able to separate the +# the target variable. Therefore, a linear model is not be able to separate the # classes correctly. Let's confirm this intuition by fitting a logistic regression # model to such dataset. @@ -58,7 +58,7 @@ def xor_generator(n_samples=1_000, seed=None): # %% [markdown] # -# To check the decision boundary of the model, we will use an independent test set. +# To check the decision boundary of the model, we use an independent test set. # %% from sklearn.inspection import DecisionBoundaryDisplay @@ -462,7 +462,7 @@ def xor_generator(n_samples=1_000, seed=None): # # Then, the proper scoring rule is the only one showing a significant decrease when # the regularization is too low. The intuition is that the model becomes over-confident -# and thus not well-calibrated while the hard predictions will not be impacted. +# and thus not well-calibrated while the hard predictions are not be impacted. # # Lastly, the proper scoring rule is the metric showing the least variance across the # different splits near of the optimal value. 
It therefore makes it a more robust metric From 2785343de4dab2e98bf4853bad9795f118dac9d7 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 21 Aug 2024 12:23:34 +0200 Subject: [PATCH 16/28] add first discussion about resampling --- content/python_files/causes_miscalibration.py | 223 +++++++++++++++--- 1 file changed, 185 insertions(+), 38 deletions(-) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index c472721..4a862f7 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -183,7 +183,7 @@ def xor_generator(n_samples=1_000, seed=None): *X_train.T, c=y_train, cmap=params["cmap"], edgecolors="black", alpha=0.5 ) - _ = ax[0].set( + ax[0].set( xlim=(-3, 3), ylim=(-3, 3), xlabel="Feature 1", @@ -199,7 +199,7 @@ def xor_generator(n_samples=1_000, seed=None): n_bins=10, ax=ax[1], ) - _ = ax[1].set(aspect="equal") + ax[1].set(aspect="equal") fig.suptitle( f"Number of knots: {model_params['splinetransformer__n_knots']}, " @@ -336,9 +336,7 @@ def xor_generator(n_samples=1_000, seed=None): model = HistGradientBoostingClassifier() param_grid = list( - ParameterGrid( - {"max_leaf_nodes": [5, 10, 30], "learning_rate": [0.01, 0.1, 1]} - ) + ParameterGrid({"max_leaf_nodes": [5, 10, 30], "learning_rate": [0.01, 0.1, 1]}) ) fig_params = { @@ -443,7 +441,7 @@ def xor_generator(n_samples=1_000, seed=None): param_range=np.logspace(-6, 6, 25), scoring=metric_name, ax=ax, - cv=ShuffleSplit(n_splits=10, test_size=0.2, random_state=0) + cv=ShuffleSplit(n_splits=10, test_size=0.2, random_state=0), ) ax.set( xlabel="Regularization C", @@ -501,9 +499,7 @@ def xor_generator(n_samples=1_000, seed=None): fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 5)) disp = DecisionBoundaryDisplay.from_estimator(tuned_model, X_test, ax=ax[0], **params) -ax[0].scatter( - *X_train.T, c=y_train, cmap=params["cmap"], edgecolors="black", alpha=0.5 -) +ax[0].scatter(*X_train.T, c=y_train, cmap=params["cmap"], edgecolors="black", alpha=0.5) _ = ax[0].set( xlim=(-3, 3), @@ -523,89 +519,240 @@ def xor_generator(n_samples=1_000, seed=None): ) _ = ax[1].set(aspect="equal") -fig.suptitle( +_ = fig.suptitle( f"Number of knots: {tuned_model.best_params_['splinetransformer__n_knots']}, " f"Regularization 'C': {tuned_model.best_params_['logisticregression__C']}" ) -# %% +# %% [markdown] # # ## Effect of resampling on model calibration +# +# Another cause for model miscalibration is related to training set resampling. In +# general, resampling is encountered when dealing with imbalanced datasets. In this +# section, we show the effect of resampling on model calibration and the methodology +# to use when it comes to imbalanced datasets. +# +# Let's synthetically generate an imbalanced dataset with 90% of the samples belonging +# to the majority class and 10% to the minority class. 
# %% from sklearn.datasets import make_classification from sklearn.model_selection import train_test_split X, y = make_classification( - n_samples=20_000, + n_samples=50_000, n_features=2, n_redundant=0, - weights=[0.9, 0.1], - class_sep=1, + n_clusters_per_class=1, + weights=[0.99, 0.01], + class_sep=2, random_state=1, ) -X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=0) +X_train, X_test, y_train, y_test = train_test_split( + X, y, stratify=y, test_size=0.9, random_state=0 +) -# %% -model = LogisticRegression() -model.fit(X_train, y_train) +# %% [markdown] +# +# As a model, we use a logistic regression model and check the classification report. # %% from sklearn.metrics import classification_report -print(classification_report(y_test, model.predict(X_test))) +logistic_regression = LogisticRegression().fit(X_train, y_train) +print(classification_report(y_test, logistic_regression.predict(X_test))) + +# %% [markdown] +# +# When it comes to imbalanced datasets, in general, data scientists tend to be +# unhappy with one of the statistical metrics used. Here, they might be unhappy with +# the recall metric that is too low for their taste. +# +# Let's check what would be the related decision boundary of our model. # %% _, ax = plt.subplots() DecisionBoundaryDisplay.from_estimator( - model, + logistic_regression, X_test, ax=ax, cmap="coolwarm", response_method="predict", - alpha=0.5, + alpha=0.8, ) -ax.scatter(*X_test.T, c=y_test, cmap="coolwarm", alpha=0.5) -ax.set(xlabel="Feature 1", ylabel="Feature 2") +ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black") +_ = ax.set(xlabel="Feature 1", ylabel="Feature 2") -# %% -model.set_params(class_weight="balanced").fit(X_train, y_train) +# %% [markdown] +# +# So we see that our model is conservative by wrongly classifying sample from the +# majority class. However, if our data scientists want to improve the recall, they +# would like to move the decision boundary to classify correctly more samples from the +# minority class at the cost of misclassifying more samples from the majority class. +# +# A body of literature is usually advocating for resampling the training set such that +# the model is trained on a more balanced dataset. In scikit-learn, the effect of the +# parameter `class_weight` provide an equivalence to resampling the training set when +# set to `"balanced"`. +# +# We therefore repeat the previous experiment but setting this parameter and check the +# effect on the classification report and the decision boundary. # %% -from sklearn.metrics import classification_report - -print(classification_report(y_test, model.predict(X_test))) +logistic_regression_balanced = LogisticRegression(class_weight="balanced") +logistic_regression_balanced.fit(X_train, y_train) +print(classification_report(y_test, logistic_regression_balanced.predict(X_test))) # %% _, ax = plt.subplots() DecisionBoundaryDisplay.from_estimator( - model, + logistic_regression_balanced, X_test, ax=ax, cmap="coolwarm", response_method="predict", - alpha=0.5, + alpha=0.8, ) -ax.scatter(*X_test.T, c=y_test, cmap="coolwarm", alpha=0.5) -ax.set(xlabel="Feature 1", ylabel="Feature 2") +ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black") +_ = ax.set(xlabel="Feature 1", ylabel="Feature 2") + +# %% [markdown] +# +# So we see that the recall increases at the cost of lowering the precision. This +# is confirmed by the decision boundary displacement. 
+# +# However, here we completely discard the potential effect on the calibration of the +# model. Instead to check the hard decision boundary, let's check the decision boundary +# based on the probability estimates. # %% -model_vanilla = LogisticRegression().fit(X_train, y_train) -model_reweighted = LogisticRegression(class_weight="balanced").fit(X_train, y_train) +fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5), sharex=True, sharey=True) +for ax, model in zip(axes.ravel(), [logistic_regression, logistic_regression_balanced]): + disp = DecisionBoundaryDisplay.from_estimator( + model, + X_test, + ax=ax, + cmap="coolwarm", + response_method="predict_proba", + alpha=0.8, + ) + ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black") + ax.set(xlabel="Feature 1", ylabel="Feature 2") + fig.colorbar(disp.surface_, ax=ax, label="Probability estimate") + +# %% [markdown] +# +# We see that the two models have a very different probability estimates. We should +# therefore check the calibration of the two models to check if one model is better +# calibrated than the other. # %% disp = CalibrationDisplay.from_estimator( - model_vanilla, X_test, y_test, strategy="quantile" + logistic_regression, X_test, y_test, strategy="quantile", name="Unbalanced LR" ) CalibrationDisplay.from_estimator( - model_reweighted, X_test, y_test, strategy="quantile", ax=disp.ax_ + logistic_regression_balanced, + X_test, + y_test, + strategy="quantile", + ax=disp.ax_, + name="Balanced LR", ) +disp.ax_.set(aspect="equal") +_ = disp.ax_.legend(loc="upper left") + +# %% [markdown] +# +# We clearly see that the balanced logistic regression model is completely +# miscalibrated. In short, this is the effect of resampling. We could have a look at the +# ROC curves of the two models to check if the predictions ranking changed. # %% from sklearn.metrics import RocCurveDisplay fig, ax = plt.subplots() -roc_display = RocCurveDisplay.from_estimator(model_vanilla, X_test, y_test, ax=ax) -roc_display.plot(ax=roc_display.ax_) +RocCurveDisplay.from_estimator( + logistic_regression, X_test, y_test, ax=ax, linestyle="-.", name="Unbalanced LR" +) +RocCurveDisplay.from_estimator( + logistic_regression_balanced, + X_test, + y_test, + ax=ax, + linestyle="--", + name="Balanced LR", +) + +# %% [markdown] +# +# We see that the two models have the same ROC curve. So it means, that the ranking of +# the predictions is the same. +# +# As a conclusion, we should not use resampling to deal with imbalanced datasets. +# Instead, if we are interesting in improving a given metric, we should instead +# tune the threshold that is set to 0.5 by default to transform the probability +# estimates into hard predictions. It will have the same effect as "moving" the +# decision boundary but it will not impact the calibration of the model. We will go +# in further details in this topic in the next section. But we can quickly experiment +# with the `FixedThresholdClassifier` from scikit-learn that allows to set a threshold +# to transform the probability estimates into hard predictions. 
+ +# %% +from sklearn.model_selection import FixedThresholdClassifier + +threshold = 0.1 +logistic_regrssion_with_threshold = FixedThresholdClassifier( + logistic_regression, threshold=threshold +).fit(X_train, y_train) + +# %% +fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5), sharex=True, sharey=True) +for ax, model, title in zip( + axes.ravel(), + [logistic_regression, logistic_regrssion_with_threshold], + ["Threshold 0.5 (default)", f"Threshold {threshold}"], +): + disp = DecisionBoundaryDisplay.from_estimator( + model, + X_test, + ax=ax, + cmap="coolwarm", + response_method="predict", + alpha=0.8, + ) + ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black") + ax.set(xlabel="Feature 1", ylabel="Feature 2", title=title) + +# %% [markdown] +# +# We see that the decision boundary similarly to the balanced logistic regression model. +# In addition, since we have a parameter to tune, we can easily target a certain score +# for some targetted metric that is not trivial with resampling. +# +# We can go further and check that the two models that we have are both calibrated the +# same way. # %% +fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5), sharex=True, sharey=True) +for ax, model, title in zip( + axes.ravel(), + [logistic_regression, logistic_regrssion_with_threshold], + ["Threshold 0.5 (default)", f"Threshold {threshold}"], +): + disp = DecisionBoundaryDisplay.from_estimator( + model, + X_test, + ax=ax, + cmap="coolwarm", + response_method="predict_proba", + alpha=0.8, + ) + ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black") + ax.set(xlabel="Feature 1", ylabel="Feature 2", title=title) + fig.colorbar(disp.surface_, ax=ax, label="Probability estimate") + +# %% [markdown] +# +# This is not a surprise since the thresholding is a post-processing that threshold the +# probability estimates. Therefore, it does not impact the calibration of the model. From b79c9b4289e48ca27b2b02334bf81da1c1ec8382 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 21 Aug 2024 15:50:04 +0200 Subject: [PATCH 17/28] iter --- content/python_files/causes_miscalibration.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index 4a862f7..10368ce 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -11,6 +11,11 @@ # The function `xor_generator` generates a dataset with two features and the target # variable following the XOR logic. We add some noise to the generative process. 
+# %% +# Make sure to have scikit-learn >= 1.5 +import sklearn +sklearn.__version__ + # %% import numpy as np From 64761ef9df05981cadcdaae7cb0991371eee79d0 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 22 Aug 2024 11:29:22 +0200 Subject: [PATCH 18/28] iter --- content/python_files/causes_miscalibration.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/causes_miscalibration.py index 10368ce..c3d9bea 100644 --- a/content/python_files/causes_miscalibration.py +++ b/content/python_files/causes_miscalibration.py @@ -584,7 +584,7 @@ def xor_generator(n_samples=1_000, seed=None): ax=ax, cmap="coolwarm", response_method="predict", - alpha=0.8, + plot_method="contour" ) ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black") _ = ax.set(xlabel="Feature 1", ylabel="Feature 2") @@ -617,7 +617,7 @@ def xor_generator(n_samples=1_000, seed=None): ax=ax, cmap="coolwarm", response_method="predict", - alpha=0.8, + plot_method="contour", ) ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black") _ = ax.set(xlabel="Feature 1", ylabel="Feature 2") @@ -724,7 +724,7 @@ def xor_generator(n_samples=1_000, seed=None): ax=ax, cmap="coolwarm", response_method="predict", - alpha=0.8, + plot_method="contour", ) ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black") ax.set(xlabel="Feature 1", ylabel="Feature 2", title=title) @@ -753,6 +753,14 @@ def xor_generator(n_samples=1_000, seed=None): response_method="predict_proba", alpha=0.8, ) + DecisionBoundaryDisplay.from_estimator( + model, + X_test, + ax=ax, + cmap="coolwarm", + response_method="predict", + plot_method="contour", + ) ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black") ax.set(xlabel="Feature 1", ylabel="Feature 2", title=title) fig.colorbar(disp.surface_, ax=ax, label="Probability estimate") From 277f30453504f94b38f60024b6fd4d3476ba1885 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 22 Aug 2024 12:14:49 +0200 Subject: [PATCH 19/28] split --- .github/workflows/testing.yml | 3 +- .../miscalibration_reweighting.py | 251 ++++++++++++++++++ ...on.py => miscalibration_under_over_fit.py} | 243 +---------------- pixi.toml | 3 +- 4 files changed, 256 insertions(+), 244 deletions(-) create mode 100644 content/python_files/miscalibration_reweighting.py rename content/python_files/{causes_miscalibration.py => miscalibration_under_over_fit.py} (69%) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 48381df..4fe9593 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -28,7 +28,8 @@ jobs: pixi run check-generated-predictions pixi run build-calibration-curve pixi run different-calibration-curves - pixi run causes-miscalibration + pixi run miscalibration-under-over-fit + pixi run miscalibration-reweighting - name: Test if we can build the documentation if: matrix.os == 'ubuntu-latest' diff --git a/content/python_files/miscalibration_reweighting.py b/content/python_files/miscalibration_reweighting.py new file mode 100644 index 0000000..e3dd91b --- /dev/null +++ b/content/python_files/miscalibration_reweighting.py @@ -0,0 +1,251 @@ +# %% [markdown] +# +# ## Effect of resampling on model calibration +# +# Another cause for model miscalibration is related to training set resampling. In +# general, resampling is encountered when dealing with imbalanced datasets. 
In this
+# section, we show the effect of resampling on model calibration and the methodology
+# to use when dealing with imbalanced datasets.
+#
+# Let's synthetically generate an imbalanced dataset with 99% of the samples belonging
+# to the majority class and 1% to the minority class.
+
+# %%
+# Make sure to have scikit-learn >= 1.5
+import sklearn
+sklearn.__version__
+
+# %%
+from sklearn.datasets import make_classification
+from sklearn.model_selection import train_test_split
+
+X, y = make_classification(
+    n_samples=50_000,
+    n_features=2,
+    n_redundant=0,
+    n_clusters_per_class=1,
+    weights=[0.99, 0.01],
+    class_sep=2,
+    random_state=1,
+)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, stratify=y, test_size=0.9, random_state=0
+)
+
+# %% [markdown]
+#
+# As a model, we use a logistic regression model and check the classification report.
+
+# %%
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import classification_report
+
+logistic_regression = LogisticRegression().fit(X_train, y_train)
+print(classification_report(y_test, logistic_regression.predict(X_test)))
+
+# %% [markdown]
+#
+# When it comes to imbalanced datasets, data scientists tend to be unhappy with one of
+# the statistical metrics used. Here, they might be unhappy with the recall, which is
+# too low for their taste.
+#
+# Let's check the decision boundary of our model.
+
+# %%
+import matplotlib.pyplot as plt
+from sklearn.inspection import DecisionBoundaryDisplay
+
+_, ax = plt.subplots()
+DecisionBoundaryDisplay.from_estimator(
+    logistic_regression,
+    X_test,
+    ax=ax,
+    cmap="coolwarm",
+    response_method="predict",
+    plot_method="contour"
+)
+ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black")
+_ = ax.set(xlabel="Feature 1", ylabel="Feature 2")
+
+# %% [markdown]
+#
+# So we see that our model is conservative: it wrongly classifies samples from the
+# minority class as the majority class. However, if our data scientists want to
+# improve the recall, they would like to move the decision boundary to classify
+# correctly more samples from the minority class, at the cost of misclassifying more
+# samples from the majority class.
+#
+# A body of literature advocates resampling the training set such that the model is
+# trained on a more balanced dataset. In scikit-learn, setting the parameter
+# `class_weight` to `"balanced"` is equivalent to such a resampling of the training
+# set.
+#
+# We therefore repeat the previous experiment with this parameter set and check its
+# effect on the classification report and the decision boundary.
+
+# %%
+logistic_regression_balanced = LogisticRegression(class_weight="balanced")
+logistic_regression_balanced.fit(X_train, y_train)
+print(classification_report(y_test, logistic_regression_balanced.predict(X_test)))
+
+# %%
+_, ax = plt.subplots()
+DecisionBoundaryDisplay.from_estimator(
+    logistic_regression_balanced,
+    X_test,
+    ax=ax,
+    cmap="coolwarm",
+    response_method="predict",
+    plot_method="contour",
+)
+ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black")
+_ = ax.set(xlabel="Feature 1", ylabel="Feature 2")
+
+# %% [markdown]
+#
+# So we see that the recall increases at the cost of lowering the precision. This is
+# confirmed by the displacement of the decision boundary.
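+#
+# As an aside, we can verify the equivalence claim above with a small sketch (not part
+# of the original notebook): `class_weight="balanced"` should match a fit with the
+# explicit per-sample weights returned by `compute_sample_weight`, which is what
+# resampling emulates on average.
+
+# %%
+import numpy as np
+from sklearn.utils.class_weight import compute_sample_weight
+
+logistic_regression_reweighted = LogisticRegression().fit(
+    X_train, y_train, sample_weight=compute_sample_weight("balanced", y_train)
+)
+# The coefficients should agree with the `class_weight="balanced"` model up to the
+# solver tolerance.
+np.allclose(
+    logistic_regression_balanced.coef_, logistic_regression_reweighted.coef_, atol=1e-4
+)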
+
+# %% [markdown]
+#
+# However, here we completely discard the potential effect on the calibration of the
+# model. Instead of only checking the hard decision boundary, let's look at the
+# decision boundary based on the probability estimates.
+
+# %%
+fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5), sharex=True, sharey=True)
+for ax, model in zip(axes.ravel(), [logistic_regression, logistic_regression_balanced]):
+    disp = DecisionBoundaryDisplay.from_estimator(
+        model,
+        X_test,
+        ax=ax,
+        cmap="coolwarm",
+        response_method="predict_proba",
+        alpha=0.8,
+    )
+    ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black")
+    ax.set(xlabel="Feature 1", ylabel="Feature 2")
+    fig.colorbar(disp.surface_, ax=ax, label="Probability estimate")
+
+# %% [markdown]
+#
+# We see that the two models produce very different probability estimates. We should
+# therefore check the calibration of the two models to see whether one of them is
+# better calibrated than the other.
+
+# %%
+from sklearn.calibration import CalibrationDisplay
+
+disp = CalibrationDisplay.from_estimator(
+    logistic_regression, X_test, y_test, strategy="quantile", name="Unbalanced LR"
+)
+CalibrationDisplay.from_estimator(
+    logistic_regression_balanced,
+    X_test,
+    y_test,
+    strategy="quantile",
+    ax=disp.ax_,
+    name="Balanced LR",
+)
+disp.ax_.set(aspect="equal")
+_ = disp.ax_.legend(loc="upper left")
+
+# %% [markdown]
+#
+# We clearly see that the balanced logistic regression model is completely
+# miscalibrated. In short, this is the effect of resampling. We can have a look at the
+# ROC curves of the two models to check whether the ranking of the predictions changed.
+
+# %%
+from sklearn.metrics import RocCurveDisplay
+
+fig, ax = plt.subplots()
+RocCurveDisplay.from_estimator(
+    logistic_regression, X_test, y_test, ax=ax, linestyle="-.", name="Unbalanced LR"
+)
+RocCurveDisplay.from_estimator(
+    logistic_regression_balanced,
+    X_test,
+    y_test,
+    ax=ax,
+    linestyle="--",
+    name="Balanced LR",
+)
+
+# %% [markdown]
+#
+# We see that the two models have the same ROC curve. This means that the ranking of
+# the predictions is unchanged.
+#
+# In conclusion, we should not use resampling to deal with imbalanced datasets. If we
+# are interested in improving a given metric, we should instead tune the threshold,
+# set to 0.5 by default, that transforms the probability estimates into hard
+# predictions. It will have the same effect as "moving" the decision boundary but it
+# will not impact the calibration of the model. We will go into further detail on this
+# topic in the next section, but we can quickly experiment with the
+# `FixedThresholdClassifier` from scikit-learn, which allows us to set the threshold
+# used to transform the probability estimates into hard predictions.
+
+# %%
+from sklearn.model_selection import FixedThresholdClassifier
+
+threshold = 0.1
+logistic_regrssion_with_threshold = FixedThresholdClassifier(
+    logistic_regression, threshold=threshold
+).fit(X_train, y_train)
+
+# %%
+fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5), sharex=True, sharey=True)
+for ax, model, title in zip(
+    axes.ravel(),
+    [logistic_regression, logistic_regrssion_with_threshold],
+    ["Threshold 0.5 (default)", f"Threshold {threshold}"],
+):
+    disp = DecisionBoundaryDisplay.from_estimator(
+        model,
+        X_test,
+        ax=ax,
+        cmap="coolwarm",
+        response_method="predict",
+        plot_method="contour",
+    )
+    ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black")
+    ax.set(xlabel="Feature 1", ylabel="Feature 2", title=title)
+
+# %% [markdown]
+#
+# We see that the decision boundary moved similarly to the one of the balanced
+# logistic regression model.
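+#
+# As a quick check (a sketch, not part of the original notebook), the classification
+# report of the thresholded model should show the same kind of precision/recall
+# trade-off as the balanced model above.
+
+# %%
+print(classification_report(y_test, logistic_regrssion_with_threshold.predict(X_test)))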
+
+# %% [markdown]
+#
+# In addition, since we have a parameter to tune, we can easily target a given score
+# for the metric of interest, which is not trivial with resampling.
+#
+# We can go further and check that the two models are calibrated in the same way.
+
+# %%
+fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5), sharex=True, sharey=True)
+for ax, model, title in zip(
+    axes.ravel(),
+    [logistic_regression, logistic_regrssion_with_threshold],
+    ["Threshold 0.5 (default)", f"Threshold {threshold}"],
+):
+    disp = DecisionBoundaryDisplay.from_estimator(
+        model,
+        X_test,
+        ax=ax,
+        cmap="coolwarm",
+        response_method="predict_proba",
+        alpha=0.8,
+    )
+    DecisionBoundaryDisplay.from_estimator(
+        model,
+        X_test,
+        ax=ax,
+        cmap="coolwarm",
+        response_method="predict",
+        plot_method="contour",
+    )
+    ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black")
+    ax.set(xlabel="Feature 1", ylabel="Feature 2", title=title)
+    fig.colorbar(disp.surface_, ax=ax, label="Probability estimate")
+
+# %% [markdown]
+#
+# This is not a surprise: thresholding is a post-processing step applied to the
+# probability estimates. Therefore, it does not impact the calibration of the model.
diff --git a/content/python_files/causes_miscalibration.py b/content/python_files/miscalibration_under_over_fit.py
similarity index 69%
rename from content/python_files/causes_miscalibration.py
rename to content/python_files/miscalibration_under_over_fit.py
index c3d9bea..1141991 100644
--- a/content/python_files/causes_miscalibration.py
+++ b/content/python_files/miscalibration_under_over_fit.py
@@ -527,245 +527,4 @@ def xor_generator(n_samples=1_000, seed=None):
 _ = fig.suptitle(
     f"Number of knots: {tuned_model.best_params_['splinetransformer__n_knots']}, "
     f"Regularization 'C': {tuned_model.best_params_['logisticregression__C']}"
-)
-
-# %% [markdown]
-#
-# ## Effect of resampling on model calibration
-#
-# Another cause for model miscalibration is related to training set resampling. In
-# general, resampling is encountered when dealing with imbalanced datasets. In this
-# section, we show the effect of resampling on model calibration and the methodology
-# to use when it comes to imbalanced datasets.
-#
-# Let's synthetically generate an imbalanced dataset with 90% of the samples belonging
-# to the majority class and 10% to the minority class.
-
-# %%
-from sklearn.datasets import make_classification
-from sklearn.model_selection import train_test_split
-
-X, y = make_classification(
-    n_samples=50_000,
-    n_features=2,
-    n_redundant=0,
-    n_clusters_per_class=1,
-    weights=[0.99, 0.01],
-    class_sep=2,
-    random_state=1,
-)
-X_train, X_test, y_train, y_test = train_test_split(
-    X, y, stratify=y, test_size=0.9, random_state=0
-)
-
-# %% [markdown]
-#
-# As a model, we use a logistic regression model and check the classification report.
-
-# %%
-from sklearn.metrics import classification_report
-
-logistic_regression = LogisticRegression().fit(X_train, y_train)
-print(classification_report(y_test, logistic_regression.predict(X_test)))
-
-# %% [markdown]
-#
-# When it comes to imbalanced datasets, in general, data scientists tend to be
-# unhappy with one of the statistical metrics used. Here, they might be unhappy with
-# the recall metric that is too low for their taste.
-#
-# Let's check what would be the related decision boundary of our model.
- -# %% -_, ax = plt.subplots() -DecisionBoundaryDisplay.from_estimator( - logistic_regression, - X_test, - ax=ax, - cmap="coolwarm", - response_method="predict", - plot_method="contour" -) -ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black") -_ = ax.set(xlabel="Feature 1", ylabel="Feature 2") - -# %% [markdown] -# -# So we see that our model is conservative by wrongly classifying sample from the -# majority class. However, if our data scientists want to improve the recall, they -# would like to move the decision boundary to classify correctly more samples from the -# minority class at the cost of misclassifying more samples from the majority class. -# -# A body of literature is usually advocating for resampling the training set such that -# the model is trained on a more balanced dataset. In scikit-learn, the effect of the -# parameter `class_weight` provide an equivalence to resampling the training set when -# set to `"balanced"`. -# -# We therefore repeat the previous experiment but setting this parameter and check the -# effect on the classification report and the decision boundary. - -# %% -logistic_regression_balanced = LogisticRegression(class_weight="balanced") -logistic_regression_balanced.fit(X_train, y_train) -print(classification_report(y_test, logistic_regression_balanced.predict(X_test))) - -# %% -_, ax = plt.subplots() -DecisionBoundaryDisplay.from_estimator( - logistic_regression_balanced, - X_test, - ax=ax, - cmap="coolwarm", - response_method="predict", - plot_method="contour", -) -ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black") -_ = ax.set(xlabel="Feature 1", ylabel="Feature 2") - -# %% [markdown] -# -# So we see that the recall increases at the cost of lowering the precision. This -# is confirmed by the decision boundary displacement. -# -# However, here we completely discard the potential effect on the calibration of the -# model. Instead to check the hard decision boundary, let's check the decision boundary -# based on the probability estimates. - -# %% -fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5), sharex=True, sharey=True) -for ax, model in zip(axes.ravel(), [logistic_regression, logistic_regression_balanced]): - disp = DecisionBoundaryDisplay.from_estimator( - model, - X_test, - ax=ax, - cmap="coolwarm", - response_method="predict_proba", - alpha=0.8, - ) - ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black") - ax.set(xlabel="Feature 1", ylabel="Feature 2") - fig.colorbar(disp.surface_, ax=ax, label="Probability estimate") - -# %% [markdown] -# -# We see that the two models have a very different probability estimates. We should -# therefore check the calibration of the two models to check if one model is better -# calibrated than the other. - -# %% -disp = CalibrationDisplay.from_estimator( - logistic_regression, X_test, y_test, strategy="quantile", name="Unbalanced LR" -) -CalibrationDisplay.from_estimator( - logistic_regression_balanced, - X_test, - y_test, - strategy="quantile", - ax=disp.ax_, - name="Balanced LR", -) -disp.ax_.set(aspect="equal") -_ = disp.ax_.legend(loc="upper left") - -# %% [markdown] -# -# We clearly see that the balanced logistic regression model is completely -# miscalibrated. In short, this is the effect of resampling. We could have a look at the -# ROC curves of the two models to check if the predictions ranking changed. 
- -# %% -from sklearn.metrics import RocCurveDisplay - -fig, ax = plt.subplots() -RocCurveDisplay.from_estimator( - logistic_regression, X_test, y_test, ax=ax, linestyle="-.", name="Unbalanced LR" -) -RocCurveDisplay.from_estimator( - logistic_regression_balanced, - X_test, - y_test, - ax=ax, - linestyle="--", - name="Balanced LR", -) - -# %% [markdown] -# -# We see that the two models have the same ROC curve. So it means, that the ranking of -# the predictions is the same. -# -# As a conclusion, we should not use resampling to deal with imbalanced datasets. -# Instead, if we are interesting in improving a given metric, we should instead -# tune the threshold that is set to 0.5 by default to transform the probability -# estimates into hard predictions. It will have the same effect as "moving" the -# decision boundary but it will not impact the calibration of the model. We will go -# in further details in this topic in the next section. But we can quickly experiment -# with the `FixedThresholdClassifier` from scikit-learn that allows to set a threshold -# to transform the probability estimates into hard predictions. - -# %% -from sklearn.model_selection import FixedThresholdClassifier - -threshold = 0.1 -logistic_regrssion_with_threshold = FixedThresholdClassifier( - logistic_regression, threshold=threshold -).fit(X_train, y_train) - -# %% -fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5), sharex=True, sharey=True) -for ax, model, title in zip( - axes.ravel(), - [logistic_regression, logistic_regrssion_with_threshold], - ["Threshold 0.5 (default)", f"Threshold {threshold}"], -): - disp = DecisionBoundaryDisplay.from_estimator( - model, - X_test, - ax=ax, - cmap="coolwarm", - response_method="predict", - plot_method="contour", - ) - ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black") - ax.set(xlabel="Feature 1", ylabel="Feature 2", title=title) - -# %% [markdown] -# -# We see that the decision boundary similarly to the balanced logistic regression model. -# In addition, since we have a parameter to tune, we can easily target a certain score -# for some targetted metric that is not trivial with resampling. -# -# We can go further and check that the two models that we have are both calibrated the -# same way. - -# %% -fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5), sharex=True, sharey=True) -for ax, model, title in zip( - axes.ravel(), - [logistic_regression, logistic_regrssion_with_threshold], - ["Threshold 0.5 (default)", f"Threshold {threshold}"], -): - disp = DecisionBoundaryDisplay.from_estimator( - model, - X_test, - ax=ax, - cmap="coolwarm", - response_method="predict_proba", - alpha=0.8, - ) - DecisionBoundaryDisplay.from_estimator( - model, - X_test, - ax=ax, - cmap="coolwarm", - response_method="predict", - plot_method="contour", - ) - ax.scatter(*X_train.T, c=y_train, cmap="coolwarm", edgecolors="black") - ax.set(xlabel="Feature 1", ylabel="Feature 2", title=title) - fig.colorbar(disp.surface_, ax=ax, label="Probability estimate") - -# %% [markdown] -# -# This is not a surprise since the thresholding is a post-processing that threshold the -# probability estimates. Therefore, it does not impact the calibration of the model. 
+) \ No newline at end of file diff --git a/pixi.toml b/pixi.toml index 37e725a..10a7ff0 100644 --- a/pixi.toml +++ b/pixi.toml @@ -11,7 +11,8 @@ generate-predictions = { cmd = "python _generate_predictions.py", cwd = "content build-calibration-curve = { cmd = "ipython build_calibration_curve.py", cwd = "content/python_files" } check-generated-predictions = { cmd = "python check_generated_predictions.py", cwd = "tests" } different-calibration-curves = { cmd = "ipython different_calibration_curves.py", cwd = "content/python_files" } -causes-miscalibration = { cmd = "ipython causes_miscalibration.py", cwd = "content/python_files" } +miscalibration-under-over-fit = { cmd = "ipython miscalibration_under_over_fit.py", cwd = "content/python_files" } +miscalibration-reweighting = { cmd = "ipython miscalibration_reweigthing.py", cwd = "content/python_files" } [dependencies] From 68920c54b90a719e27ca6713e5005189155f3813 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 22 Aug 2024 14:04:40 +0200 Subject: [PATCH 20/28] iter --- pixi.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pixi.toml b/pixi.toml index 10a7ff0..dbfac9c 100644 --- a/pixi.toml +++ b/pixi.toml @@ -12,7 +12,7 @@ build-calibration-curve = { cmd = "ipython build_calibration_curve.py", cwd = "c check-generated-predictions = { cmd = "python check_generated_predictions.py", cwd = "tests" } different-calibration-curves = { cmd = "ipython different_calibration_curves.py", cwd = "content/python_files" } miscalibration-under-over-fit = { cmd = "ipython miscalibration_under_over_fit.py", cwd = "content/python_files" } -miscalibration-reweighting = { cmd = "ipython miscalibration_reweigthing.py", cwd = "content/python_files" } +miscalibration-reweighting = { cmd = "ipython miscalibration_reweighting.py.py", cwd = "content/python_files" } [dependencies] From 89ce43ec829c64f10ca1bc1871aad43a8570dc25 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 22 Aug 2024 14:17:39 +0200 Subject: [PATCH 21/28] iter --- pixi.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pixi.toml b/pixi.toml index dbfac9c..982bfff 100644 --- a/pixi.toml +++ b/pixi.toml @@ -12,7 +12,7 @@ build-calibration-curve = { cmd = "ipython build_calibration_curve.py", cwd = "c check-generated-predictions = { cmd = "python check_generated_predictions.py", cwd = "tests" } different-calibration-curves = { cmd = "ipython different_calibration_curves.py", cwd = "content/python_files" } miscalibration-under-over-fit = { cmd = "ipython miscalibration_under_over_fit.py", cwd = "content/python_files" } -miscalibration-reweighting = { cmd = "ipython miscalibration_reweighting.py.py", cwd = "content/python_files" } +miscalibration-reweighting = { cmd = "ipython miscalibration_reweighting.py", cwd = "content/python_files" } [dependencies] From 4f4e553540ed43c54b20469735e4dedad315fb4b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 22 Aug 2024 14:55:43 +0200 Subject: [PATCH 22/28] iter --- .../miscalibration_under_over_fit.py | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/content/python_files/miscalibration_under_over_fit.py b/content/python_files/miscalibration_under_over_fit.py index 1141991..def7439 100644 --- a/content/python_files/miscalibration_under_over_fit.py +++ b/content/python_files/miscalibration_under_over_fit.py @@ -1,11 +1,11 @@ # %% [markdown] # -# # The causes of miscalibration +# # Miscalibration due to inappropriate model hyperparameters # -# ## Effect of 
under-fitting and over-fitting on model calibration
-#
-# In this section, we look at the effect of under-fitting and over-fitting on the
-# calibration of a model.
+# Model complexity is controlled via hyperparameters. Depending on their values,
+# we can obtain models that under-fit or over-fit. In this notebook, we
+# investigate the relationship between model hyperparameters, model complexity, and
+# calibration.
 #
 # Let's start by defining our classification problem: we use the so-called XOR problem.
 # The function `xor_generator` generates a dataset with two features and the target
 # variable following the XOR logic.
@@ -437,21 +437,27 @@ def xor_generator(n_samples=1_000, seed=None):
 )
 
 _, axes = plt.subplots(ncols=3, figsize=(15, 5))
+full_metric_name = {
+    "neg_log_loss": "negative log loss",
+    "roc_auc": "ROC AUC",
+    "accuracy": "accuracy",
+}
 for metric_name, ax in zip(["neg_log_loss", "roc_auc", "accuracy"], axes):
     disp = ValidationCurveDisplay.from_estimator(
         model,
         X_train,
         y_train,
         param_name="logisticregression__C",
-        param_range=np.logspace(-6, 6, 25),
+        param_range=np.logspace(-6, 6, 13),
         scoring=metric_name,
         ax=ax,
         cv=ShuffleSplit(n_splits=10, test_size=0.2, random_state=0),
     )
     ax.set(
         xlabel="Regularization C",
+        ylabel=full_metric_name[metric_name],
         xscale="log",
-        title=f"Validation curve on {disp.ax_.get_ylabel()}",
+        title=f"Validation curve on {full_metric_name[metric_name]}",
     )
@@ -488,7 +494,7 @@ def xor_generator(n_samples=1_000, seed=None):
 tuned_model = RandomizedSearchCV(
     model,
     param_distributions=param_distributions,
-    n_iter=50,
+    n_iter=25,
     scoring="neg_log_loss",
     cv=ShuffleSplit(n_splits=10, test_size=0.2, random_state=0),
     random_state=0,
@@ -521,10 +527,15 @@ def xor_generator(n_samples=1_000, seed=None):
     strategy="quantile",
     n_bins=10,
     ax=ax[1],
+    name="Tuned logistic regression",
 )
 _ = ax[1].set(aspect="equal")
 
 _ = fig.suptitle(
     f"Number of knots: {tuned_model.best_params_['splinetransformer__n_knots']}, "
     f"Regularization 'C': {tuned_model.best_params_['logisticregression__C']}"
-) \ No newline at end of file
+)
+
+# %% [markdown]
+#
+# We see that our hyperparameter tuning procedure leads to a well-calibrated model since
+# we used a proper scoring rule.
From 2e4f9c32b01fdf818e88a40b3ab89a5e5be0f8eb Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 22 Aug 2024 16:09:40 +0200
Subject: [PATCH 23/28] iter

---
 .../miscalibration_under_over_fit.py          | 68 +++++++++++++++----
 1 file changed, 54 insertions(+), 14 deletions(-)

diff --git a/content/python_files/miscalibration_under_over_fit.py b/content/python_files/miscalibration_under_over_fit.py
index def7439..64ad88d 100644
--- a/content/python_files/miscalibration_under_over_fit.py
+++ b/content/python_files/miscalibration_under_over_fit.py
@@ -14,6 +14,7 @@
 # %%
 # Make sure to have scikit-learn >= 1.5
 import sklearn
+
 sklearn.__version__
 
 # %%
@@ -428,7 +429,8 @@ def xor_generator(n_samples=1_000, seed=None):
 # - the accuracy that is a thresholded metric.
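+#
+# Before computing the validation curves, here is a small illustration (a sketch, not
+# part of the original notebook) of how these three metrics react to miscalibration:
+# a rank-preserving "sharpening" of the probabilities leaves the ROC AUC and the
+# 0.5-thresholded accuracy exactly unchanged, while the log loss moves, because only
+# a proper scoring rule is sensitive to calibration.
+
+# %%
+import numpy as np
+from sklearn.metrics import accuracy_score, log_loss, roc_auc_score
+
+rng_demo = np.random.default_rng(0)
+y_demo = rng_demo.integers(0, 2, size=1_000)
+# Reasonable probabilities: centered on 0.25 for class 0 and 0.75 for class 1.
+proba_demo = np.clip(
+    0.25 + 0.5 * y_demo + rng_demo.normal(scale=0.15, size=1_000), 0.01, 0.99
+)
+# Strictly increasing transform that keeps 0.5 fixed, hence the same ranking and the
+# same hard predictions, but a different (mis)calibration.
+proba_sharp = proba_demo**3 / (proba_demo**3 + (1 - proba_demo) ** 3)
+for label, proba in [("original", proba_demo), ("sharpened", proba_sharp)]:
+    print(
+        f"{label}: log loss={log_loss(y_demo, proba):.3f}, "
+        f"ROC AUC={roc_auc_score(y_demo, proba):.3f}, "
+        f"accuracy={accuracy_score(y_demo, (proba > 0.5).astype(int)):.3f}"
+    )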
# %%
-from sklearn.model_selection import ShuffleSplit, ValidationCurveDisplay
+from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
+from sklearn.model_selection import ShuffleSplit, validation_curve
 
 model = make_pipeline(
     SplineTransformer(n_knots=15),
@@ -436,29 +438,67 @@ def xor_generator(n_samples=1_000, seed=None):
     LogisticRegression(max_iter=10_000),
 )
 
-_, axes = plt.subplots(ncols=3, figsize=(15, 5))
-full_metric_name = {
-    "neg_log_loss": "negative log loss",
-    "roc_auc": "ROC AUC",
-    "accuracy": "accuracy",
-}
-for metric_name, ax in zip(["neg_log_loss", "roc_auc", "accuracy"], axes):
-    disp = ValidationCurveDisplay.from_estimator(
+
+n_splits, param_range = 50, np.logspace(-2, 4, 30)
+for metric_name in ["neg_log_loss", "roc_auc", "accuracy"]:
+    train_scores, test_scores = validation_curve(
         model,
         X_train,
         y_train,
         param_name="logisticregression__C",
-        param_range=np.logspace(-6, 6, 13),
+        param_range=param_range,
         scoring=metric_name,
-        ax=ax,
-        cv=ShuffleSplit(n_splits=10, test_size=0.2, random_state=0),
+        cv=ShuffleSplit(n_splits=n_splits, test_size=0.2, random_state=0),
+        n_jobs=-1,
    )
+
+# %%
+fig, axes = plt.subplots(ncols=3, figsize=(15, 5))
+full_metric_name = {
+    "neg_log_loss": "negative log loss",
+    "roc_auc": "ROC AUC",
+    "accuracy": "accuracy",
+}
+for idx, (metric_name, ax) in enumerate(
+    zip(["neg_log_loss", "roc_auc", "accuracy"], axes)
+):
+    rng = np.random.default_rng(0)
+    bootstrap_size = 5
+    ax_hist = make_axes_locatable(ax).append_axes("top", size="20%", pad=0.1, sharex=ax)
+    all_best_param_values = []
+    for _ in range(200):
+        selected_fold_idx = rng.choice(n_splits, size=bootstrap_size, replace=False)
+        mean_test_score = test_scores[:, selected_fold_idx].mean(axis=1)
+        ax.plot(
+            param_range,
+            mean_test_score,
+            color="tab:blue",
+            linewidth=0.1,
+            zorder=-1,
+        )
+        best_param_idx = mean_test_score.argmax()
+        best_param_value = param_range[best_param_idx]
+        best_test_score = mean_test_score[best_param_idx]
+        ax.vlines(
+            best_param_value,
+            ymin=test_scores.min(),
+            ymax=best_test_score,
+            linewidth=0.3,
+            color="tab:orange",
+        )
+        all_best_param_values.append(best_param_value)
     ax.set(
         xlabel="Regularization C",
         ylabel=full_metric_name[metric_name],
         xscale="log",
-        title=f"Validation curve on {full_metric_name[metric_name]}",
     )
+    bins = (param_range[:-1] + param_range[1:]) / 2
+    ax_hist.hist(
+        all_best_param_values, bins=bins, color="tab:orange", edgecolor="black"
+    )
+    ax_hist.xaxis.set_tick_params(labelleft=False, labelbottom=False)
+    ax_hist.yaxis.set_tick_params(labelleft=False, labelbottom=False)
+_ = fig.suptitle("Stability of parameter tuning based on different metrics")

# %% [markdown]
#
# We see that our hyperparameter tuning procedure leads to a well-calibrated model since
# we used a proper scoring rule.
From 3bc76a58f63ee67ca4babbfa9ec3a7b549cbef08 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 22 Aug 2024 16:45:17 +0200
Subject: [PATCH 24/28] add results validation curve

---
 .../miscalibration_under_over_fit.py          | 29 +++++++++++++------
 content/results/validation_curve_accuracy.npz | Bin 0 -> 24276 bytes
 .../results/validation_curve_neg_log_loss.npz | Bin 0 -> 24276 bytes
 content/results/validation_curve_roc_auc.npz  | Bin 0 -> 24276 bytes
 4 files changed, 28 insertions(+), 13 deletions(-)
 create mode 100644 content/results/validation_curve_accuracy.npz
 create mode 100644 content/results/validation_curve_neg_log_loss.npz
 create mode 100644 content/results/validation_curve_roc_auc.npz

diff --git a/content/python_files/miscalibration_under_over_fit.py b/content/python_files/miscalibration_under_over_fit.py
index 64ad88d..62beaa5 100644
--- a/content/python_files/miscalibration_under_over_fit.py
+++ b/content/python_files/miscalibration_under_over_fit.py
@@ -429,6 +429,7 @@ def xor_generator(n_samples=1_000, seed=None):
 # - the accuracy that is a thresholded metric.
 
 # %%
+from pathlib import Path
 from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
 from sklearn.model_selection import ShuffleSplit, validation_curve
@@ -438,19 +439,33 @@ def xor_generator(n_samples=1_000, seed=None):
     LogisticRegression(max_iter=10_000),
 )
 
+# Since the computation of the validation curve is expensive, we store the results
+# and commit them to the repository. If the folder containing the results does not
+# exist, we compute the validation curve and store the results.
 
-n_splits, param_range = 50, np.logspace(-2, 4, 30)
+n_splits, param_range = 100, np.logspace(-2, 4, 30)
+test_scores = {}
 for metric_name in ["neg_log_loss", "roc_auc", "accuracy"]:
-    train_scores, test_scores = validation_curve(
-        model,
-        X_train,
-        y_train,
-        param_name="logisticregression__C",
-        param_range=param_range,
-        scoring=metric_name,
-        cv=ShuffleSplit(n_splits=n_splits, test_size=0.2, random_state=0),
-        n_jobs=-1,
-    )
+    results_file_path = Path(f"../results/validation_curve_{metric_name}.npz")
+    if not results_file_path.is_file():
+        _, test_scores_metric = validation_curve(
+            model,
+            X_train,
+            y_train,
+            param_name="logisticregression__C",
+            param_range=param_range,
+            scoring=metric_name,
+            cv=ShuffleSplit(n_splits=n_splits, test_size=0.2, random_state=0),
+            n_jobs=-1,
+        )
+        parent_folder = results_file_path.parent
+        if not parent_folder.is_dir():
+            parent_folder.mkdir(parents=True)
+        np.savez(results_file_path, test_scores=test_scores_metric)
+        test_scores[metric_name] = test_scores_metric
+    else:
+        with np.load(results_file_path) as data:
+            test_scores[metric_name] = data["test_scores"]
 
 # %%
 fig, axes = plt.subplots(ncols=3, figsize=(15, 5))
@@ -468,7 +483,7 @@ def xor_generator(n_samples=1_000, seed=None):
     all_best_param_values = []
     for _ in range(200):
         selected_fold_idx = rng.choice(n_splits, size=bootstrap_size, replace=False)
-        mean_test_score = test_scores[:, selected_fold_idx].mean(axis=1)
+        mean_test_score = test_scores[metric_name][:, selected_fold_idx].mean(axis=1)
         ax.plot(
@@ -481,7 +496,7 @@ def xor_generator(n_samples=1_000, seed=None):
         best_test_score = mean_test_score[best_param_idx]
         ax.vlines(
             best_param_value,
-            ymin=test_scores.min(),
+            ymin=test_scores[metric_name].min(),
             ymax=best_test_score,
             linewidth=0.3,
             color="tab:orange",
         )
diff --git a/content/results/validation_curve_accuracy.npz b/content/results/validation_curve_accuracy.npz
new file mode 100644
index 0000000000000000000000000000000000000000..42a4b02b66ee40cd15cc2b69ae2a4720f62b106e GIT binary patch literal 24276 zcmbW5CR*4^VWlnB z*3{T)O#{rlAM>?4l-TObocp^c`+xIfjBE7oIrrXk&;Oir?{lAzdGEaV*yo?UQUAVn z<4<3D>8Jl0|DL$<(!v@9zDtzwy>@Zr=Fvjqm)tI>SHz;3wa|^U}>5 zzrOL#?bq(!fA!w&Z{526?bp9~`|Cf|KoeVx_hC%|BF}N zx_co$_55?s-1^3I&prLjt>4}HKmR@c;d^hr@c4(@)!#1Rz1_rreD>7O-u>Wcwf^ms zfB)|Kc=e zaWdM^R_9mlovkWx52s!8q+Xoh>sS9CtggcOej`1+a_5Byt1I`OY+TaIOJc?KV8r`i z@-BYsRmE$&`rZ|RqtSl4`uAn}KRaFV2QMgJ;tuKML4UZU&-i73?J(mYQCI#1LuQG8&=foy)5db0Rb-qs_3>rt=UQRhcv zogS~AzA|_^>sVch8-J_c`QAF_5C37tNk4Ygy}Gr2%&%Un9?$0fQtR<-ti!Xx?`*E$ z+OG~rd)INT=iOLe$Dm{R@X?;gEov`PF&0ljU80oiCvC zN9{koZxx^2Ja5&$em1ZF?3!O3AU#wcOENxC9Vg-e)pzcn_G@{>)AgywZT8L^^2|Pe z%pK#<`-`t$eC4bqeV-Z}JDVCu$cL_QaE3 ze(Ciyp1jmv>*#P^f6D*Ez4KMYr?0=}=eN4G?!*DoL;YduQ#KF8FJ*k8e&ovU$Md>d z_0qC)X6{?Yv&X+Be%hgNkUeqC{E)2&sP5ITdWY)L`mJ%FjPuhcBd?##`=pv@yLbI% zPkosm(kJqd|B1#!_Yt#%a48z z>%PHtxay~`PsQ_iKF5-0@k>;HP&;`EtzY(`^=kjIFF|_yR-)?>o@9K>zkVK%uYKCN z#=5RL>F2d6>!(hfhuBHvuYS;Y{n<4itbCu&=Q1^ZJ@U1l&#P)*+l=e-ZeD-QuWqtl z)os?1x?4hZtDVe_`Jj2o(D;<;hiZ zX#EzS!;!~>dB0%)F28nTA3vGbUDrkPiGy_pUH=m8d)6zN9cUiy&^Ysp3q6?~>sDRc zugTTF?~f{eeXcYwWH;xhhx{Q!b|9YOMuz%9dh@f3cdfs}aldmopTDc`TKBtgpV;pS z?T75K&o0C((Kx7`9Wow?c!>jao=P+xugq($+s!y{Z07sH^6zB6r(l=8M1DbjB^rl6 zq+g1k_(1Dde`pW&P5BJRtwc&Kp&q{a#Qzta$hPZgp+nuKaDr^UK{_$5juf z^Syk@`_FaMr+U?|=GlKgqF=5Hvhh%V^{;*s#UG!_U%$sM{qcC;eKOyJimN)#`dykQ z^Jcs}CdzyEV<+bkSG=twvbfa#)9?44FUnq@n`{2#alPvARoKmZWj@I17k^UEE?(ja z<;%JzLvhhAp7vu{=kN7={#t&XPM+-W6VJMyo$uZ6S3J*0ee9!g*^gbQ-FaR7_>u9V z7dLUTPVp^1{XDk#AI)>5`z>)}_qzEqPbu>!^CK_n8uEjjC@-dY4{*WD*dhswXUi5fD?ffnO`#!Md>Fbett~%=PkHphF{DbC8~_TZt1pt_w?OUzE>8`Gf4@#m_{1Q!k&X=ePYCU-_>2 zx8uHWH_so%>vVok$ba>Vw|#}I9j`?E@Zevf^}K}HKlvwL_e<7u#d|x?S#`GGW+5T)kPw{kH)9p9~J*TM^(K0epmeE124!geE1KO7d>PLvcoPt z8GrHSS0dguPd}e@U8?^5on7ViXr8x>m*0$oc9{L>(=UE!p7b*hd+d-CwLgme{etUH z&3iJ=&3!*)KKaOVz*7E}^z1BQj+1ZwwL|BZMD4}9-**?E{!Z3@UE_E2xu$q!9^?mV zCtGhzs7|xpc&MFy`RAAYMSjS&9{T-Y#qDsuFRS@`f3{y)H{u4xAM!uf9l!ZY<~QUo z9^^#rSoUGDLxq&@e*Hoh2q37 zn0mZjk0Aav|8DM2p5xos#h-uX=NCUyubm#Ip8d?1JgQIr;6rbm{@R_Noii#v+xgzN zuEYE1XXkbHA-m9gF#Vu6J`o@1S7===Ve#qj|Ej*u=5uxHw(7LMYpD45dAjn}?}yBv zevIpypFi}O-=#dukG$2sayH+?s?YN4bbd#p zAAR=2&;D$`s`}f^=g;bxAN=G$Ub!!l<&g}bPs)N2i z!c*NYA--9M#xLpBiTIE+PU1xmW` zzoGb$q4C-i%~$&VUZK`Y{~cZV+20?jXZ-MR9ky@Tmy#bp_y^N(GTu;~i+9$s{hXg< z{q4i9!?k|;{-Nw2&-V)U5A*XEZ;0oTobk|49OWZXyr6cn@$4m5Ui$o8`Pu)z&c1B@ zCQo^Vu1jQyr+Qez?8k3$#1lVuv_p0t?f20*H#ZiqkH+^Lz7yUIxu3hM6Zz(cdR#*N z?Z@n>S9&s}*B`3il+`nI-G!y^zpJbDaWwCPtrzih-#`}6MDf-SihJgd9*QG5Q9C=? 
zPH!Ksc=vZ^b$vP-?+g0_~5PB7yqU)rHMg6fRyoME5FzxwUwb4mM@dNw~h=7ZXy z{EL%(r=DyaUOA8P`jL${kNSDE9yarPQ9SEhc{K0$Yy9z$`}Pn=SUsAbKh`7v z@xc#jhxFN>U8tXVq58@Fil4B?AI$G2$<}p^@9znV&&m9|Q1O*D=Ji+n z@iwn{oR7&%nElcoIZ@p}dEl3IrT(?o_2+QB7izw>->v8Gja1zo&-0}HkUe~%JS<`6 zgA8*V|6%f{=MNMQvO3OnEWcF;{a&TU_4j8ru7Bsl`mwIa*6DSqe~uSl?PTlpI@Di& zVe#wlplcuK??(A6{_3&D_c_u!)o%T$Yx*Twp4E%|!OWXFrYFm*dcYGFpZ?vPny>GZ ziho~+@x~_+FM4vKb|?;HnB&O!;7i8$QGI_eTK*r5=hyvrD>ZIApKplsQeNr#NybB* zApiIU?KA3%tX&@YgCCh)a$T3!+Dw**ZqB2e8tl|>?WF*AK6aN9{bnn<=6fO?N4O& zA-~n$=c|g>cK)4)eb_mMACUieLG5IG67hrD$#`Ww>!%&B?8lD#1X%UkbzS>^|IQbC zc;E@~g7jH$^z1C5evsdZ@<5MoV%1^aFO=PbagN%5PFd@vziU^=)>W=Q>n{0{#f2Sc zT^etlS6=qNE7s3=yrFnj9rX7(Da&{9+5bC2ab-^)@KA3V=akh=qVt7y2=&W$>(=?B z&QoXe`+VmsSbp^18x{ZlJva8mO&;V`9^_jd#UI*tq5g^N>X-KQ=Lh@boWJtY_qWCO zcz)kq$l?C?~%%{?TiE7 z{Krpx<-vMO)V_rB3E34l^NE-CME&X2v2|K;>ihiMpVhNY#sb1Yh&859#IIJoIGoHjZ9Bizif{>QX%?>aUKCBg=2aw||eY z+B?rB_r7KSvu@ZmU!pkUBOZy`#m6}FK>kBu>il&ye}B?*Aolnx zKlaO|{ON~}@pvX`$2*Z9;!Dnc+O6BFi~jDr^4>fv4*h#7{7rs%(&NW(^QJ$^Gv%dm z^msz`MwWNaHJm45?Z5lq=@ifYp2I$EUOe!F>A!ybNmRG|v`)>ZP9eQ@rVcA_{r8i# zU-b0|->UciyV1(mZr=aa{@MA)2M;pD`#MxN?1%&YWc*?BCR?{q9?MSGbJgW;zVEL5 z_3z1;hhLCCP~O!Sq&JS7$PUC?-mgRR@wa&O{co+){`<*_cYiNh{_OwmCcouxDX;RU z-*w1tBKuIB#082ES^YxSr)uBK_rF!o{ddmd#*gd#=9hjc+t-%jp+7V~o{)Yi?{{9j y`MGBwd-NyhFMnJ9^1n~i;=cOd-`~81_(A$RFMi>Rm+G4r|9Rm8|8Vu=^#27<&m1}c literal 0 HcmV?d00001 diff --git a/content/results/validation_curve_neg_log_loss.npz b/content/results/validation_curve_neg_log_loss.npz new file mode 100644 index 0000000000000000000000000000000000000000..f1bd2b41881eda756aab312c165696fb2914911b GIT binary patch literal 24276 zcmbTe_dnJD8$XUHiKLX3P)59@Pzs5XLP;5+60$SOPNfnek%ktD22pk;B`#$YS!W*{ zoM~l~2>D*`+vofF2R?o|H*SsdIM3^GUH5f4dT5E@7QX-UrNn2hE|{|T|9^?|N%Dny zhJ?C=xStF54ABZaAIZ4LNd+k!w zR8;di7aSVw7U*&=*u(RGKer1G^TZ#A__&?-#BVq2ZqroM-nLC$Q!z^M|M5#id*$pt z=|LWlWnD|H+IaA=LFeu1*F5O$u1@pw<^jXDW^IrV5B}{YuXhOHg4w;f#F`fzu-3Lp z&Pim0p6J@KE7~leMIUSscI$x|K_jzE4h&!v-l#fpuN{W+eDro5rNNRf2`raK6i8jS zsbXaZ1m<5VwBOSOMYnffO^@k; z8?%g^VX-W5S9>i~PiI5oWYiJsc^;^ftQZ0PJb1hyOB?vXgC+a6`6oW)fo;hLwQUD@ zprgKe_woiV9NS#zGAqV~OqFTIyX74Cb~@m&NhJ$BuJ5xP-OGgO#f4yUco$?kdQs?a zJ0PlF)3D8)4r|Q5a*SfB@Kv~$QD;nnKTEa_hTb59ly62$?tU^XIrmuV%3U%Hu1}_Y zQlKOb5G_-hPF)9l*EAy{N>u3mRjcdJknY!A{Ud0)|#2lMvMckKGra5b1ww-Kg;%9(+kH! 
zuPsWC^Po9HK`^d@2iMf}&TYl#{A2jY>w7#d)IZ`)?o#2xtBa-VSt$+#tDZJy0}G59 zQ90eam{1}r@2O|m1>@?U%wkSOA>6g z99wE4OoHlP{nmF$L^yHD<;L1dB8VPd!YkolYX}AS zXe;(lmr&tpW@z}zoOalycyWtjcNdKKWDU91F(J9|M3yXv1Kf?7l+oM0z~xa|`z-sQ z)#Loje|)`=UcE#1*)R{RRvCOa-NJ(rIRmqtbRO6|Jd$_9k_S&)c$zXlxlojMkbeh0 z-}jMH2|t%`;Ee63*rN9=`2OAW%`H6dy^Bwu9CYu3O?5Wvr&Bs$yK84!qADHUmpAPE zZAOI)kh|M}M~3r9%zmy}z;i~ruOB&20_PpwR-VU6(7ZsiTJRvj=JLMx5BfX!(Z#aQ+P1$O5562$b)>M@#pzq9+-RT z^bw_b;4tMD(V5BxX3vMpA9N0g9Ss5V4mMbB?u!e|WPz`d7ERcI3CcO=|EtOGf({#p z*)f?;U`_4Tmik49LE-H0BKv7Dbw7L6FHtJc)K8UV?xjH4O6jnL7BZ|3W^ke(kU=R+ zKhf+183IG=kI73@fcW=M-#IP?9O@o0xLav(O(g!tiFrC)%ToO}Zqx~~PlcqEhPvR# z^Bps4f>>vqrD?`K*jGjsY&$1;pj-IZKK|%0h}WSVANe>2>UZmJ#ZB|TfA&&P1dj)2 z10BwaHS)lE%aFBe1`kXhJrIzz=K+|le7>E-g&pfuRhH{>A&TuRta+FN(YLD$!&Nc>w%Psua)G87TkV0lngt!IdA{A?n!33fn0Go4WqH&<%6mezRxr-#0T}xc11F z4eHq;f@fwp;FDA%(m|TBH zM`SA(M9-we*{X41)0Jv=PCE-Kj`;m2bczYVHtmL=p5Z<6z4hW%HJ(#M@ywe2bnrj; zbDM(+75;v>y-AKp2JJKS=%>{rpx#Z=A+E=JOqo}$Y9d1T_`sUJ8X{0loZpi-kf8R~ z^P?OF3DPR=KF_El!-U90hnF=K_)CzqS=gVK zZYa7xx)1BQ5&U((aY5Mnc!vQ$*0oMx%(g#lC>M8JJCn_Vg^GO}_o^_#;11#52E8sQ z6rC4r^ymPuN_}#NC>_QXFW35Q$2!*F3tjP>4B@+Nbds?z*BzCm-1|#{UO63m3mr1B zY^44=WRPK{y_?C-D-`fA_OaTtga#hNQJn_CbV%TbjrBbpu+@-xLJj+3>MM~g=D|#e z4*ag9N#ww|z18%>u3k9ts3HA)*dX|T(uK%VL$H67$Qa|(DAbq-@(E*KUT~?nzGjvO zL0R7{7g~9sQXtcQ=ME3bQd3vDY4ITMyyR?F8W%V>#rfXVb3m?N z1zWC}XI?wn10e_Bjd-tPK<>B0OL$7{aBtU&lTOd5!0=Tikn_o~@9We9y)7gNJpM?a zW)~4e-n0d`+#!JD!Q=Mfy9h9^Bp&RiPJrDTFRLy;K>+z*g>!sO1n_6GFYU}ELQeeB z;^SBsCz2oC-Fu4yPp#J9y11MUtQ|Iv$1Zii@nXxu*B82AOZ%lHeD7BZ8;q!aHI#4r8aF!*~9=Ja|8aAUG3nexcU|O z8x`vIx#iGFWEeKxHPjtT0^0z$J)SR#&^TSTwNsf04uJ-{&n*z(#{6MAXB!c$*-N{A zlo4SygYQ7K9|>M|Ie*#jM~3Y7S=svksNkUbgnuxd4#yPgxVnm+AQk#xyt2F-3PL3x zC?#^hef`mpv6sDIl)Aq{Z=MUe?i+kebH+gF=t4}YNH4q*Xp|JfzSWc0m7PW6;pg!1 z$Y1PN9xf%5`W8GW_;iRK_MHoa)3^C{3v=P7#ykEroJXpLc}LwVSTHX5AeeK63FcJ~ z9tz?;BK=F!-?IkaN2KrRnn5~eI?_DPpe(C;Y`@0LG zY-ZJ`a+#p?#dn|ZS2hgimyKL3=!F#iH`7;#`a#>OsU?Uo44GDMy~8KR;q%hWwvP)u zFtLBLz(2r)IDxpN{}5kQHYk{FOXb1U7Utw?Qyxrvt)FVE=fcSl2Z?$KF8nxF5-72b z1D~Ef+kL!?1?MGrhvK7{Ks$BC_lR~kh^?~{EP2}rGqy+D(;l?Lyk09`n;0E}rW3@( z@jmLhI)C>)PlXVvZ=AeiRCrZd*L*FP3XYBA+A$_Hkk?33^gl)io7K^W%Zcq^6sw%* zD9M17xeWy#k=+pTOH?v^h6!hHnV>LgIRzA>$td@Iz)QOYZp)SoL?_ znSM78diSr;*4IpcPu*dO?Kp=rmu~4=_|5~NoF`AZ(|GW;Goef4IO5E66wRhlF7Sp_ z7*i*>@T=V5&*fVjNSkerKY5W23}&r-jV9J}?0*4WmwLc<;}VyfPZ%)1=0C}qB^?l# zcd0S1ody?|`P$7H&yxoiS6xbPPCL!TVhWmz=X~xSCH{ICxOM8+8bTvQMfBVQV zJ+Uo(({nP2AC#^7=s*F(REz3W|0uB6KFVu<0~KP@Zi`30rDvUJJUmi0oX+_3H_@&;=?`Fi&% z#8m;a-98r)SH}Mh+OUQPa!LG!$1mbN2#gQ(LB4RqcYm$YbvE2jdt&)hfdvJVH)KV2 z^uTJRJq zbY-z2a?N3h*Oyq(9Qe<*^C$La`I9^5?lC}HK-BaZ-Z$0$hvAx%G_caM9JH39z<%<^ zs@(S^c=&YT(h52e_D7uF`O%LEsVVXgw_hNF!bjckXVXNu_$0n{S0f2bRu#21rQ-9B zN&VsZhXOr!-q5W~X`m>QpQz(ThmtjK%mv*$K%3%fK|9_F?`sQR3;*hZp{@QaWdaaK zhXoyp|ILPlytrlkB^=N-vGOR=>V?S@_xJ)3Z@jvo<$d56&fi~OymsdEV9o*R+z#`= zt2TcnyNU~EH{0(mo92MtRQlJbUN&%VhYk|cSdf_*X!u2$3D4i(Z{l~sJ|fcGlwHvQ zhGyMoZrz~+osv~~Qiuj`?6w5{>YzaRzVi=uAy0BUQ~yQ+KL_%^OXgxW6wv7Yo}yes zfsBAo@x^v3c13rk_*|g1Scdua4m{$R4;b-M?-E#toKknC{_$-bFF>Prw7sMbGEI zlX-9=K2yp9aYlaEAO4F3E(ngBhb`6O!j&gRg9a8HaJm~^wMv=|If`}ar7xI3En@xM zv8)@IQjUs>h^rWpO3Ej{(IEN9_~CPhDG*$E-gU&A1T}lu?eTJ0w;|N)%|{53uve6j z^}7uu^royg&$Ypi!21)naReyM+H>lIF+P9O+Fpks5-8E^q%#)DU^uw6`uPhgh(|h9 zEl8sa>_pY&2VAj_yb2(kUPFgaDzV`GX)5R@ z@th5}Qs6ah!%`WXr>-B)X}(b-gY;r>)F~%2C_KCzDVU4+>hsj?J`V~EI=O0&UZ=wP zcY(8kd+2bAo!>7n-T{r{@7LeVVnCi=S=xV1J;1kBzi2D~b%&m-^NUn2I6gX2a%o9F z+_y^6rDY62{^*Y19ot4g>7p=gBhDeuMUkvB#F6!l>An?NJgAw~k$7mbxoFHe-7D~ zuKWx8^C@E=QubvY$gedclVo@xV)l5Cc_J5LP1GdgsyNVnUR(A;5gS@a58}q0Sx_Rf 
zJ>Ys(5A5#f%}gm~fZmmS1-XHCc=`QrVsQ!$z8E@v%*1=St8cY}WgHnyWS;$={!RkZ zZ$h6eGD*;RxLC@&o&+A-zUYGs849VfTQ(UY-=&z@3F4d{AlG_YThoE}TmIaJZ>S^8 z);Hw@cS9>l@Q7zQ3wY(z!iGK^aNGZ>_wTxA5P%Jl9({H@VGo#2YI5Pi`r_`onBSx*H-EG zL$qp#kV(}L2)}B%GIDAhM1#YF#QA$+;K|ACj9wl*uM@XyLLNBLu=r5m3i9H&iCH)0 zcwo7UD!vx+^I?afd2SI0Ci=deH$c5^v`w`^SBeFpUe|w&vb*6P-zUHDtDTT0(GntM zLkHoo!<4#Q3WQ1r1~fe+flS_0Z^`WrVAguRFD~x_-BMlt zSiHZECSMooLO4JjzN)c;#)EsiRk`gFeGn7*dvxE_sAL$9vKBfY2=2=u)3T->u2)Cv1L-FLSE ziFDF8}8kV_k;PE7ITFL zcdv+Q)nOf<&D|vs!r*~2@xAVj_dJm7TE{jG<3Y{$npLMHd5|?MUi|Yq7hL~*ku5Le zfYB${_?A0tX#MxzXXO?am`^AQIP3R-*}?_EiscM2&z%SwU*8VYvwGc$$Onn1zIWRc zlffd*tI^~j2~zpDxklO$;XWaQ{iB)y+B%V|d~OjSR;{jH{tW?+1W((pm>@vf6_ z#36NdPx2J@kziU@D$!_+1Py0mipt2?*KfaUi$`6oydmt-0^)MLo*C2asMGcuGk?^C zcS3e{PPzO3ZfHIb&oOE1fvc+%rtOv^t|A-0HksnV(8X+lPy!FMU#xhw0` zD!kAEX=mO&R#T@#+{NODVIEYdbgb#Tsg8J<+1MtGI&|vBk;j9`_k*c2rHgoum9pFS zsP9JHY$y9)MFSqsbQ7uU~Y`#TX3LScugj&ljscUwQ=RQf7?k zVrvWb>DtPjcd=i^(Tx1~+;A?@ik7$GTv7_&@xa503!7CO7F428&!IagtVDh6xw#m- z6m|D6jL~&V`IxYnvhKa*R2LM~+6Ah3G9YG-cwtv$Cs+nl(o7W)AMAT%Cg9i!y^(!C z8AF|r+c9;*a2D&r_CfTA6Wt)vsOhXK%LMHtZpH^m7K9yfy)Tu9_{O{Fr+O^r7AxlEeJfzeYQ1n#su`U?5c^8stP#(wbO zbNs>wb2juAh#0KxWWv{+ovX+(-B8~DoAuv;PT={bDW_V{p~Hw>V9`c_#F}=FE0qLa z+Q}I~zC^h9yW6@bf&lr{AP~b+Y!Pi05V?&D99r4`AfGKia1RX!0_rFEloD8|p%6ajhGxAw! zeYZka?Lp}9zaka2ehkQS(xWOvJa~C?Qd|@3cs#VeQ5O9auY}rXzdiBXv&LU93Gkq< z^!x>-KrWPzwLLX>%t1UMr*S`w4N+5Lra#44pv_iK?CbA_O1?U$M@875t7Koj{D|+z zL})bWAQc1zhSmlLlVRjyTlg&UpVok)+b;r%@KE`wGkp~i^4(a8fBqxF0TD`&S~3yr z@}&;H3nzi3@)}0kW-=@U$B2wa;+$%Y*mz%`2Gi45gTtcR;n>0}?GeOrlFRgOstGaR z^4we%;axY}Af8;@C&`4v_n#?ftY^Wdj>ml(&TMcMmy4WTf%Es1{bl(X9!R(!*8?gK zwk|oqKKz0Qv@WSiK5HKMo)w#a)yxI4u7X1$;#^Qa`E1}P`YG#Hi5!kXe?cbh`^3m0 z?9+?hG;!1?Mar$dd}DUNq~E#FyASCA)jyIme^8NUQCr6@P@wLUj=#!FGQ615-FkN& z89Z3T72`N}Ru2hO9O5T~XOd54t^pZ_ifVO61t`$;QA~%li28?wTE~N9G?2?zIgo1B z4h|c%1Cmu4FfW|3S8ja|oPTte#mi$sjuqk80b|684iL1_u^&8cq>R``4uQe&8Fex2 zQ%YNchKD=RCo3sAsaJ;j24BOeWzDdbL#}Fj8V* z{fRLKp5~`LKmYd|9)8{MZ4CV~KE*A)c^n>;9*vbsufX#iIyvl*c!Rt%X@#j0>a;fe zKy)!XNz+`+@j5Q`fjUngw=cbup9cP`~QbI!Zaw1$$G!H1LpzmpZxa zYi^-~FXO(fl>_<%rEbs0kgqeus~q)^&ocbqQTUcpAzw**-?11fSX@@wqTx(~C|UXM zBe95Au1rem6}Lk|sqvl7gPjm#GOBubqzm@_NjzzR{5Irv`rL3R;(+=4qqeJfAbd!G zaci(2o{!&{h&KKON~?6Q=j4pQKa<76H_2n5m)p^G6X)*@^7&v3H2WNrQ^^!&*?rG_(NdPFYN`1 z?hm5ssJC0LUgENVefp~5&zuz0pIwqW_1t#yz#^#mCZU)M)I0X4{0BKeaJsbZ75b?S z`%m<{;PV|>JRX@i+5`7KoaLx|Wq@hS@G%Ob9U7O3m5|QRpzi0JGX=2}sQl%sWTHie znv-FBXz^^g6)Qf@i_?ZiwSsOf$#U-x(_ww zeZb3=|B^H}08+ih+$`h^)s4d$;>d%`1XeLd@x0Piy3cOi=0VbuAkv&J^5d^ne}3n4 zfoxxF-9NwqPSZViDw_?8myZNoPG^C%?my348cfK~tUK@={n^QiQD3be9iZ~%(zMlE zI;bDt{goj@10~jkdEh20W)XSHLQP&*TrLMPU*7sx4!tlTFPdh1 zZxB8V8%9|wj=_R=r_t|#QJDF%e#yN26gcSF8tHJ+*LOKQmWAi!PdxV~4e|3mM*TYJ zGkAYnb~#%L@<4WTdg#teT;OZUn$9TafO?MP>XG-T=iD+;o4&w;s_%#01!b5Z{OsH& zk+3eXAuoURVFmh=1dEaDQtfbi$6>RJc{B*vyZ(Y~H5HPBD|8HWswNoDVz`OY; z0tF!~s1#8pix;wCs3ak<0rjP~5a_j01Z)8cUQL(O19c z5xd)l1+DopyY}wy0S0BeUkU~BQ{lG8!N+t+*`oQCHc5ens`hVp(f4Xzr$9u zakYH{0b6ETFA_3>1 z&yl%U`J#>ou^Yn{x1#>+K76d3>cE5JHvjUrO>rUSwBuVV)HkAh z+GS6qa-hScZrRIdHmu)$z}{~w3&`eE|HQm|;A|)}K*gJZ=Z!P)zPbHukGjuBbwBrMellBE&ayKQ>FCEc$1;7t}7hMDE=#vFq6 zFIa_Bi$Gz>@VKWlTYkHN|@4ka?cinVmv{JDHw1lK{>TD#rsjPYPyUhQz&VRNE_L7hNr3$W`%z90H==+59H3e>)7EIe*a0#@bDgoRfGP?Td% z|GUrz?poJB9FA-S&^oX_^lA%4S4~K57Hffy_<9SmeJzmsI{vg|+RX!F3Qzp3i zsVYZ`uwm}3ZJx<6-p_xzH{%gMH|~sK_~-DzOxvZn!UJ&zUEuH>KjN;}%Ew*Lb3yms zr;$hKFXS!Tf2;Kt8{GfUm8XP-`wbgr-D8)QYX7%b;9Z|v*L(r zkM;k`G3o{#$6GgJidk?yvS(#D;=qaq&&L!0d%kYv;U`d^l(KhI;E~xsmUa=wBZq27ucl`g7 zZ(K8*?}5w{zq73W>w>3V&7T;V9bghHqLFlo4n_z2jQr3qFnW*_DvW$iGJK3%iaDLq 
zjJ@3WFfvdtPpW;eBf~qfBXaV`$k3sv6`y;X48bPpb?>AokbCmh=cINDm`fa{z9^z% z-+Q7Z{u=d+FV5HWdgxG23Q(S)b;7F*^<`&}H-!3~zP>Go1@*6It_cmXL02BuYFPC` z?Y_pVXOIt8-!Sp8MxHM;oBaJzCid%T6-t&P;;Q?jl6oUtu<(9(S=5#be{a8?_!-H8 zGLPqe2c6jHv#0f+oMi%|uO`MEKX1OG_hqdOonZXu=_#XWI*4?Zrj3_U;i1QC$KiQ0 zMEbovxPborP@qNoam;PF5nj8S4H40|Q&3bCCBgi;?OkfuNXQ2xz5k0LgYbsSj#n|y zBk$tB+)0}Tng`$ZofK(@%#9T*Ux#!6G0=VapV}@cdE#Rhi1SCbXf5%RItP@)7FW=_ zIWSX|{p1Y#E6KLOC(LjTN0>-S-a!BNi%VMI0`mSI9gkDnYk2UkZc8P>g%g}elO zef3NJ*dEjgw)0u&U892HgF_qn`021otla23`mAy``<-je7%+1Dpu_e;ysxK+v|SEy zpt<7MtKjlpu=CCP)p%_X?%z=JKC^BN431N8u!|<(-T3deC&(9e6>Yd$h<-}P*SBVV z#i&DP4*%YG9{X0VR4R2P@+_P3=8!9huNJPy?Jnj3?*fUq2lbUx->Yr!8e=_2D5r8m zdcfD8XF7QV`C!n8Vv`H)@Tl`@uowD+N0R+*e`ugCktk(bRZD_)Vb8PQxkTVtsDY_9 z5&U@;H9@jOFdlzmSb9l02$j zI}chNa@3m2(I>m(H_R80I##gOUy336EM}9m@Fw*0ub4_-{}1z`JH37tS>kzR{!6+p zk9o6_to>Zfg{a=DDp@Am4HA}%!E*%6A>DUw?xE4Z;dC-53j1=CURJwm011TB4DAJQ zp3ai5YkdEY0Bb#zC;Ri;!0`F**I^NDFd-)9Yx}efq#FE7mu(@yE8nYpe$z9nUaIonn9_--9;$Q|N0|p1NML zn+{L^iTB9oP+|A4*M9<^qfer=J>QcL{h*dtG0R%fAKNdHwv|N&>$j(#C|gq?y{b3A zb~zP?H=$p4N`FGKza2FAE;XDJW*gnt$> zEl*HK^vzC+yi~#kWh?SMZc`sD)q4D^?&bing1wne4AgJl|2xr+xGKW+WxpZbTVWTm zGn@AzFCO1Y zLaMG|In9_~^0_$pyct9e?H-Q^X#tJX_xO(AX@yDs?XuVL?>&F~zWxK^>8OH-LMiTK z5b5KW?8ls8$$MWbrR;Xll`Wdwas%_TZ&j<*&!K-=+elmej02rN-W-XT?S&Ehl6r!6 zFED!#Di%ujLa$f5Zp}Cknz&o4D&C`SrZ&5=3U$hLBf;*@ja(phR-HVGc{9T%LW0)> z8itv#b&YiyLhh27r>eBwu=8d+G!HsEOrVZyVq4ek z=ZSueD5WL)2@_&k(;uvJ#Q8uzuf@jP#em<_Z%yj`FdVf|1+t^SSasxBpVl~3Hx-wx z{V@fpYFiXOJop1H8#}C;v7S?;CU(p*k^j!$aZ|&7XZ`3 z%|4mQ6wVt&Cyz)LJ_OAjd3E&DW9&w`1S z%ak|Zb7|~9)fN4-A8bt8u5L~l0@|f>&RwOW;5@N9t%iqsdzka_62u#(R#DO~pYWhC zduX)Y7Jbm=zHn%W3s0|{R8O@>K4`2xGlA!o**Q;-b4R@{@VA;ZKMOPigl$bZh|{gx z;|&WtVLGg{PU8a|k}8X~XJ}G^eL_Ug8}U@8XR%HCcOt+`yE}L21mLo+B+ptBphkn; zc3`0m-aYy>8L*K6yIxZ7dZiFRch6q3{ShKe78tnq;k>^5TQ!cOO@@5gbc?h*6wI+p z#cOY(fnmL(%rdDCh%wyz&*@_qkQetv?o(sIPp>RxkyXfZO!LP2<@!)p*%$K{b&Z`- z?moVlGqYI~_~Dg}dC@ar*Fs&87iFYh3!dhJgyEO&JEyonuTGqK8H4@#P2{H`%oRId zbKEYm$OHwyi`MGh-JtpCKnMQ^%$e=8tyo8+gW-jHd)}Ty-23=U@0ypGIMGxsUnUiQth*s;EHxY!;dKZciB*)@!(o2?${h z>Boh3$?ce5vn}7gnXdyJ$~JFL3t#~4LD$R{E%Zgb`HV!-S8;1j)$6Xs9ISYdL6%%U ztdCr!!U`G%1qY?sy_n)9LvF-xS zqhqrlv)^+-J}5EoTLI>%1=ZSr`mrGIrAbD%5EB;iNAJ0#o+Nh4KQpSP10H=k<5pZo zhnnL?0ta`{pw}@<=ojiaQZ+7%@|b^InY(l*2XRGt-uUD(%tP1cTj-jHQ-EK^Gmo^I z3U*SuzRhcCP@NL}>MZ8^(`Wh(^Ut+IYM1aur$@+tS6m`0#C3z|;rZdy$5>GINUHzo zB@Rqmdud1PM*nYU@b=PGeW1?q=Waeai2D+fa&DL>+*-B#4Iko;KU>UlzTtD85xi7( z?iu=mIv;iWtg$LW-#$-3e^YcQ;s${m zXI3M=R`3%H4nW-})wP}XeUuJfjb}>&&vwA8%_keXm!OaS;LPA2;U1_NA>`T$1V%q<1_}+eGR6%G2`Io2B`;>d!a6O z)Zh~8cBT7fZtU#hLCFnelRDHnPMREBpR$7o;mqPjwM;HZE?lkEX~4Yb>&0r{Y&Pso z(9(NhfcoUi#`ZMR9_X*+y;oYofCC1%$_Fn zkG%KZ9wNcg+F02T#4+o5W#z>gG+?zH^v-zM4(6jSvcIk~Al#|9`qTFw7}dB^al)4k zuL6TiWKs8U(r4v??C^R{w)|i**+0o9T`xZ9qRo zufuIa%39?A;u7-GG90L_-ESS=!2hp_RjV{sP*gWOHK36J~Sa6khBTwRErc5u#`ZW)7{oZtg0$oDinctRCp=Zk(zKfZt zSGTX~{)%m_0Q^km@ixJKM-*a`O#PF`;Xg^|0?cSC7JAu z^>04&Y8m>V$sW)7mbzk|(9dPS4)u*D#bwh1sBf6O4=7%_5_2(13txRO{}h;Af7u7~ zIa7|u-5#B82kV=@JRQUX{9}Yz%6rQHa;@*A-;g29E!i-48}i@_*R%@p)kkSIBkMgG zyy%3nb3E{0^RbdsX@+8RU~I45JfeJ3w`TVq}UuVg0Y3vDa2T5G$)G8@rqZ zPX&IasNsCHRzCe}1@2vx+<2^YMXw)Bgr+>xSN6je@uB~=5c(jYYrLr+b#~u5yYpET z9t20IT07z@e>xZTjaXxi8g;hLi`cLQ_4HtKJmw_@%$$?KiYGcr8h@bz6 zw2ltXZG}&{|L9KB9A%##dCHRlqwOio?B;+C!&A4bB)}AZoI#R>9LjduW=p;FY%RR zaNu8A_f$CgM@2sKMB5-1gflz{Mcf|vVR-dUIh_HogkJ?b6+?Y=L{Pi$BMox0MiLHo zQ-J08GdL3cF~zUn^ZoFAZh=BT=%KGVvj*H6cDpk(PrskNwYn0}y*rlKCZKgNn$LRj$ z?Err>e!itpA+gQ!f*j_lo+2)#IJBe87ti~7bH3$P%)!3rR51TrO$N@tqwTGT zx6@_zMAqZylav1?w`nsCRH)Wg%W$7X@?%QrW_AahGf7I_iMa_C^{6M;yqVzpJ37`6 z`%>KZ9ss8)G`P 
z^CumC2=M9V4s<}J@}RMgdN=rI{Wh5y>Va#)(yo;U5D&fIrTvb}1)WDTZ$%3GK4L4O}Z*RVVEyG8ouC*_xI+=VYX5*7rM?bE4zaJek%Vp zt1k5S{o`zkpNX?z>ruR<7+4Gxj>s1`k=$-Z(MJ{{QkE$r(y~)FS%4x zZMoKR5-9MIV=Oli;he$2imcZJu+BXFOdS1$j#LIep_l*~E1%nJP{h3))At0aQ6i9x z+rJ2WKs1-&b6U>4jKp;g_aP+ zz<*^&gpGVVSpGP=W41Ww9;R$*C)6HLT<*KI8>-zN@r_}wKm1j*u=RE} zytCV!S&O_^*iC(A_3b|B4~p%N-t!BLTbDe`Zx{oy_m*D1$p2`A*C+oY;~q|?==Bv? z&rfCDl+PiaWazrAT||9yyc3_GwC}y|gh`E{K$UXD&srxMKHxqHr)&JrO7uZvQ$06I6%rw0kMky{a?FQ0+gaXM zC4iU~qxrN10Ysndh}emKgQTmf19THWE=#&YIe7BGgo4AeF;p)w(5<00=ZAYQe-r-Av&t2J7BhSlhY?y%}#VSw`_Zt8Z< zAu41Ed~-G-puVhC_Sf<&=Ds(niXKFtUwxuP%dr~guF+zdW*-fFG(Q#(#h|XPK3us> ztrHf`HKdhq?goXphMReqKR(iRx$*Wd77QK=au${0!kaCbqI#{p(CkEyv3DDUQ_G7y z$yo+DK#6sh??GWbK?X4fi;q$$8Klq0@`hscH$r1ff9;AoJSUE}Kd-H9kjSi1T`np zlaIfU7r#Aw$^HrnUhPqC_KPP&UW#4W=PdM%yuWR1Q9=E{+}7X|?jOdL6kevacR+ZY zXFs29H}tMqnSWW11#`Upt|o8sRiHLvaUW-L;jUfdeLH;Yh=Ipb}$Oi|-Y;K-J{+k#_)X4sgI+E_2 zO_R({&^~GU?4Wu(O#Fh1GefAS(h5BQ`}4UMOlH_#E}}W3v#0(E>T`E>NB(|hK+eGw z9UaVFr!=hkla4-cx%5+o=gFw64U`x*V2(vOqaypPJ?a^z*|(Bq2H|jC&G}DQ&zXYV z<39iMk2VmdWZ&}Oe%wj>&_K-D#mP7du0lOJ;^nQ7Yg`bLSlFF{`Pe_#{vPwV%Z8-< zIW@Ka5UXNM z&}guvg#?wPvb_n5sIxB(&6Rn8`O5Lor)=bj`~h2XE@M8Y(psgV`8e)d%Kp8+`#=Yj z(K7rd>JT^2sMNP&UFRDgG4Mh^Z0U`#Zz=>fObPcJ6>1`$2yVWYs?!f^PHRY0?FOOo z(Khm3?l9!tms)r2HxHj?18@Y7{9r6iEoh`T9 zVab}QHJg(=!N2|Hx09$}B?br6zv(i8zF&St@I3los{6h-f5coPXRWL8J>2gsE~&Is z?gxpK)%Kd}1|j4P)w0`X7~cEq4>paCK~&h*e@@7Yf|4A+J;uCnPDGC9!w;C#N$8@! zLp@g9|ING5t*E=#1tyJt;KFWJ;p;vjE?l(W*ItYJWh$nvy@A6l=one9GVzQF)rmrn zWFB>c%aT0lhuREK)bn&!+SUQ%ZZ2jS`E;c+1XHjYc_L0iN`afONxuC%~U9(5B5Asj{O`nEAFxs(9qY3SGs zWych@?KbI!>GQ7Jn7BVF#yp(!5_zynS7w{sRm9O@`LAv89#=<6E(=w_y$%1HO_luU zBMXL%c|B%<>bL$^)^k0uCh)qUWf=o(KN&g>hdh_(sBVJw0={(FOGYl?@zrHSTBzI3%fn-f=>`(xBnfXTHGhyyLkE4 zMNKlC3|=QMqdH4UfXd7?=S6o)efPvh%=WROD|fA{o7?- z_<6)vtOKjYq;bF0Zf)N2*2~DFV{3m0qK_VZH$f{=um=_lT4CVP zA8PIPVZRC+tZhP{AYkY6oRrBvsQ8q9MZ;+jl)dNneOcQJ7q@NHw?}>yQ>V0(g?QU* z@5tZ1DVTE?1SthA)MbgWs;5!kV2oezmZD;wP-!5Ag8XPSQdhO-GVZ|y%icY=4E;Id zka_>jxbNKcZEsg}2kd9a+%m92{yMue<@Qr5D6LJfls->^@pCgmBc)`R?dcad>q>@; z{#Wt@PU0TnS?)Xaax%~sd+hWT@%K3-NLwkEQ6OZI_pIC#USsw0f{8;D=@zw8u z)|g)}Rw{IX_%+tU49s7N-Ves+8h=#=dSAww6Nu+yPpfq&^zeFkq-ECU(oC z9!QkpsS10rV61FwV)`o%5aT~?GMK@=(Bik!x_X14+xt9B(q#-n--;xiv7Cg`&pAOG z!~Vc{_L3QmHS@qKs#YyTykR&}$|s7vH}s<<^Lj4k>`GOH?0ry|Es%Jyav2XUZ`rM@ z6M;D$-kwW2c^uI1|KL=e!iMo=?Jr+-SeVoO(D1;z2fh-1B);6jfPmA6VWOB{wc4?` zxQmFtH>9BX@E0~2HV*ogKqLtc@70@?x=sZC3j5o_y##oee(qmVHsSy4;>@F=?!z`t zR456pmh4$VDUn2$kR%kzRw!jjNKYh6i^>*RB9!GJOG-Q`3O9-*gIR6Em>F9nWGB3L z=e+;C@9C6tI&%8W{O0$)@9X+p7Yxqr)fB2Ag2&w3i8ulgC;`g?e)u`wr+baVZxr*- zMBfYhev!dqs4eWpEcQ9@M`Q`(Jo9s{H>D8$@09fc3RzWMaP2$IT#bVPhkcauALF@U zdT6(O3C>H=vTX49>?vK#L?7lzc5)Z;z;YLgPrbu= zQflwl!(%va%%qPJvQW>7U18b_7b3rvQz*F{$AWVPkza~5m>?IqaZ%Bp0nT0S7Mt|x zaA0{m&$43�+`~^WeF|cr`d9_ob8*(tk+w0hu~#d=jqr(RGzmpk8td+=_}uhDxf*|1uTOT=>p`QNcc&cFQ( z=RK;$7&u4sf84=xF2Ft+iA^E?Zm3(*_!N54XZU2QdnxJ!&Xt~<74k4w9dE~xtF)U1 zjR`yvN~B)M0MpX2fgWgMG@h;Yp~K`b;q<5v&b1{M+==Tu;EI~ij$-|Gcp1BIt_XQS z=A~jb6S<`{%P`ZkIDZx>ax{ELt~O@BO~P|d%(YhEH?zU}vGZ2swDe9Ea29PH|JjWB z0+D1P8Sx?L_4Kj{Jn$1vYd(6Y?KTc+-LfJ7ZJq)nWsV&WZDt_F#n>~Ny$G{=TvQcv zII8>B4_tnTzPJ7A$os(GuWYmA?hxVL+V#m7GInaL>|XF0Uls}kVz~Rvc+?_fBXBdA8>Cd{;D7SEgt8{A5Tx; zKZxg(ru`#oNgsH>T4Uvhd@SX^brjZF#oxn%RE-TY!o0_96$E4Fb&SakBOr>PO)-=|}A zB_1NYtk&Nya*zmwptg%^lW-3Ct~_9@fxhqB91(&i<{D~cHYOgx=Rvk^P*D&KCQH`q zJT&iuqw|f88f5ezJWLZE*05mbfP?IO7tY1839BqDF)ykb{<=#B`xz?vio-F-(2`go z;ntv_~@&0%y2*6KbH8oq7Qy6NI4k)WCPchm-@v77PyqSFGyZwLSgb< zP1kq^WUJiCS2LwUlr6n`;&=xRWXLln3EnOL z2;_f({S(o`CFe0OdMdluYCIKlEV*Zo$z#6%W9-PIvKT7BwpSnSwu4$m z>YpmuZVg>o)}}aVTK8g90|LW&kw@6 
[remainder of GIT binary patch payload omitted: base85-encoded binary data]
literal 0
HcmV?d00001

diff --git a/content/results/validation_curve_roc_auc.npz b/content/results/validation_curve_roc_auc.npz
new file mode 100644
index 0000000000000000000000000000000000000000..972b0eb441a431be1d70aae2c4a773532292dc2d
GIT binary patch
literal 24276

[GIT binary patch payload omitted: base85-encoded contents of content/results/validation_curve_roc_auc.npz, 24276 bytes]
literal 0
HcmV?d00001
From a2ccf24c16bb3451e86683c25f1e58da6038feb2 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 22 Aug 2024 16:49:58 +0200
Subject: [PATCH 25/28] avoid tracking results folder

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index c369a3f..8b741d2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,8 +77,9 @@ jupyterlite_contents
 .pixi
 *.egg-info

-# generate predictions
+# generated predictions and results
 content/predictions
+content/results

 # jupyter-book build
 book/_build

From ccc61b3c54f6f89da9557b7c1155c918398f1311 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 22 Aug 2024 16:52:59 +0200
Subject: [PATCH 26/28] iter

---
 content/python_files/miscalibration_reweighting.py    | 2 +-
 content/python_files/miscalibration_under_over_fit.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/content/python_files/miscalibration_reweighting.py b/content/python_files/miscalibration_reweighting.py
index e3dd91b..00c30a0 100644
--- a/content/python_files/miscalibration_reweighting.py
+++ b/content/python_files/miscalibration_reweighting.py
@@ -1,6 +1,6 @@
 # %% [markdown]
 #
-# ## Effect of resampling on model calibration
+# # Miscalibration due to data points reweighting
 #
 # Another cause of model miscalibration is training set resampling. In
 # general, resampling is encountered when dealing with imbalanced datasets. In this
diff --git a/content/python_files/miscalibration_under_over_fit.py b/content/python_files/miscalibration_under_over_fit.py
index 62beaa5..09d4e9d 100644
--- a/content/python_files/miscalibration_under_over_fit.py
+++ b/content/python_files/miscalibration_under_over_fit.py
@@ -327,7 +327,7 @@ def xor_generator(n_samples=1_000, seed=None):

 # %% [markdown]
 #
-# ### Is it true for other models?
+# ## Is it true for other models?
 #
 # In this section, we want to show that the previous findings are not specific to
 # a linear model that relies on a pre-processing step. Here, we use a gradient-boosting
@@ -409,7 +409,7 @@ def xor_generator(n_samples=1_000, seed=None):

 # %% [markdown]
 #
-# ### Hyperparameter tuning while considering calibration
+# ## Hyperparameter tuning while considering calibration
 #
 # From the previous sections, we saw that the hyperparameters of a model, while
 # impacting its complexity, also impact its calibration. It therefore becomes crucial to tune the

From 207a47de70dc580f6e7801f01b89a52b590ad7f6 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 22 Aug 2024 16:59:08 +0200
Subject: [PATCH 27/28] book

---
 book/_toc.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/book/_toc.yml b/book/_toc.yml
index 924c2ca..e8d5d83 100644
--- a/book/_toc.yml
+++ b/book/_toc.yml
@@ -6,4 +6,5 @@ root: intro
 chapters:
 - file: content/notebooks/build_calibration_curve
 - file: content/notebooks/different_calibration_curves
-- file: content/notebooks/causes_miscalibration
+- file: content/notebooks/miscalibration_under_over_fit
+- file: content/notebooks/miscalibration_reweighting

From 116d3efb43913b5d0dc8e0a0b94c271ec476cca1 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 22 Aug 2024 17:04:10 +0200
Subject: [PATCH 28/28] no need to execute twice

---
 pixi.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pixi.toml b/pixi.toml
index 982bfff..c741c65 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -37,7 +37,7 @@ jupyter-book = "*"
 [feature.doc.tasks]
 convert-to-notebooks = { cmd = "jupytext --to notebook ./content/python_files/*.py && mkdir -p ./content/notebooks && mv ./content/python_files/*.ipynb ./content/notebooks" }
 convert-to-executed-notebooks = { cmd = "jupytext --to notebook --execute ./content/python_files/*.py && mkdir -p ./content/notebooks && mv ./content/python_files/*.ipynb ./content/notebooks" }
-build-book = { cmd = "jupyter-book build book", depends-on = ["convert-to-executed-notebooks"] }
+build-book = { cmd = "jupyter-book build book", depends-on = ["convert-to-notebooks"] }
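The first hunk of PATCH 26 renames the notebook on reweighting. The effect it studies, namely that training on reweighted or resampled data shifts the predicted probabilities away from the true class frequencies, can be reproduced in a few lines. The sketch below is an illustrative stand-in, not the notebook's own code: it assumes a synthetic 90/10 imbalanced problem and uses class_weight, which is equivalent in effect to resampling the training set.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Illustrative imbalanced problem: roughly 90% negatives, 10% positives.
X, y = make_classification(n_samples=10_000, weights=[0.9, 0.1], random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

for class_weight in (None, "balanced"):
    # "balanced" reweights each class inversely to its frequency, mimicking a
    # resampled 50/50 training set.
    model = LogisticRegression(class_weight=class_weight).fit(X_train, y_train)
    mean_proba = model.predict_proba(X_test)[:, 1].mean()
    print(
        f"class_weight={class_weight!r}: mean predicted P(y=1) = {mean_proba:.3f}, "
        f"observed positive rate = {y_test.mean():.3f}"
    )

With class_weight=None the mean predicted probability tracks the roughly 10% base rate; with "balanced" it is pulled toward 0.5, which is precisely the miscalibration the renamed notebook demonstrates.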
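The last two hunks of PATCH 26 promote the section on hyperparameter tuning, whose point is that hyperparameters affect calibration as well as complexity, so the tuning metric should be sensitive to calibration. A hedged sketch of that idea, reusing the data from the previous sketch: the estimator (a gradient-boosting model, as in the notebook section) and the search space are illustrative assumptions, while "neg_log_loss" is scikit-learn's scorer for the log loss, a proper scoring rule that, unlike accuracy or ROC AUC, penalizes miscalibrated probabilities.

from scipy.stats import loguniform
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV

# Illustrative search space; tuning against the log loss keeps calibration in
# the objective instead of optimizing discrimination alone.
param_distributions = {
    "learning_rate": loguniform(1e-3, 1e0),
    "max_leaf_nodes": [5, 10, 30, 100],
}
search = RandomizedSearchCV(
    HistGradientBoostingClassifier(random_state=0),
    param_distributions=param_distributions,
    n_iter=10,
    scoring="neg_log_loss",
    random_state=0,
).fit(X_train, y_train)  # X_train, y_train from the previous sketch
print(search.best_params_, search.best_score_)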