Skip to content

Commit

Permalink
put functions in the example
Browse files Browse the repository at this point in the history
  • Loading branch information
Vincent-Maladiere committed Dec 24, 2023
1 parent 4926efd commit b03fe1a
Showing 1 changed file with 142 additions and 144 deletions.
286 changes: 142 additions & 144 deletions examples/debaised_bs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@
#

# %%
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.base import clone

from hazardous.data._competing_weibull import (
make_complex_features_with_sparse_matrix,
Expand Down Expand Up @@ -38,26 +36,36 @@
)

# %%
import seaborn as sns
from matplotlib import pyplot as plt

from hazardous.data._competing_weibull import _censor


def plot_events(y, kind):
    """Show a stacked histogram of durations, split by event type.

    The title reports the empirical censoring rate, i.e. the fraction of
    rows whose event code is 0, rounded down to an integer percentage.
    ``kind`` labels the censoring mechanism ("independent" or "dependent").
    """
    censoring_rate = int((y["event"] == 0).mean() * 100)
    axis = sns.histplot(
        y, x="duration", hue="event", palette="magma", multiple="stack"
    )
    # {censoring_rate=!r} renders as "censoring_rate=<value>" in the title.
    axis.set(
        title=(
            f"Duration distributions when censoring is {kind} of X, "
            f"{censoring_rate=!r}%"
        )
    )


# Draw censoring times that are independent of the covariates X, then plot
# the resulting duration distributions.
# NOTE: the original span carried two `censoring_relative_scale` keywords
# (0.5 and 1) — a duplicate-keyword SyntaxError; the intended value is 1.
y_censored_indep, shape_censoring_indep, scale_censoring_indep = _censor(
    y_uncensored,
    independent_censoring=True,
    X=X,
    features_censoring_rate=0.5,
    censoring_relative_scale=1,
    random_state=seed,
)

plot_events(y_censored_indep, kind="independent")


# %%
Expand All @@ -72,188 +80,178 @@
censoring_relative_scale=0.5,
random_state=seed,
)

# Plot the dependent-censoring durations via the shared helper; the inline
# sns.histplot block it replaces duplicated plot_events and is removed.
plot_events(y_censored_dep, kind="dependent")


# %%
from hazardous import GradientBoostingIncidence


gbi_indep = GradientBoostingIncidence(
learning_rate=0.1,
n_iter=20,
max_leaf_nodes=15,
hard_zero_fraction=0.1,
min_samples_leaf=5,
loss="ibs",
show_progressbar=False,
random_state=seed,
event_of_interest=event_of_interest,
)
gbi_indep.fit(X, y_censored_indep)

gbi_dep = clone(gbi_indep)
gbi_dep.fit(X, y_censored_dep)

# %%
from matplotlib import pyplot as plt

from hazardous.metrics._brier_score import (
brier_score_incidence,
brier_score_true_probas_incidence,
)


time_grid = gbi_indep.time_grid_
y_pred_indep = gbi_indep.predict_cumulative_incidence(X, times=time_grid)
def plot_brier_scores_comparisons(X, y, shape, scale, kind, event_of_interest=1):
    """Fit a GradientBoostingIncidence model and plot two Brier-score curves.

    Compares the debiased time-varying Brier score — computed with the true
    Weibull censoring distribution (``shape``, ``scale``) — against the
    classical estimate that reweights with a Kaplan-Meier censoring model.

    Parameters
    ----------
    X, y : training covariates and (possibly censored) targets; the model is
        both fit and evaluated on them (in-sample, as in the example).
    shape, scale : parameters of the true censoring distribution, as returned
        by ``_censor``.
    kind : str, label for the plot title ("independent" or "dependent").
    event_of_interest : int, competing event whose incidence is scored.
    """
    gbi = GradientBoostingIncidence(
        learning_rate=0.1,
        n_iter=20,
        max_leaf_nodes=15,
        hard_zero_fraction=0.1,
        min_samples_leaf=5,
        loss="ibs",
        show_progressbar=False,
        random_state=seed,
        event_of_interest=event_of_interest,
    )
    gbi.fit(X, y)
    y_pred = gbi.predict_cumulative_incidence(X)

    # Evaluate both scores on the model's own time grid.
    time_grid = gbi.time_grid_
    debiased_bs_scores = brier_score_true_probas_incidence(
        y_train=y,
        y_test=y,
        y_pred=y_pred,
        times=time_grid,
        shape_censoring=shape,
        scale_censoring=scale,
        event_of_interest=event_of_interest,
    )

    bs_scores = brier_score_incidence(
        y_train=y,
        y_test=y,
        y_pred=y_pred,
        times=time_grid,
        event_of_interest=event_of_interest,
    )

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(time_grid, debiased_bs_scores, label="from true distrib")
    ax.plot(time_grid, bs_scores, label="from estimate distrib with km")
    ax.set(
        title=f"Time-varying Brier score, {kind} censoring",
    )
    ax.legend()

# Compare debiased vs. KM-estimated Brier scores under both censoring
# mechanisms. (The pre-refactor module-level scoring/plotting statements
# this span contained are superseded by the helper calls below.)
plot_brier_scores_comparisons(
    X,
    y_censored_indep,
    shape=shape_censoring_indep,
    scale=scale_censoring_indep,
    kind="independent",
    event_of_interest=event_of_interest,
)

# %%

plot_brier_scores_comparisons(
    X,
    y_censored_dep,
    shape=shape_censoring_dep,
    scale=scale_censoring_dep,
    kind="dependent",
    event_of_interest=event_of_interest,
)

# %%

from lifelines import AalenJohansenFitter


aj = AalenJohansenFitter(calculate_variance=False, seed=seed)
n_events = y_uncensored["event"].max()
t_max = y_uncensored["duration"].max()
def plot_marginal_incidence(y_censored, y_uncensored, kind):
    """Plot Aalen-Johansen cumulative incidence, censored vs. uncensored.

    One subplot per competing event: the CIF fit on the uncensored targets
    is overlaid with the CIF fit on the censored targets, so the bias
    introduced by censoring is visible. ``kind`` labels the censoring
    mechanism in the figure title.
    """
    aj = AalenJohansenFitter(calculate_variance=False, seed=seed)
    # Event codes are assumed to be 1..n_events, with 0 meaning censored.
    n_events = y_uncensored["event"].max()

    censoring_fraction = (y_censored["event"] == 0).mean()

    # Compute the estimate of the CIFs on a coarse grid.
    _, axes = plt.subplots(figsize=(12, 4), ncols=n_events, sharey=True)
    plt.suptitle(
        f"Cause-specific cumulative incidence functions, {kind}"
        f" ({censoring_fraction:.1%} censoring)"
    )

    for event_id, ax in enumerate(axes, 1):
        aj.fit(
            y_uncensored["duration"],
            y_uncensored["event"],
            event_of_interest=event_id,
        )
        aj.plot(label="Aalen-Johansen_uncensored", ax=ax)

        aj.fit(
            y_censored["duration"],
            y_censored["event"],
            event_of_interest=event_id,
        )
        aj.plot(label="Aalen-Johansen_censored", ax=ax)

        ax.set_xlim(0, 8_000)
        ax.set_ylim(0, 0.5)
    plt.legend()

ax.set_xlim(0, 8_000)
ax.set_ylim(0, 0.5)
# %%

# Marginal incidence under each censoring mechanism; the pre-refactor
# module-level Aalen-Johansen statements are superseded by these calls.
plot_marginal_incidence(y_censored_indep, y_uncensored, kind="independent")
# %%

plot_marginal_incidence(y_censored_dep, y_uncensored, kind="dependent")


# Compute the estimate of the CIFs on a coarse grid.
_, axes = plt.subplots(figsize=(12, 4), ncols=n_events, sharey=True)
# %%

plt.suptitle(
"Cause-specific cumulative incidence functions, Dependent"
f" ({censoring_fraction:.1%} censoring)"
)
from hazardous._ipcw import IPCWEstimator, IPCWSampler

for event_id, ax in enumerate(axes, 1):
aj.fit(y_uncensored["duration"], y_uncensored["event"], event_of_interest=event_id)
aj.plot(label="Aalen-Johansen_uncensored", ax=ax)

aj.fit(
y_censored_dep["duration"], y_censored_dep["event"], event_of_interest=event_id
)
aj.plot(label="Aalen-Johansen_censored", ax=ax)
def plot_ipcw_comparison(
    y_uncensored, y_censored, shape_censoring, scale_censoring, kind
):
    """Compare IPCW weights from the true vs. the estimated censoring model.

    ``G*`` is sampled from the known Weibull censoring distribution
    (``shape_censoring``, ``scale_censoring``), while ``G-hat`` is estimated
    from the censored data with Kaplan-Meier; both are averaged over
    observations and plotted on a common time grid.
    """
    t_max = y_uncensored["duration"].max()
    time_grid = np.linspace(0, t_max, 100)

    estimator = IPCWEstimator().fit(y_censored)
    ipcw_pred = estimator.compute_ipcw_at(time_grid)

    sampler = IPCWSampler(
        shape=shape_censoring,
        scale=scale_censoring,
    ).fit(y_censored)

    # One weight per observation at each grid time; average across samples.
    ipcw_samples = []
    for time_step in time_grid:
        time_step = np.full(y_censored.shape[0], fill_value=time_step)
        ipcw_sample = sampler.compute_ipcw_at(time_step)
        ipcw_samples.append(ipcw_sample)
    ipcw_samples = np.mean(ipcw_samples, axis=1)

    fig, ax = plt.subplots(figsize=(8, 4))
    ax.plot(time_grid, ipcw_samples, label="$G^*$")
    # Raw string: avoids the invalid "\h" escape (same bytes at runtime).
    ax.plot(time_grid, ipcw_pred, label=r"$\hat{G}$, using KM")
    ax.set_title(f"{kind} censoring")
    plt.legend()

# IPCW comparison for both censoring mechanisms; the pre-refactor inline
# estimator/sampler statements and the commented-out dependent variant are
# superseded by these two calls.
plot_ipcw_comparison(
    y_uncensored,
    y_censored_indep,
    shape_censoring=shape_censoring_indep,
    scale_censoring=scale_censoring_indep,
    kind="independent",
)


# %%

plot_ipcw_comparison(
    y_uncensored,
    y_censored_dep,
    shape_censoring=shape_censoring_dep,
    scale_censoring=scale_censoring_dep,
    kind="dependent",
)

# %%

0 comments on commit b03fe1a

Please sign in to comment.