Commit d18fe69: add tests and examples
qbarthelemy committed Apr 26, 2021 (1 parent: a617108)
Showing 7 changed files with 1,462 additions and 0 deletions.
6 changes: 6 additions & 0 deletions examples/README.txt
@@ -0,0 +1,6 @@
Examples Gallery
================

.. contents:: Contents
:local:
:depth: 1
84 changes: 84 additions & 0 deletions examples/compute_auroc_pvalue.py
@@ -0,0 +1,84 @@
"""
===============================================================================
Compute the p-value associated with AUROC
===============================================================================
Compute the p-value associated with the area under the curve (AUC) of the
receiver operating characteristic (ROC), for a binary (two-class)
classification problem.

Warning: this example requires scikit-learn (version 0.22 at least).
"""
# Authors: Quentin Barthélemy

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, roc_auc_score
from pypermut.ml import permutation_metric, standard_error_auroc
from pypermut.misc import pvals_to_stars
from matplotlib import pyplot as plt


###############################################################################
# Artificial data and classifier
# ------------------------------

X, y = make_classification(n_classes=2, n_samples=100, n_features=100,
n_informative=10, n_redundant=10,
n_clusters_per_class=5, random_state=1)
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.5,
random_state=2)
# Logistic regression: a very good old-fashioned classifier
clf = LogisticRegression().fit(Xtrain, ytrain)
ytest_probas = clf.predict_proba(Xtest)[:, 1]


###############################################################################
# Compute ROC curve and AUC
# -------------------------

# In this example, we focus on the AUROC metric. However, this analysis can be
# applied to any metric measuring model performance: any function from
# sklearn.metrics, like roc_auc_score, log_loss, etc. A log_loss sketch is
# given at the end of this example.

# ROC curve
fpr, tpr, _ = roc_curve(ytest, ytest_probas)

fig, ax = plt.subplots()
ax.set(title="ROC curve", xlabel='FPR', ylabel='TPR')
ax.plot(fpr, tpr, 'C1', label='Logistic regression')
ax.plot([0, 1], [0, 1], 'b--', label='Chance level')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.legend(loc="lower right")
plt.show()

# AUC
auroc = roc_auc_score(ytest, ytest_probas)
print('AUROC = {:.3f}\n'.format(auroc))


###############################################################################
# Compute standard error and p-value of AUC
# -----------------------------------------

n_perm = 5000
np.random.seed(17)

# standard error of AUC
auroc_se = standard_error_auroc(ytest, ytest_probas, auroc)
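
# For reference, a hedged cross-check using the closed-form standard error of
# Hanley & McNeil [1], which standard_error_auroc is assumed to implement:
# SE = sqrt((A(1-A) + (n1-1)(Q1-A^2) + (n2-1)(Q2-A^2)) / (n1*n2)),
# with Q1 = A/(2-A) and Q2 = 2A^2/(1+A), n1 positives and n2 negatives.
n1, n2 = np.sum(ytest == 1), np.sum(ytest == 0)
q1, q2 = auroc / (2 - auroc), 2 * auroc**2 / (1 + auroc)
se_check = np.sqrt((auroc * (1 - auroc) + (n1 - 1) * (q1 - auroc**2)
                    + (n2 - 1) * (q2 - auroc**2)) / (n1 * n2))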

# p-value of AUC, by permutations
auroc_, pval = permutation_metric(ytest, ytest_probas, roc_auc_score,
side='right', n=n_perm)
print('AUROC = {:.3f} +/- {:.3f}, p={:.2e} ({})'
.format(auroc_, auroc_se, pval, pvals_to_stars(pval)))
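
# The same analysis applies to any sklearn-style metric. A hedged sketch with
# log_loss, where lower values are better (assumption: side='left' is the
# option of permutation_metric for smaller-is-better metrics):
from sklearn.metrics import log_loss

loss, pval_loss = permutation_metric(ytest, ytest_probas, log_loss,
                                     side='left', n=n_perm)
print('log-loss = {:.3f}, p={:.2e} ({})'
      .format(loss, pval_loss, pvals_to_stars(pval_loss)))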


###############################################################################
# Reference
# ---------
# [1] Hanley & McNeil, "The meaning and use of the area under a receiver
# operating characteristic (ROC) curve", Radiology, 1982.
142 changes: 142 additions & 0 deletions examples/compute_exact_ttest_ind.py
@@ -0,0 +1,142 @@
"""
===============================================================================
Compute exact Student's t-test for independent samples
===============================================================================
Compute classical, permuted and exact right-sided Student's t-tests, for
independent / unpaired samples.
"""
# Authors: Quentin Barthélemy

import numpy as np
from scipy.stats import t, ttest_ind, mannwhitneyu, shapiro, levene
from pypermut.stats import permutation_ttest_ind, permutation_mannwhitneyu
from pypermut.misc import pvals_to_stars
from matplotlib import pyplot as plt


###############################################################################
# Univariate data
# ---------------

# Artificial small samples
s1 = np.array([7.5, 7.9, 8.3, 8.5, 8.7, 9.2, 9.6, 10.2])
s2 = np.array([5.9, 6.5, 6.9, 7.4, 7.8, 7.9, 8.1, 8.5, 8.7, 9.1, 9.9])

# Plot samples
fig, ax = plt.subplots()
ax.set(title='Boxplot of samples', xlabel='Samples', ylabel='Values')
box = ax.boxplot(np.array([s1, s2], dtype=object), medianprops={'color':'k'},
showmeans=True, meanline=True, meanprops={'color':'r'})
ax.plot([1.1, 1.9], [m.get_ydata()[0] for m in box.get('means')], c='r')
ax.scatter(np.ones(len(s1)), s1)
ax.scatter(2*np.ones(len(s2)), s2)
plt.show()


###############################################################################
# Student's t-tests for independent samples
# -----------------------------------------

# The null hypothesis is that these two samples s1 and s2 are drawn from the
# same Gaussian distribution.

# Classical test
t_class, p_class = ttest_ind(s1, s2, alternative='greater')
print('Classical t-test:\n t={:.2f}, p={:.3e} ({})'
.format(t_class, p_class, pvals_to_stars(p_class)))

# Permutation test
np.random.seed(17)
n_perm = 10000
t_perm, p_perm = permutation_ttest_ind(s1, s2, n=n_perm, side='one')
print('Permutation t-test:\n t={:.2f}, p={:.3e} ({})'
.format(t_perm[0], p_perm[0], pvals_to_stars(p_perm[0])))

# Exact test
t_exact, p_exact, t_dist = permutation_ttest_ind(s1, s2, n='all', side='one',
return_dist=True)
print('Exact t-test:\n t={:.2f}, p={:.3e} ({})\n'
.format(t_exact[0], p_exact[0], pvals_to_stars(p_exact[0])))

# Plot the difference between approximate and exact right-sided p-values
fig = plt.figure(figsize=(14, 5))
ax1 = fig.add_subplot(121, title='Approximate p-value', xlabel='t')
ax1.axvline(x=t_exact, c='r', label='Real statistic t')
df = len(s1) + len(s2) - 2
x = np.linspace(t.ppf(0.001, df), t.ppf(0.999, df), 100)
ax1.plot(x, t.pdf(x, df), label='Approximate t-distribution')
ax1.fill_between(x, t.pdf(x, df), where=(x>=t_exact), facecolor='r',
label='Approximate p-value')
plt.legend(loc='upper left')
ax2 = fig.add_subplot(122, title='Exact p-value', xlabel='t', sharey=ax1)
ax2.axvline(x=t_exact, c='r', label='Real statistic t')
y, x, _ = ax2.hist(t_dist, 50, density=True, alpha=0.4,
label='Exact t-distribution')
ax2.fill_between(x[1:], y, where=(x[1:]>=t_exact), facecolor='r', step='pre',
label='Exact p-value')
ax2.set_xlim(ax1.get_xlim())
plt.legend(loc='upper left')
plt.show()

# The classical t-test detects a trend in the difference between these
# samples, but is not able to reject the null hypothesis (with alpha = 0.05).
# The permutation and exact tests reject the null hypothesis, the latter
# giving the exact p-value. Amazing!
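
# A hedged sketch of how the exact null distribution can be enumerated
# (illustrative only, not necessarily the pypermut implementation): every
# assignment of the pooled values into two groups of the original sizes is
# evaluated with the classical statistic.
from itertools import combinations

pooled = np.concatenate([s1, s2])
t_null = np.array([
    ttest_ind(pooled[list(idx)], np.delete(pooled, list(idx))).statistic
    for idx in combinations(range(len(pooled)), len(s1))])
print('Enumerated exact p-value: p={:.3e}'
      .format(np.mean(t_null >= t_exact[0])))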


###############################################################################
# Assumptions
# -----------

# But, wait! Is Student's t-test valid for such data?
# We have not checked the assumptions related to this parametric test.

alpha = 0.05

# Check the homoscedasticity (homogeneity of variances) assumption, with
# Levene's test
stat, p = levene(s1, s2)
print('Levene test:\n Equal var={}, p={:.3f} ({})'
      .format(p >= alpha, p, pvals_to_stars(p)))

# Ok, but we could also use Welch's t-test, which does not assume equal
# variances, as sketched below.
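
# A quick check with Welch's t-test (standard scipy API; just for reference):
t_welch, p_welch = ttest_ind(s1, s2, equal_var=False, alternative='greater')
print("Welch's t-test:\n t={:.2f}, p={:.3e} ({})"
      .format(t_welch, p_welch, pvals_to_stars(p_welch)))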

# Check the normality / Gaussianity assumption, with Shapiro-Wilk tests
stat, p = shapiro(s1)
print('Shapiro-Wilk test, for first sample:\n Normality={}, p={:.3f} ({})'
      .format(p >= alpha, p, pvals_to_stars(p)))
stat, p = shapiro(s2)
print('Shapiro-Wilk test, for second sample:\n Normality={}, p={:.3f} ({})\n'
      .format(p >= alpha, p, pvals_to_stars(p)))

# With such small samples, these tests have little power to detect departures
# from the Gaussian assumption. To be safe, we should use the non-parametric
# version.


###############################################################################
# Mann-Whitney U tests for unpaired samples
# -----------------------------------------

# The null hypothesis is that these two samples s1 and s2 are drawn from the
# same distribution, without assumption on its shape.

# Classical test
U_class, p_class = mannwhitneyu(s1, s2, alternative='less')
print('Classical Mann-Whitney test:\n U={:.1f}, p={:.3e} ({})'
.format(U_class, p_class, pvals_to_stars(p_class)))

# Permutation test
U_perm, p_perm = permutation_mannwhitneyu(s1, s2, n=n_perm)
print('Permutation Mann-Whitney test:\n U={:.1f}, p={:.3e} ({})'
.format(U_perm[0], p_perm[0], pvals_to_stars(p_perm[0])))

# Exact test
U_exact, p_exact = permutation_mannwhitneyu(s1, s2, n='all')
print('Exact Mann-Whitney test:\n U={:.1f}, p={:.3e} ({})'
.format(U_exact[0], p_exact[0], pvals_to_stars(p_exact[0])))

# Finally, we must conclude that we can't reject the null hypothesis.


###############################################################################
103 changes: 103 additions & 0 deletions examples/correct_multiple_correlations.py
@@ -0,0 +1,103 @@
"""
===============================================================================
Correct Pearson and Spearman correlations on Anscombe's data
===============================================================================
Compare classical and permuted tests on Anscombe's data sets [1], for Pearson
and Spearman correlations, after correction for multiple tests.
"""
# Authors: Quentin Barthélemy

import numpy as np
from scipy.stats import pearsonr, spearmanr
from pypermut.stats import permutation_corr
from pypermut.misc import print_results, print_pvals
from matplotlib import pyplot as plt


###############################################################################
# Multivariate data
# -----------------

# Anscombe's trio: only the first three data sets of Anscombe's quartet,
# because the last set does not share the same x values.
x = np.array([10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5])
Y = np.array([
    [8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68],
[9.14, 8.14, 8.74, 8.77, 9.26, 8.10, 6.13, 3.10, 9.13, 7.26, 4.74],
[7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73]
]).T

# Anscombe's anti-trio: to highlight the difference between classical and
# permuted tests, the number of variables is artificially increased.
Y = np.c_[Y, -Y]
vlabels = ['y{}'.format(v + 1) for v in range(Y.shape[1])]

# Plot Anscombe's trio and anti-trio
fig, ax = plt.subplots()
ax.set(title="Anscombe's trio and anti-trio", xlabel='x', ylabel='Y')
for i, y in enumerate(Y.T):
    ax.scatter(x, y, label='y{}'.format(i + 1))
ax.legend()
plt.show()


###############################################################################
# Classical statistics
# --------------------

# Pearson test
r_class = [pearsonr(y, x) for y in Y.T]
print('Pearson tests:')
print_results(r_class, vlabels, 'r')

# Spearman test
rho_class = [spearmanr(y, x) for y in Y.T]
print('Spearman tests:')
print_results(rho_class, vlabels, 'rho')

# Correction for multiple tests, using Bonferroni's method: multiply p-values
# by the number of tests.
n_tests = Y.shape[1]

p_corrected = np.array(r_class)[:, 1] * n_tests
print('\nPearson tests, after Bonferroni correction:')
print_pvals(p_corrected, vlabels)

p_corrected = np.array(rho_class)[:, 1] * n_tests
print('Spearman tests, after Bonferroni correction:')
print_pvals(p_corrected, vlabels)

# Bonferroni correction is a crucial step to adjust the p-value of each
# variable for multiple tests, avoiding type-I errors (ie, spurious
# correlations). However, it assumes that the tests are independent, which is
# not the case here.


###############################################################################
# Permutation statistics
# ----------------------

n_perm = 10000
np.random.seed(17)

# Permuted Pearson tests, corrected by Rmax
r_perm, p_perm = permutation_corr(Y, x=x, n=n_perm, side='two')
print('\nPermuted Pearson tests, with {} permutations:'.format(n_perm))
print_results(np.c_[r_perm, p_perm], vlabels, 'r')

# Permuted Spearman tests, corrected by Rmax
rho_perm, p_perm = permutation_corr(Y, x=x, n=n_perm, corr='spearman',
                                    side='two')
print('Permuted Spearman tests, with {} permutations:'.format(n_perm))
print_results(np.c_[rho_perm, p_perm], vlabels, 'rho')

# The Rmax method of the permutation correlation tests adjusts the p-values
# in a way that controls the family-wise error rate. It is more powerful than
# Bonferroni correction when the variables are correlated: it controls type-I
# errors while limiting type-II errors.
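
# A hedged sketch of the Rmax principle (illustrative only, not necessarily
# the pypermut implementation): for each permutation, the maximum absolute
# correlation across variables is recorded, and each observed correlation is
# compared to this null distribution of maxima.
def rmax_pvalues_sketch(Y, x, n_perm):
    r_obs = np.array([np.corrcoef(x, y)[0, 1] for y in Y.T])
    r_max = np.empty(n_perm)
    for p in range(n_perm):
        x_perm = np.random.permutation(x)
        r_max[p] = np.max(np.abs([np.corrcoef(x_perm, y)[0, 1] for y in Y.T]))
    # two-sided p-values, adjusted by the distribution of the max statistic
    return np.array([(np.sum(r_max >= abs(r)) + 1) / (n_perm + 1)
                     for r in r_obs])

print('\nRmax sketch, Pearson p-values:')
print_pvals(rmax_pvalues_sketch(Y, x, n_perm), vlabels)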


###############################################################################
# Reference
# ---------
# [1] Anscombe, "Graphs in Statistical Analysis", Am Stat, 1973.