add tests and examples

qbarthelemy · Apr 26, 2021 · d18fe69 · d18fe69
1 parent a617108
commit d18fe69
Show file tree

Hide file tree

Showing 7 changed files with 1,462 additions and 0 deletions.
diff --git a/examples/README.txt b/examples/README.txt
@@ -0,0 +1,6 @@
+Examples Gallery
+================
+
+.. contents:: Contents
+   :local:
+   :depth: 1
diff --git a/examples/compute_auroc_pvalue.py b/examples/compute_auroc_pvalue.py
@@ -0,0 +1,84 @@
+"""
+===============================================================================
+Compute the p-value associated to AUROC
+===============================================================================
+
+Compute the p-value associated to the area under the curve (AUC) of the 
+receiver operating characteristic (ROC), for a binary (two-class)
+classification problem.
+
+Warning: this example requires scikit-learn dependency (0.22 at least).
+"""
+# Authors: Quentin Barthélemy
+
+import numpy as np
+from sklearn.datasets import make_classification
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import roc_curve, roc_auc_score
+from pypermut.ml import permutation_metric, standard_error_auroc
+from pypermut.misc import pvals_to_stars
+from matplotlib import pyplot as plt
+
+
+###############################################################################
+# Artificial data and classifier
+# ------------------------------
+
+X, y = make_classification(n_classes=2, n_samples=100, n_features=100,
+                           n_informative=10, n_redundant=10,
+                           n_clusters_per_class=5, random_state=1)
+Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.5,
+                                                random_state=2)
+# Logistic regression: a very good old-fashioned classifier
+clf = LogisticRegression().fit(Xtrain, ytrain)
+ytest_probas = clf.predict_proba(Xtest)[:, 1]
+
+
+###############################################################################
+# Compute ROC curve and AUC
+# -------------------------
+
+# In this example, we focus on the AUROC metric. However, this analysis can be
+# applied to any metric measuring the model performance. It can be any function
+# from sklearn.metrics, like roc_auc_score, log_loss, etc.
+
+# ROC curve
+fpr, tpr, _ = roc_curve(ytest, ytest_probas)
+
+fig, ax = plt.subplots()
+ax.set(title="ROC curve", xlabel='FPR', ylabel='TPR')
+ax.plot(fpr, tpr, 'C1', label='Logistic regression')
+ax.plot([0, 1], [0, 1], 'b--', label='Chance level')
+plt.xlim([0.0, 1.0])
+plt.ylim([0.0, 1.05])
+plt.legend(loc="lower right")
+plt.show()
+
+# AUC
+auroc = roc_auc_score(ytest, ytest_probas)
+print('AUROC = {:.3f}\n'.format(auroc))
+
+
+###############################################################################
+# Compute standard error and p-value of AUC
+# -----------------------------------------
+
+n_perm = 5000
+np.random.seed(17)
+
+# standard error of AUC
+auroc_se = standard_error_auroc(ytest, ytest_probas, auroc)
+
+# p-value of AUC, by permutations
+auroc_, pval = permutation_metric(ytest, ytest_probas, roc_auc_score,
+                                  side='right', n=n_perm)
+print('AUROC = {:.3f} +/- {:.3f}, p={:.2e} ({})'
+      .format(auroc_, auroc_se, pval, pvals_to_stars(pval)))
+
+
+###############################################################################
+# Reference
+# ---------
+# [1] Hanley & McNeil, "The meaning and use of the area under a receiver
+# operating characteristic (ROC) curve", Radiology, 1982.
diff --git a/examples/compute_exact_ttest_ind.py b/examples/compute_exact_ttest_ind.py
@@ -0,0 +1,142 @@
+"""
+===============================================================================
+Compute exact Student's t-test for independent samples
+===============================================================================
+
+Compute classical, permutated and exact right-sided Student's t-tests, for
+independent / unpaired samples.
+"""
+# Authors: Quentin Barthélemy
+
+import numpy as np
+from scipy.stats import t, ttest_ind, mannwhitneyu, shapiro, levene
+from pypermut.stats import permutation_ttest_ind, permutation_mannwhitneyu
+from pypermut.misc import pvals_to_stars
+from matplotlib import pyplot as plt
+
+
+###############################################################################
+# Univariate data
+# ---------------
+
+# Artificial small samples
+s1 = np.array([7.5, 7.9, 8.3, 8.5, 8.7, 9.2, 9.6, 10.2])
+s2 = np.array([5.9, 6.5, 6.9, 7.4, 7.8, 7.9, 8.1, 8.5, 8.7, 9.1, 9.9])
+
+# Plot samples
+fig, ax = plt.subplots()
+ax.set(title='Boxplot of samples', xlabel='Samples', ylabel='Values')
+box = ax.boxplot(np.array([s1, s2], dtype=object), medianprops={'color':'k'},
+                 showmeans=True, meanline=True, meanprops={'color':'r'})
+ax.plot([1.1, 1.9], [m.get_ydata()[0] for m in box.get('means')], c='r')
+ax.scatter(np.ones(len(s1)), s1)
+ax.scatter(2*np.ones(len(s2)), s2)
+plt.show()
+
+
+###############################################################################
+# Student's t-tests for independent samples
+# -----------------------------------------
+
+# The null hypothesis is that these two samples s1 and s2 are drawn from the
+# same Gaussian distribution.
+
+# Classical test
+t_class, p_class = ttest_ind(s1, s2, alternative='greater')
+print('Classical t-test:\n t={:.2f}, p={:.3e} ({})'
+      .format(t_class, p_class, pvals_to_stars(p_class)))
+
+# Permutated test
+np.random.seed(17)
+n_perm = 10000
+t_perm, p_perm = permutation_ttest_ind(s1, s2, n=n_perm, side='one')
+print('Permutation t-test:\n t={:.2f}, p={:.3e} ({})'
+      .format(t_perm[0], p_perm[0], pvals_to_stars(p_perm[0])))
+
+# Exact test
+t_exact, p_exact, t_dist = permutation_ttest_ind(s1, s2, n='all', side='one',
+                                                 return_dist=True)
+print('Exact t-test:\n t={:.2f}, p={:.3e} ({})\n'
+      .format(t_exact[0], p_exact[0], pvals_to_stars(p_exact[0])))
+
+# Plot the difference between approximate and exact right-sided p-values
+fig = plt.figure(figsize=(14, 5))
+ax1 = fig.add_subplot(121, title='Approximate p-value', xlabel='t')
+ax1.axvline(x=t_exact, c='r', label='Real statistic t')
+df = len(s1) + len(s2) - 2
+x = np.linspace(t.ppf(0.001, df), t.ppf(0.999, df), 100)
+ax1.plot(x, t.pdf(x, df), label='Approximate t-distribution')
+ax1.fill_between(x, t.pdf(x, df), where=(x>=t_exact), facecolor='r',
+                 label='Approximate p-value')
+plt.legend(loc='upper left')
+ax2 = fig.add_subplot(122, title='Exact p-value', xlabel='t', sharey=ax1)
+ax2.axvline(x=t_exact, c='r', label='Real statistic t')
+y, x, _ = ax2.hist(t_dist, 50, density=True, alpha=0.4,
+                   label='Exact t-distribution')
+ax2.fill_between(x[1:], y, where=(x[1:]>=t_exact), facecolor='r', step='pre',
+                 label='Exact p-value')
+ax2.set_xlim(ax1.get_xlim())
+plt.legend(loc='upper left')
+plt.show()
+
+# Classical t-test detects a trend in the difference between these samples,
+# but is not able to reject the null hypothesis (with alpha = 0.05).
+# Permutation and exact tests reject the null hypothesis, the later giving the
+# exact p-value. Amazing!
+
+
+###############################################################################
+# Assumptions
+# -----------
+
+# But, wait! Is Student's t-test valid for such data?
+# We have not checked the assumptions related to this parametric test.
+
+alpha = 0.05
+
+# Check homoscedasticity (homogeneity of variances) assumption, with a Levene's
+# test
+stat, p = levene(s1, s2)
+print('Levene test:\n Equal var={}, p={:.3f} ({})'
+      .format(p < alpha, p, pvals_to_stars(p)))
+
+# Ok, but we could use a Welch's t-test, which does not assume equal variance.
+
+# Check normality / Gaussianity assumption, with Shapiro-Wilk tests
+stat, p = shapiro(s1)
+print('Shapiro-Wilk test, for first sample:\n Normality={}, p={:.3f} ({})'
+      .format(p < alpha, p, pvals_to_stars(p)))
+stat, p = shapiro(s2)
+print('Shapiro-Wilk test, for second sample:\n Normality={}, p={:.3f} ({})\n'
+      .format(p < alpha, p, pvals_to_stars(p)))
+
+# Consequently, neither Student's t-test nor Welch's t-test can be applied on
+# these non-Gaussian data. We must use the non-parametric version.
+
+
+###############################################################################
+# Mann-Whitney U tests for unpaired samples
+# -----------------------------------------
+
+# The null hypothesis is that these two samples s1 and s2 are drawn from the
+# same distribution, without assumption on its shape.
+
+# Classical test
+U_class, p_class = mannwhitneyu(s1, s2, alternative='less')
+print('Classical Mann-Whitney test:\n U={:.1f}, p={:.3e} ({})'
+      .format(U_class, p_class, pvals_to_stars(p_class)))
+
+# Permutated test
+U_perm, p_perm = permutation_mannwhitneyu(s1, s2, n=n_perm)
+print('Permutation Mann-Whitney test:\n U={:.1f}, p={:.3e} ({})'
+      .format(U_perm[0], p_perm[0], pvals_to_stars(p_perm[0])))
+
+# Exact test
+U_exact, p_exact = permutation_mannwhitneyu(s1, s2, n='all')
+print('Exact Mann-Whitney test:\n U={:.1f}, p={:.3e} ({})'
+      .format(U_exact[0], p_exact[0], pvals_to_stars(p_exact[0])))
+
+# Finally, we must conclude that we can't reject the null hypothesis.
+
+
+###############################################################################
diff --git a/examples/correct_multiple_correlations.py b/examples/correct_multiple_correlations.py
@@ -0,0 +1,103 @@
+"""
+===============================================================================
+Correct Pearson and Spearman correlations on Anscombe's data
+===============================================================================
+
+Compare classical and permutated tests on Anscombe's data sets [1],
+for Pearson and Spearman correlations after correction for multiple tests.
+"""
+# Authors: Quentin Barthélemy
+
+import numpy as np
+from scipy.stats import pearsonr, spearmanr
+from pypermut.stats import permutation_corr
+from pypermut.misc import print_results, print_pvals
+from matplotlib import pyplot as plt
+
+
+###############################################################################
+# Multivariate data
+# -----------------
+
+# Anscombe's trio: only the first three data sets of the Anscombe's quartet,
+# because the last set does not share the same values x.
+x = np.array([10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5])
+Y = np.array([
+    [8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84,4.82, 5.68],
+    [9.14, 8.14, 8.74, 8.77, 9.26, 8.10, 6.13, 3.10, 9.13, 7.26, 4.74],
+    [7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73]
+]).T
+
+# Anscombe's anti-trio: to highlight the difference between classical and
+# permutated tests, the number of variables is artificially increased.
+Y = np.c_[Y, -Y]
+vlabels = ['y{}'.format(v + 1) for v in range(Y.shape[1])]
+
+# Plot Anscombe's trio and anti-trio
+fig, ax = plt.subplots()
+ax.set(title="Anscombe's trio and anti-trio", xlabel='x', ylabel='Y')
+[plt.scatter(x, y, label='y{}'.format(i + 1)) for i, y in enumerate(Y.T)]
+ax.legend()
+plt.show()
+
+
+###############################################################################
+# Classical statistics
+# --------------------
+
+# Pearson test
+r_class = [pearsonr(y, x) for y in Y.T]
+print('Pearson tests:')
+print_results(r_class, vlabels, 'r')
+
+# Spearman test
+rho_class = [spearmanr(y, x) for y in Y.T]
+print('Spearman tests:')
+print_results(rho_class, vlabels, 'rho')
+
+# Correction for multiple tests, using Bonferroni's method: multiply p-values
+# by the number of tests.
+n_tests = Y.shape[1]
+
+p_corrected = np.array(r_class)[:, 1] * n_tests
+print('\nPearson tests, after Bonferroni correction:')
+print_pvals(p_corrected, vlabels)
+
+p_corrected = np.array(rho_class)[:, 1] * n_tests
+print('Spearman tests, after Bonferroni correction:')
+print_pvals(p_corrected, vlabels)
+
+# Bonferroni correction is a crucial step to adjust p-values of each variable
+# for multiple tests, avoiding type-I errors (ie, spurious correlations).
+# However, it makes the hypothesis that tests are independent, that is not the
+# case here.
+
+
+###############################################################################
+# Permutation statistics
+# ----------------------
+
+n_perm = 10000
+np.random.seed(17)
+
+# Permutated Pearson test, corrected by Rmax
+r_perm, p_perm = permutation_corr(Y, x=x, n=n_perm, side='two')
+print('\nPermutated Pearson tests, with {} permutations:'.format(n_perm))
+print_results(np.c_[r_perm, p_perm], vlabels, 'r')
+
+# Permutated Spearman test, corrected by Rmax
+rho_perm, p_perm = permutation_corr(Y, x=x, n=n_perm, corr='spearman',
+                                    side='two')
+print('Permutated Spearman tests, with {} permutations:'.format(n_perm))
+print_results(np.c_[rho_perm, p_perm], vlabels, 'rho')
+
+# The Rmax method of the permutation correlation tests is used for adjusting
+# the p-values in a way that controls the family-wise error rate.
+# It is more powerful than Bonferroni correction when different variables are
+# correlated: it avoids type-I and type-II errors.
+
+
+###############################################################################
+# Reference
+# ---------
+# [1] Anscombe, "Graphs in Statistical Analysis", Am Stat, 1973.