From aa9a60fd95755136e988566dbff90ffc740c3e93 Mon Sep 17 00:00:00 2001 From: George Ho <19851673+eigenfoo@users.noreply.github.com> Date: Wed, 26 Jun 2019 00:47:21 +0000 Subject: [PATCH] BLD: begin porting ANOVA chapter (#11) * MAINT: update toc function * BLD: building * BUG: add d as a good name to pylint * MAINT: remove patsy as req --- .pylintrc | 2 +- Makefile | 2 +- index.html | 346 ++++++++++++++++++++++++++++++++++-------- plots.py | 51 +++++++ requirements-dev.txt | 5 + requirements.txt | 5 - tests-as-linear.ipynb | 297 ++++++++++++++++++++++++++++++------ utils.py | 7 +- 8 files changed, 602 insertions(+), 113 deletions(-) create mode 100644 requirements-dev.txt diff --git a/.pylintrc b/.pylintrc index 399caca..7c70d31 100644 --- a/.pylintrc +++ b/.pylintrc @@ -94,7 +94,7 @@ evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / stateme bad-functions=map,filter,input # Good variable names which should always be accepted, separated by a comma -good-names=a,b,c,f,i,j,k,df,x,y,y2,_,fig,ax +good-names=a,b,c,d,f,i,j,k,df,x,y,y2,_,fig,ax # Bad variable names which should always be refused, separated by a comma bad-names=foo,bar,baz,toto,tutu,tata diff --git a/Makefile b/Makefile index 3f18912..7f342eb 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ venv: # Set up Python virtual environment. python -m venv ${VENV_PATH}; \ source ${VENV_PATH}/bin/activate; \ pip install -U pip; \ - pip install -r requirements.txt; \ + pip install -r requirements-dev.txt; \ deactivate; \ ) @printf "\n\nVirtual environment created! \033[1;34mRun \`source ${VENV_PATH}/bin/activate\` to activate it.\033[0m\n\n\n" diff --git a/index.html b/index.html index baae204..6e4e239 100644 --- a/index.html +++ b/index.html @@ -13187,7 +13187,7 @@

Com
-

Last updated: June 25, 2019

+

Last updated: June 26, 2019

@@ -13203,74 +13203,33 @@

Contents

Common statistical tests are linear models (or: how to teach stats)
  • 1 The simplicity underlying common tests
  • 2 Settings and toy data
  • -
  • 3 Pearson and Spearman correlation -
  • +
  • 3 Pearson and Spearman correlation
  • 4 One mean
  • 5 Two means
  • 6 Three or more means
  • 7 Proportions: Chi-square is a log-linear model
  • 8 Sources and further equivalences
  • 9 Teaching materials and a course outline
  • -
  • 10 Limitations
  • +
  • 10 Limitations +
  • @@ -14755,7 +14714,7 @@

    6 Three or more means6.1 One-way ANOVA and Kruskal-Wallis

    6.1.1 Theory: As linear models

    Model: One mean for each group predicts $y$.

    $y = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \beta_3 x_3 +... \qquad \mathcal{H}_0: y = \beta_0$

    where $x_i$ are indicators ($x=0$ or $x=1$), with at most one $x_i=1$ while all others are $x_i=0$.

    -

    Notice how this is just "more of the same" of what we already did in other models above. When there are only two groups, this model is $y = \beta_0 + \beta_1*x$, i.e. the independent t-test. If there is only one group, it is $y = \beta_0$, i.e. the one-sample t-test. This is easy to see in the visualization below - just cover up a few groups and see that it matches the other visualizations above.

    +

    Notice how this is just "more of the same" of what we already did in other models above. When there are only two groups, this model is $y = \beta_0 + \beta_1*x$, i.e. the independent t-test. If there is only one group, it is $y = \beta_0$, i.e. the one-sample t-test. This is easy to see in the visualization below - just cover up a few groups and see that it matches the other visualizations above.

    @@ -14769,19 +14728,41 @@

    6.1 One-way ANOVA and Kruskal-Wall
    -
     
    +
    plots.one_way_anova_plot()
    +plt.show()
     
    +
    +
    + +
    +
    + + +
    + + + + + +
    + +
    + +
    +
    -

    A one-way ANOVA has a log-linear counterpart called goodness-of-fit test which we'll return to. By the way, since we now regress on more than one $x$, the one-way ANOVA is a multiple regression model.

    +

    A one-way ANOVA has a log-linear counterpart called the goodness-of-fit test, which we'll return to. By the way, since we now regress on more than one $x$, the one-way ANOVA is a multiple regression model.

    The Kruskal-Wallis test is simply a one-way ANOVA on the rank-transformed $y$ (value):

    -

    $rank(y) = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \beta_3 x_3 +...$

    +

    $\text{rank}(y) = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \beta_3 x_3 +...$

    This approximation is good enough for 12 or more data points. Again, if you do this for just one or two groups, we're already acquainted with those equations, i.e. the Wilcoxon signed-rank test or the Mann-Whitney U test respectively.

    @@ -14803,10 +14784,116 @@

    6.1.2 Example data
    -
     
    +
    num_points = 20
    +df = pd.DataFrame()
    +df["y"] = np.concatenate([
    +    np.random.normal(0.0, 1, num_points),
    +    np.random.normal(1.0, 1, num_points),
    +    np.random.normal(0.5, 1, num_points),
    +])
    +
    +df["group"] = list("".join([num_points * char for char in "abc"]))
    +df = df.join(pd.get_dummies(df.group, prefix="group", drop_first=True).astype(np.float64))
     
    +
    +

    + +
    + + + +
    +
    + +
    +
    +
    df.head()
    +
    + +
    +
    +
    + +
    +
    + + +
    + + + + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ygroupgroup_bgroup_c
    01.080830a0.00.0
    1-2.316629a0.00.0
    2-0.138365a0.00.0
    32.003775a0.00.0
    42.051200a0.00.0
    +
    +
    + +
    +
    @@ -14834,10 +14921,97 @@

    6.1.3 Python code: one-way ANOVA
    -
     
    +
    F, p = scipy.stats.f_oneway(df[df["group"] == "a"].y,
    +                            df[df["group"] == "b"].y,
    +                            df[df["group"] == "c"].y)
    +
    +res = smf.ols("y ~ 1 + group_b + group_c", df).fit()
     
    +
    +

    + + + + + +
    +
    + +
    +
    +
    # FIXME what to tabulate here?
    +utils.tabulate_results([None, p, None, None, None],
    +                       res,
    +                       ["scipy.stats.f_oneway", "smf.ols (y ~ 1 + group_b + group_c)"],
    +                       coeff="group_b")
    +
    + +
    +
    +
    + +
    +
    + + +
    + + + + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    valuep-valuest-values0.025 CI0.975 CI
    scipy.stats.f_onewayNaN0.015156NaNNaNNaN
    smf.ols (y ~ 1 + group_b + group_c)0.8018630.0121262.5911780.1821821.421545
    +
    +
    + +
    +
    @@ -14866,7 +15040,11 @@

    6.1.4 Python code: Kruskal-Wallis + + + +
    +
    + +
    +
    +
    !cat requirements.txt
    +
    + +
    +
    +
    + +
    +
    + + +
    + + + +
    +
    jupyter==1.0.0
    +matplotlib==3.1.0
    +numpy==1.16.4
    +pandas==0.24.2
    +patsy==0.5.1
    +scipy==1.3.0
    +statsmodels==0.10.0
    +
    +
    +
    + +
    +
    + +
    diff --git a/plots.py b/plots.py index 6b45fba..a9f0aba 100644 --- a/plots.py +++ b/plots.py @@ -182,3 +182,54 @@ def dummy_coding_plot(): ax.legend(fontsize="large") return fig, ax + + +def one_way_anova_plot(): + a = np.random.normal(0, 1, 20) + b = np.random.normal(-2, 1, 20) + c = np.random.normal(3, 1, 20) + d = np.random.normal(1.5, 1, 20) + + df = pd.DataFrame() + df["y"] = np.concatenate([a, b, c, d]) + df["group_2"] = np.concatenate( + [np.zeros_like(b)] + [np.ones_like(b)] + 2 * [np.zeros_like(b)] + ) + df["group_3"] = np.concatenate( + 2 * [np.zeros_like(c)] + [np.ones_like(c)] + [np.zeros_like(c)] + ) + df["group_4"] = np.concatenate(3 * [np.zeros_like(d)] + [np.ones_like(d)]) + + res = smf.ols("y ~ 1 + group_2 + group_3 + group_4", df).fit() + beta0, beta1, beta2, beta3 = res.params + + fig, ax = plt.subplots(figsize=[10, 8]) + ax.scatter(0 * np.ones_like(a), a, color="k") + ax.scatter(1 * np.ones_like(b), b, color="k") + ax.scatter(2 * np.ones_like(c), c, color="k") + ax.scatter(3 * np.ones_like(d), d, color="k") + + ax.axhline(beta0, color="b", label=r"$\beta_0$ (group 1 mean)") + + ax.plot([0.7, 1.3], 2 * [beta0 + beta1], color="navy") + ax.plot( + [0, 1], + [beta0, beta0 + beta1], + color="r", + label=r"$\beta_1, \beta_2, ...$ (slopes/differences to $\beta_0$)", + ) + + ax.plot( + [1.7, 2.3], + 2 * [beta0 + beta2], + color="navy", + label=r"$\beta_0+\beta_1, \beta_0+\beta_2 ...$ (group 2, 3 ... 
means)", + ) + ax.plot([1, 2], [beta0, beta0 + beta2], color="r") + + ax.plot([2.7, 3.3], 2 * [beta0 + beta3], color="navy") + ax.plot([2, 3], [beta0, beta0 + beta3], color="r") + + ax.legend(fontsize="large") + + return fig, ax diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..ccac16d --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,5 @@ +-r requirements.txt +black==19.3b0 +nbdime==1.0.6 +nbinteract==0.2.4 +pylint==2.3.1 diff --git a/requirements.txt b/requirements.txt index 487e94f..48e8d1b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,6 @@ -black==19.3b0 jupyter==1.0.0 matplotlib==3.1.0 -nbdime==1.0.6 -nbinteract==0.2.4 numpy==1.16.4 pandas==0.24.2 -patsy==0.5.1 -pylint==2.3.1 scipy==1.3.0 statsmodels==0.10.0 diff --git a/tests-as-linear.ipynb b/tests-as-linear.ipynb index 6a50d7d..2a184c6 100644 --- a/tests-as-linear.ipynb +++ b/tests-as-linear.ipynb @@ -17,7 +17,7 @@ { "data": { "text/markdown": [ - "Last updated: June 25, 2019" + "Last updated: June 26, 2019" ], "text/plain": [ "" @@ -34,49 +34,22 @@ "- [1 The simplicity underlying common tests](#1-The-simplicity-underlying-common-tests)\n", "- [2 Settings and toy data](#2-Settings-and-toy-data)\n", "- [3 Pearson and Spearman correlation](#3-Pearson-and-Spearman-correlation)\n", - " - [3.0.1 Theory: As linear models](#3.0.1-Theory:-As-linear-models)\n", - " - [3.0.2 Theory: rank-transformation](#3.0.2-Theory:-rank-transformation)\n", - " - [3.0.3 Python code: Pearson correlation](#3.0.3-Python-code:-Pearson-correlation)\n", - " - [3.0.4 Python code: Spearman correlation](#3.0.4-Python-code:-Spearman-correlation)\n", "- [4 One mean](#4-One-mean)\n", " - [4.1 One sample t-test and Wilcoxon signed-rank](#4.1-One-sample-t-test-and-Wilcoxon-signed-rank)\n", - " - [4.1.1 Theory: As linear models](#4.1.1-Theory:-As-linear-models)\n", - " - [4.1.2 Python code: One-sample t-test](#4.1.2-Python-code:-One-sample-t-test)\n", - " - [4.1.3 Python code: 
Wilcoxon signed-rank test](#4.1.3-Python-code:-Wilcoxon-signed-rank-test)\n", " - [4.2 Paired samples t-test and Wilcoxon matched pairs](#4.2-Paired-samples-t-test-and-Wilcoxon-matched-pairs)\n", - " - [4.2.1 Theory: As linear models](#4.2.1-Theory:-As-linear-models)\n", - " - [4.2.2 Python code: Paired sample t-test](#4.2.2-Python-code:-Paired-sample-t-test)\n", - " - [4.2.3 Python code: Wilcoxon matched pairs](#4.2.3-Python-code:-Wilcoxon-matched-pairs)\n", "- [5 Two means](#5-Two-means)\n", " - [5.1 Independent t-test and Mann-Whitney U](#5.1-Independent-t-test-and-Mann-Whitney-U)\n", - " - [5.1.1 Theory: As linear models](#5.1.1-Theory:-As-linear-models)\n", - " - [5.1.2 Theory: Dummy coding](#5.1.2-Theory:-Dummy-coding)\n", - " - [5.1.3 Theory: Dummy coding (continued)](#5.1.3-Theory:-Dummy-coding-(continued))\n", - " - [5.1.4 Python code: independent t-test](#5.1.4-Python-code:-independent-t-test)\n", - " - [5.1.5 Python code: Mann-Whitney U](#5.1.5-Python-code:-Mann-Whitney-U)\n", " - [5.2 Welch’s t-test](#5.2-Welch’s-t-test)\n", "- [6 Three or more means](#6-Three-or-more-means)\n", " - [6.1 One-way ANOVA and Kruskal-Wallis](#6.1-One-way-ANOVA-and-Kruskal-Wallis)\n", - " - [6.1.1 Theory: As linear models](#6.1.1-Theory:-As-linear-models)\n", - " - [6.1.2 Example data](#6.1.2-Example-data)\n", - " - [6.1.3 Python code: one-way ANOVA](#6.1.3-Python-code:-one-way-ANOVA)\n", - " - [6.1.4 Python code: Kruskal-Wallis](#6.1.4-Python-code:-Kruskal-Wallis)\n", " - [6.2 Two-way ANOVA](#6.2-Two-way-ANOVA)\n", - " - [6.2.1 Theory: As linear models](#6.2.1-Theory:-As-linear-models)\n", - " - [6.2.2 Python code: Two-way ANOVA](#6.2.2-Python-code:-Two-way-ANOVA)\n", - " - [6.3 ANCOVA](#6.3-ANCOVA)\n", "- [7 Proportions: Chi-square is a log-linear model](#7-Proportions:-Chi-square-is-a-log-linear-model)\n", " - [7.1 Goodness of fit](#7.1-Goodness-of-fit)\n", - " - [7.1.1 Theory: As log-linear model](#7.1.1-Theory:-As-log-linear-model)\n", - " - [7.1.2 Example 
data](#7.1.2-Example-data)\n", - " - [7.1.3 Python code: Goodness of fit](#7.1.3-Python-code:-Goodness-of-fit)\n", " - [7.2 Contingency tables](#7.2-Contingency-tables)\n", - " - [7.2.1 Theory: As log-linear model](#7.2.1-Theory:-As-log-linear-model)\n", - " - [7.2.2 Example data](#7.2.2-Example-data)\n", - " - [7.2.3 Python code: Chi-square test](#7.2.3-Python-code:-Chi-square-test)\n", "- [8 Sources and further equivalences](#8-Sources-and-further-equivalences)\n", "- [9 Teaching materials and a course outline](#9-Teaching-materials-and-a-course-outline)\n", - "- [10 Limitations](#10-Limitations)" + "- [10 Limitations](#10-Limitations)\n", + " - [11 Computing Environment](#11-Computing-Environment)" ], "text/plain": [ "" @@ -626,7 +599,9 @@ { "cell_type": "code", "execution_count": 13, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { @@ -1412,25 +1387,41 @@ "\n", "where $x_i$ are indicators ($x=0$ or $x=1$) where at most one $x_i=1$ while all others are $x_i=0$. \n", "\n", - "Notice how this is just \"more of the same\" of what we already did in other models above. When there are only two groups, this model is $y = \\beta_0 + \\beta_1*x$, i.e. the [independent t-test](#5.1.4-Python-code:-independent-t-test). If there is only one group, it is $y = \\beta_0$, i.e. the [one-sample t-test](#4.1-One-sample-t-test-and-Wilcoxon-signed-rank). This is easy to see in the visualization below - just cover up a few groups and see that it matches the other visualizations above." + "Notice how this is just \"more of the same\" of what we already did in other models above. When there are only two groups, this model is $y = \\beta_0 + \\beta_1*x$, i.e. the [independent t-test](#5.1-Independent-t-test-and-Mann-Whitney-U). If there is only one group, it is $y = \\beta_0$, i.e. the [one-sample t-test](#4.1-One-sample-t-test-and-Wilcoxon-signed-rank). 
This is easy to see in the visualization below - just cover up a few groups and see that it matches the other visualizations above." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
    " + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plots.one_way_anova_plot()\n", + "plt.show()" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "A one-way ANOVA has a log-linear counterpart called [goodness-of-fit](#goodness) test which we'll return to. By the way, since we now regress on more than one $x$, the one-way ANOVA is a **multiple regression** model.\n", + "A one-way ANOVA has a log-linear counterpart called [goodness-of-fit](#7.1-Goodness-of-fit) test which we'll return to. By the way, since we now regress on more than one $x$, the one-way ANOVA is a **multiple regression** model.\n", "\n", "The **Kruskal-Wallis** test is simply a **one-way ANOVA** on the rank-transformed $y$ (`value`):\n", "\n", - "$rank(y) = \\beta_0 + \\beta_1 x_1 + \\beta_2 x_2 + \\beta_3 x_3 +...$\n", + "$\\text{rank}(y) = \\beta_0 + \\beta_1 x_1 + \\beta_2 x_2 + \\beta_3 x_3 +...$\n", "\n", "This approximation is [good enough for 12 or more data points](https://lindeloev.github.io/tests-as-linear/simulations/simulate_kruskall.html). Again, if you do this for just one or two groups, we're already acquainted with those equations, i.e. the [Wilcoxon signed-rank test](#4.1-One-sample-t-test-and-Wilcoxon-signed-rank) or the [Mann-Whitney U test](#5.1-Independent-t-test-and-Mann-Whitney-U) respectively. 
" ] @@ -1446,10 +1437,111 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "num_points = 20\n", + "df = pd.DataFrame()\n", + "df[\"y\"] = np.concatenate([\n", + " np.random.normal(0.0, 1, num_points),\n", + " np.random.normal(1.0, 1, num_points),\n", + " np.random.normal(0.5, 1, num_points),\n", + "])\n", + "\n", + "df[\"group\"] = list(\"\".join([num_points * char for char in \"abc\"]))\n", + "df = df.join(pd.get_dummies(df.group, prefix=\"group\", drop_first=True).astype(np.float64))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    ygroupgroup_bgroup_c
    01.080830a0.00.0
    1-2.316629a0.00.0
    2-0.138365a0.00.0
    32.003775a0.00.0
    42.051200a0.00.0
    \n", + "
    " + ], + "text/plain": [ + " y group group_b group_c\n", + "0 1.080830 a 0.0 0.0\n", + "1 -2.316629 a 0.0 0.0\n", + "2 -0.138365 a 0.0 0.0\n", + "3 2.003775 a 0.0 0.0\n", + "4 2.051200 a 0.0 0.0" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] }, { "cell_type": "markdown", @@ -1469,10 +1561,93 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "F, p = scipy.stats.f_oneway(df[df[\"group\"] == \"a\"].y,\n", + " df[df[\"group\"] == \"b\"].y,\n", + " df[df[\"group\"] == \"c\"].y)\n", + "\n", + "res = smf.ols(\"y ~ 1 + group_b + group_c\", df).fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    valuep-valuest-values0.025 CI0.975 CI
    scipy.stats.f_onewayNaN0.015156NaNNaNNaN
    smf.ols (y ~ 1 + group_b + group_c)0.8018630.0121262.5911780.1821821.421545
    \n", + "
    " + ], + "text/plain": [ + " value p-values t-values 0.025 CI \\\n", + "scipy.stats.f_oneway NaN 0.015156 NaN NaN \n", + "smf.ols (y ~ 1 + group_b + group_c) 0.801863 0.012126 2.591178 0.182182 \n", + "\n", + " 0.975 CI \n", + "scipy.stats.f_oneway NaN \n", + "smf.ols (y ~ 1 + group_b + group_c) 1.421545 " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# FIXME what to tabulate here?\n", + "utils.tabulate_results([None, p, None, None, None],\n", + " res,\n", + " [\"scipy.stats.f_oneway\", \"smf.ols (y ~ 1 + group_b + group_c)\"],\n", + " coeff=\"group_b\")" + ] }, { "cell_type": "markdown", @@ -1494,10 +1669,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "_, p = scipy.stats.kruskal(df[df[\"group\"] == \"a\"].y,\n", + " df[df[\"group\"] == \"b\"].y,\n", + " df[df[\"group\"] == \"c\"].y)\n", + "\n", + "res = smf.ols(\"y ~ 1 + group_b + group_c\", df).fit() # TODO rank" + ] }, { "cell_type": "markdown", @@ -1850,6 +2031,36 @@ "\n", "4. Several named tests are still missing from the list and may be added at a later time. This includes the Sign test (require large N to be reasonably approximated by a linear model), Friedman as RM-ANOVA on `rank(y)`, McNemar, and Binomial/Multinomial. See stuff on these in [the section on links to further equivalences](#links). 
If you think that they should be included here, feel free to submit \"solutions\" to [the github repo](https://github.com/lindeloev/tests-as-linear/) of this doc!\n" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 11 Computing Environment" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "jupyter==1.0.0\r\n", + "matplotlib==3.1.0\r\n", + "numpy==1.16.4\r\n", + "pandas==0.24.2\r\n", + "patsy==0.5.1\r\n", + "scipy==1.3.0\r\n", + "statsmodels==0.10.0\r\n" + ] + } + ], + "source": [ + "!cat requirements.txt" + ] } ], "metadata": { diff --git a/utils.py b/utils.py index 0f4e69f..624cace 100644 --- a/utils.py +++ b/utils.py @@ -58,7 +58,7 @@ def tabulate_results(test_values, ols_results, names, coeff="x"): return table -def generate_toc(notebook="tests-as-linear.ipynb"): +def generate_toc(notebook="tests-as-linear.ipynb", max_header_levels=2): """ Generates a table of contents in Markdown. @@ -70,6 +70,9 @@ def generate_toc(notebook="tests-as-linear.ipynb"): ---------- notebook : str Path to notebook for which to generate a table of contents. + max_header_levels : int + Maximum number of header levels to show in table of contents (i.e. the + depth of headers to display). Returns ------- @@ -83,7 +86,7 @@ def generate_toc(notebook="tests-as-linear.ipynb"): for cell in cells: if cell["cell_type"] == "markdown": for line in cell["source"]: - match = re.search(r"^#+ ", line) + match = re.search(r"^[#]{{1,{0}}} ".format(max_header_levels), line) if match: level = len(line) - len(line.lstrip("#")) link = line.strip(" #\n").replace(" ", "-")