From 25c64c3c10a320e78db10726a764af45ea6acc80 Mon Sep 17 00:00:00 2001 From: Christopher Brooks Date: Sun, 29 Sep 2019 16:07:07 +0000 Subject: [PATCH] files --- 190916_pandas.ipynb | 16 +- 190925_querying_dataframes.ipynb | 3334 +++++++++--------------------- 190930_more_pandas.ipynb | 184 ++ 191002_merging.ipynb | 394 ++++ 4 files changed, 1538 insertions(+), 2390 deletions(-) create mode 100644 190930_more_pandas.ipynb create mode 100644 191002_merging.ipynb diff --git a/190916_pandas.ipynb b/190916_pandas.ipynb index 2b2d5ca..f998fe0 100644 --- a/190916_pandas.ipynb +++ b/190916_pandas.ipynb @@ -174,13 +174,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "slideshow": { "slide_type": "slide" } }, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'np' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnan\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnan\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'np' is not defined" + ] + } + ], "source": [ "np.isnan(np.nan)" ] diff --git a/190925_querying_dataframes.ipynb b/190925_querying_dataframes.ipynb index e00030a..c0c1fdd 100644 --- a/190925_querying_dataframes.ipynb +++ b/190925_querying_dataframes.ipynb @@ -159,7 +159,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "slideshow": { "slide_type": "subslide" @@ -184,7 +184,7 @@ "Name: chance of admit, Length: 400, dtype: bool" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, 
"output_type": "execute_result" } @@ -209,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": { "slideshow": { "slide_type": "subslide" @@ -336,7 +336,7 @@ "6 1 0.90 " ] }, - "execution_count": 5, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -360,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": { "slideshow": { "slide_type": "subslide" @@ -487,7 +487,7 @@ "6 1 0.90 " ] }, - "execution_count": 7, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -511,7 +511,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": { "slideshow": { "slide_type": "subslide" @@ -638,7 +638,7 @@ "5 NaN NaN " ] }, - "execution_count": 8, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -663,7 +663,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -786,7 +786,7 @@ "6 1.0 0.90 " ] }, - "execution_count": 9, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -810,13 +810,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "slideshow": { "slide_type": "subslide" } }, - "outputs": [], + "outputs": [ + { + "ename": "ValueError", + "evalue": "The truth value of a Series is ambiguous. 
Use a.empty, a.bool(), a.item(), a.any() or a.all().", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'chance of admit'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0.7\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'chance of admit'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m0.9\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__nonzero__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1554\u001b[0m \u001b[0;34m\"The truth value of a {0} is ambiguous. \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1555\u001b[0m \"Use a.empty, a.bool(), a.item(), a.any() or a.all().\".format(\n\u001b[0;32m-> 1556\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__class__\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1557\u001b[0m )\n\u001b[1;32m 1558\u001b[0m )\n", + "\u001b[0;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()." + ] + } + ], "source": [ "(df['chance of admit'] > 0.7) and (df['chance of admit'] < 0.9)" ] @@ -911,294 +924,13 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - "
Serial No.12345678910...391392393394395396397398399400
gre score337.00324.00316.00322.00314.00330.00321.00308.00302.0323.00...314.00318.00326.00317.00329.00324.00325.00330.00312.00333.00
toefl score118.00107.00104.00110.00103.00115.00109.00101.00102.0108.00...102.00106.00112.00104.00111.00110.00107.00116.00103.00117.00
university rating4.004.003.003.002.005.003.002.001.03.00...2.003.004.002.004.003.003.004.003.004.00
sop4.504.003.003.502.004.503.003.002.03.50...2.002.004.003.004.503.503.005.003.505.00
lor4.504.503.502.503.003.004.004.001.53.00...2.503.003.503.004.003.503.504.504.004.00
cgpa9.658.878.008.678.219.348.207.908.08.60...8.248.659.128.769.239.049.119.458.789.66
research1.001.001.001.000.001.001.000.000.00.00...0.000.001.000.001.001.001.001.000.001.00
chance of admit0.920.760.720.800.650.900.750.680.50.45...0.640.710.840.770.890.820.840.910.670.95
\n", - "

8 rows × 400 columns

\n", - "
" - ], - "text/plain": [ - "Serial No. 1 2 3 4 5 6 7 \\\n", - "gre score 337.00 324.00 316.00 322.00 314.00 330.00 321.00 \n", - "toefl score 118.00 107.00 104.00 110.00 103.00 115.00 109.00 \n", - "university rating 4.00 4.00 3.00 3.00 2.00 5.00 3.00 \n", - "sop 4.50 4.00 3.00 3.50 2.00 4.50 3.00 \n", - "lor 4.50 4.50 3.50 2.50 3.00 3.00 4.00 \n", - "cgpa 9.65 8.87 8.00 8.67 8.21 9.34 8.20 \n", - "research 1.00 1.00 1.00 1.00 0.00 1.00 1.00 \n", - "chance of admit 0.92 0.76 0.72 0.80 0.65 0.90 0.75 \n", - "\n", - "Serial No. 8 9 10 ... 391 392 393 394 \\\n", - "gre score 308.00 302.0 323.00 ... 314.00 318.00 326.00 317.00 \n", - "toefl score 101.00 102.0 108.00 ... 102.00 106.00 112.00 104.00 \n", - "university rating 2.00 1.0 3.00 ... 2.00 3.00 4.00 2.00 \n", - "sop 3.00 2.0 3.50 ... 2.00 2.00 4.00 3.00 \n", - "lor 4.00 1.5 3.00 ... 2.50 3.00 3.50 3.00 \n", - "cgpa 7.90 8.0 8.60 ... 8.24 8.65 9.12 8.76 \n", - "research 0.00 0.0 0.00 ... 0.00 0.00 1.00 0.00 \n", - "chance of admit 0.68 0.5 0.45 ... 0.64 0.71 0.84 0.77 \n", - "\n", - "Serial No. 
395 396 397 398 399 400 \n", - "gre score 329.00 324.00 325.00 330.00 312.00 333.00 \n", - "toefl score 111.00 110.00 107.00 116.00 103.00 117.00 \n", - "university rating 4.00 3.00 3.00 4.00 3.00 4.00 \n", - "sop 4.50 3.50 3.00 5.00 3.50 5.00 \n", - "lor 4.00 3.50 3.50 4.50 4.00 4.00 \n", - "cgpa 9.23 9.04 9.11 9.45 8.78 9.66 \n", - "research 1.00 1.00 1.00 1.00 0.00 1.00 \n", - "chance of admit 0.89 0.82 0.84 0.91 0.67 0.95 \n", - "\n", - "[8 rows x 400 columns]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.T" ] @@ -1216,7 +948,47 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "df.set_index('lor').head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "# of course, this didn't actually change our previous dataframe, right?\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Multilevel indexing\n", + "* we can have heirarchical indicies, which is pretty cool\n", + "* let's look at some census data" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": { "slideshow": { "slide_type": "subslide" @@ -1244,268 +1016,213 @@ " \n", " \n", " \n", - " gre score\n", - " toefl score\n", - " university rating\n", - " sop\n", - " cgpa\n", - " research\n", - " chance of admit\n", - " \n", - " \n", - " lor\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " SUMLEV\n", + " REGION\n", + " DIVISION\n", + " STATE\n", + " COUNTY\n", + " STNAME\n", + " CTYNAME\n", + " CENSUS2010POP\n", + " ESTIMATESBASE2010\n", + " POPESTIMATE2010\n", + " ...\n", + " RDOMESTICMIG2011\n", + " RDOMESTICMIG2012\n", + " RDOMESTICMIG2013\n", + " 
RDOMESTICMIG2014\n", + " RDOMESTICMIG2015\n", + " RNETMIG2011\n", + " RNETMIG2012\n", + " RNETMIG2013\n", + " RNETMIG2014\n", + " RNETMIG2015\n", " \n", " \n", " \n", " \n", - " 4.5\n", - " 337\n", - " 118\n", - " 4\n", - " 4.5\n", - " 9.65\n", + " 0\n", + " 40\n", + " 3\n", + " 6\n", " 1\n", - " 0.92\n", + " 0\n", + " Alabama\n", + " Alabama\n", + " 4779736\n", + " 4780127\n", + " 4785161\n", + " ...\n", + " 0.002295\n", + " -0.193196\n", + " 0.381066\n", + " 0.582002\n", + " -0.467369\n", + " 1.030015\n", + " 0.826644\n", + " 1.383282\n", + " 1.724718\n", + " 0.712594\n", " \n", " \n", - " 4.5\n", - " 324\n", - " 107\n", - " 4\n", - " 4.0\n", - " 8.87\n", + " 1\n", + " 50\n", + " 3\n", + " 6\n", " 1\n", - " 0.76\n", + " 1\n", + " Alabama\n", + " Autauga County\n", + " 54571\n", + " 54571\n", + " 54660\n", + " ...\n", + " 7.242091\n", + " -2.915927\n", + " -3.012349\n", + " 2.265971\n", + " -2.530799\n", + " 7.606016\n", + " -2.626146\n", + " -2.722002\n", + " 2.592270\n", + " -2.187333\n", " \n", " \n", - " 3.5\n", - " 316\n", - " 104\n", + " 2\n", + " 50\n", " 3\n", - " 3.0\n", - " 8.00\n", + " 6\n", " 1\n", - " 0.72\n", - " \n", - " \n", - " 2.5\n", - " 322\n", - " 110\n", " 3\n", - " 3.5\n", - " 8.67\n", - " 1\n", - " 0.80\n", - " \n", - " \n", - " 3.0\n", - " 314\n", - " 103\n", - " 2\n", - " 2.0\n", - " 8.21\n", - " 0\n", - " 0.65\n", - " \n", - " \n", - "\n", - "" - ], - "text/plain": [ - " gre score toefl score university rating sop cgpa research \\\n", - "lor \n", - "4.5 337 118 4 4.5 9.65 1 \n", - "4.5 324 107 4 4.0 8.87 1 \n", - "3.5 316 104 3 3.0 8.00 1 \n", - "2.5 322 110 3 3.5 8.67 1 \n", - "3.0 314 103 2 2.0 8.21 0 \n", - "\n", - " chance of admit \n", - "lor \n", - "4.5 0.92 \n", - "4.5 0.76 \n", - "3.5 0.72 \n", - "2.5 0.80 \n", - "3.0 0.65 " - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.set_index('lor').head()" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - 
"metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", + " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
gre scoretoefl scoreuniversity ratingsoplorcgparesearchchance of admit
Serial No.
133711844.54.59.6510.92
232410744.04.58.8710.76AlabamaBaldwin County182265182265183193...14.83296017.64729321.84570519.24328717.19787215.84417618.55962722.72762620.31714218.293499
33161045033.03.58.00610.725AlabamaBarbour County274572745727341...-4.728132-2.500690-7.056824-3.904217-10.543299-4.874741-2.758113-7.167664-3.978583-10.543299
43221105033.52.58.67610.80
531410322.03.08.2100.657AlabamaBibb County229152291922861...-5.527043-5.068871-6.201001-0.1775370.177258-5.088389-4.363636-5.4037290.7545331.107861
\n", + "

5 rows × 100 columns

\n", "
" ], "text/plain": [ - " gre score toefl score university rating sop lor cgpa \\\n", - "Serial No. \n", - "1 337 118 4 4.5 4.5 9.65 \n", - "2 324 107 4 4.0 4.5 8.87 \n", - "3 316 104 3 3.0 3.5 8.00 \n", - "4 322 110 3 3.5 2.5 8.67 \n", - "5 314 103 2 2.0 3.0 8.21 \n", + " SUMLEV REGION DIVISION STATE COUNTY STNAME CTYNAME \\\n", + "0 40 3 6 1 0 Alabama Alabama \n", + "1 50 3 6 1 1 Alabama Autauga County \n", + "2 50 3 6 1 3 Alabama Baldwin County \n", + "3 50 3 6 1 5 Alabama Barbour County \n", + "4 50 3 6 1 7 Alabama Bibb County \n", "\n", - " research chance of admit \n", - "Serial No. \n", - "1 1 0.92 \n", - "2 1 0.76 \n", - "3 1 0.72 \n", - "4 1 0.80 \n", - "5 0 0.65 " + " CENSUS2010POP ESTIMATESBASE2010 POPESTIMATE2010 ... RDOMESTICMIG2011 \\\n", + "0 4779736 4780127 4785161 ... 0.002295 \n", + "1 54571 54571 54660 ... 7.242091 \n", + "2 182265 182265 183193 ... 14.832960 \n", + "3 27457 27457 27341 ... -4.728132 \n", + "4 22915 22919 22861 ... -5.527043 \n", + "\n", + " RDOMESTICMIG2012 RDOMESTICMIG2013 RDOMESTICMIG2014 RDOMESTICMIG2015 \\\n", + "0 -0.193196 0.381066 0.582002 -0.467369 \n", + "1 -2.915927 -3.012349 2.265971 -2.530799 \n", + "2 17.647293 21.845705 19.243287 17.197872 \n", + "3 -2.500690 -7.056824 -3.904217 -10.543299 \n", + "4 -5.068871 -6.201001 -0.177537 0.177258 \n", + "\n", + " RNETMIG2011 RNETMIG2012 RNETMIG2013 RNETMIG2014 RNETMIG2015 \n", + "0 1.030015 0.826644 1.383282 1.724718 0.712594 \n", + "1 7.606016 -2.626146 -2.722002 2.592270 -2.187333 \n", + "2 15.844176 18.559627 22.727626 20.317142 18.293499 \n", + "3 -4.874741 -2.758113 -7.167664 -3.978583 -10.543299 \n", + "4 -5.088389 -4.363636 -5.403729 0.754533 1.107861 \n", + "\n", + "[5 rows x 100 columns]" ] }, - "execution_count": 18, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# of course, this didn't actually change our previous dataframe, right?\n", + "df=pd.read_csv(\"datasets/census.csv\")\n", "df.head()" ] }, - { - "cell_type": 
"markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "# Multilevel indexing\n", - "* we can have heirarchical indicies, which is pretty cool\n", - "* let's look at some census data" - ] - }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 10, "metadata": { "slideshow": { - "slide_type": "subslide" + "slide_type": "slide" } }, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n" + ] + }, { "data": { "text/html": [ @@ -1552,236 +1269,8 @@ " \n", " \n", " \n", - " 0\n", - " 40\n", - " 3\n", - " 6\n", - " 1\n", - " 0\n", - " Alabama\n", - " Alabama\n", - " 4779736\n", - " 4780127\n", - " 4785161\n", - " ...\n", - " 0.002295\n", - " -0.193196\n", - " 0.381066\n", - " 0.582002\n", - " -0.467369\n", - " 1.030015\n", - " 0.826644\n", - " 1.383282\n", - " 1.724718\n", - " 0.712594\n", - " \n", - " \n", - " 1\n", - " 50\n", - " 3\n", - " 6\n", - " 1\n", - " 1\n", - " Alabama\n", - " Autauga County\n", - " 54571\n", - " 54571\n", - " 54660\n", - " ...\n", - " 7.242091\n", - " -2.915927\n", - " -3.012349\n", - " 2.265971\n", - " -2.530799\n", - " 7.606016\n", - " -2.626146\n", - " -2.722002\n", - " 2.592270\n", - " -2.187333\n", - " \n", - " \n", - " 2\n", - " 50\n", - " 3\n", - " 6\n", - " 1\n", - " 3\n", - " Alabama\n", - " Baldwin County\n", - " 182265\n", - " 182265\n", - " 183193\n", - " ...\n", - " 14.832960\n", - " 17.647293\n", - " 21.845705\n", - " 19.243287\n", - " 17.197872\n", - " 15.844176\n", - " 18.559627\n", - " 22.727626\n", - " 20.317142\n", - " 18.293499\n", - " \n", - " \n", - " 3\n", - " 50\n", - " 3\n", - " 6\n", - " 1\n", - " 5\n", - " Alabama\n", - " Barbour County\n", - " 27457\n", - " 27457\n", - " 27341\n", - " ...\n", - " -4.728132\n", - " -2.500690\n", - " -7.056824\n", - " -3.904217\n", - " -10.543299\n", - " -4.874741\n", - " -2.758113\n", - " -7.167664\n", - " -3.978583\n", - " -10.543299\n", - " \n", - " \n", - " 4\n", - " 50\n", - " 3\n", - " 6\n", - " 
1\n", - " 7\n", - " Alabama\n", - " Bibb County\n", - " 22915\n", - " 22919\n", - " 22861\n", - " ...\n", - " -5.527043\n", - " -5.068871\n", - " -6.201001\n", - " -0.177537\n", - " 0.177258\n", - " -5.088389\n", - " -4.363636\n", - " -5.403729\n", - " 0.754533\n", - " 1.107861\n", - " \n", - " \n", - "\n", - "

5 rows × 100 columns

\n", - "" - ], - "text/plain": [ - " SUMLEV REGION DIVISION STATE COUNTY STNAME CTYNAME \\\n", - "0 40 3 6 1 0 Alabama Alabama \n", - "1 50 3 6 1 1 Alabama Autauga County \n", - "2 50 3 6 1 3 Alabama Baldwin County \n", - "3 50 3 6 1 5 Alabama Barbour County \n", - "4 50 3 6 1 7 Alabama Bibb County \n", - "\n", - " CENSUS2010POP ESTIMATESBASE2010 POPESTIMATE2010 ... RDOMESTICMIG2011 \\\n", - "0 4779736 4780127 4785161 ... 0.002295 \n", - "1 54571 54571 54660 ... 7.242091 \n", - "2 182265 182265 183193 ... 14.832960 \n", - "3 27457 27457 27341 ... -4.728132 \n", - "4 22915 22919 22861 ... -5.527043 \n", - "\n", - " RDOMESTICMIG2012 RDOMESTICMIG2013 RDOMESTICMIG2014 RDOMESTICMIG2015 \\\n", - "0 -0.193196 0.381066 0.582002 -0.467369 \n", - "1 -2.915927 -3.012349 2.265971 -2.530799 \n", - "2 17.647293 21.845705 19.243287 17.197872 \n", - "3 -2.500690 -7.056824 -3.904217 -10.543299 \n", - "4 -5.068871 -6.201001 -0.177537 0.177258 \n", - "\n", - " RNETMIG2011 RNETMIG2012 RNETMIG2013 RNETMIG2014 RNETMIG2015 \n", - "0 1.030015 0.826644 1.383282 1.724718 0.712594 \n", - "1 7.606016 -2.626146 -2.722002 2.592270 -2.187333 \n", - "2 15.844176 18.559627 22.727626 20.317142 18.293499 \n", - "3 -4.874741 -2.758113 -7.167664 -3.978583 -10.543299 \n", - "4 -5.088389 -4.363636 -5.403729 0.754533 1.107861 \n", - "\n", - "[5 rows x 100 columns]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df=pd.read_csv(\"datasets/census.csv\")\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -1936,7 +1425,7 @@ "[5 rows x 100 columns]" ] }, - "execution_count": 21, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -1951,7 +1440,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 11, "metadata": { "slideshow": { "slide_type": "slide" @@ -2207,7 +1696,7 @@ "[5 rows x 98 columns]" ] }, - "execution_count": 23, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -2246,83 +1735,39 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "slide" } }, - "outputs": [ - { - "data": { - "text/plain": [ - "2.0" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.loc['Michigan', 'Washtenaw County']['REGION']" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, - "outputs": [ - { - "data": { - "text/plain": [ - "2" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.loc['Michigan', 'REGION']['Washtenaw County']" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "slide" } }, - "outputs": [ - { - "data": { - "text/plain": [ - "SUMLEV 50.000000\n", - "REGION 2.000000\n", - "DIVISION 3.000000\n", - "STATE 26.000000\n", - "COUNTY 161.000000\n", - " ... 
\n", - "RNETMIG2011 5.191395\n", - "RNETMIG2012 1.248106\n", - "RNETMIG2013 4.226778\n", - "RNETMIG2014 3.801394\n", - "RNETMIG2015 0.595048\n", - "Name: (Michigan, Washtenaw County), Length: 98, dtype: float64" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# It's a bit of ambiguous, I recommend passing keys as tuple instead\n", "df.loc[('Michigan', 'Washtenaw County')]" @@ -2330,13 +1775,70 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, + "outputs": [], + "source": [ + "df.loc[['Michigan', 'Washtenaw County']]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# In class activity time!\n", + "## Question 0: Which county has the largest population in Michigan?\n", + "* (Easy)\n", + "\n", + "## Question 1: Generate a new column for which is the largest absolute change in population within the period 2010-2015?\n", + "* (Hint: population values are stored in columns POPESTIMATE2010 through POPESTIMATE2015, you need to consider all six columns.)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Wayne County'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc['Michigan']['CENSUS2010POP'].sort_values().index[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: 
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " after removing the cwd from sys.path.\n" + ] + }, { "data": { "text/html": [ @@ -2359,27 +1861,13 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2391,760 +1879,313 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -3154,855 +2195,372 @@ 
" \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
SUMLEVREGIONDIVISIONSTATECOUNTYSTNAMECTYNAMECENSUS2010POPESTIMATESBASE2010POPESTIMATE2010...RDOMESTICMIG2011RDOMESTICMIG2012RDOMESTICMIG2013RDOMESTICMIG2014RDOMESTICMIG2015RNETMIG2011RNETMIG2012RNETMIG2013RNETMIG2014RNETMIG2015
150150361
SUMLEVREGIONDIVISIONSTATECOUNTYCENSUS2010POPESTIMATESBASE2010POPESTIMATE2010POPESTIMATE2011POPESTIMATE2012...RDOMESTICMIG2011RDOMESTICMIG2012RDOMESTICMIG2013RDOMESTICMIG2014RDOMESTICMIG2015RNETMIG2011RNETMIG2012RNETMIG2013RNETMIG2014RNETMIG2015POPESTIMATE2013POPESTIMATE2014POPESTIMATE2015abs
STNAME
MichiganAlcona County50232611094210942108901077510607...0.276944-2.8996358.782284-2.9495723.0773670.461574-2.5254899.160017-2.5689823.462038
Alger County502326396019601956495549496...1.1507480.3149614.527984-0.211093-2.4422621.3599750.6299214.8438900.105546-2.123706
Allegan County5023265111408111408111502111530111898...-6.035008-0.9130460.2675127.7033973.056470-5.604577-0.5460370.6776978.2694333.634485
Alpena County50232672959829598295392934229219...-3.498582-0.034152-3.2620830.172479-2.770323-3.2947810.204915-2.9873810.448446-2.493291
Antrim County50232692358023580234992337923337...-3.797090-0.342495-0.7302883.099240-0.819018-3.711762-0.128436-0.5154973.357510-0.560381
Arenac County502326111589915899158541562015496...-11.310923-3.0852291.1644830.1951542.288554-11.183834-2.9566781.2938700.3252562.419328
Baraga County5023261388608860884188208715...1.472170-9.580838-1.608641-3.115265-6.9694512.151634-8.896493-0.804320-2.307604-6.156348
Barry County502326155917359175590805897059070...-4.760695-0.847170-0.3722191.2671170.151831-4.591275-0.609963-0.1353521.5205400.404882
Bay County50232617107771107771107695107497107121...-2.444329-3.075231-1.270559-5.853274-5.162447-1.914569-2.441547-0.653964-5.130995-4.445178
Benzie County502326191752517525175071743617390...-2.4039150.8039973.9668859.913188-2.173789-2.4611510.7465693.9093949.855886-2.230994
Berrien County50232621156813156817156792156510156015...-6.058053-6.079514-7.739789-3.392755-7.685482-4.270640-4.159667-5.677986-1.023621-5.304338
Branch County502326234524845248451724389743770...-33.210208-4.699602-8.338965-0.366977-3.687079-32.379391-3.718617-7.3080490.779825-2.519122AlabamaAutauga County546605525355175550385529055347687
Baldwin County18319318665919039619512619971320370920516
Barbour County273412722627159269732681526489852
Bibb County228612273322642225122254922583349
Blount County573735771157776577345765857673403
Bullock County108871062910606106281082910696281
Butler County209442067320408202612027620154790
Calhoun County50232625136146136148136025135291134750...-8.034911-6.613810-3.146311-2.870994-7.230572-6.907075-5.317711-1.803192-1.342765-5.699742
Cass County502326275229352286521885245652036...2.350828-7.713509-1.771445-0.463280-1.8949652.580177-7.560387-1.598152-0.250943-1.662928
Charlevoix County502326292594925949258932601226071...4.7779600.2304012.7942050.1146110.8777126.4348331.9968134.6697672.1776092.938427
Cheboygan County5023263126152261501184371177681172861165751159931156202817
Chambers County340983399334075341533405234123160
Cherokee County25976260802594025779...-0.730488-2.397571-1.9067266.510341-5.047738-0.461361-2.010866-1.6343376.900181-4.656441
Chippewa County502326333852038673386003890239003...6.1675831.540338-10.273311-12.234720-8.4868967.3288433.055003-8.908686-10.728109-6.941443
Clare County502326353092630926310063096230788...1.065066-2.688259-4.1391004.8328110.3920101.161890-2.623482-4.0739174.9307730.490012
Clinton County502326377538275382754217620676463...4.313216-0.4716093.893888-0.945204-3.7223015.1837730.4847094.9357310.246012-2.546157
Crawford County502326391407414074140591403413994...1.6374190.428143-2.007168-6.79976910.0933091.6374190.499500-1.863799-6.65509310.238536
Delta County502326413706937069370703694436848...-0.864701-1.057025-0.488560-4.660035-3.016633-0.756614-0.894406-0.325706-4.496525-2.852089
Dickinson County502326432616826168261442607226215...2.5279616.655574-4.057183-2.346831-4.6003672.4896586.579073-4.133734-2.423776-4.677684
Eaton County50232645107759107759107758107882108023...-2.578371-2.658577-1.0725100.147467-3.015995-0.899648-0.7040130.8228742.267302-0.901120
Emmet County502326473269432694326443277632890...2.4457354.4162885.3977013.923699-1.5973242.8737394.8731465.9132124.497163-1.024698
Genesee County50232649425790425790425057421784418110...-10.519094-11.346670-8.268120-8.063922-6.848891-10.042027-10.829938-7.704439-7.417071-6.200662
Gladwin County502326512569225692257202584225515...7.912804-7.9833324.7405431.647543-6.1641818.223110-7.6328455.0931462.039816-5.769041
Gogebic County502326531642716427164001612116035...-10.4547831.181739-2.383117-6.469208-8.486290-10.4547831.368329-2.194977-6.278937-8.293420
Grand Traverse County502326558698686986869648813888988...12.2557147.5765279.8900916.2951186.29275612.4041997.90397810.1135966.5495786.555867
Gratiot County502326574247642476424254215142035...-7.732690-3.9911620.808869-9.753066-2.500812-7.377980-3.6348091.237094-9.274974-2.019886
Hillsdale County502326594668846688466194662246283...-1.909031-8.374146-4.198589-7.735263-2.046637-1.909031-8.374146-4.198589-7.735263-2.04663726023260842599525859225
Chilton County436654373943697437954392143943278
Choctaw County138411359313543133781328913170671
Clarke County2576725570251442511624847246751092
Clay County138801367013456134671353813555424
Cleburne County149731497114921150281507215018151
Coffee County5017750448511735075550831512111034
Colbert County545145444354472544715448054354160
Conecuh County132081312112996128751266212672546
Coosa County1175811348111951105910807107241034
Covington County377963806037818378303788837835264
Crenshaw County138531389613951139321394813963110
Cullman County8047380469803748075681221820051631
Dale County503585010950324498334950149565857
Dallas County4380343178427774202141662411312672
DeKalb County711427138770942708697101271130518
Elmore County7946580012804328088381022814682003
Escambia County383093821338034378573778437789525
Etowah County1044421042361042351038521034521030571385
Fayette County172311706216960168571684216759472
Franklin County317343172931648315073159231696227
............................................................
Mecosta County5023261074279842798428444344243519...11.821153-0.873955-8.388160-3.284110-5.79548913.0959830.459976-6.982452-1.665183-4.195934
Menominee County5023261092402924029239762392323748...-0.417545-3.8178353.620824-4.3913360.042410-0.167018-3.5661093.873440-4.1379890.296868
Midland County5023261118362983629836648376183660...-1.791847-5.077021-3.826653-5.351147-2.429523-0.322532-3.332915-1.925285-3.208294-0.287234
Missaukee County5023261131484914849148141494015031...6.9234391.6015482.125469-5.653851-5.6873317.6628352.2688602.789678-4.988692-5.018233
Monroe County502326115152021152021151945151535150878...-5.305127-5.892604-5.605534-2.858114-4.320347-4.883353-5.475955-5.160545-2.325132-3.786146
Montcalm County5023261176334263342633116322363103...-3.777641-5.097921-6.383333-1.956262-2.050141-3.382490-4.717952-5.954602-1.479125-1.589257
Montmorency County50232611997659765977595959494...-10.325245-2.304992-2.7574504.8241856.576465-10.325245-2.304992-2.7574504.8241856.576465
Muskegon County502326121172188172188171922169985170129...-14.565364-2.3521538.885851-2.008632-0.863543-14.138348-2.0052109.259746-1.567430-0.423078
Newaygo County5023261234846048460483784841847952...-1.880243-12.036941-1.668492-2.4627970.667842-1.714947-11.746394-1.355649-2.0453741.085244
Oakland County50232612512023621202362120293012112111221391...0.7447781.7142142.387837-1.182556-5.2727404.3419175.5545466.5465853.529064-0.595439
Oceana County5023261272657026570265222641726251...-9.180377-9.379509-4.613830-1.680223-2.142352-8.764805-8.999772-4.156260-1.183793-1.645020
Ogemaw County5023261292169921699216372153521424...0.185305-1.955353-3.096556-3.5059461.8118100.741221-1.396681-2.486629-2.8900362.431640
Ontonagon County50232613167806780674566146408...-9.431844-23.498695-6.922048-9.788975-11.015208-9.132420-23.345108-6.764729-9.628500-10.850801
Osceola County5023261332352823528235132344823265...-3.960733-10.018624-4.256417-4.396836-3.939223-3.705202-9.847366-4.084440-4.095092-3.636206
Oscoda County50232613586408640860386488593...9.506695-2.436054-19.5650893.348081-3.2528169.738566-2.204048-19.3293653.587229-3.011867
Otsego County5023261372416424164241512413124048...-2.112589-5.0644473.4045381.4915794.999897-1.822625-4.4417694.1518762.4030995.908969
Ottawa County502326139263801263801264141266429269616...1.3834184.1899474.5918894.7227653.2270063.0156255.8166766.3792626.7603215.228613
Presque Isle County5023261411337613376133041318513120...-1.9630791.2164992.7512423.763585-4.026326-1.8875761.2925302.8276653.840393-3.948897
Roscommon County5023261432444924449244702432224140...1.9675360.7841202.4944398.1700714.2996393.1152651.8983953.9079559.6706965.802425
Saginaw County502326145200169200169199916198855198329...-6.861081-5.498711-10.326683-10.012605-11.363139-6.053600-4.622543-9.369946-8.940920-10.287065
St. Clair County502326147163040163040162681161493160549...-7.563839-6.707200-2.063964-0.418449-1.487909-7.014751-6.055111-1.3780550.374731-0.693941
St. Joseph County5023261496129561295612886107860953...-7.387673-7.129336-4.151182-1.114206-5.518348-6.684863-6.277094-3.281567-0.049156-4.453978
Sanilac County5023261514311443114430774271342334...-8.835529-9.006784-8.832748-4.619988-3.705174-8.812216-8.889202-8.714028-4.500299-3.584876
Schoolcraft County50232615384858485847884828360...4.834906-12.112576-7.344089-4.7494373.7934415.070755-11.875074-7.103299-4.5058764.038179
Shiawassee County5023261557064870648706026994669269...-9.790250-10.573573-6.342724-0.058084-4.319749-9.548339-10.329347-6.0965460.232337-4.014312
Tuscola County5023261575572955729556965539254714...-4.734985-13.187292-9.106264-3.714827-3.730166-4.464929-12.932992-8.830873-3.437601-3.433237
Van Buren County5023261597625876265761557591475258...-6.286620-12.594925-1.952334-4.650916-4.059792-5.957822-12.237716-1.540617-4.106094-3.487428
Washtenaw County502326161344791345066345563349048351213...0.129569-4.309822-1.780293-2.955078-6.0789855.1913951.2481064.2267783.8013940.595048
Wayne County50232616318205841820641181519918012731792514...-13.340073-10.271616-14.119617-11.903253-8.762835-11.344758-8.098421-11.732437-9.161648-6.010195
Wexford County5023261653273532735327573269432597...-4.491910-3.982172-3.2854848.463184-0.758438-4.400238-3.859644-3.1626628.585396-0.637088WisconsinWashburn County159301578415831156571567815552378
Washington County1319671322251326491327581333011336741707
Waukesha County3900763908083927103940253953353964886412
Waupaca County524225234252035522135208851945477
Waushara County245062458124484243322417324033548
Winnebago County1670591676301687171694861696391695462580
Wood County7480774647743847399673597734351372
WyomingAlbany County3642836908373963764737918379561528
Big Horn County116721174511785120021191912022350
Campbell County4624446600478814812148243492202976
Carbon County158371581715678157871585615559297
Converse County138261372814025143431417214236615
Crook County711471297148716072647444330
Fremont County402224059141129410244071740315907
Goshen County134081359713666135651350913383283
Hot Springs County481348184846484647934741105
Johnson County85818636861086198552858584
Laramie County9227192663948949600696469971214850
Lincoln County180911802217943183281856418722779
Natrona County7547276420786998115681603821786706
Niobrara County24922485247525482530254273
Park County282592847328863292372912629228978
Platte County867887018732872887768812134
Sheridan County291462927529594297943002030009874
Sublette County10244101421041810086100399899519
Sweetwater County4359344041451044516244925446261569
Teton County2129721482216972234722905231251828
Uinta County211022091220989210222090320822280
Washakie County854584698443844383168328229
Weston County718171147065716071857234169
\n", - "

83 rows × 98 columns

\n", + "

3142 rows × 7 columns

\n", "
" ], "text/plain": [ - " SUMLEV REGION DIVISION STATE COUNTY \\\n", - "STNAME CTYNAME \n", - "Michigan Alcona County 50 2 3 26 1 \n", - " Alger County 50 2 3 26 3 \n", - " Allegan County 50 2 3 26 5 \n", - " Alpena County 50 2 3 26 7 \n", - " Antrim County 50 2 3 26 9 \n", - "... ... ... ... ... ... \n", - " Tuscola County 50 2 3 26 157 \n", - " Van Buren County 50 2 3 26 159 \n", - " Washtenaw County 50 2 3 26 161 \n", - " Wayne County 50 2 3 26 163 \n", - " Wexford County 50 2 3 26 165 \n", - "\n", - " CENSUS2010POP ESTIMATESBASE2010 POPESTIMATE2010 \\\n", - "STNAME CTYNAME \n", - "Michigan Alcona County 10942 10942 10890 \n", - " Alger County 9601 9601 9564 \n", - " Allegan County 111408 111408 111502 \n", - " Alpena County 29598 29598 29539 \n", - " Antrim County 23580 23580 23499 \n", - "... ... ... ... \n", - " Tuscola County 55729 55729 55696 \n", - " Van Buren County 76258 76265 76155 \n", - " Washtenaw County 344791 345066 345563 \n", - " Wayne County 1820584 1820641 1815199 \n", - " Wexford County 32735 32735 32757 \n", - "\n", - " POPESTIMATE2011 POPESTIMATE2012 ... \\\n", - "STNAME CTYNAME ... \n", - "Michigan Alcona County 10775 10607 ... \n", - " Alger County 9554 9496 ... \n", - " Allegan County 111530 111898 ... \n", - " Alpena County 29342 29219 ... \n", - " Antrim County 23379 23337 ... \n", - "... ... ... ... \n", - " Tuscola County 55392 54714 ... \n", - " Van Buren County 75914 75258 ... \n", - " Washtenaw County 349048 351213 ... \n", - " Wayne County 1801273 1792514 ... \n", - " Wexford County 32694 32597 ... \n", - "\n", - " RDOMESTICMIG2011 RDOMESTICMIG2012 \\\n", - "STNAME CTYNAME \n", - "Michigan Alcona County 0.276944 -2.899635 \n", - " Alger County 1.150748 0.314961 \n", - " Allegan County -6.035008 -0.913046 \n", - " Alpena County -3.498582 -0.034152 \n", - " Antrim County -3.797090 -0.342495 \n", - "... ... ... 
\n", - " Tuscola County -4.734985 -13.187292 \n", - " Van Buren County -6.286620 -12.594925 \n", - " Washtenaw County 0.129569 -4.309822 \n", - " Wayne County -13.340073 -10.271616 \n", - " Wexford County -4.491910 -3.982172 \n", - "\n", - " RDOMESTICMIG2013 RDOMESTICMIG2014 \\\n", - "STNAME CTYNAME \n", - "Michigan Alcona County 8.782284 -2.949572 \n", - " Alger County 4.527984 -0.211093 \n", - " Allegan County 0.267512 7.703397 \n", - " Alpena County -3.262083 0.172479 \n", - " Antrim County -0.730288 3.099240 \n", - "... ... ... \n", - " Tuscola County -9.106264 -3.714827 \n", - " Van Buren County -1.952334 -4.650916 \n", - " Washtenaw County -1.780293 -2.955078 \n", - " Wayne County -14.119617 -11.903253 \n", - " Wexford County -3.285484 8.463184 \n", + " POPESTIMATE2010 POPESTIMATE2011 POPESTIMATE2012 \\\n", + "STNAME CTYNAME \n", + "Alabama Autauga County 54660 55253 55175 \n", + " Baldwin County 183193 186659 190396 \n", + " Barbour County 27341 27226 27159 \n", + " Bibb County 22861 22733 22642 \n", + " Blount County 57373 57711 57776 \n", + "... ... ... ... \n", + "Wyoming Sweetwater County 43593 44041 45104 \n", + " Teton County 21297 21482 21697 \n", + " Uinta County 21102 20912 20989 \n", + " Washakie County 8545 8469 8443 \n", + " Weston County 7181 7114 7065 \n", "\n", - " RDOMESTICMIG2015 RNETMIG2011 RNETMIG2012 \\\n", - "STNAME CTYNAME \n", - "Michigan Alcona County 3.077367 0.461574 -2.525489 \n", - " Alger County -2.442262 1.359975 0.629921 \n", - " Allegan County 3.056470 -5.604577 -0.546037 \n", - " Alpena County -2.770323 -3.294781 0.204915 \n", - " Antrim County -0.819018 -3.711762 -0.128436 \n", - "... ... ... ... 
\n", - " Tuscola County -3.730166 -4.464929 -12.932992 \n", - " Van Buren County -4.059792 -5.957822 -12.237716 \n", - " Washtenaw County -6.078985 5.191395 1.248106 \n", - " Wayne County -8.762835 -11.344758 -8.098421 \n", - " Wexford County -0.758438 -4.400238 -3.859644 \n", + " POPESTIMATE2013 POPESTIMATE2014 POPESTIMATE2015 \\\n", + "STNAME CTYNAME \n", + "Alabama Autauga County 55038 55290 55347 \n", + " Baldwin County 195126 199713 203709 \n", + " Barbour County 26973 26815 26489 \n", + " Bibb County 22512 22549 22583 \n", + " Blount County 57734 57658 57673 \n", + "... ... ... ... \n", + "Wyoming Sweetwater County 45162 44925 44626 \n", + " Teton County 22347 22905 23125 \n", + " Uinta County 21022 20903 20822 \n", + " Washakie County 8443 8316 8328 \n", + " Weston County 7160 7185 7234 \n", "\n", - " RNETMIG2013 RNETMIG2014 RNETMIG2015 \n", - "STNAME CTYNAME \n", - "Michigan Alcona County 9.160017 -2.568982 3.462038 \n", - " Alger County 4.843890 0.105546 -2.123706 \n", - " Allegan County 0.677697 8.269433 3.634485 \n", - " Alpena County -2.987381 0.448446 -2.493291 \n", - " Antrim County -0.515497 3.357510 -0.560381 \n", - "... ... ... ... \n", - " Tuscola County -8.830873 -3.437601 -3.433237 \n", - " Van Buren County -1.540617 -4.106094 -3.487428 \n", - " Washtenaw County 4.226778 3.801394 0.595048 \n", - " Wayne County -11.732437 -9.161648 -6.010195 \n", - " Wexford County -3.162662 8.585396 -0.637088 \n", + " abs \n", + "STNAME CTYNAME \n", + "Alabama Autauga County 687 \n", + " Baldwin County 20516 \n", + " Barbour County 852 \n", + " Bibb County 349 \n", + " Blount County 403 \n", + "... ... 
\n", + "Wyoming Sweetwater County 1569 \n", + " Teton County 1828 \n", + " Uinta County 280 \n", + " Washakie County 229 \n", + " Weston County 169 \n", "\n", - "[83 rows x 98 columns]" + "[3142 rows x 7 columns]" ] }, - "execution_count": 36, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df.loc[['Michigan', 'Washtenaw County']]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "# In class activity time!\n", - "## Question 0: Which county has the largest population in Michigan?\n", - "* (Easy)\n", + "import numpy as np\n", + "popcol=[\"POPESTIMATE2010\",\"POPESTIMATE2011\",\"POPESTIMATE2012\",\"POPESTIMATE2013\",\"POPESTIMATE2014\",\"POPESTIMATE2015\"]\n", + "new_df=df[popcol]\n", + "new_df[\"abs\"]=None\n", "\n", - "## Question 1: Generate a new column for which is the largest absolute change in population within the period 2010-2015?\n", - "* (Hint: population values are stored in columns POPESTIMATE2010 through POPESTIMATE2015, you need to consider all six columns.)" + "for i in range(0,len(new_df)):\n", + " new_df.iloc[i,-1]=np.max(new_df.iloc[i][popcol]) - np.min(new_df.iloc[i][popcol]) \n", + "new_df" ] } ], diff --git a/190930_more_pandas.ipynb b/190930_more_pandas.ipynb new file mode 100644 index 0000000..22b7e49 --- /dev/null +++ b/190930_more_pandas.ipynb @@ -0,0 +1,184 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Real Data Cleaning\n", + "* I need to learn about US presidents! Let's hit wikipedia..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "df=pd.read_html(\"https://en.wikipedia.org/wiki/List_of_presidents_of_the_United_States\")[1]\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Let's try and build a dataframe that has the president number, start and end date of term, name and birth/death years. I think there are likely questions like this on the citizenship test." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.columns.levels" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.columns = df.columns.droplevel(0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df=df.rename(columns={\"Presidency[a]\":\"Presidency\"})\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_start(values):\n", + " splits=values.split(\"–\")\n", + " return splits[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Presidency[a].1\"].apply(get_start)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# When using apply we have two different options, we can modify in place, or return a result\n", + "# talk about apply on a series object\n", + "df[\"start\"]=df[\"Presidency[a].1\"].apply(get_start)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"scrolled": true + }, + "outputs": [], + "source": [ + "# talk about apply on a dataframe object and the importance of the axis\n", + "def get_end(whole_row):\n", + " splits=whole_row[\"Presidency[a].1\"].split(\"–\")\n", + " whole_row[\"end\"]=splits[-1]\n", + " return whole_row\n", + "df.apply(get_end, axis='columns')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* apply is super powerful, super awesome, super useful, live and love apply. broadcast ftw!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# how about that president name?\n", + "# we can use regex in replace! and assign to new column!\n", + "df[\"name\"]=df[\"President.1\"].str.replace(\"\d+.*\",\"\")\n", + "df[\"name\"].head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# extract will pull out multiple columns! this is amazing!\n", + "# and we can unpack columns into sub frames!\n", + "df[[\"born\",\"died\"]]=df[\"President.1\"].str.extract(\"(?P\d\d\d\d)–(?P\d\d\d\d)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# jimmy carter is a zombie!\n", + "df.iloc[69]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/191002_merging.ipynb b/191002_merging.ipynb new file mode 100644 index 0000000..33e7a8a --- /dev/null +++ b/191002_merging.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + 
"source": [ + "# Merging\n", + "* There are many different ways to teach merging, and merging in pandas and sql are *very very very* similar\n", + "* This stack overflow post goes through a bunch of them: https://stackoverflow.com/questions/38549/what-is-the-difference-between-inner-join-and-outer-join\n", + "* Some people don't like the venn diagram approach, but for me it works well, so let's start there\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "* The crux of the question is, how do you take two dataframes and join them together into one?\n", + "* Remember that the dataframe is made up of two axes, rows and columns\n", + "* Rows and columns are actually identical underneath - they both have indicies (or names) and we can transform them trivially with `T`\n", + "* So the mental model I give you now is actually going to be a bit wrong, but hopefully it will suffice" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "* Here's our scenario, we have a `DataFrame` of students and one of staff\n", + "* Turns out students can be staff! Look at Jaik.\n", + "* So when we join our dataframes together, who are we interested in?\n", + "1. Only students who are also staff?\n", + "2. Students who are not staff? Staff who are not students?\n", + "3. Students, regardless of whether they are staff or not, but if they are staff we want the staff details too?\n", + "4. Ug. What a mess..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "staff_df = pd.DataFrame([{'Name': 'Kelly', 'Role': 'Director of HR'},\n", + " {'Name': 'Sally', 'Role': 'Course liasion'},\n", + " {'Name': 'James', 'Role': 'Grader'}])\n", + "# And let's index these staff by name\n", + "staff_df = staff_df.set_index('Name')\n", + "\n", + "# Now we'll create a student dataframe\n", + "student_df = pd.DataFrame([{'Name': 'James', 'School': 'Business'},\n", + " {'Name': 'Mike', 'School': 'Law'},\n", + " {'Name': 'Sally', 'School': 'Engineering'}])\n", + "# And we'll index this by name too\n", + "student_df = student_df.set_index('Name')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "staff_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "student_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "* Ok, we have two different dataframes (one has a Role the other a School) but they are indexed the same. That's a good start\n", + "* Let's just try and get a list of everyone and their details.
This is called a union, or outer join, and we're actually interested in unioning in both directions, along the rows and the columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "pd.merge(staff_df, student_df, how='outer', left_index=True, right_index=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "* Notice how we have both more columns and more rows, and how there are some missing values, since Kelly doesn't have a school and Mike doesn't have a role" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "pd.merge(staff_df, student_df, how='inner', left_index=True, right_index=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "* Now notice how we have only taken the place where there is overlap, but we have all of the columns of both DataFrames\n", + "* pandas looks for join membership on the index and not the columns, you always get all the columns." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "# what will this produce?\n", + "pd.merge(staff_df, student_df, how='right', left_index=True, right_index=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "* Notice how pandas kept anyone involved in the right dataframe, the students, regardless of whether they were in the left dataframe\n", + "* People who were in the left dataframe had their new information populated, everyone else (Mike) just got NaN's" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "# We can also join on columns instead of indices, which is cool!\n", + "staff_df = staff_df.reset_index()\n", + "student_df = student_df.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "staff_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "student_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "pd.merge(staff_df, student_df, how='right', on='Name')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "* (this is how I do it 90% of the time)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "* What if we have conflicts between dataframes?"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "staff_df = pd.DataFrame([{'Name': 'Kelly', 'Role': 'Director of HR', \n", + " 'Location': 'State Street'},\n", + " {'Name': 'Sally', 'Role': 'Course liasion', \n", + " 'Location': 'Washington Avenue'},\n", + " {'Name': 'James', 'Role': 'Grader', \n", + " 'Location': 'Washington Avenue'}])\n", + "student_df = pd.DataFrame([{'Name': 'James', 'School': 'Business', \n", + " 'Location': '1024 Billiard Avenue'},\n", + " {'Name': 'Mike', 'School': 'Law', \n", + " 'Location': 'Fraternity House #22'},\n", + " {'Name': 'Sally', 'School': 'Engineering', \n", + " 'Location': '512 Wilson Crescent'}])\n", + "student_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "# quick, what's the meaning of this merge?\n", + "pd.merge(staff_df, student_df, how='left', on='Name')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "# What do we do if we want to match on multiple columns, like first and last name?\n", + "staff_df = pd.DataFrame([{'First Name': 'Kelly', 'Last Name': 'Desjardins', \n", + " 'Role': 'Director of HR'},\n", + " {'First Name': 'Sally', 'Last Name': 'Brooks', \n", + " 'Role': 'Course liasion'},\n", + " {'First Name': 'James', 'Last Name': 'Wilde', \n", + " 'Role': 'Grader'}])\n", + "student_df = pd.DataFrame([{'First Name': 'James', 'Last Name': 'Hammond', \n", + " 'School': 'Business'},\n", + " {'First Name': 'Mike', 'Last Name': 'Smith', \n", + " 'School': 'Law'},\n", + " {'First Name': 'Sally', 'Last Name': 'Brooks', \n", + " 'School': 'Engineering'}])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, 
+ "outputs": [], + "source": [ + "pd.merge(staff_df, student_df, how='inner', on=['First Name','Last Name'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "* One last mention, if we want to just append a bunch of rows between dataframes we just use `pd.concat`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "staff1_df = pd.DataFrame([{'Name': 'James', 'Role': 'Grader', \n", + " 'Location': 'Washington Avenue'}])\n", + "staff2_df = pd.DataFrame([{'Name': 'Kelly', 'Role': 'Director of HR', \n", + " 'Location': 'State Street'},\n", + " {'Name': 'Sally', 'Role': 'Course liasion', \n", + " 'Location': 'Washington Avenue'}])\n", + "\n", + "pd.concat([staff1_df,staff2_df], keys=['staff1','staff2']) #keys is optional if you want to preserve index" + ] + } + ], + "metadata": { + "celltoolbar": "Slideshow", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}