From d63bdc372b74553293d01cecb1cf98e4c8855820 Mon Sep 17 00:00:00 2001 From: Christopher Brooks Date: Wed, 9 Oct 2019 12:16:30 +0000 Subject: [PATCH] Practice midterm, half length, from last year --- 330_midterm_w19/330_Midterm-student.ipynb | 182 ---------------------- 1 file changed, 182 deletions(-) diff --git a/330_midterm_w19/330_Midterm-student.ipynb b/330_midterm_w19/330_Midterm-student.ipynb index 0a1c01e..1b54d28 100644 --- a/330_midterm_w19/330_Midterm-student.ipynb +++ b/330_midterm_w19/330_Midterm-student.ipynb @@ -227,188 +227,6 @@ "source": [ "# Your code goes here. There is no written answer for this question." ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Question 6 (Pivoting, 25 points max)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Generate the following DataFrame from the movies data file:\n", - "![](assets/pivottable.png)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code goes here. There is no written answer for this question " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Question 7 (Natural Language Processing, 30 points max)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### List the adjectives that occur at least twice from the movie overviews for movies whose popularity values are greater than 100" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code goes here. There is no written answer for this question." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Question 8 (Efficiency, 30 points max)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 8a. Given the code and line profiler output below, identify where the majority of time is being spent. \n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Your answer goes here" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "credits = pd.read_csv('data/tmdb_5000_credits_plus.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "# extract_genders will return a set of the genders found in whatever Series name is passed to it.\n", - "def extract_genders(x):\n", - " genders = set() # Create an empty set to hold out results\n", - " j = json.loads(x) # This loads the JSON string into a python data structure that consists of a list of dictionaries\n", - " for character in j:\n", - " str_gender = str(character['gender'])\n", - " genders.add(str_gender)\n", - " return \" \".join(genders)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "208 ms ± 8.88 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit credits['cast'].apply(extract_genders)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext line_profiler" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Timer unit: 1e-06 s\n", - "\n", - "Total time: 0.347512 s\n", - "File: \n", - "Function: extract_genders at line 4\n", - "\n", - "Line # Hits Time Per Hit % Time Line Contents\n", - "==============================================================\n", - " 4 def extract_genders(x):\n", - " 5 4803 2442.0 0.5 0.7 genders = set() # Create an empty set to hold out results\n", - " 6 4803 198111.0 41.2 57.0 j = json.loads(x) # This loads the JSON string into a python data structure that consists of a list of dictionaries\n", - " 7 111060 35616.0 0.3 10.2 for character in j:\n", - " 8 106257 63960.0 0.6 18.4 str_gender = str(character['gender'])\n", - " 9 106257 43705.0 0.4 12.6 genders.add(str_gender)\n", - " 10 4803 3678.0 0.8 1.1 return \" \".join(genders)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%lprun -f extract_genders credits['cast'].apply(extract_genders)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 8b. Think carefully about what you might do to improve the performance and see if you can achieve an improvement. Suggest some changes that you think might improve the performance." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Insert your answer here." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# END OF MID-TERM" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {