Skip to content

Commit

Permalink
Reformat notebook
Browse files: browse the repository at this point in the history
  • Loading branch information
gahjelle committed Nov 20, 2023
1 parent b0886f2 commit 65af430
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 18 deletions.
2 changes: 1 addition & 1 deletion python-for-data-analysis/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ The `james_bond_data.csv` file contains the original uncleansed data and is the

A cleansed version of the original data is available in the `james_bond_data_cleansed.csv` file.

The complete code is available in the `Solution-New.ipynb` Jupyter notebook.
The complete code is available in the `james_bond_analysis.ipynb` Jupyter notebook.

## Setup

Expand Down
46 changes: 29 additions & 17 deletions python-for-data-analysis/james_bond_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -406,22 +406,32 @@
"metadata": {},
"outputs": [],
"source": [
"data = james_bond_data.combine_first(\n",
" pd.DataFrame({\"Avg_User_IMDB\": {10: 7.1}, \"Avg_User_Rtn_Tom\": {10: 6.8}})\n",
").assign(\n",
" US_Gross=lambda data: (\n",
" data[\"US_Gross\"].replace(\"[$,]\", \"\", regex=True).astype(float)\n",
" ),\n",
" World_Gross=lambda data: (\n",
" data[\"World_Gross\"].replace(\"[$,]\", \"\", regex=True).astype(float)\n",
" ),\n",
" Budget=lambda data: (\n",
" data[\"Budget ($ 000s)\"].replace(\"[$,]\", \"\", regex=True).astype(float)\n",
" ),\n",
" Film_Length=lambda data: (data[\"Film_Length\"].str.rstrip(\"mins\").astype(int)),\n",
" Release=lambda data: pd.to_datetime(data[\"Release\"], format=\"%B, %Y\"),\n",
" Release_Year=lambda data: data[\"Release\"].dt.year\n",
").drop_duplicates(ignore_index=True)\n",
"data = (\n",
" james_bond_data.combine_first(\n",
" pd.DataFrame(\n",
" {\"Avg_User_IMDB\": {10: 7.1}, \"Avg_User_Rtn_Tom\": {10: 6.8}}\n",
" )\n",
" )\n",
" .assign(\n",
" US_Gross=lambda data: (\n",
" data[\"US_Gross\"].replace(\"[$,]\", \"\", regex=True).astype(float)\n",
" ),\n",
" World_Gross=lambda data: (\n",
" data[\"World_Gross\"].replace(\"[$,]\", \"\", regex=True).astype(float)\n",
" ),\n",
" Budget=lambda data: (\n",
" data[\"Budget ($ 000s)\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .astype(float)\n",
" ),\n",
" Film_Length=lambda data: (\n",
" data[\"Film_Length\"].str.rstrip(\"mins\").astype(int)\n",
" ),\n",
" Release=lambda data: pd.to_datetime(data[\"Release\"], format=\"%B, %Y\"),\n",
" Release_Year=lambda data: data[\"Release\"].dt.year,\n",
" )\n",
" .drop_duplicates(ignore_index=True)\n",
")\n",
"\n",
"duplicate_movies = [\"The Man with the Golden Gun\", \"The Living Daylights\"]\n",
"data[data[\"Movie\"].isin(duplicate_movies)]"
Expand Down Expand Up @@ -721,7 +731,9 @@
"source": [
"length = data[\"Film_Length\"].value_counts(bins=7).sort_index()\n",
"length.plot.bar(\n",
" title=\"Film Length Distribution\", xlabel=\"Time Range (mins)\", ylabel=\"Count\"\n",
" title=\"Film Length Distribution\",\n",
" xlabel=\"Time Range (mins)\",\n",
" ylabel=\"Count\",\n",
")"
]
},
Expand Down

0 comments on commit 65af430

Please sign in to comment.