diff --git a/pandas-5-exercise-data-types-and-missing-values.ipynb b/pandas-5-exercise-data-types-and-missing-values.ipynb new file mode 100644 index 0000000..a1d68e8 --- /dev/null +++ b/pandas-5-exercise-data-types-and-missing-values.ipynb @@ -0,0 +1 @@ +{"cells":[{"source":"\"Kaggle\"","metadata":{},"cell_type":"markdown","outputs":[],"execution_count":0},{"cell_type":"markdown","id":"b928873f","metadata":{"papermill":{"duration":0.017824,"end_time":"2022-04-06T07:55:31.849531","exception":false,"start_time":"2022-04-06T07:55:31.831707","status":"completed"},"tags":[]},"source":["**This notebook is an exercise in the [Pandas](https://www.kaggle.com/learn/pandas) course. You can reference the tutorial at [this link](https://www.kaggle.com/residentmario/data-types-and-missing-values).**\n","\n","---\n"]},{"cell_type":"markdown","id":"81565163","metadata":{"papermill":{"duration":0.018679,"end_time":"2022-04-06T07:55:31.887309","exception":false,"start_time":"2022-04-06T07:55:31.86863","status":"completed"},"tags":[]},"source":["# Introduction\n","\n","Run the following cell to load your data and some utility functions."]},{"cell_type":"code","execution_count":1,"id":"46da1b25","metadata":{"execution":{"iopub.execute_input":"2022-04-06T07:55:31.930404Z","iopub.status.busy":"2022-04-06T07:55:31.929225Z","iopub.status.idle":"2022-04-06T07:55:34.562502Z","shell.execute_reply":"2022-04-06T07:55:34.561819Z","shell.execute_reply.started":"2022-04-06T07:52:35.711032Z"},"papermill":{"duration":2.656144,"end_time":"2022-04-06T07:55:34.562652","exception":false,"start_time":"2022-04-06T07:55:31.906508","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Setup complete.\n"]}],"source":["import pandas as pd\n","\n","reviews = pd.read_csv(\"../input/wine-reviews/winemag-data-130k-v2.csv\", index_col=0)\n","\n","from learntools.core import binder; binder.bind(globals())\n","from learntools.pandas.data_types_and_missing_data import *\n","print(\"Setup complete.\")"]},{"cell_type":"markdown","id":"cf143c12","metadata":{"papermill":{"duration":0.018648,"end_time":"2022-04-06T07:55:34.601","exception":false,"start_time":"2022-04-06T07:55:34.582352","status":"completed"},"tags":[]},"source":["# Exercises"]},{"cell_type":"markdown","id":"310e2344","metadata":{"papermill":{"duration":0.018354,"end_time":"2022-04-06T07:55:34.638301","exception":false,"start_time":"2022-04-06T07:55:34.619947","status":"completed"},"tags":[]},"source":["## 1. \n","What is the data type of the `points` column in the dataset?"]},{"cell_type":"code","execution_count":2,"id":"28edc9f0","metadata":{"execution":{"iopub.execute_input":"2022-04-06T07:55:34.683233Z","iopub.status.busy":"2022-04-06T07:55:34.682723Z","iopub.status.idle":"2022-04-06T07:55:34.69311Z","shell.execute_reply":"2022-04-06T07:55:34.692583Z","shell.execute_reply.started":"2022-04-06T07:52:36.991609Z"},"papermill":{"duration":0.036111,"end_time":"2022-04-06T07:55:34.693223","exception":false,"start_time":"2022-04-06T07:55:34.657112","status":"completed"},"tags":[]},"outputs":[{"data":{"application/javascript":["parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.25, \"interactionType\": 1, \"questionType\": 1, \"questionId\": \"1_PointsDtype\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, \"*\")"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/markdown":["Correct"],"text/plain":["Correct"]},"metadata":{},"output_type":"display_data"},{"data":{"text/plain":["dtype('int64')"]},"execution_count":2,"metadata":{},"output_type":"execute_result"}],"source":["# Your code here\n","dtype = reviews.points.dtype\n","\n","# Check your answer\n","q1.check()\n","dtype"]},{"cell_type":"code","execution_count":3,"id":"2d5eb66a","metadata":{"execution":{"iopub.execute_input":"2022-04-06T07:55:34.727191Z","iopub.status.busy":"2022-04-06T07:55:34.726185Z","iopub.status.idle":"2022-04-06T07:55:34.728698Z","shell.execute_reply":"2022-04-06T07:55:34.729191Z","shell.execute_reply.started":"2022-04-06T07:52:37.002503Z"},"papermill":{"duration":0.021435,"end_time":"2022-04-06T07:55:34.72934","exception":false,"start_time":"2022-04-06T07:55:34.707905","status":"completed"},"tags":[]},"outputs":[],"source":["#q1.hint()\n","#q1.solution()"]},{"cell_type":"markdown","id":"031d4da3","metadata":{"papermill":{"duration":0.01395,"end_time":"2022-04-06T07:55:34.757545","exception":false,"start_time":"2022-04-06T07:55:34.743595","status":"completed"},"tags":[]},"source":["## 2. \n","Create a Series from entries in the `points` column, but convert the entries to strings. Hint: strings are `str` in native Python."]},{"cell_type":"code","execution_count":4,"id":"a20aa346","metadata":{"execution":{"iopub.execute_input":"2022-04-06T07:55:34.790955Z","iopub.status.busy":"2022-04-06T07:55:34.788708Z","iopub.status.idle":"2022-04-06T07:55:34.924523Z","shell.execute_reply":"2022-04-06T07:55:34.924153Z","shell.execute_reply.started":"2022-04-06T07:52:37.013756Z"},"papermill":{"duration":0.152885,"end_time":"2022-04-06T07:55:34.924626","exception":false,"start_time":"2022-04-06T07:55:34.771741","status":"completed"},"tags":[]},"outputs":[{"data":{"application/javascript":["parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.25, \"interactionType\": 1, \"questionType\": 1, \"questionId\": \"2_StrPoints\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, \"*\")"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/markdown":["Correct"],"text/plain":["Correct"]},"metadata":{},"output_type":"display_data"},{"data":{"text/plain":["0 87\n","1 87\n","2 87\n","3 87\n","4 87\n"," ..\n","129966 90\n","129967 90\n","129968 90\n","129969 90\n","129970 90\n","Name: points, Length: 129971, dtype: object"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["point_strings = reviews.points.astype('str')\n","\n","# Check your answer\n","q2.check()\n","point_strings"]},{"cell_type":"code","execution_count":5,"id":"bbd77469","metadata":{"execution":{"iopub.execute_input":"2022-04-06T07:55:34.959829Z","iopub.status.busy":"2022-04-06T07:55:34.959308Z","iopub.status.idle":"2022-04-06T07:55:34.962447Z","shell.execute_reply":"2022-04-06T07:55:34.962046Z","shell.execute_reply.started":"2022-04-06T07:52:37.164624Z"},"papermill":{"duration":0.022195,"end_time":"2022-04-06T07:55:34.962554","exception":false,"start_time":"2022-04-06T07:55:34.940359","status":"completed"},"tags":[]},"outputs":[],"source":["#q2.hint()\n","#q2.solution()"]},{"cell_type":"markdown","id":"8a223ef5","metadata":{"papermill":{"duration":0.014908,"end_time":"2022-04-06T07:55:34.992782","exception":false,"start_time":"2022-04-06T07:55:34.977874","status":"completed"},"tags":[]},"source":["## 3.\n","Sometimes the price column is null. How many reviews in the dataset are missing a price?"]},{"cell_type":"code","execution_count":6,"id":"c3453646","metadata":{"execution":{"iopub.execute_input":"2022-04-06T07:55:35.027855Z","iopub.status.busy":"2022-04-06T07:55:35.027387Z","iopub.status.idle":"2022-04-06T07:55:35.034931Z","shell.execute_reply":"2022-04-06T07:55:35.034475Z","shell.execute_reply.started":"2022-04-06T07:52:37.169848Z"},"papermill":{"duration":0.027048,"end_time":"2022-04-06T07:55:35.035058","exception":false,"start_time":"2022-04-06T07:55:35.00801","status":"completed"},"tags":[]},"outputs":[{"data":{"application/javascript":["parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.25, \"interactionType\": 1, \"questionType\": 1, \"questionId\": \"3_CountMissingPrices\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, \"*\")"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/markdown":["Correct"],"text/plain":["Correct"]},"metadata":{},"output_type":"display_data"},{"data":{"text/plain":["8996"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["n_missing_prices = reviews.price.isnull().sum()\n","\n","# Check your answer\n","q3.check()\n","n_missing_prices"]},{"cell_type":"code","execution_count":7,"id":"4eaec47a","metadata":{"execution":{"iopub.execute_input":"2022-04-06T07:55:35.072853Z","iopub.status.busy":"2022-04-06T07:55:35.072106Z","iopub.status.idle":"2022-04-06T07:55:35.074443Z","shell.execute_reply":"2022-04-06T07:55:35.07394Z","shell.execute_reply.started":"2022-04-06T07:52:37.190558Z"},"papermill":{"duration":0.022842,"end_time":"2022-04-06T07:55:35.074559","exception":false,"start_time":"2022-04-06T07:55:35.051717","status":"completed"},"tags":[]},"outputs":[],"source":["#q3.hint()\n","#q3.solution()"]},{"cell_type":"markdown","id":"9ee45b96","metadata":{"papermill":{"duration":0.016882,"end_time":"2022-04-06T07:55:35.108943","exception":false,"start_time":"2022-04-06T07:55:35.092061","status":"completed"},"tags":[]},"source":["## 4.\n","What are the most common wine-producing regions? Create a Series counting the number of times each value occurs in the `region_1` field. This field is often missing data, so replace missing values with `Unknown`. Sort in descending order. Your output should look something like this:\n","\n","```\n","Unknown 21247\n","Napa Valley 4480\n"," ... \n","Bardolino Superiore 1\n","Primitivo del Tarantino 1\n","Name: region_1, Length: 1230, dtype: int64\n","```"]},{"cell_type":"code","execution_count":8,"id":"021ac91d","metadata":{"execution":{"iopub.execute_input":"2022-04-06T07:55:35.146928Z","iopub.status.busy":"2022-04-06T07:55:35.146412Z","iopub.status.idle":"2022-04-06T07:55:35.225484Z","shell.execute_reply":"2022-04-06T07:55:35.226031Z","shell.execute_reply.started":"2022-04-06T07:52:37.195928Z"},"papermill":{"duration":0.10014,"end_time":"2022-04-06T07:55:35.226196","exception":false,"start_time":"2022-04-06T07:55:35.126056","status":"completed"},"tags":[]},"outputs":[{"data":{"application/javascript":["parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.25, \"interactionType\": 1, \"questionType\": 2, \"questionId\": \"4_ReviewsPerRegion\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, \"*\")"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/markdown":["Correct"],"text/plain":["Correct"]},"metadata":{},"output_type":"display_data"},{"data":{"text/plain":["Unknown 21247\n","Napa Valley 4480\n","Columbia Valley (WA) 4124\n","Russian River Valley 3091\n","California 2629\n"," ... \n","Offida Rosso 1\n","Corton Perrières 1\n","Isle St. George 1\n","Geelong 1\n","Paestum 1\n","Name: region_1, Length: 1230, dtype: int64"]},"execution_count":8,"metadata":{},"output_type":"execute_result"}],"source":["reviews_per_region = reviews.region_1.fillna('Unknown').value_counts().sort_values(ascending=False)\n","\n","# Check your answer\n","q4.check()\n","reviews_per_region"]},{"cell_type":"code","execution_count":9,"id":"cf11159c","metadata":{"execution":{"iopub.execute_input":"2022-04-06T07:55:35.283244Z","iopub.status.busy":"2022-04-06T07:55:35.282645Z","iopub.status.idle":"2022-04-06T07:55:35.285413Z","shell.execute_reply":"2022-04-06T07:55:35.286006Z","shell.execute_reply.started":"2022-04-06T07:52:37.297963Z"},"papermill":{"duration":0.033009,"end_time":"2022-04-06T07:55:35.286142","exception":false,"start_time":"2022-04-06T07:55:35.253133","status":"completed"},"tags":[]},"outputs":[],"source":["#q4.hint()\n","#q4.solution()"]},{"cell_type":"markdown","id":"e3ae560b","metadata":{"papermill":{"duration":0.026323,"end_time":"2022-04-06T07:55:35.339207","exception":false,"start_time":"2022-04-06T07:55:35.312884","status":"completed"},"tags":[]},"source":["# Keep going\n","\n","Move on to **[renaming and combining](https://www.kaggle.com/residentmario/renaming-and-combining)**."]},{"cell_type":"markdown","id":"06de2c40","metadata":{"papermill":{"duration":0.026279,"end_time":"2022-04-06T07:55:35.392159","exception":false,"start_time":"2022-04-06T07:55:35.36588","status":"completed"},"tags":[]},"source":["---\n","\n","\n","\n","\n","*Have questions or comments? Visit the [course discussion forum](https://www.kaggle.com/learn/pandas/discussion) to chat with other learners.*"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.12"},"papermill":{"default_parameters":{},"duration":13.123286,"end_time":"2022-04-06T07:55:36.028654","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2022-04-06T07:55:22.905368","version":"2.3.3"}},"nbformat":4,"nbformat_minor":5} \ No newline at end of file