-
Notifications
You must be signed in to change notification settings - Fork 2
/
PandasLib.py
79 lines (36 loc) · 1.1 KB
/
PandasLib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 18 17:34:37 2020
@author: shamaun
"""
import pandas as pd
# Build a small demo DataFrame from a dict mapping column name -> values.
d = {
    'name': ['Aman', 'Ananya', 'Anshuman'],
    'roll': [1, 2, 3],
    'marks': [89, 87, 86],
}
# Promote the 'roll' column to the row index (it stops being a regular
# column), replacing the default 0..n-1 positional index.
df = pd.DataFrame(d).set_index('roll')
# Read the external auto-mpg data set and clean it with pandas.
# header=None tells pandas the file has no header row, so the first record
# is kept as data and the columns are initially labelled 0..8.
data = pd.read_csv(r"E:\auto-mpg.csv", header=None)

# Give the nine positional columns descriptive names.
data.columns = [
    'mpg',
    'cylinders',
    'displacement',
    'horsepower',
    'weight',
    'acceleration',
    'model_year',
    'origin',
    'car name',
]

# Count the "?" placeholders in the horsepower column.
print(sum(data['horsepower'] == '?'))

# Replace the "?" placeholders with a fixed value of 150.0.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on data['horsepower']: an in-place call on a
# column selection is chained assignment, which under pandas Copy-on-Write
# does not update `data` and would leave the "?" strings in place for the
# float conversion below.
data['horsepower'] = data['horsepower'].replace('?', 150.0)

# Summary statistics for every column. This is taken before the float
# conversion below, so horsepower is summarised with whatever dtype it has
# at this point.
desc = data.describe(include='all')

# With the placeholders gone the column can be converted to float.
data['horsepower'] = data['horsepower'].astype(float)

# Write the cleaned table to the current working directory; to_csv's default
# index=True also writes the row index as the first column.
data.to_csv("auto-mpg-clean.csv")