# Preprocessing.py
# Forked from tejasnaik0509/CKD-Prediction.
import numpy as np
import pandas as pd
# Clean the raw table in ckd.csv and write the result to final.csv.
#
# Steps: load the CSV treating "?" as missing, map the nominal columns listed
# in `cleanup` to 1/0, fill the remaining gaps in numeric columns with the
# column mean (rounded to 2 decimals), and save the frame without its index.

# "?" marks a missing value in the raw file; na_values turns it into NaN on load.
dataset = pd.read_csv("ckd.csv", header=0, na_values="?")

# Catch any "?" that survived parsing. replace() returns a new frame unless
# inplace=True is given, so the result must be applied explicitly. np.nan is
# used because the np.NaN alias was removed in NumPy 2.0.
dataset.replace("?", np.nan, inplace=True)

# Per-column mapping from nominal labels to binary values.
# NOTE(review): "pe" is the only lowercase key here -- confirm it matches the
# CSV header; a key that matches no column is not applied by replace().
cleanup = {
    "Rbc": {"normal": 1, "abnormal": 0},
    "Pc": {"normal": 1, "abnormal": 0},
    "Pcc": {"present": 1, "notpresent": 0},
    "Ba": {"present": 1, "notpresent": 0},
    "Htn": {"yes": 1, "no": 0},
    "Dm": {"yes": 1, "no": 0},
    "Cad": {"yes": 1, "no": 0},
    "Appet": {"good": 1, "poor": 0},
    "pe": {"yes": 1, "no": 0},
    "Ane": {"yes": 1, "no": 0},
}
dataset.replace(cleanup, inplace=True)

# Newer pandas releases deprecate the automatic downcast of object columns
# after replace(). Convert the freshly encoded columns to numeric dtypes
# explicitly so they are not left out of the mean imputation below.
dataset = dataset.infer_objects()

# numeric_only=True keeps any remaining text column out of the mean. Older
# pandas dropped such columns silently; pandas >= 2.0 raises TypeError instead.
column_means = dataset.mean(numeric_only=True).round(2)
dataset.fillna(column_means, inplace=True)

# Save the cleaned dataset as final.csv for the prediction step.
dataset.to_csv("final.csv", sep=",", index=False)