-
Notifications
You must be signed in to change notification settings - Fork 3
/
classification.py
117 lines (98 loc) · 3.72 KB
/
classification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 11 00:12:03 2017
@author: hp
"""
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 10 23:05:51 2017
@author: hp
"""
## Importing the standard packages for a basic python file
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import json
import sys
## Reading data sent from the node file
## Command line arguments:
##   sys.argv[1] - path of the CSV file; columns 0 and 1 are used as the two
##                 features and column 2 as the class label
##   sys.argv[2] - the two feature values of the sample to classify, written
##                 as a matrix string (columns separated by spaces or commas)
if len(sys.argv) < 3:
    # Fail with a readable message instead of a bare IndexError
    sys.exit('usage: {} <dataset.csv> "<score1> <score2>"'.format(sys.argv[0]))
dataset = pd.read_csv(sys.argv[1])
X = dataset.iloc[:, [0, 1]].values
y = dataset.iloc[:, 2].values
## Splitting data to train set and test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)
## Parsing the sample to classify
## np.mat was only an alias of np.asmatrix and no longer exists in NumPy 2.0;
## the parsed matrix is converted to a plain float ndarray because current
## scikit-learn versions reject np.matrix inputs.
W = np.asarray(np.asmatrix(sys.argv[2]), dtype=float).reshape(1, 2)
## Feature scaling
## The scaler is fitted on the training set only and then applied unchanged
## to the test set and to the sample.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train.astype(float))
X_test = sc.transform(X_test.astype(float))
W = sc.transform(W)
## Importing all classifier libraries
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
## The 7 classifiers to compare, keyed by the name used for each report entry
## and plot file. Insertion order decides the order of the report entries.
classifiers = {}
classifiers['LogisticRegression'] = LogisticRegression(random_state=0)
classifiers['KNeighbors'] = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
)
classifiers['SVC(linear)'] = SVC(kernel='linear', random_state=0)
classifiers['SVC(rbf)'] = SVC(kernel='rbf', random_state=0)
classifiers['Gaussian'] = GaussianNB()
classifiers['Decision Tree'] = DecisionTreeClassifier(
    criterion='entropy',
    random_state=0,
)
classifiers['Random Forest'] = RandomForestClassifier(
    n_estimators=10,
    criterion='entropy',
    random_state=0,
)
## Train every classifier, score it on the test set, predict the class of the
## sample W and save a decision-region plot of the test set for it
from sklearn.metrics import confusion_matrix
from matplotlib.colors import ListedColormap

## Container variable: one report entry (dict) per classifier
data = []

## Plot inputs that do not depend on the classifier are built once
region_colors = ListedColormap(('blue', 'pink'))
point_colors = ListedColormap(('cyan', 'red'))
X_set, y_set = X_test, y_test
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
    np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
## Every grid node as a (feature 1, feature 2) row, ready for predict()
grid_points = np.array([X1.ravel(), X2.ravel()]).T
class_labels = np.unique(y_set)

for name, classifier in classifiers.items():
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    ## Accuracy = correct predictions (diagonal of the confusion matrix)
    ## divided by all predictions; this works for any number of classes and
    ## the explicit floats keep it a true division on every Python version
    cm = confusion_matrix(y_test, y_pred)
    accuracy = float(np.trace(cm)) / float(np.sum(cm))

    ## NOTE: the ./plotPics directory is expected to exist already
    plot_path = './plotPics/' + name + '.png'
    data.append({
        'name': name,
        'accuracy': accuracy,
        'plot': plot_path,
        ## .item() replaces np.asscalar (removed from NumPy) and yields a
        ## plain Python scalar that json can serialise
        'prediction': classifier.predict(W).item(),
    })

    ## Plotting classification results to get some intuition
    plt.contourf(X1, X2, classifier.predict(grid_points).reshape(X1.shape),
                 alpha=0.75, cmap=region_colors)
    plt.xlim(X1.min(), X1.max())
    plt.ylim(X2.min(), X2.max())
    for color_index, class_label in enumerate(class_labels):
        ## color= (not c=) so that a single RGBA tuple can never be mistaken
        ## for a sequence of values to be colour-mapped
        plt.scatter(X_set[y_set == class_label, 0], X_set[y_set == class_label, 1],
                    color=point_colors(color_index), label=class_label)
    plt.title('Classifier (Test set) ')
    plt.xlabel('Score1')
    plt.ylabel('Score2')
    plt.legend()
    plt.savefig(plot_path)
    ## Clear the figure so the next classifier starts from an empty canvas
    plt.clf()
## End of for loop
## Writing the collected report entries to the file 'output' as JSON
with open('output', 'w') as report_file:
    report_file.write(json.dumps(data))
## Echoing the same entries on stdout and flushing right away
print(data)
sys.stdout.flush()