-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNearestNeighbor.py
67 lines (52 loc) · 2.08 KB
/
NearestNeighbor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python
# coding: utf-8
# In[2]:
import numpy as np
# Print the author header followed by a blank line.
print("Grigor Sargsyan")
print()

# Input CSV files. Each row is read as four numeric attributes (columns 0-3)
# followed by a text class label (column 4).
input_train = "iris-training-data.csv"
input_test = "iris-testing-data.csv"

# Parse attributes and labels from the training and testing files.
# ndmin keeps the attribute arrays 2-D and the label arrays 1-D even when a
# file holds a single row; without it np.loadtxt squeezes the result, and the
# len() calls and per-row indexing later in the script fail.
training_attributes = np.loadtxt(
    input_train, delimiter=',', usecols=(0, 1, 2, 3), ndmin=2)
training_labels = np.loadtxt(
    input_train, dtype='<U15', delimiter=',', usecols=4, ndmin=1)
testing_attributes = np.loadtxt(
    input_test, delimiter=',', usecols=(0, 1, 2, 3), ndmin=2)
testing_labels = np.loadtxt(
    input_test, dtype='<U15', delimiter=',', usecols=4, ndmin=1)

# Number of training rows. NOTE: despite the name this is not the "k" of
# k-nearest-neighbours; the script always uses the single closest neighbour.
k = len(training_labels)
# Index of the closest training row for the testing row being classified.
min_distance_indx = 0
def getDistance(trainingRow, testingRow):
    """Return the Euclidean distance between two attribute rows.

    The distance is computed over every pair of corresponding values rather
    than a fixed count of four, so rows with any number of features work.
    For four-feature rows the result is identical to the previous
    hard-coded version.

    Args:
        trainingRow: Sequence of numeric attribute values.
        testingRow: Sequence of numeric attribute values. Expected to have
            the same length as ``trainingRow``; if the lengths differ, only
            the leading positions present in both rows are compared.

    Returns:
        The distance as a built-in ``float``.
    """
    dist = 0.0
    # Accumulate squared differences left to right (same order as before) so
    # floating-point results, and therefore tie-breaks, stay unchanged.
    for trainingValue, testingValue in zip(trainingRow, testingRow):
        delta = trainingValue - testingValue
        dist = dist + delta * delta
    return float(np.sqrt(dist))
def getNearestNeighbor(trainingTable, testingRow):
    """Return the index of the training row closest to ``testingRow``.

    Every row of ``trainingTable`` is compared with ``testingRow`` using
    ``getDistance``; the index of the row with the smallest distance is
    returned. When several rows are equally close, the earliest one wins.

    Args:
        trainingTable: Sequence of attribute rows to search.
        testingRow: Attribute row to find a neighbour for.

    Returns:
        The ``int`` index into ``trainingTable`` of the nearest row, or 0 if
        ``trainingTable`` is empty.
    """
    # Start from infinity so the first real distance always replaces it. A
    # finite sentinel (previously 100.0) silently returned row 0 whenever
    # every training row was farther away than the sentinel.
    minDist = float("inf")
    closestNeighbor = 0
    # Iterate the table that was passed in instead of relying on a global
    # row count, so the function works for a table of any size.
    for index, trainingRow in enumerate(trainingTable):
        result = getDistance(trainingRow, testingRow)
        # Strict '<' keeps the earliest row when distances tie.
        if result < minDist:
            minDist = result
            closestNeighbor = index
    return int(closestNeighbor)
# Classify every testing row and report the results.
# Everything here is sized by the number of TESTING rows. Using k (the
# number of training rows) indexes past the end of the testing arrays, or
# skips testing rows and skews the accuracy, whenever the two files differ
# in length.
num_tests = len(testing_labels)
predicted_labels = np.array([''] * num_tests, dtype='<U15')

# Populate predicted labels: each testing row takes the label of its
# nearest training row.
for i in range(num_tests):
    min_distance_indx = getNearestNeighbor(training_attributes, testing_attributes[i])
    predicted_labels[i] = training_labels[min_distance_indx]

# Print true and predicted labels. percentage_counter starts at the number
# of testing rows and is decremented once per mistake, so it ends up as the
# number of correct predictions.
percentage_counter = num_tests
print("#, True, Predicted")
for i in range(num_tests):
    print("%d,%s,%s" % (i+1,testing_labels[i],predicted_labels[i]))
    if testing_labels[i] != predicted_labels[i]:
        percentage_counter -= 1

# Guard the division so an empty testing set reports 0% instead of raising
# ZeroDivisionError.
if num_tests:
    accuracy = (percentage_counter/num_tests)*100
else:
    accuracy = 0.0
print("Accuracy: %.2f%%" % accuracy)