-
Notifications
You must be signed in to change notification settings - Fork 2
/
knn.py
107 lines (80 loc) · 3.11 KB
/
knn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""
Implementation of k-nearest-neighbor classifier
"""
from numpy import *
from pylab import *
from binary import *
class KNN(BinaryClassifier):
    """
    A nearest neighbor classifier that supports _both_ K-nearest
    neighbors _and_ epsilon-ball neighbors, selected via the options
    dict passed at construction time.

    Options read (at predict time):
      opts['isKNN'] : bool  -- True for K-NN, False for epsilon balls
      opts['K']     : int   -- number of neighbors (K-NN mode only)
      opts['eps']   : float -- ball radius (epsilon-ball mode only)
    """

    def __init__(self, opts):
        """
        Initialize the classifier. There's actually basically nothing
        to do here since nearest neighbors do not really train.
        """
        # remember the options
        self.opts = opts
        # just call reset
        self.reset()

    def reset(self):
        # Forget all stored training data.
        self.trX = zeros((0, 0))  # where we will store the training examples
        self.trY = zeros((0))     # where we will store the training labels

    def online(self):
        """
        We're not online
        """
        return False

    def __repr__(self):
        """
        Return a string representation of the classifier.
        """
        # BUG FIX: the original returned repr(self.weights), but no
        # 'weights' attribute is ever assigned (reset() only sets trX/trY),
        # so __repr__ always raised AttributeError. Show the stored data.
        return "trX=" + repr(self.trX) + ", trY=" + repr(self.trY)

    def predict(self, X):
        """
        X is a vector that we're supposed to make a prediction about.
        Our return value should be the 'vote' in favor of a positive
        or negative label. In particular, if, in our neighbor set,
        there are 5 positive training examples and 2 negative
        examples, we return 5-2=3.
        Everything should be in terms of _Euclidean distance_, NOT
        squared Euclidean distance or anything more exotic.
        """
        # if we haven't trained yet, there are no neighbors to vote
        if self.trY.size == 0:
            return 0

        # Euclidean distance from X to every stored training example
        # (one entry per row of trX)
        dist = sqrt(((X - self.trX) ** 2).sum(axis=1))

        if self.opts['isKNN']:
            # K nearest neighbor model: argsort gives indices ordered by
            # increasing distance; slicing [:K] safely handles K > N
            K = self.opts['K']
            nearest = argsort(dist)[:K]
            # BUG FIX: the original returned util.mode(...) -- the majority
            # label, not the vote -- and 'util' is never imported in this
            # file. The docstring requires #pos - #neg; with +/-1 labels
            # that is exactly the sum of the neighbors' labels.
            return self.trY[nearest].sum()
        else:
            # epsilon ball model: vote over all points within radius eps
            eps = self.opts['eps']
            # BUG FIX: same as above -- sum of +/-1 labels gives #pos - #neg
            return self.trY[dist <= eps].sum()

    def getRepresentation(self):
        """
        Return the stored training data (examples, labels).
        """
        return (self.trX, self.trY)

    def train(self, X, Y):
        """
        Just store the data; nearest neighbors defers all work to predict.
        """
        self.trX = X
        self.trY = Y