forked from shunliz/Machine-Learning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
knn-code.md
54 lines (45 loc) · 1.7 KB
/
knn-code.md
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
```py
#encoding:utf-8
from numpy import *
import operator
def createDataSet():
    """Build the small toy training set used to demonstrate the classifier.

    Returns:
        A ``(group, labels)`` pair: ``group`` is a 4x2 array with one sample
        per row, and ``labels[i]`` is the class label of ``group[i]``.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
def classify(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, measured between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify; it must broadcast against the rows
            of ``dataSet``.
        dataSet: 2-D array-like holding one training sample per row.
        labels: Sequence in which ``labels[i]`` is the class of
            ``dataSet[i]``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the ``k`` nearest samples. On
        Python 3.7+ a tie goes to the tied label whose first vote came from
        the closer neighbour (dict insertion order plus a stable sort).

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            rows in ``dataSet``.
    """
    # Accept plain nested lists as well as arrays.
    dataSet = asarray(dataSet)
    # Broadcasting subtracts inX from every row at once, so no explicit
    # replication of inX is needed.
    diffMat = asarray(inX) - dataSet
    # Row-wise sum of squared differences, then square root: one Euclidean
    # distance per training sample.
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5
    # Indices of the training samples ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()
    # Tally the labels of the k nearest samples.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    # Sort the (label, votes) pairs by vote count, highest first.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    # The most frequent label among the k neighbours wins.
    return sortedClassCount[0][0]
```
```
import kNN
from numpy import *
dataSet, labels = kNN.createDataSet()
testX = array([1.2, 1.0])
k = 3
outputLabel = kNN.kNNClassify(testX, dataSet, labels, 3)
print "Your input is:", testX, "and classified to class: ", outputLabel
testX = array([0.1, 0.3])
outputLabel = kNN.kNNClassify(testX, dataSet, labels, 3)
print "Your input is:", testX, "and classified to class: ", outputLabel
```