x가 주어졌을 때 x와 가장 가까운 거리(유클리드 거리; Euclidean distance)에 있는 k개를 찾은 다음, 그 k개가 속한 분류를 집계하여 가장 많은 분류 결과로 x를 분류하는 기법
R
install.packages("class")
library("class")
# Build the feature matrices for k-NN: keep only the two predictor columns.
# NOTE(review): sqldf() is called here but library(sqldf) is not loaded in the
# visible snippet, and `training` / `test2` are defined elsewhere - presumably
# data frames that each have var1, var2 and is_out columns; confirm.
tr <- sqldf("select var1, var2 from training")
te <- sqldf("select var1, var2 from test2")
# Classify every row of `te` by a vote among its 21 nearest rows of `tr`,
# using training$is_out as the class labels. prob=TRUE asks class::knn to also
# attach the winning-class vote proportion as the "prob" attribute of `pred`.
pred <- knn(tr, te, training$is_out, k = 21, prob=TRUE)
# Cross-tabulate predicted classes against test2$is_out (a confusion matrix).
table(pred, test2$is_out)
python
from numpy import *
import operator
def createDataSet():
    """Return a tiny two-class sample data set for trying out the classifier.

    Returns:
        A ``(group, labels)`` tuple where ``group`` is a 4x2 NumPy array of
        points (one row per sample) and ``labels`` is a list holding the
        class label of each row, in the same order.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
def classfy0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distance is the Euclidean distance between ``inX`` and each row of
    ``dataSet``.

    Args:
        inX: Query point; a sequence or array with one value per feature.
        dataSet: 2-D NumPy array of training points, one row per sample.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes. On Python 3.7+ a tie goes to
        the tied label that received its first vote earliest (i.e. from the
        nearer neighbour), because dicts preserve insertion order.

    Raises:
        IndexError: If ``k`` is not between 1 and the number of rows in
            ``dataSet``.
    """
    # Broadcasting subtracts every training row from the query point at once,
    # so no explicit tiling of inX is required.
    diffMat = inX - dataSet
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    sortedDistIndicies = distances.argsort()

    if not 1 <= k <= len(sortedDistIndicies):
        raise IndexError(
            "k must be between 1 and the number of training samples"
        )

    # Tally the labels of the k closest training points.
    classCount = {}
    for neighbourIndex in sortedDistIndicies[:k]:
        voteIlabel = labels[neighbourIndex]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    return max(classCount.items(), key=operator.itemgetter(1))[0]
# Usage example (reads like an interactive-session transcript).
# NOTE(review): assumes the functions above are saved as a module named
# kNN (kNN.py) on the import path - confirm.
import kNN
# Load the four-point sample data set and its labels.
group, labels = kNN.createDataSet()
# Classify the point [0, 0] using its 3 nearest neighbours. The return value
# is not stored or printed here, so it is only visible in an interactive
# session.
kNN.classfy0([0,0], group, labels, 3)