#title kNN
{{{
# Build the iris dataset
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris

iris = load_iris()
iris.data
iris.feature_names
iris.target
iris.target_names

iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_df["target"] = iris.target
iris_df["target_names"] = iris.target_names[iris.target]
iris_df[:5]

# Split into a training set and a test set
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(iris_df, test_size=0.3)
train_set.shape
test_set.shape

# Inspect the data
import mglearn  # needed for the mglearn.cm3 colormap used below
# scatter_matrix lives in pd.plotting (pandas >= 0.20); pd.tools.plotting no longer exists
pd.plotting.scatter_matrix(train_set, c=train_set.target, figsize=(15, 15), marker="o",
                           hist_kwds={"bins": 20}, s=60, alpha=0.8, cmap=mglearn.cm3)
# s: marker size
# cmap: color map

# kNN
import sklearn.neighbors as nn
knn = nn.KNeighborsClassifier(n_neighbors=1)

# Train
# Columns 0-3 are the four feature columns; "target" and "target_names" come after them.
# .iloc is the positional indexer that replaces .ix (removed in pandas 1.0).
knn.fit(X=train_set.iloc[:, [0, 1, 2, 3]], y=train_set.target)

# Test 1
knn.score(test_set.iloc[:, [0, 1, 2, 3]], test_set.target)
# Accuracy 95.6% in the run shown below; train_test_split is called without
# random_state, so the split (and the score) can differ between runs.

# Test 2
pred = knn.predict(X=test_set.iloc[:, [0, 1, 2, 3]])

# For the confusion matrix, see the following:
# https://uberpython.wordpress.com/2012/01/01/precision-recall-sensitivity-and-specificity/
# https://stackoverflow.com/questions/31324218/scikit-learn-how-to-obtain-true-positive-true-negative-false-positive-and-fal
from pandas_ml import ConfusionMatrix
cm = ConfusionMatrix(test_set.target.values, pred)
cm.print_stats()
}}}

결과
{{{
cm.print_stats()
Confusion Matrix:

Predicted   0   1   2  __all__
Actual
0          16   0   0       16
1           0  16   0       16
2           0   2  11       13
__all__    16  18  11       45


Overall Statistics:

Accuracy: 0.955555555556
95% CI: (0.84850709975666083, 0.99457151129974908)
No Information Rate: ToDo
P-Value [Acc > NIR]: 2.8423103302e-15
Kappa: 0.932735426009
Mcnemar's Test P-Value: ToDo


Class Statistics:

Classes                                        0          1          2
Population                                    45         45         45
P: Condition positive                         16         16         13
N: Condition negative                         29         29         32
Test outcome positive                         16         18         11
Test outcome negative                         29         27         34
TP: True Positive                             16         16         11
TN: True Negative                             29         27         32
FP: False Positive                             0          2          0
FN: False Negative                             0          0          2
TPR: (Sensitivity, hit rate, recall)           1          1   0.846154
TNR=SPC: (Specificity)                         1   0.931034          1
PPV: Pos Pred Value (Precision)                1   0.888889          1
NPV: Neg Pred Value                            1          1   0.941176
FPR: False-out                                 0  0.0689655          0
FDR: False Discovery Rate                      0   0.111111          0
FNR: Miss Rate                                 0          0   0.153846
ACC: Accuracy                                  1   0.955556   0.955556
F1 score                                       1   0.941176   0.916667
MCC: Matthews correlation coefficient          1   0.909718   0.892401
Informedness                                   1   0.931034   0.846154
Markedness                                     1   0.888889   0.941176
Prevalence                              0.355556   0.355556   0.288889
LR+: Positive likelihood ratio               inf       14.5        inf
LR-: Negative likelihood ratio                 0          0   0.153846
DOR: Diagnostic odds ratio                   inf        inf        inf
FOR: False omission rate                       0          0  0.0588235
}}}
----
CategoryMachineLearning