# Build the iris dataset
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
# Load the bundled iris data and look at its parts. The bare expressions
# below only display something when run interactively (notebook / REPL).
iris = load_iris()
iris.data
iris.feature_names
iris.target
iris.target_names

# Assemble one DataFrame: a column per feature, then the class label
# (`target`) and the class name looked up from that label (`target_names`).
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(
    target=iris.target,
    target_names=iris.target_names[iris.target],
)
iris_df.head(5)
# Split the rows into a training set and a test set (30% held out for
# testing). No random_state is given, so the split is not fixed across runs.
from sklearn.model_selection import train_test_split

train_set, test_set = train_test_split(
    iris_df,
    test_size=0.3,
)
train_set.shape
test_set.shape
# Explore the data: pairwise scatter-plot matrix of the training set, with
# every point coloured by its class label.
#
# `pd.tools.plotting` no longer exists in current pandas; the same function
# is available as `pd.plotting.scatter_matrix`.
import mglearn  # supplies the colour map `cm3` used below (was never imported)

pd.plotting.scatter_matrix(
    train_set,
    c=train_set.target,
    figsize=(15, 15),
    marker="o",
    hist_kwds={"bins": 20},
    s=60,  # s: marker size
    alpha=0.8,
    cmap=mglearn.cm3,  # cmap: colour map
)
# kNN: nearest-neighbour classifier using a single neighbour
import sklearn.neighbors as nn

knn = nn.KNeighborsClassifier(n_neighbors=1)

# Train on the columns at positions 0-3 (the measurement columns), with the
# `target` column as the label.
# `.ix` was removed in pandas 1.0; `.iloc` is the purely positional indexer
# and selects the same columns that `.ix[:, [0,1,2,3]]` selected here.
knn.fit(X=train_set.iloc[:, [0, 1, 2, 3]], y=train_set.target)

# Test 1: accuracy on the held-out test set
knn.score(test_set.iloc[:, [0, 1, 2, 3]], test_set.target)  # 95.6% on the recorded run

# Test 2: predicted class labels for the held-out test set
pred = knn.predict(X=test_set.iloc[:, [0, 1, 2, 3]])
# Confusion matrix: see the following references for an explanation
# https://uberpython.wordpress.com/2012/01/01/precision-recall-sensitivity-and-specificity/
# https://stackoverflow.com/questions/31324218/scikit-learn-how-to-obtain-true-positive-true-negative-false-positive-and-fal
from pandas_ml import ConfusionMatrix
# Compare the true test labels with the kNN predictions.
cm = ConfusionMatrix(test_set.target.values, pred)
# Prints the confusion matrix followed by overall and per-class statistics.
cm.print_stats()
# Result
# cm.print_stats()
# Confusion Matrix:
# Predicted 0 1 2 __all__
# Actual
# 0 16 0 0 16
# 1 0 16 0 16
# 2 0 2 11 13
# __all__ 16 18 11 45
# Overall Statistics:
# Accuracy: 0.955555555556
# 95% CI: (0.84850709975666083, 0.99457151129974908)
# No Information Rate: ToDo
# P-Value [Acc > NIR]: 2.8423103302e-15
# Kappa: 0.932735426009
# Mcnemar's Test P-Value: ToDo
# Class Statistics:
# Classes 0 1 2
# Population 45 45 45
# P: Condition positive 16 16 13
# N: Condition negative 29 29 32
# Test outcome positive 16 18 11
# Test outcome negative 29 27 34
# TP: True Positive 16 16 11
# TN: True Negative 29 27 32
# FP: False Positive 0 2 0
# FN: False Negative 0 0 2
# TPR: (Sensitivity, hit rate, recall) 1 1 0.846154
# TNR=SPC: (Specificity) 1 0.931034 1
# PPV: Pos Pred Value (Precision) 1 0.888889 1
# NPV: Neg Pred Value 1 1 0.941176
# FPR: False-out 0 0.0689655 0
# FDR: False Discovery Rate 0 0.111111 0
# FNR: Miss Rate 0 0 0.153846
# ACC: Accuracy 1 0.955556 0.955556
# F1 score 1 0.941176 0.916667
# MCC: Matthews correlation coefficient 1 0.909718 0.892401
# Informedness 1 0.931034 0.846154
# Markedness 1 0.888889 0.941176
# Prevalence 0.355556 0.355556 0.288889
# LR+: Positive likelihood ratio inf 14.5 inf
# LR-: Negative likelihood ratio 0 0 0.153846
# DOR: Diagnostic odds ratio inf inf inf
# FOR: False omission rate 0 0 0.0588235