"""Binary logistic classification of iris flowers: setosa vs. everything else.

Notebook-style script.  It builds a DataFrame from scikit-learn's iris data
set, derives a 0/1 ``is_setosa`` label, splits the rows in half into train and
test sets, fits a logistic regression on two features (sepal length and petal
length), reports a confusion matrix and accuracy, and plots the learned
decision boundary over the full data set.
"""
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from pandas_ml import ConfusionMatrix
from pandasql import sqldf
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# The two predictors used everywhere below, and the binary target column.
FEATURES = ["sepal length (cm)", "petal length (cm)"]
LABEL = "is_setosa"

# --- Build the iris data set ------------------------------------------------
iris = load_iris()

# Bare expressions kept from the notebook: they display the value in an
# interactive session and have no effect when the file runs as a script.
iris.data
iris.feature_names
iris.target
iris.target_names

iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_df["target"] = iris.target
# Map each integer class id to its species name.
iris_df["target_names"] = iris.target_names[iris.target]


# --- Binary label: 1 when the species is setosa, 0 otherwise -----------------
def pysqldf(q):
    """Run SQL query *q* against the DataFrames in this module's globals."""
    return sqldf(q, globals())


iris_df[LABEL] = pysqldf(
    """
    select *,
           case when target_names = 'setosa' then 1 else 0 end is_setosa
    from iris_df
    """
)[LABEL]
iris_df[:5]

# --- Split into training and test sets (50 / 50) ----------------------------
# No random_state is given, so the split differs from run to run.
train_set, test_set = train_test_split(iris_df, test_size=0.5)
train_set.shape
test_set.shape

# --- Scatter plot of the training data, coloured by species ------------------
# `height` is the current name of the parameter formerly called `size`.
sns.pairplot(
    x_vars=["sepal length (cm)"],
    y_vars=["petal length (cm)"],
    data=train_set,
    hue="target_names",
    height=5,
)

# --- Logistic classification --------------------------------------------------
# C is the inverse of the regularization strength: smaller values regularize
# more strongly, larger values let the model fit the training data more
# closely (higher risk of over-fitting).
model = LogisticRegression(C=10)

# The target is passed as a 1-D Series; a single-column DataFrame makes
# scikit-learn emit a DataConversionWarning.
model.fit(X=train_set[FEATURES], y=train_set[LABEL])

# --- Evaluate on the held-out test set ---------------------------------------
pred = model.predict(X=test_set[FEATURES])

# Confusion-matrix background reading:
# https://uberpython.wordpress.com/2012/01/01/precision-recall-sensitivity-and-specificity/
# https://stackoverflow.com/questions/31324218/scikit-learn-how-to-obtain-true-positive-true-negative-false-positive-and-fal
# NOTE(review): pandas_ml appears to be unmaintained -- confirm it still
# imports with the installed pandas / scikit-learn versions.
cm = ConfusionMatrix(test_set.is_setosa.values, pred)
cm.print_stats()

# Mean accuracy on the training set, then on the test set.
print(model.score(X=train_set[FEATURES], y=train_set[LABEL]))
print(model.score(X=test_set[FEATURES], y=test_set[LABEL]))

# --- Plot every sample together with the decision boundary -------------------
fig = plt.figure()

not_setosa = iris_df[iris_df[LABEL] == 0]
setosa = iris_df[iris_df[LABEL] == 1]
plt.scatter(not_setosa[FEATURES[0]], not_setosa[FEATURES[1]], marker="+")
plt.scatter(setosa[FEATURES[0]], setosa[FEATURES[1]], c="green", marker="o")

coef = model.coef_            # shape (1, 2): one weight per feature
intercept = model.intercept_  # shape (1,)

# The boundary is where w0 * x1 + w1 * x2 + b == 0, i.e.
# x2 = -(w0 * x1 + b) / w1, evaluated over a range of sepal lengths.
ex1 = np.linspace(4, 8.5)
ex2 = -(coef[0, 0] * ex1 + intercept[0]) / coef[0, 1]
plt.plot(ex1, ex2, color="r", label="decision boundary")
plt.legend()