#title Python-Linear Regression
[[TableOfContents]]

==== OLS vs Ridge vs Lasso ====
Linear Regression의 기본적인 알고리즘은 OLS와 Ridge, Lasso가 같다. 단지 cost function이 다르다.

attachment:Python-LinearRegression/lm_compare.png

뭐.. 그러하다.

==== 데이터 ====
{{{
# Build the iris dataset as a DataFrame
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris

iris = load_iris()

# Bare expressions: inspect these interactively (REPL / notebook)
iris.data
iris.feature_names
iris.target
iris.target_names

iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_df["target"] = iris.target
# Map each integer class id to its class name
iris_df["target_names"] = iris.target_names[iris.target]
iris_df[:5]

# Split into a training set and a test set
from sklearn.model_selection import train_test_split

train_set, test_set = train_test_split(iris_df, test_size=0.3)
train_set.shape
test_set.shape
}}}

==== OLS(ordinary least squares) ====
{{{
# Linear regression (least squares)
from sklearn.linear_model import LinearRegression as lm

# Columns are selected by position with .iloc
# (.ix was deprecated in pandas 0.20 and removed in pandas 1.0).
# Feature: column 2, target: column 3.
model_ols = lm().fit(X=train_set.iloc[:, [2]], y=train_set.iloc[:, [3]])
print(model_ols.coef_)
print(model_ols.intercept_)

# Plot: training points plus the fitted line evaluated on the test set
import matplotlib.pyplot as plt

plt.scatter(train_set.iloc[:, [2]], train_set.iloc[:, [3]], color='black')
plt.plot(test_set.iloc[:, [2]], model_ols.predict(test_set.iloc[:, [2]]))
}}}

결과

attachment:Python-LinearRegression/lm.png

==== Ridge ====
{{{
# Ridge: tune alpha to avoid over-/under-fitting.
from sklearn.linear_model import Ridge

# Columns are selected by position with .iloc (.ix was removed in pandas 1.0).
model_ridge = Ridge(alpha=10).fit(X=train_set.iloc[:, [2]], y=train_set.iloc[:, [3]])

# Scores on the training set and on the test set
print(model_ridge.score(X=train_set.iloc[:, [2]], y=train_set.iloc[:, [3]]))
print(model_ridge.score(X=test_set.iloc[:, [2]], y=test_set.iloc[:, [3]]))

# Plot: training points plus the fitted line evaluated on the test set
import matplotlib.pyplot as plt

plt.scatter(train_set.iloc[:, [2]], train_set.iloc[:, [3]], color='black')
plt.plot(test_set.iloc[:, [2]], model_ridge.predict(test_set.iloc[:, [2]]))
}}}

==== Lasso ====
{{{
# Lasso: tune alpha to avoid over-/under-fitting.
from sklearn.linear_model import Lasso

# Columns are selected by position with .iloc
# (.ix was deprecated in pandas 0.20 and removed in pandas 1.0).
# Features: columns 0-2, target: column 3.
model_lasso = Lasso(alpha=0.1, max_iter=1000).fit(
    X=train_set.iloc[:, [0, 1, 2]], y=train_set.iloc[:, [3]]
)

# Scores on the training set and on the test set
print(model_lasso.score(X=train_set.iloc[:, [0, 1, 2]], y=train_set.iloc[:, [3]]))
print(model_lasso.score(X=test_set.iloc[:, [0, 1, 2]], y=test_set.iloc[:, [3]]))

# Number of features actually used (non-zero coefficients)
print(np.sum(model_lasso.coef_ != 0))
}}}
----
CategoryMachineLearning