import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import model_selection, preprocessing, linear_model
import sklearn
sklearn.__version__
# Load the UCI housing table. The file has no header row and its fields are
# separated by runs of whitespace, so the separator is a regular expression.
# It is written as a raw string so that the backslash in `\s` is passed to the
# regex engine untouched instead of being parsed as a (invalid) string escape.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data',
    header=None,
    sep=r'\s+',
)
# Attach the 14 column names, since the raw file carries none.
df.columns = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
]
# RM column only, kept 2-D (n_rows, 1) by the double-bracket selection.
X_rm = df[['RM']].values
# Feature matrix: the first 13 columns (everything before MEDV).
X = df.iloc[:, 0:13]
# Regression target: the MEDV column as a NumPy array.
Y = df['MEDV'].values
# Hold out 20% of the rows for testing. The split is done BEFORE scaling so
# that the held-out rows contribute nothing to the scaler's statistics
# (fitting the scaler on the full data would leak test information).
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y, test_size=0.2, random_state=0
)
# Standardise features using statistics learned from the training rows only,
# then apply that same transformation to both partitions.
sc = preprocessing.StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
# Keep `X` bound to a standardised array of all rows, as it was before,
# now expressed in terms of the training-set statistics.
X = sc.transform(X)
# Fit three linear regressors on the same training partition.

# Linear model trained by stochastic gradient descent. The estimator uses a
# random number generator while training, so a fixed seed is supplied to make
# the fitted coefficients and the reported score repeatable between runs.
clf = linear_model.SGDRegressor(max_iter=1000, random_state=0)
clf.fit(X_train, Y_train)

# Lasso regression with regularisation strength alpha=0.5.
clf_lasso = linear_model.Lasso(alpha=0.5)
clf_lasso.fit(X_train, Y_train)

# Ridge regression with regularisation strength alpha=0.5.
clf_ridge = linear_model.Ridge(alpha=0.5)
clf_ridge.fit(X_train, Y_train)
# Report the fitted parameters of each model: a heading line, then the
# intercept, then the coefficient vector. The headings after the first begin
# with a newline so that the sections are separated by a blank line.
for _heading, _estimator in (
    ("SGDの係数", clf),
    ("\nLassoの係数", clf_lasso),
    ("\nRidgeの係数", clf_ridge),
):
    print(_heading)
    print(_estimator.intercept_)
    print(_estimator.coef_)
# Predict the target for the held-out rows with each fitted model.
Y_pred = clf.predict(X_test)
Y_lasso_pred = clf_lasso.predict(X_test)
Y_ridge_pred = clf_ridge.predict(X_test)

# Root-mean-square error on the test partition: the square root of the mean
# squared residual. The square root is required for the value to match the
# "RMSE" label printed below; without it the figure is the mean squared error.
RMS = np.sqrt(np.mean((Y_pred - Y_test) ** 2))
RMS_lasso = np.sqrt(np.mean((Y_lasso_pred - Y_test) ** 2))
RMS_ridge = np.sqrt(np.mean((Y_ridge_pred - Y_test) ** 2))

print("SGD RMSE", RMS)
print("Lasso RMSE", RMS_lasso)
print("Ridge RMSE", RMS_ridge)
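# ---------------------------------------------------------------------------
# Sample output recorded from one run of the script above (not executable).
# NOTE: in this recorded run the figures labelled "RMSE" are means of squared
# errors with no square root taken (i.e. MSE); for example sqrt(33.39) is
# about 5.78.
# ---------------------------------------------------------------------------
# SGDの係数
# [22.50529262]
# [-0.96926518 0.94949489 -0.14527762 0.63794958 -1.6346609 2.68994304
# -0.14346678 -2.84697474 1.62848272 -1.29629315 -2.19507166 0.72119816
# -3.44826026]
# Lassoの係数
# 22.551646497220098
# [-0.2022351 0. -0. 0.34586306 -0. 2.85095977
# -0. -0. -0. -0.24202672 -1.99201487 0.41276179
# -3.49325645]
# Ridgeの係数
# 22.480943943558106
# [-1.02136951 1.03522818 0.02450249 0.59611373 -1.84847657 2.6082378
# -0.0912668 -2.89943172 2.08520461 -1.815872 -2.25741944 0.73956343
# -3.5082977 ]
# SGD RMSE 33.38872967589145
# Lasso RMSE 39.80131790956071
# Ridge RMSE 33.49305020158983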