Part1為純粹線性迴歸

Part2為「所有」的線性迴歸

一元線性回歸

`import pandas as pd  import numpy as np  import matplotlib.pyplot as plt  import seaborn as seabornInstance from sklearn.model_selection import train_test_split from sklearn.linear_model import LinearRegressionfrom sklearn import metrics%matplotlib inline`

`dataset = pd.read_csv('200811-201811.csv')  # load the air-quality dataset (Nov 2008 - Nov 2018)`

`dataset.describe()  # summary statistics (count, mean, std, quartiles) for each numeric column`

`dataset.plot(x='PM25', y='CO', style='o')  plt.title('PM25 vs CO')  plt.xlabel('PM25')  plt.ylabel('CO')  plt.show()`

`X = dataset['PM25'].values.reshape(-1,1)y = dataset['CO'].values.reshape(-1,1)#reshape(-1,1):換成單一列(垂直)#reshape(1,-1):換成單一行(水平)`

`X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)#80%為Training set，20%為Test set`

`regressor = LinearRegression()  regressor.fit(X_train, y_train) #訓練模型`

`df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})df  #顯示實際值和預測值df1 = df.head(25)`

`df1.plot(kind='bar',figsize=(10,5))plt.grid(which='major', linestyle='-', linewidth='0.5', color='green')plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black')plt.show()`

`plt.scatter(X_test, y_test,  color='gray')plt.plot(X_test, y_pred, color='red', linewidth=2)plt.show()`

`print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))  print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))  print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))`

`regressor.score(X_train, y_train)  # R-squared of the fit, evaluated on the training data`

多元線性迴歸

`import pandas as pd  import numpy as np  import matplotlib.pyplot as plt  import seaborn as seabornInstance from sklearn.model_selection import train_test_split from sklearn.linear_model import LinearRegressionfrom sklearn import metrics%matplotlib inline`

`dataset = pd.read_csv('200811-201811.csv')  # load the same air-quality dataset`

`dataset.describe()  # summary statistics for each numeric column`

`dataset.isnull().any()  # check which columns contain missing values`

`dataset = dataset.fillna(method='ffill')`

`X = dataset[['SO2', 'CO', 'O3', 'Nox', 'NO', 'NO2', 'THC', 'NMHC', 'CH4', 'WindSpeed','TEMP','Humidity']].valuesy = dataset['PM25'].values#X是要拿來預測的資料#Y是想預測的結果`

`plt.figure(figsize=(10,5))plt.tight_layout()seabornInstance.distplot(dataset['PM25'])`

`X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)##80%為Training set，20%為Test set`

`regressor = LinearRegression()  regressor.fit(X_train, y_train) #訓練模型`

`df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})df1 = df.head(25) #顯示實際值和預測值`

`df1.plot(kind='bar',figsize=(10,8))plt.grid(which='major', linestyle='-', linewidth='0.5', color='green')plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black')plt.show()`

`print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))  print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))  print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))`

`regressor.score(X_train, y_train)  # R-squared of the fit, evaluated on the training data`

1.線性迴歸

`# Linear Regressionimport pandasfrom sklearn import model_selectionfrom sklearn.linear_model import LinearRegressionurl = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.data"names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']dataframe = pandas.read_csv(url, delim_whitespace=True, names=names)array = dataframe.valuesX = array[:,0:13]Y = array[:,13]seed = 7kfold = model_selection.KFold(n_splits=10, random_state=seed)model = LinearRegression()scoring = 'neg_mean_squared_error'results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)print(results.mean())`

2.嶺迴歸

`# Ridge Regressionimport pandasfrom sklearn import model_selectionfrom sklearn.linear_model import Ridgeurl = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.data"names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']dataframe = pandas.read_csv(url, delim_whitespace=True, names=names)array = dataframe.valuesX = array[:,0:13]Y = array[:,13]seed = 7kfold = model_selection.KFold(n_splits=10, random_state=seed)model = Ridge()scoring = 'neg_mean_squared_error'results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)print(results.mean())`

3. LASSO 迴歸

`# Lasso Regressionimport pandasfrom sklearn import model_selectionfrom sklearn.linear_model import Lassourl = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.data"names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']dataframe = pandas.read_csv(url, delim_whitespace=True, names=names)array = dataframe.valuesX = array[:,0:13]Y = array[:,13]seed = 7kfold = model_selection.KFold(n_splits=10, random_state=seed)model = Lasso()scoring = 'neg_mean_squared_error'results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)print(results.mean())`

4.ElasticNet彈性網路迴歸

`# ElasticNet Regressionimport pandasfrom sklearn import model_selectionfrom sklearn.linear_model import ElasticNeturl = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.data"names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']dataframe = pandas.read_csv(url, delim_whitespace=True, names=names)array = dataframe.valuesX = array[:,0:13]Y = array[:,13]seed = 7kfold = model_selection.KFold(n_splits=10, random_state=seed)model = ElasticNet()scoring = 'neg_mean_squared_error'results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)print(results.mean())`

1. KNN迴歸

`# KNN Regressionimport pandasfrom sklearn import model_selectionfrom sklearn.neighbors import KNeighborsRegressorurl = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.data"names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']dataframe = pandas.read_csv(url, delim_whitespace=True, names=names)array = dataframe.valuesX = array[:,0:13]Y = array[:,13]seed = 7kfold = model_selection.KFold(n_splits=10, random_state=seed)model = KNeighborsRegressor()scoring = 'neg_mean_squared_error'results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)print(results.mean())`

2.決策樹迴歸

`# Decision Tree Regressionimport pandasfrom sklearn import model_selectionfrom sklearn.tree import DecisionTreeRegressorurl = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.data"names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']dataframe = pandas.read_csv(url, delim_whitespace=True, names=names)array = dataframe.valuesX = array[:,0:13]Y = array[:,13]seed = 7kfold = model_selection.KFold(n_splits=10, random_state=seed)model = DecisionTreeRegressor()scoring = 'neg_mean_squared_error'results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)print(results.mean())`

3.支持向量機迴歸(SVR)

`# SVM Regressionimport pandasfrom sklearn import model_selectionfrom sklearn.svm import SVRurl = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.data"names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']dataframe = pandas.read_csv(url, delim_whitespace=True, names=names)array = dataframe.valuesX = array[:,0:13]Y = array[:,13]seed = 7kfold = model_selection.KFold(n_splits=10, random_state=seed)model = SVR()scoring = 'neg_mean_squared_error'results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)print(results.mean())`

Written by