Python機器學習筆記(零):完整流程

Yanwei Liu
2 min read · Mar 22, 2019

--

使用scikit-learn進行機器學習的流程

# End-to-end scikit-learn workflow on a tabular dataset:
# load -> inspect missing values -> impute -> encode -> split -> fit ->
# predict -> score -> cross-validate.
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, train_test_split

# 'xxx.csv' is a placeholder path; point it at the real dataset.
train = pd.read_csv('xxx.csv')

# Count missing values per column to decide what needs imputing.
print(train.isnull().sum())

# Use the median of 'Age' as the fill value for its missing entries.
impute_value = train['Age'].median()
train['Age'] = train['Age'].fillna(impute_value)

# Re-encode the text column 'Sex' as a 0/1 indicator.
train['IsFemale'] = (train['Sex'] == 'female').astype(int)

# Feature columns fed to the model, and the label column to predict.
predictors = ['Pclass', 'IsFemale', 'Age']
X = train[predictors].values
y = train['Survived'].values

# Hold out part of the labelled data so predictions can be compared against
# known answers (X_test / y_true were referenced but never defined before).
X_train, X_test, y_train, y_true = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Fit a logistic-regression classifier with default settings.
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows and report the fraction predicted correctly.
y_predict = model.predict(X_test)
accuracy = (y_true == y_predict).mean()
print(accuracy)

# 4-fold cross-validation with a different regularisation setting.
# Note the keyword is an upper-case `C`.
model = LogisticRegression(C=10)
scores = cross_val_score(model, X_train, y_train, cv=4)
print(scores)

--

--

No responses yet