Rescale data

Standardize data

Normalize data

Binarize data

# 1. Rescale Data

`#將資料比例縮放到0與1之間# Rescale data (between 0 and 1)import pandasimport scipyimport numpyfrom sklearn.preprocessing import MinMaxScalerurl = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']dataframe = pandas.read_csv(url, names=names)array = dataframe.values# separate array into input and output componentsX = array[:,0:8]Y = array[:,8]scaler = MinMaxScaler(feature_range=(0, 1))rescaledX = scaler.fit_transform(X)# summarize transformed datanumpy.set_printoptions(precision=3)print(rescaledX[0:5,:])`

# 2. Standardize Data

`#將資料常態分布化，平均值會變為0, 標準差變為1，使離群值影響降低#MinMaxScaler與StandardScaler類似from sklearn.preprocessing import StandardScalerimport pandasimport numpyurl = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']dataframe = pandas.read_csv(url, names=names)array = dataframe.values# separate array into input and output componentsX = array[:,0:8]Y = array[:,8]scaler = StandardScaler().fit(X)rescaledX = scaler.transform(X)# summarize transformed datanumpy.set_printoptions(precision=3)print(rescaledX[0:5,:])`

# 3. Normalize Data

`#最大值變為1，最小值變為0from sklearn.preprocessing import Normalizerimport pandasimport numpyurl = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']dataframe = pandas.read_csv(url, names=names)array = dataframe.values# separate array into input and output componentsX = array[:,0:8]Y = array[:,8]scaler = Normalizer().fit(X)normalizedX = scaler.transform(X)# summarize transformed datanumpy.set_printoptions(precision=3)print(normalizedX[0:5,:])`

# 4. Binarize Data (Make Binary)

`#資料二元化(0或者1)from sklearn.preprocessing import Binarizerimport pandasimport numpyurl = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']dataframe = pandas.read_csv(url, names=names)array = dataframe.values# separate array into input and output componentsX = array[:,0:8]Y = array[:,8]binarizer = Binarizer(threshold=0.0).fit(X)binaryX = binarizer.transform(X)# summarize transformed datanumpy.set_printoptions(precision=3)print(binaryX[0:5,:])`