Python深度學習筆記(四):使用Keras來進行電腦視覺

Yanwei Liu
9 min read · May 7, 2019

準備資料

# Load a photograph and rescale its pixel values from 0-255 to 0-1.
from numpy import asarray
from PIL import Image

# Load the image. The context manager closes the underlying file handle
# once the pixel data has been read into the array.
with Image.open('bondi_beach.jpg') as image:
    pixels = asarray(image)

# confirm pixel range is 0-255
print('Data Type: %s' % pixels.dtype)
print('Min: %.3f, Max: %.3f' % (pixels.min(), pixels.max()))

# convert from integers to floats (astype returns a new array, so the
# in-place division below never touches the array produced by asarray)
pixels = pixels.astype('float32')

# normalize to the range 0-1
pixels /= 255.0

# confirm the normalization
print('Min: %.3f, Max: %.3f' % (pixels.min(), pixels.max()))

CNN卷積神經網路

# CNN with a single convolutional layer, a pooling layer and an output layer
from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten, MaxPooling2D

# Build the whole stack in one go; layers are applied in list order.
model = Sequential([
    # 32 filters of size 3x3 over a 256x256 single-channel input
    Conv2D(32, (3, 3), input_shape=(256, 256, 1)),
    # pooling with the layer's default settings
    MaxPooling2D(),
    # collapse the feature maps into a vector for the dense layer
    Flatten(),
    # single sigmoid output unit
    Dense(1, activation='sigmoid'),
])

# print the layer-by-layer description of the network
model.summary()

影像分類

# Classify a photograph with a pre-trained VGG16 network
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import decode_predictions, preprocess_input
from keras.preprocessing.image import img_to_array, load_img

# read the photo from disk, resized to the 224x224 target size
image = load_img('dog.jpg', target_size=(224, 224))

# turn the loaded image into a numpy array
image = img_to_array(image)

# prepend a batch axis of length 1 in front of the existing axes
image = image.reshape((1,) + image.shape)

# apply the VGG16-specific input preprocessing
image = preprocess_input(image)

# instantiate the network with its default arguments
model = VGG16()

# run the network on the single-image batch
yhat = model.predict(image)

# decode the scores into labelled entries and keep the first entry
# of the first (only) sample
label = decode_predictions(yhat)[0][0]

# report the label name and its score as a percentage
print('%s (%.2f%%)' % (label[1], label[2]*100))

訓練影像分類模型

# Train and evaluate a small CNN on the Fashion-MNIST dataset
from keras.datasets import fashion_mnist
from keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
from keras.models import Sequential
from keras.utils import to_categorical

# fetch the train/test split
(trainX, trainY), (testX, testY) = fashion_mnist.load_data()

# give every 28x28 image an explicit single channel, cast to float32
# and scale the values by 1/255
trainX = trainX.reshape((trainX.shape[0], 28, 28, 1)).astype('float32') / 255.0
testX = testX.reshape((testX.shape[0], 28, 28, 1)).astype('float32') / 255.0

# one-hot encode the class labels
trainY = to_categorical(trainY)
testY = to_categorical(testY)

# conv -> pool -> flatten -> dense(100) -> softmax over 10 classes
model = Sequential([
    Conv2D(
        32,
        (3, 3),
        activation='relu',
        kernel_initializer='he_uniform',
        input_shape=(28, 28, 1),
    ),
    MaxPooling2D(),
    Flatten(),
    Dense(100, activation='relu', kernel_initializer='he_uniform'),
    Dense(10, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# train for 10 epochs in batches of 32
model.fit(trainX, trainY, epochs=10, batch_size=32, verbose=2)

# measure loss and accuracy on the held-out test split
loss, acc = model.evaluate(testX, testY, verbose=0)
print(loss, acc)

影像增強

# Example of image augmentation: plot nine randomly flipped/rotated
# variants of a single photograph in a 3x3 grid.
from numpy import expand_dims
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import ImageDataGenerator
from matplotlib import pyplot

# load the image
img = load_img('bird.jpg')

# convert to numpy array
data = img_to_array(img)

# expand dimension to one sample (adds a leading batch axis)
samples = expand_dims(data, 0)

# create image data augmentation generator
datagen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=90,
)

# prepare iterator that yields one augmented image per batch
it = datagen.flow(samples, batch_size=1)

# generate samples and plot
for i in range(9):
    # select cell i+1 of the 3x3 grid
    pyplot.subplot(3, 3, i + 1)
    # Use the builtin next() rather than the iterator's .next() method:
    # the builtin relies only on the iterator protocol.
    batch = next(it)
    # convert to 8-bit unsigned integers, the usual dtype for viewing
    # 0-255 image data
    image = batch[0].astype('uint8')
    # plot raw pixel data
    pyplot.imshow(image)

# show the figure once all nine cells are filled
pyplot.show()

臉部偵測

# Face detection with mtcnn on a photograph.
# Requires the third-party package; install it from a shell first:
#   pip install mtcnn
from matplotlib import pyplot
from matplotlib.patches import Rectangle
from mtcnn.mtcnn import MTCNN

# load image from file
pixels = pyplot.imread('street.jpg')

# create the detector, using default weights
detector = MTCNN()

# detect faces in the image
faces = detector.detect_faces(pixels)

# plot the image
pyplot.imshow(pixels)

# get the context for drawing boxes
ax = pyplot.gca()

if faces:
    # get coordinates from the first face
    x, y, width, height = faces[0]['box']
    # create an unfilled red rectangle over the detected box
    rect = Rectangle((x, y), width, height, fill=False, color='red')
    # draw the box
    ax.add_patch(rect)
else:
    # Nothing was detected: indexing faces[0] would raise IndexError,
    # so show the plain image instead.
    print('No faces detected')

# show the plot
pyplot.show()

--

--