数据集共包含40个人,每人10张图片,共400张图片;每张图片高57像素、宽47像素。
读取图片的py文件
import numpy
import pandas
from PIL import Image
from keras import backend as K
from keras.utils import np_utils
"""
加载图像数据的函数,dataset_path即图像olivettifaces的路径
加载olivettifaces后,划分为train_data,valid_data,test_data三个数据集
函数返回train_data,valid_data,test_data以及对应的label
"""
# 400 samples: 40 people x 10 images each. Every image is 57 px high and
# 47 px wide (2679 pixels); pixel values are scaled by 1/256.
def load_data(dataset_path):
    """Load the face sprite sheet and split it into train/valid/test sets.

    The image is read as a 20 x 20 grid of 57 x 47 tiles in row-major order,
    which yields 400 faces. Every run of 10 consecutive faces is treated as
    one person (40 people, labels 0..39). For each person the first 8 faces
    go to the training set, the 9th to the validation set and the 10th to
    the test set.

    Args:
        dataset_path: Anything accepted by ``PIL.Image.open`` (normally the
            path to ``olivettifaces.gif``).

    Returns:
        ``[(train_data, train_label), (valid_data, valid_label),
        (test_data, test_label)]``. The data arrays are float64 with shapes
        (320, 57, 47), (40, 57, 47) and (40, 57, 47); the labels are one-hot
        float64 arrays with shapes (320, 40), (40, 40) and (40, 40).

    Raises:
        ValueError: If the decoded image is not a 2-D array of at least
            1140 x 940 pixels.
    """
    grid_rows, grid_cols = 20, 20
    face_h, face_w = 57, 47
    people, per_person, train_per_person = 40, 10, 8

    # The context manager releases the underlying file handle as soon as the
    # pixels have been copied into the numpy array.
    with Image.open(dataset_path) as img:
        img_ndarray = numpy.asarray(img, dtype='float64') / 256
    print(img_ndarray.shape)

    need_h, need_w = grid_rows * face_h, grid_cols * face_w
    if (img_ndarray.ndim != 2
            or img_ndarray.shape[0] < need_h
            or img_ndarray.shape[1] < need_w):
        raise ValueError(
            'expected a single-channel image of at least %dx%d pixels, '
            'got array of shape %s' % (need_h, need_w, img_ndarray.shape))

    # Cut the sheet into tiles: (row, y, col, x) -> (row, col, y, x), then
    # group the 400 row-major tiles as (person, shot, y, x).
    faces = (
        img_ndarray[:need_h, :need_w]
        .reshape(grid_rows, face_h, grid_cols, face_w)
        .transpose(0, 2, 1, 3)
        .reshape(people, per_person, face_h, face_w)
    )

    # One-hot labels built directly with numpy. This avoids the `numpy.int`
    # alias, which no longer exists in recent NumPy releases. Row i of the
    # identity matrix is the label of person i.
    one_hot = numpy.eye(people)

    train_data = faces[:, :train_per_person].reshape(
        people * train_per_person, face_h, face_w)
    train_label = numpy.repeat(one_hot, train_per_person, axis=0)
    # Copies keep the returned arrays independent of each other.
    valid_data = faces[:, train_per_person].copy()
    valid_label = one_hot.copy()
    test_data = faces[:, train_per_person + 1].copy()
    test_label = one_hot.copy()

    return [(train_data, train_label), (valid_data, valid_label),
            (test_data, test_label)]
if __name__ == '__main__':
    # Manual smoke test: load the dataset and display the first training face.
    splits = load_data('olivettifaces.gif')
    (train_data, train_label), (valid_data, valid_label), (test_data, test_label) = splits
    # load_data divides pixel values by 256; multiply back before displaying.
    oneimg = 256 * train_data[0]
    print(oneimg)
    im = Image.fromarray(oneimg)
    im.show()
CNN人脸识别代码
import numpy as np
np.random.seed(1337) # for reproducibility
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D,AveragePooling2D
from PIL import Image
import FaceData
# Global settings
batch_size = 128  # number of samples per training batch
nb_classes = 40  # number of output classes
epochs = 600  # number of training epochs
# Input image size: rows = height, cols = width.
img_rows = 57
img_cols = 47
nb_filters = 32  # number of convolution kernels
pool_size = (2, 2)  # pooling window size
kernel_size = (5, 5)  # convolution kernel size
input_shape = (img_rows, img_cols, 1)  # input image shape with one channel
# Load the three splits produced by FaceData.load_data.
(X_train, Y_train), (X_valid, Y_valid), (X_test, Y_test) = FaceData.load_data('olivettifaces.gif')
# Append a trailing channel axis so every set has four dimensions:
# (samples, height, width, channels).
X_train = np.expand_dims(X_train, axis=-1)
X_valid = np.expand_dims(X_valid, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)
print('样本数据集的维度:', X_train.shape, Y_train.shape)
print('测试数据集的维度:', X_test.shape, Y_test.shape)
# Build the model: two convolution + average-pooling stages followed by a
# single dense classification layer.
model = Sequential()
model.add(Conv2D(6, kernel_size, input_shape=input_shape, strides=1))  # convolution layer 1
model.add(AveragePooling2D(pool_size=pool_size, strides=2))  # pooling layer
model.add(Conv2D(12, kernel_size, strides=1))  # convolution layer 2
model.add(AveragePooling2D(pool_size=pool_size, strides=2))  # pooling layer
model.add(Flatten())  # flatten feature maps into a 1-D vector
model.add(Dense(nb_classes))  # fully connected layer, one unit per class
# Each face belongs to exactly one of the classes and the model is trained
# with categorical cross-entropy, so the output must be a probability
# distribution over the classes: use softmax rather than independent
# per-class sigmoids.
model.add(Activation('softmax'))
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
# Train the model. Progress is monitored on the validation split; the test
# split is kept out of training so the evaluation that follows stays unbiased.
model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=1,
          validation_data=(X_valid, Y_valid))
# Evaluate the model on the test split
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
# For every test sample take the column index of the highest score, i.e. the
# predicted class.
y_pred = model.predict(X_test).argmax(axis=1)
for i, predicted in enumerate(y_pred):
    # Drop the channel axis and undo the 1/256 scaling before displaying.
    oneimg = 256 * X_test[i, :, :, 0]
    im = Image.fromarray(oneimg)
    im.show()
    print('第%d个人识别为第%d个人' % (i, predicted))