mtcnn

时间:2022-03-27 06:01:32

1、widerface样本标签处理

图片名 x1  y1  x2  y2  x11 y11  x22  y22  多人脸框

# -*- coding: utf-8 -*-
"""Flatten the WIDER FACE training annotations into one line per image.

Author: admin (the original creation date was lost).

Input (``wider_face_train_bbx_gt.txt``), repeated for every image::

    <folder>/<image>.jpg
    <face count>
    x1 y1 w h ...        (one line per face; only the first four fields are used)

Output (``widerTrainx1x2y1y2Two.txt``), one line per image::

    <image>.jpg x1 y1 x2 y2 x1 y1 x2 y2 ...

(x1, y1) is the top-left corner of a face box; x2 = x1 + w and y2 = y1 + h.
"""
import re
import linecache
import os

# Directory holding the WIDER FACE split files; the flattened labels are
# written next to the original annotation file.
FILEDIR = "G:\\MTCNNTraining\\faceData\\widerFace\\wider_face_split\\"
ANNOTATION_FILE = FILEDIR + 'wider_face_train_bbx_gt.txt'
OUTPUT_FILE = FILEDIR + "widerTrainx1x2y1y2Two.txt"


def count_lines(file, chunk_size=8192 * 1024):
    """Count newline characters in an open text file, then close it.

    Kept for backward compatibility; ``main`` no longer needs it.

    NOTE(review): the original chunk size literal was lost; 8 MiB is an
    assumption and does not affect the result.
    """
    lines_quantity = 0
    while True:
        buffer = file.read(chunk_size)
        if not buffer:
            break
        lines_quantity += buffer.count('\n')
    file.close()
    return lines_quantity


def parse_annotations(lines):
    """Yield ``(image_name, boxes)`` for every image in the annotation lines.

    ``lines`` is the annotation file as a list of strings.  ``image_name`` is
    the file name without its folder; ``boxes`` is a list of
    ``(x1, y1, x2, y2)`` integer tuples.

    Lines that do not name a jpg image (for example the all-zero placeholder
    row that follows a face count of 0) are skipped.

    Raises ValueError if an image line is not followed by a face count.
    """
    total = len(lines)
    i = 0
    while i < total:
        header = lines[i].strip()
        if not re.search('jpg', header):
            i += 1
            continue
        if i + 1 >= total:
            raise ValueError("missing face count after %r" % header)
        # Drop the folder prefix ("0--Parade/") and keep only the file name.
        _folder_name, _, image_name = header.partition('/')
        if not image_name:
            image_name = header
        face_count = int(lines[i + 1])
        boxes = []
        for box_line in lines[i + 2:i + 2 + face_count]:
            x1, y1, w, h = (int(value) for value in box_line.split()[:4])
            boxes.append((x1, y1, x1 + w, y1 + h))
        yield image_name, boxes
        i += 2 + face_count


def format_record(image_name, boxes):
    """Return the output line for one image, terminated by a newline."""
    fields = [image_name]
    for box in boxes:
        fields.extend(str(value) for value in box)
    return " ".join(fields) + "\n"


def main():
    """Read the annotation file and append the flattened records."""
    with open(ANNOTATION_FILE, 'r') as src:
        lines = src.readlines()
    # Append mode is kept from the original script: delete the output file
    # before re-running, otherwise the records are duplicated.
    with open(OUTPUT_FILE, 'a') as dst:
        for image_name, boxes in parse_annotations(lines):
            print(image_name)
            dst.write(format_record(image_name, boxes))


if __name__ == "__main__":
    main()

2、PNET,制作正样本、负样本、部分样本

IOU:utils.py

import numpy as np

def IoU(box, boxes):
    """Compute IoU between one box and a set of ground-truth boxes.

    Coordinates are treated as inclusive pixel indices, hence the ``+ 1``
    when computing widths and heights.

    Parameters
    ----------
    box : numpy array, shape (4,) or (5,)
        x1, y1, x2, y2 (an optional trailing score is ignored).
    boxes : numpy array, shape (n, 4)
        Ground-truth boxes as x1, y1, x2, y2.

    Returns
    -------
    ovr : numpy array, shape (n,)
        IoU of ``box`` with each row of ``boxes``.
    """
    box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    area = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    # Corners of the intersection rectangle.
    xx1 = np.maximum(box[0], boxes[:, 0])
    yy1 = np.maximum(box[1], boxes[:, 1])
    xx2 = np.minimum(box[2], boxes[:, 2])
    yy2 = np.minimum(box[3], boxes[:, 3])

    # Width and height of the intersection; clamped to 0 when disjoint.
    w = np.maximum(0, xx2 - xx1 + 1)
    h = np.maximum(0, yy2 - yy1 + 1)

    inter = w * h
    ovr = inter / (box_area + area - inter)
    return ovr


def convert_to_square(bbox):
    """Expand each box to a square that shares its centre.

    Parameters
    ----------
    bbox : numpy array, shape (n, 4) or wider
        Boxes as x1, y1, x2, y2 in the first four columns.

    Returns
    -------
    square_bbox : numpy array, same shape as ``bbox``
        A copy whose first four columns describe the square boxes; the side
        is the longer of the original width and height.  The input is not
        modified.
    """
    square_bbox = bbox.copy()

    h = bbox[:, 3] - bbox[:, 1] + 1
    w = bbox[:, 2] - bbox[:, 0] + 1
    max_side = np.maximum(h, w)
    # Shift the top-left corner so the square stays centred on the box.
    square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - max_side * 0.5
    square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - max_side * 0.5
    square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1
    square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1
    return square_bbox
"""Generate PNet training crops (positive / part / negative) from WIDER FACE.

For every annotated image the script writes resized crops below
``<stdsize>/positive``, ``<stdsize>/part`` and ``<stdsize>/negative`` and
records them in ``pos_<stdsize>.txt``, ``part_<stdsize>.txt`` and
``neg_<stdsize>.txt``:

* negative: random crops whose IoU with every face is below 0.3, label 0
* positive: crops around a face with IoU >= 0.65, label 1 + box offsets
* part:     crops around a face with 0.4 <= IoU < 0.65, label -1 + offsets
"""
import sys
import numpy as np
import cv2
import os
import numpy.random as npr
from utils import IoU

# NOTE(review): the integer literals of this script were lost; the values
# below follow the commonly used MTCNN data-generation script and must be
# confirmed.  stdsize = 12 matches the "./12" directory and the *_12.txt list
# files read by the label-merging step.
stdsize = 12
anno_file = "G:/MTCNNTraining/widefaceData/widerTrainx1x2y1y2Two.txt"
im_dir = "G:/MTCNNTraining/widefaceData/widerfaceJPG/"
pos_save_dir = str(stdsize) + "/positive"
part_save_dir = str(stdsize) + "/part"
neg_save_dir = str(stdsize) + '/negative'
save_dir = "./" + str(stdsize)

NEG_PER_IMAGE = 50        # negative crops collected per image (assumed)
MIN_NEG_CROP = 40         # smallest negative crop side in pixels (assumed)
MIN_FACE_SIDE = 40        # faces whose longer side is below this are skipped (assumed)
CROPS_PER_FACE = 20       # random crops tried around every face (assumed)
PROGRESS_EVERY = 100      # progress message interval in images (assumed)
# Safety cap so an image almost entirely covered by faces cannot stall the run.
MAX_NEG_ATTEMPTS = NEG_PER_IMAGE * 100


def mkr(dr):
    """Create directory ``dr`` if it does not exist yet."""
    if not os.path.exists(dr):
        os.mkdir(dr)


def main():
    """Run the crop generation over every line of the annotation file."""
    mkr(save_dir)
    mkr(pos_save_dir)
    mkr(part_save_dir)
    mkr(neg_save_dir)

    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print("%d pics in total" % num)

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # dont care (part)
    idx = 0
    box_idx = 0

    pos_list = os.path.join(save_dir, 'pos_' + str(stdsize) + '.txt')
    neg_list = os.path.join(save_dir, 'neg_' + str(stdsize) + '.txt')
    part_list = os.path.join(save_dir, 'part_' + str(stdsize) + '.txt')
    with open(pos_list, 'w') as f1, open(neg_list, 'w') as f2, \
            open(part_list, 'w') as f3:
        for annotation in annotations:
            annotation = annotation.strip().split(' ')
            im_path = annotation[0]
            bbox = list(map(float, annotation[1:]))
            boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)
            img = cv2.imread(im_dir + im_path)
            idx += 1
            if idx % PROGRESS_EVERY == 0:
                print(idx, "images done")
            if img is None:
                # cv2.imread returns None for a missing or unreadable file.
                print("skip unreadable image:", im_dir + im_path)
                continue

            height, width, channel = img.shape

            # Negative samples.
            max_neg_size = min(width, height) // 2
            neg_num = 0
            attempts = 0
            # randint needs low < high, so images that are too small yield
            # no negative crops instead of raising.
            while (neg_num < NEG_PER_IMAGE and max_neg_size > MIN_NEG_CROP
                   and attempts < MAX_NEG_ATTEMPTS):
                attempts += 1
                size = npr.randint(MIN_NEG_CROP, max_neg_size)
                nx = npr.randint(0, width - size)
                ny = npr.randint(0, height - size)
                crop_box = np.array([nx, ny, nx + size, ny + size])

                # An image without faces has nothing to overlap with.
                if boxes.shape[0] > 0 and np.max(IoU(crop_box, boxes)) >= 0.3:
                    continue

                # IoU with all ground-truth boxes is below 0.3.
                cropped_im = img[ny: ny + size, nx: nx + size, :]
                resized_im = cv2.resize(cropped_im, (stdsize, stdsize),
                                        interpolation=cv2.INTER_LINEAR)
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                f2.write(str(stdsize) + "/negative/%s" % n_idx + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                neg_num += 1

            # Positive and part samples.
            for box in boxes:
                # box is (x_left, y_top, x_right, y_bottom)
                x1, y1, x2, y2 = box
                w = x2 - x1 + 1
                h = y2 - y1 + 1

                # Ignore small faces: their ground-truth boxes may be
                # inaccurate.
                if max(w, h) < MIN_FACE_SIDE or x1 < 0 or y1 < 0:
                    continue
                # Very thin boxes would give an empty range for the random
                # centre offset below.
                if w < 5 or h < 5:
                    continue

                for i in range(CROPS_PER_FACE):
                    # randint requires integer bounds.
                    size = npr.randint(int(min(w, h) * 0.8),
                                       int(np.ceil(1.25 * max(w, h))))

                    # delta is the offset of the crop centre from the box
                    # centre.
                    delta_x = npr.randint(int(-w * 0.2), int(w * 0.2))
                    delta_y = npr.randint(int(-h * 0.2), int(h * 0.2))

                    nx1 = max(x1 + w / 2 + delta_x - size / 2, 0)
                    ny1 = max(y1 + h / 2 + delta_y - size / 2, 0)
                    nx2 = nx1 + size
                    ny2 = ny1 + size

                    if nx2 > width or ny2 > height:
                        continue
                    crop_box = np.array([nx1, ny1, nx2, ny2])

                    # Regression targets: box corners relative to the crop,
                    # normalised by the crop size.
                    offset_x1 = (x1 - nx1) / float(size)
                    offset_y1 = (y1 - ny1) / float(size)
                    offset_x2 = (x2 - nx1) / float(size)
                    offset_y2 = (y2 - ny1) / float(size)

                    box_ = box.reshape(1, -1)
                    iou = IoU(crop_box, box_)[0]
                    if iou < 0.4:
                        continue

                    cropped_im = img[int(ny1): int(ny2), int(nx1): int(nx2), :]
                    resized_im = cv2.resize(cropped_im, (stdsize, stdsize),
                                            interpolation=cv2.INTER_LINEAR)
                    if iou >= 0.65:
                        save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                        f1.write(str(stdsize) + "/positive/%s" % p_idx
                                 + ' 1 %f %f %f %f\n' % (offset_x1, offset_y1,
                                                         offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        p_idx += 1
                    else:
                        save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                        f3.write(str(stdsize) + "/part/%s" % d_idx
                                 + ' -1 %f %f %f %f\n' % (offset_x1, offset_y1,
                                                          offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        d_idx += 1
                box_idx += 1

            print("%s images done, pos: %s part: %s neg: %s"
                  % (idx, p_idx, d_idx, n_idx))


if __name__ == "__main__":
    main()

生成train-label.txt

"""Merge the PNet positive / negative / part lists into ``label-train.txt``.

Every input line is ``<path> <label> [offsets]``.  The output appends
``.jpg`` to the path; negative lines, which carry no box offsets, are padded
with ``-1 -1 -1 -1`` so every line has the same number of fields.
"""
import sys
import os

save_dir = "./12"


def add_jpg_extension(line, suffix=""):
    """Return ``line`` with ``.jpg`` appended to its first field.

    ``line`` is ``"<path> <rest>"`` with or without a trailing newline.
    ``suffix`` is appended after ``<rest>``.  The result always ends with
    exactly one newline.
    """
    path, _, rest = line.rstrip("\n").partition(" ")
    return path + ".jpg " + rest + suffix + "\n"


def main():
    """Read the three list files and write the merged label file."""
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    with open(os.path.join(save_dir, 'pos_12.txt'), 'r') as f1:
        pos = f1.readlines()
    with open(os.path.join(save_dir, 'neg_12.txt'), 'r') as f2:
        neg = f2.readlines()
    with open(os.path.join(save_dir, 'part_12.txt'), 'r') as f3:
        part = f3.readlines()

    print(len(pos))
    # The context manager guarantees the merged file is flushed and closed.
    with open(os.path.join(save_dir, 'label-train.txt'), 'w') as f:
        f.writelines(add_jpg_extension(line) for line in pos)
        f.writelines(add_jpg_extension(line, " -1 -1 -1 -1") for line in neg)
        f.writelines(add_jpg_extension(line) for line in part)


if __name__ == "__main__":
    main()

生成lmdb数据

@echo off

if exist train_lmdb12 rd /q /s train_lmdb12

echo create train_lmdb12...
"G:/MTCNNTraining/caffe-buildx64-cpu/convert_imageset.exe" "" 12/label-train.txt train_lmdb12 --backend=mtcnn --shuffle=true
echo done.
pause

开始训练

@echo off
"G:/MTCNNTraining/caffe-buildx64-cpu/caffe.exe" train --solver=solver-12.prototxt --weights=det1.caffemodel
pause
solver-12.prototxt
net : "det1-train.prototxt"
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.001
momentum: 0.9
weight_decay: 0.004
# The learning rate policy
lr_policy: "step"
stepsize:
gamma: 0.8
display:
max_iter:
snapshot:
snapshot_prefix: "./models-12/"
solver_mode: GPU
det1-train.prototxt
name: "PNet"
layer {
name: "data"
type: "MTCNNData"
top: "data"
top: "label"
top: "roi"
transform_param {
mirror: false
mean_value: 127.5
scale: 0.0078125
#crop_size:
}
data_param {
source: "train_lmdb12"
batch_size:
backend: LMDB
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
convolution_param {
num_output:
kernel_size:
stride:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
}
layer {
name: "PReLU1"
type: "PReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size:
stride:
}
} layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
convolution_param {
num_output:
kernel_size:
stride:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
}
layer {
name: "PReLU2"
type: "PReLU"
bottom: "conv2"
top: "conv2"
} layer {
name: "conv3"
type: "Convolution"
bottom: "conv2"
top: "conv3"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
convolution_param {
num_output:
kernel_size:
stride:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
}
layer {
name: "PReLU3"
type: "PReLU"
bottom: "conv3"
top: "conv3"
} layer {
name: "conv4-1"
type: "Convolution"
bottom: "conv3"
top: "conv4-1"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
convolution_param {
num_output:
kernel_size:
stride:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
} layer {
name: "cls_loss"
type: "SoftmaxWithLoss"
bottom: "conv4-1"
bottom: "label"
top: "cls_loss"
propagate_down: 1
propagate_down: 0
loss_weight: 1
loss_param{
ignore_label: -1  # part samples are labelled -1 and carry no class target
}
} layer {
name: "cls_Acc"
type: "Accuracy"
bottom: "conv4-1"
bottom: "label"
top: "cls_acc"
include {
phase: TRAIN
}
accuracy_param{
ignore_label: -1
}
} layer {
name: "conv4-2"
type: "Convolution"
bottom: "conv3"
top: "conv4-2"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
convolution_param {
num_output:
kernel_size:
stride:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
}
layer {
name: "roi_loss"
type: "MTCNNEuclideanLoss"
bottom: "conv4-2"
bottom: "roi"
bottom: "label"
top: "roi_loss"
loss_weight: 0.5
loss_param{
ignore_label: 0  # negatives (label 0) have no box-regression target
}
}

train24.bat

@echo off
"caffe/caffe.exe" train --solver=solver-24.prototxt --weights=det2.caffemodel
pause

solver-24.prototxt

net : "det2-train.prototxt"
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.001
momentum: 0.9
weight_decay: 0.004
# The learning rate policy
lr_policy: "step"
stepsize:
gamma: 0.8
display: #
max_iter:
snapshot:
snapshot_prefix: "./models-24/"
solver_mode: GPU

det2-train.prototxt

name: "RNet"
layer {
name: "data"
type: "MTCNNData"
top: "data"
top: "label"
top: "roi"
transform_param {
mirror: false
mean_value: 127.5
scale: 0.0078125
#crop_size:
}
data_param {
source: "train_lmdb24"
batch_size:
backend: LMDB
}
}
################################################
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
convolution_param {
num_output:
kernel_size:
stride:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
}
layer {
name: "prelu1"
type: "PReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size:
stride:
}
} layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
convolution_param {
num_output:
kernel_size:
stride:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
}
layer {
name: "prelu2"
type: "PReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size:
stride:
}
}
#################################### ##################################
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
convolution_param {
num_output:
kernel_size:
stride:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
}
layer {
name: "prelu3"
type: "PReLU"
bottom: "conv3"
top: "conv3"
}
############################### ############################### layer {
name: "conv4"
type: "InnerProduct"
bottom: "conv3"
top: "conv4"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
inner_product_param {
num_output:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
}
layer {
name: "prelu4"
type: "PReLU"
bottom: "conv4"
top: "conv4"
} layer {
name: "conv5-1"
type: "InnerProduct"
bottom: "conv4"
top: "conv5-1"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
inner_product_param {
num_output:
#kernel_size:
#stride:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
} layer {
name: "cls_loss"
type: "SoftmaxWithLoss"
bottom: "conv5-1"
bottom: "label"
top: "cls_loss"
propagate_down: 1
propagate_down: 0
loss_weight: 1
loss_param{
ignore_label: -1  # part samples are labelled -1 and carry no class target
}
} layer {
name: "cls_Acc"
type: "Accuracy"
bottom: "conv5-1"
bottom: "label"
top: "cls_acc"
include {
phase: TRAIN
}
accuracy_param{
ignore_label: -1
}
} layer {
name: "conv5-2"
type: "InnerProduct"
bottom: "conv4"
top: "conv5-2"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
inner_product_param {
num_output:
#kernel_size:
#stride:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
} layer {
name: "roi_loss"
type: "MTCNNEuclideanLoss"
bottom: "conv5-2"
bottom: "roi"
bottom: "label"
top: "roi_loss"
loss_weight: 0.5
loss_param{
ignore_label: 0  # negatives (label 0) have no box-regression target
}
}

train-48.bat

@echo off
"caffe/caffe.exe" train --solver=solver-48.prototxt
pause
solver-48.prototxt
net : "det3-train.prototxt"
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.001
momentum: 0.9
weight_decay: 0.004
# The learning rate policy
lr_policy: "step"
stepsize:
gamma: 0.8
display: #
max_iter:
snapshot:
snapshot_prefix: "./models-48/"
solver_mode: GPU

det3-train.prototxt

name: "ONet"
layer {
name: "data"
type: "MTCNNData"
top: "data"
top: "label"
top: "roi"
transform_param {
mirror: false
mean_value: 127.5
scale: 0.0078125
#crop_size:
}
data_param {
source: "train_lmdb48"
batch_size:
backend: LMDB
}
}
##################################
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
convolution_param {
num_output:
kernel_size:
stride:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
}
layer {
name: "prelu1"
type: "PReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size:
stride:
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
convolution_param {
num_output:
kernel_size:
stride:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
} layer {
name: "prelu2"
type: "PReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size:
stride:
}
} layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
convolution_param {
num_output:
kernel_size:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
}
layer {
name: "prelu3"
type: "PReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size:
stride:
}
}
layer {
name: "conv4"
type: "Convolution"
bottom: "pool3"
top: "conv4"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
convolution_param {
num_output:
kernel_size:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
}
layer {
name: "prelu4"
type: "PReLU"
bottom: "conv4"
top: "conv4"
} layer {
name: "conv5"
type: "InnerProduct"
bottom: "conv4"
top: "conv5"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
inner_product_param {
#kernel_size:
num_output:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
} layer {
name: "drop5"
type: "Dropout"
bottom: "conv5"
top: "conv5"
dropout_param {
dropout_ratio: 0.25
}
}
layer {
name: "prelu5"
type: "PReLU"
bottom: "conv5"
top: "conv5"
} layer {
name: "conv6-1"
type: "InnerProduct"
bottom: "conv5"
top: "conv6-1"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
inner_product_param {
#kernel_size:
num_output:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
}
layer {
name: "cls_loss"
type: "SoftmaxWithLoss"
bottom: "conv6-1"
bottom: "label"
top: "cls_loss"
propagate_down: 1
propagate_down: 0
loss_weight: 1
loss_param{
ignore_label: -1  # part samples are labelled -1 and carry no class target
}
} layer {
name: "cls_Acc"
type: "Accuracy"
bottom: "conv6-1"
bottom: "label"
top: "cls_acc"
include {
phase: TRAIN
}
accuracy_param{
ignore_label: -1
}
} layer {
name: "conv6-2"
type: "InnerProduct"
bottom: "conv5"
top: "conv6-2"
param {
lr_mult:
decay_mult:
}
param {
lr_mult:
decay_mult:
}
inner_product_param {
#kernel_size:
num_output:
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value:
}
}
}
layer {
name: "roi_loss"
type: "MTCNNEuclideanLoss"
bottom: "conv6-2"
bottom: "roi"
bottom: "label"
top: "roi_loss"
loss_weight: 0.5
loss_param{
ignore_label: 0  # negatives (label 0) have no box-regression target
}
accuracy_param{
ignore_label:
}
}