近段时间在搞 OpenCV 的视频人脸识别,无奈自带分类器的准确度实在不怎么样,但又能怎样呢?自己又研究不清楚各大类检测算法。
正所谓功能是由函数完成的,于是自己便去看 cvHaarDetectObjects 这个识别主函数的源代码,尝试了解并改造它,以提高精确度。
可惜实力有限,里面的结构非常复杂,掺杂着很多其他函数,有一些在网上找不到用法,导致最终无法整体了解,只搞懂了一半,这里分享
一下我自己总结的注释。
// Detects objects in an image with a Haar classifier cascade.
//
// Thin wrapper: forwards every argument to cvHaarDetectObjectsForROC() with
// outputRejectLevels = false, supplying throw-away vectors for the
// reject-level / level-weight outputs that this entry point does not expose.
//
//   _img         - input image (converted with cvGetMat by the callee)
//   cascade      - classifier cascade object (an already-loaded cascade,
//                  not a path to an XML file)
//   storage      - memory storage in which the result sequence is created
//   scaleFactor  - ratio between successive scan scales; must be > 1
//   minNeighbors - grouping threshold handed to groupRectangles
//   flags        - bitwise OR of CV_HAAR_* mode flags
//   minSize      - scales whose detection window is smaller are skipped
//   maxSize      - scales whose detection window is larger are skipped;
//                  a zero width or height means "use the image size"
//
// Returns a sequence of CvAvgComp created in `storage`.
CvSeq* cvHaarDetectObjects( const CvArr* _img,
                            CvHaarClassifierCascade* cascade,
                            CvMemStorage* storage,
                            double scaleFactor,
                            int minNeighbors,
                            int flags,
                            CvSize minSize,
                            CvSize maxSize )
{
    std::vector<int> fakeLevels;     // discarded reject levels
    std::vector<double> fakeWeights; // discarded level weights
    return cvHaarDetectObjectsForROC( _img, cascade, storage, fakeLevels, fakeWeights,
                                      scaleFactor, minNeighbors, flags, minSize, maxSize, false );
}

// Runs the Haar cascade over an image at multiple scales and returns the
// grouped detections.
//
// Two scanning strategies are implemented, selected by `flags`:
//   * CV_HAAR_SCALE_IMAGE set   - the image is shrunk step by step and scanned
//                                 with the cascade at its original window size.
//   * CV_HAAR_SCALE_IMAGE clear - the image is left alone and the cascade
//                                 window is scaled instead.
// Other flags read here:
//   * CV_HAAR_DO_CANNY_PRUNING   - build a Canny edge integral image that the
//                                  scaled-cascade invoker receives (per the
//                                  original notes: used to skip regions with
//                                  very few or very many edges).
//   * CV_HAAR_FIND_BIGGEST_OBJECT- return only the largest detection; forces
//                                  the scaled-cascade path and disables Canny
//                                  pruning.
//   * CV_HAAR_DO_ROUGH_SEARCH    - with FIND_BIGGEST_OBJECT, use a coarser
//                                  lower bound (0.6 instead of 0.4 of the found
//                                  size) for the follow-up search.
//
// Parameters are as for cvHaarDetectObjects(), plus:
//   rejectLevels, levelWeights - forwarded to the scale-image invoker and, when
//                                outputRejectLevels is true, to groupRectangles
//   outputRejectLevels         - selects the groupRectangles overload that
//                                takes rejectLevels / levelWeights
//
// Errors (raised through CV_Error): invalid or null cascade, null storage,
// a COI being set on the image, non-8-bit depth, scaleFactor <= 1.
//
// Returns a sequence of CvAvgComp created in `storage`.
//
// NOTE(review): the listing this body was recovered from had lost its bare
// integer literals and had several statements folded into trailing `//`
// comments. Literals not implied by the surrounding code, messages or the
// original notes were restored from upstream OpenCV 2.4 haar.cpp and are
// individually flagged below - confirm against the OpenCV version in use.
CvSeq* cvHaarDetectObjectsForROC( const CvArr* _img,
                                  CvHaarClassifierCascade* cascade,
                                  CvMemStorage* storage,
                                  std::vector<int>& rejectLevels,
                                  std::vector<double>& levelWeights,
                                  double scaleFactor,
                                  int minNeighbors,
                                  int flags,
                                  CvSize minSize,
                                  CvSize maxSize,
                                  bool outputRejectLevels )
{
    const double GROUP_EPS = 0.2; // eps for groupRectangles and the ROI margin
    CvMat stub, *img = (CvMat*)_img;
    cv::Ptr<CvMat> temp, sum, tilted, sqsum, normImg, sumcanny, imgSmall;
    CvSeq* result_seq = 0;

    std::vector<cv::Rect> allCandidates; // raw hits collected from all scales
    std::vector<cv::Rect> rectList;      // working copy used for grouping
    std::vector<int> rweights;           // neighbour counts from grouping
    double factor;
    int coi;
    bool doCannyPruning = (flags & CV_HAAR_DO_CANNY_PRUNING) != 0;
    bool findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
    bool roughSearch = (flags & CV_HAAR_DO_ROUGH_SEARCH) != 0;

    // Handed to both parallel invokers; presumably serialises their writes to
    // the shared output vectors - the invokers are not part of this listing.
    cv::Mutex mtx;

    // ---- argument validation ------------------------------------------------
    if( !CV_IS_HAAR_CLASSIFIER(cascade) )
        CV_Error( !cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier cascade" );

    if( !storage )
        CV_Error( CV_StsNullPtr, "Null storage pointer" );

    img = cvGetMat( img, &stub, &coi ); // accept IplImage as well as CvMat
    if( coi )
        CV_Error( CV_BadCOI, "COI is not supported" );

    if( CV_MAT_DEPTH(img->type) != CV_8U )
        CV_Error( CV_StsUnsupportedFormat, "Only 8-bit images are supported" );

    if( scaleFactor <= 1 )
        CV_Error( CV_StsOutOfRange, "scale factor must be > 1" );

    if( findBiggestObject )
        flags &= ~CV_HAAR_SCALE_IMAGE;

    // A zero in either field of maxSize means "no upper limit": use image size.
    if( maxSize.height == 0 || maxSize.width == 0 )
    {
        maxSize.height = img->rows;
        maxSize.width = img->cols;
    }

    // ---- work buffers -------------------------------------------------------
    // Integral images are one row and one column larger than their source.
    temp = cvCreateMat( img->rows, img->cols, CV_8UC1 );
    sum = cvCreateMat( img->rows + 1, img->cols + 1, CV_32SC1 );
    sqsum = cvCreateMat( img->rows + 1, img->cols + 1, CV_64FC1 );

    // Build the hidden (optimised) cascade on first use.
    if( !cascade->hid_cascade )
        icvCreateHidHaarClassifierCascade(cascade);

    if( cascade->hid_cascade->has_tilted_features )
        tilted = cvCreateMat( img->rows + 1, img->cols + 1, CV_32SC1 );

    result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), storage );

    // More than one channel: convert to grayscale, using `temp` as the target.
    if( CV_MAT_CN(img->type) > 1 )
    {
        cvCvtColor( img, temp, CV_BGR2GRAY );
        img = temp;
    }

    if( findBiggestObject )
        flags &= ~(CV_HAAR_SCALE_IMAGE|CV_HAAR_DO_CANNY_PRUNING);

    if( flags & CV_HAAR_SCALE_IMAGE )
    {
        // ---- strategy 1: shrink the image, keep the window size -------------
        CvSize winSize0 = cascade->orig_window_size;

#ifdef HAVE_IPP
        int use_ipp = cascade->hid_cascade->ipp_stages != 0;

        if( use_ipp )
            normImg = cvCreateMat( img->rows, img->cols, CV_32FC1 );
#endif
        imgSmall = cvCreateMat( img->rows + 1, img->cols + 1, CV_8UC1 );

        // No loop condition: the loop ends through the `break`s below.
        for( factor = 1; ; factor *= scaleFactor )
        {
            // winSize - detection window expressed in original image pixels
            // sz      - size of the shrunken image at this scale
            // sz1     - number of window positions inside the shrunken image
            // Example (from the original notes): 10x10 window, 100x100 image.
            //   factor 4 -> winSize 40, sz 25, sz1 16
            //   factor 5 -> winSize 50, sz 20, sz1 11
            // winSize grows and sz1 shrinks as factor grows, so one of the
            // break conditions is eventually met.
            CvSize winSize = { cvRound(winSize0.width*factor),
                               cvRound(winSize0.height*factor) };
            CvSize sz = { cvRound( img->cols/factor ), cvRound( img->rows/factor ) };
            CvSize sz1 = { sz.width - winSize0.width + 1, sz.height - winSize0.height + 1 };

            // icv_object_win_border is defined outside this listing.
            // NOTE(review): the "*2" multipliers were restored from upstream.
            CvRect equRect = { icv_object_win_border, icv_object_win_border,
                               winSize0.width - icv_object_win_border*2,
                               winSize0.height - icv_object_win_border*2 };

            CvMat img1, sum1, sqsum1, norm1, tilted1, mask1;
            CvMat* _tilted = 0;

            if( sz1.width <= 0 || sz1.height <= 0 )
                break; // window no longer fits into the shrunken image
            if( winSize.width > maxSize.width || winSize.height > maxSize.height )
                break; // window exceeds the requested maximum
            if( winSize.width < minSize.width || winSize.height < minSize.height )
                continue; // window still below the requested minimum: next scale

            // Headers over the pre-allocated buffers, sized for this scale.
            img1 = cvMat( sz.height, sz.width, CV_8UC1, imgSmall->data.ptr );
            sum1 = cvMat( sz.height+1, sz.width+1, CV_32SC1, sum->data.ptr );
            sqsum1 = cvMat( sz.height+1, sz.width+1, CV_64FC1, sqsum->data.ptr );
            if( tilted )
            {
                tilted1 = cvMat( sz.height+1, sz.width+1, CV_32SC1, tilted->data.ptr );
                _tilted = &tilted1;
            }
            norm1 = cvMat( sz1.height, sz1.width, CV_32FC1, normImg ? normImg->data.ptr : 0 );
            mask1 = cvMat( sz1.height, sz1.width, CV_8UC1, temp->data.ptr );

            cvResize( img, &img1, CV_INTER_LINEAR );
            cvIntegral( &img1, &sum1, &sqsum1, _tilted );

            // Step 1 when factor > 2 (per the original notes).
            // NOTE(review): the fallback step of 2 was restored from upstream.
            int ystep = factor > 2 ? 1 : 2;
            const int LOCS_PER_THREAD = 1000;
            // Number of horizontal strips for the parallel loop, clamped to
            // [1, 100]. E.g. sz1 = 11x11, ystep = 1: (121 + 500)/1000 = 0 -> 1.
            int stripCount = ((sz1.width/ystep)*(sz1.height + ystep-1)/ystep + LOCS_PER_THREAD/2)/LOCS_PER_THREAD;
            stripCount = std::min(std::max(stripCount, 1), 100);

#ifdef HAVE_IPP
            if( use_ipp )
            {
                // NOTE(review): scale 1 and offset -(1<<24) restored from upstream.
                cv::Mat fsum(sum1.rows, sum1.cols, CV_32F, sum1.data.ptr, sum1.step);
                cv::Mat(&sum1).convertTo(fsum, CV_32F, 1, -(1<<24));
            }
            else
#endif
                // Bind the integral images to the hidden cascade. Scale is 1
                // because the image, not the window, was resized.
                cvSetImagesForHaarClassifierCascade( cascade, &sum1, &sqsum1, _tilted, 1. );

            cv::Mat _norm1(&norm1), _mask1(&mask1);
            cv::parallel_for_(cv::Range(0, stripCount),
                         cv::HaarDetectObjects_ScaleImage_Invoker(cascade,
                                (((sz1.height + stripCount - 1)/stripCount + ystep-1)/ystep)*ystep,
                                factor, cv::Mat(&sum1), cv::Mat(&sqsum1), &_norm1, &_mask1,
                                cv::Rect(equRect), allCandidates, rejectLevels, levelWeights, outputRejectLevels, &mtx));
        }
    }
    else
    {
        // ---- strategy 2: keep the image, scale the cascade window -----------
        int n_factors = 0;
        cv::Rect scanROI; // empty until a biggest-object candidate is found

        cvIntegral( img, sum, sqsum, tilted ); // integral images of the full image

        if( doCannyPruning )
        {
            sumcanny = cvCreateMat( img->rows + 1, img->cols + 1, CV_32SC1 );
            // NOTE(review): thresholds 0 / 50 and aperture 3 restored from upstream.
            cvCanny( img, temp, 0, 50, 3 );
            cvIntegral( temp, sumcanny );
        }

        // Count how many scales fit, leaving `factor` just past the largest one.
        // NOTE(review): the 10-pixel margin was restored from upstream.
        for( n_factors = 0, factor = 1;
             factor*cascade->orig_window_size.width < img->cols - 10 &&
             factor*cascade->orig_window_size.height < img->rows - 10;
             n_factors++, factor *= scaleFactor )
            ;

        if( findBiggestObject )
        {
            // Walk the scales from largest to smallest.
            scaleFactor = 1./scaleFactor;
            factor *= scaleFactor;
        }
        else
            factor = 1;

        for( ; n_factors-- > 0; factor *= scaleFactor )
        {
            // NOTE(review): the minimum step of 2 was restored from upstream.
            const double ystep = std::max( 2., factor );
            CvSize winSize = { cvRound( cascade->orig_window_size.width * factor ),
                               cvRound( cascade->orig_window_size.height * factor )};
            CvRect equRect = { 0, 0, 0, 0 };
            int *p[4] = {0,0,0,0};  // corners of equRect in the Canny integral image
            int *pq[4] = {0,0,0,0}; // corners of equRect in the plain integral image
            int startX = 0, startY = 0;
            int endX = cvRound((img->cols - winSize.width) / ystep);
            int endY = cvRound((img->rows - winSize.height) / ystep);

            if( winSize.width < minSize.width || winSize.height < minSize.height )
            {
                // Descending scales (biggest-object mode) only get smaller: stop.
                if( findBiggestObject )
                    break;
                continue;
            }

            if ( winSize.width > maxSize.width || winSize.height > maxSize.height )
            {
                // Ascending scales only get larger: stop.
                if( !findBiggestObject )
                    break;
                continue;
            }

            cvSetImagesForHaarClassifierCascade( cascade, sum, sqsum, tilted, factor );
            cvZero( temp );

            if( doCannyPruning )
            {
                // Central 70% of the window, inset by 15% on each side.
                equRect.x = cvRound(winSize.width*0.15);
                equRect.y = cvRound(winSize.height*0.15);
                equRect.width = cvRound(winSize.width*0.7);
                equRect.height = cvRound(winSize.height*0.7);

                p[0] = (int*)(sumcanny->data.ptr + equRect.y*sumcanny->step) + equRect.x;
                p[1] = (int*)(sumcanny->data.ptr + equRect.y*sumcanny->step)
                            + equRect.x + equRect.width;
                p[2] = (int*)(sumcanny->data.ptr + (equRect.y + equRect.height)*sumcanny->step) + equRect.x;
                p[3] = (int*)(sumcanny->data.ptr + (equRect.y + equRect.height)*sumcanny->step)
                            + equRect.x + equRect.width;

                pq[0] = (int*)(sum->data.ptr + equRect.y*sum->step) + equRect.x;
                pq[1] = (int*)(sum->data.ptr + equRect.y*sum->step)
                            + equRect.x + equRect.width;
                pq[2] = (int*)(sum->data.ptr + (equRect.y + equRect.height)*sum->step) + equRect.x;
                pq[3] = (int*)(sum->data.ptr + (equRect.y + equRect.height)*sum->step)
                            + equRect.x + equRect.width;
            }

            if( scanROI.area() > 0 )
            {
                // A candidate was already found: restrict the scan to its ROI.
                startY = cvRound(scanROI.y / ystep);
                endY = cvRound((scanROI.y + scanROI.height - winSize.height) / ystep);

                startX = cvRound(scanROI.x / ystep);
                endX = cvRound((scanROI.x + scanROI.width - winSize.width) / ystep);
            }

            cv::parallel_for_(cv::Range(startY, endY),
                cv::HaarDetectObjects_ScaleCascade_Invoker(cascade, winSize, cv::Range(startX, endX),
                                                           ystep, sum->step, (const int**)p,
                                                           (const int**)pq, allCandidates, &mtx ));

            if( findBiggestObject && !allCandidates.empty() && scanROI.area() == 0 )
            {
                // First hits in biggest-object mode: group them, keep the
                // largest, and narrow the remaining search around it.
                rectList.resize(allCandidates.size());
                std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin());

                groupRectangles(rectList, std::max(minNeighbors, 1), GROUP_EPS);

                if( !rectList.empty() )
                {
                    size_t i, sz = rectList.size();
                    cv::Rect maxRect;

                    for( i = 0; i < sz; i++ )
                    {
                        if( rectList[i].area() > maxRect.area() )
                            maxRect = rectList[i];
                    }

                    allCandidates.push_back(maxRect);

                    // ROI = maxRect grown by GROUP_EPS on each side, clipped
                    // to the image.
                    // NOTE(review): the "*2" and "-1" terms restored from upstream.
                    scanROI = maxRect;
                    int dx = cvRound(maxRect.width*GROUP_EPS);
                    int dy = cvRound(maxRect.height*GROUP_EPS);
                    scanROI.x = std::max(scanROI.x - dx, 0);
                    scanROI.y = std::max(scanROI.y - dy, 0);
                    scanROI.width = std::min(scanROI.width + dx*2, img->cols-1-scanROI.x);
                    scanROI.height = std::min(scanROI.height + dy*2, img->rows-1-scanROI.y);

                    // Do not search windows much smaller than what was found.
                    double minScale = roughSearch ? 0.6 : 0.4;
                    minSize.width = cvRound(maxRect.width*minScale);
                    minSize.height = cvRound(maxRect.height*minScale);
                }
            }
        }
    }

    // ---- group the raw candidates and build the result sequence -------------
    rectList.resize(allCandidates.size());
    if(!allCandidates.empty())
        std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin());

    if( minNeighbors != 0 || findBiggestObject )
    {
        if( outputRejectLevels )
        {
            groupRectangles(rectList, rejectLevels, levelWeights, minNeighbors, GROUP_EPS );
        }
        else
        {
            groupRectangles(rectList, rweights, std::max(minNeighbors, 1), GROUP_EPS);
        }
    }
    else
        rweights.resize(rectList.size(),0); // no grouping: every hit has 0 neighbours

    if( findBiggestObject && rectList.size() )
    {
        // Emit only the rectangle with the largest area.
        CvAvgComp result_comp = {{0,0,0,0},0};

        for( size_t i = 0; i < rectList.size(); i++ )
        {
            cv::Rect r = rectList[i];
            if( r.area() > cv::Rect(result_comp.rect).area() )
            {
                result_comp.rect = r;
                result_comp.neighbors = rweights[i];
            }
        }
        cvSeqPush( result_seq, &result_comp );
    }
    else
    {
        for( size_t i = 0; i < rectList.size(); i++ )
        {
            CvAvgComp c;
            c.rect = rectList[i];
            // rweights stays empty on the outputRejectLevels path.
            c.neighbors = !rweights.empty() ? rweights[i] : 0;
            cvSeqPush( result_seq, &c );
        }
    }

    return result_seq;
}
正在看本人博客的这位童鞋,我看你气度不凡,谈吐间隐隐有王者之气,日后必有一番作为!旁边有“推荐”二字,你就顺手把它点了吧,相得准,我分文不收;相不准,你也好回来找我。