Extremal Region(极值区域)文本定位与识别法-代码分析(一)

时间:2021-03-16 09:01:18

很惊喜地发现Jiri Matas的方法:Real-Time Scene Text Localization and Recognition(CVPR 2012)在OpenCV 3.0 的contrib中有实现,也许未来就会出现在OpenCV的正式版本中,太赞了,照源代码学习之!(四千多行代码,也是醉了~)

ERFilter源码地址:https://github.com/Itseez/opencv_contrib/tree/master/modules/text

  我用VC2012创建的可以直接运行的Demo工程,CSDN下载:http://download.csdn.net/detail/liveintoday/8447045 (要用OpenCV 3.0,不过稍加修改2.4也能运行)

1. struct CV_EXPORTS ERStat      代码中用一个ERStat结构体来代表一个分类的极值区域

// Describes one extremal region (ER): its seed pixel and threshold level, the
// features attached to it, its classifier probability, and the links that place
// it inside the component tree.
struct CV_EXPORTS ERStat
{
public:
//! Constructor
// An explicit constructor must be invoked explicitly: declaring it `explicit`
// suppresses implicit conversions, which often introduce logic errors that are
// very hard to notice once they happen.
explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0);
//! Destructor
~ERStat() { }

//! seed point and the threshold (max grey-level value)
int pixel;
int level;

// See the diagrams in part (3) of these study notes for a detailed description of these features.
//! incrementally computable features
int area;
int perimeter;
int euler; //!< euler number
Rect rect;
double raw_moments[2]; //!< order 1 raw moments to derive the centroid
double central_moments[3]; //!< order 2 central moments to construct the covariance matrix
std::deque<int> *crossings;//!< horizontal crossings
float med_crossings; //!< median of the crossings at three different height levels

//! 2nd stage features
float hole_area_ratio;
float convex_hull_ratio;
float num_inflexion_points;

// TODO Other features can be added (average color, standard deviation, and such)


// TODO shall we include the pixel list whenever available (i.e. after 2nd stage) ?
std::vector<int> *pixels;

//! probability that the ER belongs to the class we are looking for
double probability;
// The program deletes an ERStat region tree with a breadth-first traversal;
// the routine is reproduced below (commented out) for reference only.
//static void deleteERStatTree(ERStat* root) {
// queue<ERStat*> to_delete;
// to_delete.push(root);
// while (!to_delete.empty()) {
// ERStat* n = to_delete.front();
// to_delete.pop();
// ERStat* c = n->child;
// if (c != NULL) {
// to_delete.push(c);
// ERStat* sibling = c->next;
// while (sibling != NULL) {
// to_delete.push(sibling);
// sibling = sibling->next;
// }
// }
// delete n;
// }
//}
//! pointers preserving the tree structure of the component tree
ERStat* parent;
ERStat* child;
ERStat* next;
ERStat* prev;

//! whether the region is a local maximum of the probability
bool local_maxima;
ERStat* max_probability_ancestor;
ERStat* min_probability_ancestor;
};


2.  class CV_EXPORTS ERFilter : public Algorithm    

CV_EXPORTS:实质上就是__declspec(dllexport)的定义,编译器使用 __declspec(dllexport)关键字声明编译为DLL后可以调用的数据、函数、类或类成员函数。

Algorithm:是OpenCV为用户自定义的高层算法实现提供的接口规范。里面规范了get,set,read,write等行为。

ERFilter 有一个内部类: Callback , 隐藏了 SVM,Boost 等分类器,使开发者可以使用自己的分类器识别。
// Abstract classifier interface: a concrete classifier derives from it and
// implements eval(), which is the only operation declared here.
class CV_EXPORTS Callback
{
public:
// Virtual so that an implementation can be destroyed through a Callback pointer.
virtual ~Callback() { }
/** @brief The classifier must return probability measure for the region.
@param stat : The region to be classified
@return The probability measure computed for stat
*/
virtual double eval(const ERStat& stat) = 0;
};

loadClassifierNM1 调用构造函数 ERClassifierNM1(const string& filename) 来加载 Callback 的实现(分类器):

/**
 * Loads the boosted classifier stored in the given file.
 *
 * @param filename Path of the classifier file to load.
 *
 * Raises Error::StsBadArg through CV_Error when the file cannot be opened, or
 * when it can be opened but loading it yields an empty model.
 */
ERClassifierNM1::ERClassifierNM1(const string& filename)
{
    const char* const model_path = filename.c_str();

    // A temporary stream is used only to probe whether the file can be opened;
    // it is destroyed (and the file closed) before the model is loaded.
    const bool can_open = !ifstream(model_path).fail();

    if (!can_open)
    {
        CV_Error(Error::StsBadArg, "Default classifier file not found!");
    }
    else
    {
        boost = StatModel::load<Boost>( model_path );
        if( boost.empty() )
        {
            // The file exists but did not produce a usable model.
            cout << "Could not read the classifier " << model_path << endl;
            CV_Error(Error::StsBadArg, "Could not read the default classifier!");
        }
    }
}

Ptr<ERFilter::Callback> loadDummyClassifier(); 创建一个伪分类器(仅仅是实现Callback类而已,没有内容),用于接受所有区域,即不过滤。

class CV_EXPORTS ERClassifierNM1 : public ERFilter::Callback

class CV_EXPORTS ERClassifierNM2 : public ERFilter::Callback

默认的第1,2阶段分类器都是:Ptr<Boost> boost;

class CV_EXPORTS ERFilterNM : public ERFilter

这个类实现第1阶段和第2阶段的Neumann and Matas的算法接口

void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode)

生成独立处理的各通道,论文中推荐的是H,S,I,Grad(梯度强度)四个通道,此函数还提供了R,G,B,L,Grad通道的实现。Demo用了R,G,B,L,Grad五个通道,并追加了R,G,B,L这四个通道的反通道(255-channels),共9个通道。注意:下面附上的代码实际传入的是ERFILTER_NM_IHSGrad,按同样的方式(最后一个通道不取反)追加反通道后,通道数应为4+3=7个。

注:

HSI(hue, saturation, intensity)

HSV(hue, saturation, value)

HLS(hue, lightness, saturation)

HLS类似于HSI,只是使用了术语lightness,而不是光强度intensity来表示亮度。HSI与HSV颜色空间的差异在于亮度分量(I or V)的计算方式。HSI颜色空间适合传统的图像处理函数,如卷积、均化、直方图等,可以通过处理亮度值来实现这些操作,因为亮度I对R、G、B值的依赖程度是一样的。HSV颜色空间适合处理色度和饱和度,因为它使得饱和度具有更大的动态取值范围。HSV使用单六角锥的颜色模型,HSI使用双六角锥的颜色模型。


附上textdetection.cpp,论文实现代码在erfilter.cpp中。

/*
* textdetection.cpp
* A demo program of the Extremal Region Filter algorithm described in
* Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
*
* Created on: Sep 23, 2013
* Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
*/

#include "text.hpp"
#include "erfilter.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"

#include <cstdio>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <vector>

using namespace std;
using namespace cv;
using namespace cv::text;

// Forward declarations of the helpers defined after main().

// Prints the usage text for the program name `cmd` and terminates the process.
void show_help_and_exit(const char *cmd);
// Draws the outline of every rectangle in `groups` onto `src`.
void groups_draw(Mat &src, vector<Rect> &groups);
// Shows, one window per channel, a mask of the regions extracted from that channel.
void er_show(vector<Mat> &channels, vector<vector<ERStat> > &regions);

int main(int argc, const char * argv[])
{
cout << endl << argv[0] << endl << endl;
cout << "Demo program of the Extremal Region Filter algorithm described in " << endl;
cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl;

if (argc < 2) show_help_and_exit(argv[0]);

namedWindow("grouping",WINDOW_NORMAL);
Mat src = imread(argv[1]);

// Extract channels to be processed individually
vector<Mat> channels;
computeNMChannels(src, channels, ERFILTER_NM_IHSGrad);

int cn = (int)channels.size();
// Append negative channels to detect ER- (bright regions over dark background)
for (int c = 0; c < cn-1; c++)
channels.push_back(255-channels[c]);

// Create ERFilter objects with the 1st and 2nd stage default classifiers
Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),16,0.00015f,0.13f,0.2f,true,0.1f);
Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.5);

vector<vector<ERStat> > regions(channels.size());
// Apply the default cascade classifier to each independent channel (could be done in parallel)
cout << "Extracting Class Specific Extremal Regions from " << (int)channels.size() << " channels ..." << endl;
cout << " (...) this may take a while (...)" << endl << endl;
for (int c=0; c<(int)channels.size(); c++)
{
er_filter1->run(channels[c], regions[c]);
er_filter2->run(channels[c], regions[c]);
}

// Detect character groups
cout << "Grouping extracted ERs ... ";
vector< vector<Vec2i> > region_groups;
vector<Rect> groups_boxes;
erGrouping(src, channels, regions, region_groups, groups_boxes, ERGROUPING_ORIENTATION_HORIZ);
//erGrouping(src, channels, regions, region_groups, groups_boxes, ERGROUPING_ORIENTATION_ANY, "./trained_classifier_erGrouping.xml", 0.5);

// draw groups
groups_draw(src, groups_boxes);
imshow("grouping",src);

cout << "Done!" << endl << endl;
cout << "Press 'e' to show the extracted Extremal Regions, any other key to exit." << endl << endl;
if( waitKey (-1) == 101)
er_show(channels,regions);

// memory clean-up
er_filter1.release();
er_filter2.release();
regions.clear();
if (!groups_boxes.empty())
{
groups_boxes.clear();
}
}

// helper functions

/**
 * Prints the command-line usage and terminates the process with exit code -1.
 *
 * @param cmd Program name shown in the usage line.
 */
void show_help_and_exit(const char *cmd)
{
    cout << " Usage: " << cmd << " <input_image> " << endl;
    cout << " Default classifier files (trained_classifierNM*.xml) must be in current directory" << endl << endl;
#ifdef _WIN32
    // "pause" is a cmd.exe builtin: it keeps the console window open so the
    // usage text stays readable. Other shells have no such command, so the
    // call is compiled only for Windows builds.
    system("pause");
#endif
    exit(-1);
}

/**
 * Outlines every rectangle of `groups` on `src`, from the last one to the first.
 *
 * @param src    Image drawn on in place.
 * @param groups Rectangles to outline.
 *
 * A CV_8UC3 image gets the colour Scalar(255, 255, 0); any other type gets
 * Scalar(255). Lines are 1 pixel thick with line type 8.
 */
void groups_draw(Mat &src, vector<Rect> &groups)
{
    // The image type is the same for every rectangle, so test it once.
    const bool is_8uc3 = (src.type() == CV_8UC3);

    for (vector<Rect>::reverse_iterator it = groups.rbegin(); it != groups.rend(); ++it)
    {
        const Rect &box = *it;
        if (is_8uc3)
        {
            rectangle(src, box.tl(), box.br(), Scalar( 255, 255, 0 ), 1, 8 );
        }
        else
        {
            rectangle(src, box.tl(), box.br(), Scalar( 255 ), 1, 8 );
        }
    }
}

/**
 * Displays the extracted regions of every channel, one window per channel
 * ("channel 0", "channel 1", ...), then blocks until a key is pressed.
 *
 * @param channels Images the regions were extracted from.
 * @param regions  regions[c] holds the regions of channels[c]; it is indexed
 *                 with every channel index, so it must have at least as many
 *                 entries as `channels`.
 */
void er_show(vector<Mat> &channels, vector<vector<ERStat> > &regions)
{
    for (int c=0; c<(int)channels.size(); c++)
    {
        const Mat &channel = channels[c];

        // floodFill() needs a mask 2 pixels wider and taller than the image it
        // fills, so size the mask from this channel.
        Mat dst = Mat::zeros(channel.rows+2, channel.cols+2, CV_8UC1);

        for (int r=0; r<(int)regions[c].size(); r++)
        {
            const ERStat &er = regions[c][r];
            if (er.parent == NULL) // deprecate the root region
                continue;

            // Write 255 into the mask only (the channel itself is left untouched),
            // using 4-connectivity and a fixed range relative to the seed pixel.
            const int newMaskVal = 255;
            const int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;

            // er.pixel is a linear index into the channel: column = index % cols,
            // row = index / cols.
            floodFill(channels[c], dst, Point(er.pixel%channel.cols, er.pixel/channel.cols),
                      Scalar(255), 0, Scalar(er.level), Scalar(0), flags);
        }

        // "channel " plus any int plus the terminator fits in 32 bytes, and
        // snprintf never writes past the buffer regardless of the channel count.
        char buff[32];
        snprintf(buff, sizeof(buff), "channel %d", c);
        imshow(buff, dst);
    }
    waitKey(-1);
}