很惊喜地发现Jiri Matas的方法:Real-Time Scene Text Localization and Recognition CVPR 2012 在OpenCV 3.0 的contrib中有实现,也许未来就会出现在OpenCV的正式版本中,太赞了,照源代码学习之!(四千多行代码,也是醉了~)
ERFilter源码地址:https://github.com/Itseez/opencv_contrib/tree/master/modules/text
我用VC2012创建的可以直接运行的Demo工程,CSDN下载:http://download.csdn.net/detail/liveintoday/8447045 (要用OpenCV 3.0,不过稍加修改2.4也能运行)
1. struct CV_EXPORTS ERStat 代码中用一个ERStat结构体来代表一个分类的极值区域
struct CV_EXPORTS ERStat
{
public:
//! Constructor
// explicit: the constructor must be invoked explicitly. Declaring it explicit
// suppresses implicit conversions, which often introduce logic errors that are
// very hard to detect once they occur.
explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0);
//! Destructor
~ERStat() { }
//! seed point and the threshold (max grey-level value)
int pixel;
int level;
// See the illustrations in part (3) of my study notes for a detailed
// description of these features.
//! incrementally computable features
int area;
int perimeter;
int euler; //!< euler number
Rect rect;
double raw_moments[2]; //!< order 1 raw moments to derive the centroid
double central_moments[3]; //!< order 2 central moments to construct the covariance matrix
std::deque<int> *crossings;//!< horizontal crossings
float med_crossings; //!< median of the crossings at three different height levels
//! 2nd stage features
float hole_area_ratio;
float convex_hull_ratio;
float num_inflexion_points;
// TODO Other features can be added (average color, standard deviation, and such)
// TODO shall we include the pixel list whenever available (i.e. after 2nd stage) ?
std::vector<int> *pixels;
//! probability that the ER belongs to the class we are looking for
double probability;
// The implementation deletes an ERStat region tree with a breadth-first search:
//static void deleteERStatTree(ERStat* root) {
// queue<ERStat*> to_delete;
// to_delete.push(root);
// while (!to_delete.empty()) {
// ERStat* n = to_delete.front();
// to_delete.pop();
// ERStat* c = n->child;
// if (c != NULL) {
// to_delete.push(c);
// ERStat* sibling = c->next;
// while (sibling != NULL) {
// to_delete.push(sibling);
// sibling = sibling->next;
// }
// }
// delete n;
// }
//}
//! pointers preserving the tree structure of the component tree
ERStat* parent;
ERStat* child;
ERStat* next;
ERStat* prev;
//! whenever the regions is a local maximum of the probability
bool local_maxima;
ERStat* max_probability_ancestor;
ERStat* min_probability_ancestor;
};
2. class CV_EXPORTS ERFilter : public Algorithm
CV_EXPORTS:实质上就是__declspec(dllexport)的定义,编译器使用 __declspec(dllexport)关键字声明编译为DLL后可以调用的数据、函数、类或类成员函数。
Algorithm:是OpenCV为用户自定义的高层算法实现提供的接口规范。里面规范了get,set,read,write等行为。
ERFilter 有一个内部类: Callback , 隐藏了 SVM, Boost 等分类器,使开发者可以使用自己的分类器识别。
class CV_EXPORTS Callback
{
public:
//! Virtual destructor so a concrete classifier can be deleted through a Callback pointer.
virtual ~Callback() { }
/** @brief The classifier must return probability measure for the region.
@param stat : The region to be classified
@return the probability measure computed for the region
*/
virtual double eval(const ERStat& stat) = 0;
};
loadClassifierNM调用构造函数ERClassifierNM(const string &filename)来加载Callback()
// Loads the stage-1 default classifier (a Boost model) from an XML file.
// Throws (via CV_Error) when the file is missing or cannot be parsed.
ERClassifierNM1::ERClassifierNM1(const string& filename)
{
    // Guard clause: fail fast when the classifier file is not on disk.
    if (!ifstream(filename.c_str()))
        CV_Error(Error::StsBadArg, "Default classifier file not found!");

    boost = StatModel::load<Boost>( filename.c_str() );
    if (boost.empty())
    {
        cout << "Could not read the classifier " << filename.c_str() << endl;
        CV_Error(Error::StsBadArg, "Could not read the default classifier!");
    }
}
Ptr<ERFilter::Callback> loadDummyClassifier(); 创建一个伪分类器(仅仅是实现Callback类而已,没有内容),用于接受所有区域,即不过滤。
class CV_EXPORTS ERClassifierNM1 : public ERFilter::Callback
class CV_EXPORTS ERClassifierNM2 : public ERFilter::Callback
默认的第1,2阶段分类器都是:Ptr<Boost> boost;
class CV_EXPORTS ERFilterNM : public ERFilter
这个类实现第1阶段和第2阶段的Neumann and Matas的算法接口
void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode)
生成独立处理的各通道,论文中推荐的是H,S,I与Grad强度四个通道,此函数还提供了R,G,B,L,Grad通道的实现。Demo用了R,G,B,L,Grad五个通道,并追加了R,G,B,L这个四个通道的反通道(255-channels),共9个通道。
注:
HSI(hue、saturation、intensity)
HSV(hue、saturation、value)
HLS(hue、lightness、saturation)
HLS类似于HSI,只是使用了术语lightness,而不是光强度intensity来表示亮度。HSI和HSV颜色空间的差异在于亮度分量(I or V)的计算方式。HSI颜色空间适合传统的图像处理函数,如卷积、均化、直方图等,可以通过处理亮度值来实现这些操作,因为亮度I对R、G、B值的依赖程度是一样的。HSV颜色空间适合处理色度和饱和度,因为它使得饱和度具有更大的动态取值范围。HSV使用单六角锥的颜色模型,HSI使用双六角锥的颜色模型
附上textdetection.cpp,论文实现代码在erfilter.cpp中。
/*
* textdetection.cpp
* A demo program of the Extremal Region Filter algorithm described in
* Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
*
* Created on: Sep 23, 2013
* Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
*/
#include "text.hpp"
#include "erfilter.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include <cstdio>
#include <iomanip>
#include <iostream>
#include <vector>
using namespace std;
using namespace cv;
using namespace cv::text;
void show_help_and_exit(const char *cmd);
void groups_draw(Mat &src, vector<Rect> &groups);
void er_show(vector<Mat> &channels, vector<vector<ERStat> > ®ions);
/**
 * Demo entry point: runs the two-stage ER cascade on each image channel,
 * groups the surviving regions into (horizontal) text lines and displays them.
 * Usage: program <input_image>
 */
int main(int argc, const char * argv[])
{
    cout << endl << argv[0] << endl << endl;
    cout << "Demo program of the Extremal Region Filter algorithm described in " << endl;
    cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl;
    if (argc < 2) show_help_and_exit(argv[0]);
    namedWindow("grouping",WINDOW_NORMAL);
    Mat src = imread(argv[1]);
    // Bug fix: the original never checked the imread result, so a bad path or an
    // unsupported format crashed later inside computeNMChannels on an empty Mat.
    if (src.empty())
    {
        cout << "Could not read input image: " << argv[1] << endl;
        show_help_and_exit(argv[0]);
    }
    // Extract channels to be processed individually
    vector<Mat> channels;
    computeNMChannels(src, channels, ERFILTER_NM_IHSGrad);
    int cn = (int)channels.size();
    // Append negative channels to detect ER- (bright regions over dark background).
    // The last (gradient) channel is deliberately skipped: its inverse adds nothing.
    for (int c = 0; c < cn-1; c++)
        channels.push_back(255-channels[c]);
    // Create ERFilter objects with the 1st and 2nd stage default classifiers
    Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),16,0.00015f,0.13f,0.2f,true,0.1f);
    Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.5);
    vector<vector<ERStat> > regions(channels.size());
    // Apply the default cascade classifier to each independent channel (could be done in parallel)
    cout << "Extracting Class Specific Extremal Regions from " << (int)channels.size() << " channels ..." << endl;
    cout << " (...) this may take a while (...)" << endl << endl;
    for (int c=0; c<(int)channels.size(); c++)
    {
        er_filter1->run(channels[c], regions[c]);
        er_filter2->run(channels[c], regions[c]);
    }
    // Detect character groups
    cout << "Grouping extracted ERs ... ";
    vector< vector<Vec2i> > region_groups;
    vector<Rect> groups_boxes;
    erGrouping(src, channels, regions, region_groups, groups_boxes, ERGROUPING_ORIENTATION_HORIZ);
    //erGrouping(src, channels, regions, region_groups, groups_boxes, ERGROUPING_ORIENTATION_ANY, "./trained_classifier_erGrouping.xml", 0.5);
    // draw groups
    groups_draw(src, groups_boxes);
    imshow("grouping",src);
    cout << "Done!" << endl << endl;
    cout << "Press 'e' to show the extracted Extremal Regions, any other key to exit." << endl << endl;
    if( waitKey (-1) == 101)  // 101 == 'e'
        er_show(channels,regions);
    // memory clean-up
    er_filter1.release();
    er_filter2.release();
    regions.clear();
    if (!groups_boxes.empty())
        groups_boxes.clear();
    return 0;
}
// helper functions
// Prints usage information, waits for a keypress (Windows) and terminates.
void show_help_and_exit(const char *cmd)
{
    cout << " Usage: " << cmd << " <input_image> " << endl
         << " Default classifier files (trained_classifierNM*.xml) must be in current directory" << endl << endl;
    system("pause"); // Windows-only: keeps the console window open before exiting
    exit(-1);
}
// Draws the bounding box of every detected text group onto the image,
// cyan on color images and white on single-channel ones.
void groups_draw(Mat &src, vector<Rect> &groups)
{
    // The image type cannot change inside the loop, so pick the color once.
    const Scalar color = (src.type() == CV_8UC3) ? Scalar( 255, 255, 0 ) : Scalar( 255 );
    for (int i=(int)groups.size()-1; i>=0; i--)
    {
        const Rect &box = groups.at(i);
        rectangle(src, box.tl(), box.br(), color, 1, 8);
    }
}
void er_show(vector<Mat> &channels, vector<vector<ERStat> > ®ions)
{
for (int c=0; c<(int)channels.size(); c++)
{
Mat dst = Mat::zeros(channels[0].rows+2,channels[0].cols+2,CV_8UC1);
for (int r=0; r<(int)regions[c].size(); r++)
{
ERStat er = regions[c][r];
if (er.parent != NULL) // deprecate the root region
{
int newMaskVal = 255;
int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;
floodFill(channels[c],dst,Point(er.pixel%channels[c].cols,er.pixel/channels[c].cols),
Scalar(255),0,Scalar(er.level),Scalar(0),flags);
}
}
char buff[10]; char *buff_ptr = buff;
sprintf(buff, "channel %d", c);
imshow(buff_ptr, dst);
}
waitKey(-1);
}