最近在研究C#相关的OCR技术。图像识别一般用C和C++这类底层语言实现得比较多,C#主要是依托一些封装好的组件进行调用,这里介绍三种身份证识别的方法。
一:调用大公司api接口,百度、云脉,文通科技都有相关的api介绍。
二:调用图像处理类库。EmguCV是OpenCV的一个跨平台的.NET封装,该封装也可以被编译到Mono平台,并在Windows、Mac OS、Android、iPhone、iPad等多个平台上运行。
三:调用office2007 组件
一、证件识别api接口
以聚合数据中的API接口为例。因为官方API文档没有提供C#的调用示例,下面给出一个C#的实现,接口网址如下:证件识别接口
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
/// <summary>
/// Uploads the first file posted with the current HTTP request to the card-recognition
/// endpoint and returns the recognised name and ID number.
/// </summary>
/// <returns>
/// On success, whatever <c>tojson()</c> produces for an object with the members
/// <c>name</c> and <c>card</c>; on any failure, the full text of the exception.
/// </returns>
public static string cardupload()
{
    try
    {
        // Placeholder: replace with the app key issued for your own account.
        string appKey = "网站自己申请的key";

        // Fail with a clear message instead of an index error when nothing was uploaded.
        HttpFileCollection files = HttpContext.Current.Request.Files;
        if (files.Count == 0)
        {
            throw new InvalidOperationException("The request does not contain an uploaded file.");
        }

        HttpPostedFile file = files[0];
        string url = "http://api2.juheapi.com/cardrecon/upload";

        // NOTE(review): the article text was lower-cased during extraction; confirm the exact
        // casing of the field names and the meaning of card type "2" against the provider's docs.
        var parameters = new Dictionary<string, string>
        {
            { "key", appKey },
            { "cardtype", "2" }
        };

        string result = httppostdata(url, 60000, "pic", file.InputStream, parameters);

        // The service wraps the recognised fields in a "result" member. When it is missing
        // or null, surface the raw response rather than a null-reference error.
        JToken resultToken = JObject.Parse(result)["result"];
        if (resultToken == null || resultToken.Type == JTokenType.Null)
        {
            throw new InvalidOperationException("The recognition service returned no result: " + result);
        }

        // Re-parse the token text so that both an embedded object and a JSON-encoded string work.
        JObject info = JObject.Parse(resultToken.ToString());
        var cardInfo = new
        {
            name = info["姓名"],
            card = info["公民身份号码"]
        };

        // tojson() is an extension method defined outside this snippet; its name is kept
        // exactly as the article spells it. NOTE(review): confirm its real casing.
        return cardInfo.tojson();
    }
    catch (Exception ex)
    {
        // NOTE(review): the complete exception text (including stack trace) is handed back
        // to the caller, as in the original; consider logging it and returning a short message.
        return ex.ToString();
    }
}
/// <summary>
/// Sends a multipart/form-data POST that carries one file part plus a set of string
/// fields, and returns the response body as text.
/// </summary>
/// <param name="url">Address of the API endpoint.</param>
/// <param name="timeout">Request timeout, passed to <c>HttpWebRequest.Timeout</c> (milliseconds).</param>
/// <param name="filekeyname">Form field name used for the file part.</param>
/// <param name="file">Stream whose remaining content is sent as the file part.</param>
/// <param name="stringdict">Additional string fields; <c>null</c> is treated as "no fields".</param>
/// <returns>The response body decoded as UTF-8.</returns>
/// <exception cref="ArgumentNullException"><paramref name="file"/> is <c>null</c>.</exception>
private static string httppostdata(string url, int timeout, string filekeyname,
    Stream file, Dictionary<string, string> stringdict)
{
    if (file == null)
    {
        throw new ArgumentNullException("file");
    }

    // Boundary derived from the current tick count, as in the original.
    string boundary = "---------------" + DateTime.Now.Ticks.ToString("x");

    var webRequest = (HttpWebRequest)WebRequest.Create(url);
    webRequest.Method = "POST";
    webRequest.Timeout = timeout;
    webRequest.ContentType = "multipart/form-data; boundary=" + boundary;

    // The body is assembled in memory first so that ContentLength can be set up front.
    using (var body = new MemoryStream())
    {
        Action<string> writeText = text =>
        {
            byte[] bytes = Encoding.UTF8.GetBytes(text);
            body.Write(bytes, 0, bytes.Length);
        };

        // File part: opening boundary, part headers, blank line, then the raw bytes.
        writeText("--" + boundary + "\r\n");
        writeText(string.Format(
            "Content-Disposition: form-data; name=\"{0}\"; filename=\"{1}\"\r\n" +
            "Content-Type: application/octet-stream\r\n\r\n",
            filekeyname, "card.jpg"));
        file.CopyTo(body);

        // String parts. Each delimiter starts with the CRLF that terminates the previous
        // part, and nothing is written after the value. The original appended an extra
        // CRLF after every value, which became part of every value except the last one.
        if (stringdict != null)
        {
            foreach (KeyValuePair<string, string> field in stringdict)
            {
                writeText("\r\n--" + boundary + "\r\n" +
                          "Content-Disposition: form-data; name=\"" + field.Key + "\"\r\n\r\n" +
                          field.Value);
            }
        }

        // Closing delimiter, again led by the CRLF that ends the last part. This keeps the
        // framing valid even when there are no string fields at all.
        writeText("\r\n--" + boundary + "--\r\n");

        webRequest.ContentLength = body.Length;
        using (Stream requestStream = webRequest.GetRequestStream())
        {
            // WriteTo copies the whole buffer regardless of the current position.
            body.WriteTo(requestStream);
        }
    }

    // The using blocks release the response and reader even when reading fails.
    using (var response = (HttpWebResponse)webRequest.GetResponse())
    using (var reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}
|
二、emgucv类库调用
环境搭建
下载地址:emgucv官网
在file类别下下载这个exe,进行安装,安装后在目录下能找相应组件,还有些应用的案例。
用C#进行识别,需要先对图片进行二值化处理,再调用OCR。相关DLL可在我整理的地址下载:360云盘 提取码:89f4
dll文件夹中的dll引用到c#项目中,x64,x86,tessdata对应ocr识别的类库和语言库,我tessdata中已添加中文语言包,将这三个文件夹放入程序执行文件夹中。
demo
自己做的小demo如图:身份证图片是百度上下载的
相关代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
using system;
using system.collections.generic;
using system.componentmodel;
using system.data;
using system.drawing;
using system.linq;
using system.text;
using system.windows.forms;
using emgu.cv;
using emgu.cv.ocr;
using emgu.cv.structure;
using system.io;
namespace imagemanage
{
    /// <summary>
    /// Demo form: clicking the first picture box loads an image, deskews and binarises it;
    /// clicking the convert button runs Tesseract OCR on the binarised image.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the namespace, class, handler and control names (textbox1, picturebox1,
    /// picturebox2) are kept exactly as the article spells them; the article text was
    /// lower-cased, so confirm them against the designer file.
    /// </remarks>
    public partial class form1 : Form
    {
        /// <summary>Largest skew angle, in degrees, tried in either direction by <see cref="randon"/>.</summary>
        private const int MaxSkewDegrees = 20;

        /// <summary>Binarised image produced by the last successful load; null until then.</summary>
        Image<Gray, byte> imagethreshold;

        public form1()
        {
            InitializeComponent();
        }

        /// <summary>Runs OCR on the current binarised image and shows the text.</summary>
        private void btn_convert_click(object sender, EventArgs e)
        {
            // Nothing to recognise until an image has been loaded.
            if (imagethreshold == null)
            {
                MessageBox.Show("请选择图片");
                return;
            }

            // The first argument is the language-data folder; empty means the default
            // location next to the executable.
            using (Tesseract ocr = new Tesseract("", "chi_sim", OcrEngineMode.TesseractOnly))
            {
                ocr.Recognize(imagethreshold);
                this.textbox1.Text = ocr.GetText();
            }
        }

        /// <summary>Lets the user pick an image, then deskews and binarises it.</summary>
        private void picturebox1_click(object sender, EventArgs e)
        {
            using (OpenFileDialog dialog = new OpenFileDialog())
            {
                dialog.Title = "请选择图片";
                if (dialog.ShowDialog() != DialogResult.OK)
                {
                    // Cancelled: keep whatever is currently shown. The original carried on
                    // and crashed when no image had been loaded yet.
                    return;
                }

                picturebox1.Image = Image.FromFile(dialog.FileName);
            }

            Bitmap bitmap = this.picturebox1.Image as Bitmap;
            if (bitmap == null)
            {
                return;
            }

            Image<Gray, byte> binarised;
            using (Image<Bgr, byte> source = new Image<Bgr, byte>(bitmap))
            using (Image<Gray, byte> gray = source.Convert<Gray, byte>())
            {
                Image<Gray, byte> deskewed = randon(gray);
                try
                {
                    binarised = deskewed.ThresholdBinary(new Gray(100), new Gray(255));
                }
                finally
                {
                    // randon may hand back its input; that one is disposed by the using above.
                    if (!ReferenceEquals(deskewed, gray))
                    {
                        deskewed.Dispose();
                    }
                }
            }

            // Swap in the new result and release the previous one.
            if (imagethreshold != null)
            {
                imagethreshold.Dispose();
            }
            imagethreshold = binarised;
            this.picturebox2.Image = imagethreshold.ToBitmap();
        }

        /// <summary>
        /// Skew correction by projection: rotates the image through every whole degree in
        /// [-MaxSkewDegrees, +MaxSkewDegrees] and keeps the rotation whose row-sum profile
        /// changes most from one row to the next.
        /// </summary>
        /// <param name="imageinput">Grayscale image to correct; it is not modified or disposed.</param>
        /// <returns>
        /// The best rotated copy, or <paramref name="imageinput"/> itself when no rotation
        /// scores above zero.
        /// </returns>
        private Image<Gray, byte> randon(Image<Gray, byte> imageinput)
        {
            int width = imageinput.Width;
            int height = imageinput.Height;
            int[] rowSums = new int[height];

            Image<Gray, byte> best = imageinput;
            long bestScore = 0;

            // Upper bound is inclusive so the search range is symmetric (the original stopped at +19).
            for (int angle = -MaxSkewDegrees; angle <= MaxSkewDegrees; angle++)
            {
                // NOTE(review): assumes Rotate returns an image of the same size as the input,
                // since the loops below index it with the input's dimensions.
                Image<Gray, byte> candidate = imageinput.Rotate(angle, new Gray(1));
                byte[,,] pixels = candidate.Data;

                // Horizontal projection: total intensity of each row.
                for (int row = 0; row < height; row++)
                {
                    int rowSum = 0;
                    for (int col = 0; col < width; col++)
                    {
                        rowSum += pixels[row, col, 0];
                    }
                    rowSums[row] = rowSum;
                }

                // Score: summed absolute difference between neighbouring rows. Accumulated
                // as long because height * width * 255 can exceed int range on large images.
                long score = 0;
                for (int row = 0; row < height - 1; row++)
                {
                    score += Math.Abs(rowSums[row] - rowSums[row + 1]);
                }

                if (score > bestScore)
                {
                    // Release the previous best unless it is the caller's image.
                    if (!ReferenceEquals(best, imageinput))
                    {
                        best.Dispose();
                    }
                    best = candidate;
                    bestScore = score;
                }
                else
                {
                    candidate.Dispose();
                }
            }

            return best;
        }
    }
}
|
三、office 2007组件
该组件免费而且识别度比较高。
环境搭建
需要Office 2007组件MODI,因此要先安装Office 2007;由于兼容性问题,还需要安装补丁,SP1或者SP2都行,补丁下载地址如下:
sp1下载地址 sp2下载地址
安装后控制面板-->卸载或更新程序-->选择office2007-->选择更改-->选择添加或修复功能-->弹出下面界面,运行相应组件。
将office工具-->microsoft office document imaging 下的工具运行
在c#项目中引用com组件即可:
如果office组件应用不是在本地程序而需要部署在iis上,还需将应用程序的应用池的权限设置为如下图所示:程序应用池-->高级设置-->标识
demo
1
2
3
4
5
6
7
8
9
10
11
12
|
// Runs MODI OCR over the document at fullfilename and collects the recognised text of
// every page into sb. fullfilename is defined outside this snippet.
StringBuilder sb = new StringBuilder();
MODI.Document doc = new MODI.Document();
bool opened = false;
try
{
    doc.Create(fullfilename);
    opened = true;

    // Recognise Simplified Chinese. NOTE(review): the two flags are presumably the
    // auto-orient and straighten options of Document.OCR; confirm against the MODI reference.
    doc.OCR(MODI.MiLANGUAGES.miLANG_CHINESE_SIMPLIFIED, true, true);

    for (int i = 0; i < doc.Images.Count; i++)
    {
        MODI.Image image = (MODI.Image)doc.Images[i];
        MODI.Layout layout = image.Layout;
        sb.Append(layout.Text);
    }
}
finally
{
    // Close without saving and release the COM object; otherwise the source file can
    // stay locked until the runtime happens to collect the wrapper.
    if (opened)
    {
        doc.Close(false);
    }
    System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
}
|
以上即一些c#进行身份证识别的方法,可根据自己项目的不同需求进行选用。
原文链接:http://www.cnblogs.com/kaoleba/p/5662575.html