C#版本源码下载地址:https://github.com/charlesw/tesseract
其实在vs中可以直接用NuGet工具进行下载:
打开nuget,搜索tesseract,点安装即可。
.
源码是vs2015编译的,需要安装vs2015以上版本。
打开项目后如:
我们再添加一个winform项目,画界面如:
实现点击“选择需要识别的图片”,打开一张图片,调用算法并显示结果。比较简单。源码如下:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Tesseract;

namespace TesseractDemo
{
    /// <summary>
    /// Demo form: the user picks an image file, the image is shown on the form,
    /// Tesseract OCR is run on it and the recognized text is displayed.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the published listing lost every dotted member access, so the
    /// control names used here (openFileDialog1, pictureBox1, txtResult) are
    /// reconstructed placeholders - rename them to match the controls in your designer file.
    /// </remarks>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler: lets the user choose an image and runs OCR on it.
        /// </summary>
        private void btnRec_Click(object sender, EventArgs e)
        {
            // Nothing to do when the user cancels the dialog.
            if (openFileDialog1.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            var imgPath = openFileDialog1.FileName;
            pictureBox1.Image = Image.FromFile(imgPath);

            string strResult = ImageToText(imgPath);

            // Show a fixed "cannot recognize" message when OCR produced no text.
            txtResult.Text = string.IsNullOrEmpty(strResult) ? "无法识别" : strResult;
        }

        /// <summary>
        /// Runs Tesseract OCR on an image file and returns the recognized text.
        /// </summary>
        /// <param name="imgPath">Path of the image file to recognize.</param>
        /// <returns>The text Tesseract extracted from the image.</returns>
        public string ImageToText(string imgPath)
        {
            // "tessdata" is a relative path to the language-data folder; "eng" selects the English data.
            // NOTE(review): the third constructor argument was lost in the listing;
            // EngineMode.Default is assumed - confirm against the original project.
            using (var engine = new TesseractEngine("tessdata", "eng", EngineMode.Default))
            using (var img = Pix.LoadFromFile(imgPath))
            using (var page = engine.Process(img))
            {
                return page.GetText();
            }
        }
    }
}
有一点要注意的是,tesseract的识别语言包要自己下载后包含到项目里面,并设置为始终复制,或者直接把这个文件包放到运行程序目录(bin\debug)下:
eng是英文字符的意思,要识别其他语言字符,需要自己下载:
Tesseract has unicode (UTF-8) support, and can recognize more than 100 languages "out of the box".
这个库支持100多种语言的识别。
字库下载地址为:https://github.com/tesseract-ocr/tessdata (GitHub - tesseract-ocr/tessdata: Trained models with support for legacy and LSTM OCR engine)
用OpencvSharp先降噪再调OCR识别:
-
// Click handler: fetch an image from the web, pre-process it (the article uses OpenCvSharp), then run OCR.
// NOTE(review): this listing is damaged - the receiver/member names in front of most calls and
// assignments, part of the URL and the file name after "d:\\" were lost. It does not compile as shown;
// restore the missing tokens from the original project before use.
-
private void button3_Click(object sender, EventArgs e)
-
{
-
// Read an image from the web.
-
// NOTE(review): the URL below has lost its scheme/host (and possibly its tail) - fill in the real address.
string imgUrl = "/user/validate/";
-
MemoryStream ms = ReadImgFromWeb(imgUrl);
-
// NOTE(review): presumably builds an Image from the stream and assigns it to a picture control - confirm.
Image img = (ms);
-
= img;
-
-
// Denoise.
-
// NOTE(review): ms is passed to a second reader here after already being used above -
// confirm the stream position is rewound between the two reads.
Mat simg = (ms, );
-
("Input Image", simg);
-
// Thresholding; the threshold parameters can be found by tuning with a visualization tool.
-
Mat ThresholdImg = (29, 255, );
-
("Threshold", ThresholdImg);
-
// NOTE(review): presumably writes the thresholded image to disk; the file name after "d:\\" is missing.
("d:\\", ThresholdImg);
-
-
// Run OCR on the same path that was passed to the call above and assign the resulting text.
= ImageToText("d:\\");
-
}
-
-
/// <summary>
/// Downloads an image from the web into memory.
/// </summary>
/// <param name="Url">Absolute HTTP/HTTPS address of the image.</param>
/// <returns>
/// A <see cref="MemoryStream"/> holding the complete response body, positioned at the start.
/// The caller owns the stream and is responsible for disposing it.
/// </returns>
public MemoryStream ReadImgFromWeb(string Url)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
    // Attach credentials to the request.
    // NOTE(review): the assigned value was lost in the published listing;
    // CredentialCache.DefaultCredentials is assumed - confirm.
    request.Credentials = CredentialCache.DefaultCredentials;
    request.UserAgent = "Microsoft Internet Explorer";

    MemoryStream ms = new MemoryStream();

    // Dispose the response and its stream once the body is buffered, so the connection is released.
    using (WebResponse response = request.GetResponse())
    using (Stream s = response.GetResponseStream())
    {
        s.CopyTo(ms);
    }

    // Rewind so the caller can read the image from the beginning.
    ms.Seek(0, SeekOrigin.Begin);
    return ms;
}
请自行用NuGet程序下载opencvsharp3.0库,参考使用C#版OpenCV进行圆心求取 - 小y - 博客园
另外专门有篇文章介绍中文识别:Tesseract-OCR识别中文与训练字库实例