利用OCR识别扫描的jpg、tif文件的文字

时间:2021-10-26 09:02:02

第一步:下载老马哥的从 office和sharepoint 提取出来的注册表和dll  http://115.com/file/dpa4qrt2  

或者直接安装office和sharepoint2007

第二步:下载我的demo   http://files.cnblogs.com/0banana0/OCR.zip

***识别率达不到百分之百,因此需要人工校对:请在编辑器里修正识别错误的内容!

 第三步:发布

本地环境无错,发布到 IIS 后报错:“Object hasn't been initialized and can't be used yet”

解决办法:Go to IIS -> Application Pools -> DefaultAppPool -> Identity -> Custom account, then enter the user name and password.

****发布的时候 iis还报一个错:Compiler Error Message: CS0016拒绝访问

解决办法:给 c:\windows\temp 目录加上 NETWORK SERVICE(我这里只加这个不行)和 IIS_IUSRS(后来再加上这个才行)的权限

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Collections;
using System.IO;
using System.Text;
using ContractManage.DAL;
using System.Threading;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace ContractManage.uploadify
{
/// <summary>
/// HTTP handler that runs MODI OCR over the scanned images stored for one
/// contract and saves the recognized text through <c>Pt_ContractImg_DAO</c>.
/// </summary>
/// <remarks>
/// The contract is selected with the <c>cid</c> query-string parameter; the
/// images are read from the <c>OCRFile\{cid}</c> folder next to this handler.
/// MODI objects are COM objects, so they are released explicitly after each
/// file instead of being left to the garbage collector.
/// </remarks>
public class OCR : IHttpHandler
{
    // File extensions that are sent to OCR (compared case-insensitively).
    private static readonly string[] SupportedExtensions = { ".jpg", ".tif", ".tiff" };

    // Upper bound, in milliseconds, for waiting on the "progress == 100" event.
    private const int OcrCompletionTimeoutMs = 5000;

    private MODI.Document _document;
    private MODI.Images _images;
    private MODI.Image _image;
    private MODI.Layout _layout;
    private ManualResetEvent _completedOCR = new ManualResetEvent(false);

    /// <summary>
    /// Handles one request: OCRs every supported image of the contract named
    /// by the <c>cid</c> query-string value, then writes a single space as the
    /// plain-text response body.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    public void ProcessRequest(HttpContext context)
    {
        context.Response.ContentType = "text/plain";

        string cid = context.Request.QueryString["cid"];
        int contractId;

        // cid is untrusted and becomes part of a filesystem path. Requiring it
        // to parse as an integer blocks path traversal ("..\..") and matches
        // what SaveDate needs anyway (it calls Convert.ToInt32 on the value).
        if (!string.IsNullOrEmpty(cid) && int.TryParse(cid, out contractId))
        {
            string sPath = context.Server.MapPath("uploadify");

            // Drop the trailing 10 characters (the length of "\uploadify")
            // to get the parent folder, then descend into OCRFile\<cid>.
            string path = sPath.Substring(0, sPath.Length - 10) + "\\OCRFile\\" + cid;
            CheckFileType(path, cid);
        }

        context.Response.Write(" ");
    }

    /// <summary>
    /// OCRs every .jpg/.tif/.tiff file directly inside
    /// <paramref name="directoryPath"/> and stores the recognized text.
    /// </summary>
    /// <param name="directoryPath">Folder containing the scanned images.</param>
    /// <param name="cid">Contract id; used in the stored relative path and as the record's contract id.</param>
    /// <remarks>
    /// Exceptions from <c>Directory.GetFiles</c> and from MODI are not caught
    /// here; they propagate to the caller with their original stack trace and
    /// stop the processing of the remaining files.
    /// </remarks>
    public void CheckFileType(string directoryPath, string cid)
    {
        foreach (string file in Directory.GetFiles(directoryPath))
        {
            if (!IsSupportedImage(Path.GetExtension(file)))
            {
                continue;
            }

            string strContent = RecognizeText(file);
            string strPath = "uploadify/OCRFile/" + cid + "/" + Path.GetFileName(file);
            SaveDate(strPath, strContent, cid);
        }
    }

    /// <summary>
    /// Returns true when <paramref name="extension"/> is one of the supported
    /// image extensions, ignoring case.
    /// </summary>
    private static bool IsSupportedImage(string extension)
    {
        foreach (string supported in SupportedExtensions)
        {
            if (string.Equals(extension, supported, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Runs Simplified-Chinese OCR on one image file and returns the text of
    /// its first image (index 0). The document is closed without saving and
    /// all COM objects are released even when OCR fails.
    /// </summary>
    /// <param name="file">Full path of the image file.</param>
    /// <returns>The text MODI recognized in the first image of the document.</returns>
    private string RecognizeText(string file)
    {
        MODI._IDocumentEvents_OnOCRProgressEventHandler progressHandler =
            new MODI._IDocumentEvents_OnOCRProgressEventHandler(_document_OnOCRProgress);
        bool opened = false;

        // The event stays signaled once set; reset it so the wait below is
        // about this file and not a previous one.
        _completedOCR.Reset();

        _document = new MODI.Document();
        try
        {
            _document.OnOCRProgress += progressHandler;
            _document.Create(file);
            opened = true;
            _document.OCR(MODI.MiLANGUAGES.miLANG_CHINESE_SIMPLIFIED, true, true);

            // Give the progress callback up to the timeout to report 100%.
            // The result is deliberately ignored: the text is read either way.
            _completedOCR.WaitOne(OcrCompletionTimeoutMs);

            _images = _document.Images;
            _image = (MODI.Image)_images[0];
            _layout = _image.Layout;
            return _layout.Text;
        }
        finally
        {
            try
            {
                _document.OnOCRProgress -= progressHandler;
                if (opened)
                {
                    // false = do not save changes back to the image file.
                    _document.Close(false);
                }
            }
            finally
            {
                ReleaseComObjects();
            }
        }
    }

    /// <summary>
    /// OCR progress callback; signals completion once progress reaches 100.
    /// </summary>
    void _document_OnOCRProgress(int Progress, ref bool Cancel)
    {
        if (Progress == 100)
        {
            _completedOCR.Set();
        }
    }

    /// <summary>
    /// Calls <c>Marshal.FinalReleaseComObject</c> on every non-null argument.
    /// The caller is responsible for clearing its own references afterwards.
    /// </summary>
    private static void SetComObjectToNull(params object[] objects)
    {
        foreach (object o in objects)
        {
            if (o != null)
            {
                Marshal.FinalReleaseComObject(o);
            }
        }
    }

    /// <summary>
    /// Releases the MODI COM objects held in the fields (innermost first) and
    /// clears the fields so a later call cannot release them a second time.
    /// </summary>
    private void ReleaseComObjects()
    {
        try
        {
            SetComObjectToNull(_layout, _image, _images, _document);
        }
        finally
        {
            _layout = null;
            _image = null;
            _images = null;
            _document = null;
        }
    }

    /// <summary>
    /// Releases any MODI COM objects still held and forces a garbage collection.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Dispose()
    {
        ReleaseComObjects();

        // NOTE(review): the forced collection is kept from the original code,
        // presumably so leftover runtime callable wrappers are finalized
        // promptly; confirm it is still required.
        GC.Collect();
        GC.WaitForPendingFinalizers();
    }

    /// <summary>
    /// Inserts one <c>Pt_ContractImg</c> record holding the recognized text.
    /// </summary>
    /// <param name="strPath">Relative path of the image the text came from.</param>
    /// <param name="strContent">Recognized text.</param>
    /// <param name="cid">Contract id; converted with <c>Convert.ToInt32</c>.</param>
    /// <remarks>
    /// Saving stays best-effort, as in the original: a failure does not abort
    /// the remaining files. It is now written to the trace output instead of
    /// being dropped silently.
    /// </remarks>
    public void SaveDate(string strPath, string strContent, string cid)
    {
        try
        {
            Pt_ContractImg img = new Pt_ContractImg();
            img.Content = strContent;
            img.Path = strPath;
            img.ContractID = Convert.ToInt32(cid);
            Pt_ContractImg_DAO.Insert(img);
        }
        catch (Exception ex)
        {
            System.Diagnostics.Trace.TraceError(
                "OCR: failed to save recognized text for '{0}' (cid '{1}'): {2}",
                strPath, cid, ex);
        }
    }

    /// <summary>
    /// Always false: the handler keeps per-request state in instance fields,
    /// so an instance must not be shared between requests.
    /// </summary>
    public bool IsReusable
    {
        get
        {
            return false;
        }
    }
}
}