编译过程中需要一些额外的工具,Mac 上默认可能没有安装。下面列出的是我的电脑上需要安装的工具;如果你的环境有所不同,可根据编译时的错误信息自行查找并安装缺失的工具。
MAC下安装autoconf和automake
- 安装m4
-
# Download, build and install GNU m4 1.4.16 into /usr/local.
curl -O http://mirrors.kernel.org/gnu/m4/m4-1.4.16.tar.gz
tar -xzvf m4-1.4.16.tar.gz
cd m4-1.4.16
./configure --prefix=/usr/local
make
sudo make install
# Return to the parent directory so later downloads do not nest inside this tree.
cd ..
- 安装autoconf
# Download, build and install GNU autoconf 2.69 into /usr/local.
curl -O http://mirrors.kernel.org/gnu/autoconf/autoconf-2.69.tar.gz
tar -xzvf autoconf-2.69.tar.gz
cd autoconf-2.69
./configure --prefix=/usr/local
make
sudo make install
# Return to the parent directory so later downloads do not nest inside this tree.
cd ..
- 安装automake
-
# Download, build and install GNU automake 1.13 into /usr/local.
curl -O http://mirrors.kernel.org/gnu/automake/automake-1.13.tar.gz
tar -xzvf automake-1.13.tar.gz
cd automake-1.13
./configure --prefix=/usr/local
make
sudo make install
# Return to the parent directory so later downloads do not nest inside this tree.
cd ..
- 安装libtool
-
# Download, build and install GNU libtool 2.4 into /usr/local.
curl -O http://mirrors.kernel.org/gnu/libtool/libtool-2.4.tar.gz
tar -xzvf libtool-2.4.tar.gz
cd libtool-2.4
./configure --prefix=/usr/local
make
sudo make install
# Return to the parent directory so later downloads do not nest inside this tree.
cd ..
编译tesseract
在本地新建一个文件夹(如:build_tesseract),将下面 2 个主要组件下载并解压到此文件夹下。命令如下:
# Create the working directory and enter it.
mkdir build_tesseract
cd build_tesseract
在此文件夹下进行下面的命令:
- tesseract-ocr-3.02
# Download the tesseract-ocr 3.02.02 source tarball and unpack it in the current directory.
curl -O http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz
tar -xzvf tesseract-ocr-3.02.02.tar.gz
- leptonica-1.69
# Download the leptonica 1.69 source tarball and unpack it in the current directory.
curl -O http://www.leptonica.com/source/leptonica-1.69.tar.gz
tar -xzvf leptonica-1.69.tar.gz
- 创建编译脚本 build.sh
-
#!/bin/bash
# Written by @kevincon, based on @williamsodell's modifications to @gali8's
# original build_dependencies.sh script.
# See README_howto_compile_libaries.md for full instructions.
#
# Cross-compiles leptonica and tesseract as static libraries for every
# architecture listed in `archs`, merges the per-architecture archives into
# fat libraries with lipo, and copies the results into ./include and ./lib.
#
# This script uses bash arrays, so it must be run by bash (not a plain POSIX sh).
#
# LEPTON_LIB and TESSERACT_LIB may be overridden from the environment so the
# script can be pointed at whatever directories the source tarballs actually
# extracted to, e.g.:
#   LEPTON_LIB="$PWD/leptonica-1.69" TESSERACT_LIB="$PWD/tesseract-ocr" ./build.sh

GLOBAL_OUTDIR="`pwd`/build"
LOCAL_OUTDIR="./outdir"
LEPTON_LIB="${LEPTON_LIB:-`pwd`/leptonica-1.71}"
TESSERACT_LIB="${TESSERACT_LIB:-`pwd`/tesseract-3.03}"

IOS_BASE_SDK="7.0"
IOS_DEPLOY_TGT="7.0"

export CXX=`xcrun -find c++`
export CC=`xcrun -find cc`

export LD=`xcrun -find ld`
export AR=`xcrun -find ar`
export AS=`xcrun -find as`
export NM=`xcrun -find nm`
export RANLIB=`xcrun -find ranlib`

XCODE_DEVELOPER_PATH=/Applications/Xcode.app/Contents/Developer
XCODETOOLCHAIN_PATH=$XCODE_DEVELOPER_PATH/Toolchains/XcodeDefault.xctoolchain
SDK_IPHONEOS_PATH=$(xcrun --sdk iphoneos --show-sdk-path)
SDK_IPHONESIMULATOR_PATH=$(xcrun --sdk iphonesimulator --show-sdk-path)

export PATH="$XCODETOOLCHAIN_PATH/usr/bin:$PATH"

# archs[i] selects the setenv_<arch> function; arch_names[i] is the matching
# --host triplet passed to ./configure. The two arrays must stay index-aligned.
declare -a archs
archs=(arm7 arm7s arm64 i386 x86_64)

declare -a arch_names
arch_names=(arm-apple-darwin7 arm-apple-darwin7s arm-apple-darwin64 i386-apple-darwin x86_64-apple-darwin)

# Appends the shared include/lib paths to the per-arch CFLAGS and mirrors them
# into CPPFLAGS/CXXFLAGS. Expects SDKROOT and CFLAGS to be set by the caller.
setenv_all() {
    # Add internal libs
    export CFLAGS="$CFLAGS -I$GLOBAL_OUTDIR/include -L$GLOBAL_OUTDIR/lib -Qunused-arguments"

    export LDFLAGS="-L$SDKROOT/usr/lib/"

    export CPPFLAGS=$CFLAGS
    export CXXFLAGS=$CFLAGS
}

setenv_arm7() {
    unset DEVROOT SDKROOT CFLAGS CPP CXXCPP LDFLAGS CPPFLAGS CXXFLAGS

    export SDKROOT=$SDK_IPHONEOS_PATH

    export CFLAGS="-arch armv7 -pipe -no-cpp-precomp -isysroot $SDKROOT -miphoneos-version-min=$IOS_DEPLOY_TGT -I$SDKROOT/usr/include/"

    setenv_all
}

setenv_arm7s() {
    unset DEVROOT SDKROOT CFLAGS CPP CXXCPP LDFLAGS CPPFLAGS CXXFLAGS

    export SDKROOT=$SDK_IPHONEOS_PATH

    export CFLAGS="-arch armv7s -pipe -no-cpp-precomp -isysroot $SDKROOT -miphoneos-version-min=$IOS_DEPLOY_TGT -I$SDKROOT/usr/include/"

    setenv_all
}

setenv_arm64() {
    unset DEVROOT SDKROOT CFLAGS CPP CXXCPP LDFLAGS CPPFLAGS CXXFLAGS

    export SDKROOT=$SDK_IPHONEOS_PATH

    export CFLAGS="-arch arm64 -pipe -no-cpp-precomp -isysroot $SDKROOT -miphoneos-version-min=$IOS_DEPLOY_TGT -I$SDKROOT/usr/include/"

    setenv_all
}

setenv_i386() {
    unset DEVROOT SDKROOT CFLAGS CPP CXXCPP LDFLAGS CPPFLAGS CXXFLAGS

    export SDKROOT=$SDK_IPHONESIMULATOR_PATH

    export CFLAGS="-arch i386 -pipe -no-cpp-precomp -isysroot $SDKROOT -miphoneos-version-min=$IOS_DEPLOY_TGT"

    setenv_all
}

setenv_x86_64() {
    unset DEVROOT SDKROOT CFLAGS CPP CXXCPP LDFLAGS CPPFLAGS CXXFLAGS

    export SDKROOT=$SDK_IPHONESIMULATOR_PATH

    export CFLAGS="-arch x86_64 -pipe -no-cpp-precomp -isysroot $SDKROOT -miphoneos-version-min=$IOS_DEPLOY_TGT"

    setenv_all
}

# For every lib*.a built for i386, locates the same-named archive in the other
# per-arch output directories (by substituting the arch name in the path) and
# combines all of them into one fat archive directly under $LOCAL_OUTDIR.
create_outdir_lipo() {
    for file in `find $LOCAL_OUTDIR/i386 -name "lib*.a"`; do
        lib_arm7=`echo $file | sed "s/i386/arm7/g"`
        lib_arm7s=`echo $file | sed "s/i386/arm7s/g"`
        lib_arm64=`echo $file | sed "s/i386/arm64/g"`
        lib_x86_64=`echo $file | sed "s/i386/x86_64/g"`
        lib_i386=`echo $file`
        lib=`echo $file | sed "s/i386//g"`
        xcrun -sdk iphoneos lipo -arch armv7s $lib_arm7s -arch armv7 $lib_arm7 -arch arm64 $lib_arm64 -arch i386 $lib_i386 -arch x86_64 $lib_x86_64 -create -output $lib
    done
}

# merge_libfiles DIR LIBNAME
# Extracts the objects of every lib*.a found under DIR and re-archives them
# into a single archive named LIBNAME inside DIR.
merge_libfiles() {
    DIR=$1
    LIBNAME=$2

    # Without this guard a failed cd would extract and delete *.o in the
    # caller's directory.
    cd "$DIR" || return 1
    for i in `find . -name "lib*.a"`; do
        $AR -x $i
    done
    $AR -r $LIBNAME *.o
    rm -rf *.o __*
    cd -
}

#######################
# Sanity checks
#######################
# Fail early with a clear message instead of running make/rm in the wrong
# directory when the source trees are not where the script expects them.
for src_dir in "$LEPTON_LIB" "$TESSERACT_LIB"; do
    if [ ! -d "$src_dir" ]; then
        echo "error: source directory not found: $src_dir" >&2
        echo "       set LEPTON_LIB / TESSERACT_LIB to the extracted source directories" >&2
        exit 1
    fi
done

#######################
# Start clean
#######################
rm -rf "$GLOBAL_OUTDIR" lib include

#######################
# LEPTONLIB
#######################
cd "$LEPTON_LIB" || exit 1
rm -rf $LOCAL_OUTDIR

for n in "${!archs[@]}"
do
    mkdir -p "$LOCAL_OUTDIR/${archs[$n]}"
    make clean 2> /dev/null
    make distclean 2> /dev/null
    eval "setenv_${archs[$n]}"
    ./configure --host="${arch_names[$n]}" --enable-shared=no --disable-programs --without-zlib --without-libpng --without-jpeg --without-giflib --without-libtiff
    make -j12
    cp -rvf src/.libs/lib*.a "$LOCAL_OUTDIR/${archs[$n]}"
done

create_outdir_lipo
mkdir -p $GLOBAL_OUTDIR/include/leptonica && find ./ -name '*.h' -exec cp {} $GLOBAL_OUTDIR/include/leptonica/ \;
mkdir -p $GLOBAL_OUTDIR/lib && cp -rvf $LOCAL_OUTDIR/lib*.a $GLOBAL_OUTDIR/lib
cd ..

#######################
# TESSERACT-OCR
#######################
cd "$TESSERACT_LIB" || exit 1
rm -rf $LOCAL_OUTDIR

for n in "${!archs[@]}"
do
    mkdir -p "$LOCAL_OUTDIR/${archs[$n]}"
    make clean 2> /dev/null
    make distclean 2> /dev/null
    eval "setenv_${archs[$n]}"
    bash autogen.sh
    LIBLEPT_HEADERSDIR=$GLOBAL_OUTDIR/include ./configure --host="${arch_names[$n]}" --enable-shared=no --disable-graphics
    make -j12
    # Collect every static library produced by this build (excluding earlier
    # copies under $LOCAL_OUTDIR) and merge them into one archive per arch.
    for i in `find . -name "lib*.a" | grep -v $LOCAL_OUTDIR`; do cp -rvf $i "$LOCAL_OUTDIR/${archs[$n]}"; done
    merge_libfiles "$LOCAL_OUTDIR/${archs[$n]}" libtesseract_all.a
done

create_outdir_lipo
mkdir -p $GLOBAL_OUTDIR/include/tesseract && find ./ -name '*.h' -exec cp {} $GLOBAL_OUTDIR/include/tesseract/ \;
mkdir -p $GLOBAL_OUTDIR/lib && cp -rvf $LOCAL_OUTDIR/lib*.a $GLOBAL_OUTDIR/lib
make clean 2> /dev/null
make distclean 2> /dev/null
cd ..

#######################
# Copying
#######################
cp -rf $GLOBAL_OUTDIR/include .
mkdir -p lib
cp -rf $GLOBAL_OUTDIR/lib/libtesseract_all.a $GLOBAL_OUTDIR/lib/liblept.a lib/

echo "Finished!"
-
- 给 build.sh 添加执行权限,并执行
# Make the script executable, then run it.
chmod +x build.sh
./build.sh
注意:有可能会报这个错误:autoheader: error: AC_CONFIG_HEADERS not found in configure.ac
这时需要修改 tesseract-3.02.02/configure.ac 文件
#AM_INIT_AUTOMAKE(dist-zip)
AM_INIT_AUTOMAKE
#AM_CONFIG_HEADER(config_auto.h:config/config.h.in)   # 将这句注释
AC_CONFIG_HEADER(config_auto.h:config/config.h.in)    # 修改为这个
-
- 使用
- Tesseract.h
//
//  Tesseract.h
//  Tesseract
//

#import <Foundation/Foundation.h>
// UIImage is used in the public interface below, so UIKit must be imported here.
#import <UIKit/UIKit.h>

/// Objective-C wrapper around the C++ tesseract::TessBaseAPI OCR engine.
@interface Tesseract : NSObject {
    NSString* _dataPath;              // Name of the data folder passed to the initializer.
    NSString* _language;              // Language code passed to the engine.
    NSMutableDictionary* _variables;  // Engine variables set through -setVariableValue:forKey:.
}

/// Version string reported by the underlying Tesseract library.
+ (NSString *)version;

/// Creates a recognizer and initializes the engine.
/// @param dataPath Name of the data folder (e.g. @"tessdata"); it is looked up
///                 inside the bundle and copied into the Documents directory.
/// @param language Language code handed to the engine (e.g. @"eng").
/// @return The initialized instance, or nil if the engine could not be initialized.
- (id)initWithDataPath:(NSString *)dataPath language:(NSString *)language;

/// Stores an engine variable and applies it to the engine immediately.
/// @param value The variable's value, e.g. @"0123456789".
/// @param key   The variable's name, e.g. @"tessedit_char_whitelist".
- (void)setVariableValue:(NSString *)value forKey:(NSString *)key;

/// Sets the image that the next call to -recognize operates on.
- (void)setImage:(UIImage *)image;

/// Changes the recognition language and re-initializes the engine.
/// @param language Language code handed to the engine.
/// @return A BOOL status for the re-initialization.
- (BOOL)setLanguage:(NSString *)language;

/// Runs recognition on the current image.
/// @return YES when the engine returns code 0, NO otherwise.
- (BOOL)recognize;

/// The text produced by the engine for the current image, decoded from UTF-8.
- (NSString *)recognizedText;

@end
- Tesseract.mm
//
//  Tesseract.mm
//  Tesseract
//

#import "Tesseract.h"

#import "baseapi.h"
#import "environ.h"
#import "pix.h"

namespace tesseract {
    class TessBaseAPI;
};

@interface Tesseract () {
    tesseract::TessBaseAPI* _tesseract;  // Owned C++ engine; deleted in -dealloc.
    uint32_t* _pixels;                   // Owned RGBA buffer backing the current image.
}

@end

@implementation Tesseract

/// Version string reported by the underlying Tesseract library.
+ (NSString *)version {
    return [NSString stringWithFormat:@"%s", tesseract::TessBaseAPI::Version()];
}

/// Copies the data folder into Documents, creates the engine and initializes it.
/// Returns nil when the engine fails to initialize.
- (id)initWithDataPath:(NSString *)dataPath language:(NSString *)language {
    self = [super init];
    if (self) {
        _dataPath = dataPath;
        _language = language;
        _variables = [[NSMutableDictionary alloc] init];

        [self copyDataToDocumentsDirectory];
        _tesseract = new tesseract::TessBaseAPI();

        if (![self initEngine]) {
            // An initializer signals failure with nil (not NO); -dealloc
            // releases the engine allocated above.
            return nil;
        }
    }
    return self;
}

/// Releases the C++ engine and the pixel buffer.
/// NOTE(review): written for ARC (no [super dealloc]); add it if this file is
/// compiled with manual reference counting.
- (void)dealloc {
    if (_tesseract != NULL) {
        _tesseract->End();
        delete _tesseract;
        _tesseract = NULL;
    }
    free(_pixels);
    _pixels = NULL;
}

/// (Re)initializes the engine with the current data path and language.
/// @return YES when TessBaseAPI::Init returns 0.
- (BOOL)initEngine {
    int returnCode = _tesseract->Init([_dataPath UTF8String], [_language UTF8String]);
    return (returnCode == 0) ? YES : NO;
}

/// Copies the bundled data folder into the Documents directory (once) and
/// points TESSDATA_PREFIX at the Documents directory.
- (void)copyDataToDocumentsDirectory {
    // Useful paths
    NSFileManager *fileManager = [NSFileManager defaultManager];
    NSArray *documentPaths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES);
    NSString *documentPath = ([documentPaths count] > 0) ? [documentPaths objectAtIndex:0] : nil;
    if (documentPath == nil) {
        // Without a Documents directory there is nowhere to copy to, and
        // passing a NULL value to setenv() would crash.
        NSLog(@"Tesseract: could not locate the Documents directory");
        return;
    }

    if (_dataPath != nil) {
        NSString *dataPath = [documentPath stringByAppendingPathComponent:_dataPath];

        // Copy data in Doc Directory
        if (![fileManager fileExistsAtPath:dataPath]) {
            NSString *bundlePath = [[NSBundle bundleForClass:[self class]] bundlePath];
            NSString *tessdataPath = [bundlePath stringByAppendingPathComponent:_dataPath];
            if (tessdataPath) {
                NSError *error = nil;
                if (![fileManager createDirectoryAtPath:documentPath
                            withIntermediateDirectories:YES
                                             attributes:nil
                                                  error:&error]) {
                    NSLog(@"Tesseract: could not create %@: %@", documentPath, error);
                }
                if (![fileManager copyItemAtPath:tessdataPath toPath:dataPath error:&error]) {
                    NSLog(@"Tesseract: could not copy %@ to %@: %@", tessdataPath, dataPath, error);
                }
            }
        }
    }

    setenv("TESSDATA_PREFIX", [[documentPath stringByAppendingString:@"/"] UTF8String], 1);
}

/// Remembers the variable (so it can be re-applied after a language change)
/// and applies it to the engine.
- (void)setVariableValue:(NSString *)value forKey:(NSString *)key {
    /*
     * Example:
     * _tesseract->SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");
     * _tesseract->SetVariable("language_model_penalty_non_freq_dict_word", "0");
     * _tesseract->SetVariable("language_model_penalty_non_dict_word ", "0");
     */
    if (key == nil || value == nil) {
        // A nil key or value would hand a NULL C string to SetVariable.
        return;
    }

    [_variables setValue:value forKey:key];
    _tesseract->SetVariable([key UTF8String], [value UTF8String]);
}

/// Re-applies every remembered variable to the engine.
- (void)loadVariables {
    for (NSString* key in _variables) {
        NSString* value = [_variables objectForKey:key];
        _tesseract->SetVariable([key UTF8String], [value UTF8String]);
    }
}

/// Switches the recognition language, re-initializes the engine and restores
/// the remembered variables.
/// @return YES when the engine was re-initialized successfully.
- (BOOL)setLanguage:(NSString *)language {
    _language = language;

    // -initEngine returns a BOOL (YES on success), not the raw return code.
    if (![self initEngine]) {
        return NO;
    }

    /*
     * "WARNING: On changing languages, all Tesseract parameters
     * are reset back to their default values."
     */
    [self loadVariables];
    return YES;
}

/// Runs recognition on the current image.
/// @return YES when TessBaseAPI::Recognize returns 0.
- (BOOL)recognize {
    int returnCode = _tesseract->Recognize(NULL);
    return (returnCode == 0) ? YES : NO;
}

/// Returns the recognized text, or nil when the engine produced none.
- (NSString *)recognizedText {
    char* utf8Text = _tesseract->GetUTF8Text();
    if (utf8Text == NULL) {
        // +stringWithUTF8String: raises on a NULL argument.
        return nil;
    }

    NSString *text = [NSString stringWithUTF8String:utf8Text];
    // The caller owns the buffer returned by GetUTF8Text and must delete[] it.
    delete[] utf8Text;
    return text;
}

/// Renders the image into an RGBA buffer and hands that buffer to the engine.
/// An image with a non-positive width or height is ignored and the previously
/// set image (if any) stays in place.
- (void)setImage:(UIImage *)image {
    CGSize size = [image size];
    int width = size.width;
    int height = size.height;

    if (width <= 0 || height <= 0) {
        return;
    }

    // Zero-filled so any transparency is preserved.
    uint32_t *pixels = (uint32_t *) calloc((size_t) width * (size_t) height, sizeof(uint32_t));
    if (pixels == NULL) {
        return;
    }

    // Replace the old buffer only once the new one exists, so _pixels never
    // points at freed memory.
    free(_pixels);
    _pixels = pixels;

    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();

    // Create a context with RGBA _pixels
    CGContextRef context = CGBitmapContextCreate(_pixels, width, height, 8, width * sizeof(uint32_t), colorSpace,
                                                 kCGBitmapByteOrder32Little | kCGImageAlphaPremultipliedLast);

    // Paint the bitmap to our context which will fill in the _pixels array
    CGContextDrawImage(context, CGRectMake(0, 0, width, height), [image CGImage]);

    // We're done with the context and color space
    CGContextRelease(context);
    CGColorSpaceRelease(colorSpace);

    _tesseract->SetImage((const unsigned char *) _pixels, width, height, sizeof(uint32_t), width * sizeof(uint32_t));
}

@end
Code Sample
#import "Tesseract.h"

// Create a recognizer for the "eng" language using the "tessdata" folder.
Tesseract *ocr = [[Tesseract alloc] initWithDataPath:@"tessdata" language:@"eng"];

// Character whitelist (digits only).
[ocr setVariableValue:@"0123456789" forKey:@"tessedit_char_whitelist"];

// The captcha image to recognize.
UIImage *captchaImage = [UIImage imageNamed:@"image_sample.jpg"];
[ocr setImage:captchaImage];

// Run recognition and print the result.
[ocr recognize];
NSString *recognized = [ocr recognizedText];
NSLog(@"%@", recognized);