tesseract-ocr 3.02 iPhone 编译(Xcode 6,iOS SDK 8)

时间:2022-04-22 08:58:46

编译过程中需要一些额外的工具,Mac 上可能没有预装。我的电脑上需要安装下面这些;如果你的环境有所不同,可根据错误信息自行查找并安装缺失的工具。

Mac 下安装 m4、autoconf、automake 和 libtool

  • 安装m4
    curl -O http://mirrors.kernel.org/gnu/m4/m4-1.4.16.tar.gz
    tar -xzvf m4-1.4.16.tar.gz
    cd m4-1.4.16
    ./configure --prefix=/usr/local
    make
    sudo make install


  • 安装autoconf
curl -O http://mirrors.kernel.org/gnu/autoconf/autoconf-2.69.tar.gz
tar -xzvf autoconf-2.69.tar.gz
cd autoconf-2.69
./configure --prefix=/usr/local
make
sudo make install


  • 安装automake

    curl -O http://mirrors.kernel.org/gnu/automake/automake-1.13.tar.gz
    tar -xzvf automake-1.13.tar.gz
    cd automake-1.13
    ./configure --prefix=/usr/local
    make
    sudo make install


  • 安装libtool
    curl -O http://mirrors.kernel.org/gnu/libtool/libtool-2.4.tar.gz
    tar -xzvf libtool-2.4.tar.gz
    cd libtool-2.4
    ./configure --prefix=/usr/local
    make
    sudo make install


编译tesseract

在本地新建一个文件夹(如 build_tesseract),并将下面 2 个主要组件下载、解压到此文件夹下。命令如下:

mkdir build_tesseract
cd build_tesseract



在此文件夹下进行下面的命令:

  • tesseract-ocr-3.02
curl -O http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz
tar -xzvf tesseract-ocr-3.02.02.tar.gz


  • leptonica-1.69
curl -O http://www.leptonica.com/source/leptonica-1.69.tar.gz
tar -xzvf leptonica-1.69.tar.gz


  • 创建编译脚本 build.sh
  • #!/bin/sh
    
    # Written by @kevincon, based on @williamsodell's modifications to @gali8's
    # original build_dependencies.sh script.
    
    # See README_howto_compile_libaries.md for full instructions.
    
    GLOBAL_OUTDIR="`pwd`/build"
    LOCAL_OUTDIR="./outdir"
    LEPTON_LIB="`pwd`/leptonica-1.71"
    TESSERACT_LIB="`pwd`/tesseract-3.03"
    
    IOS_BASE_SDK="7.0"
    IOS_DEPLOY_TGT="7.0"
    
    export CXX=`xcrun -find c++`
    export CC=`xcrun -find cc`
    
    export LD=`xcrun -find ld`
    export AR=`xcrun -find ar`
    export AS=`xcrun -find as`
    export NM=`xcrun -find nm`
    export RANLIB=`xcrun -find ranlib`
    
    XCODE_DEVELOPER_PATH=/Applications/Xcode.app/Contents/Developer
    XCODETOOLCHAIN_PATH=$XCODE_DEVELOPER_PATH/Toolchains/XcodeDefault.xctoolchain
    SDK_IPHONEOS_PATH=$(xcrun --sdk iphoneos --show-sdk-path)
    SDK_IPHONESIMULATOR_PATH=$(xcrun --sdk iphonesimulator --show-sdk-path)
    
    export PATH="$XCODETOOLCHAIN_PATH/usr/bin:$PATH"
    
    declare -a archs
    archs=(arm7 arm7s arm64 i386 x86_64)
    
    declare -a arch_name
    arch_names=(arm-apple-darwin7 arm-apple-darwin7s arm-apple-darwin64 i386-apple-darwin x86_64-apple-darwin)
    
    # Finalises the compiler environment for the architecture selected by the
    # calling setenv_<arch> function: appends the shared include/lib search
    # paths to CFLAGS, points the linker at the SDK libraries, and mirrors
    # CFLAGS into CPPFLAGS and CXXFLAGS.
    # Reads: CFLAGS, GLOBAL_OUTDIR, SDKROOT. Exports: CFLAGS, LDFLAGS,
    # CPPFLAGS, CXXFLAGS.
    setenv_all() {
        # Add internal libs
        CFLAGS="$CFLAGS -I$GLOBAL_OUTDIR/include -L$GLOBAL_OUTDIR/lib -Qunused-arguments"
        LDFLAGS="-L$SDKROOT/usr/lib/"
        CPPFLAGS=$CFLAGS
        CXXFLAGS=$CFLAGS
        export CFLAGS LDFLAGS CPPFLAGS CXXFLAGS
    }
    
    # _setenv_arch CLANG_ARCH SDK_PATH [EXTRA_CFLAGS]
    # Shared body of the setenv_<arch> functions: resets the compiler
    # environment, selects the SDK and builds the base CFLAGS for one
    # architecture, then lets setenv_all add the common flags.
    #   CLANG_ARCH    value passed to -arch (armv7, armv7s, arm64, i386, x86_64)
    #   SDK_PATH      SDK root used for -isysroot and exported as SDKROOT
    #   EXTRA_CFLAGS  optional text appended verbatim to CFLAGS
    _setenv_arch() {
        unset DEVROOT SDKROOT CFLAGS CPP CXXCPP LDFLAGS CPPFLAGS CXXFLAGS
    
        export SDKROOT="$2"
    
        export CFLAGS="-arch $1 -pipe -no-cpp-precomp -isysroot $SDKROOT -miphoneos-version-min=$IOS_DEPLOY_TGT$3"
    
        setenv_all
    }
    
    # Device architectures: built against the iphoneos SDK, with the SDK's
    # usr/include added explicitly to the header search path.
    setenv_arm7() {
        _setenv_arch armv7 "$SDK_IPHONEOS_PATH" " -I$SDK_IPHONEOS_PATH/usr/include/"
    }
    
    setenv_arm7s() {
        _setenv_arch armv7s "$SDK_IPHONEOS_PATH" " -I$SDK_IPHONEOS_PATH/usr/include/"
    }
    
    setenv_arm64() {
        _setenv_arch arm64 "$SDK_IPHONEOS_PATH" " -I$SDK_IPHONEOS_PATH/usr/include/"
    }
    
    # Simulator architectures: built against the iphonesimulator SDK, without
    # the extra -I flag.
    setenv_i386() {
        _setenv_arch i386 "$SDK_IPHONESIMULATOR_PATH"
    }
    
    setenv_x86_64() {
        _setenv_arch x86_64 "$SDK_IPHONESIMULATOR_PATH"
    }
    
    # For every static library found under $LOCAL_OUTDIR/i386, derives the
    # paths of the same library in the other architecture directories and
    # merges all five slices with lipo into one universal library placed
    # directly under $LOCAL_OUTDIR.
    create_outdir_lipo() {
        for file in $(find $LOCAL_OUTDIR/i386 -name "lib*.a"); do
            # The i386 path is the template; replacing "i386" yields the
            # sibling architecture directories, removing it yields the output.
            lib_i386=$file
            lib_arm7=${file//i386/arm7}
            lib_arm7s=${file//i386/arm7s}
            lib_arm64=${file//i386/arm64}
            lib_x86_64=${file//i386/x86_64}
            lib=${file//i386/}
            xcrun -sdk iphoneos lipo \
                -arch armv7s $lib_arm7s \
                -arch armv7 $lib_arm7 \
                -arch arm64 $lib_arm64 \
                -arch i386 $lib_i386 \
                -arch x86_64 $lib_x86_64 \
                -create -output $lib
        done
    }
    
    # merge_libfiles DIR LIBNAME
    # Unpacks every lib*.a found under DIR into object files and re-archives
    # all of them into the single static library DIR/LIBNAME, then removes the
    # temporary object files.
    # Returns 1 without touching anything when DIR cannot be entered.
    merge_libfiles() {
        DIR=$1
        LIBNAME=$2
    
        # Bail out if the directory is missing: otherwise the rm -rf below
        # would delete object files from whatever directory we are in.
        cd "$DIR" || return 1
        for i in `find . -name "lib*.a"`; do
            $AR -x $i
        done
        $AR -r $LIBNAME *.o
        rm -rf *.o __*
        cd -
    }
    
    #######################
    # Start clean
    #######################
    
    # Remove the results of any previous run (collected build dir and the
    # lib/include copies published next to the script).
    rm -rf $GLOBAL_OUTDIR lib include
    
    #######################
    # LEPTONLIB
    #######################
    # Stop if the source tree is missing: the rm/make commands below must not
    # run in the wrong directory.
    cd "$LEPTON_LIB" || { echo "Cannot enter Leptonica source dir: $LEPTON_LIB" >&2; exit 1; }
    rm -rf $LOCAL_OUTDIR
    
    # Build one static library per architecture.
    for n in "${!archs[@]}"
    do
        mkdir -p "$LOCAL_OUTDIR/${archs[$n]}"
        # Best-effort cleanup of the previous architecture's build products.
        make clean 2> /dev/null
        make distclean 2> /dev/null
        eval "setenv_${archs[$n]}"
        ./configure --host="${arch_names[$n]}" --enable-shared=no --disable-programs --without-zlib --without-libpng --without-jpeg --without-giflib --without-libtiff || exit 1
        make -j12 || exit 1
        cp -rvf src/.libs/lib*.a "$LOCAL_OUTDIR/${archs[$n]}"
    done
    
    # Merge the per-architecture libraries, then collect headers and the
    # universal library in the global output directory.
    create_outdir_lipo
    mkdir -p $GLOBAL_OUTDIR/include/leptonica && find ./ -name '*.h' -exec cp {} $GLOBAL_OUTDIR/include/leptonica/  \;
    mkdir -p $GLOBAL_OUTDIR/lib && cp -rvf $LOCAL_OUTDIR/lib*.a $GLOBAL_OUTDIR/lib
    cd ..
    
    
    #######################
    # TESSERACT-OCR
    #######################
    # Stop if the source tree is missing: the rm/make commands below must not
    # run in the wrong directory.
    cd "$TESSERACT_LIB" || { echo "Cannot enter Tesseract source dir: $TESSERACT_LIB" >&2; exit 1; }
    rm -rf $LOCAL_OUTDIR
    
    # Build all Tesseract static libraries once per architecture and fold them
    # into a single libtesseract_all.a for that architecture.
    for n in "${!archs[@]}"
    do
        mkdir -p "$LOCAL_OUTDIR/${archs[$n]}"
        # Best-effort cleanup of the previous architecture's build products.
        make clean 2> /dev/null
        make distclean 2> /dev/null
        eval "setenv_${archs[$n]}"
        bash autogen.sh
        # LIBLEPT_HEADERSDIR points configure at the Leptonica headers
        # collected in the global output directory.
        LIBLEPT_HEADERSDIR=$GLOBAL_OUTDIR/include ./configure --host="${arch_names[$n]}" --enable-shared=no --disable-graphics || exit 1
        make -j12 || exit 1
        # Collect every freshly built lib*.a, skipping anything already
        # inside the scratch output directory.
        for i in `find . -name "lib*.a" | grep -v $LOCAL_OUTDIR`; do cp -rvf $i "$LOCAL_OUTDIR/${archs[$n]}"; done
        merge_libfiles "$LOCAL_OUTDIR/${archs[$n]}" libtesseract_all.a
    done
    
    create_outdir_lipo
    
    mkdir -p $GLOBAL_OUTDIR/include/tesseract && find ./ -name '*.h' -exec cp {} $GLOBAL_OUTDIR/include/tesseract/  \;
    mkdir -p $GLOBAL_OUTDIR/lib && cp -rvf $LOCAL_OUTDIR/lib*.a $GLOBAL_OUTDIR/lib
    make clean 2> /dev/null
    make distclean 2> /dev/null
    cd ..
    
    #######################
    # Copying
    #######################
    
    # Publish the collected headers and the two universal libraries in
    # ./include and ./lib of the current directory.
    cp -rf $GLOBAL_OUTDIR/include .
    mkdir -p lib
    cp -rf \
        $GLOBAL_OUTDIR/lib/libtesseract_all.a \
        $GLOBAL_OUTDIR/lib/liblept.a \
        lib/
    
    echo "Finished!"
      • 给 build.sh 添加执行权限,并执行

     

    chmod +x build.sh
    ./build.sh

    注意:有可能会报这个错误:
    autoheader: error: AC_CONFIG_HEADERS not found in configure.ac

    这时需要修改 tesseract 源码目录(即解压 tesseract-ocr-3.02.02.tar.gz 得到的目录)下的 configure.ac 文件:

    #AM_INIT_AUTOMAKE(dist-zip)
     AM_INIT_AUTOMAKE
    # 将下面这句注释掉:
    #AM_CONFIG_HEADER(config_auto.h:config/config.h.in)
    # 修改为下面这句:
    AC_CONFIG_HEADER(config_auto.h:config/config.h.in)
    


  • 使用

  • Tesseract.h
//
//  Tesseract.h
//  Tesseract
//
#import <Foundation/Foundation.h>
#import <UIKit/UIKit.h>

/// Objective-C wrapper around the Tesseract OCR engine (tesseract::TessBaseAPI).
@interface Tesseract : NSObject {
    // Data path given to the initializer.
    NSString* _dataPath;
    // Language code currently used to initialise the engine.
    NSString* _language;
    // Variables set through -setVariableValue:forKey:, kept so they can be
    // re-applied after the engine is re-initialised.
    NSMutableDictionary* _variables;
}

/// Returns the version string reported by the linked Tesseract library.
+ (NSString *)version;

/// Creates the wrapper and initialises the OCR engine.
/// @param dataPath Name of the trained-data directory (e.g. @"tessdata"),
///        relative to the bundle; it is copied into the Documents directory
///        if it is not there yet.
/// @param language Language code handed to the engine (e.g. @"eng").
/// @return The initialised object, or nil when the engine cannot be initialised.
- (instancetype)initWithDataPath:(NSString *)dataPath language:(NSString *)language;

/// Stores an engine variable and applies it to the engine immediately.
/// @param value Variable value, e.g. @"0123456789".
/// @param key Variable name, e.g. @"tessedit_char_whitelist".
- (void)setVariableValue:(NSString *)value forKey:(NSString *)key;

/// Renders the image into a 32-bit pixel buffer and hands it to the engine.
/// @param image The image to recognise.
- (void)setImage:(UIImage *)image;

/// Changes the recognition language and re-initialises the engine with it.
/// @param language Language code handed to the engine.
/// @return A BOOL reporting the outcome of the re-initialisation.
- (BOOL)setLanguage:(NSString *)language;

/// Runs recognition on the current image.
/// @return YES when the engine reports success (return code 0).
- (BOOL)recognize;

/// Returns the text recognised by the engine, decoded from UTF-8.
- (NSString *)recognizedText;

@end
  • Tesseract.mm
//
//  Tesseract.mm
//  Tesseract
//

#import "Tesseract.h"

#import "baseapi.h"
#import "environ.h"
#import "pix.h"

// Forward declaration of the C++ engine class used by the ivar declared in
// the class extension.
namespace tesseract { class TessBaseAPI; }

// Private state: the C++ engine instance and the raw pixel buffer that
// -setImage: fills and passes to the engine.
@interface Tesseract ()
{
    tesseract::TessBaseAPI *_tesseract;  // created with new in the initializer
    uint32_t *_pixels;                   // malloc'ed, one uint32_t per pixel
}
@end

@implementation Tesseract

#pragma mark - Class methods

/// Returns the version string reported by the linked Tesseract library.
+ (NSString *)version {
    return [NSString stringWithFormat:@"%s", tesseract::TessBaseAPI::Version()];
}

#pragma mark - Lifecycle

/// Stores the configuration, copies the trained data into the Documents
/// directory, creates the C++ engine and initialises it.
/// @return The initialised object, or nil when the engine cannot be initialised.
- (instancetype)initWithDataPath:(NSString *)dataPath language:(NSString *)language {
    self = [super init];
    if (self) {
        _dataPath = [dataPath copy];
        _language = [language copy];
        _variables = [[NSMutableDictionary alloc] init];

        [self copyDataToDocumentsDirectory];
        _tesseract = new tesseract::TessBaseAPI();

        if (![self initEngine]) {
            // An initializer reports failure with nil (not the BOOL NO).
            return nil;
        }
    }
    return self;
}

/// Releases the resources that are not managed by ARC: the C++ engine and
/// the malloc'ed pixel buffer.
/// NOTE(review): written for ARC (no [super dealloc]) - confirm the target
/// is compiled with ARC enabled.
- (void)dealloc {
    // Destroy the engine first so it never outlives the buffer it was given.
    delete _tesseract;
    _tesseract = NULL;

    free(_pixels);
    _pixels = NULL;
}

#pragma mark - Engine setup

/// (Re-)initialises the engine with the stored data path and language.
/// @return YES when TessBaseAPI::Init returns 0.
- (BOOL)initEngine {
    int returnCode = _tesseract->Init([_dataPath UTF8String], [_language UTF8String]);
    return returnCode == 0;
}

/// Copies the trained-data directory from the bundle into the Documents
/// directory (once) and points TESSDATA_PREFIX at the Documents directory.
- (void)copyDataToDocumentsDirectory {

    // Useful paths
    NSFileManager *fileManager = [NSFileManager defaultManager];
    NSArray *documentPaths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES);
    NSString *documentPath = [documentPaths firstObject];
    if (documentPath == nil) {
        // Without a Documents directory there is nowhere to copy to, and
        // setenv() must not be handed a NULL value.
        return;
    }
    NSString *dataPath = [documentPath stringByAppendingPathComponent:_dataPath];

    // Copy data in Doc Directory
    if (![fileManager fileExistsAtPath:dataPath]) {
        NSString *bundlePath = [[NSBundle bundleForClass:[self class]] bundlePath];
        NSString *tessdataPath = [bundlePath stringByAppendingPathComponent:_dataPath];
        if (tessdataPath) {
            NSError *copyError = nil;
            [fileManager createDirectoryAtPath:documentPath withIntermediateDirectories:YES attributes:nil error:NULL];
            // Check the return value (not the error pointer) and report the
            // failure instead of dropping it silently.
            if (![fileManager copyItemAtPath:tessdataPath toPath:dataPath error:&copyError]) {
                NSLog(@"Tesseract: failed to copy %@ to %@: %@", tessdataPath, dataPath, copyError);
            }
        }
    }

    setenv("TESSDATA_PREFIX", [[documentPath stringByAppendingString:@"/"] UTF8String], 1);
}

#pragma mark - Variables

/// Remembers the variable and applies it to the engine right away.
/// Calls with a nil key or value are ignored.
- (void)setVariableValue:(NSString *)value forKey:(NSString *)key {
    /*
     * Example:
     * _tesseract->SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");
     * _tesseract->SetVariable("language_model_penalty_non_freq_dict_word", "0");
     * _tesseract->SetVariable("language_model_penalty_non_dict_word ", "0");
     */

    if (key == nil || value == nil) {
        // -UTF8String on nil yields NULL, which must not reach SetVariable.
        return;
    }

    [_variables setValue:value forKey:key];
    _tesseract->SetVariable([key UTF8String], [value UTF8String]);
}

/// Re-applies every remembered variable to the engine.
- (void)loadVariables {
    for (NSString* key in _variables) {
        NSString* value = [_variables objectForKey:key];
        _tesseract->SetVariable([key UTF8String], [value UTF8String]);
    }
}

/// Switches the engine to another language.
/// @return YES when the engine was re-initialised successfully, NO otherwise.
- (BOOL)setLanguage:(NSString *)language {
    _language = [language copy];

    // -initEngine returns a BOOL (YES on success), not a C return code.
    if (![self initEngine]) {
        return NO;
    }

    /*
     * "WARNING: On changing languages, all Tesseract parameters
     * are reset back to their default values."
     */
    [self loadVariables];
    return YES;
}

#pragma mark - Recognition

/// Runs recognition on the current image.
/// @return YES when TessBaseAPI::Recognize returns 0.
- (BOOL)recognize {
    int returnCode = _tesseract->Recognize(NULL);
    return returnCode == 0;
}

/// Returns the recognised text, or nil when the engine returns no text.
- (NSString *)recognizedText {
    char* utf8Text = _tesseract->GetUTF8Text();
    if (utf8Text == NULL) {
        // stringWithUTF8String: must not be given NULL.
        return nil;
    }

    NSString *text = [NSString stringWithUTF8String:utf8Text];
    // The buffer returned by GetUTF8Text() is owned by the caller and has to
    // be released with delete[].
    delete[] utf8Text;
    return text;
}

/// Draws the image into a fresh 32-bit pixel buffer and hands that buffer to
/// the engine. An image with a non-positive width or height is ignored and
/// the previously set image stays in effect.
- (void)setImage:(UIImage *)image
{
    CGSize size = [image size];
    int width = size.width;
    int height = size.height;

    // Validate before touching the current buffer, so an invalid image never
    // leaves _pixels pointing at freed memory.
    if (width <= 0 || height <= 0) {
        return;
    }

    size_t bytesPerRow = (size_t)width * sizeof(uint32_t);

    // calloc zero-fills the pixels so any transparency is preserved.
    uint32_t *pixels = (uint32_t *)calloc((size_t)height, bytesPerRow);
    if (pixels == NULL) {
        return;
    }

    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();

    // Create a context with RGBA pixels
    CGContextRef context = CGBitmapContextCreate(pixels, width, height, 8, bytesPerRow, colorSpace,
                                                 kCGBitmapByteOrder32Little | kCGImageAlphaPremultipliedLast);
    CGColorSpaceRelease(colorSpace);
    if (context == NULL) {
        free(pixels);
        return;
    }

    // Paint the bitmap to our context which will fill in the pixels array
    CGContextDrawImage(context, CGRectMake(0, 0, width, height), [image CGImage]);

    // We're done with the context
    CGContextRelease(context);

    _tesseract->SetImage((const unsigned char *)pixels, width, height, sizeof(uint32_t), (int)bytesPerRow);

    // Release the previous buffer only after the engine has been given the
    // new one; the buffer is kept in an ivar because the engine presumably
    // keeps referencing it after SetImage - TODO confirm.
    free(_pixels);
    _pixels = pixels;
}

@end



Code Sample

#import "Tesseract.h"

// Create the wrapper for the bundled "tessdata" directory and English.
Tesseract *tesseract = [[Tesseract alloc] initWithDataPath:@"tessdata" language:@"eng"];
// Whitelist: only digits may appear in the result.
[tesseract setVariableValue:@"0123456789" forKey:@"tessedit_char_whitelist"];
// The captcha image to recognise.
[tesseract setImage:[UIImage imageNamed:@"image_sample.jpg"]];

// Only read the text when recognition reports success.
if ([tesseract recognize]) {
    NSLog(@"%@", [tesseract recognizedText]);
} else {
    NSLog(@"Tesseract recognition failed");
}