Python 实现的 OCR 接口

时间:2024-03-12 07:58:39

太累了,有时间再补解释。

import pytesseract
import requests
from PIL import Image
from PIL import ImageFilter
from StringIO import StringIO
from werkzeug.utils import secure_filename
from gevent import monkey
from gevent.pywsgi import WSGIServer
monkey.patch_all()
from flask import Flask,render_template,jsonify,request,send_from_directory
import time
import os
import base64
import random


# Flask application object; the route handlers below register on it.
app = Flask(__name__)
# Name of the sub-directory (relative to basedir) where uploads are stored.
UPLOAD_FOLDER = 'upload'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Absolute path of the directory that contains this file.
basedir = os.path.abspath(os.path.dirname(__file__))
# Accepted upload extensions. The check in allowed_file() is case-sensitive,
# which is why both lower- and upper-case spellings are listed.
ALLOWED_EXTENSIONS = {'png', 'jpg', 'JPG', 'PNG'}

def allowed_file(filename):
    """Return True if *filename* has an extension in ALLOWED_EXTENSIONS.

    The extension is the text after the last '.' in *filename*. The
    comparison is case-sensitive, so only the exact spellings stored in
    ALLOWED_EXTENSIONS are accepted. A name without any '.' is rejected.
    """
    return '.' in filename and filename.rsplit('.', 1)[1] in ALLOWED_EXTENSIONS

@app.route('/', methods=['GET'], strict_slashes=False)
def indexpage():
    """Handle GET / by rendering the ``index.html`` template."""
    return render_template('index.html')

@app.route('/', methods=['POST'], strict_slashes=False)
def api_upload():
    """Handle POST /: store the uploaded image, run OCR on it, return JSON.

    Request fields:
        file: the uploaded image (multipart); its name must pass
            allowed_file().
        lang: optional form field. When it equals "chi_sim" the image is
            recognised with lang="chi_sim"; otherwise pytesseract is called
            without a lang argument.

    Returns:
        A JSON response. On success:
            {"errno": 0, "msg": "succeed ", "data": <recognised text>,
             "info": <character boxes>}
        When the file is empty or its extension is not allowed:
            {"errno": 1001, "errmsg": "failed"}
    """
    file_dir = os.path.join(basedir, app.config['UPLOAD_FOLDER'])
    if not os.path.exists(file_dir):
        os.makedirs(file_dir)

    # Dump the request headers to stdout and to error.log. Mode "w+"
    # truncates the file, so it only ever holds the latest request. The
    # with-block closes the handle even if a later step raises.
    print(request.headers)
    with open("error.log", "w+") as log:
        log.write(str(request.headers) + "\n")

    f = request.files['file']
    postLang = request.form.get("lang", type=str)

    if not (f and allowed_file(f.filename)):
        return jsonify({"errno": 1001, "errmsg": u"failed"})

    # Take the extension from the client-supplied name, which allowed_file()
    # has just checked against the whitelist. secure_filename() can strip the
    # dot (a name made only of non-ASCII characters plus ".png" becomes
    # "png"), which made the previous rsplit(...)[1] raise IndexError.
    ext = f.filename.rsplit('.', 1)[1]
    unix_time = int(time.time())
    new_filename = str(random.randrange(0, 10001, 2)) + str(unix_time) + '.' + ext

    # Run OCR on the exact path the file was saved to, instead of a
    # hard-coded deployment directory that may not match basedir.
    saved_path = os.path.join(file_dir, new_filename)
    f.save(saved_path)

    # The previous test `if cmp(postLang, "chi_sim")` was inverted: cmp()
    # returns 0 (falsy) when the two values are equal.
    ocr_options = {}
    if postLang == "chi_sim":
        ocr_options["lang"] = "chi_sim"
        print("Chinese")

    # Open the image once, share it between both OCR calls, and close it
    # when done.
    with Image.open(saved_path) as image:
        strboxs = pytesseract.image_to_boxes(image, **ocr_options)
        strdata = pytesseract.image_to_string(image, **ocr_options)

    return jsonify({"errno": 0, "msg": "succeed ", "data": strdata, "info": strboxs})

if __name__ == '__main__':
    # Serve the Flask app with gevent's WSGIServer on port 80. The empty
    # host string leaves the bind address unspecified.
    http_server = WSGIServer(('', 80), app)
    http_server.serve_forever()