一.官方文档
https://pypi.org/project/muggle-ocr/
二.模块安装
1
2
|
pip install muggle-ocr
# 因模块过新,阿里/清华等第三方镜像源可能尚未同步该模块,因此需要手动指定使用官方源。为了提高依赖的安装速度,可预先自行安装依赖:tensorflow/numpy/opencv-python/pillow/pyyaml
|
三.使用代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
# Import the package.
import muggle_ocr

# Initialise the SDK; model_type is one of ModelType.OCR / ModelType.Captcha.
sdk = muggle_ocr.SDK(model_type=muggle_ocr.ModelType.OCR)

# ModelType.OCR recognises optically printed text.  (Author's note: the
# official documentation attributes this capability to ModelType.Captcha,
# which the author believes is a mistake in the documentation.)
with open(r"test1.png", "rb") as f:
    b = f.read()
text = sdk.predict(image_bytes=b)
print(text)

# ModelType.Captcha recognises 4-6 character captchas.
sdk = muggle_ocr.SDK(model_type=muggle_ocr.ModelType.Captcha)
with open(r"test1.png", "rb") as f:
    b = f.read()
text = sdk.predict(image_bytes=b)
print(text)
|
PS:下面看下 Python 实现全自动登录(真正的全自动,自动识别验证码)
你没有看错,全自动验证~~~
黑科技?还是黑代码?
我感觉这个要看你用它来做什么,对不对?反正我用来(* * * * ),你懂的
好了,先说一下用到的东西
- selenium (本意是用来全自动测试)
- Phantomjs (一种没有界面的浏览器)
- ** 验证码识别器(一块钱可用100次的这种)
关门放代码
1
2
3
4
5
6
7
8
9
10
11
12
13
|
from selenium import webdriver
from PIL import Image
if __name__ == '__main__':
    # Crop the captcha image out of a full-page screenshot of the login page.
    wbe = webdriver.PhantomJS()
    try:
        # Any login page can be used for testing (e.g. Zhihu, Baidu).
        wbe.get("https://www.某个网站的登录页面.com/login/index.html")
        # The full-page screenshot must exist on disk before it is opened below.
        wbe.save_screenshot('登录页.png')
        # XPath of the captcha <img> element.
        element = wbe.find_element_by_xpath('//*[@id="entry_name"]/p[3]/img')
        location = element.location
        size = element.size
        left = location['x']
        top = location['y']
        right = left + size['width']
        bottom = top + size['height']
        # Open the full-page screenshot and keep only the captcha's bounding box.
        with Image.open(r'登录页.png') as page:
            im = page.crop((left, top, right, bottom))
        im.save('验证码.png')
    finally:
        # Always shut the headless browser down, even if a step above failed.
        wbe.quit()
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
#!/usr/bin/env python
# coding:utf-8
import requests
from hashlib import md5
class RClient(object):
    """Small HTTP client for the api.ruokuai.com captcha-recognition endpoints."""

    def __init__(self, username, password, soft_id, soft_key):
        """Store the credentials that are sent along with every request.

        username: account name.
        password: account password, as text or bytes; only its MD5 hex
            digest is kept and transmitted.
        soft_id: sent as the 'softid' form field.
        soft_key: sent as the 'softkey' form field.
        """
        # hashlib requires bytes; encode text so str passwords work on Python 3.
        if not isinstance(password, bytes):
            password = password.encode("utf-8")
        self.username = username
        self.password = md5(password).hexdigest()
        self.soft_id = soft_id
        self.soft_key = soft_key
        # Form fields merged into every request body.
        self.base_params = {
            'username': self.username,
            'password': self.password,
            'softid': self.soft_id,
            'softkey': self.soft_key,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'Expect': '100-continue',
            'User-Agent': 'ben',
        }

    def rk_create(self, im, im_type, timeout=60):
        """Upload an image for recognition and return the decoded JSON reply.

        im: image bytes.
        im_type: question (captcha) type id.
        timeout: sent to the service as the 'timeout' form field.
        """
        params = {
            'typeid': im_type,
            'timeout': timeout,
        }
        params.update(self.base_params)
        files = {'image': ('a.png', im)}
        # NOTE(review): `timeout` is only a form field here; no client-side
        # timeout is passed to requests.post.
        r = requests.post('http://api.ruokuai.com/create.json',
                          data=params, files=files, headers=self.headers)
        return r.json()

    def rk_report_error(self, im_id):
        """Report a wrongly recognised image and return the decoded JSON reply.

        im_id: id of the question being reported.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post('http://api.ruokuai.com/reporterror.json',
                          data=params, headers=self.headers)
        return r.json()
def get_code():
    """Send the image stored in 'a.png' for recognition and print the reply."""
    # Placeholder credentials (author's note: leave a comment on the post to
    # obtain a test account).
    rc = RClient('用户名', '密码', '94522', '62c235939b7240879453f31603733fd6')
    # Read the image through a context manager so the handle is closed.
    with open('a.png', 'rb') as f:
        im = f.read()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(rc.rk_create(im, 3040))
|
完整代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
#!/usr/bin/env python
# coding:utf-8
from selenium import webdriver
from PIL import Image
import requests
from hashlib import md5
import time
class RClient(object):
    """Small HTTP client for the api.ruokuai.com captcha-recognition endpoints."""

    def __init__(self, username, password, soft_id, soft_key):
        """Store the credentials that are sent along with every request.

        username: account name.
        password: account password, as text or bytes; only its MD5 hex
            digest is kept and transmitted.
        soft_id: sent as the 'softid' form field.
        soft_key: sent as the 'softkey' form field.
        """
        # Text is encoded as UTF-8; bytes are hashed as-is instead of failing
        # on the missing .encode() round trip.
        if not isinstance(password, bytes):
            password = password.encode("utf-8")
        self.username = username
        self.password = md5(password).hexdigest()
        self.soft_id = soft_id
        self.soft_key = soft_key
        # Form fields merged into every request body.
        self.base_params = {
            'username': self.username,
            'password': self.password,
            'softid': self.soft_id,
            'softkey': self.soft_key,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'Expect': '100-continue',
            'User-Agent': 'ben',
        }

    def rk_create(self, im, im_type, timeout=60):
        """Upload an image for recognition and return the decoded JSON reply.

        im: image bytes.
        im_type: question (captcha) type id.
        timeout: sent to the service as the 'timeout' form field.
        """
        params = {
            'typeid': im_type,
            'timeout': timeout,
        }
        params.update(self.base_params)
        files = {'image': ('a.png', im)}
        # NOTE(review): `timeout` is only a form field here; no client-side
        # timeout is passed to requests.post.
        r = requests.post('http://api.ruokuai.com/create.json',
                          data=params, files=files, headers=self.headers)
        return r.json()

    def rk_report_error(self, im_id):
        """Report a wrongly recognised image and return the decoded JSON reply.

        im_id: id of the question being reported.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post('http://api.ruokuai.com/reporterror.json',
                          data=params, headers=self.headers)
        return r.json()
def get_code(im_file):
    """Send the image file at *im_file* for recognition and print the reply.

    im_file: path of the captcha image to upload.
    """
    # Placeholder account name / password; replace with real credentials.
    rc = RClient('账号', '密码', '94522', '62c235939b7240879453f31603733fd6')
    # Read the image through a context manager so the handle is closed.
    with open(im_file, "rb") as f:
        im_source = f.read()
    print(rc.rk_create(im_source, 3040))
if __name__ == '__main__':
    # Screenshot the login page, crop out the captcha image and send it to
    # the recognition service.
    wbe = webdriver.PhantomJS()
    try:
        wbe.get("https://www.dajiang365.com/login/index.html")
        time.sleep(2)
        # Full-page screenshot; the captcha is cropped out of it below.
        wbe.save_screenshot("das.png")
        element = wbe.find_element_by_xpath('//*[@id="entry_name"]/p[3]/img')
        location = element.location
        size = element.size
        left = location['x']
        top = location['y']
        right = left + size['width']
        bottom = top + size['height']
        # Keep only the captcha's bounding box and close the screenshot file.
        with Image.open(r'das.png') as page:
            im = page.crop((left, top, right, bottom))
        im.save('a.png')
        time.sleep(2)
        get_code("a.png")
    finally:
        # Always shut the headless browser down, even if a step above failed.
        wbe.quit()
|
总结
到此这篇关于 python 图片验证码识别最新模块 muggle_ocr 的示例代码的文章就介绍到这了。更多相关 python 验证码识别模块 muggle_ocr 的内容,请搜索服务器之家以前的文章或继续浏览下面的相关文章,希望大家以后多多支持服务器之家!
原文链接:https://www.cnblogs.com/pythonywy/archive/2020/07/02/13226330.html