本文实例讲述了用 Python3 实现的简单验证码识别功能,分享给大家供大家参考,具体如下:
这次的需求是自动登录某机构网站,其验证码很有特色(一道"数字 加/减 数字"的算式),很适合作为验证码识别的入门 demo。下面先贴出主要代码:图片对比使用了编辑距离(Levenshtein distance)算法,图像处理使用了 Pillow 库,网络请求使用了 requests 库。
from PIL import Image
import requests
import re
# Cuts the binarised pixel string into consecutive, non-overlapping runs of
# exactly 30 digits; a trailing remainder shorter than 30 digits is ignored.
# NOTE(review): presumably one run corresponds to one pixel column of a
# 30-pixel-tall captcha image -- confirm against the real image size.
splitter = re.compile(r'\d{30}')
def distance(string1, string2):
    """Return the Levenshtein (edit) distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn ``string1`` into
    ``string2``.  For two strings of equal length that differ only by
    substitutions this equals the number of differing positions, e.g.
    ``distance('11110000', '00000000') == 4``.

    Args:
        string1: First string to compare (may be empty).
        string2: Second string to compare (may be empty).

    Returns:
        The edit distance as a non-negative ``int``.  If one string is
        empty the result is the length of the other one.
    """
    # Wagner-Fischer dynamic programme keeping only two rows.
    # previous_row[j] is the distance between the first i-1 characters of
    # string1 and the first j characters of string2; row 0 is the cost of
    # building each prefix of string2 from the empty string.
    previous_row = list(range(len(string2) + 1))
    for i, char1 in enumerate(string1, start=1):
        # Column 0: deleting all i characters of the string1 prefix.
        current_row = [i]
        for j, char2 in enumerate(string2, start=1):
            substitution_cost = 0 if char1 == char2 else 1
            current_row.append(min(
                previous_row[j] + 1,                      # delete char1
                current_row[j - 1] + 1,                   # insert char2
                previous_row[j - 1] + substitution_cost,  # match / replace
            ))
        previous_row = current_row
    return previous_row[-1]
def no_one(string):
    """Remove every all-'1' chunk from a binarised pixel string.

    The input is cut into consecutive 30-digit chunks with the
    module-level ``splitter`` pattern.  Chunks consisting solely of '1'
    characters are discarded and the remaining chunks are concatenated in
    their original order.  Anything ``splitter`` does not match (for
    example a trailing remainder shorter than 30 digits) is dropped too.

    Args:
        string: String of '0'/'1' digits.

    Returns:
        The concatenation of all 30-digit chunks that contain at least one
        character other than '1'; ``''`` if there is none.
    """
    # NOTE(review): an all-'1' chunk presumably is a pixel column without
    # any dark pixel, i.e. blank space around/inside a glyph -- confirm.
    blank_chunk = '111111111111111111111111111111'
    return ''.join(
        chunk for chunk in splitter.findall(string) if chunk != blank_chunk
    )
# ---------------------------------------------------------------------------
# 1. Download the captcha image.
# ---------------------------------------------------------------------------
# NOTE(review): a session is used presumably so that cookies issued together
# with the captcha are sent again by the login request sketched at the end.
opener = requests.session()
response = opener.get(
    'http://60.211.254.236:8402/Ajax/ValidCodeImg.ashx',
    timeout=10,  # never hang forever on an unresponsive server
)
response.raise_for_status()  # fail here rather than on a bogus image file
res = response.content
with open('verify.gif', 'wb') as v:
    v.write(res)

# ---------------------------------------------------------------------------
# 2. Binarise the image and cut it into three symbols.
# ---------------------------------------------------------------------------
img = Image.open('verify.gif')
img = img.convert('L')  # 8-bit greyscale
size = img.size
img_arr = img.load()

inc = 0      # number of pixel columns consumed so far
str1 = ''    # first operand
str2 = ''    # operator (plus / minus)
str3 = ''    # second operand
cur_str = ''
for x in range(size[0]):
    # Serialise one column top to bottom: grey values above 210 become '1',
    # everything else becomes '0'.
    cur_str += ''.join(
        '1' if img_arr[x, y] > 210 else '0' for y in range(size[1])
    )
    # NOTE(review): the published listing lost its indentation.  Counting
    # once per column (not once per pixel) is the reading consistent with
    # the 60-character trim and the 30-digit chunking below -- confirm.
    inc += 1
    # Every 18 columns one symbol is complete; columns past the 54th are
    # accumulated in cur_str but never used.
    if inc == 18:
        str1 = cur_str
        cur_str = ''
    elif inc == 36:
        str2 = cur_str
        cur_str = ''
    elif inc == 54:
        str3 = cur_str
        cur_str = ''

# Drop the last 60 characters of every symbol (presumably its two rightmost
# columns), remove all-'1' chunks, then trim leading/trailing '1's so that
# only the glyph itself is compared against the dictionary.
str1 = no_one(str1[:-60]).strip('1')
str2 = no_one(str2[:-60]).strip('1')
str3 = no_one(str3[:-60]).strip('1')

# ---------------------------------------------------------------------------
# 3. Match every symbol against the reference strings in ./dict.
# ---------------------------------------------------------------------------
with open('./dict/plus') as plus, open('./dict/minus') as minus:
    p = plus.read()
    m = minus.read()
# The operator is whichever reference is strictly closer; ties count as minus.
is_add = 1 if distance(p, str2) < distance(m, str2) else 0

arr1 = []  # [digit, distance to first operand]
arr3 = []  # [digit, distance to second operand]
for each in range(1, 10):
    with open('./dict/{}'.format(each)) as f:
        ff = f.read()
    arr1.append([each, distance(ff, str1)])
    arr3.append([each, distance(ff, str3)])

# Pick the digit with the smallest distance; on ties the smaller digit wins
# because min() returns the first minimal entry.
digit1 = min(arr1, key=lambda item: item[1])[0]
digit3 = min(arr3, key=lambda item: item[1])[0]
result = digit1 + digit3 if is_add else digit1 - digit3
print(result)

# Sketch of the follow-up login request (not executed; usn / pwd are the
# account credentials and are not defined in this script):
# login_url = 'http://60.211.254.236:8402/Ajax/Login.ashx?Method=G3_Login'
# login_data = {
#     'loginname': usn,
#     'password': pwd,
#     'validcode': result,
# }
# opener.get(login_url, login_data)
字库(./dict 目录下的参考字符串)已经上传到 GitHub,地址:https://github.com/hldh214/validCode/
希望本文所述对大家Python程序设计有所帮助。
原文链接:https://blog.csdn.net/hldh214/article/details/51364933