Python3实现的简单验证码识别功能示例

时间:2022-10-10 18:01:05

本文实例讲述了Python3实现的简单验证码识别功能。分享给大家供大家参考,具体如下:

这次的需求是自动登录某机构网站, 其验证码很具特色, 很适合做验证码识别入门demo, 先贴主要代码, 其中图片对比使用了编辑距离算法, 脚本使用了pillow库

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
from PIL import Image
import requests
import re
splitter = re.compile(r'\d{30}') # 分割二值化后的图片
# distance('11110000', '00000000')
# 比较两个字符串有多少位不同, 返回不同的位数
def distance(string1, string2):
  d_str1 = len(string1)
  d_str2 = len(string2)
  d_arr = [[0] * d_str2 for i in range(d_str1)]
  for i in range(d_str1):
    for j in range(d_str2):
      if string1[i] == string2[j]:
        if i == 0 and j == 0:
          d_arr[i][j] = 0
        elif i != 0 and j == 0:
          d_arr[i][j] = d_arr[i - 1][j]
        elif i == 0 and j != 0:
          d_arr[i][j] = d_arr[i][j - 1]
        else:
          d_arr[i][j] = d_arr[i - 1][j - 1]
      else:
        if i == 0 and j == 0:
          d_arr[i][j] = 1
        elif i != 0 and j == 0:
          d_arr[i][j] = d_arr[i - 1][j] + 1
        elif i == 0 and j != 0:
          d_arr[i][j] = d_arr[i][j - 1] + 1
        else:
          d_arr[i][j] = min(d_arr[i][j - 1], d_arr[i - 1][j], d_arr[i - 1][j - 1]) + 1
  current = max(d_arr[d_str1 - 1][d_str2 - 1], abs(d_str2 - d_str1))
  # print("Levenshtein Distance is",current)
  # print(current)
  return current
# 去除字符串里面连续的1
def no_one(string):
  n_arr = splitter.findall(string)
  n_arr = filter(lambda each_str: each_str != '111111111111111111111111111111', n_arr)
  n_result = ''
  for n_each in n_arr:
    n_result += str(n_each)
  return n_result
opener = requests.session()
res = opener.get('http://60.211.254.236:8402/Ajax/ValidCodeImg.ashx').content
with open('verify.gif', 'wb') as v:
  v.write(res)
img = Image.open('verify.gif')
img = img.convert('L')
size = img.size
# img = img.point(table, '1')
img_arr = img.load()
# for x in range(size[0]):
#   for y in range(size[1]):
#     if img_arr[x, y] > 210:
#       img_arr[x, y] = 1
#     else:
#       img_arr[x, y] = 0
# img.save('after.gif')
inc = 0
str1 = ''
str2 = ''
str3 = ''
cur_str = ''
for x in range(size[0]):
  for y in range(size[1]):
    if img_arr[x, y] > 210:
      cur_str += '1'
    else:
      cur_str += '0'
    # print(img_arr[i, j], end='')
    # cur_str += str(img_arr[x, y])
  inc += 1
  # if inc % 18 == 0:
  #   print('\n----')
  # else:
  #   print('')
  if inc == 18:
    str1 = cur_str
    cur_str = ''
  elif inc == 36:
    str2 = cur_str
    cur_str = ''
  elif inc == 54:
    str3 = cur_str
    cur_str = ''
str1 = str1[:-60]
str2 = str2[:-60]
str3 = str3[:-60]
str1 = no_one(str1)
str2 = no_one(str2)
str3 = no_one(str3)
str1 = str1.strip('1')
str2 = str2.strip('1')
str3 = str3.strip('1')
# print(str1)
# print(str3)
with open('./dict/plus') as plus:
  with open('./dict/minus') as minus:
    p = plus.read()
    m = minus.read()
    is_add = 1 if distance(p, str2) < distance(m, str2) else 0
arr1 = []
arr3 = []
for each in range(1, 10):
  with open('./dict/{}'.format(each)) as f:
    ff = f.read()
    arr1.append([each, distance(ff, str1)])
    arr3.append([each, distance(ff, str3)])
arr1 = sorted(arr1, key=lambda item: item[1])
arr3 = sorted(arr3, key=lambda item: item[1])
result = arr1[0][0] + arr3[0][0] if is_add else arr1[0][0] - arr3[0][0]
print(result)
# login_url = 'http://60.211.254.236:8402/Ajax/Login.ashx?Method=G3_Login'
# login_data = {
#   'loginname': usn,
#   'password': pwd,
#   'validcode': result,
#
# }
# opener.get(login_url, login_data)

字库已经部署到GitHub地址:https://github.com/hldh214/validCode/

希望本文所述对大家Python程序设计有所帮助。

原文链接:https://blog.csdn.net/hldh214/article/details/51364933