利用SVM(支持向量机)进行4位数字验证码识别
主要思路和步骤如下:
一,素材收集
检查环境是否包含有相应的库:
1.在cmd中,通过 pip list
命令查看安装的库
2.再使用 pip install requests
安装requests库
3.再次使用pip list 命令
4.利用python获取验证码资源
编写代码:_downloadpic.py
#!/usr/bin/env python3
#利用python从站点下载验证码图片
import requests
## 1.在 http://www.xxx.com
# 获取验证码url
def downloads_pic(strpath, strname):
    """Download one captcha image and save it as ``strpath + strname + '.png'``.

    Args:
        strpath: Destination directory prefix. It is concatenated verbatim,
            so it must already end with a path separator (e.g. ``'d:/1/'``).
        strname: File name without extension.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        OSError: If the destination file cannot be written.
    """
    # Placeholder captcha endpoint; replace with the real site URL.
    url = 'http://www.xxx.com'
    # stream=True lets the body be read in chunks; the ``with`` block
    # releases the connection once the image has been written.
    with requests.get(url, stream=True, timeout=30) as response:
        # Fail loudly instead of saving an HTML error page as a ".png".
        response.raise_for_status()
        with open(strpath + strname + '.png', 'wb') as fppic:
            # Copy the body 1024 bytes at a time until it is exhausted.
            for bychunk in response.iter_content(chunk_size=1024):
                if bychunk:  # skip empty keep-alive chunks
                    fppic.write(bychunk)
# Fetch ten sample captchas, saved as d:/1/001.png ... d:/1/010.png.
for i in range(1, 10 + 1):
    strfilename = "%03d" % i
    downloads_pic('d:/1/', strfilename)
|
二,素材处理
1.二值化处理,增加对比度,锐化,增加亮度,滤镜,转为黑白
2.去除噪点
3.切割图片
编写代码:_picdealwith.py
#!/usr/bin/env python3
import os
import os.path
from pil import image, imageenhance, imagefilter
import random
#二值化处理
#strimgpath 图片路径
def binaryzationimg(strimgpath):
    """Open a captcha image and reduce it to a pure black/white image.

    The image is enhanced (contrast x2.55, sharpness x2.0, brightness x2.0),
    converted to greyscale, passed through the DETAIL filter and finally
    converted to 1-bit mode.

    Args:
        strimgpath: Path of the image file to process.

    Returns:
        A new Pillow image in mode ``'1'``.
    """
    # Pillow's names are case-sensitive (``PIL``, ``Image``, ``ImageEnhance``,
    # ``ImageFilter``); importing here keeps the function self-contained.
    from PIL import Image, ImageEnhance, ImageFilter

    # The context manager closes the underlying file once processing is done;
    # every step below returns a new in-memory image.
    with Image.open(strimgpath) as imgoriimg:
        # Raise contrast to 255 %.
        imgenhanced = ImageEnhance.Contrast(imgoriimg).enhance(2.55)
        # Sharpen to 200 %.
        imgenhanced = ImageEnhance.Sharpness(imgenhanced).enhance(2.0)
        # Raise brightness to 200 %.
        imgenhanced = ImageEnhance.Brightness(imgenhanced).enhance(2.0)
        # Greyscale ('L', upper case) plus the detail filter.
        imggryimg = imgenhanced.convert('L').filter(ImageFilter.DETAIL)
        # Binarise: mode '1' holds only black and white.
        return imggryimg.convert('1')
#去除噪点
def clearnoise(imgbinimg):
    """Remove isolated black pixels from a binarised image, in place.

    An interior pixel counts as noise when it is black (0) and all eight of
    its neighbours are white (255); such a pixel is set to white. Pixels on
    the one-pixel border are never examined.

    Args:
        imgbinimg: Image object exposing ``size``, ``getpixel`` and
            ``putpixel`` whose pixels read as 0 or 255.

    Returns:
        The same image object, after modification.
    """
    width, height = imgbinimg.size[0], imgbinimg.size[1]
    # Offsets of the eight surrounding pixels.
    neighbours = [
        (dx, dy)
        for dx in (-1, 0, 1)
        for dy in (-1, 0, 1)
        if (dx, dy) != (0, 0)
    ]
    for x in range(1, width - 1):
        for y in range(1, height - 1):
            if imgbinimg.getpixel((x, y)) != 0:
                continue
            if all(
                imgbinimg.getpixel((x + dx, y + dy)) == 255
                for dx, dy in neighbours
            ):
                # Coordinates are passed as a tuple, matching getpixel().
                imgbinimg.putpixel((x, y), 255)
    return imgbinimg
#切割图片
def getcropimgs(imgclrimg, count=4, left=6, top=3, width=13, height=15):
    """Cut a captcha image into equally sized, side-by-side character cells.

    With the defaults this yields four 13x15 boxes starting at x=6, y=3.

    Args:
        imgclrimg: Image object exposing ``crop((left, top, right, bottom))``.
        count: Number of character cells to cut.
        left: X coordinate of the first cell's left edge.
        top: Y coordinate of every cell's top edge.
        width: Width of one cell; cells are adjacent, so it is also the step.
        height: Height of one cell.

    Returns:
        List of the cropped sub-images, left to right.
    """
    return [
        imgclrimg.crop(
            (left + i * width, top, left + (i + 1) * width, top + height)
        )
        for i in range(count)
    ]
#调用部分
def main():
    """Binarise, denoise and split every image under ``d:/1/step1/``.

    Each character cell is saved to ``d:/1/step2/`` as a PNG whose name is a
    running four-digit counter followed by a random four-digit suffix.
    """
    g_count = 0
    strstep1dir = 'd:/1/step1/'
    strstep2dir = 'd:/1/step2/'
    # Saving fails if the output directory does not exist yet.
    os.makedirs(strstep2dir, exist_ok=True)
    for parentpath, _dirnames, filenames in os.walk(strstep1dir):
        for filename in filenames:
            # Full path of the source image.
            strfullpath = os.path.join(parentpath, filename)
            imgbinimg = binaryzationimg(strfullpath)
            imgclrimg = clearnoise(imgbinimg)
            for img in getcropimgs(imgclrimg):
                strimgname = "%04d%04d.png" % (g_count, random.randint(0, 9999))
                img.save(os.path.join(strstep2dir, strimgname))
                g_count += 1
    print("ok!")
# Run the pipeline only when executed as a script. The compared string must
# be exactly '__main__', otherwise main() is never called.
if __name__ == '__main__':
    main()
|
三,手工分类
将第二步切割好的图片进行分类,体力活
四,利用svm向量机建立模型
1.安装svm库
下载libsvm库,并解压
将库中的windows目录的路径添加到path环境变量中
将libsvm下的python文件夹中的svm.py和svmutil.py文件拷贝到Python安装目录下的Lib文件夹中,之后即可在代码中导入:
from svmutil import *
|
2.生成模型文件
2.1.将分好类的图片信息进行提取,生成特征值
2.2.输出向量数据
2.3.根据数据输出svm模型文件
编写代码:_svmdemo.py
#!/usr/bin/env python3
#svm,验证码识别
import os
import sys
import random
import os.path
from pil import image, imageenhance, imagefilter
from svmutil import *
##记录像素点的值,描述特征,采用遍历每个像素点统计黑色点的数量
def getfeature(imgcropimg, nimgheight, nimgwidth):
    """Describe a character cell by its per-row and per-column black counts.

    Args:
        imgcropimg: Image object exposing ``getpixel((x, y))``; a pixel is
            counted when it reads as 0.
        nimgheight: Number of rows to scan.
        nimgwidth: Number of columns to scan.

    Returns:
        A list of ``nimgheight + nimgwidth`` integers: the black-pixel count
        of each row (top to bottom) followed by that of each column (left to
        right).
    """
    rowcounts = [0] * nimgheight
    colcounts = [0] * nimgwidth
    # One pass over the cell updates both projections, so every pixel is
    # read only once.
    for y in range(nimgheight):
        for x in range(nimgwidth):
            if imgcropimg.getpixel((x, y)) == 0:
                rowcounts[y] += 1
                colcounts[x] += 1
    return rowcounts + colcounts
##输出向量数据
def outputvectordata(strid, strmaterialdir, stroutpath):
    """Append one LIBSVM-format feature line per image found under a directory.

    Every file below ``strmaterialdir`` is opened as an image, described with
    ``getfeature(img, 15, 13)`` and written as
    ``"<strid> 1:<v1> 2:<v2> ... "`` followed by a newline.

    Args:
        strid: Class label written at the start of every line.
        strmaterialdir: Directory tree containing the images of that class.
        stroutpath: Vector file to append to; it is opened in append mode so
            that successive calls for different labels accumulate.
    """
    # Pillow's names are case-sensitive (``PIL``, ``Image``); importing here
    # keeps the function self-contained.
    from PIL import Image

    stroutabs = os.path.normcase(os.path.abspath(stroutpath))
    with open(stroutpath, 'a') as fpfea:
        for parentpath, dirnames, filenames in os.walk(strmaterialdir):
            dirnames.sort()  # deterministic traversal order
            for fp in sorted(filenames):
                # Full path of the image file.
                strfullpath = os.path.join(parentpath, fp)
                # Never try to parse the vector file itself as an image.
                if os.path.normcase(os.path.abspath(strfullpath)) == stroutabs:
                    continue
                with Image.open(strfullpath) as imgoriimg:
                    featurelist = getfeature(imgoriimg, 15, 13)
                # LIBSVM feature indices are 1-based.
                strfeature = ''.join(
                    '%d:%d ' % (nindex, nvalue)
                    for nindex, nvalue in enumerate(featurelist, 1)
                )
                fpfea.write('%s %s\n' % (strid, strfeature))
#训练svm模型
def trainsvmmodel(strproblempath, strmodelpath):
    """Train an SVM from a LIBSVM-format vector file and save the model.

    Training uses ``svm_train`` without any option string.

    Args:
        strproblempath: Path of the training vector file.
        strmodelpath: Path the trained model is written to.
    """
    y, x = svm_read_problem(strproblempath)
    model = svm_train(y, x)
    svm_save_model(strmodelpath, model)
#svm模型测试
def svmmodeltest(strproblempath, strmodelpath):
    """Classify the samples of a vector file with a previously saved model.

    Args:
        strproblempath: Path of the LIBSVM-format vector file to classify.
        strmodelpath: Path of the model file written by ``svm_save_model``.

    Returns:
        The list of predicted labels, one per line of the vector file.

    Raises:
        IOError: If the model file could not be loaded.
    """
    testy, testx = svm_read_problem(strproblempath)
    model = svm_load_model(strmodelpath)
    # LIBSVM's Python loader signals a missing or unreadable model by
    # returning None; stop here with a clear message instead of failing
    # inside svm_predict.
    if model is None:
        raise IOError("cannot load SVM model file: %s" % strmodelpath)
    # Only the labels are needed; accuracy and decision values are dropped.
    plabel, _pacc, _pval = svm_predict(testy, testx, model)
    return plabel
##输出测试向量数据
def outputtestvectordata(strid, strdir, stroutpath):
    """Append one LIBSVM-format feature line per test image under ``strdir``.

    Files are processed in sorted name order so that the lines, and therefore
    the predicted labels, follow the order of the numbered character images.
    Every directory below ``strdir`` is visited.

    Args:
        strid: Label written at the start of every line (a placeholder when
            the true label is unknown).
        strdir: Directory tree containing the character images.
        stroutpath: Vector file to append to.
    """
    # Pillow's names are case-sensitive (``PIL``, ``Image``); importing here
    # keeps the function self-contained.
    from PIL import Image

    stroutabs = os.path.normcase(os.path.abspath(stroutpath))
    with open(stroutpath, 'a') as fpfea:
        for parentpath, dirnames, filenames in os.walk(strdir):
            dirnames.sort()  # deterministic traversal order
            for fp in sorted(filenames):
                # Full path of the image file.
                strfullpath = os.path.join(parentpath, fp)
                # The vector file may live in the scanned directory; it is
                # not an image and must be skipped.
                if os.path.normcase(os.path.abspath(strfullpath)) == stroutabs:
                    continue
                with Image.open(strfullpath) as imgoriimg:
                    featurelist = getfeature(imgoriimg, 15, 13)
                # LIBSVM feature indices are 1-based.
                strfeature = ''.join(
                    '%d:%d ' % (nindex, nvalue)
                    for nindex, nvalue in enumerate(featurelist, 1)
                )
                fpfea.write('%s %s\n' % (strid, strfeature))
def main():
    """Build the training vectors, train the SVM and print test predictions.

    Steps:
        1. Write feature vectors for the digit folders ``d:/1/step3/0`` ..
           ``d:/1/step3/9`` into ``d:/1/step4/vector.txt``.
        2. Train a model and save it as ``d:/1/step5/model.txt``.
        3. Classify ``d:/1/step6/vector.txt`` and print one label per line.
    """
    strtrainvector = 'd:/1/step4/vector.txt'
    strmodelpath = 'd:/1/step5/model.txt'
    # outputvectordata() opens the vector file in append mode, so a file left
    # over from an earlier run would duplicate every training sample.
    try:
        os.remove(strtrainvector)
    except FileNotFoundError:
        pass
    # 1. Emit the vectors, one digit class at a time.
    for i in range(10):
        strid = '%d' % i
        outputvectordata(strid, 'd:/1/step3/' + strid, strtrainvector)
    # 2. Train the SVM model.
    trainsvmmodel(strtrainvector, strmodelpath)
    # 3. Classify the test vectors.
    plabel = svmmodeltest('d:/1/step6/vector.txt', strmodelpath)
    for i in plabel:
        print('%d' % i)
# Run the training pipeline only when executed as a script, so the module
# can be imported for its helper functions without side effects.
if __name__ == '__main__':
    main()
|
五,测试
1.利用模型文件和向量文件进行测试验证码识别
##1.获取一张验证码图片
##2.对图片进行处理
## 2.1.二值化处理,增加对比度,锐化,增加亮度,滤镜,转为黑白,
## 2.2.去除噪点
## 2.3.切割图片
##3.生成向量文件
##4.再利用之前的模型文件进行识别测试
编写代码:_svmtest.py
#!/usr/bin/env python3
#对一张验证码图片进行识别测试
##1.获取一张验证码图片
##2.对图片进行处理
## 2.1.二值化处理,增加对比度,锐化,增加亮度,滤镜,转为黑白,
## 2.2.去除噪点
## 2.3.切割图片
##3.生成向量文件
##4.再利用之前的模型文件进行识别测试
################
import _picdealwith
import os
import random
import _svmdemo
##测试
# Recognise a single captcha image end to end.
g_count = 0
strdirpath = 'd:/1/test/'
strfilename = '001.png'
strvectorpath = 'd:/1/test/vector.txt'
# 1. Full path of the captcha image.
strfullpath = os.path.join(strdirpath, strfilename)
# 2. Pre-process the image.
# 2.1 Binarise.
imgbinimg = _picdealwith.binaryzationimg(strfullpath)
# 2.2 Remove isolated noise pixels.
imgclrimg = _picdealwith.clearnoise(imgbinimg)
# 2.3 Split into character cells.
imglist = _picdealwith.getcropimgs(imgclrimg)
# 2.4 Save each cell; the leading counter keeps the file names in digit order.
for img in imglist:
    strimgname = "%04d%04d.png" % (g_count, random.randint(0, 9999))
    strimgpath = os.path.join(strdirpath, strimgname)
    img.save(strimgpath)
    g_count += 1
print("ok")
# The original captcha is deleted so that only the character cells remain in
# the directory that is vectorised next.
os.remove(strfullpath)
# A vector file left over from an earlier run lives in the scanned directory
# and would also be appended to, so remove it before regenerating.
if os.path.exists(strvectorpath):
    os.remove(strvectorpath)
# 3. Generate the vector file ('0' is only a placeholder label).
_svmdemo.outputtestvectordata('0', 'd:/1/test/', strvectorpath)
# 4. Classify with the previously trained model and print the digits.
plabel = _svmdemo.svmmodeltest(strvectorpath, 'd:/1/step5/model.txt')
for i in plabel:
    print('%d' % i, end='')
|
效果图:
总结
以上就是这篇文章的全部内容了,希望本文的内容对大家的学习或者工作具有一定的参考学习价值,谢谢大家对服务器之家的支持。
原文链接:https://blog.csdn.net/u011337769/article/details/69808412