本文实例讲述了使用 Python 爬取百度图片的方法,分享给大家供大家参考,具体如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
import itertools
import json
import os
import re
import sys
import urllib
import urllib.parse

import requests
# Baidu image-search downloader.
#
# Prompts for a keyword, pages through the Baidu "acjson" image-search
# endpoint 60 results at a time, decodes every "objURL" value found in the
# response with the substitution tables below, and saves each successfully
# downloaded image as ./ok/<n>.jpg (n starts at 1).

path = "./ok"         # directory the images are written to
PAGE_SIZE = 60        # results requested per search page (the "rn" parameter)
REQUEST_TIMEOUT = 10  # seconds, applied to search and image requests alike

# Search endpoint template. {word} must already be percent-encoded; {pn} is
# the zero-based offset of the first result on the requested page.
SEARCH_URL = (
    "http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj"
    "&ct=201326592&fp=result&queryWord={word}&cl=2&lm=-1&ie=utf-8&oe=utf-8"
    "&st=-1&ic=0&word={word}&face=0&istype=2&nc=1&pn={pn}&rn={rn}"
)

# Compiled once: captures the encoded URL out of every "objURL":"..." pair.
OBJ_URL_RE = re.compile(r'"objURL":"(.*?)"')

# Multi-character tokens that stand for URL punctuation. They have to be
# replaced BEFORE the per-character pass, because that pass would otherwise
# rewrite the letters and digits inside the tokens themselves.
str_table = {
    '_z2C$q': ':',
    '_z&e3B': '.',
    'AzdH3F': '/',
}

# Single-character substitution (encoded -> plain), as a str.translate table.
char_table = str.maketrans({
    'w': 'a',
    'k': 'b',
    'v': 'c',
    '1': 'd',
    'j': 'e',
    'u': 'f',
    '2': 'g',
    'i': 'h',
    't': 'i',
    '3': 'j',
    'h': 'k',
    's': 'l',
    '4': 'm',
    'g': 'n',
    '5': 'o',
    'r': 'p',
    'q': 'q',
    '6': 'r',
    'f': 's',
    'p': 't',
    '7': 'u',
    'e': 'v',
    'o': 'w',
    '8': '1',
    'd': '2',
    'n': '3',
    '9': '4',
    'c': '5',
    'm': '6',
    '0': '7',
    'b': '8',
    'l': '9',
    'a': '0',
})


def decode_obj_url(encoded):
    """Return the plain URL for an encoded ``objURL`` value.

    Punctuation tokens from ``str_table`` are replaced first, then every
    remaining character is mapped through ``char_table``. Characters that
    appear in neither table are left unchanged.
    """
    for token, plain in str_table.items():
        encoded = encoded.replace(token, plain)
    return encoded.translate(char_table)


def build_search_url(word, pn):
    """Return the search URL for percent-encoded *word* at result offset *pn*."""
    return SEARCH_URL.format(word=word, pn=pn, rn=PAGE_SIZE)


def fetch_image(image_url):
    """Download *image_url* and return the body bytes, or ``None`` on failure.

    Failures are reported on stdout and never raised, so that one bad link
    does not abort the whole run.
    """
    try:
        response = requests.get(image_url, timeout=REQUEST_TIMEOUT)
    except (requests.RequestException, ValueError):
        # Decoded links can be malformed or point at dead hosts.
        print('失败2')
        return None
    if not response.ok:
        # Any 4xx/5xx answer is an error page, not an image.
        print('失败1')
        return None
    return response.content


def main():
    """Prompt for a keyword and download every image result into ``path``."""
    word = input("请输入关键字:")
    os.makedirs(path, exist_ok=True)
    quoted_word = urllib.parse.quote(word)

    saved = 0
    for pn in itertools.count(start=0, step=PAGE_SIZE):
        page = requests.get(
            build_search_url(quoted_word, pn), timeout=REQUEST_TIMEOUT
        ).text
        encoded_urls = OBJ_URL_RE.findall(page)
        if not encoded_urls:
            # A page without any objURL means the results are exhausted;
            # without this check the offset would keep growing forever.
            break
        for encoded in encoded_urls:
            content = fetch_image(decode_obj_url(encoded))
            if content is None:
                continue
            saved += 1
            with open(os.path.join(path, str(saved) + ".jpg"), 'wb') as f:
                f.write(content)


if __name__ == "__main__":
    main()
|
希望本文所述对大家Python程序设计有所帮助。
原文链接:https://blog.51cto.com/11623741/2093582