Python爬虫学习之定向爬取淘宝商品价格,供大家参考,具体内容如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
import json
import re
from urllib.parse import quote

import requests
def getHTMLText(url):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address to request with HTTP GET.

    Returns:
        The decoded response body, or "" when the request fails (connection
        error, timeout, or an HTTP error status).
    """
    try:
        response = requests.get(url, timeout=30)
        # raise_for_status() raises requests.HTTPError for 4xx/5xx responses,
        # so error pages are reported as failures instead of being parsed.
        response.raise_for_status()
        # Decode using the encoding detected from the body content.
        response.encoding = response.apparent_encoding
        return response.text
    except requests.RequestException:
        # Only request failures are treated as "no page"; anything else
        # (KeyboardInterrupt, programming errors) is allowed to propagate.
        return ""
# Matches "view_price":"<digits and dots>" and captures the price text.
_VIEW_PRICE_RE = re.compile(r'"view_price":"([\d.]*)"')
# Matches "raw_title":"<text>" and captures the text. A backslash escape
# (including \") is consumed as one unit so an escaped quote does not end
# the match early.
_RAW_TITLE_RE = re.compile(r'"raw_title":"((?:[^"\\]|\\.)*)"')


def parsePage(ilt, html):
    """Extract (price, title) pairs from *html* and append them to *ilt*.

    Prices and titles are collected independently and paired by position;
    if the page yields a different number of each, the surplus is ignored.

    Args:
        ilt: List that receives one ``[price, title]`` entry per item. The
            price is kept as the string found in the page.
        html: Page source to scan. Empty or non-string input adds nothing.

    Returns:
        None. Results are appended to *ilt* in place.
    """
    if not isinstance(html, str) or not html:
        return

    prices = _VIEW_PRICE_RE.findall(html)
    titles = _RAW_TITLE_RE.findall(html)

    for price, raw_title in zip(prices, titles):
        try:
            # Decode escapes (\uXXXX, \", ...) as a JSON string literal.
            # This replaces eval(), which must never see scraped text.
            title = json.loads('"' + raw_title + '"')
        except ValueError:
            # Not a valid JSON string body: keep the raw text so the
            # price/title pairing stays aligned.
            title = raw_title
        ilt.append([price, title])
def PrintGoodsList(ilt):
    """Print the collected goods as a tab-separated table on stdout.

    A header row is printed first, followed by one row per entry numbered
    from 1.

    Args:
        ilt: Sequence of entries where index 0 is the price and index 1 is
            the title of an item.

    Returns:
        None.
    """
    # Column widths: 4 for the row number, 8 for the price, 16 for the title.
    row_format = "{:4}\t{:8}\t{:16}"
    print(row_format.format("序号", "价格", "商品名称"))
    for number, item in enumerate(ilt, start=1):
        print(row_format.format(number, item[0], item[1]))
def main(goods='书包', depth=2):
    """Crawl Taobao search results for *goods* and print a price table.

    Args:
        goods: Search keyword. It is percent-encoded before being placed in
            the query string.
        depth: Number of result pages to request.

    Returns:
        None. The table is written to stdout by PrintGoodsList.
    """
    # The `s` query parameter advances by 44 per page; presumably that is
    # the number of items on one result page -- TODO confirm.
    page_step = 44
    start_url = 'https://s.taobao.com/search?q=' + quote(goods)
    infoList = []
    for page in range(depth):
        url = start_url + '&s=' + str(page_step * page)
        html = getHTMLText(url)
        if not html:
            # The fetch failed (getHTMLText returns ""); move on to the
            # next page instead of aborting the whole crawl.
            continue
        parsePage(infoList, html)
    PrintGoodsList(infoList)


# Run the crawler only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:http://blog.csdn.net/learn_is_happy/article/details/78773956