Python 爬取淘宝商品销量的程序:运行程序,输入想要爬取的商品关键词即可。代码中的“###”用于进一步约束商品的属性,比如想统计某位作者的书籍,可以把“###”替换为作者名字、出版时期等。最后可以得到所要商品的总销量。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
import requests
import bs4
import re
import json
def open(keywords, page, timeout=10):
    """Fetch one page of Taobao search results, sorted by sales descending.

    NOTE: this function shadows the builtin ``open`` within this module; the
    name is kept so existing callers keep working.

    Args:
        keywords: Search query, sent as the ``q`` parameter.
        page: 1-based result page number; converted to the ``s`` offset
            parameter using 44 items per page.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The ``requests.Response`` for the search request.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
    }
    payload = {'q': keywords, 'sort': "sale-desc", 's': (page - 1) * 44}
    url = "https://s.taobao.com/search"
    # The original built `headers` but never sent them; pass them along so the
    # request actually carries the browser User-Agent.
    res = requests.get(url, params=payload, headers=headers, timeout=timeout)
    return res
def get_item(res):
    """Extract the product listings embedded in a search result page.

    The page text is searched for a ``g_page_config = {...};`` assignment
    terminated by a newline. Its JSON payload is decoded and the listings
    are read from ``mods -> itemlist -> data -> auctions``.

    Args:
        res: Response-like object exposing the page source as ``res.text``.

    Returns:
        A list with one dict per listing, holding the fields we care about:
        ``id``, ``title``, ``link``, ``price``, ``sale`` and ``shoper``
        (ID, title, link, price, sales label and seller).

    Raises:
        ValueError: If the page contains no ``g_page_config`` assignment
            (for example when something other than a result page came
            back), or if its payload is not valid JSON.
        KeyError: If the payload or a listing lacks an expected key.
    """
    match = re.search(r'g_page_config = (.*?);\n', res.text)
    if match is None:
        # Fail with a clear message instead of AttributeError on None.group().
        raise ValueError("g_page_config not found in the response text")
    page_config = json.loads(match.group(1))
    auctions = page_config['mods']['itemlist']['data']['auctions']
    # Keep only the fields of interest, under shorter key names.
    return [
        {
            'id': each['nid'],
            'title': each['title'],
            'link': each['detail_url'],
            'price': each['view_price'],
            'sale': each['view_sales'],
            'shoper': each['nick'],
        }
        for each in auctions
    ]
def _parse_sales(text):
    """Convert a sales label such as ``'120人付款'`` into an integer count.

    Returns 0 when the label is empty or contains no digits. A number
    followed by ``万`` (ten thousand) is scaled accordingly, so
    ``'1.5万+人付款'`` yields 15000 rather than 1.
    """
    found = re.search(r'(\d+)(\.\d+)?(万)?', text or '')
    if found is None:
        return 0
    whole, fraction, ten_thousands = found.groups()
    if ten_thousands:
        # round() guards against float noise such as 1.1 * 10000.
        return round(float(whole + (fraction or '')) * 10000)
    return int(whole)


def count_sales(items, keyword='###'):
    """Sum the sales of every item whose title contains ``keyword``.

    Args:
        items: Iterable of dicts with at least ``'title'`` and ``'sale'``
            keys, as produced by ``get_item``.
        keyword: Substring that a title must contain for the item to be
            counted. Defaults to the ``'###'`` placeholder used by the
            original script; replace it with e.g. an author name.

    Returns:
        Total sales as an int; 0 when nothing matches.
    """
    return sum(
        _parse_sales(each['sale'])
        for each in items
        if keyword in each['title']
    )
def main(pages=10):
    """Prompt for a search keyword and print the total sales found.

    Fetches result pages 1 through ``pages`` with ``open``, extracts the
    listings with ``get_item`` and adds up the matching sales with
    ``count_sales``.

    Args:
        pages: Number of result pages to fetch. Defaults to 10, the value
            hard-coded in the original script.
    """
    keywords = input("请输入搜索关键词:")  # any product name
    total = 0  # running sum of sales across all pages
    for page in range(1, pages + 1):
        res = open(keywords, page)
        items = get_item(res)
        total += count_sales(items)
    print(total)
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:https://blog.csdn.net/qq_25774883/article/details/81292383