import json
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from time import sleep


class VipSpider(object):
    def __init__(self, url, search, start_page, end_page):
        self.url = url
        self.search = search
        self.start_page = start_page
        self.end_page = end_page
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36"}
        # Browser driver used for clicking, searching and scrolling
        # (Chrome is assumed; chromedriver must be on PATH)
        self.driver = webdriver.Chrome()

    def handle_click(self):
        # Open the home page, click the target category entry, type the search
        # term and submit the search (Selenium 3 API; with Selenium 4 use
        # driver.find_elements(By.XPATH, ...) instead)
        self.driver.get(self.url)
        self.driver.find_elements_by_xpath("//*[@id='J_main_nav_link']/li[13]/a")[0].click()
        sleep(2)
        self.driver.find_elements_by_xpath("//*[@id='J-search']/div[1]/input")[0].send_keys(self.search)
        sleep(2)
        self.driver.find_elements_by_xpath("//*[@id='J-search']/div[1]/a/span")[0].click()
        sleep(3)

    def handle_url(self, page):
        # Current URL looks like ".../?keyword=%E7%AF%AE%E7%90%83&ff=235|12|1|1"
        Durl = self.driver.current_url
        # Keep only the part before the first "&", then rebuild the URL with the page number
        index = Durl.find("&")
        Durl = Durl[:index]
        data = {
            "page": page
        }
        res = requests.get(url=Durl, params=data, headers=self.headers)
        newurl = res.url
        print(newurl)
        return newurl

    def scroll_page(self, req):
        self.driver.get(req)
        sleep(3)
        for x in range(20):
            # Scroll down repeatedly so lazily loaded goods are rendered
            js = "var q=document.documentElement.scrollTop=10000"
            self.driver.execute_script(js)  # run the scroll script
            sleep(5)
        html = self.driver.page_source
        return html

    def download(self, request):
        soup = BeautifulSoup(request, "lxml")
        # The CSS class names below were partly lost in the original listing;
        # the ".c-goods" / ".goods-info" selectors are a best guess and may
        # need to be adjusted to the current page markup.
        SectionList = soup.select("section#J_searchCatList")[0]
        GoodsList = SectionList.select("div.c-goods")
        items = []
        for div in GoodsList:
            item = {}
            imageslink = div.select("img")[0]["data-original"]
            title = div.select(".goods-info a")[0].get_text()
            discount = div.select(".goods-info span")[0].get_text()
            pricewra = div.select(".goods-info em")[0].get_text()
            marprice = div.select(".goods-info .goods-market-price")[0].get_text()
            item["image_link"] = imageslink
            item["product_name"] = title
            item["product_discount"] = discount
            item["sale_price"] = pricewra
            item["original_price"] = marprice
            items.append(item)
        return items

    def startSpider(self):
        itemList = []
        for page in range(int(self.start_page), int(self.end_page) + 1):
            if page == 1:
                # The first page has to be reached by clicking and searching in the browser
                self.handle_click()
            req = self.handle_url(page)
            newhtml = self.scroll_page(req)
            itemList += self.download(newhtml)
        # [Data storage] convert the item list to a JSON string and write it to disk
        string = json.dumps(itemList, ensure_ascii=False)
        # Output filename was missing from the original listing; any path will do
        with open("goods.json", "w", encoding="utf-8") as fp:
            fp.write(string)


def main():
    # The base URL was stripped from the original listing; vip.com is assumed
    # from the spider's element IDs (J_main_nav_link, J-search, J_searchCatList)
    url = "https://www.vip.com/"
    search = input("Enter the product you want to search for: ")
    start_page = input("Enter the start page to crawl: ")
    end_page = input("Enter the end page to crawl: ")
    spider = VipSpider(url, search, start_page, end_page)
    spider.startSpider()


if __name__ == '__main__':
    main()