spider随机请求头和ip

时间:2024-09-02 20:03:57

#创建爬虫

scrapy genspider randomIp_spider "taobao.com"

#把真正需要请求的url和其他url一起放到一个用于混淆的url列表中,避免被检测到总是访问同一个页面

import random

url_list = [
    'https://detail.tmall.com/item.htm?id=522194707780&ali_refid=a3_430583_1006:1109696291:N:%E6%B6%88%E9%98%B2%E5%BA%94%E6%80%A5%E7%81%AF:eb9682757281a9ec406cb4647d3f584a&ali_trackid=1_eb9682757281a9ec406cb4647d3f584a&spm=a230r.1.14.3',
    'https://item.taobao.com/item.htm?spm=a219r.lmn002.14.1.f3b87156TcpPbp&id=587398066660&ns=1&abbucket=16',
    'https://item.taobao.com/item.htm?spm=a230r.1.14.50.1af3248cr0GGyM&id=576997844987&ns=1&abbucket=16#detail',
]
# Pick one link at random. Scrapy iterates over `start_urls`, so it has to be
# a list: a bare string would be walked character by character instead of
# being treated as a single URL.
start_urls = [random.choice(url_list)]

#到middlewares.py文件中去(该文件顶部同样需要 import random)

#设置随机请求头
class UserAgentDownloadMiddleware(object):
    """Downloader middleware that gives every request a random User-Agent.

    It only takes effect once it is listed in ``DOWNLOADER_MIDDLEWARES``
    in ``settings.py``.
    """

    # Pool of User-Agent strings to pick from. The original list contained
    # the first entry twice, which made it twice as likely to be chosen;
    # the duplicate has been removed.
    USER_AGENTS = [
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; pl-PL; rv:1.0.1) Gecko/20021111 Chimera/0.6',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US; rv:1.0.1) Gecko/20021111 Chimera/0.6',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US; rv:1.0.1) Gecko/20021104 Chimera/0.6',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20030111 Chimera/0.6',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20030109 Chimera/0.6',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20021220 Chimera/0.6',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20021216 Chimera/0.6',
    ]

    def process_request(self, request, spider):
        """Overwrite the ``User-Agent`` header of ``request`` with a random one.

        Args:
            request: The outgoing request; its ``headers`` mapping is mutated.
            spider: The spider that issued the request (unused).

        Returns:
            None, so the request continues through the remaining middlewares.
        """
        request.headers['User-Agent'] = random.choice(self.USER_AGENTS)
#设置随机ip
class IPProxyDownloadMiddleware(object):
    """Downloader middleware that routes every request through a random proxy.

    It only takes effect once it is listed in ``DOWNLOADER_MIDDLEWARES``
    in ``settings.py``.
    """

    # Pool of proxies in ``host:port`` form.
    # NOTE(review): the original list also contained '60.167.23.29.205:44728',
    # which has five octets and is therefore not a valid IPv4 address. It is
    # left out until the intended address is confirmed.
    PROXIES = [
        '222.190.163.141:45334',
        '183.143.73.146:31998',
        '115.216.58.182:43060',
        '116.209.129.167:27158',
    ]

    def process_request(self, request, spider):
        """Attach a randomly chosen proxy to ``request.meta['proxy']``.

        Args:
            request: The outgoing request; its ``meta`` mapping is mutated.
            spider: The spider that issued the request (unused).

        Returns:
            None, so the request continues through the remaining middlewares.
        """
        proxy = random.choice(self.PROXIES)
        # Scrapy documents the proxy meta value as a full URL, so add the
        # scheme when the pool entry is a bare ``host:port``.
        if '://' not in proxy:
            proxy = 'http://' + proxy
        # Debug output: a separator line followed by the proxy in use. The
        # separator width was missing in the original; 40 is an arbitrary pick.
        print('+' * 40)
        print(proxy)
        request.meta['proxy'] = proxy
        # NOTE(review): the original also opened "texr.json" here and read
        # setting['BaseSettings']['size'] into an unused local. That fragment
        # never closed the file, used `json` without importing it and had no
        # effect on the request, so it was dropped.

#到settings.py文件中去,修改如下配置

# Enable the two custom downloader middlewares. Each value is the middleware's
# order: lower numbers have their process_request() called earlier.
# NOTE(review): the order values were missing in the original. 543/544 are
# chosen so that both run before Scrapy's built-in HttpProxyMiddleware (750),
# which consumes request.meta['proxy']; adjust to fit your project.
DOWNLOADER_MIDDLEWARES = {
    # Random User-Agent header
    'taobao_for_attack.middlewares.UserAgentDownloadMiddleware': 543,
    # Random proxy IP
    'taobao_for_attack.middlewares.IPProxyDownloadMiddleware': 544,
}