python使用代理ip发送http请求

时间:2021-12-24 14:46:30

一、需求背景

网站刷票时,经常会遇到一个ip只能投票一次的限制,为此需要使用代理ip

 

二、脚本如下:

1、Proxy_http.py使用代理ip发送http的get和post请求

 1 #coding:utf-8
 2 import urllib2,urllib,time,socket,random,Proxy_ip,Useragent
 3 
 4 
 5 def Visitpage(proxyip,url):
 6     socket.setdefaulttimeout(6)
 7     proxy_support = urllib2.ProxyHandler({'http':proxyip})
 8     user_agent = random.choice(Useragent.user_agents)
 9     opener = urllib2.build_opener(proxy_support,urllib2.HTTPHandler)
10     urllib2.install_opener(opener)
11     try:
12         request = urllib2.Request(url)
13         request.add_header('Referer','http://www.baidu.com')
14         request.add_header('User-Agent',user_agent)
15         html = urllib2.urlopen(request).read()
16         print html
17         time.sleep(random.randint(60,180))
18     except urllib2.URLError,e:
19         print 'URLError! The bad proxy is %s' %proxyip
20     except urllib2.HTTPError,e:
21         print 'HTTPError! The bad proxy is %s' %proxyip
22     except:
23         print 'Unknown Errors! The bad proxy is %s ' %proxyip
24 
25 
26 def Clicklikebutton(proxyip,url,data):
27     socket.setdefaulttimeout(6)
28     proxy_support = urllib2.ProxyHandler({'http':proxyip})
29     user_agent = random.choice(Useragent.user_agents)
30     opener = urllib2.build_opener(proxy_support,urllib2.HTTPHandler)
31     try:
32         request = urllib2.Request(url)
33         request.add_header('Referer','http://www.baidu.com')
34         request.add_header('User-Agent',user_agent)
35         data = urllib.urlencode(data)
36         resp = opener.open(request, data)
37         print resp.read()
38         time.sleep(random.randint(60,180))
39     except urllib2.URLError,e:
40         print 'URLError! The bad proxy is %s' %proxyip
41     except urllib2.HTTPError,e:
42         print 'HTTPError! The bad proxy is %s' %proxyip
43     except:
44         print 'Unknown Errors! The bad proxy is %s ' %proxyip
45 
46 def main():
47     for i in range(len(Proxy_ip.iplist)):
48         proxyip = Proxy_ip.iplist[i]
49         i += 1
50         print proxyip
51         for m in range(random.randint(2,4)):
52             Visitpage(proxyip,'你的get请求url地址')54         Clicklikebutton(proxyip,'你的post请求地址',{你的post请求参数})
55 
56 if __name__ == "__main__":
57     main()

2、Useragent.py文件为agent库

#!/usr/bin/python
#-*- coding:utf-8 -*-

# Pool of browser User-Agent strings for random.choice().
# BUG FIX: the original list had NO commas between the string literals,
# so Python's implicit string concatenation collapsed all 14 entries
# into ONE giant string and random.choice could never vary the agent.
user_agents = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60',
    'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
    'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16',
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0',
    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
    'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36',
]

3、Proxy_ip.py为读取代理ip文件

#coding:utf-8

# Proxy addresses loaded from disk, one per line, e.g. "1.2.3.4:8080".
iplist = []

# BUG FIX: the original used the Python-2-only file() constructor and
# never closed the handle; `with open(...)` works on py2.6+ and py3 and
# guarantees the file is closed.  Iterating the handle directly avoids
# materialising the whole file via readlines().
with open("E:\\monkey_test\\http\\1222-1.txt", "r") as datafile:
    for line in datafile:
        iplist.append(line.strip('\n'))

4、代理ip的获取

可在以下网站获取http://www.xicidaili.com/(可自行开发脚本爬取网站的ip,容易被封,脚本在此不提供)

也可以购买http://www.daxiangdaili.com/