1 # coding: utf-8 2 3 import urllib2 4 import re 5 import time 6 7 def getDL(page): 8 url = 'http://www.xicidaili.com/nt/{}'.format(page) 9 header = { 10 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36' 11 } 12 13 req=urllib2.Request(url, headers=header) 14 res=urllib2.urlopen(req) 15 html=res.read() 16 17 srclist=re.findall(r'<tr class=(.|\n)*?<td>(\d+\.\d+\.\d+\.\d+)</td>(.|\n)*?<td>(\d+)</td>(.|\n)*?<td>(HTTP|HTTPS)</td>', html) 18 xlist = [] 19 for item in srclist: 20 xlist.append((item[5],item[1],item[3])) 21 return xlist 22 23 def testDL(ipstr): 24 proxy= urllib2.ProxyHandler({'http':"{}:{}".format(ipstr[1], ipstr[2])}) 25 opener=urllib2.build_opener(proxy) 26 urllib2.install_opener(opener) 27 28 try: 29 testUrl = 'http://httpbin.org/ip' 30 testUrl = 'http://2017.ip138.com/ic.asp' 31 req=urllib2.Request(testUrl) 32 res=urllib2.urlopen(req).read() 33 print "********************* √ {} -- {}".format(ipstr, res) 34 35 with open("ok.txt","a") as f: 36 f.write("{} {} {}\n".format(ipstr[0], ipstr[1], ipstr[2])) 37 f.close() 38 except Exception as e: 39 print "******** ×, {} -- {}".format(ipstr, e) 40 time.sleep(1) 41 42 def startTask(): 43 for page in xrange(5): 44 list=getDL(page+1) 45 for item in list: 46 testDL(item) 47 48 if __name__ == '__main__': 49 startTask()