Python 获取代理IP并验证可用性

时间:2022-12-27 16:58:06
# -*- coding: utf-8 -*-
import requests
import random
import re
import time
from bs4 import BeautifulSoup
# Scrape "ip:port" proxy candidates from the listing pages, then keep only
# the ones that can relay a plain HTTP request to the check URL.

# Browser User-Agent strings; one is chosen at random for each request.
# Built once here instead of on every loop iteration.
USER_AGENTS = [
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us)AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0',
    'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36',
]

PROXY_LIST_URL = 'http://www.xicidaili.com/nn/'
# The original literal carried a stray "[图片]" prefix, which made the URL
# invalid so that every proxy check raised and every proxy was discarded.
CHECK_URL = 'http://www.baidu.com'
PAGE_COUNT = 8        # listing pages 1..8 are fetched
LIST_TIMEOUT = 10     # seconds allowed for one listing page
CHECK_TIMEOUT = 3     # seconds allowed for one proxy check
PAGE_DELAY = 1        # seconds to pause after each listing page

# Same pattern as before; the empty alternative lets it match any class
# value, not only "odd".
ROW_CLASS_RE = re.compile("(odd)|()")


def random_headers():
    """Return request headers carrying a randomly chosen User-Agent."""
    return {'User-Agent': random.choice(USER_AGENTS)}


def parse_proxies(html):
    """Extract "ip:port" strings from one listing page.

    The second and third ``<td>`` of each matched ``<tr>`` are read as the
    IP and the port. Rows with fewer than three cells, or whose cells have
    no single text node, are skipped instead of raising.
    """
    soup = BeautifulSoup(html, 'lxml')
    found = []
    for row in soup.find_all('tr', attrs={'class': ROW_CLASS_RE}):
        cells = row.find_all('td')
        if len(cells) < 3:
            continue
        ip, port = cells[1].string, cells[2].string
        if ip and port:
            found.append(ip.strip() + ':' + port.strip())
    return found


def fetch_proxies(pages=PAGE_COUNT, delay=PAGE_DELAY):
    """Download listing pages 1..``pages`` and return all candidates found.

    A page that cannot be fetched is reported and skipped so that a single
    network failure does not abort the whole run.
    """
    collected = []
    for page in range(1, pages + 1):
        url = PROXY_LIST_URL + str(page)
        try:
            resp = requests.get(url, headers=random_headers(),
                                timeout=LIST_TIMEOUT)
            resp.raise_for_status()
        except requests.RequestException as exc:
            print('page %d failed: %s' % (page, exc))
        else:
            collected.extend(parse_proxies(resp.text))
        print(page)        # progress indicator
        time.sleep(delay)  # pause between listing requests
    return collected


def is_alive(proxy):
    """Return True if ``proxy`` ("ip:port") relays a GET to CHECK_URL.

    Any HTTP response counts as success; its status code and the proxy
    address are printed. A request-level failure (timeout, refused
    connection, ...) counts as a dead proxy.
    """
    proxies = {"http": "http://%s" % proxy}
    try:
        resp = requests.get(CHECK_URL, headers=random_headers(),
                            proxies=proxies, timeout=CHECK_TIMEOUT)
    except requests.RequestException:
        return False
    print(resp.status_code)
    print(proxy)
    return True


def filter_working(candidates, check=is_alive):
    """Return the candidates for which ``check(candidate)`` is true.

    A new list is built rather than removing entries from ``candidates``
    while iterating over it; in-place removal makes the loop skip the
    element that follows every removed one.
    """
    return [proxy for proxy in candidates if check(proxy)]


if __name__ == "__main__":
    # tds_list ends up holding only the proxies that passed the check.
    tds_list = filter_working(fetch_proxies())