Python 爬取西刺免费代理 IP,并使用 telnetlib.Telnet 验证是否有效

时间:2021-06-13 16:57:32

最近运行使用时间2017.12.01

运行结果正常

运行环境:Python 2.7

 
 
#coding:utf8
from bs4 import BeautifulSoup
import urllib2
import sys
reload(sys)
import telnetlib



def getProxyList(targeturl="http://www.xicidaili.com/nn/"):
    """Scrape proxy rows from the listing pages and append them to ip_port.txt.

    Pages 1 through 5 below ``targeturl`` are fetched. Every data row of the
    table with id ``ip_list`` is appended to ``ip_port.txt`` (UTF-8) as one
    line of the form ``nation|ip|port|locate|anony|protocol|speed|time``.

    Args:
        targeturl: Base URL of the listing; the page number is appended to it.

    Returns:
        The number of proxy rows written during this call.
    """
    # Imported locally so this function does not rely on the file-level
    # import list being changed.
    import io

    # Browser-like User-Agent sent with every request.
    requestHeader = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0"}

    # Number of rows written so far.
    countNum = 0

    # The scraped cells are unicode and contain Chinese text; an explicit
    # UTF-8 text file avoids the implicit ASCII encode a plain Python 2 file
    # object would attempt. ``with`` closes the file even when a request or
    # a parse step raises.
    with io.open('ip_port.txt', 'a', encoding='utf-8') as proxyFile:
        # First five pages: 1..5 inclusive.
        for page in range(1, 6):
            url = targeturl + str(page)
            print(url)
            request = urllib2.Request(url, headers=requestHeader)
            response = urllib2.urlopen(request)
            try:
                html_doc = response.read()
            finally:
                response.close()

            soup = BeautifulSoup(html_doc, "html.parser")
            table = soup.find('table', id='ip_list')
            if table is None:
                # The page did not contain the expected table; report it and
                # move on instead of failing on ``None.find_all``.
                print('ip_list table not found: ' + url)
                continue

            # The first <tr> is skipped, as in the original (header row).
            for tr in table.find_all('tr')[1:]:
                tds = tr.find_all('td')
                if len(tds) < 10:
                    # Cells up to index 9 are read below; skip shorter rows.
                    continue

                # Country comes from the flag image's alt text, when present.
                flag = tds[0].find('img')
                if flag is None:
                    nation = u'未知'
                    locate = u'未知'
                else:
                    nation = flag['alt'].strip()
                    # NOTE(review): the original read tds[4] here, which
                    # duplicated ``anony`` and left tds[3] unused; column 3
                    # is presumed to hold the server location - confirm
                    # against the page layout.
                    locate = tds[3].text.strip()
                ip = tds[1].text.strip()
                port = tds[2].text.strip()
                anony = tds[4].text.strip()
                protocol = tds[5].text.strip()
                # NOTE(review): speed/time column indices kept as in the
                # original; verify against the page layout.
                speed = tds[8].text.strip()
                time = tds[9].text.strip()

                # Unicode format string so byte and unicode values are never
                # mixed when the line is built.
                proxyFile.write(u'%s|%s|%s|%s|%s|%s|%s|%s\n' % (
                    nation, ip, port, locate, anony, protocol, speed, time))
                countNum += 1

    return countNum



# Validate the scraped proxies.
def verifyProxyList():
    """Probe every proxy in ip_port.txt and copy the reachable ones to verified.txt.

    Each non-blank line of ``ip_port.txt`` is split on ``|``; field 1 is used
    as the IP and field 2 as the port. A proxy counts as usable when
    ``telnetlib.Telnet`` can open a connection to it within 2 seconds, in
    which case the whole line is written to ``verified.txt``. That file is
    opened in ``'w'`` mode, so it is overwritten on every call.

    Blank lines and lines with fewer than three fields are skipped.

    Returns:
        None.
    """
    # ``with`` closes both files even if an unexpected error escapes the loop.
    with open('ip_port.txt', 'rb') as inFile, open('verified.txt', 'w') as outFile:
        for rawLine in inFile:
            ll = rawLine.strip()
            if not ll:
                # A blank line is not end-of-file; keep reading.
                continue

            # The input file is opened in binary mode, so split on a bytes
            # separator.
            fields = ll.split(b'|')
            if len(fields) < 3:
                # Malformed line: no ip/port fields to probe.
                continue
            ip = fields[1]
            port = fields[2]

            # Only connection-related failures mean "proxy unusable";
            # KeyboardInterrupt and programming errors are left to propagate.
            try:
                conn = telnetlib.Telnet(ip, port, timeout=2)
            except (EnvironmentError, ValueError, OverflowError):
                print('connect failed')
                continue

            # The connection was only opened as a reachability probe.
            conn.close()
            outFile.write(ll + "\n")
            print('success' + ip + port)

if __name__ == '__main__':
    # Scrape the default listing and report how many rows were saved.
    proxynum = getProxyList("http://www.xicidaili.com/nn/")
    print(u"国内高匿:" + str(proxynum))
    # The other listings can be scraped the same way by passing their base URL:
    #   http://www.xicidaili.com/nt/  (domestic, transparent)
    #   http://www.xicidaili.com/wn/  (foreign, high anonymity)
    #   http://www.xicidaili.com/wt/  (foreign, transparent)

    # Probe the saved proxies; the function returns nothing, so its result
    # is not bound to a name.
    verifyProxyList()
    print("降龙十八掌,打完收工")