编程的快乐只有在运行成功的那一刻才知道QAQ
目标网站:https://www.kuaidaili.com/free/inha/ #若有侵权请联系我
因为该网站上列出的代理都是 HTTP 类型,所以代码里没有写判断代理类型(HTTP/HTTPS)的逻辑
代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib.request
import re
import time
# Page counter for the crawl loop at the bottom of the script; starts at page 1.
n = 1
# Request headers handed to urllib.request.Request in web(): a desktop Chrome
# User-Agent string.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
def web(url):
    """Fetch one listing page and store every proxy found on it.

    Downloads ``url`` using the module-level ``headers``, extracts dotted-quad
    addresses and the contents of ``"PORT">...<`` cells from the HTML, pairs
    them up in document order and passes each pair to ``store``. Finally the
    raw address and port lists are printed.

    Args:
        url: Address of the page to download.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
    """
    req = urllib.request.Request(url=url, headers=headers)
    # Open the Request object rather than the bare URL; otherwise the custom
    # User-Agent built above is never sent. The ``with`` closes the
    # connection once the body has been read.
    with urllib.request.urlopen(req) as response:
        html = response.read().decode('UTF-8', 'ignore')

    ip = r'[0-9]+(?:\.[0-9]+){3}'
    port = r'"PORT">(\d{0,1}\d{0,1}\d{0,1}\d{0,1}\d)<'
    out = re.findall(ip, html)
    out1 = re.findall(port, html)

    # Pair by position and stop at the shorter list, so a page with fewer
    # rows than usual no longer raises IndexError.
    for address, port_number in zip(out, out1):
        # store() reads its entry from key 0 of the mapping.
        store({0: (address, port_number)})
    print(out, '\n', out1)
def store(dictionary):
    """Append one proxy entry to ``ip.txt`` in the current directory.

    Args:
        dictionary: Mapping whose key ``0`` holds an ``(ip, port)`` pair of
            strings.

    The entry is written as ``ip:<ip>\\tport:<port>`` followed by a newline,
    and a confirmation message is printed afterwards.
    """
    entry = dictionary[0]
    line = 'ip:' + entry[0] + '\tport:' + entry[1] + '\n'
    # Explicit encoding keeps the output independent of the platform default.
    with open('ip.txt', 'a', encoding='utf-8') as f:
        f.write(line)
    print('store successfully')
# Walk the listing pages in order, from the current value of ``n`` up to and
# including page 3313, pausing five seconds between requests.
url1 = "https://www.kuaidaili.com/free/inha/"  # loop-invariant base URL
while n <= 3313:
    url = url1 + str(n) + '/'
    web(url)
    time.sleep(5)
    n += 1