python爬煎蛋妹子图

时间:2023-03-08 21:48:56
 # python3
# jiandan meizi tu
import urllib
import urllib.request as req
import os
import time
import random

# User-Agent strings rotated between requests so consecutive requests do not
# all carry the same header.
USER_AGENTS = ('Mozilla/4.0', 'Mozilla/4.1', 'Mozilla/4.5', 'Mozilla/5.1')

# HTTP proxies ("host:port") that url_open2 chooses from at random.
PROXY_LIST = ['117.135.251.136:82']

# Text that introduces an image address in the page markup, and the length of
# the part of it that precedes the address itself.
_IMG_MARKER = 'img src="http'
_SRC_OFFSET = len('img src="')

# Text that precedes the page number in the markup; the number itself starts
# 23 characters after the beginning of the marker (marker + '">[').
_PAGE_MARKER = 'current-comment-page'
_PAGE_NUMBER_OFFSET = 23


def _random_request(url):
    """Build a Request for *url* carrying a randomly chosen User-Agent."""
    return urllib.request.Request(
        url, headers={'User-Agent': random.choice(USER_AGENTS)})


def _timestamp():
    """Return the local time formatted as 'YYYY-MM-DD HH:MM:SS' for log lines."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())


def url_open(url, timeout=30):
    """Fetch *url* directly (no proxy) and return the raw response body.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait before giving up on a blocking operation.

    Returns:
        The response body as bytes.

    Raises:
        urllib.error.URLError: If the request fails.
    """
    request = _random_request(url)
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read()


def url_open2(url, timeout=30):
    """Fetch *url* through a randomly chosen HTTP proxy and return the body.

    The chosen proxy address is printed before the request is made.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait before giving up on a blocking operation.

    Returns:
        The response body as bytes.

    Raises:
        urllib.error.URLError: If the request fails.
    """
    ip = random.choice(PROXY_LIST)
    print(ip)
    proxy = req.ProxyHandler({'http': ip})
    opener = req.build_opener(proxy, req.HTTPHandler)
    # Use the opener directly instead of install_opener(): installing it would
    # change the process-wide default and silently route url_open() through
    # the proxy as well.
    with opener.open(_random_request(url), timeout=timeout) as conn:
        return conn.read()


def get_current_page(url):
    """Return the current comment-page number found on *url*, as a string.

    The number is the text between 'current-comment-page">[' and the next ']'.

    Raises:
        ValueError: If the page does not contain the expected marker.
    """
    html = url_open2(url).decode('utf-8')
    marker_at = html.find(_PAGE_MARKER)
    if marker_at == -1:
        raise ValueError("'current-comment-page' not found in " + url)
    start = marker_at + _PAGE_NUMBER_OFFSET
    end = html.find(']', start)
    if end == -1:
        raise ValueError("page number is not terminated by ']' in " + url)
    return html[start:end]


def _extract_img_addrs(html):
    """Return every 'img src="http...' address in *html* that contains '.jpg'.

    Each address is cut right after its first '.jpg'.
    """
    addrs = []
    start = html.find(_IMG_MARKER)
    while start != -1:
        src_start = start + _SRC_OFFSET
        src_end = html.find('"', src_start)
        if src_end == -1:
            # No closing quote anywhere after this point, so no further
            # 'img src="' can follow either.
            break
        # Only look inside this attribute's value; searching further would
        # glue a non-jpg address to the '.jpg' of a later tag.
        jpg_at = html.find('.jpg', src_start, src_end)
        if jpg_at != -1:
            addrs.append(html[src_start:jpg_at + len('.jpg')])
        start = html.find(_IMG_MARKER, src_start)
    return addrs


def find_imgs(url):
    """Download the page at *url* and return the list of .jpg image addresses."""
    html = url_open2(url).decode('utf-8')
    return _extract_img_addrs(html)


def save_imgs(folder, img_addrs):
    """Download every address in *img_addrs* into *folder*.

    Each file is named after the last path segment of its address. *folder* is
    created if it does not exist; an empty *folder* means the current
    directory.
    """
    if folder:
        os.makedirs(folder, exist_ok=True)
    for each in img_addrs:
        filename = each.split('/')[-1]
        # Download before opening the file so a failed request does not leave
        # an empty file behind.
        img = url_open2(each)
        with open(os.path.join(folder, filename), 'wb') as f:
            f.write(img)


def download_mm(folder='xx', pages=300):
    """Download images from up to *pages* pages, newest page first.

    Args:
        folder: Directory the images are written to (created when missing).
        pages: Maximum number of pages to walk back from the newest one.
    """
    if folder:
        os.makedirs(folder, exist_ok=True)
    url = 'http://jandan.net/ooxx/'
    newest_page_num = int(get_current_page(url))
    for i in range(pages):
        current_page_num = newest_page_num - i
        if current_page_num < 1:
            # Ran out of pages before reaching the requested count.
            break
        print(_timestamp(), 'current_page_num', current_page_num)
        if i % 3 == 0:
            # Pause on every third page to avoid hammering the server.
            print(_timestamp(), "sleep 2 seconds...")
            time.sleep(2)
        page_url = url + 'page-' + str(current_page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)


if __name__ == '__main__':
    download_mm()