# python爬煎蛋妹子图 — Python scraper for jandan.net image pages

 # python3

 # jiandan meizi tu

 import urllib

 import urllib.request as req

 import os

 import time

 import random

 def url_open(url):
     """Fetch ``url`` directly (no proxy) and return the response body.

     Each call sends one request whose ``User-Agent`` header is chosen at
     random from a small fixed pool.

     Args:
         url: Address to fetch; handed to ``urllib.request.Request``.

     Returns:
         The complete response body as ``bytes``.

     Raises:
         Whatever ``urllib.request.urlopen`` raises for this URL (for
         example ``urllib.error.URLError``); nothing is caught here.
     """
     user_agents = ('Mozilla/4.0', 'Mozilla/4.1', 'Mozilla/4.5', 'Mozilla/5.1')
     request = urllib.request.Request(
         url, headers={'User-Agent': random.choice(user_agents)})

     # The response is a context manager; using it guarantees the underlying
     # connection is closed even if read() fails part-way through.
     with urllib.request.urlopen(request) as response:
         return response.read()

 def url_open2(url):
     """Fetch ``url`` through an HTTP proxy and return the response body.

     A proxy address is picked at random from ``ip_list`` and printed, then a
     request with a randomly chosen ``User-Agent`` header is sent through an
     opener configured with that proxy. Only the ``'http'`` scheme is mapped
     to the proxy.

     NOTE: the opener is installed globally with ``install_opener``, so later
     ``urllib.request.urlopen`` calls anywhere in the process use it as well.

     Args:
         url: Address to fetch; handed to ``urllib.request.Request``.

     Returns:
         The complete response body as ``bytes``.

     Raises:
         Whatever the opener raises for this URL (for example
         ``urllib.error.URLError``); nothing is caught here.
     """
     ip_list = ['117.135.251.136:82']
     ip = random.choice(ip_list)
     print (ip)

     user_agents = ('Mozilla/4.0', 'Mozilla/4.1', 'Mozilla/4.5', 'Mozilla/5.1')
     request = req.Request(
         url, headers={'User-Agent': random.choice(user_agents)})

     proxy = req.ProxyHandler({'http': ip})
     opener = req.build_opener(proxy, req.HTTPHandler)
     # Kept for backward compatibility: other code may rely on the proxy
     # opener being the process-wide default after this call.
     req.install_opener(opener)

     # Close the connection deterministically instead of leaking it.
     with opener.open(request) as conn:
         return conn.read()

 def get_current_page(url):
     """Return the current comment-page number shown on the page at ``url``.

     The page is downloaded with ``url_open2`` and decoded as UTF-8. The
     result is the text between the ``current-comment-page`` marker (plus a
     fixed offset) and the next ``]``.

     Args:
         url: Address of the page to inspect.

     Returns:
         The page number as a ``str`` (the caller converts it to ``int``).

     Raises:
         ValueError: If the marker or the closing ``]`` is not present, so a
             layout change fails loudly instead of yielding an arbitrary
             slice of the page.
     """
     html = url_open2(url).decode('utf-8')

     marker_pos = html.find('current-comment-page')
     if marker_pos == -1:
         raise ValueError("'current-comment-page' marker not found in page")

     # Skip the 20-character marker plus 3 more characters
     # (presumably '">[' -- confirm against the live markup).
     start = marker_pos + 23
     end = html.find(']', start)
     if end == -1:
         raise ValueError("closing ']' after page-number marker not found")

     return html[start:end]

 def find_imgs(url):
     """Collect the ``.jpg`` image addresses referenced on the page at ``url``.

     The page is downloaded with ``url_open2`` and decoded as UTF-8. Every
     occurrence of ``img src="http`` is examined; if ``.jpg`` appears within
     the next 255 characters *and before the closing quote of the attribute*,
     the address up to and including ``.jpg`` is recorded.

     Args:
         url: Address of the page to scan.

     Returns:
         A list of address strings in document order (duplicates are kept).
     """
     html = url_open2(url).decode('utf-8')

     attr_prefix = 'img src="'
     needle = attr_prefix + 'http'
     img_addrs = []

     pos = html.find(needle)
     while pos != -1:
         addr_start = pos + len(attr_prefix)

         # Never look past the end of this src attribute: otherwise a
         # non-jpg image followed closely by some other ".jpg" text would
         # produce a bogus "address" spanning unrelated markup.
         limit = pos + 255
         closing_quote = html.find('"', addr_start, limit)
         if closing_quote != -1:
             limit = closing_quote

         ext_pos = html.find('.jpg', pos, limit)
         if ext_pos != -1:
             img_addrs.append(html[addr_start:ext_pos + 4])
             resume_at = ext_pos
         else:
             # Not a jpg: continue scanning just after this match.
             resume_at = pos + len(needle)

         pos = html.find(needle, resume_at)

     return img_addrs

 def save_imgs(folder,img_addrs):
     """Download every address in ``img_addrs`` and save it as a file.

     Each file is named after the last ``/``-separated component of its
     address and is written to the current working directory.

     Args:
         folder: Not used by this function; files go to the current working
             directory (the caller in this file changes into the target
             folder before calling).
         img_addrs: Iterable of image address strings.

     Raises:
         Whatever ``url_open2`` or ``open`` raise; nothing is caught here.
     """
     for each in img_addrs:
         filename = each.split('/')[-1]

         # Download before opening the file so a failed request does not
         # leave an empty file behind.
         img = url_open2(each)

         with open(filename, 'wb') as f:
             f.write(img)

 def download_mm(folder = 'xx',pages = 300):
     """Download images from up to ``pages`` comment pages, newest first.

     The function changes the process working directory to ``folder``
     (creating it when missing), reads the current page number from the
     landing page, then walks backwards one page at a time, saving every
     image address that ``find_imgs`` reports.

     Args:
         folder: Directory to store images in; created if it does not exist.
         pages: Maximum number of pages to process.

     Raises:
         ValueError: If the current page number cannot be parsed as an int.
         Other exceptions from the network or file system are not caught.
     """
     # Create the target directory instead of crashing in chdir when it is
     # missing; an existing directory is left untouched.
     os.makedirs(folder, exist_ok=True)
     os.chdir(folder)

     url = 'http://jandan.net/ooxx/'
     current_page_num = int(get_current_page(url))

     for i in range(pages):
         # Stop once the numbered pages are exhausted rather than requesting
         # page 0 or negative page numbers.
         if current_page_num < 1:
             break

         print (time.strftime("%Y-%m-%d %H:%M:%S",time.localtime()),'current_page_num', current_page_num)

         # Pause on every third iteration.
         if i % 3 == 0:
             print (time.strftime("%Y-%m-%d %H:%M:%S",time.localtime()),"sleep 2 seconds...")
             time.sleep(2)

         # Fetch the page that was just logged; decrement only afterwards so
         # the newest page is not skipped and the log matches the request.
         page_url = url + 'page-' + str(current_page_num) + '#comments'
         img_addrs = find_imgs(page_url)
         save_imgs(folder, img_addrs)

         current_page_num -= 1

 # Script entry point: run the downloader with its default arguments when
 # this file is executed directly.
 if __name__ == '__main__':

     download_mm()
# 秒客网

# python爬煎蛋妹子图

# 相关文章