# nvshens per-gallery batch image download crawler 1.00 (multithreaded version)

```python
from bs4 import BeautifulSoup
import requests
import urllib.request
import os
import threading

user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}

# Download the collected images to local disk
def downloadPics(pictures):
    while len(pictures) > 0:
        pic = pictures.pop()
        name = pic.split('/')[-1]
        folder = pic.split('/')[-2]
        # Create the target directory if it does not exist yet
        os.makedirs('./' + folder, exist_ok=True)
        try:
            rsp = urllib.request.urlopen(pic)
            img = rsp.read()
            with open('./' + folder + '/' + name, 'wb') as f:
                f.write(img)
            print('Image ' + pic + ' downloaded')
        except Exception:
            print('Image ' + pic + ' failed, pushed back for retry')
            pictures.append(pic)

# Worker thread: crawls every page of one gallery, then downloads its images
class dldThread(threading.Thread):
    def __init__(self, name, url):
        threading.Thread.__init__(self, name=name)
        self.name = name
        self.url = url
        self.pictures = []

    def run(self):
        while self.url is not None:
            print('Thread ' + self.name + ' crawling page ' + self.url)
            try:
                rsp = requests.get(self.url, headers=headers)
                soup = BeautifulSoup(rsp.text, 'html.parser')
                next_url = None
                for divs in soup.find_all(class_='gallery_wrapper'):
                    # Collect every image URL found in the gallery wrapper
                    for img in divs.find_all('img'):
                        print(img.get('src'))
                        self.pictures.append(img.get('src'))
                    # Look for the "next page" link ('下一页' is its text on the site)
                    for link in divs.find_all('a', class_='a1'):
                        if link.string == '下一页' and link.get('href').find('.html') != -1:
                            next_url = 'https://www.nvshens.com' + link.get('href')
                # Only advance after the page parsed cleanly, so an exception
                # above retries the same page instead of losing it
                self.url = next_url
                if self.url is not None:
                    print('Thread ' + self.name + ' moving to next page')
                else:
                    print('Thread ' + self.name + ' finished crawling, downloading...')
                    downloadPics(self.pictures)
                    print('Thread ' + self.name + ' finished downloading.')
            except Exception:
                # Whatever the exception, keep retrying until the page is crawled
                print('Thread ' + self.name + ' hit an exception, retrying')

# Spawn one crawler thread per gallery id
def main():
    for i in range(10000, 20000):  # adjust the id range as needed
        url = 'https://www.nvshens.com/g/' + str(i) + '/'
        th = dldThread(name=str(i), url=url)
        th.start()

# Kick off
main()
```
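
Note that `main()` starts one OS thread per gallery id, i.e. up to 10,000 threads at once, which can exhaust memory and hammer the site. Below is a minimal sketch of a throttled alternative that reuses the `dldThread` class above through `concurrent.futures.ThreadPoolExecutor`; the 8-worker cap and the `crawl_gallery` wrapper are my own assumptions, not part of the original script.

```python
from concurrent.futures import ThreadPoolExecutor

def crawl_gallery(gallery_id):
    # Hypothetical wrapper: builds the same per-gallery worker as main() does
    th = dldThread(name=str(gallery_id),
                   url='https://www.nvshens.com/g/' + str(gallery_id) + '/')
    th.run()  # run the crawl in the pool's worker thread instead of th.start()

def main_throttled():
    # Cap concurrency at 8 workers instead of one thread per gallery id
    with ThreadPoolExecutor(max_workers=8) as pool:
        pool.map(crawl_gallery, range(10000, 20000))
```

Calling `th.run()` directly, rather than `th.start()`, executes the crawl inside the pool's worker thread, so `max_workers` remains the only source of concurrency.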