# -*- coding: utf-8 -*-
"""
Created on Sat Jan 20 18:08:21 2018

@author: Administrator

Time the same crawl of ten listing pages on sh.fang.anjuke.com three ways:
serially, with a 2-process pool and with a 4-process pool.
"""

import logging
import time
from multiprocessing import Pool

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
}

# Listing pages are addressed by a 1-based page number.
URL_TEMPLATE = 'https://sh.fang.anjuke.com/loupan/all/p{}/'

# Seconds to wait for the server before giving up on a page; without a
# timeout a stalled connection would block a worker indefinitely.
REQUEST_TIMEOUT = 10


def _parse_loupan(html):
    """Extract one record per listing from the HTML of a listing page.

    Args:
        html: Page markup as text.

    Returns:
        A list of dicts with the keys '标题' (title), '地址' (address text),
        '地区' (second non-breaking-space separated piece of the address
        text), '面积(全)' (full layout text), '面积' (last tab-separated
        piece of the layout text, stripped) and '价格' (text of the <p>
        inside the price link). The list is as long as the shortest group
        of matched tags.
    """
    soup = BeautifulSoup(html, 'html.parser')
    names = [tag.text for tag in soup.find_all('span', class_='items-name')]
    addresses = [tag.text for tag in soup.find_all('span', class_='list-map')]
    layouts = [tag.text for tag in soup.find_all('a', class_='huxing')]
    price_links = soup.find_all('a', class_='favor-pos')

    records = []
    for name, address, layout, link in zip(names, addresses, layouts,
                                           price_links):
        address_parts = address.split('\xa0')
        records.append({
            '标题': name,
            '地址': address,
            # A malformed field yields '' for this record only, so one odd
            # listing does not cost the whole page.
            '地区': address_parts[1] if len(address_parts) > 1 else '',
            '面积(全)': layout,
            '面积': layout.split('\t')[-1].strip(),
            '价格': link.p.text if link.p is not None else '',
        })
    return records


def get_loupan(url):
    """Download one listing page and return the records found on it.

    Args:
        url: Address of the listing page.

    Returns:
        A list of record dicts (see ``_parse_loupan``). An empty list is
        returned when the page cannot be fetched, so a failed page is falsy
        and does not abort a crawl of many pages.
    """
    try:
        res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
    except requests.RequestException as exc:
        # Only network/HTTP failures are tolerated; anything else is a
        # programming error and should surface.
        logger.warning('Failed to fetch %s: %s', url, exc)
        return []
    return _parse_loupan(res.text)


def _run_serial(urls):
    """Fetch every URL in turn, pausing 1 s after each; return elapsed seconds."""
    start = time.time()
    for url in urls:
        get_loupan(url)
        time.sleep(1)  # per-page delay; it is part of the measured time
    return time.time() - start


def _run_pool(urls, processes):
    """Fetch every URL through a pool of worker processes; return elapsed seconds.

    The measurement covers pool start-up and the blocking ``map`` call.
    """
    start = time.time()
    pool = Pool(processes=processes)
    try:
        # The iterable must be the list of URLs: mapping over a single URL
        # string would call get_loupan once per character.
        pool.map(get_loupan, urls)
        return time.time() - start
    finally:
        pool.close()
        pool.join()


if __name__ == '__main__':
    urls = [URL_TEMPLATE.format(page) for page in range(1, 11)]

    print('串行爬虫:', _run_serial(urls))

    # The last entry repeats the four-process measurement so the script
    # still prints the same four labelled lines.
    pool_runs = (
        ('两个进程:', 2),
        ('四个进程:', 4),
        ('(四)个进程:', 4),
    )
    for label, processes in pool_runs:
        print(label, _run_pool(urls, processes))
        time.sleep(1)  # short pause between bursts, outside the timed region