使用python爬取猎聘网的职位信息

时间:2024-10-06 07:43:38
#-*- coding:utf-8 -*- # 抓取猎聘的职位 import time import threading import requests import re from bs4 import BeautifulSoup def get_job_list(job): thread_name = threading.current_thread().name print(f'[{thread_name}]:{job}') page_num=0 while True: url='/city-bj/zhaopin/pn'+str(page_num)+'/?key='+job+'&d_sfrom=search_city&d_ckId=757adc2153c6034f3c9d7fc1970e617d&d_curPage=1&d_pageSize=40&d_headId=757adc2153c6034f3c9d7fc1970e617d' resp=requests.get(url) soup=BeautifulSoup(resp.text,'') try: for div in soup.find_all('div',class_='sojob-item-main clearfix'): print(div.find('a').text,end=' ') xinzi= div.find('span',class_='text-warning') print(xinzi.next,end=' ') area=div.find('a',class_='area') edu= div.find('span',class_='edu') print(area.text,end=' ') print(edu.next) except Exception as e: print(e) div=soup.find('a',string=re.compile('下一页')) if div: print(div.text) print(page_num) else: break time.sleep(1) page_num+=1 # 获取所有的职位类型 def get_all_job_type(): url='/city-bj/zhaogongzuo/?sfrom=click-pc_homepage-centre_keywordjobs-search_new' resp=requests.get(url) soup=BeautifulSoup(resp.text,'') all_list=[] for dd in soup.find_all('dd'): a_all=dd.find_all('a') for a in a_all: # print() all_list.append(a.text) job_list=all_list[:-21] return job_list job_list=get_all_job_type() for job in job_list: print(job) t=threading.Thread(target=get_job_list,args=(job,)) t.start()