"""Scrape app listings from the Wandoujia (豌豆荚) app store.

Request URL (one request per page; only ``page`` varies):
    https://www.wandoujia.com/wdjweb/api/category/more?catId=6001&subCatId=0&page=2&ctoken=vbw9lj1sRQsRddx0hD-XqCNF

Workflow:
    1. Send a GET request to the paging API (``get_page``).
    2. Parse the HTML fragment carried in the JSON response and append one
       text record per app to ``wandoujia.text`` (``parse_data``).
"""
import re

import requests
from bs4 import BeautifulSoup

# Paging endpoint; ``{}`` is replaced by the 1-based page number.
PAGE_URL_TEMPLATE = (
    'https://www.wandoujia.com/wdjweb/api/category/more'
    '?catId=6001&subCatId=0&page={}&ctoken=vbw9lj1sRQsRddx0hD-XqCNF'
)

# The size <span> is located by its ``title`` attribute, e.g. "25MB".
# Compiled once here instead of once per card.
SIZE_TITLE_PATTERN = re.compile(r'\d+MB')

# File that scraped records are appended to.
OUTPUT_PATH = 'wandoujia.text'

# Text layout of one scraped record (labels: name, URL, downloads, size).
APP_RECORD_TEMPLATE = '''
游戏名称:{}
游戏地址:{}
下载人数:{}
游戏大小:{}
\n
'''


# 1. Send the request
def get_page(url):
    """Send a GET request to *url* and return the ``requests.Response``.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    # The original discarded the response, so callers received None.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return response


def _tag_text(tag):
    """Return the text of a BeautifulSoup tag, or '' when the tag is missing."""
    return tag.text if tag is not None else ''


# 2. Parse the data
def parse_data(text):
    """Extract app records from an HTML fragment and append them to disk.

    Every ``<li class="card">`` element in *text* yields one record holding
    the app name, detail-page URL, install count and size. Each record is
    printed and then appended to ``OUTPUT_PATH``. Fields whose tag cannot be
    found are written as empty strings instead of aborting the whole run.

    Args:
        text: HTML markup containing the app cards.
    """
    soup = BeautifulSoup(text, 'lxml')
    records = []

    for card in soup.find_all(name='li', class_='card'):
        name_tag = card.find(name='a', class_='name')
        app_name = _tag_text(name_tag)
        app_url = name_tag.attrs.get('href', '') if name_tag is not None else ''
        download_num = _tag_text(card.find(name='span', class_='install-count'))
        # Not every card is guaranteed to carry a size in "<n>MB" form.
        app_size = _tag_text(
            card.find(name='span', attrs={'title': SIZE_TITLE_PATTERN})
        )

        app_data = APP_RECORD_TEMPLATE.format(
            app_name, app_url, download_num, app_size
        )
        print(app_data)
        records.append(app_data)

    if records:
        # Open the file once per page rather than once per card; the
        # ``with`` block flushes and closes it on exit.
        with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
            f.writelines(records)


def main():
    """Fetch pages 1-30 of the category listing and store every app found."""
    for page in range(1, 31):
        url = PAGE_URL_TEMPLATE.format(page)
        print(url)

        # Send the request to the API and convert the JSON body to a dict.
        response = get_page(url)
        data = response.json()

        # NOTE(review): the HTML fragment is assumed to live under
        # data -> content in the JSON payload; confirm against a live
        # response.
        content = (data.get('data') or {}).get('content')
        if not content:
            print('No content returned for page {}; skipping.'.format(page))
            continue

        parse_data(content)


if __name__ == '__main__':
    main()