爬虫流程:优先假设目标数据是JSON数据,抓包时只能通过翻页来触发请求。数据分为JSON数据和HTML数据两类:1. JSON数据是异步数据(即先返回HTML,再返回目标数据,翻页只是触发了JSON请求),不在HTML中;2. 抓包时不能刷新网页,要直接翻页。
测试链接:/
源代码:
import json
import os

import requests


class Two:
    """Download the screenshot image of every entry in a paged JSON list.

    The stages call each other in a chain: build the page parameters ->
    request the list page -> parse its entries -> download each image ->
    make sure the output directory exists -> write the file.

    NOTE(review): in the text this file was recovered from, several
    identifiers were missing (the counter and headers attribute names, the
    ``requests``/``os`` calls, the final ``main()`` call). They are
    reconstructed here from how they are used; confirm against the
    original script.
    """

    # Seconds to wait for any single HTTP response before giving up.
    REQUEST_TIMEOUT = 10

    # Characters deleted from nicknames before they are used as file names.
    # '.' and '/' were removed by the original code; the rest are characters
    # that are invalid in file names on common file systems.
    _UNSAFE_CHARS = str.maketrans('', '', './\\:*?"<>|')

    def __init__(self, base_url='', save_dir='虎牙', pages=7):
        """Initialise the counter, endpoint, headers and output settings.

        Args:
            base_url: Scheme and host prepended to the list endpoint path.
            save_dir: Directory the images are written to.
            pages: Number of list pages to request, starting at page 1.
        """
        # 1-based count of saved images; only used in the progress message.
        self.num = 1
        # NOTE(review): the original URL had no scheme or host, which
        # requests rejects. The host is presumably the Huya live site
        # (the output folder is named '虎牙') -- confirm and pass it in
        # as ``base_url``.
        self.start_url = base_url.rstrip('/') + '/liveHttpUI/getLiveList?'
        self.headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/125.0.0.0 Safari/537.36'
            )
        }
        self.save_dir = save_dir
        self.pages = pages

    def confrim_params(self):
        """Build the query parameters for each page and request that page.

        The parameters are built here rather than in ``__init__`` because
        ``__init__`` runs once while the page number changes per request.
        """
        for page in range(1, self.pages + 1):
            params = {
                'iGid': '1663',
                'iPageNo': str(page),  # sent as a string, like the other values
                'iPageSize': '120',
            }
            self.request_start_url(params)

    def request_start_url(self, params):
        """Request one list page and pass the decoded JSON to the parser.

        Args:
            params: Query parameters for the list endpoint.

        Raises:
            requests.RequestException: If the request fails, times out or
                returns an HTTP error status.
        """
        response = requests.get(
            self.start_url,
            headers=self.headers,
            params=params,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        # Response.json() decodes the body directly; it replaces the
        # two-step json.loads(response.text) alternative.
        self.parse_response(response.json())

    def parse_response(self, response):
        """Extract name and image link from each list entry and download it.

        Args:
            response: Decoded JSON of one list page; entries are read from
                its ``'vList'`` key.
        """
        # A page past the end may carry no list at all; treat that as empty.
        for data in response.get('vList') or []:
            link = data.get('sScreenshot')
            if not link:
                # Nothing to download for this entry.
                continue
            name = data['sNick'].translate(self._UNSAFE_CHARS)
            self.request_link(name, link)

    def request_link(self, name, link):
        """Download one image and hand its bytes to the save stage.

        A failed download is reported and skipped so that one bad link does
        not abort the whole run (and an error page is never saved as .jpg).

        Args:
            name: Sanitised name used for the output file.
            link: URL of the image.
        """
        try:
            response = requests.get(
                link, headers=self.headers, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()
        except requests.RequestException as exc:
            print('skip {}: {}'.format(name, exc))
            return
        self.create_dir(name, response.content)

    def create_dir(self, name, img_data):
        """Make sure the output directory exists, then save the image.

        Args:
            name: Sanitised name used for the output file.
            img_data: Raw image bytes.
        """
        # Same directory that save_data writes into; the original created
        # '../虎牙' but wrote to '虎牙/', so the write could fail.
        os.makedirs(self.save_dir, exist_ok=True)
        self.save_data(name, img_data)

    def save_data(self, name, img_data):
        """Write the image bytes to ``<save_dir>/<name>.jpg`` and log progress.

        Args:
            name: Sanitised name used for the output file.
            img_data: Raw image bytes.
        """
        path = os.path.join(self.save_dir, f'{name}.jpg')
        with open(path, 'wb') as f:
            f.write(img_data)
        print('ok 第{}张--{}'.format(self.num, name))
        self.num += 1

    def main(self):
        """Run the whole crawl."""
        self.confrim_params()


if __name__ == '__main__':
    t = Two()
    t.main()