"""Download the songs listed on a Taihe (Baidu) Music artist page as .m4a files."""
import re

import requests


class BaiduMusic(object):
    """Collect song ids from an artist page and download each song's audio.

    Workflow (driven by :meth:`response_down`):

    1. :meth:`vipsong_id` scrapes the song ids from the artist page.
    2. :meth:`song_url` turns every id into a ``playAAC`` API URL.
    3. Each API URL is queried for its ``file_link`` and the audio is saved
       as ``<title>.m4a`` in the current working directory.
    """

    # Characters that are illegal or unsafe in file names on common systems
    # (path separators, Windows-reserved punctuation, control characters).
    _UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

    def __init__(self, timeout=10):
        """Set up the API URL template, request headers and result holders.

        Args:
            timeout: Seconds to wait for each HTTP request before giving up.
                Without a timeout ``requests`` may block indefinitely.
        """
        # API endpoint template; ``{}`` is replaced by the song id.
        self.url = 'http://musicapi.taihe.com/v1/restserver/ting?method=baidu.ting.song.playAAC&format=jsonp&songid={}'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.20 Safari/537.36'
        }
        self.timeout = timeout
        self.song_id = []  # song ids scraped from the artist page
        self.song_url_data = []  # API URLs, one per song id

    # 1. Collect the ids of the (VIP) songs; artist page 7994 is the example
    #    (Jay Chou's song list, per the original author's note).
    def vipsong_id(self):
        """Scrape the artist page and store the unique song ids in ``song_id``.

        Raises:
            requests.RequestException: If the page cannot be fetched or the
                server answers with an HTTP error status.
        """
        url_id = 'http://music.taihe.com/artist/7994'
        response_id = requests.get(url_id, headers=self.headers, timeout=self.timeout)
        # Fail loudly instead of silently parsing an error page into an empty set.
        response_id.raise_for_status()
        # ``\d+`` (not ``\d*``) so that an empty id is never captured.
        self.song_id = set(re.findall(r'a href="/song/(\d+)"', response_id.text))

    # 2. Build the API URL for every song id.
    def song_url(self):
        """Fill ``song_url_data`` with one API URL per id in ``song_id``.

        The list is rebuilt on every call so that running the workflow twice
        does not queue each song more than once.
        """
        self.song_url_data = [self.url.format(i) for i in self.song_id]

    # 3. Query every song URL and download the audio.
    def response_down(self):
        """Run the whole workflow: scrape ids, build URLs, download each song.

        A song that cannot be downloaded (network error, HTTP error status,
        malformed API answer, missing download link) is reported on stdout
        and skipped so the remaining songs are still processed. Errors while
        writing the file to disk are not caught here.

        Raises:
            requests.RequestException: If the artist page itself cannot be
                fetched (see :meth:`vipsong_id`).
        """
        self.vipsong_id()  # step 1: song ids
        self.song_url()  # step 2: API URLs
        for url in self.song_url_data:
            try:
                self._download_song(url)
            except (requests.RequestException, ValueError, KeyError, TypeError) as err:
                print('skipped {}: {!r}'.format(url, err))

    def _download_song(self, url):
        """Query one API URL and save the referenced audio as ``<title>.m4a``."""
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()
        file_link = data['bitrate']['file_link']
        title = data['songinfo']['title']
        print(file_link, title)  # progress / debug output
        if not file_link:
            raise ValueError('no download link for {!r}'.format(title))
        response_download = requests.get(file_link, headers=self.headers, timeout=self.timeout)
        # Check the status first so an error page is never saved as audio.
        response_download.raise_for_status()
        with open(self._safe_filename(title) + '.m4a', mode='wb') as f:
            f.write(response_download.content)

    @classmethod
    def _safe_filename(cls, title):
        """Return ``title`` with characters that are unsafe in file names replaced by ``_``."""
        cleaned = cls._UNSAFE_FILENAME_CHARS.sub('_', str(title)).strip()
        return cleaned or 'untitled'


if __name__ == '__main__':
    BaiduMusic().response_down()
---------------------------------------------------------------------------------------------------------------------------------------------------------------
爬取完毕后,打开保存下来的 .m4a 文件,即可播放对应的歌曲:
代码 __init__ 中 self.url 的链接,是通过分析非 VIP 歌曲的下载链接得到的;
请求该链接后,可以在返回的内容中找到歌曲的下载链接,对应的字段就是 file_link。