抓取QQ音乐歌单
1.通过分析歌曲下载路径来分析所需参数: 通过比较, 得出歌曲下载url与参数vkey是可变的,
歌曲下载url中可变的值是请求歌单返回的歌曲数据中的strMediaMid参数, 而vkey需要用请求歌单返回的歌曲数据中的songmid参数再次请求另一个url才能得到
2.通过分析请求歌单url来分析所需参数: disstid
disstid的值为请求歌单列表(diss)返回数据中的dissid参数
import os
import re
from urllib.parse import urlencode

import requests

# HTTP headers sent with every request made by this script.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'origin': 'https://y.qq.com',
    'referer': 'https://y.qq.com/portal/playlist.html'
}

# Seconds to wait on a server before giving up, so that a single stalled
# connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 10

# Runs of these characters are replaced by a space before a playlist or song
# name is used as a directory or file name.
_ILLEGAL_NAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n]+')


def _safe_name(name):
    """Return *name* with characters unsuitable for file names replaced.

    Leading/trailing whitespace is stripped afterwards (trailing spaces are
    troublesome in Windows file names), and a placeholder is returned when
    nothing usable is left.
    """
    return _ILLEGAL_NAME_CHARS.sub(' ', name).strip() or 'untitled'


def fetch_url(url):
    """GET *url* and return its decoded JSON body.

    Returns None when the request fails, the status code is not 200/201, or
    the body is not valid JSON. Errors are printed, not raised.
    """
    try:
        r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        if r.status_code in (200, 201):
            return r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that cannot be decoded as JSON.
        print(e)
    return None


def down_song(path, strMediaMid, vkey):
    """Download one song and write it to *path*.

    Args:
        path: Destination file path for the .m4a data.
        strMediaMid: Media id placed in the download URL.
        vkey: Key obtained from get_vkey(), sent as a query parameter.

    Returns:
        True when the file was written, False when the request failed or the
        server did not answer with status 200/201.
    """
    params = {
        # Same guid as the one embedded in the get_vkey() request payload.
        'guid': '5300386295',
        'vkey': vkey,
        'uin': '0',
        'fromtag': '66'
    }
    url = 'http://222.73.132.154/amobile.music.tc.qq.com/C400{}.m4a?'.format(strMediaMid)
    url += urlencode(params)
    try:
        r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        # Best effort: one failed download must not abort the whole crawl.
        print(e)
        return False
    if r.status_code not in (200, 201):
        return False
    with open(path, 'wb') as f:
        f.write(r.content)
    return True


def get_vkey(songmid):
    """Request the vkey for the song identified by *songmid*.

    Returns:
        The vkey string from the response, or None when the request failed
        or the response does not have the expected structure.
    """
    url = 'https://u.y.qq.com/cgi-bin/musicu.fcg?'
    params = {
        '-': 'getplaysongvkey7256617694143965',
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
        'data': '{"req_0":{"module":"vkey.GetVkeyServer","method":"CgiGetVkey","param":{"guid":"5300386295","songmid":["%s"],"songtype":[0],"uin":"0","loginflag":1,"platform":"20"}},"comm":{"uin":0,"format":"json","ct":24,"cv":0}}' % songmid
    }
    url += urlencode(params)
    result = fetch_url(url)
    try:
        return result['req_0']['data']['midurlinfo'][0]['vkey']
    except (TypeError, KeyError, IndexError):
        # TypeError: fetch_url() returned None; the others: unexpected shape.
        return None


def get_song_info(disstid):
    """Yield the songs of the playlist identified by *disstid*.

    Yields:
        (strMediaMid, songmid, songname) tuples, one per song. Nothing is
        yielded when the request failed or the response has no song list.
    """
    url = 'https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?'
    params = {
        'type': '1',
        'json': '1',
        'utf8': '1',
        'onlysong': '0',
        'disstid': disstid,
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
    }
    url += urlencode(params)
    result = fetch_url(url)
    try:
        songlist = result['cdlist'][0]['songlist']
    except (TypeError, KeyError, IndexError):
        return
    for song in songlist:
        yield song['strMediaMid'], song['songmid'], song['songname']


def get_dist_info(page):
    """Yield the playlists listed on result page *page* (1-based).

    Each page requests 30 entries: indices (page-1)*30 through page*30-1.

    Yields:
        (dissid, dissname) tuples. Nothing is yielded when the request
        failed or the response has no playlist list.
    """
    url = 'https://c.y.qq.com/splcloud/fcgi-bin/fcg_get_diss_by_tag.fcg?'
    params = {
        'picmid': '1',
        'rnd': '0.15993662911508766',
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
        'categoryId': '10000000',
        'sortId': '5',
        'sin': int(page) * 30 - 30,
        'ein': int(page) * 30 - 1,
    }
    url += urlencode(params)
    result = fetch_url(url)
    try:
        disslist = result['data']['list']
    except (TypeError, KeyError):
        return
    for diss in disslist:
        yield diss['dissid'], diss['dissname']


def main(page, base_dir='d://data'):
    """Download every song of every playlist on result page *page*.

    Songs are stored as ``<base_dir>/<playlist name>/<song name>.m4a``.

    Args:
        page: 1-based page number passed to get_dist_info().
        base_dir: Root output directory; created on demand.
    """
    for dissid, dissname in get_dist_info(page):
        # Sanitise the playlist name once rather than once per song.
        diss_dir = os.path.join(base_dir, _safe_name(dissname))
        for strMediaMid, songMid, songname in get_song_info(dissid):
            vkey = get_vkey(songMid)
            songname = _safe_name(songname)
            if not vkey:
                # Without a vkey the download URL cannot be built usefully.
                print("跳过(无法获取vkey):{}".format(songname))
                continue
            # Created lazily so a playlist without downloadable songs leaves
            # no empty directory; also creates base_dir itself if missing.
            os.makedirs(diss_dir, exist_ok=True)
            path = os.path.join(diss_dir, '{}.m4a'.format(songname))
            print("正在下载:{}".format(songname))
            if down_song(path, strMediaMid, vkey):
                print("下载完成:{}".format(songname))
            else:
                print("下载失败:{}".format(songname))


if __name__ == '__main__':
    page = 1
    main(page)