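"""Scrape QQ Music playlists.

Date: 2024-04-17 17:32:04

How the request parameters were worked out:

1. Song download URL: comparing the download requests of several songs shows
   that only two things vary - the media id embedded in the URL and the
   ``vkey`` query parameter. The media id is the ``strMediaMid`` field of each
   song in the playlist response; ``vkey`` is obtained by sending that song's
   ``songmid`` field to a second URL, which returns the vkey.
2. Playlist URL: the only variable parameter is ``disstid``. Its value is the
   ``dissid`` field of each entry returned by the playlist-listing (diss)
   request.
"""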

import json
import os
import re
from urllib.parse import urlencode

import requests
# Shared HTTP request headers: a desktop Chrome user-agent plus QQ Music
# origin/referer values.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
    ),
    'origin': 'https://y.qq.com',
    'referer': 'https://y.qq.com/portal/playlist.html',
}


def fetch_url(url, timeout=10):
    """Fetch *url* with the shared request headers and decode the JSON body.

    Args:
        url: Fully built request URL, query string included.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON body when the server answers 200 or 201. ``None`` when
        the status is anything else, the request fails, or the body is not
        valid JSON; the reason is printed in each case.
    """
    try:
        # Without a timeout a single stalled connection blocks the crawl.
        r = requests.get(url, headers=headers, timeout=timeout)
        if r.status_code in (200, 201):
            return r.json()
        print('HTTP {} for {}'.format(r.status_code, url))
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(e)
    return None


def down_song(path, strMediaMid, vkey):
    """Download one song as an ``.m4a`` file and write it to *path*.

    Args:
        path: Destination file path; its directory must already exist.
        strMediaMid: Media id inserted into the file name of the download URL.
        vkey: Access key sent as the ``vkey`` query parameter.

    Returns:
        ``True`` when the file was written; ``False`` when the request failed
        or the server answered with a status other than 200/201.

    Raises:
        OSError: If *path* cannot be opened or written.
    """
    params = {
        'guid': '5300386295',
        'vkey': vkey,
        'uin': '0',
        'fromtag': '66',
    }
    url = 'http://222.73.132.154/amobile.music.tc.qq.com/C400{}.m4a?'.format(strMediaMid)
    url += urlencode(params)
    try:
        # The body is fetched completely before the file is opened, so a
        # network error never leaves a truncated file behind.
        r = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as e:
        # Report and carry on, so one bad song does not abort the whole run.
        print(e)
        return False
    if r.status_code not in (200, 201):
        print('HTTP {} for {}'.format(r.status_code, url))
        return False
    with open(path, 'wb') as f:
        f.write(r.content)
    return True


def get_vkey(songmid):
    """Request the ``vkey`` for one song.

    Args:
        songmid: The song's ``songmid`` value from the playlist response.

    Returns:
        The ``vkey`` of the first ``midurlinfo`` entry in the response, or
        ``''`` when the request fails or the response does not have the
        expected shape.
    """
    # Serialise with json.dumps rather than %-formatting so that a quote or
    # backslash in songmid cannot corrupt the payload. Compact separators keep
    # the request text identical to the hand-written form for ordinary ids.
    payload = {
        'req_0': {
            'module': 'vkey.GetVkeyServer',
            'method': 'CgiGetVkey',
            'param': {
                'guid': '5300386295',
                'songmid': [str(songmid)],
                'songtype': [0],
                'uin': '0',
                'loginflag': 1,
                'platform': '20',
            },
        },
        'comm': {'uin': 0, 'format': 'json', 'ct': 24, 'cv': 0},
    }
    params = {
        '-': 'getplaysongvkey7256617694143965',
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
        'data': json.dumps(payload, separators=(',', ':')),
    }
    url = 'https://u.y.qq.com/cgi-bin/musicu.fcg?' + urlencode(params)
    result = fetch_url(url)
    try:
        return result['req_0']['data']['midurlinfo'][0]['vkey']
    except (KeyError, IndexError, TypeError):
        # fetch_url returned None, or the response had an unexpected shape.
        return ''


def get_song_info(disstid):
    """Yield the songs of one playlist.

    Args:
        disstid: Playlist id sent as the ``disstid`` query parameter.

    Yields:
        ``(strMediaMid, songmid, songname)`` tuples, one per song in the
        ``songlist`` of the first ``cdlist`` entry of the response. Nothing is
        yielded when the request fails or the response lacks those fields;
        songs missing one of the three values are skipped.
    """
    params = {
        'type': '1',
        'json': '1',
        'utf8': '1',
        'onlysong': '0',
        'disstid': disstid,
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
    }
    url = 'https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?'
    url += urlencode(params)
    result = fetch_url(url)
    try:
        songlist = result['cdlist'][0]['songlist']
    except (KeyError, IndexError, TypeError):
        # Failed request (None) or a response without the expected fields.
        return
    for song in songlist or []:
        try:
            entry = (song['strMediaMid'], song['songmid'], song['songname'])
        except (KeyError, TypeError):
            # Skip a malformed entry instead of aborting the whole playlist.
            continue
        yield entry


def get_dist_info(page):
    """Yield the playlists on one page of the playlist listing.

    Args:
        page: 1-based page number (anything ``int()`` accepts). Each page
            covers 30 entries: ``sin`` is the first index requested and
            ``ein`` the last.

    Yields:
        ``(dissid, dissname)`` tuples, one per playlist in the response.
        Nothing is yielded when the request fails or the response has no
        ``data.list``; entries missing either field are skipped.
    """
    page_no = int(page)
    params = {
        'picmid': '1',
        'rnd': '0.15993662911508766',
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
        'categoryId': '10000000',
        'sortId': '5',
        'sin': page_no * 30 - 30,
        'ein': page_no * 30 - 1,
    }
    url = 'https://c.y.qq.com/splcloud/fcgi-bin/fcg_get_diss_by_tag.fcg?'
    url += urlencode(params)
    result = fetch_url(url)
    try:
        disslist = result['data']['list']
    except (KeyError, TypeError):
        # Failed request (None) or a response without the expected fields.
        return
    for diss in disslist or []:
        try:
            entry = (diss['dissid'], diss['dissname'])
        except (KeyError, TypeError):
            # Skip a malformed entry instead of aborting the page.
            continue
        yield entry


# Characters replaced when a title is used as a file or directory name:
# \ / : * ? " < > | plus CR and LF. Compiled once instead of once per song.
_ILLEGAL_NAME_CHARS = re.compile(r'[\\/::*?"<>|\r\n]+')


def _safe_name(name, fallback):
    """Return *name* made usable as a file name, or *fallback* if nothing is left."""
    cleaned = _ILLEGAL_NAME_CHARS.sub(' ', str(name))
    # Trim the ends: Windows does not reliably handle a name that ends in a
    # space or a period.
    cleaned = cleaned.strip().rstrip('. ')
    return cleaned or str(fallback)


def main(page, base_dir='d://data'):
    """Download every song of every playlist on one listing page.

    Songs are saved as ``<base_dir>/<playlist name>/<song name>.m4a``; the
    directories are created as needed. Progress is printed per song.

    Args:
        page: Listing page number, passed to ``get_dist_info``.
        base_dir: Root directory for the downloads.
    """
    for dissid, dissname in get_dist_info(page):
        diss_dir = '{}/{}'.format(base_dir, _safe_name(dissname, dissid))
        for strMediaMid, songMid, songname in get_song_info(dissid):
            vkey = get_vkey(songMid)
            songname = _safe_name(songname, songMid)
            if not vkey:
                # No vkey means the download request cannot succeed; skip it.
                print("跳过(未获取到vkey):{}".format(songname))
                continue
            # makedirs also creates base_dir itself on a first run, and does
            # not fail when the playlist directory already exists.
            os.makedirs(diss_dir, exist_ok=True)
            path = '{}/{}.m4a'.format(diss_dir, songname)
            print("正在下载:{}".format(songname))
            # Only an explicit False counts as a reported failure; a None
            # return (no status reported) is treated as done.
            if down_song(path, strMediaMid, vkey) is False:
                continue
            print("下载完成:{}".format(songname))


if __name__ == '__main__':
    # Script entry point: download the first page of the playlist listing.
    page = 1
    main(page)