# 爬虫下载QQ音乐：获取所有歌手-每个歌手的专辑-每个专辑里的歌曲

# coding=utf-8
# !/usr/bin/env python
'''
    author: dangxusheng
    desc  : 稍微有点难度，需要多次请求获取key
    date  : 2018-08-29
'''

# 导入模块
import json
import os
import os.path as osp
import re
import time

import requests
from lxml import etree

# Module-level configuration shared by the functions in this file.

# HTTP headers passed to every request. The functions below assign their own
# "Referer" value into this dict before sending requests.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36 Qiyu/2.1.1.1",
    "Referer": "https://y.qq.com/portal/player.html"
}

# Root directory under which download_rank_list() stores its files.
SAVE_ROOT_PATH = 'H:/spider_download'

def download_rank_list():
    """Download the songs of one QQ Music top list below SAVE_ROOT_PATH.

    Fetches the hard-coded top-list URL, then for every listed song asks the
    vkey service for a download path and streams the audio to
    ``{SAVE_ROOT_PATH}/qqmusic-20210411/{song}-{singers}-{filename}``.

    A song is skipped when the vkey service reports a non-zero code, returns
    no ``midurlinfo`` entry or an empty ``purl``, when the download does not
    answer with HTTP 200, or when a file larger than 100 KiB already exists
    at the target path.

    Side effects: overwrites ``headers['Referer']``, prints progress to
    stdout, and sleeps 2 seconds after each finished download.
    """
    # To obtain a fresh list URL, open
    #   https://y.qq.com/n/yqq/toplist/62.html#stat=y_new.toplist.menu.62
    # locate the request to https://u.y.qq.com/cgi-bin/musics.fcg?-=getUCGI****
    # and paste it below.
    # Hot-song chart:
    # url = "https://u.y.qq.com/cgi-bin/musics.fcg?-=getUCGI4061906502614365&g_tk=5381&sign=zzaqgitaptrt3c68c23599f05a73b8beed6c1e387cb55&loginUin=0&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data=%7B%22detail%22%3A%7B%22module%22%3A%22musicToplist.ToplistInfoServer%22%2C%22method%22%3A%22GetDetail%22%2C%22param%22%3A%7B%22topId%22%3A26%2C%22offset%22%3A0%2C%22num%22%3A20%2C%22period%22%3A%222021_14%22%7D%7D%2C%22comm%22%3A%7B%22ct%22%3A24%2C%22cv%22%3A0%7D%7D"
    url = "https://u.y.qq.com/cgi-bin/musics.fcg?-=getUCGI22105661521063658&g_tk=5381&sign=zza4sgwo26nxligs809cad6fef7d9240750df6df27bc0296&loginUin=0&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data=%7B%22detail%22%3A%7B%22module%22%3A%22musicToplist.ToplistInfoServer%22%2C%22method%22%3A%22GetDetail%22%2C%22param%22%3A%7B%22topId%22%3A128%2C%22offset%22%3A0%2C%22num%22%3A20%2C%22period%22%3A%222021_14%22%7D%7D%2C%22comm%22%3A%7B%22ct%22%3A24%2C%22cv%22%3A0%7D%7D"

    headers['Referer'] = "https://y.qq.com/n/yqq/toplist/62.html"
    with requests.session() as ie:
        rep = ie.get(url, headers=headers)
        html = rep.content.decode('utf-8')
        song_list = json.loads(html)['detail']['data']['songInfoList']
        for song in song_list:
            song_mid = song['mid']
            song_name = song['name']
            album_mid = song['album']['mid']
            singer = '&'.join([item['name'] for item in song['singer']])
            media_id = song['file']['media_mid']
            print(song_name)
            print(song_mid)
            print(album_mid)
            print(singer)
            print(media_id)

            get_key_url = "https://u.y.qq.com/cgi-bin/musicu.fcg?g_tk=5381&jsonpCallback=getplaysongvkey0996617262271613&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq&needNewCode=0&data=%7B%22req_0%22%3A%7B%22module%22%3A%22vkey.GetVkeyServer%22%2C%22method%22%3A%22CgiGetVkey%22%2C%22param%22%3A%7B%22guid%22%3A%228216405924%22%2C%22songmid%22%3A%5B%22" + song_mid + "%22%5D%2C%22songtype%22%3A%5B0%5D%2C%22uin%22%3A%220%22%2C%22loginflag%22%3A1%2C%22platform%22%3A%2220%22%7D%7D%2C%22comm%22%3A%7B%22uin%22%3A0%2C%22format%22%3A%22json%22%2C%22ct%22%3A20%2C%22cv%22%3A0%7D%7D"
            rep = ie.get(get_key_url, headers=headers)
            data = json.loads(rep.content.decode('utf-8'))
            # Both the envelope and the req_0 sub-response carry their own
            # status code; anything other than 0 means there is nothing to use.
            if data['code'] != 0 or data['req_0']['code'] != 0:
                continue
            midurlinfo_list = data['req_0']['data']['midurlinfo']
            if not midurlinfo_list:
                continue

            filename = midurlinfo_list[0]['filename']  # e.g. C400002Fc5Be34LLWm.m4a
            purl = midurlinfo_list[0]['purl']
            if not purl:
                # Without a purl the download URL below would point at the bare
                # host path, so whatever came back would be saved as a "song".
                print('《%s》 没有可用的下载地址，已跳过。' % song_name)
                continue

            # Drop the characters that are not allowed in file names.
            fname = re.sub(r'[*"/:?\\|<>]', '', f"{song_name}-{singer}-{filename}")
            save_filepath = f'{SAVE_ROOT_PATH}/qqmusic-20210411/{fname}'
            os.makedirs(os.path.dirname(save_filepath), exist_ok=True)
            # Check for a previous download BEFORE issuing the audio request;
            # files of 100 KiB or less are treated as incomplete and re-fetched.
            if os.path.exists(save_filepath) and os.path.getsize(save_filepath) > 100 * 1024:
                continue

            download_url = f'http://183.131.48.150/amobile.music.tc.qq.com/{purl}'
            with ie.get(download_url, headers=headers, stream=True) as rep:
                if rep.status_code != 200:
                    print('《%s》 下载失败，HTTP 状态码 %s。' % (song_name, rep.status_code))
                    continue
                with open(save_filepath, 'wb') as file:
                    for byte_data in rep.iter_content(1024):
                        file.write(byte_data)
            print('《%s》 下载成功！' % song_name)
            time.sleep(2)

    print('Done.')

# Fetch the singer list
# https://y.qq.com/portal/singer_list.html
def get_singer_list():
    """Return the singers listed by the hard-coded singer-list request.

    The request URL is fixed (``cur_page`` 1 with all filters set to -100),
    so only that single response is read.

    Returns:
        A list of dicts with the keys ``singer_mid``, ``singer_name`` and
        ``singer_pic``, copied from each entry of
        ``singerList.data.singerlist`` in the response.

    Side effects: overwrites ``headers['Referer']``.
    """
    url = "https://u.y.qq.com/cgi-bin/musicu.fcg?callback=getUCGI25738961582047115&g_tk=5381&jsonpCallback=getUCGI25738961582047115&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq&needNewCode=0&data=%7B%22comm%22%3A%7B%22ct%22%3A24%2C%22cv%22%3A10000%7D%2C%22singerList%22%3A%7B%22module%22%3A%22Music.SingerListServer%22%2C%22method%22%3A%22get_singer_list%22%2C%22param%22%3A%7B%22area%22%3A-100%2C%22sex%22%3A-100%2C%22genre%22%3A-100%2C%22index%22%3A-100%2C%22sin%22%3A0%2C%22cur_page%22%3A1%7D%7D%7D"
    headers['Referer'] = "https://y.qq.com/portal/singer_list.html"
    with requests.session() as ie:
        rep = ie.get(url, headers=headers)
    body = rep.content.decode('utf-8').strip()
    # The reply is JSONP: callbackName({...}). Cut between the first "(" and
    # the last ")" instead of slicing a fixed number of characters, so a
    # different callback name or a trailing ";" does not corrupt the JSON.
    if not body.startswith('{'):
        body = body[body.index('(') + 1:body.rindex(')')]
    singer_list = json.loads(body)['singerList']['data']['singerlist']
    return [
        {
            'singer_mid': singer['singer_mid'],
            "singer_name": singer['singer_name'],
            "singer_pic": singer['singer_pic'],
        }
        for singer in singer_list
    ]


# Fetch the album list of one singer
def get_album_list(singer_mid=''):
    """Return the albums the singer-album request lists for ``singer_mid``.

    The request is fixed to ``begin`` 0 and ``num`` 5 ordered by time, so
    only that single response is read.

    Args:
        singer_mid: singer identifier inserted into the request URL and into
            the Referer header.

    Returns:
        A list of dicts with the keys ``album_mid``, ``album_name``,
        ``singer_mid`` and ``singer_name``, copied from each entry of
        ``singerAlbum.data.list`` in the response.

    Side effects: overwrites ``headers['Referer']``.
    """
    url = "https://u.y.qq.com/cgi-bin/musicu.fcg?callback=getUCGI2613146679247198&g_tk=5381&jsonpCallback=getUCGI2613146679247198&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq&needNewCode=0&data=%7B%22singerAlbum%22%3A%7B%22method%22%3A%22get_singer_album%22%2C%22param%22%3A%7B%22singermid%22%3A%22" + singer_mid + "%22%2C%22order%22%3A%22time%22%2C%22begin%22%3A0%2C%22num%22%3A5%2C%22exstatus%22%3A1%7D%2C%22module%22%3A%22music.web_singer_info_svr%22%7D%7D"
    headers['Referer'] = "https://y.qq.com/n/yqq/singer/" + singer_mid + ".html"
    with requests.session() as ie:
        rep = ie.get(url, headers=headers)
    body = rep.content.decode('utf-8').strip()
    # The reply is JSONP: callbackName({...}). Cut between the first "(" and
    # the last ")" instead of slicing a fixed number of characters, so a
    # different callback name or a trailing ";" does not corrupt the JSON.
    if not body.startswith('{'):
        body = body[body.index('(') + 1:body.rindex(')')]
    album_list = json.loads(body)['singerAlbum']['data']['list']
    return [
        {
            'album_mid': item['album_mid'],
            'album_name': item['album_name'],
            'singer_mid': item['singer_mid'],
            'singer_name': item['singer_name'],
        }
        for item in album_list
    ]


def _jsonp_payload(text):
    """Return the JSON text inside a JSONP wrapper such as ``cb({...})``.

    Surrounding whitespace is ignored, and text that already starts with
    ``{`` is returned as-is. Raises ValueError when no parentheses are found.
    """
    text = text.strip()
    if text.startswith('{'):
        return text
    return text[text.index('(') + 1:text.rindex(')')]


# Download every song of an album, given the album id
def download_music_by_albumid(albummid='', singername=''):
    """Download all songs of one album into ``./qqmusic``.

    Reads the album's song list, then for each song asks the vkey service
    for a server address (``sip``) and path (``purl``) and streams the audio
    to ``./qqmusic/{song}-{singername}-{filename}``.

    Args:
        albummid: album identifier inserted into the album-info request,
            e.g. "001mTkmb4GJlh4".
        singername: text placed in the middle of each saved file name.

    A song is skipped when the vkey response has no ``sip`` entry, no
    ``midurlinfo`` entry or an empty ``purl``, or when the download does not
    answer with HTTP 200.

    Side effects: overwrites ``headers['Referer']``, creates ``./qqmusic`` if
    missing, prints progress, and sleeps 2 seconds after each download.
    """
    url = "https://c.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=" + albummid + "&g_tk=5381&jsonpCallback=albuminfoCallback&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq&needNewCode=0"
    headers['Referer'] = "https://y.qq.com/portal/player.html"
    # open() does not create missing directories.
    os.makedirs('./qqmusic', exist_ok=True)
    with requests.session() as ie:
        rep = ie.get(url, headers=headers)
        song_list = json.loads(_jsonp_payload(rep.content.decode('utf-8')))['data']['list']
        for song in song_list:
            song_name = song['songname']
            song_mid = song['songmid']
            get_key_url = "https://u.y.qq.com/cgi-bin/musicu.fcg?callback=getplaysongvkey0996617262271613&g_tk=5381&jsonpCallback=getplaysongvkey0996617262271613&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq&needNewCode=0&data=%7B%22req_0%22%3A%7B%22module%22%3A%22vkey.GetVkeyServer%22%2C%22method%22%3A%22CgiGetVkey%22%2C%22param%22%3A%7B%22guid%22%3A%228216405924%22%2C%22songmid%22%3A%5B%22" + song_mid + "%22%5D%2C%22songtype%22%3A%5B0%5D%2C%22uin%22%3A%220%22%2C%22loginflag%22%3A1%2C%22platform%22%3A%2220%22%7D%7D%2C%22comm%22%3A%7B%22uin%22%3A0%2C%22format%22%3A%22json%22%2C%22ct%22%3A20%2C%22cv%22%3A0%7D%7D"
            rep = ie.get(get_key_url, headers=headers)
            data = json.loads(_jsonp_payload(rep.content.decode('utf-8')))['req_0']['data']

            sip_list = data['sip']
            midurlinfo_list = data['midurlinfo']
            # Without a server address or a purl there is no usable URL;
            # skip the song rather than crash or save a bogus file.
            if not sip_list or not midurlinfo_list or not midurlinfo_list[0]['purl']:
                print('《%s》 没有可用的下载地址，已跳过。' % song_name)
                continue

            filename = midurlinfo_list[0]['filename']
            song_url = sip_list[0] + midurlinfo_list[0]['purl']
            # Drop the characters that are not allowed in file names (same
            # rule as download_rank_list), so a "/" in a title cannot change
            # the target directory.
            fname = re.sub(r'[*"/:?\\|<>]', '', song_name + "-" + singername + "-" + filename)
            with ie.get(song_url, headers=headers, stream=True) as rep:
                if rep.status_code != 200:
                    print('《%s》 下载失败，HTTP 状态码 %s。' % (song_name, rep.status_code))
                    continue
                with open('./qqmusic/%s' % fname, 'wb') as file:
                    for byte_data in rep.iter_content(1024):
                        file.write(byte_data)
            print('《%s》 下载成功！' % song_name)
            time.sleep(2)


# Script entry point
if __name__ == '__main__':
    # Crawl order: singer list -> each singer's albums -> every song of each album.
    for singer in get_singer_list():
        singer_name = singer['singer_name']
        for album in get_album_list(singer['singer_mid']):
            download_music_by_albumid(album['album_mid'], singer_name)
            # Pause between albums, in addition to the per-song pause inside
            # download_music_by_albumid.
            time.sleep(2)
    # No explicit exit() here: the script ends on its own, and exit() is a
    # helper injected by the site module rather than a guaranteed builtin.