爬取网易云音乐飙升榜(100首)
目的
- 学习requests模块的使用
- 练习爬虫的机制
- 仅学习使用,不做非法事情
爬取页面
代码
import requests
from lxml import etree
import time
class DownLoad(object):
    """Interactive downloader for the NetEase Cloud Music toplist page.

    The toplist page at ``self.url`` is fetched once, every song listed in its
    hidden ``<ul class="f-hide">`` element is collected, and the user is then
    prompted repeatedly for the number of a song to save as an ``.mp3`` file
    under ``SAVE_DIR``.

    Attributes are documented next to their assignments in ``__init__``.
    """

    # Seconds to wait for the server before a request is abandoned.
    REQUEST_TIMEOUT = 15
    # Directory the downloaded .mp3 files are written to.
    SAVE_DIR = './musics'
    # Characters that cannot appear in a file name on common file systems.
    _ILLEGAL_FILENAME_CHARS = '\\/:*?"<>|'

    def __init__(self):
        # Page that lists the songs.
        self.url = 'https://music.163.com/discover/toplist'
        # Headers sent with every request (page fetch and song download).
        self.headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)',
                        'referer': 'https://music.163.com/'}
        # Song titles and their download URLs; the two lists are index-aligned.
        self.song_list = []
        self.url_list = []

    def project(self):
        """Run the whole program: collect the songs, then start the prompt loop."""
        self.getresponse()
        self.downsong()

    def getresponse(self):
        """Fetch the toplist page and hand its song ``<li>`` nodes to ``getdownurl``."""
        response = requests.get(url=self.url, headers=self.headers,
                                timeout=self.REQUEST_TIMEOUT)
        html_page = etree.HTML(response.content.decode())
        song_info_list = html_page.xpath('//div/ul[@class="f-hide"]/li')
        self.getdownurl(song_info_list)

    def getdownurl(self, url):
        """Fill ``song_list`` / ``url_list`` from an iterable of song nodes.

        Args:
            url: Iterable of elements exposing ``xpath()``; each is expected to
                contain an ``<a>`` whose ``href`` carries the song id after
                ``=`` and whose text is the song title.

        Nodes without a link, a title or an id are skipped so that one
        malformed entry does not abort the whole listing.
        """
        for item in url:
            hrefs = item.xpath("./a/@href")
            names = item.xpath("./a/text()")
            if not hrefs or not names:
                continue
            pieces = hrefs[0].split('=')
            # Drop anything after the id in case more query parameters follow.
            song_id = pieces[1].split('&')[0] if len(pieces) > 1 else ''
            if not song_id:
                continue
            download_url = 'http://music.163.com/song/media/outer/url?id=' + str(song_id)
            self.song_list.append(names[0])
            self.url_list.append(download_url)

    def downsong(self):
        """Prompt for song numbers until the user enters 0, saving each choice.

        Non-numeric and out-of-range input (including negative numbers, which
        would otherwise index from the end of the list) is rejected with the
        "invalid number" message. Network and file errors are reported
        separately as a failed download.
        """
        while True:
            print('编号\t\t', '名称')
            self.display()
            answer = input('请输入歌曲编号(退出请按:0): ')
            try:
                nums = int(answer)
            except ValueError:
                nums = -1  # Falls through to the range check below.
            if nums == 0:
                break
            if not 1 <= nums <= len(self.song_list):
                print('请输入有效数字编号......')
                time.sleep(3)
                continue
            song_name = self.song_list[nums - 1]
            try:
                self._save_song(song_name, self.url_list[nums - 1])
            except (OSError, requests.exceptions.RequestException) as err:
                print('{}\t\t\t下载失败: {}'.format(song_name, err))
            else:
                print('{}\t\t\t下载成功......'.format(song_name))
            time.sleep(3)

    def display(self):
        """Print every collected song title prefixed by its 1-based number."""
        # enumerate keeps the numbering correct even when two songs share a title.
        for number, item in enumerate(self.song_list, start=1):
            print(number, '\t\t', item)

    def _save_song(self, song_name, download_url):
        """Download *download_url* and write it to ``SAVE_DIR/<song_name>.mp3``.

        Raises:
            requests.exceptions.RequestException: The request failed or the
                server answered with an error status.
            OSError: The directory or file could not be written.
        """
        import os

        # Download first so that a failed request does not leave an empty file.
        response = requests.get(url=download_url, headers=self.headers,
                                timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        os.makedirs(self.SAVE_DIR, exist_ok=True)
        target = '{}/{}'.format(self.SAVE_DIR, self._safe_filename(song_name)) + '.mp3'
        with open(target, mode='wb') as f:
            f.write(response.content)

    @classmethod
    def _safe_filename(cls, name):
        """Return *name* with characters that are illegal in file names replaced by ``_``."""
        return ''.join('_' if ch in cls._ILLEGAL_FILENAME_CHARS else ch for ch in name)
if __name__ == '__main__':
    # Script entry point: build the downloader and start the interactive session.
    DownLoad().project()
结果演示
代码升级
import requests
from bs4 import BeautifulSoup
import threading
class Music(object):
    """Download every song listed on the NetEase Cloud Music toplist page.

    ``get_song`` scrapes the page at ``self.url``, queues one thread per song
    through ``get_song_url`` and starts them all; each thread runs
    ``download_song`` and writes ``SAVE_DIR/<song name>.mp3``.
    """

    # Seconds to wait for the server before a request is abandoned.
    REQUEST_TIMEOUT = 15
    # How many downloads may talk to the server at the same time.
    MAX_CONCURRENT_DOWNLOADS = 8
    # Directory the downloaded .mp3 files are written to.
    SAVE_DIR = './music'
    # Characters that cannot appear in a file name on common file systems.
    _ILLEGAL_FILENAME_CHARS = '\\/:*?"<>|'

    def __init__(self):
        # Page that lists the songs.
        self.url = 'https://music.163.com/discover/toplist'
        # Headers sent with every request.
        self.headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}
        # Download threads queued by get_song_url and started by get_song.
        self.threads = []
        # Parsed toplist page; set by get_song.
        self.soup = None
        # Throttles the worker threads so the host is not hit by every
        # download at once.
        self._download_slots = threading.BoundedSemaphore(self.MAX_CONCURRENT_DOWNLOADS)

    def get_song(self):
        """Scrape the toplist page, queue a download per song and start them all."""
        response = requests.get(self.url, headers=self.headers,
                                timeout=self.REQUEST_TIMEOUT).text
        self.soup = BeautifulSoup(response, 'html.parser')
        for song_ul in self.soup.find_all('ul', class_='f-hide'):
            for song_a in song_ul.find_all('a'):
                self.get_song_url(song_a['href'], song_a.get_text())
        for thread in self.threads:
            thread.start()

    def get_song_url(self, song_url, song_name):
        """Queue a download thread for one song.

        Args:
            song_url: The song link's ``href``; the song id is the text after
                the first ``=``.
            song_name: Title used for the output file name.

        A link without an id is skipped instead of raising ``IndexError``.
        """
        pieces = song_url.split('=')
        # Drop anything after the id in case more query parameters follow.
        song_id = pieces[1].split('&')[0] if len(pieces) > 1 else ''
        if not song_id:
            return
        play_url = 'http://music.163.com/song/media/outer/url?id=' + song_id
        self.threads.append(
            threading.Thread(target=self.download_song, args=(play_url, song_name)))

    def download_song(self, song_url, song_name):
        """Download *song_url* and save it as ``SAVE_DIR/<song_name>.mp3``.

        Runs inside a worker thread. A failed download is reported on stdout
        and no file is written for it.
        """
        import os

        with self._download_slots:
            try:
                response = requests.get(song_url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                response.raise_for_status()
                # exist_ok makes this safe when several threads get here together.
                os.makedirs(self.SAVE_DIR, exist_ok=True)
                with open(f'{self.SAVE_DIR}/{self._safe_filename(song_name)}.mp3', 'wb') as f:
                    f.write(response.content)
            except (OSError, requests.exceptions.RequestException) as err:
                print(f'{song_name} 下载失败: {err}')

    @classmethod
    def _safe_filename(cls, name):
        """Return *name* with characters that are illegal in file names replaced by ``_``."""
        return ''.join('_' if ch in cls._ILLEGAL_FILENAME_CHARS else ch for ch in name)
if __name__ == '__main__':
    # Script entry point: scrape the toplist and start all downloads.
    Music().get_song()