Python爬虫--爬取哔哩哔哩(B站)短视频平台视频

时间:2025-02-20 07:56:06
import os import requests import json import re from bs4 import BeautifulSoup import subprocess from detail_video import video_bvid # video_bvid 是一个从外部得到的单个视频ID video_bvid = 'your-single-bvid' class BilibiliVideoAudio: def __init__(self, bvid): = bvid = { "referer": "/all?keyword=%E4%B8%BB%E6%92%AD%E8%AF%B4%E8%81%94%E6%92%AD&from_source=webtop_search&spm_id_from=333.1007&search_source=5&page=4&o=90", "origin": "", 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0', 'Accept-Encoding': 'gzip, deflate, br' } def get_video_audio(self): # 构造视频链接并发送请求获取页面内容 url = f'/video/{}/?spm_id_from=333.&vd_source=14378ecd144bed421affe1fe0ddd8981' content = (url, headers=).('utf-8') soup = BeautifulSoup(content, '') # 获取视频标题 meta_tag = ('meta', attrs={'name': 'title'}) title = meta_tag['content'] # 获取视频和音频链接 pattern = r'window\.__playinfo__=({.*?})\s*</script>' json_data = (pattern, content)[0] data = (json_data) video_url = data['data']['dash']['video'][0]['base_url'] audio_url = data['data']['dash']['audio'][0]['base_url'] return { 'title': title, 'video_url': video_url, 'audio_url': audio_url } def download_video_audio(self, url, filename): # 对文件名进行清理,去除不合规字符 filename = self.sanitize_filename(filename) try: # 发送请求下载视频或音频文件 resp = (url, headers=).content download_path = ('D:\\video', filename) # 构造下载路径 with open(download_path, mode='wb') as file: