使用文档
import requests
import re
import random
from concurrent.futures import ThreadPoolExecutor
import time
# Wall-clock reference taken at start-up; elapsed times printed later in the
# script are measured from this value.
start = time.time()

# Shared pool of 12 worker threads that the download tasks are submitted to.
pool = ThreadPoolExecutor(max_workers=12)
def get_proxy(timeout=5):
    """Fetch one proxy record from the local proxy-pool service.

    Sends a GET request to ``http://127.0.0.1:5010/get/`` and returns the
    decoded JSON body. The caller in this file reads the ``'proxy'`` key
    from the returned object.

    Args:
        timeout: Seconds to wait for the service before giving up. Without
            a timeout ``requests`` waits indefinitely, so a stalled pool
            service would hang the whole script.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.RequestException: If the service cannot be reached or
            does not answer within ``timeout`` seconds.
        ValueError: If the response body is not valid JSON (``requests``
            raises ``ValueError`` or a subclass of it).
    """
    response = requests.get('http://127.0.0.1:5010/get/', timeout=timeout)
    return response.json()
def delete_proxy(proxy, timeout=5):
    """Ask the local proxy-pool service to drop ``proxy``.

    Sends a GET request to
    ``http://127.0.0.1:5010/delete/?proxy=<proxy>``. The response is not
    inspected.

    Args:
        proxy: The proxy value to remove, interpolated into the query
            string as-is.
        timeout: Seconds to wait for the service before giving up. Without
            a timeout ``requests`` waits indefinitely.

    Raises:
        requests.RequestException: If the service cannot be reached or
            does not answer within ``timeout`` seconds.
    """
    requests.get(
        "http://127.0.0.1:5010/delete/?proxy={}".format(proxy),
        timeout=timeout,
    )
# --- Collect the downloadable videos listed by one pearvideo category page ---
url = 'https://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=9&start=0'
video_list = []

# Fetch the listing and pull out the relative link of every video entry.
ret = requests.get(url, timeout=10)
reg = r'<a href="(.*?)" class="vervideo-lilink actplay">'
video_urls = re.findall(reg, ret.text)
print(video_urls)

# Compiled once: the same pattern is applied to every detail page.
detail_pattern = re.compile(r'srcUrl="(.*?)",vdoUrl=srcUrl')

# A distinct loop variable keeps the module-level ``url`` (the listing URL)
# from being overwritten by the last video link.
for video_path in video_urls:
    proxy = get_proxy().get('proxy')
    print(proxy)

    if proxy:
        proxy_url = "http://{}".format(proxy)
        # requests selects the proxy by the scheme of the *target* URL. The
        # detail pages are https://, so an "http"-only mapping would leave
        # the proxy unused.
        proxies = {"http": proxy_url, "https": proxy_url}
    else:
        # The pool returned no proxy: fall back to a direct connection
        # instead of routing through "http://None".
        proxies = None

    try:
        ret_detail = requests.get(
            'https://www.pearvideo.com/' + video_path,
            proxies=proxies,
            timeout=10,
        )
    except requests.RequestException:
        # Only a transport failure says anything about the proxy's health.
        if proxy:
            delete_proxy(proxy)
        continue
    print(proxy)

    match = detail_pattern.search(ret_detail.text)
    if match is None:
        # The page loaded but has no video source: skip it and keep the
        # proxy, since the request itself succeeded.
        continue

    mp4_url = match.group(1)
    # The file name is the last path segment of the mp4 URL.
    video_name = mp4_url.rsplit('/', 1)[-1]
    video_list.append({
        'v_name': video_name,
        'v_url': mp4_url,
    })
def get_video(dic):
    """Download one video into the current working directory.

    Args:
        dic: Mapping with the keys ``'v_url'`` (URL of the mp4 file) and
            ``'v_name'`` (file name to write to).

    Raises:
        KeyError: If ``dic`` lacks ``'v_url'`` or ``'v_name'``.
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the output file cannot be opened or written.
    """
    url = dic['v_url']
    name = dic['v_name']
    print(f'开始下载{name}')
    # stream=True keeps the body out of memory until it is iterated; the
    # context manager releases the connection even if writing fails.
    with requests.get(url=url, stream=True, timeout=30) as video_data:
        print(url)
        # Fail here rather than saving an HTTP error page under the video's name.
        video_data.raise_for_status()
        with open(name, 'wb') as f:
            # iter_content() defaults to chunk_size=1, i.e. one write() per
            # byte; 64 KiB chunks keep both memory use and call count low.
            for chunk in video_data.iter_content(chunk_size=64 * 1024):
                f.write(chunk)
    print(f'{name}下载完成')
    # NOTE(review): the source's indentation was lost; these three timing
    # lines are assumed to belong to this function (seconds elapsed since
    # the module-level ``start`` when this download finishes) - confirm.
    end = time.time()
    ctime = end - start
    print(ctime)
# Debug output: show the collected {'v_name', 'v_url'} records before any
# download task is submitted.
print(video_list)
def _report_failure(name, future):
    """Print the exception, if any, that ended the download task for ``name``."""
    if future.cancelled():
        return
    error = future.exception()
    if error is not None:
        print(f'{name} download failed: {error!r}')


def main():
    """Submit one ``get_video`` task per entry of ``video_list`` to ``pool``.

    Returns immediately after submitting; it does not wait for the
    downloads to finish. A done-callback is attached to each future
    because an exception raised inside a worker is stored on the future
    and would otherwise never be shown.
    """
    for item in video_list:
        future = pool.submit(get_video, item)
        # Bind the name as a default argument so each callback reports its
        # own video rather than the loop's last item.
        future.add_done_callback(
            lambda done, name=item.get('v_name'): _report_failure(name, done)
        )
# Entry point: runs only when the file is executed as a script.
if __name__ == "__main__":
    main()
    # Block until every submitted task has finished (shutdown() waits by default).
    pool.shutdown()