背景:
无意间发现 B 站有位老师的课程特别好(Python 教学视频),但单位的网络限制了视频网站的访问,所以尝试把视频下载保存起来。经过一段时间的研究,终于完成了代码的开发,大家有需要的可以拿去使用。后续我会做进一步的延伸优化,争取做到在前端页面输入视频名称作为爬取条件来进行下载。
第一版;2021年02月23日
第二版;2021年02月24日
解决间歇不触发点击保存按钮的问题
# _author_ = 'Lucky'
#date: 2021/2/18
import time

from pywinauto import application
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
def chrome_options_setting(web_driver):
    """
    Turn on Chrome's "ask where to save each file before downloading" option.

    Opens ``chrome://settings/downloads``, descends through the nested shadow
    roots of the settings page down to the toggle, and clicks the toggle only
    when its ``aria-pressed`` attribute is ``"false"``.

    :param web_driver: browser driver whose settings page is modified
    :return: None
    """

    def shadow_root_of(host):
        # Ask the page for the element's shadowRoot via JavaScript.
        return web_driver.execute_script("return arguments[0].shadowRoot", host)

    toggle_selector = "#outerRow > cr-toggle[aria-describedby='sub-label-text']"

    web_driver.get("chrome://settings/downloads")
    time.sleep(2)  # fixed pause; presumably lets the settings page finish rendering

    root = shadow_root_of(web_driver.find_element(By.XPATH, "//settings-ui"))
    # XPath selectors cannot be used below a shadowRoot node, so everything
    # from here on is located by id or CSS selector.
    root = shadow_root_of(
        root.find_element(By.ID, "container").find_element(By.ID, "main")
    )
    # Each of these custom elements hosts the shadow root containing the next.
    for host_selector in (
        "settings-basic-page[role='main']",
        "settings-downloads-page",
        "settings-toggle-button",
    ):
        root = shadow_root_of(root.find_element(By.CSS_SELECTOR, host_selector))

    toggle = root.find_element(By.CSS_SELECTOR, toggle_selector)
    if toggle.get_attribute("aria-pressed") == "false":
        toggle.click()
def file_name_save(url, xpath):
    """
    Load a page and return the text of the element found by an XPath.

    Uses the module-level ``bili_browser`` driver. The text is also printed.

    :param url: address of the page to open
    :param xpath: XPath expression locating the element whose text is wanted
    :return: the ``text`` of the located element
    """
    bili_browser.get(url)
    time.sleep(3)  # fixed pause before querying the page
    file_name = bili_browser.find_element(By.XPATH, xpath).text
    print(file_name)
    return file_name
def save_as_window(file_name):
    """
    Enter a file name in the "另存为" (Save As) dialog and confirm it.

    Connects to the dialog, writes ``file_name`` into its Edit control, then
    clicks the save button once per second until the dialog no longer exists.

    :param file_name: text to place in the dialog's file-name field
    :return: None
    """
    dialog_title = u"另存为"
    dialog_class = "#32770"

    app = application.Application().connect(
        title_re=dialog_title, class_name=dialog_class
    )
    dialog = app.window(title=dialog_title, class_name=dialog_class)

    # Set the Edit control's text directly; simulating keystrokes with
    # type_keys(..., with_spaces=True) would be the alternative.
    name_field = dialog["Edit"]
    name_field.set_text(file_name)

    # Keep clicking: a single click intermittently fails to trigger the save.
    dialog_still_open = True
    while dialog_still_open:
        time.sleep(1)
        app["另存为"]["保存(&S)"].click()
        still_there = app.window(title=dialog_title, class_name=dialog_class).exists()
        dialog_still_open = still_there is not False
def main_download(url):
    """
    Submit a video URL to the xbeibeix resolver page and ALT-click its MP4 link.

    Uses the module-level ``bili_browser`` driver: opens the resolver page,
    types ``url`` into the address box, presses the submit button and then
    clicks the "MP4地址" link while holding ALT.

    :param url: address of the video page to resolve
    :return: None
    """
    address_box_xpath = "//*[@placeholder='输入地址']"
    alt_key = "\ue00a"  # WebDriver key code for ALT (selenium Keys.ALT)

    bili_browser.get("https://xbeibeix.com/api/bilibili")
    time.sleep(3)
    bili_browser.find_element(By.XPATH, address_box_xpath).clear()
    time.sleep(1)
    bili_browser.find_element(By.XPATH, address_box_xpath).send_keys(url)
    time.sleep(1)
    bili_browser.find_element(By.ID, "button-1").click()
    time.sleep(2)
    mp4_link = bili_browser.find_element(By.XPATH, "//a[contains(text(),'MP4地址')]")
    # Press, click and release in ONE performed chain. Previously the key_up
    # was queued on a second ActionChains that was never performed, so ALT was
    # left held down for every later interaction.
    # NOTE(review): ALT+click presumably makes Chrome download the link target
    # instead of navigating to it - confirm.
    ActionChains(bili_browser).key_down(alt_key).click(mp4_link).key_up(alt_key).perform()
if __name__ == "__main__":
    # Script entry point: download parts 456..633 of one Bilibili video series.
    # NOTE(review): the listing this was recovered from had no indentation; the
    # loop body below is reconstructed from statement order - confirm.
    bili_browser = webdriver.Chrome()
    try:
        bili_browser.maximize_window()
        # Make Chrome ask where to save each file before downloading.
        chrome_options_setting(web_driver=bili_browser)
        time.sleep(2)
        for index in range(456, 634):
            video_url = "https://www.bilibili.com/video/BV197411G75w?p=" + str(index)
            file_name_xpath = (
                '//*[@id="multi_page"]/div[2]/ul/li[%d]/a/div/div[1]/span[2]' % index
            )
            video_name = file_name_save(url=video_url, xpath=file_name_xpath)
            print(file_name_xpath)
            main_download(video_url)
            time.sleep(1)
            # Handle the Windows 10 "Save As" window and set the saved file's name.
            save_as_window(file_name=video_name)
            time.sleep(3)
        time.sleep(5)
    finally:
        # Always close the browser, even when a step above raises, so no
        # Chrome / chromedriver process is left running.
        bili_browser.quit()