# Import the required libraries
import requests
from bs4 import BeautifulSoup

# Browser identification header
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.50"
}

# Step 1: fetch the text of a single chapter
def get_fiction(url):
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, 'html.parser')
    page = soup.find('div', class_='noveContent').text
    return page

# Step 2: collect the chapter links, chapter titles, and the novel's name from the index page
def get_links(index_url):
    links = []
    titles = []
    r = requests.get(url=index_url, headers=headers)
    soup = BeautifulSoup(r.text, 'html.parser')
    page = soup.find_all('div', class_='c_con_li_detail_p')
    for a in page:
        link = a.a['href']         # the href attribute of the chapter link
        title = a.a['title']       # the chapter title
        fic_url = "https:" + link  # the index page uses protocol-relative links
        links.append(fic_url)
        titles.append(title)
    filename = soup.find('div', class_='c_con_rl_title').text  # the novel's name
    return links, titles, filename

# Download one chapter: append its title and content to the novel's text file,
# so the chapters end up in index-page order
def download(fic_url, title, filename, path):
    content = get_fiction(fic_url)  # reuse the chapter fetcher from step 1
    with open(f"{path}/{filename}.txt", 'a', encoding='utf-8') as f:
        f.write(title + "\n\n")
        f.write(content + "\n\n")
        f.write('--' * 40 + "--\n\n")  # separator line between chapters
    print(f'Downloaded {title}')

if __name__ == '__main__':
    # Step 3: fill in the run parameters
    path = r'd:\小说'                  # storage directory
    index_url = '/html_1217_1217177/'  # the novel's table-of-contents page (the site host is omitted in the original)
    links, titles, filename = get_links(index_url)  # unpack the index-page results
    for fic_url, title in zip(links, titles):  # walk the links and titles in parallel
        download(fic_url, title, filename, path)
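The script above fires its requests back-to-back and assumes every fetch succeeds. As a courtesy to the server (and to survive transient errors), a minimal sketch of a more defensive fetch helper is shown below; the fetch_html name, the retry count, and the delay are illustrative assumptions, not part of the original script.

import time
import requests

def fetch_html(url, headers, retries=3, delay=1.0):
    # Hypothetical helper: retry transient failures and pause between attempts
    for attempt in range(retries):
        try:
            r = requests.get(url, headers=headers, timeout=10)
            r.raise_for_status()              # raise on 4xx/5xx responses
            r.encoding = r.apparent_encoding  # many Chinese novel sites serve GBK rather than UTF-8
            return r.text
        except requests.RequestException:
            if attempt == retries - 1:
                raise
            time.sleep(delay)                 # back off before retrying

Either get_fiction or get_links could call fetch_html in place of the bare requests.get.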
#Scraping a novel whose text is served unencrypted:
#1: extract the novel's name, the chapter order, the chapter titles, and the chapter content
#2: download the novel and save the chapters under a suitable naming rule (see the sketch below)
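One "suitable rule" for preserving the chapter order is to number each title as it is written out, so the saved sections sort correctly even outside the file. A minimal sketch reusing the script's own download function; the zero-padding width of 4 is an assumption:

# enumerate keeps the chapters in the order the index page lists them
for i, (fic_url, title) in enumerate(zip(links, titles), start=1):
    numbered_title = f"{i:04d} {title}"  # e.g. "0001 Chapter One"
    download(fic_url, numbered_title, filename, path)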
#Once the two libraries are installed, create a 小说 (novel) folder on the D drive and the script above can be copied, pasted, and run as-is
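Rather than creating the folder by hand, the script could also create it at startup; a minimal sketch using the same path (pathlib ships with the standard library):

from pathlib import Path

Path(r'd:\小说').mkdir(parents=True, exist_ok=True)  # create the folder if it does not already exist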
Note! This article is for learning and exchange only; any commercial or illegal use is strictly prohibited. If there is any infringement, contact the author to have it removed.