代码如下:
#encoding:utf-8
import requests
from lxml import etree
import xlwt
import os
# 爬取b站热门视频信息
def spider():
    """Scrape the bilibili ranking page and return the ranked videos.

    Returns:
        list[dict[str, str]]: one dict per ``li`` element with class
        ``rank-item``, in document order, with the keys ``rank``,
        ``videolink``, ``title``, ``play``, ``comment``, ``upname``,
        ``uplink`` and ``hot``. A field whose node is not found is an empty
        string (``uplink`` is then just ``"http://"``).

    Raises:
        requests.RequestException: if the page cannot be fetched, including
            when the 10 second timeout expires.
    """
    url = "https://www.bilibili.com/ranking?spm_id_from=333.851.b_7072696d61727950616765546162.3"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"
    }
    # Without a timeout, requests can block forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    page = etree.HTML(response.text)

    def joined(node, path):
        # Concatenate every XPath match into one string, trimming layout
        # whitespace that surrounds the text in the markup.
        return "".join(node.xpath(path)).strip()

    video_list = []
    for item in page.xpath("//li[@class='rank-item']"):
        # Read the counters span by span instead of splitting the
        # concatenated text on "万": splitting fails with IndexError whenever
        # the play count carries no "万" suffix.
        # NOTE(review): assumes the first non-empty span text is the play
        # count and the second is the comment count — confirm against the page.
        counts = [
            text.strip()
            for text in item.xpath(".//div[@class='detail']/span/text()")
            if text.strip()
        ]
        play = counts[0] if counts else ""
        comment = counts[1] if len(counts) > 1 else ""

        # A protocol-relative href ("//host/path") would otherwise become
        # "http:////host/path"; drop its leading slashes before prefixing.
        up_href = joined(item, ".//div[@class='detail']/a/@href")
        uplink = "http://" + up_href.lstrip("/")

        video_list.append({
            'rank': joined(item, "./div[@class='num']/text()"),
            'videolink': joined(item, ".//div[@class='info']/a/@href"),
            'title': joined(item, ".//div[@class='info']/a/text()"),
            'play': play,
            'comment': comment,
            'upname': joined(item, ".//div[@class='detail']/a/span/text()"),
            'uplink': uplink,
            'hot': joined(item, ".//div[@class='pts']/div/text()"),
        })
    return video_list
def write_Excel():
    """Scrape the ranking with ``spider()`` and save it as an ``.xls`` file.

    The sheet has a header row followed by one row per video. The first two
    columns are ``HYPERLINK`` formulas (video title -> video page, uploader
    name -> uploader page); the remaining columns hold rank, heat, play count
    and comment count as text. Any existing ``b站热门视频信息.xls`` in the
    current working directory is replaced.
    """
    video_list = spider()

    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet("b站热门视频")
    xstyle = xlwt.XFStyle()
    xstyle.alignment.horz = 0x02  # centre horizontally
    xstyle.alignment.vert = 0x01  # centre vertically

    head = ['视频名', 'up主', '排名', '热度', '播放量', '评论数']
    for col, caption in enumerate(head):
        sheet.write(0, col, caption, xstyle)

    # Track the widest entry per hyperlink column so the final width fits
    # every row, not merely whichever row happened to be written last.
    title_width = 0
    name_width = 0
    for row, item in enumerate(video_list, start=1):
        title = item["title"]
        if '"' in title:
            # A double quote would terminate the formula's string literal
            # early; keep only the text between the first pair of quotes.
            title = title.split('"')[1]
        # NOTE(review): an uploader name containing '"' would still break
        # the second formula; the original did not guard against it either.
        title_data = 'HYPERLINK("' + item["videolink"] + '";"' + title + '")'
        name_data = 'HYPERLINK("' + item["uplink"] + '";"' + item["upname"] + '")'

        title_width = max(title_width, int(256 * len(title_data) * 3 / 5))
        name_width = max(name_width, int(256 * len(name_data) * 3 / 5))

        sheet.write(row, 0, xlwt.Formula(title_data), xstyle)
        sheet.write(row, 1, xlwt.Formula(name_data), xstyle)
        sheet.write(row, 2, item["rank"], xstyle)
        sheet.write(row, 3, item["hot"], xstyle)
        sheet.write(row, 4, item["play"], xstyle)
        sheet.write(row, 5, item["comment"], xstyle)

    if video_list:
        # NOTE(review): capped at 65535 on the assumption that xlwt stores
        # the column width in a 16-bit field — confirm.
        sheet.col(0).width = min(title_width, 65535)
        sheet.col(1).width = min(name_width, 65535)

    # Remove a stale copy first so the saved file is always freshly created.
    filename = "b站热门视频信息.xls"
    if os.path.exists(filename):
        os.remove(filename)
    workbook.save(filename)
# Run the scrape-and-export only when executed as a script, not on import.
if __name__ == '__main__':
    write_Excel()
|
结果展示:
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:https://www.cnblogs.com/zhouzetian/p/12613930.html