import re
import pymongo
import requests
headers = {
    # Identify the client to the server with a desktop-browser User-Agent.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (HTML, like Gecko) "
                  "Chrome/127.0.0.0 Safari/537.36"
}
# NOTE(review): this URL has no scheme or host, so requests will refuse it
# (MissingSchema). The site origin appears to have been lost; restore it
# before running the script.
url = '/top/?spm=C28340.PdNvWY0LYxCP.EtmP5mypaGE4.11'

# NOTE(review): re.S is assumed for every pattern below -- the markup being
# matched spans several lines, so "." has to match newlines. Confirm against
# the live page.

# Each ranking list on the page is expected to be a bare <ul>...</ul> block.
_LIST_RE = re.compile(r'<ul>.*?</ul>', re.S)

# Entry of the first ranking list: (image URL, title, category).
_HOT_ITEM_RE = re.compile(
    r'<li.*?lazy="(.*?)".*?<div class="text"><a href=".*?" target="_blank">(.*?)</a>'
    r'</div>.*?<div class="column"><i class="icon_l"></i><a href=".*?" target="_blank">(.*?)</a>'
    r'<i class="icon_r"></i></div>.*?</li>',
    re.S,
)

# Entry of the remaining ranking lists:
# (image URL, badge text, title, synopsis).
_RANKED_ITEM_RE = re.compile(
    r'<li.*?lazy="(.*?)" width="188" height="250"></a>.*?<span class="number"><i class="icon_l">'
    r'</i><i class="txt">(.*?)</i><i class="icon_r"></i></span>.*?</div>.*?'
    r'<a class="cover" href=".*?" target="_blank">.*?</a>.*?<div class="text">'
    r'<a href=".*?" target="_blank">(.*?)</a></div>.*?<p><a href=".*?" target="_blank">(.*?)</a></p>.*?</li>',
    re.S,
)

# One row per ranking section:
# (section name, index of its <ul> block, item pattern,
#  ((output key, capture-group index), ...)).
# Block 0 is skipped on purpose, as the original script never read it.
_SECTIONS = (
    ("热播榜", 1, _HOT_ITEM_RE,
     (("img", 0), ("title", 1), ("category", 2))),
    ("动画片", 2, _RANKED_ITEM_RE,
     (("img", 0), ("title", 2), ("category", 1), ("synopsis", 3))),
    ("电视剧", 3, _RANKED_ITEM_RE,
     (("img", 0), ("title", 2), ("episode", 1), ("synopsis", 3))),
    ("纪录片", 4, _RANKED_ITEM_RE,
     (("img", 0), ("title", 2), ("category", 1), ("synopsis", 3))),
    ("特别节目", 5, _RANKED_ITEM_RE,
     (("img", 0), ("title", 2), ("tv", 1), ("synopsis", 3))),
)


def parse_rankings(html: str) -> dict:
    """Extract every ranking section from the page markup.

    Args:
        html: Decoded HTML of the rankings page.

    Returns:
        A dict keyed by section name; each value is
        ``{"name": <section name>, "items": [<item dict>, ...]}``.

    Raises:
        ValueError: If the page holds fewer ``<ul>`` blocks than the
            sections need, which means the layout is not the expected one.
    """
    blocks = _LIST_RE.findall(html)
    needed = max(index for _, index, _, _ in _SECTIONS) + 1
    if len(blocks) < needed:
        raise ValueError(
            f"expected at least {needed} <ul> blocks, found {len(blocks)}"
        )

    result = {}
    for name, index, pattern, fields in _SECTIONS:
        result[name] = {
            "name": name,
            "items": [
                {key: groups[pos] for key, pos in fields}
                for groups in pattern.findall(blocks[index])
            ],
        }
    return result


def fetch_page() -> str:
    """Download the rankings page and return its body decoded as UTF-8."""
    # A timeout keeps the script from hanging forever on a stalled server.
    res = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on HTTP errors instead of parsing an error page.
    res.raise_for_status()
    return res.content.decode("utf8")


def save_rankings(result: dict) -> None:
    """Insert the parsed rankings as one document into ``cctv.top``."""
    # The context manager closes the connection even if the insert fails.
    with pymongo.MongoClient() as client:
        db = client.get_default_database("cctv")
        collection = db.get_collection("top")
        collection.insert_one(result)


def main() -> None:
    """Fetch the rankings page, parse it and store the result in MongoDB."""
    save_rankings(parse_rankings(fetch_page()))


if __name__ == "__main__":
    main()