# Continue scraping Douban Movies
#
# Date: 2022-07-25 16:57:27
# Save the top ten films of every genre in the Douban movie chart to a file.

from urllib import request,parse
import json
# base_url = "https://movie.douban.com/j/chart/top_list?"

# data = {
#     11:'剧情',24:'喜剧',5:'动作',13:'爱情',17:'科幻',25:'动画',
#     10:'悬疑',19:'惊悚',20:'恐怖',1:'纪录片',23:'短片',6:'情色',
#     26:'同性',14:'音乐',7:'歌舞',28:'家庭',8:'儿童',2:'传记',
#     4:'历史',22:'战争',3:'犯罪',27:'西部',16:'奇幻',15:'冒险',
#     12:'灾难',29:'武侠',30:'古装',18:'运动',31:'黑色电影',9:'',21:''
# }

# URL template for the chart endpoint; ``%d`` receives the numeric type id and
# ``%%`` is a literal percent sign that survives the ``%`` formatting.
TOP_LIST_URL = "https://movie.douban.com/j/chart/top_list?type=%d&interval_id=100%%3A90&action=&start=0&limit=10"

# Type ids are scanned as 1..TYPE_ID_COUNT.
TYPE_ID_COUNT = 31

# Scanning stops once this many type ids have been considered, so with the
# value 2 only type ids 1 and 2 are requested.
# NOTE(review): this looks like a leftover debugging throttle, because the file
# header says every genre should be saved. Raising it to TYPE_ID_COUNT would
# request every type id -- confirm the intent before changing it.
FETCH_LIMIT = 2

# Type ids that are never requested (the original loop skipped the zero-based
# indexes 8 and 20 before adding 1).
SKIPPED_TYPE_IDS = (9, 21)

# File the formatted report is written to.
OUTPUT_PATH = 'movie.html'


def genre_type_ids():
    """Return the list of type ids to request, in ascending order."""
    scanned = min(FETCH_LIMIT, TYPE_ID_COUNT)
    return [
        type_id
        for type_id in range(1, scanned + 1)
        if type_id not in SKIPPED_TYPE_IDS
    ]


def build_url(type_id):
    """Return the chart URL for the given numeric type id."""
    return TOP_LIST_URL % type_id


def fetch_top_list(type_id):
    """Download and decode the JSON chart for one type id.

    The response is closed as soon as the body has been read.
    Errors raised by ``urlopen`` or ``json.loads`` propagate to the caller.
    """
    with request.urlopen(build_url(type_id)) as response:
        payload = response.read().decode('utf-8')
    return json.loads(payload)


def format_movie(rank, movie):
    """Format one chart entry as a four-line text record.

    ``movie`` must provide the keys ``title``, ``types``, ``release_date`` and
    ``regions``; ``title`` and ``release_date`` are concatenated directly and
    therefore have to be strings.
    """
    return (
        str(rank) + ':' + movie['title'] + '\n'
        + '影片类型:' + str(movie['types']) + '\n'
        + '上映时间:' + movie['release_date'] + '\n'
        + '地区:' + str(movie['regions'])
    )


def main():
    """Fetch the charts, print each raw response and save the report."""
    entries = []
    for type_id in genre_type_ids():
        movies = fetch_top_list(type_id)
        print(movies)
        # The rank restarts at 1 for every type id.
        entries.extend(
            format_movie(rank, movie)
            for rank, movie in enumerate(movies, start=1)
        )

    # Join once instead of growing a string in the loop; the records are kept
    # in one flat list so an empty chart adds no blank line.
    report = '\n'.join(entries).strip()

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        print(report)
        f.write(report)


if __name__ == "__main__":
    main()