1,分析url,获取热门电影的url
豆瓣采用的是 Ajax 动态加载技术,我们可以直接找到返回 json 数据的 url
url = 'https://movie.douban.com/j/search_subjects?type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=330&page_start=0'
1,先获取到json数据
def get_data(page_limit=330, page_start=0, timeout=10):
    """Fetch the Douban movie list for the "hot" tag from its JSON endpoint.

    Args:
        page_limit: Value sent as the ``page_limit`` query parameter.
        page_start: Value sent as the ``page_start`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts, one per movie, with the keys ``movie_rate``,
        ``movie_name`` and ``movie_url``.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the payload lacks ``subjects`` or a movie entry lacks
            ``rate``, ``title`` or ``url``.
    """
    # The tag value is the percent-encoded form of the "hot" category name.
    url = (
        'https://movie.douban.com/j/search_subjects'
        '?type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend'
        '&page_limit={}&page_start={}'
    ).format(page_limit, page_start)
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/71.0.3578.80 Safari/537.36'
        ),
    }

    # Without a timeout requests may block forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()

    payload = json.loads(response.text)
    return [
        {
            'movie_rate': movie['rate'],
            'movie_name': movie['title'],
            'movie_url': movie['url'],
        }
        for movie in payload['subjects']
    ]
2,保存到数据库
def sava_data(data):
    """Insert movie rows into the ``t_movie`` table of the ``douban`` database.

    Args:
        data: A sequence of dicts, each providing the keys ``movie_rate``,
            ``movie_name`` and ``movie_url`` referenced by the insert
            statement's named placeholders.

    Raises:
        pymysql.MySQLError: If connecting, inserting or committing fails.
            The transaction is rolled back before the error propagates.
    """
    if not data:
        # Nothing to insert; avoid opening a connection for no work.
        return

    config = {
        'host': '127.0.0.1',
        'port': 3306,
        'user': 'root',
        'password': '',
        'database': 'douban',
        'charset': 'utf8',
    }
    sql = (
        ' insert into t_movie (movie_rate, movie_name, movie_url)'
        ' values (%(movie_rate)s,%(movie_name)s,%(movie_url)s) '
    )

    conn = pymysql.connect(**config)
    try:
        # The cursor context manager closes the cursor even on failure.
        with conn.cursor() as cursor:
            cursor.executemany(sql, data)
        conn.commit()
    except Exception:
        # Undo any partial insert before re-raising to the caller.
        conn.rollback()
        raise
    finally:
        # Always release the connection, whether or not the insert succeeded.
        conn.close()
3,创建数据库
先创建数据库和数据表,然后才能插入数据,建表语句为:
-- One row per movie; the non-key columns match those written by the insert statement.
CREATE TABLE t_movie (
    id         INT AUTO_INCREMENT PRIMARY KEY,
    movie_rate VARCHAR(200),
    movie_name VARCHAR(200),
    movie_url  VARCHAR(200)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
4,完整代码
import json

import pymysql
import requests


def get_data(page_limit=330, page_start=0, timeout=10):
    """Fetch the Douban movie list for the "hot" tag from its JSON endpoint.

    Args:
        page_limit: Value sent as the ``page_limit`` query parameter.
        page_start: Value sent as the ``page_start`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts, one per movie, with the keys ``movie_rate``,
        ``movie_name`` and ``movie_url``.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the payload lacks ``subjects`` or a movie entry lacks
            ``rate``, ``title`` or ``url``.
    """
    # The tag value is the percent-encoded form of the "hot" category name.
    url = (
        'https://movie.douban.com/j/search_subjects'
        '?type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend'
        '&page_limit={}&page_start={}'
    ).format(page_limit, page_start)
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/71.0.3578.80 Safari/537.36'
        ),
    }

    # Without a timeout requests may block forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()

    payload = json.loads(response.text)
    return [
        {
            'movie_rate': movie['rate'],
            'movie_name': movie['title'],
            'movie_url': movie['url'],
        }
        for movie in payload['subjects']
    ]


def sava_data(data):
    """Insert movie rows into the ``t_movie`` table of the ``douban`` database.

    Args:
        data: A sequence of dicts, each providing the keys ``movie_rate``,
            ``movie_name`` and ``movie_url`` referenced by the insert
            statement's named placeholders.

    Raises:
        pymysql.MySQLError: If connecting, inserting or committing fails.
            The transaction is rolled back before the error propagates.
    """
    if not data:
        # Nothing to insert; avoid opening a connection for no work.
        return

    config = {
        'host': '127.0.0.1',
        'port': 3306,
        'user': 'root',
        'password': '',
        'database': 'douban',
        'charset': 'utf8',
    }
    sql = (
        ' insert into t_movie (movie_rate, movie_name, movie_url)'
        ' values (%(movie_rate)s,%(movie_name)s,%(movie_url)s) '
    )

    conn = pymysql.connect(**config)
    try:
        # The cursor context manager closes the cursor even on failure.
        with conn.cursor() as cursor:
            cursor.executemany(sql, data)
        conn.commit()
    except Exception:
        # Undo any partial insert before re-raising to the caller.
        conn.rollback()
        raise
    finally:
        # Always release the connection, whether or not the insert succeeded.
        conn.close()


if __name__ == '__main__':
    sava_data(get_data())
运行完后查看数据库