"""Douban movie spider (豆瓣电影爬虫).

Date: 2022-12-22 12:31:08
"""
import requests

class DoubanSpider(object):
    """Query Douban's movie chart JSON endpoint and print title/score pairs.

    The request is sent with a fixed set of browser-like headers (including
    a hard-coded cookie) to ``self.url``; the query string is supplied by
    the caller of :meth:`get_film_info`.
    """

    def __init__(self, timeout=10):
        """Set up the endpoint URL, request headers and network timeout.

        Args:
            timeout: Value forwarded to ``requests.get(timeout=...)``, in
                seconds. Without one a stalled connection would block the
                script indefinitely.
        """
        self.url = 'https://movie.douban.com/j/chart/top_list?'
        self.timeout = timeout
        self.headers = {
            "Accept": '*/*',
            # 'br' is intentionally not advertised: requests only decodes
            # Brotli bodies when an optional Brotli package is installed,
            # and an undecoded body makes ``.json()`` fail.
            "Accept-Encoding": 'gzip, deflate',
            "Accept-Language": 'zh-CN,zh;q=0.9',
            "Connection": 'keep-alive',
            # NOTE(review): hard-coded cookie, presumably copied from a
            # browser session; it may have expired - confirm it is needed.
            "Cookie": 'bid=8-nCfhrghnU; __yadk_uid=YRJL4YjOgUBe7yEo3opPsDvJqnarI2oM; trc_cookie_storage=taboola%2520global%253Auser-id%3D8fc3589f-2abb-45b3-b21f-dabdd8ad9733-tuct3e78214; ll="108309"; ap_v=0,6.0; __utma=30149280.2076664567.1559562682.1559562682.1563870087.2; __utmb=30149280.0.10.1563870087; __utmc=30149280; __utmz=30149280.1563870087.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utma=223695111.1070403850.1559562682.1559562682.1563870087.2; __utmb=223695111.0.10.1563870087; __utmc=223695111; __utmz=223695111.1563870087.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1563870089%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3D12rRCXsSrd1oLSL7dm_tBhgHaysfIxjCzkJqy6_F4kEL-HNdTUAh3Z6A-WLXShEt%26wd%3D%26eqid%3Dae785c100002a33c000000025d36c37d%22%5D; _pk_ses.100001.4cf6=*; _pk_id.100001.4cf6=1ee68b08630bb7f8.1559562681.2.1563872988.1559562681.',
            "Host": 'movie.douban.com',
            "Referer": 'https://movie.douban.com/typerank?type_name=%E7%88%B1%E6%83%85&type=13&interval_id=100:90&action=',
            "User-Agent": 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
            "X-Requested-With": 'XMLHttpRequest',
        }

    # Request + parse
    def get_film_info(self, params):
        """Fetch the chart for ``params`` and print each film's title and score.

        Args:
            params: Mapping of query-string parameters sent with the GET
                request to ``self.url``.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            KeyError: If an entry lacks the ``'title'`` or ``'score'`` key.
        """
        response = requests.get(
            url=self.url,
            headers=self.headers,
            params=params,
            timeout=self.timeout,
        )
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()

        # The decoded body is iterated as a sequence of film mappings.
        for film in response.json():
            # Name
            name = film['title']
            # Score
            score = film['score']
            print(name, score)

    def main(self):
        """Prompt for a type and a film count, then print the matching films."""
        # input() already returns str; strip stray whitespace so it is not
        # sent to the server as part of the value.
        tp = input('请输入类型').strip()
        num = input('请输入电影数量').strip()
        params = {
            # NOTE(review): presumably a numeric genre id (the Referer
            # above uses type=13) - confirm against the site.
            'type': tp,
            'interval_id': '100:90',
            'action': '',
            'start': '0',
            'limit': num,
        }

        self.get_film_info(params)

if __name__ == '__main__':
    # Script entry point: build a spider and start the interactive prompt.
    DoubanSpider().main()