【原创】抖音个人视频列表采集

时间:2021-09-25 00:38:52

<table><tr><td bgcolor=orange>本文所有教程及源码、软件仅为技术研究。不涉及计算机信息系统功能的删除、修改、增加、干扰,更不会影响计算机信息系统的正常运行。不得将代码用于非法用途,如侵立删!</td></tr></table>


抖音个人视频列表采集

环境

  • win10、mac
  • Python3.9

2023.3.3更新

现在需要校验 ck(cookie)中的 tk 参数,特此更新一下。【原创】抖音个人视频列表采集

获取所需参数

    def get_cookie(self):
        """Obtain a verified ``s_v_web_id`` cookie and store it in ``self.cookie``.

        Each attempt requests ``url`` without following redirects, reads the
        ``x-vc-bdturing-parameters`` response header, base64-decodes it as
        JSON, and passes its ``fp`` and ``detail`` fields to
        ``Verify().verify``. When the verification result reports
        ``code == 200``, ``self.cookie`` is set to ``"s_v_web_id=<fp>;"`` and
        the method returns.

        At most 10 attempts are made in total. Every failed attempt, whatever
        the reason, consumes one of them, so the method always terminates.
        When all attempts fail, ``self.cookie`` is left untouched and an error
        is logged.

        Returns:
            None. The result is delivered through ``self.cookie``.
        """
        attempts_left = 10
        while attempts_left:
            # Count the attempt up front so that *every* failure path below
            # (missing header, verification error, network error) is bounded.
            attempts_left -= 1
            try:
                # The context manager closes the session's pooled connections.
                with requests.session() as session:
                    h = session.get(url, verify=False, allow_redirects=False, timeout=20).headers
                encoded_params = h.get('x-vc-bdturing-parameters')
                if not encoded_params:
                    logger.info('提取:x_vc_bdturing_parameters 失败,重试!')
                    time.sleep(random.randint(3, 5))
                    continue
                # The header value is base64-encoded JSON carrying "fp" and "detail".
                verify_data = json.loads(base64.b64decode(encoded_params).decode("utf-8"))
                fp = verify_data.get("fp")
                detail = verify_data.get("detail")
                logger.info(f"成功提取:{fp}, 开始验证")
                try:  # Verification fails intermittently; retry on error.
                    msg = Verify().verify(fp, detail)
                    logger.info(msg)
                except Exception as e:
                    logger.info(f"{e}")
                    continue
                if msg.get('code') != 200:
                    logger.info(f"{msg.get('message')},重试")
                    continue
                logger.info(f"ck s_v_web_id:{fp}, {msg.get('message')}")
                self.cookie = f's_v_web_id={fp};'
                return
            except Exception as e:
                logger.info(f'提取:x_vc_bdturing_parameters 出错:{e}')
                time.sleep(random.randint(3, 5))
                continue
        # All attempts used up: make the failure visible instead of returning silently.
        logger.error('获取 s_v_web_id 失败:已达到最大重试次数')

根据作者id获取所有视频

    def get_aweme_list(self, sec_user_id):
        """Fetch the video IDs published by the author ``sec_user_id``.

        Builds the request headers (including ``self.cookie``) and query
        parameters, signs the parameters via ``self._get_xb`` and adds the
        result as ``X-Bogus``, then issues a single request through
        ``self._parse_url`` and reads ``aweme_list`` from the JSON response.
        Each ``(aweme_id, desc)`` pair is printed.

        Only one request is made (``count`` is ``"100"``, ``max_cursor`` is the
        current time in milliseconds); no pagination is performed here.

        Args:
            sec_user_id: The author's ``sec_user_id`` as used by the endpoint.

        Returns:
            A list with the ``aweme_id`` of every item in the response, or
            ``None`` when the response is missing, is not JSON, or carries no
            ``aweme_list``.
        """
        # NOTE(review): the "sec-ch-ua*" values below contain "^\\^" / "^^"
        # sequences that look like shell-escaping residue from a copied
        # command line; kept unchanged -- confirm against a real browser request.
        headers = {
            "accept": "application/json, text/plain, */*",
            "accept-language": "zh-CN,zh;q=0.9",
            "bd-ticket-guard-client-csr": "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURSBSRVFVRVNULS0tLS0NCk1JSUJEekNCdFFJQkFEQW5NUXN3Q1FZRFZRUUdFd0pEVGpFWU1CWUdBMVVFQXd3UFltUmZkR2xqYTJWMFgyZDENCllYSmtNRmt3RXdZSEtvWkl6ajBDQVFZSUtvWkl6ajBEQVFjRFFnQUVWdHJwOUhyOTdwRCttcGVxcTZIZzBUanUNCnJQRVpGSVQzajBTUGFQNGVGaXRzeHU5U3U2ZWJFWHVDNDVlYkMxbExFVlBGVXNPZFF6TWlsTjFmWThDdlZxQXMNCk1Db0dDU3FHU0liM0RRRUpEakVkTUJzd0dRWURWUjBSQkJJd0VJSU9kM2QzTG1SdmRYbHBiaTVqYjIwd0NnWUkNCktvWkl6ajBFQXdJRFNRQXdSZ0loQU5WOWlTOUVzVGszem5KOFprTDVNKzNZTk11NTRRNnF6Qm5kUy9Yd1Y3b1INCkFpRUFxbERLTkcrcUMyMjBBQ1B2Z1IrVlI2VWh3RXhUOEZTS0N2LzU2clBrMmNzPQ0KLS0tLS1FTkQgQ0VSVElGSUNBVEUgUkVRVUVTVC0tLS0tDQo=",
            "bd-ticket-guard-version": "2",
            "cache-control": "no-cache",
            "pragma": "no-cache",
            "referer": "https://www.douyin.com/",
            "sec-ch-ua": "^\\^Chromium^^;v=^\\^110^^, ^\\^Not",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "^\\^Windows^^",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
            "cookie": self.cookie,
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
        }
        params = {
            "aid": "6383",
            "channel": "channel_pc_web",
            "sec_user_id": sec_user_id,
            "max_cursor": int(time.time()) * 1000,
            "locate_item_id": "7064150578586193188",
            "locate_query": "false",
            "show_live_replay_strategy": "1",
            "count": "100",
            "publish_video_strategy_type": "2",
            "pc_client_type": "1",
            "version_code": "170400",
            "version_name": "17.4.0",
            "cookie_enabled": "true",
            "screen_width": "1680",
            "screen_height": "1050",
            "browser_language": "zh-CN",
            "browser_platform": "Win32",
            "browser_name": "Chrome",
            "browser_version": "108.0.0.0",
            "browser_online": "true",
            "engine_name": "Blink",
            "engine_version": "108.0.0.0",
            "os_name": "Windows",
            "os_version": "10",
            "cpu_core_num": "4",
            "device_memory": "8",
            "platform": "PC",
            "downlink": "10",
            "effective_type": "4g",
            "round_trip_time": "100",
            "msToken": ""
        }

        # The signature is computed over the parameters before X-Bogus is added.
        x_b = self._get_xb(params=params)
        logger.debug(f'X-Bogus: {x_b}')
        params['X-Bogus'] = x_b
        response = self._parse_url(url, headers=headers, params=params)

        # Treat a missing response or a non-JSON body like an empty result
        # instead of crashing. Assumes _parse_url may return None on failure
        # -- TODO confirm against its implementation.
        try:
            payload = response.json() if response is not None else None
        except ValueError:
            payload = None
        aweme_list = payload.get('aweme_list') if isinstance(payload, dict) else None
        if not aweme_list:
            logger.info(f'获取失败:{sec_user_id}')
            return None

        # Collect ID and description in a single pass over the items.
        videos = [(item.get('aweme_id'), item.get('desc')) for item in aweme_list]
        logger.info(f'成功获取{sec_user_id}视频ID列表:{len(videos)}条')
        for video in videos:
            print(video)
        return [aweme_id for aweme_id, _ in videos]

效果

【原创】抖音个人视频列表采集


<table><tr><td bgcolor=orange>本文仅供学习交流使用,如侵立删!</td></tr></table>