通过requests登录店小秘解决验证码问题

时间:2022-12-19 11:01:15

登录常用手段就是:

  • requests.post请求

  • selenium等自动化工具

这里讲一下使用requests实现自动登录

通过requests登录店小秘解决验证码问题:现在网站登录基本上都有验证码,requests登录的难点在于将验证码与账号联系起来,而联系两者的纽带就是cookie和referer。


def _cookie_value(pairs, name, position):
    """Return the value of cookie *name* from a list of ``(name, value)`` pairs.

    Falls back to the value at index *position* when no cookie is called
    *name*, and returns ``None`` when neither lookup succeeds.
    """
    for key, value in pairs:
        if key == name:
            return value
    if position < len(pairs):
        return pairs[position][1]
    return None


def login_dianxiaomi(account='xx', password='xxx', max_attempts=3):
    """Log in to dianxiaomi.com with plain ``requests`` calls.

    The captcha is tied to the account through the cookies and the Referer
    header, so each attempt performs three requests in order:

    1. GET the home page and read the session cookie it sets.
    2. GET the captcha image with that session cookie and read the cookies
       the captcha response sets.
    3. POST the login form with the captcha cookies and the recognised code.

    Relies on ``time``, ``requests``, ``Image`` and ``get_code`` being
    available in the enclosing module; none of them is defined in this block.

    Args:
        account: Value sent as the ``account`` form field.
        password: Value sent as the ``password`` form field.
        max_attempts: How many full attempts to make before giving up.

    Returns:
        The cookies of the successful login response formatted as
        ``"name=value;name=value;"``, or ``None`` when every attempt failed.

    Raises:
        Whatever ``requests`` raises for network errors or a non-JSON login
        response; these are not caught here.
    """
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
    )
    image_path = 'dianxiaomi.png'

    for attempt in range(1, max_attempts + 1):
        ts = int(time.time())
        # Home page URL; reused as the Referer of the two later requests.
        homepage = f'https://www.dianxiaomi.com/index.htm?ts={ts}'

        headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            # Copied verbatim from a browser session.
            'Cookie': '_dxm_ad_client_id=C480E8334FD936CCDEB3BA1AD1F98E49F; Hm_lvt_f8001a3f3d9bf5923f780580eb550c0b=1670383137,1670400951; '
                      'JSESSIONID=857D325C586104913A42E668792A8870; dxm_vc=NjlhZDZiOGYwMzk0YzdiYjRlOWJlY2M2OWJiMGY4Y2MhMTY3MDQwMzcxMzg2NQ; '
                      'Hm_lpvt_f8001a3f3d9bf5923f780580eb550c0b=1670403714',
            'Host': 'www.dianxiaomi.com',
            'Referer': 'https://www.dianxiaomi.com/home.htm',
            'User-Agent': user_agent,
        }
        res = requests.get(homepage, headers=headers, timeout=30)
        cookie = res.cookies.items()
        if not cookie:
            # Without a session cookie the captcha cannot be bound to this
            # attempt, so count it as a failure instead of raising IndexError.
            print('homepage cookie not right', f'try {attempt} times')
            continue
        jsessionid = _cookie_value(cookie, 'JSESSIONID', 0)
        print('homepage jsessionid:', jsessionid)

        # Fetch the captcha image.
        url = f'https://www.dianxiaomi.com/verify/code.htm?t={int(time.time() * 1000)}'
        headers = {
            'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            # JSESSIONID here is the one set by the home page response.
            'Cookie': '_dxm_ad_client_id=C480E8334FD936CCDEB3BA1AD1F98E49F; Hm_lvt_f8001a3f3d9bf5923f780580eb550c0b=1670383137,1670400951;'
                      f'Hm_lpvt_f8001a3f3d9bf5923f780580eb550c0b=1670402174; JSESSIONID={jsessionid}; '
                      'dxm_vc=NjlhZDZiOGYwMzk0YzdiYjRlOWJlY2M2OWJiMGY4Y2MhMTY3MDQwMzcxMzg2NQ',
            'Host': 'www.dianxiaomi.com',
            'Referer': homepage,
            'User-Agent': user_agent,
        }
        res = requests.get(url, headers=headers, timeout=30)
        cookie = res.cookies.items()
        print('code cookie:', cookie)
        if len(cookie) < 2:
            # Both a session id and a dxm_vc value are needed for the login
            # request; zero or one cookie means this captcha is unusable.
            print('code url cookie not right', f'try {attempt} times')
            continue
        code_jsessionid = _cookie_value(cookie, 'JSESSIONID', 0)
        code_dxm_vc = _cookie_value(cookie, 'dxm_vc', 1)

        with open(image_path, 'wb') as f:
            f.write(res.content)

        # Convert the captcha to grayscale ('L' mode) before recognition.
        # The source image is closed before the same path is overwritten.
        with Image.open(image_path) as im:
            img_gray = im.convert('L')
        img_gray.save(image_path)

        # get_code is defined outside this block; it receives the image path
        # and its result is submitted as the captcha answer.
        code = get_code(image_path)
        print('code:', code)

        login_url = 'https://www.dianxiaomi.com/user/userLoginNew2.json'
        headers = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': user_agent,
            'Referer': homepage,
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            # JSESSIONID and dxm_vc here come from the captcha response.
            'Cookie': '_dxm_ad_client_id=C480E8334FD936CCDEB3BA1AD1F98E49F; Hm_lvt_f8001a3f3d9bf5923f780580eb550c0b=1670383137,1670400951; '
                      f'Hm_lpvt_f8001a3f3d9bf5923f780580eb550c0b=1670402174; JSESSIONID={code_jsessionid}; '
                      f'dxm_vc={code_dxm_vc}',
        }
        data = {
            'account': account,
            'password': password,
            'dxmVerify': code,
            'loginVerifyCode': '',
            'remeber': 'remeber',  # spelling kept as-is; this is the field name sent to the server
            'url': '',
        }
        response = requests.post(login_url, data=data, headers=headers, timeout=30)
        print(response.text)

        res = response.json()
        if res['code'] == -1:
            print(res['error'], f'try {attempt} times')
            continue

        # Success: serialise the cookies set by the login response.
        cookie = response.cookies.items()
        print(cookie)
        cookies = ''.join(f'{name}={value};' for name, value in cookie)
        print(11, cookies)
        return cookies

    # Every attempt failed.
    return None

欢迎关注,爬虫王者