python爬虫获取steam验证码

时间:2024-05-19 13:01:24

最近在尝试写一个自动注册 Steam 账号的程序,注册时需要填写验证码。
先试着用 pytesseract 对 Steam 验证码进行识别,但结果很不理想,几乎没有一张能识别正确。

这是得到的验证码结果;下一步打算尝试训练一个模型来识别验证码。
python爬虫获取steam验证码
python爬虫获取steam验证码
python爬虫获取steam验证码

import requests
import time
import json

# Steam store endpoints used by this script: the login page (fetched once to
# obtain cookies), the captcha-refresh endpoint (its JSON reply carries a
# 'gid'), and the captcha-render endpoint (the gid is appended as a query
# parameter).
_store_root = 'https://store.steampowered.com'
login_url = _store_root + '/login'
refreshcaptcha_url = _store_root + '/join/refreshcaptcha/'
captcha_base_url = _store_root + '/login/rendercaptcha?'

# Single shared HTTP session; its cookie jar is read back after the first
# request to the login page.
req = requests.Session()
# Browser-like request headers for the three kinds of request the script makes.
#
# NOTE: 'br' is deliberately not advertised in Accept-Encoding. requests only
# decodes Brotli-compressed bodies when the optional brotli/brotlicffi package
# is installed; without it an undecoded body would break response.json().
_user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'

# Headers for the initial GET of the login page.
headers1 = {
    'Referer': 'https://store.steampowered.com/join',
    'User-Agent': _user_agent,
}

# Headers for the XHR-style form POST that refreshes the captcha.
# Content-Length is intentionally absent: requests derives it from the encoded
# form body, and a fixed value of '7' is only correct for a one-digit count.
headers2 = {
    'Accept': 'text/javascript, text/html, application/xml, text/xml, */*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Host': 'store.steampowered.com',
    'Origin': 'https://store.steampowered.com',
    'Referer': 'https://store.steampowered.com/join',
    'User-Agent': _user_agent,
    'X-Prototype-Version': '1.7',
    'X-Requested-With': 'XMLHttpRequest',
}

# Headers for the GET that downloads the captcha image.
headers3 = {
    'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Host': 'store.steampowered.com',
    'Referer': 'https://store.steampowered.com/join',
    'User-Agent': _user_agent,
}

# Fetch the login page once so the session receives the browserid, sessionid
# and steamCountry cookies.
login_response = req.get(url=login_url, headers=headers1)
# Fail fast on an HTTP error instead of carrying on with missing cookies.
login_response.raise_for_status()

cookies_dict = requests.utils.dict_from_cookiejar(req.cookies)
browserid = cookies_dict.get('browserid')
sessionid = cookies_dict.get('sessionid')
steamCountry = cookies_dict.get('steamCountry')

# Explicit cookie jar passed along with every captcha request. The three
# session cookies come from the login response above; the remaining values
# are fixed strings (presumably captured from a real browser session --
# NOTE(review): confirm they are still accepted).
cookie = requests.cookies.RequestsCookieJar()
for cookie_name, cookie_value in (
        ('browserid', browserid),
        ('sessionid', sessionid),
        ('steamCountry', steamCountry),
        ('timezoneOffset', '28800,0'),
        ('_ga', 'GA1.2.321650414.1556974950'),
        ('_gid', 'GA1.2.1323839835.1556974950'),
):
    cookie.set(cookie_name, cookie_value)

def get_captcha_img(
        i,
        path_template=r'C:\Users\Administrator\Desktop\captcha\{}.png',
        timeout=30,
):
    """Download one Steam captcha image and save it to disk.

    Posts ``count=i`` to the captcha-refresh endpoint, reads the ``gid`` from
    the JSON reply, fetches the image rendered for that gid and writes the raw
    bytes to ``path_template.format(i)``.

    Args:
        i: Sequence number; sent as the ``count`` form field and used to name
            the output file.
        path_template: ``str.format`` template for the output path; ``{}`` is
            replaced by ``i``. Defaults to the original hard-coded location.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The path the image was written to.

    Raises:
        requests.HTTPError: If either request returns an error status.
        ValueError: If the refresh reply contains no ``gid``.
    """
    import os

    # Step 1: ask for a fresh captcha id (gid).
    response = req.post(
        url=refreshcaptcha_url,
        headers=headers2,
        data={'count': str(i)},
        cookies=cookie,
        timeout=timeout,
    )
    response.raise_for_status()
    gid = response.json().get('gid')
    if gid is None:
        # Without this guard the URL concatenation below would fail with an
        # unhelpful TypeError.
        raise ValueError('captcha refresh response for count={} has no gid'.format(i))
    print('\ngid\n',gid)

    # Step 2: fetch the image rendered for that gid. format() also copes with
    # a numeric gid, which plain string concatenation would not.
    captcha_url = '{}gid={}'.format(captcha_base_url, gid)
    img_response = req.get(
        url=captcha_url,
        headers=headers3,
        cookies=cookie,
        timeout=timeout,
    )
    img_response.raise_for_status()

    # Step 3: save the image bytes, creating the target directory if needed.
    save_path = path_template.format(i)
    target_dir = os.path.dirname(save_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(save_path, 'wb') as fn:
        fn.write(img_response.content)
    return save_path


# Download 200 captchas, numbered 783 through 982.
for i in range(783, 783 + 200):
    get_captcha_img(i)
    # Keep the delay at 2 seconds or more; with a shorter pause the server
    # readily bans the client.
    time.sleep(2)