抖音web爬虫【滑块验证码解决方法】

时间:2024-11-08 07:58:46

文章内容仅供参考学习,如有侵权请联系作者进行删除


实现过程:

1、通过node下载滑块的图片

2、通过python的cv2视觉识别库对图片中的缺口位置进行定位

3、根据获取到的位置,再通过node控制鼠标自动移动滑块,已验证识别精准度为1/5左右

代码:

//nodejs
const puppeteer = require('puppeteer')
const request = require('request')
const fs = require('fs')
const execSync = require('child_process').execSync

/**
 * End-to-end flow of the script: open a page, download the two captcha images,
 * ask the Python helper for the horizontal offset, drag the slider with the
 * mouse and finally read the cookie.
 *
 * NOTE(review): many call expressions in this function have lost their callee
 * and/or arguments (e.g. `await ();`, `el => )`), apparently during text
 * extraction of the article, so the function does not parse as written. The
 * inline notes below mark each gap; the suggested callees are assumptions
 * that must be confirmed against the original article before use.
 */
async function run(){

    // Launch options: sandbox disabled, no visible browser window.
    let options={
        args: ['--no-sandbox'],
        headless: true,
    }
    // Returns the browser instance.
    // NOTE(review): callee missing — presumably puppeteer.launch; confirm.
    let browser=await (options);
    // Creates a new page and returns the page object.
    // NOTE(review): callee missing — presumably browser.newPage; confirm.
    let page=await ();

    // Navigate to the target page.
    // NOTE(review): callee missing and the URL looks truncated to "/hot"; confirm both.
    await ("/hot");

    // Fixed 3 s pause before querying the captcha elements.
    await sleep(3000)

    // Download the captcha images (background and slider piece).
    // NOTE(review): the bodies of both `el =>` callbacks are missing — presumably
    // they return the element's image URL; confirm.
    const backgroundSrc = await page.$eval('#captcha-verify-image', el => );
    const slideSrc = await page.$eval('.captcha_verify_img_slide', el => );
    // NOTE(review): both calls pass the same path "./python/" with no file name,
    // so the target file names appear to have been stripped; confirm.
    await downloadImg(backgroundSrc, "./python/")
    await downloadImg(slideSrc, "./python/")

    // Work out how far the slider has to move.
    await sleep(2000)

    // The script being run is the Python code further down in this file.
    // NOTE(review): the script file name after 'python python/' is missing.
    const output = execSync('python python/'); 
    // NOTE(review): the argument to parseInt is missing — presumably the text of
    // `output`; confirm. No radix is passed either.
    let moveSize = parseInt(());

    // Hand-tuned correction of the detected offset, by range (the original note
    // called this "cv2 vision machine learning"):
    //   > 200 -> -65, 151..200 -> -25, <= 70 -> +30, 71..99 -> +10,
    //   100..133 -> +30, 134..150 -> unchanged.
    if(moveSize > 200){
        moveSize = moveSize-65;
    }else if(moveSize > 150){
        moveSize = moveSize-25;
    }else if(moveSize <= 70){
        moveSize = moveSize+30;
    }else if(moveSize < 100){
        moveSize = moveSize+10;
    }else if(moveSize < 134){
        moveSize = moveSize+30;
    }
    //(moveSize)  -- leftover debug output of the corrected offset; callee missing.


    // Start dragging.
    const element = await page.$('#secsdk-captcha-drag-wrapper>div:nth-child(2)');
    // NOTE(review): callee missing — presumably reads the position/size of
    // `element`; confirm.
    const size = await ();
    // NOTE(review): in the four statements below the callee and the coordinate
    // arguments are missing — presumably mouse move/down/move/up calls on the
    // page; confirm.
    await (, );// move the mouse over the slider handle
    await ();// press the mouse button
    await (+moveSize+20, , {steps:200});// drag right with y unchanged; the move is split into 200 steps (the original note said "x takes the value 1000", which does not match the visible `+moveSize+20`)
    await ();// release the mouse button

    // Read the cookie.
    await sleep(3000)
    // NOTE(review): callee missing and the argument is a placeholder string; confirm intent.
    await ("<script>...</script>")
    // NOTE(review): callee and callback body missing — presumably evaluates the
    // page's cookie string; confirm.
    let cookie = await (() => );


    // At this point the cookie that allows normal requests is available. END
    // NOTE(review): callees missing — presumably a log call and closing the browser; confirm.
    ('cookie: '+cookie);
    ();

}

/**
 * Download the resource at `src` and write it to the local file `path`.
 *
 * The receiver names in the original snippet were lost (`(path)`, `("end", ...)`,
 * `();`); they are restored here from the `fs` and `request` modules this file
 * already requires.
 *
 * @param {string} src  URL handed to `request`.
 * @param {string} path Destination file path.
 * @returns {Promise<void>} Resolves once the file has been fully written;
 *   rejects if either the download or the file write fails.
 */
async function downloadImg(src, path) {
  return new Promise(function (resolve, reject) {
    const writeStream = fs.createWriteStream(path);
    const readStream = request(src);

    // A failed download does not close the destination on its own, so tear it
    // down explicitly and surface the error instead of leaving the promise pending.
    readStream.on("error", function (err) {
      writeStream.destroy();
      reject(err);
    });
    writeStream.on("error", reject);

    // "finish" fires after pipe() has ended the stream and all data is flushed,
    // so no extra end() call is needed here.
    writeStream.on("finish", function () {
      resolve();
    });

    readStream.pipe(writeStream);
  });
}

/**
 * Pause helper for use with `await`.
 *
 * @param {number} ms Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves (with no value) once the timer fires.
 */
function sleep(ms){
    return new Promise(function (wake) {
        setTimeout(function () {
            wake();
        }, ms);
    });
}

// Start the flow. `run` is async, so attach a rejection handler: log the
// failure and mark the process as failed instead of leaving the promise floating.
run().catch(function (err) {
    console.error(err);
    process.exitCode = 1;
});

---------------------------------------------------------------------------------------------------------------------------------

//python
# coding=UTF-8

import cv2
import sys

def show(name):
    """Display an image in a window titled 'Show' and block until a key is pressed.

    Debug helper for inspecting the outlined match region. The ``cv2.`` callees
    were missing from the original snippet (leaving three no-op expressions)
    and are restored here.

    :param name: the image to display (despite the parameter name, this is the
        image array, not a window title).
    """
    cv2.imshow('Show', name)
    cv2.waitKey(0)  # 0 = wait indefinitely for a key press
    cv2.destroyAllWindows()


def _tran_canny(image):
    """Return the Canny edge map of ``image`` after a light Gaussian blur.

    The ``cv2.`` callees were missing from the original snippet (it returned
    plain tuples); they are restored from the function name and the
    "remove noise" comment.

    :param image: grayscale image array.
    :return: edge image produced by ``cv2.Canny`` with thresholds 50 and 150.
    """
    # Remove noise with a 3x3 Gaussian kernel before edge detection.
    image = cv2.GaussianBlur(image, (3, 3), 0)
    return cv2.Canny(image, 50, 150)


def detect_displacement(img_slider_path, image_background_path):
    """Locate the slider piece in the background image via edge-based template matching.

    The ``cv2.`` callees and the object in front of ``[::-1]`` were missing
    from the original snippet and are restored here.
    NOTE(review): ``image.shape`` (the slider image) is assumed to be the source
    of ``w, h``; since ``w`` feeds the return value, confirm against the
    original article.

    :param img_slider_path: path to the slider-piece image.
    :param image_background_path: path to the captcha background image.
    :return: x coordinate of the best match minus the slider width.
    :raises FileNotFoundError: if either image cannot be read.
    """
    # Flag 0 loads the image in grayscale mode.
    image = cv2.imread(img_slider_path, 0)
    template = cv2.imread(image_background_path, 0)
    # cv2.imread signals failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError("cannot read slider image: %s" % img_slider_path)
    if template is None:
        raise FileNotFoundError("cannot read background image: %s" % image_background_path)

    # Find the best match between the two edge maps.
    res = cv2.matchTemplate(_tran_canny(image), _tran_canny(template), cv2.TM_CCOEFF_NORMED)
    # minMaxLoc yields (min value, max value, min location, max location);
    # for TM_CCOEFF_NORMED the best match is at the maximum.
    _, _, _, max_loc = cv2.minMaxLoc(res)

    x, y = max_loc  # x, y of the best match
    top_left = x  # horizontal coordinate

    w, h = image.shape[::-1]  # width, height (shape is rows, cols)
    # Outline the matched region on the background image (debug aid for show()).
    cv2.rectangle(template, (x, y), (x + w, y + h), (7, 249, 151), 2)
    # show(template)
    return top_left - w

if __name__ == '__main__':
    # Script entry point: compute the slider displacement and print it to stdout.
    # NOTE(review): both arguments read `[0]+"/"` — the object being indexed and
    # the image file names after "/" appear to have been stripped (presumably a
    # directory taken from `sys`, which is imported above but otherwise unused);
    # as written this raises TypeError. Confirm against the original article.
    top_left = detect_displacement([0]+"/", [0]+"/")
    print(top_left)