根据业务摸索出的一个selenium代码模版（python）

前言

总算入行上班几个月了，不得不说业务是真的不消停啊。。

本人工作上经常遇到一种场景：为甲方做自动化接口处理工具，而登录需要短信验证码。

嘛算是摸索出了一套selenium代码模板，主要解决如下痛点

会话超时/断开时，又要找甲方问短信等验证码登录
调试途中增减修改功能，算是调试中热更新

分享一下

模板代码

app.py

#!/usr/bin/python

# -*- coding: utf-8 -*-

import os

import importlib

from selenium import webdriver

from selenium.webdriver import ActionChains

from selenium.webdriver.common.by import By

from selenium.webdriver.support.ui import WebDriverWait

from selenium.webdriver.support import expected_conditions as EC

import backend

# Directory that contains this script; used to locate files shipped next to it.
basepath = os.path.abspath(os.path.dirname(__file__))

# ChromeDriver binary expected beside this script (Windows executable name).
driver_path = os.path.join(basepath, 'chromedriver.exe')

# Reuse the logger object exposed by the backend module.
logger = backend.logger

def init_browser(driver_path=None):
    """Start a maximized Chrome session and return its WebDriver.

    The browser is launched with ``--no-sandbox`` and ``--disable-gpu``, the
    ``enable-automation`` switch excluded, the automation extension disabled,
    and a script injected into every new document that makes
    ``navigator.webdriver`` read as ``undefined``.

    Args:
        driver_path: Path to the ChromeDriver executable. When ``None`` the
            ``executable_path`` keyword is omitted so Selenium falls back to
            its own default lookup instead of receiving ``None`` as a path.

    Returns:
        The started ``webdriver.Chrome`` instance.
    """
    options = webdriver.ChromeOptions()
    for flag in ('--no-sandbox', '--disable-gpu'):
        options.add_argument(flag)

    # Presumably blocks site notification prompts (content setting value 2).
    prefs = {
        'profile.default_content_setting_values': {
            'notifications': 2,
        },
    }
    options.add_experimental_option('prefs', prefs)
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    options.add_experimental_option("useAutomationExtension", False)

    # Forward the path only when one was supplied: passing
    # executable_path=None makes the driver service fail on start-up.
    # NOTE(review): the executable_path keyword is the Selenium 3 / early
    # Selenium 4 API - confirm against the installed Selenium version.
    driver_kwargs = {}
    if driver_path is not None:
        driver_kwargs['executable_path'] = driver_path
    browser = webdriver.Chrome(options=options, **driver_kwargs)
    browser.maximize_window()

    # Evaluated before any page script runs, on every new document.
    browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
        Object.defineProperty(navigator, 'webdriver', {
          get: () => undefined
        })
      """
    })
    return browser

def jump_security(wait, mouse):
    """Click through a two-step warning page.

    Waits for the element with id ``details-button`` and clicks it, then waits
    for the element with id ``proceed-link`` and clicks it through the action
    chain. Presumably this is Chrome's certificate-warning interstitial -
    confirm against the target site.

    Args:
        wait: Object whose ``until`` method blocks until a condition holds.
        mouse: Action chain used to move to and click the second element.
    """
    details_locator = (By.ID, 'details-button')
    proceed_locator = (By.ID, 'proceed-link')

    details_button = wait.until(EC.presence_of_element_located(details_locator))
    details_button.click()

    proceed_link = wait.until(EC.presence_of_element_located(proceed_locator))
    chain = mouse.move_to_element(proceed_link)
    chain.click().perform()

def init_login(driver, wait, mouse):
    """Pre-fill the username and password fields of the login form.

    Args:
        driver: WebDriver showing the login page.
        wait: Object whose ``until`` method blocks until the username field is
            present in the DOM.
        mouse: Action chain; accepted for signature compatibility, not used.
    """
    username_inp = wait.until(EC.presence_of_element_located((By.ID, "username")))
    username_inp.send_keys("user")

    # find_element(By.ID, ...) exists in Selenium 3 and 4; the
    # find_element_by_id shortcut was removed in Selenium 4.3.
    password_inp = driver.find_element(By.ID, "password")
    password_inp.send_keys("password")

class App(object):
    """Singleton that owns the shared Selenium session.

    The first ``App()`` call starts the browser, opens the login page and
    pre-fills the login form; every later call returns the same instance so
    the already-open browser session is reused.

    Class attributes published after a successful first construction:
        error_num: Counter initialised to 0 (not used inside this class).
        driver: The Chrome WebDriver.
        wait: ``WebDriverWait`` on ``driver`` with a 20-second timeout.
        mouse: ``ActionChains`` bound to ``driver``.
    """

    def __new__(cls, *args, **kwargs):
        """Return the single instance, creating the browser session on first use.

        Raises:
            Exception: Whatever browser start-up, navigation or form
                pre-filling raised; the browser is closed before re-raising.
        """
        if not hasattr(cls, '_instance'):
            driver = init_browser(driver_path)
            try:
                wait = WebDriverWait(driver, 20)
                mouse = ActionChains(driver)
                driver.get('https://www.target.com/login')
                # Optional step, disabled by default:
                # jump_security(wait, mouse)
                init_login(driver, wait, mouse)
            except Exception:
                # A failed start-up never sets _instance, so the next App()
                # call launches another browser; close this one to avoid
                # leaking a browser process per failed attempt.
                driver.quit()
                raise
            cls.error_num = 0
            cls.driver = driver
            cls.wait = wait
            cls.mouse = mouse
            cls._instance = object.__new__(cls)
        return cls._instance

# 模式1：client无限循环

def run_unlimited():
    """Run the backend in an endless interactive loop.

    Each cycle obtains the shared ``App`` session, waits for the operator to
    finish logging in, runs ``backend.Backend(...).main()``, then blocks on a
    prompt so ``backend.py`` can be edited before it is reloaded for the next
    cycle. Failures are logged with their traceback instead of being
    discarded, and the loop continues.
    """
    while True:
        try:
            obj = App()
            input('等待登录并进入目标页面后，回此处按回车 >>> ')
            back = backend.Backend(obj)
            back.main()
        except Exception:
            # Keep the loop alive, but leave a traceback to debug from.
            logger.exception('backend run failed')
        finally:
            # Pause here while backend.py is modified, then pick up the edits.
            input('供backend修改的阻塞暂停')
            importlib.reload(backend)

# 模式2：构建本地api服务

from flask import Flask

app = Flask(__name__)

@app.route("/", methods=["GET"])
def main():
    """Handle ``GET /``: reload the backend, run it once, report the result.

    Returns:
        A JSON response of the form ``{"results": ...}`` carrying whatever
        ``Backend.main()`` returned.
    """
    # Imported here so the fix does not depend on the file-level import line.
    from flask import jsonify

    importlib.reload(backend)
    back = backend.Backend(App())
    results = back.main()
    # A Flask view must return a response; returning None raises TypeError.
    return jsonify(results=results)

if __name__ == '__main__':
    # Windows only: force-kill leftover driver and browser processes.
    for cleanup_command in (
        'taskkill /im chromedriver.exe /F',
        'taskkill /im chrome.exe /F',
    ):
        os.system(cleanup_command)

    # Mode 1 (interactive loop) is active; the line below it is the disabled
    # alternative that starts the Flask service instead.
    run_unlimited()
    # app.run()

前端有两部分，一是单例的selenium，二是此自动化处理工具的形式：client循环形式 / api服务形式

单例的 `__new__` 里 init 一些属性，处理登录那部分也可以放后台
两种形式其实就是看形式是要主动触发还是被动触发，至于具体做什么就放后台

backend.py

#!/usr/bin/python

# -*- coding: utf-8 -*-

import json

import os

import re

from concurrent.futures import ThreadPoolExecutor, as_completed

import requests

import simplejson

from loguru import logger

from retry import retry

from tqdm import tqdm, trange

import urllib3

# Requests in this module are sent with verify=False; silence the warning
# urllib3 would otherwise emit for each of them.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Resolved from the current working directory rather than from __file__.
basepath = os.path.abspath('./')

# This module is re-executed by importlib.reload(), which keeps the module's
# globals. Remove the sink registered by the previous execution so reloads do
# not stack file sinks and write every message several times.
if '_log_sink_id' in globals():
    try:
        logger.remove(_log_sink_id)
    except ValueError:
        # The sink was already removed elsewhere; nothing to clean up.
        pass

_log_sink_id = logger.add(f'{basepath}/logs/{os.path.basename(__file__)[:-3]}.log',
                          format="{level} | {time:YYYY-MM-DD HH:mm:ss} | {function}:{line} - {message}",
                          level="INFO", retention='5 days')

class Backend(object):
    """Work performed through an already logged-in browser session.

    Combines the Selenium driver taken from ``obj`` with a ``requests``
    session whose headers are completed from the browser's cookies and
    sessionStorage, so API calls can reuse the browser login.

    ``pre_api_task`` and ``pre_page_task`` are the extension points: override
    or edit them to do the actual processing.
    """

    def __init__(self, obj):
        """Create the HTTP session and bind the driver.

        Args:
            obj: Object exposing the Selenium WebDriver as ``obj.driver``.
        """
        self.sess = requests.session()
        self.driver = obj.driver
        # update() keeps requests' case-insensitive header mapping instead of
        # replacing it with a plain dict. No Content-Length is set here:
        # requests computes it per request, and a fixed value would be wrong
        # for any request without a body.
        self.sess.headers.update({
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-Hans-CN, zh-Hans; q=0.5',
            'Cache-Control': 'no-cache',
            'Connection': 'Keep-Alive',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Cookie': 'SESSION=abcdefg',
            'Host': 'www.target.com',
            'Referer': 'https://www.target.com/path',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
            'X-Requested-With': 'XMLHttpRequest',
        })

    def get_cookie(self):
        """Click the trigger button, then return the browser cookies.

        Returns:
            Dict mapping each cookie name to its value.
        """
        # find_element('xpath', ...) works on Selenium 3 and 4; the
        # find_element_by_xpath shortcut was removed in Selenium 4.3.
        trigger = self.driver.find_element('xpath', '//input[@class="e.g:trigger btn"]')
        trigger.click()
        return {cookie["name"]: cookie["value"] for cookie in self.driver.get_cookies()}

    def get_headers(self):
        """Refresh the session's ``Authorization`` and ``Cookie`` headers.

        The token is read from the page's sessionStorage key ``token``; the
        cookie header is rebuilt from the ``SESSION`` and ``acw_tc`` cookies.

        Raises:
            KeyError: If either cookie is missing from the browser.
        """
        cookies = self.get_cookie()
        token = self.driver.execute_script('return window.sessionStorage.getItem("token")')
        self.sess.headers.update({
            'Authorization': token,
            'Cookie': f'SESSION={cookies["SESSION"]}; acw_tc={cookies["acw_tc"]}'
        })

    # Up to 3 attempts, 1 second apart, on JSON decoding errors, KeyError and
    # ValueError (which includes the non-200 error raised below).
    @retry((json.decoder.JSONDecodeError, simplejson.errors.JSONDecodeError, KeyError, ValueError), tries=3, delay=1)
    def do_api(self):
        """POST the payload to the API and hand the decoded JSON to ``pre_api_task``.

        Raises:
            ValueError: If the response status is not 200.
        """
        url = 'https://www.target.com/api/path'
        payload = {
            'params': '31b1xu0',
        }
        self.get_headers()
        # NOTE(review): the body is sent as JSON while the session's
        # Content-Type header says form-urlencoded - confirm which one the
        # target endpoint expects.
        resp = self.sess.post(url, json=payload, verify=False, timeout=10)
        if resp.status_code != 200:
            raise ValueError(f'do_api failed:: {resp.text}')
        self.pre_api_task(resp.json())

    def do_selenium_command(self):
        """Open the listing via page scripts and run ``pre_page_task`` on every page.

        Raises:
            ValueError: If the page count cannot be found in the page source.
        """
        self.driver.execute_script("$('p[class=imgShow]').click()")
        self.driver.execute_script("document.getElementsByClassName('supportRadioOptional1 checked')[0].click();")

        # Fail with a clear message instead of AttributeError on None.group().
        found = re.search(r'共 (\d+) 页', self.driver.page_source)
        if found is None:
            raise ValueError('do_selenium_command failed:: page count not found in page source')
        pagenum = int(found.group(1))

        for page_idx in trange(pagenum, ncols=40):
            self.pre_page_task()
            # page_idx is 0-based, so the next page number is page_idx + 2.
            # Do not navigate beyond the last page.
            if page_idx + 1 < pagenum:
                self.driver.execute_script(f"PaginationpageTable.gotoPage('next', '{page_idx + 2}', '50');")

    def pre_api_task(self, data):
        """Hook called by ``do_api`` with the decoded JSON response.

        Does nothing by default; put the API result handling here.
        """

    def pre_page_task(self):
        """Hook called by ``do_selenium_command`` once per result page.

        Does nothing by default; put the per-page handling here.
        """

    def main(self):
        """Run the browser-driven step, then the API step."""
        self.do_selenium_command()
        self.do_api()

if __name__ == '__main__':
    # Send one GET to the service on local port 5000 (presumably the Flask
    # app from app.py - confirm). The URL needs an explicit scheme: without
    # it requests raises MissingSchema before sending anything.
    requests.get('http://127.0.0.1:5000')

基于前面说的短信验证码，让甲方登录后selenium一顿操作就把api的headers补完了，可以愉快地请求接口了

需要用 js 取参数的话，可以这样写：`token = self.driver.execute_script('return window.sessionStorage.getItem("token")')`

目前遇到的一些注意点：

渲染的页面带frame，需要switch_to再xpath等处理，可把driver.page_source写进文件判断是否该目标页顺带测定位
有时driver.find_element_by_*无法定位，试试用js；有些JS/Jquery功能在老版IE上用不了，回用mouse处理（套娃呢喂）；连续使用js时要注意响应等待时间

basepath处用'./'取巧了一下(与pyinstaller打包有关)，可以基于此变量做一些本地文件处理

Last

毕竟最终是为甲方做的，程序要以甲方设备为准（即使它是 win7，用 pywin32 定位句柄会出现兼容问题；即使业务网站只兼容 IE 内核，js 部分功能无法用；头发掉光了啊）。

毕竟是个人摸索出的，可能有更优解，如大佬路过还请不要吝啬交(p)流(y)一下心得

秒客网

根据业务摸索出的一个selenium代码模版（python）

前言

模板代码

app.py

backend.py

Last

相关文章