# 某多批发信息采集并写到 MongoDB
#
# 时间: 2024-07-07 12:00:31
import asyncio
from playwright.async_api import async_playwright, BrowserType
import pymongo
import json
import gzip
import io
import time
import asyncio
from playwright.async_api import async_playwright, BrowserType
from asyncqt import QEventLoop
from PyQt5.QtWidgets import QApplication, QPushButton, QVBoxLayout, QWidget, QTextEdit
import sys

class BaseAsync:
    def __init__(self, *, user_data_dir=None, mongo_host='localhost',
                 mongo_port=27017, db_name='pdd'):
        """Initialise Playwright placeholders and open the MongoDB handles.

        Every parameter is optional and keyword-only; calling ``BaseAsync()``
        with no arguments produces exactly the configuration that used to be
        hard-coded here.

        Args:
            user_data_dir: Browser profile directory later handed to
                ``launch_persistent_context`` by ``setup_playwright``.
                ``None`` selects the original hard-coded Windows path.
            mongo_host: Host name passed to ``pymongo.MongoClient``.
            mongo_port: Port passed to ``pymongo.MongoClient``.
            db_name: Name of the database selected on the client.
        """
        # Playwright handles start out empty; setup_playwright() assigns
        # _playwright and _context.
        self._playwright = None
        self._browser = None
        self._context = None
        self._pages = []  # list of pages maintained by this instance

        if user_data_dir is None:
            # NOTE(review): Playwright's docs describe Chromium's user data
            # directory as the *parent* of the profile path, which would make
            # ".../User Data" the expected value rather than ".../Default".
            # The original value is kept as the default -- confirm intent.
            user_data_dir = 'C:/Users/Administrator/AppData/Local/Google/Chrome/User Data/Default'
        self.user_data_dir = user_data_dir

        self.client = pymongo.MongoClient(mongo_host, mongo_port)
        self.db = self.client[db_name]
        # No collection chosen yet; presumably assigned by callers/subclasses.
        self.collection_name = None


    async def setup_playwright(self):
        """Start Playwright and open a persistent, headed Chromium context.

        On success ``self._playwright`` holds the started Playwright instance
        and ``self._context`` holds the persistent context created from
        ``self.user_data_dir``.

        Raises:
            Exception: Whatever Playwright raises while launching the context
                is re-raised unchanged, after the started Playwright instance
                has been stopped and ``self._playwright`` reset to ``None``.
        """
        self._playwright = await async_playwright().start()
        try:
            # launch_persistent_context is used instead of launch so the
            # browser runs against the profile in user_data_dir.
            self._context = await self._playwright.chromium.launch_persistent_context(
                user_data_dir=self.user_data_dir,
                headless=False,
            )
        except Exception:
            # Do not leave the Playwright driver running when the browser
            # could not be launched; restore the pre-setup state and re-raise.
            await self._playwright.stop()
            self._playwright = None
            raise



    async def modify_navigator_properties(self):
        """
        Modify properties on the navigator object to avoid detection.
        This script will run on every new page created in the context.
        """
        script = """
           Object.defineProperty(navigator, 'webdriver', {
             get: () => undefined
           });
           """
        await self._context.add_init_script(script)

    asy