Spaces:

hkfires
/

AIStudioBuildWS

Running

App Files Files Community

hkfires committed 21 days ago

Commit

3085164

verified ·

1 Parent(s): f7cd5e8

Upload 10 files

Browse files

Files changed (10) hide show

Dockerfile +31 -0
browser/instance.py +208 -0
browser/navigation.py +57 -0
main.py +235 -0
requirements.txt +33 -0
utils/common.py +52 -0
utils/cookie_handler.py +97 -0
utils/cookie_manager.py +201 -0
utils/logger.py +42 -0
utils/paths.py +32 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,31 @@

+# Use a lightweight official Python image as the base
+FROM python:3.11-slim-bookworm
+# Set the working directory; every following command runs inside it
+WORKDIR /app
+# Install the minimal set of system dependencies needed to run Playwright
+# The apt cache is cleaned in the same layer to keep the image small
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libatk1.0-0 libatk-bridge2.0-0 libcups2 libdbus-1-3 libdrm2 libgbm1 libgtk-3-0 \
+    libnspr4 libnss3 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxdamage1 \
+    libxext6 libxfixes3 libxrandr2 libxrender1 libxtst6 ca-certificates \
+    fonts-liberation libasound2 libpangocairo-1.0-0 libpango-1.0-0 libu2f-udev xvfb \
+    && rm -rf /var/lib/apt/lists/*
+# Copy and install the Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Download camoufox
+RUN camoufox fetch
+# Copy all project files into the working directory
+COPY . .
+# Expose the port Hugging Face Spaces expects (only used in server mode)
+EXPOSE 7860
+# Command executed when the container starts
+CMD ["python", "main.py"]

browser/instance.py ADDED Viewed

	@@ -0,0 +1,208 @@

+import os
+from playwright.sync_api import TimeoutError, Error as PlaywrightError
+from utils.logger import setup_logging
+from utils.cookie_manager import CookieManager
+from browser.navigation import handle_successful_navigation
+from camoufox.sync_api import Camoufox
+from utils.paths import logs_dir
+from utils.common import parse_headless_mode, ensure_dir
+def run_browser_instance(config):
+    """
+    Launch and manage a single Camoufox browser instance from the merged config.
+    Cookies are loaded through CookieManager, added to a fresh browser context,
+    and the page is navigated to config['url']. The final URL and page state are
+    then checked to decide whether the login is valid; on success control passes
+    to handle_successful_navigation. Failures are logged (most paths after the
+    page is created also save a diagnostic screenshot to the logs directory)
+    and the function returns None.
+    Args:
+        config: Mapping read via .get() for the keys 'cookie_source' (required;
+            an object exposing display_name), 'url', 'proxy' and 'headless'
+            (defaults to 'virtual').
+    """
+    cookie_source = config.get('cookie_source')
+    if not cookie_source:
+        # No instance label is available yet, so report through the un-prefixed logger
+        logger = setup_logging(os.path.join(logs_dir(), 'app.log'))
+        logger.error("错误: 配置中缺少cookie_source对象")
+        return
+    instance_label = cookie_source.display_name
+    logger = setup_logging(
+        os.path.join(logs_dir(), 'app.log'), prefix=instance_label
+    )
+    # The label is embedded in diagnostic file names, so path separators are replaced
+    diagnostic_tag = instance_label.replace(os.sep, "_")
+    expected_url = config.get('url')
+    proxy = config.get('proxy')
+    headless_setting = config.get('headless', 'virtual')
+    # Load the cookies through CookieManager
+    cookie_manager = CookieManager(logger)
+    all_cookies = []
+    try:
+        # Load cookies directly from the CookieSource object
+        cookies = cookie_manager.load_cookies(cookie_source)
+        all_cookies.extend(cookies)
+    except Exception as e:
+        logger.error(f"从cookie来源加载时出错: {e}")
+        return
+    # Make sure at least one cookie is available
+    if not all_cookies:
+        logger.error("错误: 没有可用的cookie（既没有有效的JSON文件，也没有环境变量）")
+        return
+    cookies = all_cookies
+    headless_mode = parse_headless_mode(headless_setting)
+    launch_options = {"headless": headless_mode}
+    if proxy:
+        logger.info(f"使用代理: {proxy} 访问")
+        launch_options["proxy"] = {"server": proxy, "bypass": "localhost, 127.0.0.1"}
+    # Image loading is deliberately left enabled: there are few images, and
+    # blocking them might increase the risk of being flagged
+    # launch_options["block_images"] = True
+    screenshot_dir = logs_dir()
+    ensure_dir(screenshot_dir)
+    try:
+        with Camoufox(**launch_options) as browser:
+            context = browser.new_context()
+            context.add_cookies(cookies)
+            page = context.new_page()
+            # ####################################################################
+            # ######## Enhanced page.goto() error handling and logging ##########
+            # ####################################################################
+            response = None
+            try:
+                logger.info(f"正在导航到: {expected_url} (超时设置为 120 秒)")
+                # page.goto() returns a response object that carries the status code
+                response = page.goto(expected_url, wait_until='domcontentloaded', timeout=120000)
+                # Inspect the HTTP response status
+                if response:
+                    logger.info(f"导航初步成功，服务器响应状态码: {response.status} {response.status_text}")
+                    if not response.ok: # response.ok is true for status codes in the 200-299 range
+                        logger.warning(f"警告：页面加载成功，但HTTP状态码表示错误: {response.status}")
+                        # Keep a snapshot for analysis even though the status signals an error
+                        page.screenshot(path=os.path.join(screenshot_dir, f"WARN_http_status_{response.status}_{diagnostic_tag}.png"))
+                else:
+                    # For non-http/https navigations (e.g. about:blank) the response may be None
+                    logger.warning("page.goto 未返回响应对象，可能是一个非HTTP导航。")
+            except TimeoutError:
+                # The most common failure: the navigation timed out
+                logger.error(f"导航到 {expected_url} 超时 (超过120秒)。")
+                logger.error("可能原因：网络连接缓慢、目标网站服务器无响应、代理问题、或页面资源被阻塞。")
+                # Try to save diagnostic artefacts
+                try:
+                    # A screenshot shows what state the page was stuck in
+                    # (blank page, still loading, browser error page, ...)
+                    screenshot_path = os.path.join(screenshot_dir, f"FAIL_timeout_{diagnostic_tag}.png")
+                    page.screenshot(path=screenshot_path, full_page=True)
+                    logger.info(f"已截取超时时的屏幕快照: {screenshot_path}")
+                    # The HTML helps analyse the DOM structure, also in headless mode
+                    html_path = os.path.join(screenshot_dir, f"FAIL_timeout_{diagnostic_tag}.html")
+                    with open(html_path, 'w', encoding='utf-8') as f:
+                        f.write(page.content())
+                    logger.info(f"已保存超时时的页面HTML: {html_path}")
+                except Exception as diag_e:
+                    logger.error(f"在尝试进行超时诊断（截图/保存HTML）时发生额外错误: {diag_e}")
+                return # Nothing useful can follow a timeout, so stop here
+            except PlaywrightError as e:
+                # Other Playwright network errors, e.g. DNS failure or connection refused
+                error_message = str(e)
+                logger.error(f"导航到 {expected_url} 时发生 Playwright 网络错误。")
+                logger.error(f"错误详情: {error_message}")
+                # The error text is matched against known net::ERR_* codes to log a hint
+                if "net::ERR_NAME_NOT_RESOLVED" in error_message:
+                    logger.error("排查建议：检查DNS设置或域名是否正确。")
+                elif "net::ERR_CONNECTION_REFUSED" in error_message:
+                    logger.error("排查建议：目标服务器可能已关闭，或代理/防火墙阻止了连接。")
+                elif "net::ERR_INTERNET_DISCONNECTED" in error_message:
+                    logger.error("排查建议：检查本机的网络连接。")
+                # Try a screenshot as well, even though the page may be unreachable
+                try:
+                    screenshot_path = os.path.join(screenshot_dir, f"FAIL_network_error_{diagnostic_tag}.png")
+                    page.screenshot(path=screenshot_path)
+                    logger.info(f"已截取网络错误时的屏幕快照: {screenshot_path}")
+                except Exception as diag_e:
+                    logger.error(f"在尝试进行网络错误诊断（截图）时发生额外错误: {diag_e}")
+                return # Network error: stop here
+            # --- Navigation raised no exception: continue with the follow-up checks ---
+            logger.info("页面初步加载完成，正在检查并处理初始弹窗...")
+            page.wait_for_timeout(2000)
+            final_url = page.url
+            logger.info(f"导航完成。最终URL为: {final_url}")
+            # Classify the outcome by the URL the browser ended up on
+            if "accounts.google.com/v3/signin/identifier" in final_url:
+                logger.error("检测到Google登录页面（需要输入邮箱）。Cookie已完全失效。")
+                page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_identifier_page_{diagnostic_tag}.png"))
+                return
+            # The query string is ignored when comparing against the expected URL
+            elif expected_url.split('?')[0] in final_url:
+                logger.info("URL正确。现在等待页面完成初始加载...")
+                # --- Wait for the loading spinner to disappear ---
+                # This avoids a race condition: the error message or the content
+                # only appears AFTER the initial loading is done.
+                spinner_locator = page.locator('mat-spinner')
+                try:
+                    logger.info("正在等待加载指示器 (spinner) 消失... (最长等待30秒)")
+                    # Wait for the spinner to be 'hidden' or not present in the DOM.
+                    spinner_locator.wait_for(state='hidden', timeout=30000)
+                    logger.info("加载指示器已消失。页面已完成异步加载。")
+                except TimeoutError:
+                    logger.error("页面加载指示器在30秒内未消失。页面可能已卡住。")
+                    page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_spinner_stuck_{diagnostic_tag}.png"))
+                    return # Exit if the page is stuck loading
+                # --- Now the error message can be checked safely ---
+                # The most specific text possible is used to avoid false positives.
+                auth_error_text = "authentication error"
+                auth_error_locator = page.get_by_text(auth_error_text, exact=False)
+                # NOTE(review): Playwright documents the timeout argument of
+                # is_visible() as ignored (the call returns immediately) - confirm
+                # whether an explicit wait is wanted for this and the checks below.
+                if auth_error_locator.is_visible(timeout=2000):
+                    logger.error(f"检测到认证失败的错误横幅: '{auth_error_text}'. Cookie已过期或无效。")
+                    screenshot_path = os.path.join(screenshot_dir, f"FAIL_auth_error_banner_{diagnostic_tag}.png")
+                    page.screenshot(path=screenshot_path)
+                    # html_path = os.path.join(screenshot_dir, f"FAIL_auth_error_banner_{diagnostic_tag}.html")
+                    # with open(html_path, 'w', encoding='utf-8') as f:
+                    #     f.write(page.content())
+                    # logger.info(f"已保存包含错误信息的页面HTML: {html_path}")
+                    return # Definitive failure, so we exit.
+                # --- If no error, proceed to final confirmation (as a fallback) ---
+                logger.info("未检测到认证错误横幅。进行最终确认。")
+                # A visible login button (Chinese or English label) means the session is not authenticated
+                login_button_cn = page.get_by_role('button', name='登录')
+                login_button_en = page.get_by_role('button', name='Login')
+                if login_button_cn.is_visible(timeout=1000) or login_button_en.is_visible(timeout=1000):
+                    logger.error("页面上仍显示'登录'按钮。Cookie无效。")
+                    page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_login_button_visible_{diagnostic_tag}.png"))
+                    return
+                # --- If all checks pass, we assume success ---
+                logger.info("所有验证通过，确认已成功登录。")
+                handle_successful_navigation(page, logger, diagnostic_tag)
+            elif "accounts.google.com/v3/signin/accountchooser" in final_url:
+                logger.warning("检测到Google账户选择页面。登录失败或Cookie已过期。")
+                page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_chooser_click_failed_{diagnostic_tag}.png"))
+                return
+            else:
+                logger.error(f"导航到了一个意外的URL: {final_url}")
+                page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_unexpected_url_{diagnostic_tag}.png"))
+                return
+    except KeyboardInterrupt:
+        logger.info(f"用户中断，正在关闭...")
+    except Exception as e:
+        # Last-resort handler for any error that was not anticipated above
+        logger.exception(f"运行 Camoufox 实例时发生未预料的严重错误: {e}")

browser/navigation.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import time
+import os
+from playwright.sync_api import Page, expect
+from utils.paths import logs_dir
+from utils.common import ensure_dir
+def handle_untrusted_dialog(page: Page, logger=None):
+    """
+    Detect and dismiss the "Last modified by..." dialog.
+    Waits up to 10 seconds for an "OK" button to become visible. If it shows
+    up, the button is clicked and the dialog is confirmed closed. Any failure
+    is logged and swallowed so that the caller can carry on.
+    Args:
+        page: Playwright page to inspect.
+        logger: Optional logger. When omitted, a standard-library logger named
+            after this module is used, so the logging calls never fail on None.
+    """
+    # Function-scope imports keep this block self-contained
+    import logging
+    from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
+    if logger is None:
+        logger = logging.getLogger(__name__)
+    ok_button_locator = page.get_by_role("button", name="OK")
+    try:
+        try:
+            # is_visible() returns immediately and ignores its timeout argument,
+            # so an explicit wait is needed to really give the dialog 10 seconds
+            ok_button_locator.wait_for(state="visible", timeout=10000)
+        except PlaywrightTimeoutError:
+            logger.info(f"在10秒内未检测到弹窗，继续执行...")
+            return
+        logger.info(f"检测到弹窗，正在点击 'OK' 按钮...")
+        ok_button_locator.click(force=True)
+        logger.info(f"'OK' 按钮已点击。")
+        # Raises (and is logged below) when the button is still shown after 1 second
+        expect(ok_button_locator).to_be_hidden(timeout=1000)
+        logger.info(f"弹窗已确认关闭。")
+    except Exception as e:
+        # Best effort: a problem with the dialog must not stop the instance
+        logger.info(f"检查弹窗时发生意外：{e}，将继续执行...")
+def handle_successful_navigation(page: Page, logger, cookie_file_config):
+    """
+    Follow-up work once the target page has been reached.
+    Gives the page focus, dismisses the "Last modified by..." dialog, waits
+    15 seconds, then clicks the page body every 10 seconds to keep the session
+    active. The loop ends at the first error; a screenshot is attempted before
+    the function returns.
+    Args:
+        page: Playwright page that reached the target URL.
+        logger: Logger used for all progress and error messages.
+        cookie_file_config: Value embedded in the failure screenshot file name.
+    """
+    logger.info("已成功到达目标页面。")
+    page.click('body') # give the page focus
+    # Check for and dismiss the "Last modified by..." dialog
+    handle_untrusted_dialog(page, logger=logger)
+    # Leave time for the page to load and render
+    logger.info("等待15秒以便页面完全渲染...")
+    time.sleep(15)
+    logger.info("实例将保持运行状态。每10秒点击一次页面以保持活动。")
+    try:
+        # Runs until a click or sleep raises
+        while True:
+            page.click('body')
+            time.sleep(10)
+    except Exception as e:
+        logger.error(f"在保持活动循环中出错: {e}")
+        # Capture the page state at the moment the keep-alive loop failed
+        try:
+            screenshot_dir = logs_dir()
+            ensure_dir(screenshot_dir)
+            screenshot_filename = os.path.join(screenshot_dir, f"FAIL_keep_alive_error_{cookie_file_config}.png")
+            page.screenshot(path=screenshot_filename, full_page=True)
+            logger.info(f"已在保持活动循环出错时截屏: {screenshot_filename}")
+        except Exception as screenshot_e:
+            logger.error(f"在保持活动循环出错时截屏失败: {screenshot_e}")

main.py ADDED Viewed

	@@ -0,0 +1,235 @@

+import os
+import threading
+import multiprocessing
+import signal
+import sys
+import time
+from browser.instance import run_browser_instance
+from utils.logger import setup_logging
+from utils.paths import cookies_dir, logs_dir
+from utils.cookie_manager import CookieManager
+from utils.common import clean_env_value, ensure_dir
+# Module-level state shared by the functions in this file
+browser_processes = []
+app_running = False
+flask_app = None
+def load_instance_configurations(logger):
+    """
+    Build the shared settings and one instance profile per cookie source.
+    Reads CAMOUFOX_INSTANCE_URL, CAMOUFOX_HEADLESS and CAMOUFOX_PROXY from the
+    environment and asks CookieManager for every available cookie source.
+    Args:
+        logger: Logger used for progress and error messages.
+    Returns:
+        tuple: (global_settings, instances), or (None, None) when the URL is
+        missing or no cookie source was found.
+    """
+    # The target URL is shared by every instance and is mandatory
+    shared_url = clean_env_value(os.getenv("CAMOUFOX_INSTANCE_URL"))
+    if not shared_url:
+        logger.error("错误: 缺少环境变量 CAMOUFOX_INSTANCE_URL。所有实例需要一个共享的目标URL。")
+        return None, None
+    headless_value = clean_env_value(os.getenv("CAMOUFOX_HEADLESS"))
+    global_settings = {"headless": headless_value or "virtual", "url": shared_url}
+    proxy_value = clean_env_value(os.getenv("CAMOUFOX_PROXY"))
+    if proxy_value:
+        global_settings["proxy"] = proxy_value
+    # Discover every cookie source (JSON files and environment variables)
+    sources = CookieManager(logger).detect_all_sources()
+    if not sources:
+        logger.error("错误: 未找到任何cookie来源（既没有JSON文件，也没有环境变量cookie）。")
+        return None, None
+    instances = []
+    for source in sources:
+        if source.type == "file":
+            profile = {"cookie_file": source.identifier, "cookie_source": source}
+        elif source.type == "env_var":
+            # The index is the trailing number of the name, e.g. "USER_COOKIE_1" -> 1
+            index_text = source.identifier.rsplit("_", 1)[-1]
+            profile = {
+                "cookie_file": None,
+                "env_cookie_index": int(index_text),
+                "cookie_source": source,
+            }
+        else:
+            # Unknown source kinds produce no instance
+            continue
+        instances.append(profile)
+    logger.info(f"将启动 {len(instances)} 个浏览器实例")
+    return global_settings, instances
+def start_browser_instances():
+    """
+    Start one browser process per instance profile and supervise them.
+    Processes are launched 30 seconds apart, recorded in the module-level
+    browser_processes list, and then polled until app_running is cleared or
+    every process has exited.
+    """
+    global browser_processes, app_running
+    logger = setup_logging(str(logs_dir() / 'app.log'))
+    logger.info("---------------------Camoufox 实例管理器开始启动---------------------")
+    global_settings, instance_profiles = load_instance_configurations(logger)
+    if not instance_profiles:
+        logger.error("错误: 环境变量中未找到任何实例配置。")
+        return
+    for i, profile in enumerate(instance_profiles, 1):
+        if not app_running:
+            break
+        # Per-instance values override the shared settings
+        final_config = {**global_settings, **profile}
+        if 'url' not in final_config:
+            logger.warning(f"警告: 跳过一个无效的配置项 (缺少 url): {profile}")
+            continue
+        cookie_source = final_config.get('cookie_source')
+        if not cookie_source:
+            logger.error(f"错误: 配置中缺少cookie_source对象")
+            continue
+        if cookie_source.type == "file":
+            logger.info(
+                f"正在启动第 {i}/{len(instance_profiles)} 个浏览器实例 (file: {cookie_source.display_name})..."
+            )
+        elif cookie_source.type == "env_var":
+            logger.info(
+                f"正在启动第 {i}/{len(instance_profiles)} 个浏览器实例 (env: {cookie_source.display_name})..."
+            )
+        worker = multiprocessing.Process(target=run_browser_instance, args=(final_config,))
+        browser_processes.append(worker)
+        worker.start()
+        # Stagger the launches (except after the last one) to avoid a CPU spike
+        if i < len(instance_profiles):
+            logger.info(f"等待 30 秒后启动下一个实例...")
+            time.sleep(30)
+    # Supervise: forget finished processes, briefly join the live ones
+    try:
+        while app_running and browser_processes:
+            for worker in list(browser_processes):
+                if worker.is_alive():
+                    worker.join(timeout=1)
+                else:
+                    browser_processes.remove(worker)
+            time.sleep(1)
+    except KeyboardInterrupt:
+        logger.info("捕获到终止信号，正在关闭所有浏览器进程...")
+        for worker in browser_processes:
+            worker.terminate()
+            worker.join()
+def run_standalone_mode():
+    """Standalone mode: set the running flag and start the browser instances in the calling thread."""
+    global app_running
+    app_running = True
+    start_browser_instances()
+def run_server_mode():
+    """
+    Server mode: run the browser instances in a background thread and serve
+    two JSON status endpoints ('/health' and '/') with Flask on 0.0.0.0:7860.
+    Logs an error and returns early when Flask cannot be imported.
+    """
+    global app_running, flask_app
+    log_dir = logs_dir()
+    server_logger = setup_logging(str(log_dir / 'app.log'), prefix="server")
+    # Import Flask lazily so it is only required when this mode is used
+    try:
+        from flask import Flask, jsonify
+        flask_app = Flask(__name__)
+    except ImportError:
+        server_logger.error("错误: 服务器模式需要 Flask，请安装: pip install flask")
+        return
+    app_running = True
+    # Start the browser instances in a background daemon thread
+    browser_thread = threading.Thread(target=start_browser_instances, daemon=True)
+    browser_thread.start()
+    # Route definitions
+    @flask_app.route('/health')
+    def health_check():
+        """Health-check endpoint: reports tracked and live browser process counts."""
+        running_count = sum(1 for p in browser_processes if p.is_alive())
+        return jsonify({
+            'status': 'healthy',
+            'browser_instances': len(browser_processes),
+            'running_instances': running_count,
+            'message': f'Application is running with {running_count} active browser instances'
+        })
+    @flask_app.route('/')
+    def index():
+        """Index endpoint: same counts as the health check plus the run mode."""
+        running_count = sum(1 for p in browser_processes if p.is_alive())
+        return jsonify({
+            'status': 'running',
+            'browser_instances': len(browser_processes),
+            'running_instances': running_count,
+            'run_mode': 'server',
+            'message': 'Camoufox Browser Automation is running in server mode'
+        })
+    # Silence the default request log of Flask's development server
+    import logging
+    log = logging.getLogger('werkzeug')
+    log.setLevel(logging.ERROR)
+    # Start the Flask server; this call blocks until the server stops
+    try:
+        flask_app.run(host='0.0.0.0', port=7860, debug=False)
+    except KeyboardInterrupt:
+        server_logger.info("服务器正在关闭...")
+def signal_handler(signum, frame):
+    """
+    Unified handler for termination signals.
+    Clears the global running flag, terminates every live browser process
+    (escalating to kill() when one has not exited after 5 seconds) and exits
+    the interpreter with status 0.
+    Args:
+        signum: Number of the received signal.
+        frame: Current stack frame (unused).
+    """
+    global app_running
+    # A forked browser worker inherits this handler; it cannot manage its
+    # siblings, so it only needs to exit itself.
+    if multiprocessing.parent_process() is not None:
+        sys.exit(0)
+    logger = setup_logging(str(logs_dir() / 'app.log'), prefix="signal")
+    logger.info(f"接收到信号 {signum}，正在关闭应用...")
+    app_running = False
+    # Work on a snapshot: the supervising loop removes finished processes from
+    # the shared list and may run in another thread.
+    for process in list(browser_processes):
+        if not process.is_alive():
+            continue
+        process.terminate()
+        # join() returns silently when the timeout expires, so the process
+        # must be re-checked before deciding whether to force-kill it
+        process.join(timeout=5)
+        if process.is_alive():
+            process.kill()
+            process.join(timeout=5)
+    logger.info("所有进程已关闭")
+    sys.exit(0)
+def main():
+    """
+    Program entry point.
+    Creates the logs and cookies directories, installs the SIGTERM/SIGINT
+    handler and starts server mode when the HG environment variable equals
+    'true' (case-insensitive), standalone mode otherwise.
+    """
+    # Make sure the working directories exist
+    for resolve_dir in (logs_dir, cookies_dir):
+        ensure_dir(resolve_dir())
+    # Route both termination signals to the same handler
+    for signal_number in (signal.SIGTERM, signal.SIGINT):
+        signal.signal(signal_number, signal_handler)
+    # The HG environment variable selects the run mode
+    use_server_mode = os.getenv('HG', '').lower() == 'true'
+    run_mode = run_server_mode if use_server_mode else run_standalone_mode
+    run_mode()
+if __name__ == "__main__":
+    # Script entry point: enable multiprocessing support for frozen executables, then run main()
+    multiprocessing.freeze_support()
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,33 @@

+camoufox[geoip]==0.4.11
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.13
+aiosignal==1.3.2
+attrs==25.3.0
+browserforge==1.2.3
+certifi==2025.6.15
+charset-normalizer==3.4.2
+click==8.2.1
+frozenlist==1.7.0
+flask==3.0.0
+geoip2==5.1.0
+greenlet==3.2.3
+idna==3.10
+language-tags==1.2.0
+lxml==5.4.0
+maxminddb==2.7.0
+multidict==6.5.0
+numpy==2.3.0
+orjson==3.10.18
+platformdirs==4.3.8
+playwright==1.52.0
+propcache==0.3.2
+pyee==13.0.0
+PySocks==1.7.1
+requests==2.32.4
+screeninfo==0.8.1
+tqdm==4.67.1
+typing_extensions==4.14.0
+ua-parser==1.0.1
+ua-parser-builtins==0.18.0.post1
+urllib3==2.4.0
+yarl==1.20.1

utils/common.py ADDED Viewed

	@@ -0,0 +1,52 @@

+"""
+通用工具函数
+提供项目中常用的基础功能
+"""
+import os
+from pathlib import Path
+def clean_env_value(value):
+    """
+    Normalise an environment-variable value by trimming surrounding whitespace.
+    Args:
+        value: Raw value of the variable (None when it is unset).
+    Returns:
+        str or None: The trimmed text, or None when the input is None or blank.
+    """
+    trimmed = None if value is None else value.strip()
+    return trimmed if trimmed else None
+def parse_headless_mode(headless_setting):
+    """
+    Translate the headless configuration value into a launch option.
+    Args:
+        headless_setting: Configured value; compared case-insensitively after str().
+    Returns:
+        bool or str: True for 'true', False for 'false', and 'virtual' for
+        every other value.
+    """
+    normalized = str(headless_setting).lower()
+    if normalized in ('true', 'false'):
+        return normalized == 'true'
+    return 'virtual'
+def ensure_dir(path):
+    """
+    Create the directory (including missing parents) unless it already exists.
+    Args:
+        path: Directory path, given as a string or a Path object.
+    """
+    target = Path(path) if isinstance(path, str) else path
+    os.makedirs(target, exist_ok=True)

utils/cookie_handler.py ADDED Viewed

	@@ -0,0 +1,97 @@

+def convert_cookie_editor_to_playwright(cookies_from_editor, logger=None):
+    """
+    Convert a cookie list exported by the Cookie-Editor extension into the
+    format accepted by Playwright.
+    Args:
+        cookies_from_editor: Iterable of cookie dicts as exported by Cookie-Editor.
+        logger: Optional logger that receives a warning for every skipped entry.
+    Returns:
+        list: Playwright-compatible cookie dicts. Entries that are not dicts or
+        that lack any of name/value/domain/path are skipped.
+    """
+    copied_keys = ('name', 'value', 'domain', 'path', 'httpOnly', 'secure')
+    required_keys = ('name', 'value', 'domain', 'path')
+    # Cookie-Editor sameSite values mapped to the spellings Playwright expects;
+    # values missing from this table leave sameSite unset
+    same_site_map = {
+        'no_restriction': 'None',
+        'lax': 'Lax',
+        'strict': 'Strict',
+        'unspecified': 'Lax',
+    }
+    playwright_cookies = []
+    for cookie in cookies_from_editor:
+        # A malformed entry must not abort the conversion of the valid ones
+        if not isinstance(cookie, dict):
+            if logger:
+                logger.warning(f"跳过一个格式不完整的 Cookie: {cookie}")
+            continue
+        pw_cookie = {key: cookie[key] for key in copied_keys if key in cookie}
+        if cookie.get('session', False):
+            # Session cookies carry no expiry; -1 marks them as such
+            pw_cookie['expires'] = -1
+        elif 'expirationDate' in cookie:
+            expiration = cookie['expirationDate']
+            pw_cookie['expires'] = int(expiration) if expiration is not None else -1
+        if 'sameSite' in cookie:
+            mapped = same_site_map.get(str(cookie['sameSite']).lower())
+            if mapped is not None:
+                pw_cookie['sameSite'] = mapped
+        if all(key in pw_cookie for key in required_keys):
+            playwright_cookies.append(pw_cookie)
+        elif logger:
+            logger.warning(f"跳过一个格式不完整的 Cookie: {cookie}")
+    return playwright_cookies
+def convert_kv_to_playwright(kv_string, default_domain=".google.com", logger=None):
+    """
+    Convert a key/value cookie string into the format accepted by Playwright.
+    Args:
+        kv_string (str): Cookies in the form "name1=value1; name2=value2; ...".
+        default_domain (str): Domain assigned to every cookie; defaults to ".google.com".
+        logger: Optional logger for warnings about skipped pairs and debug output.
+    Returns:
+        list: Playwright-compatible cookie dicts, one per valid pair.
+    """
+    playwright_cookies = []
+    for pair in kv_string.split(';'):
+        pair = pair.strip()
+        if not pair:
+            # Empty segments, e.g. from a trailing ';', are ignored silently
+            continue
+        # Split at the first '=' only, so values may themselves contain '='
+        name, separator, value = pair.partition('=')
+        if not separator:
+            if logger:
+                logger.warning(f"跳过无效的 Cookie 格式: '{pair}'")
+            continue
+        name = name.strip()
+        value = value.strip()
+        if not name:
+            if logger:
+                logger.warning(f"跳过空名称的 Cookie: '{pair}'")
+            continue
+        playwright_cookies.append({
+            'name': name,
+            'value': value,
+            'domain': default_domain,
+            'path': '/',
+            'expires': -1,  # treated as a session cookie
+            'httpOnly': False,  # the key/value format cannot express httpOnly
+            'secure': True,  # assumed to be a secure cookie
+            'sameSite': 'Lax',  # default SameSite policy
+        })
+        if logger:
+            logger.debug(f"成功转换 Cookie: {name} -> domain={default_domain}")
+    return playwright_cookies

utils/cookie_manager.py ADDED Viewed

	@@ -0,0 +1,201 @@

+"""
+统一的Cookie管理器
+整合JSON文件和环境变量cookie的检测、加载和管理功能
+"""
+import os
+import json
+from dataclasses import dataclass
+from typing import List, Dict, Optional
+from utils.paths import cookies_dir
+from utils.cookie_handler import convert_cookie_editor_to_playwright, convert_kv_to_playwright
+from utils.common import clean_env_value
+@dataclass
+class CookieSource:
+    """Uniform description of one place cookies can be loaded from."""
+    # Source kind: "file" or "env_var"
+    type: str
+    # File name for file sources, or the variable name such as "USER_COOKIE_1"
+    identifier: str
+    # Name shown for this source in log output
+    display_name: str
+    exists: bool = True
+    def __str__(self):
+        return "{}:{}".format(self.type, self.identifier)
+class CookieManager:
+    """
+    Single entry point for cookie handling.
+    Detects the available cookie sources (JSON files and USER_COOKIE_<n>
+    environment variables), loads them in Playwright format and caches both
+    the detection result and the loaded cookies.
+    """
+    def __init__(self, logger=None):
+        self.logger = logger
+        self._detected_sources: Optional[List[CookieSource]] = None
+        self._cookie_cache: Dict[str, List[Dict]] = {}
+    def detect_all_sources(self) -> List[CookieSource]:
+        """
+        Return every available cookie source (JSON files first, then
+        environment variables). The scan runs once; later calls return the
+        cached list.
+        """
+        if self._detected_sources is not None:
+            return self._detected_sources
+        sources = []
+        # 1. JSON files in the cookies directory
+        try:
+            cookie_path = cookies_dir()
+            if not os.path.isdir(cookie_path):
+                if self.logger:
+                    self.logger.error(f"Cookie 目录不存在: {cookie_path}")
+            else:
+                cookie_files = [
+                    entry for entry in os.listdir(cookie_path)
+                    if entry.lower().endswith('.json')
+                ]
+                sources.extend(
+                    CookieSource(type="file", identifier=entry, display_name=entry)
+                    for entry in cookie_files
+                )
+                if self.logger:
+                    if cookie_files:
+                        self.logger.info(f"发现 {len(cookie_files)} 个 Cookie 文件")
+                    else:
+                        self.logger.info(f"在 {cookie_path} 目录下未找到任何 .json 格式的 Cookie 文件")
+        except Exception as e:
+            if self.logger:
+                self.logger.error(f"扫描 Cookie 目录时出错: {e}")
+        # 2. USER_COOKIE_1, USER_COOKIE_2, ... until the first unset or blank variable
+        cookie_index = 1
+        while True:
+            env_var_name = f"USER_COOKIE_{cookie_index}"
+            if not clean_env_value(os.getenv(env_var_name)):
+                break
+            sources.append(
+                CookieSource(type="env_var", identifier=env_var_name, display_name=env_var_name)
+            )
+            cookie_index += 1
+        env_cookie_count = cookie_index - 1
+        if self.logger:
+            if env_cookie_count > 0:
+                self.logger.info(f"发现 {env_cookie_count} 个 Cookie 环境变量")
+            else:
+                self.logger.info(f"未检测到任何 USER_COOKIE 环境变量")
+        # Remember the result for later calls
+        self._detected_sources = sources
+        return sources
+    def load_cookies(self, source: CookieSource) -> List[Dict]:
+        """
+        Load the cookies of one source.
+        Args:
+            source: The cookie source to read.
+        Returns:
+            Playwright-compatible cookie list; empty when the source kind is
+            unknown or loading fails. Successful loads are cached per source.
+        """
+        cache_key = str(source)
+        if cache_key in self._cookie_cache:
+            if self.logger:
+                self.logger.debug(f"从缓存加载 Cookie: {source.display_name}")
+            return self._cookie_cache[cache_key]
+        cookies = []
+        try:
+            kind = source.type
+            if kind not in ("file", "env_var"):
+                if self.logger:
+                    self.logger.error(f"未知的 Cookie 来源类型: {source.type}")
+                return []
+            loader = self._load_from_file if kind == "file" else self._load_from_env
+            cookies = loader(source.identifier)
+            # Cache the freshly loaded cookies
+            self._cookie_cache[cache_key] = cookies
+            if self.logger:
+                self.logger.info(f"从 {source.display_name} 加载了 {len(cookies)} 个 Cookie 数据")
+        except Exception as e:
+            if self.logger:
+                self.logger.error(f"从 {source.display_name} 加载 Cookie 时出错: {e}")
+            return []
+        return cookies
+    def _load_from_file(self, filename: str) -> List[Dict]:
+        """Read a JSON cookie file from the cookies directory and convert it."""
+        cookie_path = cookies_dir() / filename
+        if not os.path.exists(cookie_path):
+            raise FileNotFoundError(f"Cookie 文件不存在: {cookie_path}")
+        with open(cookie_path, 'r', encoding='utf-8') as handle:
+            exported = json.load(handle)
+        return convert_cookie_editor_to_playwright(exported, logger=self.logger)
+    def _load_from_env(self, env_var_name: str) -> List[Dict]:
+        """Read a key/value cookie string from an environment variable and convert it."""
+        env_value = clean_env_value(os.getenv(env_var_name))
+        if env_value:
+            return convert_kv_to_playwright(
+                env_value,
+                default_domain=".google.com",
+                logger=self.logger
+            )
+        raise ValueError(f"环境变量 {env_var_name} 不存在或为空")
+    def get_all_sources(self) -> List[CookieSource]:
+        """Return every detected cookie source."""
+        return self.detect_all_sources()
+    def clear_cache(self):
+        """Drop all cached cookie lists."""
+        self._cookie_cache.clear()
+        if self.logger:
+            self.logger.debug("Cookie 缓存已清空")
+    def get_source_summary(self) -> Dict[str, int]:
+        """
+        Count the detected cookie sources.
+        Returns:
+            Dict with the keys "total", "files" and "env_vars".
+        """
+        sources = self.detect_all_sources()
+        file_count = sum(1 for item in sources if item.type == "file")
+        env_count = sum(1 for item in sources if item.type == "env_var")
+        return {
+            "total": len(sources),
+            "files": file_count,
+            "env_vars": env_count
+        }

utils/logger.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import logging
+def setup_logging(log_file, prefix=None, level=logging.INFO):
+    """
+    Configure the shared application logger to write to a file and the console.
+    Every call replaces the handlers already attached to the logger, so the
+    most recent call decides the format and the log file.
+    :param log_file: Path of the log file.
+    :param prefix: (optional) Text inserted before the message of each record
+        to identify its origin. It is used literally; '%' needs no escaping.
+    :param level: Log level applied to the logger and both handlers.
+    :return: The configured logger named 'my_app_logger'.
+    """
+    logger = logging.getLogger('my_app_logger')
+    logger.setLevel(level)
+    # Close the previous handlers before dropping them; clearing the list alone
+    # would leave the old log file open
+    for stale_handler in list(logger.handlers):
+        logger.removeHandler(stale_handler)
+        stale_handler.close()
+    log_format = '%(asctime)s - %(process)d - %(levelname)s - %(message)s'
+    if prefix:
+        # The prefix becomes part of a %-style format string, so a literal '%'
+        # has to be doubled to survive formatting
+        safe_prefix = str(prefix).replace('%', '%%')
+        log_format = f'%(asctime)s - %(process)d - %(levelname)s - {safe_prefix} - %(message)s'
+    formatter = logging.Formatter(log_format)
+    # Explicit UTF-8: the log messages contain non-ASCII text
+    file_handler = logging.FileHandler(log_file, encoding='utf-8')
+    console_handler = logging.StreamHandler()
+    for handler in (file_handler, console_handler):
+        handler.setLevel(level)
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+    # Keep the records away from the root logger to avoid duplicate output
+    logger.propagate = False
+    return logger

utils/paths.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import os
+from functools import lru_cache
+from pathlib import Path
+@lru_cache(maxsize=1)
+def project_root() -> Path:
+    """
+    Locate the repository root so that callers can build absolute paths that
+    are independent of the current working directory. The result is cached.
+    Resolution order: the CAMOUFOX_PROJECT_ROOT environment variable, then the
+    nearest ancestor of this file that contains a "cookies" entry, then a
+    fixed ancestor of this file.
+    """
+    override = os.getenv("CAMOUFOX_PROJECT_ROOT")
+    if override:
+        return Path(override).expanduser().resolve()
+    ancestors = Path(__file__).resolve().parents
+    marked = next((folder for folder in ancestors if (folder / "cookies").exists()), None)
+    if marked is not None:
+        return marked
+    # Marker directory missing: fall back to the original behaviour.
+    # NOTE(review): this picks the third ancestor (or the topmost one when
+    # fewer exist) - confirm that matches the intended repository layout.
+    return ancestors[min(2, len(ancestors) - 1)]
+def logs_dir() -> Path:
+    """Return the "logs" directory under the project root (log files and screenshots)."""
+    return project_root().joinpath("logs")
+def cookies_dir() -> Path:
+    """Return the "cookies" directory under the project root (persistent cookie JSON files)."""
+    return project_root().joinpath("cookies")