Spaces:

hkfires
/

AIStudioBuildWS

Running

App Files Files Community

hkfires committed 6 days ago

Commit

e15281a

verified ·

1 Parent(s): a61399c

feat(browser): add cookie validator and lifecycle handling

Browse files

Files changed (7) hide show

browser/cookie_validator.py +85 -0
browser/instance.py +44 -25
browser/navigation.py +30 -4
main.py +260 -42
utils/cookie_handler.py +1 -4
utils/cookie_manager.py +6 -39
utils/paths.py +4 -5

browser/cookie_validator.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import time
+import sys
+from playwright.sync_api import TimeoutError, Error as PlaywrightError
+class CookieValidator:
+    """Checks whether the session cookies are still accepted by AI Studio.
+    Validation opens a throw-away tab in the existing browser context, loads
+    an authenticated URL and inspects where the navigation ended up; landing
+    on a Google sign-in page means the cookies are no longer valid.
+    """
+    # Authenticated page that is loaded to probe the session.
+    VALIDATION_URL = "https://aistudio.google.com/apps"
+    # Host of the Google sign-in / account-chooser pages checked below.
+    LOGIN_HOST = "accounts.google.com"
+    def __init__(self, page, context, logger):
+        """Store the collaborators used during validation.
+        Args:
+            page: Main page instance (stored only; validation uses its own tab).
+            context: Browser context used to open the validation tab.
+            logger: Logger that receives validation progress and failures.
+        """
+        self.page = page
+        self.context = context
+        self.logger = logger
+    def validate_cookies_in_main_thread(self):
+        """Validate the cookies by loading an authenticated page in a new tab.
+        Meant to be called by the main thread of the browser process.
+        Returns:
+            bool: True when the page loads without ending on a Google sign-in
+            page; False on a sign-in redirect, a timeout or any other error.
+        """
+        # Local import so this block does not depend on the file's import list.
+        from urllib.parse import urlparse
+        validation_page = None
+        try:
+            self.logger.info("开始Cookie验证...")
+            validation_page = self.context.new_page()
+            validation_page.goto(self.VALIDATION_URL, wait_until='domcontentloaded', timeout=30000)
+            # Fixed pause before reading the URL (presumably so that late
+            # redirects can land — TODO confirm 2 s is enough).
+            validation_page.wait_for_timeout(2000)
+            final_url = validation_page.url
+            if "accounts.google.com/v3/signin/identifier" in final_url:
+                self.logger.error("Cookie验证失败: 被重定向到登录页面")
+                return False
+            if "accounts.google.com/v3/signin/accountchooser" in final_url:
+                self.logger.error("Cookie验证失败: 被重定向到账户选择页面")
+                return False
+            # Any other page on the login host (ServiceLogin, challenge, ...)
+            # also means the session was not accepted; the two substring
+            # checks above only cover two specific paths.
+            if urlparse(final_url).hostname == self.LOGIN_HOST:
+                self.logger.error("Cookie验证失败: 被重定向到登录页面")
+                return False
+            # Not redirected to a sign-in page: treat the cookies as valid.
+            self.logger.info("Cookie验证成功")
+            return True
+        except TimeoutError:
+            self.logger.error("Cookie验证失败: 页面加载超时")
+            return False
+        except Exception as e:
+            # Covers Playwright errors as well as anything unexpected; both
+            # were logged with the same message.
+            self.logger.error(f"Cookie验证失败: {e}")
+            return False
+        finally:
+            # Always close the validation tab; a failure to close is ignored
+            # on purpose so it cannot mask the validation result.
+            if validation_page:
+                try:
+                    validation_page.close()
+                except Exception:
+                    pass
+    def shutdown_instance_on_cookie_failure(self):
+        """Log the cookie failure and exit the current process with status 1.
+        Raises:
+            SystemExit: always, with exit code 1.
+        """
+        self.logger.error("Cookie失效，关闭实例")
+        # Short pause before exiting (presumably to let the log line flush).
+        time.sleep(1)
+        sys.exit(1)

browser/instance.py CHANGED Viewed

@@ -1,19 +1,26 @@
 import os
 from playwright.sync_api import TimeoutError, Error as PlaywrightError
 from utils.logger import setup_logging
 from utils.cookie_manager import CookieManager
 from browser.navigation import handle_successful_navigation
 from camoufox.sync_api import Camoufox
 from utils.paths import logs_dir
 from utils.common import parse_headless_mode, ensure_dir
 from utils.url_helper import extract_url_path
-def run_browser_instance(config):
     """
     根据最终合并的配置，启动并管理一个单独的 Camoufox 浏览器实例。
-    使用CookieManager统一管理cookie加载，避免重复的扫描逻辑。
     """
     cookie_source = config.get('cookie_source')
     if not cookie_source:
         # 使用默认logger进行错误报告
@@ -31,33 +38,33 @@ def run_browser_instance(config):
     proxy = config.get('proxy')
     headless_setting = config.get('headless', 'virtual')
-    # 使用CookieManager加载cookie
     cookie_manager = CookieManager(logger)
     all_cookies = []
     try:
-        # 直接使用CookieSource对象加载cookie
         cookies = cookie_manager.load_cookies(cookie_source)
         all_cookies.extend(cookies)
     except Exception as e:
-        logger.error(f"从cookie来源加载时出错: {e}")
         return
-    # 3. 检查是否有任何cookie可用
     if not all_cookies:
-        logger.error("错误: 没有可用的cookie（既没有有效的JSON文件，也没有环境变量）")
         return
     cookies = all_cookies
     headless_mode = parse_headless_mode(headless_setting)
     launch_options = {"headless": headless_mode}
     if proxy:
         logger.info(f"使用代理: {proxy} 访问")
         launch_options["proxy"] = {"server": proxy, "bypass": "localhost, 127.0.0.1"}
-    # 无需禁用图片加载, 因为图片很少, 禁用还可能导致风控增加
-    # launch_options["block_images"] = True
     screenshot_dir = logs_dir()
     ensure_dir(screenshot_dir)
@@ -67,16 +74,19 @@ def run_browser_instance(config):
             context = browser.new_context()
             context.add_cookies(cookies)
             page = context.new_page()
             # ####################################################################
             # ############ 增强的 page.goto() 错误处理和日志记录 ###############
             # ####################################################################
             response = None
             try:
-                logger.info(f"正在导航到: {expected_url} (超时设置为 120 秒)")
                 # page.goto() 会返回一个 response 对象，我们可以用它来获取状态码等信息
-                response = page.goto(expected_url, wait_until='domcontentloaded', timeout=120000)
                 # 检查HTTP响应状态码
                 if response:
@@ -151,28 +161,27 @@ def run_browser_instance(config):
             final_path = extract_url_path(final_url)
             if expected_path and expected_path in final_path:
-                logger.info(f"URL验证通过。预期路径: {expected_path}, 最终URL: {final_url}")
-                # --- NEW ROBUST STRATEGY: Wait for the loading spinner to disappear ---
-                # This is the key to solving the race condition. The error message or
-                # content will only appear AFTER the initial loading is done.
                 spinner_locator = page.locator('mat-spinner')
                 try:
                     logger.info("正在等待加载指示器 (spinner) 消失... (最长等待30秒)")
-                    # We wait for the spinner to be 'hidden' or not present in the DOM.
                     spinner_locator.wait_for(state='hidden', timeout=30000)
                     logger.info("加载指示器已消失。页面已完成异步加载。")
                 except TimeoutError:
                     logger.error("页面加载指示器在30秒内未消失。页面可能已卡住。")
                     page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_spinner_stuck_{diagnostic_tag}.png"))
-                    return # Exit if the page is stuck loading
-                # --- NOW, we can safely check for the error message ---
-                # We use the most specific text possible to avoid false positives.
                 auth_error_text = "authentication error"
                 auth_error_locator = page.get_by_text(auth_error_text, exact=False)
-                # We only need a very short timeout here because the page should be stable.
                 if auth_error_locator.is_visible(timeout=2000):
                     logger.error(f"检测到认证失败的错误横幅: '{auth_error_text}'. Cookie已过期或无效。")
                     screenshot_path = os.path.join(screenshot_dir, f"FAIL_auth_error_banner_{diagnostic_tag}.png")
@@ -182,9 +191,9 @@ def run_browser_instance(config):
                     # with open(html_path, 'w', encoding='utf-8') as f:
                     #     f.write(page.content())
                     # logger.info(f"已保存包含错误信息的页面HTML: {html_path}")
-                    return # Definitive failure, so we exit.
-                # --- If no error, proceed to final confirmation (as a fallback) ---
                 logger.info("未检测到认证错误横幅。进行最终确认。")
                 login_button_cn = page.get_by_role('button', name='登录')
                 login_button_en = page.get_by_role('button', name='Login')
@@ -194,9 +203,13 @@ def run_browser_instance(config):
                     page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_login_button_visible_{diagnostic_tag}.png"))
                     return
-                # --- If all checks pass, we assume success ---
                 logger.info("所有验证通过，确认已成功登录。")
-                handle_successful_navigation(page, logger, diagnostic_tag)
             elif "accounts.google.com/v3/signin/accountchooser" in final_url:
                 logger.warning("检测到Google账户选择页面。登录失败或Cookie已过期。")
                 page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_chooser_click_failed_{diagnostic_tag}.png"))
@@ -211,6 +224,12 @@ def run_browser_instance(config):
     except KeyboardInterrupt:
         logger.info(f"用户中断，正在关闭...")
     except Exception as e:
         # 这是一个最终的捕获，用于捕获所有未预料到的错误
         logger.exception(f"运行 Camoufox 实例时发生未预料的严重错误: {e}")

 import os
+import signal
 from playwright.sync_api import TimeoutError, Error as PlaywrightError
 from utils.logger import setup_logging
 from utils.cookie_manager import CookieManager
 from browser.navigation import handle_successful_navigation
+from browser.cookie_validator import CookieValidator
 from camoufox.sync_api import Camoufox
 from utils.paths import logs_dir
 from utils.common import parse_headless_mode, ensure_dir
 from utils.url_helper import extract_url_path
+def run_browser_instance(config, shutdown_event=None):
     """
     根据最终合并的配置，启动并管理一个单独的 Camoufox 浏览器实例。
+    使用CookieManager统一管理Cookie加载，避免重复的扫描逻辑。
     """
+    # 重置信号处理器，确保子进程能响应 SIGTERM
+    signal.signal(signal.SIGTERM, signal.SIG_DFL)
+    # 忽略 SIGINT (Ctrl+C)，让主进程统一处理
+    signal.signal(signal.SIGINT, signal.SIG_IGN)
     cookie_source = config.get('cookie_source')
     if not cookie_source:
         # 使用默认logger进行错误报告
     proxy = config.get('proxy')
     headless_setting = config.get('headless', 'virtual')
+    # 使用CookieManager加载Cookie
     cookie_manager = CookieManager(logger)
     all_cookies = []
     try:
+        # 直接使用CookieSource对象加载Cookie
         cookies = cookie_manager.load_cookies(cookie_source)
         all_cookies.extend(cookies)
     except Exception as e:
+        logger.error(f"从Cookie来源加载时出错: {e}")
         return
+    # 3. 检查是否有任何Cookie可用
     if not all_cookies:
+        logger.error("错误: 没有可用的Cookie（既没有有效的JSON文件，也没有环境变量）")
         return
     cookies = all_cookies
     headless_mode = parse_headless_mode(headless_setting)
     launch_options = {"headless": headless_mode}
+    # launch_options["block_images"] = True  # 禁用图片加载
     if proxy:
         logger.info(f"使用代理: {proxy} 访问")
         launch_options["proxy"] = {"server": proxy, "bypass": "localhost, 127.0.0.1"}
     screenshot_dir = logs_dir()
     ensure_dir(screenshot_dir)
             context = browser.new_context()
             context.add_cookies(cookies)
             page = context.new_page()
+            # 创建Cookie验证器
+            cookie_validator = CookieValidator(page, context, logger)
             # ####################################################################
             # ############ 增强的 page.goto() 错误处理和日志记录 ###############
             # ####################################################################
             response = None
             try:
+                logger.info(f"正在导航到: {expected_url} (超时设置为 90 秒)")
                 # page.goto() 会返回一个 response 对象，我们可以用它来获取状态码等信息
+                response = page.goto(expected_url, wait_until='domcontentloaded', timeout=90000)
                 # 检查HTTP响应状态码
                 if response:
             final_path = extract_url_path(final_url)
             if expected_path and expected_path in final_path:
+                logger.info(f"URL验证通过。预期路径: {expected_path}")
+                # --- 新的健壮策略：等待加载指示器消失 ---
+                # 这是解决竞态条件的关键。错误消息或内容只在初始加载完成后才会出现。
                 spinner_locator = page.locator('mat-spinner')
                 try:
                     logger.info("正在等待加载指示器 (spinner) 消失... (最长等待30秒)")
+                    # 我们等待spinner变为'隐藏'状态或从DOM中消失。
                     spinner_locator.wait_for(state='hidden', timeout=30000)
                     logger.info("加载指示器已消失。页面已完成异步加载。")
                 except TimeoutError:
                     logger.error("页面加载指示器在30秒内未消失。页面可能已卡住。")
                     page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_spinner_stuck_{diagnostic_tag}.png"))
+                    return # 如果页面加载卡住则退出
+                # --- 现在我们可以安全地检查错误消息 ---
+                # 我们使用最具体的文本以避免误判。
                 auth_error_text = "authentication error"
                 auth_error_locator = page.get_by_text(auth_error_text, exact=False)
+                # 这里我们只需要很短的超时时间，因为页面应该是稳定的。
                 if auth_error_locator.is_visible(timeout=2000):
                     logger.error(f"检测到认证失败的错误横幅: '{auth_error_text}'. Cookie已过期或无效。")
                     screenshot_path = os.path.join(screenshot_dir, f"FAIL_auth_error_banner_{diagnostic_tag}.png")
                     # with open(html_path, 'w', encoding='utf-8') as f:
                     #     f.write(page.content())
                     # logger.info(f"已保存包含错误信息的页面HTML: {html_path}")
+                    return # 明确的失败，因此我们退出。
+                # --- 如果没有错误，进行最终确认（作为后备方案） ---
                 logger.info("未检测到认证错误横幅。进行最终确认。")
                 login_button_cn = page.get_by_role('button', name='登录')
                 login_button_en = page.get_by_role('button', name='Login')
                     page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_login_button_visible_{diagnostic_tag}.png"))
                     return
+                # --- 如果所有检查都通过，我们假设成功 ---
                 logger.info("所有验证通过，确认已成功登录。")
+                # 创建Cookie验证器（验证将在主线程中执行，避免线程问题）
+                logger.info("Cookie验证器已创建，将定期验证Cookie有效性")
+                handle_successful_navigation(page, logger, diagnostic_tag, shutdown_event, cookie_validator)
             elif "accounts.google.com/v3/signin/accountchooser" in final_url:
                 logger.warning("检测到Google账户选择页面。登录失败或Cookie已过期。")
                 page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_chooser_click_failed_{diagnostic_tag}.png"))
     except KeyboardInterrupt:
         logger.info(f"用户中断，正在关闭...")
+    except SystemExit as e:
+        # 捕获Cookie验证失败时的系统退出
+        if e.code == 1:
+            logger.error("Cookie验证失败，关闭进程实例")
+        else:
+            logger.info(f"实例正常退出，退出码: {e.code}")
     except Exception as e:
         # 这是一个最终的捕获，用于捕获所有未预料到的错误
         logger.exception(f"运行 Camoufox 实例时发生未预料的严重错误: {e}")

browser/navigation.py CHANGED Viewed

@@ -24,7 +24,7 @@ def handle_untrusted_dialog(page: Page, logger=None):
     except Exception as e:
         logger.info(f"检查弹窗时发生意外：{e}，将继续执行...")
-def handle_successful_navigation(page: Page, logger, cookie_file_config):
     """
     在成功导航到目标页面后，执行后续操作（处理弹窗、保持运行）。
     """
@@ -34,15 +34,41 @@ def handle_successful_navigation(page: Page, logger, cookie_file_config):
     # 检查并处理 "Last modified by..." 的弹窗
     handle_untrusted_dialog(page, logger=logger)
     # 等待页面加载和渲染
-    logger.info("等待15秒以便页面完全渲染...")
     time.sleep(15)
-    logger.info("实例将保持运行状态。每10秒点击一次页面以保持活动。")
     while True:
         try:
             page.click('body')
-            time.sleep(10)
         except Exception as e:
             logger.error(f"在保持活动循环中出错: {e}")
             # 在保持活动循环中出错时截屏

     except Exception as e:
         logger.info(f"检查弹窗时发生意外：{e}，将继续执行...")
+def handle_successful_navigation(page: Page, logger, cookie_file_config, shutdown_event=None, cookie_validator=None):
     """
     在成功导航到目标页面后，执行后续操作（处理弹窗、保持运行）。
     """
     # 检查并处理 "Last modified by..." 的弹窗
     handle_untrusted_dialog(page, logger=logger)
+    logger.info("实例将保持运行状态。每10秒点击一次页面以保持活动。")
     # 等待页面加载和渲染
     time.sleep(15)
+    # 添加Cookie验证计数器
+    click_counter = 0
     while True:
+        # 检查是否收到关闭信号
+        if shutdown_event and shutdown_event.is_set():
+            logger.info("收到关闭信号，正在优雅退出保持活动循环...")
+            break
         try:
             page.click('body')
+            click_counter += 1
+            # 每360次点击（1小时）执行一次完整的Cookie验证
+            if cookie_validator and click_counter >= 360:  # 360 * 10秒 = 3600秒 = 1小时
+                is_valid = cookie_validator.validate_cookies_in_main_thread()
+                if not is_valid:
+                    cookie_validator.shutdown_instance_on_cookie_failure()
+                    return
+                click_counter = 0  # 重置计数器
+            # 使用可中断的睡眠，每秒检查一次关闭信号
+            for _ in range(10):  # 10秒 = 10次1秒检查
+                if shutdown_event and shutdown_event.is_set():
+                    logger.info("收到关闭信号，正在优雅退出保持活动循环...")
+                    return
+                time.sleep(1)
         except Exception as e:
             logger.error(f"在保持活动循环中出错: {e}")
             # 在保持活动循环中出错时截屏

main.py CHANGED Viewed

@@ -12,14 +12,174 @@ from utils.cookie_manager import CookieManager
 from utils.common import clean_env_value, ensure_dir
 # 全局变量
-browser_processes = []
 app_running = False
 flask_app = None
 def load_instance_configurations(logger):
     """
-    使用CookieManager解析环境变量和cookies目录，为每个cookie来源创建独立的浏览器实例配置。
     """
     # 1. 读取所有实例共享的URL
     shared_url = clean_env_value(os.getenv("CAMOUFOX_INSTANCE_URL"))
@@ -37,16 +197,16 @@ def load_instance_configurations(logger):
     if proxy_value:
         global_settings["proxy"] = proxy_value
-    # 3. 使用CookieManager检测所有cookie来源
     cookie_manager = CookieManager(logger)
     sources = cookie_manager.detect_all_sources()
-    # 检查是否有任何cookie来源
     if not sources:
-        logger.error("错误: 未找到任何cookie来源（既没有JSON文件，也没有环境变量cookie）。")
         return None, None
-    # 4. 为每个cookie来源创建实例配置
     instances = []
     for source in sources:
         if source.type == "file":
@@ -67,13 +227,15 @@ def load_instance_configurations(logger):
     return global_settings, instances
-def start_browser_instances():
     """启动浏览器实例的核心逻辑"""
-    global browser_processes, app_running
     log_dir = logs_dir()
     logger = setup_logging(str(log_dir / 'app.log'))
     logger.info("---------------------Camoufox 实例管理器开始启动---------------------")
     global_settings, instance_profiles = load_instance_configurations(logger)
     if not instance_profiles:
@@ -106,36 +268,58 @@ def start_browser_instances():
             logger.error(f"错误: 配置中缺少cookie_source对象")
             continue
-        process = multiprocessing.Process(target=run_browser_instance, args=(final_config,))
-        browser_processes.append(process)
         process.start()
-        # 如果不是最后一个实例，等待30秒再启动下一个实例，避免并发启动导致的高CPU占用
-        if i < len(instance_profiles):
-            logger.info(f"等待 30 秒后启动下一个实例...")
-            time.sleep(30)
     # 等待所有进程
     try:
-        while app_running and browser_processes:
-            for process in browser_processes[:]:
-                if not process.is_alive():
-                    browser_processes.remove(process)
-                else:
                     process.join(timeout=1)
             time.sleep(1)
     except KeyboardInterrupt:
-        logger.info("捕获到终止信号，正在关闭所有浏览器进程...")
-        for process in browser_processes:
-            process.terminate()
-            process.join()
 def run_standalone_mode():
     """独立模式"""
     global app_running
     app_running = True
-    start_browser_instances()
 def run_server_mode():
     """服务器模式"""
@@ -155,17 +339,19 @@ def run_server_mode():
     app_running = True
     # 在后台线程中启动浏览器实例
-    browser_thread = threading.Thread(target=start_browser_instances, daemon=True)
     browser_thread.start()
     # 定义路由
     @flask_app.route('/health')
     def health_check():
         """健康检查端点"""
-        running_count = sum(1 for p in browser_processes if p.is_alive())
         return jsonify({
             'status': 'healthy',
-            'browser_instances': len(browser_processes),
             'running_instances': running_count,
             'message': f'Application is running with {running_count} active browser instances'
         })
@@ -173,10 +359,12 @@ def run_server_mode():
     @flask_app.route('/')
     def index():
         """主页端点"""
-        running_count = sum(1 for p in browser_processes if p.is_alive())
         return jsonify({
             'status': 'running',
-            'browser_instances': len(browser_processes),
             'running_instances': running_count,
             'run_mode': 'server',
             'message': 'Camoufox Browser Automation is running in server mode'
@@ -194,22 +382,43 @@ def run_server_mode():
         server_logger.info("服务器正在关闭...")
 def signal_handler(signum, frame):
-    """统一的信号处理器"""
-    global app_running
     logger = setup_logging(str(logs_dir() / 'app.log'), prefix="signal")
-    logger.info(f"接收到信号 {signum}，正在关闭应用...")
     app_running = False
-    # 关闭所有浏览器进程
-    for process in browser_processes:
-        if process.is_alive():
-            process.terminate()
-            try:
-                process.join(timeout=5)
-            except:
-                process.kill()
-    logger.info("所有进程已关闭")
     sys.exit(0)
 def main():
@@ -218,9 +427,18 @@ def main():
     ensure_dir(logs_dir())
     ensure_dir(cookies_dir())
-    # 注册信号处理器
     signal.signal(signal.SIGTERM, signal_handler)
     signal.signal(signal.SIGINT, signal_handler)
     # 检查运行模式环境变量
     hg_mode = os.getenv('HG', '').lower()

 from utils.common import clean_env_value, ensure_dir
 # 全局变量
 app_running = False
 flask_app = None
+# 使用 multiprocessing.Event 实现跨进程通信
+shutdown_event = multiprocessing.Event()
+class ProcessManager:
+    """Registry of the browser child processes started by this application.
+    Entries live in ``self.processes`` keyed by PID, or by a temporary string
+    key while the PID is still unknown. Every method takes ``self.lock``
+    (a re-entrant lock) before touching the registry.
+    """
+    def __init__(self):
+        self.processes = {}  # {pid or temporary key: process_info dict}
+        self.lock = threading.RLock()
+        self.logger = setup_logging(str(logs_dir() / 'app.log'), prefix="manager")
+    def add_process(self, process, config=None):
+        """Register ``process`` together with the config it was started with.
+        Args:
+            process: Process object exposing ``pid`` / ``is_alive()``.
+            config: Optional configuration stored alongside the process.
+        """
+        with self.lock:
+            pid = process.pid if process and hasattr(process, 'pid') else None
+            if pid is None:
+                # No PID yet: file the entry under a temporary key that
+                # update_temp_pids() replaces later. Skip keys already in use
+                # so an earlier temporary entry is never overwritten.
+                index = len(self.processes)
+                while f"temp_{index}" in self.processes:
+                    index += 1
+                temp_id = f"temp_{index}"
+                self.logger.warning(f"进程PID暂时为None，使用临时ID {temp_id}")
+            else:
+                temp_id = pid
+            process_info = {
+                'process': process,
+                'config': config,
+                'pid': pid,
+                'is_alive': True,
+                'start_time': time.time()
+            }
+            self.processes[temp_id] = process_info
+    def update_temp_pids(self):
+        """Re-key entries stored under a temporary key once their PID is known."""
+        with self.lock:
+            temp_ids = [k for k in self.processes if isinstance(k, str) and k.startswith("temp_")]
+            for temp_id in temp_ids:
+                process_info = self.processes[temp_id]
+                process = process_info['process']
+                if process and hasattr(process, 'pid') and process.pid is not None:
+                    del self.processes[temp_id]
+                    process_info['pid'] = process.pid
+                    self.processes[process.pid] = process_info
+    def remove_process(self, pid):
+        """Drop the entry stored under ``pid``; unknown keys are ignored."""
+        with self.lock:
+            self.processes.pop(pid, None)
+    def get_alive_processes(self):
+        """Return the live process objects and forget the dead ones.
+        Returns:
+            list: Process objects whose ``is_alive()`` returned True.
+        """
+        with self.lock:
+            self.update_temp_pids()
+            alive = []
+            dead_pids = []
+            for pid, info in self.processes.items():
+                process = info['process']
+                try:
+                    if process and hasattr(process, 'is_alive') and process.is_alive():
+                        alive.append(process)
+                    else:
+                        dead_pids.append(pid)
+                except (ValueError, ProcessLookupError) as e:
+                    # The process can no longer be queried; treat it as dead.
+                    dead_pids.append(pid)
+                    self.logger.warning(f"进程 {pid} 检查时出错: {e}")
+            # Removal happens after the loop so the dict is not mutated while
+            # it is being iterated.
+            for pid in dead_pids:
+                self.remove_process(pid)
+            return alive
+    def _alive_pids(self, pids):
+        """Return the subset of ``pids`` whose tracked process is still alive."""
+        alive = []
+        for pid in pids:
+            info = self.processes.get(pid)
+            if info is None:
+                continue
+            process = info['process']
+            try:
+                if process and hasattr(process, 'is_alive') and process.is_alive():
+                    alive.append(pid)
+            except (ValueError, ProcessLookupError, AttributeError):
+                pass  # a process that can no longer be queried counts as gone
+        return alive
+    def terminate_all(self, timeout=10):
+        """Stop every tracked process, escalating from terminate() to kill().
+        The lock is held for the whole shutdown sequence.
+        Args:
+            timeout: Maximum number of seconds to wait after terminate()
+                before kill() is called on the processes still running.
+        """
+        with self.lock:
+            self.update_temp_pids()
+            if not self.processes:
+                self.logger.info("没有活跃的进程需要关闭")
+                return
+            self.logger.info(f"开始关闭 {len(self.processes)} 个进程...")
+            # Phase 1: ask every live process to terminate.
+            active_pids = []
+            for pid, info in list(self.processes.items()):
+                process = info['process']
+                try:
+                    if process and hasattr(process, 'is_alive') and process.is_alive() and pid is not None:
+                        self.logger.info(f"发送SIGTERM给进程 {pid} (运行时长: {time.time() - info['start_time']:.1f}秒)")
+                        process.terminate()
+                        active_pids.append(pid)
+                    else:
+                        self.logger.info(f"进程 {pid if pid is not None else 'None'} 已经停止或无效")
+                except (ValueError, ProcessLookupError, AttributeError) as e:
+                    self.logger.warning(f"进程 {pid if pid is not None else 'None'} 访问出错: {e}")
+            if not active_pids:
+                self.logger.info("所有进程已经停止")
+                return
+            # Phase 2: poll until the processes exit or ``timeout`` elapses.
+            # The wait used to be a hard-coded 5 s regardless of ``timeout``.
+            self.logger.info(f"等待 {len(active_pids)} 个进程优雅退出...")
+            deadline = time.monotonic() + max(timeout, 0)
+            still_alive = self._alive_pids(active_pids)
+            while still_alive:
+                remaining = deadline - time.monotonic()
+                if remaining <= 0:
+                    break
+                time.sleep(min(0.5, remaining))
+                still_alive = self._alive_pids(active_pids)
+            if not still_alive:
+                self.logger.info("所有进程已优雅退出")
+                return
+            self.logger.info(f"仍有 {len(still_alive)} 个进程在运行，准备强制关闭...")
+            # Phase 3: force-kill whatever ignored the terminate request.
+            for pid in still_alive:
+                process = self.processes[pid]['process']
+                try:
+                    if process.is_alive():
+                        self.logger.warning(f"进程 {pid} 未响应SIGTERM，强制终止")
+                        process.kill()
+                except (ValueError, ProcessLookupError, AttributeError) as e:
+                    self.logger.info(f"进程 {pid} 已终止: {e}")
+            self.logger.info("所有进程关闭完成")
+    def get_count(self):
+        """Return the number of entries currently tracked (alive or not)."""
+        with self.lock:
+            return len(self.processes)
+    def get_alive_count(self):
+        """Return the number of live processes (also prunes dead entries)."""
+        return len(self.get_alive_processes())
+# 全局进程管理器
+process_manager = ProcessManager()
 def load_instance_configurations(logger):
     """
+    使用CookieManager解析环境变量和Cookies目录，为每个Cookie来源创建独立的浏览器实例配置。
     """
     # 1. 读取所有实例共享的URL
     shared_url = clean_env_value(os.getenv("CAMOUFOX_INSTANCE_URL"))
     if proxy_value:
         global_settings["proxy"] = proxy_value
+    # 3. 使用CookieManager检测所有Cookie来源
     cookie_manager = CookieManager(logger)
     sources = cookie_manager.detect_all_sources()
+    # 检查是否有任何Cookie来源
     if not sources:
+        logger.error("错误: 未找到任何Cookie来源（既没有JSON文件，也没有环境变量Cookie）。")
         return None, None
+    # 4. 为每个Cookie来源创建实例配置
     instances = []
     for source in sources:
         if source.type == "file":
     return global_settings, instances
+def start_browser_instances(run_mode="standalone"):
     """启动浏览器实例的核心逻辑"""
+    global app_running, process_manager, shutdown_event
     log_dir = logs_dir()
     logger = setup_logging(str(log_dir / 'app.log'))
     logger.info("---------------------Camoufox 实例管理器开始启动---------------------")
+    start_delay = int(os.getenv("INSTANCE_START_DELAY", "30"))
+    logger.info(f"运行模式: {run_mode}; 实例启动间隔: {start_delay} 秒")
     global_settings, instance_profiles = load_instance_configurations(logger)
     if not instance_profiles:
             logger.error(f"错误: 配置中缺少cookie_source对象")
             continue
+        # 传递 shutdown_event 给子进程
+        process = multiprocessing.Process(target=run_browser_instance, args=(final_config, shutdown_event))
         process.start()
+        # 等待一小段时间让进程获得PID，然后再添加到管理器
+        time.sleep(0.1)
+        process_manager.add_process(process, final_config)
+        # 等待配置的时间，避免并发启动导致的高CPU占用
+        # 即使是最后一个实例，也等待一段时间让其初始化，然后再进入主循环
+        time.sleep(start_delay)
     # 等待所有进程
+    previous_count = None
+    last_log_time = 0
     try:
+        while app_running:
+            alive_processes = process_manager.get_alive_processes()
+            current_count = len(alive_processes)
+            # 仅在数量变化或间隔一段时间后再记录，避免过于频繁的日志
+            now = time.time()
+            if current_count != previous_count or now - last_log_time >= 600:
+                logger.info(f"当前运行的浏览器实例数: {current_count}")
+                previous_count = current_count
+                last_log_time = now
+            if not alive_processes:
+                logger.info("所有浏览器进程已结束，主进程即将退出")
+                break
+            # 等待进程并清理死进程
+            for process in alive_processes:
+                try:
                     process.join(timeout=1)
+                except:
+                    pass
             time.sleep(1)
     except KeyboardInterrupt:
+        logger.info("捕获到键盘中断信号，等待信号处理器完成关闭...")
+        # 不在这里关闭进程，让信号处理器统一处理
+        pass
+    # 确保在所有进程结束后退出
+    logger.info("浏览器实例管理器运行结束")
 def run_standalone_mode():
     """独立模式"""
     global app_running
     app_running = True
+    start_browser_instances(run_mode="standalone")
 def run_server_mode():
     """服务器模式"""
     app_running = True
     # 在后台线程中启动浏览器实例
+    browser_thread = threading.Thread(target=lambda: start_browser_instances(run_mode="server"), daemon=True)
     browser_thread.start()
     # 定义路由
     @flask_app.route('/health')
     def health_check():
         """健康检查端点"""
+        global process_manager
+        running_count = process_manager.get_alive_count()
+        total_count = process_manager.get_count()
         return jsonify({
             'status': 'healthy',
+            'browser_instances': total_count,
             'running_instances': running_count,
             'message': f'Application is running with {running_count} active browser instances'
         })
     @flask_app.route('/')
     def index():
         """主页端点"""
+        global process_manager
+        running_count = process_manager.get_alive_count()
+        total_count = process_manager.get_count()
         return jsonify({
             'status': 'running',
+            'browser_instances': total_count,
             'running_instances': running_count,
             'run_mode': 'server',
             'message': 'Camoufox Browser Automation is running in server mode'
         server_logger.info("服务器正在关闭...")
 def signal_handler(signum, frame):
+    """Unified signal handler; only the main process runs the shutdown logic."""
+    global app_running, process_manager, shutdown_event
+    # Set up logging first so the messages below are recorded.
     logger = setup_logging(str(logs_dir() / 'app.log'), prefix="signal")
+    logger.info(f"接收到信号 {signum}，开始处理...")
+    # Make sure this is the main process so child processes never run the shutdown logic.
+    current_pid = os.getpid()
+    # The check relies on the multiprocessing process name: anything other
+    # than 'MainProcess' is treated as a child.
+    if multiprocessing.current_process().name != 'MainProcess':
+         # A child that receives the signal does nothing here; it is stopped by the
+         # main process via terminate(), or exits on its own via shutdown_event.
+         logger.info(f"子进程 {current_pid} 接收到信号 {signum}，忽略主进程信号处理逻辑")
+         return
+    logger.info(f"主进程 {current_pid} 接收到信号 {signum}，正在关闭应用...")
+    # 1. Clear the global running flag right away.
     app_running = False
+    # 2. Set the cross-process shutdown event so the children can exit gracefully.
+    try:
+        shutdown_event.set()
+        logger.info("已设置全局关闭事件 (shutdown_event)")
+    except Exception as e:
+        logger.error(f"设置关闭事件时发生错误: {e}")
+    # 3. Ask the process manager to terminate all tracked processes.
+    try:
+        process_manager.terminate_all(timeout=10)
+    except Exception as e:
+        logger.error(f"调用 terminate_all 时发生错误: {e}")
+    logger.info("应用关闭流程结束，主进程退出。")
     sys.exit(0)
 def main():
     ensure_dir(logs_dir())
     ensure_dir(cookies_dir())
+    # 注册信号处理器 - 添加更多信号的捕获
     signal.signal(signal.SIGTERM, signal_handler)
     signal.signal(signal.SIGINT, signal_handler)
+    # 在某些环境中可能还有其他信号
+    try:
+        signal.signal(signal.SIGQUIT, signal_handler)
+    except (ValueError, AttributeError):
+        pass
+    try:
+        signal.signal(signal.SIGHUP, signal_handler)
+    except (ValueError, AttributeError):
+        pass
     # 检查运行模式环境变量
     hg_mode = os.getenv('HG', '').lower()

utils/cookie_handler.py CHANGED Viewed

@@ -3,7 +3,6 @@ def convert_cookie_editor_to_playwright(cookies_from_editor, logger=None):
     将从 Cookie-Editor 插件导出的 Cookie 列表转换为 Playwright 兼容的格式。
     """
     playwright_cookies = []
-    allowed_keys = {'name', 'value', 'domain', 'path', 'expires', 'httpOnly', 'secure', 'sameSite'}
     for cookie in cookies_from_editor:
         pw_cookie = {}
@@ -48,8 +47,6 @@ def convert_kv_to_playwright(kv_string, default_domain=".google.com", logger=Non
     Returns:
         list: Playwright 兼容的 Cookie 列表
     """
-    import re
     playwright_cookies = []
     # 按分号分割 Cookie
@@ -144,4 +141,4 @@ def auto_convert_to_playwright(cookie_data, default_domain=".google.com", logger
     error_msg = f"无法识别的 Cookie 数据格式: {type(cookie_data).__name__}"
     if logger:
         logger.error(error_msg)
-    raise ValueError(error_msg)

     将从 Cookie-Editor 插件导出的 Cookie 列表转换为 Playwright 兼容的格式。
     """
     playwright_cookies = []
     for cookie in cookies_from_editor:
         pw_cookie = {}
     Returns:
         list: Playwright 兼容的 Cookie 列表
     """
     playwright_cookies = []
     # 按分号分割 Cookie
     error_msg = f"无法识别的 Cookie 数据格式: {type(cookie_data).__name__}"
     if logger:
         logger.error(error_msg)
+    raise ValueError(error_msg)

utils/cookie_manager.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
 统一的Cookie管理器
-整合JSON文件和环境变量cookie的检测、加载和管理功能
 """
 import os
@@ -17,7 +17,6 @@ class CookieSource:
     type: str  # "file" | "env_var"
     identifier: str  # filename or "USER_COOKIE_1"
     display_name: str  # 显示名称
-    exists: bool = True
     def __str__(self):
         return f"{self.type}:{self.identifier}"
@@ -26,7 +25,7 @@ class CookieSource:
 class CookieManager:
     """
     统一的Cookie管理器
-    负责检测、加载和缓存所有来源的cookie数据
     """
     def __init__(self, logger=None):
@@ -36,7 +35,7 @@ class CookieManager:
     def detect_all_sources(self) -> List[CookieSource]:
         """
-        检测所有可用的cookie来源（JSON文件 + 环境变量）
         结果会被缓存，避免重复扫描
         """
         if self._detected_sources is not None:
@@ -44,7 +43,7 @@ class CookieManager:
         sources = []
-        # 1. 扫描cookies目录中的JSON文件
         try:
             cookie_path = cookies_dir()
             if os.path.isdir(cookie_path):
@@ -61,7 +60,7 @@ class CookieManager:
                 if cookie_files and self.logger:
                     self.logger.info(f"发现 {len(cookie_files)} 个 Cookie 文件")
                 elif self.logger:
-                    self.logger.info(f"在 {cookie_path} 目录下未找到任何 .json 格式的 Cookie 文件")
             else:
                 if self.logger:
                     self.logger.error(f"Cookie 目录不存在: {cookie_path}")
@@ -102,7 +101,7 @@ class CookieManager:
     def load_cookies(self, source: CookieSource) -> List[Dict]:
         """
-        从指定来源加载cookie数据
         Args:
             source: Cookie来源对象
@@ -197,35 +196,3 @@ class CookieManager:
                 default_domain=".google.com",
                 logger=self.logger
             )
-    def get_all_sources(self) -> List[CookieSource]:
-        """获取所有检测到的 Cookie 来源"""
-        return self.detect_all_sources()
-    def clear_cache(self):
-        """清空 Cookie 缓存"""
-        self._cookie_cache.clear()
-        if self.logger:
-            self.logger.debug("Cookie 缓存已清空")
-    def get_source_summary(self) -> Dict[str, int]:
-        """
-        获取 Cookie 来源统计信息
-        Returns:
-            包含各类型来源数量的字典
-        """
-        sources = self.detect_all_sources()
-        summary = {
-            "total": len(sources),
-            "files": 0,
-            "env_vars": 0
-        }
-        for source in sources:
-            if source.type == "file":
-                summary["files"] += 1
-            elif source.type == "env_var":
-                summary["env_vars"] += 1
-        return summary

 """
 统一的Cookie管理器
+整合JSON文件和环境变量Cookie的检测、加载和管理功能
 """
 import os
     type: str  # "file" | "env_var"
     identifier: str  # filename or "USER_COOKIE_1"
     display_name: str  # 显示名称
     def __str__(self):
         """Return the source formatted as "<type>:<identifier>"."""
         return f"{self.type}:{self.identifier}"
 class CookieManager:
     """
     统一的Cookie管理器
+    负责检测、加载和缓存所有来源的Cookie数据
     """
     def __init__(self, logger=None):
     def detect_all_sources(self) -> List[CookieSource]:
         """
+        检测所有可用的Cookie来源（JSON文件 + 环境变量）
         结果会被缓存，避免重复扫描
         """
         if self._detected_sources is not None:
         sources = []
+        # 1. 扫描Cookies目录中的JSON文件
         try:
             cookie_path = cookies_dir()
             if os.path.isdir(cookie_path):
                 if cookie_files and self.logger:
                     self.logger.info(f"发现 {len(cookie_files)} 个 Cookie 文件")
                 elif self.logger:
+                    self.logger.info(f"在 {cookie_path} 目录下未找到任何格式的 Cookie 文件")
             else:
                 if self.logger:
                     self.logger.error(f"Cookie 目录不存在: {cookie_path}")
     def load_cookies(self, source: CookieSource) -> List[Dict]:
         """
+        从指定来源加载Cookie数据
         Args:
             source: Cookie来源对象
                 default_domain=".google.com",
                 logger=self.logger
             )

utils/paths.py CHANGED Viewed

@@ -6,8 +6,7 @@ from pathlib import Path
 @lru_cache(maxsize=1)
 def project_root() -> Path:
     """
-    Return the repository root so callers can build absolute paths that do not
-    depend on the current working directory.
     """
     env_root = os.getenv("CAMOUFOX_PROJECT_ROOT")
     if env_root:
@@ -18,15 +17,15 @@ def project_root() -> Path:
         if (parent / "cookies").exists():
             return parent
-    # Fallback to the original behaviour if the marker directory is missing.
     return current.parents[min(2, len(current.parents) - 1)]
 def logs_dir() -> Path:
-    """Root-level directory that stores log files and screenshots."""
     return project_root() / "logs"
 def cookies_dir() -> Path:
-    """Root-level directory that stores persistent cookie JSON files."""
     return project_root() / "cookies"

 @lru_cache(maxsize=1)
 def project_root() -> Path:
     """
+    返回代码仓库根目录，使调用者能够构建不依赖当前工作目录的绝对路径。
     """
     env_root = os.getenv("CAMOUFOX_PROJECT_ROOT")
     if env_root:
         if (parent / "cookies").exists():
             return parent
+    # 如果标记目录缺失，则回退到原始行为
     return current.parents[min(2, len(current.parents) - 1)]
 def logs_dir() -> Path:
+    """Root-level directory that stores log files and screenshots."""
     return project_root() / "logs"
 def cookies_dir() -> Path:
+    """Root-level directory that stores persistent cookie JSON files."""
     return project_root() / "cookies"