Spaces:
Running
Running
feat(browser): add cookie validator and lifecycle handling
Browse files- browser/cookie_validator.py +85 -0
- browser/instance.py +44 -25
- browser/navigation.py +30 -4
- main.py +260 -42
- utils/cookie_handler.py +1 -4
- utils/cookie_manager.py +6 -39
- utils/paths.py +4 -5
browser/cookie_validator.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import sys
|
| 3 |
+
from playwright.sync_api import TimeoutError, Error as PlaywrightError
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class CookieValidator:
    """Check whether the cookies loaded into a browser context are still valid.

    A validation run opens a throw-away tab in the same browser context,
    loads ``VALIDATION_URL`` and looks at the URL the tab ends up on. Ending
    up on a Google sign-in page is treated as "cookies are no longer valid".
    The class does not schedule anything itself; the caller decides when to
    run a validation.
    """

    # URL loaded in the temporary tab to probe the session.
    VALIDATION_URL = "https://aistudio.google.com/apps"
    # Host of the Google sign-in flow; landing here counts as a login redirect.
    LOGIN_HOST = "accounts.google.com"
    # Known sign-in URL fragments paired with the message logged for each.
    _LOGIN_MARKERS = (
        ("accounts.google.com/v3/signin/identifier", "Cookie验证失败: 被重定向到登录页面"),
        ("accounts.google.com/v3/signin/accountchooser", "Cookie验证失败: 被重定向到账户选择页面"),
    )

    def __init__(self, page, context, logger):
        """Store the collaborators used during validation.

        Args:
            page: The main page instance (kept for callers; not used by the
                validation itself, which opens its own tab).
            context: Browser context used to open the temporary tab.
            logger: Logger used for progress and failure messages.
        """
        self.page = page
        self.context = context
        self.logger = logger

    def validate_cookies_in_main_thread(self):
        """Run one cookie validation; intended to be called from the main thread.

        Returns:
            bool: True when the validation page loaded without being
            redirected to a Google sign-in page; False on a login redirect,
            a load timeout, or any other error raised while validating.
        """
        validation_page = None
        try:
            self.logger.info("开始Cookie验证...")
            validation_page = self.context.new_page()
            validation_page.goto(self.VALIDATION_URL, wait_until='domcontentloaded', timeout=30000)

            # Give client-side redirects a moment to settle before reading the URL.
            validation_page.wait_for_timeout(2000)

            failure_message = self._login_redirect_reason(validation_page.url)
            if failure_message is not None:
                self.logger.error(failure_message)
                return False

            # No redirect to a sign-in page: the session is considered valid.
            self.logger.info("Cookie验证成功")
            return True

        except TimeoutError:
            self.logger.error("Cookie验证失败: 页面加载超时")
            return False

        except Exception as e:
            # Covers Playwright errors as well as anything unexpected; both
            # are reported the same way and count as a failed validation.
            self.logger.error(f"Cookie验证失败: {e}")
            return False

        finally:
            # Always close the temporary tab; a failure to close must not
            # change the validation result.
            if validation_page is not None:
                try:
                    validation_page.close()
                except Exception as close_error:
                    self.logger.debug(f"关闭验证标签页失败: {close_error}")

    def _login_redirect_reason(self, final_url):
        """Return the failure message if *final_url* is a sign-in page, else None."""
        from urllib.parse import urlparse

        # Keep the original substring checks (and their messages) first.
        for marker, message in self._LOGIN_MARKERS:
            if marker in final_url:
                return message

        # Any other page served from the sign-in host (challenge, ServiceLogin,
        # ...) also means the session was not accepted.
        host = (urlparse(final_url).hostname or "").lower()
        if host == self.LOGIN_HOST:
            return "Cookie验证失败: 被重定向到Google账户登录域名"
        return None

    def shutdown_instance_on_cookie_failure(self):
        """Log the cookie failure and terminate the current process.

        Raises:
            SystemExit: Always, with exit code 1.
        """
        self.logger.error("Cookie失效,关闭实例")
        # Presumably gives log handlers a moment to flush - TODO confirm.
        time.sleep(1)
        sys.exit(1)
|
browser/instance.py
CHANGED
|
@@ -1,19 +1,26 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
from playwright.sync_api import TimeoutError, Error as PlaywrightError
|
| 3 |
from utils.logger import setup_logging
|
| 4 |
from utils.cookie_manager import CookieManager
|
| 5 |
from browser.navigation import handle_successful_navigation
|
|
|
|
| 6 |
from camoufox.sync_api import Camoufox
|
| 7 |
from utils.paths import logs_dir
|
| 8 |
from utils.common import parse_headless_mode, ensure_dir
|
| 9 |
from utils.url_helper import extract_url_path
|
| 10 |
|
| 11 |
|
| 12 |
-
def run_browser_instance(config):
|
| 13 |
"""
|
| 14 |
根据最终合并的配置,启动并管理一个单独的 Camoufox 浏览器实例。
|
| 15 |
-
使用CookieManager统一管理
|
| 16 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
cookie_source = config.get('cookie_source')
|
| 18 |
if not cookie_source:
|
| 19 |
# 使用默认logger进行错误报告
|
|
@@ -31,33 +38,33 @@ def run_browser_instance(config):
|
|
| 31 |
proxy = config.get('proxy')
|
| 32 |
headless_setting = config.get('headless', 'virtual')
|
| 33 |
|
| 34 |
-
# 使用CookieManager加载
|
| 35 |
cookie_manager = CookieManager(logger)
|
| 36 |
all_cookies = []
|
| 37 |
|
| 38 |
try:
|
| 39 |
-
# 直接使用CookieSource对象加载
|
| 40 |
cookies = cookie_manager.load_cookies(cookie_source)
|
| 41 |
all_cookies.extend(cookies)
|
| 42 |
|
| 43 |
except Exception as e:
|
| 44 |
-
logger.error(f"从
|
| 45 |
return
|
| 46 |
|
| 47 |
-
# 3. 检查是否有任何
|
| 48 |
if not all_cookies:
|
| 49 |
-
logger.error("错误: 没有可用的
|
| 50 |
return
|
| 51 |
|
| 52 |
cookies = all_cookies
|
| 53 |
|
| 54 |
headless_mode = parse_headless_mode(headless_setting)
|
| 55 |
launch_options = {"headless": headless_mode}
|
|
|
|
|
|
|
| 56 |
if proxy:
|
| 57 |
logger.info(f"使用代理: {proxy} 访问")
|
| 58 |
launch_options["proxy"] = {"server": proxy, "bypass": "localhost, 127.0.0.1"}
|
| 59 |
-
# 无需禁用图片加载, 因为图片很少, 禁用还可能导致风控增加
|
| 60 |
-
# launch_options["block_images"] = True
|
| 61 |
|
| 62 |
screenshot_dir = logs_dir()
|
| 63 |
ensure_dir(screenshot_dir)
|
|
@@ -67,16 +74,19 @@ def run_browser_instance(config):
|
|
| 67 |
context = browser.new_context()
|
| 68 |
context.add_cookies(cookies)
|
| 69 |
page = context.new_page()
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
| 71 |
# ####################################################################
|
| 72 |
# ############ 增强的 page.goto() 错误处理和日志记录 ###############
|
| 73 |
# ####################################################################
|
| 74 |
|
| 75 |
response = None
|
| 76 |
try:
|
| 77 |
-
logger.info(f"正在导航到: {expected_url} (超时设置为
|
| 78 |
# page.goto() 会返回一个 response 对象,我们可以用它来获取状态码等信息
|
| 79 |
-
response = page.goto(expected_url, wait_until='domcontentloaded', timeout=
|
| 80 |
|
| 81 |
# 检查HTTP响应状态码
|
| 82 |
if response:
|
|
@@ -151,28 +161,27 @@ def run_browser_instance(config):
|
|
| 151 |
final_path = extract_url_path(final_url)
|
| 152 |
|
| 153 |
if expected_path and expected_path in final_path:
|
| 154 |
-
logger.info(f"URL验证通过。预期路径: {expected_path}
|
| 155 |
|
| 156 |
-
# ---
|
| 157 |
-
#
|
| 158 |
-
# content will only appear AFTER the initial loading is done.
|
| 159 |
spinner_locator = page.locator('mat-spinner')
|
| 160 |
try:
|
| 161 |
logger.info("正在等待加载指示器 (spinner) 消失... (最长等待30秒)")
|
| 162 |
-
#
|
| 163 |
spinner_locator.wait_for(state='hidden', timeout=30000)
|
| 164 |
logger.info("加载指示器已消失。页面已完成异步加载。")
|
| 165 |
except TimeoutError:
|
| 166 |
logger.error("页面加载指示器在30秒内未消失。页面可能已卡住。")
|
| 167 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_spinner_stuck_{diagnostic_tag}.png"))
|
| 168 |
-
return #
|
| 169 |
|
| 170 |
-
# ---
|
| 171 |
-
#
|
| 172 |
auth_error_text = "authentication error"
|
| 173 |
auth_error_locator = page.get_by_text(auth_error_text, exact=False)
|
| 174 |
|
| 175 |
-
#
|
| 176 |
if auth_error_locator.is_visible(timeout=2000):
|
| 177 |
logger.error(f"检测到认证失败的错误横幅: '{auth_error_text}'. Cookie已过期或无效。")
|
| 178 |
screenshot_path = os.path.join(screenshot_dir, f"FAIL_auth_error_banner_{diagnostic_tag}.png")
|
|
@@ -182,9 +191,9 @@ def run_browser_instance(config):
|
|
| 182 |
# with open(html_path, 'w', encoding='utf-8') as f:
|
| 183 |
# f.write(page.content())
|
| 184 |
# logger.info(f"已保存包含错误信息的页面HTML: {html_path}")
|
| 185 |
-
return #
|
| 186 |
|
| 187 |
-
# ---
|
| 188 |
logger.info("未检测到认证错误横幅。进行最终确认。")
|
| 189 |
login_button_cn = page.get_by_role('button', name='登录')
|
| 190 |
login_button_en = page.get_by_role('button', name='Login')
|
|
@@ -194,9 +203,13 @@ def run_browser_instance(config):
|
|
| 194 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_login_button_visible_{diagnostic_tag}.png"))
|
| 195 |
return
|
| 196 |
|
| 197 |
-
# ---
|
| 198 |
logger.info("所有验证通过,确认已成功登录。")
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
elif "accounts.google.com/v3/signin/accountchooser" in final_url:
|
| 201 |
logger.warning("检测到Google账户选择页面。登录失败或Cookie已过期。")
|
| 202 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_chooser_click_failed_{diagnostic_tag}.png"))
|
|
@@ -211,6 +224,12 @@ def run_browser_instance(config):
|
|
| 211 |
|
| 212 |
except KeyboardInterrupt:
|
| 213 |
logger.info(f"用户中断,正在关闭...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
except Exception as e:
|
| 215 |
# 这是一个最终的捕获,用于捕获所有未预料到的错误
|
| 216 |
logger.exception(f"运行 Camoufox 实例时发生未预料的严重错误: {e}")
|
|
|
|
| 1 |
import os
|
| 2 |
+
import signal
|
| 3 |
from playwright.sync_api import TimeoutError, Error as PlaywrightError
|
| 4 |
from utils.logger import setup_logging
|
| 5 |
from utils.cookie_manager import CookieManager
|
| 6 |
from browser.navigation import handle_successful_navigation
|
| 7 |
+
from browser.cookie_validator import CookieValidator
|
| 8 |
from camoufox.sync_api import Camoufox
|
| 9 |
from utils.paths import logs_dir
|
| 10 |
from utils.common import parse_headless_mode, ensure_dir
|
| 11 |
from utils.url_helper import extract_url_path
|
| 12 |
|
| 13 |
|
| 14 |
+
def run_browser_instance(config, shutdown_event=None):
|
| 15 |
"""
|
| 16 |
根据最终合并的配置,启动并管理一个单独的 Camoufox 浏览器实例。
|
| 17 |
+
使用CookieManager统一管理Cookie加载,避免重复的扫描逻辑。
|
| 18 |
"""
|
| 19 |
+
# 重置信号处理器,确保子进程能响应 SIGTERM
|
| 20 |
+
signal.signal(signal.SIGTERM, signal.SIG_DFL)
|
| 21 |
+
# 忽略 SIGINT (Ctrl+C),让主进程统一处理
|
| 22 |
+
signal.signal(signal.SIGINT, signal.SIG_IGN)
|
| 23 |
+
|
| 24 |
cookie_source = config.get('cookie_source')
|
| 25 |
if not cookie_source:
|
| 26 |
# 使用默认logger进行错误报告
|
|
|
|
| 38 |
proxy = config.get('proxy')
|
| 39 |
headless_setting = config.get('headless', 'virtual')
|
| 40 |
|
| 41 |
+
# 使用CookieManager加载Cookie
|
| 42 |
cookie_manager = CookieManager(logger)
|
| 43 |
all_cookies = []
|
| 44 |
|
| 45 |
try:
|
| 46 |
+
# 直接使用CookieSource对象加载Cookie
|
| 47 |
cookies = cookie_manager.load_cookies(cookie_source)
|
| 48 |
all_cookies.extend(cookies)
|
| 49 |
|
| 50 |
except Exception as e:
|
| 51 |
+
logger.error(f"从Cookie来源加载时出错: {e}")
|
| 52 |
return
|
| 53 |
|
| 54 |
+
# 3. 检查是否有任何Cookie可用
|
| 55 |
if not all_cookies:
|
| 56 |
+
logger.error("错误: 没有可用的Cookie(既没有有效的JSON文件,也没有环境变量)")
|
| 57 |
return
|
| 58 |
|
| 59 |
cookies = all_cookies
|
| 60 |
|
| 61 |
headless_mode = parse_headless_mode(headless_setting)
|
| 62 |
launch_options = {"headless": headless_mode}
|
| 63 |
+
# launch_options["block_images"] = True # 禁用图片加载
|
| 64 |
+
|
| 65 |
if proxy:
|
| 66 |
logger.info(f"使用代理: {proxy} 访问")
|
| 67 |
launch_options["proxy"] = {"server": proxy, "bypass": "localhost, 127.0.0.1"}
|
|
|
|
|
|
|
| 68 |
|
| 69 |
screenshot_dir = logs_dir()
|
| 70 |
ensure_dir(screenshot_dir)
|
|
|
|
| 74 |
context = browser.new_context()
|
| 75 |
context.add_cookies(cookies)
|
| 76 |
page = context.new_page()
|
| 77 |
+
|
| 78 |
+
# 创建Cookie验证器
|
| 79 |
+
cookie_validator = CookieValidator(page, context, logger)
|
| 80 |
+
|
| 81 |
# ####################################################################
|
| 82 |
# ############ 增强的 page.goto() 错误处理和日志记录 ###############
|
| 83 |
# ####################################################################
|
| 84 |
|
| 85 |
response = None
|
| 86 |
try:
|
| 87 |
+
logger.info(f"正在导航到: {expected_url} (超时设置为 90 秒)")
|
| 88 |
# page.goto() 会返回一个 response 对象,我们可以用它来获取状态码等信息
|
| 89 |
+
response = page.goto(expected_url, wait_until='domcontentloaded', timeout=90000)
|
| 90 |
|
| 91 |
# 检查HTTP响应状态码
|
| 92 |
if response:
|
|
|
|
| 161 |
final_path = extract_url_path(final_url)
|
| 162 |
|
| 163 |
if expected_path and expected_path in final_path:
|
| 164 |
+
logger.info(f"URL验证通过。预期路径: {expected_path}")
|
| 165 |
|
| 166 |
+
# --- 新的健壮策略:等待加载指示器消失 ---
|
| 167 |
+
# 这是解决竞态条件的关键。错误消息或内容只在初始加载完成后才会出现。
|
|
|
|
| 168 |
spinner_locator = page.locator('mat-spinner')
|
| 169 |
try:
|
| 170 |
logger.info("正在等待加载指示器 (spinner) 消失... (最长等待30秒)")
|
| 171 |
+
# 我们等待spinner变为'隐藏'状态或从DOM中消失。
|
| 172 |
spinner_locator.wait_for(state='hidden', timeout=30000)
|
| 173 |
logger.info("加载指示器已消失。页面已完成异步加载。")
|
| 174 |
except TimeoutError:
|
| 175 |
logger.error("页面加载指示器在30秒内未消失。页面可能已卡住。")
|
| 176 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_spinner_stuck_{diagnostic_tag}.png"))
|
| 177 |
+
return # 如果页面加载卡住则退出
|
| 178 |
|
| 179 |
+
# --- 现在我们可以安全地检查错误消息 ---
|
| 180 |
+
# 我们使用最具体的文本以避免误判。
|
| 181 |
auth_error_text = "authentication error"
|
| 182 |
auth_error_locator = page.get_by_text(auth_error_text, exact=False)
|
| 183 |
|
| 184 |
+
# 这里我们只需要很短的超时时间,因为页面应该是稳定的。
|
| 185 |
if auth_error_locator.is_visible(timeout=2000):
|
| 186 |
logger.error(f"检测到认证失败的错误横幅: '{auth_error_text}'. Cookie已过期或无效。")
|
| 187 |
screenshot_path = os.path.join(screenshot_dir, f"FAIL_auth_error_banner_{diagnostic_tag}.png")
|
|
|
|
| 191 |
# with open(html_path, 'w', encoding='utf-8') as f:
|
| 192 |
# f.write(page.content())
|
| 193 |
# logger.info(f"已保存包含错误信息的页面HTML: {html_path}")
|
| 194 |
+
return # 明确的失败,因此我们退出。
|
| 195 |
|
| 196 |
+
# --- 如果没有错误,进行最终确认(作为后备方案) ---
|
| 197 |
logger.info("未检测到认证错误横幅。进行最终确认。")
|
| 198 |
login_button_cn = page.get_by_role('button', name='登录')
|
| 199 |
login_button_en = page.get_by_role('button', name='Login')
|
|
|
|
| 203 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_login_button_visible_{diagnostic_tag}.png"))
|
| 204 |
return
|
| 205 |
|
| 206 |
+
# --- 如果所有检查都通过,我们假设成功 ---
|
| 207 |
logger.info("所有验证通过,确认已成功登录。")
|
| 208 |
+
|
| 209 |
+
# 创建Cookie验证器(验证将在主线程中执行,避免线程问题)
|
| 210 |
+
logger.info("Cookie验证器已创建,将定期验证Cookie有效性")
|
| 211 |
+
|
| 212 |
+
handle_successful_navigation(page, logger, diagnostic_tag, shutdown_event, cookie_validator)
|
| 213 |
elif "accounts.google.com/v3/signin/accountchooser" in final_url:
|
| 214 |
logger.warning("检测到Google账户选择页面。登录失败或Cookie已过期。")
|
| 215 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_chooser_click_failed_{diagnostic_tag}.png"))
|
|
|
|
| 224 |
|
| 225 |
except KeyboardInterrupt:
|
| 226 |
logger.info(f"用户中断,正在关闭...")
|
| 227 |
+
except SystemExit as e:
|
| 228 |
+
# 捕获Cookie验证失败时的系统退出
|
| 229 |
+
if e.code == 1:
|
| 230 |
+
logger.error("Cookie验证失败,关闭进程实例")
|
| 231 |
+
else:
|
| 232 |
+
logger.info(f"实例正常退出,退出码: {e.code}")
|
| 233 |
except Exception as e:
|
| 234 |
# 这是一个最终的捕获,用于捕获所有未预料到的错误
|
| 235 |
logger.exception(f"运行 Camoufox 实例时发生未预料的严重错误: {e}")
|
browser/navigation.py
CHANGED
|
@@ -24,7 +24,7 @@ def handle_untrusted_dialog(page: Page, logger=None):
|
|
| 24 |
except Exception as e:
|
| 25 |
logger.info(f"检查弹窗时发生意外:{e},将继续执行...")
|
| 26 |
|
| 27 |
-
def handle_successful_navigation(page: Page, logger, cookie_file_config):
|
| 28 |
"""
|
| 29 |
在成功导航到目标页面后,执行后续操作(处理弹窗、保持运行)。
|
| 30 |
"""
|
|
@@ -34,15 +34,41 @@ def handle_successful_navigation(page: Page, logger, cookie_file_config):
|
|
| 34 |
# 检查并处理 "Last modified by..." 的弹窗
|
| 35 |
handle_untrusted_dialog(page, logger=logger)
|
| 36 |
|
|
|
|
|
|
|
| 37 |
# 等待页面加载和渲染
|
| 38 |
-
logger.info("等待15秒以便页面完全渲染...")
|
| 39 |
time.sleep(15)
|
| 40 |
|
| 41 |
-
|
|
|
|
|
|
|
| 42 |
while True:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
try:
|
| 44 |
page.click('body')
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
except Exception as e:
|
| 47 |
logger.error(f"在保持活动循环中出错: {e}")
|
| 48 |
# 在保持活动循环中出错时截屏
|
|
|
|
| 24 |
except Exception as e:
|
| 25 |
logger.info(f"检查弹窗时发生意外:{e},将继续执行...")
|
| 26 |
|
| 27 |
+
def handle_successful_navigation(page: Page, logger, cookie_file_config, shutdown_event=None, cookie_validator=None):
|
| 28 |
"""
|
| 29 |
在成功导航到目标页面后,执行后续操作(处理弹窗、保持运行)。
|
| 30 |
"""
|
|
|
|
| 34 |
# 检查并处理 "Last modified by..." 的弹窗
|
| 35 |
handle_untrusted_dialog(page, logger=logger)
|
| 36 |
|
| 37 |
+
logger.info("实例将保持运行状态。每10秒点击一次页面以保持活动。")
|
| 38 |
+
|
| 39 |
# 等待页面加载和渲染
|
|
|
|
| 40 |
time.sleep(15)
|
| 41 |
|
| 42 |
+
# 添加Cookie验证计数器
|
| 43 |
+
click_counter = 0
|
| 44 |
+
|
| 45 |
while True:
|
| 46 |
+
# 检查是否收到关闭信号
|
| 47 |
+
if shutdown_event and shutdown_event.is_set():
|
| 48 |
+
logger.info("收到关闭信号,正在优雅退出保持活动循环...")
|
| 49 |
+
break
|
| 50 |
+
|
| 51 |
try:
|
| 52 |
page.click('body')
|
| 53 |
+
click_counter += 1
|
| 54 |
+
|
| 55 |
+
# 每360次点击(1小时)执行一次完整的Cookie验证
|
| 56 |
+
if cookie_validator and click_counter >= 360: # 360 * 10秒 = 3600秒 = 1小时
|
| 57 |
+
is_valid = cookie_validator.validate_cookies_in_main_thread()
|
| 58 |
+
|
| 59 |
+
if not is_valid:
|
| 60 |
+
cookie_validator.shutdown_instance_on_cookie_failure()
|
| 61 |
+
return
|
| 62 |
+
|
| 63 |
+
click_counter = 0 # 重置计数器
|
| 64 |
+
|
| 65 |
+
# 使用可中断的睡眠,每秒检查一次关闭信号
|
| 66 |
+
for _ in range(10): # 10秒 = 10次1秒检查
|
| 67 |
+
if shutdown_event and shutdown_event.is_set():
|
| 68 |
+
logger.info("收到关闭信号,正在优雅退出保持活动循环...")
|
| 69 |
+
return
|
| 70 |
+
time.sleep(1)
|
| 71 |
+
|
| 72 |
except Exception as e:
|
| 73 |
logger.error(f"在保持活动循环中出错: {e}")
|
| 74 |
# 在保持活动循环中出错时截屏
|
main.py
CHANGED
|
@@ -12,14 +12,174 @@ from utils.cookie_manager import CookieManager
|
|
| 12 |
from utils.common import clean_env_value, ensure_dir
|
| 13 |
|
| 14 |
# 全局变量
|
| 15 |
-
browser_processes = []
|
| 16 |
app_running = False
|
| 17 |
flask_app = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
def load_instance_configurations(logger):
|
| 21 |
"""
|
| 22 |
-
使用CookieManager解析环境变量和
|
| 23 |
"""
|
| 24 |
# 1. 读取所有实例共享的URL
|
| 25 |
shared_url = clean_env_value(os.getenv("CAMOUFOX_INSTANCE_URL"))
|
|
@@ -37,16 +197,16 @@ def load_instance_configurations(logger):
|
|
| 37 |
if proxy_value:
|
| 38 |
global_settings["proxy"] = proxy_value
|
| 39 |
|
| 40 |
-
# 3. 使用CookieManager检测所有
|
| 41 |
cookie_manager = CookieManager(logger)
|
| 42 |
sources = cookie_manager.detect_all_sources()
|
| 43 |
|
| 44 |
-
# 检查是否有任何
|
| 45 |
if not sources:
|
| 46 |
-
logger.error("错误: 未找到任何
|
| 47 |
return None, None
|
| 48 |
|
| 49 |
-
# 4. 为每个
|
| 50 |
instances = []
|
| 51 |
for source in sources:
|
| 52 |
if source.type == "file":
|
|
@@ -67,13 +227,15 @@ def load_instance_configurations(logger):
|
|
| 67 |
|
| 68 |
return global_settings, instances
|
| 69 |
|
| 70 |
-
def start_browser_instances():
|
| 71 |
"""启动浏览器实例的核心逻辑"""
|
| 72 |
-
global
|
| 73 |
|
| 74 |
log_dir = logs_dir()
|
| 75 |
logger = setup_logging(str(log_dir / 'app.log'))
|
| 76 |
logger.info("---------------------Camoufox 实例管理器开始启动---------------------")
|
|
|
|
|
|
|
| 77 |
|
| 78 |
global_settings, instance_profiles = load_instance_configurations(logger)
|
| 79 |
if not instance_profiles:
|
|
@@ -106,36 +268,58 @@ def start_browser_instances():
|
|
| 106 |
logger.error(f"错误: 配置中缺少cookie_source对象")
|
| 107 |
continue
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
| 111 |
process.start()
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
-
#
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
time.sleep(30)
|
| 117 |
|
| 118 |
# 等待所有进程
|
|
|
|
|
|
|
| 119 |
try:
|
| 120 |
-
while app_running
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
process.join(timeout=1)
|
|
|
|
|
|
|
|
|
|
| 126 |
time.sleep(1)
|
| 127 |
except KeyboardInterrupt:
|
| 128 |
-
logger.info("
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
|
|
|
|
|
|
| 132 |
|
| 133 |
def run_standalone_mode():
|
| 134 |
"""独立模式"""
|
| 135 |
global app_running
|
| 136 |
app_running = True
|
| 137 |
|
| 138 |
-
start_browser_instances()
|
| 139 |
|
| 140 |
def run_server_mode():
|
| 141 |
"""服务器模式"""
|
|
@@ -155,17 +339,19 @@ def run_server_mode():
|
|
| 155 |
app_running = True
|
| 156 |
|
| 157 |
# 在后台线程中启动浏览器实例
|
| 158 |
-
browser_thread = threading.Thread(target=start_browser_instances, daemon=True)
|
| 159 |
browser_thread.start()
|
| 160 |
|
| 161 |
# 定义路由
|
| 162 |
@flask_app.route('/health')
|
| 163 |
def health_check():
|
| 164 |
"""健康检查端点"""
|
| 165 |
-
|
|
|
|
|
|
|
| 166 |
return jsonify({
|
| 167 |
'status': 'healthy',
|
| 168 |
-
'browser_instances':
|
| 169 |
'running_instances': running_count,
|
| 170 |
'message': f'Application is running with {running_count} active browser instances'
|
| 171 |
})
|
|
@@ -173,10 +359,12 @@ def run_server_mode():
|
|
| 173 |
@flask_app.route('/')
|
| 174 |
def index():
|
| 175 |
"""主页端点"""
|
| 176 |
-
|
|
|
|
|
|
|
| 177 |
return jsonify({
|
| 178 |
'status': 'running',
|
| 179 |
-
'browser_instances':
|
| 180 |
'running_instances': running_count,
|
| 181 |
'run_mode': 'server',
|
| 182 |
'message': 'Camoufox Browser Automation is running in server mode'
|
|
@@ -194,22 +382,43 @@ def run_server_mode():
|
|
| 194 |
server_logger.info("服务器正在关闭...")
|
| 195 |
|
| 196 |
def signal_handler(signum, frame):
|
| 197 |
-
"""统一的信号处理器"""
|
| 198 |
-
global app_running
|
|
|
|
|
|
|
| 199 |
logger = setup_logging(str(logs_dir() / 'app.log'), prefix="signal")
|
| 200 |
-
logger.info(f"接收到信号 {signum}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
app_running = False
|
| 202 |
|
| 203 |
-
#
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
-
logger.info("
|
| 213 |
sys.exit(0)
|
| 214 |
|
| 215 |
def main():
|
|
@@ -218,9 +427,18 @@ def main():
|
|
| 218 |
ensure_dir(logs_dir())
|
| 219 |
ensure_dir(cookies_dir())
|
| 220 |
|
| 221 |
-
# 注册信号处理器
|
| 222 |
signal.signal(signal.SIGTERM, signal_handler)
|
| 223 |
signal.signal(signal.SIGINT, signal_handler)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
# 检查运行模式环境变量
|
| 226 |
hg_mode = os.getenv('HG', '').lower()
|
|
|
|
| 12 |
from utils.common import clean_env_value, ensure_dir
|
| 13 |
|
| 14 |
# 全局变量
|
|
|
|
| 15 |
app_running = False
|
| 16 |
flask_app = None
|
| 17 |
+
# 使用 multiprocessing.Event 实现跨进程通信
|
| 18 |
+
shutdown_event = multiprocessing.Event()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class ProcessManager:
    """Track the browser worker processes and shut them down in stages.

    Every tracked process is stored in ``self.processes`` under its PID, or
    under a ``"temp_N"`` placeholder string while the PID is still unknown.
    All public methods take ``self.lock`` before touching that mapping.
    """

    def __init__(self):
        # {process key: info dict with 'process', 'config', 'pid',
        #  'is_alive' and 'start_time'}
        self.processes = {}
        # Reentrant because methods that hold the lock call other locking
        # methods (e.g. get_alive_processes -> update_temp_pids).
        self.lock = threading.RLock()
        self.logger = setup_logging(str(logs_dir() / 'app.log'), prefix="manager")

    @staticmethod
    def _is_running(process):
        """Return True if *process* is a usable process object that is alive."""
        return bool(process) and hasattr(process, 'is_alive') and process.is_alive()

    def _next_temp_id(self):
        """Return a ``temp_N`` placeholder key that is not in use yet."""
        index = len(self.processes)
        # len() alone can repeat an existing key after entries were removed,
        # which would silently overwrite a tracked process.
        while f"temp_{index}" in self.processes:
            index += 1
        return f"temp_{index}"

    def _alive_pids(self, pids):
        """Return the subset of *pids* whose tracked process is still alive."""
        alive = []
        for pid in pids:
            info = self.processes.get(pid)
            if info is None:
                continue
            try:
                if self._is_running(info['process']):
                    alive.append(pid)
            except (ValueError, ProcessLookupError, AttributeError):
                # The process object is unusable; treat it as gone.
                pass
        return alive

    def add_process(self, process, config=None):
        """Start tracking *process*, optionally remembering its *config*.

        A process whose PID is not available yet is stored under a unique
        placeholder key and re-keyed later by ``update_temp_pids``.
        """
        with self.lock:
            pid = getattr(process, 'pid', None) if process else None

            if pid is None:
                key = self._next_temp_id()
                self.logger.warning(f"进程PID暂时为None,使用临时ID {key}")
            else:
                key = pid

            self.processes[key] = {
                'process': process,
                'config': config,
                'pid': pid,
                'is_alive': True,
                'start_time': time.time(),
            }

    def update_temp_pids(self):
        """Re-key placeholder entries whose process has a real PID by now."""
        with self.lock:
            temp_ids = [
                key for key in self.processes
                if isinstance(key, str) and key.startswith("temp_")
            ]
            for temp_id in temp_ids:
                process_info = self.processes[temp_id]
                process = process_info['process']

                if process and hasattr(process, 'pid') and process.pid is not None:
                    self.processes[process.pid] = process_info
                    del self.processes[temp_id]
                    process_info['pid'] = process.pid

    def remove_process(self, pid):
        """Stop tracking the entry stored under *pid*; unknown keys are ignored."""
        with self.lock:
            self.processes.pop(pid, None)

    def get_alive_processes(self):
        """Return the tracked process objects that are alive, pruning dead ones."""
        with self.lock:
            self.update_temp_pids()

            alive = []
            dead_pids = []

            for pid, info in self.processes.items():
                process = info['process']
                try:
                    if self._is_running(process):
                        alive.append(process)
                    else:
                        dead_pids.append(pid)
                except (ValueError, ProcessLookupError) as e:
                    # The process no longer exists.
                    dead_pids.append(pid)
                    self.logger.warning(f"进程 {pid} 检查时出错: {e}")

            # Removal happens after the loop so the dict is not mutated
            # while it is being iterated.
            for pid in dead_pids:
                self.remove_process(pid)

            return alive

    def terminate_all(self, timeout=10):
        """Terminate every tracked process, escalating to a kill if needed.

        Args:
            timeout: Total shutdown budget in seconds. The first half is the
                grace period after ``terminate()``; the remainder bounds the
                time spent reaping processes that had to be killed. With the
                default of 10 the grace period is 5 seconds. ``None`` is
                treated as the default.
        """
        with self.lock:
            self.update_temp_pids()

            if not self.processes:
                self.logger.info("没有活跃的进程需要关闭")
                return

            self.logger.info(f"开始关闭 {len(self.processes)} 个进程...")

            # Phase 1: ask every live process to terminate.
            active_pids = []
            for pid, info in list(self.processes.items()):
                process = info['process']
                try:
                    if self._is_running(process) and pid is not None:
                        self.logger.info(f"发送SIGTERM给进程 {pid} (运行时长: {time.time() - info['start_time']:.1f}秒)")
                        process.terminate()
                        active_pids.append(pid)
                    else:
                        self.logger.info(f"进程 {pid if pid is not None else 'None'} 已经停止或无效")
                except (ValueError, ProcessLookupError, AttributeError) as e:
                    self.logger.warning(f"进程 {pid if pid is not None else 'None'} 访问出错: {e}")

            if not active_pids:
                self.logger.info("所有进程已经停止")
                return

            # Phase 2: poll until the processes exit or the grace period ends.
            budget = 10.0 if timeout is None else max(0.0, float(timeout))
            grace_period = budget / 2
            self.logger.info(f"等待 {len(active_pids)} 个进程优雅退出...")
            grace_deadline = time.monotonic() + grace_period
            still_alive = self._alive_pids(active_pids)
            while still_alive and time.monotonic() < grace_deadline:
                time.sleep(min(0.5, max(0.0, grace_deadline - time.monotonic())))
                still_alive = self._alive_pids(active_pids)

            if not still_alive:
                self.logger.info("所有进程已优雅退出")
                return

            self.logger.info(f"仍有 {len(still_alive)} 个进程在运行,准备强制关闭...")

            # Phase 3: kill whatever is left and reap it within the budget.
            reap_deadline = time.monotonic() + (budget - grace_period)
            for pid in still_alive:
                process = self.processes[pid]['process']
                try:
                    if self._is_running(process):
                        self.logger.warning(f"进程 {pid} 未响应SIGTERM,强制终止")
                        process.kill()
                        # Join so the killed child does not linger as a zombie.
                        process.join(timeout=max(0.0, reap_deadline - time.monotonic()))
                except (ValueError, ProcessLookupError, AttributeError) as e:
                    self.logger.info(f"进程 {pid} 已终止: {e}")

            self.logger.info("所有进程关闭完成")

    def get_count(self):
        """Return the number of tracked processes, alive or not yet pruned."""
        with self.lock:
            return len(self.processes)

    def get_alive_count(self):
        """Return the number of tracked processes that are currently alive."""
        return len(self.get_alive_processes())
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
# 全局进程管理器
|
| 177 |
+
process_manager = ProcessManager()
|
| 178 |
|
| 179 |
|
| 180 |
def load_instance_configurations(logger):
|
| 181 |
"""
|
| 182 |
+
使用CookieManager解析环境变量和Cookies目录,为每个Cookie来源创建独立的浏览器实例配置。
|
| 183 |
"""
|
| 184 |
# 1. 读取所有实例共享的URL
|
| 185 |
shared_url = clean_env_value(os.getenv("CAMOUFOX_INSTANCE_URL"))
|
|
|
|
| 197 |
if proxy_value:
|
| 198 |
global_settings["proxy"] = proxy_value
|
| 199 |
|
| 200 |
+
# 3. 使用CookieManager检测所有Cookie来源
|
| 201 |
cookie_manager = CookieManager(logger)
|
| 202 |
sources = cookie_manager.detect_all_sources()
|
| 203 |
|
| 204 |
+
# 检查是否有任何Cookie来源
|
| 205 |
if not sources:
|
| 206 |
+
logger.error("错误: 未找到任何Cookie来源(既没有JSON文件,也没有环境变量Cookie)。")
|
| 207 |
return None, None
|
| 208 |
|
| 209 |
+
# 4. 为每个Cookie来源创建实例配置
|
| 210 |
instances = []
|
| 211 |
for source in sources:
|
| 212 |
if source.type == "file":
|
|
|
|
| 227 |
|
| 228 |
return global_settings, instances
|
| 229 |
|
| 230 |
+
def start_browser_instances(run_mode="standalone"):
|
| 231 |
"""启动浏览器实例的核心逻辑"""
|
| 232 |
+
global app_running, process_manager, shutdown_event
|
| 233 |
|
| 234 |
log_dir = logs_dir()
|
| 235 |
logger = setup_logging(str(log_dir / 'app.log'))
|
| 236 |
logger.info("---------------------Camoufox 实例管理器开始启动---------------------")
|
| 237 |
+
start_delay = int(os.getenv("INSTANCE_START_DELAY", "30"))
|
| 238 |
+
logger.info(f"运行模式: {run_mode}; 实例启动间隔: {start_delay} 秒")
|
| 239 |
|
| 240 |
global_settings, instance_profiles = load_instance_configurations(logger)
|
| 241 |
if not instance_profiles:
|
|
|
|
| 268 |
logger.error(f"错误: 配置中缺少cookie_source对象")
|
| 269 |
continue
|
| 270 |
|
| 271 |
+
# 传递 shutdown_event 给子进程
|
| 272 |
+
process = multiprocessing.Process(target=run_browser_instance, args=(final_config, shutdown_event))
|
| 273 |
process.start()
|
| 274 |
+
# 等待一小段时间让进程获得PID,然后再添加到管理器
|
| 275 |
+
time.sleep(0.1)
|
| 276 |
+
process_manager.add_process(process, final_config)
|
| 277 |
|
| 278 |
+
# 等待配置的时间,避免并发启动导致的高CPU占用
|
| 279 |
+
# 即使是最后一个实例,也等待一段时间让其初始化,然后再进入主循环
|
| 280 |
+
time.sleep(start_delay)
|
|
|
|
| 281 |
|
| 282 |
# 等待所有进程
|
| 283 |
+
previous_count = None
|
| 284 |
+
last_log_time = 0
|
| 285 |
try:
|
| 286 |
+
while app_running:
|
| 287 |
+
alive_processes = process_manager.get_alive_processes()
|
| 288 |
+
current_count = len(alive_processes)
|
| 289 |
+
|
| 290 |
+
# 仅在数量变化或间隔一段时间后再记录,避免过于频繁的日志
|
| 291 |
+
now = time.time()
|
| 292 |
+
if current_count != previous_count or now - last_log_time >= 600:
|
| 293 |
+
logger.info(f"当前运行的浏览器实例数: {current_count}")
|
| 294 |
+
previous_count = current_count
|
| 295 |
+
last_log_time = now
|
| 296 |
+
|
| 297 |
+
if not alive_processes:
|
| 298 |
+
logger.info("所有浏览器进程已结束,主进程即将退出")
|
| 299 |
+
break
|
| 300 |
+
|
| 301 |
+
# 等待进程并清理死进程
|
| 302 |
+
for process in alive_processes:
|
| 303 |
+
try:
|
| 304 |
process.join(timeout=1)
|
| 305 |
+
except:
|
| 306 |
+
pass
|
| 307 |
+
|
| 308 |
time.sleep(1)
|
| 309 |
except KeyboardInterrupt:
|
| 310 |
+
logger.info("捕获到键盘中断信号,等待信号处理器完成关闭...")
|
| 311 |
+
# 不在这里关闭进程,让信号处理器统一处理
|
| 312 |
+
pass
|
| 313 |
+
|
| 314 |
+
# 确保在所有进程结束后退出
|
| 315 |
+
logger.info("浏览器实例管理器运行结束")
|
| 316 |
|
| 317 |
def run_standalone_mode():
    """Run in standalone mode.

    Sets the module-level ``app_running`` flag to True and then calls
    ``start_browser_instances(run_mode="standalone")`` in the calling thread.
    """
    global app_running
    app_running = True

    start_browser_instances(run_mode="standalone")
|
| 323 |
|
| 324 |
def run_server_mode():
|
| 325 |
"""服务器模式"""
|
|
|
|
| 339 |
app_running = True
|
| 340 |
|
| 341 |
# 在后台线程中启动浏览器实例
|
| 342 |
+
browser_thread = threading.Thread(target=lambda: start_browser_instances(run_mode="server"), daemon=True)
|
| 343 |
browser_thread.start()
|
| 344 |
|
| 345 |
# 定义路由
|
| 346 |
@flask_app.route('/health')
def health_check():
    """Health-check endpoint.

    Returns:
        A JSON response whose ``browser_instances`` field is
        ``process_manager.get_count()`` and whose ``running_instances``
        field is ``process_manager.get_alive_count()``.
    """
    global process_manager

    # Query the alive count first, then the total, as two separate reads.
    running_count = process_manager.get_alive_count()
    total_count = process_manager.get_count()

    payload = {
        'status': 'healthy',
        'browser_instances': total_count,
        'running_instances': running_count,
        'message': f'Application is running with {running_count} active browser instances',
    }
    return jsonify(payload)
|
|
|
|
| 359 |
@flask_app.route('/')
|
| 360 |
def index():
|
| 361 |
"""主页端点"""
|
| 362 |
+
global process_manager
|
| 363 |
+
running_count = process_manager.get_alive_count()
|
| 364 |
+
total_count = process_manager.get_count()
|
| 365 |
return jsonify({
|
| 366 |
'status': 'running',
|
| 367 |
+
'browser_instances': total_count,
|
| 368 |
'running_instances': running_count,
|
| 369 |
'run_mode': 'server',
|
| 370 |
'message': 'Camoufox Browser Automation is running in server mode'
|
|
|
|
| 382 |
server_logger.info("服务器正在关闭...")
|
| 383 |
|
| 384 |
def signal_handler(signum, frame):
    """Unified signal handler; only the main process runs the shutdown logic.

    Args:
        signum: Number of the signal that was received.
        frame: Interrupted stack frame (not used).

    In a process whose multiprocessing name is not ``'MainProcess'`` the
    signal is only logged and the handler returns. In the main process the
    handler clears ``app_running``, sets ``shutdown_event``, asks
    ``process_manager`` to terminate all processes, and finally exits with
    status 0 through ``sys.exit``.
    """
    global app_running, process_manager, shutdown_event

    # Configure logging first so every later step of the shutdown is visible.
    log = setup_logging(str(logs_dir() / 'app.log'), prefix="signal")
    log.info(f"接收到信号 {signum},开始处理...")

    current_pid = os.getpid()
    in_main_process = multiprocessing.current_process().name == 'MainProcess'

    if not in_main_process:
        # Children are shut down by the main process (terminate) or through
        # shutdown_event, so they must not run the shutdown sequence below.
        log.info(f"子进程 {current_pid} 接收到信号 {signum},忽略主进程信号处理逻辑")
        return

    log.info(f"主进程 {current_pid} 接收到信号 {signum},正在关闭应用...")

    # Step 1: drop the global flag right away so no new processes are created.
    app_running = False

    # Step 2: set the cross-process event that tells children to exit gracefully.
    try:
        shutdown_event.set()
        log.info("已设置全局关闭事件 (shutdown_event)")
    except Exception as e:
        log.error(f"设置关闭事件时发生错误: {e}")

    # Step 3: let the process manager terminate whatever is still running.
    try:
        process_manager.terminate_all(timeout=10)
    except Exception as e:
        log.error(f"调用 terminate_all 时发生错误: {e}")

    log.info("应用关闭流程结束,主进程退出。")
    sys.exit(0)
|
| 423 |
|
| 424 |
def main():
|
|
|
|
| 427 |
ensure_dir(logs_dir())
|
| 428 |
ensure_dir(cookies_dir())
|
| 429 |
|
| 430 |
+
# 注册信号处理器 - 添加更多信号的捕获
|
| 431 |
signal.signal(signal.SIGTERM, signal_handler)
|
| 432 |
signal.signal(signal.SIGINT, signal_handler)
|
| 433 |
+
# 在某些环境中可能还有其他信号
|
| 434 |
+
try:
|
| 435 |
+
signal.signal(signal.SIGQUIT, signal_handler)
|
| 436 |
+
except (ValueError, AttributeError):
|
| 437 |
+
pass
|
| 438 |
+
try:
|
| 439 |
+
signal.signal(signal.SIGHUP, signal_handler)
|
| 440 |
+
except (ValueError, AttributeError):
|
| 441 |
+
pass
|
| 442 |
|
| 443 |
# 检查运行模式环境变量
|
| 444 |
hg_mode = os.getenv('HG', '').lower()
|
utils/cookie_handler.py
CHANGED
|
@@ -3,7 +3,6 @@ def convert_cookie_editor_to_playwright(cookies_from_editor, logger=None):
|
|
| 3 |
将从 Cookie-Editor 插件导出的 Cookie 列表转换为 Playwright 兼容的格式。
|
| 4 |
"""
|
| 5 |
playwright_cookies = []
|
| 6 |
-
allowed_keys = {'name', 'value', 'domain', 'path', 'expires', 'httpOnly', 'secure', 'sameSite'}
|
| 7 |
|
| 8 |
for cookie in cookies_from_editor:
|
| 9 |
pw_cookie = {}
|
|
@@ -48,8 +47,6 @@ def convert_kv_to_playwright(kv_string, default_domain=".google.com", logger=Non
|
|
| 48 |
Returns:
|
| 49 |
list: Playwright 兼容的 Cookie 列表
|
| 50 |
"""
|
| 51 |
-
import re
|
| 52 |
-
|
| 53 |
playwright_cookies = []
|
| 54 |
|
| 55 |
# 按分号分割 Cookie
|
|
@@ -144,4 +141,4 @@ def auto_convert_to_playwright(cookie_data, default_domain=".google.com", logger
|
|
| 144 |
error_msg = f"无法识别的 Cookie 数据格式: {type(cookie_data).__name__}"
|
| 145 |
if logger:
|
| 146 |
logger.error(error_msg)
|
| 147 |
-
raise ValueError(error_msg)
|
|
|
|
| 3 |
将从 Cookie-Editor 插件导出的 Cookie 列表转换为 Playwright 兼容的格式。
|
| 4 |
"""
|
| 5 |
playwright_cookies = []
|
|
|
|
| 6 |
|
| 7 |
for cookie in cookies_from_editor:
|
| 8 |
pw_cookie = {}
|
|
|
|
| 47 |
Returns:
|
| 48 |
list: Playwright 兼容的 Cookie 列表
|
| 49 |
"""
|
|
|
|
|
|
|
| 50 |
playwright_cookies = []
|
| 51 |
|
| 52 |
# 按分号分割 Cookie
|
|
|
|
| 141 |
error_msg = f"无法识别的 Cookie 数据格式: {type(cookie_data).__name__}"
|
| 142 |
if logger:
|
| 143 |
logger.error(error_msg)
|
| 144 |
+
raise ValueError(error_msg)
|
utils/cookie_manager.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""
|
| 2 |
统一的Cookie管理器
|
| 3 |
-
整合JSON文件和环境变量
|
| 4 |
"""
|
| 5 |
|
| 6 |
import os
|
|
@@ -17,7 +17,6 @@ class CookieSource:
|
|
| 17 |
type: str # "file" | "env_var"
|
| 18 |
identifier: str # filename or "USER_COOKIE_1"
|
| 19 |
display_name: str # 显示名称
|
| 20 |
-
exists: bool = True
|
| 21 |
|
| 22 |
def __str__(self):
|
| 23 |
return f"{self.type}:{self.identifier}"
|
|
@@ -26,7 +25,7 @@ class CookieSource:
|
|
| 26 |
class CookieManager:
|
| 27 |
"""
|
| 28 |
统一的Cookie管理器
|
| 29 |
-
负责检测、加载和缓存所有来源的
|
| 30 |
"""
|
| 31 |
|
| 32 |
def __init__(self, logger=None):
|
|
@@ -36,7 +35,7 @@ class CookieManager:
|
|
| 36 |
|
| 37 |
def detect_all_sources(self) -> List[CookieSource]:
|
| 38 |
"""
|
| 39 |
-
检测所有可用的
|
| 40 |
结果会被缓存,避免重复扫描
|
| 41 |
"""
|
| 42 |
if self._detected_sources is not None:
|
|
@@ -44,7 +43,7 @@ class CookieManager:
|
|
| 44 |
|
| 45 |
sources = []
|
| 46 |
|
| 47 |
-
# 1. 扫描
|
| 48 |
try:
|
| 49 |
cookie_path = cookies_dir()
|
| 50 |
if os.path.isdir(cookie_path):
|
|
@@ -61,7 +60,7 @@ class CookieManager:
|
|
| 61 |
if cookie_files and self.logger:
|
| 62 |
self.logger.info(f"发现 {len(cookie_files)} 个 Cookie 文件")
|
| 63 |
elif self.logger:
|
| 64 |
-
self.logger.info(f"在 {cookie_path}
|
| 65 |
else:
|
| 66 |
if self.logger:
|
| 67 |
self.logger.error(f"Cookie 目录不存在: {cookie_path}")
|
|
@@ -102,7 +101,7 @@ class CookieManager:
|
|
| 102 |
|
| 103 |
def load_cookies(self, source: CookieSource) -> List[Dict]:
|
| 104 |
"""
|
| 105 |
-
从指定来源加载
|
| 106 |
|
| 107 |
Args:
|
| 108 |
source: Cookie来源对象
|
|
@@ -197,35 +196,3 @@ class CookieManager:
|
|
| 197 |
default_domain=".google.com",
|
| 198 |
logger=self.logger
|
| 199 |
)
|
| 200 |
-
|
| 201 |
-
def get_all_sources(self) -> List[CookieSource]:
|
| 202 |
-
"""获取所有检测到的 Cookie 来源"""
|
| 203 |
-
return self.detect_all_sources()
|
| 204 |
-
|
| 205 |
-
def clear_cache(self):
|
| 206 |
-
"""清空 Cookie 缓存"""
|
| 207 |
-
self._cookie_cache.clear()
|
| 208 |
-
if self.logger:
|
| 209 |
-
self.logger.debug("Cookie 缓存已清空")
|
| 210 |
-
|
| 211 |
-
def get_source_summary(self) -> Dict[str, int]:
|
| 212 |
-
"""
|
| 213 |
-
获取 Cookie 来源统计信息
|
| 214 |
-
|
| 215 |
-
Returns:
|
| 216 |
-
包含各类型来源数量的字典
|
| 217 |
-
"""
|
| 218 |
-
sources = self.detect_all_sources()
|
| 219 |
-
summary = {
|
| 220 |
-
"total": len(sources),
|
| 221 |
-
"files": 0,
|
| 222 |
-
"env_vars": 0
|
| 223 |
-
}
|
| 224 |
-
|
| 225 |
-
for source in sources:
|
| 226 |
-
if source.type == "file":
|
| 227 |
-
summary["files"] += 1
|
| 228 |
-
elif source.type == "env_var":
|
| 229 |
-
summary["env_vars"] += 1
|
| 230 |
-
|
| 231 |
-
return summary
|
|
|
|
| 1 |
"""
|
| 2 |
统一的Cookie管理器
|
| 3 |
+
整合JSON文件和环境变量Cookie的检测、加载和管理功能
|
| 4 |
"""
|
| 5 |
|
| 6 |
import os
|
|
|
|
| 17 |
type: str # "file" | "env_var"
|
| 18 |
identifier: str # filename or "USER_COOKIE_1"
|
| 19 |
display_name: str # 显示名称
|
|
|
|
| 20 |
|
| 21 |
def __str__(self):
|
| 22 |
return f"{self.type}:{self.identifier}"
|
|
|
|
| 25 |
class CookieManager:
|
| 26 |
"""
|
| 27 |
统一的Cookie管理器
|
| 28 |
+
负责检测、加载和缓存所有来源的Cookie数据
|
| 29 |
"""
|
| 30 |
|
| 31 |
def __init__(self, logger=None):
|
|
|
|
| 35 |
|
| 36 |
def detect_all_sources(self) -> List[CookieSource]:
|
| 37 |
"""
|
| 38 |
+
检测所有可用的Cookie来源(JSON文件 + 环境变量)
|
| 39 |
结果会被缓存,避免重复扫描
|
| 40 |
"""
|
| 41 |
if self._detected_sources is not None:
|
|
|
|
| 43 |
|
| 44 |
sources = []
|
| 45 |
|
| 46 |
+
# 1. 扫描Cookies目录中的JSON文件
|
| 47 |
try:
|
| 48 |
cookie_path = cookies_dir()
|
| 49 |
if os.path.isdir(cookie_path):
|
|
|
|
| 60 |
if cookie_files and self.logger:
|
| 61 |
self.logger.info(f"发现 {len(cookie_files)} 个 Cookie 文件")
|
| 62 |
elif self.logger:
|
| 63 |
+
self.logger.info(f"在 {cookie_path} 目录下未找到任何格式的 Cookie 文件")
|
| 64 |
else:
|
| 65 |
if self.logger:
|
| 66 |
self.logger.error(f"Cookie 目录不存在: {cookie_path}")
|
|
|
|
| 101 |
|
| 102 |
def load_cookies(self, source: CookieSource) -> List[Dict]:
|
| 103 |
"""
|
| 104 |
+
从指定来源加载Cookie数据
|
| 105 |
|
| 106 |
Args:
|
| 107 |
source: Cookie来源对象
|
|
|
|
| 196 |
default_domain=".google.com",
|
| 197 |
logger=self.logger
|
| 198 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/paths.py
CHANGED
|
@@ -6,8 +6,7 @@ from pathlib import Path
|
|
| 6 |
@lru_cache(maxsize=1)
|
| 7 |
def project_root() -> Path:
|
| 8 |
"""
|
| 9 |
-
|
| 10 |
-
depend on the current working directory.
|
| 11 |
"""
|
| 12 |
env_root = os.getenv("CAMOUFOX_PROJECT_ROOT")
|
| 13 |
if env_root:
|
|
@@ -18,15 +17,15 @@ def project_root() -> Path:
|
|
| 18 |
if (parent / "cookies").exists():
|
| 19 |
return parent
|
| 20 |
|
| 21 |
-
#
|
| 22 |
return current.parents[min(2, len(current.parents) - 1)]
|
| 23 |
|
| 24 |
|
| 25 |
def logs_dir() -> Path:
|
| 26 |
-
"""
|
| 27 |
return project_root() / "logs"
|
| 28 |
|
| 29 |
|
| 30 |
def cookies_dir() -> Path:
|
| 31 |
-
"""
|
| 32 |
return project_root() / "cookies"
|
|
|
|
| 6 |
@lru_cache(maxsize=1)
|
| 7 |
def project_root() -> Path:
|
| 8 |
"""
|
| 9 |
+
返回代码仓库根目录,使调用者能够构建不依赖当前工作目录的绝对路径。
|
|
|
|
| 10 |
"""
|
| 11 |
env_root = os.getenv("CAMOUFOX_PROJECT_ROOT")
|
| 12 |
if env_root:
|
|
|
|
| 17 |
if (parent / "cookies").exists():
|
| 18 |
return parent
|
| 19 |
|
| 20 |
+
# 如果标记目录缺失,则回退到原始行为
|
| 21 |
return current.parents[min(2, len(current.parents) - 1)]
|
| 22 |
|
| 23 |
|
| 24 |
def logs_dir() -> Path:
    """Return the root-level directory that stores log files and screenshots."""
    return project_root().joinpath("logs")
|
| 27 |
|
| 28 |
|
| 29 |
def cookies_dir() -> Path:
    """Return the root-level directory that stores persisted cookie JSON files."""
    return project_root().joinpath("cookies")
|