hkfires committed on
Commit
3085164
·
verified ·
1 Parent(s): f7cd5e8

Upload 10 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use a lightweight official Python image as the base.
FROM python:3.11-slim-bookworm

# Set the working directory; every following instruction runs from here.
WORKDIR /app

# Install the minimal set of system libraries needed to run Playwright.
# The apt cache is removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libatk1.0-0 libatk-bridge2.0-0 libcups2 libdbus-1-3 libdrm2 libgbm1 libgtk-3-0 \
    libnspr4 libnss3 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxdamage1 \
    libxext6 libxfixes3 libxrandr2 libxrender1 libxtst6 ca-certificates \
    fonts-liberation libasound2 libpangocairo-1.0-0 libpango-1.0-0 libu2f-udev xvfb \
    && rm -rf /var/lib/apt/lists/*

# Copy and install the Python dependencies.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Download camoufox.
RUN camoufox fetch

# Copy every project file into the working directory.
COPY . .

# Expose the port Hugging Face Spaces expects (only used in server mode).
EXPOSE 7860


# Command executed when the container starts.
CMD ["python", "main.py"]
browser/instance.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from playwright.sync_api import TimeoutError, Error as PlaywrightError
3
+ from utils.logger import setup_logging
4
+ from utils.cookie_manager import CookieManager
5
+ from browser.navigation import handle_successful_navigation
6
+ from camoufox.sync_api import Camoufox
7
+ from utils.paths import logs_dir
8
+ from utils.common import parse_headless_mode, ensure_dir
9
+
10
+
11
def _becomes_visible(locator, timeout_ms):
    """
    Return True if ``locator`` becomes visible within ``timeout_ms`` milliseconds.

    ``Locator.is_visible()`` returns immediately and ignores its ``timeout``
    argument, so an explicit ``wait_for`` is needed to actually wait.
    ``.first`` avoids a strict-mode error when several elements match.
    """
    try:
        locator.first.wait_for(state='visible', timeout=timeout_ms)
    except TimeoutError:
        return False
    return True


def _navigate_to_target(page, expected_url, logger, screenshot_dir, diagnostic_tag):
    """
    Navigate ``page`` to ``expected_url`` and log the outcome.

    Returns:
        bool: True when ``page.goto`` completed without raising. On a timeout
        or another Playwright error, diagnostics are saved on a best-effort
        basis and False is returned.
    """
    try:
        logger.info(f"正在导航到: {expected_url} (超时设置为 120 秒)")
        # page.goto() returns a response object carrying the HTTP status.
        response = page.goto(expected_url, wait_until='domcontentloaded', timeout=120000)
    except TimeoutError:
        # Must be handled before PlaywrightError: the timeout is a subclass of it.
        logger.error(f"导航到 {expected_url} 超时 (超过120秒)。")
        logger.error("可能原因:网络连接缓慢、目标网站服务器无响应、代理问题、或页面资源被阻塞。")
        try:
            # A screenshot shows what state the page was stuck in.
            screenshot_path = os.path.join(screenshot_dir, f"FAIL_timeout_{diagnostic_tag}.png")
            page.screenshot(path=screenshot_path, full_page=True)
            logger.info(f"已截取超时时的屏幕快照: {screenshot_path}")

            # The HTML helps when analysing the DOM afterwards.
            html_path = os.path.join(screenshot_dir, f"FAIL_timeout_{diagnostic_tag}.html")
            with open(html_path, 'w', encoding='utf-8') as f:
                f.write(page.content())
            logger.info(f"已保存超时时的页面HTML: {html_path}")
        except Exception as diag_e:
            logger.error(f"在尝试进行超时诊断(截图/保存HTML)时发生额外错误: {diag_e}")
        return False
    except PlaywrightError as e:
        # Other navigation failures: DNS resolution, refused connection, ...
        error_message = str(e)
        logger.error(f"导航到 {expected_url} 时发生 Playwright 网络错误。")
        logger.error(f"错误详情: {error_message}")

        # Camoufox is Firefox-based and reports NS_ERROR_* codes; the
        # Chromium net::ERR_* codes are kept for compatibility.
        hints = (
            (("net::ERR_NAME_NOT_RESOLVED", "NS_ERROR_UNKNOWN_HOST"),
             "排查建议:检查DNS设置或域名是否正确。"),
            (("net::ERR_CONNECTION_REFUSED", "NS_ERROR_CONNECTION_REFUSED",
              "NS_ERROR_PROXY_CONNECTION_REFUSED"),
             "排查建议:目标服务器可能已关闭,或代理/防火墙阻止了连接。"),
            (("net::ERR_INTERNET_DISCONNECTED", "NS_ERROR_OFFLINE"),
             "排查建议:检查本机的网络连接。"),
        )
        for markers, hint in hints:
            if any(marker in error_message for marker in markers):
                logger.error(hint)
                break

        # Try a screenshot anyway, although the page may be unreachable.
        try:
            screenshot_path = os.path.join(screenshot_dir, f"FAIL_network_error_{diagnostic_tag}.png")
            page.screenshot(path=screenshot_path)
            logger.info(f"已截取网络错误时的屏幕快照: {screenshot_path}")
        except Exception as diag_e:
            logger.error(f"在尝试进行网络错误诊断(截图)时发生额外错误: {diag_e}")
        return False

    if response:
        logger.info(f"导航初步成功,服务器响应状态码: {response.status} {response.status_text}")
        if not response.ok:  # response.ok is True for 2xx status codes
            logger.warning(f"警告:页面加载成功,但HTTP状态码表示错误: {response.status}")
            # Best-effort snapshot: a failing screenshot must not be reported
            # as a navigation failure, the page did load.
            try:
                page.screenshot(path=os.path.join(
                    screenshot_dir, f"WARN_http_status_{response.status}_{diagnostic_tag}.png"))
            except Exception as diag_e:
                logger.error(f"保存HTTP状态诊断截图时发生额外错误: {diag_e}")
    else:
        # Non-HTTP navigations (e.g. about:blank) may yield no response.
        logger.warning("page.goto 未返回响应对象,可能是一个非HTTP导航。")
    return True


def _confirm_logged_in(page, expected_url, logger, screenshot_dir, diagnostic_tag):
    """
    Check that the loaded page is the expected one and shows no login failure.

    Returns:
        bool: True when every check passed; False after logging the reason
        and saving a ``FAIL_*`` screenshot otherwise.
    """
    def shot(label):
        return os.path.join(screenshot_dir, f"{label}_{diagnostic_tag}.png")

    final_url = page.url
    logger.info(f"导航完成。最终URL为: {final_url}")

    if "accounts.google.com/v3/signin/identifier" in final_url:
        logger.error("检测到Google登录页面(需要输入邮箱)。Cookie已完全失效。")
        page.screenshot(path=shot("FAIL_identifier_page"))
        return False

    if expected_url.split('?')[0] not in final_url:
        if "accounts.google.com/v3/signin/accountchooser" in final_url:
            logger.warning("检测到Google账户选择页面。登录失败或Cookie已过期。")
            page.screenshot(path=shot("FAIL_chooser_click_failed"))
        else:
            logger.error(f"导航到了一个意外的URL: {final_url}")
            page.screenshot(path=shot("FAIL_unexpected_url"))
        return False

    logger.info("URL正确。现在等待页面完成初始加载...")

    # Wait for the loading spinner to disappear first: the error banner or the
    # real content only shows up once the initial asynchronous load is done.
    spinner_locator = page.locator('mat-spinner')
    try:
        logger.info("正在等待加载指示器 (spinner) 消失... (最长等待30秒)")
        spinner_locator.wait_for(state='hidden', timeout=30000)
        logger.info("加载指示器已消失。页面已完成异步加载。")
    except TimeoutError:
        logger.error("页面加载指示器在30秒内未消失。页面可能已卡住。")
        page.screenshot(path=shot("FAIL_spinner_stuck"))
        return False

    # The page should be stable now, so a short wait for the banner is enough.
    auth_error_text = "authentication error"
    auth_error_locator = page.get_by_text(auth_error_text, exact=False)
    if _becomes_visible(auth_error_locator, 2000):
        logger.error(f"检测到认证失败的错误横幅: '{auth_error_text}'. Cookie已过期或无效。")
        page.screenshot(path=shot("FAIL_auth_error_banner"))
        return False

    # Fallback confirmation: a visible login button means we are signed out.
    logger.info("未检测到认证错误横幅。进行最终确认。")
    login_button_cn = page.get_by_role('button', name='登录')
    login_button_en = page.get_by_role('button', name='Login')
    if _becomes_visible(login_button_cn, 1000) or _becomes_visible(login_button_en, 1000):
        logger.error("页面上仍显示'登录'按钮。Cookie无效。")
        page.screenshot(path=shot("FAIL_login_button_visible"))
        return False

    return True


def run_browser_instance(config):
    """
    Launch and supervise a single Camoufox browser instance.

    Cookies are loaded through CookieManager from ``config['cookie_source']``,
    the browser navigates to ``config['url']``, the login state is verified
    and, on success, control is handed to ``handle_successful_navigation``.

    Args:
        config: Merged instance configuration. Keys read here:
            ``cookie_source`` (required), ``url`` (required), ``proxy``
            (optional) and ``headless`` (optional, defaults to ``'virtual'``).

    Returns:
        None. Every failure is logged and ends the function early.
    """
    cookie_source = config.get('cookie_source')
    if not cookie_source:
        # No instance label is available yet, so use the default logger.
        logger = setup_logging(os.path.join(logs_dir(), 'app.log'))
        logger.error("错误: 配置中缺少cookie_source对象")
        return

    instance_label = cookie_source.display_name
    logger = setup_logging(
        os.path.join(logs_dir(), 'app.log'), prefix=instance_label
    )
    # Used inside diagnostic file names, so path separators are replaced.
    diagnostic_tag = instance_label.replace(os.sep, "_")

    expected_url = config.get('url')
    if not expected_url:
        # Fail early with a clear message instead of deep inside page.goto().
        logger.error("错误: 配置中缺少目标 url")
        return
    proxy = config.get('proxy')
    headless_setting = config.get('headless', 'virtual')

    try:
        cookies = list(CookieManager(logger).load_cookies(cookie_source))
    except Exception as e:
        logger.error(f"从cookie来源加载时出错: {e}")
        return

    if not cookies:
        logger.error("错误: 没有可用的cookie(既没有有效的JSON文件,也没有环境变量)")
        return

    launch_options = {"headless": parse_headless_mode(headless_setting)}
    if proxy:
        logger.info(f"使用代理: {proxy} 访问")
        launch_options["proxy"] = {"server": proxy, "bypass": "localhost, 127.0.0.1"}
    # Image loading stays enabled on purpose: there are few images and
    # blocking them could increase the risk of being flagged.

    screenshot_dir = logs_dir()
    ensure_dir(screenshot_dir)

    try:
        with Camoufox(**launch_options) as browser:
            context = browser.new_context()
            context.add_cookies(cookies)
            page = context.new_page()

            if not _navigate_to_target(page, expected_url, logger, screenshot_dir, diagnostic_tag):
                return  # nothing useful can be done after a failed navigation

            logger.info("页面初步加载完成,正在检查并处理初始弹窗...")
            page.wait_for_timeout(2000)

            if not _confirm_logged_in(page, expected_url, logger, screenshot_dir, diagnostic_tag):
                return

            logger.info("所有验证通过,确认已成功登录。")
            handle_successful_navigation(page, logger, diagnostic_tag)

    except KeyboardInterrupt:
        logger.info("用户中断,正在关闭...")
    except Exception as e:
        # Last-resort handler for anything unexpected.
        logger.exception(f"运行 Camoufox 实例时发生未预料的严重错误: {e}")
browser/navigation.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import os
3
+ from playwright.sync_api import Page, expect
4
+ from utils.paths import logs_dir
5
+ from utils.common import ensure_dir
6
+
7
def handle_untrusted_dialog(page: Page, logger=None):
    """
    Detect the "Last modified by..." dialog and dismiss it via its "OK" button.

    Waits up to 10 seconds for the button to appear. Any error is logged and
    swallowed so the caller can continue.

    Args:
        page: Playwright page to inspect.
        logger: Optional logger; a module logger is used when omitted.
    """
    import logging
    from playwright.sync_api import TimeoutError as PlaywrightTimeoutError

    # The parameter defaults to None, so never call methods on it directly.
    log = logger if logger is not None else logging.getLogger(__name__)
    ok_button_locator = page.get_by_role("button", name="OK")

    try:
        try:
            # Locator.is_visible() ignores its timeout and returns at once;
            # wait_for() is what actually waits for the dialog to show up.
            ok_button_locator.wait_for(state="visible", timeout=10000)
        except PlaywrightTimeoutError:
            log.info("在10秒内未检测到弹窗,继续执行...")
            return

        log.info("检测到弹窗,正在点击 'OK' 按钮...")
        ok_button_locator.click(force=True)
        log.info("'OK' 按钮已点击。")
        expect(ok_button_locator).to_be_hidden(timeout=1000)
        log.info("弹窗已确认关闭。")
    except Exception as e:
        log.info(f"检查弹窗时发生意外:{e},将继续执行...")
26
+
27
def handle_successful_navigation(page: Page, logger, cookie_file_config):
    """
    Run the follow-up steps once the target page has been reached.

    Focuses the page, dismisses the optional dialog, waits for rendering and
    then clicks the page every 10 seconds until a click fails.

    Args:
        page: Playwright page that shows the target site.
        logger: Logger used for progress and error messages.
        cookie_file_config: Tag embedded in the failure screenshot file name.
    """
    logger.info("已成功到达目标页面。")
    page.click('body')  # give the page focus

    handle_untrusted_dialog(page, logger=logger)

    logger.info("等待15秒以便页面完全渲染...")
    time.sleep(15)

    logger.info("实例将保持运行状态。每10秒点击一次页面以保持活动。")
    keep_alive = True
    while keep_alive:
        try:
            page.click('body')
            time.sleep(10)
        except Exception as loop_error:
            # The page was closed or broke: record what we can, then stop.
            keep_alive = False
            logger.error(f"在保持活动循环中出错: {loop_error}")
            try:
                target_dir = logs_dir()
                ensure_dir(target_dir)
                target_file = os.path.join(
                    target_dir, f"FAIL_keep_alive_error_{cookie_file_config}.png"
                )
                page.screenshot(path=target_file, full_page=True)
                logger.info(f"已在保持活动循环出错时截屏: {target_file}")
            except Exception as capture_error:
                logger.error(f"在保持活动循环出错时截屏失败: {capture_error}")
main.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import threading
3
+ import multiprocessing
4
+ import signal
5
+ import sys
6
+ import time
7
+
8
+ from browser.instance import run_browser_instance
9
+ from utils.logger import setup_logging
10
+ from utils.paths import cookies_dir, logs_dir
11
+ from utils.cookie_manager import CookieManager
12
+ from utils.common import clean_env_value, ensure_dir
13
+
14
# Module-level state shared by the launcher, the Flask routes and the signal handler.
browser_processes = []  # Process objects appended by start_browser_instances()
app_running = False  # set to True by the run_*_mode() functions, False by signal_handler()
flask_app = None  # Flask application, created only inside run_server_mode()
18
+
19
+
20
def load_instance_configurations(logger):
    """
    Build the shared settings and one instance profile per cookie source.

    Reads CAMOUFOX_INSTANCE_URL (required), CAMOUFOX_HEADLESS and
    CAMOUFOX_PROXY from the environment and asks CookieManager for every
    available cookie source.

    Args:
        logger: Logger used for progress and error messages.

    Returns:
        tuple: ``(global_settings, instances)``, or ``(None, None)`` when the
        URL is missing or no cookie source was found.
    """
    # The URL is shared by every instance and is mandatory.
    target_url = clean_env_value(os.getenv("CAMOUFOX_INSTANCE_URL"))
    if not target_url:
        logger.error("错误: 缺少环境变量 CAMOUFOX_INSTANCE_URL。所有实例需要一个共享的目标URL。")
        return None, None

    headless_value = clean_env_value(os.getenv("CAMOUFOX_HEADLESS"))
    global_settings = {
        "headless": headless_value or "virtual",
        "url": target_url,
    }

    proxy_value = clean_env_value(os.getenv("CAMOUFOX_PROXY"))
    if proxy_value:
        global_settings["proxy"] = proxy_value

    sources = CookieManager(logger).detect_all_sources()
    if not sources:
        logger.error("错误: 未找到任何cookie来源(既没有JSON文件,也没有环境变量cookie)。")
        return None, None

    instances = []
    for source in sources:
        if source.type == "file":
            profile = {"cookie_file": source.identifier, "cookie_source": source}
        elif source.type == "env_var":
            # The numeric suffix of the variable name, e.g. "USER_COOKIE_1" -> 1.
            suffix = source.identifier.rsplit("_", 1)[-1]
            profile = {
                "cookie_file": None,
                "env_cookie_index": int(suffix),
                "cookie_source": source,
            }
        else:
            continue
        instances.append(profile)

    logger.info(f"将启动 {len(instances)} 个浏览器实例")

    return global_settings, instances
69
+
70
def start_browser_instances():
    """
    Start one browser process per instance profile and supervise them.

    Launches are spaced 30 seconds apart. Afterwards the function keeps
    polling until every process has exited or ``app_running`` becomes False.
    """
    global browser_processes, app_running

    logger = setup_logging(str(logs_dir() / 'app.log'))
    logger.info("---------------------Camoufox 实例管理器开始启动---------------------")

    global_settings, instance_profiles = load_instance_configurations(logger)
    if not instance_profiles:
        logger.error("错误: 环境变量中未找到任何实例配置。")
        return

    total = len(instance_profiles)
    for position, profile in enumerate(instance_profiles, 1):
        if not app_running:
            break

        # Per-instance values override the shared settings.
        final_config = {**global_settings, **profile}

        if 'url' not in final_config:
            logger.warning(f"警告: 跳过一个无效的配置项 (缺少 url): {profile}")
            continue

        cookie_source = final_config.get('cookie_source')
        if not cookie_source:
            logger.error("错误: 配置中缺少cookie_source对象")
            continue

        if cookie_source.type == "file":
            logger.info(
                f"正在启动第 {position}/{total} 个浏览器实例 (file: {cookie_source.display_name})..."
            )
        elif cookie_source.type == "env_var":
            logger.info(
                f"正在启动第 {position}/{total} 个浏览器实例 (env: {cookie_source.display_name})..."
            )

        worker = multiprocessing.Process(target=run_browser_instance, args=(final_config,))
        browser_processes.append(worker)
        worker.start()

        # Stagger launches to avoid a CPU spike from concurrent start-ups.
        if position < total:
            logger.info("等待 30 秒后启动下一个实例...")
            time.sleep(30)

    # Supervise until every process is gone or shutdown was requested.
    try:
        while app_running and browser_processes:
            for worker in list(browser_processes):
                if worker.is_alive():
                    worker.join(timeout=1)
                else:
                    browser_processes.remove(worker)
            time.sleep(1)
    except KeyboardInterrupt:
        logger.info("捕获到终止信号,正在关闭所有浏览器进程...")
        for worker in browser_processes:
            worker.terminate()
            worker.join()
132
+
133
def run_standalone_mode():
    """Standalone mode: mark the app as running and supervise the browsers in this thread."""
    global app_running

    app_running = True
    start_browser_instances()
139
+
140
def run_server_mode():
    """
    Server mode: run the browsers in a background thread behind a Flask app.

    The app listens on 0.0.0.0:7860 and serves ``/health`` and ``/``, both of
    which report how many browser processes are tracked and alive.
    """
    global app_running, flask_app

    server_logger = setup_logging(str(logs_dir() / 'app.log'), prefix="server")

    # Flask is imported lazily because only this mode needs it.
    try:
        from flask import Flask, jsonify
        flask_app = Flask(__name__)
    except ImportError:
        server_logger.error("错误: 服务器模式需要 Flask,请安装: pip install flask")
        return

    app_running = True

    # Browsers are supervised in a daemon thread while Flask blocks below.
    threading.Thread(target=start_browser_instances, daemon=True).start()

    def _alive_count():
        return sum(1 for proc in browser_processes if proc.is_alive())

    @flask_app.route('/health')
    def health_check():
        """Health-check endpoint."""
        running_count = _alive_count()
        return jsonify({
            'status': 'healthy',
            'browser_instances': len(browser_processes),
            'running_instances': running_count,
            'message': f'Application is running with {running_count} active browser instances'
        })

    @flask_app.route('/')
    def index():
        """Landing endpoint."""
        running_count = _alive_count()
        return jsonify({
            'status': 'running',
            'browser_instances': len(browser_processes),
            'running_instances': running_count,
            'run_mode': 'server',
            'message': 'Camoufox Browser Automation is running in server mode'
        })

    # Silence werkzeug's per-request log lines.
    import logging
    logging.getLogger('werkzeug').setLevel(logging.ERROR)

    try:
        flask_app.run(host='0.0.0.0', port=7860, debug=False)
    except KeyboardInterrupt:
        server_logger.info("服务器正在关闭...")
195
+
196
def signal_handler(signum, frame):
    """
    Shared SIGTERM/SIGINT handler: stop every browser process, then exit.

    Args:
        signum: Number of the received signal.
        frame: Current stack frame (unused).
    """
    global app_running

    # Forked browser workers inherit this handler. They do not own the
    # Process objects in browser_processes, so they must simply exit instead
    # of calling is_alive()/terminate() on them.
    if multiprocessing.parent_process() is not None:
        sys.exit(0)

    logger = setup_logging(str(logs_dir() / 'app.log'), prefix="signal")
    logger.info(f"接收到信号 {signum},正在关闭应用...")
    app_running = False

    # Iterate over a copy: the supervisor thread may mutate the list.
    for process in list(browser_processes):
        try:
            if not process.is_alive():
                continue
            process.terminate()
            # join() never raises on timeout, so check liveness explicitly
            # before escalating to kill().
            process.join(timeout=5)
            if process.is_alive():
                process.kill()
                process.join(timeout=5)
        except Exception as exc:
            logger.error(f"关闭进程 {process.pid} 时出错: {exc}")

    logger.info("所有进程已关闭")
    sys.exit(0)
214
+
215
def main():
    """Program entry point: prepare directories, install signal handlers, pick the run mode."""
    # Make sure the working directories exist.
    for locate in (logs_dir, cookies_dir):
        ensure_dir(locate())

    # One handler serves both termination signals.
    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, signal_handler)

    # HG=true selects server mode; anything else runs standalone.
    server_requested = os.getenv('HG', '').lower() == 'true'
    runner = run_server_mode if server_requested else run_standalone_mode
    runner()
232
+
233
# Script entry point: initialise multiprocessing's frozen-executable support, then run main().
if __name__ == "__main__":
    multiprocessing.freeze_support()
    main()
requirements.txt ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ camoufox[geoip]==0.4.11
2
+ aiohappyeyeballs==2.6.1
3
+ aiohttp==3.12.13
4
+ aiosignal==1.3.2
5
+ attrs==25.3.0
6
+ browserforge==1.2.3
7
+ certifi==2025.6.15
8
+ charset-normalizer==3.4.2
9
+ click==8.2.1
10
+ frozenlist==1.7.0
11
+ flask==3.0.0
12
+ geoip2==5.1.0
13
+ greenlet==3.2.3
14
+ idna==3.10
15
+ language-tags==1.2.0
16
+ lxml==5.4.0
17
+ maxminddb==2.7.0
18
+ multidict==6.5.0
19
+ numpy==2.3.0
20
+ orjson==3.10.18
21
+ platformdirs==4.3.8
22
+ playwright==1.52.0
23
+ propcache==0.3.2
24
+ pyee==13.0.0
25
+ PySocks==1.7.1
26
+ requests==2.32.4
27
+ screeninfo==0.8.1
28
+ tqdm==4.67.1
29
+ typing_extensions==4.14.0
30
+ ua-parser==1.0.1
31
+ ua-parser-builtins==0.18.0.post1
32
+ urllib3==2.4.0
33
+ yarl==1.20.1
utils/common.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 通用工具函数
3
+ 提供项目中常用的基础功能
4
+ """
5
+
6
+ import os
7
+ from pathlib import Path
8
+
9
def clean_env_value(value):
    """
    Strip surrounding whitespace from an environment variable value.

    Args:
        value: Raw value of the environment variable, or None.

    Returns:
        str or None: The stripped value, or None when the input is None or
        contains nothing but whitespace.
    """
    cleaned = value.strip() if value is not None else ""
    return cleaned if cleaned else None
23
+
24
+
25
def parse_headless_mode(headless_setting):
    """
    Translate the headless configuration value into a launch option.

    Args:
        headless_setting: Configured value; compared case-insensitively
            after conversion to ``str``.

    Returns:
        bool or str: True for 'true', False for 'false', and 'virtual' for
        every other value.
    """
    normalized = str(headless_setting).lower()
    return {'true': True, 'false': False}.get(normalized, 'virtual')
41
+
42
+
43
def ensure_dir(path):
    """
    Create the directory, including missing parents, if it does not exist yet.

    Args:
        path: Directory path, either a string or a Path object.
    """
    target = Path(path) if isinstance(path, str) else path
    os.makedirs(target, exist_ok=True)
utils/cookie_handler.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def convert_cookie_editor_to_playwright(cookies_from_editor, logger=None):
    """
    Convert a cookie list exported by the Cookie-Editor extension to Playwright format.

    Args:
        cookies_from_editor: Iterable of cookie dicts as exported by
            Cookie-Editor. ``None`` or an empty iterable yields ``[]``.
        logger: Optional logger that receives a warning for every skipped entry.

    Returns:
        list: Playwright-compatible cookie dicts. Entries that are not dicts
        or that lack any of name/value/domain/path are skipped.
    """
    copied_keys = ('name', 'value', 'domain', 'path', 'httpOnly', 'secure')
    required_keys = ('name', 'value', 'domain', 'path')
    # Cookie-Editor sameSite values -> Playwright sameSite values.
    same_site_map = {
        'no_restriction': 'None',
        'lax': 'Lax',
        'strict': 'Strict',
        'unspecified': 'Lax',
    }

    playwright_cookies = []
    for cookie in cookies_from_editor or ():
        # A malformed export must not abort the whole conversion.
        if not isinstance(cookie, dict):
            if logger:
                logger.warning(f"跳过一个格式不完整的 Cookie: {cookie}")
            continue

        pw_cookie = {key: cookie[key] for key in copied_keys if key in cookie}

        # Session cookies, and cookies with a null expiry, use -1.
        if cookie.get('session', False):
            pw_cookie['expires'] = -1
        elif 'expirationDate' in cookie:
            expiration = cookie['expirationDate']
            pw_cookie['expires'] = -1 if expiration is None else int(expiration)

        if 'sameSite' in cookie:
            mapped = same_site_map.get(str(cookie['sameSite']).lower())
            if mapped is not None:
                pw_cookie['sameSite'] = mapped

        if all(key in pw_cookie for key in required_keys):
            playwright_cookies.append(pw_cookie)
        elif logger:
            logger.warning(f"跳过一个格式不完整的 Cookie: {cookie}")

    return playwright_cookies
37
+
38
+
39
def convert_kv_to_playwright(kv_string, default_domain=".google.com", logger=None):
    """
    Convert a key/value cookie string to Playwright-compatible cookies.

    Args:
        kv_string (str): Cookies formatted as "name1=value1; name2=value2; ...".
            ``None`` or an empty string yields ``[]``.
        default_domain (str): Domain assigned to every cookie; defaults to
            ".google.com".
        logger: Optional logger for warnings and debug messages.

    Returns:
        list: Playwright-compatible cookie dicts.
    """
    playwright_cookies = []
    if not kv_string:
        return playwright_cookies

    for raw_pair in kv_string.split(';'):
        pair = raw_pair.strip()
        if not pair:
            continue

        # Split on the first '=' only: values may contain '=' themselves.
        name, separator, value = pair.partition('=')
        if not separator:
            if logger:
                logger.warning(f"跳过无效的 Cookie 格式: '{pair}'")
            continue

        name = name.strip()
        value = value.strip()
        if not name:
            if logger:
                logger.warning(f"跳过空名称的 Cookie: '{pair}'")
            continue

        playwright_cookies.append({
            'name': name,
            'value': value,
            'domain': default_domain,
            'path': '/',
            'expires': -1,  # session cookie by default
            'httpOnly': False,  # the key/value format cannot express httpOnly
            'secure': True,  # assumed to be a secure cookie
            'sameSite': 'Lax',  # default SameSite policy
        })

        if logger:
            logger.debug(f"成功转换 Cookie: {name} -> domain={default_domain}")

    return playwright_cookies
utils/cookie_manager.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 统一的Cookie管理器
3
+ 整合JSON文件和环境变量cookie的检测、加载和管理功能
4
+ """
5
+
6
+ import os
7
+ import json
8
+ from dataclasses import dataclass
9
+ from typing import List, Dict, Optional
10
+ from utils.paths import cookies_dir
11
+ from utils.cookie_handler import convert_cookie_editor_to_playwright, convert_kv_to_playwright
12
+ from utils.common import clean_env_value
13
+
14
@dataclass
class CookieSource:
    """Uniform description of a single cookie source."""

    # Kind of source: "file" or "env_var".
    type: str
    # File name, or environment variable name such as "USER_COOKIE_1".
    identifier: str
    # Human-readable name used in log output.
    display_name: str
    exists: bool = True

    def __str__(self):
        return "{}:{}".format(self.type, self.identifier)
24
+
25
+
26
class CookieManager:
    """
    Unified cookie manager.

    Detects cookie sources (JSON files in the cookies directory and
    USER_COOKIE_<n> environment variables), loads them as Playwright-compatible
    cookie lists, and caches both the detection result and the loaded cookies.
    """

    def __init__(self, logger=None):
        self.logger = logger
        self._detected_sources: Optional[List[CookieSource]] = None
        self._cookie_cache: Dict[str, List[Dict]] = {}

    def detect_all_sources(self) -> List[CookieSource]:
        """
        Detect every available cookie source (JSON files plus environment variables).

        The result is cached, so the scan runs only once per manager.
        """
        if self._detected_sources is not None:
            return self._detected_sources

        found = []

        # 1. JSON files inside the cookies directory.
        try:
            cookie_path = cookies_dir()
            if os.path.isdir(cookie_path):
                cookie_files = [
                    entry for entry in os.listdir(cookie_path)
                    if entry.lower().endswith('.json')
                ]
                found.extend(
                    CookieSource(type="file", identifier=name, display_name=name)
                    for name in cookie_files
                )
                if self.logger:
                    if cookie_files:
                        self.logger.info(f"发现 {len(cookie_files)} 个 Cookie 文件")
                    else:
                        self.logger.info(f"在 {cookie_path} 目录下未找到任何 .json 格式的 Cookie 文件")
            elif self.logger:
                self.logger.error(f"Cookie 目录不存在: {cookie_path}")
        except Exception as scan_error:
            if self.logger:
                self.logger.error(f"扫描 Cookie 目录时出错: {scan_error}")

        # 2. USER_COOKIE_1, USER_COOKIE_2, ... until the first missing or empty one.
        next_index = 1
        while True:
            env_var_name = f"USER_COOKIE_{next_index}"
            if not clean_env_value(os.getenv(env_var_name)):
                break
            found.append(
                CookieSource(
                    type="env_var",
                    identifier=env_var_name,
                    display_name=env_var_name,
                )
            )
            next_index += 1

        env_total = next_index - 1
        if self.logger:
            if env_total:
                self.logger.info(f"发现 {env_total} 个 Cookie 环境变量")
            else:
                self.logger.info("未检测到任何 USER_COOKIE 环境变量")

        self._detected_sources = found
        return found

    def load_cookies(self, source: CookieSource) -> List[Dict]:
        """
        Load the cookies of one source.

        Args:
            source: Cookie source to load.

        Returns:
            Playwright-compatible cookie list; an empty list when the source
            type is unknown or loading fails.
        """
        cache_key = str(source)

        cached = self._cookie_cache.get(cache_key)
        if cache_key in self._cookie_cache:
            if self.logger:
                self.logger.debug(f"从缓存加载 Cookie: {source.display_name}")
            return cached

        try:
            loaders = {
                "file": self._load_from_file,
                "env_var": self._load_from_env,
            }
            loader = loaders.get(source.type)
            if loader is None:
                if self.logger:
                    self.logger.error(f"未知的 Cookie 来源类型: {source.type}")
                return []

            cookies = loader(source.identifier)
            self._cookie_cache[cache_key] = cookies

            if self.logger:
                self.logger.info(f"从 {source.display_name} 加载了 {len(cookies)} 个 Cookie 数据")
        except Exception as load_error:
            if self.logger:
                self.logger.error(f"从 {source.display_name} 加载 Cookie 时出错: {load_error}")
            return []

        return cookies

    def _load_from_file(self, filename: str) -> List[Dict]:
        """Load cookies from a JSON file inside the cookies directory."""
        cookie_path = cookies_dir() / filename

        if not os.path.exists(cookie_path):
            raise FileNotFoundError(f"Cookie 文件不存在: {cookie_path}")

        with open(cookie_path, 'r', encoding='utf-8') as handle:
            exported = json.load(handle)

        return convert_cookie_editor_to_playwright(exported, logger=self.logger)

    def _load_from_env(self, env_var_name: str) -> List[Dict]:
        """Load cookies from a key/value string stored in an environment variable."""
        raw_value = clean_env_value(os.getenv(env_var_name))

        if not raw_value:
            raise ValueError(f"环境变量 {env_var_name} 不存在或为空")

        return convert_kv_to_playwright(
            raw_value,
            default_domain=".google.com",
            logger=self.logger,
        )

    def get_all_sources(self) -> List[CookieSource]:
        """Return every detected cookie source."""
        return self.detect_all_sources()

    def clear_cache(self):
        """Drop every cached cookie list."""
        self._cookie_cache.clear()
        if self.logger:
            self.logger.debug("Cookie 缓存已清空")

    def get_source_summary(self) -> Dict[str, int]:
        """
        Summarise the detected cookie sources.

        Returns:
            Dict with the total number of sources and the count per type.
        """
        sources = self.detect_all_sources()
        return {
            "total": len(sources),
            "files": sum(1 for item in sources if item.type == "file"),
            "env_vars": sum(1 for item in sources if item.type == "env_var"),
        }
utils/logger.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
def setup_logging(log_file, prefix=None, level=logging.INFO):
    """
    Configure the application logger to write to a file and to the console.

    Every call replaces the handlers of the shared 'my_app_logger' logger so
    that each process can install its own prefix.

    :param log_file: Path of the log file.
    :param prefix: (optional) Text inserted before every log message to
        identify where it comes from.
    :param level: Log level applied to the logger and both handlers.
    :return: The configured logger.
    """
    logger = logging.getLogger('my_app_logger')
    logger.setLevel(level)

    # Close the old handlers before dropping them; merely clearing the list
    # would leak one open log file per call.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    base_format = '%(asctime)s - %(process)d - %(levelname)s - %(message)s'

    if prefix:
        # The prefix ends up inside a %-style format string, so a literal '%'
        # (e.g. in a cookie file name) must be escaped.
        safe_prefix = str(prefix).replace('%', '%%')
        log_format = f'%(asctime)s - %(process)d - %(levelname)s - {safe_prefix} - %(message)s'
    else:
        log_format = base_format

    formatter = logging.Formatter(log_format)

    # Messages contain non-ASCII text; do not depend on the locale's encoding.
    fh = logging.FileHandler(log_file, encoding='utf-8')
    ch = logging.StreamHandler()
    for handler in (fh, ch):
        handler.setLevel(level)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    logger.propagate = False

    return logger
utils/paths.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from functools import lru_cache
3
+ from pathlib import Path
4
+
5
+
6
@lru_cache(maxsize=1)
def project_root() -> Path:
    """
    Return the repository root so callers can build absolute paths that do not
    depend on the current working directory.

    Resolution order:
      1. The ``CAMOUFOX_PROJECT_ROOT`` environment variable, when set.
      2. The nearest ancestor of this file that contains a ``cookies`` entry.
      3. The directory that contains this module's package directory.

    The result is cached for the lifetime of the process.
    """
    env_root = os.getenv("CAMOUFOX_PROJECT_ROOT")
    if env_root:
        return Path(env_root).expanduser().resolve()

    current = Path(__file__).resolve()
    ancestors = current.parents
    for parent in ancestors:
        if (parent / "cookies").exists():
            return parent

    # No marker directory found. This module lives at <root>/utils/paths.py,
    # so the root is the second ancestor (index 1). Index 2 would point above
    # the project, e.g. "/" for /app/utils/paths.py.
    return ancestors[min(1, len(ancestors) - 1)]
23
+
24
+
25
def logs_dir() -> Path:
    """Return the root-level directory used for log files and screenshots."""
    root = project_root()
    return root / "logs"
28
+
29
+
30
def cookies_dir() -> Path:
    """Return the root-level directory that holds the persistent cookie JSON files."""
    root = project_root()
    return root / "cookies"