Spaces:
Running
Running
fix(browser): make URL validation robust to redirects
Browse files
- browser/instance.py +12 -4
- utils/url_helper.py +46 -0
browser/instance.py
CHANGED
|
@@ -6,6 +6,7 @@ from browser.navigation import handle_successful_navigation
|
|
| 6 |
from camoufox.sync_api import Camoufox
|
| 7 |
from utils.paths import logs_dir
|
| 8 |
from utils.common import parse_headless_mode, ensure_dir
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
def run_browser_instance(config):
|
|
@@ -144,9 +145,13 @@ def run_browser_instance(config):
|
|
| 144 |
logger.error("检测到Google登录页面(需要输入邮箱)。Cookie已完全失效。")
|
| 145 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_identifier_page_{diagnostic_tag}.png"))
|
| 146 |
return
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
# --- NEW ROBUST STRATEGY: Wait for the loading spinner to disappear ---
|
| 152 |
# This is the key to solving the race condition. The error message or
|
|
@@ -197,7 +202,10 @@ def run_browser_instance(config):
|
|
| 197 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_chooser_click_failed_{diagnostic_tag}.png"))
|
| 198 |
return
|
| 199 |
else:
|
| 200 |
-
logger.error(f"
|
|
|
|
|
|
|
|
|
|
| 201 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_unexpected_url_{diagnostic_tag}.png"))
|
| 202 |
return
|
| 203 |
|
|
|
|
| 6 |
from camoufox.sync_api import Camoufox
|
| 7 |
from utils.paths import logs_dir
|
| 8 |
from utils.common import parse_headless_mode, ensure_dir
|
| 9 |
+
from utils.url_helper import extract_url_path
|
| 10 |
|
| 11 |
|
| 12 |
def run_browser_instance(config):
|
|
|
|
| 145 |
logger.error("检测到Google登录页面(需要输入邮箱)。Cookie已完全失效。")
|
| 146 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_identifier_page_{diagnostic_tag}.png"))
|
| 147 |
return
|
| 148 |
+
|
| 149 |
+
# 提取路径部分进行匹配(允许域名重定向)
|
| 150 |
+
expected_path = extract_url_path(expected_url).split('?')[0]
|
| 151 |
+
final_path = extract_url_path(final_url)
|
| 152 |
+
|
| 153 |
+
if expected_path and expected_path in final_path:
|
| 154 |
+
logger.info(f"URL验证通过。预期路径: {expected_path}, 最终URL: {final_url}")
|
| 155 |
|
| 156 |
# --- NEW ROBUST STRATEGY: Wait for the loading spinner to disappear ---
|
| 157 |
# This is the key to solving the race condition. The error message or
|
|
|
|
| 202 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_chooser_click_failed_{diagnostic_tag}.png"))
|
| 203 |
return
|
| 204 |
else:
|
| 205 |
+
logger.error(f"导航到了意外的URL。")
|
| 206 |
+
logger.error(f" 预期路径: {expected_path}")
|
| 207 |
+
logger.error(f" 最终URL: {final_url}")
|
| 208 |
+
logger.error(f" 最终路径: {final_path}")
|
| 209 |
page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_unexpected_url_{diagnostic_tag}.png"))
|
| 210 |
return
|
| 211 |
|
utils/url_helper.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
URL处理辅助函数
|
| 3 |
+
|
| 4 |
+
提供URL解析和路径提取功能,用于导航验证中的域名无关匹配。
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from urllib.parse import urlparse
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def extract_url_path(url: str) -> str:
    """Return the path, query, and fragment of *url*, dropping scheme and host.

    Used to verify that a navigation landed on the expected page while
    tolerating redirects between domains.

    Args:
        url: Full URL string.

    Returns:
        The path plus the optional ``?query`` and ``#fragment`` parts (for
        example ``"/apps/drive/123?param=value#section"``). An empty string
        is returned when *url* is empty, is not a ``str``, or cannot be
        parsed.

    Examples:
        >>> extract_url_path("https://ai.studio/apps/drive/123?param=value")
        '/apps/drive/123?param=value'

        >>> extract_url_path("https://aistudio.google.com/apps/drive/123")
        '/apps/drive/123'

        >>> extract_url_path("https://example.com/path")
        '/path'
    """
    # Reject non-str input up front: urlparse() also accepts bytes and would
    # hand back a bytes path, which violates the declared ``-> str`` contract.
    if not url or not isinstance(url, str):
        return ""

    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises ValueError for malformed URLs (e.g. an unbalanced
        # IPv6 bracket in the host); treat those as "no path".
        return ""

    result = parsed.path
    if parsed.query:
        result += "?" + parsed.query
    if parsed.fragment:
        result += "#" + parsed.fragment
    return result
|