hkfires committed on
Commit
4047340
·
verified ·
1 Parent(s): 3085164

fix(browser): make URL validation robust to redirects

Browse files
Files changed (2) hide show
  1. browser/instance.py +12 -4
  2. utils/url_helper.py +46 -0
browser/instance.py CHANGED
@@ -6,6 +6,7 @@ from browser.navigation import handle_successful_navigation
6
  from camoufox.sync_api import Camoufox
7
  from utils.paths import logs_dir
8
  from utils.common import parse_headless_mode, ensure_dir
 
9
 
10
 
11
  def run_browser_instance(config):
@@ -144,9 +145,13 @@ def run_browser_instance(config):
144
  logger.error("检测到Google登录页面(需要输入邮箱)。Cookie已完全失效。")
145
  page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_identifier_page_{diagnostic_tag}.png"))
146
  return
147
- elif expected_url.split('?')[0] in final_url:
148
-
149
- logger.info("URL正确。现在等待页面完成初始加载...")
 
 
 
 
150
 
151
  # --- NEW ROBUST STRATEGY: Wait for the loading spinner to disappear ---
152
  # This is the key to solving the race condition. The error message or
@@ -197,7 +202,10 @@ def run_browser_instance(config):
197
  page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_chooser_click_failed_{diagnostic_tag}.png"))
198
  return
199
  else:
200
- logger.error(f"导航到了一个意外的URL: {final_url}")
 
 
 
201
  page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_unexpected_url_{diagnostic_tag}.png"))
202
  return
203
 
 
6
  from camoufox.sync_api import Camoufox
7
  from utils.paths import logs_dir
8
  from utils.common import parse_headless_mode, ensure_dir
9
+ from utils.url_helper import extract_url_path
10
 
11
 
12
  def run_browser_instance(config):
 
145
  logger.error("检测到Google登录页面(需要输入邮箱)。Cookie已完全失效。")
146
  page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_identifier_page_{diagnostic_tag}.png"))
147
  return
148
+
149
+ # 提取路径部分进行匹配(允许域名重定向)
150
+ expected_path = extract_url_path(expected_url).split('?')[0]
151
+ final_path = extract_url_path(final_url)
152
+
153
+ if expected_path and expected_path in final_path:
154
+ logger.info(f"URL验证通过。预期路径: {expected_path}, 最终URL: {final_url}")
155
 
156
  # --- NEW ROBUST STRATEGY: Wait for the loading spinner to disappear ---
157
  # This is the key to solving the race condition. The error message or
 
202
  page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_chooser_click_failed_{diagnostic_tag}.png"))
203
  return
204
  else:
205
+ logger.error(f"导航到了意外的URL")
206
+ logger.error(f" 预期路径: {expected_path}")
207
+ logger.error(f" 最终URL: {final_url}")
208
+ logger.error(f" 最终路径: {final_path}")
209
  page.screenshot(path=os.path.join(screenshot_dir, f"FAIL_unexpected_url_{diagnostic_tag}.png"))
210
  return
211
 
utils/url_helper.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ URL处理辅助函数
3
+
4
+ 提供URL解析和路径提取功能,用于导航验证中的域名无关匹配。
5
+ """
6
+
7
+ from urllib.parse import urlparse
8
+
9
+
10
def extract_url_path(url: str) -> str:
    """Return the path, query and fragment of *url*, dropping scheme and host.

    Used when validating navigation, so that a redirect to a different
    domain still matches as long as the path part is the same.

    Args:
        url: Full URL string.

    Returns:
        Path + query + fragment, e.g. "/apps/drive/123?param=value#section".
        An empty string if *url* is empty, is not a ``str``, or cannot be
        parsed.

    Examples:
        >>> extract_url_path("https://ai.studio/apps/drive/123?param=value")
        '/apps/drive/123?param=value'

        >>> extract_url_path("https://aistudio.google.com/apps/drive/123")
        '/apps/drive/123'

        >>> extract_url_path("https://example.com/path")
        '/path'
    """
    # Treat anything that is not a non-empty str as "invalid". Without this
    # guard a bytes URL would make urlparse return bytes components and the
    # function could hand back bytes despite its declared str return type.
    if not isinstance(url, str) or not url:
        return ""

    # Keep the try body minimal: for str input urlparse only signals a
    # malformed URL (e.g. an unbalanced IPv6 bracket) with ValueError.
    try:
        parsed = urlparse(url)
    except ValueError:
        return ""

    result = parsed.path
    if parsed.query:
        result += "?" + parsed.query
    if parsed.fragment:
        result += "#" + parsed.fragment
    return result