Spaces:
Running
Running
feat(cookies): auto-detect JSON or KV cookie formats from sources
Browse files
- utils/cookie_handler.py +51 -1
- utils/cookie_manager.py +40 -10
utils/cookie_handler.py
CHANGED
|
@@ -94,4 +94,54 @@ def convert_kv_to_playwright(kv_string, default_domain=".google.com", logger=Non
|
|
| 94 |
if logger:
|
| 95 |
logger.debug(f"成功转换 Cookie: {name} -> domain={default_domain}")
|
| 96 |
|
| 97 |
-
return playwright_cookies
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
if logger:
|
| 95 |
logger.debug(f"成功转换 Cookie: {name} -> domain={default_domain}")
|
| 96 |
|
| 97 |
+
return playwright_cookies
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def auto_convert_to_playwright(cookie_data, default_domain=".google.com", logger=None):
    """
    Detect the format of ``cookie_data`` and convert it to Playwright cookies.

    Supported inputs:
        1. A list (JSON array, Cookie-Editor export format), handed to
           ``convert_cookie_editor_to_playwright``.
        2. A string containing the *text* of a JSON array; it is decoded and
           then handled exactly like case 1.
        3. A KV string ("name1=value1; name2=value2; ..."), handed to
           ``convert_kv_to_playwright``.

    Args:
        cookie_data: Cookie data, either a list (JSON format) or a str
            (JSON array text or KV format).
        default_domain (str): Domain forwarded to the KV converter. It is not
            forwarded for list input. Defaults to ".google.com".
        logger: Optional logger; when falsy nothing is logged.

    Returns:
        list: Whatever the selected converter returns, or an empty list when
        the string is empty or whitespace-only.

    Raises:
        ValueError: If ``cookie_data`` is neither a list nor a str.
    """
    # Local import so this function does not depend on the module's import block.
    import json

    if isinstance(cookie_data, str):
        # Strip surrounding whitespace before inspecting the content.
        cookie_str = cookie_data.strip()

        if not cookie_str:
            if logger:
                logger.warning("收到空的 Cookie 字符串")
            return []

        # A KV cookie string is never a valid JSON array, so only text that
        # decodes to a list is rerouted to the JSON branch below.
        decoded = None
        if cookie_str.startswith("["):
            try:
                decoded = json.loads(cookie_str)
            except ValueError:  # json.JSONDecodeError is a ValueError subclass
                decoded = None

        if not isinstance(decoded, list):
            if logger:
                logger.debug("检测到 KV 字符串格式的 Cookie 数据")
            return convert_kv_to_playwright(
                cookie_str,
                default_domain=default_domain,
                logger=logger,
            )

        cookie_data = decoded

    # JSON array format (Cookie-Editor export).
    if isinstance(cookie_data, list):
        if logger:
            logger.debug(f"检测到 JSON 数组格式的 Cookie 数据,共 {len(cookie_data)} 个条目")
        return convert_cookie_editor_to_playwright(cookie_data, logger=logger)

    # Anything else is an unrecognised format.
    error_msg = f"无法识别的 Cookie 数据格式: {type(cookie_data).__name__}"
    if logger:
        logger.error(error_msg)
    raise ValueError(error_msg)
|
utils/cookie_manager.py
CHANGED
|
@@ -8,7 +8,7 @@ import json
|
|
| 8 |
from dataclasses import dataclass
|
| 9 |
from typing import List, Dict, Optional
|
| 10 |
from utils.paths import cookies_dir
|
| 11 |
-
from utils.cookie_handler import
|
| 12 |
from utils.common import clean_env_value
|
| 13 |
|
| 14 |
@dataclass
|
|
@@ -144,29 +144,59 @@ class CookieManager:
|
|
| 144 |
return cookies
|
| 145 |
|
| 146 |
def _load_from_file(self, filename: str) -> List[Dict]:
|
| 147 |
-
"""
|
| 148 |
cookie_path = cookies_dir() / filename
|
| 149 |
|
| 150 |
if not os.path.exists(cookie_path):
|
| 151 |
raise FileNotFoundError(f"Cookie 文件不存在: {cookie_path}")
|
| 152 |
|
| 153 |
with open(cookie_path, 'r', encoding='utf-8') as f:
|
| 154 |
-
|
| 155 |
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
def _load_from_env(self, env_var_name: str) -> List[Dict]:
|
| 159 |
-
"""从环境变量加载 Cookie"""
|
| 160 |
env_value = clean_env_value(os.getenv(env_var_name))
|
| 161 |
|
| 162 |
if not env_value:
|
| 163 |
raise ValueError(f"环境变量 {env_var_name} 不存在或为空")
|
| 164 |
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
def get_all_sources(self) -> List[CookieSource]:
|
| 172 |
"""获取所有检测到的 Cookie 来源"""
|
|
|
|
| 8 |
from dataclasses import dataclass
|
| 9 |
from typing import List, Dict, Optional
|
| 10 |
from utils.paths import cookies_dir
|
| 11 |
+
from utils.cookie_handler import auto_convert_to_playwright
|
| 12 |
from utils.common import clean_env_value
|
| 13 |
|
| 14 |
@dataclass
|
|
|
|
| 144 |
return cookies
|
| 145 |
|
| 146 |
def _load_from_file(self, filename: str) -> List[Dict]:
    """Load cookies from a file, auto-detecting JSON or KV format.

    The file is looked up under ``cookies_dir()``. Its stripped content is
    first parsed as JSON; if that fails, the raw text is treated as a KV
    cookie string. Either way the data is passed to
    ``auto_convert_to_playwright`` with the ".google.com" default domain.

    Args:
        filename: Name of the cookie file relative to ``cookies_dir()``.

    Returns:
        The result of ``auto_convert_to_playwright``.

    Raises:
        FileNotFoundError: If the cookie file does not exist.
    """
    cookie_path = cookies_dir() / filename

    if not os.path.exists(cookie_path):
        raise FileNotFoundError(f"Cookie 文件不存在: {cookie_path}")

    # Read everything up front so the file is closed before any parsing.
    with open(cookie_path, 'r', encoding='utf-8') as f:
        file_content = f.read().strip()

    # Only the JSON parse is guarded: a decode error raised later, inside the
    # conversion, must not be mistaken for "this file is KV".
    try:
        cookie_data = json.loads(file_content)
    except json.JSONDecodeError:
        if self.logger:
            self.logger.info(f"文件 {filename} 不是有效的 JSON 格式,尝试作为 KV 格式解析")
        cookie_data = file_content

    return auto_convert_to_playwright(
        cookie_data,
        default_domain=".google.com",
        logger=self.logger,
    )
|
| 174 |
|
| 175 |
def _load_from_env(self, env_var_name: str) -> List[Dict]:
    """Load cookies from an environment variable, auto-detecting JSON or KV format.

    The variable's value is passed through ``clean_env_value``. It is first
    parsed as JSON; if that fails, the cleaned text is treated as a KV cookie
    string. Either way the data is passed to ``auto_convert_to_playwright``
    with the ".google.com" default domain.

    Args:
        env_var_name: Name of the environment variable to read.

    Returns:
        The result of ``auto_convert_to_playwright``.

    Raises:
        ValueError: If the cleaned value is falsy (variable missing or empty).
    """
    env_value = clean_env_value(os.getenv(env_var_name))

    if not env_value:
        raise ValueError(f"环境变量 {env_var_name} 不存在或为空")

    # Only the JSON parse is guarded: a decode error raised later, inside the
    # conversion, must not be mistaken for "this value is KV".
    try:
        cookie_data = json.loads(env_value)
    except json.JSONDecodeError:
        if self.logger:
            self.logger.debug(f"环境变量 {env_var_name} 不是有效的 JSON 格式,作为 KV 格式解析")
        cookie_data = env_value

    return auto_convert_to_playwright(
        cookie_data,
        default_domain=".google.com",
        logger=self.logger,
    )
|
| 200 |
|
| 201 |
def get_all_sources(self) -> List[CookieSource]:
|
| 202 |
"""获取所有检测到的 Cookie 来源"""
|