Spaces:

Eliot0110
/

Travel_Assistant

Sleeping

App Files Files Community

Eliot0110 committed on Aug 6

Commit

b713501

1 Parent(s): 3327cc4

improve: reverse to old version

Browse files

Files changed (2) hide show

modules/info_extractor.py +175 -115
modules/response_generator.py +5 -51

modules/info_extractor.py CHANGED Viewed

@@ -4,7 +4,6 @@ from utils.logger import log
 import jieba
 from typing import List, Tuple
 import copy
 class InfoExtractor:
     def __init__(self):
@@ -341,6 +340,7 @@ class InfoExtractor:
                 existing_info[key] = value
         return existing_info
     def _tokenize_message(self, text: str) -> list:
         """智能分词，支持中英文混合"""
@@ -754,129 +754,189 @@ class InfoExtractor:
         return result
     def _extract_budget_from_tokens(self, tokens: list) -> dict:
         result = {}
-        text = "".join(tokens).lower().strip()
-        # --- 1. 统一提取金额和货币 ---
-        # 按优先级排列正则表达式，越精确的模式越靠前
-        # 模式捕获: (金额数字, 乘数单位[千/万/k/w], 货币单位[元/欧/usd...])
-        patterns = [
-            {'regex': r'¥\s*(\d+\.?\d*)', 'groups': {'amount': 1}, 'currency': 'RMB'},
-            {'regex': r'€\s*(\d+\.?\d*)', 'groups': {'amount': 1}, 'currency': 'EUR'},
-            {'regex': r'\$\s*(\d+\.?\d*)', 'groups': {'amount': 1}, 'currency': 'USD'},
-            {'regex': r'£\s*(\d+\.?\d*)', 'groups': {'amount': 1}, 'currency': 'GBP'},
-            {'regex': r'(usd|rmb|eur|gbp|chf|jpy)\s*(\d+\.?\d*)\s*(百|hundred|千|k|thousand|万|w)?',
-            'groups': {'currency': 1, 'amount': 2, 'multiplier': 3}},
-            {'regex': r'(\d+\.?\d*)\s*(十|百|hundred|千|k|thousand|万|w)?\s*(人民币|元|块|块钱|rmb)',
-            'groups': {'amount': 1, 'multiplier': 2}, 'currency': 'RMB'},
-            {'regex': r'(\d+\.?\d*)\s*(十|百|hundred|千|k|thousand|万|w)?\s*(欧元|欧|euros?|eur)',
-            'groups': {'amount': 1, 'multiplier': 2}, 'currency': 'EUR'},
-            {'regex': r'(\d+\.?\d*)\s*(十|百|hundred|千|k|thousand|万|w)?\s*(美元|dollars?|dollar|usd)',
-            'groups': {'amount': 1, 'multiplier': 2}, 'currency': 'USD'},
-            {'regex': r'(\d+\.?\d*)\s*(十|百|hundred|千|k|thousand|万|w)?\s*(英镑|pounds?|pound|gbp)',
-            'groups': {'amount': 1, 'multiplier': 2}, 'currency': 'GBP'},
-            {'regex': r'(\d+\.?\d*)\s*(十|百|hundred|千|k|thousand|万|w)?\s*(日元|yen|jpy)',
-            'groups': {'amount': 1, 'multiplier': 2}, 'currency': 'JPY'},
-            {'regex': r'(\d+\.?\d*)\s*(十|百|hundred|千|k|thousand|万|w)?\s*(瑞郎|瑞士法郎|chf)',
-            'groups': {'amount': 1, 'multiplier': 2}, 'currency': 'CHF'},
-            {'regex': r'(\d+\.?\d+)\s*(十|百|hundred|千|k|thousand|万|w)',
-            'groups': {'amount': 1, 'multiplier': 2}, 'context_needed': True},
-            {'regex': r'(\d+\.?\d+)',
-            'groups': {'amount': 1}, 'context_needed': True},
-        ]
-        amount_found = False
-        for p in patterns:
-            match = re.search(p['regex'], text)
-            if match:
-                # 检查是否是纯数字模式，是的话需要上下文
-                if p.get('context_needed', False):
-                    budget_indicators = ['预算', '花费', '费用', '成本', '开销', '支出', 'budget', 'cost']
-                    if not any(indicator in text for indicator in budget_indicators):
-                        continue # 如果没有上下文，则跳过纯数字匹配
-                groups = match.groups()
-                # 提取金额
-                amount_group_index = p['groups']['amount'] - 1
-                amount = float(groups[amount_group_index])
-                # 确定乘数
-                multiplier = 1
-                multiplier_token = ''
-                if 'multiplier' in p['groups']:
-                    multiplier_group_index = groups[p['groups']['multiplier']-1]
-                    if multiplier_group_index < len(groups) and groups[multiplier_group_index]:
-                        multiplier_token = groups[multiplier_group_index]
-                if '十' in multiplier_token:
-                    multiplier = 10
-                elif '百' in multiplier_token or 'hundred' in multiplier_token:
-                    multiplier = 100
-                elif '千' in multiplier_token or 'k' in multiplier_token or 'thousand' in multiplier_token:
-                    multiplier = 1000
-                elif '万' in multiplier_token or 'w' in multiplier_token:
-                    multiplier = 10000
-                final_amount = amount * multiplier
-                result['amount'] = int(final_amount)
-                # 确定货币
-                currency_token = ''
-                if p.get('currency'):
-                    result['currency'] = p['currency']
-                elif 'currency_str' in p['groups']:
-                    currency_group_index = p['groups']['currency_str'] - 1
-                    currency_token = groups[currency_group_index]
-                    currency_map = {'rmb': 'RMB', 'usd': 'USD', 'eur': 'EUR', 'gbp': 'GBP', 'chf': 'CHF', 'jpy': 'JPY'}
-                if currency_token in currency_map:
-                    result['currency'] = currency_map[currency_token]
-                amount_found = True
-                break
-        # 2. 查找预算类型（此部分逻辑与金额完全无关）
         budget_type_keywords = {
-            'economy': ['经济', '便宜', '省钱', '实惠', '节省', '穷游', '学生', '背包客', '预算有限', '性价比', 'budget', 'cheap'],
-            'comfortable': ['舒适', '中等', '适中', '标准', '普通', '中档', '合理', 'comfortable', 'standard'],
-            'luxury': ['豪华', '奢华', '高端', '顶级', '精品', '不差钱', '任性', '土豪', '五星', 'luxury', 'premium']
         }
-        # 查找最能代表预算类型的关键词
-        found_type_keyword = ""
-        found_type = ""
         for token in tokens:
             token_lower = token.lower()
             for budget_type, keywords in budget_type_keywords.items():
-                for keyword in keywords:
-                    if keyword in token_lower:
-                        # 优先选择更长的、更具体的关键词作为描述
-                        if len(keyword) > len(found_type_keyword):
-                            found_type_keyword = keyword
-                            found_type = budget_type
-        if found_type:
-            result["type"] = found_type
-            result["description"] = found_type_keyword # 使用找到的最匹配的关键词作为描述
         if not result.get("amount"):
-            chinese_money_mapping = {
-                '一万': 10000, '两万': 20000, '三万': 30000, '四万': 40000, '五万': 50000,
-                '一千': 1000, '两千': 2000, '三千': 3000, '四千': 4000, '五千': 5000,
-                '六千': 6000, '七千': 7000, '八千': 8000, '九千': 9000,
-                '一百': 100, '二百': 200, '三百': 300, '四百': 400, '五百': 500,
-                '六百': 600, '七百': 700, '八百': 800, '九百': 900,
-                '十': 10, '二十': 20, '三十': 30, '四十': 40, '五十': 50,
-                '六十': 60, '七十': 70, '八十': 80, '九十': 90
-            }
-            sorted_keys = sorted(chinese_money_mapping.keys(), key=len, reverse=True)
-            for name in sorted_keys:
-                if name in text:
-                    result['amount'] = chinese_money_mapping[name]
                     break
         return result
     # 保持向后兼容的验证方法

 import jieba
 from typing import List, Tuple
 import copy
 class InfoExtractor:
     def __init__(self):
                 existing_info[key] = value
         return existing_info
     def _tokenize_message(self, text: str) -> list:
         """智能分词，支持中英文混合"""
         return result
     def _extract_budget_from_tokens(self, tokens: list) -> dict:
+        """从tokens中提取预算信息"""
         result = {}
+        # 1. 查找金额
+        for i, token in enumerate(tokens):
+            amount = None
+            currency = "RMB"  # 默认货币
+            # 处理包含货币的token "2000欧", "5000元"
+            currency_patterns = [
+                (r'(\d+(?:\.\d+)?)欧(?:元)?', 'EUR'),
+                (r'(\d+(?:\.\d+)?)元', 'RMB'),
+                (r'(\d+(?:\.\d+)?)块(?:钱)?', 'RMB'),
+                (r'(\d+(?:\.\d+)?)人民币', 'RMB'),
+                (r'(\d+(?:\.\d+)?)美元', 'USD'),
+                (r'(\d+(?:\.\d+)?)英镑', 'GBP'),
+                (r'(\d+(?:\.\d+)?)瑞(?:士)?法郎', 'CHF'),
+                (r'(\d+(?:\.\d+)?)日元', 'JPY'),
+                (r'(\d+(?:\.\d+)?)韩元', 'KRW'),
+                (r'¥(\d+(?:\.\d+)?)', 'RMB'),
+                (r'€(\d+(?:\.\d+)?)', 'EUR'),
+                (r'\$(\d+(?:\.\d+)?)', 'USD'),
+                (r'£(\d+(?:\.\d+)?)', 'GBP'),
+                (r'(\d+(?:\.\d+)?)rmb', 'RMB'),
+                (r'(\d+(?:\.\d+)?)usd', 'USD'),
+                (r'(\d+(?:\.\d+)?)eur', 'EUR'),
+                (r'(\d+(?:\.\d+)?)gbp', 'GBP'),
+                (r'(\d+(?:\.\d+)?)chf', 'CHF'),
+            ]
+            for pattern, curr in currency_patterns:
+                match = re.search(pattern, token.lower())
+                if match:
+                    amount = float(match.group(1))
+                    currency = curr
+                    break
+            # 处理纯数字token（需要查看上下文）
+            if not amount and re.match(r'^\d+(?:\.\d+)?$', token):
+                number = float(token)
+                # 检查前面的token是否有预算相关词汇
+                budget_indicators = ['预算', '花费', '费用', '成本', '开销', '支出', '总共', '一共', 'budget', 'cost', 'spend']
+                has_budget_context = False
+                if i > 0 and tokens[i-1] in budget_indicators:
+                    has_budget_context = True
+                elif i > 1 and tokens[i-2] in budget_indicators:
+                    has_budget_context = True
+                # 检查后面是否有货币单位
+                if i < len(tokens) - 1:
+                    next_token = tokens[i + 1].lower()
+                    currency_units = {
+                        '元': 'RMB', '块': 'RMB', '钱': 'RMB', '人民币': 'RMB',
+                        '欧': 'EUR', '欧元': 'EUR', '美元': 'USD', '英镑': 'GBP',
+                        '瑞郎': 'CHF', '日元': 'JPY', '韩元': 'KRW',
+                        'rmb': 'RMB', 'usd': 'USD', 'eur': 'EUR', 'gbp': 'GBP', 'chf': 'CHF'
+                    }
+                    if next_token in currency_units:
+                        amount = number
+                        currency = currency_units[next_token]
+                        has_budget_context = True
+                # 如果有预算上下文但没有明确货币单位，根据数字大小推断
+                if has_budget_context and not amount:
+                    if number < 100:  # 可能是欧元或美元
+                        # 查看是否有欧洲城市上下文
+                        has_european_context = any(self._normalize_city_name(t) for t in tokens)
+                        if has_european_context:
+                            currency = 'EUR'
+                        else:
+                            currency = 'USD'
+                    else:
+                        currency = 'RMB'  # 大数字更可能是人民币
+                    amount = number
+            # 处理万、千等单位
+            if amount:
+                # 检查是否有万、千修饰符
+                if i > 0:
+                    prev_token = tokens[i-1]
+                    if '万' in prev_token or 'w' in prev_token.lower():
+                        amount *= 10000
+                    elif '千' in prev_token or 'k' in prev_token.lower():
+                        amount *= 1000
+                elif i < len(tokens) - 1:
+                    next_token = tokens[i+1]
+                    if '万' in next_token or 'w' in next_token.lower():
+                        amount *= 10000
+                    elif '千' in next_token or 'k' in next_token.lower():
+                        amount *= 1000
+                if amount > 0:
+                    result["amount"] = int(amount)
+                    result["currency"] = currency
+                    break
+        # 2. 查找预算类型
         budget_type_keywords = {
+            'economy': [
+                '经济', '便宜', '省钱', '实惠', '节省', '穷游', '学生', '青年',
+                '预算有限', '钱不多', '不贵', '划算', '性价比', '背包客',
+                '简单', '基础', '低成本', '节约', 'budget', 'cheap', 'economy', 'affordable'
+            ],
+            'comfortable': [
+                '舒适', '中等', '适中', '一般', '标准', '普通', '正常', '常规',
+                '中档', '中级', '合理', '平均', '中间档次', 'comfortable', 'standard', 'moderate'
+            ],
+            'luxury': [
+                '豪华', '奢华', '高端', '顶级', '精品', '奢侈', '贵族', '皇家',
+                '贵一点', '不差钱', '任性', '土豪', '有钱', '五星', 'VIP',
+                'luxury', 'premium', 'high-end', 'expensive', 'fancy'
+            ]
         }
         for token in tokens:
             token_lower = token.lower()
             for budget_type, keywords in budget_type_keywords.items():
+                if any(keyword in token_lower for keyword in keywords):
+                    result["type"] = budget_type
+                    # 找到第一个匹配的关键词作为描述
+                    for keyword in keywords:
+                        if keyword in token_lower:
+                            result["description"] = keyword if len(keyword) > 2 else token
+                            break
+                    break
+            if result.get("type"):
+                break
+        # 3. 如果有金额但没有类型，根据金额推断类型
+        if result.get("amount") and not result.get("type"):
+            amount = result["amount"]
+            currency = result.get("currency", "RMB")
+            # 根据欧洲旅行成本设置阈值
+            if currency == "EUR":
+                if amount < 1500:  # 总预算
+                    result["type"] = "economy"
+                    result["description"] = "经济预算"
+                elif amount < 4000:
+                    result["type"] = "comfortable"
+                    result["description"] = "舒适预算"
+                else:
+                    result["type"] = "luxury"
+                    result["description"] = "豪华预算"
+            elif currency == "USD":
+                if amount < 2000:
+                    result["type"] = "economy"
+                    result["description"] = "经济预算"
+                elif amount < 5000:
+                    result["type"] = "comfortable"
+                    result["description"] = "舒适预算"
+                else:
+                    result["type"] = "luxury"
+                    result["description"] = "豪华预算"
+            elif currency == "RMB":
+                if amount < 8000:
+                    result["type"] = "economy"
+                    result["description"] = "经济预算"
+                elif amount < 20000:
+                    result["type"] = "comfortable"
+                    result["description"] = "舒适预算"
+                else:
+                    result["type"] = "luxury"
+                    result["description"] = "豪华预算"
+        # 4. 处理中文数字金额
+        chinese_money_mapping = {
+            '一千': 1000, '两千': 2000, '三千': 3000, '四千': 4000, '五千': 5000,
+            '六千': 6000, '七千': 7000, '八千': 8000, '九千': 9000,
+            '一万': 10000, '两万': 20000, '三万': 30000, '四万': 40000, '五万': 50000
+        }
         if not result.get("amount"):
+            for token in tokens:
+                if token in chinese_money_mapping:
+                    result["amount"] = chinese_money_mapping[token]
+                    result["currency"] = "RMB"
                     break
         return result
     # 保持向后兼容的验证方法

modules/response_generator.py CHANGED Viewed

@@ -110,62 +110,16 @@ class ResponseGenerator:
             return "抱歉，我在处理您的请求时遇到了问题，请稍后再试。"
     def _generate_vivid_acknowledgement(self, extracted_info: dict, session_state: SessionManager) -> str:
-        """
-        【更新版本】根据最新提取的信息，生成一句生动的、非模板化的确认语。
-        此版本已重构，以处理更复杂的预算逻辑，并实现主动询问。
-        """
-        # --- 目的地确认 ---
         if "destination" in extracted_info and extracted_info["destination"]:
-            dest_info = extracted_info["destination"]
-            dest_name = dest_info.get('name')
-            if not dest_name: return "" # 如果没有提取到有效名称，则不作回复
-            # 优先使用预设的城市描述，让回复更生动
             if dest_name in self.city_descriptions:
                 feature = random.choice(self.city_descriptions[dest_name])
                 return f"{dest_name}！一个绝佳的选择，那可是著名的'{feature}'。目的地已为您记录。"
             else:
-                # 修复了原代码中从列表获取国家信息的bug
-                dest_country = dest_info.get('country')
-                if dest_country:
-                    return f"好的，目的地已确认为 {dest_country} 的 {dest_name}！一个充满魅力的地方。"
-                else:
-                    return f"好的，目的地 {dest_name} 已记录！听起来是个很棒的地方。"
-        # --- 旅行时长确认 ---
-        if "duration" in extracted_info and extracted_info["duration"]:
-            # 使用 .get() 增加代码健壮性
-            duration_description = extracted_info["duration"].get('description', '一段美好的时光')
-            return f"了解，{duration_description}的行程，时间很充裕，可以深度体验了！"
-        # --- 预算确认（核心修改部分） ---
-        if "budget" in extracted_info and extracted_info["budget"]:
-            budget_dict = extracted_info["budget"]
-            amount = budget_dict.get("amount")
-            currency = budget_dict.get("currency")
-            budget_type_desc = budget_dict.get("description") # 例如：“穷游”
-            # 场景一：用户提供了金额，但没说货币单位 -> 主动追问货币
-            if amount and not currency:
-                return f"收到，您的预算是 {amount}。请问这是以什么货币计算的呢？（例如：人民币、欧元、美元）"
-            # 场景二：用户提供了完整的金额和货币 -> 优雅地确认
-            if amount and currency:
-                if budget_type_desc:
-                    # 例如，用户说：“我预算5000元，想穷游”
-                    return f"好的，您「{budget_type_desc}」的预算（{amount} {currency}）已为您记录，我会为您规划性价比最高的方案。"
-                else:
-                    # 例如，用户说：“我预算5000元”
-                    return f"好的，预算 {amount} {currency} 已为您记录，我会为您规划性价比最高的方案。"
-            # 场景三：用户只提了预算类型，没说金额 -> 只确认风格
-            if budget_type_desc and not amount:
-                # 例如，用户说：“我这次想穷游”
-                return f"了解，您偏爱「{budget_type_desc}」的旅行方式，我会按这个风格为您规划。"
-        # 如果没有提取到任何新信息，返回空字符串
-        return ""
     def _get_dynamic_next_question(self, session_state: SessionManager) -> str:
@@ -175,7 +129,7 @@ class ResponseGenerator:
         if not session_state.get('duration'):
             return "计划玩几天呢？"
         if not session_state.get('budget'):
-            return "您的旅行预算大概是多少？"
         return "" # 所有信息都已收集

             return "抱歉，我在处理您的请求时遇到了问题，请稍后再试。"
     def _generate_vivid_acknowledgement(self, extracted_info: dict, session_state: SessionManager) -> str:
         if "destination" in extracted_info and extracted_info["destination"]:
+            dest_name = extracted_info["destination"]['name']
             if dest_name in self.city_descriptions:
                 feature = random.choice(self.city_descriptions[dest_name])
                 return f"{dest_name}！一个绝佳的选择，那可是著名的'{feature}'。目的地已为您记录。"
             else:
+                dest_country = extracted_info["destination"][0]['country']
+                return f"好的，目的地已确认为 {dest_country} 的 {dest_name}！一个充满魅力的地方。"
     def _get_dynamic_next_question(self, session_state: SessionManager) -> str:
         if not session_state.get('duration'):
             return "计划玩几天呢？"
         if not session_state.get('budget'):
+            return "您的旅行预算大概是多少？您可以金额+币种的格式输入,例如：5000元人民币 或 800 eur"
         return "" # 所有信息都已收集