Togmal-demo / togmal /context_analyzer.py
HeTalksInMaths
Initial commit: ToGMAL Prompt Difficulty Analyzer with real MMLU data
f9b1ad5
raw
history blame
3.85 kB
"""
Context analyzer for domain detection
Determines which limitation checks are relevant
"""
import re
from typing import List, Dict, Any, Optional
# from collections import Counter
# Lower-case trigger keywords for each supported domain.
# Keys are the domain names reported to callers; values are the terms that
# are looked up inside (lower-cased) message text.
DOMAIN_KEYWORDS: Dict[str, List[str]] = {
    "mathematics": [
        "math", "calculus", "algebra", "geometry", "proof", "theorem", "equation",
    ],
    "physics": [
        "physics", "force", "energy", "quantum", "relativity", "mechanics",
    ],
    "medicine": [
        "medical", "diagnosis", "treatment", "symptom", "disease", "patient", "doctor",
    ],
    "healthcare": [
        "health", "medication", "drug", "therapy", "clinical",
    ],
    "law": [
        "legal", "law", "court", "regulation", "compliance", "attorney", "contract",
    ],
    "finance": [
        "financial", "investment", "stock", "portfolio", "trading", "tax",
    ],
    "coding": [
        "code", "programming", "function", "class", "debug", "git", "api",
    ],
    "file_system": [
        "file", "directory", "path", "write", "delete", "permission",
    ],
}
async def analyze_conversation_context(
    conversation_history: Optional[List[Dict[str, str]]] = None,
    user_context: Optional[Dict[str, Any]] = None,
    threshold: float = 0.3
) -> List[str]:
    """
    Analyze a conversation to detect relevant domains.

    The result is the union of three strategies:
      1. Keyword scoring over the whole history; a domain is kept when its
         normalized score is >= ``threshold``.
      2. The ``"industry"`` hint in ``user_context``, mapped to domains.
      3. Any domain with a keyword occurring in the most recent message,
         regardless of ``threshold``.

    Args:
        conversation_history: Recent messages [{"role": "user", "content": "..."}]
        user_context: User metadata {"industry": "healthcare", "role": "developer"}
        threshold: Minimum confidence to include domain (0-1)

    Returns:
        Alphabetically sorted list of detected domains, e.g.,
        ["coding", "mathematics"]. Empty when nothing is detected.
    """
    detected_domains = set()

    if conversation_history:
        # Strategy 1: keyword matching across the whole conversation.
        domain_scores = _score_domains_by_keywords(conversation_history)
        detected_domains.update(
            domain for domain, score in domain_scores.items() if score >= threshold
        )

        # Strategy 3: always include a domain mentioned in the last message.
        # ``content`` may be missing or explicitly None; treat both as empty
        # text instead of failing on ``None.lower()``.
        last_content = conversation_history[-1].get("content") or ""
        last_message = str(last_content).lower()
        # NOTE: this is a plain substring test, so e.g. "tax" also matches
        # "syntax" and "law" also matches "flaw".
        for domain, keywords in DOMAIN_KEYWORDS.items():
            if any(kw in last_message for kw in keywords):
                detected_domains.add(domain)

    # Strategy 2: user context hints.
    if user_context and "industry" in user_context:
        industry = str(user_context["industry"]).lower()
        # Map industry to domains; only the first matching branch applies.
        if "health" in industry or "medical" in industry:
            detected_domains.update(["medicine", "healthcare"])
        elif "tech" in industry or "software" in industry:
            detected_domains.add("coding")
        elif "finance" in industry or "bank" in industry:
            detected_domains.add("finance")

    # Sets of strings iterate in an order that can differ between interpreter
    # runs (hash randomization); sort so callers get a reproducible result.
    return sorted(detected_domains)
def _score_domains_by_keywords(
    conversation_history: List[Dict[str, str]],
    recent_weight: float = 2.0,
    *,
    domain_keywords: Optional[Dict[str, List[str]]] = None,
) -> Dict[str, float]:
    """
    Score domains based on keyword frequency (recent messages weighted higher).

    For every message, each domain gains one point per distinct keyword that
    occurs (as a substring) in the lower-cased content, multiplied by a
    recency weight. The weight grows linearly with the message index from 1.0
    for the first message towards ``recent_weight``; the last message gets
    ``1 + (n - 1) / n * (recent_weight - 1)``, i.e. slightly below
    ``recent_weight``.

    Args:
        conversation_history: Messages in chronological order; only the
            "content" entry of each message is read.
        recent_weight: Upper bound the recency weight approaches for the
            newest messages.
        domain_keywords: Optional {domain: [keyword, ...]} mapping to score
            against. Defaults to the module-level ``DOMAIN_KEYWORDS``.

    Returns:
        Dict of {domain: score} normalized 0-1 by the highest-scoring domain.
        Empty for an empty history; all scores are 0.0 when no domain
        accumulates a positive score.
    """
    if domain_keywords is None:
        domain_keywords = DOMAIN_KEYWORDS

    domain_counts: Dict[str, float] = {}
    total_messages = len(conversation_history)

    for i, message in enumerate(conversation_history):
        # "content" may be absent or None; score such messages as empty text.
        content = str(message.get("content") or "").lower()
        # Weight recent messages higher.
        recency_weight = 1.0 + (i / total_messages) * (recent_weight - 1.0)
        for domain, keywords in domain_keywords.items():
            matches = sum(1 for kw in keywords if kw in content)
            domain_counts[domain] = (
                domain_counts.get(domain, 0.0) + matches * recency_weight
            )

    # Normalize scores. When nothing matched, the maximum is 0.0 and dividing
    # by it would raise ZeroDivisionError, so report explicit zero scores.
    max_count = max(domain_counts.values(), default=0.0)
    if max_count <= 0.0:
        return {domain: 0.0 for domain in domain_counts}

    return {
        domain: count / max_count
        for domain, count in domain_counts.items()
    }