| """ | |
| Dynamically generate tools from ML clustering results | |
| """ | |
| from typing import List, Optional | |
| import json | |
| from pathlib import Path | |
| ML_TOOLS_CACHE_PATH = Path("./data/ml_discovered_tools.json") | |
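# Cache file layout (as written by update_ml_tools_cache below and read by
# get_ml_discovered_tools):
#   {
#     "updated_at": <pipeline timestamp>,
#     "patterns": [
#       {"id", "domain", "description", "confidence", "heuristic", "examples"},
#       ...
#     ]
#   }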
async def get_ml_discovered_tools(
    relevant_domains: Optional[List[str]] = None,
    min_confidence: float = 0.8
) -> List[dict]:
    """
    Load ML-discovered limitation checks from cache.

    Args:
        relevant_domains: Only return tools for these domains (None = all)
        min_confidence: Minimum confidence threshold

    Returns:
        List of dict definitions for dynamically discovered checks
    """
    if not ML_TOOLS_CACHE_PATH.exists():
        return []

    with open(ML_TOOLS_CACHE_PATH) as f:
        ml_patterns = json.load(f)

    tools = []
    for pattern in ml_patterns.get("patterns", []):
        domain = pattern.get("domain")

        # Filter by relevant domains
        if relevant_domains and domain not in relevant_domains:
            continue

        # Only include high-confidence patterns
        if float(pattern.get("confidence", 0)) < float(min_confidence):
            continue

        tools.append({
            "name": f"check_{pattern['id']}",
            "domain": domain,
            "description": pattern["description"],
            "inputSchema": {
                "type": "object",
                "properties": {
                    "prompt": {"type": "string"},
                    "response": {"type": "string"}
                },
                "required": ["prompt", "response"]
            },
            "heuristic": pattern.get("heuristic", ""),
            "examples": pattern.get("examples", [])
        })

    return tools
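
# Hypothetical convenience helper (not part of the original module): looks up a
# single discovered check by the "check_<id>" name generated above. It only
# uses get_ml_discovered_tools, with min_confidence=0.0 so every cached
# pattern is considered.
async def find_discovered_tool(name: str) -> Optional[dict]:
    """Return the discovered check whose generated name matches `name`, if any."""
    for tool in await get_ml_discovered_tools(min_confidence=0.0):
        if tool["name"] == name:
            return tool
    return None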
async def update_ml_tools_cache(research_pipeline_output: dict) -> None:
    """
    Called by the research pipeline to update the available ML tools.

    Args:
        research_pipeline_output: Latest clustering/anomaly detection results
    """
    # Extract high-confidence patterns
    patterns = []
    for cluster in research_pipeline_output.get("clusters", []):
        if cluster.get("is_dangerous", False) and float(cluster.get("purity", 0)) > 0.7:
            pattern = {
                "id": cluster["id"],
                "domain": cluster.get("domain", "general"),
                "description": f"Check for {cluster.get('pattern_description', 'unknown pattern')}",
                "confidence": float(cluster["purity"]),
                "heuristic": cluster.get("detection_rule", ""),
                "examples": (cluster.get("examples", []) or [])[:3]
            }
            patterns.append(pattern)

    # Save to cache
    ML_TOOLS_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(ML_TOOLS_CACHE_PATH, 'w') as f:
        json.dump({
            "updated_at": research_pipeline_output.get("timestamp"),
            "patterns": patterns
        }, f, indent=2)
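
# Minimal usage sketch. The cluster id, domain, purity, and timestamp below are
# hypothetical placeholders; real values come from the research pipeline's
# clustering output. It writes the cache, then reads the generated checks back.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        fake_pipeline_output = {
            "timestamp": "1970-01-01T00:00:00Z",
            "clusters": [
                {
                    "id": "example_cluster",
                    "domain": "general",
                    "pattern_description": "an illustrative failure pattern",
                    "is_dangerous": True,
                    "purity": 0.9,
                    "detection_rule": "placeholder heuristic",
                    "examples": ["example 1", "example 2"],
                },
            ],
        }
        await update_ml_tools_cache(fake_pipeline_output)
        tools = await get_ml_discovered_tools(relevant_domains=["general"])
        print(f"{len(tools)} discovered check(s):", [t["name"] for t in tools])

    asyncio.run(_demo())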