"""Application settings: pydantic config models plus a YAML/env-var loader."""
from __future__ import annotations

import os
from typing import List, Optional

import yaml
from pydantic import AnyHttpUrl, BaseModel
class ModelCfg(BaseModel):
    """Model selection and text-generation parameters."""

    # Defaults used when a request reaches the HF Router.
    name: str = "HuggingFaceH4/zephyr-7b-beta"
    fallback: str = "mistralai/Mistral-7B-Instruct-v0.2"
    max_new_tokens: int = 256
    temperature: float = 0.2
    # HF Router provider tag (e.g., "featherless-ai"); None means unspecified.
    provider: Optional[str] = None
    # Provider-specific default models for the cascade backends.
    groq_model: str = "llama-3.1-8b-instant"
    gemini_model: str = "gemini-2.5-flash"
class LimitsCfg(BaseModel):
    """Request-rate and cache limits."""

    rate_per_min: int = 60   # allowed requests per minute
    cache_size: int = 256    # max cached entries
class RagCfg(BaseModel):
    """Retrieval-augmented-generation settings."""

    # Dataset identifier for the retrieval index; None disables RAG indexing.
    index_dataset: Optional[str] = None
    # Number of passages retrieved per query.
    top_k: int = 4
class MatrixHubCfg(BaseModel):
    """MatrixHub API endpoint configuration."""

    base_url: AnyHttpUrl = "https://api.matrixhub.io"
class SecurityCfg(BaseModel):
    """Security-related secrets."""

    # Token required for admin endpoints; None means admin access is unconfigured.
    admin_token: Optional[str] = None
class Settings(BaseModel):
    """Root configuration object aggregating every config section."""

    model: ModelCfg = ModelCfg()
    limits: LimitsCfg = LimitsCfg()
    rag: RagCfg = RagCfg()
    matrixhub: MatrixHubCfg = MatrixHubCfg()
    security: SecurityCfg = SecurityCfg()
    # Provider cascade order used by the "multi" chat backend.
    provider_order: List[str] = ["groq", "gemini", "router"]
    # "multi" enables the provider cascade (was "router" historically).
    chat_backend: str = "multi"
    # Whether chat responses are streamed.
    chat_stream: bool = True
def load() -> "Settings":
    """Build the application :class:`Settings`.

    Resolution order (later wins):
      1. Defaults declared on the pydantic models.
      2. The YAML file named by ``SETTINGS_FILE`` (default
         ``configs/settings.yaml``), if it exists.
      3. Individual environment-variable overrides.

    Returns:
        A validated ``Settings`` instance.

    Raises:
        pydantic.ValidationError: if the YAML payload fails model validation.
        ValueError: if ``RATE_LIMITS`` is set but is not an integer.
    """
    path = os.getenv("SETTINGS_FILE", "configs/settings.yaml")
    data = {}
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as f:
            # safe_load never constructs arbitrary objects; empty file -> None -> {}.
            data = yaml.safe_load(f) or {}
    settings = Settings.model_validate(data)

    # Plain string overrides, table-driven instead of six copy-pasted stanzas.
    # `.get(name) is not None` preserves the original `name in os.environ`
    # semantics: an empty-string value still counts as "set".
    string_overrides = (
        ("MODEL_NAME", settings.model, "name"),
        ("MODEL_FALLBACK", settings.model, "fallback"),
        ("MODEL_PROVIDER", settings.model, "provider"),
        ("ADMIN_TOKEN", settings.security, "admin_token"),
        ("GROQ_MODEL", settings.model, "groq_model"),
        ("GEMINI_MODEL", settings.model, "gemini_model"),
    )
    for env_name, target, attr in string_overrides:
        value = os.environ.get(env_name)
        if value is not None:
            setattr(target, attr, value)

    # Overrides that need parsing/normalization.
    if (raw := os.environ.get("RATE_LIMITS")) is not None:
        settings.limits.rate_per_min = int(raw)  # fail fast on a bad value
    if (raw := os.environ.get("HF_CHAT_BACKEND")) is not None:
        settings.chat_backend = raw.strip().lower()
    if (raw := os.environ.get("CHAT_STREAM")) is not None:
        settings.chat_stream = raw.lower() in ("1", "true", "yes", "on")
    if (raw := os.environ.get("PROVIDER_ORDER")) is not None:
        settings.provider_order = [p.strip().lower() for p in raw.split(",") if p.strip()]

    # Unknown backend names fall back to the cascade default.
    if settings.chat_backend not in ("multi", "router"):
        settings.chat_backend = "multi"
    return settings