Spaces:
Running
on
T4
Running
on
T4
File size: 2,787 Bytes
9024ad9 10a33a6 9024ad9 64e4f7a ae3b1b5 d3f36f7 9024ad9 2aa2b79 9024ad9 2aa2b79 9024ad9 2aa2b79 0b6e76d d174be4 0b6e76d d174be4 0b6e76d 2aa2b79 9024ad9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
"""
Configuration management for the text summarizer backend.
"""
import os
from typing import Optional
from pydantic import Field, validator
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
"""Application settings loaded from environment variables."""
# Ollama Configuration
ollama_model: str = Field(default="llama3.2:1b", env="OLLAMA_MODEL")
ollama_host: str = Field(default="http://0.0.0.0:11434", env="OLLAMA_HOST")
ollama_timeout: int = Field(default=60, env="OLLAMA_TIMEOUT", ge=1)
# Server Configuration
server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")
server_port: int = Field(default=8000, env="SERVER_PORT", ge=1, le=65535)
log_level: str = Field(default="INFO", env="LOG_LEVEL")
# Optional: API Security
api_key_enabled: bool = Field(default=False, env="API_KEY_ENABLED")
api_key: Optional[str] = Field(default=None, env="API_KEY")
# Optional: Rate Limiting
rate_limit_enabled: bool = Field(default=False, env="RATE_LIMIT_ENABLED")
rate_limit_requests: int = Field(default=60, env="RATE_LIMIT_REQUESTS", ge=1)
rate_limit_window: int = Field(default=60, env="RATE_LIMIT_WINDOW", ge=1)
# Input validation
max_text_length: int = Field(default=32000, env="MAX_TEXT_LENGTH", ge=1) # ~32KB
max_tokens_default: int = Field(default=256, env="MAX_TOKENS_DEFAULT", ge=1)
# V2 HuggingFace Configuration
hf_model_id: str = Field(default="t5-small", env="HF_MODEL_ID")
hf_device_map: str = Field(default="auto", env="HF_DEVICE_MAP") # "auto" for GPU fallback to CPU
hf_torch_dtype: str = Field(default="auto", env="HF_TORCH_DTYPE") # "auto" for automatic dtype selection
hf_cache_dir: str = Field(default="/tmp/huggingface", env="HF_HOME") # HuggingFace cache directory
hf_max_new_tokens: int = Field(default=128, env="HF_MAX_NEW_TOKENS", ge=1, le=2048)
hf_temperature: float = Field(default=0.7, env="HF_TEMPERATURE", ge=0.0, le=2.0)
hf_top_p: float = Field(default=0.95, env="HF_TOP_P", ge=0.0, le=1.0)
# V1/V2 Warmup Control
enable_v1_warmup: bool = Field(default=False, env="ENABLE_V1_WARMUP") # Disable V1 warmup by default
enable_v2_warmup: bool = Field(default=True, env="ENABLE_V2_WARMUP") # Enable V2 warmup
@validator('log_level')
def validate_log_level(cls, v):
"""Validate log level is one of the standard levels."""
valid_levels = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
if v.upper() not in valid_levels:
return 'INFO' # Default to INFO for invalid levels
return v.upper()
class Config:
env_file = ".env"
case_sensitive = False
# Global settings instance
settings = Settings()
|