|
|
|
|
|
import os |
|
|
import logging |
|
|
from typing import Optional |
|
|
from dataclasses import dataclass, field |
|
|
|
|
|
# Module-level logger named after this module, so its output can be filtered
# or routed per-module by the application's logging configuration.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
class ConfigError(Exception):
    """Custom exception for configuration errors."""
|
|
|
|
|
|
|
|
@dataclass
class AppConfig:
    """
    Application configuration loaded from environment variables.

    Every field has a default. Values are validated in ``__post_init__``:
    invalid values raise ``ConfigError``, while missing credentials only log
    a warning so that the object can still be constructed.
    """

    # SQL backend selection and credentials
    sql_backend: str = "motherduck"
    gcp_project: Optional[str] = None
    motherduck_token: Optional[str] = None
    motherduck_db: str = "workspace"

    # HuggingFace model source ("your-org/your-model" is a placeholder that
    # validate_for_features treats as "not configured")
    hf_model_repo: str = "your-org/your-model"
    hf_token: Optional[str] = None

    # Tracing
    trace_enabled: bool = True
    trace_url: Optional[str] = None

    # Feature flags
    enable_forecasting: bool = True
    enable_explanations: bool = True

    # Execution limits
    max_workers: int = 4
    timeout_seconds: int = 300

    # Logging
    log_level: str = "INFO"

    def __post_init__(self):
        """Validate configuration after initialization."""
        self._validate()

    def _validate(self):
        """
        Validate configuration values.

        Raises:
            ConfigError: If ``sql_backend`` or ``log_level`` is not one of the
                accepted values, or if ``max_workers`` / ``timeout_seconds``
                is less than 1.
        """
        valid_backends = ["bigquery", "motherduck"]
        if self.sql_backend not in valid_backends:
            raise ConfigError(
                f"Invalid sql_backend: {self.sql_backend}. "
                f"Must be one of: {valid_backends}"
            )

        # Missing credentials are only warned about here; validate_for_features
        # reports them as errors when the "sql" feature is actually requested.
        if self.sql_backend == "bigquery":
            if not self.gcp_project:
                logger.warning("BigQuery selected but gcp_project not set")

        if self.sql_backend == "motherduck":
            if not self.motherduck_token:
                logger.warning("MotherDuck selected but motherduck_token not set")

        if not self.hf_model_repo:
            logger.warning("hf_model_repo not set - predictions/explanations will fail")

        if self.max_workers < 1:
            raise ConfigError(f"max_workers must be >= 1, got {self.max_workers}")

        if self.timeout_seconds < 1:
            raise ConfigError(f"timeout_seconds must be >= 1, got {self.timeout_seconds}")

        # Comparison is case-insensitive; the stored value is not normalized.
        valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
        if self.log_level.upper() not in valid_levels:
            raise ConfigError(
                f"Invalid log_level: {self.log_level}. "
                f"Must be one of: {valid_levels}"
            )

    @classmethod
    def from_env(cls) -> "AppConfig":
        """
        Create configuration from environment variables.

        Environment variables:
            SQL_BACKEND: "bigquery" or "motherduck" (default: "motherduck")
            GCP_PROJECT: GCP project ID for BigQuery
            GCP_SERVICE_ACCOUNT_JSON: Service account credentials for BigQuery
                (not read by this method; presumably consumed elsewhere -
                TODO confirm)
            MOTHERDUCK_TOKEN: MotherDuck authentication token
            MOTHERDUCK_DB: MotherDuck database name (default: "workspace")
            HF_MODEL_REPO: HuggingFace model repository (falls back to the
                placeholder "your-org/your-model", which
                validate_for_features rejects for predict/explain)
            HF_TOKEN: HuggingFace API token (optional, for private repos)
            TRACE_ENABLED: Enable tracing (default: "true")
            TRACE_URL: Custom trace URL
            ENABLE_FORECASTING: Enable forecasting features (default: "true")
            ENABLE_EXPLANATIONS: Enable SHAP explanations (default: "true")
            MAX_WORKERS: Max parallel workers (default: 4)
            TIMEOUT_SECONDS: Request timeout (default: 300)
            LOG_LEVEL: Logging level (default: "INFO")

        Boolean variables are true only when the value equals "true"
        (case-insensitive); any other value is treated as false.

        Returns:
            A validated ``AppConfig`` instance.

        Raises:
            ConfigError: If a numeric variable cannot be parsed as an integer,
                if validation fails, or if construction fails for any other
                reason.
        """
        try:
            config = cls(
                sql_backend=os.getenv("SQL_BACKEND", "motherduck").lower(),
                gcp_project=os.getenv("GCP_PROJECT"),
                motherduck_token=os.getenv("MOTHERDUCK_TOKEN"),
                motherduck_db=os.getenv("MOTHERDUCK_DB", "workspace"),
                hf_model_repo=os.getenv("HF_MODEL_REPO", "your-org/your-model"),
                hf_token=os.getenv("HF_TOKEN"),
                trace_enabled=os.getenv("TRACE_ENABLED", "true").lower() == "true",
                trace_url=os.getenv("TRACE_URL"),
                enable_forecasting=os.getenv("ENABLE_FORECASTING", "true").lower() == "true",
                enable_explanations=os.getenv("ENABLE_EXPLANATIONS", "true").lower() == "true",
                max_workers=int(os.getenv("MAX_WORKERS", "4")),
                timeout_seconds=int(os.getenv("TIMEOUT_SECONDS", "300")),
                log_level=os.getenv("LOG_LEVEL", "INFO").upper(),
            )
        except ConfigError:
            # Validation errors already carry a precise message; re-raise them
            # as-is instead of wrapping them in a second ConfigError.
            raise
        except ValueError as e:
            # int() rejected MAX_WORKERS or TIMEOUT_SECONDS.
            raise ConfigError(f"Invalid numeric configuration value: {e}") from e
        except Exception as e:
            raise ConfigError(f"Configuration loading failed: {e}") from e

        logger.info("Configuration loaded successfully")
        logger.info("SQL Backend: %s", config.sql_backend)
        logger.info("Model Repo: %s", config.hf_model_repo)
        logger.info("Forecasting: %s", "enabled" if config.enable_forecasting else "disabled")
        logger.info("Explanations: %s", "enabled" if config.enable_explanations else "disabled")

        return config

    def to_dict(self) -> dict:
        """
        Convert configuration to dictionary (for logging/debugging).

        Token values are never included: ``hf_token`` is reported only as the
        boolean ``hf_token_set`` and ``motherduck_token`` is omitted entirely.
        """
        return {
            "sql_backend": self.sql_backend,
            "gcp_project": self.gcp_project or "not set",
            "motherduck_db": self.motherduck_db,
            "hf_model_repo": self.hf_model_repo,
            "hf_token_set": bool(self.hf_token),
            "trace_enabled": self.trace_enabled,
            "enable_forecasting": self.enable_forecasting,
            "enable_explanations": self.enable_explanations,
            "max_workers": self.max_workers,
            "timeout_seconds": self.timeout_seconds,
            "log_level": self.log_level,
        }

    def validate_for_features(self, features: list) -> tuple[bool, list]:
        """
        Validate configuration supports requested features.

        Recognized feature names are "predict", "explain", "forecast" and
        "sql"; any other name is ignored and produces no error.

        Args:
            features: List of feature names to check

        Returns:
            Tuple of (all_valid, list_of_errors)
        """
        errors = []

        for feature in features:
            if feature in ("predict", "explain"):
                # Both features need a real model repo, not the placeholder.
                if not self.hf_model_repo or self.hf_model_repo == "your-org/your-model":
                    errors.append(f"{feature} requires valid HF_MODEL_REPO")
                # Checked here rather than in a separate elif branch, which
                # could never be reached because "explain" is matched above.
                if feature == "explain" and not self.enable_explanations:
                    errors.append("explanations are disabled (ENABLE_EXPLANATIONS=false)")

            elif feature == "forecast":
                if not self.enable_forecasting:
                    errors.append("forecasting is disabled (ENABLE_FORECASTING=false)")

            elif feature == "sql":
                if self.sql_backend == "bigquery" and not self.gcp_project:
                    errors.append("BigQuery requires GCP_PROJECT")
                elif self.sql_backend == "motherduck" and not self.motherduck_token:
                    errors.append("MotherDuck requires MOTHERDUCK_TOKEN")

        return not errors, errors