File size: 7,573 Bytes
3d0e99a 6e7ce87 3d0e99a 6e7ce87 3d0e99a 6e7ce87 3d0e99a 6e7ce87 3d0e99a 6e7ce87 3d0e99a 6e7ce87 3d0e99a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
# space/utils/config.py
import os
import logging
from typing import Optional
from dataclasses import dataclass, field
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class ConfigError(Exception):
    """Signals that application configuration is invalid or could not be loaded."""
@dataclass
class AppConfig:
    """
    Application configuration loaded from environment variables.

    Values are validated in ``__post_init__``: hard errors raise
    ``ConfigError``, while missing-but-optional credentials only log a
    warning. ``log_level`` is normalised to upper case after validation.
    """

    # Placeholder shipped as the default model repo. Deliberately left
    # un-annotated so the dataclass machinery does not treat it as a field.
    _PLACEHOLDER_MODEL_REPO = "your-org/your-model"

    # SQL Backend Configuration
    sql_backend: str = "motherduck"  # "bigquery" or "motherduck"
    gcp_project: Optional[str] = None
    motherduck_token: Optional[str] = None
    motherduck_db: str = "workspace"

    # Model Configuration
    hf_model_repo: str = _PLACEHOLDER_MODEL_REPO
    hf_token: Optional[str] = None

    # Tracing Configuration
    trace_enabled: bool = True
    trace_url: Optional[str] = None

    # Feature Flags
    enable_forecasting: bool = True
    enable_explanations: bool = True

    # Performance Settings
    max_workers: int = 4
    timeout_seconds: int = 300

    # Additional settings
    log_level: str = "INFO"

    def __post_init__(self):
        """Validate the configuration, then normalise ``log_level``."""
        self._validate()
        # Validation is case-insensitive; store the canonical upper-case
        # spelling so that directly constructed instances match from_env().
        self.log_level = self.log_level.upper()

    def _validate(self):
        """
        Validate configuration values.

        Raises:
            ConfigError: If ``sql_backend`` or ``log_level`` is not an
                accepted value, or ``max_workers`` / ``timeout_seconds``
                is below 1.
        """
        # Validate SQL backend
        valid_backends = ["bigquery", "motherduck"]
        if self.sql_backend not in valid_backends:
            raise ConfigError(
                f"Invalid sql_backend: {self.sql_backend}. "
                f"Must be one of: {valid_backends}"
            )

        # Backend-specific credentials are only warned about here;
        # validate_for_features() reports them as errors for the "sql" feature.
        if self.sql_backend == "bigquery":
            if not self.gcp_project:
                logger.warning("BigQuery selected but gcp_project not set")
        elif not self.motherduck_token:
            logger.warning("MotherDuck selected but motherduck_token not set")

        # Validate model configuration
        if not self.hf_model_repo:
            logger.warning("hf_model_repo not set - predictions/explanations will fail")

        # Validate numeric settings
        if self.max_workers < 1:
            raise ConfigError(f"max_workers must be >= 1, got {self.max_workers}")
        if self.timeout_seconds < 1:
            raise ConfigError(f"timeout_seconds must be >= 1, got {self.timeout_seconds}")

        # Validate log level (case-insensitive); a non-string value is
        # rejected with ConfigError rather than failing on .upper().
        valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
        if (
            not isinstance(self.log_level, str)
            or self.log_level.upper() not in valid_levels
        ):
            raise ConfigError(
                f"Invalid log_level: {self.log_level}. "
                f"Must be one of: {valid_levels}"
            )

    @classmethod
    def from_env(cls) -> "AppConfig":
        """
        Create configuration from environment variables.

        Environment variables:
            SQL_BACKEND: "bigquery" or "motherduck" (default: "motherduck")
            GCP_PROJECT: GCP project ID for BigQuery
            GCP_SERVICE_ACCOUNT_JSON: Service account credentials for BigQuery
                (not read by this loader)
            MOTHERDUCK_TOKEN: MotherDuck authentication token
            MOTHERDUCK_DB: MotherDuck database name (default: "workspace")
            HF_MODEL_REPO: HuggingFace model repository (default: the
                placeholder "your-org/your-model", which
                validate_for_features() rejects for predict/explain)
            HF_TOKEN: HuggingFace API token (optional, for private repos)
            TRACE_ENABLED: Enable tracing (default: "true")
            TRACE_URL: Custom trace URL
            ENABLE_FORECASTING: Enable forecasting features (default: "true")
            ENABLE_EXPLANATIONS: Enable SHAP explanations (default: "true")
            MAX_WORKERS: Max parallel workers (default: 4)
            TIMEOUT_SECONDS: Request timeout (default: 300)
            LOG_LEVEL: Logging level (default: "INFO")

        Boolean variables are true only when their value equals "true"
        (case-insensitive); any other value is treated as false.

        Returns:
            A validated ``AppConfig`` instance.

        Raises:
            ConfigError: If a numeric variable cannot be parsed as an
                integer, or if validation of the resulting values fails.
        """

        def env_flag(name: str) -> bool:
            # Unset flags default to enabled.
            return os.getenv(name, "true").lower() == "true"

        try:
            config = cls(
                sql_backend=os.getenv("SQL_BACKEND", "motherduck").lower(),
                gcp_project=os.getenv("GCP_PROJECT"),
                motherduck_token=os.getenv("MOTHERDUCK_TOKEN"),
                motherduck_db=os.getenv("MOTHERDUCK_DB", "workspace"),
                hf_model_repo=os.getenv("HF_MODEL_REPO", cls._PLACEHOLDER_MODEL_REPO),
                hf_token=os.getenv("HF_TOKEN"),
                trace_enabled=env_flag("TRACE_ENABLED"),
                trace_url=os.getenv("TRACE_URL"),
                enable_forecasting=env_flag("ENABLE_FORECASTING"),
                enable_explanations=env_flag("ENABLE_EXPLANATIONS"),
                max_workers=int(os.getenv("MAX_WORKERS", "4")),
                timeout_seconds=int(os.getenv("TIMEOUT_SECONDS", "300")),
                log_level=os.getenv("LOG_LEVEL", "INFO").upper(),
            )
        except ConfigError:
            # Validation errors already carry a precise message; let them
            # through instead of re-wrapping them in a second ConfigError.
            raise
        except ValueError as e:
            raise ConfigError(f"Invalid numeric configuration value: {e}") from e
        except Exception as e:
            raise ConfigError(f"Configuration loading failed: {e}") from e

        logger.info("Configuration loaded successfully")
        logger.info("SQL Backend: %s", config.sql_backend)
        logger.info("Model Repo: %s", config.hf_model_repo)
        logger.info(
            "Forecasting: %s",
            "enabled" if config.enable_forecasting else "disabled",
        )
        logger.info(
            "Explanations: %s",
            "enabled" if config.enable_explanations else "disabled",
        )
        return config

    def to_dict(self) -> dict:
        """
        Convert configuration to dictionary (for logging/debugging).

        Secrets are not included: the HuggingFace token is reported only
        as a boolean, and the MotherDuck token is omitted entirely.
        """
        return {
            "sql_backend": self.sql_backend,
            "gcp_project": self.gcp_project or "not set",
            "motherduck_db": self.motherduck_db,
            "hf_model_repo": self.hf_model_repo,
            "hf_token_set": bool(self.hf_token),
            "trace_enabled": self.trace_enabled,
            "enable_forecasting": self.enable_forecasting,
            "enable_explanations": self.enable_explanations,
            "max_workers": self.max_workers,
            "timeout_seconds": self.timeout_seconds,
            "log_level": self.log_level,
        }

    def validate_for_features(self, features: list) -> tuple[bool, list]:
        """
        Validate configuration supports requested features.

        Recognised feature names are "predict", "explain", "forecast" and
        "sql"; any other name is ignored.

        Args:
            features: List of feature names to check

        Returns:
            Tuple of (all_valid, list_of_errors)
        """
        errors = []
        for feature in features:
            # Both prediction and explanation need a real model repository.
            if feature in ("predict", "explain"):
                if (
                    not self.hf_model_repo
                    or self.hf_model_repo == self._PLACEHOLDER_MODEL_REPO
                ):
                    errors.append(f"{feature} requires valid HF_MODEL_REPO")

            # Checked separately from the block above so that "explain"
            # also reaches its feature-flag check.
            if feature == "forecast":
                if not self.enable_forecasting:
                    errors.append("forecasting is disabled (ENABLE_FORECASTING=false)")
            elif feature == "explain":
                if not self.enable_explanations:
                    errors.append("explanations are disabled (ENABLE_EXPLANATIONS=false)")
            elif feature == "sql":
                if self.sql_backend == "bigquery" and not self.gcp_project:
                    errors.append("BigQuery requires GCP_PROJECT")
                elif self.sql_backend == "motherduck" and not self.motherduck_token:
                    errors.append("MotherDuck requires MOTHERDUCK_TOKEN")
        return not errors, errors