ming committed on
Commit
7019b66
·
1 Parent(s): 93c9664

feat: Switch V4 model to Phi-3-mini for better structured output

Browse files

- Replace Qwen/Qwen2.5-0.5B-Instruct with microsoft/Phi-3-mini-4k-instruct
- Phi-3-mini (3.8B params) has much better instruction following capability
- Expected improvements:
  - Valid NDJSON patch generation
  - Better structured JSON output
  - More reliable field completion
- Trade-off: Slower generation (~20-30s vs ~10s) but higher quality
- RAM usage: ~7-8GB (quantized) vs ~1-2GB for Qwen2.5-0.5B

Files changed (1) hide show
  1. app/core/config.py +2 -2
app/core/config.py CHANGED
@@ -107,9 +107,9 @@ class Settings(BaseSettings):
107
  description="Enable V4 model warmup on startup (uses 1-2GB RAM with quantization)",
108
  )
109
  v4_model_id: str = Field(
110
- default="Qwen/Qwen2.5-0.5B-Instruct",
111
  env="V4_MODEL_ID",
112
- description="Model ID for V4 structured output (490M params, optimized for CPU, no auth required)",
113
  )
114
  v4_max_tokens: int = Field(
115
  default=1024, env="V4_MAX_TOKENS", ge=128, le=2048, description="Max tokens for V4 generation"
 
107
  description="Enable V4 model warmup on startup (uses 1-2GB RAM with quantization)",
108
  )
109
  v4_model_id: str = Field(
110
+ default="microsoft/Phi-3-mini-4k-instruct",
111
  env="V4_MODEL_ID",
112
+ description="Model ID for V4 structured output (3.8B params, optimized for instruction following)",
113
  )
114
  v4_max_tokens: int = Field(
115
  default=1024, env="V4_MAX_TOKENS", ge=128, le=2048, description="Max tokens for V4 generation"