"""
Standalone model inference and client management for AnyCoder Backend API.
No Gradio dependencies - works with FastAPI/backend only.
"""
import os

from openai import OpenAI

def get_inference_client(model_id: str, provider: str = "auto"):
"""
Return an appropriate client based on model_id.
    Always returns an OpenAI-compatible client; provider routing is handled
    via model-ID suffixes added by get_real_model_id(). The provider argument
    is currently unused.
    """
    # All of the models below are served through the HuggingFace Router,
    # which exposes an OpenAI-compatible API. The provider (Novita, Groq,
    # Together AI, Cerebras, Fireworks AI, ...) is not chosen here; it is
    # encoded as a ":provider" suffix on the model ID by get_real_model_id().
    router_model_ids = {
        "MiniMaxAI/MiniMax-M2",         # Novita
        "moonshotai/Kimi-K2-Thinking",  # Together AI
        "moonshotai/Kimi-K2-Instruct",  # Groq
    }
    router_prefixes = (
        "deepseek-ai/",        # DeepSeek models (Novita for V3/R1)
        "zai-org/GLM-4",       # GLM models (Cerebras / Fireworks AI)
        "moonshotai/Kimi-K2",  # other Kimi K2 variants
    )
    if model_id in router_model_ids or model_id.startswith(router_prefixes):
        return OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=os.getenv("HF_TOKEN"),
            default_headers={"X-HF-Bill-To": "huggingface"}
        )
    # Unknown model - try HuggingFace Inference API
    return OpenAI(
        base_url="https://api-inference.huggingface.co/v1",
        api_key=os.getenv("HF_TOKEN")
    )
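
# Usage sketch (illustrative; assumes HF_TOKEN is set in the environment):
# the returned client follows the standard OpenAI chat-completions interface,
# so a streaming call looks like this:
#
#   client = get_inference_client("moonshotai/Kimi-K2-Instruct")
#   stream = client.chat.completions.create(
#       model=get_real_model_id("moonshotai/Kimi-K2-Instruct"),
#       messages=[{"role": "user", "content": "Hello!"}],
#       stream=True,
#   )
#   for chunk in stream:
#       print(chunk.choices[0].delta.content or "", end="")
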
def get_real_model_id(model_id: str) -> str:
"""Get the real model ID with provider suffixes if needed"""
if model_id == "zai-org/GLM-4.6":
# GLM-4.6 requires Cerebras provider suffix in model string for API calls
return "zai-org/GLM-4.6:cerebras"
elif model_id == "MiniMaxAI/MiniMax-M2":
# MiniMax M2 needs Novita provider suffix
return "MiniMaxAI/MiniMax-M2:novita"
elif model_id == "moonshotai/Kimi-K2-Thinking":
# Kimi K2 Thinking needs Together AI provider
return "moonshotai/Kimi-K2-Thinking:together"
elif model_id == "moonshotai/Kimi-K2-Instruct":
# Kimi K2 Instruct needs Groq provider
return "moonshotai/Kimi-K2-Instruct:groq"
elif model_id.startswith("deepseek-ai/DeepSeek-V3") or model_id.startswith("deepseek-ai/DeepSeek-R1"):
# DeepSeek V3 and R1 models need Novita provider
return f"{model_id}:novita"
elif model_id == "zai-org/GLM-4.5":
# GLM-4.5 needs fireworks-ai provider
return "zai-org/GLM-4.5:fireworks-ai"
return model_id
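
# Example resolutions (illustrative): "MiniMaxAI/MiniMax-M2" resolves to
# "MiniMaxAI/MiniMax-M2:novita", "zai-org/GLM-4.6" to "zai-org/GLM-4.6:cerebras",
# and an ID with no special-case rule (e.g. "org/unlisted-model") is returned
# unchanged.
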
def is_native_sdk_model(model_id: str) -> bool:
    """Check if model uses a native SDK (not OpenAI-compatible)."""
    # Every model in this standalone backend is served through an
    # OpenAI-compatible endpoint, so this is always False.
    return False


def is_mistral_model(model_id: str) -> bool:
    """Check if model uses the Mistral SDK."""
    # No Mistral-SDK models are wired into this backend.
    return False
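
# Minimal smoke test (illustrative sketch): prints how a few model IDs
# resolve and, only if HF_TOKEN is set, sends one short chat request.
# The model choice and prompt below are arbitrary placeholders.
if __name__ == "__main__":
    for mid in (
        "MiniMaxAI/MiniMax-M2",
        "moonshotai/Kimi-K2-Thinking",
        "deepseek-ai/DeepSeek-V3",
        "org/unlisted-model",
    ):
        print(f"{mid} -> {get_real_model_id(mid)}")

    if os.getenv("HF_TOKEN"):
        model_id = "moonshotai/Kimi-K2-Instruct"
        client = get_inference_client(model_id)
        completion = client.chat.completions.create(
            model=get_real_model_id(model_id),
            messages=[{"role": "user", "content": "Say hello in one sentence."}],
            max_tokens=64,
        )
        print(completion.choices[0].message.content)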