"""
Standalone model inference and client management for AnyCoder Backend API.
No Gradio dependencies - works with FastAPI/backend only.
"""
import os

from openai import OpenAI

# Models served through the HuggingFace Router. The upstream provider
# (Novita, Groq, Cerebras, ...) is selected per model via the ":provider"
# suffix appended in get_real_model_id().
ROUTER_MODELS = {"MiniMaxAI/MiniMax-M2"}
ROUTER_MODEL_PREFIXES = ("deepseek-ai/", "zai-org/GLM-4", "moonshotai/Kimi-K2")


def get_inference_client(model_id: str, provider: str = "auto"):
    """
    Return an OpenAI-compatible client appropriate for model_id.

    Known models are routed through the HuggingFace Router; any other model
    falls back to the HuggingFace Inference API. The provider argument is
    currently unused (provider selection happens via the model-id suffix
    added in get_real_model_id) and is kept for API compatibility.
    """
    if model_id in ROUTER_MODELS or model_id.startswith(ROUTER_MODEL_PREFIXES):
        return OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=os.getenv("HF_TOKEN"),
            default_headers={"X-HF-Bill-To": "huggingface"},
        )

    # Unknown model - fall back to the HuggingFace Inference API
    return OpenAI(
        base_url="https://api-inference.huggingface.co/v1",
        api_key=os.getenv("HF_TOKEN"),
    )


def get_real_model_id(model_id: str) -> str:
    """Get the real model ID with provider suffixes if needed"""
    if model_id == "zai-org/GLM-4.6":
        # GLM-4.6 requires Cerebras provider suffix in model string for API calls
        return "zai-org/GLM-4.6:cerebras"
    
    elif model_id == "MiniMaxAI/MiniMax-M2":
        # MiniMax M2 needs Novita provider suffix
        return "MiniMaxAI/MiniMax-M2:novita"
    
    elif model_id == "moonshotai/Kimi-K2-Thinking":
        # Kimi K2 Thinking needs Together AI provider
        return "moonshotai/Kimi-K2-Thinking:together"
    
    elif model_id == "moonshotai/Kimi-K2-Instruct":
        # Kimi K2 Instruct needs Groq provider
        return "moonshotai/Kimi-K2-Instruct:groq"
    
    elif model_id.startswith("deepseek-ai/DeepSeek-V3") or model_id.startswith("deepseek-ai/DeepSeek-R1"):
        # DeepSeek V3 and R1 models need Novita provider
        return f"{model_id}:novita"
    
    elif model_id == "zai-org/GLM-4.5":
        # GLM-4.5 needs fireworks-ai provider
        return "zai-org/GLM-4.5:fireworks-ai"
    
    return model_id


def is_native_sdk_model(model_id: str) -> bool:
    """Check if model uses a native SDK (not OpenAI-compatible).

    Always False here: every model this backend supports is served through
    an OpenAI-compatible endpoint.
    """
    return False


def is_mistral_model(model_id: str) -> bool:
    """Check if model uses the Mistral SDK.

    Always False here: no model is routed through the Mistral SDK in this
    backend.
    """
    return False
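

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the module API): how the helpers
# above are meant to be combined for a chat completion. The model id and
# prompt below are examples only, and the call assumes HF_TOKEN is set in
# the environment.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    model_id = "moonshotai/Kimi-K2-Instruct"
    client = get_inference_client(model_id)
    response = client.chat.completions.create(
        # get_real_model_id appends the provider suffix the router expects,
        # e.g. "moonshotai/Kimi-K2-Instruct" -> "moonshotai/Kimi-K2-Instruct:groq"
        model=get_real_model_id(model_id),
        messages=[{"role": "user", "content": "Say hello in one sentence."}],
        max_tokens=64,
    )
    print(response.choices[0].message.content)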