Update app.py
Browse files
app.py
CHANGED
|
@@ -312,14 +312,11 @@ COHERE_MODELS = {
|
|
| 312 |
"c4ai-aya-expanse-32b": 131072,
|
| 313 |
}
|
| 314 |
|
| 315 |
-
# TOGETHER MODELS
|
| 316 |
TOGETHER_MODELS = {
|
| 317 |
-
"
|
| 318 |
-
"meta-llama/Llama-
|
| 319 |
-
"meta-llama/Llama-3.3-70B-Instruct":
|
| 320 |
-
"deepseek-ai/deepseek-r1-distill-llama-70b": 8192,
|
| 321 |
-
"meta-llama/Llama-3.2-11B-Vision-Instruct": 131072,
|
| 322 |
-
"meta-llama/Llama-3.2-90B-Vision-Instruct": 131072,
|
| 323 |
}
|
| 324 |
|
| 325 |
# OVH MODELS - OVH AI Endpoints (free beta)
|
|
@@ -339,8 +336,8 @@ OVH_MODELS = {
|
|
| 339 |
|
| 340 |
# CEREBRAS MODELS
|
| 341 |
CEREBRAS_MODELS = {
|
| 342 |
-
"
|
| 343 |
-
"
|
| 344 |
}
|
| 345 |
|
| 346 |
# GOOGLE AI MODELS
|
|
@@ -952,23 +949,47 @@ def call_together_api(payload, api_key_override=None):
|
|
| 952 |
)
|
| 953 |
|
| 954 |
# Extract parameters from payload
|
| 955 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 956 |
|
| 957 |
-
#
|
| 958 |
-
|
| 959 |
-
|
| 960 |
-
|
| 961 |
-
|
| 962 |
-
|
|
|
|
|
|
|
|
|
|
| 963 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 964 |
# Create completion
|
| 965 |
-
response = client.chat.completions.create(
|
| 966 |
-
model=model,
|
| 967 |
-
messages=payload.get("messages", []),
|
| 968 |
-
temperature=payload.get("temperature", 0.7),
|
| 969 |
-
max_tokens=payload.get("max_tokens", 1000),
|
| 970 |
-
stream=payload.get("stream", False)
|
| 971 |
-
)
|
| 972 |
|
| 973 |
return response
|
| 974 |
except Exception as e:
|
|
@@ -1020,42 +1041,65 @@ def call_cerebras_api(payload, api_key_override=None):
|
|
| 1020 |
"""Make a call to Cerebras API with error handling"""
|
| 1021 |
try:
|
| 1022 |
# Extract parameters from payload
|
| 1023 |
-
|
| 1024 |
-
|
| 1025 |
-
|
| 1026 |
-
|
| 1027 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1028 |
messages = payload.get("messages", [])
|
| 1029 |
temperature = payload.get("temperature", 0.7)
|
| 1030 |
max_tokens = payload.get("max_tokens", 1000)
|
| 1031 |
-
|
| 1032 |
-
data = {
|
| 1033 |
-
"model": model,
|
| 1034 |
-
"messages": messages,
|
| 1035 |
-
"temperature": temperature,
|
| 1036 |
-
"max_tokens": max_tokens
|
| 1037 |
-
}
|
| 1038 |
-
|
| 1039 |
-
api_key = api_key_override if api_key_override else os.environ.get("CEREBRAS_API_KEY", "")
|
| 1040 |
-
headers = {
|
| 1041 |
-
"Content-Type": "application/json",
|
| 1042 |
-
"Authorization": f"Bearer {api_key}"
|
| 1043 |
-
}
|
| 1044 |
|
|
|
|
| 1045 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1046 |
response = requests.post(
|
| 1047 |
"https://api.cloud.cerebras.ai/v1/chat/completions",
|
| 1048 |
headers=headers,
|
| 1049 |
json=data,
|
| 1050 |
-
timeout=
|
| 1051 |
)
|
| 1052 |
|
| 1053 |
if response.status_code != 200:
|
| 1054 |
raise ValueError(f"Cerebras API returned status code {response.status_code}: {response.text}")
|
| 1055 |
|
| 1056 |
return response.json()
|
| 1057 |
-
except requests.exceptions.
|
| 1058 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1059 |
except Exception as e:
|
| 1060 |
logger.error(f"Cerebras API error: {str(e)}")
|
| 1061 |
raise e
|
|
|
|
| 312 |
"c4ai-aya-expanse-32b": 131072,
|
| 313 |
}
|
| 314 |
|
| 315 |
+
# TOGETHER MODELS in the free tier
|
| 316 |
TOGETHER_MODELS = {
|
| 317 |
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free": 8192,
|
| 318 |
+
"meta-llama/Llama-Vision-Free": 8192,
|
| 319 |
+
"meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": 8192,
|
|
|
|
|
|
|
|
|
|
| 320 |
}
|
| 321 |
|
| 322 |
# OVH MODELS - OVH AI Endpoints (free beta)
|
|
|
|
| 336 |
|
| 337 |
# CEREBRAS MODELS
|
| 338 |
CEREBRAS_MODELS = {
|
| 339 |
+
"llama3.1-8b": 8192,
|
| 340 |
+
"llama-3.3-70b": 8192,
|
| 341 |
}
|
| 342 |
|
| 343 |
# GOOGLE AI MODELS
|
|
|
|
| 949 |
)
|
| 950 |
|
| 951 |
# Extract parameters from payload
|
| 952 |
+
requested_model = payload.get("model", "")
|
| 953 |
+
|
| 954 |
+
# Use a safe model that's known to work in the free tier
|
| 955 |
+
# these models are available without dedicated endpoints
|
| 956 |
+
free_models = [
|
| 957 |
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free",
|
| 958 |
+
"meta-llama/Llama-Vision-Free",
|
| 959 |
+
"meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
|
| 960 |
+
]
|
| 961 |
+
|
| 962 |
+
# Default to the first free model
|
| 963 |
+
model = free_models[0]
|
| 964 |
+
|
| 965 |
+
# Try to match a requested model with a free model if possible
|
| 966 |
+
if requested_model:
|
| 967 |
+
for free_model in free_models:
|
| 968 |
+
if requested_model.lower() in free_model.lower():
|
| 969 |
+
model = free_model
|
| 970 |
+
break
|
| 971 |
|
| 972 |
+
# Create payload with clean messages
|
| 973 |
+
messages = []
|
| 974 |
+
for msg in payload.get("messages", []):
|
| 975 |
+
# Ensure we only include role and content
|
| 976 |
+
clean_msg = {
|
| 977 |
+
"role": msg["role"],
|
| 978 |
+
"content": msg["content"]
|
| 979 |
+
}
|
| 980 |
+
messages.append(clean_msg)
|
| 981 |
|
| 982 |
+
# Create payload
|
| 983 |
+
together_payload = {
|
| 984 |
+
"model": model,
|
| 985 |
+
"messages": messages,
|
| 986 |
+
"temperature": payload.get("temperature", 0.7),
|
| 987 |
+
"max_tokens": payload.get("max_tokens", 1000),
|
| 988 |
+
"stream": payload.get("stream", False)
|
| 989 |
+
}
|
| 990 |
+
|
| 991 |
# Create completion
|
| 992 |
+
response = client.chat.completions.create(**together_payload)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 993 |
|
| 994 |
return response
|
| 995 |
except Exception as e:
|
|
|
|
| 1041 |
"""Make a call to Cerebras API with error handling"""
|
| 1042 |
try:
|
| 1043 |
# Extract parameters from payload
|
| 1044 |
+
requested_model = payload.get("model", "")
|
| 1045 |
+
|
| 1046 |
+
# Map the full model name to the correct Cerebras model ID
|
| 1047 |
+
model_mapping = {
|
| 1048 |
+
"cerebras/llama-3.1-8b": "llama3.1-8b",
|
| 1049 |
+
"cerebras/llama-3.3-70b": "llama-3.3-70b",
|
| 1050 |
+
"llama-3.1-8b": "llama3.1-8b",
|
| 1051 |
+
"llama-3.3-70b": "llama-3.3-70b",
|
| 1052 |
+
"llama3.1-8b": "llama3.1-8b"
|
| 1053 |
+
}
|
| 1054 |
+
|
| 1055 |
+
# Default to the 8B model
|
| 1056 |
+
model = "llama3.1-8b"
|
| 1057 |
+
|
| 1058 |
+
# If the requested model matches any of our mappings, use that instead
|
| 1059 |
+
if requested_model in model_mapping:
|
| 1060 |
+
model = model_mapping[requested_model]
|
| 1061 |
+
elif "3.3" in requested_model or "70b" in requested_model.lower():
|
| 1062 |
+
model = "llama-3.3-70b"
|
| 1063 |
+
|
| 1064 |
messages = payload.get("messages", [])
|
| 1065 |
temperature = payload.get("temperature", 0.7)
|
| 1066 |
max_tokens = payload.get("max_tokens", 1000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1067 |
|
| 1068 |
+
# Try-except block for network issues
|
| 1069 |
try:
|
| 1070 |
+
headers = {
|
| 1071 |
+
"Content-Type": "application/json",
|
| 1072 |
+
"Authorization": f"Bearer {api_key_override or os.environ.get('CEREBRAS_API_KEY', '')}"
|
| 1073 |
+
}
|
| 1074 |
+
|
| 1075 |
+
data = {
|
| 1076 |
+
"model": model,
|
| 1077 |
+
"messages": messages,
|
| 1078 |
+
"temperature": temperature,
|
| 1079 |
+
"max_tokens": max_tokens
|
| 1080 |
+
}
|
| 1081 |
+
|
| 1082 |
response = requests.post(
|
| 1083 |
"https://api.cloud.cerebras.ai/v1/chat/completions",
|
| 1084 |
headers=headers,
|
| 1085 |
json=data,
|
| 1086 |
+
timeout=30 # Increased timeout
|
| 1087 |
)
|
| 1088 |
|
| 1089 |
if response.status_code != 200:
|
| 1090 |
raise ValueError(f"Cerebras API returned status code {response.status_code}: {response.text}")
|
| 1091 |
|
| 1092 |
return response.json()
|
| 1093 |
+
except requests.exceptions.RequestException as e:
|
| 1094 |
+
# More specific error handling for network issues
|
| 1095 |
+
if "NameResolution" in str(e):
|
| 1096 |
+
raise ValueError(
|
| 1097 |
+
"Unable to connect to the Cerebras API. This might be due to network "
|
| 1098 |
+
"restrictions in your environment. The API requires direct internet access. "
|
| 1099 |
+
"Please try a different provider or check your network settings."
|
| 1100 |
+
)
|
| 1101 |
+
else:
|
| 1102 |
+
raise ValueError(f"Request to Cerebras API failed: {str(e)}")
|
| 1103 |
except Exception as e:
|
| 1104 |
logger.error(f"Cerebras API error: {str(e)}")
|
| 1105 |
raise e
|