Update app.py

app.py CHANGED
@@ -856,28 +856,11 @@ def call_cohere_api(payload, api_key_override=None):
         temperature = payload.get("temperature", 0.7)
         max_tokens = payload.get("max_tokens", 1000)
 
-        #
-        #
-        cohere_messages = []
-        for msg in messages:
-            role = msg["role"].upper()  # Cohere requires uppercase roles
-            content = msg["content"]
-
-            # Handle multimodal content
-            if isinstance(content, list):
-                text_parts = []
-                for item in content:
-                    if item["type"] == "text":
-                        text_parts.append(item["text"])
-                content = "\n".join(text_parts)
-
-            cohere_messages.append({"role": role, "content": content})
-
-        # Create chat completion
+        # Create chat completion - note the correct format for Cohere ClientV2
+        # The ClientV2 chat method expects a 'messages' parameter, not 'message'
         response = client.chat(
-            message=cohere_messages[-1]["content"] if cohere_messages else "",
-            chat_history=cohere_messages[:-1] if len(cohere_messages) > 1 else [],
             model=model,
+            messages=messages,  # OpenAI-style messages are passed through as-is
             temperature=temperature,
             max_tokens=max_tokens
         )
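For reference, a minimal sketch of the ClientV2 call pattern this hunk adopts, assuming the `cohere` SDK is installed (the API key and model name are placeholders; the V2 chat method accepts OpenAI-style role/content dicts directly):

    import cohere

    client = cohere.ClientV2(api_key="YOUR_COHERE_API_KEY")  # placeholder key
    response = client.chat(
        model="command-r-plus",
        messages=[{"role": "user", "content": "Say hello in one word."}],
        temperature=0.7,
        max_tokens=100,
    )
    # In the V2 SDK the reply text lives under response.message.content
    print(response.message.content[0].text)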
@@ -904,35 +887,23 @@ def call_together_api(payload, api_key_override=None):
         )
 
         # Extract parameters from payload
-        model = payload.get("model", "meta-llama/Llama-3
+        model = payload.get("model", "meta-llama/Meta-Llama-3-8B-Instruct")
 
-        # Fix model name format - Together API
-
-
-
-
-        model =
+        # Fix model name format - Together API uses a different format
+        # Check documentation for correct model names: https://api.together.ai/models
+        if "llama-3.1" in model.lower():
+            model = "meta-llama/Meta-Llama-3-8B-Instruct"
+        elif "llama-3.3" in model.lower():
+            model = "meta-llama/Meta-Llama-3.3-70B-Instruct"
 
-        # Clean up messages - remove any unexpected properties
-        messages = []
-        for msg in payload.get("messages", []):
-            clean_msg = {
-                "role": msg["role"],
-                "content": msg["content"]
-            }
-            messages.append(clean_msg)
-
-        # Create payload
-        together_payload = {
-            "model": model,
-            "messages": messages,
-            "temperature": payload.get("temperature", 0.7),
-            "max_tokens": payload.get("max_tokens", 1000),
-            "stream": payload.get("stream", False)
-        }
-
         # Create completion
-        response = client.chat.completions.create(
+        response = client.chat.completions.create(
+            model=model,
+            messages=payload.get("messages", []),
+            temperature=payload.get("temperature", 0.7),
+            max_tokens=payload.get("max_tokens", 1000),
+            stream=payload.get("stream", False)
+        )
 
         return response
     except Exception as e:
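A minimal sketch of the OpenAI-compatible call pattern this hunk lands on, assuming the `together` SDK (the model name mirrors the remapping above; the key is a placeholder):

    from together import Together

    client = Together(api_key="YOUR_TOGETHER_API_KEY")  # placeholder key
    response = client.chat.completions.create(
        model="meta-llama/Meta-Llama-3-8B-Instruct",
        messages=[{"role": "user", "content": "Hello!"}],
        temperature=0.7,
        max_tokens=1000,
        stream=False,
    )
    # Response objects follow the OpenAI shape
    print(response.choices[0].message.content)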
@@ -952,33 +923,30 @@ def call_ovh_api(payload, api_key_override=None):
             "Content-Type": "application/json"
         }
 
-        # Clean up messages - remove any unexpected properties
-        clean_messages = []
-        for msg in messages:
-            clean_msg = {
-                "role": msg["role"],
-                "content": msg["content"]
-            }
-            clean_messages.append(clean_msg)
-
         data = {
             "model": model,
-            "messages":
+            "messages": messages,
             "temperature": temperature,
             "max_tokens": max_tokens
         }
 
-        #
-
-
-
-
-
-
-
-
-        return response.json()
+        # Use a try-except to handle DNS resolution errors and provide a more helpful message
+        try:
+            # Correct endpoint URL based on documentation
+            response = requests.post(
+                "https://endpoints.ai.cloud.ovh.net/v1/chat/completions",  # Updated endpoint
+                headers=headers,
+                json=data,
+                timeout=10  # Add timeout to avoid hanging
+            )
+
+            if response.status_code != 200:
+                raise ValueError(f"OVH API returned status code {response.status_code}: {response.text}")
+
+            return response.json()
+        except requests.exceptions.ConnectionError as e:
+            raise ValueError(f"Connection error to OVH API. This may be due to network restrictions in the environment: {str(e)}")
 
     except Exception as e:
         logger.error(f"OVH API error: {str(e)}")
         raise e
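Pulled out of the function, the new request pattern reads as follows; a standalone sketch using the endpoint and timeout from the hunk above (the helper name `ovh_chat` is ours, not part of the commit):

    import requests

    def ovh_chat(api_key, model, messages, temperature=0.7, max_tokens=1000):
        # Plain HTTP call to OVH AI Endpoints' OpenAI-compatible chat route
        response = requests.post(
            "https://endpoints.ai.cloud.ovh.net/v1/chat/completions",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {api_key}",
            },
            json={
                "model": model,
                "messages": messages,
                "temperature": temperature,
                "max_tokens": max_tokens,
            },
            timeout=10,  # mirrors the timeout added in the hunk
        )
        if response.status_code != 200:
            raise ValueError(f"OVH API returned {response.status_code}: {response.text}")
        return response.json()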
@@ -988,38 +956,41 @@ def call_cerebras_api(payload, api_key_override=None):
     try:
         # Extract parameters from payload
         model = payload.get("model", "cerebras/llama-3.1-8b")
-
-        # Clean up messages - remove any unexpected properties
-        messages = []
-        for msg in payload.get("messages", []):
-            clean_msg = {
-                "role": msg["role"],
-                "content": msg["content"]
-            }
-            messages.append(clean_msg)
+        # Strip 'cerebras/' prefix if present
+        if model.startswith("cerebras/"):
+            model = model[9:]
+
+        messages = payload.get("messages", [])
+        temperature = payload.get("temperature", 0.7)
+        max_tokens = payload.get("max_tokens", 1000)
 
         data = {
             "model": model,
             "messages": messages,
-            "temperature":
-            "max_tokens":
+            "temperature": temperature,
+            "max_tokens": max_tokens
         }
 
+        api_key = api_key_override if api_key_override else os.environ.get("CEREBRAS_API_KEY", "")
         headers = {
             "Content-Type": "application/json",
-            "Authorization": f"Bearer {
+            "Authorization": f"Bearer {api_key}"
        }
 
-
-
-
-
-
-
-            raise ValueError(f"Cerebras API returned status code {response.status_code}: {response.text}")
-
+        try:
+            response = requests.post(
+                "https://api.cloud.cerebras.ai/v1/chat/completions",
+                headers=headers,
+                json=data,
+                timeout=10  # Add timeout to avoid hanging
+            )
+
+            if response.status_code != 200:
+                raise ValueError(f"Cerebras API returned status code {response.status_code}: {response.text}")
+
+            return response.json()
+        except requests.exceptions.ConnectionError as e:
+            raise ValueError(f"Connection error to Cerebras API. This may be due to network restrictions in the environment: {str(e)}")
     except Exception as e:
         logger.error(f"Cerebras API error: {str(e)}")
         raise e
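One note on the prefix strip: `model[9:]` silently depends on `len("cerebras/") == 9`. An equivalent, self-describing strip (Python 3.9+, standard library only):

    model = "cerebras/llama-3.1-8b"
    model = model.removeprefix("cerebras/")  # -> "llama-3.1-8b"; no hard-coded slice length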
@@ -1027,80 +998,92 @@ def call_cerebras_api(payload, api_key_override=None):
 def call_googleai_api(payload, api_key_override=None):
     """Make a call to Google AI (Gemini) API with error handling"""
     try:
-        from google.generativeai import configure, GenerativeModel
-
         api_key = api_key_override if api_key_override else GOOGLEAI_API_KEY
         if not api_key:
             raise ValueError("Google AI API key is required")
 
-
+        # Use plain requests instead of the google-generativeai SDK, which may not be installed
+        gemini_api_url = "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-pro:generateContent"
 
         # Extract parameters from payload
-        model_name = payload.get("model", "gemini-1.5-pro")
         messages = payload.get("messages", [])
         temperature = payload.get("temperature", 0.7)
+        max_tokens = payload.get("max_tokens", 1000)
 
-        # Convert
-
+        # Convert to Google's format
+        content_parts = []
+
+        # Add all messages
         for msg in messages:
             role = msg["role"]
             content = msg["content"]
 
-            #
+            # Handle different roles
             if role == "system":
+                # System messages are not forwarded in this format; skip them
                 continue
-
-
-
+            elif role == "user":
+                # For user messages, add as regular content
+                if isinstance(content, str):
+                    content_parts.append({"text": content})
+                else:
+                    # Handle multimodal content
+                    for item in content:
+                        if item["type"] == "text":
+                            content_parts.append({"text": item["text"]})
 
-
-
-
-
-            for item in content:
-                if item["type"] == "text":
-                    parts.append({"text": item["text"]})
-                elif item["type"] == "image_url":
-                    image_data = item["image_url"]["url"]
-                    if image_data.startswith("data:"):
-                        # Extract base64 data
-                        mime, base64_data = image_data.split(";base64,")
-                        mime_type = mime.split(":")[1]
-                        parts.append({
-                            "inline_data": {
-                                "mime_type": mime_type,
-                                "data": base64_data
-                            }
-                        })
-            google_messages.append({"role": gemini_role, "parts": parts})
-        else:
-            # Simple text content
-            google_messages.append({"role": gemini_role, "parts": [{"text": content}]})
-
-        # Create Gemini model
-        model = GenerativeModel(model_name)
-
-        # Generate content
-        response = model.generate_content(
-            google_messages,
-            generation_config={
+        # Form the request data
+        data = {
+            "contents": [{"parts": content_parts}],
+            "generationConfig": {
                 "temperature": temperature,
-                "
-                "
+                "maxOutputTokens": max_tokens,
+                "topP": payload.get("top_p", 0.95),
             }
+        }
+
+        headers = {
+            "Content-Type": "application/json",
+            "x-goog-api-key": api_key
+        }
+
+        # Make the request
+        response = requests.post(
+            gemini_api_url,
+            headers=headers,
+            json=data,
+            timeout=30
         )
 
-
+        if response.status_code != 200:
+            error_msg = f"Google AI API error: {response.status_code} - {response.text}"
+            logger.error(error_msg)
+            raise ValueError(error_msg)
+
+        # Parse response and convert to standard format
+        result = response.json()
+        text_content = ""
+
+        # Extract text from response
+        if "candidates" in result and len(result["candidates"]) > 0:
+            candidate = result["candidates"][0]
+            if "content" in candidate and "parts" in candidate["content"]:
+                for part in candidate["content"]["parts"]:
+                    if "text" in part:
+                        text_content += part["text"]
+
+        # Create a standardized response format
         return {
             "choices": [
                 {
                     "message": {
                         "role": "assistant",
-                        "content":
+                        "content": text_content
                     }
                 }
            ]
         }
+
     except Exception as e:
         logger.error(f"Google AI API error: {str(e)}")
         raise e
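Note that this replacement flattens the whole conversation into a single `contents` turn: system messages are skipped and only text parts survive. A standalone sketch of the same REST round trip, using the endpoint, headers, and body shape from the hunk above (prompt text and key are placeholders):

    import requests

    url = "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-pro:generateContent"
    body = {
        "contents": [{"parts": [{"text": "Say hello in one word."}]}],
        "generationConfig": {"temperature": 0.7, "maxOutputTokens": 100, "topP": 0.95},
    }
    resp = requests.post(
        url,
        headers={"Content-Type": "application/json", "x-goog-api-key": "YOUR_GOOGLE_AI_KEY"},
        json=body,
        timeout=30,
    )
    resp.raise_for_status()
    data = resp.json()
    # Reply text sits under candidates[0].content.parts[*].text
    print("".join(p.get("text", "") for p in data["candidates"][0]["content"]["parts"]))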