pratyushmaini committed on
Commit 6da6bfa
1 Parent(s): e9867ef

Update app.py

Files changed (1)
  1. app.py +82 -33
app.py CHANGED
@@ -13,45 +13,91 @@ model_list = {
     "Mix IFT V2 - Score0 Only MBS16 GBS1024": "locuslab/mix_ift_v2-smollm2-360m-smollm2-360m-score0_only-300B-mbs16-gbs1024-16feb-lr2e-05-gbs16"
 }
 
+# Dictionary to track which models support chat completion vs. text generation
+model_tasks = {
+    "HuggingFaceH4/zephyr-7b-beta": "chat-completion",  # This model supports chat completion
+    # Add other models that support chat completion
+}
+# Default to text-generation for models not specified above
+
 
 def respond(message, history, system_message, max_tokens, temperature, top_p, selected_model):
     try:
-        # Create an InferenceClient for the selected model
-        client = InferenceClient(model_list.get(selected_model, "HuggingFaceH4/zephyr-7b-beta"))
-
-        # Build conversation messages for the client
-        messages = [{"role": "system", "content": system_message}]
-        for user_msg, assistant_msg in history:
-            if user_msg:  # Only add non-empty messages
-                messages.append({"role": "user", "content": user_msg})
-            if assistant_msg:  # Only add non-empty messages
-                messages.append({"role": "assistant", "content": assistant_msg})
-        messages.append({"role": "user", "content": message})
+        # Get the model ID for the selected model
+        model_id = model_list.get(selected_model, "HuggingFaceH4/zephyr-7b-beta")
 
-        response = ""
+        # Create an InferenceClient for the selected model
+        client = InferenceClient(model_id)
 
-        # Stream the response from the client
-        for token_message in client.chat_completion(
-            messages,
-            max_tokens=max_tokens,
-            stream=True,
-            temperature=temperature,
-            top_p=top_p,
-        ):
-            # Safe extraction of token with error handling
-            try:
-                token = token_message.choices[0].delta.content
-                if token is not None:  # Handle potential None values
-                    response += token
-                    yield response
-            except (AttributeError, IndexError) as e:
-                # Handle cases where token structure might be different
-                print(f"Error extracting token: {e}")
-                continue
+        # Check if the model supports chat completion
+        if model_tasks.get(model_id) == "chat-completion":
+            # Handle as chat completion
+            messages = [{"role": "system", "content": system_message}]
+            for user_msg, assistant_msg in history:
+                if user_msg:  # Only add non-empty messages
+                    messages.append({"role": "user", "content": user_msg})
+                if assistant_msg:  # Only add non-empty messages
+                    messages.append({"role": "assistant", "content": assistant_msg})
+            messages.append({"role": "user", "content": message})
+
+            response = ""
+
+            # Stream the response from the client
+            for token_message in client.chat_completion(
+                messages,
+                max_tokens=max_tokens,
+                stream=True,
+                temperature=temperature,
+                top_p=top_p,
+            ):
+                # Safe extraction of token with error handling
+                try:
+                    token = token_message.choices[0].delta.content
+                    if token is not None:  # Handle potential None values
+                        response += token
+                        yield response
+                except (AttributeError, IndexError) as e:
+                    # Handle cases where token structure might be different
+                    print(f"Error extracting token: {e}")
+                    continue
+        else:
+            # Handle as text generation for models that don't support chat completion
+            # Format the prompt manually for text generation
+            formatted_prompt = f"{system_message}\n\n"
+
+            for user_msg, assistant_msg in history:
+                if user_msg:
+                    formatted_prompt += f"User: {user_msg}\n"
+                if assistant_msg:
+                    formatted_prompt += f"Assistant: {assistant_msg}\n"
+
+            formatted_prompt += f"User: {message}\nAssistant:"
+
+            response = ""
+
+            # Use text generation instead of chat completion
+            for token in client.text_generation(
+                formatted_prompt,
+                max_new_tokens=max_tokens,
+                stream=True,
+                temperature=temperature,
+                top_p=top_p,
+            ):
+                response += token
+                yield response
+
     except Exception as e:
-        # Return error message if the model call fails
-        print(f"Error calling model API: {e}")
-        yield f"Sorry, there was an error: {str(e)}"
+        # Return detailed error message if the model call fails
+        error_message = str(e)
+        print(f"Error calling model API: {error_message}")
+
+        # Check for specific error types and give more helpful messages
+        if "Task not found" in error_message:
+            yield ("Sorry, the selected model doesn't support chat completion. "
+                   "I'm switching to text generation mode. Please try again.")
+        else:
+            yield f"Sorry, there was an error: {error_message}"
+
 
 # Custom CSS for styling
 css = """
@@ -118,6 +164,9 @@ with gr.Blocks(css=css) as demo:
         </h1>
     </div>
     """)
+
+    # Status message for API errors
+    status_message = gr.Markdown("", elem_id="status-message")
 
     with gr.Row():
         # Left sidebar: Model selector
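
The commit routes each request either through chat_completion or, for models not listed in model_tasks, through a manually formatted text_generation prompt. Below is a minimal smoke-test sketch of that routing, not part of the commit: it assumes app.py exposes respond() and model_tasks exactly as in the diff above, that importing app.py does not launch the Gradio demo, and that the message, history, and sampling values are purely illustrative.

    import app  # assumption: importing app.py builds the Blocks UI but does not call launch()

    # Models absent from model_tasks take the text-generation fallback branch,
    # so the locuslab checkpoints in model_list use the manually built prompt.
    print(app.model_tasks.get("HuggingFaceH4/zephyr-7b-beta"))  # -> "chat-completion"

    # Drive the generator the way the Gradio callback would: each yielded value
    # is the full response accumulated so far, so only the last value is kept.
    final = ""
    for partial in app.respond(
        message="What does this Space do?",
        history=[("Hi", "Hello! How can I help?")],  # (user, assistant) tuples, as in the diff
        system_message="You are a helpful assistant.",
        max_tokens=128,
        temperature=0.7,
        top_p=0.95,
        selected_model="Mix IFT V2 - Score0 Only MBS16 GBS1024",
    ):
        final = partial

    print(final)

Note that the fallback only covers task routing: a selected model with no deployed Inference API endpoint will still surface the error text yielded by the except branch of respond().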