Commit c731b5a
1 Parent(s): 6da6bfa

Update app.py
app.py CHANGED
@@ -29,74 +29,99 @@ def respond(message, history, system_message, max_tokens, temperature, top_p, se
         # Create an InferenceClient for the selected model
         client = InferenceClient(model_id)

-        #
-        if
-            # Handle as chat completion
-            messages = [{"role": "system", "content": system_message}]
-            for user_msg, assistant_msg in history:
-                if user_msg:  # Only add non-empty messages
-                    messages.append({"role": "user", "content": user_msg})
-                if assistant_msg:  # Only add non-empty messages
-                    messages.append({"role": "assistant", "content": assistant_msg})
-            messages.append({"role": "user", "content": message})
-
-            response = ""
-
-            # Stream the response from the client
-            for token_message in client.chat_completion(
-                messages,
-                max_tokens=max_tokens,
-                stream=True,
-                temperature=temperature,
-                top_p=top_p,
-            ):
-                # Safe extraction of token with error handling
-                try:
-                    token = token_message.choices[0].delta.content
-                    if token is not None:  # Handle potential None values
-                        response += token
-                        yield response
-                except (AttributeError, IndexError) as e:
-                    # Handle cases where token structure might be different
-                    print(f"Error extracting token: {e}")
-                    continue
-        else:
-            # Handle as text generation for models that don't support chat completion
             # Format the prompt manually for text generation
-

-            for
-
-
-
-

-

             response = ""

             # Use text generation instead of chat completion
             for token in client.text_generation(
                 formatted_prompt,
                 max_new_tokens=max_tokens,
                 stream=True,
                 temperature=temperature,
                 top_p=top_p,
             ):
                 response += token
                 yield response

     except Exception as e:
         # Return detailed error message if the model call fails
         error_message = str(e)
         print(f"Error calling model API: {error_message}")
-
-        # Check for specific error types and give more helpful messages
-        if "Task not found" in error_message:
-            yield ("Sorry, the selected model doesn't support chat completion. "
-                   "I'm switching to text generation mode. Please try again.")
-        else:
-            yield f"Sorry, there was an error: {error_message}"


 # Custom CSS for styling
         # Create an InferenceClient for the selected model
         client = InferenceClient(model_id)

+        # Always use text generation for locuslab models
+        if "locuslab" in model_id:
             # Format the prompt manually for text generation
+            # Simple formatting that works with most models
+            formatted_prompt = ""

+            # Add minimal formatting for better results with research models
+            if len(history) > 0:
+                # Include minimal context from history
+                last_exchanges = history[-1:]  # Just use the last exchange
+                for user_msg, assistant_msg in last_exchanges:
+                    if user_msg:
+                        formatted_prompt += f"{user_msg}\n"

+            # Add current message - keep it simple
+            formatted_prompt += f"{message}"

             response = ""

             # Use text generation instead of chat completion
+            print(f"Using text generation with prompt: {formatted_prompt}")
             for token in client.text_generation(
                 formatted_prompt,
                 max_new_tokens=max_tokens,
                 stream=True,
                 temperature=temperature,
                 top_p=top_p,
+                do_sample=True  # Enable sampling for more creative responses
             ):
                 response += token
                 yield response
+        else:
+            # Try chat completion for standard models
+            try:
+                messages = [{"role": "system", "content": system_message}]
+                for user_msg, assistant_msg in history:
+                    if user_msg:  # Only add non-empty messages
+                        messages.append({"role": "user", "content": user_msg})
+                    if assistant_msg:  # Only add non-empty messages
+                        messages.append({"role": "assistant", "content": assistant_msg})
+                messages.append({"role": "user", "content": message})
+
+                response = ""
+
+                # Stream the response from the client
+                for token_message in client.chat_completion(
+                    messages,
+                    max_tokens=max_tokens,
+                    stream=True,
+                    temperature=temperature,
+                    top_p=top_p,
+                ):
+                    # Safe extraction of token with error handling
+                    try:
+                        token = token_message.choices[0].delta.content
+                        if token is not None:  # Handle potential None values
+                            response += token
+                            yield response
+                    except (AttributeError, IndexError) as e:
+                        # Handle cases where token structure might be different
+                        print(f"Error extracting token: {e}")
+                        continue
+            except Exception as e:
+                # If chat completion fails, fall back to text generation
+                print(f"Chat completion failed: {e}. Falling back to text generation.")
+                formatted_prompt = f"{system_message}\n\n"
+
+                for user_msg, assistant_msg in history:
+                    if user_msg:
+                        formatted_prompt += f"User: {user_msg}\n"
+                    if assistant_msg:
+                        formatted_prompt += f"Assistant: {assistant_msg}\n"
+
+                formatted_prompt += f"User: {message}\nAssistant:"
+
+                response = ""
+
+                # Use text generation instead of chat completion
+                for token in client.text_generation(
+                    formatted_prompt,
+                    max_new_tokens=max_tokens,
+                    stream=True,
+                    temperature=temperature,
+                    top_p=top_p,
+                ):
+                    response += token
+                    yield response

     except Exception as e:
         # Return detailed error message if the model call fails
         error_message = str(e)
         print(f"Error calling model API: {error_message}")
+        yield f"Error: {error_message}. Please try a different model or adjust parameters."


 # Custom CSS for styling
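For reference, a minimal self-contained sketch of the two streaming paths this commit routes between with huggingface_hub's InferenceClient: chat_completion yields chunks whose text sits in choices[0].delta.content (and can be None), while text_generation yields plain string tokens. The model IDs and prompt below are placeholders for illustration, not values from this Space, and the calls assume the hosted Inference API is reachable with a valid token.

from huggingface_hub import InferenceClient

# Chat-completion streaming: each chunk carries text in choices[0].delta.content.
chat_client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")  # placeholder chat model
for chunk in chat_client.chat_completion(
    [{"role": "user", "content": "Hello!"}],
    max_tokens=32,
    stream=True,
):
    piece = chunk.choices[0].delta.content
    if piece:  # delta.content can be None on some chunks
        print(piece, end="", flush=True)

# Text-generation streaming: each item is already a plain string.
text_client = InferenceClient("openai-community/gpt2")  # placeholder non-chat model
for token in text_client.text_generation(
    "Hello!",
    max_new_tokens=32,
    stream=True,
):
    print(token, end="", flush=True)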