import gradio as gr from huggingface_hub import InferenceClient # Define available models (update with your actual model IDs) model_list = { "Safe LM": "HuggingFaceH4/zephyr-7b-beta", "Baseline 1": "HuggingFaceH4/zephyr-7b-beta", "Another Model": "HuggingFaceH4/zephyr-7b-beta", "LLaMA3.2-1B": "meta-llama/Llama-3.2-1B-Instruct", "Mix IFT V2 - Score0 Rephrased": "locuslab/mix_ift_v2-smollm2-360m-smollm2-360m-score0_mix_rephrased_from_beginning-300B", "Mix IFT V2 - Score0 Only": "locuslab/mix_ift_v2-smollm2-360m-smollm2-360m-score0_only-300B", "Mix IFT V2 - All Raw Folders Metadata": "locuslab/mix_ift_v2-smollm2-360m-smollm2-360m-all_raw_folders_metadata-300B", "Mix IFT V2 - All Raw Folders Baseline": "locuslab/mix_ift_v2-smollm2-360m-smollm2-360m-all_raw_folders_baseline-300B", "Mix IFT V2 - Score0 Only MBS16 GBS1024": "locuslab/mix_ift_v2-smollm2-360m-smollm2-360m-score0_only-300B-mbs16-gbs1024-16feb-lr2e-05-gbs16" } # Dictionary to track which models support chat completion vs. text generation model_tasks = { "HuggingFaceH4/zephyr-7b-beta": "chat-completion", # This model supports chat completion # Add other models that support chat completion } # Default to text-generation for models not specified above def respond(message, history, system_message, max_tokens, temperature, top_p, selected_model): try: # Get the model ID for the selected model model_id = model_list.get(selected_model, "HuggingFaceH4/zephyr-7b-beta") # Create an InferenceClient for the selected model client = InferenceClient(model_id) # Always use text generation for locuslab models if "locuslab" in model_id: # Format the prompt manually for text generation # Simple formatting that works with most models formatted_prompt = "" # Add minimal formatting for better results with research models if len(history) > 0: # Include minimal context from history last_exchanges = history[-1:] # Just use the last exchange for user_msg, assistant_msg in last_exchanges: if user_msg: formatted_prompt += f"{user_msg}\n" # Add current message - keep it simple formatted_prompt += f"{message}" response = "" # Use text generation instead of chat completion print(f"Using text generation with prompt: {formatted_prompt}") for token in client.text_generation( formatted_prompt, max_new_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p, do_sample=True # Enable sampling for more creative responses ): response += token yield response else: # Try chat completion for standard models try: messages = [{"role": "system", "content": system_message}] for user_msg, assistant_msg in history: if user_msg: # Only add non-empty messages messages.append({"role": "user", "content": user_msg}) if assistant_msg: # Only add non-empty messages messages.append({"role": "assistant", "content": assistant_msg}) messages.append({"role": "user", "content": message}) response = "" # Stream the response from the client for token_message in client.chat_completion( messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p, ): # Safe extraction of token with error handling try: token = token_message.choices[0].delta.content if token is not None: # Handle potential None values response += token yield response except (AttributeError, IndexError) as e: # Handle cases where token structure might be different print(f"Error extracting token: {e}") continue except Exception as e: # If chat completion fails, fall back to text generation print(f"Chat completion failed: {e}. Falling back to text generation.") formatted_prompt = f"{system_message}\n\n" for user_msg, assistant_msg in history: if user_msg: formatted_prompt += f"User: {user_msg}\n" if assistant_msg: formatted_prompt += f"Assistant: {assistant_msg}\n" formatted_prompt += f"User: {message}\nAssistant:" response = "" # Use text generation instead of chat completion for token in client.text_generation( formatted_prompt, max_new_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p, ): response += token yield response except Exception as e: # Return detailed error message if the model call fails error_message = str(e) print(f"Error calling model API: {error_message}") yield f"Error: {error_message}. Please try a different model or adjust parameters." # Custom CSS for styling css = """ body { background-color: #f0f5fb; /* Light pastel blue background */ } .gradio-container { background-color: white; border-radius: 16px; box-shadow: 0 2px 10px rgba(0,0,0,0.05); max-width: 90%; margin: 15px auto; padding-bottom: 20px; } /* Header styling with diagonal shield */ .app-header { position: relative; overflow: hidden; } .app-header::before { content: "🛡️"; position: absolute; font-size: 100px; opacity: 0.1; right: -20px; top: -30px; transform: rotate(15deg); pointer-events: none; } /* Simple styling for buttons */ #send-btn { background-color: white !important; color: #333 !important; border: 2px solid #e6c200 !important; } #send-btn:hover { background-color: #fff9e6 !important; } #clear-btn { background-color: white !important; color: #333 !important; border: 2px solid #e6c200 !important; } #clear-btn:hover { background-color: #fff9e6 !important; } /* Hide elements */ footer { display: none !important; } .footer { display: none !important; } """ with gr.Blocks(css=css) as demo: # Custom header with branding gr.HTML("""