import gradio as gr
import os
import json
import hmac
from datetime import datetime, date
from openai import OpenAI

# from llama_cpp import Llama

# apriel_q2 = Llama.from_pretrained(
#     repo_id="unsloth/Apriel-1.5-15b-Thinker-GGUF",
#     filename="Apriel-1.5-15b-Thinker-UD-IQ2_XXS.gguf",
# )

# ----------------------------------------------------------------------
# Helper to read secrets from the HF Space environment
# ----------------------------------------------------------------------
def _secret(key: str, fallback: str | None = None) -> str:
    """Return the environment variable *key*, or *fallback* if it is unset.

    Raises:
        RuntimeError: if the variable is unset and no fallback was supplied.
    """
    val = os.getenv(key)
    if val is not None:
        return val
    if fallback is not None:
        return fallback
    raise RuntimeError(f"Secret '{key}' not found. Please add it to your Space secrets.")

# ----------------------------------------------------------------------
# User Management
# ----------------------------------------------------------------------
def load_users():
    """Load the username -> password map from the CHAT_USERS secret.

    CHAT_USERS is expected to hold a JSON object mapping usernames to
    passwords. Malformed or non-object payloads yield an empty user map.
    """
    users = {}
    # Try to load from JSON string
    users_json = _secret("CHAT_USERS", "{}")
    try:
        users_data = json.loads(users_json)
        for username, password in users_data.items():
            users[username] = password
    except (json.JSONDecodeError, AttributeError):
        # FIX: was a bare `except:` — only swallow malformed JSON or a
        # payload that is not a dict (no .items()); anything else is a bug.
        pass
    return users

# Load users once at import time
VALID_USERS = load_users()

def authenticate_user(username, password):
    """Authenticate a user against the valid users dictionary.

    Uses hmac.compare_digest for a constant-time comparison so the check
    does not leak password-prefix timing information.
    """
    expected = VALID_USERS.get(username)
    if expected is None:
        return False
    return hmac.compare_digest(str(expected), str(password))

# ----------------------------------------------------------------------
# Configuration
# ----------------------------------------------------------------------
# Available models with their respective API configurations.
# "translate": "yes" means the model answers in English and a second call
# translates the answer to Indonesian (see respond()).
MODELS = {
    # "Qwen3-4B-Thinking-2507": {
    #     "provider": "huggingface",
    #     "model_name": "Qwen/Qwen3-4B-Thinking-2507:nscale",
    #     "api_url": "https://router.huggingface.co/v1"
    # },
    # "Free - NVIDIA Nemotron-nano-9b [EN] + Gemma 3n4b [ID]": {
    #     "provider": "openrouter",
    #     "model_name": "nvidia/nemotron-nano-9b-v2:free",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "yes"
    # },
    # "Free - Gpt-oss-20b [EN] + Gemma 3n4b [ID]": {
    #     "provider": "openrouter",
    #     "model_name": "openai/gpt-oss-20b:free",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "yes"
    # },
    # "Free - Glm-4.5-air [EN] + Gemma 3n4b [ID]": {
    #     "provider": "openrouter",
    #     "model_name": "z-ai/glm-4.5-air:free",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "yes"
    # },
    # "Free - Deepseek-chat-v3.1": {
    #     "provider": "openrouter",
    #     "model_name": "deepseek/deepseek-chat-v3.1:free",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "no"
    # },
    # "Ringan - Gemma-3n4b": {
    #     "provider": "openrouter",
    #     "model_name": "google/gemma-3n-e4b-it:floor",
    #     "api_url": "https://openrouter.ai/api/v1"
    # },
    "Gpt-oss-120b": {
        "provider": "openrouter",
        "model_name": "@preset/cps-chat",
        "api_url": "https://openrouter.ai/api/v1",
        "translate": "no"
    },
    # "Gpt-oss-20b": {
    #     "provider": "openrouter",
    #     "model_name": "openai/gpt-oss-20b:floor",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "no"
    # },
    # "Tongyi-deepresearch-30b-a3b": {
    #     "provider": "openrouter",
    #     "model_name": "alibaba/tongyi-deepresearch-30b-a3b:floor",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "no"
    # },
    # "Gpt-oss-120b": {
    #     "provider": "openrouter",
    #     "model_name": "openai/gpt-oss-120b:floor",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "no"
    # },
    # "Apriel-1.5-15b-thinker-Q2_quantized": {
    #     "provider": "llama.cpp",
    #     "model_name": "apriel-1.5-15b",
    #     "api_url": "local",
    #     "translate": "no"
    # },
}

# Get model display names for dropdown
MODEL_NAMES = list(MODELS.keys())

# ----------------------------------------------------------------------
# Core Chat Logic
# ----------------------------------------------------------------------
def _translate_to_indonesian(english_text, max_tokens):
    """Translate *english_text* to Indonesian via Gemma on OpenRouter.

    Returns the translation, or falls back to the original English text on
    any API error or an implausibly short/empty result.
    """
    try:
        # Translator client is created here (not unconditionally in respond)
        # so models that don't translate never require OPENROUTER_KEY.
        translator_client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=_secret("OPENROUTER_KEY"),
        )
        # Translation prompt: focus only on translating the response (not query)
        translation_messages = [
            {
                "role": "system",
                "content": (
                    "Translate the following English text to natural, accurate Bahasa Indonesia. "
                    "**IMPORTANT: OUTPUT ONLY THE TRANSLATION. NO PREAMBLES, COMMENTS, OR EXPLANATIONS. "
                    "Just the Indonesian text."
                )
            },
            {
                "role": "user",
                "content": english_text  # The English response to translate
            }
        ]
        translation_response = translator_client.chat.completions.create(
            model="google/gemma-3n-e4b-it:floor",
            messages=translation_messages,
            max_tokens=max_tokens,  # Reuse limit; translation is short
            stream=False,
        )
        translated = translation_response.choices[0].message.content.strip()
        # Fallback to English if translation is empty or suspiciously short
        if not translated or len(translated) < 10:  # Basic sanity check
            return english_text
        return translated
    except Exception as trans_error:
        print(f"Translation error: {trans_error}")
        return english_text  # Fallback to English

def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    selected_model,
):
    """
    Handle chat responses using the selected model.
    For 'translate'=='yes': first call responds in English, a second call
    translates to Indonesian. History reflects Indonesian queries and
    responses.

    Returns the assistant's reply as a string (Gradio appends it to history),
    or an "Error: ..." string on failure.
    """
    try:
        # Get model configuration
        model_config = MODELS[selected_model]
        provider = model_config["provider"]

        # Get API key based on provider
        if provider == "huggingface":
            api_key = _secret("HF_TOKEN")
        elif provider == "openrouter":
            api_key = _secret("OPENROUTER_KEY")
        else:
            # FIX: previously an unknown provider left api_key unbound and
            # crashed later with NameError; fail loudly and clearly instead.
            raise ValueError(f"Unsupported provider: {provider}")

        # Configure main client for selected model
        client = OpenAI(
            base_url=model_config["api_url"],
            api_key=api_key,
        )

        # Modify system message to force English response if translate=yes
        if model_config.get("translate") == "yes":
            final_system_message = (
                "**HIGHEST PRIORITY: YOU MUST ALWAYS THINK AND RESPOND IN ENGLISH "
                "REGARDLESS OF OTHER INSTRUCTIONS FROM HERE ON.\n" + system_message
            )
        else:
            final_system_message = system_message

        # Prepare messages for main API call (full history + current message)
        messages = [{"role": "system", "content": final_system_message}]
        messages.extend(history)  # History: Indo queries + prior Indo responses
        messages.append({"role": "user", "content": message})  # New Indo query

        # First API call: get response from selected model (English if translate=yes)
        # FIX: was `(a == x) | (b == y)` — bitwise OR on booleans; use `or`.
        if provider == "huggingface" or provider == "openrouter":
            response = client.chat.completions.create(
                model=model_config["model_name"],
                messages=messages,
                max_tokens=max_tokens,
                reasoning_effort="high",
                stream=False,
            )
            english_response = response.choices[0].message.content

            # If translate=yes, second API call translates to Indonesian
            if model_config.get("translate") == "yes":
                return _translate_to_indonesian(english_response, max_tokens)
            return english_response
        # else:
        #     response = apriel_q2.create_chat_completion(
        #         messages=messages
        #     )
        #     return response.choices[0].message.content
    except Exception as e:
        print(f"Error in respond function: {e}")
        return f"Error: {str(e)}"  # Return error string; Gradio appends it

# ----------------------------------------------------------------------
# Custom Auth Function for Gradio
# ----------------------------------------------------------------------
def gradio_auth(username, password):
    """Custom authentication function for Gradio"""
    return authenticate_user(username, password)

# ----------------------------------------------------------------------
# UI Layout
# ----------------------------------------------------------------------
# Tips section
tips_md = """
"""

# Footer
# footer_md = """
# ---
# **Providers**: Hugging Face Inference API + OpenRouter, dipilih providers dengan kebijakan ZDR (Zero Data Retention). Artinya data request/response tidak disimpan dan tidak digunakan untuk training data.
# Jika error, kemungkinan kena rate limit sehingga bisa coba model lain.
# """

# Create the chat interface
with gr.Blocks(
    title="AI Chat",
    theme=gr.themes.Soft()
) as demo:
    gr.Markdown("# AI Chat")
    gr.Markdown("Provider dipilih khusus yang tidak menggunakan data untuk training (YOI/251029).")

    # Model selection and settings in sidebar
    with gr.Sidebar():
        gr.Markdown("### ⚙️ Configuration")

        # Model selection
        selected_model = gr.Dropdown(
            choices=MODEL_NAMES,
            value=MODEL_NAMES[0],
            label="Select Model",
            info="Choose which AI model to use"
        )

        # Display current user (if available)
        current_user = gr.Textbox(
            label="Current User",
            value="Authenticated User",
            interactive=False,
            visible=False  # Hide by default, can set to True if you want to show
        )

        # Advanced settings
        with gr.Accordion("Advanced Settings", open=False):
            system_message = gr.Textbox(
                value="Anda adalah asisten AI. Jawab dalam Bahasa Indonesia, di bawah 100 kata. Berikan label yang jelas antara fakta dan inferensi. Jika tidak yakin dengan suatu fakta, berikan identifier yang jelas, seperti 'Mungkin...', 'Sepertinya....'",
                label="System Message",
                info="Instruksi untuk AI."
            )
            max_tokens = gr.Slider(
                minimum=1,
                maximum=30000,
                value=4000,
                step=100,
                label="Max New Tokens",
                info="Jumlah token respon maksimum."
            )

    # Main chat interface
    chatbot = gr.ChatInterface(
        respond,
        type="messages",
        additional_inputs=[
            system_message,
            max_tokens,
            selected_model,
        ],
        examples=[
            ["Jelaskan penggunaan King's Safety Stock dalam inventory management."],
            ["Bandingkan use‑case dan tingkat kesulitan antara penggunaan R, Excel, dan Tableau untuk analisis data."],
            ["Kampanye training perusahaan “Ceria Melayani Semangat Berprestasi” bertujuan meningkatkan kolaborasi antar departemen. Jelaskan kenapa ini 'tone-deaf' dan bukan solusi masalah."],
            ["Apa saran praktis untuk transisi perusahaan brick dan mortar dengan data maturity yang rendah untuk membangun budaya yang data-driven?"]
        ],
        cache_examples=False,
    )

    # Tips and footer
    gr.Markdown(tips_md)
    # gr.Markdown(footer_md)

# ----------------------------------------------------------------------
# Launch with Custom Auth
# ----------------------------------------------------------------------
if __name__ == "__main__":
    demo.launch(
        auth=gradio_auth,  # Use our custom auth function
        auth_message="Please login to access the chat interface",
        server_name="0.0.0.0",
        ssr_mode=False,
        server_port=7860,
        show_error=True
    )