import gradio as gr
import os
import json
import hmac
from datetime import datetime, date
from openai import OpenAI

# from llama_cpp import Llama

# apriel_q2 = Llama.from_pretrained(
#     repo_id="unsloth/Apriel-1.5-15b-Thinker-GGUF",
#     filename="Apriel-1.5-15b-Thinker-UD-IQ2_XXS.gguf",
# )

# ----------------------------------------------------------------------
# Helper to read secrets from the HF Space environment
# ----------------------------------------------------------------------
def _secret(key: str, fallback: str | None = None) -> str:
    """Return the environment variable *key*, or *fallback* if it is unset.

    Raises:
        RuntimeError: if the variable is unset and no fallback was supplied.
    """
    val = os.getenv(key)
    if val is not None:
        return val
    if fallback is not None:
        return fallback
    raise RuntimeError(f"Secret '{key}' not found. Please add it to your Space secrets.")

# ----------------------------------------------------------------------
# User Management
# ----------------------------------------------------------------------
def load_users():
    """Load the username -> password map from the CHAT_USERS secret.

    CHAT_USERS is expected to hold a JSON object mapping usernames to
    passwords. Malformed or non-object payloads yield an empty user map.
    """
    users = {}
    # Try to load from JSON string
    users_json = _secret("CHAT_USERS", "{}")
    try:
        users_data = json.loads(users_json)
        for username, password in users_data.items():
            users[username] = password
    except (json.JSONDecodeError, AttributeError):
        # FIX: was a bare `except:` — only swallow malformed JSON or a
        # payload that is not a dict (no .items()); anything else is a bug.
        pass
    return users

# Load users once at import time
VALID_USERS = load_users()

def authenticate_user(username, password):
    """Authenticate a user against the valid users dictionary.

    Uses hmac.compare_digest for a constant-time comparison so the check
    does not leak password-prefix timing information.
    """
    expected = VALID_USERS.get(username)
    if expected is None:
        return False
    return hmac.compare_digest(str(expected), str(password))

# ----------------------------------------------------------------------
# Configuration
# ----------------------------------------------------------------------
# Available models with their respective API configurations.
# "translate": "yes" means the model answers in English and a second call
# translates the answer to Indonesian (see respond()).
MODELS = {
    # "Qwen3-4B-Thinking-2507": {
    #     "provider": "huggingface",
    #     "model_name": "Qwen/Qwen3-4B-Thinking-2507:nscale",
    #     "api_url": "https://router.huggingface.co/v1"
    # },
    # "Free - NVIDIA Nemotron-nano-9b [EN] + Gemma 3n4b [ID]": {
    #     "provider": "openrouter",
    #     "model_name": "nvidia/nemotron-nano-9b-v2:free",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "yes"
    # },
    # "Free - Gpt-oss-20b [EN] + Gemma 3n4b [ID]": {
    #     "provider": "openrouter",
    #     "model_name": "openai/gpt-oss-20b:free",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "yes"
    # },
    # "Free - Glm-4.5-air [EN] + Gemma 3n4b [ID]": {
    #     "provider": "openrouter",
    #     "model_name": "z-ai/glm-4.5-air:free",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "yes"
    # },
    # "Free - Deepseek-chat-v3.1": {
    #     "provider": "openrouter",
    #     "model_name": "deepseek/deepseek-chat-v3.1:free",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "no"
    # },
    # "Ringan - Gemma-3n4b": {
    #     "provider": "openrouter",
    #     "model_name": "google/gemma-3n-e4b-it:floor",
    #     "api_url": "https://openrouter.ai/api/v1"
    # },
    "Gpt-oss-120b": {
        "provider": "openrouter",
        "model_name": "@preset/cps-chat",
        "api_url": "https://openrouter.ai/api/v1",
        "translate": "no"
    },
    # "Gpt-oss-20b": {
    #     "provider": "openrouter",
    #     "model_name": "openai/gpt-oss-20b:floor",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "no"
    # },
    # "Tongyi-deepresearch-30b-a3b": {
    #     "provider": "openrouter",
    #     "model_name": "alibaba/tongyi-deepresearch-30b-a3b:floor",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "no"
    # },
    # "Gpt-oss-120b": {
    #     "provider": "openrouter",
    #     "model_name": "openai/gpt-oss-120b:floor",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "no"
    # },
    # "Apriel-1.5-15b-thinker-Q2_quantized": {
    #     "provider": "llama.cpp",
    #     "model_name": "apriel-1.5-15b",
    #     "api_url": "local",
    #     "translate": "no"
    # },
}

# Get model display names for dropdown
MODEL_NAMES = list(MODELS.keys())

# ----------------------------------------------------------------------
# Core Chat Logic
# ----------------------------------------------------------------------
def _translate_to_indonesian(english_text, max_tokens):
    """Translate *english_text* to Indonesian via Gemma on OpenRouter.

    Returns the translation, or falls back to the original English text on
    any API error or an implausibly short/empty result.
    """
    try:
        # Translator client is created here (not unconditionally in respond)
        # so models that don't translate never require OPENROUTER_KEY.
        translator_client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=_secret("OPENROUTER_KEY"),
        )
        # Translation prompt: focus only on translating the response (not query)
        translation_messages = [
            {
                "role": "system",
                "content": (
                    "Translate the following English text to natural, accurate Bahasa Indonesia. "
                    "**IMPORTANT: OUTPUT ONLY THE TRANSLATION. NO PREAMBLES, COMMENTS, OR EXPLANATIONS. "
                    "Just the Indonesian text."
                )
            },
            {
                "role": "user",
                "content": english_text  # The English response to translate
            }
        ]
        translation_response = translator_client.chat.completions.create(
            model="google/gemma-3n-e4b-it:floor",
            messages=translation_messages,
            max_tokens=max_tokens,  # Reuse limit; translation is short
            stream=False,
        )
        translated = translation_response.choices[0].message.content.strip()
        # Fallback to English if translation is empty or suspiciously short
        if not translated or len(translated) < 10:  # Basic sanity check
            return english_text
        return translated
    except Exception as trans_error:
        print(f"Translation error: {trans_error}")
        return english_text  # Fallback to English

def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    selected_model,
):
    """
    Handle chat responses using the selected model.
    For 'translate'=='yes': first call responds in English, a second call
    translates to Indonesian. History reflects Indonesian queries and
    responses.

    Returns the assistant's reply as a string (Gradio appends it to history),
    or an "Error: ..." string on failure.
    """
    try:
        # Get model configuration
        model_config = MODELS[selected_model]
        provider = model_config["provider"]

        # Get API key based on provider
        if provider == "huggingface":
            api_key = _secret("HF_TOKEN")
        elif provider == "openrouter":
            api_key = _secret("OPENROUTER_KEY")
        else:
            # FIX: previously an unknown provider left api_key unbound and
            # crashed later with NameError; fail loudly and clearly instead.
            raise ValueError(f"Unsupported provider: {provider}")

        # Configure main client for selected model
        client = OpenAI(
            base_url=model_config["api_url"],
            api_key=api_key,
        )

        # Modify system message to force English response if translate=yes
        if model_config.get("translate") == "yes":
            final_system_message = (
                "**HIGHEST PRIORITY: YOU MUST ALWAYS THINK AND RESPOND IN ENGLISH "
                "REGARDLESS OF OTHER INSTRUCTIONS FROM HERE ON.\n" + system_message
            )
        else:
            final_system_message = system_message

        # Prepare messages for main API call (full history + current message)
        messages = [{"role": "system", "content": final_system_message}]
        messages.extend(history)  # History: Indo queries + prior Indo responses
        messages.append({"role": "user", "content": message})  # New Indo query

        # First API call: get response from selected model (English if translate=yes)
        # FIX: was `(a == x) | (b == y)` — bitwise OR on booleans; use `or`.
        if provider == "huggingface" or provider == "openrouter":
            response = client.chat.completions.create(
                model=model_config["model_name"],
                messages=messages,
                max_tokens=max_tokens,
                reasoning_effort="high",
                stream=False,
            )
            english_response = response.choices[0].message.content

            # If translate=yes, second API call translates to Indonesian
            if model_config.get("translate") == "yes":
                return _translate_to_indonesian(english_response, max_tokens)
            return english_response
        # else:
        #     response = apriel_q2.create_chat_completion(
        #         messages=messages
        #     )
        #     return response.choices[0].message.content
    except Exception as e:
        print(f"Error in respond function: {e}")
        return f"Error: {str(e)}"  # Return error string; Gradio appends it

# ----------------------------------------------------------------------
# Custom Auth Function for Gradio
# ----------------------------------------------------------------------
def gradio_auth(username, password):
    """Custom authentication function for Gradio"""
    return authenticate_user(username, password)

# ----------------------------------------------------------------------
# UI Layout
# ----------------------------------------------------------------------
# Tips section
tips_md = """
"""

# Footer
# footer_md = """
# ---
# **Providers**: Hugging Face Inference API + OpenRouter, dipilih providers dengan kebijakan ZDR (Zero Data Retention). Artinya data request/response tidak disimpan dan tidak digunakan untuk training data.
# Jika error, kemungkinan kena rate limit sehingga bisa coba model lain.
# """

# Create the chat interface
with gr.Blocks(
    title="AI Chat",
    theme=gr.themes.Soft()
) as demo:
    gr.Markdown("# AI Chat")
    gr.Markdown("Provider dipilih khusus yang tidak menggunakan data untuk training (YOI/251029).")

    # Model selection and settings in sidebar
    with gr.Sidebar():
        gr.Markdown("### ⚙️ Configuration")

        # Model selection
        selected_model = gr.Dropdown(
            choices=MODEL_NAMES,
            value=MODEL_NAMES[0],
            label="Select Model",
            info="Choose which AI model to use"
        )

        # Display current user (if available)
        current_user = gr.Textbox(
            label="Current User",
            value="Authenticated User",
            interactive=False,
            visible=False  # Hide by default, can set to True if you want to show
        )

        # Advanced settings
        with gr.Accordion("Advanced Settings", open=False):
            system_message = gr.Textbox(
                value="Anda adalah asisten AI. Jawab dalam Bahasa Indonesia, di bawah 100 kata. Berikan label yang jelas antara fakta dan inferensi. Jika tidak yakin dengan suatu fakta, berikan identifier yang jelas, seperti 'Mungkin...', 'Sepertinya....'",
                label="System Message",
                info="Instruksi untuk AI."
            )
            max_tokens = gr.Slider(
                minimum=1,
                maximum=30000,
                value=4000,
                step=100,
                label="Max New Tokens",
                info="Jumlah token respon maksimum."
            )

    # Main chat interface
    chatbot = gr.ChatInterface(
        respond,
        type="messages",
        additional_inputs=[
            system_message,
            max_tokens,
            selected_model,
        ],
        examples=[
            ["Jelaskan penggunaan King's Safety Stock dalam inventory management."],
            ["Bandingkan use‑case dan tingkat kesulitan antara penggunaan R, Excel, dan Tableau untuk analisis data."],
            ["Kampanye training perusahaan “Ceria Melayani Semangat Berprestasi” bertujuan meningkatkan kolaborasi antar departemen. Jelaskan kenapa ini 'tone-deaf' dan bukan solusi masalah."],
            ["Apa saran praktis untuk transisi perusahaan brick dan mortar dengan data maturity yang rendah untuk membangun budaya yang data-driven?"]
        ],
        cache_examples=False,
    )

    # Tips and footer
    gr.Markdown(tips_md)
    # gr.Markdown(footer_md)

# ----------------------------------------------------------------------
# Launch with Custom Auth
# ----------------------------------------------------------------------
if __name__ == "__main__":
    demo.launch(
        auth=gradio_auth,  # Use our custom auth function
        auth_message="Please login to access the chat interface",
        server_name="0.0.0.0",
        ssr_mode=False,
        server_port=7860,
        show_error=True
    )