"""Gradio demo for the Modade6787/gptoss-mymodel GPT-OSS model, with a Hugging Face Inference API fallback."""

import gradio as gr
import requests

# Try to import transformers; fall back to API mode if it is not available
try:
    from transformers import pipeline
    import torch
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    TRANSFORMERS_AVAILABLE = False

# Configuration
model_id = "Modade6787/gptoss-mymodel"  # Your custom GPT-OSS model

# Initialize the model pipeline if transformers is available
pipe = None
USING_CUSTOM_PIPELINE = False  # True when the manual GPT-OSS pipeline below is in use

if TRANSFORMERS_AVAILABLE:
    try:
        # First try with auto detection
        pipe = pipeline(
            "text-generation",
            model=model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True
        )
        MODEL_LOADED = True
        print(f"✅ Model {model_id} loaded successfully with transformers!")
    except Exception as e:
        print(f"❌ Failed to load model with auto detection: {e}")
        try:
            # Try loading as a GPT-OSS model specifically
            from transformers import AutoTokenizer, AutoModelForCausalLM

            print("🔄 Trying to load as GPT-OSS model...")
            tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
            model = AutoModelForCausalLM.from_pretrained(
                model_id,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                trust_remote_code=True
            )

            # Create a simple pipeline manually
            class SimpleGPTOSSPipeline:
                def __init__(self, model, tokenizer):
                    self.model = model
                    self.tokenizer = tokenizer

                def __call__(self, prompt, max_new_tokens=200, temperature=0.7, do_sample=True, **kwargs):
                    if isinstance(prompt, list) and len(prompt) > 0 and isinstance(prompt[0], dict):
                        # Handle chat format
                        text = prompt[0].get("content", str(prompt))
                    else:
                        text = str(prompt)

                    inputs = self.tokenizer(text, return_tensors="pt")
                    if torch.cuda.is_available():
                        inputs = {k: v.cuda() for k, v in inputs.items()}

                    with torch.no_grad():
                        outputs = self.model.generate(
                            **inputs,
                            max_new_tokens=max_new_tokens,
                            temperature=temperature,
                            do_sample=do_sample,
                            pad_token_id=self.tokenizer.eos_token_id
                        )

                    # Decode only the newly generated tokens so the prompt is not echoed back
                    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
                    generated_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
                    return [{"generated_text": generated_text}]

            pipe = SimpleGPTOSSPipeline(model, tokenizer)
            USING_CUSTOM_PIPELINE = True
            MODEL_LOADED = True
            print(f"✅ Model {model_id} loaded successfully with custom GPT-OSS handling!")
        except Exception as e2:
            MODEL_LOADED = False
            print(f"❌ Failed to load model with GPT-OSS handling: {e2}")
            print("📡 Falling back to API mode...")
else:
    MODEL_LOADED = False
    print("📦 Transformers not available, using API mode...")


def generate_text_transformers(prompt):
    """Generate text using the transformers pipeline."""
    try:
        # Handle both the standard pipeline and the custom GPT-OSS pipeline
        if USING_CUSTOM_PIPELINE:
            # Custom pipeline takes a plain string prompt
            outputs = pipe(prompt, max_new_tokens=200, temperature=0.7, do_sample=True)
        else:
            # Standard pipeline takes a chat-style list of messages
            messages = [{"role": "user", "content": prompt}]
            outputs = pipe(
                messages,
                max_new_tokens=200,
                temperature=0.7,
                do_sample=True,
                pad_token_id=pipe.tokenizer.eos_token_id
            )

        if outputs and len(outputs) > 0:
            if USING_CUSTOM_PIPELINE:
                # Custom pipeline format
                return outputs[0].get("generated_text", "No text generated")
            # Standard pipeline format
            if isinstance(outputs[0], dict) and "generated_text" in outputs[0]:
                if isinstance(outputs[0]["generated_text"], list):
                    # Chat output: the last message is the assistant's reply
                    return outputs[0]["generated_text"][-1].get("content", "No text generated")
                return outputs[0]["generated_text"]
            return str(outputs[0])
        return "No text generated"
    except Exception as e:
        return f"Error generating text: {str(e)}"


def generate_text_api_fallback(prompt,
                               hf_token=None):
    """Fall back to a direct call to the Hugging Face Inference API."""
    try:
        # Try a simple text-generation request
        api_url = f"https://api-inference.huggingface.co/models/{model_id}"
        headers = {"Content-Type": "application/json"}
        if hf_token:
            headers["Authorization"] = f"Bearer {hf_token}"

        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 200,
                "temperature": 0.7,
                "do_sample": True,
                "return_full_text": False
            }
        }

        response = requests.post(api_url, headers=headers, json=payload, timeout=60)

        if response.status_code == 200:
            result = response.json()
            if isinstance(result, list) and len(result) > 0:
                return result[0].get("generated_text", "No text generated")
            return str(result)
        else:
            # If the HF API fails, provide a helpful message
            return f"""⚠️ Model not accessible via API (Status: {response.status_code})

This could be because:
1. The GPT-OSS model requires special handling not available in the standard HF Inference API
2. The model might need to be loaded locally with the transformers library
3. You might need a valid Hugging Face token

To run this properly, you would need to:
- Install: pip install transformers torch
- The app will automatically use the transformers library when available

For now, here's a demo response to your prompt: "{prompt}"

This is a placeholder response. The actual GPT-OSS model would provide much more sophisticated output."""
    except requests.exceptions.Timeout:
        return "⏰ Request timed out. The model might be loading or busy."
    except Exception as e:
        return f"🔧 Technical details: {str(e)}"


def generate_text(prompt, hf_token=""):
    """Main generation function."""
    if not prompt.strip():
        return "Please enter a prompt."

    if MODEL_LOADED and pipe:
        return generate_text_transformers(prompt)
    else:
        return generate_text_api_fallback(prompt, hf_token if hf_token.strip() else None)


# Create the Gradio interface
with gr.Blocks(title="GPT-OSS Text Generator") as demo:
    gr.Markdown("# 🤖 GPT-OSS Text Generator")

    if MODEL_LOADED:
        gr.Markdown(f"✅ **Status**: Model `{model_id}` loaded successfully!")
        gr.Markdown("🚀 Using the transformers library for optimal performance")
    else:
        gr.Markdown(f"📡 **Status**: Using API mode for `{model_id}`")
        gr.Markdown("💡 Install `transformers` and `torch` for better performance")

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Enter your prompt:",
                lines=5,
                placeholder="Type your prompt here...",
                value="Explain quantum computing in simple terms."
            )

            if not MODEL_LOADED:
                hf_token_input = gr.Textbox(
                    label="Hugging Face Token (optional):",
                    type="password",
                    placeholder="Your HF token for API access..."
                )
            else:
                hf_token_input = gr.Textbox(visible=False, value="")

            generate_btn = gr.Button("🎯 Generate", variant="primary", size="lg")

            gr.Markdown("""
### 💡 Tips:
- Try different types of prompts: questions, creative writing, code, etc.
- The model works best with clear, specific prompts
- For better performance, install: `pip install transformers torch`
""")

        with gr.Column():
            output_text = gr.Textbox(
                label="Generated text:",
                lines=15,
                interactive=False,
                placeholder="Generated text will appear here..."
            )

    generate_btn.click(
        fn=generate_text,
        inputs=[prompt_input, hf_token_input],
        outputs=output_text
    )

    # Example prompts
    gr.Examples(
        examples=[
            ["Write a short story about a robot learning to paint."],
            ["Explain how neural networks work."],
            ["Write a Python function to sort a list of dictionaries."],
            ["What are the benefits of renewable energy?"],
            ["Create a haiku about artificial intelligence."]
        ],
        inputs=prompt_input,
        label="📝 Example Prompts"
    )

if __name__ == "__main__":
    demo.launch()