"""Gradio demo for the Modade6787/gptoss-mymodel GPT-OSS model, with a Hugging Face Inference API fallback."""

import gradio as gr
import requests

# Try to import transformers; fall back to API mode if it is not available
try:
    from transformers import pipeline
    import torch
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    TRANSFORMERS_AVAILABLE = False

# Configuration
model_id = "Modade6787/gptoss-mymodel"  # Your custom GPT-OSS model

# Initialize the model pipeline if transformers is available
pipe = None
USING_CUSTOM_PIPELINE = False  # True when the manual GPT-OSS pipeline below is in use

if TRANSFORMERS_AVAILABLE:
    try:
        # First try with auto detection
        pipe = pipeline(
            "text-generation",
            model=model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True
        )
        MODEL_LOADED = True
        print(f"✅ Model {model_id} loaded successfully with transformers!")
    except Exception as e:
        print(f"❌ Failed to load model with auto detection: {e}")
        try:
            # Try loading as a GPT-OSS model specifically
            from transformers import AutoTokenizer, AutoModelForCausalLM

            print("🔄 Trying to load as GPT-OSS model...")
            tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
            model = AutoModelForCausalLM.from_pretrained(
                model_id,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                trust_remote_code=True
            )

            # Create a simple pipeline manually
            class SimpleGPTOSSPipeline:
                def __init__(self, model, tokenizer):
                    self.model = model
                    self.tokenizer = tokenizer

                def __call__(self, prompt, max_new_tokens=200, temperature=0.7, do_sample=True, **kwargs):
                    if isinstance(prompt, list) and len(prompt) > 0 and isinstance(prompt[0], dict):
                        # Handle chat format
                        text = prompt[0].get("content", str(prompt))
                    else:
                        text = str(prompt)

                    inputs = self.tokenizer(text, return_tensors="pt")
                    if torch.cuda.is_available():
                        inputs = {k: v.cuda() for k, v in inputs.items()}

                    with torch.no_grad():
                        outputs = self.model.generate(
                            **inputs,
                            max_new_tokens=max_new_tokens,
                            temperature=temperature,
                            do_sample=do_sample,
                            pad_token_id=self.tokenizer.eos_token_id
                        )

                    # Decode only the newly generated tokens so the prompt is not echoed back
                    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
                    generated_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
                    return [{"generated_text": generated_text}]

            pipe = SimpleGPTOSSPipeline(model, tokenizer)
            USING_CUSTOM_PIPELINE = True
            MODEL_LOADED = True
            print(f"✅ Model {model_id} loaded successfully with custom GPT-OSS handling!")
        except Exception as e2:
            MODEL_LOADED = False
            print(f"❌ Failed to load model with GPT-OSS handling: {e2}")
            print("📡 Falling back to API mode...")
else:
    MODEL_LOADED = False
    print("📦 Transformers not available, using API mode...")


def generate_text_transformers(prompt):
    """Generate text using the transformers pipeline."""
    try:
        # Handle both the standard pipeline and the custom GPT-OSS pipeline
        if USING_CUSTOM_PIPELINE:
            # Custom pipeline takes a plain string prompt
            outputs = pipe(prompt, max_new_tokens=200, temperature=0.7, do_sample=True)
        else:
            # Standard pipeline takes a chat-style list of messages
            messages = [{"role": "user", "content": prompt}]
            outputs = pipe(
                messages,
                max_new_tokens=200,
                temperature=0.7,
                do_sample=True,
                pad_token_id=pipe.tokenizer.eos_token_id
            )

        if outputs and len(outputs) > 0:
            if USING_CUSTOM_PIPELINE:
                # Custom pipeline format
                return outputs[0].get("generated_text", "No text generated")
            # Standard pipeline format
            if isinstance(outputs[0], dict) and "generated_text" in outputs[0]:
                if isinstance(outputs[0]["generated_text"], list):
                    # Chat output: the last message is the assistant's reply
                    return outputs[0]["generated_text"][-1].get("content", "No text generated")
                return outputs[0]["generated_text"]
            return str(outputs[0])
        return "No text generated"
    except Exception as e:
        return f"Error generating text: {str(e)}"


def generate_text_api_fallback(prompt,
                               hf_token=None):
    """Fall back to a direct call to the Hugging Face Inference API."""
    try:
        # Try a simple text-generation request
        api_url = f"https://api-inference.huggingface.co/models/{model_id}"
        headers = {"Content-Type": "application/json"}
        if hf_token:
            headers["Authorization"] = f"Bearer {hf_token}"

        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 200,
                "temperature": 0.7,
                "do_sample": True,
                "return_full_text": False
            }
        }

        response = requests.post(api_url, headers=headers, json=payload, timeout=60)

        if response.status_code == 200:
            result = response.json()
            if isinstance(result, list) and len(result) > 0:
                return result[0].get("generated_text", "No text generated")
            return str(result)
        else:
            # If the HF API fails, provide a helpful message
            return f"""⚠️ Model not accessible via API (Status: {response.status_code})

This could be because:
1. The GPT-OSS model requires special handling not available in the standard HF Inference API
2. The model might need to be loaded locally with the transformers library
3. You might need a valid Hugging Face token

To run this properly, you would need to:
- Install: pip install transformers torch
- The app will automatically use the transformers library when available

For now, here's a demo response to your prompt: "{prompt}"

This is a placeholder response. The actual GPT-OSS model would provide much more sophisticated output."""
    except requests.exceptions.Timeout:
        return "⏰ Request timed out. The model might be loading or busy."
    except Exception as e:
        return f"🔧 Technical details: {str(e)}"


def generate_text(prompt, hf_token=""):
    """Main generation function."""
    if not prompt.strip():
        return "Please enter a prompt."

    if MODEL_LOADED and pipe:
        return generate_text_transformers(prompt)
    else:
        return generate_text_api_fallback(prompt, hf_token if hf_token.strip() else None)


# Create the Gradio interface
with gr.Blocks(title="GPT-OSS Text Generator") as demo:
    gr.Markdown("# 🤖 GPT-OSS Text Generator")

    if MODEL_LOADED:
        gr.Markdown(f"✅ **Status**: Model `{model_id}` loaded successfully!")
        gr.Markdown("🚀 Using the transformers library for optimal performance")
    else:
        gr.Markdown(f"📡 **Status**: Using API mode for `{model_id}`")
        gr.Markdown("💡 Install `transformers` and `torch` for better performance")

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Enter your prompt:",
                lines=5,
                placeholder="Type your prompt here...",
                value="Explain quantum computing in simple terms."
            )

            if not MODEL_LOADED:
                hf_token_input = gr.Textbox(
                    label="Hugging Face Token (optional):",
                    type="password",
                    placeholder="Your HF token for API access..."
                )
            else:
                hf_token_input = gr.Textbox(visible=False, value="")

            generate_btn = gr.Button("🎯 Generate", variant="primary", size="lg")

            gr.Markdown("""
### 💡 Tips:
- Try different types of prompts: questions, creative writing, code, etc.
- The model works best with clear, specific prompts
- For better performance, install: `pip install transformers torch`
""")

        with gr.Column():
            output_text = gr.Textbox(
                label="Generated text:",
                lines=15,
                interactive=False,
                placeholder="Generated text will appear here..."
            )

    generate_btn.click(
        fn=generate_text,
        inputs=[prompt_input, hf_token_input],
        outputs=output_text
    )

    # Example prompts
    gr.Examples(
        examples=[
            ["Write a short story about a robot learning to paint."],
            ["Explain how neural networks work."],
            ["Write a Python function to sort a list of dictionaries."],
            ["What are the benefits of renewable energy?"],
            ["Create a haiku about artificial intelligence."]
        ],
        inputs=prompt_input,
        label="📝 Example Prompts"
    )

if __name__ == "__main__":
    demo.launch()