Spaces: Runtime error
```python
# ============================================
# app.py - Using the HF Inference API
# ============================================
import gradio as gr
import requests
import os

# The model is already available through the HF Inference API
MODEL_API = "https://api-inference.huggingface.co/models/Delta0723/techmind-pro-v9"
HF_TOKEN = os.getenv("HF_TOKEN", "")  # Set your token in the Space's Settings


def query_model(question, max_tokens=300, temperature=0.7):
    if not HF_TOKEN:
        return "❌ Error: you need to set HF_TOKEN in Settings > Repository secrets"

    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {
        # Mistral-instruct prompt format: the question goes inside [INST] ... [/INST]
        "inputs": f"<s>[INST] {question} [/INST]",
        "parameters": {
            "max_new_tokens": int(max_tokens),
            "temperature": float(temperature),
            "top_p": 0.95,
            "do_sample": True,
            "return_full_text": False,
        },
    }
    try:
        response = requests.post(MODEL_API, headers=headers, json=payload, timeout=120)
        if response.status_code == 503:
            return "⏳ The model is loading on HuggingFace's servers. Wait ~20 seconds and try again."
        if response.status_code == 401:
            return "❌ Authentication error. Check your HF_TOKEN."
        response.raise_for_status()
        result = response.json()
        if isinstance(result, list) and len(result) > 0:
            return result[0].get("generated_text", "No response")
        return str(result)
    except requests.exceptions.RequestException as e:
        return f"❌ Error: {str(e)}"


# Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🤖 TechMind Pro v9
    ### Model based on Mistral-7B + LoRA fine-tuning
    *Uses the HuggingFace Inference API (no local GPU needed)*
    """)
    with gr.Row():
        with gr.Column(scale=1):
            question_input = gr.Textbox(
                label="💬 Your Question",
                placeholder="Type your question here...",
                lines=4,
            )
            with gr.Accordion("⚙️ Advanced Parameters", open=False):
                max_tokens_slider = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=300,
                    step=50,
                    label="Max tokens",
                )
                temperature_slider = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature (creativity)",
                )
            submit_btn = gr.Button("🚀 Generate Response", variant="primary", size="lg")
            gr.Markdown("""
            ---
            **Note:** the first request can take ~20s while the model loads.
            Subsequent requests will be faster.
            """)
        with gr.Column(scale=1):
            output = gr.Textbox(
                label="✨ Response",
                lines=12,
                show_copy_button=True,
            )
    # Examples
    gr.Examples(
        examples=[
            ["What is Python and what is it used for?"],
            ["Explain machine learning to me in simple terms"],
            ["How does a neural network work?"],
            ["Give me tips for learning to program"],
        ],
        inputs=question_input,
        label="📝 Examples",
    )
    submit_btn.click(
        fn=query_model,
        inputs=[question_input, max_tokens_slider, temperature_slider],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
```
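The app above surfaces the 503 cold start to the user and asks them to retry by hand. A minimal sketch of an alternative, reusing the same endpoint and payload shape (the `query_with_retry`, `retries`, and `wait_seconds` names are hypothetical, not part of the app): retry automatically with a fixed wait until the model is warm.

```python
# Hypothetical variant of query_model: instead of asking the user to retry,
# wait out the 503 cold start and re-send the request a few times.
import time

import requests

MODEL_API = "https://api-inference.huggingface.co/models/Delta0723/techmind-pro-v9"


def query_with_retry(token, question, retries=5, wait_seconds=20):
    headers = {"Authorization": f"Bearer {token}"}
    payload = {
        "inputs": f"<s>[INST] {question} [/INST]",
        "parameters": {"max_new_tokens": 300, "return_full_text": False},
    }
    for _ in range(retries):
        response = requests.post(MODEL_API, headers=headers, json=payload, timeout=120)
        if response.status_code != 503:  # 503 = model still loading (cold start)
            response.raise_for_status()
            return response.json()[0]["generated_text"]
        time.sleep(wait_seconds)  # give the model time to load, then retry
    raise TimeoutError("Model did not load after several retries")
```

Note this blocks the Gradio worker for up to `retries * wait_seconds` seconds, so the fixed `timeout=120` in the original is arguably the simpler choice for a free Space.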
requirements.txt:

```
gradio>=4.0.0
requests>=2.31.0
```
README.md:

```markdown
---
title: TechMind Pro v9
emoji: 🤖
colorFrom: blue
colorTo: purple
sdk: gradio
sdk_version: 4.44.0
app_file: app.py
pinned: false
license: mit
---

# 🤖 TechMind Pro v9

Web interface for the TechMind Pro v9 model (Mistral-7B + LoRA fine-tuning)

## 🚀 How to use

1. **Set up your HuggingFace token:**
   - Go to Settings > Repository secrets
   - Add: `HF_TOKEN` = your token from https://huggingface.co/settings/tokens
2. **Type your question** and press "Generate Response"

## ⚡ Advantages

- ✅ Does not consume Space resources (uses the Inference API)
- ✅ Automatic GPU on HF's servers
- ✅ Fast responses after the first load
- ✅ Free within HF's limits

## 📊 Model

- **Base:** mistralai/Mistral-7B-Instruct-v0.3
- **Adapter:** Delta0723/techmind-pro-v9
- **Backend:** HuggingFace Inference API

## 🔒 Limits

- ~30 requests/minute on the free tier
- First request takes ~20s (cold start)
```