| """ | |
| TechMind Pro - API Production Ready | |
| Fine-tuning IA especializada en Redes Cisco | |
| """ | |
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from pydantic import BaseModel
from typing import Optional, List
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import uvicorn
import os
from datetime import datetime
import re
# ============================================
# CONFIGURATION
# ============================================
BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
LORA_MODEL = "Delta0723/techmind-pro-v9"
OUTPUT_DIR = "/workspace/TechMind/api_outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)
# ============================================
# INITIALIZE APP
# ============================================
app = FastAPI(
    title="TechMind Pro API",
    description="AI assistant specialized in Cisco Networking & Packet Tracer",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc"
)
# CORS: allow requests from any origin
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
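# NOTE: a wildcard CORS policy is convenient for demos; for production,
# consider restricting allow_origins to the known frontend domains instead.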
# ============================================
# LOAD MODEL (at startup)
# ============================================
print("🔥 Starting TechMind Pro API...")
print("="*60)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token
print("📦 Loading Mistral 7B...")
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    torch_dtype=torch.float16,  # half precision; fp32 weights would need ~28 GB for a 7B model
    trust_remote_code=True
)
| print("🔧 Cargando LoRA v9 ULTIMATE...") | |
| model = PeftModel.from_pretrained(model, LORA_MODEL) | |
| model.eval() | |
| print("✅ TechMind Pro listo para producción") | |
| print("="*60) | |
# ============================================
# DATA MODELS
# ============================================
class QueryRequest(BaseModel):
    question: str
    max_tokens: Optional[int] = 500
    temperature: Optional[float] = 0.7
    include_files: Optional[bool] = False

class QueryResponse(BaseModel):
    answer: str
    confidence: float
    processing_time: float
    files: Optional[List[dict]] = None
    metadata: dict
# ============================================
# CORE FUNCTIONS
# ============================================
def generar_respuesta(question: str, max_tokens: int = 500, temperature: float = 0.7) -> str:
    """
    Generate a response from the TechMind model
    """
    # The tokenizer inserts <s> (BOS) itself, so it is omitted from the prompt string
    prompt = f"[INST] {question} [/INST]"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
    respuesta = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text after the instruction tag
    if "[/INST]" in respuesta:
        respuesta = respuesta.split("[/INST]")[1].strip()
    return respuesta
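# Example (direct call, bypassing the HTTP layer):
#   generar_respuesta("How do I configure OSPF area 0 on a Cisco router?")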
def calcular_confianza(respuesta: str, pregunta: str) -> float:
    """
    Compute a confidence score based on technical keywords
    """
    keywords_cisco = [
        'interface', 'ip address', 'router', 'switch', 'vlan',
        'configure', 'enable', 'show', 'no shutdown', 'ospf',
        'eigrp', 'bgp', 'acl', 'nat', 'trunk'
    ]
    resp_lower = respuesta.lower()
    encontrados = sum(1 for k in keywords_cisco if k in resp_lower)
    # Base score from keyword matches
    score = min(encontrados / 5, 1.0) * 0.7
    # Bonus if the answer contains code blocks
    if '```' in respuesta or 'enable\nconfigure' in respuesta:
        score += 0.2
    # Bonus if the answer mentions verification commands
    if any(v in resp_lower for v in ['show', 'verify', 'debug']):
        score += 0.1
    return min(score, 1.0)
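# Worked example: an answer containing "interface", "vlan", "no shutdown"
# and "show", plus a ``` code block, scores
#   min(4/5, 1.0) * 0.7 + 0.2 + 0.1 = 0.86
# ("show" counts both as a keyword and as the verification bonus).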
def extraer_bloques_codigo(respuesta: str) -> List[dict]:
    """
    Extract code blocks from the response and save them to disk
    """
    bloques = []
    # Match ``` fenced blocks
    patron = r'```(?:cisco|bash|text)?\n(.*?)```'
    matches = re.findall(patron, respuesta, re.DOTALL)
    for i, codigo in enumerate(matches, 1):
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"config_{i}_{timestamp}.txt"
        filepath = os.path.join(OUTPUT_DIR, filename)
        with open(filepath, 'w') as f:
            f.write(codigo)
        bloques.append({
            "filename": filename,
            "content": codigo,
            "size": len(codigo),
            "download_url": f"/download/{filename}"
        })
    return bloques
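# Note: the pattern above only captures fences that are untagged or tagged
# cisco/bash/text; blocks tagged with any other language are silently skipped.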
# ============================================
# ENDPOINTS
# ============================================
@app.get("/")
def root():
    """
    API information
    """
    return {
        "service": "TechMind Pro API",
        "version": "1.0.0",
        "model": "Mistral-7B v9 ULTIMATE",
        "specialization": "Cisco Networking & Packet Tracer",
        "status": "operational",
        "docs": "/docs",
        "endpoints": {
            "ask": "POST /ask",
            "health": "GET /health",
            "stats": "GET /stats"
        }
    }
@app.get("/health")
def health_check():
    """
    Service health check
    """
    return {
        "status": "healthy",
        "model_loaded": model is not None,
        "timestamp": datetime.now().isoformat()
    }
@app.post("/ask", response_model=QueryResponse)
async def ask_techmind(request: QueryRequest):
    """
    Main endpoint - query TechMind

    Example:
    ```json
    {
        "question": "How do I configure OSPF area 0?",
        "max_tokens": 500,
        "temperature": 0.7,
        "include_files": true
    }
    ```
    """
    try:
        start_time = datetime.now()
        # Generate the answer
        answer = generar_respuesta(
            request.question,
            max_tokens=request.max_tokens,
            temperature=request.temperature
        )
        # Score confidence
        confidence = calcular_confianza(answer, request.question)
        # Extract config files if requested
        files = None
        if request.include_files:
            files = extraer_bloques_codigo(answer)
        # Elapsed time
        processing_time = (datetime.now() - start_time).total_seconds()
        return QueryResponse(
            answer=answer,
            confidence=confidence,
            processing_time=processing_time,
            files=files,
            metadata={
                "model": "Mistral-7B v9 ULTIMATE",
                "timestamp": datetime.now().isoformat(),
                "tokens_generated": len(answer.split())  # approximate: word count, not true tokens
            }
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/download/{filename}")
async def download_file(filename: str):
    """
    Download generated configuration files
    """
    filename = os.path.basename(filename)  # guard against path traversal outside OUTPUT_DIR
    filepath = os.path.join(OUTPUT_DIR, filename)
    if not os.path.exists(filepath):
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(
        filepath,
        media_type='application/octet-stream',
        filename=filename
    )
@app.get("/stats")
def get_stats():
    """
    Service statistics
    """
    archivos_generados = len([f for f in os.listdir(OUTPUT_DIR) if f.endswith('.txt')])
    return {
        "files_generated": archivos_generados,
        "model": "Mistral-7B v9 ULTIMATE",
        "dataset": "1,191 examples",
        "specialization": "Cisco Networking & Packet Tracer",
        "uptime": "N/A"
    }
@app.post("/batch")  # route path assumed; this endpoint is not listed in root()
async def batch_queries(questions: List[str]):
    """
    Process multiple questions in one request
    """
    results = []
    for q in questions:
        try:
            answer = generar_respuesta(q)
            confidence = calcular_confianza(answer, q)
            results.append({
                "question": q,
                "answer": answer,
                "confidence": confidence
            })
        except Exception as e:
            results.append({
                "question": q,
                "error": str(e)
            })
    return {"results": results}
# ============================================
# MAIN
# ============================================
if __name__ == "__main__":
    print("\n" + "="*60)
    print("🚀 TechMind Pro API - Production Mode")
    print("="*60)
    print("📍 URL: http://0.0.0.0:8000")
    print("📚 Docs: http://0.0.0.0:8000/docs")
    print("🔥 Ready to receive queries")
    print("="*60 + "\n")
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
        log_level="info"
    )
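# Quick smoke test once the server is up (host assumed to be localhost):
#   curl http://localhost:8000/health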