Spaces: Runtime error
# app.py for Hugging Face Spaces
# Runs on CPU with maximum optimizations
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
import os

# =========================
# CONFIG
# =========================
BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
LORA_MODEL = "Delta0723/techmind-pro-v9"
OFFLOAD_DIR = "./offload_folder"
os.makedirs(OFFLOAD_DIR, exist_ok=True)
# =========================
# FastAPI Setup
# =========================
app = FastAPI(title="TechMind Pro v9")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"]
)

# Global handles for the model and tokenizer (filled in lazily)
model = None
tokenizer = None
# =========================
# Load Model (lazy loading)
# =========================
def load_model():
    global model, tokenizer
    if model is not None:
        return
    print("🚀 Loading model...")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
    tokenizer.pad_token = tokenizer.eos_token
    # Load the base model on CPU in float16, offloading the state dict to disk
    # to reduce peak RAM. NOTE: float16 kernels are not universally supported on
    # CPU; if loading or generation fails, torch.float32 may be required.
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        device_map={"": "cpu"},
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
        offload_folder=OFFLOAD_DIR,
        offload_state_dict=True
    )
    # Attach the LoRA adapter
    model = PeftModel.from_pretrained(
        base_model,
        LORA_MODEL,
        device_map={"": "cpu"},
        offload_folder=OFFLOAD_DIR
    )
    model.eval()
    print("✅ Model loaded")
# =========================
# Data Models
# =========================
class Query(BaseModel):
    question: str
    max_tokens: Optional[int] = 200
    temperature: Optional[float] = 0.7
# =========================
# Utilities
# =========================
def generate_answer(question: str, max_tokens=200, temperature=0.7) -> str:
    load_model()  # lazy load on first request
    # Mistral-Instruct prompt format
    prompt = f"<s>[INST] {question} [/INST]"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=0.95,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            num_beams=1  # single beam for speed
        )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Return only the text generated after the instruction block
    return decoded.split("[/INST]")[-1].strip() if "[/INST]" in decoded else decoded
# =========================
# Endpoints
# =========================
@app.get("/")
def root():
    return {
        "model": "TechMind Pro v9",
        "base": BASE_MODEL,
        "lora": LORA_MODEL,
        "status": "online"
    }

@app.post("/ask")
def ask_q(req: Query):
    try:
        result = generate_answer(req.question, req.max_tokens, req.temperature)
        return {"response": result}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
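# Optional local-run helper: a minimal sketch for testing outside the Space.
# It assumes the same port (7860) that the Dockerfile CMD below uses; inside the
# container uvicorn is launched by that CMD, so this block never runs there.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)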
# =========================
# README.md for the Space
# =========================
"""
---
title: TechMind Pro v9
emoji: 🤖
colorFrom: blue
colorTo: purple
sdk: docker
pinned: false
---
# TechMind Pro v9

API for the TechMind Pro v9 model (Mistral-7B + LoRA fine-tune)

## Usage

```bash
curl -X POST "https://YOUR-SPACE.hf.space/ask" \
  -H "Content-Type: application/json" \
  -d '{"question": "What is Python?"}'
```
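
Or from Python (a minimal sketch using the `requests` package; swap in your Space's URL):

```python
import requests

resp = requests.post(
    "https://YOUR-SPACE.hf.space/ask",
    json={"question": "What is Python?", "max_tokens": 200, "temperature": 0.7},
    timeout=300,  # CPU generation can take several minutes
)
print(resp.json()["response"])
```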
| """ | |
# =========================
# Dockerfile for the Space
# =========================
"""
FROM python:3.10-slim

WORKDIR /app

RUN apt-get update && apt-get install -y \
    git \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
"""
# =========================
# requirements.txt
# =========================
"""
fastapi
uvicorn[standard]
transformers>=4.35.0
peft
torch
accelerate
sentencepiece
protobuf
"""