Delta0723 committed
Commit c456490 · verified · 1 Parent(s): 19cb7b0

Update app.py

Browse files
Files changed (1)
  1. app.py +52 -284
app.py CHANGED
@@ -1,333 +1,101 @@
- """
- TechMind Pro - API Production Ready
- Fine-tuned AI specialized in Cisco networking
- """
-
- from fastapi import FastAPI, HTTPException, BackgroundTasks
  from fastapi.middleware.cors import CORSMiddleware
- from fastapi.responses import FileResponse, JSONResponse
  from pydantic import BaseModel
  from typing import Optional, List
- import torch
  from transformers import AutoTokenizer, AutoModelForCausalLM
  from peft import PeftModel
- import uvicorn
  import os
- import json
  from datetime import datetime
  import re

- # ============================================
- # CONFIGURATION
- # ============================================

  BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
  LORA_MODEL = "Delta0723/techmind-pro-v9"
- OUTPUT_DIR = "/workspace/TechMind/api_outputs"

- os.makedirs(OUTPUT_DIR, exist_ok=True)

- # ============================================
- # INITIALIZE APP
- # ============================================

- app = FastAPI(
-     title="TechMind Pro API",
-     description="AI assistant specialized in Cisco networking & Packet Tracer",
-     version="1.0.0",
-     docs_url="/docs",
-     redoc_url="/redoc"
- )
-
- # CORS to allow requests from any origin
  app.add_middleware(
      CORSMiddleware,
      allow_origins=["*"],
-     allow_credentials=True,
      allow_methods=["*"],
-     allow_headers=["*"],
  )

- # ============================================
- # LOAD MODEL (at startup)
- # ============================================

- print("🔥 Starting TechMind Pro API...")
- print("="*60)

- tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
- tokenizer.pad_token = tokenizer.eos_token

- print("📦 Loading Mistral 7B...")
- model = AutoModelForCausalLM.from_pretrained(
-     BASE_MODEL,
-     device_map="auto",
-     trust_remote_code=True
- )

- print("🔧 Loading LoRA v9 ULTIMATE...")
- model = PeftModel.from_pretrained(model, LORA_MODEL)
- model.eval()

- print("✅ TechMind Pro ready for production")
- print("="*60)

- # ============================================
- # DATA MODELS
- # ============================================

- class QueryRequest(BaseModel):
      question: str
-     max_tokens: Optional[int] = 500
      temperature: Optional[float] = 0.7
-     include_files: Optional[bool] = False
-
- class QueryResponse(BaseModel):
-     answer: str
-     confidence: float
-     processing_time: float
-     files: Optional[List[dict]] = None
-     metadata: dict

- # ============================================
- # CORE FUNCTIONS
- # ============================================

- def generar_respuesta(question: str, max_tokens: int = 500, temperature: float = 0.7) -> str:
-     """
-     Generates a response from the TechMind model
-     """
      prompt = f"<s>[INST] {question} [/INST]"
-     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
-     inputs = {k: v.to(model.device) for k, v in inputs.items()}
-
      with torch.no_grad():
          outputs = model.generate(
              **inputs,
              max_new_tokens=max_tokens,
              temperature=temperature,
-             top_p=0.9,
              do_sample=True,
-             pad_token_id=tokenizer.eos_token_id,
-             eos_token_id=tokenizer.eos_token_id
          )
-
-     respuesta = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-     if "[/INST]" in respuesta:
-         respuesta = respuesta.split("[/INST]")[1].strip()
-
-     return respuesta
-
- def calcular_confianza(respuesta: str, pregunta: str) -> float:
-     """
-     Computes a confidence score based on technical keywords
-     """
-     keywords_cisco = [
-         'interface', 'ip address', 'router', 'switch', 'vlan',
-         'configure', 'enable', 'show', 'no shutdown', 'ospf',
-         'eigrp', 'bgp', 'acl', 'nat', 'trunk'
-     ]
-
-     resp_lower = respuesta.lower()
-     encontrados = sum(1 for k in keywords_cisco if k in resp_lower)

-     # Base score from keywords
-     score = min(encontrados / 5, 1.0) * 0.7

-     # Bonus if the answer contains code blocks
-     if '```' in respuesta or 'enable\nconfigure' in respuesta:
-         score += 0.2

-     # Bonus if it mentions verification
-     if any(v in resp_lower for v in ['show', 'verify', 'debug']):
-         score += 0.1

-     return min(score, 1.0)

- def extraer_bloques_codigo(respuesta: str) -> List[dict]:
-     """
-     Extracts code blocks from the response
-     """
-     bloques = []

-     # Find ``` blocks
-     patron = r'```(?:cisco|bash|text)?\n(.*?)```'
-     matches = re.findall(patron, respuesta, re.DOTALL)

-     for i, codigo in enumerate(matches, 1):
-         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-         filename = f"config_{i}_{timestamp}.txt"
-         filepath = os.path.join(OUTPUT_DIR, filename)

-         with open(filepath, 'w') as f:
-             f.write(codigo)

-         bloques.append({
-             "filename": filename,
-             "content": codigo,
-             "size": len(codigo),
-             "download_url": f"/download/{filename}"
-         })

-     return bloques

- # ============================================
- # ENDPOINTS
- # ============================================

  @app.get("/")
  def root():
-     """
-     API information
-     """
-     return {
-         "service": "TechMind Pro API",
-         "version": "1.0.0",
-         "model": "Mistral-7B v9 ULTIMATE",
-         "specialization": "Cisco Networking & Packet Tracer",
-         "status": "operational",
-         "docs": "/docs",
-         "endpoints": {
-             "ask": "POST /ask",
-             "health": "GET /health",
-             "stats": "GET /stats"
-         }
-     }

- @app.get("/health")
- def health_check():
-     """
-     Service health check
-     """
-     return {
-         "status": "healthy",
-         "model_loaded": model is not None,
-         "timestamp": datetime.now().isoformat()
-     }
-
- @app.post("/ask", response_model=QueryResponse)
- async def ask_techmind(request: QueryRequest):
-     """
-     Main endpoint - query TechMind
-
-     Example:
-     ```json
-     {
-         "question": "How do I configure OSPF area 0?",
-         "max_tokens": 500,
-         "temperature": 0.7,
-         "include_files": true
-     }
-     ```
-     """
      try:
-         start_time = datetime.now()
-
-         # Generate the answer
-         answer = generar_respuesta(
-             request.question,
-             max_tokens=request.max_tokens,
-             temperature=request.temperature
-         )
-
-         # Compute confidence
-         confidence = calcular_confianza(answer, request.question)
-
-         # Extract files if requested
-         files = None
-         if request.include_files:
-             files = extraer_bloques_codigo(answer)
-
-         # Compute elapsed time
-         processing_time = (datetime.now() - start_time).total_seconds()
-
-         return QueryResponse(
-             answer=answer,
-             confidence=confidence,
-             processing_time=processing_time,
-             files=files,
-             metadata={
-                 "model": "Mistral-7B v9 ULTIMATE",
-                 "timestamp": datetime.now().isoformat(),
-                 "tokens_generated": len(answer.split())
-             }
-         )
-
      except Exception as e:
          raise HTTPException(status_code=500, detail=str(e))
-
- @app.get("/download/{filename}")
- async def download_file(filename: str):
-     """
-     Download generated configuration files
-     """
-     filepath = os.path.join(OUTPUT_DIR, filename)
-
-     if not os.path.exists(filepath):
-         raise HTTPException(status_code=404, detail="File not found")
-
-     return FileResponse(
-         filepath,
-         media_type='application/octet-stream',
-         filename=filename
-     )
-
- @app.get("/stats")
- def get_stats():
-     """
-     Service statistics
-     """
-     archivos_generados = len([f for f in os.listdir(OUTPUT_DIR) if f.endswith('.txt')])
-
-     return {
-         "archivos_generados": archivos_generados,
-         "modelo": "Mistral-7B v9 ULTIMATE",
-         "dataset": "1,191 examples",
-         "especialización": "Cisco Networking & Packet Tracer",
-         "uptime": "N/A"
-     }
-
- @app.post("/batch")
- async def batch_queries(questions: List[str]):
-     """
-     Process multiple questions
-     """
-     results = []
-
-     for q in questions:
-         try:
-             answer = generar_respuesta(q)
-             confidence = calcular_confianza(answer, q)
-             results.append({
-                 "question": q,
-                 "answer": answer,
-                 "confidence": confidence
-             })
-         except Exception as e:
-             results.append({
-                 "question": q,
-                 "error": str(e)
-             })
-
-     return {"results": results}
-
- # ============================================
- # MAIN
- # ============================================
-
- if __name__ == "__main__":
-     print("\n" + "="*60)
-     print("🚀 TechMind Pro API - Production Mode")
-     print("="*60)
-     print("📍 URL: http://0.0.0.0:8000")
-     print("📚 Docs: http://0.0.0.0:8000/docs")
-     print("🔥 Ready to receive queries")
-     print("="*60 + "\n")
-
-     uvicorn.run(
-         app,
-         host="0.0.0.0",
-         port=8000,
-         log_level="info"
-     )
 
1
+ from fastapi import FastAPI, HTTPException
 
 
 
 
 
2
  from fastapi.middleware.cors import CORSMiddleware
 
3
  from pydantic import BaseModel
4
  from typing import Optional, List
 
5
  from transformers import AutoTokenizer, AutoModelForCausalLM
6
  from peft import PeftModel
7
+ import torch
8
  import os
 
9
  from datetime import datetime
10
  import re
11
 
12
+ # =========================
13
+ # CONFIG
14
+ # =========================
15
 
16
  BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
17
  LORA_MODEL = "Delta0723/techmind-pro-v9"
 
18
 
19
+ # =========================
20
+ # FastAPI Setup
21
+ # =========================
22
 
23
+ app = FastAPI(title="TechMind Pro API")
 
 
24
 
 
 
 
 
 
 
 
 
 
25
  app.add_middleware(
26
  CORSMiddleware,
27
  allow_origins=["*"],
 
28
  allow_methods=["*"],
29
+ allow_headers=["*"]
30
  )
31
 
32
+ # =========================
33
+ # Load Model
34
+ # =========================
35
 
36
+ print("🚀 Cargando modelo y tokenizer...")
 
37
 
38
+ try:
39
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
40
+ tokenizer.pad_token = tokenizer.eos_token
41
 
42
+ base_model = AutoModelForCausalLM.from_pretrained(
43
+ BASE_MODEL,
44
+ torch_dtype=torch.float16,
45
+ device_map="auto"
46
+ )
47
+
48
+ model = PeftModel.from_pretrained(base_model, LORA_MODEL)
49
+ model.eval()
50
 
51
+ except Exception as e:
52
+ print("❌ Error al cargar el modelo:", e)
53
+ raise e
54
 
55
+ print("✅ Modelo listo")
 
56
 
57
+ # =========================
58
+ # Data Models
59
+ # =========================
60
 
61
+ class Query(BaseModel):
62
  question: str
63
+ max_tokens: Optional[int] = 300
64
  temperature: Optional[float] = 0.7
 
 
 
 
 
 
 
 
65
 
66
+ # =========================
67
+ # Utilidades
68
+ # =========================
69
 
70
+ def generate_answer(question: str, max_tokens=300, temperature=0.7) -> str:
 
 
 
71
  prompt = f"<s>[INST] {question} [/INST]"
72
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
73
+
 
74
  with torch.no_grad():
75
  outputs = model.generate(
76
  **inputs,
77
  max_new_tokens=max_tokens,
78
  temperature=temperature,
79
+ top_p=0.95,
80
  do_sample=True,
81
+ pad_token_id=tokenizer.eos_token_id
 
82
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
+ decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
85
+ return decoded.split("[/INST]")[-1].strip() if "[/INST]" in decoded else decoded
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
+ # =========================
88
+ # Endpoints
89
+ # =========================
90
 
91
  @app.get("/")
92
  def root():
93
+ return {"TechMind": "Mistral-7B Instruct + LoRA v9", "status": "online"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
+ @app.post("/ask")
96
+ def ask_q(req: Query):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  try:
98
+ result = generate_answer(req.question, req.max_tokens, req.temperature)
99
+ return {"response": result}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  except Exception as e:
101
  raise HTTPException(status_code=500, detail=str(e))