EuuIia committed on
Commit
4ac877f
·
verified ·
1 Parent(s): 9ea7873

Upload ltx_server.py

Browse files
Files changed (1) hide show
  1. api/ltx_server.py +21 -65
api/ltx_server.py CHANGED
@@ -1,4 +1,5 @@
1
- # ltx_server.py — VideoService (sempre output_type="latent") com VAE→pixels→MP4 no fim
 
2
 
3
  # --- 1. IMPORTAÇÕES ---
4
  import torch
@@ -20,6 +21,10 @@ import contextlib
20
  import time
21
  import traceback
22
 
 
 
 
 
23
  # --- 2. GERENCIAMENTO DE DEPENDÊNCIAS E SETUP ---
24
  def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
25
  try:
@@ -65,7 +70,7 @@ def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
65
  parts = [p.strip() for p in line.split(",")]
66
  if len(parts) >= 3:
67
  try:
68
- pid = int(parts[0]); name = parts[_1]; used_mb = int(parts[_2])
69
  user = "unknown"
70
  try:
71
  import psutil
@@ -360,60 +365,7 @@ class VideoService:
360
  print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
361
  return out
362
 
363
- # === Decodificação “simples”: latentes → pixels → MP4 ===
364
- def _decode_one_latent_to_pixel(self, latent_chw: torch.Tensor) -> torch.Tensor:
365
- """
366
- Decodifica um latente (C,H,W) para pixel (C,H,W) em [0,1].
367
- """
368
- ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
369
- with ctx:
370
- if hasattr(self.pipeline, "decode_latents"):
371
- img_bchw = self.pipeline.decode_latents(latent_chw.unsqueeze(0))
372
- elif hasattr(self.pipeline, "vae") and hasattr(self.pipeline.vae, "decode"):
373
- img_bchw = self.pipeline.vae.decode(latent_chw.unsqueeze(0))
374
- else:
375
- raise RuntimeError("Nenhum decoder (decode_latents/vae.decode) disponível.")
376
- img_chw = img_bchw[0]
377
- if img_chw.min() < 0:
378
- img_chw = (img_chw.clamp(-1, 1) + 1.0) / 2.0
379
- else:
380
- img_chw = img_chw.clamp(0, 1)
381
- return img_chw
382
-
383
- def _pixels_to_uint8_np(self, pixel_chw: torch.Tensor, padding_values) -> np.ndarray:
384
- """
385
- Converte (C,H,W) float [0,1] em (H,W,C) uint8 com crop do padding.
386
- """
387
- pad_left, pad_right, pad_top, pad_bottom = padding_values
388
- H, W = pixel_chw.shape[1], pixel_chw.shape[2]
389
- h_end = H - pad_bottom if pad_bottom > 0 else H
390
- w_end = W - pad_right if pad_right > 0 else W
391
- pixel_chw = pixel_chw[:, pad_top:h_end, pad_left:w_end]
392
- frame_hwc_u8 = (pixel_chw.permute(1, 2, 0)
393
- .mul(255)
394
- .to(torch.uint8)
395
- .cpu()
396
- .numpy())
397
- return frame_hwc_u8
398
-
399
- def encode_latents_to_mp4(self, latents: torch.Tensor, output_path: str, fps: int, padding_values,
400
- progress_callback=None):
401
- """
402
- Latentes (B,C,T,H,W) → decodifica quadro a quadro → escreve MP4 incremental.
403
- """
404
- T = latents.shape[2]
405
- print(f"[DEBUG] encode_latents_to_mp4: frames={T} out={output_path}")
406
- with imageio.get_writer(output_path, fps=fps, codec="libx264", quality=8) as writer:
407
- for i in range(T):
408
- latent_chw = latents[0, :, i].to(self.device)
409
- pixel_chw = self._decode_one_latent_to_pixel(latent_chw)
410
- frame_hwc_u8 = self._pixels_to_uint8_np(pixel_chw, padding_values)
411
- writer.append_data(frame_hwc_u8)
412
- if progress_callback:
413
- progress_callback(i + 1, T)
414
- if i % getattr(self, "frame_log_every", 8) == 0:
415
- print(f"[DEBUG] frame {i}/{T} codificado")
416
-
417
  def generate(
418
  self,
419
  prompt,
@@ -435,7 +387,7 @@ class VideoService:
435
  guidance_scale=3.0,
436
  improve_texture=True,
437
  progress_callback=None,
438
- # Sempre latent→VAE→MP4 (simples)
439
  external_decode=True,
440
  ):
441
  t_all = time.perf_counter()
@@ -586,19 +538,23 @@ class VideoService:
586
  latents = result
587
  print(f"[DEBUG] Latentes (single-pass): shape={tuple(latents.shape)}")
588
 
589
- # Staging e escrita MP4 (simples: VAE→pixels→MP4)
590
  temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
591
  results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
592
  output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
593
  final_output_path = None
594
 
595
- print("[DEBUG] Codificando a partir dos latentes (VAE externo) MP4...")
596
- self.encode_latents_to_mp4(
597
- latents=latents,
598
- output_path=output_video_path,
599
- fps=call_kwargs["frame_rate"],
600
- padding_values=padding_values,
601
- progress_callback=progress_callback,
 
 
 
 
602
  )
603
 
604
  candidate_final = os.path.join(results_dir, f"output_{used_seed}.mp4")
 
1
+ # ltx_server.py — VideoService (beta 1.0)
2
+ # Sempre output_type="latent"; no final: VAE (bloco inteiro) → pixels → MP4.
3
 
4
  # --- 1. IMPORTAÇÕES ---
5
  import torch
 
21
  import time
22
  import traceback
23
 
24
+ # Singletons do projeto para VAE e Encoder
25
+ from aduc_framework.tools.video_encode_tool import video_encode_tool_singleton
26
+ from aduc_framework.managers.vae_manager import vae_manager_singleton
27
+
28
  # --- 2. GERENCIAMENTO DE DEPENDÊNCIAS E SETUP ---
29
  def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
30
  try:
 
70
  parts = [p.strip() for p in line.split(",")]
71
  if len(parts) >= 3:
72
  try:
73
+ pid = int(parts[0]); name = parts[1]; used_mb = int(parts[2])
74
  user = "unknown"
75
  try:
76
  import psutil
 
365
  print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
366
  return out
367
 
368
+ # --- 6. GERAÇÃO ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  def generate(
370
  self,
371
  prompt,
 
387
  guidance_scale=3.0,
388
  improve_texture=True,
389
  progress_callback=None,
390
+ # Sempre latent → VAE → MP4 (simples)
391
  external_decode=True,
392
  ):
393
  t_all = time.perf_counter()
 
538
  latents = result
539
  print(f"[DEBUG] Latentes (single-pass): shape={tuple(latents.shape)}")
540
 
541
+ # Staging e escrita MP4 (simples: VAE → pixels → MP4)
542
  temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
543
  results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
544
  output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
545
  final_output_path = None
546
 
547
+ print("[DEBUG] Decodificando bloco de latentes com VAE → tensor de pixels...")
548
+ # Se desejar “desocupar” a GPU antes do decode, pode-se mover p/ CPU e limpar:
549
+ # latents_cpu = latents.detach().to("cpu", non_blocking=True); torch.cuda.empty_cache(); torch.cuda.ipc_collect(); latents = latents_cpu.to(self.device)
550
+ pixel_tensor = vae_manager_singleton.decode(latents.to(self.device, non_blocking=True))
551
+ log_tensor_info(pixel_tensor, "Pixel tensor (VAE saída)")
552
+
553
+ print("[DEBUG] Codificando MP4 a partir do tensor de pixels (bloco inteiro)...")
554
+ video_encode_tool_singleton.save_video_from_tensor(
555
+ pixel_tensor,
556
+ output_video_path,
557
+ fps=call_kwargs["frame_rate"]
558
  )
559
 
560
  candidate_final = os.path.join(results_dir, f"output_{used_seed}.mp4")