Test

Paused

App Files Community

EuuIia committed on Oct 3

Commit

4ac877f

verified ·

1 Parent(s): 9ea7873

Upload ltx_server.py

Browse files

Files changed (1) hide show

api/ltx_server.py +21 -65

api/ltx_server.py CHANGED Viewed

@@ -1,4 +1,5 @@
-# ltx_server.py — VideoService (sempre output_type="latent") com VAE→pixels→MP4 no fim
 # --- 1. IMPORTAÇÕES ---
 import torch
@@ -20,6 +21,10 @@ import contextlib
 import time
 import traceback
 # --- 2. GERENCIAMENTO DE DEPENDÊNCIAS E SETUP ---
 def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
     try:
@@ -65,7 +70,7 @@ def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
         parts = [p.strip() for p in line.split(",")]
         if len(parts) >= 3:
             try:
-                pid = int(parts[0]); name = parts[_1]; used_mb = int(parts[_2])
                 user = "unknown"
                 try:
                     import psutil
@@ -360,60 +365,7 @@ class VideoService:
         print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
         return out
-    # === Decodificação “simples”: latentes → pixels → MP4 ===
-    def _decode_one_latent_to_pixel(self, latent_chw: torch.Tensor) -> torch.Tensor:
-        """
-        Decodifica um latente (C,H,W) para pixel (C,H,W) em [0,1].
-        """
-        ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
-        with ctx:
-            if hasattr(self.pipeline, "decode_latents"):
-                img_bchw = self.pipeline.decode_latents(latent_chw.unsqueeze(0))
-            elif hasattr(self.pipeline, "vae") and hasattr(self.pipeline.vae, "decode"):
-                img_bchw = self.pipeline.vae.decode(latent_chw.unsqueeze(0))
-            else:
-                raise RuntimeError("Nenhum decoder (decode_latents/vae.decode) disponível.")
-        img_chw = img_bchw[0]
-        if img_chw.min() < 0:
-            img_chw = (img_chw.clamp(-1, 1) + 1.0) / 2.0
-        else:
-            img_chw = img_chw.clamp(0, 1)
-        return img_chw
-    def _pixels_to_uint8_np(self, pixel_chw: torch.Tensor, padding_values) -> np.ndarray:
-        """
-        Converte (C,H,W) float [0,1] em (H,W,C) uint8 com crop do padding.
-        """
-        pad_left, pad_right, pad_top, pad_bottom = padding_values
-        H, W = pixel_chw.shape[1], pixel_chw.shape[2]
-        h_end = H - pad_bottom if pad_bottom > 0 else H
-        w_end = W - pad_right if pad_right > 0 else W
-        pixel_chw = pixel_chw[:, pad_top:h_end, pad_left:w_end]
-        frame_hwc_u8 = (pixel_chw.permute(1, 2, 0)
-                        .mul(255)
-                        .to(torch.uint8)
-                        .cpu()
-                        .numpy())
-        return frame_hwc_u8
-    def encode_latents_to_mp4(self, latents: torch.Tensor, output_path: str, fps: int, padding_values,
-                              progress_callback=None):
-        """
-        Latentes (B,C,T,H,W) → decodifica quadro a quadro → escreve MP4 incremental.
-        """
-        T = latents.shape[2]
-        print(f"[DEBUG] encode_latents_to_mp4: frames={T} out={output_path}")
-        with imageio.get_writer(output_path, fps=fps, codec="libx264", quality=8) as writer:
-            for i in range(T):
-                latent_chw = latents[0, :, i].to(self.device)
-                pixel_chw = self._decode_one_latent_to_pixel(latent_chw)
-                frame_hwc_u8 = self._pixels_to_uint8_np(pixel_chw, padding_values)
-                writer.append_data(frame_hwc_u8)
-                if progress_callback:
-                    progress_callback(i + 1, T)
-                if i % getattr(self, "frame_log_every", 8) == 0:
-                    print(f"[DEBUG] frame {i}/{T} codificado")
     def generate(
         self,
         prompt,
@@ -435,7 +387,7 @@ class VideoService:
         guidance_scale=3.0,
         improve_texture=True,
         progress_callback=None,
-        # Sempre latent→VAE→MP4 (simples)
         external_decode=True,
     ):
         t_all = time.perf_counter()
@@ -586,19 +538,23 @@ class VideoService:
                     latents = result
                 print(f"[DEBUG] Latentes (single-pass): shape={tuple(latents.shape)}")
-            # Staging e escrita MP4 (simples: VAE→pixels→MP4)
             temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
             results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
             output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
             final_output_path = None
-            print("[DEBUG] Codificando a partir dos latentes (VAE externo) → MP4...")
-            self.encode_latents_to_mp4(
-                latents=latents,
-                output_path=output_video_path,
-                fps=call_kwargs["frame_rate"],
-                padding_values=padding_values,
-                progress_callback=progress_callback,
             )
             candidate_final = os.path.join(results_dir, f"output_{used_seed}.mp4")

+# ltx_server.py — VideoService (beta 1.0)
+# Sempre output_type="latent"; no final: VAE (bloco inteiro) → pixels → MP4.
 # --- 1. IMPORTAÇÕES ---
 import torch
 import time
 import traceback
+# Singletons do projeto para VAE e Encoder
+from aduc_framework.tools.video_encode_tool import video_encode_tool_singleton
+from aduc_framework.managers.vae_manager import vae_manager_singleton
 # --- 2. GERENCIAMENTO DE DEPENDÊNCIAS E SETUP ---
 def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
     try:
         parts = [p.strip() for p in line.split(",")]
         if len(parts) >= 3:
             try:
+                pid = int(parts[0]); name = parts[1]; used_mb = int(parts[2])
                 user = "unknown"
                 try:
                     import psutil
         print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
         return out
+    # --- 6. GERAÇÃO ---
     def generate(
         self,
         prompt,
         guidance_scale=3.0,
         improve_texture=True,
         progress_callback=None,
+        # Sempre latent → VAE → MP4 (simples)
         external_decode=True,
     ):
         t_all = time.perf_counter()
                     latents = result
                 print(f"[DEBUG] Latentes (single-pass): shape={tuple(latents.shape)}")
+            # Staging e escrita MP4 (simples: VAE → pixels → MP4)
             temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
             results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
             output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
             final_output_path = None
+            print("[DEBUG] Decodificando bloco de latentes com VAE → tensor de pixels...")
+            # Se desejar “desocupar” a GPU antes do decode, pode-se mover p/ CPU e limpar:
+            # latents_cpu = latents.detach().to("cpu", non_blocking=True); torch.cuda.empty_cache(); torch.cuda.ipc_collect(); latents = latents_cpu.to(self.device)
+            pixel_tensor = vae_manager_singleton.decode(latents.to(self.device, non_blocking=True))
+            log_tensor_info(pixel_tensor, "Pixel tensor (VAE saída)")
+            print("[DEBUG] Codificando MP4 a partir do tensor de pixels (bloco inteiro)...")
+            video_encode_tool_singleton.save_video_from_tensor(
+                pixel_tensor,
+                output_video_path,
+                fps=call_kwargs["frame_rate"]
             )
             candidate_final = os.path.join(results_dir, f"output_{used_seed}.mp4")