Rename video_service.py to api/ltx_server.py
video_service.py → api/ltx_server.py
RENAMED
@@ -63,9 +63,9 @@ def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
         parts = [p.strip() for p in line.split(",")]
         if len(parts) >= 3:
             try:
-                pid = int(parts[
-                name = parts[
-                used_mb = int(parts[
+                pid = int(parts[0])
+                name = parts[1]
+                used_mb = int(parts[2])
                 user = "unknown"
                 try:
                     import psutil
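For reference, the pid/name/used-memory triple parsed in the hunk above matches the CSV emitted by nvidia-smi --query-compute-apps. A standalone sketch of that query and parse; the function name and error handling are illustrative, not taken from this repo:

import subprocess

def list_gpu_compute_processes(device_index: int = 0):
    """Return (pid, process_name, used_memory_mb) tuples reported by nvidia-smi."""
    out = subprocess.run(
        ["nvidia-smi", "-i", str(device_index),
         "--query-compute-apps=pid,process_name,used_memory",
         "--format=csv,noheader,nounits"],
        capture_output=True, text=True, check=False,
    ).stdout
    rows = []
    for line in out.splitlines():
        parts = [p.strip() for p in line.split(",")]
        if len(parts) >= 3:
            try:
                rows.append((int(parts[0]), parts[1], int(parts[2])))
            except ValueError:
                continue  # skip malformed rows such as "[N/A]"
    return rows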
@@ -163,7 +163,7 @@ class VideoService:
         if self.latent_upsampler:
             self.latent_upsampler.to(self.device)
 
-        # Política de precisão (
+        # Política de precisão (FP8 opcional + autocast coerente)
         self._apply_precision_policy()
 
         if self.device == "cuda":
@@ -171,7 +171,6 @@ class VideoService:
         self._log_gpu_memory("Após carregar modelos")
         print("VideoService pronto para uso.")
 
-    # Método de log de GPU como parte da classe
     def _log_gpu_memory(self, stage_name: str):
         if self.device != "cuda":
             return
@@ -209,15 +208,9 @@ class VideoService:
             pass
 
     def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
-        """
-        Remove temporários e coleta memória.
-        keep_paths: caminhos que não devem ser removidos (ex.: vídeo final).
-        extra_paths: caminhos adicionais para tentar remover (opcional).
-        """
         keep = set(keep_paths or [])
         extras = set(extra_paths or [])
 
-        # Remoção de arquivos
         for f in list(self._tmp_files | extras):
             try:
                 if f not in keep and os.path.isfile(f):
@@ -227,7 +220,6 @@ class VideoService:
             finally:
                 self._tmp_files.discard(f)
 
-        # Remoção de diretórios
         for d in list(self._tmp_dirs):
             try:
                 if d not in keep and os.path.isdir(d):
@@ -237,7 +229,6 @@ class VideoService:
             finally:
                 self._tmp_dirs.discard(d)
 
-        # Coleta de GC e limpeza de VRAM
         gc.collect()
         try:
             if clear_gpu and torch.cuda.is_available():
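The gc.collect() call followed by the CUDA guard above is the usual PyTorch cleanup idiom. Shown on its own, under the assumption that nothing else still holds references to the tensors being freed:

import gc
import torch

def release_gpu_memory():
    # Drop Python references first so the CUDA caching allocator can actually free blocks.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()   # return cached blocks to the driver
        torch.cuda.ipc_collect()   # reclaim memory from finished CUDA IPC consumers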
@@ -249,26 +240,23 @@ class VideoService:
         except Exception:
             pass
 
-        # Log opcional pós-limpeza
         try:
             self._log_gpu_memory("Após finalize")
         except Exception:
             pass
 
     def _load_config(self):
-        # Prioriza configs FP8 se presentes, mantendo compatibilidade
         base = LTX_VIDEO_REPO_DIR / "configs"
         candidates = [
             base / "ltxv-13b-0.9.8-dev-fp8.yaml",
             base / "ltxv-13b-0.9.8-distilled-fp8.yaml",
             base / "ltxv-13b-0.9.8-dev-fp8.yaml.txt",
-            base / "ltxv-13b-0.9.8-distilled.yaml",
+            base / "ltxv-13b-0.9.8-distilled.yaml",
         ]
         for cfg in candidates:
             if cfg.exists():
                 with open(cfg, "r") as file:
                     return yaml.safe_load(file)
-        # Fallback rígido para caminho clássico se nada acima existir
         config_file_path = base / "ltxv-13b-0.9.8-distilled.yaml"
         with open(config_file_path, "r") as file:
             return yaml.safe_load(file)
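_load_config now walks an ordered candidate list and only falls back to the classic distilled YAML when no FP8 config is present. The same pattern in isolation; the helper name is illustrative, while the file names are the ones listed above:

from pathlib import Path
import yaml

def load_first_existing_yaml(base: Path) -> dict:
    candidates = [
        "ltxv-13b-0.9.8-dev-fp8.yaml",
        "ltxv-13b-0.9.8-distilled-fp8.yaml",
        "ltxv-13b-0.9.8-dev-fp8.yaml.txt",
        "ltxv-13b-0.9.8-distilled.yaml",
    ]
    for name in candidates:
        path = base / name
        if path.exists():
            with open(path, "r") as fh:
                return yaml.safe_load(fh)
    # Hard fallback mirrors the last-resort path used above.
    with open(base / "ltxv-13b-0.9.8-distilled.yaml", "r") as fh:
        return yaml.safe_load(fh)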
@@ -311,9 +299,7 @@ class VideoService:
 
         return pipeline, latent_upsampler
 
-    # Precisão: promove FP8->BF16 e define dtype de autocast (versão segura)
     def _promote_fp8_weights_to_bf16(self, module):
-        # Só promova se for realmente um nn.Module; Pipelines não são nn.Module
         if not isinstance(module, torch.nn.Module):
             return
         f8 = getattr(torch, "float8_e4m3fn", None)
@@ -332,16 +318,14 @@ class VideoService:
                     b.data = b.data.to(torch.bfloat16)
             except Exception:
                 pass
-
+
     def _apply_precision_policy(self):
         prec = str(self.config.get("precision", "")).lower()
         self.runtime_autocast_dtype = torch.float32
         if prec == "float8_e4m3fn":
-            # FP8: kernels nativos da LTX podem estar ativos; por padrão, não promover pesos
             self.runtime_autocast_dtype = torch.bfloat16
             force_promote = os.getenv("LTXV_FORCE_BF16_ON_FP8", "0") == "1"
            if force_promote and hasattr(torch, "float8_e4m3fn"):
-                # Promove apenas módulos reais; ignora objetos Pipeline
                 try:
                     self._promote_fp8_weights_to_bf16(self.pipeline)
                 except Exception:
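The promotion gated by LTXV_FORCE_BF16_ON_FP8 boils down to casting float8_e4m3fn parameters and buffers of a genuine nn.Module to bfloat16. A minimal sketch of that cast, assuming a PyTorch build that exposes torch.float8_e4m3fn:

import torch

def promote_fp8_to_bf16(module: torch.nn.Module) -> None:
    f8 = getattr(torch, "float8_e4m3fn", None)
    if f8 is None or not isinstance(module, torch.nn.Module):
        return  # pipelines and other wrappers are skipped on purpose
    for p in module.parameters(recurse=True):
        if p.dtype == f8:
            p.data = p.data.to(torch.bfloat16)
    for b in module.buffers(recurse=True):
        if b.dtype == f8:
            b.data = b.data.to(torch.bfloat16)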
@@ -357,7 +341,7 @@ class VideoService:
             self.runtime_autocast_dtype = torch.float16
         else:
             self.runtime_autocast_dtype = torch.float32
-
+
     def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
         tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
         tensor = torch.nn.functional.pad(tensor, padding_values)
@@ -458,7 +442,6 @@ class VideoService:
         ).to(self.device)
 
         result_tensor = None
-        video_np = None
         multi_scale_pipeline = None
 
         if improve_texture:
@@ -496,32 +479,31 @@ class VideoService:
                     "skip_block_list": first_pass_config.get("skip_block_list"),
                 }
             )
-
-            #
-
-
+
+            # Escolha de schedule única para garantir guidance_mapping definido e consistente
+            schedule = first_pass_config.get("timesteps")
+            if schedule is None:
+                schedule = first_pass_config.get("guidance_timesteps")
             if mode == "video-to-video":
-
+                schedule = [0.7]
                 print("[INFO] Modo video-to-video (etapa única): definindo timesteps (força) para [0.7]")
-
-                single_pass_kwargs["timesteps"] =
-
-
+            if isinstance(schedule, (list, tuple)) and len(schedule) > 0:
+                single_pass_kwargs["timesteps"] = schedule
+                single_pass_kwargs["guidance_timesteps"] = schedule  # garante criação de guidance_mapping
+
             print("\n[INFO] Executando pipeline de etapa única...")
             ctx = contextlib.nullcontext()
             if self.device == "cuda":
                 ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype)
             with ctx:
                 result_tensor = self.pipeline(**single_pass_kwargs).images
-
+
         pad_left, pad_right, pad_top, pad_bottom = padding_values
         slice_h_end = -pad_bottom if pad_bottom > 0 else None
         slice_w_end = -pad_right if pad_right > 0 else None
         result_tensor = result_tensor[:, :, :actual_num_frames, pad_top:slice_h_end, pad_left:slice_w_end]
         log_tensor_info(result_tensor, "Tensor Final (Após Pós-processamento, Antes de Salvar)")
 
-        video_np = (result_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy() * 255).astype(np.uint8)
-
         # Staging seguro em tmp e move para diretório persistente
         temp_dir = tempfile.mkdtemp(prefix="ltxv_")
         self._register_tmp_dir(temp_dir)
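The schedule block introduced above feeds the same list to both timesteps and guidance_timesteps, which, per the inline comment, is meant to keep the pipeline's guidance mapping consistent with the schedule it actually samples. A toy illustration with made-up config values (not from any shipped YAML):

# Toy values, for illustration only.
first_pass_config = {"guidance_timesteps": [0.998, 0.98, 0.9, 0.7]}
single_pass_kwargs = {}
mode = "text-to-video"

schedule = first_pass_config.get("timesteps")
if schedule is None:
    schedule = first_pass_config.get("guidance_timesteps")
if mode == "video-to-video":
    schedule = [0.7]
if isinstance(schedule, (list, tuple)) and len(schedule) > 0:
    single_pass_kwargs["timesteps"] = schedule
    single_pass_kwargs["guidance_timesteps"] = schedule

print(single_pass_kwargs)  # both keys hold [0.998, 0.98, 0.9, 0.7]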
@@ -531,12 +513,20 @@ class VideoService:
         final_output_path = None
         output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
         try:
+            # Escrita quadro a quadro para evitar array 4D gigante em RAM
             with imageio.get_writer(output_video_path, fps=call_kwargs["frame_rate"], codec="libx264", quality=8) as writer:
-
-                for i
-
+                T = result_tensor.shape[2]  # (B, C, T, H, W)
+                for i in range(T):
+                    frame_chw = result_tensor[0, :, i]  # (C,H,W) no device
+                    frame_hwc_u8 = (frame_chw.permute(1, 2, 0)  # (H,W,C)
+                                    .clamp(0, 1)
+                                    .mul(255)
+                                    .to(torch.uint8)
+                                    .cpu()
+                                    .numpy())
+                    writer.append_data(frame_hwc_u8)
                     if progress_callback:
-                        progress_callback(i + 1,
+                        progress_callback(i + 1, T)
 
             candidate_final = os.path.join(results_dir, f"output_{used_seed}.mp4")
             try:
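The writer loop added above streams frames one at a time instead of first building the full uint8 array that the removed video_np line produced. The same pattern as a standalone helper, assuming a (B, C, T, H, W) float tensor in [0, 1] and imageio with the ffmpeg backend; the helper name is illustrative:

import imageio
import torch

def write_video_streaming(result_tensor: torch.Tensor, path: str, fps: int,
                          progress_callback=None) -> None:
    num_frames = result_tensor.shape[2]  # (B, C, T, H, W)
    with imageio.get_writer(path, fps=fps, codec="libx264", quality=8) as writer:
        for i in range(num_frames):
            frame = (result_tensor[0, :, i]       # (C, H, W), possibly still on the GPU
                     .permute(1, 2, 0)            # (H, W, C)
                     .clamp(0, 1).mul(255)
                     .to(torch.uint8).cpu().numpy())
            writer.append_data(frame)             # encode this frame, then drop it
            if progress_callback:
                progress_callback(i + 1, num_frames)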
@@ -549,15 +539,10 @@ class VideoService:
             self._log_gpu_memory("Fim da Geração")
             return final_output_path, used_seed
         finally:
-            # Libera tensores/objetos grandes antes de limpar VRAM
             try:
                 del result_tensor
             except Exception:
                 pass
-            try:
-                del video_np
-            except Exception:
-                pass
             try:
                 del multi_scale_pipeline
             except Exception:
@@ -574,11 +559,10 @@ class VideoService:
             except Exception:
                 pass
 
-            # Limpeza de temporários preservando o vídeo final
             try:
                 self.finalize(keep_paths=[final_output_path] if final_output_path else [])
             except Exception:
                 pass
 
 print("Criando instância do VideoService. O carregamento do modelo começará agora...")
-video_generation_service = VideoService()
+video_generation_service = VideoService()
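Because the module instantiates video_generation_service at import time, callers are expected to hand the finished file to finalize so cleanup spares it. A hypothetical caller-side sketch: only finalize's signature comes from this file, the import path follows the new api/ltx_server.py location, and the paths are placeholders.

# Hypothetical usage; adjust the import to however the api package is exposed.
from api.ltx_server import video_generation_service

final_path = "/data/results/output_42.mp4"            # placeholder for the returned video path
video_generation_service.finalize(
    keep_paths=[final_path],                          # never delete the finished video
    extra_paths=["/tmp/ltxv_scratch/preview.png"],    # extra temp files to try removing
    clear_gpu=True,                                   # also empty the CUDA cache afterwards
)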