Test

Paused

App Files Files Community

EuuIia committed on Oct 3

Commit

33de423

verified ·

1 Parent(s): c14605c

Update video_service.py

Browse files

Files changed (1) hide show

video_service.py +80 -10

video_service.py CHANGED Viewed

@@ -16,9 +16,9 @@ import sys
 import subprocess
 import gc
 import shutil
 # --- 2. GERENCIAMENTO DE DEPENDÊNCIAS E SETUP ---
 def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
     try:
         import psutil
@@ -156,11 +156,16 @@ class VideoService:
         self._tmp_dirs = set()
         self._tmp_files = set()
         self._last_outputs = []
         self.pipeline, self.latent_upsampler = self._load_models()
         print(f"Movendo modelos para o dispositivo de inferência: {self.device}")
         self.pipeline.to(self.device)
         if self.latent_upsampler:
             self.latent_upsampler.to(self.device)
         if self.device == "cuda":
             torch.cuda.empty_cache()
             self._log_gpu_memory("Após carregar modelos")
@@ -212,6 +217,7 @@ class VideoService:
         keep = set(keep_paths or [])
         extras = set(extra_paths or [])
         for f in list(self._tmp_files | extras):
             try:
                 if f not in keep and os.path.isfile(f):
@@ -221,6 +227,7 @@ class VideoService:
             finally:
                 self._tmp_files.discard(f)
         for d in list(self._tmp_dirs):
             try:
                 if d not in keep and os.path.isdir(d):
@@ -230,6 +237,7 @@ class VideoService:
             finally:
                 self._tmp_dirs.discard(d)
         gc.collect()
         try:
             if clear_gpu and torch.cuda.is_available():
@@ -241,19 +249,33 @@ class VideoService:
         except Exception:
             pass
         try:
             self._log_gpu_memory("Após finalize")
         except Exception:
             pass
     def _load_config(self):
         """Open a YAML config file under ``LTX_VIDEO_REPO_DIR / "configs"`` and
         return the result of ``yaml.safe_load`` on it."""
-        config_file_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled-fp8.yaml"
         with open(config_file_path, "r") as file:
             return yaml.safe_load(file)
     def _load_models(self):
         LTX_REPO = "Lightricks/LTX-Video"
         distilled_model_path = hf_hub_download(
             repo_id=LTX_REPO,
             filename=self.config["checkpoint_path"],
@@ -289,9 +311,47 @@ class VideoService:
         return pipeline, latent_upsampler
     def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
         """Load the image at *filepath* via ``load_image_to_tensor_with_resize_and_crop``,
         pad it with *padding_values* and return it moved to ``self.device``."""
         tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
         tensor = torch.nn.functional.pad(tensor, padding_values)
         return tensor.to(self.device)
     def generate(
@@ -407,7 +467,12 @@ class VideoService:
                     "second_pass": second_pass_args,
                 }
             )
-            result_tensor = multi_scale_pipeline(**multi_scale_call_kwargs).images
             log_tensor_info(result_tensor, "Resultado da Etapa 2 (Saída do Pipeline Multi-Scale)")
         else:
             single_pass_kwargs = call_kwargs.copy()
@@ -424,10 +489,14 @@ class VideoService:
                 single_pass_kwargs["timesteps"] = [0.7]
                 print("[INFO] Modo video-to-video (etapa única): definindo timesteps (força) para [0.7]")
             else:
-                single_pass_kwargs["timesteps"] = first_pass_config.get("timesteps")
             print("\n[INFO] Executando pipeline de etapa única...")
-            result_tensor = self.pipeline(**single_pass_kwargs).images
         pad_left, pad_right, pad_top, pad_bottom = padding_values
         slice_h_end = -pad_bottom if pad_bottom > 0 else None
@@ -437,17 +506,16 @@ class VideoService:
         video_np = (result_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy() * 255).astype(np.uint8)
         temp_dir = tempfile.mkdtemp(prefix="ltxv_")
         self._register_tmp_dir(temp_dir)
-        results_dir = "/app/output"
         os.makedirs(results_dir, exist_ok=True)
         final_output_path = None
         output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
         try:
-            with imageio.get_writer(
-                output_video_path, fps=call_kwargs["frame_rate"], codec="libx264", quality=8
-            ) as writer:
                 total_frames = len(video_np)
                 for i, frame in enumerate(video_np):
                     writer.append_data(frame)
@@ -465,6 +533,7 @@ class VideoService:
             self._log_gpu_memory("Fim da Geração")
             return final_output_path, used_seed
         finally:
             try:
                 del result_tensor
             except Exception:
@@ -489,6 +558,7 @@ class VideoService:
             except Exception:
                 pass
             try:
                 self.finalize(keep_paths=[final_output_path] if final_output_path else [])
             except Exception:

 import subprocess
 import gc
 import shutil
+import contextlib
 # --- 2. GERENCIAMENTO DE DEPENDÊNCIAS E SETUP ---
 def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
     try:
         import psutil
         self._tmp_dirs = set()
         self._tmp_files = set()
         self._last_outputs = []
         self.pipeline, self.latent_upsampler = self._load_models()
         print(f"Movendo modelos para o dispositivo de inferência: {self.device}")
         self.pipeline.to(self.device)
         if self.latent_upsampler:
             self.latent_upsampler.to(self.device)
+        # Política de precisão (inclui promoção FP8->BF16 e dtype de autocast)
+        self._apply_precision_policy()
         if self.device == "cuda":
             torch.cuda.empty_cache()
             self._log_gpu_memory("Após carregar modelos")
         keep = set(keep_paths or [])
         extras = set(extra_paths or [])
+        # Remoção de arquivos
         for f in list(self._tmp_files | extras):
             try:
                 if f not in keep and os.path.isfile(f):
             finally:
                 self._tmp_files.discard(f)
+        # Remoção de diretórios
         for d in list(self._tmp_dirs):
             try:
                 if d not in keep and os.path.isdir(d):
             finally:
                 self._tmp_dirs.discard(d)
+        # Coleta de GC e limpeza de VRAM
         gc.collect()
         try:
             if clear_gpu and torch.cuda.is_available():
         except Exception:
             pass
+        # Log opcional pós-limpeza
         try:
             self._log_gpu_memory("Após finalize")
         except Exception:
             pass
     def _load_config(self):
         """Open a YAML config file under ``LTX_VIDEO_REPO_DIR / "configs"`` and
         return the result of ``yaml.safe_load`` on it."""
+        # Prefer FP8 configs when present, keeping compatibility
+        base = LTX_VIDEO_REPO_DIR / "configs"
+        # Tried in list order; the first file that exists is parsed and returned
+        candidates = [
+            base / "ltxv-13b-0.9.8-dev-fp8.yaml",
+            base / "ltxv-13b-0.9.8-distilled-fp8.yaml",
+            # NOTE(review): the ".yaml.txt" suffix looks unusual — confirm this file name is intended
+            base / "ltxv-13b-0.9.8-dev-fp8.yaml.txt",
+            base / "ltxv-13b-0.9.8-distilled.yaml",  # non-FP8 fallback
+        ]
+        for cfg in candidates:
+            if cfg.exists():
+                with open(cfg, "r") as file:
+                    return yaml.safe_load(file)
+        # Hard fallback to the classic path if none of the above exists
+        # NOTE(review): this is the same path as the last candidate, so reaching this
+        # line means that file was just reported missing — confirm the intended fallback
+        config_file_path = base / "ltxv-13b-0.9.8-distilled.yaml"
         with open(config_file_path, "r") as file:
             return yaml.safe_load(file)
     def _load_models(self):
         LTX_REPO = "Lightricks/LTX-Video"
         distilled_model_path = hf_hub_download(
             repo_id=LTX_REPO,
             filename=self.config["checkpoint_path"],
         return pipeline, latent_upsampler
+    # Precision: promote FP8->BF16 and set the autocast dtype
+    def _promote_fp8_weights_to_bf16(self, module):
+        """Convert every ``torch.float8_e4m3fn`` parameter and buffer of *module*
+        to ``torch.bfloat16`` by rebinding its ``.data``.
+        Returns ``None``. Does nothing when ``torch`` has no ``float8_e4m3fn``
+        attribute. Only that one FP8 dtype is checked, and an error raised while
+        converting a single tensor is swallowed so the loop continues."""
+        # Look the dtype up defensively: it may be absent from this torch build
+        f8 = getattr(torch, "float8_e4m3fn", None)
+        if f8 is None:
+            return
+        for _, p in module.named_parameters(recurse=True):
+            try:
+                if p.dtype == f8:
+                    # Rebind the storage with gradient tracking disabled
+                    with torch.no_grad():
+                        p.data = p.data.to(torch.bfloat16)
+            except Exception:
+                pass
+        for _, b in module.named_buffers(recurse=True):
+            try:
+                if hasattr(b, "dtype") and b.dtype == f8:
+                    b.data = b.data.to(torch.bfloat16)
+            except Exception:
+                pass
+    def _apply_precision_policy(self):
+        """Set ``self.runtime_autocast_dtype`` from the ``precision`` entry of ``self.config``.
+        The value is stringified and lower-cased before matching:
+        ``float8_e4m3fn`` -> bfloat16 (after promoting FP8 weights of the pipeline and,
+        when set, the latent upsampler); ``bfloat16`` -> bfloat16;
+        ``mixed_precision`` -> float16; anything else, including a missing key -> float32."""
+        prec = str(self.config.get("precision", "")).lower()
+        # Default, kept when no branch below overrides it
+        self.runtime_autocast_dtype = torch.float32
+        if prec == "float8_e4m3fn":
+            # Experimental FP8: promote weights to BF16 and standardize autocast on BF16
+            # Promotion only runs when torch defines float8_e4m3fn; the dtype is set to BF16 either way
+            if hasattr(torch, "float8_e4m3fn"):
+                self._promote_fp8_weights_to_bf16(self.pipeline)
+                if self.latent_upsampler:
+                    self._promote_fp8_weights_to_bf16(self.latent_upsampler)
+            self.runtime_autocast_dtype = torch.bfloat16
+        elif prec == "bfloat16":
+            self.runtime_autocast_dtype = torch.bfloat16
+        elif prec == "mixed_precision":
+            self.runtime_autocast_dtype = torch.float16
+        else:
+            # Same value as the default assigned above
+            self.runtime_autocast_dtype = torch.float32
     def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
         """Load the image at *filepath* via ``load_image_to_tensor_with_resize_and_crop``,
         pad it with *padding_values* and return it moved to ``self.device``."""
         tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
         tensor = torch.nn.functional.pad(tensor, padding_values)
+        # On "cuda" the tensor is additionally cast to self.runtime_autocast_dtype
+        if self.device == "cuda":
+            return tensor.to(self.device, dtype=self.runtime_autocast_dtype)
         return tensor.to(self.device)
     def generate(
                     "second_pass": second_pass_args,
                 }
             )
+            ctx = contextlib.nullcontext()
+            if self.device == "cuda":
+                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype)
+            with ctx:
+                result_tensor = multi_scale_pipeline(**multi_scale_call_kwargs).images
             log_tensor_info(result_tensor, "Resultado da Etapa 2 (Saída do Pipeline Multi-Scale)")
         else:
             single_pass_kwargs = call_kwargs.copy()
                 single_pass_kwargs["timesteps"] = [0.7]
                 print("[INFO] Modo video-to-video (etapa única): definindo timesteps (força) para [0.7]")
             else:
+                single_pass_kwargs["timesteps"] = first_pass_config.get("guidance_timesteps") or first_pass_config.get("timesteps")
             print("\n[INFO] Executando pipeline de etapa única...")
+            ctx = contextlib.nullcontext()
+            if self.device == "cuda":
+                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype)
+            with ctx:
+                result_tensor = self.pipeline(**single_pass_kwargs).images
         pad_left, pad_right, pad_top, pad_bottom = padding_values
         slice_h_end = -pad_bottom if pad_bottom > 0 else None
         video_np = (result_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy() * 255).astype(np.uint8)
+        # Staging seguro em tmp e move para diretório persistente
         temp_dir = tempfile.mkdtemp(prefix="ltxv_")
         self._register_tmp_dir(temp_dir)
+        results_dir = "/data/results"
         os.makedirs(results_dir, exist_ok=True)
         final_output_path = None
         output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
         try:
+            with imageio.get_writer(output_video_path, fps=call_kwargs["frame_rate"], codec="libx264", quality=8) as writer:
                 total_frames = len(video_np)
                 for i, frame in enumerate(video_np):
                     writer.append_data(frame)
             self._log_gpu_memory("Fim da Geração")
             return final_output_path, used_seed
         finally:
+            # Libera tensores/objetos grandes antes de limpar VRAM
             try:
                 del result_tensor
             except Exception:
             except Exception:
                 pass
+            # Limpeza de temporários preservando o vídeo final
             try:
                 self.finalize(keep_paths=[final_output_path] if final_output_path else [])
             except Exception: