Test

Paused

App Files Files Community

Eueuiaa committed on Oct 8

Commit

39769b3

verified ·

1 Parent(s): a281339

Upload ltx_server.py

Browse files

Files changed (1) hide show

api/ltx_server.py +96 -186

api/ltx_server.py CHANGED Viewed

@@ -8,15 +8,19 @@ warnings.filterwarnings("ignore", category=UserWarning)
 warnings.filterwarnings("ignore", category=FutureWarning)
 warnings.filterwarnings("ignore", message=".*")
-from huggingface_hub import logging, hf_hub_download
 logging.set_verbosity_error()
 logging.set_verbosity_warning()
 logging.set_verbosity_info()
 logging.set_verbosity_debug()
 LTXV_DEBUG=1
 LTXV_FRAME_LOG_EVERY=8
 # --- 1. IMPORTAÇÕES ---
 import os, subprocess, shlex, tempfile
 import torch
@@ -104,6 +108,8 @@ def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
                 continue
     return results
 def calculate_new_dimensions(orig_w, orig_h, divisor=8):
     """
     Calcula novas dimensões mantendo a proporção, garantindo que ambos os
@@ -138,6 +144,7 @@ def calculate_new_dimensions(orig_w, orig_h, divisor=8):
     print(f"[Dimension Calc] Original: {orig_w}x{orig_h} -> Calculado: {new_w:.0f}x{new_h:.0f} -> Final (divisível por {divisor}): {final_w}x{final_h}")
     return final_h, final_w # Retorna (altura, largura)
 def handle_media_upload_for_dims(filepath, current_h, current_w):
     """
     Esta função agora usará o novo cálculo robusto.
@@ -211,6 +218,7 @@ def add_deps_to_path():
 add_deps_to_path()
 # --- 3. IMPORTAÇÕES ESPECÍFICAS DO MODELO ---
 from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
 from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
 from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
@@ -233,6 +241,10 @@ def log_tensor_info(tensor, name="Tensor"):
             pass
     print("------------------------------------------\n")
 # --- 5. CLASSE PRINCIPAL DO SERVIÇO ---
 class VideoService:
     def __init__(self):
@@ -354,80 +366,6 @@ class VideoService:
             return yaml.safe_load(file)
     def _load_models(self):
-        """
-        Carrega os modelos de forma inteligente:
-        1. Tenta resolver o caminho do cache local (rápido, sem rede).
-        2. Se o arquivo não for encontrado localmente, baixa como fallback.
-        Garante que o serviço possa iniciar mesmo que o setup.py não tenha sido executado.
-        """
-        t0 = time.perf_counter()
-        LTX_REPO = "Lightricks/LTX-Video"
-        print("[DEBUG] Resolvendo caminhos dos modelos de forma inteligente...")
-        # --- Função Auxiliar para Carregamento Inteligente ---
-        def get_or_download_model(repo_id, filename, description):
-            try:
-                # hf_hub_download é a ferramenta certa aqui. Ela verifica o cache PRIMEIRO.
-                # Se o arquivo estiver no cache, retorna o caminho instantaneamente (após uma verificação rápida de metadados).
-                # Se não estiver no cache, ela o baixa.
-                print(f"[DEBUG] Verificando {description}: {filename}...")
-                model_path = hf_hub_download(
-                    repo_id=repo_id,
-                    filename=filename,
-                    # Forçar o uso de um cache específico se necessário
-                    cache_dir=os.getenv("HF_HOME_CACHE"),
-                    token=os.getenv("HF_TOKEN")
-                )
-                print(f"[DEBUG] Caminho do {description} resolvido com sucesso.")
-                return model_path
-            except Exception as e:
-                print("\n" + "="*80)
-                print(f"[ERRO CRÍTICO] Falha ao obter o modelo '{filename}'.")
-                print(f"Detalhe do erro: {e}")
-                print("Verifique sua conexão com a internet ou o estado do cache do Hugging Face.")
-                print("="*80 + "\n")
-                sys.exit(1)
-        # --- Checkpoint Principal ---
-        checkpoint_filename = self.config["checkpoint_path"]
-        distilled_model_path = get_or_download_model(
-            LTX_REPO, checkpoint_filename, "checkpoint principal"
-        )
-        self.config["checkpoint_path"] = distilled_model_path
-        # --- Upscaler Espacial ---
-        upscaler_filename = self.config["spatial_upscaler_model_path"]
-        spatial_upscaler_path = get_or_download_model(
-            LTX_REPO, upscaler_filename, "upscaler espacial"
-        )
-        self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
-        # --- Construção dos Pipelines ---
-        print("\n[DEBUG] Construindo pipeline a partir dos caminhos resolvidos...")
-        pipeline = create_ltx_video_pipeline(
-            ckpt_path=self.config["checkpoint_path"],
-            precision=self.config["precision"],
-            text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
-            sampler=self.config["sampler"],
-            device="cpu",
-            enhance_prompt=False,
-            prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
-            prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
-        )
-        print("[DEBUG] Pipeline pronto.")
-        latent_upsampler = None
-        if self.config.get("spatial_upscaler_model_path"):
-            print("[DEBUG] Construindo latent_upsampler...")
-            latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
-            print("[DEBUG] Upsampler pronto.")
-        print(f"[DEBUG] _load_models() tempo total={time.perf_counter()-t0:.3f}s")
-        return pipeline, latent_upsampler
-    def _load_models_old(self):
         t0 = time.perf_counter()
         LTX_REPO = "Lightricks/LTX-Video"
         print("[DEBUG] Baixando checkpoint principal...")
@@ -497,6 +435,8 @@ class VideoService:
                 pass
         print(f"[DEBUG] FP8→BF16: params_promoted={p_cnt}, buffers_promoted={b_cnt}")
     @torch.no_grad()
     def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
         """
@@ -517,6 +457,7 @@ class VideoService:
         return upsampled_latents
     def _apply_precision_policy(self):
         prec = str(self.config.get("precision", "")).lower()
         self.runtime_autocast_dtype = torch.float32
@@ -571,32 +512,28 @@ class VideoService:
         n_chunks = (sum_latent) // num_latente_por_chunk
         steps = sum_latent//n_chunks
-        print("==========PODA CAUSAL[start:stop-1]==========")
         print(f"[DEBUG] TOTAL LATENTES = {sum_latent}")
         print(f"[DEBUG] LATENTES min por chunk = {num_latente_por_chunk}")
         print(f"[DEBUG] Número de chunks = {n_chunks}")
         if n_chunks > 1:
             i=0
             while i < n_chunks:
-                if i>0:
-                    dow=0
-                else:
-                    dow=0
                 start = (num_latente_por_chunk*i)
-                end = (start+num_latente_por_chunk+(overlap))
                 if i+1 < n_chunks:
-                    chunk = latents_brutos[:, :, start-(dow):end, :, :].clone().detach()
-                    print(f"[DEBUG] chunk{i+1}[:, :, {start-dow}:{end}, :, :] = {chunk.shape[2]}")
                 else:
-                    chunk = latents_brutos[:, :, start-(dow):, :, :].clone().detach()
-                    print(f"[DEBUG] chunk{i+1}[:, :, {start-(dow)}:, :, :] = {chunk.shape[2]}")
                 chunks.append(chunk)
                 i+=1
         else:
             print(f"[DEBUG] numero chunks minimo ")
             print(f"[DEBUG] latents_brutos[:, :, :, :, :] = {latents_brutos.shape[2]}")
             chunks.append(latents_brutos)
-        print("\n\n================PODA CAUSAL=================")
         return chunks
     def _get_total_frames(self, video_path: str) -> int:
@@ -625,16 +562,10 @@ class VideoService:
         video_fade_ini = None
         nova_lista = []
-        if crossfade_frames == 0:
-            print("\n\n[DEBUG] CROSSFADE_FRAMES=0 Ship concatenation causal")
-            return video_paths
-        print("\n\n===========CONCATECAO CAUSAL=============")
         print(f"[DEBUG] Iniciando pipeline com {total_partes} vídeos e {poda} frames de crossfade")
         for i in range(total_partes):
             base = video_paths[i]
@@ -693,7 +624,7 @@ class VideoService:
             nova_lista.append(video_podado)
             print(f"[DEBUG] Video podado {i+1} adicionado {self._get_total_frames(video_podado)} frames ✅")
         print("===========CONCATECAO CAUSAL=============")
@@ -809,7 +740,8 @@ class VideoService:
             "enhance_prompt": False,
             "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
         }
         latents = None
         latents_list = []
         results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
@@ -821,9 +753,7 @@ class VideoService:
                 if improve_texture:
                     if not self.latent_upsampler:
                         raise ValueError("Upscaler espacial não carregado, mas 'improve_texture' está ativo.")
-                    first_pass_kwargs = call_kwargs.copy()
                     # --- ETAPA 1: GERAÇÃO BASE (FIRST PASS) ---
                     print("\n--- INICIANDO ETAPA 1: GERAÇÃO BASE (FIRST PASS) ---")
                     t_pass1 = time.perf_counter()
@@ -832,21 +762,23 @@ class VideoService:
                     downscale_factor = self.config.get("downscale_factor", 0.6666666)
                     vae_scale_factor = self.pipeline.vae_scale_factor # Geralmente 8
                     x_width = int(width_padded * downscale_factor)
                     downscaled_width = x_width - (x_width % vae_scale_factor)
                     x_height = int(height_padded * downscale_factor)
                     downscaled_height = x_height - (x_height % vae_scale_factor)
                     print(f"[DEBUG] First Pass Dims: Original Pad ({width_padded}x{height_padded}) -> Downscaled ({downscaled_width}x{downscaled_height})")
-                    first_pass_kwargs.update({
-                        **first_pass_config
-                    })
                     first_pass_kwargs.update({
                         "output_type": "latent",
                         "width": downscaled_width,
                         "height": downscaled_height,
                         "guidance_scale": float(guidance_scale),
                     })
                     print(f"[DEBUG] First Pass: Gerando em {downscaled_width}x{downscaled_height}...")
@@ -872,44 +804,38 @@ class VideoService:
                     except Exception:
                          pass
-                    latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up,40,0)
-                    print("\n\n--- INICIANDO ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---")
-                    cc = 1
                     for latents in latents_parts_up:
-                        t_pass2 = time.perf_counter()
-                        print("\n\n#########################################")
                         # # --- ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---
-                        print(f"\n--- INICIANDO ETAPA 3/{cc} ")
-                        first_pass_kwargs = call_kwargs.copy()
                         second_pass_config = self.config.get("second_pass", {}).copy()
                         second_pass_width = downscaled_width * 2
                         second_pass_height = downscaled_height * 2
                         print(f"[DEBUG] Second Pass Dims: Target ({second_pass_width}x{second_pass_height})")
                         num_latent_frames_part = latents.shape[2]
-                        log_tensor_info(latents, "Latentes input (Pre-Pós-Second Pass)")
                         vae_temporal_scale = self.pipeline.video_scale_factor # Geralmente 4 ou 8
                         num_pixel_frames_part = ((num_latent_frames_part - 1) * vae_temporal_scale) + 1
                         print(f"[DEBUG] Parte: {num_latent_frames_part - 1} latentes -> {num_pixel_frames_part} frames de pixel (alvo)")
-                        second_pass_kwargs.update({
-                           **second_pass_config
-                        })
                         second_pass_kwargs.update({
                            "output_type": "latent",
                            "width": second_pass_width,
                            "height": second_pass_height,
-                           "num_frames": num_pixel_frames_part,
-                           "latents": latents, # O tensor upscaled
                            "guidance_scale": float(guidance_scale),
                         })
                         print(f"[DEBUG] Second Pass: Refinando em {width_padded}x{height_padded}...")
@@ -918,10 +844,6 @@ class VideoService:
                         print(f"[DEBUG] Second part Pass concluída em {time.perf_counter() - t_pass2:.2f}s")
                         latents_list.append(final_latents)
-                        cc+=1
-                        print("#########################################")
-                    print("\n\n--- FIM ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---")
                 else: # Geração de etapa única
                     print("\n--- INICIANDO GERAÇÃO DE ETAPA ÚNICA ---")
@@ -931,74 +853,62 @@ class VideoService:
                     single_pass_kwargs["guidance_scale"] = float(guidance_scale)
                     single_pass_kwargs["output_type"] = "latent"
-                    # Remove keys that might conflict or are not used in single pass / handled by above
-                    #single_pass_kwargs.pop("num_inference_steps", None)
-                    #single_pass_kwargs.pop("first_pass", None)
-                    #single_pass_kwargs.pop("second_pass", None)
-                    #single_pass_kwargs.pop("downscale_factor", None)
                     latents = self.pipeline(**single_pass_kwargs).images
                     log_tensor_info(latents, "Latentes Finais (Etapa Única)")
                     print(f"[DEBUG] Etapa única concluída em {time.perf_counter() - t_single:.2f}s")
                     latents_list.append(latents)
             # --- ETAPA FINAL: DECODIFICAÇÃO E CODIFICAÇÃO MP4 ---
             print("\n--- INICIANDO ETAPA FINAL: DECODIFICAÇÃO E MONTAGEM ---")
-            temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
-            results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
             partes_mp4 = []
             par = 0
-            for latents_vae in latents_list:
-                latents_cpu_vae = latents_vae.detach().to("cpu", non_blocking=True)
-                torch.cuda.empty_cache()
-                try:
-                    torch.cuda.ipc_collect()
-                except Exception:
-                    pass
-                latents_parts_vae = self._dividir_latentes_por_tamanho(latents_cpu_vae,4,1)
-                for latents in latents_parts_vae:
-                    print(f"[DEBUG] Partição {par}: {tuple(latents.shape)}")
-                    par = par + 1
-                    output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
-                    final_output_path = None
-                    print("[DEBUG] Decodificando bloco de latentes com VAE → tensor de pixels...")
-                    # Usar manager com timestep por item; previne target_shape e rota NoneType.decode
-                    pixel_tensor = vae_manager_singleton.decode(
-                        latents.to(self.device, non_blocking=True),
-                        decode_timestep=float(self.config.get("decode_timestep", 0.05))
-                    )
-                    log_tensor_info(pixel_tensor, "Pixel tensor (VAE saída)")
-                    print("[DEBUG] Codificando MP4 a partir do tensor de pixels (bloco inteiro)...")
-                    video_encode_tool_singleton.save_video_from_tensor(
-                        pixel_tensor,
-                        output_video_path,
-                        fps=call_kwargs["frame_rate"],
-                        progress_callback=progress_callback
-                    )
-                    candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
-                    try:
-                        shutil.move(output_video_path, candidate)
-                        final_output_path = candidate
-                        print(f"[DEBUG] MP4 parte {par} movido para {final_output_path}")
-                        partes_mp4.append(final_output_path)
-                    except Exception as e:
-                        final_output_path = output_video_path
-                        print(f"[DEBUG] Falha no move; usando tmp como final: {e}")
             total_partes = len(partes_mp4)
             if (total_partes>1):
                 final_vid = os.path.join(results_dir, f"concat_fim_{used_seed}.mp4")

 warnings.filterwarnings("ignore", category=FutureWarning)
 warnings.filterwarnings("ignore", message=".*")
+from huggingface_hub import logging
 logging.set_verbosity_error()
 logging.set_verbosity_warning()
 logging.set_verbosity_info()
 logging.set_verbosity_debug()
 LTXV_DEBUG=1
 LTXV_FRAME_LOG_EVERY=8
 # --- 1. IMPORTAÇÕES ---
 import os, subprocess, shlex, tempfile
 import torch
                 continue
     return results
 def calculate_new_dimensions(orig_w, orig_h, divisor=8):
     """
     Calcula novas dimensões mantendo a proporção, garantindo que ambos os
     print(f"[Dimension Calc] Original: {orig_w}x{orig_h} -> Calculado: {new_w:.0f}x{new_h:.0f} -> Final (divisível por {divisor}): {final_w}x{final_h}")
     return final_h, final_w # Retorna (altura, largura)
 def handle_media_upload_for_dims(filepath, current_h, current_w):
     """
     Esta função agora usará o novo cálculo robusto.
 add_deps_to_path()
 # --- 3. IMPORTAÇÕES ESPECÍFICAS DO MODELO ---
 from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
 from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
 from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
             pass
     print("------------------------------------------\n")
 # --- 5. CLASSE PRINCIPAL DO SERVIÇO ---
 class VideoService:
     def __init__(self):
             return yaml.safe_load(file)
     def _load_models(self):
         t0 = time.perf_counter()
         LTX_REPO = "Lightricks/LTX-Video"
         print("[DEBUG] Baixando checkpoint principal...")
                 pass
         print(f"[DEBUG] FP8→BF16: params_promoted={p_cnt}, buffers_promoted={b_cnt}")
     @torch.no_grad()
     def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
         """
         return upsampled_latents
     def _apply_precision_policy(self):
         prec = str(self.config.get("precision", "")).lower()
         self.runtime_autocast_dtype = torch.float32
         n_chunks = (sum_latent) // num_latente_por_chunk
         steps = sum_latent//n_chunks
+        print("================PODA CAUSAL=================")
         print(f"[DEBUG] TOTAL LATENTES = {sum_latent}")
         print(f"[DEBUG] LATENTES min por chunk = {num_latente_por_chunk}")
         print(f"[DEBUG] Número de chunks = {n_chunks}")
         if n_chunks > 1:
             i=0
             while i < n_chunks:
                 start = (num_latente_por_chunk*i)
+                end = (start+num_latente_por_chunk+overlap)
                 if i+1 < n_chunks:
+                    chunk = latents_brutos[:, :, start:end, :, :].clone().detach()
+                    print(f"[DEBUG] chunk{i+1}[:, :, {start}:{end}, :, :] = {chunk.shape[2]}")
                 else:
+                    chunk = latents_brutos[:, :, start:, :, :].clone().detach()
+                    print(f"[DEBUG] chunk{i+1}[:, :, {start}:, :, :] = {chunk.shape[2]}")
                 chunks.append(chunk)
                 i+=1
         else:
             print(f"[DEBUG] numero chunks minimo ")
             print(f"[DEBUG] latents_brutos[:, :, :, :, :] = {latents_brutos.shape[2]}")
             chunks.append(latents_brutos)
+        print("================PODA CAUSAL=================")
         return chunks
     def _get_total_frames(self, video_path: str) -> int:
         video_fade_ini = None
         nova_lista = []
+        print("===========CONCATECAO CAUSAL=============")
         print(f"[DEBUG] Iniciando pipeline com {total_partes} vídeos e {poda} frames de crossfade")
         for i in range(total_partes):
             base = video_paths[i]
             nova_lista.append(video_podado)
             print(f"[DEBUG] Video podado {i+1} adicionado {self._get_total_frames(video_podado)} frames ✅")
         print("===========CONCATECAO CAUSAL=============")
             "enhance_prompt": False,
             "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
         }
+        print(f"[DEBUG] output_type={call_kwargs['output_type']} skip_layer_strategy={call_kwargs['skip_layer_strategy']}")
         latents = None
         latents_list = []
         results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
                 if improve_texture:
                     if not self.latent_upsampler:
                         raise ValueError("Upscaler espacial não carregado, mas 'improve_texture' está ativo.")
                     # --- ETAPA 1: GERAÇÃO BASE (FIRST PASS) ---
                     print("\n--- INICIANDO ETAPA 1: GERAÇÃO BASE (FIRST PASS) ---")
                     t_pass1 = time.perf_counter()
                     downscale_factor = self.config.get("downscale_factor", 0.6666666)
                     vae_scale_factor = self.pipeline.vae_scale_factor # Geralmente 8
+                    # --- <INÍCIO DA LÓGICA DE CÁLCULO EXATA> ---
+                    # Replica a fórmula da LTXMultiScalePipeline
                     x_width = int(width_padded * downscale_factor)
                     downscaled_width = x_width - (x_width % vae_scale_factor)
                     x_height = int(height_padded * downscale_factor)
                     downscaled_height = x_height - (x_height % vae_scale_factor)
                     print(f"[DEBUG] First Pass Dims: Original Pad ({width_padded}x{height_padded}) -> Downscaled ({downscaled_width}x{downscaled_height})")
+                    # --- <FIM DA LÓGICA DE CÁLCULO EXATA> ---
+                    first_pass_kwargs = call_kwargs.copy()
                     first_pass_kwargs.update({
                         "output_type": "latent",
                         "width": downscaled_width,
                         "height": downscaled_height,
                         "guidance_scale": float(guidance_scale),
+                        **first_pass_config
                     })
                     print(f"[DEBUG] First Pass: Gerando em {downscaled_width}x{downscaled_height}...")
                     except Exception:
                          pass
+                    latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up,15,1)
                     for latents in latents_parts_up:
                         # # --- ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---
+                        print("\n--- INICIANDO ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---")
                         second_pass_config = self.config.get("second_pass", {}).copy()
+                        # --- <INÍCIO DA LÓGICA DE CÁLCULO EXATA PARA SECOND PASS> ---
+                        # Usa as dimensões da primeira passagem dobradas, como na pipeline original
                         second_pass_width = downscaled_width * 2
                         second_pass_height = downscaled_height * 2
                         print(f"[DEBUG] Second Pass Dims: Target ({second_pass_width}x{second_pass_height})")
+                        # --- <FIM DA LÓGICA DE CÁLCULO EXATA> ---
+                        t_pass2 = time.perf_counter()
                         num_latent_frames_part = latents.shape[2]
                         vae_temporal_scale = self.pipeline.video_scale_factor # Geralmente 4 ou 8
                         num_pixel_frames_part = ((num_latent_frames_part - 1) * vae_temporal_scale) + 1
                         print(f"[DEBUG] Parte: {num_latent_frames_part - 1} latentes -> {num_pixel_frames_part} frames de pixel (alvo)")
+                        second_pass_kwargs = call_kwargs.copy()
                         second_pass_kwargs.update({
                            "output_type": "latent",
                            "width": second_pass_width,
                            "height": second_pass_height,
+                           #"num_frames": num_pixel_frames_part,
+                           "latents": upsampled_latents, # O tensor upscaled
                            "guidance_scale": float(guidance_scale),
+                           **second_pass_config
                         })
                         print(f"[DEBUG] Second Pass: Refinando em {width_padded}x{height_padded}...")
                         print(f"[DEBUG] Second part Pass concluída em {time.perf_counter() - t_pass2:.2f}s")
                         latents_list.append(final_latents)
                 else: # Geração de etapa única
                     print("\n--- INICIANDO GERAÇÃO DE ETAPA ÚNICA ---")
                     single_pass_kwargs["guidance_scale"] = float(guidance_scale)
                     single_pass_kwargs["output_type"] = "latent"
                     latents = self.pipeline(**single_pass_kwargs).images
                     log_tensor_info(latents, "Latentes Finais (Etapa Única)")
                     print(f"[DEBUG] Etapa única concluída em {time.perf_counter() - t_single:.2f}s")
                     latents_list.append(latents)
             # --- ETAPA FINAL: DECODIFICAÇÃO E CODIFICAÇÃO MP4 ---
             print("\n--- INICIANDO ETAPA FINAL: DECODIFICAÇÃO E MONTAGEM ---")
+            #latents_cpu = latents.detach().to("cpu", non_blocking=True)
+            #torch.cuda.empty_cache()
+            #try:
+            #    torch.cuda.ipc_collect()
+            #except Exception:
+            #    pass
+            latents_parts = []
+            for latents in latents_list:
+                latents_parts.append(self._dividir_latentes_por_tamanho(latents,15,1))
             partes_mp4 = []
             par = 0
+            for latents in latents_parts:
+                par = par + 1
+                output_video_path = os.path.join(results_dir, f"output_{used_seed}_{par}.mp4")
+                final_output_path = None
+                print("[DEBUG] Decodificando bloco de latentes com VAE {par} → tensor de pixels...")
+                # Usar manager com timestep por item; previne target_shape e rota NoneType.decode
+                pixel_tensor = vae_manager_singleton.decode(
+                    latents.to(self.device, non_blocking=True),
+                    decode_timestep=float(self.config.get("decode_timestep", 0.05))
+                )
+                log_tensor_info(pixel_tensor, "Pixel tensor (VAE saída)")
+                print("[DEBUG] Codificando MP4 a partir do tensor de pixels (bloco inteiro)...")
+                video_encode_tool_singleton.save_video_from_tensor(
+                    pixel_tensor,
+                    output_video_path,
+                    fps=call_kwargs["frame_rate"],
+                    progress_callback=progress_callback
+                )
+                candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
+                try:
+                    shutil.move(output_video_path, candidate)
+                    final_output_path = candidate
+                    print(f"[DEBUG] MP4 parte {par} movido para {final_output_path}")
+                    partes_mp4.append(final_output_path)
+                except Exception as e:
+                    final_output_path = output_video_path
+                    print(f"[DEBUG] Falha no move; usando tmp como final: {e}")
             total_partes = len(partes_mp4)
             if (total_partes>1):
                 final_vid = os.path.join(results_dir, f"concat_fim_{used_seed}.mp4")