Test

Paused

App Files Files Community

Eueuiaa commited on Oct 10

Commit

2134e1a

verified ·

1 Parent(s): f9ef94f

Update api/ltx_server_refactored.py

Browse files

Files changed (1) hide show

api/ltx_server_refactored.py +248 -69

api/ltx_server_refactored.py CHANGED Viewed

@@ -169,55 +169,11 @@ class VideoService:
     def _apply_precision_policy(self):
         prec = str(self.config.get("precision", "")).lower()
         self.runtime_autocast_dtype = torch.float32
-        print(f"[DEBUG] Aplicando política de precisão: {prec}")
-        if prec == "float8_e4m3fn":
-            self.runtime_autocast_dtype = torch.bfloat16
-            force_promote = True #os.getenv("LTXV_FORCE_BF16_ON_FP8", "0") == "1"
-            print(f"[DEBUG] FP8 detectado. force_promote={force_promote}")
-            if force_promote: # and hasattr(torch, "float8_e4m3fn"):
-                try:
-                    self._promote_fp8_weights_to_bf16(self.pipeline)
-                except Exception as e:
-                    print(f"[DEBUG] Promoção FP8→BF16 na pipeline falhou: {e}")
-                try:
-                    if self.latent_upsampler:
-                        self._promote_fp8_weights_to_bf16(self.latent_upsampler)
-                except Exception as e:
-                    print(f"[DEBUG] Promoção FP8→BF16 no upsampler falhou: {e}")
-        elif prec == "bfloat16":
             self.runtime_autocast_dtype = torch.bfloat16
         elif prec == "mixed_precision":
             self.runtime_autocast_dtype = torch.float16
-        else:
-            self.runtime_autocast_dtype = torch.float32
-    def _promote_fp8_weights_to_bf16(self, module):
-        if not isinstance(module, torch.nn.Module):
-            print("[DEBUG] Promoção FP8→BF16 ignorada: alvo não é nn.Module.")
-            return
-        f8 = getattr(torch, "float8_e4m3fn", None)
-        if f8 is None:
-            print("[DEBUG] torch.float8_e4m3fn indisponível.")
-            return
-        p_cnt = b_cnt = 0
-        for _, p in module.named_parameters(recurse=True):
-            try:
-                if p.dtype == f8:
-                    with torch.no_grad():
-                        p.data = p.data.to(torch.bfloat16); p_cnt += 1
-            except Exception:
-                pass
-        for _, b in module.named_buffers(recurse=True):
-            try:
-                if hasattr(b, "dtype") and b.dtype == f8:
-                    b.data = b.data.to(torch.bfloat16); b_cnt += 1
-            except Exception:
-                pass
-        print(f"[DEBUG] FP8→BF16: params_promoted={p_cnt}, buffers_promoted={b_cnt}")
     def _register_tmp_dir(self, d: str):
         if d and os.path.isdir(d):
             self._tmp_dirs.add(d); print(f"[DEBUG] Registrado tmp dir: {d}")
@@ -253,11 +209,11 @@ class VideoService:
         print(f"[DEBUG] Vídeo salvo em: {final_path}")
         return final_path
-    def prepare_condition_items(
-        self, items_list: List, height: int,
-        width: int, num_frames: int,
-    ):
         if not items_list: return []
         height_padded = ((height - 1) // 8 + 1) * 8
         width_padded = ((width - 1) // 8 + 1) * 8
@@ -269,18 +225,7 @@ class VideoService:
             conditioning_items.append(ConditioningItem(tensor, safe_frame, float(weight)))
         return conditioning_items
-    # ==============================================================================
-    # --- FUNÇÕES MODULARES COM A LÓGICA DE CHUNKING SIMPLIFICADA ---
-    # ==============================================================================
-    def generate_low(
-        self, prompt, negative_prompt,
-        height, width, duration, seed,
-        conditioning_items=None,
-        conditions_itens=None,
-        ltx_configs_override: dict = None,
-    ):
-        guidance_scale=4
         used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
         seed_everething(used_seed)
         FPS = 24.0
@@ -299,7 +244,7 @@ class VideoService:
             "prompt": prompt, "negative_prompt": negative_prompt, "height": downscaled_height, "width": downscaled_width,
             "num_frames": actual_num_frames, "frame_rate": int(FPS), "generator": torch.Generator(device=self.device).manual_seed(used_seed),
             "output_type": "latent", "conditioning_items": conditioning_items,
-            "guidance_scale": float(guidance_scale),
             **(self.config.get("first_pass", {}))
         }
         try:
@@ -311,18 +256,252 @@ class VideoService:
                 tensor_path = os.path.join(results_dir, f"latents_low_res_{used_seed}.pt")
                 torch.save(latents_cpu, tensor_path)
             return video_path, tensor_path, used_seed
         except Exception as e:
-            pass
         finally:
             torch.cuda.empty_cache()
             torch.cuda.ipc_collect()
             self.finalize(keep_paths=[])
     # ==============================================================================
     # --- FUNÇÃO #4: ORQUESTRADOR  (Upscaler + texturas hd) ---
     # ==============================================================================
     def generate_upscale_denoise(
-        self, latents_path, prompt, negative_prompt, seed
     ):
             used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
             seed_everething(used_seed)
@@ -417,9 +596,6 @@ class VideoService:
         # 4. Configurar o resto dos componentes com o dispositivo correto
         self._apply_precision_policy()
-        print(f"[DEBUG] runtime_autocast_dtype = {getattr(self, 'runtime_autocast_dtype', None)}")
         vae_manager_singleton.attach_pipeline(
             self.pipeline,
             device=self.device, # Agora `self.device` está correto
@@ -428,6 +604,7 @@ class VideoService:
         self._tmp_dirs = set()
         print(f"[DEBUG] VideoService pronto. boot_time={time.perf_counter()-t0:.3f}s")
     def move_to_device(self, device):
         """Move os modelos do pipeline para o dispositivo especificado."""
         print(f"[LTX] Movendo modelos para {device}...")
@@ -443,6 +620,8 @@ class VideoService:
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
 print("Criando instância do VideoService...")
 video_generation_service = VideoService()
 print("Instância do VideoService pronta.")

     def _apply_precision_policy(self):
+        # Choose the dtype stored in self.runtime_autocast_dtype (the generation
+        # methods pass it to torch.autocast) from config["precision"].
+        # float32 is the default for any value not matched below.
         prec = str(self.config.get("precision", "")).lower()
         self.runtime_autocast_dtype = torch.float32
+        # Both the FP8 and the BF16 settings run autocast in bfloat16.
+        if prec in ["float8_e4m3fn", "bfloat16"]:
             self.runtime_autocast_dtype = torch.bfloat16
         elif prec == "mixed_precision":
             self.runtime_autocast_dtype = torch.float16
     def _register_tmp_dir(self, d: str):
         if d and os.path.isdir(d):
             self._tmp_dirs.add(d); print(f"[DEBUG] Registrado tmp dir: {d}")
         print(f"[DEBUG] Vídeo salvo em: {final_path}")
         return final_path
+    # ==============================================================================
+    # --- FUNÇÕES MODULARES COM A LÓGICA DE CHUNKING SIMPLIFICADA ---
+    # ==============================================================================
+    def prepare_condition_items(self, items_list: List, height: int, width: int, num_frames: int):
         if not items_list: return []
         height_padded = ((height - 1) // 8 + 1) * 8
         width_padded = ((width - 1) // 8 + 1) * 8
             conditioning_items.append(ConditioningItem(tensor, safe_frame, float(weight)))
         return conditioning_items
+    def generate_low(self, prompt, negative_prompt, height, width, duration, guidance_scale, seed, conditioning_items=None):
         used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
         seed_everething(used_seed)
         FPS = 24.0
             "prompt": prompt, "negative_prompt": negative_prompt, "height": downscaled_height, "width": downscaled_width,
             "num_frames": actual_num_frames, "frame_rate": int(FPS), "generator": torch.Generator(device=self.device).manual_seed(used_seed),
             "output_type": "latent", "conditioning_items": conditioning_items,
+            #"guidance_scale": float(guidance_scale),
             **(self.config.get("first_pass", {}))
         }
         try:
                 tensor_path = os.path.join(results_dir, f"latents_low_res_{used_seed}.pt")
                 torch.save(latents_cpu, tensor_path)
             return video_path, tensor_path, used_seed
         except Exception as e:
+            print(f"[DEBUG] falhou: {e}")
         finally:
             torch.cuda.empty_cache()
             torch.cuda.ipc_collect()
             self.finalize(keep_paths=[])
+    # ==============================================================================
+    # --- FUNÇÃO #1: GERADOR DE CHUNK ÚNICO (AUXILIAR INTERNA) ---
+    # ==============================================================================
+    def _generate_single_chunk_low(
+        self, prompt, negative_prompt,
+        height, width, num_frames, guidance_scale,
+        seed, initial_latent_condition=None, image_conditions=None,
+        ltx_configs_override=None):
+        """
+        [GENERATION NODE]
+        Generate a SINGLE chunk of raw latents. This is the basic unit of work
+        that the narrative and single-chunk orchestrators build on.
+
+        Args:
+            prompt: Text prompt forwarded to the pipeline.
+            negative_prompt: Negative prompt forwarded to the pipeline.
+            height, width: Requested size. Each is padded up to a multiple of 8,
+                scaled by config["downscale_factor"] and then floored to a
+                multiple of the pipeline's vae_scale_factor.
+            num_frames: Number of frames requested from the pipeline.
+            guidance_scale: Accepted for signature parity with the orchestrators;
+                it is not forwarded to the pipeline (guidance values come from
+                config["first_pass"] and the optional override).
+            seed: Seed for the torch.Generator used by this chunk.
+            initial_latent_condition: Optional conditioning item appended after
+                the image conditions.
+            image_conditions: Optional list of conditioning items.
+            ltx_configs_override: Optional dict whose "guidance_preset" key
+                selects how the first-pass "guidance_scale" / "stg_scale" lists
+                are overridden.
+
+        Returns:
+            The latents returned by the pipeline (``.images`` with
+            output_type="latent"). Callers slice, clone and decode this tensor,
+            so the tensor itself is returned rather than a file path.
+
+        Raises:
+            Exception: any failure of the pipeline call is logged and re-raised,
+                so callers never receive ``None`` in place of a tensor.
+        """
+        print("\n" + "-"*20 + " INÍCIO: _generate_single_chunk_low " + "-"*20)
+        # --- NODE 1.1: PARAMETER SETUP ---
+        height_padded = ((height - 1) // 8 + 1) * 8
+        width_padded = ((width - 1) // 8 + 1) * 8
+        generator = torch.Generator(device=self.device).manual_seed(seed)
+        downscale_factor = self.config.get("downscale_factor", 0.6666666)
+        vae_scale_factor = self.pipeline.vae_scale_factor
+        x_width = int(width_padded * downscale_factor)
+        downscaled_width = x_width - (x_width % vae_scale_factor)
+        x_height = int(height_padded * downscale_factor)
+        downscaled_height = x_height - (x_height % vae_scale_factor)
+        # --- NODE 1.2: ASSEMBLE CONDITIONS AND OVERRIDES ---
+        all_conditions = []
+        if image_conditions:
+            all_conditions.extend(image_conditions)
+        if initial_latent_condition is not None:
+            all_conditions.append(initial_latent_condition)
+        # Work on a copy so the shared config is never mutated by an override.
+        first_pass_config = self.config.get("first_pass", {}).copy()
+        if ltx_configs_override:
+            print("[DEBUG] Sobrepondo configurações do LTX com valores da UI...")
+            preset = ltx_configs_override.get("guidance_preset")
+            if preset == "Customizado":
+                # Parse both lists before touching the config: a failure on the
+                # second list must not leave a half-applied custom override.
+                # "timesteps_list" is deliberately not applied to guidance_timesteps.
+                try:
+                    custom_guidance = json.loads(ltx_configs_override["guidance_scale_list"])
+                    custom_stg = json.loads(ltx_configs_override["stg_scale_list"])
+                except (KeyError, TypeError, ValueError) as e:
+                    print(f"  > ERRO ao parsear valores customizados: {e}. Usando Padrão como fallback.")
+                else:
+                    first_pass_config["guidance_scale"] = custom_guidance
+                    first_pass_config["stg_scale"] = custom_stg
+            elif preset == "Agressivo":
+                first_pass_config["guidance_scale"] = [1, 2, 8, 12, 8, 2, 1]
+                first_pass_config["stg_scale"] = [0, 0, 5, 6, 5, 3, 2]
+            elif preset == "Suave":
+                first_pass_config["guidance_scale"] = [1, 1, 4, 5, 4, 1, 1]
+                first_pass_config["stg_scale"] = [0, 0, 2, 2, 2, 1, 0]
+        first_pass_kwargs = {
+            "prompt": prompt, "negative_prompt": negative_prompt, "height": downscaled_height, "width": downscaled_width,
+            "num_frames": num_frames, "frame_rate": 24, "generator": generator, "output_type": "latent",
+            "conditioning_items": all_conditions if all_conditions else None,
+            **first_pass_config
+        }
+        # --- NODE 1.3: PIPELINE CALL ---
+        try:
+            with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device.type == 'cuda'):
+                latents_bruto = self.pipeline(**first_pass_kwargs).images
+            # No per-chunk file is written here: the previous save referenced
+            # names that are not defined in this method, and the orchestrators
+            # persist the final latents themselves.
+            log_tensor_info(latents_bruto, f"Latente Bruto Gerado para: '{prompt[:40]}...'")
+            print("-" * 20 + " FIM: _generate_single_chunk_low " + "-"*20)
+            return latents_bruto
+        except Exception as e:
+            print("-" * 20 + f" ERRO: _generate_single_chunk_low {e} " + "-"*20)
+            raise
+        finally:
+            torch.cuda.empty_cache()
+            torch.cuda.ipc_collect()
+            self.finalize(keep_paths=[])
+    # ==============================================================================
+    # --- FUNÇÃO #2: ORQUESTRADOR NARRATIVO (MÚLTIPLOS PROMPTS) ---
+    # ==============================================================================
+    def generate_narrative_low(
+        self, prompt: str, negative_prompt,
+        height, width, duration, guidance_scale,
+        seed, initial_image_conditions=None, overlap_frames: int = 8,
+        ltx_configs_override: dict = None):
+        """
+        [NARRATIVE ORCHESTRATOR]
+        Generate a video as several sequential chunks, one per non-empty line
+        of a multi-line prompt, and concatenate their latents.
+
+        Args:
+            prompt: Multi-line prompt; every non-blank line becomes one chunk.
+            negative_prompt: Negative prompt used for every chunk.
+            height, width: Requested size, forwarded to the chunk generator.
+            duration: Total duration in seconds (24 FPS); the frame count is
+                rounded to the form 8*k + 1 with a minimum of 9.
+            guidance_scale: Forwarded to the chunk generator.
+            seed: Base seed, or None to draw a random one; chunk ``i`` uses
+                ``seed + i``.
+            initial_image_conditions: Optional list of conditioning items. Only
+                the first item is used: as given for the first chunk, and
+                re-created with strength 0.1 at frame 0 for later chunks.
+            overlap_frames: Extra frames generated for chunks after the first;
+                the matching latent frames carry continuity between chunks.
+            ltx_configs_override: Optional guidance override dict, forwarded to
+                the chunk generator.
+
+        Returns:
+            ``(video_path, tensor_path, used_seed)`` on success, or ``None`` when
+            decoding/saving fails (the error is printed).
+
+        Raises:
+            ValueError: if the prompt contains no non-blank line.
+        """
+        print("\n" + "="*80)
+        print("======           INICIANDO GERAÇÃO NARRATIVA EM CHUNKS (LOW-RES)           ======")
+        print("="*80)
+        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
+        seed_everething(used_seed)
+        FPS = 24.0
+        prompt_list = [p.strip() for p in prompt.splitlines() if p.strip()]
+        num_chunks = len(prompt_list)
+        if num_chunks == 0: raise ValueError("O prompt está vazio ou não contém linhas válidas.")
+        total_actual_frames = max(9, int(round((round(duration * FPS) - 1) / 8.0) * 8 + 1))
+        if num_chunks > 1:
+            # Split the 8-frame blocks evenly; the last chunk takes the remainder.
+            total_blocks = (total_actual_frames - 1) // 8
+            blocks_per_chunk = total_blocks // num_chunks
+            blocks_last_chunk = total_blocks - (blocks_per_chunk * (num_chunks - 1))
+            frames_per_chunk = blocks_per_chunk * 8 + 1
+            frames_per_chunk_last = blocks_last_chunk * 8 + 1
+        else:
+            frames_per_chunk = total_actual_frames
+            frames_per_chunk_last = total_actual_frames
+        frames_per_chunk = max(9, frames_per_chunk)
+        frames_per_chunk_last = max(9, frames_per_chunk_last)
+        # Number of latent frames that correspond to the pixel-frame overlap.
+        poda_latents_num = overlap_frames // self.pipeline.video_scale_factor if self.pipeline.video_scale_factor > 0 else 0
+        latentes_chunk_video = []
+        condition_item_latent_overlap = None
+        for i, chunk_prompt in enumerate(prompt_list):
+            print(f"\n--- Gerando Chunk Narrativo {i+1}/{num_chunks}: '{chunk_prompt}' ---")
+            current_image_conditions = []
+            if initial_image_conditions:
+                cond_item_original = initial_image_conditions[0]
+                if i == 0:
+                    current_image_conditions.append(cond_item_original)
+                else:
+                    # Later chunks keep only a weak pull toward the initial image.
+                    cond_item_fraco = ConditioningItem(
+                        media_item=cond_item_original.media_item, media_frame_number=0, conditioning_strength=0.1
+                    )
+                    current_image_conditions.append(cond_item_fraco)
+            num_frames_para_gerar = frames_per_chunk_last if i == num_chunks - 1 else frames_per_chunk
+            if i > 0 and poda_latents_num > 0:
+                num_frames_para_gerar += overlap_frames
+            latentes_bruto = self._generate_single_chunk_low(
+                prompt=chunk_prompt, negative_prompt=negative_prompt, height=height, width=width,
+                num_frames=num_frames_para_gerar, guidance_scale=guidance_scale, seed=used_seed + i,
+                initial_latent_condition=condition_item_latent_overlap, image_conditions=current_image_conditions,
+                ltx_configs_override=ltx_configs_override
+            )
+            if i > 0 and poda_latents_num > 0:
+                # Drop the leading latent frames generated for the overlap.
+                latentes_bruto = latentes_bruto[:, :, poda_latents_num:, :, :]
+            if i < num_chunks - 1 and poda_latents_num > 0:
+                # The tail of this chunk is held back and becomes the latent
+                # condition of the next chunk.
+                latentes_podado = latentes_bruto[:, :, :-poda_latents_num, :, :].clone()
+                overlap_latents = latentes_bruto[:, :, -poda_latents_num:, :, :].clone()
+                condition_item_latent_overlap = ConditioningItem(
+                    media_item=overlap_latents, media_frame_number=0, conditioning_strength=1.0
+                )
+            else:
+                latentes_podado = latentes_bruto.clone().detach()
+            latentes_chunk_video.append(latentes_podado)
+        print("\n--- Finalizando Narrativa: Concatenando chunks ---")
+        final_latents = torch.cat(latentes_chunk_video, dim=2)
+        log_tensor_info(final_latents, "Tensor de Latentes Final Concatenado")
+        # NOTE(review): the chunk helper calls self.finalize() after every chunk.
+        # The tmp dir is created only now so that cleanup cannot affect it;
+        # confirm what finalize() actually removes.
+        temp_dir = tempfile.mkdtemp(prefix="ltxv_narrative_"); self._register_tmp_dir(temp_dir)
+        results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+        try:
+            with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device.type == 'cuda'):
+                pixel_tensor = vae_manager_singleton.decode(final_latents.clone(), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
+                video_path = self._save_and_log_video(pixel_tensor, "narrative_video", FPS, temp_dir, results_dir, used_seed)
+                # Persist the concatenated latents (previously an undefined
+                # name `latents` was used here, so the save always failed).
+                latents_cpu = final_latents.detach().to("cpu")
+                tensor_path = os.path.join(results_dir, f"latents_low_res_{used_seed}.pt")
+                torch.save(latents_cpu, tensor_path)
+            return video_path, tensor_path, used_seed
+        except Exception as e:
+            print(f"[DEBUG] falhou: {e}")
+            return None
+        finally:
+            torch.cuda.empty_cache()
+            torch.cuda.ipc_collect()
+            self.finalize(keep_paths=[])
+    # ==============================================================================
+    # --- FUNÇÃO #3: ORQUESTRADOR SIMPLES (PROMPT ÚNICO) ---
+    # ==============================================================================
+    def generate_single_low(
+        self, prompt: str, negative_prompt,
+        height, width, duration, guidance_scale,
+        seed, initial_image_conditions=None,
+        ltx_configs_override: dict = None):
+        """
+        [SIMPLE ORCHESTRATOR]
+        Generate a complete video as one chunk. Suited to short, single prompts.
+
+        Args:
+            prompt: Text prompt for the whole video.
+            negative_prompt: Negative prompt.
+            height, width: Requested size, forwarded to the chunk generator.
+            duration: Duration in seconds (24 FPS); the frame count is rounded
+                to the form 8*k + 1 with a minimum of 9.
+            guidance_scale: Forwarded to the chunk generator.
+            seed: Seed, or None to draw a random one.
+            initial_image_conditions: Optional list of conditioning items,
+                forwarded as the chunk's image conditions.
+            ltx_configs_override: Optional guidance override dict, forwarded to
+                the chunk generator.
+
+        Returns:
+            ``(video_path, tensor_path, used_seed)`` on success, or ``None`` when
+            decoding/saving fails (the error is printed).
+        """
+        print("\n" + "="*80)
+        print("======             INICIANDO GERAÇÃO SIMPLES EM CHUNK ÚNICO (LOW-RES)             ======")
+        print("="*80)
+        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
+        seed_everething(used_seed)
+        FPS = 24.0
+        total_actual_frames = max(9, int(round((round(duration * FPS) - 1) / 8.0) * 8 + 1))
+        # The single-chunk generator does all of the generation work.
+        final_latents = self._generate_single_chunk_low(
+            prompt=prompt, negative_prompt=negative_prompt, height=height, width=width,
+            num_frames=total_actual_frames, guidance_scale=guidance_scale, seed=used_seed,
+            image_conditions=initial_image_conditions,
+            ltx_configs_override=ltx_configs_override
+        )
+        print("\n--- Finalizando Geração Simples: Salvando e decodificando ---")
+        log_tensor_info(final_latents, "Tensor de Latentes Final")
+        # NOTE(review): the chunk helper calls self.finalize() when it returns.
+        # The tmp dir is created only now so that cleanup cannot affect it;
+        # confirm what finalize() actually removes.
+        temp_dir = tempfile.mkdtemp(prefix="ltxv_single_"); self._register_tmp_dir(temp_dir)
+        results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+        try:
+            with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device.type == 'cuda'):
+                pixel_tensor = vae_manager_singleton.decode(final_latents.clone(), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
+                video_path = self._save_and_log_video(pixel_tensor, "single_video", FPS, temp_dir, results_dir, used_seed)
+                # Persist the generated latents (previously an undefined name
+                # `latents` was used here, so the save always failed).
+                latents_cpu = final_latents.detach().to("cpu")
+                tensor_path = os.path.join(results_dir, f"latents_single_{used_seed}.pt")
+                torch.save(latents_cpu, tensor_path)
+            return video_path, tensor_path, used_seed
+        except Exception as e:
+            print(f"[DEBUG] falhou: {e}")
+            return None
+        finally:
+            torch.cuda.empty_cache()
+            torch.cuda.ipc_collect()
+            self.finalize(keep_paths=[])
     # ==============================================================================
     # --- FUNÇÃO #4: ORQUESTRADOR  (Upscaler + texturas hd) ---
     # ==============================================================================
     def generate_upscale_denoise(
+        self, latents_path, prompt, negative_prompt,
+        guidance_scale, seed,
     ):
             used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
             seed_everething(used_seed)
         # 4. Configurar o resto dos componentes com o dispositivo correto
         self._apply_precision_policy()
         vae_manager_singleton.attach_pipeline(
             self.pipeline,
             device=self.device, # Agora `self.device` está correto
         self._tmp_dirs = set()
         print(f"[DEBUG] VideoService pronto. boot_time={time.perf_counter()-t0:.3f}s")
+    # A função move_to_device que criamos antes é essencial aqui
     def move_to_device(self, device):
         """Move os modelos do pipeline para o dispositivo especificado."""
         print(f"[LTX] Movendo modelos para {device}...")
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
+# Clean module-level instantiation, without using `self` outside the class.
 print("Criando instância do VideoService...")
 video_generation_service = VideoService()
 print("Instância do VideoService pronta.")