Spaces:
Paused
Paused
Update api/ltx_server_refactored.py
Browse files- api/ltx_server_refactored.py +57 -60
api/ltx_server_refactored.py
CHANGED
|
@@ -219,11 +219,14 @@ class VideoService:
|
|
| 219 |
|
| 220 |
@torch.no_grad()
|
| 221 |
def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
| 227 |
finally:
|
| 228 |
torch.cuda.empty_cache()
|
| 229 |
torch.cuda.ipc_collect()
|
|
@@ -234,20 +237,7 @@ class VideoService:
|
|
| 234 |
tensor = torch.nn.functional.pad(tensor, padding_values)
|
| 235 |
return tensor.to(self.device, dtype=self.runtime_autocast_dtype)
|
| 236 |
|
| 237 |
-
|
| 238 |
-
if len(mp4_list) == 1:
|
| 239 |
-
shutil.move(mp4_list[0], out_path)
|
| 240 |
-
return
|
| 241 |
-
with tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt") as f:
|
| 242 |
-
for mp4 in mp4_list:
|
| 243 |
-
f.write(f"file '{os.path.abspath(mp4)}'\n")
|
| 244 |
-
list_path = f.name
|
| 245 |
-
cmd = f"ffmpeg -y -f concat -safe 0 -i {list_path} -c copy {out_path}"
|
| 246 |
-
try:
|
| 247 |
-
subprocess.check_call(shlex.split(cmd))
|
| 248 |
-
finally:
|
| 249 |
-
os.remove(list_path)
|
| 250 |
-
|
| 251 |
def _save_and_log_video(self, pixel_tensor, base_filename, fps, temp_dir, results_dir, used_seed, progress_callback=None):
|
| 252 |
output_path = os.path.join(temp_dir, f"{base_filename}_{used_seed}.mp4")
|
| 253 |
video_encode_tool_singleton.save_video_from_tensor(
|
|
@@ -295,71 +285,78 @@ class VideoService:
|
|
| 295 |
"output_type": "latent", "conditioning_items": conditioning_items, "guidance_scale": float(guidance_scale),
|
| 296 |
**(self.config.get("first_pass", {}))
|
| 297 |
}
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
finally:
|
| 307 |
torch.cuda.empty_cache()
|
| 308 |
torch.cuda.ipc_collect()
|
| 309 |
self.finalize(keep_paths=[])
|
| 310 |
-
|
| 311 |
def generate_upscale_denoise(self, latents_path, prompt, negative_prompt, guidance_scale, seed):
|
| 312 |
used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
|
| 313 |
seed_everething(used_seed)
|
| 314 |
temp_dir = tempfile.mkdtemp(prefix="ltxv_up_"); self._register_tmp_dir(temp_dir)
|
| 315 |
results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
|
| 316 |
latents_low = torch.load(latents_path).to(self.device)
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
|
|
|
| 321 |
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
|
| 348 |
-
|
| 349 |
-
|
| 350 |
|
| 351 |
latents_cpu = final_latents.detach().to("cpu")
|
| 352 |
tensor_path = os.path.join(results_dir, f"latents_refined_{used_seed}.pt")
|
| 353 |
torch.save(latents_cpu, tensor_path)
|
| 354 |
pixel_tensor = vae_manager_singleton.decode(final_latents, decode_timestep=float(self.config.get("decode_timestep", 0.05)))
|
| 355 |
video_path = self._save_and_log_video(pixel_tensor, "refined_video", 24.0, temp_dir, results_dir, used_seed)
|
| 356 |
-
|
|
|
|
|
|
|
| 357 |
finally:
|
| 358 |
torch.cuda.empty_cache()
|
| 359 |
torch.cuda.ipc_collect()
|
| 360 |
self.finalize(keep_paths=[])
|
| 361 |
|
| 362 |
-
|
| 363 |
latents = torch.load(latents_path)
|
| 364 |
seed = random.randint(0, 99999)
|
| 365 |
temp_dir = tempfile.mkdtemp(prefix="ltxv_enc_"); self._register_tmp_dir(temp_dir)
|
|
|
|
| 219 |
|
| 220 |
@torch.no_grad()
def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
    """Upsample a latent tensor with the loaded latent upsampler.

    The input is passed through ``un_normalize_latents``, then through
    ``self.latent_upsampler``, and the result is passed through
    ``normalize_latents``. Both (un)normalization calls use
    ``self.pipeline.vae`` with ``vae_per_channel_normalize=True``.

    Args:
        latents: Latent tensor to upsample.

    Returns:
        The upsampled latents after re-normalization.

    Raises:
        ValueError: If ``self.latent_upsampler`` is not loaded (falsy).
        Exception: Any error raised by the (un)normalization helpers or the
            upsampler propagates to the caller. Previously such errors were
            swallowed and the method silently returned ``None``.
    """
    try:
        if not self.latent_upsampler:
            raise ValueError("Latent Upsampler não está carregado.")
        latents_unnormalized = un_normalize_latents(
            latents, self.pipeline.vae, vae_per_channel_normalize=True
        )
        upsampled_latents = self.latent_upsampler(latents_unnormalized)
        return normalize_latents(
            upsampled_latents, self.pipeline.vae, vae_per_channel_normalize=True
        )
    finally:
        # ipc_collect() initialises CUDA and raises on hosts without it, which
        # would replace the real exception; only clean up when CUDA exists.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()
|
|
|
|
| 237 |
tensor = torch.nn.functional.pad(tensor, padding_values)
|
| 238 |
return tensor.to(self.device, dtype=self.runtime_autocast_dtype)
|
| 239 |
|
| 240 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
def _save_and_log_video(self, pixel_tensor, base_filename, fps, temp_dir, results_dir, used_seed, progress_callback=None):
|
| 242 |
output_path = os.path.join(temp_dir, f"{base_filename}_{used_seed}.mp4")
|
| 243 |
video_encode_tool_singleton.save_video_from_tensor(
|
|
|
|
| 285 |
"output_type": "latent", "conditioning_items": conditioning_items, "guidance_scale": float(guidance_scale),
|
| 286 |
**(self.config.get("first_pass", {}))
|
| 287 |
}
|
| 288 |
+
try:
|
| 289 |
+
with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
|
| 290 |
+
latents = self.pipeline(**first_pass_kwargs).images
|
| 291 |
+
pixel_tensor = vae_manager_singleton.decode(latents.clone(), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
|
| 292 |
+
video_path = self._save_and_log_video(pixel_tensor, "low_res_video", FPS, temp_dir, results_dir, used_seed)
|
| 293 |
+
latents_cpu = latents.detach().to("cpu")
|
| 294 |
+
tensor_path = os.path.join(results_dir, f"latents_low_res_{used_seed}.pt")
|
| 295 |
+
torch.save(latents_cpu, tensor_path)
|
| 296 |
+
return video_path, tensor_path, used_seed
|
| 297 |
+
|
| 298 |
+
except Exception as e:
|
| 299 |
+
pass
|
| 300 |
finally:
|
| 301 |
torch.cuda.empty_cache()
|
| 302 |
torch.cuda.ipc_collect()
|
| 303 |
self.finalize(keep_paths=[])
|
| 304 |
+
|
| 305 |
def generate_upscale_denoise(self, latents_path, prompt, negative_prompt, guidance_scale, seed):
    """Upsample saved latents, refine them in two overlapping chunks, and export a video.

    The latents loaded from ``latents_path`` are upsampled with
    ``self._upsample_latents_internal`` and filtered with
    ``adain_filter_latent`` against the low-resolution reference. The result
    is split in two along dimension 2, with the second chunk starting one
    frame early so the chunks share one frame. Each chunk with more than one
    frame is run through ``self.pipeline``; the shared frame is dropped from
    the first refined chunk before concatenation. The final latents are saved
    to ``/app/output/latents_refined_<seed>.pt``, decoded with
    ``vae_manager_singleton`` and written out via ``self._save_and_log_video``.

    Args:
        latents_path: Path handed to ``torch.load``.
        prompt: Prompt forwarded to the pipeline.
        negative_prompt: Negative prompt forwarded to the pipeline.
        guidance_scale: Converted to ``float`` and forwarded to the pipeline.
        seed: Value convertible to ``int``, or ``None`` to draw a random
            seed in ``[0, 2**32 - 1]``.

    Returns:
        A ``(video_path, tensor_path)`` tuple.

    Raises:
        ValueError: If no chunk has more than one frame, so nothing could be
            refined.
        Exception: Any error from loading, the pipeline, decoding or saving
            propagates. Previously such errors were swallowed and the method
            silently returned ``None``.
    """
    used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
    seed_everething(used_seed)
    temp_dir = tempfile.mkdtemp(prefix="ltxv_up_")
    self._register_tmp_dir(temp_dir)
    results_dir = "/app/output"
    os.makedirs(results_dir, exist_ok=True)
    try:
        # Loading happens inside the try so a bad path still triggers cleanup.
        # NOTE(review): torch.load unpickles the file; only pass trusted paths
        # (or confirm the installed torch supports weights_only=True).
        latents_low = torch.load(latents_path).to(self.device)
        # NOTE(review): the original indentation is not recoverable from this
        # view; everything through the video export is kept inside the
        # autocast context - confirm against the real file.
        with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
            upsampled_latents = self._upsample_latents_internal(latents_low)
            upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=latents_low)
            del latents_low
            torch.cuda.empty_cache()

            # --- Simple split with a one-frame overlap ---
            total_frames = upsampled_latents.shape[2]
            # Keep mid_point >= 1 so the second slice never starts at a
            # negative index when there are very few frames.
            mid_point = max(1, total_frames // 2)
            chunks = [
                upsampled_latents[:, :, :mid_point, :, :],
                # The second chunk starts one frame earlier to create the overlap.
                upsampled_latents[:, :, mid_point - 1:, :, :],
            ]

            second_pass_config = self.config.get("second_pass", {})
            final_latents_list = []
            for i, chunk in enumerate(chunks):
                if chunk.shape[2] <= 1:
                    continue  # Skip invalid or empty chunks.
                second_pass_kwargs = {
                    "prompt": prompt,
                    "negative_prompt": negative_prompt,
                    "height": chunk.shape[3] * self.pipeline.vae_scale_factor,
                    "width": chunk.shape[4] * self.pipeline.vae_scale_factor,
                    "num_frames": chunk.shape[2],
                    "latents": chunk,
                    "guidance_scale": float(guidance_scale),
                    "output_type": "latent",
                    "generator": torch.Generator(device=self.device).manual_seed(used_seed),
                    # Config entries come last so they override the defaults above.
                    **second_pass_config,
                }
                refined_chunk = self.pipeline(**second_pass_kwargs).images
                if i == 0:
                    # Drop the overlapping frame from the first refined chunk.
                    refined_chunk = refined_chunk[:, :, :-1, :, :]
                final_latents_list.append(refined_chunk)

            if not final_latents_list:
                # Happens when the tensor has a single frame: both chunks are
                # skipped and torch.cat would fail with an opaque message.
                raise ValueError(
                    f"Cannot refine latents with {total_frames} frame(s): "
                    "at least 2 frames are required."
                )

            final_latents = torch.cat(final_latents_list, dim=2)
            log_tensor_info(final_latents, "Latentes Upscaled/Refinados Finais")

            latents_cpu = final_latents.detach().to("cpu")
            tensor_path = os.path.join(results_dir, f"latents_refined_{used_seed}.pt")
            torch.save(latents_cpu, tensor_path)
            pixel_tensor = vae_manager_singleton.decode(
                final_latents, decode_timestep=float(self.config.get("decode_timestep", 0.05))
            )
            # NOTE(review): fps is hard-coded to 24.0 here while the low-res
            # path passes FPS - confirm whether they should match.
            video_path = self._save_and_log_video(pixel_tensor, "refined_video", 24.0, temp_dir, results_dir, used_seed)
            return video_path, tensor_path
    finally:
        try:
            # ipc_collect() initialises CUDA and raises on hosts without it.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.ipc_collect()
        finally:
            # Temp-dir cleanup must run even if the CUDA cleanup fails.
            self.finalize(keep_paths=[])
|
| 358 |
|
| 359 |
+
def encode_mp4(self, latents_path: str, fps: int = 24):
|
| 360 |
latents = torch.load(latents_path)
|
| 361 |
seed = random.randint(0, 99999)
|
| 362 |
temp_dir = tempfile.mkdtemp(prefix="ltxv_enc_"); self._register_tmp_dir(temp_dir)
|