self-forcing

Runtime error

App Files Files Community

innoai committed on Jun 19

Commit

fb854a9

verified ·

1 Parent(s): c9dd185

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -10

app.py CHANGED Viewed

@@ -171,25 +171,60 @@ ASPECT_RATIOS = {
 def get_vae_cache_for_aspect_ratio(aspect_ratio, device, dtype):
     """
-    Create VAE cache with appropriate dimensions for the given aspect ratio.
-    VAE cache needs to have 5 dimensions: (batch, channels, time, height, width)
     """
     ar_config = ASPECT_RATIOS[aspect_ratio]
-    latent_h = ar_config["latent_h"]
     latent_w = ar_config["latent_w"]
-    # Create new cache tensors with correct dimensions
-    # These need to be 5D tensors: (batch, channels, time, height, width)
     cache = []
-    # The time dimension is 1 for cache initialization
-    cache.append(torch.zeros(1, 512, 1, latent_h // 8, latent_w // 8, device=device, dtype=dtype))  # 8x downsampled
-    cache.append(torch.zeros(1, 512, 1, latent_h // 4, latent_w // 4, device=device, dtype=dtype))  # 4x downsampled
-    cache.append(torch.zeros(1, 256, 1, latent_h // 2, latent_w // 2, device=device, dtype=dtype))  # 2x downsampled
-    cache.append(torch.zeros(1, 128, 1, latent_h, latent_w, device=device, dtype=dtype))            # 1x (same as latent)
     return cache
 def frames_to_ts_file(frames, filepath, fps = 15):
     """
     Convert frames directly to .ts file using PyAV.

 def get_vae_cache_for_aspect_ratio(aspect_ratio, device, dtype):
     """
+    根据不同的长宽比，生成符合 VAE 解码器缓存格式的零张量缓存。
+    缓存张量格式必须与 ZERO_VAE_CACHE 保持一致： [batch, time, channels, height, width]
     """
     ar_config = ASPECT_RATIOS[aspect_ratio]
     latent_w = ar_config["latent_w"]
+    latent_h = ar_config["latent_h"]
+    # 这里 time 维度初始化为 1，channels 对应各级别的通道数
     cache = []
+    # 第一级特征，channels=512，下采样 8 倍
+    cache.append(torch.zeros(
+        1,                # batch size
+        1,                # time frames
+        512,              # channels
+        latent_h // 8,    # height
+        latent_w // 8,    # width
+        device=device,
+        dtype=dtype
+    ))
+    # 第二级特征，channels=512，下采样 4 倍
+    cache.append(torch.zeros(
+        1,
+        1,
+        512,
+        latent_h // 4,
+        latent_w // 4,
+        device=device,
+        dtype=dtype
+    ))
+    # 第三级特征，channels=256，下采样 2 倍
+    cache.append(torch.zeros(
+        1,
+        1,
+        256,
+        latent_h // 2,
+        latent_w // 2,
+        device=device,
+        dtype=dtype
+    ))
+    # 第四级特征，channels=128，不下采样
+    cache.append(torch.zeros(
+        1,
+        1,
+        128,
+        latent_h,
+        latent_w,
+        device=device,
+        dtype=dtype
+    ))
     return cache
 def frames_to_ts_file(frames, filepath, fps = 15):
     """
     Convert frames directly to .ts file using PyAV.