self-forcing

Runtime error

App Files Files Community

innoai committed on Jun 19

Commit

e876696

verified ·

1 Parent(s): 9dc28da

Update app.py

Browse files

Files changed (1) hide show

app.py +100 -5

app.py CHANGED Viewed

@@ -68,7 +68,7 @@ T2V_CINEMATIC_PROMPT = \
     '''1. For overly concise user inputs, reasonably infer and add details to make the video more complete and appealing without altering the original intent;\n''' \
     '''2. Enhance the main features in user descriptions (e.g., appearance, expression, quantity, race, posture, etc.), visual style, spatial relationships, and shot scales;\n''' \
     '''3. Output the entire prompt in English, retaining original text in quotes and titles, and preserving key input information;\n''' \
-    '''4. Prompts should match the user’s intent and accurately reflect the specified style. If the user does not specify a style, choose the most appropriate style for the video;\n''' \
     '''5. Emphasize motion information and different camera movements present in the input description;\n''' \
     '''6. Your output should have natural motion attributes. For the target category described, add natural actions of the target using simple and direct verbs;\n''' \
     '''7. The revised prompt should be around 80-100 words long.\n''' \
@@ -148,6 +148,9 @@ APP_STATE = {
     "current_vae_decoder": None,
 }
 def frames_to_ts_file(frames, filepath, fps = 15):
     """
     Convert frames directly to .ts file using PyAV.
@@ -198,6 +201,55 @@ def frames_to_ts_file(frames, filepath, fps = 15):
     return filepath
 def initialize_vae_decoder(use_taehv=False, use_trt=False):
     if use_trt:
         from demo_utils.vae import VAETRTWrapper
@@ -262,6 +314,9 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
     Generator function that yields .ts video chunks using PyAV for streaming.
     Now optimized for block-based processing.
     """
     if seed == -1:
         seed = random.randint(0, 2**32 - 1)
@@ -286,6 +341,7 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
     all_num_frames = [pipeline.num_frame_per_block] * num_blocks
     total_frames_yielded = 0
     # Ensure temp directory exists
     os.makedirs("gradio_tmp", exist_ok=True)
@@ -352,6 +408,7 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
             frame_np = np.transpose(frame_np, (1, 2, 0))  # CHW -> HWC
             all_frames_from_block.append(frame_np)
             total_frames_yielded += 1
             # Yield status update for each frame (cute tracking!)
@@ -375,7 +432,7 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
             )
             # Yield None for video but update status (frame-by-frame tracking)
-            yield None, frame_status_html
         # Encode entire block as one chunk immediately
         if all_frames_from_block:
@@ -387,12 +444,13 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
                 ts_path = os.path.join("gradio_tmp", ts_filename)
                 frames_to_ts_file(all_frames_from_block, ts_path, fps)
                 # Calculate final progress for this block
                 total_progress = (idx + 1) / num_blocks * 100
                 # Yield the actual video chunk
-                yield ts_path, gr.update()
             except Exception as e:
                 print(f"⚠️ Error encoding block {idx}: {e}")
@@ -401,6 +459,21 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
         current_start_frame += current_num_frames
     # Final completion status
     final_status_html = (
         f"<div style='padding: 16px; border: 1px solid #198754; background: linear-gradient(135deg, #d1e7dd, #f8f9fa); border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>"
@@ -418,7 +491,11 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
         f"  </div>"
         f"</div>"
     )
-    yield None, final_status_html
     print(f"✅ PyAV streaming complete! {total_frames_yielded} frames across {num_blocks} blocks")
 # --- Gradio UI Layout ---
@@ -479,6 +556,14 @@ with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
                 show_label=False
             )
             status_display = gr.HTML(
                 value=(
                     "<div style='text-align: center; padding: 20px; color: #666; border: 1px dashed #ddd; border-radius: 8px;'>"
@@ -489,11 +574,21 @@ with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
                 label="Generation Status"
             )
     # Connect the generator to the streaming video
     start_btn.click(
         fn=video_generation_handler_streaming,
         inputs=[prompt, seed, fps],
-        outputs=[streaming_video, status_display]
     )
     enhance_button.click(

     '''1. For overly concise user inputs, reasonably infer and add details to make the video more complete and appealing without altering the original intent;\n''' \
     '''2. Enhance the main features in user descriptions (e.g., appearance, expression, quantity, race, posture, etc.), visual style, spatial relationships, and shot scales;\n''' \
     '''3. Output the entire prompt in English, retaining original text in quotes and titles, and preserving key input information;\n''' \
+    '''4. Prompts should match the user's intent and accurately reflect the specified style. If the user does not specify a style, choose the most appropriate style for the video;\n''' \
     '''5. Emphasize motion information and different camera movements present in the input description;\n''' \
     '''6. Your output should have natural motion attributes. For the target category described, add natural actions of the target using simple and direct verbs;\n''' \
     '''7. The revised prompt should be around 80-100 words long.\n''' \
     "current_vae_decoder": None,
 }
+# Global variable to store generated video chunks
+generated_video_chunks = []
 def frames_to_ts_file(frames, filepath, fps = 15):
     """
     Convert frames directly to .ts file using PyAV.
     return filepath
+def frames_to_mp4_file(frames, filepath, fps=15):
+    """
+    Convert frames to MP4 file for download.
+    Args:
+        frames: List of numpy arrays (HWC, RGB, uint8)
+        filepath: Output file path
+        fps: Frames per second
+    Returns:
+        The filepath of the created file
+    """
+    if not frames:
+        return filepath
+    height, width = frames[0].shape[:2]
+    # Create container for MP4 format
+    container = av.open(filepath, mode='w', format='mp4')
+    # Add video stream
+    stream = container.add_stream('h264', rate=fps)
+    stream.width = width
+    stream.height = height
+    stream.pix_fmt = 'yuv420p'
+    # Optimize for quality
+    stream.options = {
+        'preset': 'medium',
+        'crf': '23',
+        'profile': 'high',
+        'level': '4.0'
+    }
+    try:
+        for frame_np in frames:
+            frame = av.VideoFrame.from_ndarray(frame_np, format='rgb24')
+            frame = frame.reformat(format=stream.pix_fmt)
+            for packet in stream.encode(frame):
+                container.mux(packet)
+        for packet in stream.encode():
+            container.mux(packet)
+    finally:
+        container.close()
+    return filepath
 def initialize_vae_decoder(use_taehv=False, use_trt=False):
     if use_trt:
         from demo_utils.vae import VAETRTWrapper
     Generator function that yields .ts video chunks using PyAV for streaming.
     Now optimized for block-based processing.
     """
+    global generated_video_chunks
+    generated_video_chunks = []  # Reset chunks for new generation
     if seed == -1:
         seed = random.randint(0, 2**32 - 1)
     all_num_frames = [pipeline.num_frame_per_block] * num_blocks
     total_frames_yielded = 0
+    all_frames_for_download = []  # Store all frames for final download
     # Ensure temp directory exists
     os.makedirs("gradio_tmp", exist_ok=True)
             frame_np = np.transpose(frame_np, (1, 2, 0))  # CHW -> HWC
             all_frames_from_block.append(frame_np)
+            all_frames_for_download.append(frame_np)  # Store for download
             total_frames_yielded += 1
             # Yield status update for each frame (cute tracking!)
             )
             # Yield None for video but update status (frame-by-frame tracking)
+            yield None, frame_status_html, gr.update(visible=False), None
         # Encode entire block as one chunk immediately
         if all_frames_from_block:
                 ts_path = os.path.join("gradio_tmp", ts_filename)
                 frames_to_ts_file(all_frames_from_block, ts_path, fps)
+                generated_video_chunks.append(ts_path)
                 # Calculate final progress for this block
                 total_progress = (idx + 1) / num_blocks * 100
                 # Yield the actual video chunk
+                yield ts_path, gr.update(), gr.update(visible=False), None
             except Exception as e:
                 print(f"⚠️ Error encoding block {idx}: {e}")
         current_start_frame += current_num_frames
+    # Create final MP4 for download
+    final_mp4_path = None
+    if all_frames_for_download:
+        try:
+            mp4_uuid = str(uuid.uuid4())[:8]
+            mp4_filename = f"generated_video_{mp4_uuid}.mp4"
+            mp4_path = os.path.join("gradio_tmp", mp4_filename)
+            frames_to_mp4_file(all_frames_for_download, mp4_path, fps)
+            final_mp4_path = mp4_path
+            print(f"✅ Created MP4 file for download: {mp4_path}")
+        except Exception as e:
+            print(f"⚠️ Error creating MP4: {e}")
+            import traceback
+            traceback.print_exc()
     # Final completion status
     final_status_html = (
         f"<div style='padding: 16px; border: 1px solid #198754; background: linear-gradient(135deg, #d1e7dd, #f8f9fa); border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>"
         f"  </div>"
         f"</div>"
     )
+    # Show download button if MP4 was created successfully
+    download_visible = final_mp4_path is not None
+    yield None, final_status_html, gr.update(visible=download_visible), final_mp4_path
     print(f"✅ PyAV streaming complete! {total_frames_yielded} frames across {num_blocks} blocks")
 # --- Gradio UI Layout ---
                 show_label=False
             )
+            # Download button - initially hidden
+            with gr.Row():
+                download_button = gr.DownloadButton(
+                    "📥 Download Video",
+                    visible=False,
+                    variant="primary"
+                )
             status_display = gr.HTML(
                 value=(
                     "<div style='text-align: center; padding: 20px; color: #666; border: 1px dashed #ddd; border-radius: 8px;'>"
                 label="Generation Status"
             )
+    # Hidden component to store the download file path
+    download_file = gr.File(visible=False)
     # Connect the generator to the streaming video
     start_btn.click(
         fn=video_generation_handler_streaming,
         inputs=[prompt, seed, fps],
+        outputs=[streaming_video, status_display, download_button, download_file]
+    )
+    # Set up download button to use the generated file
+    download_button.click(
+        lambda x: x,
+        inputs=[download_file],
+        outputs=[download_file]
     )
     enhance_button.click(