Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -68,7 +68,7 @@ T2V_CINEMATIC_PROMPT = \
|
|
| 68 |
'''1. For overly concise user inputs, reasonably infer and add details to make the video more complete and appealing without altering the original intent;\n''' \
|
| 69 |
'''2. Enhance the main features in user descriptions (e.g., appearance, expression, quantity, race, posture, etc.), visual style, spatial relationships, and shot scales;\n''' \
|
| 70 |
'''3. Output the entire prompt in English, retaining original text in quotes and titles, and preserving key input information;\n''' \
|
| 71 |
-
'''4. Prompts should match the user
|
| 72 |
'''5. Emphasize motion information and different camera movements present in the input description;\n''' \
|
| 73 |
'''6. Your output should have natural motion attributes. For the target category described, add natural actions of the target using simple and direct verbs;\n''' \
|
| 74 |
'''7. The revised prompt should be around 80-100 words long.\n''' \
|
|
@@ -148,6 +148,9 @@ APP_STATE = {
|
|
| 148 |
"current_vae_decoder": None,
|
| 149 |
}
|
| 150 |
|
|
|
|
|
|
|
|
|
|
| 151 |
def frames_to_ts_file(frames, filepath, fps = 15):
|
| 152 |
"""
|
| 153 |
Convert frames directly to .ts file using PyAV.
|
|
@@ -198,6 +201,55 @@ def frames_to_ts_file(frames, filepath, fps = 15):
|
|
| 198 |
|
| 199 |
return filepath
|
| 200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
def initialize_vae_decoder(use_taehv=False, use_trt=False):
|
| 202 |
if use_trt:
|
| 203 |
from demo_utils.vae import VAETRTWrapper
|
|
@@ -262,6 +314,9 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
|
|
| 262 |
Generator function that yields .ts video chunks using PyAV for streaming.
|
| 263 |
Now optimized for block-based processing.
|
| 264 |
"""
|
|
|
|
|
|
|
|
|
|
| 265 |
if seed == -1:
|
| 266 |
seed = random.randint(0, 2**32 - 1)
|
| 267 |
|
|
@@ -286,6 +341,7 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
|
|
| 286 |
all_num_frames = [pipeline.num_frame_per_block] * num_blocks
|
| 287 |
|
| 288 |
total_frames_yielded = 0
|
|
|
|
| 289 |
|
| 290 |
# Ensure temp directory exists
|
| 291 |
os.makedirs("gradio_tmp", exist_ok=True)
|
|
@@ -352,6 +408,7 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
|
|
| 352 |
frame_np = np.transpose(frame_np, (1, 2, 0)) # CHW -> HWC
|
| 353 |
|
| 354 |
all_frames_from_block.append(frame_np)
|
|
|
|
| 355 |
total_frames_yielded += 1
|
| 356 |
|
| 357 |
# Yield status update for each frame (cute tracking!)
|
|
@@ -375,7 +432,7 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
|
|
| 375 |
)
|
| 376 |
|
| 377 |
# Yield None for video but update status (frame-by-frame tracking)
|
| 378 |
-
yield None, frame_status_html
|
| 379 |
|
| 380 |
# Encode entire block as one chunk immediately
|
| 381 |
if all_frames_from_block:
|
|
@@ -387,12 +444,13 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
|
|
| 387 |
ts_path = os.path.join("gradio_tmp", ts_filename)
|
| 388 |
|
| 389 |
frames_to_ts_file(all_frames_from_block, ts_path, fps)
|
|
|
|
| 390 |
|
| 391 |
# Calculate final progress for this block
|
| 392 |
total_progress = (idx + 1) / num_blocks * 100
|
| 393 |
|
| 394 |
# Yield the actual video chunk
|
| 395 |
-
yield ts_path, gr.update()
|
| 396 |
|
| 397 |
except Exception as e:
|
| 398 |
print(f"⚠️ Error encoding block {idx}: {e}")
|
|
@@ -401,6 +459,21 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
|
|
| 401 |
|
| 402 |
current_start_frame += current_num_frames
|
| 403 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 404 |
# Final completion status
|
| 405 |
final_status_html = (
|
| 406 |
f"<div style='padding: 16px; border: 1px solid #198754; background: linear-gradient(135deg, #d1e7dd, #f8f9fa); border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>"
|
|
@@ -418,7 +491,11 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
|
|
| 418 |
f" </div>"
|
| 419 |
f"</div>"
|
| 420 |
)
|
| 421 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
print(f"✅ PyAV streaming complete! {total_frames_yielded} frames across {num_blocks} blocks")
|
| 423 |
|
| 424 |
# --- Gradio UI Layout ---
|
|
@@ -479,6 +556,14 @@ with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
|
|
| 479 |
show_label=False
|
| 480 |
)
|
| 481 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
status_display = gr.HTML(
|
| 483 |
value=(
|
| 484 |
"<div style='text-align: center; padding: 20px; color: #666; border: 1px dashed #ddd; border-radius: 8px;'>"
|
|
@@ -489,11 +574,21 @@ with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
|
|
| 489 |
label="Generation Status"
|
| 490 |
)
|
| 491 |
|
|
|
|
|
|
|
|
|
|
| 492 |
# Connect the generator to the streaming video
|
| 493 |
start_btn.click(
|
| 494 |
fn=video_generation_handler_streaming,
|
| 495 |
inputs=[prompt, seed, fps],
|
| 496 |
-
outputs=[streaming_video, status_display]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
)
|
| 498 |
|
| 499 |
enhance_button.click(
|
|
|
|
| 68 |
'''1. For overly concise user inputs, reasonably infer and add details to make the video more complete and appealing without altering the original intent;\n''' \
|
| 69 |
'''2. Enhance the main features in user descriptions (e.g., appearance, expression, quantity, race, posture, etc.), visual style, spatial relationships, and shot scales;\n''' \
|
| 70 |
'''3. Output the entire prompt in English, retaining original text in quotes and titles, and preserving key input information;\n''' \
|
| 71 |
+
'''4. Prompts should match the user's intent and accurately reflect the specified style. If the user does not specify a style, choose the most appropriate style for the video;\n''' \
|
| 72 |
'''5. Emphasize motion information and different camera movements present in the input description;\n''' \
|
| 73 |
'''6. Your output should have natural motion attributes. For the target category described, add natural actions of the target using simple and direct verbs;\n''' \
|
| 74 |
'''7. The revised prompt should be around 80-100 words long.\n''' \
|
|
|
|
| 148 |
"current_vae_decoder": None,
|
| 149 |
}
|
| 150 |
|
| 151 |
+
# Global variable to store generated video chunks
|
| 152 |
+
generated_video_chunks = []
|
| 153 |
+
|
| 154 |
def frames_to_ts_file(frames, filepath, fps = 15):
|
| 155 |
"""
|
| 156 |
Convert frames directly to .ts file using PyAV.
|
|
|
|
| 201 |
|
| 202 |
return filepath
|
| 203 |
|
| 204 |
+
def frames_to_mp4_file(frames, filepath, fps=15):
    """
    Convert frames to an MP4 file for download.

    Args:
        frames: Sequence of numpy arrays (HWC, RGB, uint8). A stacked array
            whose first axis indexes frames is accepted as well.
        filepath: Output file path
        fps: Frames per second

    Returns:
        The filepath of the created file. When there are no frames, nothing
        is written and filepath is returned unchanged.
    """
    # Use an explicit None/len() check rather than truthiness: `not frames`
    # raises ValueError when frames is a multi-element numpy array.
    if frames is None or len(frames) == 0:
        return filepath

    # Every frame is encoded at the size of the first one.
    height, width = frames[0].shape[:2]

    # Open the container as a context manager so it is closed even when
    # stream creation or configuration fails, not only when encoding fails.
    with av.open(filepath, mode='w', format='mp4') as container:
        # Add video stream
        stream = container.add_stream('h264', rate=fps)
        stream.width = width
        stream.height = height
        stream.pix_fmt = 'yuv420p'

        # Optimize for quality
        stream.options = {
            'preset': 'medium',
            'crf': '23',
            'profile': 'high',
            'level': '4.0'
        }

        for frame_np in frames:
            # Frames arrive as RGB; convert to the stream's pixel format
            # before handing them to the encoder.
            frame = av.VideoFrame.from_ndarray(frame_np, format='rgb24')
            frame = frame.reformat(format=stream.pix_fmt)
            for packet in stream.encode(frame):
                container.mux(packet)

        # Calling encode() with no frame flushes packets still buffered
        # in the encoder.
        for packet in stream.encode():
            container.mux(packet)

    return filepath
|
| 252 |
+
|
| 253 |
def initialize_vae_decoder(use_taehv=False, use_trt=False):
|
| 254 |
if use_trt:
|
| 255 |
from demo_utils.vae import VAETRTWrapper
|
|
|
|
| 314 |
Generator function that yields .ts video chunks using PyAV for streaming.
|
| 315 |
Now optimized for block-based processing.
|
| 316 |
"""
|
| 317 |
+
global generated_video_chunks
|
| 318 |
+
generated_video_chunks = [] # Reset chunks for new generation
|
| 319 |
+
|
| 320 |
if seed == -1:
|
| 321 |
seed = random.randint(0, 2**32 - 1)
|
| 322 |
|
|
|
|
| 341 |
all_num_frames = [pipeline.num_frame_per_block] * num_blocks
|
| 342 |
|
| 343 |
total_frames_yielded = 0
|
| 344 |
+
all_frames_for_download = [] # Store all frames for final download
|
| 345 |
|
| 346 |
# Ensure temp directory exists
|
| 347 |
os.makedirs("gradio_tmp", exist_ok=True)
|
|
|
|
| 408 |
frame_np = np.transpose(frame_np, (1, 2, 0)) # CHW -> HWC
|
| 409 |
|
| 410 |
all_frames_from_block.append(frame_np)
|
| 411 |
+
all_frames_for_download.append(frame_np) # Store for download
|
| 412 |
total_frames_yielded += 1
|
| 413 |
|
| 414 |
# Yield status update for each frame (cute tracking!)
|
|
|
|
| 432 |
)
|
| 433 |
|
| 434 |
# Yield None for video but update status (frame-by-frame tracking)
|
| 435 |
+
yield None, frame_status_html, gr.update(visible=False), None
|
| 436 |
|
| 437 |
# Encode entire block as one chunk immediately
|
| 438 |
if all_frames_from_block:
|
|
|
|
| 444 |
ts_path = os.path.join("gradio_tmp", ts_filename)
|
| 445 |
|
| 446 |
frames_to_ts_file(all_frames_from_block, ts_path, fps)
|
| 447 |
+
generated_video_chunks.append(ts_path)
|
| 448 |
|
| 449 |
# Calculate final progress for this block
|
| 450 |
total_progress = (idx + 1) / num_blocks * 100
|
| 451 |
|
| 452 |
# Yield the actual video chunk
|
| 453 |
+
yield ts_path, gr.update(), gr.update(visible=False), None
|
| 454 |
|
| 455 |
except Exception as e:
|
| 456 |
print(f"⚠️ Error encoding block {idx}: {e}")
|
|
|
|
| 459 |
|
| 460 |
current_start_frame += current_num_frames
|
| 461 |
|
| 462 |
+
# Create final MP4 for download
|
| 463 |
+
final_mp4_path = None
|
| 464 |
+
if all_frames_for_download:
|
| 465 |
+
try:
|
| 466 |
+
mp4_uuid = str(uuid.uuid4())[:8]
|
| 467 |
+
mp4_filename = f"generated_video_{mp4_uuid}.mp4"
|
| 468 |
+
mp4_path = os.path.join("gradio_tmp", mp4_filename)
|
| 469 |
+
frames_to_mp4_file(all_frames_for_download, mp4_path, fps)
|
| 470 |
+
final_mp4_path = mp4_path
|
| 471 |
+
print(f"✅ Created MP4 file for download: {mp4_path}")
|
| 472 |
+
except Exception as e:
|
| 473 |
+
print(f"⚠️ Error creating MP4: {e}")
|
| 474 |
+
import traceback
|
| 475 |
+
traceback.print_exc()
|
| 476 |
+
|
| 477 |
# Final completion status
|
| 478 |
final_status_html = (
|
| 479 |
f"<div style='padding: 16px; border: 1px solid #198754; background: linear-gradient(135deg, #d1e7dd, #f8f9fa); border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>"
|
|
|
|
| 491 |
f" </div>"
|
| 492 |
f"</div>"
|
| 493 |
)
|
| 494 |
+
|
| 495 |
+
# Show download button if MP4 was created successfully
|
| 496 |
+
download_visible = final_mp4_path is not None
|
| 497 |
+
yield None, final_status_html, gr.update(visible=download_visible), final_mp4_path
|
| 498 |
+
|
| 499 |
print(f"✅ PyAV streaming complete! {total_frames_yielded} frames across {num_blocks} blocks")
|
| 500 |
|
| 501 |
# --- Gradio UI Layout ---
|
|
|
|
| 556 |
show_label=False
|
| 557 |
)
|
| 558 |
|
| 559 |
+
# Download button - initially hidden
|
| 560 |
+
with gr.Row():
|
| 561 |
+
download_button = gr.DownloadButton(
|
| 562 |
+
"📥 Download Video",
|
| 563 |
+
visible=False,
|
| 564 |
+
variant="primary"
|
| 565 |
+
)
|
| 566 |
+
|
| 567 |
status_display = gr.HTML(
|
| 568 |
value=(
|
| 569 |
"<div style='text-align: center; padding: 20px; color: #666; border: 1px dashed #ddd; border-radius: 8px;'>"
|
|
|
|
| 574 |
label="Generation Status"
|
| 575 |
)
|
| 576 |
|
| 577 |
+
# Hidden component to store the download file path
|
| 578 |
+
download_file = gr.File(visible=False)
|
| 579 |
+
|
| 580 |
# Connect the generator to the streaming video
|
| 581 |
start_btn.click(
|
| 582 |
fn=video_generation_handler_streaming,
|
| 583 |
inputs=[prompt, seed, fps],
|
| 584 |
+
outputs=[streaming_video, status_display, download_button, download_file]
|
| 585 |
+
)
|
| 586 |
+
|
| 587 |
+
# Set up download button to use the generated file
|
| 588 |
+
download_button.click(
|
| 589 |
+
lambda x: x,
|
| 590 |
+
inputs=[download_file],
|
| 591 |
+
outputs=[download_file]
|
| 592 |
)
|
| 593 |
|
| 594 |
enhance_button.click(
|