import gradio as gr
import torch
from diffusers import DiffusionPipeline
import numpy as np
import spaces
import time
from PIL import Image

# Model configuration
MODEL_ID = "hpcai-tech/Open-Sora-v2"

# Initialize the pipeline
@spaces.GPU(duration=1500)
def load_model():
    """Load the Open-Sora-v2 model"""
    try:
        pipe = DiffusionPipeline.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16,
            variant="fp16",
            use_safetensors=True
        )
        pipe.to("cuda")
        # Enable memory-efficient attention
        pipe.enable_attention_slicing()
        return pipe
    except Exception as e:
        print(f"Error loading model: {e}")
        return None

# Global model variable
model = None

def initialize_model():
    """Initialize the model on first request"""
    global model
    if model is None:
        model = load_model()
    return model is not None

@spaces.GPU(duration=120)
def generate_video(
    prompt: str,
    duration: int = 4,
    height: int = 720,
    width: int = 1280,
    num_inference_steps: int = 50,
    guidance_scale: float = 7.5,
    progress=gr.Progress()
) -> str:
    """
    Generate a video from a text prompt using Open-Sora-v2

    Args:
        prompt: Text description of the video
        duration: Duration in seconds
        height: Video height
        width: Video width
        num_inference_steps: Number of denoising steps
        guidance_scale: Guidance scale for generation

    Returns:
        Path to the generated video file
    """
    try:
        # Initialize the model if not already done
        if not initialize_model():
            raise RuntimeError("Failed to initialize model")

        progress(0.1, desc="Initializing generation...")

        # Calculate the number of frames based on duration (assuming 30 fps)
        num_frames = duration * 30

        progress(0.2, desc="Starting video generation...")

        # Generate video frames; seed the generator on the GPU the pipeline runs on
        result = model(
            prompt=prompt,
            num_frames=num_frames,
            height=height,
            width=width,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            generator=torch.Generator(device="cuda").manual_seed(42)
        )

        progress(0.8, desc="Processing frames...")

        # Save the generated video
        output_path = f"generated_video_{int(time.time())}.mp4"

        if hasattr(result, 'videos'):
            # Handle video output
            video_frames = result.videos[0]
        else:
            # Handle image-sequence output
            video_frames = result.frames[0] if hasattr(result, 'frames') else result

        # Save as a video file; save_video returns the path it actually wrote,
        # which may be a .gif if OpenCV is unavailable
        output_path = save_video(video_frames, output_path, fps=30)

        progress(1.0, desc="Video generation complete!")
        return output_path

    except Exception as e:
        print(f"Error generating video: {e}")
        raise gr.Error(f"Video generation failed: {str(e)}")

def save_video(frames, output_path, fps=30):
    """Save video frames to an MP4 file and return the path actually written"""
    try:
        import cv2

        # Convert frames to numpy if needed
        if torch.is_tensor(frames):
            frames = frames.cpu().numpy()

        # Ensure frames are in the correct format
        if len(frames.shape) == 4:
            frames = np.transpose(frames, (0, 2, 3, 1))  # TCHW -> THWC

        # Map frames from [-1, 1] to 0-255, clipping any out-of-range values
        frames = np.clip((frames + 1.0) * 127.5, 0, 255).astype(np.uint8)

        # Get video dimensions
        height, width = frames[0].shape[:2]

        # Initialize the video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        # Write frames (OpenCV expects BGR channel order)
        for frame in frames:
            if len(frame.shape) == 3:
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            out.write(frame)

        out.release()
        return output_path
    except ImportError:
        # Fallback: save as GIF if cv2 is not available
        if torch.is_tensor(frames):
            frames = frames.cpu().numpy()
        if len(frames.shape) == 4:
            frames = np.transpose(frames, (0, 2, 3, 1))
        frames = np.clip((frames + 1.0) * 127.5, 0, 255).astype(np.uint8)

        images = [Image.fromarray(frame) for frame in frames]
        gif_path = output_path.replace('.mp4', '.gif')
        images[0].save(
            gif_path,
            save_all=True,
            append_images=images[1:],
            duration=int(1000 / fps),  # ms per frame (~33 ms at 30 fps)
            loop=0
        )
        return gif_path

def create_interface():
    """Create the Gradio interface"""
    with gr.Blocks(
        title="Text to Video - Open-Sora-v2",
        theme=gr.themes.Soft(),
        css="""
        .header-text {
            text-align: center;
            font-size: 2em;
            margin-bottom: 0.5em;
            background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
        }
        .subheader-text {
            text-align: center;
            color: #666;
            margin-bottom: 2em;
        }
        .generate-btn {
            background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
            border: none;
            color: white;
            font-weight: bold;
        }
        .generate-btn:hover {
            background: linear-gradient(45deg, #764ba2 0%, #667eea 100%);
        }
        """
    ) as demo:
        gr.Markdown("""
        <div class="header-text">🎬 Text to Video Generator</div>
        <div class="subheader-text">Powered by Open-Sora-v2 - Transform your ideas into stunning videos</div>
        <div class="subheader-text">Built with anycoder</div>
""") with gr.Row(): with gr.Column(scale=2): prompt_input = gr.Textbox( label="📝 Describe your video", placeholder="A beautiful sunset over the ocean with waves gently crashing on the shore, cinematic quality, 4K resolution...", lines=4, max_lines=6 ) with gr.Row(): duration_input = gr.Slider( minimum=2, maximum=16, value=4, step=2, label="⏱️ Duration (seconds)" ) quality_input = gr.Dropdown( choices=[ ("720p HD", 720), ("1080p Full HD", 1080), ("4K Ultra HD", 2160) ], value=720, label="🎥 Quality" ) with gr.Accordion("⚙️ Advanced Settings", open=False): with gr.Row(): steps_input = gr.Slider( minimum=20, maximum=100, value=50, step=5, label="🔢 Inference Steps" ) guidance_input = gr.Slider( minimum=1.0, maximum=20.0, value=7.5, step=0.5, label="🎯 Guidance Scale" ) generate_btn = gr.Button( "🚀 Generate Video", variant="primary", size="lg", elem_classes=["generate-btn"] ) with gr.Column(scale=1): gr.Markdown(""" ### 💡 Example Prompts - 🌅 "A serene mountain landscape at sunrise with golden light filtering through misty valleys" - 🏙️ "A futuristic cyberpunk city at night with neon signs reflecting on wet streets" - 🌊 "Underwater coral reef with colorful tropical fish swimming in crystal clear water" - 🌳 "A magical enchanted forest with glowing mushrooms and fireflies at twilight" ### ⚡ Tips for Best Results - Be descriptive and specific - Include visual style (cinematic, realistic, anime, etc.) - Mention lighting and atmosphere - Specify camera angles if desired """) with gr.Row(): video_output = gr.Video( label="🎬 Generated Video", visible=False ) loading_info = gr.Markdown( "✨ Your video will appear here after generation", visible=True ) # Example prompts example_prompts = [ [ "A beautiful sunset over the ocean with waves gently crashing on the shore, cinematic quality, warm golden lighting", 4, 720, 50, 7.5 ], [ "A serene mountain landscape at sunrise with mist rolling over the valleys, golden light filtering through the clouds", 4, 720, 50, 7.5 ], [ "A bustling city street at night with neon signs reflecting on wet pavement, cyberpunk aesthetic, blade runner style", 4, 720, 50, 7.5 ], [ "Underwater coral reef with colorful fish swimming, sun rays penetrating through the water, national geographic documentary style", 4, 720, 50, 7.5 ] ] gr.Examples( examples=example_prompts, inputs=[prompt_input, duration_input, quality_input, steps_input, guidance_input], label="🎯 Try these examples", cache_examples=False ) def generate_and_display(prompt, duration, quality, steps, guidance, progress=gr.Progress()): try: # Calculate width based on quality (16:9 aspect ratio) width_map = {720: 1280, 1080: 1920, 2160: 3840} width = width_map.get(quality, 1280) # Generate video video_path = generate_video( prompt=prompt, duration=duration, height=quality, width=width, num_inference_steps=steps, guidance_scale=guidance, progress=progress ) return { video_output: gr.Video(value=video_path, visible=True), loading_info: gr.Markdown(visible=False) } except Exception as e: return { video_output: gr.Video(visible=False), loading_info: gr.Markdown(f"❌ Error: {str(e)}", visible=True) } generate_btn.click( fn=generate_and_display, inputs=[prompt_input, duration_input, quality_input, steps_input, guidance_input], outputs=[video_output, loading_info], show_progress=True ) # Initialize model on page load demo.load( fn=initialize_model, inputs=[], outputs=[], queue=False ) return demo if __name__ == "__main__": demo = create_interface() demo.launch( share=True, show_error=True, show_tips=True, queue=True )