import gradio as gr
import torch
from diffusers import DiffusionPipeline
import numpy as np
import spaces
import time
from PIL import Image
import io
import base64

# Model configuration
MODEL_ID = "hpcai-tech/Open-Sora-v2"

# Initialize the pipeline
def load_model():
    """Load the Open-Sora-v2 model"""
    try:
        pipe = DiffusionPipeline.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16,
            variant="fp16",
            use_safetensors=True
        )
        pipe.to("cuda")
        # Enable memory-efficient attention
        pipe.enable_attention_slicing()
        return pipe
    except Exception as e:
        print(f"Error loading model: {e}")
        return None
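
# Note on load_model() above (an aside, not part of the original app): on GPUs with
# limited VRAM, diffusers pipelines also expose pipe.enable_model_cpu_offload(),
# which keeps sub-models on the CPU until they are needed. Whether that trade-off
# helps for Open-Sora-v2 specifically is an assumption and untested here.
# pipe.enable_model_cpu_offload()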

# Global model variable
model = None

def initialize_model():
    """Initialize the model on first request"""
    global model
    if model is None:
        model = load_model()
    return model is not None
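
# The otherwise unused `spaces` import above suggests this app targets Hugging Face
# ZeroGPU hardware. On ZeroGPU, the GPU-bound entry point is normally wrapped with
# the spaces.GPU decorator so a GPU is attached for the duration of the call.
# Whether this Space actually runs on ZeroGPU is an assumption, so the decorator is
# shown commented out rather than applied:
#
# @spaces.GPU(duration=120)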
def generate_video(
    prompt: str,
    duration: int = 4,
    height: int = 720,
    width: int = 1280,
    num_inference_steps: int = 50,
    guidance_scale: float = 7.5,
    progress=gr.Progress()
) -> str:
    """
    Generate a video from a text prompt using Open-Sora-v2.

    Args:
        prompt: Text description of the video
        duration: Duration in seconds
        height: Video height
        width: Video width
        num_inference_steps: Number of denoising steps
        guidance_scale: Guidance scale for generation

    Returns:
        Path to the generated video file
    """
    try:
        # Initialize the model if not already done
        if not initialize_model():
            raise Exception("Failed to initialize model")
        progress(0.1, desc="Initializing generation...")

        # Calculate the number of frames based on duration (assuming 30 fps)
        num_frames = duration * 30
        progress(0.2, desc="Starting video generation...")

        # Generate video frames
        result = model(
            prompt=prompt,
            num_frames=num_frames,
            height=height,
            width=width,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            generator=torch.Generator().manual_seed(42)
        )
        progress(0.8, desc="Processing frames...")

        # Save the generated video
        output_path = f"generated_video_{int(time.time())}.mp4"
        if hasattr(result, 'videos'):
            # Handle video output
            video_frames = result.videos[0]
        else:
            # Handle image-sequence output
            video_frames = result.frames[0] if hasattr(result, 'frames') else result

        # Save to disk; save_video returns the path it actually wrote,
        # which may be a GIF if OpenCV is unavailable
        output_path = save_video(video_frames, output_path, fps=30)
        progress(1.0, desc="Video generation complete!")
        return output_path
    except Exception as e:
        print(f"Error generating video: {e}")
        raise gr.Error(f"Video generation failed: {str(e)}")
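
# Minimal direct-call sketch (an illustration, not part of the original app): the
# function above can be driven without the UI, assuming a CUDA GPU is available and
# a no-op callback is passed in place of gr.Progress():
#
#   path = generate_video(
#       "A beautiful sunset over the ocean, cinematic quality",
#       duration=4, height=720, width=1280,
#       progress=lambda *args, **kwargs: None,
#   )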
def save_video(frames, output_path, fps=30):
    """Save video frames to an MP4 file and return the written path."""
    try:
        import cv2

        # Convert frames to numpy if needed
        if torch.is_tensor(frames):
            frames = frames.cpu().numpy()
        # Ensure frames are in THWC layout
        if len(frames.shape) == 4:
            frames = np.transpose(frames, (0, 2, 3, 1))  # TCHW -> THWC
        # Rescale frames from [-1, 1] to 0-255
        frames = ((frames + 1.0) * 127.5).astype(np.uint8)

        # Get video dimensions
        height, width = frames[0].shape[:2]
        # Initialize the video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        # Write frames
        for frame in frames:
            if len(frame.shape) == 3:
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            out.write(frame)
        out.release()
        return output_path
    except ImportError:
        # Fallback: save as a GIF if cv2 is not available
        if torch.is_tensor(frames):
            frames = frames.cpu().numpy()
        if len(frames.shape) == 4:
            frames = np.transpose(frames, (0, 2, 3, 1))
        frames = ((frames + 1.0) * 127.5).astype(np.uint8)
        images = [Image.fromarray(frame) for frame in frames]
        gif_path = output_path.replace('.mp4', '.gif')
        images[0].save(
            gif_path,
            save_all=True,
            append_images=images[1:],
            duration=33,  # ~30 fps
            loop=0
        )
        return gif_path
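
# Quick sanity check for save_video (illustrative only): it expects frames shaped
# (T, C, H, W) with values in [-1, 1], matching the rescaling above.
#
#   dummy = torch.rand(30, 3, 256, 256) * 2 - 1   # one second at 30 fps
#   print(save_video(dummy, "sanity_check.mp4", fps=30))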

def create_interface():
    """Create the Gradio interface"""
    with gr.Blocks(
        title="Text to Video - Open-Sora-v2",
        theme=gr.themes.Soft(),
        css="""
        .header-text {
            text-align: center;
            font-size: 2em;
            margin-bottom: 0.5em;
            background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
        }
        .subheader-text {
            text-align: center;
            color: #666;
            margin-bottom: 2em;
        }
        .generate-btn {
            background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
            border: none;
            color: white;
            font-weight: bold;
        }
        .generate-btn:hover {
            background: linear-gradient(45deg, #764ba2 0%, #667eea 100%);
        }
        """
    ) as demo:
        gr.Markdown("""
        <div class="header-text">🎬 Text to Video Generator</div>
        <div class="subheader-text">Powered by Open-Sora-v2 - Transform your ideas into stunning videos</div>
        <div style="text-align: center; margin-bottom: 1em;">
            <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #667eea; text-decoration: none;">
                Built with anycoder
            </a>
        </div>
        """)
        with gr.Row():
            with gr.Column(scale=2):
                prompt_input = gr.Textbox(
                    label="📝 Describe your video",
                    placeholder="A beautiful sunset over the ocean with waves gently crashing on the shore, cinematic quality, 4K resolution...",
                    lines=4,
                    max_lines=6
                )
                with gr.Row():
                    duration_input = gr.Slider(
                        minimum=2,
                        maximum=16,
                        value=4,
                        step=2,
                        label="⏱️ Duration (seconds)"
                    )
                    quality_input = gr.Dropdown(
                        choices=[
                            ("720p HD", 720),
                            ("1080p Full HD", 1080),
                            ("4K Ultra HD", 2160)
                        ],
                        value=720,
                        label="🎥 Quality"
                    )
                with gr.Accordion("⚙️ Advanced Settings", open=False):
                    with gr.Row():
                        steps_input = gr.Slider(
                            minimum=20,
                            maximum=100,
                            value=50,
                            step=5,
                            label="🔢 Inference Steps"
                        )
                        guidance_input = gr.Slider(
                            minimum=1.0,
                            maximum=20.0,
                            value=7.5,
                            step=0.5,
                            label="🎯 Guidance Scale"
                        )
                generate_btn = gr.Button(
                    "🚀 Generate Video",
                    variant="primary",
                    size="lg",
                    elem_classes=["generate-btn"]
                )
            with gr.Column(scale=1):
                gr.Markdown("""
                ### 💡 Example Prompts
                - 🌄 "A serene mountain landscape at sunrise with golden light filtering through misty valleys"
                - 🏙️ "A futuristic cyberpunk city at night with neon signs reflecting on wet streets"
                - 🐠 "Underwater coral reef with colorful tropical fish swimming in crystal clear water"
                - 🌳 "A magical enchanted forest with glowing mushrooms and fireflies at twilight"

                ### ⚡ Tips for Best Results
                - Be descriptive and specific
                - Include visual style (cinematic, realistic, anime, etc.)
                - Mention lighting and atmosphere
                - Specify camera angles if desired
                """)
        with gr.Row():
            video_output = gr.Video(
                label="🎬 Generated Video",
                visible=False
            )
            loading_info = gr.Markdown(
                "✨ Your video will appear here after generation",
                visible=True
            )
        # Example prompts
        example_prompts = [
            [
                "A beautiful sunset over the ocean with waves gently crashing on the shore, cinematic quality, warm golden lighting",
                4, 720, 50, 7.5
            ],
            [
                "A serene mountain landscape at sunrise with mist rolling over the valleys, golden light filtering through the clouds",
                4, 720, 50, 7.5
            ],
            [
                "A bustling city street at night with neon signs reflecting on wet pavement, cyberpunk aesthetic, blade runner style",
                4, 720, 50, 7.5
            ],
            [
                "Underwater coral reef with colorful fish swimming, sun rays penetrating through the water, national geographic documentary style",
                4, 720, 50, 7.5
            ]
        ]
        gr.Examples(
            examples=example_prompts,
            inputs=[prompt_input, duration_input, quality_input, steps_input, guidance_input],
            label="🎯 Try these examples",
            cache_examples=False
        )
        def generate_and_display(prompt, duration, quality, steps, guidance, progress=gr.Progress()):
            try:
                # Calculate width based on quality (16:9 aspect ratio)
                width_map = {720: 1280, 1080: 1920, 2160: 3840}
                width = width_map.get(quality, 1280)
                # Generate the video
                video_path = generate_video(
                    prompt=prompt,
                    duration=duration,
                    height=quality,
                    width=width,
                    num_inference_steps=steps,
                    guidance_scale=guidance,
                    progress=progress
                )
                return {
                    video_output: gr.Video(value=video_path, visible=True),
                    loading_info: gr.Markdown(visible=False)
                }
            except Exception as e:
                return {
                    video_output: gr.Video(visible=False),
                    loading_info: gr.Markdown(f"❌ Error: {str(e)}", visible=True)
                }

        generate_btn.click(
            fn=generate_and_display,
            inputs=[prompt_input, duration_input, quality_input, steps_input, guidance_input],
            outputs=[video_output, loading_info],
            show_progress=True
        )

        # Initialize the model on page load
        demo.load(
            fn=initialize_model,
            inputs=[],
            outputs=[],
            queue=False
        )
    return demo

if __name__ == "__main__":
    demo = create_interface()
    # Queue requests so long-running generations do not block the server
    demo.queue()
    demo.launch(
        share=True,
        show_error=True
    )
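
# A plausible requirements.txt for this Space (an assumption inferred from the
# imports above, not a file taken from the repository); accelerate and transformers
# are commonly needed alongside diffusers pipelines:
#
#   gradio
#   spaces
#   torch
#   diffusers
#   accelerate
#   transformers
#   numpy
#   opencv-python
#   Pillow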