AverageAiLiker committed
Commit 3ab16a2 · verified · 1 Parent(s): cb8aa6e

Update Gradio app with multiple files

Files changed (2)
  1. app.py +367 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,367 @@
+ import gradio as gr
+ import torch
+ from diffusers import DiffusionPipeline
+ import numpy as np
+ import spaces
+ import time
+ from PIL import Image
+
+ # Model configuration
+ MODEL_ID = "hpcai-tech/Open-Sora-v2"
+
+ # Initialize the pipeline
+ @spaces.GPU(duration=1500)
+ def load_model():
+     """Load the Open-Sora-v2 model"""
+     try:
+         pipe = DiffusionPipeline.from_pretrained(
+             MODEL_ID,
+             torch_dtype=torch.float16,
+             variant="fp16",
+             use_safetensors=True
+         )
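+         # NOTE: this assumes the repo publishes diffusers-format weights with an
+         # "fp16" variant; if loading fails, try dropping `variant="fp16"`.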
+         pipe.to("cuda")
+         # Enable memory-efficient attention
+         pipe.enable_attention_slicing()
+         return pipe
+     except Exception as e:
+         print(f"Error loading model: {e}")
+         return None
+
+ # Global model variable
+ model = None
+
+ def initialize_model():
+     """Initialize the model on first request"""
+     global model
+     if model is None:
+         model = load_model()
+     return model is not None
+
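+ # NOTE: @spaces.GPU allocates a ZeroGPU worker for the decorated call; the
+ # `duration` values used here are assumptions and may need tuning to fit the
+ # Space's quota.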
+ @spaces.GPU(duration=120)
+ def generate_video(
+     prompt: str,
+     duration: int = 4,
+     height: int = 720,
+     width: int = 1280,
+     num_inference_steps: int = 50,
+     guidance_scale: float = 7.5,
+     progress=gr.Progress()
+ ) -> str:
+     """
+     Generate a video from a text prompt using Open-Sora-v2
+
+     Args:
+         prompt: Text description of the video
+         duration: Duration in seconds
+         height: Video height
+         width: Video width
+         num_inference_steps: Number of denoising steps
+         guidance_scale: Guidance scale for generation
+
+     Returns:
+         Path to the generated video file
+     """
+     try:
+         # Initialize model if not already done
+         if not initialize_model():
+             raise Exception("Failed to initialize model")
+
+         progress(0.1, desc="Initializing generation...")
+
+         # Calculate number of frames based on duration (assuming 30 fps)
+         num_frames = duration * 30
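+         # NOTE: at 30 fps a 16 s clip is 480 frames; long, high-resolution clips
+         # can exhaust GPU memory, so capping num_frames may be necessary.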
+
+         progress(0.2, desc="Starting video generation...")
+
+         # Generate video frames
+         result = model(
+             prompt=prompt,
+             num_frames=num_frames,
+             height=height,
+             width=width,
+             num_inference_steps=num_inference_steps,
+             guidance_scale=guidance_scale,
+             generator=torch.Generator().manual_seed(42)
+         )
+
+         progress(0.8, desc="Processing frames...")
+
+         # Save the generated video
+         output_path = f"generated_video_{int(time.time())}.mp4"
+
+         if hasattr(result, 'videos'):
+             # Handle video output
+             video_frames = result.videos[0]
+         else:
+             # Handle image-sequence output
+             video_frames = result.frames[0] if hasattr(result, 'frames') else result
+
+         # Save as a video file; save_video returns the path actually written
+         # (a .gif when OpenCV is unavailable)
+         output_path = save_video(video_frames, output_path, fps=30)
+
+         progress(1.0, desc="Video generation complete!")
+
+         return output_path
+
+     except Exception as e:
+         print(f"Error generating video: {e}")
+         raise gr.Error(f"Video generation failed: {str(e)}")
+
+ def save_video(frames, output_path, fps=30):
+     """Save video frames to an MP4 file and return the path written"""
+     try:
+         import cv2
+
+         # Convert frames to numpy if needed
+         if torch.is_tensor(frames):
+             frames = frames.cpu().numpy()
+
+         # Ensure frames are in THWC layout
+         if len(frames.shape) == 4:
+             frames = np.transpose(frames, (0, 2, 3, 1))  # TCHW -> THWC
+
+         # Map values to 0-255, clipping to avoid uint8 wrap-around
+         frames = np.clip((frames + 1.0) * 127.5, 0, 255).astype(np.uint8)
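+         # NOTE: the scaling above assumes frames in [-1, 1]; many diffusers
+         # pipelines return frames in [0, 1], in which case `frames * 255` is
+         # the correct mapping.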
+
+         # Get video dimensions
+         height, width = frames[0].shape[:2]
+
+         # Initialize video writer
+         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+         out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+
+         # Write frames (OpenCV expects BGR)
+         for frame in frames:
+             if len(frame.shape) == 3:
+                 frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+             out.write(frame)
+
+         out.release()
+         return output_path
+
+     except ImportError:
+         # Fallback: save as a GIF if cv2 is not available
+         from PIL import Image
+
+         if torch.is_tensor(frames):
+             frames = frames.cpu().numpy()
+
+         if len(frames.shape) == 4:
+             frames = np.transpose(frames, (0, 2, 3, 1))
+
+         frames = np.clip((frames + 1.0) * 127.5, 0, 255).astype(np.uint8)
+
+         gif_path = output_path.replace('.mp4', '.gif')
+         images = [Image.fromarray(frame) for frame in frames]
+         images[0].save(
+             gif_path,
+             save_all=True,
+             append_images=images[1:],
+             duration=33,  # ~30 fps
+             loop=0
+         )
+         return gif_path
+
+ def create_interface():
+     """Create the Gradio interface"""
+
+     with gr.Blocks(
+         title="Text to Video - Open-Sora-v2",
+         theme=gr.themes.Soft(),
+         css="""
+         .header-text {
+             text-align: center;
+             font-size: 2em;
+             margin-bottom: 0.5em;
+             background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
+             -webkit-background-clip: text;
+             -webkit-text-fill-color: transparent;
+         }
+         .subheader-text {
+             text-align: center;
+             color: #666;
+             margin-bottom: 2em;
+         }
+         .generate-btn {
+             background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
+             border: none;
+             color: white;
+             font-weight: bold;
+         }
+         .generate-btn:hover {
+             background: linear-gradient(45deg, #764ba2 0%, #667eea 100%);
+         }
+         """
+     ) as demo:
+
+         gr.Markdown("""
+         <div class="header-text">🎬 Text to Video Generator</div>
+         <div class="subheader-text">Powered by Open-Sora-v2 - Transform your ideas into stunning videos</div>
+         <div style="text-align: center; margin-bottom: 1em;">
+             <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #667eea; text-decoration: none;">
+                 Built with anycoder
+             </a>
+         </div>
+         """)
+
+         with gr.Row():
+             with gr.Column(scale=2):
+                 prompt_input = gr.Textbox(
+                     label="📝 Describe your video",
+                     placeholder="A beautiful sunset over the ocean with waves gently crashing on the shore, cinematic quality, 4K resolution...",
+                     lines=4,
+                     max_lines=6
+                 )
+
+                 with gr.Row():
+                     duration_input = gr.Slider(
+                         minimum=2,
+                         maximum=16,
+                         value=4,
+                         step=2,
+                         label="⏱️ Duration (seconds)"
+                     )
+
+                     quality_input = gr.Dropdown(
+                         choices=[
+                             ("720p HD", 720),
+                             ("1080p Full HD", 1080),
+                             ("4K Ultra HD", 2160)
+                         ],
+                         value=720,
+                         label="🎥 Quality"
+                     )
+
+                 with gr.Accordion("⚙️ Advanced Settings", open=False):
+                     with gr.Row():
+                         steps_input = gr.Slider(
+                             minimum=20,
+                             maximum=100,
+                             value=50,
+                             step=5,
+                             label="🔢 Inference Steps"
+                         )
+
+                         guidance_input = gr.Slider(
+                             minimum=1.0,
+                             maximum=20.0,
+                             value=7.5,
+                             step=0.5,
+                             label="🎯 Guidance Scale"
+                         )
+
+                 generate_btn = gr.Button(
+                     "🚀 Generate Video",
+                     variant="primary",
+                     size="lg",
+                     elem_classes=["generate-btn"]
+                 )
+
+             with gr.Column(scale=1):
+                 gr.Markdown("""
+                 ### 💡 Example Prompts
+
+                 - 🌅 "A serene mountain landscape at sunrise with golden light filtering through misty valleys"
+                 - 🏙️ "A futuristic cyberpunk city at night with neon signs reflecting on wet streets"
+                 - 🌊 "Underwater coral reef with colorful tropical fish swimming in crystal clear water"
+                 - 🌳 "A magical enchanted forest with glowing mushrooms and fireflies at twilight"
+
+                 ### ⚡ Tips for Best Results
+
+                 - Be descriptive and specific
+                 - Include visual style (cinematic, realistic, anime, etc.)
+                 - Mention lighting and atmosphere
+                 - Specify camera angles if desired
+                 """)
+
+         with gr.Row():
+             video_output = gr.Video(
+                 label="🎬 Generated Video",
+                 visible=False
+             )
+
+             loading_info = gr.Markdown(
+                 "✨ Your video will appear here after generation",
+                 visible=True
+             )
+
+         # Example prompts
+         example_prompts = [
+             [
+                 "A beautiful sunset over the ocean with waves gently crashing on the shore, cinematic quality, warm golden lighting",
+                 4, 720, 50, 7.5
+             ],
+             [
+                 "A serene mountain landscape at sunrise with mist rolling over the valleys, golden light filtering through the clouds",
+                 4, 720, 50, 7.5
+             ],
+             [
+                 "A bustling city street at night with neon signs reflecting on wet pavement, cyberpunk aesthetic, blade runner style",
+                 4, 720, 50, 7.5
+             ],
+             [
+                 "Underwater coral reef with colorful fish swimming, sun rays penetrating through the water, national geographic documentary style",
+                 4, 720, 50, 7.5
+             ]
+         ]
+
+         gr.Examples(
+             examples=example_prompts,
+             inputs=[prompt_input, duration_input, quality_input, steps_input, guidance_input],
+             label="🎯 Try these examples",
+             cache_examples=False
+         )
+
+         def generate_and_display(prompt, duration, quality, steps, guidance, progress=gr.Progress()):
+             try:
+                 # Calculate width based on quality (16:9 aspect ratio)
+                 width_map = {720: 1280, 1080: 1920, 2160: 3840}
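+                 # NOTE: the 1080p and 4K targets are aspirational; most video
+                 # diffusion models are trained at much lower resolutions, so
+                 # high settings may fail or run out of memory.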
+                 width = width_map.get(quality, 1280)
+
+                 # Generate video
+                 video_path = generate_video(
+                     prompt=prompt,
+                     duration=duration,
+                     height=quality,
+                     width=width,
+                     num_inference_steps=steps,
+                     guidance_scale=guidance,
+                     progress=progress
+                 )
+
+                 return {
+                     video_output: gr.Video(value=video_path, visible=True),
+                     loading_info: gr.Markdown(visible=False)
+                 }
+
+             except Exception as e:
+                 return {
+                     video_output: gr.Video(visible=False),
+                     loading_info: gr.Markdown(f"❌ Error: {str(e)}", visible=True)
+                 }
+
+         generate_btn.click(
+             fn=generate_and_display,
+             inputs=[prompt_input, duration_input, quality_input, steps_input, guidance_input],
+             outputs=[video_output, loading_info],
+             show_progress="full"  # Gradio 4 expects a string here, not True
+         )
+
+         # Initialize model on page load
+         demo.load(
+             fn=initialize_model,
+             inputs=[],
+             outputs=[],
+             queue=False
+         )
+
+     return demo
+
+ if __name__ == "__main__":
+     demo = create_interface()
+     # `queue=True` and `show_tips=True` are not launch() kwargs in current
+     # Gradio; queuing is enabled explicitly instead.
+     demo.queue()
+     demo.launch(
+         share=True,
+         show_error=True
+     )
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ gradio
+ torch
+ diffusers
+ transformers
+ accelerate
+ numpy
+ Pillow
+ opencv-python
+ spaces