Update app.py
app.py CHANGED
@@ -224,6 +224,7 @@ def run_pipeline(prompt_text, steps, image_paths, audio_file_path, max_duration
     if not audio_file_path:
         inference_mode = "TI"
         audio_path = None
+        tmp_audio_path = None
     else:
         audio_path = audio_file_path if isinstance(audio_file_path, str) else getattr(audio_file_path, "name", str(audio_file_path))
 
@@ -233,11 +234,34 @@ def run_pipeline(prompt_text, steps, image_paths, audio_file_path, max_duration
     else:
         img_paths = [image_data[0] for image_data in image_paths]
 
+    print(f'{session_id} is using inference_mode:{inference_mode} with steps:{steps} with {max_duration} frames')
 
-    # Prepare output
     output_dir = os.path.join(os.environ["PROCESSED_RESULTS"], session_id)
     os.makedirs(output_dir, exist_ok=True)
 
+    if audio_path:
+
+        def add_silence_to_audio_ffmpeg(audio_path, tmp_audio_path, silence_duration_s=0.5):
+
+            command = [
+                'ffmpeg',
+                '-i', audio_path,
+                '-f', 'lavfi',
+                '-t', str(silence_duration_s),
+                '-i', 'anullsrc=r=16000:cl=stereo',
+                '-filter_complex', '[1][0]concat=n=2:v=0:a=1[out]',
+                '-map', '[out]',
+                '-y', tmp_audio_path,
+                '-loglevel', 'quiet'
+            ]
+
+            subprocess.run(command, check=True)
+
+
+        tmp_audio_path = os.path.join(output_dir, "tmp_audio.wav")
+
+        add_silence_to_audio_ffmpeg(audio_path, tmp_audio_path)
+
     # Random filename
     filename = f"gen_{uuid.uuid4().hex[:10]}"
     width, height = 832, 480
@@ -247,7 +271,7 @@ def run_pipeline(prompt_text, steps, image_paths, audio_file_path, max_duration
     runner.inference_loop(
         prompt_text,
         img_paths,
-
+        tmp_audio_path,
         output_dir,
         filename,
         inference_mode,
@@ -332,7 +356,7 @@ with gr.Blocks(css=css) as demo:
             gr.Markdown("**SETTINGS**")
 
             default_steps = 10
-            default_max_duration =
+            default_max_duration = 20
 
             max_duration = gr.Slider(minimum=20, maximum=95, value=default_max_duration, step=25, label="Frames")
             steps_input = gr.Slider(minimum=10, maximum=50, value=default_steps, step=5, label="Diffusion Steps")
@@ -392,7 +416,7 @@ with gr.Blocks(css=css) as demo:
                 10,
                 ["./examples/art.png"],
                 "./examples/art.wav",
-
+                70,
             ],
 
             [
@@ -408,7 +432,7 @@ with gr.Blocks(css=css) as demo:
                 40,
                 ["./examples/amber.png", "./examples/jacket.png"],
                 "./examples/fictional.wav",
-                70,
+                70,
            ],
 
         ],
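
Note on the new padding helper: it prepends silence_duration_s seconds (0.5 by default) of silence to the uploaded audio before the file is passed to runner.inference_loop. The lavfi anullsrc source generates 16 kHz stereo silence, and concat=n=2:v=0:a=1 joins the two inputs in [1][0] order, i.e. silence first. Below is a minimal standalone sketch of the same technique for exercising the padding outside the Space; it assumes ffmpeg and ffprobe are on PATH, and input.wav / padded.wav are hypothetical file names used only for illustration.

import subprocess

def prepend_silence(src_path, dst_path, silence_s=0.5):
    # Same filter graph as the committed helper: input 1 is silence_s seconds of
    # 16 kHz stereo silence (anullsrc), concatenated in front of input 0.
    subprocess.run([
        "ffmpeg", "-y",
        "-i", src_path,
        "-f", "lavfi", "-t", str(silence_s), "-i", "anullsrc=r=16000:cl=stereo",
        "-filter_complex", "[1][0]concat=n=2:v=0:a=1[out]",
        "-map", "[out]",
        dst_path,
    ], check=True)

def duration_s(path):
    # ffprobe prints the container duration in seconds as a bare number.
    probe = subprocess.run(
        ["ffprobe", "-v", "error", "-show_entries", "format=duration",
         "-of", "default=noprint_wrappers=1:nokey=1", path],
        check=True, capture_output=True, text=True,
    )
    return float(probe.stdout.strip())

if __name__ == "__main__":
    prepend_silence("input.wav", "padded.wav")  # hypothetical paths
    print(duration_s("padded.wav") - duration_s("input.wav"))  # roughly 0.5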