alexnasa committed
Commit f7bddbb · verified · 1 Parent(s): 09d2b03

Update app.py

Files changed (1)
  1. app.py +29 -5
app.py CHANGED

@@ -224,6 +224,7 @@ def run_pipeline(prompt_text, steps, image_paths, audio_file_path, max_duration
     if not audio_file_path:
         inference_mode = "TI"
         audio_path = None
+        tmp_audio_path = None
     else:
         audio_path = audio_file_path if isinstance(audio_file_path, str) else getattr(audio_file_path, "name", str(audio_file_path))

@@ -233,11 +234,34 @@ def run_pipeline(prompt_text, steps, image_paths, audio_file_path, max_duration
     else:
         img_paths = [image_data[0] for image_data in image_paths]

+    print(f'{session_id} is using inference_mode:{inference_mode} with steps:{steps} with {max_duration} frames')

-    # Prepare output
     output_dir = os.path.join(os.environ["PROCESSED_RESULTS"], session_id)
     os.makedirs(output_dir, exist_ok=True)

+    if audio_path:
+
+        def add_silence_to_audio_ffmpeg(audio_path, tmp_audio_path, silence_duration_s=0.5):
+
+            command = [
+                'ffmpeg',
+                '-i', audio_path,
+                '-f', 'lavfi',
+                '-t', str(silence_duration_s),
+                '-i', 'anullsrc=r=16000:cl=stereo',
+                '-filter_complex', '[1][0]concat=n=2:v=0:a=1[out]',
+                '-map', '[out]',
+                '-y', tmp_audio_path,
+                '-loglevel', 'quiet'
+            ]
+
+            subprocess.run(command, check=True)
+
+
+        tmp_audio_path = os.path.join(output_dir, "tmp_audio.wav")
+
+        add_silence_to_audio_ffmpeg(audio_path, tmp_audio_path)
+
     # Random filename
     filename = f"gen_{uuid.uuid4().hex[:10]}"
     width, height = 832, 480
@@ -247,7 +271,7 @@ def run_pipeline(prompt_text, steps, image_paths, audio_file_path, max_duration
     runner.inference_loop(
         prompt_text,
         img_paths,
-        audio_path,
+        tmp_audio_path,
         output_dir,
         filename,
         inference_mode,
@@ -332,7 +356,7 @@ with gr.Blocks(css=css) as demo:
         gr.Markdown("**SETTINGS**")

         default_steps = 10
-        default_max_duration = 45
+        default_max_duration = 20

         max_duration = gr.Slider(minimum=20, maximum=95, value=default_max_duration, step=25, label="Frames")
         steps_input = gr.Slider(minimum=10, maximum=50, value=default_steps, step=5, label="Diffusion Steps")
@@ -392,7 +416,7 @@ with gr.Blocks(css=css) as demo:
             10,
             ["./examples/art.png"],
             "./examples/art.wav",
-            45,
+            70,
         ],

         [
@@ -408,7 +432,7 @@ with gr.Blocks(css=css) as demo:
             40,
             ["./examples/amber.png", "./examples/jacket.png"],
             "./examples/fictional.wav",
-            70,
+            70,
         ],

     ],
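For context on the main change: the new add_silence_to_audio_ffmpeg helper pads the uploaded audio with half a second of leading silence before it is handed to runner.inference_loop. The snippet below is a minimal standalone sketch of that step, not part of app.py: the prepend_silence and duration_s names, the example file paths, and the ffprobe check are illustrative additions. It assumes ffmpeg and ffprobe are on PATH and that the source audio already matches the 16 kHz stereo layout declared by the anullsrc silence source, since the concat filter expects matching stream parameters.

import subprocess


def prepend_silence(src_path: str, dst_path: str, silence_s: float = 0.5) -> None:
    """Write dst_path = silence_s seconds of silence followed by src_path's audio."""
    subprocess.run(
        [
            "ffmpeg",
            "-i", src_path,                        # input 0: original audio
            "-f", "lavfi",
            "-t", str(silence_s),
            "-i", "anullsrc=r=16000:cl=stereo",    # input 1: generated silence
            # [1][0] places the silence segment first, then the original audio.
            "-filter_complex", "[1][0]concat=n=2:v=0:a=1[out]",
            "-map", "[out]",
            "-y", dst_path,
            "-loglevel", "error",                  # unlike the app's 'quiet', keep errors visible
        ],
        check=True,
    )


def duration_s(path: str) -> float:
    """Return a media file's duration in seconds via ffprobe."""
    out = subprocess.run(
        [
            "ffprobe", "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            path,
        ],
        check=True, capture_output=True, text=True,
    )
    return float(out.stdout.strip())


if __name__ == "__main__":
    # Placeholder file names for illustration only.
    prepend_silence("input.wav", "padded.wav")
    print(f"added {duration_s('padded.wav') - duration_s('input.wav'):.2f}s of leading silence")

One difference from the committed code is deliberate: the app routes ffmpeg output to -loglevel quiet, while the sketch uses error so a failed pad shows up in the log instead of failing silently.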