modify app
app.py CHANGED
@@ -113,6 +113,7 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
 
     ito_log = ""
     loss_values = []
+    all_results = []
     for log_entry, current_output, current_params, step, loss in mastering_transfer.inference_time_optimization(
         input_tensor, ito_reference_tensor, ito_config, initial_reference_feature
     ):
@@ -123,7 +124,7 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
         # Convert current_output to numpy array if it's a tensor
         if isinstance(current_output, torch.Tensor):
             current_output = current_output.cpu().numpy()
-
+
         if current_output.ndim == 1:
             current_output = current_output.reshape(-1, 1)
         elif current_output.ndim > 2:
@@ -137,28 +138,20 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
         # Denormalize the audio to int16
         current_output = denormalize_audio(current_output, dtype=np.int16)
 
-
+        all_results.append({
+            'step': step,
+            'audio': current_output,
+            'params': ito_param_output,
+            'log': log_entry,
+            'loss': loss
+        })
 
-
-    af_weights = [float(w.strip()) for w in af_weights.split(',')]
-    ito_generator = mastering_transfer.inference_time_optimization(
-        input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights
-    )
-
-    all_results = []
-    for result in ito_generator:
-        all_results.append(result)
-
-    min_loss_step = min(range(len(all_results)), key=lambda i: all_results[i]['loss'])
-
-    loss_df = pd.DataFrame([(r['step'], r['loss']) for r in all_results], columns=['step', 'loss'])
-
-    return all_results, min_loss_step, loss_df
+        yield (args.sample_rate, current_output), ito_param_output, step, ito_log, pd.DataFrame(loss_values), all_results
 
 def update_ito_output(all_results, selected_step):
     print(all_results)
     print(selected_step)
-    selected_result = all_results[selected_step]
+    selected_result = all_results[selected_step - 1]
     return (args.sample_rate, selected_result['audio']), selected_result['params'], selected_result['log']
 
 
@@ -187,11 +180,11 @@ with gr.Blocks() as demo:
 
     with gr.Tab("YouTube Audio"):
         with gr.Row():
-
-
+            input_youtube_url = gr.Textbox(label="Input YouTube URL")
+            reference_youtube_url = gr.Textbox(label="Reference YouTube URL")
         with gr.Row():
-
-
+            input_audio_yt = gr.Audio(label="Input Audio (Do not put when using YouTube URL)")
+            reference_audio_yt = gr.Audio(label="Reference Audio (Do not put when using YouTube URL)")
 
         process_button_yt = gr.Button("Process Mastering Style Transfer")
 
@@ -251,9 +244,9 @@ with gr.Blocks() as demo:
         return loss_df, gr.update(maximum=len(results), value=min_step+1)
 
     ito_button.click(
-
+        perform_ito,
         inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
-        outputs=[
+        outputs=[ito_output_audio, ito_param_output, ito_step_slider, ito_log, ito_loss_plot, all_results]
     ).then(
         update_ito_output,
         inputs=[all_results, ito_step_slider],
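The core of this change is that perform_ito becomes a generator: instead of collecting every optimization step and returning once, it yields an interim result (audio, parameters, step, log, loss curve, accumulated results) after each step, which Gradio streams to the bound outputs as they arrive. A minimal, self-contained sketch of that pattern, where fake_ito and the widget names are hypothetical stand-ins for the app's perform_ito and mastering_transfer pipeline:

import gradio as gr

def fake_ito(num_steps):
    # Hypothetical stand-in for perform_ito / inference_time_optimization.
    results = []           # mirrors all_results in app.py
    for step in range(1, int(num_steps) + 1):
        loss = 1.0 / step  # placeholder loss value
        results.append({'step': step, 'loss': loss})
        # Each yield pushes an interim update to every bound output.
        yield f"step {step}: loss={loss:.3f}", results

with gr.Blocks() as demo:
    n = gr.Number(value=5, label="Steps", precision=0)
    log = gr.Textbox(label="ITO log")
    results_state = gr.State([])  # keeps per-step results for later events
    gr.Button("Run ITO").click(fake_ito, inputs=[n], outputs=[log, results_state])

demo.launch()

Because the handler is a generator, each yield overwrites the outputs in place, which is what lets the app's log, slider, and loss plot update while the optimization is still running.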
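Two details in the event wiring are easy to miss: .click(...).then(...) runs the second callback only after the first event has finished, and the step slider is 1-based while the results list is 0-based, which is why update_ito_output indexes with selected_step - 1. A short sketch of the same chaining under assumed, simplified component names:

import gradio as gr

def make_results():
    # Hypothetical stand-in for perform_ito: pretend we ran 10 ITO steps.
    results = [{'step': i + 1, 'loss': 1.0 / (i + 1)} for i in range(10)]
    # Resize the slider to the number of steps and reset it to step 1.
    return results, gr.update(maximum=len(results), value=1)

def pick(results, selected_step):
    # The slider is 1-based; the list is 0-based (int() guards against floats).
    return str(results[int(selected_step) - 1])

with gr.Blocks() as demo:
    results_state = gr.State([])
    step_slider = gr.Slider(1, 10, step=1, value=1, label="ITO Step")
    selected = gr.Textbox(label="Selected result")
    gr.Button("Run").click(
        make_results, outputs=[results_state, step_slider]
    ).then(
        pick, inputs=[results_state, step_slider], outputs=[selected]
    )

demo.launch()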