modify app
app.py CHANGED
@@ -113,6 +113,7 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
 
     ito_log = ""
     loss_values = []
+    all_results = []
     for log_entry, current_output, current_params, step, loss in mastering_transfer.inference_time_optimization(
         input_tensor, ito_reference_tensor, ito_config, initial_reference_feature
     ):
@@ -123,7 +124,7 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
         # Convert current_output to numpy array if it's a tensor
         if isinstance(current_output, torch.Tensor):
             current_output = current_output.cpu().numpy()
-
+
         if current_output.ndim == 1:
             current_output = current_output.reshape(-1, 1)
         elif current_output.ndim > 2:
@@ -137,28 +138,20 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
         # Denormalize the audio to int16
         current_output = denormalize_audio(current_output, dtype=np.int16)
 
-
+        all_results.append({
+            'step': step,
+            'audio': current_output,
+            'params': ito_param_output,
+            'log': log_entry,
+            'loss': loss
+        })
 
-
-    af_weights = [float(w.strip()) for w in af_weights.split(',')]
-    ito_generator = mastering_transfer.inference_time_optimization(
-        input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights
-    )
-
-    all_results = []
-    for result in ito_generator:
-        all_results.append(result)
-
-    min_loss_step = min(range(len(all_results)), key=lambda i: all_results[i]['loss'])
-
-    loss_df = pd.DataFrame([(r['step'], r['loss']) for r in all_results], columns=['step', 'loss'])
-
-    return all_results, min_loss_step, loss_df
+        yield (args.sample_rate, current_output), ito_param_output, step, ito_log, pd.DataFrame(loss_values), all_results
 
 def update_ito_output(all_results, selected_step):
     print(all_results)
     print(selected_step)
-    selected_result = all_results[selected_step]
+    selected_result = all_results[selected_step - 1]
     return (args.sample_rate, selected_result['audio']), selected_result['params'], selected_result['log']
 
 
@@ -187,11 +180,11 @@ with gr.Blocks() as demo:
 
     with gr.Tab("YouTube Audio"):
         with gr.Row():
-
-
+            input_youtube_url = gr.Textbox(label="Input YouTube URL")
+            reference_youtube_url = gr.Textbox(label="Reference YouTube URL")
         with gr.Row():
-
-
+            input_audio_yt = gr.Audio(label="Input Audio (Do not put when using YouTube URL)")
+            reference_audio_yt = gr.Audio(label="Reference Audio (Do not put when using YouTube URL)")
 
         process_button_yt = gr.Button("Process Mastering Style Transfer")
 
@@ -251,9 +244,9 @@ with gr.Blocks() as demo:
         return loss_df, gr.update(maximum=len(results), value=min_step+1)
 
     ito_button.click(
-
+        perform_ito,
         inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
-        outputs=[
+        outputs=[ito_output_audio, ito_param_output, ito_step_slider, ito_log, ito_loss_plot, all_results]
     ).then(
         update_ito_output,
         inputs=[all_results, ito_step_slider],
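The core of this change is that perform_ito becomes a generator: instead of collecting every optimization step and returning once, it yields an interim result (audio, parameters, step, log, loss curve, accumulated results) after each step, which Gradio streams to the bound outputs as they arrive. A minimal, self-contained sketch of that pattern, where fake_ito and the widget names are hypothetical stand-ins for the app's perform_ito and mastering_transfer pipeline:

import gradio as gr

def fake_ito(num_steps):
    # Hypothetical stand-in for perform_ito / inference_time_optimization.
    results = []           # mirrors all_results in app.py
    for step in range(1, int(num_steps) + 1):
        loss = 1.0 / step  # placeholder loss value
        results.append({'step': step, 'loss': loss})
        # Each yield pushes an interim update to every bound output.
        yield f"step {step}: loss={loss:.3f}", results

with gr.Blocks() as demo:
    n = gr.Number(value=5, label="Steps", precision=0)
    log = gr.Textbox(label="ITO log")
    results_state = gr.State([])  # keeps per-step results for later events
    gr.Button("Run ITO").click(fake_ito, inputs=[n], outputs=[log, results_state])

demo.launch()

Because the handler is a generator, each yield overwrites the outputs in place, which is what lets the app's log, slider, and loss plot update while the optimization is still running.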
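Two details in the event wiring are easy to miss: .click(...).then(...) runs the second callback only after the first event has finished, and the step slider is 1-based while the results list is 0-based, which is why update_ito_output indexes with selected_step - 1. A short sketch of the same chaining under assumed, simplified component names:

import gradio as gr

def make_results():
    # Hypothetical stand-in for perform_ito: pretend we ran 10 ITO steps.
    results = [{'step': i + 1, 'loss': 1.0 / (i + 1)} for i in range(10)]
    # Resize the slider to the number of steps and reset it to step 1.
    return results, gr.update(maximum=len(results), value=1)

def pick(results, selected_step):
    # The slider is 1-based; the list is 0-based (int() guards against floats).
    return str(results[int(selected_step) - 1])

with gr.Blocks() as demo:
    results_state = gr.State([])
    step_slider = gr.Slider(1, 10, step=1, value=1, label="ITO Step")
    selected = gr.Textbox(label="Selected result")
    gr.Button("Run").click(
        make_results, outputs=[results_state, step_slider]
    ).then(
        pick, inputs=[results_state, step_slider], outputs=[selected]
    )

demo.launch()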