Rob Jaret committed · Commit 1151d94 · 1 Parent(s): 198141f
Added ability to exclude parameters from being averaged
app.py CHANGED
@@ -65,7 +65,7 @@ import librosa
 import ast
 import os
 
-def AverageRaveModels(rave_a, rave_b, bias = 0):
+def AverageRaveModels(rave_a, rave_b, bias = 0, exclusion_key_list = []):
 
     r1_ratio = .5
     r2_ratio = .5
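A side note on the new signature: a mutable default like exclusion_key_list = [] is created once and shared across every call in Python. The function only reads the list, so nothing breaks here, but the conventional safe form uses None. A minimal sketch of that variant (same names as the diff, behavior otherwise unchanged):

def AverageRaveModels(rave_a, rave_b, bias=0, exclusion_key_list=None):
    # bind a fresh list per call to avoid the shared-mutable-default pitfall
    if exclusion_key_list is None:
        exclusion_key_list = []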
@@ -95,21 +95,28 @@ def AverageRaveModels(rave_a, rave_b, bias = 0):
     # for reporting
     keys_averaged={}
     keys_not_averaged={}
+    print("Excluded Keys:", exclusion_key_list)
+
     for key in rave_a_params:
-        if key in rave_b_params:
-            try:
-                avg[key] = ((rave_a_params[key] * r1_ratio) + (rave_b_params[key] * r2_ratio))
-                keys_averaged[key]=(key, rave_a_params[key].shape, rave_b_params[key].shape, "")
-            except Exception as e:
-                print(f"Error averaging key {key}: {e}")
-                keys_not_averaged[key]=(key, rave_a_params[key].shape, rave_b_params[key].shape, e)
+        if key in exclusion_key_list:
+            print("Not averaging param (in exclusion list):", key)
+            keys_not_averaged[key]=(key, "-", "-", "Key in exclusion list.")
         else:
-            print(f"Key {key} not found in rave_b parameters, skipping.")
-            # keys_not_averaged(key)
-            keys_not_averaged[key]=(key, rave_a_params[key].shape, "n/a", "Key not found in rave_b parameters.")
+            if key in rave_b_params:
+                try:
+                    avg[key] = ((rave_a_params[key] * r1_ratio) + (rave_b_params[key] * r2_ratio))
+                    keys_averaged[key]=(key, rave_a_params[key].shape, rave_b_params[key].shape, "")
+                except Exception as e:
+                    print(f"Error averaging key {key}: {e}")
+                    keys_not_averaged[key]=(key, rave_a_params[key].shape, rave_b_params[key].shape, e)
+            else:
+                print(f"Key {key} not found in rave_b parameters, skipping.")
+                # keys_not_averaged(key)
+                keys_not_averaged[key]=(key, rave_a_params[key].shape, "n/a", "Key not found in rave_b parameters.")
 
     messages["keys_averaged"] = keys_averaged
     messages["keys_not_averaged"] = keys_not_averaged
+    messages["excluded_keys_list"] = exclusion_key_list
 
     messages["stats"] = f'Numb Params Averaged: {len(keys_averaged)}\nNumb Params Unable to Average: {len(keys_not_averaged)}\nPercent Averaged: {len(keys_averaged) * 100/(len(keys_not_averaged) + len(keys_averaged)):5.2f}%'
 
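To make the averaging and the new exclusion path concrete, here is a toy run of the same element-wise blend outside the app. The two "state dicts" and their key names are hypothetical, with r1_ratio = r2_ratio = .5 as in the function body:

import torch

rave_a_params = {"decoder.w": torch.ones(2),  "decoder.b": torch.zeros(2)}
rave_b_params = {"decoder.w": torch.zeros(2), "decoder.b": torch.ones(2)}
exclusion_key_list = ["decoder.b"]

avg = {}
for key in rave_a_params:
    if key in exclusion_key_list:
        continue  # the app records these under keys_not_averaged instead
    avg[key] = (rave_a_params[key] * .5) + (rave_b_params[key] * .5)

print(avg)  # {'decoder.w': tensor([0.5000, 0.5000])}; decoder.b is left out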
@@ -118,81 +125,81 @@ def AverageRaveModels(rave_a, rave_b, bias = 0):
 
     return rave_avg, messages
 
-def ProcessRequest(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1, bias=0, function="AverageModels"):
-    if function == "AverageModels":
-        return GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple, bias)
-    elif function == "StyleTransfer":
-        print("Style Transfer not implemented yet.")
-        return None
+# def ProcessRequest(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1, bias=0, function="AverageModels"):
+#     if function == "AverageModels":
+#         return GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple, bias)
+#     elif function == "StyleTransfer":
+#         print("Style Transfer not implemented yet.")
+#         return None
 
 
-def GenerateStyleTransfer(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1):
+# def GenerateStyleTransfer(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1):
 
-    model_path_a = GetModelPath(model_name_a)
-    model_path_b = GetModelPath(model_name_b)
+#     model_path_a = GetModelPath(model_name_a)
+#     model_path_b = GetModelPath(model_name_b)
 
-    # Choose Audio File to encode/decode
-    if audio_file is None:
-        audio_file = os.path.join('assets', audio_file_name)
-    # print("Audio File Name:", audio_file_name)
+#     # Choose Audio File to encode/decode
+#     if audio_file is None:
+#         audio_file = os.path.join('assets', audio_file_name)
+#     # print("Audio File Name:", audio_file_name)
 
-    # Generate Audio Files
-    # Audio files are created in the assets folder
-    generate_audio_files = False
+#     # Generate Audio Files
+#     # Audio files are created in the assets folder
+#     generate_audio_files = False
 
-    rave_a = torch.jit.load(model_path_a)
-    rave_b = torch.jit.load(model_path_b)
+#     rave_a = torch.jit.load(model_path_a)
+#     rave_b = torch.jit.load(model_path_b)
 
-    # Let's load a sample audio file
-    y, sr = librosa.load(audio_file)
+#     # Let's load a sample audio file
+#     y, sr = librosa.load(audio_file)
 
-    sr_multiplied = sr * sr_multiple # Adjust sample rate if needed
-    print(f"Audio File Loaded: {audio_file}, sample_rate = {sr}")
+#     sr_multiplied = sr * sr_multiple # Adjust sample rate if needed
+#     print(f"Audio File Loaded: {audio_file}, sample_rate = {sr}")
 
-    # Convert audio to a PyTorch tensor and reshape it to the
-    # required shape: (batch_size, n_channels, n_samples)
-    audio = torch.from_numpy(y).float()
-    audio = audio.reshape(1, 1, -1)
+#     # Convert audio to a PyTorch tensor and reshape it to the
+#     # required shape: (batch_size, n_channels, n_samples)
+#     audio = torch.from_numpy(y).float()
+#     audio = audio.reshape(1, 1, -1)
 
-    messages={}
-    audio_outputs={}
+#     messages={}
+#     audio_outputs={}
 
-    # perform style transfer
-    with torch.no_grad():
-        # encode the audio with the new averaged models
-        try:
-            latent = rave_a.encode(audio)
+#     # perform style transfer
+#     with torch.no_grad():
+#         # encode the audio with the new averaged models
+#         try:
+#             latent = rave_a.encode(audio)
 
-            # decode individual and averaged models
-            decoded = rave_a.decode(latent)
-            style_transfer_decoded = rave_b.decode(latent)
+#             # decode individual and averaged models
+#             decoded = rave_a.decode(latent)
+#             style_transfer_decoded = rave_b.decode(latent)
 
-            audio_outputs['decoded'] = decoded
-            audio_outputs['style_transfer'] = style_transfer_decoded
+#             audio_outputs['decoded'] = decoded
+#             audio_outputs['style_transfer'] = style_transfer_decoded
 
-        except Exception as e:
-            print(f'Encoding process generated error: ', e)
+#         except Exception as e:
+#             print(f'Encoding process generated error: ', e)
 
-    model_a_file=model_path_a.rsplit("/")[-1]
-    model_b_file=model_path_b.rsplit("/")[-1]
+#     model_a_file=model_path_a.rsplit("/")[-1]
+#     model_b_file=model_path_b.rsplit("/")[-1]
 
-    # Original Audio
-    original_audio = (sr, y)
+#     # Original Audio
+#     original_audio = (sr, y)
 
-    # Decoded Audio
-    print("Encoded and Decoded using original models")
-    decoded_audio = (sr, decoded.detach().numpy().squeeze())
-    style_transfer_audio = (sr, style_transfer_decoded.detach().numpy().squeeze())
+#     # Decoded Audio
+#     print("Encoded and Decoded using original models")
+#     decoded_audio = (sr, decoded.detach().numpy().squeeze())
+#     style_transfer_audio = (sr, style_transfer_decoded.detach().numpy().squeeze())
 
-    # saveAudio('assets/' + model_a_file[: 7] + '_enc-dec.wav', a)
-    # saveAudio('assets/' model_a_file[: 7] + '-' model_b_file[: 7] + '_style_transfer.wav', a)
+#     # saveAudio('assets/' + model_a_file[: 7] + '_enc-dec.wav', a)
+#     # saveAudio('assets/' model_a_file[: 7] + '-' model_b_file[: 7] + '_style_transfer.wav', a)
 
 
-    messages["stats"] = f"Model A: {model_name_a}\nModel B: {model_name_b}\nAudio file: {os.path.basename(audio_file)}\nSample Rate Multiple for Style Transfer Version: {sr_multiple}\n\n" + messages["stats"]
+#     messages["stats"] = f"Model A: {model_name_a}\nModel B: {model_name_b}\nAudio file: {os.path.basename(audio_file)}\nSample Rate Multiple for Style Transfer Version: {sr_multiple}\n\n" + messages["stats"]
 
-    return original_audio, decoded_audio, style_transfer_audio, '', messages["stats"], '', ''
+#     return original_audio, decoded_audio, style_transfer_audio, '', messages["stats"], '', ''
 
-def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1, bias=0):
+def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1, bias=0, excluded_keys = []):
 
     ###############################################
     # Choose models from filenames dictionary created in previous cell
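The two functions above are commented out rather than deleted, and the style-transfer idea they preserve is simple: encode with one model, decode the same latent with the other. A condensed sketch of that path, assuming rave_a and rave_b are TorchScript RAVE exports with encode/decode methods as in the commented code (the model paths are hypothetical):

import torch
import librosa

rave_a = torch.jit.load("models/model_a.ts")  # hypothetical paths
rave_b = torch.jit.load("models/model_b.ts")

y, sr = librosa.load("assets/SingingBowl-OmniMic.wav")  # the app's default clip
audio = torch.from_numpy(y).float().reshape(1, 1, -1)   # (batch, channels, samples)

with torch.no_grad():
    latent = rave_a.encode(audio)        # gestures captured in model A's latent space
    transferred = rave_b.decode(latent)  # re-rendered through model B's decoder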
@@ -214,7 +221,7 @@ def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_f
     # Choose Audio File to encode/decode
     #####################################
     # audio_file_name = "RJM1240-Gestures.wav"
-    if audio_file is None:
+    if audio_file is None and audio_file_name is not None:
         audio_file = os.path.join('assets', audio_file_name)
     # print("Audio File Name:", audio_file_name)
 
@@ -230,7 +237,10 @@ def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_f
     # Let's load a sample audio file
     y, sr = librosa.load(audio_file)
 
-    sr_multiplied = sr * sr_multiple # Adjust sample rate if needed
+    sr_multiplied = sr
+    if sr_multiple is not None:
+        sr_multiplied = sr * sr_multiple # Adjust sample rate if needed
+
     print(f"Audio File Loaded: {audio_file}, sample_rate = {sr}")
 
     # Convert audio to a PyTorch tensor and reshape it to the
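The sample-rate multiple works because Gradio's Audio component accepts a (sample_rate, numpy_array) tuple: the same samples handed back with a larger rate play faster and higher, and with a smaller rate slower and lower, no resampling involved. A minimal illustration (the silent buffer is just a stand-in):

import numpy as np

sr = 22050                                # librosa.load's default rate
samples = np.zeros(sr, dtype=np.float32)  # one second of stand-in audio
octave_up = (sr * 2, samples)             # same data, half the duration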
@@ -240,10 +250,26 @@ def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_f
 
     messages={}
     audio_outputs={}
+
+    print("Excluded Keys (raw):", excluded_keys)
+    excluded_keys_list = [] #sum(excluded_keys, []) # flatten list of lists
+
+    if '\n' in excluded_keys:
+        keys=excluded_keys.split('\n')
+        excluded_keys_list.extend(keys)
+    elif ',' in excluded_keys:
+        keys=excluded_keys.split(',')
+        excluded_keys_list.extend(keys)
+    elif ' ' in excluded_keys:
+        keys=excluded_keys.split(' ')
+        excluded_keys_list.extend(keys)
+
+
+    print("Excluded Keys List:", excluded_keys_list)
     for bias in biases:
         # Average the rave models
         # rave_avg, numb_params_mod, numb_params_unable_to_mod = AverageRaveModels(rave_a, rave_b, bias=bias)
-        rave_avg, new_msgs = AverageRaveModels(rave_a, rave_b, (-1 * bias))
+        rave_avg, new_msgs = AverageRaveModels(rave_a, rave_b, (-1 * bias), excluded_keys_list)
         messages |= new_msgs
 
         # no decode the results back to audio
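One edge case in the separator cascade above: a single key pasted with no newline, comma, or space matches none of the branches and is silently dropped, and the split pieces can carry stray whitespace. A more forgiving parse, offered as an alternative sketch rather than what the commit ships:

import re

# one pattern covers commas, spaces, and newlines, drops empty pieces,
# and keeps a lone key that has no separator at all
excluded_keys_list = [k for k in re.split(r"[,\s]+", excluded_keys) if k]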
@@ -301,7 +327,7 @@ def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_f
     df_not_averaged.columns=['Param Name', 'Model A Shape', 'Model B Shape', 'Notes']
 
     messages["stats"] = f"Model A: {model_name_a}\nModel B: {model_name_b}\nAudio file: {os.path.basename(audio_file)}\nSample Rate Multiple for Averaged Version: {sr_multiple}\n\n" + messages["stats"]
-
+    messages["stats"] += f"\nExcluded Keys:\n{'\n'.join(map(str, messages['excluded_keys_list']))}"
     return original_audio, model_a_audio, model_b_audio, averaged_audio, messages["stats"], df_averaged, df_not_averaged
 
 import gradio as gr
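Worth flagging: the appended line places '\n'.join(...) inside an f-string replacement field, and backslashes there are only legal from Python 3.12 onward (PEP 701); on 3.11 and earlier this is a SyntaxError. A version-agnostic equivalent hoists the join out:

excluded = "\n".join(map(str, messages["excluded_keys_list"]))
messages["stats"] += f"\nExcluded Keys:\n{excluded}"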
@@ -320,31 +346,54 @@ description = "<p style='line-height: 1'>This app attempts to average two RAVE m
     "<li>Select a bias towards one of the models. A bias of 0 will average the two models equally. A positive bias will favor Model A, and a negative bias will favor Model B.</li></ul>" \
     "-->"
 
-
+# Inputs
+modelA = gr.Radio(model_path_config_keys, label="Select Model A", value="Multimbral Guitar", container=True)
+modelB = gr.Radio(model_path_config_keys, label="Select Model B", value="Water", container=True)
+audioFileSelect = gr.Dropdown(available_audio_files, label="Select from these audio files or upload your own below:", value="SingingBowl-OmniMic.wav",container=True)
+audioFileUpload = gr.Audio(label="...or upload your own audio file (max 60 seconds):", type="filepath", sources=["upload", "microphone"], max_length=60, waveform_options=waveform_options, format='wav')
+sampleRateMultiple = gr.Radio([.2, .5, .75, 1, 2, 4], label="Sample Rate Multiple (Averaged version only)", value=1, container=True)
+bias = gr.Slider(label="Bias towards Model A or B", minimum=-1, maximum=1, value=0, step=0.1, container=True)
+paramsToExclude = gr.TextArea(label="Params Not to Average", show_copy_button="True", placeholder="Enter parameter keys to exclude from averaging, separated by commas, spaces, or new lines",)
+
+# Outputs
+originalAudio = gr.Audio(label="Original Audio", sources=None, waveform_options=waveform_options, interactive=False)
+modelAAudio = gr.Audio(label="Encoded/Decoded through Model A", sources=None, waveform_options=waveform_options,)
+modelBAudio = gr.Audio(label="Encoded/Decoded through Model B", sources=None, waveform_options=waveform_options,)
+averagedAudio = gr.Audio(label="Encoded/Decoded through averaged model", sources=None, waveform_options=waveform_options,)
+infoText = gr.Textbox(label="Info:")
+dfParamsAveraged = gr.Dataframe(label="Params Averaged", show_copy_button="True", scale=100, column_widths=column_widths, interactive=True, headers=['Param Name', 'Model A Shape', 'Model B Shape', 'Notes'])
+drParamsNotAveraged = gr.Dataframe(label="Params Not Averaged", show_copy_button="True", scale=100, column_widths=column_widths, headers=['Param Name', 'Model A Shape', 'Model B Shape', 'Notes'])
 
 AverageModels = gr.Interface(title="Process Audio Through the Average of Two Rave Models", description=description,
                 fn=GenerateRaveEncDecAudio,
                 inputs=[
-                    [five deleted lines (old inline input list), blank in the page render]
+                    modelA,
+                    modelB,
+                    audioFileSelect,
+                    audioFileUpload,
+                ],
                 additional_inputs=[
-                    [two deleted lines (old inline additional inputs), blank in the page render]
+                    sampleRateMultiple,
+                    bias,
+                    paramsToExclude,
                 ],
                 # if no way to pass dictionary, pass separate keys and values and zip them.
                 outputs=[
-                    [seven deleted lines (old inline output list), blank in the page render]
+                    originalAudio,
+                    modelAAudio,
+                    modelBAudio,
+                    averagedAudio,
+                    infoText,
+                    dfParamsAveraged,
+                    drParamsNotAveraged,
                 ]
                 ,fill_width=True
 )
 
+
+# def add_excluded_key(key_to_exclude: gr.SelectData):
+#     paramsToExclude.append([key_to_exclude])
+
+# dfParamsAveraged.select(add_excluded_key)
+
 AverageModels.launch(max_file_size=10 * gr.FileSize.MB, share=True)
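Gradio hands the callback its inputs followed by additional_inputs, in order, so the new paramsToExclude textbox arrives as the excluded_keys string. A direct call with the same argument shape, using the interface's default values (the excluded parameter names are made up for illustration):

outputs = GenerateRaveEncDecAudio(
    "Multimbral Guitar", "Water",      # modelA, modelB radio values
    "SingingBowl-OmniMic.wav", None,   # dropdown selection, no upload
    sr_multiple=1, bias=0,
    excluded_keys="decoder.net.0.weight, decoder.net.2.bias",
)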
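Finally, the commented-out add_excluded_key lines gesture at a click-to-exclude flow: selecting a row in the "Params Averaged" table would append that key to the exclusion textbox. As written it cannot work, since a Gradio component is not a list to .append to; an event handler has to return the textbox's new value. A rough, untested sketch of that wiring, assuming gradio 4.x event semantics, would replace the launch call with something like:

with gr.Blocks() as demo:
    AverageModels.render()

    def add_excluded_key(current_text, evt: gr.SelectData):
        # evt.value is the clicked cell; return the textbox's new contents
        return (current_text + "\n" + str(evt.value)).strip()

    dfParamsAveraged.select(add_excluded_key, inputs=paramsToExclude, outputs=paramsToExclude)

demo.launch(max_file_size=10 * gr.FileSize.MB, share=True)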