Rob Jaret committed on
Commit 1151d94 · 1 Parent(s): 198141f

Added ability to exclude parameters from being averaged

Files changed (1)
app.py +132 -83
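
The change threads a new `exclusion_key_list` argument through `AverageRaveModels` so that named parameters can be skipped during averaging, and adds a Gradio text area for entering those keys. A minimal sketch of the averaging-with-exclusions logic, using hypothetical plain tensor dicts in place of the app's TorchScript models:

```python
import torch

def average_params(params_a, params_b, ratio_a=0.5, exclusion_key_list=()):
    """Average two parameter dicts elementwise, skipping excluded keys."""
    averaged, not_averaged = {}, {}
    for key, tensor_a in params_a.items():
        if key in exclusion_key_list:
            not_averaged[key] = "in exclusion list"
        elif key in params_b:
            averaged[key] = tensor_a * ratio_a + params_b[key] * (1 - ratio_a)
        else:
            not_averaged[key] = "missing from model B"
    return averaged, not_averaged

params_a = {"w": torch.ones(2), "b": torch.zeros(2)}
params_b = {"w": torch.zeros(2), "b": torch.ones(2)}
avg, skipped = average_params(params_a, params_b, exclusion_key_list=("b",))
# avg == {"w": tensor([0.5, 0.5])}; skipped == {"b": "in exclusion list"}
```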
app.py CHANGED
@@ -65,7 +65,7 @@ import librosa
 import ast
 import os
 
-def AverageRaveModels(rave_a, rave_b, bias = 0):
+def AverageRaveModels(rave_a, rave_b, bias = 0, exclusion_key_list = []):
 
     r1_ratio = .5
     r2_ratio = .5
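
One aside on the new signature: `exclusion_key_list = []` is a mutable default argument, which Python evaluates once at function definition time. The function only reads the list, so this is harmless here, but the conventional defensive form is a `None` sentinel. A sketch, not the committed code:

```python
def AverageRaveModels(rave_a, rave_b, bias=0, exclusion_key_list=None):
    # A None sentinel avoids sharing one list object across all calls.
    if exclusion_key_list is None:
        exclusion_key_list = []
    ...
```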
@@ -95,21 +95,28 @@ def AverageRaveModels(rave_a, rave_b, bias = 0):
     # for reporting
     keys_averaged={}
     keys_not_averaged={}
+    print("Excluded Keys:", exclusion_key_list)
+
     for key in rave_a_params:
-        if key in rave_b_params:
-            try:
-                avg[key] = ((rave_a_params[key] * r1_ratio) + (rave_b_params[key] * r2_ratio))
-                keys_averaged[key]=(key, rave_a_params[key].shape, rave_b_params[key].shape, "")
-            except Exception as e:
-                print(f"Error averaging key {key}: {e}")
-                keys_not_averaged[key]=(key, rave_a_params[key].shape, rave_b_params[key].shape, e)
+        if key in exclusion_key_list:
+            print("Not averaging param (in exclusion list):", key)
+            keys_not_averaged[key]=(key, "-", "-", "Key in exclusion list.")
         else:
-            print(f"Key {key} not found in rave_b parameters, skipping.")
-            # keys_not_averaged(key)
-            keys_not_averaged[key]=(key, rave_a_params[key].shape, "n/a", "Key not found in rave_b parameters.")
+            if key in rave_b_params:
+                try:
+                    avg[key] = ((rave_a_params[key] * r1_ratio) + (rave_b_params[key] * r2_ratio))
+                    keys_averaged[key]=(key, rave_a_params[key].shape, rave_b_params[key].shape, "")
+                except Exception as e:
+                    print(f"Error averaging key {key}: {e}")
+                    keys_not_averaged[key]=(key, rave_a_params[key].shape, rave_b_params[key].shape, e)
+            else:
+                print(f"Key {key} not found in rave_b parameters, skipping.")
+                # keys_not_averaged(key)
+                keys_not_averaged[key]=(key, rave_a_params[key].shape, "n/a", "Key not found in rave_b parameters.")
 
     messages["keys_averaged"] = keys_averaged
     messages["keys_not_averaged"] = keys_not_averaged
+    messages["excluded_keys_list"] = exclusion_key_list
 
     messages["stats"] = f'Numb Params Averaged: {len(keys_averaged)}\nNumb Params Unable to Average: {len(keys_not_averaged)}\nPercent Averaged: {len(keys_averaged) * 100/(len(keys_not_averaged) + len(keys_averaged)):5.2f}%'
 
@@ -118,81 +125,81 @@ def AverageRaveModels(rave_a, rave_b, bias = 0):
 
     return rave_avg, messages
 
-def ProcessRequest(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1, bias=0, function="AverageModels"):
-    if function == "AverageModels":
-        return GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple, bias)
-    elif function == "StyleTransfer":
-        print("Style Transfer not implemented yet.")
-        return None
+# def ProcessRequest(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1, bias=0, function="AverageModels"):
+#     if function == "AverageModels":
+#         return GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple, bias)
+#     elif function == "StyleTransfer":
+#         print("Style Transfer not implemented yet.")
+#         return None
 
 
-def GenerateStyleTransfer(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1):
+# def GenerateStyleTransfer(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1):
 
-    model_path_a = GetModelPath(model_name_a)
-    model_path_b = GetModelPath(model_name_b)
+#     model_path_a = GetModelPath(model_name_a)
+#     model_path_b = GetModelPath(model_name_b)
 
-    # Choose Audio File to encode/decode
-    if audio_file is None:
-        audio_file = os.path.join('assets', audio_file_name)
-        # print("Audio File Name:", audio_file_name)
+#     # Choose Audio File to encode/decode
+#     if audio_file is None:
+#         audio_file = os.path.join('assets', audio_file_name)
+#         # print("Audio File Name:", audio_file_name)
 
-    # Generate Audio Files
-    # Audio files are created in the assets folder
-    generate_audio_files = False
+#     # Generate Audio Files
+#     # Audio files are created in the assets folder
+#     generate_audio_files = False
 
-    rave_a = torch.jit.load(model_path_a)
-    rave_b = torch.jit.load(model_path_b)
+#     rave_a = torch.jit.load(model_path_a)
+#     rave_b = torch.jit.load(model_path_b)
 
-    # Let's load a sample audio file
-    y, sr = librosa.load(audio_file)
+#     # Let's load a sample audio file
+#     y, sr = librosa.load(audio_file)
 
-    sr_multiplied = sr * sr_multiple # Adjust sample rate if needed
-    print(f"Audio File Loaded: {audio_file}, sample_rate = {sr}")
+#     sr_multiplied = sr * sr_multiple # Adjust sample rate if needed
+#     print(f"Audio File Loaded: {audio_file}, sample_rate = {sr}")
 
-    # Convert audio to a PyTorch tensor and reshape it to the
-    # required shape: (batch_size, n_channels, n_samples)
-    audio = torch.from_numpy(y).float()
-    audio = audio.reshape(1, 1, -1)
+#     # Convert audio to a PyTorch tensor and reshape it to the
+#     # required shape: (batch_size, n_channels, n_samples)
+#     audio = torch.from_numpy(y).float()
+#     audio = audio.reshape(1, 1, -1)
 
-    messages={}
-    audio_outputs={}
+#     messages={}
+#     audio_outputs={}
 
-    # perform style transfer
-    with torch.no_grad():
-        # encode the audio with the new averaged models
-        try:
-            latent = rave_a.encode(audio)
+#     # perform style transfer
+#     with torch.no_grad():
+#         # encode the audio with the new averaged models
+#         try:
+#             latent = rave_a.encode(audio)
 
-            # decode individual and averaged models
-            decoded = rave_a.decode(latent)
-            style_transfer_decoded = rave_b.decode(latent)
+#             # decode individual and averaged models
+#             decoded = rave_a.decode(latent)
+#             style_transfer_decoded = rave_b.decode(latent)
 
-            audio_outputs['decoded'] = decoded
-            audio_outputs['style_transfer'] = style_transfer_decoded
+#             audio_outputs['decoded'] = decoded
+#             audio_outputs['style_transfer'] = style_transfer_decoded
 
-        except Exception as e:
-            print(f'Encoding process generated error: ', e)
+#         except Exception as e:
+#             print(f'Encoding process generated error: ', e)
 
-    model_a_file=model_path_a.rsplit("/")[-1]
-    model_b_file=model_path_b.rsplit("/")[-1]
+#     model_a_file=model_path_a.rsplit("/")[-1]
+#     model_b_file=model_path_b.rsplit("/")[-1]
 
-    # Original Audio
-    original_audio = (sr, y)
+#     # Original Audio
+#     original_audio = (sr, y)
 
-    # Decoded Audio
-    print("Encoded and Decoded using original models")
-    decoded_audio = (sr, decoded.detach().numpy().squeeze())
-    style_transfer_audio = (sr, style_transfer_decoded.detach().numpy().squeeze())
+#     # Decoded Audio
+#     print("Encoded and Decoded using original models")
+#     decoded_audio = (sr, decoded.detach().numpy().squeeze())
+#     style_transfer_audio = (sr, style_transfer_decoded.detach().numpy().squeeze())
 
-    # saveAudio('assets/' + model_a_file[: 7] + '_enc-dec.wav', a)
-    # saveAudio('assets/' model_a_file[: 7] + '-' model_b_file[: 7] + '_style_transfer.wav', a)
+#     # saveAudio('assets/' + model_a_file[: 7] + '_enc-dec.wav', a)
+#     # saveAudio('assets/' model_a_file[: 7] + '-' model_b_file[: 7] + '_style_transfer.wav', a)
 
 
-    messages["stats"] = f"Model A: {model_name_a}\nModel B: {model_name_b}\nAudio file: {os.path.basename(audio_file)}\nSample Rate Multiple for Style Transfer Version: {sr_multiple}\n\n" + messages["stats"]
+#     messages["stats"] = f"Model A: {model_name_a}\nModel B: {model_name_b}\nAudio file: {os.path.basename(audio_file)}\nSample Rate Multiple for Style Transfer Version: {sr_multiple}\n\n" + messages["stats"]
 
-    return original_audio, decoded_audio, style_transfer_audio, '', messages["stats"], '', ''
+#     return original_audio, decoded_audio, style_transfer_audio, '', messages["stats"], '', ''
 
-def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1, bias=0):
+def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1, bias=0, excluded_keys = []):
 
     ###############################################
     # Choose models from filenames dictionary created in previous cell
@@ -214,7 +221,7 @@ def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_f
     # Choose Audio File to encode/decode
     #####################################
     # audio_file_name = "RJM1240-Gestures.wav"
-    if audio_file is None:
+    if audio_file is None and audio_file_name is not None:
         audio_file = os.path.join('assets', audio_file_name)
         # print("Audio File Name:", audio_file_name)
 
@@ -230,7 +237,10 @@ def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_f
     # Let's load a sample audio file
     y, sr = librosa.load(audio_file)
 
-    sr_multiplied = sr * sr_multiple # Adjust sample rate if needed
+    sr_multiplied = sr
+    if sr_multiple is not None:
+        sr_multiplied = sr * sr_multiple # Adjust sample rate if needed
+
     print(f"Audio File Loaded: {audio_file}, sample_rate = {sr}")
 
     # Convert audio to a PyTorch tensor and reshape it to the
@@ -240,10 +250,26 @@ def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_f
 
     messages={}
     audio_outputs={}
+
+    print("Excluded Keys (raw):", excluded_keys)
+    excluded_keys_list = [] #sum(excluded_keys, []) # flatten list of lists
+
+    if '\n' in excluded_keys:
+        keys=excluded_keys.split('\n')
+        excluded_keys_list.extend(keys)
+    elif ',' in excluded_keys:
+        keys=excluded_keys.split(',')
+        excluded_keys_list.extend(keys)
+    elif ' ' in excluded_keys:
+        keys=excluded_keys.split(' ')
+        excluded_keys_list.extend(keys)
+
+
+    print("Excluded Keys List:", excluded_keys_list)
     for bias in biases:
         # Average the rave models
         # rave_avg, numb_params_mod, numb_params_unable_to_mod = AverageRaveModels(rave_a, rave_b, bias=bias)
-        rave_avg, new_msgs = AverageRaveModels(rave_a, rave_b, (-1 * bias))
+        rave_avg, new_msgs = AverageRaveModels(rave_a, rave_b, (-1 * bias), excluded_keys_list)
         messages |= new_msgs
 
         # now decode the results back to audio
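
The separator handling added in this hunk checks for exactly one delimiter kind, in order (newline, then comma, then space), so mixed input such as "a.weight, b.bias\nc.weight" is split only on newlines and a comma survives inside a token; tokens also keep surrounding whitespace. A hypothetical one-pass alternative using `re.split`, shown purely as a sketch:

```python
import re

def parse_excluded_keys(raw: str) -> list[str]:
    # Split on any run of commas and/or whitespace, dropping empty tokens.
    return [token for token in re.split(r"[,\s]+", raw) if token]

print(parse_excluded_keys("a.weight, b.bias\nc.weight"))
# -> ['a.weight', 'b.bias', 'c.weight']
```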
@@ -301,7 +327,7 @@ def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_f
     df_not_averaged.columns=['Param Name', 'Model A Shape', 'Model B Shape', 'Notes']
 
     messages["stats"] = f"Model A: {model_name_a}\nModel B: {model_name_b}\nAudio file: {os.path.basename(audio_file)}\nSample Rate Multiple for Averaged Version: {sr_multiple}\n\n" + messages["stats"]
-
+    messages["stats"] += f"\nExcluded Keys:\n{'\n'.join(map(str, messages['excluded_keys_list']))}"
     return original_audio, model_a_audio, model_b_audio, averaged_audio, messages["stats"], df_averaged, df_not_averaged
 
 import gradio as gr
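
One portability note on the appended stats line: it places `'\n'.join(...)` inside an f-string expression, and backslashes there are only legal from Python 3.12 onward. On older interpreters the join would need to happen outside the f-string, roughly like this sketch (the `messages` values here are placeholders):

```python
messages = {"stats": "", "excluded_keys_list": ["encoder.net.0.weight"]}
excluded_block = "\n".join(map(str, messages["excluded_keys_list"]))
messages["stats"] += f"\nExcluded Keys:\n{excluded_block}"
```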
@@ -320,31 +346,54 @@ description = "<p style='line-height: 1'>This app attempts to average two RAVE m
     "<li>Select a bias towards one of the models. A bias of 0 will average the two models equally. A positive bias will favor Model A, and a negative bias will favor Model B.</li></ul>" \
     "-->"
 
-
+# Inputs
+modelA = gr.Radio(model_path_config_keys, label="Select Model A", value="Multimbral Guitar", container=True)
+modelB = gr.Radio(model_path_config_keys, label="Select Model B", value="Water", container=True)
+audioFileSelect = gr.Dropdown(available_audio_files, label="Select from these audio files or upload your own below:", value="SingingBowl-OmniMic.wav",container=True)
+audioFileUpload = gr.Audio(label="...or upload your own audio file (max 60 seconds):", type="filepath", sources=["upload", "microphone"], max_length=60, waveform_options=waveform_options, format='wav')
+sampleRateMultiple = gr.Radio([.2, .5, .75, 1, 2, 4], label="Sample Rate Multiple (Averaged version only)", value=1, container=True)
+bias = gr.Slider(label="Bias towards Model A or B", minimum=-1, maximum=1, value=0, step=0.1, container=True)
+paramsToExclude = gr.TextArea(label="Params Not to Average", show_copy_button="True", placeholder="Enter parameter keys to exclude from averaging, separated by commas, spaces, or new lines",)
+
+# Outputs
+originalAudio = gr.Audio(label="Original Audio", sources=None, waveform_options=waveform_options, interactive=False)
+modelAAudio = gr.Audio(label="Encoded/Decoded through Model A", sources=None, waveform_options=waveform_options,)
+modelBAudio = gr.Audio(label="Encoded/Decoded through Model B", sources=None, waveform_options=waveform_options,)
+averagedAudio = gr.Audio(label="Encoded/Decoded through averaged model", sources=None, waveform_options=waveform_options,)
+infoText = gr.Textbox(label="Info:")
+dfParamsAveraged = gr.Dataframe(label="Params Averaged", show_copy_button="True", scale=100, column_widths=column_widths, interactive=True, headers=['Param Name', 'Model A Shape', 'Model B Shape', 'Notes'])
+drParamsNotAveraged = gr.Dataframe(label="Params Not Averaged", show_copy_button="True", scale=100, column_widths=column_widths, headers=['Param Name', 'Model A Shape', 'Model B Shape', 'Notes'])
 
 AverageModels = gr.Interface(title="Process Audio Through the Average of Two Rave Models", description=description,
     fn=GenerateRaveEncDecAudio,
     inputs=[
-        gr.Radio(model_path_config_keys, label="Select Model A", value="Multimbral Guitar", container=True),
-        gr.Radio(model_path_config_keys, label="Select Model B", value="Water", container=True),
-        gr.Dropdown(available_audio_files, label="Select from these audio files or upload your own below:", value="SingingBowl-OmniMic.wav",container=True),
-        gr.Audio(label="Upload an audio file (wav)", type="filepath", sources=["upload", "microphone"], max_length=60,
-                 waveform_options=waveform_options, format='wav'),],
+        modelA,
+        modelB,
+        audioFileSelect,
+        audioFileUpload,
+    ],
     additional_inputs=[
-        gr.Radio([.2, .5, .75, 1, 2, 4], label="Sample Rate Multiple (Averaged version only)", value=1, container=True),
-        gr.Slider(label="Bias towards Model A or B", minimum=-1, maximum=1, value=0, step=0.1, container=True),
+        sampleRateMultiple,
+        bias,
+        paramsToExclude,
    ],
    # if no way to pass dictionary, pass separate keys and values and zip them.
    outputs=[
-        gr.Audio(label="Original Audio", sources=None, waveform_options=waveform_options, interactive=False),
-        gr.Audio(label="Encoded/Decoded through Model A", sources=None, waveform_options=waveform_options,),
-        gr.Audio(label="Encoded/Decoded through Model B", sources=None, waveform_options=waveform_options,),
-        gr.Audio(label="Encoded/Decoded through averaged model", sources=None, waveform_options=waveform_options,),
-        gr.Textbox(label="Info:"),
-        gr.Dataframe(label="Params Averaged", show_copy_button="True", scale=100, column_widths=column_widths, headers=['Param Name', 'Model A Shape', 'Model B Shape', 'Notes']),
-        gr.Dataframe(label="Params Not Averaged", show_copy_button="True", scale=100, column_widths=column_widths, headers=['Param Name', 'Model A Shape', 'Model B Shape', 'Notes'])
+        originalAudio,
+        modelAAudio,
+        modelBAudio,
+        averagedAudio,
+        infoText,
+        dfParamsAveraged,
+        drParamsNotAveraged,
    ]
    ,fill_width=True
    )
 
+
+# def add_excluded_key(key_to_exclude: gr.SelectData):
+#     paramsToExclude.append([key_to_exclude])
+
+# dfParamsAveraged.select(add_excluded_key)
+
 AverageModels.launch(max_file_size=10 * gr.FileSize.MB, share=True)
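
The hunk ends with a commented-out stub for click-to-exclude: selecting a row in the "Params Averaged" dataframe would append that param name to the exclusion text area. A hedged sketch of one way to wire that up, assuming Gradio's `SelectData` event payload and untested against this app:

```python
def add_excluded_key(current_text: str, evt: gr.SelectData) -> str:
    # evt.value holds the clicked cell's contents; this assumes the user
    # clicks the 'Param Name' column.
    return f"{current_text}\n{evt.value}".strip()

# gr.Interface subclasses gr.Blocks, so a listener can be attached in its context.
with AverageModels:
    dfParamsAveraged.select(add_excluded_key, inputs=paramsToExclude, outputs=paramsToExclude)
```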
 