Rob Jaret committed · Commit 1151d94 · 1 Parent(s): 198141f
Added ability to exclude parameters from being averaged
app.py CHANGED
@@ -65,7 +65,7 @@ import librosa
 import ast
 import os
 
-def AverageRaveModels(rave_a, rave_b, bias = 0):
+def AverageRaveModels(rave_a, rave_b, bias = 0, exclusion_key_list = []):
 
     r1_ratio = .5
     r2_ratio = .5
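A side note on the new signature: a mutable default like exclusion_key_list = [] is created once and shared across every call in Python. The function only reads the list, so nothing breaks here, but the conventional safe form uses None. A minimal sketch of that variant (same names as the diff, behavior otherwise unchanged):

def AverageRaveModels(rave_a, rave_b, bias=0, exclusion_key_list=None):
    # bind a fresh list per call to avoid the shared-mutable-default pitfall
    if exclusion_key_list is None:
        exclusion_key_list = []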
@@ -95,21 +95,28 @@ def AverageRaveModels(rave_a, rave_b, bias = 0):
     # for reporting
     keys_averaged={}
     keys_not_averaged={}
+    print("Excluded Keys:", exclusion_key_list)
+
     for key in rave_a_params:
-        if key in rave_b_params:
-            try:
-                avg[key] = ((rave_a_params[key] * r1_ratio) + (rave_b_params[key] * r2_ratio))
-                keys_averaged[key]=(key, rave_a_params[key].shape, rave_b_params[key].shape, "")
-            except Exception as e:
-                print(f"Error averaging key {key}: {e}")
-                keys_not_averaged[key]=(key, rave_a_params[key].shape, rave_b_params[key].shape, e)
+        if key in exclusion_key_list:
+            print("Not averaging param (in exclusion list):", key)
+            keys_not_averaged[key]=(key, "-", "-", "Key in exclusion list.")
         else:
-            print(f"Key {key} not found in rave_b parameters, skipping.")
-            # keys_not_averaged(key)
-            keys_not_averaged[key]=(key, rave_a_params[key].shape, "n/a", "Key not found in rave_b parameters.")
+            if key in rave_b_params:
+                try:
+                    avg[key] = ((rave_a_params[key] * r1_ratio) + (rave_b_params[key] * r2_ratio))
+                    keys_averaged[key]=(key, rave_a_params[key].shape, rave_b_params[key].shape, "")
+                except Exception as e:
+                    print(f"Error averaging key {key}: {e}")
+                    keys_not_averaged[key]=(key, rave_a_params[key].shape, rave_b_params[key].shape, e)
+            else:
+                print(f"Key {key} not found in rave_b parameters, skipping.")
+                # keys_not_averaged(key)
+                keys_not_averaged[key]=(key, rave_a_params[key].shape, "n/a", "Key not found in rave_b parameters.")
 
     messages["keys_averaged"] = keys_averaged
     messages["keys_not_averaged"] = keys_not_averaged
+    messages["excluded_keys_list"] = exclusion_key_list
 
     messages["stats"] = f'Numb Params Averaged: {len(keys_averaged)}\nNumb Params Unable to Average: {len(keys_not_averaged)}\nPercent Averaged: {len(keys_averaged) * 100/(len(keys_not_averaged) + len(keys_averaged)):5.2f}%'
 
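To make the averaging and the new exclusion path concrete, here is a toy run of the same element-wise blend outside the app. The two "state dicts" and their key names are hypothetical, with r1_ratio = r2_ratio = .5 as in the function body:

import torch

rave_a_params = {"decoder.w": torch.ones(2),  "decoder.b": torch.zeros(2)}
rave_b_params = {"decoder.w": torch.zeros(2), "decoder.b": torch.ones(2)}
exclusion_key_list = ["decoder.b"]

avg = {}
for key in rave_a_params:
    if key in exclusion_key_list:
        continue  # the app records these under keys_not_averaged instead
    avg[key] = (rave_a_params[key] * .5) + (rave_b_params[key] * .5)

print(avg)  # {'decoder.w': tensor([0.5000, 0.5000])}; decoder.b is left out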
@@ -118,81 +125,81 @@ def AverageRaveModels(rave_a, rave_b, bias = 0):
 
     return rave_avg, messages
 
-def ProcessRequest(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1, bias=0, function="AverageModels"):
-    if function == "AverageModels":
-        return GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple, bias)
-    elif function == "StyleTransfer":
-        print("Style Transfer not implemented yet.")
-        return None
+# def ProcessRequest(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1, bias=0, function="AverageModels"):
+#     if function == "AverageModels":
+#         return GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple, bias)
+#     elif function == "StyleTransfer":
+#         print("Style Transfer not implemented yet.")
+#         return None
 
 
-def GenerateStyleTransfer(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1):
+# def GenerateStyleTransfer(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1):
 
-    model_path_a = GetModelPath(model_name_a)
-    model_path_b = GetModelPath(model_name_b)
+#     model_path_a = GetModelPath(model_name_a)
+#     model_path_b = GetModelPath(model_name_b)
 
-    # Choose Audio File to encode/decode
-    if audio_file is None:
-        audio_file = os.path.join('assets', audio_file_name)
-    # print("Audio File Name:", audio_file_name)
+#     # Choose Audio File to encode/decode
+#     if audio_file is None:
+#         audio_file = os.path.join('assets', audio_file_name)
+#     # print("Audio File Name:", audio_file_name)
 
-    # Generate Audio Files
-    # Audio files are created in the assets folder
-    generate_audio_files = False
+#     # Generate Audio Files
+#     # Audio files are created in the assets folder
+#     generate_audio_files = False
 
-    rave_a = torch.jit.load(model_path_a)
-    rave_b = torch.jit.load(model_path_b)
+#     rave_a = torch.jit.load(model_path_a)
+#     rave_b = torch.jit.load(model_path_b)
 
-    # Let's load a sample audio file
-    y, sr = librosa.load(audio_file)
+#     # Let's load a sample audio file
+#     y, sr = librosa.load(audio_file)
 
-    sr_multiplied = sr * sr_multiple # Adjust sample rate if needed
-    print(f"Audio File Loaded: {audio_file}, sample_rate = {sr}")
+#     sr_multiplied = sr * sr_multiple # Adjust sample rate if needed
+#     print(f"Audio File Loaded: {audio_file}, sample_rate = {sr}")
 
-    # Convert audio to a PyTorch tensor and reshape it to the
-    # required shape: (batch_size, n_channels, n_samples)
-    audio = torch.from_numpy(y).float()
-    audio = audio.reshape(1, 1, -1)
+#     # Convert audio to a PyTorch tensor and reshape it to the
+#     # required shape: (batch_size, n_channels, n_samples)
+#     audio = torch.from_numpy(y).float()
+#     audio = audio.reshape(1, 1, -1)
 
-    messages={}
-    audio_outputs={}
+#     messages={}
+#     audio_outputs={}
 
-    # perform style transfer
-    with torch.no_grad():
-        # encode the audio with the new averaged models
-        try:
-            latent = rave_a.encode(audio)
+#     # perform style transfer
+#     with torch.no_grad():
+#         # encode the audio with the new averaged models
+#         try:
+#             latent = rave_a.encode(audio)
 
-            # decode individual and averaged models
-            decoded = rave_a.decode(latent)
-            style_transfer_decoded = rave_b.decode(latent)
+#             # decode individual and averaged models
+#             decoded = rave_a.decode(latent)
+#             style_transfer_decoded = rave_b.decode(latent)
 
-            audio_outputs['decoded'] = decoded
-            audio_outputs['style_transfer'] = style_transfer_decoded
+#             audio_outputs['decoded'] = decoded
+#             audio_outputs['style_transfer'] = style_transfer_decoded
 
-        except Exception as e:
-            print(f'Encoding process generated error: ', e)
+#         except Exception as e:
+#             print(f'Encoding process generated error: ', e)
 
-    model_a_file=model_path_a.rsplit("/")[-1]
-    model_b_file=model_path_b.rsplit("/")[-1]
+#     model_a_file=model_path_a.rsplit("/")[-1]
+#     model_b_file=model_path_b.rsplit("/")[-1]
 
-    # Original Audio
-    original_audio = (sr, y)
+#     # Original Audio
+#     original_audio = (sr, y)
 
-    # Decoded Audio
-    print("Encoded and Decoded using original models")
-    decoded_audio = (sr, decoded.detach().numpy().squeeze())
-    style_transfer_audio = (sr, style_transfer_decoded.detach().numpy().squeeze())
+#     # Decoded Audio
+#     print("Encoded and Decoded using original models")
+#     decoded_audio = (sr, decoded.detach().numpy().squeeze())
+#     style_transfer_audio = (sr, style_transfer_decoded.detach().numpy().squeeze())
 
-    # saveAudio('assets/' + model_a_file[: 7] + '_enc-dec.wav', a)
-    # saveAudio('assets/' model_a_file[: 7] + '-' model_b_file[: 7] + '_style_transfer.wav', a)
+#     # saveAudio('assets/' + model_a_file[: 7] + '_enc-dec.wav', a)
+#     # saveAudio('assets/' model_a_file[: 7] + '-' model_b_file[: 7] + '_style_transfer.wav', a)
 
 
-    messages["stats"] = f"Model A: {model_name_a}\nModel B: {model_name_b}\nAudio file: {os.path.basename(audio_file)}\nSample Rate Multiple for Style Transfer Version: {sr_multiple}\n\n" + messages["stats"]
+#     messages["stats"] = f"Model A: {model_name_a}\nModel B: {model_name_b}\nAudio file: {os.path.basename(audio_file)}\nSample Rate Multiple for Style Transfer Version: {sr_multiple}\n\n" + messages["stats"]
 
-    return original_audio, decoded_audio, style_transfer_audio, '', messages["stats"], '', ''
+#     return original_audio, decoded_audio, style_transfer_audio, '', messages["stats"], '', ''
 
-def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1, bias=0):
+def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_file, sr_multiple=1, bias=0, excluded_keys = []):
 
     ###############################################
     # Choose models from filenames dictionary created in previous cell
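The two functions above are commented out rather than deleted, and the style-transfer idea they preserve is simple: encode with one model, decode the same latent with the other. A condensed sketch of that path, assuming rave_a and rave_b are TorchScript RAVE exports with encode/decode methods as in the commented code (the model paths are hypothetical):

import torch
import librosa

rave_a = torch.jit.load("models/model_a.ts")  # hypothetical paths
rave_b = torch.jit.load("models/model_b.ts")

y, sr = librosa.load("assets/SingingBowl-OmniMic.wav")  # the app's default clip
audio = torch.from_numpy(y).float().reshape(1, 1, -1)   # (batch, channels, samples)

with torch.no_grad():
    latent = rave_a.encode(audio)        # gestures captured in model A's latent space
    transferred = rave_b.decode(latent)  # re-rendered through model B's decoder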
@@ -214,7 +221,7 @@ def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_f
     # Choose Audio File to encode/decode
     #####################################
     # audio_file_name = "RJM1240-Gestures.wav"
-    if audio_file is None:
+    if audio_file is None and audio_file_name is not None:
         audio_file = os.path.join('assets', audio_file_name)
     # print("Audio File Name:", audio_file_name)
 
@@ -230,7 +237,10 @@ def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_f
     # Let's load a sample audio file
     y, sr = librosa.load(audio_file)
 
-    sr_multiplied = sr * sr_multiple # Adjust sample rate if needed
+    sr_multiplied = sr
+    if sr_multiple is not None:
+        sr_multiplied = sr * sr_multiple # Adjust sample rate if needed
+
     print(f"Audio File Loaded: {audio_file}, sample_rate = {sr}")
 
     # Convert audio to a PyTorch tensor and reshape it to the
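The sample-rate multiple works because Gradio's Audio component accepts a (sample_rate, numpy_array) tuple: the same samples handed back with a larger rate play faster and higher, and with a smaller rate slower and lower, no resampling involved. A minimal illustration (the silent buffer is just a stand-in):

import numpy as np

sr = 22050                                # librosa.load's default rate
samples = np.zeros(sr, dtype=np.float32)  # one second of stand-in audio
octave_up = (sr * 2, samples)             # same data, half the duration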
@@ -240,10 +250,26 @@ def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_f
 
     messages={}
     audio_outputs={}
+
+    print("Excluded Keys (raw):", excluded_keys)
+    excluded_keys_list = [] #sum(excluded_keys, []) # flatten list of lists
+
+    if '\n' in excluded_keys:
+        keys=excluded_keys.split('\n')
+        excluded_keys_list.extend(keys)
+    elif ',' in excluded_keys:
+        keys=excluded_keys.split(',')
+        excluded_keys_list.extend(keys)
+    elif ' ' in excluded_keys:
+        keys=excluded_keys.split(' ')
+        excluded_keys_list.extend(keys)
+
+
+    print("Excluded Keys List:", excluded_keys_list)
     for bias in biases:
         # Average the rave models
         # rave_avg, numb_params_mod, numb_params_unable_to_mod = AverageRaveModels(rave_a, rave_b, bias=bias)
-        rave_avg, new_msgs = AverageRaveModels(rave_a, rave_b, (-1 * bias))
+        rave_avg, new_msgs = AverageRaveModels(rave_a, rave_b, (-1 * bias), excluded_keys_list)
         messages |= new_msgs
 
         # no decode the results back to audio
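One edge case in the separator cascade above: a single key pasted with no newline, comma, or space matches none of the branches and is silently dropped, and the split pieces can carry stray whitespace. A more forgiving parse, offered as an alternative sketch rather than what the commit ships:

import re

# one pattern covers commas, spaces, and newlines, drops empty pieces,
# and keeps a lone key that has no separator at all
excluded_keys_list = [k for k in re.split(r"[,\s]+", excluded_keys) if k]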
@@ -301,7 +327,7 @@ def GenerateRaveEncDecAudio(model_name_a, model_name_b, audio_file_name, audio_f
     df_not_averaged.columns=['Param Name', 'Model A Shape', 'Model B Shape', 'Notes']
 
     messages["stats"] = f"Model A: {model_name_a}\nModel B: {model_name_b}\nAudio file: {os.path.basename(audio_file)}\nSample Rate Multiple for Averaged Version: {sr_multiple}\n\n" + messages["stats"]
-
+    messages["stats"] += f"\nExcluded Keys:\n{'\n'.join(map(str, messages['excluded_keys_list']))}"
     return original_audio, model_a_audio, model_b_audio, averaged_audio, messages["stats"], df_averaged, df_not_averaged
 
 import gradio as gr
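Worth flagging: the appended line places '\n'.join(...) inside an f-string replacement field, and backslashes there are only legal from Python 3.12 onward (PEP 701); on 3.11 and earlier this is a SyntaxError. A version-agnostic equivalent hoists the join out:

excluded = "\n".join(map(str, messages["excluded_keys_list"]))
messages["stats"] += f"\nExcluded Keys:\n{excluded}"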
@@ -320,31 +346,54 @@ description = "<p style='line-height: 1'>This app attempts to average two RAVE m
     "<li>Select a bias towards one of the models. A bias of 0 will average the two models equally. A positive bias will favor Model A, and a negative bias will favor Model B.</li></ul>" \
     "-->"
 
-
+# Inputs
+modelA = gr.Radio(model_path_config_keys, label="Select Model A", value="Multimbral Guitar", container=True)
+modelB = gr.Radio(model_path_config_keys, label="Select Model B", value="Water", container=True)
+audioFileSelect = gr.Dropdown(available_audio_files, label="Select from these audio files or upload your own below:", value="SingingBowl-OmniMic.wav",container=True)
+audioFileUpload = gr.Audio(label="...or upload your own audio file (max 60 seconds):", type="filepath", sources=["upload", "microphone"], max_length=60, waveform_options=waveform_options, format='wav')
+sampleRateMultiple = gr.Radio([.2, .5, .75, 1, 2, 4], label="Sample Rate Multiple (Averaged version only)", value=1, container=True)
+bias = gr.Slider(label="Bias towards Model A or B", minimum=-1, maximum=1, value=0, step=0.1, container=True)
+paramsToExclude = gr.TextArea(label="Params Not to Average", show_copy_button="True", placeholder="Enter parameter keys to exclude from averaging, separated by commas, spaces, or new lines",)
+
+# Outputs
+originalAudio = gr.Audio(label="Original Audio", sources=None, waveform_options=waveform_options, interactive=False)
+modelAAudio = gr.Audio(label="Encoded/Decoded through Model A", sources=None, waveform_options=waveform_options,)
+modelBAudio = gr.Audio(label="Encoded/Decoded through Model B", sources=None, waveform_options=waveform_options,)
+averagedAudio = gr.Audio(label="Encoded/Decoded through averaged model", sources=None, waveform_options=waveform_options,)
+infoText = gr.Textbox(label="Info:")
+dfParamsAveraged = gr.Dataframe(label="Params Averaged", show_copy_button="True", scale=100, column_widths=column_widths, interactive=True, headers=['Param Name', 'Model A Shape', 'Model B Shape', 'Notes'])
+drParamsNotAveraged = gr.Dataframe(label="Params Not Averaged", show_copy_button="True", scale=100, column_widths=column_widths, headers=['Param Name', 'Model A Shape', 'Model B Shape', 'Notes'])
 
 AverageModels = gr.Interface(title="Process Audio Through the Average of Two Rave Models", description=description,
                 fn=GenerateRaveEncDecAudio,
                 inputs=[
-                    [five deleted lines (old inline input list), blank in the page render]
+                    modelA,
+                    modelB,
+                    audioFileSelect,
+                    audioFileUpload,
+                ],
                 additional_inputs=[
-                    [two deleted lines (old inline additional inputs), blank in the page render]
+                    sampleRateMultiple,
+                    bias,
+                    paramsToExclude,
                 ],
                 # if no way to pass dictionary, pass separate keys and values and zip them.
                 outputs=[
-                    [seven deleted lines (old inline output list), blank in the page render]
+                    originalAudio,
+                    modelAAudio,
+                    modelBAudio,
+                    averagedAudio,
+                    infoText,
+                    dfParamsAveraged,
+                    drParamsNotAveraged,
                 ]
                 ,fill_width=True
 )
 
+
+# def add_excluded_key(key_to_exclude: gr.SelectData):
+#     paramsToExclude.append([key_to_exclude])
+
+# dfParamsAveraged.select(add_excluded_key)
+
 AverageModels.launch(max_file_size=10 * gr.FileSize.MB, share=True)
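Gradio hands the callback its inputs followed by additional_inputs, in order, so the new paramsToExclude textbox arrives as the excluded_keys string. A direct call with the same argument shape, using the interface's default values (the excluded parameter names are made up for illustration):

outputs = GenerateRaveEncDecAudio(
    "Multimbral Guitar", "Water",      # modelA, modelB radio values
    "SingingBowl-OmniMic.wav", None,   # dropdown selection, no upload
    sr_multiple=1, bias=0,
    excluded_keys="decoder.net.0.weight, decoder.net.2.bias",
)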
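Finally, the commented-out add_excluded_key lines gesture at a click-to-exclude flow: selecting a row in the "Params Averaged" table would append that key to the exclusion textbox. As written it cannot work, since a Gradio component is not a list to .append to; an event handler has to return the textbox's new value. A rough, untested sketch of that wiring, assuming gradio 4.x event semantics, would replace the launch call with something like:

with gr.Blocks() as demo:
    AverageModels.render()

    def add_excluded_key(current_text, evt: gr.SelectData):
        # evt.value is the clicked cell; return the textbox's new contents
        return (current_text + "\n" + str(evt.value)).strip()

    dfParamsAveraged.select(add_excluded_key, inputs=paramsToExclude, outputs=paramsToExclude)

demo.launch(max_file_size=10 * gr.FileSize.MB, share=True)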