Spaces:

mrfakename
/

VoiceStar

Running on Zero

App Files Files Community

mrfakename committed on Mar 8

Commit

3f32750

verified ·

1 Parent(s): 7362f82

Upload 8 files

Browse files

Files changed (9) hide show

.gitattributes +1 -0
README.md +30 -15
abc2xml.py +0 -0
config.py +15 -0
demo.py +236 -0
illustration.png +3 -0
inference.py +260 -0
prompts.txt +112 -0
utils.py +406 -0

.gitattributes CHANGED Viewed

@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 examples/web_6f93090a-81f6-489e-bb35-1a2838b18c01.png filter=lfs diff=lfs merge=lfs -text
 examples/web_dfacd48d-d2c2-492f-b94c-41e6a34ea99f.png filter=lfs diff=lfs merge=lfs -text

 *tfevents* filter=lfs diff=lfs merge=lfs -text
 examples/web_6f93090a-81f6-489e-bb35-1a2838b18c01.png filter=lfs diff=lfs merge=lfs -text
 examples/web_dfacd48d-d2c2-492f-b94c-41e6a34ea99f.png filter=lfs diff=lfs merge=lfs -text
+illustration.png filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,16 +1,31 @@
----
-title: DeepSeek-R1
-emoji: 🐋
-colorFrom: indigo
-colorTo: blue
-sdk: gradio
-sdk_version: 5.12.0
-app_file: app.py
-pinned: false
-preload_from_hub:
-- deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
-- deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
-short_description: Try out the distilled DeepSeek-R1 models (MIT licensed!)
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+## Local Gradio Demo
+1. Set up the environment:
+  ```
+  conda create --name notagen python=3.10
+  conda activate notagen
+  conda install pytorch==2.3.0 pytorch-cuda=11.8 -c pytorch -c nvidia
+  pip install accelerate
+  pip install optimum
+  pip install -r requirements.txt
+  ```
+2. Download [NotaGen-X](https://huggingface.co/ElectricAlexis/NotaGen/blob/main/weights_notagenx_p_size_16_p_length_1024_p_layers_20_h_size_1280.pth) and put it under ```gradio/```.
+3. Run ```demo.py```:
+  ```
+  cd gradio/
+  python demo.py
+  ```
+4. Then open the demo page in your browser at http://localhost:7861 (the server listens on 0.0.0.0:7861).
+  <p align="center">
+  <img src="illustration.png" alt="NotaGen Gradio Demo">
+  </p>
+  You can choose period, composer, and instrumentation as a prompt combination for NotaGen's conditional generation. After generation completes, you can save the ABC notation and MusicXML files locally.
+  Unfortunately, only 112 prompt combinations are currently available; this limit comes from the number of pieces available for each prompt in the fine-tuning dataset. We hope to expand the combinations and forms of prompts in the future.

abc2xml.py ADDED Viewed

The diff for this file is too large to render. See raw diff

config.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import os
+# Configurations for inference
+INFERENCE_WEIGHTS_PATH = 'weights_notagenx_p_size_16_p_length_1024_p_layers_20_h_size_1280.pth'               # Path to weights for inference
+TOP_K = 9                                                       # Top k for sampling
+TOP_P = 0.9                                                      # Top p for sampling
+TEMPERATURE = 1.2                                                 # Temperature for sampling
+# Configurations for model
+PATCH_STREAM = True                                             # Stream training / inference
+PATCH_SIZE = 16                                                # Patch Size
+PATCH_LENGTH = 1024                                             # Patch Length
+CHAR_NUM_LAYERS = 6                                             # Number of layers in the decoder
+PATCH_NUM_LAYERS = 20                                           # Number of layers in the encoder
+HIDDEN_SIZE = 1280                                               # Hidden Size

demo.py ADDED Viewed

	@@ -0,0 +1,236 @@

+import gradio as gr
+import sys
+import threading
+import queue
+from io import TextIOBase
+from inference import inference_patch
+import datetime
+import subprocess
+import os
# Predefined valid combinations set.
# Each non-empty line of prompts.txt has the form
# "<period>_<composer>_<instrumentation>".
with open('prompts.txt', 'r', encoding='utf-8') as f:
    prompts = f.readlines()

valid_combinations = set()
for prompt in prompts:
    prompt = prompt.strip()
    if not prompt:
        # Tolerate blank lines (e.g. a trailing newline at the end of the file).
        continue
    parts = prompt.split('_')
    if len(parts) < 3:
        # Fail loudly with the offending line instead of an opaque IndexError.
        raise ValueError(f"Malformed line in prompts.txt: {prompt!r}")
    valid_combinations.add((parts[0], parts[1], parts[2]))

# Generate available options
periods = sorted({p for p, _, _ in valid_combinations})
composers = sorted({c for _, c, _ in valid_combinations})
instruments = sorted({i for _, _, i in valid_combinations})
# Dynamic component updates
def update_components(period, composer):
    """Rebuild the composer and instrumentation dropdowns for the current selection.

    Args:
        period: Currently selected period, or a falsy value when nothing is selected.
        composer: Currently selected composer, or a falsy value when nothing is selected.

    Returns:
        A two-element list ``[composer_dropdown, instrument_dropdown]``.
        Without a period both dropdowns are empty and disabled.  Otherwise the
        composer dropdown lists the composers available for ``period`` (keeping
        ``composer`` selected only if it is still valid) and the instrumentation
        dropdown lists the options for ``(period, composer)`` with no value
        pre-selected; it is interactive only when it has at least one choice.
    """
    if not period:
        return [
            gr.Dropdown(choices=[], value=None, interactive=False)
            for _ in range(2)
        ]

    composer_choices = sorted({c for p, c, _ in valid_combinations if p == period})

    if composer:
        instrument_choices = sorted(
            {i for p, c, i in valid_combinations if p == period and c == composer}
        )
    else:
        instrument_choices = []

    selected_composer = composer if composer in composer_choices else None

    composer_update = gr.Dropdown(
        choices=composer_choices,
        value=selected_composer,
        interactive=True
    )
    instrument_update = gr.Dropdown(
        choices=instrument_choices,
        value=None,
        interactive=bool(instrument_choices)
    )
    return [composer_update, instrument_update]
class RealtimeStream(TextIOBase):
    """Write-only text stream that forwards every written chunk to a queue.

    Intended as a stand-in for ``sys.stdout`` so that another thread can
    consume printed text as it is produced.
    """

    def __init__(self, queue):
        """Store the target queue; any object with a ``put(text)`` method works."""
        super().__init__()
        self.queue = queue

    def writable(self):
        """Report that the stream supports ``write()``, as the io contract requires."""
        return True

    def write(self, text):
        """Enqueue ``text`` and return the number of characters accepted."""
        self.queue.put(text)
        return len(text)
def save_and_convert(abc_content, period, composer, instrumentation):
    """Save the ABC score to disk and convert it to MusicXML with abc2xml.py.

    Args:
        abc_content: ABC notation text to save.
        period: Selected period (used in the file name).
        composer: Selected composer (used in the file name).
        instrumentation: Selected instrumentation (used in the file name).

    Returns:
        A status message naming the ABC file and the expected XML file.

    Raises:
        gr.Error: If the prompt selection or the score text is missing, or if
            the abc2xml.py subprocess exits with a non-zero status.
    """
    if not all([period, composer, instrumentation]):
        raise gr.Error("Please complete a valid generation first before saving")
    if not abc_content or not abc_content.strip():
        # Without this check an empty file would be written and handed to the converter.
        raise gr.Error("Please complete a valid generation first before saving")

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    prompt_str = f"{period}_{composer}_{instrumentation}"
    filename_base = f"{timestamp}_{prompt_str}"

    abc_filename = f"{filename_base}.abc"
    with open(abc_filename, "w", encoding="utf-8") as f:
        f.write(abc_content)

    xml_filename = f"{filename_base}.xml"
    try:
        # Use the interpreter running this app rather than whatever "python"
        # resolves to on PATH (which may be a different environment or absent).
        subprocess.run(
            [sys.executable, "abc2xml.py", '-o', '.', abc_filename],
            check=True,
            capture_output=True,
            text=True
        )
    except subprocess.CalledProcessError as e:
        error_msg = e.stderr if e.stderr else "Unknown error"
        raise gr.Error(
            f"ABC to XML conversion failed: {error_msg}. Please try to generate another composition."
        ) from e

    return f"Saved successfully: {abc_filename} -> {xml_filename}"
def generate_music(period, composer, instrumentation):
    """Run inference in a background thread and stream its printed output.

    This is a generator: while generation is running it yields
    ``(log_text, None)`` each time new text is printed, and it finally yields
    ``(log_text, result)`` where ``result`` is the value returned by
    ``inference_patch``.

    Args:
        period: Selected period.
        composer: Selected composer.
        instrumentation: Selected instrumentation.

    Raises:
        gr.Error: If the combination is not in ``valid_combinations``, or if
            inference raised an exception.
    """
    if (period, composer, instrumentation) not in valid_combinations:
        raise gr.Error("Invalid prompt combination! Please re-select from the period options")

    output_queue = queue.Queue()
    # NOTE: this swaps sys.stdout for the whole process, so anything printed by
    # any thread while generation is running ends up in the log shown to the user.
    original_stdout = sys.stdout
    sys.stdout = RealtimeStream(output_queue)

    result_container = []
    error_container = []

    def run_inference():
        try:
            result_container.append(inference_patch(period, composer, instrumentation))
        except Exception as exc:
            # Hand the failure to the consuming generator; otherwise it would
            # only be reported on stderr and the UI would show an empty result.
            error_container.append(exc)
        finally:
            sys.stdout = original_stdout

    thread = threading.Thread(target=run_inference)
    thread.start()

    process_output = ""
    while thread.is_alive():
        try:
            text = output_queue.get(timeout=0.1)
            process_output += text
            yield process_output, None
        except queue.Empty:
            continue
    thread.join()

    # Drain whatever was printed between the last poll and thread exit.
    while not output_queue.empty():
        text = output_queue.get()
        process_output += text
        yield process_output, None

    if error_container:
        raise gr.Error(f"Generation failed: {error_container[0]}") from error_container[0]

    final_result = result_container[0] if result_container else ""
    yield process_output, final_result
# Build the two-column UI: prompt selection and live log on the left,
# post-processed score and save controls on the right.
with gr.Blocks() as demo:
    gr.Markdown("## NotaGen")
    with gr.Row():
        # Left column
        with gr.Column():
            period_dd = gr.Dropdown(
                choices=periods,
                value=None,
                label="Period",
                interactive=True
            )
            composer_dd = gr.Dropdown(
                choices=[],
                value=None,
                label="Composer",
                interactive=False
            )
            instrument_dd = gr.Dropdown(
                choices=[],
                value=None,
                label="Instrumentation",
                interactive=False
            )
            generate_btn = gr.Button("Generate!", variant="primary")
            process_output = gr.Textbox(
                label="Generation process",
                interactive=False,
                lines=15,
                max_lines=15,
                placeholder="Generation progress will be shown here...",
                elem_classes="process-output"
            )
        # Right column
        with gr.Column():
            final_output = gr.Textbox(
                label="Post-processed ABC notation scores",
                interactive=True,
                lines=23,
                placeholder="Post-processed ABC scores will be shown here...",
                elem_classes="final-output"
            )
            with gr.Row():
                # elem_id gives the hover rule in the CSS below something to match.
                save_btn = gr.Button(
                    "💾 Save as ABC & XML files",
                    variant="secondary",
                    elem_id="save-convert"
                )
            save_status = gr.Textbox(
                label="Save Status",
                interactive=False,
                visible=True,
                max_lines=2
            )
    # Changing either the period or the composer refreshes the dependent dropdowns.
    period_dd.change(
        update_components,
        inputs=[period_dd, composer_dd],
        outputs=[composer_dd, instrument_dd]
    )
    composer_dd.change(
        update_components,
        inputs=[period_dd, composer_dd],
        outputs=[composer_dd, instrument_dd]
    )
    generate_btn.click(
        generate_music,
        inputs=[period_dd, composer_dd, instrument_dd],
        outputs=[process_output, final_output]
    )
    save_btn.click(
        save_and_convert,
        inputs=[final_output, period_dd, composer_dd, instrument_dd],
        outputs=[save_status]
    )
css = """
.process-output {
    background-color: #f0f0f0;
    font-family: monospace;
    padding: 10px;
    border-radius: 5px;
}
.final-output {
    background-color: #ffffff;
    font-family: sans-serif;
    padding: 10px;
    border-radius: 5px;
}
.process-output textarea {
    max-height: 500px !important;
    overflow-y: auto !important;
    white-space: pre-wrap;
}
"""
# Hover highlight for the save button (matches elem_id="save-convert" above).
css += """
#save-convert:hover {
    background-color: #ffe6e6;
}
"""
demo.css = css
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7861
    )

illustration.png ADDED Viewed

Git LFS Details

SHA256: 10e0d5742ed50035210c40983bdf56d038d0288ebd89881b895e1e50afe609a3
Pointer size: 131 Bytes
Size of remote file: 384 kB

inference.py ADDED Viewed

	@@ -0,0 +1,260 @@

+import os
+import time
+import torch
+from utils import *
+from config import *
+from transformers import GPT2Config, LlamaConfig
+from abctoolkit.utils import Exclaim_re, Quote_re, SquareBracket_re, Barline_regexPattern
+from abctoolkit.transpose import Note_list, Pitch_sign_list
+from abctoolkit.duration import calculate_bartext_duration
# Extend the imported note list with 'z' and 'x' under a module-local name
# (a new list is built; the imported list object is not mutated).
Note_list = Note_list + ['z', 'x']

# Prefer the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

patchilizer = Patchilizer()

# Configuration passed to the model as ``encoder_config``.
patch_config = GPT2Config(
    num_hidden_layers=PATCH_NUM_LAYERS,
    max_length=PATCH_LENGTH,
    max_position_embeddings=PATCH_LENGTH,
    n_embd=HIDDEN_SIZE,
    num_attention_heads=HIDDEN_SIZE // 64,
    vocab_size=1,
)
# Configuration passed to the model as ``decoder_config``.
byte_config = GPT2Config(
    num_hidden_layers=CHAR_NUM_LAYERS,
    max_length=PATCH_SIZE + 1,
    max_position_embeddings=PATCH_SIZE + 1,
    hidden_size=HIDDEN_SIZE,
    num_attention_heads=HIDDEN_SIZE // 64,
    vocab_size=128,
)

model = NotaGenLMHeadModel(encoder_config=patch_config, decoder_config=byte_config)

trainable_parameter_count = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Parameter Number: {trainable_parameter_count}")

# NOTE(review): torch.load unpickles the checkpoint file; only load weights
# from a trusted source (consider weights_only=True if the checkpoint allows it).
checkpoint = torch.load(INFERENCE_WEIGHTS_PATH, map_location=device)
model.load_state_dict(checkpoint['model'])
model = model.to(device)
model.eval()
def rest_unreduce(abc_lines):
    """Restore the voices omitted from each tune-body line as full-bar rests.

    The input is a list of ABC lines: metadata first, then tune-body lines in
    which each bar is written as ``[V:n]...`` segments.  For every tune-body
    line, each voice declared in the metadata (``V:`` lines) that has no
    segment on that line gets one consisting of a rest whose duration is the
    most common bar duration among the voices present on the line (or the last
    known one).  The first voice of each ``%%score`` group is filled with
    ``z``; the other voices of the group with ``x``.

    Args:
        abc_lines: List of ABC lines, each ending with a newline.

    Returns:
        The metadata lines followed by the rebuilt tune-body lines.

    Raises:
        ValueError: If there is no tune body, or a rest is needed before any
            bar duration could be determined.
    """
    tunebody_index = None
    for i, line in enumerate(abc_lines):
        if '[V:' in line:
            tunebody_index = i
            break
    if tunebody_index is None:
        # Slicing with None would treat every line as both metadata and body.
        raise ValueError("No tune body ('[V:' line) found in ABC text")

    metadata_lines = abc_lines[:tunebody_index]
    tunebody_lines = abc_lines[tunebody_index:]

    part_symbol_list = []
    voice_group_list = []
    # Initialised up front so a 'V:' line before any '%%score' line does not
    # hit an unbound name.
    existed_voices = []
    for line in metadata_lines:
        if line.startswith('%%score'):
            for round_bracket_match in re.findall(r'\((.*?)\)', line):
                voice_group_list.append(round_bracket_match.split())
            existed_voices = [item for sublist in voice_group_list for item in sublist]
        if line.startswith('V:'):
            symbol = line.split()[0]
            part_symbol_list.append(symbol)
            if symbol[2:] not in existed_voices:
                voice_group_list.append([symbol[2:]])

    z_symbols = set()  # voices that use z as rest
    x_symbols = set()  # voices that use x as rest
    for voice_group in voice_group_list:
        if not voice_group:
            # An empty "()" group in %%score names no voice.
            continue
        z_symbols.add('V:' + voice_group[0])
        for voice in voice_group[1:]:
            x_symbols.add('V:' + voice)

    part_symbol_list.sort(key=lambda x: int(x[2:]))

    stream_prefix_pattern = re.compile(r'^\[r:[^\]]*\]')
    voice_bar_pattern = re.compile(r'\[V:(\d+)\](.*?)(?=\[V:|$)')

    ref_dur = None      # most recent reference bar duration
    right_barline = ''  # most recent closing barline text
    unreduced_tunebody_lines = []
    for i, line in enumerate(tunebody_lines):
        # Drop the leading "[r:...]" marker, if any.
        line = stream_prefix_pattern.sub('', line)
        line_bar_dict = {
            f'V:{voice_id}': bartext
            for voice_id, bartext in voice_bar_pattern.findall(line)
        }

        # calculate duration and collect barline
        dur_dict = {}
        for symbol, bartext in line_bar_dict.items():
            right_barline = ''.join(re.split(Barline_regexPattern, bartext)[-2:])
            bartext = bartext[:-len(right_barline)]
            try:
                bar_dur = calculate_bartext_duration(bartext)
            except Exception:
                # A bar whose duration cannot be computed does not vote.
                bar_dur = None
            if bar_dur is not None:
                dur_dict[bar_dur] = dur_dict.get(bar_dur, 0) + 1

        if dur_dict:
            ref_dur = max(dur_dict, key=dur_dict.get)
        # otherwise keep the ref_dur of the previous line

        prefix_left_barline = line.split('[V:')[0] if i == 0 else ''

        unreduced_line = ''
        for symbol in part_symbol_list:
            if symbol in line_bar_dict:
                symbol_bartext = line_bar_dict[symbol]
            else:
                if ref_dur is None:
                    raise ValueError(
                        f"Cannot infer a rest duration for {symbol} on tune-body line {i}"
                    )
                if symbol in z_symbols:
                    rest_char = 'z'
                elif symbol in x_symbols:
                    rest_char = 'x'
                else:
                    raise ValueError(f"Voice {symbol} does not belong to any voice group")
                symbol_bartext = prefix_left_barline + rest_char + str(ref_dur) + right_barline
            unreduced_line += '[' + symbol + ']' + symbol_bartext
        unreduced_tunebody_lines.append(unreduced_line + '\n')

    unreduced_lines = metadata_lines + unreduced_tunebody_lines
    return unreduced_lines
def inference_patch(period, composer, instrumentation):
    """Generate one ABC score conditioned on period, composer and instrumentation.

    The three values are written as ``%``-prefixed prompt lines, then patches
    are sampled from the model one at a time and printed as they are decoded.
    An attempt is abandoned when the text exceeds 102400 characters or runs
    longer than 20 minutes.  A completed text is post-processed with
    ``rest_unreduce``; if that fails, or the attempt was abandoned, a new
    attempt is started.

    NOTE(review): there is no upper bound on the number of attempts, so this
    function only returns once an attempt succeeds.

    Args:
        period: Period string used as the first prompt line.
        composer: Composer string used as the second prompt line.
        instrumentation: Instrumentation string used as the third prompt line.

    Returns:
        The post-processed ABC text, starting with ``X:1`` and with the
        single-``%`` prompt lines removed.
    """
    prompt_lines = [
        '%' + period + '\n',
        '%' + composer + '\n',
        '%' + instrumentation + '\n']

    while True:
        failure_flag = False

        bos_patch = [patchilizer.bos_token_id] * (PATCH_SIZE - 1) + [patchilizer.eos_token_id]

        start_time = time.time()

        prompt_patches = patchilizer.patchilize_metadata(prompt_lines)
        byte_list = list(''.join(prompt_lines))
        print(''.join(byte_list), end='')

        # Convert the prompt patches to ids, padded to PATCH_SIZE.
        prompt_patches = [[ord(c) for c in patch] + [patchilizer.special_token_id] * (PATCH_SIZE - len(patch))
                          for patch in prompt_patches]
        prompt_patches.insert(0, bos_patch)

        input_patches = torch.tensor(prompt_patches, device=device).reshape(1, -1)

        end_flag = False
        cut_index = None

        tunebody_flag = False

        while True:
            predicted_patch = model.generate(input_patches.unsqueeze(0),
                                             top_k=TOP_K,
                                             top_p=TOP_P,
                                             temperature=TEMPERATURE)
            # The first time a patch starts with "[r:", regenerate it with the
            # prefix forced to "[r:0/".
            if not tunebody_flag and patchilizer.decode([predicted_patch]).startswith('[r:'):  # start with [r:0/
                tunebody_flag = True
                r0_patch = torch.tensor([ord(c) for c in '[r:0/']).unsqueeze(0).to(device)
                temp_input_patches = torch.concat([input_patches, r0_patch], axis=-1)
                predicted_patch = model.generate(temp_input_patches.unsqueeze(0),
                                                 top_k=TOP_K,
                                                 top_p=TOP_P,
                                                 temperature=TEMPERATURE)
                predicted_patch = [ord(c) for c in '[r:0/'] + predicted_patch
            # A patch starting with (bos, eos) marks the end of the piece.
            if predicted_patch[0] == patchilizer.bos_token_id and predicted_patch[1] == patchilizer.eos_token_id:
                end_flag = True
                break
            next_patch = patchilizer.decode([predicted_patch])

            for char in next_patch:
                byte_list.append(char)
                print(char, end='')

            # Overwrite everything after the first eos with the padding id.
            patch_end_flag = False
            for j in range(len(predicted_patch)):
                if patch_end_flag:
                    predicted_patch[j] = patchilizer.special_token_id
                if predicted_patch[j] == patchilizer.eos_token_id:
                    patch_end_flag = True

            predicted_patch = torch.tensor([predicted_patch], device=device)  # (1, 16)
            input_patches = torch.cat([input_patches, predicted_patch], dim=1)  # (1, 16 * patch_len)

            if len(byte_list) > 102400:
                failure_flag = True
                break
            if time.time() - start_time > 20 * 60:
                failure_flag = True
                break

            # Context is full: rebuild it from the metadata plus the tail of
            # the tune body generated so far.
            if input_patches.shape[1] >= PATCH_LENGTH * PATCH_SIZE and not end_flag:
                print('Stream generating...')

                abc_code = ''.join(byte_list)
                abc_lines = abc_code.split('\n')

                tunebody_index = None
                for i, line in enumerate(abc_lines):
                    if line.startswith('[r:') or line.startswith('[V:'):
                        tunebody_index = i
                        break
                if tunebody_index is None or tunebody_index == len(abc_lines) - 1:
                    # NOTE(review): failure_flag is not set here, so the text
                    # generated so far is post-processed as if complete.
                    break

                metadata_lines = abc_lines[:tunebody_index]
                tunebody_lines = abc_lines[tunebody_index:]

                metadata_lines = [line + '\n' for line in metadata_lines]
                if not abc_code.endswith('\n'):
                    # The last line is still being written: leave it unterminated.
                    tunebody_lines = [tunebody_lines[i] + '\n' for i in range(len(tunebody_lines) - 1)] + [
                        tunebody_lines[-1]]
                else:
                    tunebody_lines = [tunebody_lines[i] + '\n' for i in range(len(tunebody_lines))]

                if cut_index is None:
                    cut_index = len(tunebody_lines) // 2

                abc_code_slice = ''.join(metadata_lines + tunebody_lines[-cut_index:])
                input_patches = patchilizer.encode_generate(abc_code_slice)

                input_patches = [item for sublist in input_patches for item in sublist]
                input_patches = torch.tensor([input_patches], device=device)
                input_patches = input_patches.reshape(1, -1)

        if not failure_flag:
            abc_text = ''.join(byte_list)

            # unreduce
            abc_lines = abc_text.split('\n')
            abc_lines = list(filter(None, abc_lines))
            abc_lines = [line + '\n' for line in abc_lines]
            try:
                unreduced_abc_lines = rest_unreduce(abc_lines)
            except Exception as exc:
                # Retry on post-processing errors, but let KeyboardInterrupt /
                # SystemExit propagate and say why the attempt was discarded.
                failure_flag = True
                print(f'\nPost-processing failed ({exc!r}); regenerating...')
            else:
                # Drop single-'%' lines (the prompt); keep '%%' directives.
                unreduced_abc_lines = [line for line in unreduced_abc_lines if not (line.startswith('%') and not line.startswith('%%'))]
                unreduced_abc_lines = ['X:1\n'] + unreduced_abc_lines
                unreduced_abc_text = ''.join(unreduced_abc_lines)
                return unreduced_abc_text


if __name__ == '__main__':
    inference_patch('Classical', 'Beethoven, Ludwig van', 'Keyboard')

prompts.txt ADDED Viewed

	@@ -0,0 +1,112 @@

+Baroque_Bach, Johann Sebastian_Chamber
+Baroque_Bach, Johann Sebastian_Choral
+Baroque_Bach, Johann Sebastian_Keyboard
+Baroque_Bach, Johann Sebastian_Orchestral
+Baroque_Bach, Johann Sebastian_Vocal-Orchestral
+Baroque_Corelli, Arcangelo_Chamber
+Baroque_Corelli, Arcangelo_Orchestral
+Baroque_Handel, George Frideric_Chamber
+Baroque_Handel, George Frideric_Keyboard
+Baroque_Handel, George Frideric_Orchestral
+Baroque_Handel, George Frideric_Vocal-Orchestral
+Baroque_Scarlatti, Domenico_Keyboard
+Baroque_Vivaldi, Antonio_Chamber
+Baroque_Vivaldi, Antonio_Orchestral
+Baroque_Vivaldi, Antonio_Vocal-Orchestral
+Classical_Beethoven, Ludwig van_Art Song
+Classical_Beethoven, Ludwig van_Chamber
+Classical_Beethoven, Ludwig van_Keyboard
+Classical_Beethoven, Ludwig van_Orchestral
+Classical_Haydn, Joseph_Chamber
+Classical_Haydn, Joseph_Keyboard
+Classical_Haydn, Joseph_Orchestral
+Classical_Haydn, Joseph_Vocal-Orchestral
+Classical_Mozart, Wolfgang Amadeus_Chamber
+Classical_Mozart, Wolfgang Amadeus_Choral
+Classical_Mozart, Wolfgang Amadeus_Keyboard
+Classical_Mozart, Wolfgang Amadeus_Orchestral
+Classical_Mozart, Wolfgang Amadeus_Vocal-Orchestral
+Classical_Paradis, Maria Theresia von_Art Song
+Classical_Reichardt, Louise_Art Song
+Classical_Saint-Georges, Joseph Bologne_Chamber
+Classical_Schroter, Corona_Art Song
+Romantic_Bartok, Bela_Keyboard
+Romantic_Berlioz, Hector_Choral
+Romantic_Bizet, Georges_Art Song
+Romantic_Boulanger, Lili_Art Song
+Romantic_Boulton, Harold_Art Song
+Romantic_Brahms, Johannes_Art Song
+Romantic_Brahms, Johannes_Chamber
+Romantic_Brahms, Johannes_Choral
+Romantic_Brahms, Johannes_Keyboard
+Romantic_Brahms, Johannes_Orchestral
+Romantic_Burgmuller, Friedrich_Keyboard
+Romantic_Butterworth, George_Art Song
+Romantic_Chaminade, Cecile_Art Song
+Romantic_Chausson, Ernest_Art Song
+Romantic_Chopin, Frederic_Art Song
+Romantic_Chopin, Frederic_Keyboard
+Romantic_Cornelius, Peter_Art Song
+Romantic_Debussy, Claude_Art Song
+Romantic_Debussy, Claude_Keyboard
+Romantic_Dvorak, Antonin_Chamber
+Romantic_Dvorak, Antonin_Choral
+Romantic_Dvorak, Antonin_Keyboard
+Romantic_Dvorak, Antonin_Orchestral
+Romantic_Faisst, Clara_Art Song
+Romantic_Faure, Gabriel_Art Song
+Romantic_Faure, Gabriel_Chamber
+Romantic_Faure, Gabriel_Keyboard
+Romantic_Franz, Robert_Art Song
+Romantic_Gonzaga, Chiquinha_Art Song
+Romantic_Grandval, Clemence de_Art Song
+Romantic_Grieg, Edvard_Keyboard
+Romantic_Grieg, Edvard_Orchestral
+Romantic_Hensel, Fanny_Art Song
+Romantic_Holmes, Augusta Mary Anne_Art Song
+Romantic_Jaell, Marie_Art Song
+Romantic_Kinkel, Johanna_Art Song
+Romantic_Kralik, Mathilde_Art Song
+Romantic_Lang, Josephine_Art Song
+Romantic_Lehmann, Liza_Art Song
+Romantic_Liszt, Franz_Keyboard
+Romantic_Mayer, Emilie_Chamber
+Romantic_Medtner, Nikolay_Keyboard
+Romantic_Mendelssohn, Felix_Art Song
+Romantic_Mendelssohn, Felix_Chamber
+Romantic_Mendelssohn, Felix_Choral
+Romantic_Mendelssohn, Felix_Keyboard
+Romantic_Mendelssohn, Felix_Orchestral
+Romantic_Munktell, Helena_Art Song
+Romantic_Parratt, Walter_Choral
+Romantic_Prokofiev, Sergey_Keyboard
+Romantic_Rachmaninoff, Sergei_Choral
+Romantic_Rachmaninoff, Sergei_Keyboard
+Romantic_Ravel, Maurice_Art Song
+Romantic_Ravel, Maurice_Chamber
+Romantic_Ravel, Maurice_Keyboard
+Romantic_Saint-Saens, Camille_Chamber
+Romantic_Saint-Saens, Camille_Keyboard
+Romantic_Saint-Saens, Camille_Orchestral
+Romantic_Satie, Erik_Art Song
+Romantic_Satie, Erik_Keyboard
+Romantic_Schubert, Franz_Art Song
+Romantic_Schubert, Franz_Chamber
+Romantic_Schubert, Franz_Choral
+Romantic_Schubert, Franz_Keyboard
+Romantic_Schumann, Clara_Art Song
+Romantic_Schumann, Robert_Art Song
+Romantic_Schumann, Robert_Chamber
+Romantic_Schumann, Robert_Choral
+Romantic_Schumann, Robert_Keyboard
+Romantic_Scriabin, Aleksandr_Keyboard
+Romantic_Shostakovich, Dmitry_Chamber
+Romantic_Shostakovich, Dmitry_Keyboard
+Romantic_Sibelius, Jean_Keyboard
+Romantic_Smetana, Bedrich_Keyboard
+Romantic_Tchaikovsky, Pyotr_Keyboard
+Romantic_Tchaikovsky, Pyotr_Orchestral
+Romantic_Viardot, Pauline_Art Song
+Romantic_Warlock, Peter_Art Song
+Romantic_Wolf, Hugo_Art Song
+Romantic_Zumsteeg, Emilie_Art Song

utils.py ADDED Viewed

	@@ -0,0 +1,406 @@

+import torch
+import random
+import bisect
+import json
+import re
+from config import *
+from transformers import GPT2Model, GPT2LMHeadModel, LlamaModel, LlamaForCausalLM, PreTrainedModel
+from samplings import top_p_sampling, top_k_sampling, temperature_sampling
+from tokenizers import Tokenizer
class Patchilizer:
    """Convert ABC notation text to fixed-size patches of character ids and back.

    Characters are encoded with ``ord``.  Three ids are reserved: 0 (padding /
    special), 1 (bos) and 2 (eos).
    """

    def __init__(self, stream=PATCH_STREAM):
        """Set up barline delimiters and reserved ids.

        Args:
            stream: Whether stream-style encoding is used (``[r:i/j]`` line
                prefixes in ``encode_train``, unterminated last line in
                ``encode_generate``).
        """
        self.stream = stream
        self.delimiters = ["|:", "::", ":|", "[|", "||", "|]", "|"]
        self.regexPattern = '(' + '|'.join(map(re.escape, self.delimiters)) + ')'
        self.bos_token_id = 1
        self.eos_token_id = 2
        self.special_token_id = 0

    def split_bars(self, body_lines):
        """
        Split a body of music into individual bars.

        Each line is split on the barline delimiters and every piece of text is
        re-joined with its neighbouring delimiter.  Splitting is best-effort:
        if a line cannot be paired up, the bars collected from the preceding
        lines are returned and the remaining lines are ignored.
        """
        new_bars = []
        try:
            for line in body_lines:
                line_bars = re.split(self.regexPattern, line)
                line_bars = list(filter(None, line_bars))
                new_line_bars = []

                if len(line_bars) == 1:
                    new_line_bars = line_bars
                else:
                    if line_bars[0] in self.delimiters:
                        new_line_bars = [line_bars[i] + line_bars[i + 1] for i in range(0, len(line_bars), 2)]
                    else:
                        new_line_bars = [line_bars[0]] + [line_bars[i] + line_bars[i + 1] for i in range(1, len(line_bars), 2)]
                    if 'V' not in new_line_bars[-1]:
                        # Absorb the final "barline + newline" piece into the previous bar.
                        new_line_bars[-2] += new_line_bars[-1]
                        new_line_bars = new_line_bars[:-1]
                new_bars += new_line_bars
        except IndexError:
            # Pieces that do not pair up (odd count, or a lone trailing piece).
            pass

        return new_bars

    def split_patches(self, abc_text, patch_size=PATCH_SIZE, generate_last=False):
        """Cut ``abc_text`` into chunks of at most ``patch_size`` characters.

        Unless ``generate_last`` is true, an eos character is appended first
        when the text length is not a multiple of ``patch_size``.
        """
        if not generate_last and len(abc_text) % patch_size != 0:
            abc_text += chr(self.eos_token_id)
        patches = [abc_text[i : i + patch_size] for i in range(0, len(abc_text), patch_size)]
        return patches

    def patch2chars(self, patch):
        """
        Convert a patch (iterable of ids) into text.

        Decoding stops at the first eos id; ids below eos (padding and bos)
        are skipped rather than emitted as control characters.
        """
        chars = []
        for idx in patch:
            if idx == self.eos_token_id:
                break
            if idx < self.eos_token_id:
                continue
            chars.append(chr(idx))
        return ''.join(chars)

    def patchilize_metadata(self, metadata_lines):
        """Split every metadata line into patches (strings) and concatenate them."""
        metadata_patches = []
        for line in metadata_lines:
            metadata_patches += self.split_patches(line)

        return metadata_patches

    def patchilize_tunebody(self, tunebody_lines, encode_mode='train'):
        """Split tune-body lines into bars and each bar into patches.

        In ``'generate'`` mode the last bar is split without appending eos, so
        generation can continue it.  An empty tune body yields no patches.
        """
        tunebody_patches = []
        bars = self.split_bars(tunebody_lines)
        if encode_mode == 'train':
            for bar in bars:
                tunebody_patches += self.split_patches(bar)
        elif encode_mode == 'generate':
            if not bars:
                return tunebody_patches
            for bar in bars[:-1]:
                tunebody_patches += self.split_patches(bar)
            tunebody_patches += self.split_patches(bars[-1], generate_last=True)

        return tunebody_patches

    def encode_train(self, abc_text, patch_length=PATCH_LENGTH, patch_size=PATCH_SIZE, add_special_patches=True, cut=True):
        """Encode a full ABC text into a list of id patches for training.

        Args:
            abc_text: ABC notation text.
            patch_length: Maximum number of patches.
            patch_size: Number of ids per patch (shorter patches are padded).
            add_special_patches: Whether to add a bos patch before the metadata
                and an eos patch after the tune body.
            cut: Whether to truncate the result to ``patch_length`` patches.

        Returns:
            A list of patches, each a list of ``patch_size`` ids.
        """
        lines = abc_text.split('\n')
        lines = list(filter(None, lines))
        lines = [line + '\n' for line in lines]

        # NOTE(review): if no line contains '[V:', the index stays -1 and the
        # last line alone is treated as the tune body — confirm this is intended.
        tunebody_index = -1
        for i, line in enumerate(lines):
            if '[V:' in line:
                tunebody_index = i
                break

        metadata_lines = lines[ : tunebody_index]
        tunebody_lines = lines[tunebody_index : ]

        if self.stream:
            # Prefix each body line with "[r:<index>/<lines remaining after it>]".
            tunebody_lines = ['[r:' + str(line_index) + '/' + str(len(tunebody_lines) - line_index - 1) + ']' + line for line_index, line in
                              enumerate(tunebody_lines)]

        metadata_patches = self.patchilize_metadata(metadata_lines)
        tunebody_patches = self.patchilize_tunebody(tunebody_lines, encode_mode='train')

        if add_special_patches:
            bos_patch = chr(self.bos_token_id) * (patch_size - 1) + chr(self.eos_token_id)
            eos_patch = chr(self.bos_token_id) + chr(self.eos_token_id) * (patch_size - 1)

            metadata_patches = [bos_patch] + metadata_patches
            tunebody_patches = tunebody_patches + [eos_patch]

        if self.stream:
            if len(metadata_patches) + len(tunebody_patches) > patch_length:
                # Too long: randomly keep the head, the tail, or a middle
                # section that starts at a line boundary.
                available_cut_indexes = [0] + [index + 1 for index, patch in enumerate(tunebody_patches) if '\n' in patch]
                line_index_for_cut_index = list(range(len(available_cut_indexes)))
                end_index = len(metadata_patches) + len(tunebody_patches) - patch_length
                biggest_index = bisect.bisect_left(available_cut_indexes, end_index)
                available_cut_indexes = available_cut_indexes[:biggest_index + 1]

                if len(available_cut_indexes) == 1:
                    choices = ['head']
                elif len(available_cut_indexes) == 2:
                    choices = ['head', 'tail']
                else:
                    choices = ['head', 'tail', 'middle']
                choice = random.choice(choices)
                if choice == 'head':
                    patches = metadata_patches + tunebody_patches[0:]
                else:
                    if choice == 'tail':
                        cut_index = len(available_cut_indexes) - 1
                    else:
                        cut_index = random.choice(range(1, len(available_cut_indexes) - 1))

                    line_index = line_index_for_cut_index[cut_index]
                    stream_tunebody_lines = tunebody_lines[line_index : ]

                    stream_tunebody_patches = self.patchilize_tunebody(stream_tunebody_lines, encode_mode='train')
                    if add_special_patches:
                        stream_tunebody_patches = stream_tunebody_patches + [eos_patch]
                    patches = metadata_patches + stream_tunebody_patches
            else:
                patches = metadata_patches + tunebody_patches
        else:
            patches = metadata_patches + tunebody_patches

        if cut:
            patches = patches[ : patch_length]

        # encode to ids
        id_patches = []
        for patch in patches:
            id_patch = [ord(c) for c in patch] + [self.special_token_id] * (patch_size - len(patch))
            id_patches.append(id_patch)

        return id_patches

    def encode_generate(self, abc_code, patch_length=PATCH_LENGTH, patch_size=PATCH_SIZE, add_special_patches=True):
        """Encode partially generated ABC text into id patches to continue from.

        Args:
            abc_code: ABC text generated so far (metadata, optionally followed
                by tune-body lines starting with ``[V:`` or ``[r:``).
            patch_length: Maximum number of patches.
            patch_size: Number of ids per patch.
            add_special_patches: Whether to prepend the bos patch.

        Returns:
            A list of id patches.  Patches are padded to ``patch_size`` except
            a short patch that does not end with eos, which is left unpadded
            so generation can continue it.
        """
        lines = abc_code.split('\n')
        lines = list(filter(None, lines))

        tunebody_index = None
        for i, line in enumerate(lines):
            if line.startswith('[V:') or line.startswith('[r:'):
                tunebody_index = i
                break
        if tunebody_index is None:
            # No tune body yet: everything is metadata.  (Slicing with None
            # would duplicate every line as tune body.)
            tunebody_index = len(lines)

        metadata_lines = lines[ : tunebody_index]
        tunebody_lines = lines[tunebody_index : ]

        metadata_lines = [line + '\n' for line in metadata_lines]
        if self.stream:
            if tunebody_lines and not abc_code.endswith('\n'):
                # The last line is still being written: leave it unterminated.
                tunebody_lines = [tunebody_lines[i] + '\n' for i in range(len(tunebody_lines) - 1)] + [tunebody_lines[-1]]
            else:
                tunebody_lines = [tunebody_lines[i] + '\n' for i in range(len(tunebody_lines))]
        else:
            tunebody_lines = [line + '\n' for line in tunebody_lines]

        metadata_patches = self.patchilize_metadata(metadata_lines)
        tunebody_patches = self.patchilize_tunebody(tunebody_lines, encode_mode='generate')

        if add_special_patches:
            bos_patch = chr(self.bos_token_id) * (patch_size - 1) + chr(self.eos_token_id)

            metadata_patches = [bos_patch] + metadata_patches

        patches = metadata_patches + tunebody_patches
        patches = patches[ : patch_length]

        # encode to ids
        id_patches = []
        for patch in patches:
            # Compare against the patch_size argument, not the global constant.
            if len(patch) < patch_size and patch[-1] != chr(self.eos_token_id):
                id_patch = [ord(c) for c in patch]
            else:
                id_patch = [ord(c) for c in patch] + [self.special_token_id] * (patch_size - len(patch))
            id_patches.append(id_patch)

        return id_patches

    def decode(self, patches):
        """
        Decode patches into music.
        """
        return ''.join(self.patch2chars(patch) for patch in patches)
+class PatchLevelDecoder(PreTrainedModel):
+    """
+    A Patch-level Decoder model for generating patch features in an auto-regressive manner.
+    It inherits PreTrainedModel from transformers.
+    """
+    def __init__(self, config):
+        super().__init__(config)
+        self.patch_embedding = torch.nn.Linear(PATCH_SIZE * 128, config.n_embd)
+        torch.nn.init.normal_(self.patch_embedding.weight, std=0.02)
+        self.base = GPT2Model(config)
+    def forward(self,
+                patches: torch.Tensor,
+                masks=None) -> torch.Tensor:
+        """
+        The forward pass of the patch-level decoder model.
+        :param patches: the patches to be encoded
+        :param masks: the masks for the patches
+        :return: the encoded patches
+        """
+        patches = torch.nn.functional.one_hot(patches, num_classes=128).to(self.dtype)
+        patches = patches.reshape(len(patches), -1, PATCH_SIZE * (128))
+        patches = self.patch_embedding(patches.to(self.device))
+        if masks==None:
+            return self.base(inputs_embeds=patches)
+        else:
+            return self.base(inputs_embeds=patches,
+                             attention_mask=masks)
+class CharLevelDecoder(PreTrainedModel):
+    """
+    A Char-level Decoder model for generating the chars within each patch in an auto-regressive manner
+    based on the encoded patch features. It inherits PreTrainedModel from transformers.
+    """
+    def __init__(self, config):
+        super().__init__(config)
+        self.special_token_id = 0
+        self.bos_token_id = 1
+        self.base = GPT2LMHeadModel(config)
+    def forward(self,
+                encoded_patches: torch.Tensor,
+                target_patches: torch.Tensor):
+        """
+        The forward pass of the char-level decoder model.
+        :param encoded_patches: the encoded patches
+        :param target_patches: the target patches
+        :return: the output of the model
+        """
+        # preparing the labels for model training
+        target_patches = torch.cat((torch.ones_like(target_patches[:,0:1])*self.bos_token_id, target_patches), dim=1)
+        # print('target_patches shape:', target_patches.shape)
+        target_masks = target_patches == self.special_token_id
+        labels = target_patches.clone().masked_fill_(target_masks, -100)
+        # masking the labels for model training
+        target_masks = torch.ones_like(labels)
+        target_masks = target_masks.masked_fill_(labels == -100, 0)
+        # select patches
+        if PATCH_SAMPLING_BATCH_SIZE!=0 and PATCH_SAMPLING_BATCH_SIZE<target_patches.shape[0]:
+            indices = list(range(len(target_patches)))
+            random.shuffle(indices)
+            selected_indices = sorted(indices[:PATCH_SAMPLING_BATCH_SIZE])
+            target_patches = target_patches[selected_indices,:]
+            target_masks = target_masks[selected_indices,:]
+            encoded_patches = encoded_patches[selected_indices,:]
+        # get input embeddings
+        inputs_embeds = torch.nn.functional.embedding(target_patches, self.base.transformer.wte.weight)
+        # concatenate the encoded patches with the input embeddings
+        inputs_embeds = torch.cat((encoded_patches.unsqueeze(1), inputs_embeds[:,1:,:]), dim=1)
+        output = self.base(inputs_embeds=inputs_embeds,
+                         attention_mask=target_masks,
+                         labels=labels)
+                         # output_hidden_states=True=True)
+        return output
+    def generate(self,
+                 encoded_patch: torch.Tensor,   # [hidden_size]
+                 tokens: torch.Tensor): # [1]
+        """
+        The generate function for generating a patch based on the encoded patch and already generated tokens.
+        :param encoded_patch: the encoded patch
+        :param tokens: already generated tokens in the patch
+        :return: the probability distribution of next token
+        """
+        encoded_patch = encoded_patch.reshape(1, 1, -1) # [1, 1, hidden_size]
+        tokens = tokens.reshape(1, -1)
+        # Get input embeddings
+        tokens = torch.nn.functional.embedding(tokens, self.base.transformer.wte.weight)
+        # Concatenate the encoded patch with the input embeddings
+        tokens = torch.cat((encoded_patch, tokens[:,1:,:]), dim=1)
+        # Get output from model
+        outputs = self.base(inputs_embeds=tokens)
+        # Get probabilities of next token
+        probs = torch.nn.functional.softmax(outputs.logits.squeeze(0)[-1], dim=-1)
+        return probs
+class NotaGenLMHeadModel(PreTrainedModel):
+    """
+    NotaGen is a language model with a hierarchical structure.
+    It includes a patch-level decoder and a char-level decoder.
+    The patch-level decoder is used to generate patch features in an auto-regressive manner.
+    The char-level decoder is used to generate the chars within each patch in an auto-regressive manner.
+    It inherits PreTrainedModel from transformers.
+    """
+    def __init__(self, encoder_config, decoder_config):
+        super().__init__(encoder_config)
+        self.special_token_id = 0
+        self.bos_token_id = 1
+        self.eos_token_id = 2
+        self.patch_level_decoder = PatchLevelDecoder(encoder_config)
+        self.char_level_decoder = CharLevelDecoder(decoder_config)
+    def forward(self,
+                patches: torch.Tensor,
+                masks: torch.Tensor):
+        """
+        The forward pass of the bGPT model.
+        :param patches: the patches to be encoded
+        :param masks: the masks for the patches
+        :return: the decoded patches
+        """
+        patches = patches.reshape(len(patches), -1, PATCH_SIZE)
+        encoded_patches = self.patch_level_decoder(patches, masks)["last_hidden_state"]
+        left_shift_masks = masks * (masks.flip(1).cumsum(1).flip(1) > 1)
+        masks[:, 0] = 0
+        encoded_patches = encoded_patches[left_shift_masks == 1]
+        patches = patches[masks == 1]
+        return self.char_level_decoder(encoded_patches, patches)
+    def generate(self,
+                 patches: torch.Tensor,
+                 top_k=0,
+                 top_p=1,
+                 temperature=1.0):
+        """
+        The generate function for generating patches based on patches.
+        :param patches: the patches to be encoded
+        :param top_k: the top k for sampling
+        :param top_p: the top p for sampling
+        :param temperature: the temperature for sampling
+        :return: the generated patches
+        """
+        if patches.shape[-1] % PATCH_SIZE != 0:
+            tokens = patches[:,:,-(patches.shape[-1]%PATCH_SIZE):].squeeze(0, 1)
+            tokens = torch.cat((torch.tensor([self.bos_token_id], device=self.device), tokens), dim=-1)
+            patches = patches[:,:,:-(patches.shape[-1]%PATCH_SIZE)]
+        else:
+            tokens =  torch.tensor([self.bos_token_id], device=self.device)
+        patches = patches.reshape(len(patches), -1, PATCH_SIZE) # [bs, seq, patch_size]
+        encoded_patches = self.patch_level_decoder(patches)["last_hidden_state"]    # [bs, seq, hidden_size]
+        generated_patch = []
+        while True:
+            prob = self.char_level_decoder.generate(encoded_patches[0][-1], tokens).cpu().detach().numpy()  # [128]
+            prob = top_k_sampling(prob, top_k=top_k, return_probs=True) # [128]
+            prob = top_p_sampling(prob, top_p=top_p, return_probs=True) # [128]
+            token = temperature_sampling(prob, temperature=temperature) # int
+            char = chr(token)
+            generated_patch.append(token)
+            if len(tokens) >= PATCH_SIZE:# or token == self.eos_token_id:
+                break
+            else:
+                tokens = torch.cat((tokens, torch.tensor([token], device=self.device)), dim=0)
+        return generated_patch