amaai-lab
/

text2midi

Model card · Files and versions

dorienh committed on Jan 9

Commit

ee1cbd9

·

verified ·

1 Parent(s): 70e9e4d

Update README.md

Files changed (1) hide show

README.md +16 -1

README.md CHANGED Viewed

@@ -43,7 +43,14 @@ model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
 # Download the vocab_remi.pkl file
 tokenizer_path = hf_hub_download(repo_id=repo_id, filename="vocab_remi.pkl")
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
 # Load the tokenizer dictionary
 with open(tokenizer_path, "rb") as f:
@@ -57,12 +64,20 @@ model.load_state_dict(torch.load(model_path, map_location=device))
 model.eval()
 tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
 src = "A melodic electronic song with ambient elements, featuring piano, acoustic guitar, alto saxophone, string ensemble, and electric bass. Set in G minor with a 4/4 time signature, it moves at a lively Presto tempo. The composition evokes a blend of relaxation and darkness, with hints of happiness and a meditative quality."
 inputs = tokenizer(src, return_tensors='pt', padding=True, truncation=True)
 input_ids = nn.utils.rnn.pad_sequence(inputs.input_ids, batch_first=True, padding_value=0)
 input_ids = input_ids.to(device)
 attention_mask =nn.utils.rnn.pad_sequence(inputs.attention_mask, batch_first=True, padding_value=0)
 attention_mask = attention_mask.to(device)
 output = model.generate(input_ids, attention_mask, max_len=2000,temperature = 1.0)
 output_list = output[0].tolist()
 generated_midi = r_tokenizer.decode(output_list)

 # Download the vocab_remi.pkl file
 tokenizer_path = hf_hub_download(repo_id=repo_id, filename="vocab_remi.pkl")
+if torch.cuda.is_available():
+    device = 'cuda'
+elif torch.backends.mps.is_available():
+    device = 'mps'
+else:
+    device = 'cpu'
+print(f"Using device: {device}")
 # Load the tokenizer dictionary
 with open(tokenizer_path, "rb") as f:
 model.eval()
 tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
+print('Model loaded.')
+# Enter the text prompt and tokenize it
 src = "A melodic electronic song with ambient elements, featuring piano, acoustic guitar, alto saxophone, string ensemble, and electric bass. Set in G minor with a 4/4 time signature, it moves at a lively Presto tempo. The composition evokes a blend of relaxation and darkness, with hints of happiness and a meditative quality."
+print('Generating for prompt: ' + src)
 inputs = tokenizer(src, return_tensors='pt', padding=True, truncation=True)
 input_ids = nn.utils.rnn.pad_sequence(inputs.input_ids, batch_first=True, padding_value=0)
 input_ids = input_ids.to(device)
 attention_mask =nn.utils.rnn.pad_sequence(inputs.attention_mask, batch_first=True, padding_value=0)
 attention_mask = attention_mask.to(device)
+# Generate the midi
 output = model.generate(input_ids, attention_mask, max_len=2000,temperature = 1.0)
 output_list = output[0].tolist()
 generated_midi = r_tokenizer.decode(output_list)