Prompt conditioning sample segments (-1 conditions each verse independently)
Update title to Melody Conditioning file name upon load
Separate title and settings inclusions for the background
Fixed a bug in my 6/19 code... stupid logical mistake
- app.py +70 -16
- assets/favicon.ico +0 -0
- audiocraft/utils/extend.py +46 -33
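
The headline change: a new "Melody Condition Sample Segment" slider feeds a prompt_index into generation, selecting which 30-second slice of the uploaded melody becomes the conditioning prompt for every segment. A minimal sketch of the selection rule as committed in generate_music_segments below (the clamping and the -1 mode come from this diff; the standalone helper name choose_prompt_verse is illustrative only):

    def choose_prompt_verse(melodys, prompt_index, total_segments):
        # 0 (the default) conditions on the first 30-second segment;
        # a positive index picks that segment, clamped to the last one
        if prompt_index > 0:
            return melodys[min(prompt_index, total_segments - 1)]
        return melodys[0]

    # prompt_index < 0 disables the fixed prompt: after each verse is
    # generated, its output becomes the prompt for the next verse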
app.py
CHANGED

@@ -19,6 +19,8 @@ from audiocraft.data.audio_utils import apply_fade, apply_tafade
 from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, INTERRUPTING
 import numpy as np
 import random
+from pathlib import Path
+from typing import List, Union
 
 MODEL = None
 MODELS = None
@@ -26,6 +28,7 @@ IS_SHARED_SPACE = "Surn/UnlimitedMusicGen" in os.environ.get('SPACE_ID', '')
 INTERRUPTED = False
 UNLOAD_MODEL = False
 MOVE_TO_CPU = False
+MAX_PROMPT_INDEX = 0
 
 def interrupt_callback():
     return INTERRUPTED
@@ -65,11 +68,53 @@ def load_model(version):
     print("Cached model loaded in %.2fs" % (time.monotonic() - t1))
     return result
 
-
-
-
+def get_filename(file):
+    # extract filename from file object
+    filename = None
+    if file is not None:
+        filename = file.name
+    return filename
+
+def get_filename_from_filepath(filepath):
+    file_name = os.path.basename(filepath)
+    file_base, file_extension = os.path.splitext(file_name)
+    return file_base, file_extension
+
+def load_melody_filepath(melody_filepath, title):
+    # get melody filename
+    #$Union[str, os.PathLike]
+    symbols = ['_', '.', '-']
+    if melody_filepath is None:
+        return None, title
+
+    if (title is None) or ("MusicGen" in title) or (title == ""):
+        melody_name, melody_extension = get_filename_from_filepath(melody_filepath)
+        # fix melody name for symbols
+        for symbol in symbols:
+            melody_name = melody_name.replace(symbol, ' ').title()
+    else:
+        melody_name = title
+
+    print(f"Melody name: {melody_name}, Melody Filepath: {melody_filepath}\n")
+
+    return gr.Audio.update(value=melody_filepath), gr.Textbox.update(value=melody_name)
+
+def load_melody(melody, prompt_index):
+    # get melody length in number of segments and modify the UI
+    if melody is None:
+        return prompt_index
+    sr, melody_data = melody[0], melody[1]
+    segment_samples = sr * 30
+    total_melodys = max(min((len(melody_data) // segment_samples) - 1, 25), 0)
+    print(f"Melody length: {len(melody_data)}, Melody segments: {total_melodys}\n")
+    MAX_PROMPT_INDEX = total_melodys
+    return gr.Slider.update(maximum=MAX_PROMPT_INDEX, value=0, visible=True)
+
+
+def predict(model, text, melody, melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap=1, prompt_index = 0, include_title = True, include_settings = True):
+    global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
     output_segments = None
-
+    melody_name, melody_extension = get_filename_from_filepath(melody_filepath)
     INTERRUPTED = False
     INTERRUPTING = False
     if temperature < 0:
@@ -126,7 +171,7 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
     if melody:
         # todo return excess duration, load next model and continue in loop structure building up output_segments
         if duration > MODEL.lm.cfg.dataset.segment_duration:
-            output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration)
+            output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index)
         else:
             # pure original code
             sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
@@ -191,10 +236,10 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
     else:
         output = output.detach().cpu().float()[0]
 
-    with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
-
-
-        background = add_settings_to_image(title, video_description, background_path=background, font=settings_font, font_color=settings_font_color)
+    with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
+        video_description = f"{text}\n Duration: {str(initial_duration)} Dimension: {dimension}\n Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n cfg:{cfg_coef} overlap: {overlap}\n Seed: {seed}\n Model: {model}\n Melody Condition:{melody_name}\n Prompt index: {prompt_index}"
+        if include_settings or include_title:
+            background = add_settings_to_image(title if include_title else "", video_description if include_settings else "", background_path=background, font=settings_font, font_color=settings_font_color)
         audio_write(
             file.name, output, MODEL.sample_rate, strategy="loudness",
             loudness_headroom_db=18, loudness_compressor=True, add_suffix=False, channels=2)
@@ -210,6 +255,7 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
 def ui(**kwargs):
     css="""
     #col-container {max-width: 910px; margin-left: auto; margin-right: auto;}
+    #aud-melody {height: 0; width:0; visibility: hidden;}
    a {text-decoration-line: underline; font-weight: 600;}
    """
    with gr.Blocks(title="UnlimitedMusicGen", css=css) as demo:
@@ -235,15 +281,20 @@ def ui(**kwargs):
         with gr.Row():
             with gr.Column():
                 with gr.Row():
-                    text = gr.Text(label="
-
+                    text = gr.Text(label="Prompt Text", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi")
+                    with gr.Column():
+                        melody_filepath = gr.Audio(source="upload", type="filepath", label="Melody Condition (optional)", interactive=True)
+                        melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True, visible=True, elem_id="aud-melody")#.style("display: none;height: 0; width:0;")
+                        prompt_index = gr.Slider(label="Melody Condition Sample Segment", minimum=-1, maximum=MAX_PROMPT_INDEX, step=1, value=0, interactive=True, info="Which 30 second segment to condition with, - 1 condition each segment independantly")
                 with gr.Row():
                     submit = gr.Button("Submit")
                     # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
                     _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
                 with gr.Row():
                     background= gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
-
+                    with gr.Column():
+                        include_title = gr.Checkbox(label="Add Title", value=True, interactive=True)
+                        include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True)
                 with gr.Row():
                     title = gr.Textbox(label="Title", value="UnlimitedMusicGen", interactive=True)
                     settings_font = gr.Text(label="Settings Font", value="./assets/arial.ttf", interactive=True)
@@ -252,7 +303,7 @@ def ui(**kwargs):
                 model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
                 with gr.Row():
                     duration = gr.Slider(minimum=1, maximum=720, value=10, label="Duration", interactive=True)
-                    overlap = gr.Slider(minimum=1, maximum=15, value=
+                    overlap = gr.Slider(minimum=1, maximum=15, value=3, step=1, label="Overlap", interactive=True)
                     dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
                 with gr.Row():
                     topk = gr.Number(label="Top-k", value=250, precision=0, interactive=True)
@@ -267,8 +318,10 @@ def ui(**kwargs):
                 output = gr.Video(label="Generated Music")
                 seed_used = gr.Number(label='Seed used', value=-1, interactive=False)
 
-
-
+        melody_filepath.change(load_melody_filepath, inputs=[melody_filepath, title], outputs=[melody, title], api_name="melody_filepath_change").success(load_melody, inputs=[melody, prompt_index], outputs=[prompt_index])
+        melody.change(load_melody, inputs=[melody, prompt_index], outputs=[prompt_index], api_name="melody_change")
+        reuse_seed.click(fn=lambda x: x, inputs=[seed_used], outputs=[seed], queue=False, api_name="reuse_seed")
+        submit.click(predict, inputs=[model, text, melody, melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap, prompt_index, include_title, include_settings], outputs=[output, seed_used], api_name="submit")
         gr.Examples(
             fn=predict,
             examples=[
@@ -307,10 +360,11 @@ def ui(**kwargs):
     share = kwargs.get('share', False)
     if share:
         launch_kwargs['share'] = share
+    launch_kwargs['favicon_path']= "./assets/favicon.ico"
 
 
 
-    demo.queue(max_size=
+    demo.queue(max_size=12).launch(**launch_kwargs)
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
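
The load_melody_filepath helper above derives a display title from the uploaded melody's filename: strip the directory and extension, replace '_', '.', and '-' with spaces, and title-case the result. A quick standalone check of that rule (the logic mirrors the committed helper; derive_title is an illustrative name, not part of the repo):

    import os

    def derive_title(filepath):
        # same cleanup as load_melody_filepath
        file_base, _ = os.path.splitext(os.path.basename(filepath))
        for symbol in ['_', '.', '-']:
            file_base = file_base.replace(symbol, ' ').title()
        return file_base

    print(derive_title("/uploads/my_melody-take.2.wav"))  # -> "My Melody Take 2"

Calling .title() inside the loop re-applies the casing after each symbol is replaced, which is redundant but harmless; the final pass produces the cleaned title.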
assets/favicon.ico
ADDED
audiocraft/utils/extend.py
CHANGED

@@ -18,7 +18,7 @@ INTERRUPTING = False
 def separate_audio_segments(audio, segment_duration=30, overlap=1):
     sr, audio_data = audio[0], audio[1]
 
-    total_samples =
+    total_samples = len(audio_data)
     segment_samples = sr * segment_duration
     overlap_samples = sr * overlap
 
@@ -43,15 +43,16 @@ def separate_audio_segments(audio, segment_duration=30, overlap=1):
     print(f"separate_audio_segments: {len(segments)} segments")
     return segments
 
-def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:int=1, segment_duration:int=30):
+def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:int=1, segment_duration:int=30, prompt_index:int=0):
     # generate audio segments
     melody_segments = separate_audio_segments(melody, segment_duration, 0)
 
-    # Create
+    # Create lists to store the melody tensors for each segment
     melodys = []
     output_segments = []
     last_chunk = []
     text += ", seed=" + str(seed)
+    prompt_segment = None
 
     # Calculate the total number of segments
     total_segments = max(math.ceil(duration / segment_duration),1)
@@ -94,55 +95,63 @@ def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:
         melodys.append(verse)
 
     torch.manual_seed(seed)
+
+    # If user selects a prompt segment, generate a new prompt segment to use on all segments
+    # default to the first segment for prompt conditioning
+    prompt_verse = melodys[0]
+    if prompt_index > 0:
+        # Get a prompt segment from the selected verse, normally the first verse
+        prompt_verse = melodys[prompt_index if prompt_index <= (total_segments - 1) else (total_segments - 1)]
+
+    # set the prompt segment MODEL generation params
+    MODEL.set_generation_params(
+        use_sampling=True,
+        top_k=MODEL.generation_params["top_k"],
+        top_p=MODEL.generation_params["top_p"],
+        temperature=MODEL.generation_params["temp"],
+        cfg_coef=MODEL.generation_params["cfg_coef"],
+        duration=segment_duration,
+        two_step_cfg=False,
+        rep_penalty=0.5
+    )
+    # Generate a new prompt segment. This will be applied to all segments for consistency
+    print(f"Generating New Prompt Segment: {text} from verse {prompt_index}\r")
+    prompt_segment = MODEL.generate_with_all(
+        descriptions=[text],
+        melody_wavs=prompt_verse,
+        sample_rate=sr,
+        progress=False,
+        prompt=None,
+    )
+
     for idx, verse in enumerate(melodys):
         if INTERRUPTING:
             return output_segments, duration
 
         print(f'Segment duration: {segment_duration}, duration: {duration}, overlap: {overlap} Overlap Loss: {duration_loss}')
         # Compensate for the length of final segment
-        if (idx + 1) == len(melodys):
-
+        if ((idx + 1) == len(melodys)) or (duration < segment_duration):
+            mod_duration = max(min(duration, segment_duration),1)
+            print(f'Modify verse length, duration: {duration}, overlap: {overlap} Overlap Loss: {duration_loss} to mod duration: {mod_duration}')
             MODEL.set_generation_params(
                 use_sampling=True,
                 top_k=MODEL.generation_params["top_k"],
                 top_p=MODEL.generation_params["top_p"],
                 temperature=MODEL.generation_params["temp"],
                 cfg_coef=MODEL.generation_params["cfg_coef"],
-                duration=
+                duration=mod_duration,
                 two_step_cfg=False,
                 rep_penalty=0.5
             )
             try:
                 # get last chunk
-                verse = verse[:, :, -
-                prompt_segment = prompt_segment[:, :, -
+                verse = verse[:, :, -mod_duration*MODEL.sample_rate:]
+                prompt_segment = prompt_segment[:, :, -mod_duration*MODEL.sample_rate:]
             except:
                 # get first chunk
-                verse = verse[:, :, :
-                prompt_segment = prompt_segment[:, :, :
+                verse = verse[:, :, :mod_duration*MODEL.sample_rate]
+                prompt_segment = prompt_segment[:, :, :mod_duration*MODEL.sample_rate]
-
-        else:
-            MODEL.set_generation_params(
-                use_sampling=True,
-                top_k=MODEL.generation_params["top_k"],
-                top_p=MODEL.generation_params["top_p"],
-                temperature=MODEL.generation_params["temp"],
-                cfg_coef=MODEL.generation_params["cfg_coef"],
-                duration=segment_duration,
-                two_step_cfg=False,
-                rep_penalty=0.5
-            )
-
-            # Generate a new prompt segment based on the first verse. This will be applied to all segments for consistency
-            if idx == 0:
-                print(f"Generating New Prompt Segment: {text}\r")
-                prompt_segment = MODEL.generate_with_all(
-                    descriptions=[text],
-                    melody_wavs=verse,
-                    sample_rate=sr,
-                    progress=False,
-                    prompt=None,
-                )
+
 
         print(f"Generating New Melody Segment {idx + 1}: {text}\r")
         output = MODEL.generate_with_all(
@@ -152,6 +161,10 @@ def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:
             progress=False,
             prompt=prompt_segment,
         )
+        # If user selects a prompt segment, use the prompt segment for all segments
+        # Otherwise, use the previous segment as the prompt
+        if prompt_index < 0:
+            prompt_segment = output
 
         # Append the generated output to the list of segments
         #output_segments.append(output[:, :segment_duration])
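
The final-verse handling above trims both the melody verse and the prompt to mod_duration seconds by slicing the last samples of a [batch, channels, samples] tensor, falling back to the first chunk if the negative slice raises. A minimal tensor sketch of that indexing (shapes and values are illustrative):

    import torch

    sample_rate = 32000
    mod_duration = 10  # seconds remaining to generate
    verse = torch.zeros(1, 1, 30 * sample_rate)  # a 30s [B, C, T] melody tensor

    last_chunk = verse[:, :, -mod_duration * sample_rate:]  # final 10 seconds
    first_chunk = verse[:, :, :mod_duration * sample_rate]  # first 10 seconds
    print(last_chunk.shape, first_chunk.shape)  # both torch.Size([1, 1, 320000])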