Add Harmony / Drum separation

Files changed:
- app.py (+60 -53)
- audiocraft/data/audio_utils.py (+12 -6)
- audiocraft/utils/extend.py (+14 -2)
app.py
CHANGED

@@ -19,8 +19,9 @@ from audiocraft.data.audio_utils import apply_fade, apply_tafade
 from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, INTERRUPTING
 import numpy as np
 import random
-from pathlib import Path
-from typing import List, Union
+#from pathlib import Path
+#from typing import List, Union
+import librosa
 
 MODEL = None
 MODELS = None
@@ -80,12 +81,18 @@ def get_filename_from_filepath(filepath):
     file_base, file_extension = os.path.splitext(file_name)
     return file_base, file_extension
 
+def get_melody(melody_filepath):
+    audio_data= list(librosa.load(melody_filepath, sr=None))
+    audio_data[0], audio_data[1] = audio_data[1], audio_data[0]
+    melody = tuple(audio_data)
+    return melody
+
 def load_melody_filepath(melody_filepath, title):
     # get melody filename
     #$Union[str, os.PathLike]
     symbols = ['_', '.', '-']
     if melody_filepath is None:
-        return None, title
+        return None, title, gr.Slider.update(maximum=0, value=0) , gr.Radio.update(value="melody", interactive=True)
 
     if (title is None) or ("MusicGen" in title) or (title == ""):
        melody_name, melody_extension = get_filename_from_filepath(melody_filepath)
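The `get_melody` helper added above leans on `librosa.load` returning the pair `(samples, sample_rate)` and swaps it into the `(sample_rate, samples)` order the rest of the app expects. A minimal standalone sketch of that idea (function name and example path are illustrative, not part of the commit):

import librosa

def load_melody_tuple(melody_filepath):
    # librosa.load returns (samples, sample_rate); sr=None keeps the file's native rate
    samples, sample_rate = librosa.load(melody_filepath, sr=None)
    # reorder to (sample_rate, samples), the tuple layout the app passes around
    return sample_rate, samples

# sr, samples = load_melody_tuple("some_melody.wav")  # placeholder path
# print(sr, samples.shape)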
@@ -97,26 +104,25 @@ def load_melody_filepath(melody_filepath, title):
 
         print(f"Melody name: {melody_name}, Melody Filepath: {melody_filepath}\n")
 
-    return gr.Audio.update(value=melody_filepath), gr.Textbox.update(value=melody_name)
-
-def load_melody(melody, prompt_index):
     # get melody length in number of segments and modify the UI
-
-        return gr.Slider.update(maximum=0, value=0) , gr.Radio.update(value="melody", interactive=True)
+    melody = get_melody(melody_filepath)
     sr, melody_data = melody[0], melody[1]
     segment_samples = sr * 30
-    total_melodys = max(min((len(melody_data) // segment_samples)
+    total_melodys = max(min((len(melody_data) // segment_samples), 25), 0)
     print(f"Melody length: {len(melody_data)}, Melody segments: {total_melodys}\n")
-    MAX_PROMPT_INDEX = total_melodys
-
-
+    MAX_PROMPT_INDEX = total_melodys
+
+    return gr.Textbox.update(value=melody_name), gr.Slider.update(maximum=MAX_PROMPT_INDEX, value=0), gr.Radio.update(value="melody", interactive=False)
 
-def predict(model, text,
+def predict(model, text, melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap=1, prompt_index = 0, include_title = True, include_settings = True, harmony_only = False):
     global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
     output_segments = None
     melody_name = "Not Used"
+    melody = None
     if melody_filepath:
         melody_name, melody_extension = get_filename_from_filepath(melody_filepath)
+        melody = get_melody(melody_filepath)
+
     INTERRUPTED = False
     INTERRUPTING = False
     if temperature < 0:
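`load_melody_filepath` now folds in the work of the removed `load_melody` callback: it loads the melody, counts how many full 30-second conditioning segments it contains (clamped to 25), and uses that count as the new maximum of the `prompt_index` slider. The arithmetic in isolation, as a hedged sketch (helper name and example sample rate are illustrative):

def count_prompt_segments(num_samples: int, sample_rate: int, segment_seconds: int = 30, cap: int = 25) -> int:
    # one selectable conditioning segment per full 30 s of audio, clamped to [0, cap]
    segment_samples = sample_rate * segment_seconds
    return max(min(num_samples // segment_samples, cap), 0)

# a 95-second clip at 32 kHz yields 3 full segments
assert count_prompt_segments(95 * 32000, 32000) == 3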
@@ -173,7 +179,7 @@ def predict(model, text, melody, melody_filepath, duration, dimension, topk, top
     if melody:
         # todo return excess duration, load next model and continue in loop structure building up output_segments
         if duration > MODEL.lm.cfg.dataset.segment_duration:
-            output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index)
+            output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index, harmony_only=False)
         else:
             # pure original code
             sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
@@ -217,12 +223,12 @@ def predict(model, text, melody, melody_filepath, duration, dimension, topk, top
            overlap_samples = overlap * MODEL.sample_rate
            #stack tracks and fade out/in
            overlapping_output_fadeout = output[:, :, -overlap_samples:]
-           overlapping_output_fadeout = apply_fade(overlapping_output_fadeout,sample_rate=MODEL.sample_rate,duration=overlap,out=True,start=True, curve_end=0.0, current_device=MODEL.device)
-
+           #overlapping_output_fadeout = apply_fade(overlapping_output_fadeout,sample_rate=MODEL.sample_rate,duration=overlap,out=True,start=True, curve_end=0.0, current_device=MODEL.device)
+           overlapping_output_fadeout = apply_tafade(overlapping_output_fadeout,sample_rate=MODEL.sample_rate,duration=overlap,out=True,start=True,shape="linear")
 
            overlapping_output_fadein = output_segments[i][:, :, :overlap_samples]
-           overlapping_output_fadein = apply_fade(overlapping_output_fadein,sample_rate=MODEL.sample_rate,duration=overlap,out=False,start=False, curve_start=0.0, current_device=MODEL.device)
-
+           #overlapping_output_fadein = apply_fade(overlapping_output_fadein,sample_rate=MODEL.sample_rate,duration=overlap,out=False,start=False, curve_start=0.0, current_device=MODEL.device)
+           overlapping_output_fadein = apply_tafade(overlapping_output_fadein,sample_rate=MODEL.sample_rate,duration=overlap,out=False,start=False, shape="linear")
 
            overlapping_output = torch.cat([overlapping_output_fadeout[:, :, :-(overlap_samples // 2)], overlapping_output_fadein],dim=2)
            print(f" overlap size Fade:{overlapping_output.size()}\n output: {output.size()}\n segment: {output_segments[i].size()}")
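The overlap handling above switches from the custom `apply_fade` to `apply_tafade`, which wraps `torchaudio.transforms.Fade`: the tail of the accumulated output is faded out, the head of the next segment is faded in, and the two faded pieces are concatenated with half of the faded tail trimmed. A simplified sketch of that join on dummy tensors, not the app's exact code path:

import torch
import torchaudio

def join_with_fade(prev_seg: torch.Tensor, next_seg: torch.Tensor, sample_rate: int, overlap_s: int) -> torch.Tensor:
    # prev_seg / next_seg: [batch, channels, samples]
    overlap = overlap_s * sample_rate
    fade_out = torchaudio.transforms.Fade(fade_out_len=overlap, fade_shape="linear")
    fade_in = torchaudio.transforms.Fade(fade_in_len=overlap, fade_shape="linear")
    tail = fade_out(prev_seg[:, :, -overlap:])   # fading tail of the earlier audio
    head = fade_in(next_seg[:, :, :overlap])     # fading head of the next segment
    # drop half of the faded tail before joining, mirroring the overlap_samples // 2 trim above
    return torch.cat([tail[:, :, :-(overlap // 2)], head], dim=2)

# a, b = torch.randn(1, 2, 32000 * 5), torch.randn(1, 2, 32000 * 5)
# joined = join_with_fade(a, b, 32000, overlap_s=2)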
@@ -244,7 +250,7 @@ def predict(model, text, melody, melody_filepath, duration, dimension, topk, top
        background = add_settings_to_image(title if include_title else "", video_description if include_settings else "", background_path=background, font=settings_font, font_color=settings_font_color)
        audio_write(
            file.name, output, MODEL.sample_rate, strategy="loudness",
-           loudness_headroom_db=18, loudness_compressor=True, add_suffix=False, channels=2)
+           loudness_headroom_db=18, loudness_compressor=True, add_suffix=False, channels=2)
        waveform_video = make_waveform(file.name,bg_image=background, bar_count=45)
        if MOVE_TO_CPU:
            MODEL.to('cpu')
@@ -252,12 +258,11 @@ def predict(model, text, melody, melody_filepath, duration, dimension, topk, top
        MODEL = None
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
-    return waveform_video, seed
+    return waveform_video, file.name, seed
 
 def ui(**kwargs):
    css="""
-    #col-container {max-width: 910px; margin-left: auto; margin-right: auto;}
-    #aud-melody {height: 0; width:0; visibility: hidden;}
+    #col-container {max-width: 910px; margin-left: auto; margin-right: auto;}
    a {text-decoration-line: underline; font-weight: 600;}
    """
    with gr.Blocks(title="UnlimitedMusicGen", css=css) as demo:
@@ -283,47 +288,49 @@ def ui(**kwargs):
        with gr.Row():
            with gr.Column():
                with gr.Row():
-                   text = gr.Text(label="Prompt Text", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi")
                    with gr.Column():
-
-
+                       text = gr.Text(label="Prompt Text", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi")
+                       duration = gr.Slider(minimum=1, maximum=720, value=10, label="Duration", interactive=True)
+                       model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
+                   with gr.Column():
+                       melody_filepath = gr.Audio(source="upload", type="filepath", label="Melody Condition (optional)", interactive=True)
                        prompt_index = gr.Slider(label="Melody Condition Sample Segment", minimum=-1, maximum=MAX_PROMPT_INDEX, step=1, value=0, interactive=True, info="Which 30 second segment to condition with, - 1 condition each segment independantly")
+                       harmony_only = gr.Radio(label="Harmony Only",choices=["No", "Yes"], value="No", interactive=True, info="Remove Drums?")
                with gr.Row():
                    submit = gr.Button("Submit")
                    # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
                    _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
-               with gr.
[further removed lines truncated in the page rendering]
-               reuse_seed = gr.Button('\u267b\ufe0f').style(full_width=False)
+               with gr.Accordion("Video", open=False):
+                   with gr.Row():
+                       background= gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
+                       with gr.Column():
+                           include_title = gr.Checkbox(label="Add Title", value=True, interactive=True)
+                           include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True)
+                   with gr.Row():
+                       title = gr.Textbox(label="Title", value="UnlimitedMusicGen", interactive=True)
+                       settings_font = gr.Text(label="Settings Font", value="./assets/arial.ttf", interactive=True)
+                       settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#c87f05", interactive=True)
+               with gr.Accordion("Expert", open=False):
+                   with gr.Row():
+                       overlap = gr.Slider(minimum=1, maximum=15, value=2, step=1, label="Verse Overlap", interactive=True)
+                       dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
+                   with gr.Row():
+                       topk = gr.Number(label="Top-k", value=280, precision=0, interactive=True)
+                       topp = gr.Number(label="Top-p", value=1450, precision=0, interactive=True)
+                       temperature = gr.Number(label="Randomness Temperature", value=0.75, precision=None, interactive=True)
+                       cfg_coef = gr.Number(label="Classifier Free Guidance", value=8.5, precision=None, interactive=True)
+                   with gr.Row():
+                       seed = gr.Number(label="Seed", value=-1, precision=0, interactive=True)
+                       gr.Button('\U0001f3b2\ufe0f').style(full_width=False).click(fn=lambda: -1, outputs=[seed], queue=False)
+                       reuse_seed = gr.Button('\u267b\ufe0f').style(full_width=False)
            with gr.Column() as c:
                output = gr.Video(label="Generated Music")
+               wave_file = gr.File(label=".wav file", elem_id="output_wavefile", interactive=True)
                seed_used = gr.Number(label='Seed used', value=-1, interactive=False)
 
-       melody_filepath.change(load_melody_filepath, inputs=[melody_filepath, title], outputs=[
-       melody.change(load_melody, inputs=[melody, prompt_index], outputs=[prompt_index], api_name="melody_change")
+       melody_filepath.change(load_melody_filepath, inputs=[melody_filepath, title], outputs=[title, prompt_index , model], api_name="melody_filepath_change")
        reuse_seed.click(fn=lambda x: x, inputs=[seed_used], outputs=[seed], queue=False, api_name="reuse_seed")
-       submit.click(predict, inputs=[model, text,
+       submit.click(predict, inputs=[model, text,melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap, prompt_index, include_title, include_settings, harmony_only], outputs=[output, wave_file, seed_used], api_name="submit")
        gr.Examples(
            fn=predict,
            examples=[
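With the separate `load_melody` handler gone, a single `melody_filepath.change` event now updates the title textbox, the `prompt_index` slider range, and the model radio together, which is why `load_melody_filepath` returns three values matching `outputs=[title, prompt_index, model]`. A reduced sketch of that wiring pattern, assuming the Gradio 3.x API used throughout the diff (component set trimmed down, callback logic faked):

import gradio as gr

def on_melody_change(filepath, title):
    if filepath is None:
        return title, gr.Slider.update(maximum=0, value=0), gr.Radio.update(value="melody", interactive=True)
    # pretend the uploaded melody has 2 usable segments and lock the model choice
    return "my_melody", gr.Slider.update(maximum=2, value=0), gr.Radio.update(value="melody", interactive=False)

with gr.Blocks() as demo:
    melody_filepath = gr.Audio(source="upload", type="filepath", label="Melody Condition (optional)")
    title = gr.Textbox(label="Title", value="UnlimitedMusicGen")
    prompt_index = gr.Slider(label="Melody Condition Sample Segment", minimum=-1, maximum=0, step=1, value=0)
    model = gr.Radio(["melody", "medium", "small", "large"], value="melody", label="Model")
    melody_filepath.change(on_melody_change, inputs=[melody_filepath, title],
                           outputs=[title, prompt_index, model])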
@@ -353,7 +360,7 @@ def ui(**kwargs):
                    "medium",
                ],
            ],
-           inputs=[text,
+           inputs=[text, melody_filepath, model],
            outputs=[output]
        )
 
audiocraft/data/audio_utils.py
CHANGED

@@ -173,7 +173,7 @@ def i16_pcm(wav: torch.Tensor) -> torch.Tensor:
    assert wav.dtype == torch.int16
    return wav
 
-def apply_tafade(audio: torch.Tensor, sample_rate, duration=3.0, out=True, start=True, shape: str = "linear") -> torch.Tensor:
+def apply_tafade(audio: torch.Tensor, sample_rate, duration=3.0, out=True, start=True, shape: str = "linear", stem_name: tp.Optional[str] = None) -> torch.Tensor:
    """
    Apply fade-in and/or fade-out effects to the audio tensor.
 
@@ -192,11 +192,12 @@ def apply_tafade(audio: torch.Tensor, sample_rate, duration=3.0, out=True, start
    fade_samples = int(sample_rate * duration)  # Number of samples for the fade duration
 
    # Create the fade transform
-   fade_transform = torchaudio.transforms.Fade(fade_in_len=
+   fade_transform = torchaudio.transforms.Fade(fade_in_len=0, fade_out_len=0, fade_shape=shape)
 
    if out:
        fade_transform.fade_out_len = fade_samples
-
+   else:
+       fade_transform.fade_in_len = fade_samples
 
    # Select the portion of the audio to apply the fade
    if start:
@@ -213,9 +214,12 @@ def apply_tafade(audio: torch.Tensor, sample_rate, duration=3.0, out=True, start
    else:
        audio_faded[:, -fade_samples:] = audio_fade_section
 
-
+   wav = normalize_loudness(audio_faded,sample_rate, loudness_headroom_db=18, loudness_compressor=True)
+   _clip_wav(wav, log_clipping=False, stem_name=stem_name)
+   return wav
+
 
-def apply_fade(audio: torch.Tensor, sample_rate, duration=3.0, out=True, start=True, curve_start:float=0.0, curve_end:float=1.0, current_device:str="cpu") -> torch.Tensor:
+def apply_fade(audio: torch.Tensor, sample_rate, duration=3.0, out=True, start=True, curve_start:float=0.0, curve_end:float=1.0, current_device:str="cpu", stem_name: tp.Optional[str] = None) -> torch.Tensor:
    """
    Apply fade-in and/or fade-out effects to the audio tensor.
 
@@ -256,4 +260,6 @@ def apply_fade(audio: torch.Tensor, sample_rate, duration=3.0, out=True, start=T
    else:
        audio_faded[:, -fade_samples:] = audio_fade_section
 
-
+   wav = normalize_loudness(audio_faded,sample_rate, loudness_headroom_db=18, loudness_compressor=True)
+   _clip_wav(wav, log_clipping=False, stem_name=stem_name)
+   return wav
audiocraft/utils/extend.py
CHANGED

@@ -11,6 +11,7 @@ import textwrap
 import requests
 from io import BytesIO
 from huggingface_hub import hf_hub_download
+import librosa
 
 
 INTERRUPTING = False
@@ -43,7 +44,7 @@ def separate_audio_segments(audio, segment_duration=30, overlap=1):
    print(f"separate_audio_segments: {len(segments)} segments")
    return segments
 
-def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:int=1, segment_duration:int=30, prompt_index:int=0):
+def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:int=1, segment_duration:int=30, prompt_index:int=0, harmony_only:bool= False):
    # generate audio segments
    melody_segments = separate_audio_segments(melody, segment_duration, 0)
 
@@ -85,12 +86,23 @@ def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:
        if INTERRUPTING:
            return [], duration
        print(f"segment {segment_idx + 1} of {total_segments} \r")
-
+
+       if harmony_only:
+           # REMOVE PERCUSION FROM MELODY
+           # Apply HPSS using librosa
+           verse_harmonic, verse_percussive = librosa.effects.hpss(melody_segments[segment_idx][1])
+           # Convert the separated components back to torch.Tensor
+           #harmonic_tensor = torch.from_numpy(verse_harmonic)
+           #percussive_tensor = torch.from_numpy(verse_percussive)
+           sr, verse = melody_segments[segment_idx][0], torch.from_numpy(verse_harmonic).to(MODEL.device).float().t().unsqueeze(0)
+       else:
+           sr, verse = melody_segments[segment_idx][0], torch.from_numpy(melody_segments[segment_idx][1]).to(MODEL.device).float().t().unsqueeze(0)
 
        print(f"shape:{verse.shape} dim:{verse.dim()}")
        if verse.dim() == 2:
            verse = verse[None]
        verse = verse[..., :int(sr * MODEL.lm.cfg.dataset.segment_duration)]
+
        # Append the segment to the melodys list
        melodys.append(verse)
 
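The new `harmony_only` branch uses librosa's harmonic/percussive source separation (HPSS) to strip drums from each conditioning segment before it is handed to the model. A small self-contained sketch of that separation step on a mono numpy signal, with the tensor conversion from the diff omitted:

import numpy as np
import librosa

def drop_percussion(samples: np.ndarray) -> np.ndarray:
    # HPSS splits the signal into a harmonic layer (sustained tones) and a percussive layer (transients)
    harmonic, percussive = librosa.effects.hpss(samples)
    return harmonic  # keep only the harmonic part, i.e. remove the drums

# y, sr = librosa.load("some_melody.wav", sr=None)  # placeholder path
# harmony = drop_percussion(y)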