Spaces:

Surn
/

UnlimitedMusicGen

Runtime error

App Files Files Community

UnlimitedMusicGen / app.py

Surn

Allow Melody in MCP call

0790175 5 months ago

raw

history blame contribute delete

49.1 kB

	"""
	Copyright (c) Meta Platforms, Inc. and affiliates.
	All rights reserved.

	This source code is licensed under the license found in the
	LICENSE file in the root directory of this source tree.
	"""

	from tempfile import NamedTemporaryFile
	import argparse
	import torch
	import gradio as gr
	import os
	import subprocess
	import sys
	from pathlib import Path
	import time
	import typing as tp
	import warnings
	import gc
	from tqdm import tqdm
	from audiocraft.models import MusicGen
	from audiocraft.data.audio import audio_write
	from audiocraft.data.audio_utils import apply_fade, apply_tafade, apply_splice_effect
	from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, INTERRUPTING
	from audiocraft.utils import utils
	import numpy as np
	import random
	import shutil
	from mutagen.mp4 import MP4
	#from typing import List, Union
	import librosa
	import modules.user_history
	from modules.version_info import versions_html, commit_hash, get_xformers_version
	from modules.gradio import *
	from modules.file_utils import get_file_parts, get_filename_from_filepath, convert_title_to_filename, get_unique_file_path, delete_file, download_and_save_image, download_and_save_file
	from modules.constants import IS_SHARED_SPACE, HF_REPO_ID, TMPDIR, HF_API_TOKEN
	from modules.storage import upload_files_to_repo

	# Currently loaded MusicGen model; predict() assigns it from load_model().
	MODEL = None
	# Optional cache of loaded models keyed by version name; load_model() only
	# consults it when it is not None.
	MODELS = None
	#IS_SHARED_SPACE = "Surn/UnlimitedMusicGen" in os.environ.get('SPACE_ID', '')
	# Value reported by interrupt_callback(); predict() resets it at the start of
	# a run and sets it when the run is stopped by an interrupt.
	INTERRUPTED = False
	# When true, predict() drops its reference to the model after generating.
	UNLOAD_MODEL = False
	# When true, predict() moves the model to the CPU after generating and back
	# to CUDA before reusing it.
	MOVE_TO_CPU = False
	# Initial maximum of the "Melody Condition Sample Segment" slider built in ui().
	MAX_PROMPT_INDEX = 0
	# Git executable used by git_tag(); can be overridden with the GIT environment variable.
	git = os.environ.get('GIT', "git")
	#os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
	# Runtime tuning flags exported through the process environment.
	# NOTE(review): presumably read by PyTorch / TensorFlow / xformers at start-up;
	# confirm each one is still honoured by the installed versions.
	os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128,expandable_segments:True"
	os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
	os.environ['CUDA_MODULE_LOADING']='LAZY'
	os.environ['USE_FLASH_ATTENTION'] = '1'
	os.environ['XFORMERS_FORCE_DISABLE_TRITON']= '1'

	def interrupt_callback():
	    """Return the current value of the module-level INTERRUPTED flag."""
	    was_interrupted = INTERRUPTED
	    return was_interrupted

	def interrupt():
	    """
	    Set this module's INTERRUPTING flag to True.

	    NOTE(review): INTERRUPTING is also imported from audiocraft.utils.extend;
	    this only rebinds the name in this module — confirm whether the flag in
	    the extend module needs to be set as well.
	    """
	    globals()["INTERRUPTING"] = True

	class FileCleaner:
	    """
	    Track files together with the time they were registered and delete the
	    ones older than a fixed lifetime whenever a new file is added.
	    """

	    def __init__(self, file_lifetime: float = 3600):
	        # Oldest entries first: (registration timestamp, path).
	        self.files = []
	        # Maximum age in seconds before a registered file is removed.
	        self.file_lifetime = file_lifetime

	    def add(self, path: tp.Union[str, Path]):
	        """Purge expired files, then register *path* with the current time."""
	        self._cleanup()
	        entry = (time.time(), Path(path))
	        self.files.append(entry)

	    def _cleanup(self):
	        """Delete and forget expired entries, stopping at the first live one."""
	        current = time.time()
	        while self.files:
	            registered_at, target = self.files[0]
	            expired = current - registered_at > self.file_lifetime
	            if not expired:
	                # Entries are appended in time order, so nothing later has expired.
	                break
	            if target.exists():
	                target.unlink()
	            self.files.pop(0)


	#file_cleaner = FileCleaner()

	def ping():
	    """
	    Liveness probe that takes no input.

	    Returns:
	        bool: always True.
	    """
	    alive = True
	    return alive

	def toggle_audio_src(choice):
	    """
	    Toggle the audio input source between microphone and file upload.

	    The update uses the list-valued ``sources`` keyword, matching how the
	    audio and image components are constructed elsewhere in this file
	    (``sources=["upload"]``); the previous singular ``source`` keyword did
	    not match that API.

	    Args:
	        choice (str): The selected audio source; 'mic' selects the microphone,
	            any other value selects file upload.

	    Returns:
	        The object produced by gr.update() describing the new source list,
	        a cleared value and the new label.
	    """
	    if choice == "mic":
	        return gr.update(sources=["microphone"], value=None, label="Microphone")
	    return gr.update(sources=["upload"], value=None, label="File")

	def get_waveform(*args, **kwargs):
	    """
	    Build a waveform video by delegating to gr.make_waveform and print how
	    long the call took.

	    The signature accepts arbitrary positional and keyword arguments so that
	    callers can pass options such as ``bg_image=`` or ``bar_count=`` by name;
	    everything is forwarded unchanged.

	    Args:
	        *args: Positional arguments forwarded to gr.make_waveform.
	        **kwargs: Keyword arguments forwarded to gr.make_waveform.

	    Returns:
	        Whatever gr.make_waveform returns; predict() treats it as the path of
	        the generated video.
	    """
	    started = time.time()
	    # Warnings raised while rendering are suppressed for the duration of the call.
	    with warnings.catch_warnings():
	        warnings.simplefilter('ignore')
	        out = gr.make_waveform(*args, **kwargs)
	        print("Make a video took", time.time() - started)
	    return out

	def load_model(version, progress=gr.Progress(track_tqdm=True)):
	    """
	    Load a MusicGen model by version name while reporting progress via tqdm.

	    When the module-level MODELS cache is None the model is always loaded with
	    MusicGen.get_pretrained. Otherwise the currently loaded MODEL (if any) is
	    moved to the CPU first, and the requested version is either loaded and
	    stored in MODELS or taken from MODELS and moved to CUDA.

	    The progress bar is always finished by the amount still missing, so it
	    ends at exactly its total on every path instead of overshooting.

	    Args:
	        version (str): The model version to load.
	        progress (gr.Progress, optional): Gradio progress tracker; declared so
	            that Gradio tracks the tqdm bar created here.

	    Returns:
	        The loaded model: the result of MusicGen.get_pretrained(version), or
	        of ``MODELS[version].to('cuda')`` for a cached entry.
	    """
	    print("Loading model", version)

	    with tqdm(total=100, desc=f"Loading model '{version}'", unit="step") as pbar:
	        if MODELS is None:
	            pbar.update(50)  # Simulated progress: loading is about to start
	            result = MusicGen.get_pretrained(version)
	            pbar.update(pbar.total - pbar.n)  # Complete progress
	            return result

	        t1 = time.monotonic()
	        if MODEL is not None:
	            MODEL.to('cpu')  # Park the previous model on the CPU
	            print("Previous model moved to CPU in %.2fs" % (time.monotonic() - t1))
	        pbar.update(30)  # Simulated progress for the CPU hand-over step
	        t1 = time.monotonic()

	        cached = MODELS.get(version)
	        if cached is None:
	            print("Loading model %s from disk" % version)
	            result = MusicGen.get_pretrained(version)
	            MODELS[version] = result
	            print("Model loaded in %.2fs" % (time.monotonic() - t1))
	        else:
	            result = cached.to('cuda')
	            print("Cached model loaded in %.2fs" % (time.monotonic() - t1))
	        pbar.update(pbar.total - pbar.n)  # Complete progress without exceeding the total
	        return result

	def get_melody(melody_filepath):
	    """
	    Load an audio file with librosa at its native sampling rate (sr=None).

	    Args:
	        melody_filepath: Path of the audio file handed to librosa.load.

	    Returns:
	        tuple: the two values returned by librosa.load in swapped order, i.e.
	        (sample_rate, audio_data).
	    """
	    samples, sample_rate = librosa.load(melody_filepath, sr=None)
	    return (sample_rate, samples)

	def git_tag():
	    """
	    Describe the current git tag, falling back to CHANGELOG.md.

	    Runs ``<git> describe --tags``. If that fails for any reason, the first
	    non-blank line of CHANGELOG.md, looked up two directory levels above this
	    file's path, is used instead.

	    Returns:
	        str: The tag description, the changelog line, or '<none>' when
	        neither source is available.
	    """
	    try:
	        described = subprocess.check_output([git, "describe", "--tags"], shell=False, encoding='utf8')
	        return described.strip()
	    except Exception:
	        pass

	    try:
	        changelog_md = Path(__file__).parent.parent / "CHANGELOG.md"
	        with changelog_md.open(encoding="utf-8") as file:
	            for raw_line in file:
	                candidate = raw_line.strip()
	                if candidate:
	                    return candidate
	            return "<none>"
	    except Exception:
	        return "<none>"

	def load_background_filepath(video_orientation):
	    """
	    Pick the bundled background image for a video orientation.

	    Args:
	        video_orientation (str): 'Landscape' selects the landscape image; any
	            other value selects the portrait image.

	    Returns:
	        str: Relative path of the background image file.
	    """
	    is_landscape = video_orientation == "Landscape"
	    return "./assets/background.png" if is_landscape else "./assets/background_portrait.png"

	def load_melody_filepath(melody_filepath, title, assigned_model, topp, temperature, cfg_coef, segment_length = 30):
	    """
	    Compute UI updates after the melody file selection changes.

	    Without a melody the title is passed through, the model is reset to
	    "medium" and the prompt-index slider is collapsed. With a melody, a title
	    is derived from the file name when the current title is empty or contains
	    "MusicGen", a melody-capable model is enforced, and the prompt-index
	    slider maximum is set to the number of whole segments in the file
	    (capped at 25).

	    Args:
	        melody_filepath (str): Path to the melody file; None or "" means none.
	        title (str): The current song title.
	        assigned_model (str): The currently selected model name.
	        topp (float): Top-p sampling value.
	        temperature (float): Sampling temperature.
	        cfg_coef (float): Classifier-free guidance coefficient.
	        segment_length (int, optional): Segment length in seconds.

	    Returns:
	        tuple: Updates for title, prompt_index, model, topp, temperature,
	        cfg_coef and overlap, in that order.
	    """
	    separators = ['_', '.', '-']
	    overlap_limit = int(segment_length // 2) - 1

	    # Guard clause: nothing selected, so only reset the dependent controls.
	    if melody_filepath is None or melody_filepath == "":
	        return (
	            title,
	            gr.update(maximum=0, value=-1),
	            gr.update(value="medium", interactive=True),
	            gr.update(value=topp),
	            gr.update(value=temperature),
	            gr.update(value=cfg_coef),
	            gr.update(maximum=overlap_limit),
	        )

	    title_is_placeholder = (title is None) or ("MusicGen" in title) or (title == "")
	    if title_is_placeholder:
	        melody_name, _melody_extension = get_filename_from_filepath(melody_filepath)
	        # Turn separator characters into spaces and title-case the result.
	        for separator in separators:
	            melody_name = melody_name.replace(separator, ' ').title()
	        # Sampling presets applied together with the derived title.
	        # NOTE(review): a top-p of 800 looks like a top-k value — confirm intent.
	        topp = 800
	        temperature = 0.5
	        cfg_coef = 3.25
	    else:
	        melody_name = title

	    if "melody" not in assigned_model:
	        assigned_model = "melody-large"

	    print(f"Melody name: {melody_name}, Melody Filepath: {melody_filepath}, Model: {assigned_model}\n")

	    # Count how many whole segments the melody contains to size the slider.
	    sample_rate, samples = get_melody(melody_filepath)
	    samples_per_segment = sample_rate * segment_length
	    segment_count = max(min((len(samples) // samples_per_segment), 25), 0)
	    print(f"Melody length: {len(samples)}, Melody segments: {segment_count}\n")

	    return (
	        gr.update(value=melody_name),
	        gr.update(maximum=segment_count, value=-1),
	        gr.update(value=assigned_model, interactive=True),
	        gr.update(value=topp),
	        gr.update(value=temperature),
	        gr.update(value=cfg_coef),
	        gr.update(maximum=overlap_limit),
	    )

	def _profile_username(profile):
	    """Resolve the user name carried by *profile*.

	    Returns ``profile.value.username`` when *profile* has a ``value`` that
	    exposes ``username``, ``"default_user"`` when *profile* is None, and
	    *profile* itself otherwise (for example a plain user-name string).
	    """
	    if hasattr(profile, 'value') and hasattr(profile.value, 'username'):
	        return profile.value.username
	    return "default_user" if profile is None else profile

	def _download_folder(profile):
	    """Return the per-user folder under TMPDIR used for downloaded inputs."""
	    username = _profile_username(profile)
	    if not isinstance(username, str):
	        # Anything that is not a usable name falls back to the shared folder.
	        username = "default_user"
	    return Path(TMPDIR) / username

	def predict(model, text, melody_filepath = None, duration=10, dimension=2, topk=200, topp=0, temperature=1.0, cfg_coef=4.0, background = None, title="UnlimitedMusicGen", settings_font="./assets/arial.ttf", settings_font_color = "#c87f05", seed=-1, overlap=1, prompt_index = 0, include_title = True, include_settings = True, harmony_only = False, profile = gr.OAuthProfile, segment_length = 30, settings_font_size=28, settings_animate_waveform=False, video_orientation="Landscape", excerpt_duration=3.5, return_history_json=False, progress=gr.Progress(track_tqdm=True)):
	    """
	    Generate music and a waveform video based on the provided parameters and model.

	    Args:
	        model (str): Model name to use for generation.
	        text (str): Prompt describing the music.
	        melody_filepath (str, optional): Path or http(s) URL of a melody conditioning file. Default to None.
	        duration (int): Total duration in seconds (capped at 720).
	        dimension (int): Dimension along which segments are concatenated.
	        topk (int): Top-k sampling value.
	        topp (float): Top-p sampling value.
	        temperature (float): Sampling temperature.
	        cfg_coef (float): Classifier-free guidance coefficient.
	        background (str, optional): Path or http(s) URL of the background image. When None, "None" or "",
	            the image for the chosen video orientation is used.
	        title (str, optional): Song title. Default to "UnlimitedMusicGen".
	        settings_font (str, optional): Path to font file. Default to "./assets/arial.ttf".
	        settings_font_color (str, optional): Font color for settings text. Default to "#c87f05".
	        seed (int, optional): Random seed; a negative value picks a random one. Default to -1.
	        overlap (int, optional): Segment overlap in seconds (capped at 15). Default to 1.
	        prompt_index (int, optional): Melody segment index. Default to 0.
	        include_title (bool, optional): Whether to add title to video. Default to True.
	        include_settings (bool, optional): Whether to add settings to video. Default to True.
	        harmony_only (bool, optional): Whether to use harmony only. Default to False.
	        profile (gr.OAuthProfile): User profile, a user-name string, or None.
	        segment_length (int, optional): Segment length in seconds.
	        settings_font_size (int, optional): Font size for settings text.
	        settings_animate_waveform (bool, optional): Animate waveform in video.
	        video_orientation (str, optional): Video orientation, "Landscape" or "Portrait".
	        excerpt_duration (float, optional): Excerpt duration for style conditioning.
	        return_history_json (bool, optional): Whether to return the history entry instead of the typical output. Default to False.
	        progress (gr.Progress, optional): Gradio progress tracker.

	    Returns:
	        tuple: (waveform_video_path, wave_file_path, seed_used), or
	        (None, None, seed_used) when generation failed or produced no audio.
	        When return_history_json is true and generation succeeded, the value
	        returned by modules.user_history.save_file is returned instead.

	    Raises:
	        gr.Error: For negative temperature/topk/topp, when the model cannot
	            be loaded, or when the run is interrupted.
	    """
	    global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
	    melody_name = "Not Used"
	    melody_extension = "Not Used"
	    melody = None
	    history_results = None
	    if melody_filepath in ["None", ""]:
	        melody_filepath = None

	    # if melody_filepath is a url string, download it using download_and_save_file
	    if melody_filepath and melody_filepath.startswith(("http://", "https://")):
	        melody_filepath = download_and_save_file(melody_filepath, _download_folder(profile), HF_API_TOKEN)

	    # fall back to the bundled background for the chosen orientation
	    if background is None or background in ["None", ""]:
	        background = load_background_filepath(video_orientation)

	    # if background is a url string, download it using download_and_save_image
	    if background.startswith(("http://", "https://")):
	        background = download_and_save_image(background, _download_folder(profile), HF_API_TOKEN)

	    if melody_filepath:
	        melody_name, melody_extension = get_filename_from_filepath(melody_filepath)
	        melody = get_melody(melody_filepath)

	    INTERRUPTED = False
	    INTERRUPTING = False
	    if temperature < 0:
	        raise gr.Error("Temperature must be >= 0.")
	    if topk < 0:
	        raise gr.Error("Topk must be non-negative.")
	    if topp < 0:
	        raise gr.Error("Topp must be non-negative.")

	    # Clean up GPU resources only if the model changes
	    if MODEL is not None and model not in MODEL.name:
	        print(f"Switching model from {MODEL.name} to {model}. Cleaning up resources.")
	        # Rebind instead of `del` so the global name always stays defined.
	        MODEL = None
	        gc.collect()  # Drop the old model's objects first ...
	        torch.cuda.empty_cache()  # ... so their cached GPU blocks can be released

	    try:
	        if MODEL is None:
	            MODEL = load_model(model)
	        elif MOVE_TO_CPU:
	            MODEL.to('cuda')
	    except Exception as e:
	        raise gr.Error(f"Error loading model '{model}': {str(e)}. Try a different model.") from e

	    # prevent hacking
	    duration = min(duration, 720)
	    overlap = min(overlap, 15)

	    # Only models with "melody" or "style" in their name take the audio-conditioned path.
	    use_melody_conditioning = bool(melody) and ("melody" in model or "style" in model)

	    output = None
	    segment_duration = duration
	    initial_duration = duration
	    output_segments = []
	    while duration > 0:
	        max_segment_duration = MODEL.lm.cfg.dataset.segment_duration
	        if not output_segments:  # first pass of long or short song
	            if segment_duration > max_segment_duration:
	                segment_duration = max_segment_duration
	            else:
	                segment_duration = duration
	        else:  # next pass of long song
	            if duration + overlap < max_segment_duration:
	                segment_duration = duration + overlap
	            else:
	                segment_duration = max_segment_duration
	        if (segment_length + overlap) < segment_duration:
	            segment_duration = segment_length + overlap
	        # implement seed
	        if seed < 0:
	            seed = random.randint(0, 0xffff_ffff_ffff)
	        torch.manual_seed(seed)

	        print(f'Segment duration: {segment_duration}, duration: {duration}, overlap: {overlap}')
	        if ("style" in model) and melody:
	            # style and text-to-music
	            MODEL.set_generation_params(
	                use_sampling=True,
	                top_k=topk,
	                top_p=topp,
	                temperature=temperature,
	                cfg_coef=cfg_coef,
	                duration=segment_duration,
	                two_step_cfg=False,
	                cfg_coef_beta=5,  # double CFG is only useful for text-and-style conditioning
	            )

	            MODEL.set_style_conditioner_params(
	                # eval_q (integer between 1 and 6) is the level of quantization that
	                # passes through the conditioner. When low, the model adheres less to
	                # the audio conditioning.
	                eval_q=3,
	                # Length in seconds taken from the provided excerpt; can be between 1.5
	                # and 4.5 seconds but must not exceed the length of the conditioning.
	                excerpt_length=excerpt_duration,
	            )
	        else:
	            MODEL.set_generation_params(
	                use_sampling=True,
	                top_k=topk,
	                top_p=topp,
	                temperature=temperature,
	                cfg_coef=cfg_coef,
	                duration=segment_duration,
	                two_step_cfg=False,
	                extend_stride=2,
	                rep_penalty=0.5,
	                cfg_coef_beta=None,  # double CFG is only useful for text-and-style conditioning
	            )
	        MODEL.set_custom_progress_callback(gr.Progress(track_tqdm=True))

	        try:
	            if use_melody_conditioning:
	                # return excess duration, load next model and continue in loop structure building up output_segments
	                if duration > MODEL.duration:
	                    output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.duration, prompt_index, harmony_only, excerpt_duration, progress=gr.Progress(track_tqdm=True))
	                else:
	                    # pure original code
	                    sr = melody[0]
	                    melody_wav = torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
	                    print(melody_wav.shape)
	                    if melody_wav.dim() == 2:
	                        melody_wav = melody_wav[None]
	                    melody_wav = melody_wav[..., :int(sr * MODEL.lm.cfg.dataset.segment_duration)]
	                    output = MODEL.generate_with_chroma(
	                        descriptions=[text],
	                        melody_wavs=melody_wav,
	                        melody_sample_rate=sr,
	                        progress=False, progress_callback=gr.Progress(track_tqdm=True)
	                    )
	                # All output_segments are populated, so we can break the loop
	                break
	            else:
	                if not output_segments:
	                    next_segment = MODEL.generate(descriptions=[text], progress=False, progress_callback=gr.Progress(track_tqdm=True))
	                    duration -= segment_duration
	                else:
	                    last_chunk = output_segments[-1][:, :, -overlap*MODEL.sample_rate:]
	                    next_segment = MODEL.generate_continuation(last_chunk, MODEL.sample_rate, descriptions=[text], progress=False, progress_callback=gr.Progress(track_tqdm=True))
	                    duration -= segment_duration - overlap
	                if next_segment is not None:
	                    output_segments.append(next_segment)
	        except Exception as e:
	            print(f"Error generating audio: {e}")
	            # Surface the failure without raising, so callers still receive
	            # the (None, None, seed) result.
	            gr.Warning(f"Error generating audio: {e}")
	            return None, None, seed

	        if INTERRUPTING:
	            INTERRUPTED = True
	            INTERRUPTING = False
	            print("Function execution interrupted!")
	            raise gr.Error("Interrupted.")

	    print(f"\nOutput segments: {len(output_segments)}\n")
	    if output_segments:
	        try:
	            # Combine the output segments into one long audio file, cross-fading the overlaps
	            output = output_segments[0]
	            for segment in output_segments[1:]:
	                if overlap > 0:
	                    overlap_samples = int(overlap * MODEL.sample_rate)
	                    # fade out the tail of what has been assembled so far
	                    overlapping_output_fadeout = output[:, :, -overlap_samples:]
	                    overlapping_output_fadeout = apply_tafade(overlapping_output_fadeout,sample_rate=MODEL.sample_rate,duration=overlap,out=True,start=True,shape="linear")

	                    # fade in the head of the next segment
	                    overlapping_output_fadein = segment[:, :, :overlap_samples]
	                    overlapping_output_fadein = apply_tafade(overlapping_output_fadein,sample_rate=MODEL.sample_rate,duration=overlap,out=False,start=False, shape="linear")

	                    overlapping_output = torch.cat([overlapping_output_fadeout[:, :, :-(overlap_samples // 2)], overlapping_output_fadein],dim=2)
	                    print(f" overlap size Fade:{overlapping_output.size()}\n output: {output.size()}\n segment: {segment.size()}")
	                    output = torch.cat([output[:, :, :-overlap_samples], overlapping_output, segment[:, :, overlap_samples:]], dim=dimension)
	                else:
	                    output = torch.cat([output, segment], dim=dimension)
	            output = output.detach().cpu().float()[0]
	        except Exception as e:
	            print(f"Error combining segments: {e}. Using the first segment only.")
	            output = output_segments[0].detach().cpu().float()[0]
	    else:
	        if (output is None) or (output.dim() == 0):
	            return None, None, seed
	        output = output.detach().cpu().float()[0]

	    video_width, video_height = 768, 512
	    if video_orientation == "Portrait":
	        video_width, video_height = 512, 768

	    title_file_name = convert_title_to_filename(title)
	    with NamedTemporaryFile("wb", suffix=".wav", delete=False, prefix=title_file_name) as file:
	        video_description = f"{text}\n Duration: {str(initial_duration)} Dimension: {dimension}\n Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n cfg:{cfg_coef} overlap: {overlap}\n Seed: {seed}\n Model: {model}\n Melody Condition:{melody_name}\n Sample Segment: {prompt_index}"
	        if include_settings or include_title:
	            background = add_settings_to_image(title if include_title else "",video_description if include_settings else "",width=video_width, height=video_height, background_path=background,font=settings_font,font_color=settings_font_color, font_size=settings_font_size)
	        audio_write(
	            file.name, output, MODEL.sample_rate, strategy="loudness",
	            loudness_headroom_db=18, loudness_compressor=True, add_suffix=False, channels=2)
	        waveform_video_path = get_waveform(file.name, bg_image=background, bar_count=45, name=title_file_name, animate=settings_animate_waveform, progress=gr.Progress(track_tqdm=True))

	    # Only the directory and the new extension of the waveform video path are needed here
	    video_dir, _video_filename, _video_name, _video_ext, video_new_ext = get_file_parts(waveform_video_path)

	    new_video_path = get_unique_file_path(video_dir, title_file_name, video_new_ext)

	    mp4 = MP4(waveform_video_path)
	    mp4["©nam"] = title_file_name  # Title tag
	    mp4["desc"] = f"{text}\n Duration: {str(initial_duration)}"  # Description tag

	    commit = commit_hash()
	    profile_name = _profile_username(profile)
	    metadata = {
	        "Title": title,
	        "Year": time.strftime("%Y"),
	        "prompt": text,
	        "negative_prompt": "",
	        "Seed": seed,
	        "steps": 1,
	        "wdth": video_width,
	        "hght": video_height,
	        "Dimension": dimension,
	        "Top-k": topk,
	        "Top-p": topp,
	        "Randomness": temperature,
	        "cfg": cfg_coef,
	        "overlap": overlap,
	        "Melody Condition": melody_name,
	        "Sample Segment": prompt_index,
	        "Duration": initial_duration,
	        "Audio": file.name,
	        "font": settings_font,
	        "font_color": settings_font_color,
	        "font_size": settings_font_size,
	        "harmony_only": harmony_only,
	        "background": background,
	        "include_title": include_title,
	        "include_settings": include_settings,
	        "profile": profile_name,
	        "commit": commit,
	        "tag": git_tag(),
	        # The Gradio version is recorded under "gradio"; "version" carries the commit URL.
	        "version": f"https://huggingface.co/spaces/Surn/UnlimitedMusicGen/commit/{'huggingface' if commit == '<none>' else commit}",
	        "model_version": MODEL.version,
	        "model_name": MODEL.name,
	        "model_description": f"{MODEL.audio_channels} channels, {MODEL.sample_rate} Hz",
	        "melody_name": melody_name if melody_name else "",
	        "melody_extension": melody_extension if melody_extension else "",
	        "hostname": "https://huggingface.co/spaces/Surn/UnlimitedMusicGen",
	        "python": sys.version,
	        "torch": getattr(torch, '__long_version__', torch.__version__),
	        "xformers": get_xformers_version(),
	        "gradio": gr.__version__,
	        "huggingface_space": os.environ.get('SPACE_ID', ''),
	        "CUDA": f"{'CUDA is available. device: ' + torch.cuda.get_device_name(0) + ' version: ' + torch.version.cuda if torch.cuda.is_available() else 'CUDA is not available.'}",
	    }
	    # Copy every metadata entry into the MP4 tags
	    for key, value in metadata.items():
	        mp4[key] = str(value)  # Convert values to strings as required by mutagen

	    # Save the metadata changes to the file
	    mp4.save()

	    try:
	        os.replace(waveform_video_path, new_video_path)
	        waveform_video_path = new_video_path
	    except Exception as e:
	        print(f"Error renaming file: {e}")

	    if waveform_video_path:
	        history_results = modules.user_history.save_file(
	            profile=profile_name,
	            image=background,
	            audio=file.name,
	            video=waveform_video_path,
	            label=title,
	            metadata=metadata,
	            progress=gr.Progress(track_tqdm=True)
	        )

	    if MOVE_TO_CPU:
	        MODEL.to('cpu')
	    if UNLOAD_MODEL:
	        MODEL = None

	    # Explicitly drop references to large tensors or objects
	    del output_segments, output, melody, mp4

	    if torch.cuda.is_available():
	        # Synchronize CUDA streams, then release IPC-held memory
	        torch.cuda.synchronize()
	        torch.cuda.ipc_collect()

	    if return_history_json:
	        return history_results
	    return waveform_video_path, file.name, seed

	def fix_path(path: str) -> str:
	    """
	    Rewrite a path so that it starts at its first '_user_history' component.

	    Everything before the first occurrence of '_user_history' is replaced by
	    "./" and backslashes in the remainder become forward slashes. A path
	    without that substring is returned untouched.

	    Args:
	        path (str): The input file path.

	    Returns:
	        str: The modified file path.
	    """
	    _prefix, marker, remainder = path.partition("_user_history")
	    if not marker:
	        return path
	    relative = marker + remainder
	    return "./" + relative.replace("\\", "/")
	# Add this wrapper function above the gr.api definitions
	def predict_simple(model: str, text: str, melody_filepath: str = None, duration: int = 10, dimension: int = 2, topk: int = 200, topp: float = 0.01, temperature: float = 1.0, cfg_coef: float = 4.0, background: str = "./assets/background.png", title: str = "UnlimitedMusicGen", settings_font: str = "./assets/arial.ttf", settings_font_color: str = "#c87f05", seed: int = -1, overlap: int = 1, prompt_index: int = -1, include_title: bool = True, include_settings: bool = True, harmony_only: bool = False, profile: str = "Satoshi Nakamoto", segment_length: int = 30, settings_font_size: int = 28, settings_animate_waveform: bool = False, video_orientation: str = "Landscape", return_history_json: bool = False) -> tp.List[tp.Tuple[str, str, str]]:
	    """
	    Generate music and video via predict(), then upload the results and return their links.

	    Args:
	        model (str): Model name to use for generation.
	        text (str): Prompt describing the music.
	        melody_filepath (str, optional): Path to melody conditioning file. Default to None.
	        duration (int): Total duration in seconds.
	        dimension (int): Audio stacking/concatenation dimension.
	        topk (int): Top-k sampling value.
	        topp (float): Top-p sampling value.
	        temperature (float): Sampling temperature.
	        cfg_coef (float): Classifier-free guidance coefficient.
	        background (str, optional): Path to background image. Default to "./assets/background.png".
	        title (str, optional): Song title. Default to "UnlimitedMusicGen".
	        settings_font (str, optional): Path to font file. Default to "./assets/arial.ttf".
	        settings_font_color (str, optional): Font color for settings text. Default to "#c87f05".
	        seed (int, optional): Random seed. Default to -1.
	        overlap (int, optional): Segment overlap in seconds. Default to 1.
	        prompt_index (int, optional): Melody segment index. Default to -1.
	        include_title (bool, optional): Whether to add title to video. Default to True.
	        include_settings (bool, optional): Whether to add settings to video. Default to True.
	        harmony_only (bool, optional): Whether to use harmony only. Default to False.
	        profile (str, optional): User profile; a user-name string, or an object exposing
	            ``username`` directly or through ``value``. Falls back to "default_user".
	        segment_length (int, optional): Segment length in seconds.
	        settings_font_size (int, optional): Font size for settings text.
	        settings_animate_waveform (bool, optional): Animate waveform in video.
	        video_orientation (str, optional): Video orientation.
	        return_history_json (bool, optional): Ask predict() for its history entry instead of the typical output. Default to False.

	    Returns:
	        tuple: (video, audio, seed_used). The first two items are the links
	        reported by upload_files_to_repo when the upload returned a result,
	        otherwise the local paths. When predict() produced no result, its
	        return value is passed through unchanged.
	    """
	    # Reduce whatever was passed as profile to a plain user name.
	    profile_username_to_send = "default_user"
	    if profile:
	        actual_profile_data = profile
	        # Unwrap if it's a gr.State-like object
	        if hasattr(profile, 'value') and profile.value is not None:
	            actual_profile_data = profile.value

	        # Now actual_profile_data is either an OAuthProfile-like object or a string username
	        if hasattr(actual_profile_data, 'username') and actual_profile_data.username:
	            profile_username_to_send = actual_profile_data.username
	        elif isinstance(actual_profile_data, str) and actual_profile_data:
	            profile_username_to_send = actual_profile_data

	    # predict() receives the resolved name, so it never sees a non-profile object.
	    UMG_result = predict(model, text, melody_filepath=melody_filepath, duration=duration, dimension=dimension, topk=topk, topp=topp, temperature=temperature, cfg_coef=cfg_coef, background=background, title=title, settings_font=settings_font, settings_font_color=settings_font_color, seed=seed, overlap=overlap, prompt_index=prompt_index, include_title=include_title, include_settings=include_settings, harmony_only=harmony_only, profile=profile_username_to_send, segment_length=segment_length, settings_font_size=settings_font_size, settings_animate_waveform=settings_animate_waveform, video_orientation=video_orientation, excerpt_duration=3.5, return_history_json=return_history_json)

	    # upload to storage and return urls
	    folder_name = f"user_uploads/{convert_title_to_filename(profile_username_to_send)}/{convert_title_to_filename(title)}"
	    timestamp = time.strftime('%Y%m%d%H%M%S')
	    if return_history_json:
	        if not isinstance(UMG_result, dict):
	            # predict() bailed out early (e.g. (None, None, seed)); nothing to upload.
	            return UMG_result
	        # use modules.storage.upload_files_to_repo to get urls for video_path, audio_path, image_path
	        upload_result = upload_files_to_repo(
	            files=[UMG_result["video_path"],UMG_result["audio_path"], UMG_result["image_path"]],
	            repo_id=HF_REPO_ID,  # constants.py value of dataset repo
	            folder_name=f"{folder_name}/{UMG_result['metadata']['Seed']}/{timestamp}",
	            create_permalink=False,
	            repo_type="dataset"
	        )
	        if upload_result:
	            UMG_result["video_path"] = upload_result[0][1]  # Assuming [(response, link) for link in individual_links]
	            UMG_result["audio_path"] = upload_result[1][1]
	            UMG_result["image_path"] = upload_result[2][1]
	        UMG_result = UMG_result["video_path"], UMG_result["audio_path"], UMG_result["metadata"]["Seed"]
	    else:
	        if not UMG_result or UMG_result[0] is None or UMG_result[1] is None:
	            # Generation failed; there are no files to upload.
	            return UMG_result
	        # use modules.storage.upload_files_to_repo to get urls for video_path, audio_path
	        upload_result = upload_files_to_repo(
	            files=[UMG_result[0],UMG_result[1]],
	            repo_id=HF_REPO_ID,  # constants.py value of dataset repo
	            folder_name=f"{folder_name}/{UMG_result[2]}/{timestamp}",
	            create_permalink=False,
	            repo_type="dataset"
	        )
	        if upload_result:
	            UMG_result = upload_result[0][1], upload_result[1][1], UMG_result[2]

	    return UMG_result

	# Register the fonts/, assets/ and images/ directories with Gradio as static paths.
	# NOTE(review): presumably this lets the app serve files from these folders
	# directly — confirm against the installed Gradio version.
	gr.set_static_paths(paths=["fonts/","assets/","images/"])
	def ui(**kwargs):
	with gr.Blocks(title="UnlimitedMusicGen", css_paths="style_20250331.css", theme='Surn/beeuty') as demo:
	with gr.Tab("UnlimitedMusicGen"):
	gr.Markdown(
	"""
	# UnlimitedMusicGen
	This is your private demo for [UnlimitedMusicGen](https://github.com/Oncorporation/audiocraft), a simple and controllable model for music generation
	presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)

	Disclaimer: This won't run on CPU only. Clone this App and run on GPU instance!

	Todo: Working on improved Interrupt.
	Theme Available at ["Surn/Beeuty"](https://huggingface.co/spaces/Surn/Beeuty)

	"""
	)
	if IS_SHARED_SPACE and not torch.cuda.is_available():
	gr.Markdown("""
	⚠ This Space doesn't work in this shared UI ⚠

	<a href="https://huggingface.co/spaces/musicgen/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
	<img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
	to use it privately, or use the <a href="https://huggingface.co/spaces/facebook/MusicGen">public demo</a>
	""")
	with gr.Row():
	with gr.Column():
	with gr.Row():
	with gr.Column():
	text = gr.Text(label="Describe your music", interactive=True, value="4/4 100bpm 320kbps 32khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi, soft fade-in, soft fade-out", key="prompt", lines=4)
	autoplay_cb = gr.Checkbox(value=False, label="Autoplay?", key="autoplay_cb")
	with gr.Column():
	duration = gr.Slider(minimum=1, maximum=720, value=10, label="Duration (s)", interactive=True, key="total_duration", step=1)
	model = gr.Radio(["melody", "medium", "small", "large", "melody-large", "stereo-small", "stereo-medium", "stereo-large", "stereo-melody", "stereo-melody-large", "style"], label="AI Model", value="medium", interactive=True, key="chosen_model")
	with gr.Row():
	submit = gr.Button("Generate", elem_id="btn-generate")
	# Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
	_ = gr.Button("Interrupt", elem_id="btn-interrupt").click(fn=interrupt, queue=False)
	with gr.Row():
	with gr.Column():
	radio = gr.Radio(["file", "mic"], value="file", label="Condition on a melody (optional) File or Mic")
	melody_filepath = gr.Audio(value=None,sources=["upload"], type="filepath", label="Melody Condition (optional)", interactive=True, elem_id="melody-input", key="melody_input")
	with gr.Column():
	harmony_only = gr.Radio(label="Use Harmony Only",choices=["No", "Yes"], value="No", interactive=True, info="Remove Drums?", key="use_harmony")
	prompt_index = gr.Slider(label="Melody Condition Sample Segment", minimum=-1, maximum=MAX_PROMPT_INDEX, step=1, value=-1, interactive=True, info="Which 10-30 second segment to condition with, - 1 = align with conditioning melody", key="melody_index")
	with gr.Accordion("Video", open=False):
	with gr.Row():
	background= gr.Image(value="./assets/background.png", sources=["upload"], label="Background", width=768, height=512, type="filepath", interactive=True, key="background_imagepath")
	with gr.Column():
	include_title = gr.Checkbox(label="Add Title", value=True, interactive=True,key="add_title")
	include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True, key="add_settings")
	video_orientation = gr.Radio(label="Video Orientation", choices=["Landscape", "Portrait"], value="Landscape", interactive=True, key="video_orientation")
	with gr.Row():
	title = gr.Textbox(label="Title", value="UnlimitedMusicGen", interactive=True, key="song_title")
	settings_font = gr.Text(label="Settings Font", value="./assets/arial.ttf", interactive=True)
	settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#c87f05", interactive=True, key="settings_font_color")
	settings_font_size = gr.Slider(minimum=8, maximum=64, value=28, step=1, label="Settings Font Size", interactive=True, key="settings_font_size")
	settings_animate_waveform = gr.Checkbox(label="Animate Waveform", value=False, interactive=True, key="animate_waveform")
	with gr.Accordion("Expert", open=False):
	with gr.Row():
	segment_length = gr.Slider(minimum=10, maximum=30, value=30, step=1,label="Music Generation Segment Length (s)", interactive=True,key="segment_length")
	overlap = gr.Slider(minimum=0, maximum=14, value=1, step=1, label="Segment Overlap", interactive=True)
	dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
	with gr.Row():
	topk = gr.Number(label="Top-k", value=280, precision=0, interactive=True, info="more structured", key="topk")
	topp = gr.Number(label="Top-p", value=1150, precision=0, interactive=True, info="more variation, overwrites Top-k if not zero", key="topp")
	temperature = gr.Number(label="Randomness Temperature", value=0.7, precision=None, step=0.1, interactive=True, info="less than one to follow Melody Condition song closely", key="temperature")
	cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.75, precision=None, step=0.1, interactive=True, info="3.0-4.0, stereo and small need more", key="cfg_coef")
	with gr.Row():
	seed = gr.Number(label="Seed", value=-1, precision=0, interactive=True, key="seed")
	gr.Button('\U0001f3b2\ufe0f', elem_classes="small-btn").click(fn=lambda: -1, outputs=[seed], queue=False)
	reuse_seed = gr.Button('\u267b\ufe0f', elem_classes="small-btn")
	with gr.Column() as c:
	output = gr.Video(label="Generated Music", interactive=False, show_download_button=True, show_share_button=True, autoplay=False)
	wave_file = gr.File(label=".wav file", elem_id="output_wavefile", interactive=True)
	seed_used = gr.Number(label='Seed used', value=-1, interactive=False)

	radio.change(toggle_audio_src, radio, [melody_filepath], queue=False, show_progress=False, api_name="audio_src_change")
	video_orientation.change(load_background_filepath, inputs=[video_orientation], outputs=[background], queue=False, show_progress=False, api_name="video_orientation_change")
	melody_filepath.change(load_melody_filepath, inputs=[melody_filepath, title, model,topp, temperature, cfg_coef, segment_length], outputs=[title, prompt_index , model, topp, temperature, cfg_coef, overlap], api_name="melody_filepath_change", queue=False)
	reuse_seed.click(fn=lambda x: x, inputs=[seed_used], outputs=[seed], queue=False, api_name="reuse_seed_click")
	autoplay_cb.change(fn=lambda x: gr.update(autoplay=x), inputs=[autoplay_cb], outputs=[output], queue=False, api_name="autoplay_cb_change")
	segment_length.release(fn=load_melody_filepath, queue=False, api_name="segment_length_change", trigger_mode="once", inputs=[melody_filepath, title, model,topp, temperature, cfg_coef, segment_length], outputs=[title, prompt_index , model, topp, temperature, cfg_coef, overlap], show_progress="minimal")

	gr.Examples(
	examples=[
	[
	"4/4 120bpm 320kbps 32khz, An 80s driving pop song with heavy drums and synth pads in the background",
	"./assets/bach.mp3",
	"melody",
	"80s Pop Synth",
	950,
	0.6,
	3.5
	],
	[
	"4/4 120bpm 320kbps 32khz, A cheerful country song with acoustic guitars",
	"./assets/bolero_ravel.mp3",
	"stereo-melody-large",
	"Country Guitar",
	750,
	0.7,
	4.0
	],
	[
	"4/4 120bpm 320kbps 32khz, 90s rock song with electric guitar and heavy drums",
	None,
	"stereo-medium",
	"90s Rock Guitar",
	1150,
	0.7,
	3.75
	],
	[
	"4/4 120bpm 320kbps 32khz, a light and cheery EDM track, with syncopated drums, aery pads, and strong emotions",
	"./assets/bach.mp3",
	"melody-large",
	"EDM my Bach",
	500,
	0.7,
	3.75
	],
	[
	"4/4 320kbps 32khz, lofi slow bpm electro chill with organic samples",
	None,
	"medium",
	"LoFi Chill",
	0,
	0.7,
	4.0
	],
	],
	inputs=[text, melody_filepath, model, title, topp, temperature, cfg_coef],
	outputs=[output]
	)

	with gr.Tab("User History") as history_tab:
	modules.user_history.setup(display_type="video_path")
	modules.user_history.render()

	user_profile = gr.State(None)

	with gr.Row("Versions") as versions_row:
	gr.HTML(value=versions_html(), visible=True, elem_id="versions")

	submit.click(
	modules.user_history.get_profile,
	inputs=[],
	outputs=[user_profile],
	queue=True,
	api_name="submit"
	).then(
	predict,
	inputs=[model, text,melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap, prompt_index, include_title, include_settings, harmony_only, user_profile, segment_length, settings_font_size, settings_animate_waveform, video_orientation],
	outputs=[output, wave_file, seed_used], scroll_to_output=True, show_api=False)

	# Show the interface
	launch_kwargs = {}
	share = kwargs.get('share', False)
	server_port = kwargs.get('server_port', 0)
	server_name = kwargs.get('listen')

	launch_kwargs['server_name'] = server_name

	if server_port > 0:
	launch_kwargs['server_port'] = server_port
	if share:
	launch_kwargs['share'] = share
	launch_kwargs['allowed_paths'] = ["assets", "./assets", "images", "./images", 'e:/TMP']
	launch_kwargs['favicon_path'] = "./assets/favicon.ico"
	launch_kwargs['mcp_server'] = True
	launch_kwargs['ssr_mode'] = False

	gr.api(ping, api_name="ping")
	gr.api(predict_simple)

	demo.queue(max_size=10, api_open=True).launch(**launch_kwargs)

	if __name__ == "__main__":
	    # Script entry point: parse the command-line options, set the
	    # module-level model-management flags, then build and launch the UI.
	    parser = argparse.ArgumentParser()

	    parser.add_argument(
	        '--listen',
	        type=str,
	        # Bind to all interfaces when the SPACE_ID environment variable is
	        # present, otherwise only to localhost.
	        default='0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1',
	        help='IP to listen on for connections to Gradio',
	    )
	    parser.add_argument(
	        '--username', type=str, default='', help='Username for authentication'
	    )
	    parser.add_argument(
	        '--password', type=str, default='', help='Password for authentication'
	    )
	    parser.add_argument(
	        '--server_port',
	        type=int,
	        default=0,
	        help='Port to run the server listener on',
	    )
	    parser.add_argument(
	        '--inbrowser', action='store_true', help='Open in browser'
	    )
	    parser.add_argument(
	        '--share', action='store_true', help='Share the gradio UI'
	    )
	    parser.add_argument(
	        '--unload_model', action='store_true', help='Unload the model after every generation to save GPU memory'
	    )

	    parser.add_argument(
	        '--unload_to_cpu', action='store_true', help='Move the model to main RAM after every generation to save GPU memory but reload faster than after full unload (see above)'
	    )

	    parser.add_argument(
	        '--cache', action='store_true', help='Cache models in RAM to quickly switch between them'
	    )

	    args = parser.parse_args()

	    # Keyword arguments forwarded to ui(). 'share' is always placed here
	    # (True or False) and is NOT also passed explicitly in the ui() call:
	    # supplying it both ways raises "TypeError: got multiple values for
	    # keyword argument 'share'" as soon as --share is given.
	    launch_kwargs = {}
	    launch_kwargs['listen'] = args.listen
	    launch_kwargs['share'] = args.share

	    # Authentication is only forwarded when both halves were supplied.
	    # NOTE(review): the visible part of ui() reads only 'share',
	    # 'server_port' and 'listen' from its kwargs -- confirm that 'auth'
	    # and 'inbrowser' are actually consumed there.
	    if args.username and args.password:
	        launch_kwargs['auth'] = (args.username, args.password)
	    if args.server_port:
	        launch_kwargs['server_port'] = args.server_port
	    if args.inbrowser:
	        launch_kwargs['inbrowser'] = args.inbrowser
	    launch_kwargs['favicon_path'] = "./assets/favicon.ico"

	    # These rebind the module-level flags declared at the top of the file.
	    UNLOAD_MODEL = args.unload_model
	    MOVE_TO_CPU = args.unload_to_cpu
	    if args.cache:
	        # Replace the default None with an empty dict when --cache is given.
	        MODELS = {}

	    ui(
	        unload_to_cpu=MOVE_TO_CPU,
	        **launch_kwargs,
	    )