Spaces: Running on Zero
| import os | |
| import sys | |
| import subprocess | |
| from pathlib import Path | |
| from typing import Tuple, Optional | |
| import gradio as gr | |
| import numpy as np | |
| import soundfile as sf | |
| from huggingface_hub import hf_hub_download | |
# Resolve all paths relative to this file so the Space works regardless of CWD.
SPACE_ROOT = Path(__file__).parent.resolve()
# Local shallow clone of the SonicMaster GitHub repo (code: model / inference scripts).
REPO_DIR = SPACE_ROOT / "SonicMasterRepo"
# HF model repo that hosts the checkpoint weights.
WEIGHTS_REPO = "amaai-lab/SonicMaster"
WEIGHTS_FILE = "model.safetensors"  # from the HF model repo
# Download target for the checkpoint; created eagerly at import time.
CACHE_DIR = SPACE_ROOT / "weights"
CACHE_DIR.mkdir(parents=True, exist_ok=True)
# ---------- 1) Pull weights from HF Hub ----------
def get_weights_path() -> Path:
    """Download (or reuse the cached copy of) the SonicMaster checkpoint.

    Returns:
        Path to ``model.safetensors`` inside ``CACHE_DIR``.
    """
    # NOTE: `local_dir_use_symlinks` and `resume_download` are deprecated in
    # recent huggingface_hub releases (downloads resume automatically, and
    # `local_dir` copies real files by default), so they are no longer passed.
    weights_path = hf_hub_download(
        repo_id=WEIGHTS_REPO,
        filename=WEIGHTS_FILE,
        local_dir=CACHE_DIR.as_posix(),
        force_download=False,
    )
    return Path(weights_path)
# ---------- 2) Clone GitHub repo for code (model.py / inference_*.py ) ----------
def ensure_repo() -> Path:
    """Shallow-clone the SonicMaster repo if it is missing and ensure it is importable.

    Returns:
        The local repo directory.
    """
    repo_posix = REPO_DIR.as_posix()
    if not REPO_DIR.exists():
        clone_cmd = [
            "git", "clone", "--depth", "1",
            "https://github.com/AMAAI-Lab/SonicMaster",
            repo_posix,
        ]
        subprocess.run(clone_cmd, check=True)
    # Make the cloned modules importable from this process.
    if repo_posix not in sys.path:
        sys.path.append(repo_posix)
    return REPO_DIR
# ---------- 3) Examples: use only *.wav from samples/inputs ----------
def build_examples():
    """
    Discover up to 10 .wav files from:
        SonicMasterRepo/samples/inputs
    and pair them with prompts for gr.Examples.
    """
    repo = ensure_repo()
    inputs_dir = repo / "samples" / "inputs"
    wav_files = sorted(f for f in inputs_dir.glob("*.wav") if f.is_file())
    prompts = [
        "Increase the clarity of this song by emphasizing treble frequencies.",
        "Make this song sound more boomy by amplifying the low end bass frequencies.",
        "Can you make this sound louder, please?",
        "Make the audio smoother and less distorted.",
        "Improve the balance in this song.",
        "Disentangle the left and right channels to give this song a stereo feeling.",
        "Correct the unnatural frequency emphasis. Reduce the roominess or echo.",
        "Raise the level of the vocals, please.",
        "Increase the clarity of this song by emphasizing treble frequencies.",
        "Please, dereverb this audio.",
    ]
    fallback_prompt = prompts[-1]
    pairs = []
    for idx, wav in enumerate(wav_files[:10]):
        text = prompts[idx] if idx < len(prompts) else fallback_prompt
        pairs.append([wav.as_posix(), text])
    # Fallback: if no wavs found, provide an empty list (Gradio handles it)
    return pairs
# ---------- 4) I/O helpers ----------
def save_temp_wav(wav: np.ndarray, sr: int, path: Path):
    """Write *wav* at sample rate *sr* to *path*, normalising to (samples, channels).

    A 2-D array whose first axis is the shorter one is treated as
    channels-first and transposed before writing (soundfile expects
    samples-first).
    """
    channels_first = wav.ndim == 2 and wav.shape[0] < wav.shape[1]
    data = wav.T if channels_first else wav
    sf.write(path.as_posix(), data, sr)
def read_audio(path: str) -> Tuple[np.ndarray, int]:
    """Load an audio file and return ``(samples, sample_rate)``.

    float64 sample data is narrowed to float32; other dtypes pass through.
    """
    data, rate = sf.read(path, always_2d=False)
    if data.dtype == np.float64:
        data = data.astype(np.float32)
    return data, rate
def run_sonicmaster_cli(
    input_wav_path: Path,
    prompt: str,
    out_path: Path,
    _logs: list,  # kept for compatibility, but not shown in UI
    progress: Optional[gr.Progress] = None
) -> bool:
    """Run SonicMaster inference by shelling out to the repo's CLI script.

    Tries the known script with its documented flag spelling first
    (``--ckpt/--input/--prompt/--output``), then an alternative spelling
    (``--weights/--input/--text/--out``), and reports success only when a
    non-empty ``out_path`` was produced.

    Args:
        input_wav_path: Prepared input WAV.
        prompt: Text instruction passed to the model.
        out_path: Where the enhanced WAV should be written.
        _logs: Unused; retained so existing callers keep working.
        progress: Optional Gradio progress reporter.

    Returns:
        True if some attempt produced a non-empty output file, else False.
    """
    import shutil  # local: only used to locate a python interpreter

    if progress:
        progress(0.15, desc="Loading weights & repo")
    ckpt = get_weights_path()
    repo = ensure_repo()

    # Use the exact Python interpreter running this process so the child
    # sees the same environment / site-packages.
    py = sys.executable or shutil.which("python3") or shutil.which("python") or "python3"

    # Prefer the scripts we know accept --ckpt/--input/--prompt/--output
    script_candidates = [
        repo / "infer_single.py",  # if you kept your own name
    ]

    # Flag spellings, in preference order; the second set is a last resort
    # in case someone changed the CLI.
    flag_sets = [
        ("--ckpt", "--input", "--prompt", "--output"),
        ("--weights", "--input", "--text", "--out"),
    ]
    candidate_cmds = []
    for ckpt_flag, in_flag, prompt_flag, out_flag in flag_sets:
        for script in script_candidates:
            if script.exists():
                candidate_cmds.append([
                    py, script.as_posix(),
                    ckpt_flag, ckpt.as_posix(),
                    in_flag, input_wav_path.as_posix(),
                    prompt_flag, prompt,
                    out_flag, out_path.as_posix(),
                ])

    if not candidate_cmds:
        return False

    for idx, cmd in enumerate(candidate_cmds, start=1):
        try:
            if progress:
                progress(0.35 + 0.05 * idx, desc=f"Running inference (try {idx})")
            # Output is captured but intentionally discarded; success is
            # judged solely by the presence of a non-empty output file.
            subprocess.run(cmd, capture_output=True, text=True, check=True)
            if out_path.exists() and out_path.stat().st_size > 0:
                if progress:
                    progress(0.9, desc="Post-processing output")
                return True
        except subprocess.CalledProcessError:
            # Best-effort: fall through to the next candidate command.
            continue
        except Exception:
            continue
    return False
def enhance_audio_ui(
    audio_path: str,
    prompt: str,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[int, np.ndarray]:
    """
    Gradio callback: accepts a file path, a prompt, and returns enhanced audio.
    Falls back to echoing the original audio when the model produces no output.
    """
    if progress:
        progress(0.0, desc="Validating input")
    if not audio_path or not prompt:
        raise gr.Error("Please provide audio and a text prompt.")

    # Standardize input -> temp wav
    wav, sr = read_audio(audio_path)
    if progress:
        progress(0.15, desc="Preparing audio")
    tmp_in = SPACE_ROOT / "tmp_in.wav"
    tmp_out = SPACE_ROOT / "tmp_out.wav"
    # Clear any stale output from a previous run (best-effort).
    try:
        tmp_out.unlink()
    except Exception:
        pass
    save_temp_wav(wav, sr, tmp_in)

    # Run model
    if progress:
        progress(0.3, desc="Starting inference")
    succeeded = run_sonicmaster_cli(tmp_in, prompt, tmp_out, _logs=[], progress=progress)

    produced = succeeded and tmp_out.exists() and tmp_out.stat().st_size > 0
    if not produced:
        if progress:
            progress(1.0, desc="No output produced")
        # Return original audio if model didn't produce output
        return (sr, wav)

    out_wav, out_sr = read_audio(tmp_out.as_posix())
    if progress:
        progress(1.0, desc="Done")
    return (out_sr, out_wav)
# ---------- 6) Gradio UI ----------
with gr.Blocks(title="SonicMaster β Text-Guided Restoration & Mastering", fill_height=True) as demo:
    gr.Markdown("## π§ SonicMaster\nUpload or choose an example (from repo: `samples/inputs/*.wav`), write a text prompt (e.g., *reduce reverb*, *clean distortion*), then click **Enhance**.")
    with gr.Row():
        with gr.Column(scale=1):
            # Left column: input audio, prompt, and the trigger button.
            in_audio = gr.Audio(label="Input Audio (upload or use examples)", type="filepath")
            prompt = gr.Textbox(label="Text Prompt", placeholder="e.g., reduce reverb and enhance clarity")
            run_btn = gr.Button("π Enhance", variant="primary")
            # Use wavs from SonicMasterRepo/samples/inputs
            # (build_examples clones the repo on first call).
            gr.Examples(
                examples=build_examples(),
                inputs=[in_audio, prompt],
                label="Examples (repo: samples/inputs/*.wav)"
            )
        with gr.Column(scale=1):
            out_audio = gr.Audio(label="Enhanced Audio (output)")
    # Per-event concurrency (use 1 unless you know your VRAM/CPU can handle more)
    run_btn.click(
        fn=enhance_audio_ui,
        inputs=[in_audio, prompt],
        outputs=[out_audio],
        concurrency_limit=1,
    )

# Warm up cache & repo, then launch
_ = get_weights_path()
_ = ensure_repo()
demo.queue(max_size=16).launch()
# Or, a global default for all events:
# demo.queue(max_size=16, default_concurrency_limit=1).launch()