Spaces: Runtime error
renhang committed · Commit 1e7fc7e · 1 parent: 3db0011
update

Files changed:
- app.py (+28 -6)
- jam_infer.yaml (+2 -2)
app.py
CHANGED

@@ -7,12 +7,14 @@ import requests
 import subprocess
 from pathlib import Path
 import torchaudio
+import torch
+import pyloudnorm as pyln
 
-from model import Jamify
+from model import Jamify, normalize_audio
 from utils import json_to_text, text_to_json, convert_text_time_to_beats, convert_text_beats_to_time, convert_text_beats_to_time_with_regrouping, text_to_words, beats_to_text_with_regrouping, round_to_quarter_beats
 
 def crop_audio_to_30_seconds(audio_path):
-    """Crop audio to first 30 seconds and return path to temporary cropped file"""
+    """Crop audio to first 30 seconds, normalize, and return path to temporary cropped file"""
     if not audio_path or not os.path.exists(audio_path):
         return None
 
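Reviewer note: the middle of crop_audio_to_30_seconds (original lines 19-28) is unchanged by this commit and therefore elided from the diff. From the context lines in the next hunk (`else: cropped_waveform = waveform`), it presumably loads the file and slices off the first 30 seconds. A sketch of that assumed, unshown logic, for readability only:

import torchaudio

# Assumed shape of the elided, unchanged lines; the real code is not shown in this diff.
def crop_first_30_seconds(audio_path: str):
    waveform, sample_rate = torchaudio.load(audio_path)  # (channels, samples)
    max_samples = 30 * sample_rate                       # 30 seconds of samples
    if waveform.shape[1] > max_samples:
        cropped_waveform = waveform[:, :max_samples]
    else:
        cropped_waveform = waveform
    return cropped_waveform, sample_rate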
@@ -29,11 +31,20 @@ def crop_audio_to_30_seconds(audio_path):
         else:
             cropped_waveform = waveform
 
+        # Resample to 44100 Hz if needed (to match prediction pipeline)
+        if sample_rate != 44100:
+            resampler = torchaudio.transforms.Resample(sample_rate, 44100)
+            cropped_waveform = resampler(cropped_waveform)
+            sample_rate = 44100
+
+        # Apply the same normalization as the prediction pipeline
+        normalized_waveform = normalize_audio(cropped_waveform)
+
         # Save to temporary file
         with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
             temp_path = temp_file.name
 
-        torchaudio.save(temp_path, cropped_waveform, sample_rate)
+        torchaudio.save(temp_path, normalized_waveform, sample_rate)
         return temp_path
 
     except Exception as e:
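Reviewer note: `normalize_audio` is imported from model.py, which is not part of this commit, so its implementation is not visible here. Given the new `pyloudnorm` import, it plausibly performs BS.1770 loudness normalization; the sketch below is an assumption, not the actual implementation (the name `normalize_audio_sketch` and the -14 LUFS target are both made up):

import torch
import pyloudnorm as pyln

def normalize_audio_sketch(waveform: torch.Tensor, sample_rate: int = 44100,
                           target_lufs: float = -14.0) -> torch.Tensor:
    """Illustrative stand-in for model.normalize_audio (assumed behavior)."""
    data = waveform.transpose(0, 1).numpy()  # pyloudnorm expects (samples, channels)
    meter = pyln.Meter(sample_rate)          # ITU-R BS.1770 loudness meter
    loudness = meter.integrated_loudness(data)
    normalized = pyln.normalize.loudness(data, loudness, target_lufs)
    return torch.from_numpy(normalized).transpose(0, 1).float()

Whatever the exact implementation, saving the normalized waveform means the audio the user auditions in the UI matches what the prediction pipeline consumes, which appears to be the point of this change.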
@@ -196,7 +207,18 @@ default_audio_display = crop_audio_to_30_seconds(default_audio) if default_audio
 # Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# Jamify: Music Generation from Lyrics and Style")
-    gr.Markdown("Provide your lyrics,
+    gr.Markdown("Provide your lyrics, an audio style reference, and a desired duration to generate a song.")
+
+    # Helpful reminder for users
+    gr.Markdown("""
+    💡 **Demo Tip**: Don't start from scratch! Use the sample examples below as templates:
+    - Click any sample to load its lyrics and audio style
+    - **Edit the lyrics**: Change words, modify timing, or adjust the structure
+    - **Experiment with timing**: Try different word durations or beats
+    - **Mix and match**: Use lyrics from one example with audio style from another
+
+    This approach is much easier than creating everything from zero!
+    """)
 
     # State to track selected example (-1 means "Make Your Own" is selected, 0 is first example)
     selected_example = gr.State(0 if examples else -1)
@@ -211,8 +233,8 @@ with gr.Blocks() as demo:
     with gr.Row():
         example_buttons = []
         for i, example in enumerate(examples):
-            # Use consistent button width
-            button_text = example['id'][:
+            # Use consistent button width with 10 character limit
+            button_text = example['id'][:10] if len(example['id']) <= 10 else example['id'][:9] + "…"
             # First button starts as primary (selected), others as secondary
             initial_variant = "primary" if i == 0 else "secondary"
             button = gr.Button(
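Reviewer note: the new label expression caps every button label at 10 characters. A quick check with hypothetical example ids:

def truncate_label(s: str) -> str:
    # Mirrors the new expression: identity for short ids, 9 chars + "…" otherwise.
    return s[:10] if len(s) <= 10 else s[:9] + "…"

print(truncate_label("pop_ballad"))       # pop_ballad   (10 chars, unchanged)
print(truncate_label("orchestral_epic"))  # orchestra…   (9 chars + ellipsis)

When len(s) <= 10 the [:10] slice is a no-op, so the first branch could simply return s; the behavior is the same either way.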
jam_infer.yaml
CHANGED

@@ -23,10 +23,10 @@ evaluation:
   cfg_range:
   - 0.05
   - 1
-  fix_dual_cfg: true
+  # fix_dual_cfg: true
   dual_cfg:
   - 4.7
-  - 2.
+  - 2.5
   steps: 50
 
 model:
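Reviewer note on the config change: fix_dual_cfg is now commented out and the second dual_cfg scale is raised to 2.5 (the old value appears truncated as "- 2." in this view). How the two scales are consumed is defined in the model code, not in this diff. One common formulation of dual classifier-free guidance, shown purely as an illustration of what such a pair of scales might mean, applies one scale to the text-conditional direction and the other to the additional (e.g., style) conditioning; cfg_range [0.05, 1] plausibly bounds the fraction of sampling steps where guidance is active, though that too is an assumption:

import torch

def dual_cfg_combine(eps_uncond: torch.Tensor,
                     eps_text: torch.Tensor,
                     eps_full: torch.Tensor,
                     w1: float = 4.7, w2: float = 2.5) -> torch.Tensor:
    # Illustrative only (assumed semantics, not Jamify's actual code):
    # amplify the text-conditional direction by w1 and the extra
    # conditioning direction by w2.
    return (eps_uncond
            + w1 * (eps_text - eps_uncond)
            + w2 * (eps_full - eps_text))

Under that reading, bumping the second scale from roughly 2 to 2.5 strengthens the extra conditioning relative to the text signal.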