root
committed on
Commit
·
3feaaf4
1
Parent(s):
173048d
ss
Browse files
app.py
CHANGED
|
@@ -6,6 +6,7 @@ import numpy as np
|
|
| 6 |
import re
|
| 7 |
import pronouncing # Add this to requirements.txt for syllable counting
|
| 8 |
import functools # Add this for lru_cache functionality
|
|
|
|
| 9 |
from transformers import (
|
| 10 |
AutoModelForAudioClassification,
|
| 11 |
AutoFeatureExtractor,
|
|
@@ -2298,6 +2299,109 @@ Improved lyrics with fixed rhythm:
|
|
| 2298 |
|
| 2299 |
return lyrics
|
| 2300 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2301 |
def process_audio(audio_file):
|
| 2302 |
"""Main function to process audio file, classify genre, and generate lyrics with enhanced rhythm analysis."""
|
| 2303 |
if audio_file is None:
|
|
@@ -2524,7 +2628,17 @@ def process_audio(audio_file):
|
|
| 2524 |
except Exception as e:
|
| 2525 |
error_msg = f"Error processing audio: {str(e)}"
|
| 2526 |
print(error_msg)
|
| 2527 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2528 |
|
| 2529 |
# Create enhanced Gradio interface with tabs for better organization
|
| 2530 |
with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
@@ -2569,6 +2683,290 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
| 2569 |
with gr.TabItem("Rhythm Analysis"):
|
| 2570 |
rhythm_analysis_output = gr.Textbox(label="Syllable-Beat Alignment Analysis", lines=16)
|
| 2571 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2572 |
with gr.TabItem("Syllable Analysis"):
|
| 2573 |
syllable_analysis_output = gr.Textbox(label="Detailed Syllable Analysis", lines=16)
|
| 2574 |
prompt_template_output = gr.Textbox(label="Prompt Template", lines=16)
|
|
@@ -2576,7 +2974,12 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
| 2576 |
# Processing function with better handling of results
|
| 2577 |
def display_results(audio_file):
|
| 2578 |
if audio_file is None:
|
| 2579 |
-
return "Please upload an audio file.", "No emotion analysis available.", "No audio classification available.", "No lyrics generated.", "No rhythm analysis available.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2580 |
|
| 2581 |
try:
|
| 2582 |
# Process audio and get results
|
|
@@ -2584,9 +2987,19 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
| 2584 |
|
| 2585 |
# Check if we got an error message instead of results
|
| 2586 |
if isinstance(results, str) and "Error" in results:
|
| 2587 |
-
return results, "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2588 |
elif isinstance(results, tuple) and isinstance(results[0], str) and "Error" in results[0]:
|
| 2589 |
-
return results[0], "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2590 |
|
| 2591 |
# For backwards compatibility, handle both dictionary and tuple returns
|
| 2592 |
if isinstance(results, dict):
|
|
@@ -2673,19 +3086,87 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
| 2673 |
else:
|
| 2674 |
ast_text = "No valid audio classification results available."
|
| 2675 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2676 |
# Return all results including new fields
|
| 2677 |
-
return genre_results, emotion_text, ast_text, clean_lyrics, rhythm_analysis, syllable_analysis, prompt_template
|
| 2678 |
|
| 2679 |
except Exception as e:
|
| 2680 |
error_msg = f"Error: {str(e)}"
|
| 2681 |
print(error_msg)
|
| 2682 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2683 |
|
| 2684 |
# Connect the button to the display function with updated outputs
|
| 2685 |
submit_btn.click(
|
| 2686 |
fn=display_results,
|
| 2687 |
inputs=[audio_input],
|
| 2688 |
-
outputs=[genre_output, emotion_output, ast_output, lyrics_output, rhythm_analysis_output, syllable_analysis_output, prompt_template_output]
|
| 2689 |
)
|
| 2690 |
|
| 2691 |
# Enhanced explanation of how the system works
|
|
@@ -2722,6 +3203,12 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
| 2722 |
|
| 2723 |
8. **Refinement**: If significant rhythm mismatches are detected, the system can automatically refine the lyrics for better alignment.
|
| 2724 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2725 |
This multi-step process creates lyrics that feel naturally connected to the music, as if they were written specifically for it.
|
| 2726 |
""")
|
| 2727 |
|
|
|
|
| 6 |
import re
|
| 7 |
import pronouncing # Add this to requirements.txt for syllable counting
|
| 8 |
import functools # Add this for lru_cache functionality
|
| 9 |
+
import json # Add this for JSON serialization
|
| 10 |
from transformers import (
|
| 11 |
AutoModelForAudioClassification,
|
| 12 |
AutoFeatureExtractor,
|
|
|
|
| 2299 |
|
| 2300 |
return lyrics
|
| 2301 |
|
| 2302 |
+
def prepare_beat_synced_lyrics(audio_data, lyrics, beats_info):
    """
    Prepare data for the beat-synced lyrics viewer.

    Lines of lyrics are distributed evenly over the detected beats, and the
    words of each line are placed either on consecutive beats (when there
    are enough beats for the line) or spread evenly across the line's time
    range.

    Parameters:
        audio_data: Dictionary containing audio features; only its
            "duration" entry is read.
        lyrics: String containing generated lyrics. Any non-string value is
            treated as "no lyrics".
        beats_info: Dictionary containing beat analysis data; the optional
            keys "beat_times", "beat_strengths" and "tempo" are read.

    Returns:
        Dictionary with the keys "duration", "tempo", "beat_times",
        "beat_strengths" and "lyrics_data". Each "lyrics_data" entry holds
        "line", "start_time", "end_time" and "words"; each word holds
        "text", "time" and "is_strong".

    Raises:
        KeyError: If audio_data has no "duration" entry.
    """
    duration = audio_data["duration"]
    tempo = beats_info.get("tempo", 120)

    # The result is passed to json.dumps by the caller, so the beat arrays
    # are normalized to plain lists of floats.
    # NOTE(review): presumably the beat detector returns NumPy arrays,
    # which json.dumps cannot serialize - confirm against detect_beats.
    raw_times = beats_info.get("beat_times")
    beat_times = [] if raw_times is None else [float(t) for t in raw_times]
    raw_strengths = beats_info.get("beat_strengths")
    if raw_strengths is None:
        beat_strengths = [1.0] * len(beat_times)
    else:
        beat_strengths = [float(s) for s in raw_strengths]

    # Clean lyrics - remove trailing annotations first, then section markers.
    # The truncation has to happen before the marker regex, because
    # "[RHYTHM_ANALYSIS_SECTION]" itself matches r'\[\w+\]' and would
    # otherwise be deleted, leaving the analysis text inside the lyrics.
    clean_lyrics = lyrics if isinstance(lyrics, str) else ""
    for cutoff in ("[Note:", "[RHYTHM_ANALYSIS_SECTION]"):
        clean_lyrics = clean_lyrics.split(cutoff)[0]
    # Remove "[Verse]", "[Chorus]", etc.
    clean_lyrics = re.sub(r'\[\w+\]', '', clean_lyrics)

    # Split into non-empty lines
    lines = [ln.strip() for ln in clean_lyrics.split('\n') if ln.strip()]

    lyrics_data = []
    beat_count = len(beat_times)

    # Simple approach: distribute lines evenly across available beats
    if lines and beat_count:
        beats_per_line = max(1, beat_count // len(lines))

        for i, line in enumerate(lines):
            # Determine beat range for this line
            start_beat_idx = min(i * beats_per_line, beat_count - 1)
            end_beat_idx = start_beat_idx + beats_per_line
            line_start_time = beat_times[start_beat_idx]

            if end_beat_idx < beat_count:
                line_end_time = beat_times[end_beat_idx]
            else:
                # The line runs past the last detected beat: it lasts until
                # the end of the audio rather than collapsing to zero length.
                line_end_time = max(duration, line_start_time)
                end_beat_idx = beat_count - 1

            # Split line into words and punctuation tokens
            tokens = re.findall(r'\b\w+\b|-|\s+|[^\w\s]', line)
            filtered_words = [tok for tok in tokens if tok.strip()]
            if not filtered_words:
                continue

            # Beats that fall inside this line (both ends inclusive)
            line_beat_times = beat_times[start_beat_idx:end_beat_idx + 1]

            if len(filtered_words) <= len(line_beat_times):
                # Enough beats: one word per beat, starting at the first
                word_times = line_beat_times[:len(filtered_words)]
            else:
                # More words than beats: spread evenly over the line
                word_duration = (line_end_time - line_start_time) / len(filtered_words)
                word_times = [
                    line_start_time + j * word_duration
                    for j in range(len(filtered_words))
                ]

            word_data = [
                {
                    "text": word,
                    "time": word_time,
                    # Simple heuristic for strong beats: first word of the
                    # line, or any capitalized token
                    "is_strong": j == 0 or word[0].isupper(),
                }
                for j, (word, word_time) in enumerate(zip(filtered_words, word_times))
            ]

            lyrics_data.append({
                "line": line,
                "start_time": line_start_time,
                "end_time": line_end_time,
                "words": word_data,
            })

    # Create visualization data
    return {
        "duration": duration,
        "tempo": tempo,
        "beat_times": beat_times,
        "beat_strengths": beat_strengths,
        "lyrics_data": lyrics_data,
    }
|
| 2404 |
+
|
| 2405 |
def process_audio(audio_file):
|
| 2406 |
"""Main function to process audio file, classify genre, and generate lyrics with enhanced rhythm analysis."""
|
| 2407 |
if audio_file is None:
|
|
|
|
| 2628 |
except Exception as e:
|
| 2629 |
error_msg = f"Error processing audio: {str(e)}"
|
| 2630 |
print(error_msg)
|
| 2631 |
+
|
| 2632 |
+
# Use a plain (non-f) string literal so the embedded JavaScript needs no brace or backslash escaping
|
| 2633 |
+
error_html = """<script>
|
| 2634 |
+
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
| 2635 |
+
document.getElementById('loading-message').style.display = 'block';
|
| 2636 |
+
document.getElementById('loading-message').innerText = 'Error processing audio';
|
| 2637 |
+
document.getElementById('beat-sync-timeline').style.display = 'none';
|
| 2638 |
+
document.getElementById('lyrics-display').style.display = 'none';
|
| 2639 |
+
</script>"""
|
| 2640 |
+
|
| 2641 |
+
return error_msg, "Error in emotion analysis", "Error in audio classification", "No lyrics generated", "No rhythm analysis available", audio_file, error_html, "No syllable analysis available", "No prompt template available"
|
| 2642 |
|
| 2643 |
# Create enhanced Gradio interface with tabs for better organization
|
| 2644 |
with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
|
|
| 2683 |
with gr.TabItem("Rhythm Analysis"):
|
| 2684 |
rhythm_analysis_output = gr.Textbox(label="Syllable-Beat Alignment Analysis", lines=16)
|
| 2685 |
|
| 2686 |
+
with gr.TabItem("Beat-Synced Lyrics"):
|
| 2687 |
+
# Create a container for the beat-synced lyrics viewer
|
| 2688 |
+
synced_audio_output = gr.Audio(label="Playback with Synced Lyrics", type="filepath")
|
| 2689 |
+
|
| 2690 |
+
# Create a custom JavaScript component for the beat-synced lyrics viewer
|
| 2691 |
+
lyrics_viewer_html = gr.HTML(
|
| 2692 |
+
"""
|
| 2693 |
+
<div id="beat-sync-container" style="width:100%; height:400px; position:relative;">
|
| 2694 |
+
<div id="loading-message">Please analyze audio to view beat-synced lyrics</div>
|
| 2695 |
+
<div id="beat-sync-timeline" style="display:none; width:100%; height:80px; background:#f0f0f0; position:relative; overflow:hidden; margin-bottom:10px;">
|
| 2696 |
+
<div id="playhead" style="position:absolute; width:2px; height:100%; background:red; top:0; left:0; z-index:10;"></div>
|
| 2697 |
+
<div id="beat-markers" style="position:absolute; width:100%; height:100%; top:0; left:0;"></div>
|
| 2698 |
+
</div>
|
| 2699 |
+
<div id="lyrics-display" style="display:none; width:100%; height:300px; overflow-y:auto; font-size:16px; line-height:1.6;"></div>
|
| 2700 |
+
</div>
|
| 2701 |
+
|
| 2702 |
+
<script>
|
| 2703 |
+
let beatSyncData = null;
|
| 2704 |
+
let isPlaying = false;
|
| 2705 |
+
let audioElement = null;
|
| 2706 |
+
let playheadInterval = null;
|
| 2707 |
+
let lastHighlightedWord = -1;
|
| 2708 |
+
|
| 2709 |
+
function initBeatSyncViewer(data) {
// Rebuild the beat timeline and the word-by-word lyrics view from `data`.
// `data` is the parsed visualization object: duration, beat_times,
// beat_strengths and lyrics_data (lines, each with a list of timed words).
// Safe to call repeatedly: previous markers, lyrics and the timeline click
// handler are replaced rather than accumulated.
beatSyncData = data;
const timeline = document.getElementById('beat-sync-timeline');
const beatMarkers = document.getElementById('beat-markers');
const lyricsDisplay = document.getElementById('lyrics-display');
const loadingMessage = document.getElementById('loading-message');

if (!timeline || !beatMarkers || !lyricsDisplay || !loadingMessage) {
    console.warn('Beat sync viewer elements are missing; cannot initialize.');
    return;
}

// Clear previous content
beatMarkers.innerHTML = '';
lyricsDisplay.innerHTML = '';

// Show the viewer elements, hide loading message
loadingMessage.style.display = 'none';
timeline.style.display = 'block';
lyricsDisplay.style.display = 'block';

// A missing or non-positive duration would turn every position into
// NaN/Infinity, so it is normalized to 0 and positional work is skipped.
const duration = Number(data.duration) > 0 ? Number(data.duration) : 0;
const beatTimes = Array.isArray(data.beat_times) ? data.beat_times : [];
const beatStrengths = Array.isArray(data.beat_strengths) ? data.beat_strengths : [];

// Create beat markers
if (duration > 0) {
    beatTimes.forEach((beatTime, i) => {
        // Only fall back to 1.0 when no strength was supplied; a genuine
        // strength of 0 must stay 0 instead of becoming the tallest marker.
        const rawStrength = typeof beatStrengths[i] === 'number' ? beatStrengths[i] : 1.0;
        // Clamp so the marker always fits the 80px-high timeline.
        const strength = Math.min(1, Math.max(0, rawStrength));
        const height = 30 + (strength * 50); // Scale between 30-80px

        const marker = document.createElement('div');
        marker.className = 'beat-marker';
        marker.style.position = 'absolute';
        marker.style.left = `${(beatTime / duration) * 100}%`;
        marker.style.top = `${(80 - height) / 2}px`;
        marker.style.width = '2px';
        marker.style.height = `${height}px`;
        marker.style.background = rawStrength > 0.7 ? '#2d7dd2' : '#97c6e3';
        marker.setAttribute('data-time', beatTime);

        beatMarkers.appendChild(marker);
    });
}

// Create lyrics display
if (data.lyrics_data && data.lyrics_data.length > 0) {
    data.lyrics_data.forEach((line, i) => {
        const lineElement = document.createElement('div');
        lineElement.className = 'lyric-line';
        lineElement.style.marginBottom = '15px';

        // Create word elements for the line
        (line.words || []).forEach((word, j) => {
            const wordSpan = document.createElement('span');
            wordSpan.textContent = word.text;
            wordSpan.className = 'lyric-word';
            wordSpan.style.display = 'inline-block';
            // A trailing space inside an inline-block is collapsed by CSS
            // white-space processing, so words are separated with a margin.
            wordSpan.style.marginRight = '0.3em';
            wordSpan.style.transition = 'color 0.1s, transform 0.1s';
            wordSpan.setAttribute('data-time', word.time);
            wordSpan.setAttribute('data-word-index', j);
            wordSpan.setAttribute('data-line-index', i);

            if (word.is_strong) {
                wordSpan.style.fontWeight = 'bold';
            }

            lineElement.appendChild(wordSpan);
        });

        lyricsDisplay.appendChild(lineElement);
    });
} else {
    lyricsDisplay.innerHTML = '<p>No lyrics data available or could not align lyrics with beats.</p>';
}

// Timeline click handler for scrubbing. Assigned through `onclick` so that
// re-initializing replaces the previous handler instead of stacking a new
// listener on every call.
timeline.onclick = function(e) {
    if (!audioElement || duration <= 0) return;

    const rect = timeline.getBoundingClientRect();
    if (rect.width <= 0) return;
    const clickPosition = (e.clientX - rect.left) / rect.width;
    const newTime = clickPosition * duration;

    // Set audio to new position
    audioElement.currentTime = newTime;

    // Update playhead and lyrics
    updatePlayhead(newTime);
    highlightLyricsAtTime(newTime);
};
}
|
| 2802 |
+
|
| 2803 |
+
function connectAudio(audioSelector) {
// Locate the <audio> element inside the component matched by
// `audioSelector`, store it in the shared `audioElement` variable and wire
// its playback events to the playhead / lyric-highlight helpers.
const host = document.querySelector(audioSelector);
if (host === null) {
    return;
}

audioElement = host.querySelector('audio');
if (audioElement === null) {
    return;
}

// After a seek, snap the playhead and highlighted word to the new time.
const resyncAfterSeek = function() {
    updatePlayhead(audioElement.currentTime);
    highlightLyricsAtTime(audioElement.currentTime);
};

const bindings = [
    ['play', startPlayheadMovement],
    ['pause', stopPlayheadMovement],
    ['ended', stopPlayheadMovement],
    ['seeked', resyncAfterSeek]
];
for (const [eventName, handler] of bindings) {
    audioElement.addEventListener(eventName, handler);
}
}
|
| 2820 |
+
|
| 2821 |
+
function startPlayheadMovement() {
// Begin polling the audio position so the playhead and the highlighted
// word follow playback. Any timer left over from a previous run is
// cancelled first so only one poller is ever active.
isPlaying = true;
if (playheadInterval) {
    clearInterval(playheadInterval);
}

const tick = function() {
    if (audioElement && isPlaying) {
        updatePlayhead(audioElement.currentTime);
        highlightLyricsAtTime(audioElement.currentTime);
    }
};

playheadInterval = setInterval(tick, 50); // Update every 50ms
}
|
| 2831 |
+
|
| 2832 |
+
function stopPlayheadMovement() {
// Mark playback as stopped and cancel the position-polling timer, if one
// is currently running.
isPlaying = false;
if (!playheadInterval) {
    return;
}
clearInterval(playheadInterval);
playheadInterval = null;
}
|
| 2839 |
+
|
| 2840 |
+
function updatePlayhead(currentTime) {
// Move the red playhead to the horizontal position that corresponds to
// `currentTime` (seconds) within the loaded track.
if (!beatSyncData) return;

// Runs every 50ms during playback, so it must never throw: skip quietly
// when the element is gone or the duration cannot be divided by.
const playhead = document.getElementById('playhead');
const duration = Number(beatSyncData.duration);
if (!playhead || !(duration > 0)) return;

// Clamp to the timeline so a time slightly past the reported duration
// does not push the playhead outside the track.
const position = Math.min(100, Math.max(0, (currentTime / duration) * 100));
playhead.style.left = `${position}%`;
}
|
| 2847 |
+
|
| 2848 |
+
function highlightLyricsAtTime(currentTime) {
// Restyle the lyric words for playback position `currentTime` (seconds):
// words already sung are dimmed, the word closest to the current time is
// highlighted, and the lyrics pane is scrolled so that word's line is
// visible.
if (!beatSyncData || !beatSyncData.lyrics_data) return;

const words = document.querySelectorAll('.lyric-word');

// Find the current word to highlight
let currentWordElement = null;
let bestTimeDiff = Infinity;

words.forEach(word => {
    // Reset styling. The colour is cleared rather than forced to black so
    // the word falls back to the inherited theme colour it started with.
    word.style.color = '';
    word.style.transform = 'scale(1)';

    const wordTime = parseFloat(word.getAttribute('data-time'));

    // Only words already passed, or coming up within 0.2s, are considered
    if (wordTime <= currentTime + 0.2) {
        // Mark past words as "read"
        if (wordTime < currentTime - 0.5) {
            word.style.color = '#666666';
        }

        // Track the word closest to the current time (first one wins ties)
        const timeDiff = Math.abs(wordTime - currentTime);
        if (timeDiff < bestTimeDiff) {
            bestTimeDiff = timeDiff;
            currentWordElement = word;
        }
    }
});

if (!currentWordElement) return;

// Highlight current word
currentWordElement.style.color = '#e63946';
currentWordElement.style.transform = 'scale(1.1)';

// Keep the current line visible inside the lyrics pane
const lineElement = currentWordElement.closest('.lyric-line');
const lyricsDisplay = document.getElementById('lyrics-display');
if (!lineElement || !lyricsDisplay) return;

const displayRect = lyricsDisplay.getBoundingClientRect();
const lineRect = lineElement.getBoundingClientRect();

// Check if the line is outside the visible area
if (lineRect.top < displayRect.top || lineRect.bottom > displayRect.bottom) {
    // Scroll only the lyrics pane. scrollIntoView would also scroll every
    // scrollable ancestor, moving the whole page during playback.
    const centeredOffset = (lineRect.top - displayRect.top)
        - (displayRect.height - lineRect.height) / 2;
    lyricsDisplay.scrollTo({
        top: lyricsDisplay.scrollTop + centeredOffset,
        behavior: 'smooth'
    });
}
}
|
| 2903 |
+
|
| 2904 |
+
// Wait for Gradio to fully load the components
|
| 2905 |
+
function waitForGradio() {
// After a short delay (to let Gradio render its components), start
// watching the viewer container for new beat-sync data and attach the
// playback listeners to the viewer's <audio> element.
const AUDIO_HOST_SELECTOR = '#component-17'; // Replace with the actual selector
const RETRY_DELAY_MS = 1000;
const MAX_ATTEMPTS = 30;

// Parse the JSON stored on the container and (re)build the viewer.
function loadSyncData(container) {
    const dataStr = container.getAttribute('data-sync-info');
    try {
        initBeatSyncViewer(JSON.parse(dataStr));
    } catch (e) {
        console.error('Error parsing beat sync data:', e);
    }
}

// Find the viewer's audio element and bind the playback listeners once.
// Returns true when an element was found.
function tryConnectAudio() {
    const audioElements = document.querySelectorAll('audio');
    for (const audio of audioElements) {
        const parent = audio.parentElement;
        if (!parent) continue;
        if (parent.closest(AUDIO_HOST_SELECTOR) ||
            parent.closest('.beat-synced-lyrics-tab')) {
            audioElement = audio;
            // Guard against binding twice: the anonymous 'seeked' handler
            // would otherwise be registered again on every attempt.
            if (audio.dataset.beatSyncBound !== 'true') {
                audio.dataset.beatSyncBound = 'true';
                audio.addEventListener('play', startPlayheadMovement);
                audio.addEventListener('pause', stopPlayheadMovement);
                audio.addEventListener('ended', stopPlayheadMovement);
                audio.addEventListener('seeked', function() {
                    updatePlayhead(audioElement.currentTime);
                    highlightLyricsAtTime(audioElement.currentTime);
                });
            }
            return true;
        }
    }
    return false;
}

setTimeout(() => {
    // Check for data updates from Gradio
    const container = document.getElementById('beat-sync-container');
    if (container) {
        const observer = new MutationObserver((mutations) => {
            for (const mutation of mutations) {
                if (mutation.type === 'attributes' &&
                    mutation.target.id === 'beat-sync-container' &&
                    mutation.target.hasAttribute('data-sync-info')) {
                    loadSyncData(mutation.target);
                }
            }
        });

        observer.observe(container, {
            attributes: true,
            attributeFilter: ['data-sync-info']
        });

        // Data that was already present before the observer started would
        // never produce a mutation, so load it right away.
        if (container.hasAttribute('data-sync-info')) {
            loadSyncData(container);
        }
    } else {
        console.warn('beat-sync-container not found; viewer updates are disabled.');
    }

    // Keep trying until we find the audio element, up to MAX_ATTEMPTS
    let attempts = 0;
    function attemptConnect() {
        attempts += 1;
        if (tryConnectAudio() || attempts >= MAX_ATTEMPTS) return;
        setTimeout(attemptConnect, RETRY_DELAY_MS);
    }
    attemptConnect();
}, 2000);
}
|
| 2959 |
+
|
| 2960 |
+
// Initialize when DOM is ready
|
| 2961 |
+
if (document.readyState === 'loading') {
|
| 2962 |
+
document.addEventListener('DOMContentLoaded', waitForGradio);
|
| 2963 |
+
} else {
|
| 2964 |
+
waitForGradio();
|
| 2965 |
+
}
|
| 2966 |
+
</script>
|
| 2967 |
+
"""
|
| 2968 |
+
)
|
| 2969 |
+
|
| 2970 |
with gr.TabItem("Syllable Analysis"):
|
| 2971 |
syllable_analysis_output = gr.Textbox(label="Detailed Syllable Analysis", lines=16)
|
| 2972 |
prompt_template_output = gr.Textbox(label="Prompt Template", lines=16)
|
|
|
|
| 2974 |
# Processing function with better handling of results
|
| 2975 |
def display_results(audio_file):
|
| 2976 |
if audio_file is None:
|
| 2977 |
+
return "Please upload an audio file.", "No emotion analysis available.", "No audio classification available.", "No lyrics generated.", "No rhythm analysis available.", audio_file, """<script>
|
| 2978 |
+
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
| 2979 |
+
document.getElementById('loading-message').style.display = 'block';
|
| 2980 |
+
document.getElementById('beat-sync-timeline').style.display = 'none';
|
| 2981 |
+
document.getElementById('lyrics-display').style.display = 'none';
|
| 2982 |
+
</script>""", "No syllable analysis available.", "No prompt template available."
|
| 2983 |
|
| 2984 |
try:
|
| 2985 |
# Process audio and get results
|
|
|
|
| 2987 |
|
| 2988 |
# Check if we got an error message instead of results
|
| 2989 |
if isinstance(results, str) and "Error" in results:
|
| 2990 |
+
return results, "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available", audio_file, """<script>
|
| 2991 |
+
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
| 2992 |
+
document.getElementById('loading-message').style.display = 'block';
|
| 2993 |
+
document.getElementById('beat-sync-timeline').style.display = 'none';
|
| 2994 |
+
document.getElementById('lyrics-display').style.display = 'none';
|
| 2995 |
+
</script>""", "No syllable analysis available", "No prompt template available"
|
| 2996 |
elif isinstance(results, tuple) and isinstance(results[0], str) and "Error" in results[0]:
|
| 2997 |
+
return results[0], "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available", audio_file, """<script>
|
| 2998 |
+
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
| 2999 |
+
document.getElementById('loading-message').style.display = 'block';
|
| 3000 |
+
document.getElementById('beat-sync-timeline').style.display = 'none';
|
| 3001 |
+
document.getElementById('lyrics-display').style.display = 'none';
|
| 3002 |
+
</script>""", "No syllable analysis available", "No prompt template available"
|
| 3003 |
|
| 3004 |
# For backwards compatibility, handle both dictionary and tuple returns
|
| 3005 |
if isinstance(results, dict):
|
|
|
|
| 3086 |
else:
|
| 3087 |
ast_text = "No valid audio classification results available."
|
| 3088 |
|
| 3089 |
+
# Prepare beat-synced lyrics visualization data
|
| 3090 |
+
try:
|
| 3091 |
+
audio_data = extract_audio_features(audio_file)
|
| 3092 |
+
|
| 3093 |
+
# Get beat information
|
| 3094 |
+
y, sr = load_audio(audio_file, SAMPLE_RATE)
|
| 3095 |
+
beats_info = detect_beats(y, sr)
|
| 3096 |
+
|
| 3097 |
+
# Prepare data for beat-synced lyrics
|
| 3098 |
+
visualization_data = prepare_beat_synced_lyrics(audio_data, clean_lyrics, beats_info)
|
| 3099 |
+
|
| 3100 |
+
# Convert to JSON for JavaScript
|
| 3101 |
+
visualization_json = json.dumps(visualization_data)
|
| 3102 |
+
|
| 3103 |
+
# Create HTML with the data injected - avoid using f-string for the entire HTML
|
| 3104 |
+
# Handle string escaping separately to avoid f-string backslash issues
|
| 3105 |
+
escaped_json = visualization_json.replace("'", "\\'")
|
| 3106 |
+
|
| 3107 |
+
# Create HTML in parts to avoid f-string backslash issues
|
| 3108 |
+
html_start = """<div id="beat-sync-container" data-sync-info='"""
|
| 3109 |
+
html_middle = """' style="width:100%; height:400px; position:relative;">
|
| 3110 |
+
<div id="loading-message">Loading beat-synced lyrics viewer...</div>
|
| 3111 |
+
<div id="beat-sync-timeline" style="display:none; width:100%; height:80px; background:#f0f0f0; position:relative; overflow:hidden; margin-bottom:10px;">
|
| 3112 |
+
<div id="playhead" style="position:absolute; width:2px; height:100%; background:red; top:0; left:0; z-index:10;"></div>
|
| 3113 |
+
<div id="beat-markers" style="position:absolute; width:100%; height:100%; top:0; left:0;"></div>
|
| 3114 |
+
</div>
|
| 3115 |
+
<div id="lyrics-display" style="display:none; width:100%; height:300px; overflow-y:auto; font-size:16px; line-height:1.6;"></div>
|
| 3116 |
+
</div>
|
| 3117 |
+
<script>
|
| 3118 |
+
// Signal to the viewer that new data is available
|
| 3119 |
+
const container = document.getElementById('beat-sync-container');
|
| 3120 |
+
if (container) {
|
| 3121 |
+
// This will trigger the mutation observer
|
| 3122 |
+
container.setAttribute('data-sync-info', '"""
|
| 3123 |
+
html_end = """');
|
| 3124 |
+
}
|
| 3125 |
+
</script>"""
|
| 3126 |
+
|
| 3127 |
+
# Combine parts without using f-strings in the parts that don't need variables
|
| 3128 |
+
beat_sync_html = html_start + visualization_json + html_middle + escaped_json + html_end
|
| 3129 |
+
except Exception as e:
|
| 3130 |
+
print(f"Error creating beat-synced lyrics: {str(e)}")
|
| 3131 |
+
# Handle string escaping separately to avoid f-string backslash issues
|
| 3132 |
+
escaped_error = str(e).replace("'", "\\'")
|
| 3133 |
+
|
| 3134 |
+
# Use regular strings instead of f-strings to avoid backslash issues
|
| 3135 |
+
html_start = """<script>
|
| 3136 |
+
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
| 3137 |
+
document.getElementById('loading-message').style.display = 'block';
|
| 3138 |
+
document.getElementById('loading-message').innerText = 'Error creating beat-synced lyrics: """
|
| 3139 |
+
html_end = """';
|
| 3140 |
+
document.getElementById('beat-sync-timeline').style.display = 'none';
|
| 3141 |
+
document.getElementById('lyrics-display').style.display = 'none';
|
| 3142 |
+
</script>"""
|
| 3143 |
+
|
| 3144 |
+
# Combine parts without using f-strings
|
| 3145 |
+
beat_sync_html = html_start + escaped_error + html_end
|
| 3146 |
+
|
| 3147 |
# Return all results including new fields
|
| 3148 |
+
return genre_results, emotion_text, ast_text, clean_lyrics, rhythm_analysis, audio_file, beat_sync_html, syllable_analysis, prompt_template
|
| 3149 |
|
| 3150 |
except Exception as e:
|
| 3151 |
error_msg = f"Error: {str(e)}"
|
| 3152 |
print(error_msg)
|
| 3153 |
+
|
| 3154 |
+
# Use a plain (non-f) string literal so the embedded JavaScript needs no brace or backslash escaping
|
| 3155 |
+
error_html = """<script>
|
| 3156 |
+
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
| 3157 |
+
document.getElementById('loading-message').style.display = 'block';
|
| 3158 |
+
document.getElementById('loading-message').innerText = 'Error processing audio';
|
| 3159 |
+
document.getElementById('beat-sync-timeline').style.display = 'none';
|
| 3160 |
+
document.getElementById('lyrics-display').style.display = 'none';
|
| 3161 |
+
</script>"""
|
| 3162 |
+
|
| 3163 |
+
return error_msg, "Error in emotion analysis", "Error in audio classification", "No lyrics generated", "No rhythm analysis available", audio_file, error_html, "No syllable analysis available", "No prompt template available"
|
| 3164 |
|
| 3165 |
# Connect the button to the display function with updated outputs
|
| 3166 |
submit_btn.click(
|
| 3167 |
fn=display_results,
|
| 3168 |
inputs=[audio_input],
|
| 3169 |
+
outputs=[genre_output, emotion_output, ast_output, lyrics_output, rhythm_analysis_output, synced_audio_output, lyrics_viewer_html, syllable_analysis_output, prompt_template_output]
|
| 3170 |
)
|
| 3171 |
|
| 3172 |
# Enhanced explanation of how the system works
|
|
|
|
| 3203 |
|
| 3204 |
8. **Refinement**: If significant rhythm mismatches are detected, the system can automatically refine the lyrics for better alignment.
|
| 3205 |
|
| 3206 |
+
9. **Beat-Synced Visualization**: The beat-synced lyrics viewer shows you exactly how the lyrics align with the music:
|
| 3207 |
+
- Beat markers show the song's rhythmic structure
|
| 3208 |
+
- Words are highlighted in sync with the music
|
| 3209 |
+
- Strong beats and stressed syllables are emphasized
|
| 3210 |
+
- You can scrub through the song to see how lyrics and music match at any point
|
| 3211 |
+
|
| 3212 |
This multi-step process creates lyrics that feel naturally connected to the music, as if they were written specifically for it.
|
| 3213 |
""")
|
| 3214 |
|