Spaces:
Runtime error
Runtime error
def regroup_words(
    words: list[dict],
    max_len: float = 15.0,
    gap: float = 0.50,
) -> list[dict]:
    """
    Group consecutive words into segments.

    A new segment is opened before a word when the current segment already
    holds at least one word and either:
      * the word would end more than ``max_len`` after the segment start, or
      * the word starts more than ``gap`` after the previous word ended.

    Args:
        words: Word dictionaries with keys 'word', 'start', 'end', in order.
        max_len: Maximum span (same unit as the timestamps) of one segment.
        gap: Maximum silence between two words of the same segment.
    Returns:
        A list of segments with keys:
        'start', 'end', 'text', 'words'
        plus 'segment', which carries the same string as 'text' and is kept
        so callers reading the older key continue to work.
        An empty input yields an empty list.
    """
    if not words:
        return []

    def _make_segment(start, end, members):
        # 'words' holds the original word dicts, so callers do not have to
        # re-derive membership from the time range.
        joined = " ".join(x["word"] for x in members)
        return {
            "start": start,
            "end": end,
            "segment": joined,
            "text": joined,
            "words": members,
        }

    segs, seg_words = [], []
    seg_start = words[0]["start"]
    last_end = seg_start
    for w in words:
        over_max = (w["end"] - seg_start) > max_len
        long_gap = (w["start"] - last_end) > gap
        # Never split before the first word of a segment, otherwise a single
        # word longer than max_len would produce an empty segment.
        if seg_words and (over_max or long_gap):
            segs.append(_make_segment(seg_start, last_end, seg_words))
            seg_words = []  # rebind (not clear) so the stored list is untouched
            seg_start = w["start"]
        seg_words.append(w)
        last_end = w["end"]
    # flush final segment
    segs.append(_make_segment(seg_start, last_end, seg_words))
    return segs
def text_to_words(text: str) -> list[dict]:
    """
    Parse "word[start:end] word[start:end]..." text into a list of words.

    Args:
        text: String in format "It's[4.96:5.52] a[5.52:5.84] long[5.84:6.16]..."
    Returns:
        List of word dictionaries with keys: 'word', 'start', 'end'.
        Tokens whose timestamps are not parseable as floats are skipped;
        the literal timestamp 'xxx' is read as 0.0.
    """
    import re

    if not text.strip():
        return []

    def _parse_time(raw: str) -> float:
        # The literal 'xxx' is mapped to 0.0; anything else must be a float.
        return 0.0 if raw == 'xxx' else float(raw)

    # Token shape: word[start:end]
    token_re = re.compile(r'(\S+?)\[([^:]+):([^\]]+)\]')

    parsed = []
    for match in token_re.finditer(text):
        token, raw_start, raw_end = match.groups()
        try:
            begin = _parse_time(raw_start)
            finish = _parse_time(raw_end)
        except ValueError:
            # Invalid timestamp: drop this token and keep going.
            continue
        parsed.append({'word': token, 'start': begin, 'end': finish})
    return parsed
def words_to_text(words: list[dict]) -> str:
    """
    Render a word list as "word[start:end] word[start:end]...".

    Args:
        words: List of word dictionaries with keys: 'word', 'start', 'end'.
            Missing keys fall back to '' for the word and 0.0 for timestamps.
    Returns:
        String in format "It's[4.96:5.52] a[5.52:5.84] long[5.84:6.16]...",
        or "" when there are no words.
    """
    if not words:
        return ""

    def _fmt(value) -> str:
        # At most 2 decimals, then drop trailing zeros and a dangling '.'.
        return f"{value:.2f}".rstrip('0').rstrip('.')

    rendered = []
    for entry in words:
        label = entry.get('word', '')
        begin = _fmt(entry.get('start', 0.0))
        finish = _fmt(entry.get('end', 0.0))
        rendered.append(f"{label}[{begin}:{finish}]")
    return " ".join(rendered)
def json_to_text(json_data: dict) -> str:
    """
    Convert JSON lyrics data to text format for display.
    Only uses the 'word' layer from the JSON structure.
    Groups words into lines with regroup_words (max_len=5, gap=0.50).

    Args:
        json_data: Dictionary with 'word' key containing list of word objects
            (each with 'word', 'start', 'end').
    Returns:
        One "word[start:end] word[start:end]..." line per segment, with lines
        separated by a blank line ("\\n\\n"). Returns "" when json_data is not
        a dict, has no 'word' key, or the word list is empty.
    """
    if not isinstance(json_data, dict) or 'word' not in json_data:
        return ""
    words = json_data['word']
    if not words:
        return ""

    # Matching words to segments by time range is lossy: zero-length words
    # (start == end) match no range and overlapping timestamps match several.
    # Instead, hand regroup_words index tokens as the "word" text so each
    # segment's joined text names exactly the words it contains.
    indexed = [
        {"word": str(position), "start": w["start"], "end": w["end"]}
        for position, w in enumerate(words)
    ]
    segments = regroup_words(indexed, max_len=5, gap=0.50)

    # Convert each segment to text format
    segment_lines = []
    for seg in segments:
        seg_words = [words[int(token)] for token in seg["segment"].split()]
        if seg_words:
            segment_lines.append(words_to_text(seg_words))
    return '\n\n'.join(segment_lines)
def round_to_quarter_beats(beat_position: float) -> float:
    """Snap a beat position to the nearest multiple of 0.25 (sample display)."""
    quarter_count = round(beat_position * 4)
    return quarter_count / 4
def beats_to_seconds(beat_position: float, bpm: float) -> float:
    """Return the time in seconds at which `beat_position` falls for `bpm`."""
    beat_minutes_scaled = beat_position * 60.0
    return beat_minutes_scaled / bpm
def seconds_to_beats(time_seconds: float, bpm: float) -> float:
    """Return the beat position reached after `time_seconds` at `bpm`."""
    beats_times_sixty = time_seconds * bpm
    return beats_times_sixty / 60.0
def convert_text_time_to_beats(text: str, bpm: float, round_to_quarters: bool = False) -> str:
    """
    Rewrite second-based word timestamps as beat positions.

    Args:
        text: String in format "word[start_sec:end_sec] ..."
        bpm: Beats per minute for conversion
        round_to_quarters: If True, round beats to quarter notes (for sample display)
    Returns:
        Single-line string in format "word[start_beat:end_beat] ...",
        or "" for blank input.
    """
    if not text.strip():
        return ""

    def _fmt(value: float) -> str:
        # At most 2 decimals, then drop trailing zeros and a dangling '.'.
        return f"{value:.2f}".rstrip('0').rstrip('.')

    tokens = []
    for entry in text_to_words(text):
        first_beat = seconds_to_beats(entry['start'], bpm)
        last_beat = seconds_to_beats(entry['end'], bpm)
        if round_to_quarters:
            # Snap to quarter notes for sample display
            first_beat = round_to_quarter_beats(first_beat)
            last_beat = round_to_quarter_beats(last_beat)
        tokens.append(f"{entry['word']}[{_fmt(first_beat)}:{_fmt(last_beat)}]")
    return " ".join(tokens)
def beats_to_text_with_regrouping(text: str, bpm: float, round_to_quarters: bool = False) -> str:
    """
    Convert time-based text to beats format with regrouping (like time mode).

    Words are converted to beat positions, grouped with regroup_words
    (max_len=20 beats, gap=2.0 beats) and rendered one segment per line.

    Args:
        text: String in format "word[start_sec:end_sec] ..."
        bpm: Beats per minute for conversion
        round_to_quarters: If True, round beats to quarter notes (for sample display)
    Returns:
        String with beats format grouped into lines, lines separated by a
        blank line ("\\n\\n"); "" for blank input or when no word parses.
    """
    if not text.strip():
        return ""

    # First convert to beats format
    beat_words = []
    for word in text_to_words(text):
        start_beat = seconds_to_beats(word['start'], bpm)
        end_beat = seconds_to_beats(word['end'], bpm)
        # Round to quarter notes for sample display
        if round_to_quarters:
            start_beat = round_to_quarter_beats(start_beat)
            end_beat = round_to_quarter_beats(end_beat)
        beat_words.append({
            'word': word['word'],
            'start': start_beat,
            'end': end_beat,
        })
    if not beat_words:
        return ""

    # Quarter-beat rounding often collapses a short word to start == end, and
    # such a word matches no [start, end) range, so matching words back to
    # segments by beat range would silently drop it (and could duplicate
    # overlapping ones). Instead, hand regroup_words index tokens as the
    # "word" text so each segment's joined text names exactly its members.
    indexed = [
        {"word": str(position), "start": w["start"], "end": w["end"]}
        for position, w in enumerate(beat_words)
    ]
    # Group beats into segments (using beat positions instead of seconds)
    segments = regroup_words(indexed, max_len=20, gap=2.0)  # 20 beats max, 2 beat gap

    # Convert each segment to text format
    segment_lines = []
    for seg in segments:
        seg_words = [beat_words[int(token)] for token in seg["segment"].split()]
        if seg_words:
            # words_to_text formats these as word[beat:beat]
            segment_lines.append(words_to_text(seg_words))
    return '\n\n'.join(segment_lines)
def convert_text_beats_to_time(text: str, bpm: float) -> str:
    """
    Rewrite beat-based word positions as timestamps in seconds.

    Args:
        text: String in format "word[start_beat:end_beat] ..."
        bpm: Beats per minute for conversion
    Returns:
        Single-line string in format "word[start_sec:end_sec] ...",
        or "" for blank input.
    """
    if not text.strip():
        return ""

    def _fmt(value: float) -> str:
        # At most 2 decimals, then drop trailing zeros and a dangling '.'.
        return f"{value:.2f}".rstrip('0').rstrip('.')

    # The beats format shares its token syntax with the time format.
    tokens = []
    for entry in text_to_words(text):
        begin_sec = beats_to_seconds(entry['start'], bpm)
        finish_sec = beats_to_seconds(entry['end'], bpm)
        tokens.append(f"{entry['word']}[{_fmt(begin_sec)}:{_fmt(finish_sec)}]")
    return " ".join(tokens)
def convert_text_beats_to_time_with_regrouping(text: str, bpm: float) -> str:
    """
    Convert beats-based text to time-based text line by line, keeping the
    existing line structure.

    Args:
        text: String in format "word[start_beat:end_beat] ..." (can be multi-line)
        bpm: Beats per minute for conversion
    Returns:
        String in format "word[start_sec:end_sec] ..." with preserved line
        breaks. Blank lines are kept as empty lines; a non-blank line that
        yields no parsable word is omitted.
    """
    if not text.strip():
        return ""

    output_lines = []
    for raw_line in text.split('\n'):
        stripped = raw_line.strip()
        if not stripped:
            # Keep blank lines so the segmentation survives the round trip.
            output_lines.append("")
            continue
        # Per-line conversion is exactly the single-line beats -> time conversion.
        converted = convert_text_beats_to_time(stripped, bpm)
        if converted:
            output_lines.append(converted)
    return "\n".join(output_lines)
def text_to_json(text: str) -> dict:
    """
    Build the JSON structure holding the 'word' layer from text format.
    Multi-line input is flattened to a single line before parsing.

    Args:
        text: String in format "word[start:end] word[start:end]..." (can be multi-line)
    Returns:
        Dictionary with 'word' key containing list of word objects
    """
    # Collect the non-blank lines, stripped, and parse them as one line.
    pieces = []
    for raw_line in text.split('\n'):
        trimmed = raw_line.strip()
        if trimmed:
            pieces.append(trimmed)
    flattened = ' '.join(pieces)
    return {"word": text_to_words(flattened)}