Spaces:

jacob-c
/

syllables_matching_experiment

Paused

App Files Community

jacob-c committed on May 18

Commit

0a49a17

1 Parent(s): 6ac84ae

ss

Browse files

Files changed (3) hide show

__pycache__/beat_analysis.cpython-310.pyc +0 -0
app.py +120 -59
beat_analysis.py +22 -17

__pycache__/beat_analysis.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/beat_analysis.cpython-310.pyc and b/__pycache__/beat_analysis.cpython-310.pyc differ

app.py CHANGED Viewed

@@ -227,19 +227,16 @@ def generate_lyrics(music_analysis, genre, duration):
 ONLY WRITE THE ACTUAL LYRICS. NO EXPLANATIONS OR META-TEXT.
 """
         else:
-            # Create phrase examples
-            num_phrases = len(lyric_templates)
             # Calculate the typical syllable range for this genre
             if num_phrases > 0:
                 # Get max syllables per line from templates
-                max_syllables = max([t.get('max_expected', 8) for t in lyric_templates]) if lyric_templates[0].get('max_expected') else 8
-                min_syllables = min([t.get('min_expected', 3) for t in lyric_templates]) if lyric_templates[0].get('min_expected') else 3
                 avg_syllables = (min_syllables + max_syllables) // 2
             else:
-                min_syllables = 3
-                max_syllables = 8
-                avg_syllables = 5
             # Create a more direct prompt with examples and specific syllable count guidance
             prompt = f"""Write song lyrics for a {genre} song in {key} {mode} with tempo {tempo} BPM. The emotion is {emotion} and theme is {theme}.
@@ -247,27 +244,29 @@ ONLY WRITE THE ACTUAL LYRICS. NO EXPLANATIONS OR META-TEXT.
 I need EXACTLY {num_phrases} lines of lyrics - one line for each musical phrase. Not one more, not one less.
 CRITICAL INSTRUCTIONS:
-- Each line MUST contain between {min_syllables}-{max_syllables} syllables (aim for {avg_syllables})
-- Keep lines SHORT and SIMPLE - fewer syllables is better than too many
-- Break complete thoughts across multiple lines instead of cramming them into one line
-- Each line should flow naturally with the beat
-- Make each line end at a natural pause point
-- Use shorter words when possible
 FORMAT:
 - Just write {num_phrases} plain text lines
-- Each line should be simple song lyrics (no annotations, no numbers, no labeling)
-- Don't include any explanations, thinking tags, or meta-commentary
-- Don't use any <think> or [thinking] tags
-- Don't include [Verse], [Chorus] or section markers
-- Don't include line numbers
-EXAMPLE OF WHAT I WANT (for a {num_phrases}-line song):
-Lost in the light ({min_syllables} syllables)
-Waiting for the morning dew ({avg_syllables} syllables)
-Time slips away ({min_syllables+1} syllables)
-In the silence of my room ({avg_syllables} syllables)
-(... and so on for exactly {num_phrases} lines)
 JUST THE PLAIN LYRICS, EXACTLY {num_phrases} LINES, KEEPING EACH LINE TO {min_syllables}-{max_syllables} SYLLABLES.
 """
@@ -467,21 +466,71 @@ JUST THE PLAIN LYRICS, EXACTLY {num_phrases} LINES, KEEPING EACH LINE TO {min_sy
                 i = len(clean_lines)
                 if i < len(lyric_templates):
                     template = lyric_templates[i]
-                    target_syllables = min(max_syllables, (template.get('min_expected', 3) + template.get('max_expected', 8)) // 2)
-                    if genre.lower() == 'pop':
-                        if target_syllables <= 4:
-                            placeholder = "Lost in the night"  # 4 syllables
-                        else:
-                            placeholder = "Dancing in the moonlight"  # 6 syllables
-                    elif genre.lower() == 'rock':
-                        placeholder = "Rocking to the beat"  # 5 syllables
-                    elif genre.lower() == 'country':
-                        placeholder = "Down the old dirt road"  # 5 syllables
                     else:
-                        placeholder = f"Echoes of {emotion}"  # ~4-5 syllables
                 else:
-                    placeholder = "Whispers in the wind"  # 5 syllables
                 clean_lines.append(placeholder)
@@ -531,22 +580,26 @@ def analyze_lyrics_rhythm_match(lyrics, lyric_templates, genre="pop"):
         check_result = beat_analyzer.check_syllable_stress_match(line, template, genre)
         # Get match symbols
-        syllable_match = "✓" if check_result["matches_beat_count"] else ("✓*" if check_result["within_range"] else "✗")
         stress_match = "✓" if check_result["stress_matches"] else f"{int(check_result['stress_match_percentage']*100)}%"
         # Update stats
-        if check_result["matches_beat_count"]:
             total_matches += 1
-        if check_result["within_range"]:
             total_range_matches += 1
         if check_result["stress_matches"]:
             total_stress_matches += 1
         total_stress_percentage += check_result["stress_match_percentage"]
-        # Track how close we are to ideal count for this genre
-        if abs(check_result["syllable_count"] - check_result["ideal_syllable_count"]) <= 1:
-            total_ideal_matches += 1
         # Create visual representation of the stress pattern
         stress_visual = ""
         for char in template['stress_pattern']:
@@ -563,38 +616,46 @@ def analyze_lyrics_rhythm_match(lyrics, lyric_templates, genre="pop"):
     # Add summary statistics
     if line_count > 0:
         exact_match_rate = (total_matches / line_count) * 100
-        range_match_rate = (total_range_matches / line_count) * 100
         ideal_match_rate = (total_ideal_matches / line_count) * 100
         stress_match_rate = (total_stress_matches / line_count) * 100
         avg_stress_percentage = (total_stress_percentage / line_count) * 100
         result += f"\n**Summary:**\n"
-        result += f"- Exact syllable match rate: {exact_match_rate:.1f}%\n"
         result += f"- Genre-appropriate syllable range match rate: {range_match_rate:.1f}%\n"
-        result += f"- Ideal genre syllable count match rate: {ideal_match_rate:.1f}%\n"
         result += f"- Perfect stress pattern match rate: {stress_match_rate:.1f}%\n"
         result += f"- Average stress pattern accuracy: {avg_stress_percentage:.1f}%\n"
         result += f"- Overall rhythmic accuracy: {((range_match_rate + avg_stress_percentage) / 2):.1f}%\n"
         # Add genre-specific notes
         result += f"\n**Genre Notes ({genre}):**\n"
         # Add appropriate genre notes based on genre
         if genre.lower() == "pop":
-            result += "- Pop music typically allows 1-3 syllables per beat using melisma and syncopation\n"
-            result += "- Strong downbeats often align with stressed syllables of important words\n"
         elif genre.lower() == "rock":
-            result += "- Rock music often uses 1-2 syllables per beat with some variation\n"
-            result += "- Emphasis on strong beats for impact and rhythmic drive\n"
-        elif genre.lower() in ["hiphop", "rap"]:
-            result += "- Hip-hop/rap often features 2-5 syllables per beat through rapid delivery\n"
-            result += "- Complex rhyme patterns and fast delivery create higher syllable density\n"
-        elif genre.lower() in ["folk", "country"]:
-            result += "- Folk/country music often stays closer to 1:1 syllable-to-beat ratio\n"
-            result += "- Narrative focus leads to clearer enunciation of syllables\n"
         else:
-            result += "- This genre typically allows for flexible syllable-to-beat relationships\n"
-            result += "- Syllable count can vary based on vocal style and song section\n"
     return result

 ONLY WRITE THE ACTUAL LYRICS. NO EXPLANATIONS OR META-TEXT.
 """
         else:
             # Calculate the typical syllable range for this genre
             if num_phrases > 0:
                 # Get max syllables per line from templates
+                max_syllables = max([t.get('max_expected', 7) for t in lyric_templates]) if lyric_templates[0].get('max_expected') else 7
+                min_syllables = min([t.get('min_expected', 2) for t in lyric_templates]) if lyric_templates[0].get('min_expected') else 2
                 avg_syllables = (min_syllables + max_syllables) // 2
             else:
+                min_syllables = 2
+                max_syllables = 7
+                avg_syllables = 4
             # Create a more direct prompt with examples and specific syllable count guidance
             prompt = f"""Write song lyrics for a {genre} song in {key} {mode} with tempo {tempo} BPM. The emotion is {emotion} and theme is {theme}.
 I need EXACTLY {num_phrases} lines of lyrics - one line for each musical phrase. Not one more, not one less.
 CRITICAL INSTRUCTIONS:
+- Each line MUST be VERY SHORT with only {min_syllables}-{max_syllables} syllables (aim for {avg_syllables} or fewer)
+- PRIORITIZE BREVITY - use fewer syllables rather than more
+- Keep each line SIMPLE and DIRECT - avoid complex phrases
+- Break complete thoughts across MULTIPLE LINES rather than fitting them into one line
+- Think of each line as part of a flowing conversation, not a complete sentence
+- Each phrase should fit into one measure of music
+- Use simple, short words whenever possible
+- End each line at a natural speaking pause point
 FORMAT:
 - Just write {num_phrases} plain text lines
+- Each line should be simple song lyrics (no annotations)
+- Don't include any explanations or commentary
+- Don't use any tags or markers
+- Don't include section labels like [Verse] or [Chorus]
+EXAMPLE OF WHAT I WANT:
+Empty chair ({min_syllables} syllables)
+Waiting by the door ({avg_syllables} syllables)
+Memories fade ({min_syllables+1} syllables)
+Into silence ({avg_syllables-1} syllables)
+Your ghost remains ({avg_syllables} syllables)
+(... and so on)
 JUST THE PLAIN LYRICS, EXACTLY {num_phrases} LINES, KEEPING EACH LINE TO {min_syllables}-{max_syllables} SYLLABLES.
 """
                 i = len(clean_lines)
                 if i < len(lyric_templates):
                     template = lyric_templates[i]
+                    target_syllables = min(max_syllables, (template.get('min_expected', 2) + template.get('max_expected', 7)) // 2)
+                    # Create a diverse set of placeholders that match the theme/emotion
+                    placeholders = {
+                        # 2-3 syllables
+                        2: [
+                            "Night falls",
+                            "Time stops",
+                            "Hearts beat",
+                            "Rain falls",
+                            "Stars shine"
+                        ],
+                        # 3-4 syllables
+                        3: [
+                            "Empty chair",
+                            "Shadows dance",
+                            "Whispers fade",
+                            "Memories",
+                            "Silent room"
+                        ],
+                        # 4-5 syllables
+                        4: [
+                            "Moonlight shimmers",
+                            "Echoes of time",
+                            "Footsteps fading",
+                            "Memories drift",
+                            "Silence speaks loud"
+                        ],
+                        # 5-6 syllables
+                        5: [
+                            "Walking in the rain",
+                            "Whispers in the dark",
+                            "Echoes of your voice",
+                            "Traces left behind",
+                            "Time moves ever on"
+                        ],
+                        # 6-7 syllables
+                        6: [
+                            "Dancing in the moonlight",
+                            "Shadows play on the wall",
+                            "Memories fade to silence",
+                            "Moments lost in the wind",
+                            "Whispers of a better time"
+                        ]
+                    }
+                    # Get the closest matching syllable group
+                    closest_group = min(placeholders.keys(), key=lambda k: abs(k - target_syllables))
+                    # Choose a placeholder that hasn't been used yet
+                    available_placeholders = [p for p in placeholders[closest_group]
+                                             if p not in clean_lines]
+                    if available_placeholders:
+                        placeholder = available_placeholders[i % len(available_placeholders)]
                     else:
+                        # If we've used all placeholders in this group, create a custom one
+                        if emotion.lower() in ["sad", "nostalgic", "calm"]:
+                            placeholder = f"Memories of {emotion}"
+                        elif emotion.lower() in ["happy", "energetic"]:
+                            placeholder = f"Dancing through {emotion}"
+                        else:
+                            placeholder = f"Feeling {emotion} now"
                 else:
+                    placeholder = "Silence speaks volumes"
                 clean_lines.append(placeholder)
         check_result = beat_analyzer.check_syllable_stress_match(line, template, genre)
         # Get match symbols
+        if check_result["close_to_ideal"]:
+            syllable_match = "✓"  # Ideal or very close
+        elif check_result["within_range"]:
+            syllable_match = "✓*"  # Within range but not ideal
+        else:
+            syllable_match = "✗"  # Outside range
         stress_match = "✓" if check_result["stress_matches"] else f"{int(check_result['stress_match_percentage']*100)}%"
         # Update stats
+        if check_result["close_to_ideal"]:
             total_matches += 1
+            total_ideal_matches += 1
+        elif check_result["within_range"]:
             total_range_matches += 1
         if check_result["stress_matches"]:
             total_stress_matches += 1
         total_stress_percentage += check_result["stress_match_percentage"]
         # Create visual representation of the stress pattern
         stress_visual = ""
         for char in template['stress_pattern']:
     # Add summary statistics
     if line_count > 0:
         exact_match_rate = (total_matches / line_count) * 100
+        range_match_rate = ((total_matches + total_range_matches) / line_count) * 100
         ideal_match_rate = (total_ideal_matches / line_count) * 100
         stress_match_rate = (total_stress_matches / line_count) * 100
         avg_stress_percentage = (total_stress_percentage / line_count) * 100
         result += f"\n**Summary:**\n"
+        result += f"- Ideal or near-ideal syllable match rate: {exact_match_rate:.1f}%\n"
         result += f"- Genre-appropriate syllable range match rate: {range_match_rate:.1f}%\n"
         result += f"- Perfect stress pattern match rate: {stress_match_rate:.1f}%\n"
         result += f"- Average stress pattern accuracy: {avg_stress_percentage:.1f}%\n"
         result += f"- Overall rhythmic accuracy: {((range_match_rate + avg_stress_percentage) / 2):.1f}%\n"
+        # Add guidance on ideal distribution for syllables
+        result += f"\n**Syllable Distribution Guidance:**\n"
+        result += f"- Aim for {min([t.get('min_expected', 3) for t in lyric_templates])}-{max([t.get('max_expected', 7) for t in lyric_templates])} syllables per line\n"
+        result += f"- Break complete thoughts across multiple lines for a more natural flow\n"
+        result += f"- Allow sentences to span 2-3 measures for better musical phrasing\n"
         # Add genre-specific notes
         result += f"\n**Genre Notes ({genre}):**\n"
         # Add appropriate genre notes based on genre
         if genre.lower() == "pop":
+            result += "- Pop lyrics are typically concise with 3-7 syllables per musical phrase\n"
+            result += "- Strong beats often align with stressed syllables in important words\n"
         elif genre.lower() == "rock":
+            result += "- Rock lyrics favor brevity with 3-6 syllables per musical phrase\n"
+            result += "- Emphasis on strong beats for rhythmic impact\n"
+        elif genre.lower() == "country":
+            result += "- Country lyrics tend toward clear storytelling with 3-6 syllables per phrase\n"
+            result += "- Natural speech rhythms are important for authentic delivery\n"
+        elif genre.lower() == "disco":
+            result += "- Disco lyrics work well with 4-7 syllables per musical phrase\n"
+            result += "- Rhythmic patterns often emphasize dance-friendly phrasing\n"
+        elif genre.lower() == "metal":
+            result += "- Metal lyrics balance intensity with 3-7 syllables per musical phrase\n"
+            result += "- Strong syllables on strong beats create powerful impact\n"
         else:
+            result += "- This genre typically works well with concise, focused phrasing\n"
+            result += "- Consider breaking complete thoughts across multiple lines\n"
     return result

beat_analysis.py CHANGED Viewed

@@ -32,11 +32,11 @@ class BeatAnalyzer:
         # Genre-specific syllable-to-beat ratio guidelines
         self.genre_syllable_ratios = {
             # Supported genres with strong syllable-to-beat patterns
-            'pop': (0.7, 1.2, 1.6),        # Pop - more conservative range
-            'rock': (0.7, 1.0, 1.5),       # Rock - slightly reduced upper range
-            'country': (0.7, 1.0, 1.3),    # Country - clear and simple syllable patterns
-            'disco': (0.8, 1.2, 1.5),      # Disco - tighter range for better alignment
-            'metal': (0.7, 1.2, 1.5),      # Metal - reduced upper limit
             # Other genres (analysis only, no lyrics generation)
             'hiphop': (1.8, 2.5, 3.5),     # Hip hop often has many syllables per beat
@@ -49,7 +49,7 @@ class BeatAnalyzer:
             'electronic': (0.7, 1.0, 1.5), # Electronic music varies widely
             'classical': (0.7, 1.0, 1.4),  # Classical can vary by subgenre
             'blues': (0.6, 0.8, 1.2),      # Blues often extends syllables
-            'default': (0.7, 1.2, 1.6)     # Default for unknown genres - more conservative
         }
         # List of genres supported for lyrics generation
@@ -276,16 +276,16 @@ class BeatAnalyzer:
                 visual_pattern += "weak "
         # Estimate number of words based on beats (very rough estimate)
-        est_words = max(1, int(num_beats * words_per_beat))
-        # Estimate syllables - use more conservative ranges
         # For 4/4 time signature, we want to encourage shorter phrases
         if stress_pattern == "SWMW":  # 4/4 time
-            min_syllables = max(1, int(num_beats * 0.7))
-            max_syllables = min(8, int(num_beats * 1.6))
         else:
-            min_syllables = max(1, int(num_beats * 0.7))
-            max_syllables = int(num_beats * 1.5)
         # Store these in the template for future reference
         template['min_expected'] = min_syllables
@@ -294,7 +294,7 @@ class BeatAnalyzer:
         guide = f"~{est_words} words, ~{min_syllables}-{max_syllables} syllables | Pattern: {visual_pattern}"
         # Add additional guidance to the template for natural phrasing
-        template['phrasing_guide'] = "Keep lines short. Split complete thoughts across multiple lines."
         return guide
@@ -317,11 +317,11 @@ class BeatAnalyzer:
         # Calculate flexible min and max syllable expectations based on genre
         # Use more conservative ranges to avoid too many syllables
         min_expected = max(1, int(expected_count * min_ratio))
-        max_expected = min(8, int(expected_count * max_ratio))
         # For 4/4 time signature, cap the max syllables per line
         if template['stress_pattern'] == "SWMW":  # 4/4 time
-            max_expected = min(max_expected, 8)  # Cap at 8 syllables max for 4/4
         # Record min and max expected in the template for future reference
         template['min_expected'] = min_expected
@@ -335,6 +335,10 @@ class BeatAnalyzer:
         # Ensure ideal count is also within our constrained range
         ideal_count = max(min_expected, min(max_expected, ideal_count))
         closeness_to_ideal = 1.0 - min(abs(syllable_count - ideal_count) / (max_expected - min_expected + 1), 1.0)
         # Get detailed syllable breakdown for stress analysis
@@ -354,7 +358,7 @@ class BeatAnalyzer:
         stress_match_percentage = self._calculate_stress_match(words, word_syllables, syllable_to_beat_mapping, stress_pattern)
         # Consider a stress match if the percentage is high enough
-        stress_matches = stress_match_percentage >= 0.7
         return {
             'syllable_count': syllable_count,
@@ -368,7 +372,8 @@ class BeatAnalyzer:
             'stress_match_percentage': stress_match_percentage,
             'closeness_to_ideal': closeness_to_ideal,
             'word_syllables': word_syllables,
-            'ideal_syllable_count': ideal_count
         }
     def _map_syllables_to_beats(self, word_syllables, stress_pattern):

         # Genre-specific syllable-to-beat ratio guidelines
         self.genre_syllable_ratios = {
             # Supported genres with strong syllable-to-beat patterns
+            'pop': (0.5, 1.0, 1.5),        # Pop - significantly reduced range
+            'rock': (0.5, 0.9, 1.3),       # Rock - reduced for brevity
+            'country': (0.6, 0.9, 1.2),    # Country - simpler syllable patterns
+            'disco': (0.7, 1.0, 1.3),      # Disco - tightened range
+            'metal': (0.6, 1.0, 1.3),      # Metal - reduced upper limit
             # Other genres (analysis only, no lyrics generation)
             'hiphop': (1.8, 2.5, 3.5),     # Hip hop often has many syllables per beat
             'electronic': (0.7, 1.0, 1.5), # Electronic music varies widely
             'classical': (0.7, 1.0, 1.4),  # Classical can vary by subgenre
             'blues': (0.6, 0.8, 1.2),      # Blues often extends syllables
+            'default': (0.6, 1.0, 1.3)     # Default for unknown genres - more conservative
         }
         # List of genres supported for lyrics generation
                 visual_pattern += "weak "
         # Estimate number of words based on beats (very rough estimate)
+        est_words = max(1, int(num_beats * 0.4))  # Reduced from 0.5 to encourage fewer words
+        # Estimate syllables - use even more conservative ranges
         # For 4/4 time signature, we want to encourage shorter phrases
         if stress_pattern == "SWMW":  # 4/4 time
+            min_syllables = max(1, int(num_beats * 0.5))  # Reduced from 0.7
+            max_syllables = min(7, int(num_beats * 1.3))  # Reduced from 1.6 to max 7
         else:
+            min_syllables = max(1, int(num_beats * 0.5))  # Reduced from 0.7
+            max_syllables = min(7, int(num_beats * 1.2))  # Reduced from 1.5 to max 7
         # Store these in the template for future reference
         template['min_expected'] = min_syllables
         guide = f"~{est_words} words, ~{min_syllables}-{max_syllables} syllables | Pattern: {visual_pattern}"
         # Add additional guidance to the template for natural phrasing
+        template['phrasing_guide'] = "Keep lines SHORT. Break complete thoughts across MULTIPLE LINES."
         return guide
         # Calculate flexible min and max syllable expectations based on genre
         # Use more conservative ranges to avoid too many syllables
         min_expected = max(1, int(expected_count * min_ratio))
+        max_expected = min(7, int(expected_count * max_ratio))
         # For 4/4 time signature, cap the max syllables per line
         if template['stress_pattern'] == "SWMW":  # 4/4 time
+            max_expected = min(max_expected, 7)  # Cap at 7 syllables max for 4/4
         # Record min and max expected in the template for future reference
         template['min_expected'] = min_expected
         # Ensure ideal count is also within our constrained range
         ideal_count = max(min_expected, min(max_expected, ideal_count))
+        # More lenient approach to determining "ideal"
+        # Count as ideal if within 1 syllable of the target instead of exact match
+        close_to_ideal = abs(syllable_count - ideal_count) <= 1
         closeness_to_ideal = 1.0 - min(abs(syllable_count - ideal_count) / (max_expected - min_expected + 1), 1.0)
         # Get detailed syllable breakdown for stress analysis
         stress_match_percentage = self._calculate_stress_match(words, word_syllables, syllable_to_beat_mapping, stress_pattern)
         # Consider a stress match if the percentage is high enough
+        stress_matches = stress_match_percentage >= 0.6  # Reduced from 0.7 to be more lenient
         return {
             'syllable_count': syllable_count,
             'stress_match_percentage': stress_match_percentage,
             'closeness_to_ideal': closeness_to_ideal,
             'word_syllables': word_syllables,
+            'ideal_syllable_count': ideal_count,
+            'close_to_ideal': close_to_ideal  # New field
         }
     def _map_syllables_to_beats(self, word_syllables, stress_pattern):