Spaces:

Somalitts
/

8aad

Running

App Files Files Community

Somalitts committed on Jul 20

Commit

df3f293

verified ·

1 Parent(s): c872044

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -29

app.py CHANGED Viewed

@@ -102,6 +102,7 @@ def split_into_sentences(text):
     return [s.strip() for s in sentences if s.strip()]
 # --- Main TTS function with pauses between sentences ---
 def text_to_speech(text, voice_choice):
     if not text or not voice_choice:
         gr.Warning("Fadlan geli qoraal oo dooro cod.")
@@ -112,38 +113,32 @@ def text_to_speech(text, voice_choice):
     paragraphs = text.strip().split("\n")
     audio_chunks = []
-    for para in paragraphs:
         para = para.strip()
         if not para:
             continue
-        sentences = split_into_sentences(para)
-        for idx, sentence in enumerate(sentences):
-            norm_sentence = normalize_text(sentence)
-            inputs = processor(text=norm_sentence, return_tensors="pt").to(device)
-            with torch.no_grad():
-                speech = model.generate(
-                    input_ids=inputs["input_ids"],
-                    speaker_embeddings=speaker_embedding.unsqueeze(0),
-                    do_sample=True,
-                    top_k=50,
-                    temperature=0.75,
-                    repetition_penalty=1.2,
-                    max_new_tokens=512
-                )
-                audio = vocoder(speech).cpu().squeeze().numpy()
-            audio_chunks.append(audio)
-            # Pause 0.5 sec between sentences (not after last)
-            if idx < len(sentences) - 1:
-                pause = np.zeros(int(16000 * 0.5))
-                audio_chunks.append(pause)
-        # Pause 0.8 sec between paragraphs (optional)
-        pause_para = np.zeros(int(16000 * 0.8))
-        audio_chunks.append(pause_para)
     final_audio = np.concatenate(audio_chunks)
     return (16000, final_audio)

     return [s.strip() for s in sentences if s.strip()]
 # --- Main TTS function with pauses between sentences ---
+# --- Main TTS function with pause after each new line only ---
 def text_to_speech(text, voice_choice):
     if not text or not voice_choice:
         gr.Warning("Fadlan geli qoraal oo dooro cod.")
     paragraphs = text.strip().split("\n")
     audio_chunks = []
+    for idx, para in enumerate(paragraphs):
         para = para.strip()
         if not para:
             continue
+        norm_para = normalize_text(para)
+        inputs = processor(text=norm_para, return_tensors="pt").to(device)
+        with torch.no_grad():
+            speech = model.generate(
+                input_ids=inputs["input_ids"],
+                speaker_embeddings=speaker_embedding.unsqueeze(0),
+                do_sample=True,
+                top_k=50,
+                temperature=0.75,
+                repetition_penalty=1.2,
+                max_new_tokens=512
+            )
+            audio = vocoder(speech).cpu().squeeze().numpy()
+        audio_chunks.append(audio)
+        # Pause after each paragraph (new line)
+        if idx < len(paragraphs) - 1:
+            pause = np.zeros(int(16000 * 0.8))  # 0.8s pause
+            audio_chunks.append(pause)
     final_audio = np.concatenate(audio_chunks)
     return (16000, final_audio)