Spaces:

Somalitts
/

8aad

Running

App Files Files Community

Somalitts committed on Jul 20

Commit

5a3bbd1

verified ·

1 Parent(s): f685632

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -24

app.py CHANGED Viewed

@@ -11,12 +11,11 @@ import numpy as np
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
-# --- KU DAR FAYLKA CODADKAAGA ---
 VOICE_SAMPLE_FILES = ["1.wav"]  # Hubi in faylkan tayadiisu fiican tahay
 EMBEDDING_DIR = "speaker_embeddings"
 os.makedirs(EMBEDDING_DIR, exist_ok=True)
-# --- Soo Dejinta Model-yada ---
 try:
     print("Loading models...")
     processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
@@ -58,7 +57,7 @@ def get_speaker_embedding(wav_file_path):
     except Exception as e:
         raise gr.Error(f"Could not process audio file {wav_file_path}. Error: {e}")
-# --- Number Handling Functions ---
 number_words = {
     0: "eber", 1: "kow", 2: "labo", 3: "saddex", 4: "afar", 5: "shan",
     6: "lix", 7: "toddobo", 8: "siddeed", 9: "sagaal", 10: "toban",
@@ -69,7 +68,6 @@ number_words = {
     60: "lixdan", 70: "toddobaatan", 80: "siddeetan", 90: "sagaashan",
     100: "boqol", 1000: "kun",
 }
 def number_to_words(n):
     if n in number_words:
         return number_words[n]
@@ -85,17 +83,27 @@ def number_to_words(n):
         return (number_to_words(n // 1_000_000) + " milyan" if n // 1_000_000 > 1 else "milyan") + (
             " iyo " + number_to_words(n % 1_000_000) if n % 1_000_000 else "")
     return str(n)
 def replace_numbers_with_words(text):
     """Return *text* with every standalone run of digits replaced by number_to_words output."""
     def _spell_out(match):
         # The matched digits are parsed as an int before conversion.
         return number_to_words(int(match.group()))
     return re.sub(r'\b\d+\b', _spell_out, text)
 def normalize_text(text):
     """Lower-case *text*, spell out its numbers, then strip every character that is not a word character, whitespace or an apostrophe."""
     lowered = text.lower()
     spelled = replace_numbers_with_words(lowered)
     return re.sub(r'[^\w\s\']', '', spelled)
-# --- Main Text-to-Speech Function with pause between lines ---
 def text_to_speech(text, voice_choice):
     if not text or not voice_choice:
         gr.Warning("Fadlan geli qoraal oo dooro cod.")
@@ -103,17 +111,12 @@ def text_to_speech(text, voice_choice):
     speaker_embedding = get_speaker_embedding(voice_choice)
-    # Qoraalka kala saar sadarro (lines)
-    lines = [line.strip() for line in text.strip().split('\n') if line.strip()]
-    if not lines:
-        return None
     all_audios = []
-    for i, line in enumerate(lines):
-        normalized_text = normalize_text(line)
         inputs = processor(text=normalized_text, return_tensors="pt").to(device)
         with torch.no_grad():
             speech = model.generate(
                 input_ids=inputs["input_ids"],
@@ -127,18 +130,14 @@ def text_to_speech(text, voice_choice):
             audio = vocoder(speech).cpu()
         all_audios.append(audio)
-        # Ku dar nasasho 0.5 ilbiriqsi haddii aanu ahayn line-kii ugu dambeeyay
-        if i < len(lines) - 1:
-            pause_samples = torch.zeros((1, int(16000 * 0.5)))  # 0.5 seconds pause
-            all_audios.append(pause_samples)
-    # Isku dar dhammaan codadka
     final_audio = torch.cat(all_audios, dim=1)
     return (16000, final_audio.numpy())
-# --- Gradio Interface ---
 iface = gr.Interface(
     fn=text_to_speech,
     inputs=[
@@ -155,7 +154,6 @@ iface = gr.Interface(
     description="Geli qoraal Soomaali ah, dooro cod, kadibna riix 'Submit' si aad u abuurto hadal."
 )
-# --- Launch ---
 if __name__ == "__main__":
     if not all(os.path.exists(f) for f in VOICE_SAMPLE_FILES):
         raise FileNotFoundError("Fadlan hubi inaad faylasha codka soo gelisay Space-ka.")

 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 VOICE_SAMPLE_FILES = ["1.wav"]  # Hubi in faylkan tayadiisu fiican tahay
 EMBEDDING_DIR = "speaker_embeddings"
 os.makedirs(EMBEDDING_DIR, exist_ok=True)
+# --- Load models ---
 try:
     print("Loading models...")
     processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
     except Exception as e:
         raise gr.Error(f"Could not process audio file {wav_file_path}. Error: {e}")
+# Number to words functions (as before) ...
 number_words = {
     0: "eber", 1: "kow", 2: "labo", 3: "saddex", 4: "afar", 5: "shan",
     6: "lix", 7: "toddobo", 8: "siddeed", 9: "sagaal", 10: "toban",
     60: "lixdan", 70: "toddobaatan", 80: "siddeetan", 90: "sagaashan",
     100: "boqol", 1000: "kun",
 }
 def number_to_words(n):
     if n in number_words:
         return number_words[n]
         return (number_to_words(n // 1_000_000) + " milyan" if n // 1_000_000 > 1 else "milyan") + (
             " iyo " + number_to_words(n % 1_000_000) if n % 1_000_000 else "")
     return str(n)
 def replace_numbers_with_words(text):
     """Return *text* with every standalone run of digits replaced by number_to_words output."""
     def _spell_out(match):
         # The matched digits are parsed as an int before conversion.
         return number_to_words(int(match.group()))
     return re.sub(r'\b\d+\b', _spell_out, text)
 def normalize_text(text):
     """Lower-case *text*, spell out its numbers, then strip every character that is not a word character, whitespace or an apostrophe."""
     lowered = text.lower()
     spelled = replace_numbers_with_words(lowered)
     return re.sub(r'[^\w\s\']', '', spelled)
+# **Jumladaha kala saar (split into sentences) function**
def split_into_sentences(text):
    """Split *text* into sentences.

    A sentence boundary is any run of whitespace (spaces, tabs, newlines)
    that immediately follows '.', '!' or '?'. The terminating punctuation
    stays attached to its sentence. Text without such punctuation is
    returned as a single sentence.

    Args:
        text: The raw input string; ``None`` or an empty string is allowed.

    Returns:
        A list of non-empty, whitespace-stripped sentence strings. The list
        is empty when *text* is ``None``, empty, or whitespace only.
    """
    # Guard against None/empty input, which re.split would reject or
    # turn into a single empty piece.
    if not text:
        return []
    # The look-behind keeps the punctuation on the left-hand piece. The
    # former "fallback" split used an equivalent pattern and could never
    # change the result, so it has been removed.
    pieces = re.split(r'(?<=[.!?])\s+', text)
    # Drop surrounding whitespace and discard empty fragments.
    return [piece.strip() for piece in pieces if piece.strip()]
 def text_to_speech(text, voice_choice):
     if not text or not voice_choice:
         gr.Warning("Fadlan geli qoraal oo dooro cod.")
     speaker_embedding = get_speaker_embedding(voice_choice)
+    sentences = split_into_sentences(text)
     all_audios = []
+    for i, sentence in enumerate(sentences):
+        normalized_text = normalize_text(sentence)
         inputs = processor(text=normalized_text, return_tensors="pt").to(device)
         with torch.no_grad():
             speech = model.generate(
                 input_ids=inputs["input_ids"],
             audio = vocoder(speech).cpu()
         all_audios.append(audio)
+        # Nasasho 0.5 ilbiriqsi haddii uusan ahayn jumladii ugu dambeysay
+        if i < len(sentences) - 1:
+            pause = torch.zeros((1, int(16000 * 0.5)))  # 0.5 sec silence
+            all_audios.append(pause)
     final_audio = torch.cat(all_audios, dim=1)
     return (16000, final_audio.numpy())
 iface = gr.Interface(
     fn=text_to_speech,
     inputs=[
     description="Geli qoraal Soomaali ah, dooro cod, kadibna riix 'Submit' si aad u abuurto hadal."
 )
 if __name__ == "__main__":
     if not all(os.path.exists(f) for f in VOICE_SAMPLE_FILES):
         raise FileNotFoundError("Fadlan hubi inaad faylasha codka soo gelisay Space-ka.")