Spaces:

Somalitts
/

8aad

Running

App Files Community

Somalitts committed on Jul 20

Commit

f685632

verified ·

1 Parent(s): 1229011

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -18

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 # --- KU DAR FAYLKA CODADKAAGA ---
-VOICE_SAMPLE_FILES = ["1.wav"]
 EMBEDDING_DIR = "speaker_embeddings"
 os.makedirs(EMBEDDING_DIR, exist_ok=True)
@@ -92,26 +92,27 @@ def replace_numbers_with_words(text):
 def normalize_text(text):
     text = text.lower()
     text = replace_numbers_with_words(text)
-    text = re.sub(r'[^\w\s\'.!?]', '', text)  # Ha tirtirin calaamadaha muhiimka ah
     return text
-# --- Main TTS Function with Pause ---
 def text_to_speech(text, voice_choice):
     if not text or not voice_choice:
         gr.Warning("Fadlan geli qoraal oo dooro cod.")
         return None
     speaker_embedding = get_speaker_embedding(voice_choice)
-    normalized_text = normalize_text(text)
-    # Kala qaybi jumladaha
-    lines = re.split(r'(?<=[.!?])\s+', normalized_text.strip())
-    full_audio = []
-    for line in lines:
-        if not line.strip():
-            continue
-        inputs = processor(text=line, return_tensors="pt").to(device)
         with torch.no_grad():
             speech = model.generate(
@@ -123,20 +124,25 @@ def text_to_speech(text, voice_choice):
                 repetition_penalty=1.2,
                 max_new_tokens=512
             )
-            audio_chunk = vocoder(speech).cpu().numpy()
-            full_audio.append(audio_chunk)
-        # Nasasho 0.5 ilbiriqsi u dhaxeysa
-        pause = np.zeros((1, 16000 // 2), dtype=np.float32)
-        full_audio.append(pause)
-    return (16000, np.concatenate(full_audio, axis=-1))
 # --- Gradio Interface ---
 iface = gr.Interface(
     fn=text_to_speech,
     inputs=[
-        gr.Textbox(label="Geli qoraalka af-Soomaaliga (Enter Somali Text)"),
         gr.Dropdown(
             VOICE_SAMPLE_FILES,
             label="Select Voice",

 print(f"Using device: {device}")
 # --- KU DAR FAYLKA CODADKAAGA ---
+VOICE_SAMPLE_FILES = ["1.wav"]  # Hubi in faylkan tayadiisu fiican tahay
 EMBEDDING_DIR = "speaker_embeddings"
 os.makedirs(EMBEDDING_DIR, exist_ok=True)
 def normalize_text(text):
     text = text.lower()
     text = replace_numbers_with_words(text)
+    text = re.sub(r'[^\w\s\']', '', text)
     return text
+# --- Main Text-to-Speech Function with pause between lines ---
 def text_to_speech(text, voice_choice):
     if not text or not voice_choice:
         gr.Warning("Fadlan geli qoraal oo dooro cod.")
         return None
     speaker_embedding = get_speaker_embedding(voice_choice)
+    # Qoraalka kala saar sadarro (lines)
+    lines = [line.strip() for line in text.strip().split('\n') if line.strip()]
+    if not lines:
+        return None
+    all_audios = []
+    for i, line in enumerate(lines):
+        normalized_text = normalize_text(line)
+        inputs = processor(text=normalized_text, return_tensors="pt").to(device)
         with torch.no_grad():
             speech = model.generate(
                 repetition_penalty=1.2,
                 max_new_tokens=512
             )
+            audio = vocoder(speech).cpu()
+        all_audios.append(audio)
+        # Ku dar nasasho 0.5 ilbiriqsi haddii aanu ahayn line-kii ugu dambeeyay
+        if i < len(lines) - 1:
+            pause_samples = torch.zeros((1, int(16000 * 0.5)))  # 0.5 seconds pause
+            all_audios.append(pause_samples)
+    # Isku dar dhammaan codadka
+    final_audio = torch.cat(all_audios, dim=1)
+    return (16000, final_audio.numpy())
 # --- Gradio Interface ---
 iface = gr.Interface(
     fn=text_to_speech,
     inputs=[
+        gr.Textbox(label="Geli qoraalka af-Soomaaliga (Enter Somali Text)", lines=7, placeholder="Qoraalka geli halkan..."),
         gr.Dropdown(
             VOICE_SAMPLE_FILES,
             label="Select Voice",