Spaces:

Somalitts
/

8aad

Running

App Files Community

Somalitts committed on Jul 18

Commit

43ec985

verified ·

1 Parent(s): 2342a7b

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -121

app.py CHANGED Viewed

@@ -1,150 +1,88 @@
-import os
-import re
-import uuid
 import torch
 import torchaudio
-import soundfile as sf
-import gradio as gr
 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
-from speechbrain.inference.speaker import EncoderClassifier
 # --- Configuration ---
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# --- TALLAABADA 1: KU DAR MAGACYADA FAYLASHAADA CODADKA HAKAAN ---
-# Hubi in faylashan ay ku jiraan isla galka uu koodhkani ku jiro.
-# Ku beddel magacyadan kuwaaga dhabta ah. Waa inay noqdaan faylal .wav ah.
-VOICE_SAMPLE_FILES = ["1.wav", "90.wav"]
-# Meelaha lagu keydinayo faylasha ku meel gaarka ah
-CACHE_DIR = "hf_cache"
-SPEAKER_EMBEDDING_DIR = "speaker_embeddings"
-os.makedirs(CACHE_DIR, exist_ok=True)
-os.makedirs(SPEAKER_EMBEDDING_DIR, exist_ok=True)
-# --- Soo Dejinta Model-yada ---
 try:
-    print("Waxaa la soo dejinayaa model-yada...")
-    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts", cache_dir=CACHE_DIR)
-    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan", cache_dir=CACHE_DIR).to(device)
-    # Magaca model-ka waxaan ka beddelnay 'model_female' oo ka dhignay 'model' maadaama uu hadda codad kala duwan isticmaalayo
-    model = SpeechT5ForTextToSpeech.from_pretrained("Somalitts/8aad", cache_dir=CACHE_DIR).to(device)
     speaker_model = EncoderClassifier.from_hparams(
         source="speechbrain/spkrec-xvect-voxceleb",
         run_opts={"device": device},
-        savedir=os.path.join(CACHE_DIR, "spk_model")
     )
-    print("Model-yadii waa diyaar.")
 except Exception as e:
-    raise gr.Error(f"Cillad ayaa ka timid soo dejinta model-yada: {e}. Hubi internet-kaaga.")
-# --- Shaqada Abuurista Astaanta Codka (Speaker Embedding) ---
-def get_speaker_embedding(wav_file_path):
-    """
-    Shaqadan waxay soo saaraysaa "astaanta codka" (speaker embedding)
-    haddii aysan jirin, way abuuraysaa oo keydinaysaa si aan mar dambe loo sugin.
-    """
-    embedding_path = os.path.join(SPEAKER_EMBEDDING_DIR, f"{os.path.basename(wav_file_path)}.pt")
     if os.path.exists(embedding_path):
-        return torch.load(embedding_path, map_location=device)
     if not os.path.exists(wav_file_path):
-        raise gr.Error(f"Lama helin faylka codka: {wav_file_path}. Hubi inuu ku jiro galka saxda ah oo magaca si sax ah u qortay.")
-    print(f"Waxaa la abuurayaa astaan cod oo cusub: {wav_file_path}")
-    audio, sr = torchaudio.load(wav_file_path)
-    if sr != 16000:
-        audio = torchaudio.functional.resample(audio, sr, 16000)
-    audio = audio.mean(dim=0).unsqueeze(0).to(device)
-    with torch.no_grad():
-        embedding = speaker_model.encode_batch(audio)
-        embedding = torch.nn.functional.normalize(embedding, dim=2).squeeze()
-    torch.save(embedding.cpu(), embedding_path)
-    return embedding
-# --- Hagaajinta Qoraalka (Text Normalization) ---
-number_words = {
-    0: "eber", 1: "koow", 2: "labo", 3: "seddex", 4: "afar", 5: "shan",
-    6: "lix", 7: "todobo", 8: "sideed", 9: "sagaal", 10: "toban",
-    20: "labaatan", 30: "sodon", 40: "afartan", 50: "konton",
-    60: "lixdan", 70: "todobaatan", 80: "sideetan", 90: "sagaashan",
-    100: "boqol", 1000: "kun"
-}
-def number_to_words(n):
-    if n < 20: return number_words.get(n, str(n))
-    if n < 100:
-        tens, unit = divmod(n, 10)
-        return number_words[tens * 10] + (" iyo " + number_words[unit] if unit else "")
-    if n < 1000:
-        hundreds, rem = divmod(n, 100)
-        return (number_words[hundreds] + " boqol" if hundreds > 1 else "boqol") + (" iyo " + number_to_words(rem) if rem else "")
-    if n < 1_000_000:
-        th, rem = divmod(n, 1000)
-        return (number_to_words(th) + " kun") + (" iyo " + number_to_words(rem) if rem else "")
-    return str(n)
-def replace_numbers_with_words(text):
-    return re.sub(r'\b\d+\b', lambda m: number_to_words(int(m.group())), text)
-def normalize_text(text):
-    text = text.lower()
-    text = replace_numbers_with_words(text)
-    text = re.sub(r'[^\w\s\']', '', text)
-    return text
-# --- Shaqada ugu Muhiimsan (TTS Function) ---
 def text_to_speech(text, voice_choice):
-    """ Hadda shaqadani waxay qaadanaysaa qoraalka iyo codka la doortay """
-    if not text or not voice_choice:
-        gr.Warning("Fadlan geli qoraal oo dooro cod.")
-        return None
-    # Soo qaado astaanta codka la doortay
-    speaker_embedding = get_speaker_embedding(voice_choice)
-    clean_text = normalize_text(text)
-    inputs = processor(text=clean_text, return_tensors="pt").to(device)
-    with torch.no_grad():
-        waveform = model.generate_speech(inputs["input_ids"], speaker_embedding.unsqueeze(0), vocoder=vocoder)
-    # Si ku meel gaar ah u keydi faylka codka la abuuray
-    os.makedirs("/tmp/tts_outputs", exist_ok=True)
-    out_path = f"/tmp/tts_outputs/{uuid.uuid4().hex}.wav"
-    sf.write(out_path, waveform.cpu().numpy(), 16000)
-    return out_path
-# --- Interface-ka Gradio ---
-# Hadda wuxuu leeyahay meel qoraalka la geliyo iyo meel codka laga doorto
 iface = gr.Interface(
-    fn=text_to_speech,
-    inputs=[
-        gr.Textbox(label="Geli qoraalka af Soomaali"),
-        gr.Dropdown(
-            choices=VOICE_SAMPLE_FILES,
-            label="Dooro Codkaaga (Select Your Voice)",
-            value=VOICE_SAMPLE_FILES[0] if VOICE_SAMPLE_FILES else None,
-            info="Dooro mid ka mid ah codadkaaga aad diyaarisay."
-        )
-    ],
-    outputs=gr.Audio(label="Codka La Abuuray", type="filepath"),
-    title="Soomaali Text-to-Speech (Codad Kala Duwan)",
-    description="Ku qor qoraal Soomaali ah, dooro codka aad rabto, kadibna riix 'Submit' si aad cod ugu dhageysato."
 )
-# Diyaari codadka ka hor inta aan barnaamijka la furin
 if __name__ == "__main__":
     print("Hubinta faylasha codadka...")
-    if not VOICE_SAMPLE_FILES or not all(os.path.exists(f) for f in VOICE_SAMPLE_FILES):
-         raise FileNotFoundError("Mid ka mid ah faylasha ku jira 'VOICE_SAMPLE_FILES' lama helin. Fadlan hubi magacyada iyo meesha ay ku jiraan.")
-    print("Diyaarinta codadkaaga...")
-    for voice in VOICE_SAMPLE_FILES:
-        get_speaker_embedding(voice) # Tani waxay abuuraysaa astaamaha codka haddii aysan jirin
-    print("Dhammaan waa diyaar. Barnaamijku wuu furmayaa.")
     iface.launch(share=True)

+import gradio as gr
 import torch
 import torchaudio
+import re
+import os
 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
+from speechbrain.pretrained import EncoderClassifier
+import numpy as np
 # --- Configuration ---
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# --- HUBI INAAD SOO GELISAY FAYLASHAN ---
+# Faylashan waa inay ku jiraan Hugging Face Spaces, isla galka uu ku jiro "app.py"
+VOICE_SAMPLE_FILES = ["1.wav"]
+# Directory to store speaker embedding files
+EMBEDDING_DIR = "speaker_embeddings"
+os.makedirs(EMBEDDING_DIR, exist_ok=True)
+# --- Load Models ---
 try:
+    print("Loading models... This may take a moment.")
+    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+    model = SpeechT5ForTextToSpeech.from_pretrained("Somalitts/8aad").to(device)
+    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
     speaker_model = EncoderClassifier.from_hparams(
         source="speechbrain/spkrec-xvect-voxceleb",
         run_opts={"device": device},
+        savedir=os.path.join("pretrained_models", "spkrec-xvect-voxceleb")
     )
+    print("Models loaded successfully.")
 except Exception as e:
+    raise gr.Error(f"Error loading models: {e}. Check your internet connection.")
+speaker_embeddings_cache = {}
+def get_speaker_embedding(wav_file_path):
+    if wav_file_path in speaker_embeddings_cache:
+        return speaker_embeddings_cache[wav_file_path]
+    embedding_path = os.path.join(EMBEDDING_DIR, f"{os.path.basename(wav_file_path)}.pt")
     if os.path.exists(embedding_path):
+        embedding = torch.load(embedding_path, map_location=device)
+        speaker_embeddings_cache[wav_file_path] = embedding
+        return embedding
     if not os.path.exists(wav_file_path):
+        # Kani waa qaladka dhacay. Markaad faylasha soo geliso, meeshan wuu ka gudbayaa.
+        raise FileNotFoundError(f"Lama helin faylka codka: {wav_file_path}")
+    try:
+        audio, sr = torchaudio.load(wav_file_path)
+        if sr != 16000: audio = torchaudio.functional.resample(audio, sr, 16000)
+        if audio.shape[0] > 1: audio = torch.mean(audio, dim=0, keepdim=True)
+        with torch.no_grad():
+            embedding = speaker_model.encode_batch(audio.to(device))
+            embedding = torch.nn.functional.normalize(embedding, dim=2).squeeze()
+        torch.save(embedding.cpu(), embedding_path)
+        speaker_embeddings_cache[wav_file_path] = embedding.to(device)
+        return embedding.to(device)
+    except Exception as e:
+        raise gr.Error(f"Could not process audio file {wav_file_path}. Error: {e}")
+# ... (Inta kale ee koodhka way saxantahay) ...
+# --- Main Text-to-Speech Function ---
 def text_to_speech(text, voice_choice):
+    # ... (sidaadii hore) ...
+    pass # Koodhka intiisa kale halkan geli
+# --- Gradio Interface ---
 iface = gr.Interface(
+    # ... (sidaadii hore) ...
+    pass # Koodhka intiisa kale halkan geli
 )
+# --- Launch the web interface ---
 if __name__ == "__main__":
     print("Hubinta faylasha codadka...")
+    for f in VOICE_SAMPLE_FILES:
+        if not os.path.exists(f):
+            # Qaladku halkan ayuu ka bilaabmayaa
+            raise FileNotFoundError(f"Mid ka mid ah faylasha lama helin: '{f}'. Fadlan hubi inaad soo gelisay Hugging Face Spaces.")
+    print("Diyaarinta astaamaha codadka...")
+    for voice_file in VOICE_SAMPLE_FILES:
+        get_speaker_embedding(voice_file)
+    print("Dhammaan codadka waa diyaar. Waxaa la furayaa interface-ka.")
     iface.launch(share=True)