Spaces:

Somalitts
/

8aad

Running

App Files Community

Somalitts committed on Jul 18

Commit

8204814

verified ·

1 Parent(s): 04dc157

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -24

app.py CHANGED Viewed

@@ -27,15 +27,6 @@ try:
         run_opts={"device": device},
         savedir=os.path.join("pretrained_models", "spkrec-xvect-voxceleb")
     )
-    # --- ISKU DAYGA HAGAAJINTA XAWAARAHA ---
-    # Waxaan isku dayeynaa inaan model-yada u diyaarinno xawaare dheereeya
-    if device == "cpu":
-        print("Optimizing models for CPU inference with JIT...")
-        model = torch.jit.script(model)
-        vocoder = torch.jit.script(vocoder.to(device)) # Hubi inuu ku jiro device saxda ah
-        print("JIT optimization applied.")
     print("Models loaded successfully.")
 except Exception as e:
     raise gr.Error(f"Error loading models: {e}.")
@@ -67,7 +58,7 @@ def get_speaker_embedding(wav_file_path):
     except Exception as e:
         raise gr.Error(f"Could not process audio file {wav_file_path}. Error: {e}")
-# --- Text Processing Functions (sidoodii) ---
 number_words = {
     0: "eber", 1: "kow", 2: "labo", 3: "saddex", 4: "afar", 5: "shan",
     6: "lix", 7: "toddobo", 8: "siddeed", 9: "sagaal", 10: "toban",
@@ -95,7 +86,6 @@ def normalize_text(text):
 # --- Main Text-to-Speech Function ---
 def text_to_speech(text, voice_choice):
     try:
-        print(f"Received request: Text='{text}', Voice='{voice_choice}'")
         if not text:
             gr.Warning("Please enter some text.")
             return None
@@ -103,27 +93,19 @@ def text_to_speech(text, voice_choice):
             gr.Warning("Please select a voice.")
             return None
-        print("Step 1: Getting speaker embedding...")
         speaker_embedding = get_speaker_embedding(voice_choice)
-        print("Step 2: Normalizing text...")
         normalized_text = normalize_text(text)
-        print("Step 3: Processing text with SpeechT5Processor...")
         inputs = processor(text=normalized_text, return_tensors="pt").to(device)
-        print("Step 4: Generating speech with model.generate()...")
         with torch.no_grad():
-            # Waxaan ka saareynaa 'do_sample' si aan u yareyno shaqada processor-ka
             speech = model.generate(
                 input_ids=inputs["input_ids"],
-                speaker_embeddings=speaker_embedding.unsqueeze(0)
             )
-            print("Step 5: Applying vocoder...")
-            # Isticmaalka JIT Vocoder
-            speech = vocoder(speech.to(device)) # Hubi inuu ku jiro device saxda ah
-        print("Step 6: Generation complete. Returning audio.")
         return (16000, speech.cpu().numpy())
     except Exception as e:
         print(f"AN ERROR OCCURRED: {e}")
@@ -153,7 +135,7 @@ if __name__ == "__main__":
         if not os.path.exists(f):
             raise FileNotFoundError(f"Voice file not found: '{f}'. Please upload it to your Space.")
-    print("Pre-loading all voice embeddings for faster startup...")
     for voice_file in VOICE_SAMPLE_FILES:
         get_speaker_embedding(voice_file)
     print("All voices are ready. Launching interface.")

         run_opts={"device": device},
         savedir=os.path.join("pretrained_models", "spkrec-xvect-voxceleb")
     )
     print("Models loaded successfully.")
 except Exception as e:
     raise gr.Error(f"Error loading models: {e}.")
     except Exception as e:
         raise gr.Error(f"Could not process audio file {wav_file_path}. Error: {e}")
+# --- Text Processing Functions ---
 number_words = {
     0: "eber", 1: "kow", 2: "labo", 3: "saddex", 4: "afar", 5: "shan",
     6: "lix", 7: "toddobo", 8: "siddeed", 9: "sagaal", 10: "toban",
 # --- Main Text-to-Speech Function ---
 def text_to_speech(text, voice_choice):
     try:
         if not text:
             gr.Warning("Please enter some text.")
             return None
             gr.Warning("Please select a voice.")
             return None
         speaker_embedding = get_speaker_embedding(voice_choice)
         normalized_text = normalize_text(text)
         inputs = processor(text=normalized_text, return_tensors="pt").to(device)
         with torch.no_grad():
             speech = model.generate(
                 input_ids=inputs["input_ids"],
+                speaker_embeddings=speaker_embedding.unsqueeze(0),
+                do_sample=True,
+                top_k=50,
             )
+            speech = vocoder(speech)
         return (16000, speech.cpu().numpy())
     except Exception as e:
         print(f"AN ERROR OCCURRED: {e}")
         if not os.path.exists(f):
             raise FileNotFoundError(f"Voice file not found: '{f}'. Please upload it to your Space.")
+    print("Pre-loading all voice embeddings...")
     for voice_file in VOICE_SAMPLE_FILES:
         get_speaker_embedding(voice_file)
     print("All voices are ready. Launching interface.")