Somalitts committed on
Commit
8204814
·
verified ·
1 Parent(s): 04dc157

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -24
app.py CHANGED
@@ -27,15 +27,6 @@ try:
27
  run_opts={"device": device},
28
  savedir=os.path.join("pretrained_models", "spkrec-xvect-voxceleb")
29
  )
30
-
31
- # --- ISKU DAYGA HAGAAJINTA XAWAARAHA ---
32
- # Waxaan isku dayeynaa inaan model-yada u diyaarinno xawaare dheereeya
33
- if device == "cpu":
34
- print("Optimizing models for CPU inference with JIT...")
35
- model = torch.jit.script(model)
36
- vocoder = torch.jit.script(vocoder.to(device)) # Hubi inuu ku jiro device saxda ah
37
- print("JIT optimization applied.")
38
-
39
  print("Models loaded successfully.")
40
  except Exception as e:
41
  raise gr.Error(f"Error loading models: {e}.")
@@ -67,7 +58,7 @@ def get_speaker_embedding(wav_file_path):
67
  except Exception as e:
68
  raise gr.Error(f"Could not process audio file {wav_file_path}. Error: {e}")
69
 
70
- # --- Text Processing Functions (sidoodii) ---
71
  number_words = {
72
  0: "eber", 1: "kow", 2: "labo", 3: "saddex", 4: "afar", 5: "shan",
73
  6: "lix", 7: "toddobo", 8: "siddeed", 9: "sagaal", 10: "toban",
@@ -95,7 +86,6 @@ def normalize_text(text):
95
  # --- Main Text-to-Speech Function ---
96
  def text_to_speech(text, voice_choice):
97
  try:
98
- print(f"Received request: Text='{text}', Voice='{voice_choice}'")
99
  if not text:
100
  gr.Warning("Please enter some text.")
101
  return None
@@ -103,27 +93,19 @@ def text_to_speech(text, voice_choice):
103
  gr.Warning("Please select a voice.")
104
  return None
105
 
106
- print("Step 1: Getting speaker embedding...")
107
  speaker_embedding = get_speaker_embedding(voice_choice)
108
-
109
- print("Step 2: Normalizing text...")
110
  normalized_text = normalize_text(text)
111
-
112
- print("Step 3: Processing text with SpeechT5Processor...")
113
  inputs = processor(text=normalized_text, return_tensors="pt").to(device)
114
 
115
- print("Step 4: Generating speech with model.generate()...")
116
  with torch.no_grad():
117
- # Waxaan ka saareynaa 'do_sample' si aan u yareyno shaqada processor-ka
118
  speech = model.generate(
119
  input_ids=inputs["input_ids"],
120
- speaker_embeddings=speaker_embedding.unsqueeze(0)
 
 
121
  )
122
- print("Step 5: Applying vocoder...")
123
- # Isticmaalka JIT Vocoder
124
- speech = vocoder(speech.to(device)) # Hubi inuu ku jiro device saxda ah
125
 
126
- print("Step 6: Generation complete. Returning audio.")
127
  return (16000, speech.cpu().numpy())
128
  except Exception as e:
129
  print(f"AN ERROR OCCURRED: {e}")
@@ -153,7 +135,7 @@ if __name__ == "__main__":
153
  if not os.path.exists(f):
154
  raise FileNotFoundError(f"Voice file not found: '{f}'. Please upload it to your Space.")
155
 
156
- print("Pre-loading all voice embeddings for faster startup...")
157
  for voice_file in VOICE_SAMPLE_FILES:
158
  get_speaker_embedding(voice_file)
159
  print("All voices are ready. Launching interface.")
 
27
  run_opts={"device": device},
28
  savedir=os.path.join("pretrained_models", "spkrec-xvect-voxceleb")
29
  )
 
 
 
 
 
 
 
 
 
30
  print("Models loaded successfully.")
31
  except Exception as e:
32
  raise gr.Error(f"Error loading models: {e}.")
 
58
  except Exception as e:
59
  raise gr.Error(f"Could not process audio file {wav_file_path}. Error: {e}")
60
 
61
+ # --- Text Processing Functions ---
62
  number_words = {
63
  0: "eber", 1: "kow", 2: "labo", 3: "saddex", 4: "afar", 5: "shan",
64
  6: "lix", 7: "toddobo", 8: "siddeed", 9: "sagaal", 10: "toban",
 
86
  # --- Main Text-to-Speech Function ---
87
  def text_to_speech(text, voice_choice):
88
  try:
 
89
  if not text:
90
  gr.Warning("Please enter some text.")
91
  return None
 
93
  gr.Warning("Please select a voice.")
94
  return None
95
 
 
96
  speaker_embedding = get_speaker_embedding(voice_choice)
 
 
97
  normalized_text = normalize_text(text)
 
 
98
  inputs = processor(text=normalized_text, return_tensors="pt").to(device)
99
 
 
100
  with torch.no_grad():
 
101
  speech = model.generate(
102
  input_ids=inputs["input_ids"],
103
+ speaker_embeddings=speaker_embedding.unsqueeze(0),
104
+ do_sample=True,
105
+ top_k=50,
106
  )
107
+ speech = vocoder(speech)
 
 
108
 
 
109
  return (16000, speech.cpu().numpy())
110
  except Exception as e:
111
  print(f"AN ERROR OCCURRED: {e}")
 
135
  if not os.path.exists(f):
136
  raise FileNotFoundError(f"Voice file not found: '{f}'. Please upload it to your Space.")
137
 
138
+ print("Pre-loading all voice embeddings...")
139
  for voice_file in VOICE_SAMPLE_FILES:
140
  get_speaker_embedding(voice_file)
141
  print("All voices are ready. Launching interface.")