Somalitts committed on
Commit
f685632
·
verified ·
1 Parent(s): 1229011

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -18
app.py CHANGED
@@ -12,7 +12,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
12
  print(f"Using device: {device}")
13
 
14
  # --- KU DAR FAYLKA CODADKAAGA ---
15
- VOICE_SAMPLE_FILES = ["1.wav"]
16
  EMBEDDING_DIR = "speaker_embeddings"
17
  os.makedirs(EMBEDDING_DIR, exist_ok=True)
18
 
@@ -92,26 +92,27 @@ def replace_numbers_with_words(text):
92
  def normalize_text(text):
93
  text = text.lower()
94
  text = replace_numbers_with_words(text)
95
- text = re.sub(r'[^\w\s\'.!?]', '', text) # Ha tirtirin calaamadaha muhiimka ah
96
  return text
97
 
98
- # --- Main TTS Function with Pause ---
99
  def text_to_speech(text, voice_choice):
100
  if not text or not voice_choice:
101
  gr.Warning("Fadlan geli qoraal oo dooro cod.")
102
  return None
103
 
104
  speaker_embedding = get_speaker_embedding(voice_choice)
105
- normalized_text = normalize_text(text)
106
 
107
- # Kala qaybi jumladaha
108
- lines = re.split(r'(?<=[.!?])\s+', normalized_text.strip())
109
- full_audio = []
 
 
 
110
 
111
- for line in lines:
112
- if not line.strip():
113
- continue
114
- inputs = processor(text=line, return_tensors="pt").to(device)
115
 
116
  with torch.no_grad():
117
  speech = model.generate(
@@ -123,20 +124,25 @@ def text_to_speech(text, voice_choice):
123
  repetition_penalty=1.2,
124
  max_new_tokens=512
125
  )
126
- audio_chunk = vocoder(speech).cpu().numpy()
127
- full_audio.append(audio_chunk)
 
 
 
 
 
 
128
 
129
- # Nasasho 0.5 ilbiriqsi u dhaxeysa
130
- pause = np.zeros((1, 16000 // 2), dtype=np.float32)
131
- full_audio.append(pause)
132
 
133
- return (16000, np.concatenate(full_audio, axis=-1))
134
 
135
  # --- Gradio Interface ---
136
  iface = gr.Interface(
137
  fn=text_to_speech,
138
  inputs=[
139
- gr.Textbox(label="Geli qoraalka af-Soomaaliga (Enter Somali Text)"),
140
  gr.Dropdown(
141
  VOICE_SAMPLE_FILES,
142
  label="Select Voice",
 
12
  print(f"Using device: {device}")
13
 
14
  # --- KU DAR FAYLKA CODADKAAGA ---
15
+ VOICE_SAMPLE_FILES = ["1.wav"] # Hubi in faylkan tayadiisu fiican tahay
16
  EMBEDDING_DIR = "speaker_embeddings"
17
  os.makedirs(EMBEDDING_DIR, exist_ok=True)
18
 
 
92
  def normalize_text(text):
93
  text = text.lower()
94
  text = replace_numbers_with_words(text)
95
+ text = re.sub(r'[^\w\s\']', '', text)
96
  return text
97
 
98
+ # --- Main Text-to-Speech Function with pause between lines ---
99
  def text_to_speech(text, voice_choice):
100
  if not text or not voice_choice:
101
  gr.Warning("Fadlan geli qoraal oo dooro cod.")
102
  return None
103
 
104
  speaker_embedding = get_speaker_embedding(voice_choice)
 
105
 
106
+ # Qoraalka kala saar sadarro (lines)
107
+ lines = [line.strip() for line in text.strip().split('\n') if line.strip()]
108
+ if not lines:
109
+ return None
110
+
111
+ all_audios = []
112
 
113
+ for i, line in enumerate(lines):
114
+ normalized_text = normalize_text(line)
115
+ inputs = processor(text=normalized_text, return_tensors="pt").to(device)
 
116
 
117
  with torch.no_grad():
118
  speech = model.generate(
 
124
  repetition_penalty=1.2,
125
  max_new_tokens=512
126
  )
127
+ audio = vocoder(speech).cpu()
128
+
129
+ all_audios.append(audio)
130
+
131
+ # Ku dar nasasho 0.5 ilbiriqsi haddii aanu ahayn line-kii ugu dambeeyay
132
+ if i < len(lines) - 1:
133
+ pause_samples = torch.zeros((1, int(16000 * 0.5))) # 0.5 seconds pause
134
+ all_audios.append(pause_samples)
135
 
136
+ # Isku dar dhammaan codadka
137
+ final_audio = torch.cat(all_audios, dim=1)
 
138
 
139
+ return (16000, final_audio.numpy())
140
 
141
  # --- Gradio Interface ---
142
  iface = gr.Interface(
143
  fn=text_to_speech,
144
  inputs=[
145
+ gr.Textbox(label="Geli qoraalka af-Soomaaliga (Enter Somali Text)", lines=7, placeholder="Qoraalka geli halkan..."),
146
  gr.Dropdown(
147
  VOICE_SAMPLE_FILES,
148
  label="Select Voice",