Somalitts committed
Commit df3f293 · verified · 1 Parent(s): c872044

Update app.py

Files changed (1)
  1. app.py +24 -29
app.py CHANGED
@@ -102,6 +102,7 @@ def split_into_sentences(text):
     return [s.strip() for s in sentences if s.strip()]
 
 # --- Main TTS function with pauses between sentences ---
+# --- Main TTS function with pause after each new line only ---
 def text_to_speech(text, voice_choice):
     if not text or not voice_choice:
         gr.Warning("Fadlan geli qoraal oo dooro cod.")
@@ -112,38 +113,32 @@ def text_to_speech(text, voice_choice):
     paragraphs = text.strip().split("\n")
     audio_chunks = []
 
-    for para in paragraphs:
+    for idx, para in enumerate(paragraphs):
         para = para.strip()
         if not para:
             continue
-        sentences = split_into_sentences(para)
-
-        for idx, sentence in enumerate(sentences):
-            norm_sentence = normalize_text(sentence)
-            inputs = processor(text=norm_sentence, return_tensors="pt").to(device)
-
-            with torch.no_grad():
-                speech = model.generate(
-                    input_ids=inputs["input_ids"],
-                    speaker_embeddings=speaker_embedding.unsqueeze(0),
-                    do_sample=True,
-                    top_k=50,
-                    temperature=0.75,
-                    repetition_penalty=1.2,
-                    max_new_tokens=512
-                )
-            audio = vocoder(speech).cpu().squeeze().numpy()
-
-            audio_chunks.append(audio)
-
-            # Pause 0.5 sec between sentences (not after last)
-            if idx < len(sentences) - 1:
-                pause = np.zeros(int(16000 * 0.5))
-                audio_chunks.append(pause)
-
-        # Pause 0.8 sec between paragraphs (optional)
-        pause_para = np.zeros(int(16000 * 0.8))
-        audio_chunks.append(pause_para)
+
+        norm_para = normalize_text(para)
+        inputs = processor(text=norm_para, return_tensors="pt").to(device)
+
+        with torch.no_grad():
+            speech = model.generate(
+                input_ids=inputs["input_ids"],
+                speaker_embeddings=speaker_embedding.unsqueeze(0),
+                do_sample=True,
+                top_k=50,
+                temperature=0.75,
+                repetition_penalty=1.2,
+                max_new_tokens=512
+            )
+        audio = vocoder(speech).cpu().squeeze().numpy()
+
+        audio_chunks.append(audio)
+
+        # Pause after each paragraph (new line)
+        if idx < len(paragraphs) - 1:
+            pause = np.zeros(int(16000 * 0.8))  # 0.8s pause
+            audio_chunks.append(pause)
 
     final_audio = np.concatenate(audio_chunks)
     return (16000, final_audio)
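
For context, the sketch below isolates the silence-insertion pattern the new loop relies on: one waveform chunk per paragraph, with a gap of zeros concatenated between chunks but not after the last one. It is a minimal, self-contained illustration, not code from the repo; the names join_with_pauses, SAMPLE_RATE, PAUSE_SECONDS and the sine-wave "paragraphs" are hypothetical, and only the 16 kHz rate and the 0.8 s pause come from the diff above.

# Minimal sketch (assumed names, not part of app.py) of pause insertion
# between per-paragraph audio chunks before concatenation.
import numpy as np

SAMPLE_RATE = 16000    # app.py returns (16000, final_audio), i.e. 16 kHz audio
PAUSE_SECONDS = 0.8    # pause inserted after each paragraph except the last

def join_with_pauses(chunks, pause_s=PAUSE_SECONDS, sr=SAMPLE_RATE):
    """Concatenate waveform chunks with silence between (not after) them."""
    pieces = []
    for idx, chunk in enumerate(chunks):
        pieces.append(chunk)
        if idx < len(chunks) - 1:                    # skip the trailing pause
            pieces.append(np.zeros(int(sr * pause_s)))
    return np.concatenate(pieces)

# Two fake 1-second "paragraphs" standing in for the vocoder output:
t = np.linspace(0, 1, SAMPLE_RATE, endpoint=False)
chunks = [np.sin(2 * np.pi * 220 * t), np.sin(2 * np.pi * 330 * t)]
audio = join_with_pauses(chunks)
print(audio.shape)  # (44800,) = 16000 + 12800 + 16000 samples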