Somalitts committed on
Commit
6ce5da6
·
verified ·
1 Parent(s): df3f293

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -16
app.py CHANGED
@@ -12,7 +12,7 @@ from speechbrain.pretrained import EncoderClassifier
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
  print(f"Using device: {device}")
14
 
15
- VOICE_SAMPLE_FILES = ["1.wav"] # Codka tusaale ahaan
16
  EMBEDDING_DIR = "speaker_embeddings"
17
  os.makedirs(EMBEDDING_DIR, exist_ok=True)
18
 
@@ -95,31 +95,33 @@ def normalize_text(text):
95
  text = re.sub(r'[^\w\s\']', '', text)
96
  return text
97
 
98
# --- Helper to split text into sentences ---
def split_into_sentences(text):
    """Split *text* into sentences at whitespace that follows '.', '!' or '?'.

    Returns a list of non-empty, stripped sentence strings; an empty or
    whitespace-only input yields an empty list.
    """
    parts = re.split(r'(?<=[.!?])\s+', text)
    return [part.strip() for part in parts if part.strip()]
 
 
 
103
 
104
- # --- Main TTS function with pauses between sentences ---
105
- # --- Main TTS function with pause after each new line only ---
106
  def text_to_speech(text, voice_choice):
107
  if not text or not voice_choice:
108
  gr.Warning("Fadlan geli qoraal oo dooro cod.")
109
  return None
110
 
111
  speaker_embedding = get_speaker_embedding(voice_choice)
 
112
 
113
- paragraphs = text.strip().split("\n")
114
  audio_chunks = []
115
 
116
- for idx, para in enumerate(paragraphs):
117
- para = para.strip()
118
- if not para:
119
  continue
120
 
121
- norm_para = normalize_text(para)
122
- inputs = processor(text=norm_para, return_tensors="pt").to(device)
123
 
124
  with torch.no_grad():
125
  speech = model.generate(
@@ -135,8 +137,8 @@ def text_to_speech(text, voice_choice):
135
 
136
  audio_chunks.append(audio)
137
 
138
- # Pause after each paragraph (new line)
139
- if idx < len(paragraphs) - 1:
140
  pause = np.zeros(int(16000 * 0.8)) # 0.8s pause
141
  audio_chunks.append(pause)
142
 
 
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
  print(f"Using device: {device}")
14
 
15
+ VOICE_SAMPLE_FILES = ["1.wav"]
16
  EMBEDDING_DIR = "speaker_embeddings"
17
  os.makedirs(EMBEDDING_DIR, exist_ok=True)
18
 
 
95
  text = re.sub(r'[^\w\s\']', '', text)
96
  return text
97
 
98
# --- Split long text into chunks by word count ---
def split_long_text_into_chunks(text, max_words=18):
    """Split *text* into chunks of at most *max_words* whitespace-separated words.

    TTS models handle short inputs better, so long text is broken into
    word-count-bounded chunks that are synthesized one at a time.

    Args:
        text: Input string; split on any run of whitespace.
        max_words: Maximum words per chunk (default 18). Must be >= 1.

    Returns:
        List of chunk strings. Empty or whitespace-only text yields [].
    """
    words = text.split()
    # Comprehension instead of manual append loop (same output, cleaner).
    return [
        ' '.join(words[i:i + max_words])
        for i in range(0, len(words), max_words)
    ]
106
 
107
+ # --- Main TTS function ---
 
108
  def text_to_speech(text, voice_choice):
109
  if not text or not voice_choice:
110
  gr.Warning("Fadlan geli qoraal oo dooro cod.")
111
  return None
112
 
113
  speaker_embedding = get_speaker_embedding(voice_choice)
114
+ text_chunks = split_long_text_into_chunks(text)
115
 
 
116
  audio_chunks = []
117
 
118
+ for idx, chunk in enumerate(text_chunks):
119
+ chunk = chunk.strip()
120
+ if not chunk:
121
  continue
122
 
123
+ norm_chunk = normalize_text(chunk)
124
+ inputs = processor(text=norm_chunk, return_tensors="pt").to(device)
125
 
126
  with torch.no_grad():
127
  speech = model.generate(
 
137
 
138
  audio_chunks.append(audio)
139
 
140
+ # Pause after each chunk
141
+ if idx < len(text_chunks) - 1:
142
  pause = np.zeros(int(16000 * 0.8)) # 0.8s pause
143
  audio_chunks.append(pause)
144