Somalitts committed on
Commit
5a3bbd1
·
verified ·
1 Parent(s): f685632

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -24
app.py CHANGED
@@ -11,12 +11,11 @@ import numpy as np
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
  print(f"Using device: {device}")
13
 
14
- # --- KU DAR FAYLKA CODADKAAGA ---
15
  VOICE_SAMPLE_FILES = ["1.wav"] # Hubi in faylkan tayadiisu fiican tahay
16
  EMBEDDING_DIR = "speaker_embeddings"
17
  os.makedirs(EMBEDDING_DIR, exist_ok=True)
18
 
19
- # --- Soo Dejinta Model-yada ---
20
  try:
21
  print("Loading models...")
22
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
@@ -58,7 +57,7 @@ def get_speaker_embedding(wav_file_path):
58
  except Exception as e:
59
  raise gr.Error(f"Could not process audio file {wav_file_path}. Error: {e}")
60
 
61
- # --- Number Handling Functions ---
62
  number_words = {
63
  0: "eber", 1: "kow", 2: "labo", 3: "saddex", 4: "afar", 5: "shan",
64
  6: "lix", 7: "toddobo", 8: "siddeed", 9: "sagaal", 10: "toban",
@@ -69,7 +68,6 @@ number_words = {
69
  60: "lixdan", 70: "toddobaatan", 80: "siddeetan", 90: "sagaashan",
70
  100: "boqol", 1000: "kun",
71
  }
72
-
73
  def number_to_words(n):
74
  if n in number_words:
75
  return number_words[n]
@@ -85,17 +83,27 @@ def number_to_words(n):
85
  return (number_to_words(n // 1_000_000) + " milyan" if n // 1_000_000 > 1 else "milyan") + (
86
  " iyo " + number_to_words(n % 1_000_000) if n % 1_000_000 else "")
87
  return str(n)
88
-
89
  def replace_numbers_with_words(text):
90
  return re.sub(r'\b\d+\b', lambda m: number_to_words(int(m.group())), text)
91
-
92
  def normalize_text(text):
93
  text = text.lower()
94
  text = replace_numbers_with_words(text)
95
  text = re.sub(r'[^\w\s\']', '', text)
96
  return text
97
 
98
- # --- Main Text-to-Speech Function with pause between lines ---
 
 
 
 
 
 
 
 
 
 
 
 
99
  def text_to_speech(text, voice_choice):
100
  if not text or not voice_choice:
101
  gr.Warning("Fadlan geli qoraal oo dooro cod.")
@@ -103,17 +111,12 @@ def text_to_speech(text, voice_choice):
103
 
104
  speaker_embedding = get_speaker_embedding(voice_choice)
105
 
106
- # Qoraalka kala saar sadarro (lines)
107
- lines = [line.strip() for line in text.strip().split('\n') if line.strip()]
108
- if not lines:
109
- return None
110
 
111
  all_audios = []
112
-
113
- for i, line in enumerate(lines):
114
- normalized_text = normalize_text(line)
115
  inputs = processor(text=normalized_text, return_tensors="pt").to(device)
116
-
117
  with torch.no_grad():
118
  speech = model.generate(
119
  input_ids=inputs["input_ids"],
@@ -127,18 +130,14 @@ def text_to_speech(text, voice_choice):
127
  audio = vocoder(speech).cpu()
128
 
129
  all_audios.append(audio)
 
 
 
 
130
 
131
- # Ku dar nasasho 0.5 ilbiriqsi haddii aanu ahayn line-kii ugu dambeeyay
132
- if i < len(lines) - 1:
133
- pause_samples = torch.zeros((1, int(16000 * 0.5))) # 0.5 seconds pause
134
- all_audios.append(pause_samples)
135
-
136
- # Isku dar dhammaan codadka
137
  final_audio = torch.cat(all_audios, dim=1)
138
-
139
  return (16000, final_audio.numpy())
140
 
141
- # --- Gradio Interface ---
142
  iface = gr.Interface(
143
  fn=text_to_speech,
144
  inputs=[
@@ -155,7 +154,6 @@ iface = gr.Interface(
155
  description="Geli qoraal Soomaali ah, dooro cod, kadibna riix 'Submit' si aad u abuurto hadal."
156
  )
157
 
158
- # --- Launch ---
159
  if __name__ == "__main__":
160
  if not all(os.path.exists(f) for f in VOICE_SAMPLE_FILES):
161
  raise FileNotFoundError("Fadlan hubi inaad faylasha codka soo gelisay Space-ka.")
 
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
  print(f"Using device: {device}")
13
 
 
14
  VOICE_SAMPLE_FILES = ["1.wav"] # Hubi in faylkan tayadiisu fiican tahay
15
  EMBEDDING_DIR = "speaker_embeddings"
16
  os.makedirs(EMBEDDING_DIR, exist_ok=True)
17
 
18
+ # --- Load models ---
19
  try:
20
  print("Loading models...")
21
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
 
57
  except Exception as e:
58
  raise gr.Error(f"Could not process audio file {wav_file_path}. Error: {e}")
59
 
60
+ # Number to words functions (as before) ...
61
  number_words = {
62
  0: "eber", 1: "kow", 2: "labo", 3: "saddex", 4: "afar", 5: "shan",
63
  6: "lix", 7: "toddobo", 8: "siddeed", 9: "sagaal", 10: "toban",
 
68
  60: "lixdan", 70: "toddobaatan", 80: "siddeetan", 90: "sagaashan",
69
  100: "boqol", 1000: "kun",
70
  }
 
71
  def number_to_words(n):
72
  if n in number_words:
73
  return number_words[n]
 
83
  return (number_to_words(n // 1_000_000) + " milyan" if n // 1_000_000 > 1 else "milyan") + (
84
  " iyo " + number_to_words(n % 1_000_000) if n % 1_000_000 else "")
85
  return str(n)
 
86
  def replace_numbers_with_words(text):
87
  return re.sub(r'\b\d+\b', lambda m: number_to_words(int(m.group())), text)
 
88
  def normalize_text(text):
89
  text = text.lower()
90
  text = replace_numbers_with_words(text)
91
  text = re.sub(r'[^\w\s\']', '', text)
92
  return text
93
 
94
+ # **Jumladaha kala saar (split into sentences) function**
95
+ def split_into_sentences(text):
96
+ # Qaar ka mid ah hababka fudud ee jumladaha kala saarista
97
+ sentence_endings = re.compile(r'(?<=[.!?])\s+')
98
+ sentences = sentence_endings.split(text)
99
+ # Haddii qoraalka uusan lahayn calaamadaha dhamaadka jumlada, iska hubi oo qaybi ereyo waaweyn
100
+ if len(sentences) == 1:
101
+ # Ku kala jar ereyo waaweyn maxaa yeelay lama helin calaamad
102
+ sentences = re.split(r'(?<=\.)\s+|(?<=\?)\s+|(?<=!)\s+', text)
103
+ # Nadiifi meelaha banaan iyo jumladaha madhan
104
+ sentences = [s.strip() for s in sentences if s.strip()]
105
+ return sentences
106
+
107
  def text_to_speech(text, voice_choice):
108
  if not text or not voice_choice:
109
  gr.Warning("Fadlan geli qoraal oo dooro cod.")
 
111
 
112
  speaker_embedding = get_speaker_embedding(voice_choice)
113
 
114
+ sentences = split_into_sentences(text)
 
 
 
115
 
116
  all_audios = []
117
+ for i, sentence in enumerate(sentences):
118
+ normalized_text = normalize_text(sentence)
 
119
  inputs = processor(text=normalized_text, return_tensors="pt").to(device)
 
120
  with torch.no_grad():
121
  speech = model.generate(
122
  input_ids=inputs["input_ids"],
 
130
  audio = vocoder(speech).cpu()
131
 
132
  all_audios.append(audio)
133
+ # Nasasho 0.5 ilbiriqsi haddii uusan ahayn jumladii ugu dambeysay
134
+ if i < len(sentences) - 1:
135
+ pause = torch.zeros((1, int(16000 * 0.5))) # 0.5 sec silence
136
+ all_audios.append(pause)
137
 
 
 
 
 
 
 
138
  final_audio = torch.cat(all_audios, dim=1)
 
139
  return (16000, final_audio.numpy())
140
 
 
141
  iface = gr.Interface(
142
  fn=text_to_speech,
143
  inputs=[
 
154
  description="Geli qoraal Soomaali ah, dooro cod, kadibna riix 'Submit' si aad u abuurto hadal."
155
  )
156
 
 
157
  if __name__ == "__main__":
158
  if not all(os.path.exists(f) for f in VOICE_SAMPLE_FILES):
159
  raise FileNotFoundError("Fadlan hubi inaad faylasha codka soo gelisay Space-ka.")