Update app.py
Browse files
app.py
CHANGED
|
@@ -12,7 +12,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
| 12 |
print(f"Using device: {device}")
|
| 13 |
|
| 14 |
# --- KU DAR FAYLKA CODADKAAGA ---
|
| 15 |
-
VOICE_SAMPLE_FILES = ["1.wav"]
|
| 16 |
EMBEDDING_DIR = "speaker_embeddings"
|
| 17 |
os.makedirs(EMBEDDING_DIR, exist_ok=True)
|
| 18 |
|
|
@@ -92,26 +92,27 @@ def replace_numbers_with_words(text):
|
|
| 92 |
def normalize_text(text):
|
| 93 |
text = text.lower()
|
| 94 |
text = replace_numbers_with_words(text)
|
| 95 |
-
text = re.sub(r'[^\w\s\'
|
| 96 |
return text
|
| 97 |
|
| 98 |
-
# --- Main
|
| 99 |
def text_to_speech(text, voice_choice):
|
| 100 |
if not text or not voice_choice:
|
| 101 |
gr.Warning("Fadlan geli qoraal oo dooro cod.")
|
| 102 |
return None
|
| 103 |
|
| 104 |
speaker_embedding = get_speaker_embedding(voice_choice)
|
| 105 |
-
normalized_text = normalize_text(text)
|
| 106 |
|
| 107 |
-
#
|
| 108 |
-
lines =
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
-
for line in lines:
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
inputs = processor(text=line, return_tensors="pt").to(device)
|
| 115 |
|
| 116 |
with torch.no_grad():
|
| 117 |
speech = model.generate(
|
|
@@ -123,20 +124,25 @@ def text_to_speech(text, voice_choice):
|
|
| 123 |
repetition_penalty=1.2,
|
| 124 |
max_new_tokens=512
|
| 125 |
)
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
full_audio.append(pause)
|
| 132 |
|
| 133 |
-
return (16000,
|
| 134 |
|
| 135 |
# --- Gradio Interface ---
|
| 136 |
iface = gr.Interface(
|
| 137 |
fn=text_to_speech,
|
| 138 |
inputs=[
|
| 139 |
-
gr.Textbox(label="Geli qoraalka af-Soomaaliga (Enter Somali Text)"),
|
| 140 |
gr.Dropdown(
|
| 141 |
VOICE_SAMPLE_FILES,
|
| 142 |
label="Select Voice",
|
|
|
|
| 12 |
print(f"Using device: {device}")
|
| 13 |
|
| 14 |
# --- KU DAR FAYLKA CODADKAAGA ---
|
| 15 |
+
VOICE_SAMPLE_FILES = ["1.wav"] # Hubi in faylkan tayadiisu fiican tahay
|
| 16 |
EMBEDDING_DIR = "speaker_embeddings"
|
| 17 |
os.makedirs(EMBEDDING_DIR, exist_ok=True)
|
| 18 |
|
|
|
|
| 92 |
def normalize_text(text):
|
| 93 |
text = text.lower()
|
| 94 |
text = replace_numbers_with_words(text)
|
| 95 |
+
text = re.sub(r'[^\w\s\']', '', text)
|
| 96 |
return text
|
| 97 |
|
| 98 |
+
# --- Main Text-to-Speech Function with pause between lines ---
|
| 99 |
def text_to_speech(text, voice_choice):
|
| 100 |
if not text or not voice_choice:
|
| 101 |
gr.Warning("Fadlan geli qoraal oo dooro cod.")
|
| 102 |
return None
|
| 103 |
|
| 104 |
speaker_embedding = get_speaker_embedding(voice_choice)
|
|
|
|
| 105 |
|
| 106 |
+
# Qoraalka kala saar sadarro (lines)
|
| 107 |
+
lines = [line.strip() for line in text.strip().split('\n') if line.strip()]
|
| 108 |
+
if not lines:
|
| 109 |
+
return None
|
| 110 |
+
|
| 111 |
+
all_audios = []
|
| 112 |
|
| 113 |
+
for i, line in enumerate(lines):
|
| 114 |
+
normalized_text = normalize_text(line)
|
| 115 |
+
inputs = processor(text=normalized_text, return_tensors="pt").to(device)
|
|
|
|
| 116 |
|
| 117 |
with torch.no_grad():
|
| 118 |
speech = model.generate(
|
|
|
|
| 124 |
repetition_penalty=1.2,
|
| 125 |
max_new_tokens=512
|
| 126 |
)
|
| 127 |
+
audio = vocoder(speech).cpu()
|
| 128 |
+
|
| 129 |
+
all_audios.append(audio)
|
| 130 |
+
|
| 131 |
+
# Ku dar nasasho 0.5 ilbiriqsi haddii aanu ahayn line-kii ugu dambeeyay
|
| 132 |
+
if i < len(lines) - 1:
|
| 133 |
+
pause_samples = torch.zeros((1, int(16000 * 0.5))) # 0.5 seconds pause
|
| 134 |
+
all_audios.append(pause_samples)
|
| 135 |
|
| 136 |
+
# Isku dar dhammaan codadka
|
| 137 |
+
final_audio = torch.cat(all_audios, dim=1)
|
|
|
|
| 138 |
|
| 139 |
+
return (16000, final_audio.numpy())
|
| 140 |
|
| 141 |
# --- Gradio Interface ---
|
| 142 |
iface = gr.Interface(
|
| 143 |
fn=text_to_speech,
|
| 144 |
inputs=[
|
| 145 |
+
gr.Textbox(label="Geli qoraalka af-Soomaaliga (Enter Somali Text)", lines=7, placeholder="Qoraalka geli halkan..."),
|
| 146 |
gr.Dropdown(
|
| 147 |
VOICE_SAMPLE_FILES,
|
| 148 |
label="Select Voice",
|