Spaces:
Runtime error
Runtime error
Ray Leung
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,16 +16,10 @@ SAMPLE_RATE = 16000 # Hz
|
|
| 16 |
|
| 17 |
# load ASR model
|
| 18 |
canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')
|
| 19 |
-
|
| 20 |
-
# update dcode params
|
| 21 |
decode_cfg = canary_model.cfg.decoding
|
| 22 |
decode_cfg.beam.beam_size = 1
|
| 23 |
canary_model.change_decoding_strategy(decode_cfg)
|
| 24 |
|
| 25 |
-
# load TTS model
|
| 26 |
-
# tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
|
| 27 |
-
# tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
|
| 28 |
-
|
| 29 |
# Function to convert audio to text using ASR
|
| 30 |
def gen_text(audio_filepath, action, source_lang, target_lang):
|
| 31 |
if audio_filepath is None:
|
|
@@ -72,7 +66,6 @@ def gen_text(audio_filepath, action, source_lang, target_lang):
|
|
| 72 |
# Function to convert text to speech using TTS
|
| 73 |
def gen_speech(text, lang):
|
| 74 |
set_seed(555) # Make it deterministic
|
| 75 |
-
|
| 76 |
match lang:
|
| 77 |
case "en":
|
| 78 |
model = "facebook/mms-tts-eng"
|
|
@@ -85,11 +78,6 @@ def gen_speech(text, lang):
|
|
| 85 |
case _:
|
| 86 |
model = "facebook/mms-tts-eng"
|
| 87 |
|
| 88 |
-
# if lang=="en":
|
| 89 |
-
# model = "facebook/mms-tts-eng"
|
| 90 |
-
# elif lang=="fr":
|
| 91 |
-
# model = "facebook/mms-tts-fra"
|
| 92 |
-
|
| 93 |
# load TTS model
|
| 94 |
tts_model = VitsModel.from_pretrained(model)
|
| 95 |
tts_tokenizer = AutoTokenizer.from_pretrained(model)
|
|
@@ -146,13 +134,18 @@ with playground:
|
|
| 146 |
with gr.Column():
|
| 147 |
clear_button = gr.ClearButton(components=[input_audio, source_lang, target_lang, transcipted_text, translated_text, translated_speech], value="Clear")
|
| 148 |
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
submit_button.click(start_process, inputs=[input_audio, source_lang, target_lang], outputs=[transcipted_text, translated_text, translated_speech])
|
| 158 |
|
|
|
|
| 16 |
|
| 17 |
# load ASR model
|
| 18 |
canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')
|
|
|
|
|
|
|
| 19 |
decode_cfg = canary_model.cfg.decoding
|
| 20 |
decode_cfg.beam.beam_size = 1
|
| 21 |
canary_model.change_decoding_strategy(decode_cfg)
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
# Function to convert audio to text using ASR
|
| 24 |
def gen_text(audio_filepath, action, source_lang, target_lang):
|
| 25 |
if audio_filepath is None:
|
|
|
|
| 66 |
# Function to convert text to speech using TTS
|
| 67 |
def gen_speech(text, lang):
|
| 68 |
set_seed(555) # Make it deterministic
|
|
|
|
| 69 |
match lang:
|
| 70 |
case "en":
|
| 71 |
model = "facebook/mms-tts-eng"
|
|
|
|
| 78 |
case _:
|
| 79 |
model = "facebook/mms-tts-eng"
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
# load TTS model
|
| 82 |
tts_model = VitsModel.from_pretrained(model)
|
| 83 |
tts_tokenizer = AutoTokenizer.from_pretrained(model)
|
|
|
|
| 134 |
with gr.Column():
|
| 135 |
clear_button = gr.ClearButton(components=[input_audio, source_lang, target_lang, transcipted_text, translated_text, translated_speech], value="Clear")
|
| 136 |
|
| 137 |
+
with gr.Row():
|
| 138 |
+
gr.Examples(
|
| 139 |
+
examples=[
|
| 140 |
+
["sample_en.wav","en","fr"],
|
| 141 |
+
["sample_fr.wav","fr","de"],
|
| 142 |
+
["sample_de.wav","de","es"],
|
| 143 |
+
["sample_es.wav","es","en"]
|
| 144 |
+
],
|
| 145 |
+
inputs=[input_audio, source_lang, target_lang],
|
| 146 |
+
outputs=[transcipted_text, translated_text, translated_speech],
|
| 147 |
+
run_on_click=True, cache_examples=True, fn=start_process
|
| 148 |
+
)
|
| 149 |
|
| 150 |
submit_button.click(start_process, inputs=[input_audio, source_lang, target_lang], outputs=[transcipted_text, translated_text, translated_speech])
|
| 151 |
|