Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -27,8 +27,8 @@ def translate(audio):
|
|
| 27 |
|
| 28 |
|
| 29 |
def synthesise(text):
|
|
|
|
| 30 |
max_length = processor.tokenizer.model_max_length
|
| 31 |
-
inputs = processor(text=text, max_length=max_length, truncation=True, return_tensors="pt")
|
| 32 |
speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
|
| 33 |
return speech.cpu()
|
| 34 |
|
|
@@ -40,10 +40,11 @@ def speech_to_speech_translation(audio):
|
|
| 40 |
return 16000, synthesised_speech
|
| 41 |
|
| 42 |
|
| 43 |
-
|
|
|
|
| 44 |
description = """
|
| 45 |
-
Demo for cascaded speech-to-speech translation (STST), mapping from source speech in
|
| 46 |
-
[
|
| 47 |

|
| 48 |
"""
|
| 49 |
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def synthesise(text):
|
| 30 |
+
inputs = processor(text=text, return_tensors="pt")
|
| 31 |
max_length = processor.tokenizer.model_max_length
|
|
|
|
| 32 |
speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
|
| 33 |
return speech.cpu()
|
| 34 |
|
|
|
|
| 40 |
return 16000, synthesised_speech
|
| 41 |
|
| 42 |
|
| 43 |
+
|
| 44 |
+
title = "English to Dutch Cascaded STST"
|
| 45 |
description = """
|
| 46 |
+
Demo for cascaded speech-to-speech translation (STST), mapping from source speech in English to target speech in Dutch. Demo uses OpenAI's [Whisper Base](https://huggingface.co/openai/whisper-base) model for speech translation, and Microsoft's
|
| 47 |
+
SpeechT5 model, finetuned as [speecht5_tts_vox_nl](https://huggingface.co/sanchit-gandhi/speecht5_tts_vox_nl), for text-to-speech:
|
| 48 |

|
| 49 |
"""
|
| 50 |
|