speech-to-speech

Paused

Terps committed on Oct 8, 2023

Commit

ef0b4ef

1 Parent(s): d3ab3ec

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -27,8 +27,8 @@ def translate(audio):
 def synthesise(text):
     max_length = processor.tokenizer.model_max_length
-    inputs = processor(text=text, max_length=max_length, truncation=True, return_tensors="pt")
     speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
     return speech.cpu()
@@ -40,10 +40,11 @@ def speech_to_speech_translation(audio):
     return 16000, synthesised_speech
-title = "Cascaded STST"
 description = """
-Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in English. Demo uses OpenAI's [Whisper Base](https://huggingface.co/openai/whisper-base) model for speech translation, and Microsoft's
-[SpeechT5 TTS](https://huggingface.co/microsoft/speecht5_tts) model for text-to-speech:
 ![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech to speech translation")
 """

 def synthesise(text):
+    inputs = processor(text=text, return_tensors="pt")
     max_length = processor.tokenizer.model_max_length
     speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
     return speech.cpu()
     return 16000, synthesised_speech
+title = "English to Dutch Cascaded STST"
 description = """
+Demo for cascaded speech-to-speech translation (STST), mapping from source speech in English to target speech in Dutch. Demo uses OpenAI's [Whisper Base](https://huggingface.co/openai/whisper-base) model for speech translation, and
+a fine-tuned version of Microsoft's SpeechT5, [speecht5_tts_vox_nl](https://huggingface.co/sanchit-gandhi/speecht5_tts_vox_nl), for text-to-speech:
 ![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech to speech translation")
 """