speech-to-speech

Paused

App Files Files Community

zongxiao committed on Oct 9, 2023

Commit

5307e6b

1 Parent(s): a2d9db4

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -17

app.py CHANGED Viewed

@@ -1,44 +1,35 @@
 import torch
 from transformers import pipeline
 device="cpu"
 pipe = pipeline(
     "automatic-speech-recognition", model="openai/whisper-large-v2", device=device
 )
-def translate(audio):
-    outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe","language":"chinese"})
-    return outputs["text"]
-from transformers import BarkModel
-from transformers import AutoProcessor
-model = BarkModel.from_pretrained("suno/bark-small")
 processor = AutoProcessor.from_pretrained("suno/bark")
 model = model.to(device)
 synthesised_rate = model.generation_config.sample_rate
 def synthesise(text_prompt,voice_preset="v2/zh_speaker_1"):
     inputs = processor(text_prompt, voice_preset=voice_preset)
     speech_output = model.generate(**inputs.to(device),pad_token_id=10000)
-    #print(speech_output[0].cpu().numpy())
     return speech_output
 def speech_to_speech_translation(audio):
     translated_text = translate(audio)
     synthesised_speech = synthesise(translated_text)
     synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
     return 16000, synthesised_speech
-import numpy as np
 def speech_to_speech_translation(audio,voice_preset="v2/zh_speaker_1"):
     translated_text = translate(audio)
-    #print(translated_text)
     synthesised_speech = synthesise(translated_text,voice_preset)
     synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
-    #print(synthesised_speech)
     return synthesised_rate , synthesised_speech
 def speech_to_speech_translation_fix(audio,voice_preset="v2/zh_speaker_1"):
     synthesised_rate,synthesised_speech = speech_to_speech_translation(audio,voice_preset)

 import torch
+import numpy as np
 from transformers import pipeline
+from transformers import BarkModel
+from transformers import AutoProcessor
 device="cpu"
 pipe = pipeline(
     "automatic-speech-recognition", model="openai/whisper-large-v2", device=device
 )
 processor = AutoProcessor.from_pretrained("suno/bark")
+model = BarkModel.from_pretrained("suno/bark-small")
 model = model.to(device)
 synthesised_rate = model.generation_config.sample_rate
+def translate(audio):  # Run the module-level ASR pipeline `pipe` on `audio` and return its text.
+    outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe","language":"chinese"})  # task is "transcribe" with language fixed to "chinese", despite the function's name
+    return outputs["text"]  # only the "text" entry of the pipeline output is returned
 def synthesise(text_prompt,voice_preset="v2/zh_speaker_1"):
     """Generate speech for ``text_prompt`` using the module-level ``processor`` and ``model``.
     ``voice_preset`` is forwarded to the processor; the processed inputs are moved to
     ``device`` before generation. Returns the output of ``model.generate`` unchanged.
     """
     inputs = processor(text_prompt, voice_preset=voice_preset)
     # NOTE(review): pad_token_id=10000 is hard-coded here; presumably specific to the
     # Bark checkpoint loaded at module level -- confirm against the model's config.
     speech_output = model.generate(**inputs.to(device),pad_token_id=10000)
     return speech_output
 def speech_to_speech_translation(audio):
     """Transcribe ``audio`` via ``translate`` and re-synthesise it with the default voice preset.
     Returns a ``(16000, samples)`` tuple where ``samples`` is an int16 NumPy array.
     """
     # NOTE(review): a function with this same name is defined again immediately after
     # this one at module level, so this definition is replaced when the module loads.
     translated_text = translate(audio)
     synthesised_speech = synthesise(translated_text)
     # Scale to the int16 range; assumes the generated values lie in [-1, 1] -- TODO confirm.
     synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
     # NOTE(review): the rate is hard-coded to 16000 rather than taken from the
     # module-level ``synthesised_rate`` -- verify this matches the model's output rate.
     return 16000, synthesised_speech
 def speech_to_speech_translation(audio,voice_preset="v2/zh_speaker_1"):
     """Transcribe ``audio`` via ``translate`` and re-synthesise it with ``voice_preset``.
     Returns ``(synthesised_rate, samples)`` where ``synthesised_rate`` is the module-level
     value read from ``model.generation_config.sample_rate`` and ``samples`` is an int16
     NumPy array.
     """
     translated_text = translate(audio)
     synthesised_speech = synthesise(translated_text,voice_preset)
     # Scale to the int16 range; assumes the generated values lie in [-1, 1] -- TODO confirm.
     # NOTE(review): ``.numpy()`` keeps whatever shape ``model.generate`` returned; presumably
     # a leading batch dimension is still present here -- confirm before passing to a player.
     synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
     return synthesised_rate , synthesised_speech
 def speech_to_speech_translation_fix(audio,voice_preset="v2/zh_speaker_1"):
     synthesised_rate,synthesised_speech = speech_to_speech_translation(audio,voice_preset)