Spaces:

Jaward
/

seamless-speech-translator

Running

App Files Community

Jaward committed on Aug 17, 2024

Commit

ee60cd3

verified ·

1 Parent(s): f466968

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -36

app.py CHANGED Viewed

@@ -12,42 +12,45 @@ LANGUAGE_CODES = {
     "Chinese": "cmn"
 }
-def translate_speech(audio_file, target_language):
-    """
-    Translate input speech (audio file) to the specified target language.
-    Args:
-    audio_file (str): Path to the input audio file.
-    target_language (str): The target language for translation.
-    Returns:
-    str: Path to the translated audio file.
-    """
-    language_code = LANGUAGE_CODES[target_language]
-    output_file = "translated_audio.wav"
-    command = [
-        "expressivity_predict",
-        audio_file,
-        "--tgt_lang", language_code,
-        "--model_name", "seamless_expressivity",
-        "--vocoder_name", "vocoder_pretssel",
-        "--gated-model-dir", "seamlessmodel",
-        "--output_path", output_file
-    ]
-    subprocess.run(command, check=True)
-    if os.path.exists(output_file):
-        print(f"File created successfully: {output_file}")
-    else:
-        print(f"File not found: {output_file}")
-    return output_file
 def create_interface():
-    """Create and configure the Gradio interface."""
     inputs = [
         gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False),
         gr.Dropdown(list(LANGUAGE_CODES.keys()), label="Target Language")
@@ -56,14 +59,11 @@ def create_interface():
     return gr.Interface(
         fn=translate_speech,
         inputs=inputs,
-        outputs=gr.Audio(label="Translated Audio",
-                        interactive=False,
-                        autoplay=True,
-                        elem_classes="audio"),
         title="Seamless Expressive Speech-To-Speech Translator",
         description="Hear how you sound in another language.",
     )
-if name == "main":
     iface = create_interface()
     iface.launch()

     "Chinese": "cmn"
 }
+def transcribe(audio):
+    if audio is None:
+        return "No audio input detected. Please record or upload an audio file."
+    try:
+        text = model.stt_file(audio)[0]
+        return text
+    except Exception as e:
+        return f"Error transcribing audio: {str(e)}"
+def translate_speech(audio_file, target_language):
+    if audio_file is None:
+        return "No audio input detected. Please record or upload an audio file."
+    try:
+        language_code = LANGUAGE_CODES[target_language]
+        output_file = "translated_audio.wav"
+        command = [
+            "expressivity_predict",
+            audio_file,
+            "--tgt_lang", language_code,
+            "--model_name", "seamless_expressivity",
+            "--vocoder_name", "vocoder_pretssel",
+            "--gated-model-dir", "seamlessmodel",
+            "--output_path", output_file
+        ]
+        subprocess.run(command, check=True)
+        if os.path.exists(output_file):
+            print(f"File created successfully: {output_file}")
+            return output_file
+        else:
+            return "Error: Translated audio file not found."
+    except Exception as e:
+        return f"Error translating speech: {str(e)}"
 def create_interface():
     inputs = [
         gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False),
         gr.Dropdown(list(LANGUAGE_CODES.keys()), label="Target Language")
     return gr.Interface(
         fn=translate_speech,
         inputs=inputs,
+        outputs=gr.Audio(label="Translated Audio", interactive=False, autoplay=True, elem_classes="audio"),
         title="Seamless Expressive Speech-To-Speech Translator",
         description="Hear how you sound in another language.",
     )
+if __name__ == "__main__":
     iface = create_interface()
     iface.launch()