Precompute examples, minor layout adjustments
Browse files
app.py
CHANGED
|
@@ -8,6 +8,8 @@ import torchaudio
|
|
| 8 |
from pathlib import Path
|
| 9 |
from whisperspeech.pipeline import Pipeline
|
| 10 |
|
|
|
|
|
|
|
| 11 |
title = """# 🙋🏻♂️ Welcome to Collabora's WhisperSpeech
|
| 12 |
|
| 13 |
WhisperSpeech is an Open Source text-to-speech system built by Collabora and LAION by inverting Whisper.
|
|
@@ -77,7 +79,7 @@ def generate_audio(pipe, segments, speaker, speaker_url, cps=14):
|
|
| 77 |
audio = pipe.vocoder.decode(atoks)
|
| 78 |
return audio.cpu()
|
| 79 |
|
| 80 |
-
def whisper_speech_demo(multilingual_text, speaker_audio, speaker_url, cps):
|
| 81 |
if len(multilingual_text) == 0:
|
| 82 |
raise gr.Error("Please enter some text for me to speak!")
|
| 83 |
|
|
@@ -92,6 +94,9 @@ def whisper_speech_demo(multilingual_text, speaker_audio, speaker_url, cps):
|
|
| 92 |
# torchaudio.save(mp3, audio, 24000, format='mp3')
|
| 93 |
# return mp3.getvalue()
|
| 94 |
|
|
|
|
|
|
|
|
|
|
| 95 |
with gr.Blocks() as demo:
|
| 96 |
gr.Markdown(title)
|
| 97 |
with gr.Row(equal_height=True):
|
|
@@ -101,29 +106,27 @@ with gr.Blocks() as demo:
|
|
| 101 |
info="You can use `<en>` for English and `<pl>` for Polish, see examples below.")
|
| 102 |
cps = gr.Slider(value=14, minimum=10, maximum=15, step=.25,
|
| 103 |
label="Tempo (in characters per second)")
|
| 104 |
-
|
|
|
|
| 105 |
sources=["upload", "microphone"],
|
| 106 |
type='filepath')
|
|
|
|
| 107 |
gr.Markdown(" \n ") # fixes the bottom overflow from Audio
|
| 108 |
-
url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
|
| 109 |
generate_button = gr.Button("Try Collabora's WhisperSpeech🌟")
|
| 110 |
with gr.Column(scale=1):
|
| 111 |
output_audio = gr.Audio(label="WhisperSpeech says…")
|
| 112 |
|
| 113 |
-
with gr.
|
|
|
|
| 114 |
gr.Examples(
|
| 115 |
examples=text_examples,
|
| 116 |
inputs=[text_input, url_input],
|
| 117 |
outputs=[output_audio],
|
| 118 |
fn=whisper_speech_demo,
|
| 119 |
-
cache_examples=
|
| 120 |
-
label="Try these to get started !🌟🌬️"
|
| 121 |
)
|
| 122 |
|
| 123 |
generate_button.click(whisper_speech_demo, inputs=[text_input, speaker_input, url_input, cps], outputs=output_audio)
|
| 124 |
gr.Markdown(footer)
|
| 125 |
|
| 126 |
-
|
| 127 |
-
pipe.generate("WhisperSpeech warmup")
|
| 128 |
-
|
| 129 |
-
demo.launch()
|
|
|
|
| 8 |
from pathlib import Path
|
| 9 |
from whisperspeech.pipeline import Pipeline
|
| 10 |
|
| 11 |
+
DEVEL=os.environ.get('DEVEL', False)
|
| 12 |
+
|
| 13 |
title = """# 🙋🏻♂️ Welcome to Collabora's WhisperSpeech
|
| 14 |
|
| 15 |
WhisperSpeech is an Open Source text-to-speech system built by Collabora and LAION by inverting Whisper.
|
|
|
|
| 79 |
audio = pipe.vocoder.decode(atoks)
|
| 80 |
return audio.cpu()
|
| 81 |
|
| 82 |
+
def whisper_speech_demo(multilingual_text, speaker_audio=None, speaker_url="", cps=14):
|
| 83 |
if len(multilingual_text) == 0:
|
| 84 |
raise gr.Error("Please enter some text for me to speak!")
|
| 85 |
|
|
|
|
| 94 |
# torchaudio.save(mp3, audio, 24000, format='mp3')
|
| 95 |
# return mp3.getvalue()
|
| 96 |
|
| 97 |
+
pipe = Pipeline(torch_compile=not DEVEL)
|
| 98 |
+
# warmup will come from regenerating the examples
|
| 99 |
+
|
| 100 |
with gr.Blocks() as demo:
|
| 101 |
gr.Markdown(title)
|
| 102 |
with gr.Row(equal_height=True):
|
|
|
|
| 106 |
info="You can use `<en>` for English and `<pl>` for Polish, see examples below.")
|
| 107 |
cps = gr.Slider(value=14, minimum=10, maximum=15, step=.25,
|
| 108 |
label="Tempo (in characters per second)")
|
| 109 |
+
with gr.Row(equal_height=True):
|
| 110 |
+
speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)🌬️💬",
|
| 111 |
sources=["upload", "microphone"],
|
| 112 |
type='filepath')
|
| 113 |
+
url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
|
| 114 |
gr.Markdown(" \n ") # fixes the bottom overflow from Audio
|
|
|
|
| 115 |
generate_button = gr.Button("Try Collabora's WhisperSpeech🌟")
|
| 116 |
with gr.Column(scale=1):
|
| 117 |
output_audio = gr.Audio(label="WhisperSpeech says…")
|
| 118 |
|
| 119 |
+
with gr.Column():
|
| 120 |
+
gr.Markdown("### Try these examples to get started !🌟🌬️")
|
| 121 |
gr.Examples(
|
| 122 |
examples=text_examples,
|
| 123 |
inputs=[text_input, url_input],
|
| 124 |
outputs=[output_audio],
|
| 125 |
fn=whisper_speech_demo,
|
| 126 |
+
cache_examples=not DEVEL,
|
|
|
|
| 127 |
)
|
| 128 |
|
| 129 |
generate_button.click(whisper_speech_demo, inputs=[text_input, speaker_input, url_input, cps], outputs=output_audio)
|
| 130 |
gr.Markdown(footer)
|
| 131 |
|
| 132 |
+
demo.launch(server_port=3000 if DEVEL else None)
|
|
|
|
|
|
|
|
|