Precompute examples, minor layout adjustments
Browse files
app.py
CHANGED
|
@@ -8,6 +8,8 @@ import torchaudio
|
|
| 8 |
from pathlib import Path
|
| 9 |
from whisperspeech.pipeline import Pipeline
|
| 10 |
|
|
|
|
|
|
|
| 11 |
title = """# 🙋🏻♂️ Welcome to Collabora's WhisperSpeech
|
| 12 |
|
| 13 |
WhisperSpeech is an Open Source text-to-speech system built by Collabora and LAION by inverting Whisper.
|
|
@@ -77,7 +79,7 @@ def generate_audio(pipe, segments, speaker, speaker_url, cps=14):
|
|
| 77 |
audio = pipe.vocoder.decode(atoks)
|
| 78 |
return audio.cpu()
|
| 79 |
|
| 80 |
-
def whisper_speech_demo(multilingual_text, speaker_audio, speaker_url, cps):
|
| 81 |
if len(multilingual_text) == 0:
|
| 82 |
raise gr.Error("Please enter some text for me to speak!")
|
| 83 |
|
|
@@ -92,6 +94,9 @@ def whisper_speech_demo(multilingual_text, speaker_audio, speaker_url, cps):
|
|
| 92 |
# torchaudio.save(mp3, audio, 24000, format='mp3')
|
| 93 |
# return mp3.getvalue()
|
| 94 |
|
|
|
|
|
|
|
|
|
|
| 95 |
with gr.Blocks() as demo:
|
| 96 |
gr.Markdown(title)
|
| 97 |
with gr.Row(equal_height=True):
|
|
@@ -101,29 +106,27 @@ with gr.Blocks() as demo:
|
|
| 101 |
info="You can use `<en>` for English and `<pl>` for Polish, see examples below.")
|
| 102 |
cps = gr.Slider(value=14, minimum=10, maximum=15, step=.25,
|
| 103 |
label="Tempo (in characters per second)")
|
| 104 |
-
|
|
|
|
| 105 |
sources=["upload", "microphone"],
|
| 106 |
type='filepath')
|
|
|
|
| 107 |
gr.Markdown(" \n ") # fixes the bottom overflow from Audio
|
| 108 |
-
url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
|
| 109 |
generate_button = gr.Button("Try Collabora's WhisperSpeech🌟")
|
| 110 |
with gr.Column(scale=1):
|
| 111 |
output_audio = gr.Audio(label="WhisperSpeech says…")
|
| 112 |
|
| 113 |
-
with gr.
|
|
|
|
| 114 |
gr.Examples(
|
| 115 |
examples=text_examples,
|
| 116 |
inputs=[text_input, url_input],
|
| 117 |
outputs=[output_audio],
|
| 118 |
fn=whisper_speech_demo,
|
| 119 |
-
cache_examples=
|
| 120 |
-
label="Try these to get started !🌟🌬️"
|
| 121 |
)
|
| 122 |
|
| 123 |
generate_button.click(whisper_speech_demo, inputs=[text_input, speaker_input, url_input, cps], outputs=output_audio)
|
| 124 |
gr.Markdown(footer)
|
| 125 |
|
| 126 |
-
|
| 127 |
-
pipe.generate("WhisperSpeech warmup")
|
| 128 |
-
|
| 129 |
-
demo.launch()
|
|
|
|
| 8 |
from pathlib import Path
|
| 9 |
from whisperspeech.pipeline import Pipeline
|
| 10 |
|
| 11 |
+
DEVEL=os.environ.get('DEVEL', False)
|
| 12 |
+
|
| 13 |
title = """# 🙋🏻♂️ Welcome to Collabora's WhisperSpeech
|
| 14 |
|
| 15 |
WhisperSpeech is an Open Source text-to-speech system built by Collabora and LAION by inverting Whisper.
|
|
|
|
| 79 |
audio = pipe.vocoder.decode(atoks)
|
| 80 |
return audio.cpu()
|
| 81 |
|
| 82 |
+
def whisper_speech_demo(multilingual_text, speaker_audio=None, speaker_url="", cps=14):
|
| 83 |
if len(multilingual_text) == 0:
|
| 84 |
raise gr.Error("Please enter some text for me to speak!")
|
| 85 |
|
|
|
|
| 94 |
# torchaudio.save(mp3, audio, 24000, format='mp3')
|
| 95 |
# return mp3.getvalue()
|
| 96 |
|
| 97 |
+
pipe = Pipeline(torch_compile=not DEVEL)
|
| 98 |
+
# warmup will come from regenerating the examples
|
| 99 |
+
|
| 100 |
with gr.Blocks() as demo:
|
| 101 |
gr.Markdown(title)
|
| 102 |
with gr.Row(equal_height=True):
|
|
|
|
| 106 |
info="You can use `<en>` for English and `<pl>` for Polish, see examples below.")
|
| 107 |
cps = gr.Slider(value=14, minimum=10, maximum=15, step=.25,
|
| 108 |
label="Tempo (in characters per second)")
|
| 109 |
+
with gr.Row(equal_height=True):
|
| 110 |
+
speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)🌬️💬",
|
| 111 |
sources=["upload", "microphone"],
|
| 112 |
type='filepath')
|
| 113 |
+
url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
|
| 114 |
gr.Markdown(" \n ") # fixes the bottom overflow from Audio
|
|
|
|
| 115 |
generate_button = gr.Button("Try Collabora's WhisperSpeech🌟")
|
| 116 |
with gr.Column(scale=1):
|
| 117 |
output_audio = gr.Audio(label="WhisperSpeech says…")
|
| 118 |
|
| 119 |
+
with gr.Column():
|
| 120 |
+
gr.Markdown("### Try these examples to get started !🌟🌬️")
|
| 121 |
gr.Examples(
|
| 122 |
examples=text_examples,
|
| 123 |
inputs=[text_input, url_input],
|
| 124 |
outputs=[output_audio],
|
| 125 |
fn=whisper_speech_demo,
|
| 126 |
+
cache_examples=not DEVEL,
|
|
|
|
| 127 |
)
|
| 128 |
|
| 129 |
generate_button.click(whisper_speech_demo, inputs=[text_input, speaker_input, url_input, cps], outputs=output_audio)
|
| 130 |
gr.Markdown(footer)
|
| 131 |
|
| 132 |
+
demo.launch(server_port=3000 if DEVEL else None)
|
|
|
|
|
|
|
|
|