Update app.py
Browse files
app.py
CHANGED
|
@@ -11,55 +11,37 @@ from whisperspeech.pipeline import Pipeline
|
|
| 11 |
DEVEL=os.environ.get('DEVEL', False)
|
| 12 |
|
| 13 |
title = """
|
| 14 |
-
<picture>
|
| 15 |
-
<source srcset="https://huggingface.co/spaces/collabora/whisperspeech/resolve/main/dark-banner.png" media="(prefers-color-scheme: dark)" />
|
| 16 |
-
<img alt="WhisperSpeech banner with Collabora and LAION logos" src="https://huggingface.co/spaces/collabora/whisperspeech/resolve/main/light-banner.png" style="width: 60%; margin: 0 auto;" />
|
| 17 |
-
</picture>
|
| 18 |
-
|
| 19 |
-
# Welcome to Collabora's WhisperSpeech
|
| 20 |
-
|
| 21 |
WhisperSpeech is an Open Source text-to-speech system built by Collabora and LAION by inverting Whisper.
|
| 22 |
-
The model is fully open and you can run it on your local hardware.
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
[You can contribute to WhisperSpeech on Github.](https://github.com/collabora/WhisperSpeech)
|
| 26 |
-
You can also join the discussion on [Discord](https://discord.gg/FANw4rHD5E)
|
| 27 |
-
|
| 28 |
-
Huge thanks to [Tonic](https://huggingface.co/Tonic) who helped build this Space for WhisperSpeech.
|
| 29 |
|
| 30 |
### How to Use It
|
| 31 |
-
|
| 32 |
Write your text in the box, you can use language tags (`<en>` or `<pl>`) to create multilingual speech.
|
| 33 |
Optionally you can upload a speech sample or give it a file URL to clone an existing voice. Check out the
|
| 34 |
examples at the bottom of the page for inspiration.
|
| 35 |
"""
|
| 36 |
|
| 37 |
footer = """
|
| 38 |
-
|
| 39 |
### How to use it locally
|
| 40 |
-
|
| 41 |
```
|
| 42 |
pip install -U WhisperSpeech
|
| 43 |
```
|
| 44 |
-
|
| 45 |
Afterwards:
|
| 46 |
-
|
| 47 |
```
|
| 48 |
from whisperspeech.pipeline import Pipeline
|
| 49 |
-
|
| 50 |
pipe = Pipeline(torch_compile=True)
|
| 51 |
pipe.generate_to_file("output.wav", "Hello from WhisperSpeech.")
|
| 52 |
```
|
| 53 |
"""
|
| 54 |
|
| 55 |
-
|
| 56 |
text_examples = [
|
| 57 |
["This is the first demo of Whisper Speech, a fully open source text-to-speech model trained by Collabora and Lion on the Juwels supercomputer.", None],
|
| 58 |
["World War II or the Second World War was a global conflict that lasted from 1939 to 1945. The vast majority of the world's countries, including all the great powers, fought as part of two opposing military alliances: the Allies and the Axis.", "https://upload.wikimedia.org/wikipedia/commons/7/75/Winston_Churchill_-_Be_Ye_Men_of_Valour.ogg"],
|
| 59 |
-
["<pl>To jest pierwszy test wielojęzycznego <en>Whisper Speech <pl>, modelu zamieniającego tekst na mowę, który Collabora i Laion nauczyli na superkomputerze <en>Jewels.", None],
|
| 60 |
-
["<en> WhisperSpeech is an Open Source library that helps you convert text to speech. <pl>Teraz także po Polsku! <en>I think I just tried saying \"now also in Polish\", don't judge me...", None],
|
| 61 |
-
|
| 62 |
-
["<pl>To jest pierwszy test naszego modelu. Pozdrawiamy serdecznie.", None],
|
| 63 |
# ["<en> The big difference between Europe <fr> et les Etats Unis <pl> jest to, że mamy tak wiele języków <uk> тут, в Європі"]
|
| 64 |
]
|
| 65 |
|
|
@@ -88,13 +70,9 @@ def generate_audio(pipe, segments, speaker, speaker_url, cps=14):
|
|
| 88 |
def whisper_speech_demo(multilingual_text, speaker_audio=None, speaker_url="", cps=14):
|
| 89 |
if len(multilingual_text) == 0:
|
| 90 |
raise gr.Error("Please enter some text for me to speak!")
|
| 91 |
-
|
| 92 |
segments = parse_multilingual_text(multilingual_text)
|
| 93 |
-
|
| 94 |
audio = generate_audio(pipe, segments, speaker_audio, speaker_url, cps)
|
| 95 |
-
|
| 96 |
return (24000, audio.T.numpy())
|
| 97 |
-
|
| 98 |
# Did not work for me in Safari:
|
| 99 |
# mp3 = io.BytesIO()
|
| 100 |
# torchaudio.save(mp3, audio, 24000, format='mp3')
|
|
|
|
| 11 |
DEVEL=os.environ.get('DEVEL', False)
|
| 12 |
|
| 13 |
title = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
WhisperSpeech is an Open Source text-to-speech system built by Collabora and LAION by inverting Whisper.
|
| 15 |
+
The model is fully open and you can run it on your local hardware.
|
| 16 |
+
https://github.com/collabora/WhisperSpeech
|
| 17 |
+
https://discord.gg/FANw4rHD5E
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
### How to Use It
|
|
|
|
| 20 |
Write your text in the box, you can use language tags (`<en>` or `<pl>`) to create multilingual speech.
|
| 21 |
Optionally you can upload a speech sample or give it a file URL to clone an existing voice. Check out the
|
| 22 |
examples at the bottom of the page for inspiration.
|
| 23 |
"""
|
| 24 |
|
| 25 |
footer = """
|
|
|
|
| 26 |
### How to use it locally
|
|
|
|
| 27 |
```
|
| 28 |
pip install -U WhisperSpeech
|
| 29 |
```
|
|
|
|
| 30 |
Afterwards:
|
|
|
|
| 31 |
```
|
| 32 |
from whisperspeech.pipeline import Pipeline
|
|
|
|
| 33 |
pipe = Pipeline(torch_compile=True)
|
| 34 |
pipe.generate_to_file("output.wav", "Hello from WhisperSpeech.")
|
| 35 |
```
|
| 36 |
"""
|
| 37 |
|
|
|
|
| 38 |
text_examples = [
|
| 39 |
["This is the first demo of Whisper Speech, a fully open source text-to-speech model trained by Collabora and Lion on the Juwels supercomputer.", None],
|
| 40 |
["World War II or the Second World War was a global conflict that lasted from 1939 to 1945. The vast majority of the world's countries, including all the great powers, fought as part of two opposing military alliances: the Allies and the Axis.", "https://upload.wikimedia.org/wikipedia/commons/7/75/Winston_Churchill_-_Be_Ye_Men_of_Valour.ogg"],
|
| 41 |
+
#["<pl>To jest pierwszy test wielojęzycznego <en>Whisper Speech <pl>, modelu zamieniającego tekst na mowę, który Collabora i Laion nauczyli na superkomputerze <en>Jewels.", None],
|
| 42 |
+
#["<en> WhisperSpeech is an Open Source library that helps you convert text to speech. <pl>Teraz także po Polsku! <en>I think I just tried saying \"now also in Polish\", don't judge me...", None],
|
| 43 |
+
["<de> WhisperSpeech is multi-lingual <es> hay una vez un bar un bargochicitito <hi> मध्य वाक्य में", None],
|
| 44 |
+
#["<pl>To jest pierwszy test naszego modelu. Pozdrawiamy serdecznie.", None],
|
| 45 |
# ["<en> The big difference between Europe <fr> et les Etats Unis <pl> jest to, że mamy tak wiele języków <uk> тут, в Європі"]
|
| 46 |
]
|
| 47 |
|
|
|
|
| 70 |
def whisper_speech_demo(multilingual_text, speaker_audio=None, speaker_url="", cps=14):
|
| 71 |
if len(multilingual_text) == 0:
|
| 72 |
raise gr.Error("Please enter some text for me to speak!")
|
|
|
|
| 73 |
segments = parse_multilingual_text(multilingual_text)
|
|
|
|
| 74 |
audio = generate_audio(pipe, segments, speaker_audio, speaker_url, cps)
|
|
|
|
| 75 |
return (24000, audio.T.numpy())
|
|
|
|
| 76 |
# Did not work for me in Safari:
|
| 77 |
# mp3 = io.BytesIO()
|
| 78 |
# torchaudio.save(mp3, audio, 24000, format='mp3')
|