Commit
·
b21342c
1
Parent(s):
4b8ade9
Add long-speaker support
Browse files
- app.py +26 -10
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -3,7 +3,7 @@ import styletts2importable
|
|
| 3 |
import ljspeechimportable
|
| 4 |
import torch
|
| 5 |
import os
|
| 6 |
-
|
| 7 |
import numpy as np
|
| 8 |
import pickle
|
| 9 |
theme = gr.themes.Base(
|
|
@@ -20,15 +20,31 @@ global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_
|
|
| 20 |
# else:
|
| 21 |
for v in voicelist:
|
| 22 |
voices[v] = styletts2importable.compute_style(f'voices/{v}.wav')
|
| 23 |
-
def synthesize(text, voice, multispeakersteps):
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
# def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
| 33 |
# if password == os.environ['ACCESS_CODE']:
|
| 34 |
# if text.strip() == "":
|
|
|
|
| 3 |
import ljspeechimportable
|
| 4 |
import torch
|
| 5 |
import os
|
| 6 |
+
from tortoise.utils.text import split_and_recombine_text
|
| 7 |
import numpy as np
|
| 8 |
import pickle
|
| 9 |
theme = gr.themes.Base(
|
|
|
|
| 20 |
# else:
|
| 21 |
for v in voicelist:
|
| 22 |
voices[v] = styletts2importable.compute_style(f'voices/{v}.wav')
|
| 23 |
+
# def synthesize(text, voice, multispeakersteps):
|
| 24 |
+
# if text.strip() == "":
|
| 25 |
+
# raise gr.Error("You must enter some text")
|
| 26 |
+
# # if len(global_phonemizer.phonemize([text])) > 300:
|
| 27 |
+
# if len(text) > 300:
|
| 28 |
+
# raise gr.Error("Text must be under 300 characters")
|
| 29 |
+
# v = voice.lower()
|
| 30 |
+
# # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
|
| 31 |
+
# return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
|
| 32 |
+
def synthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
| 33 |
+
if password == os.environ['ACCESS_CODE']:
|
| 34 |
+
if text.strip() == "":
|
| 35 |
+
raise gr.Error("You must enter some text")
|
| 36 |
+
if lngsteps > 25:
|
| 37 |
+
raise gr.Error("Max 25 steps")
|
| 38 |
+
if lngsteps < 5:
|
| 39 |
+
raise gr.Error("Min 5 steps")
|
| 40 |
+
texts = split_and_recombine_text(text)
|
| 41 |
+
v = voice.lower()
|
| 42 |
+
audios = []
|
| 43 |
+
for t in progress.tqdm(texts):
|
| 44 |
+
audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.7, diffusion_steps=lngsteps, embedding_scale=1))
|
| 45 |
+
return (24000, np.concatenate(audios))
|
| 46 |
+
else:
|
| 47 |
+
raise gr.Error('Wrong access code')
|
| 48 |
# def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
| 49 |
# if password == os.environ['ACCESS_CODE']:
|
| 50 |
# if text.strip() == "":
|
requirements.txt
CHANGED
|
@@ -20,4 +20,4 @@ phonemizer
|
|
| 20 |
cached-path
|
| 21 |
gradio
|
| 22 |
gruut
|
| 23 |
-
|
|
|
|
| 20 |
cached-path
|
| 21 |
gradio
|
| 22 |
gruut
|
| 23 |
+
tortoise-tts
|