Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,16 +15,12 @@ from dotenv import load_dotenv
|
|
| 15 |
torch.manual_seed(0)
|
| 16 |
np.random.seed(0)
|
| 17 |
|
| 18 |
-
# CleanUnet Dependencies
|
| 19 |
-
|
| 20 |
import json
|
| 21 |
from copy import deepcopy
|
| 22 |
|
| 23 |
import numpy as np
|
| 24 |
import torch
|
| 25 |
|
| 26 |
-
# from util import print_size, sampling
|
| 27 |
-
|
| 28 |
import torchaudio
|
| 29 |
import torchaudio.transforms as T
|
| 30 |
|
|
@@ -36,33 +32,6 @@ np.random.seed(0)
|
|
| 36 |
|
| 37 |
SAMPLE_RATE = 8000
|
| 38 |
|
| 39 |
-
'''
|
| 40 |
-
CONFIG = "configs/DNS-large-full.json"
|
| 41 |
-
# CHECKPOINT = "./exp/DNS-large-full/checkpoint/pretrained.pkl"
|
| 42 |
-
|
| 43 |
-
# Parse configs. Globals nicer in this case
|
| 44 |
-
with open(CONFIG) as f:
|
| 45 |
-
data = f.read()
|
| 46 |
-
config = json.loads(data)
|
| 47 |
-
gen_config = config["gen_config"]
|
| 48 |
-
global network_config
|
| 49 |
-
network_config = config["network_config"] # to define wavenet
|
| 50 |
-
global train_config
|
| 51 |
-
train_config = config["train_config"] # train config
|
| 52 |
-
global trainset_config
|
| 53 |
-
trainset_config = config["trainset_config"] # to read trainset configurations
|
| 54 |
-
|
| 55 |
-
# global use_denoise
|
| 56 |
-
# use_denoise = False
|
| 57 |
-
|
| 58 |
-
# setup local experiment path
|
| 59 |
-
exp_path = train_config["exp_path"]
|
| 60 |
-
print('exp_path:', exp_path)
|
| 61 |
-
|
| 62 |
-
# load data
|
| 63 |
-
loader_config = deepcopy(trainset_config)
|
| 64 |
-
loader_config["crop_length_sec"] = 0
|
| 65 |
-
'''
|
| 66 |
#############################################################################################################
|
| 67 |
|
| 68 |
load_dotenv()
|
|
@@ -114,33 +83,24 @@ def get_phonetic_transcription(text: str):
|
|
| 114 |
return None
|
| 115 |
|
| 116 |
|
| 117 |
-
def tts_inference(text: str, speaker_idx: str = None
|
| 118 |
# synthesize
|
| 119 |
if synthesizer is None:
|
| 120 |
raise NameError("model not found")
|
| 121 |
t1 = time.time()
|
| 122 |
wavs = synthesizer.tts(text, speaker_idx)
|
| 123 |
-
print(type(wavs))
|
| 124 |
-
|
| 125 |
-
wavs_den = torch.Tensor(wavs).unsqueeze(0) # one sample
|
| 126 |
-
# wavs_den = denoise(wavs_den).tolist()
|
| 127 |
-
else:
|
| 128 |
-
wavs_den = wavs
|
| 129 |
|
| 130 |
# return output
|
| 131 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
| 132 |
# wavs must be a list of integers
|
| 133 |
-
synthesizer.save_wav(wavs, fp)
|
| 134 |
t2 = time.time() - t1
|
| 135 |
print(round(t2, 2))
|
| 136 |
output_audio = fp.name
|
| 137 |
|
| 138 |
-
|
| 139 |
-
# wavs must be a list of integers
|
| 140 |
-
synthesizer.save_wav(wavs_den, fp)
|
| 141 |
-
output_audio_den = fp.name
|
| 142 |
-
|
| 143 |
-
return output_audio, output_audio_den
|
| 144 |
|
| 145 |
|
| 146 |
title = "🗣️ Catalan Multispeaker TTS Tester 🗣️"
|
|
@@ -222,11 +182,10 @@ with gr.Blocks(**AinaGradioTheme().get_kwargs()) as app:
|
|
| 222 |
"Submit",
|
| 223 |
variant="primary",
|
| 224 |
)
|
| 225 |
-
use_denoise = gr.Radio(choices=[("Yes", 0), ("No", 1)], value=0)
|
| 226 |
with gr.Column(variant='panel'):
|
| 227 |
output_audio = gr.Audio(label="Output", type="filepath", autoplay=True, show_share_button=False)
|
| 228 |
-
output_audio_den = gr.Audio(label="Output denoised", type="filepath", autoplay=False,
|
| 229 |
-
show_share_button=False)
|
| 230 |
|
| 231 |
output_phonetic = gr.Textbox(label="Phonetic Transcription", readonly=True)
|
| 232 |
|
|
@@ -234,7 +193,7 @@ with gr.Blocks(**AinaGradioTheme().get_kwargs()) as app:
|
|
| 234 |
input_.change(fn=change_interactive, inputs=[input_], outputs=button)
|
| 235 |
|
| 236 |
# clear_btn.click(fn=clean, inputs=[], outputs=[input_, output_audio, output_phonetic], queue=False)
|
| 237 |
-
submit_btn.click(fn=submit_input, inputs=[input_, speaker_id, use_denoise], outputs=[output_audio,
|
| 238 |
output_audio_den,
|
| 239 |
output_phonetic])
|
| 240 |
|
|
|
|
| 15 |
torch.manual_seed(0)
|
| 16 |
np.random.seed(0)
|
| 17 |
|
|
|
|
|
|
|
| 18 |
import json
|
| 19 |
from copy import deepcopy
|
| 20 |
|
| 21 |
import numpy as np
|
| 22 |
import torch
|
| 23 |
|
|
|
|
|
|
|
| 24 |
import torchaudio
|
| 25 |
import torchaudio.transforms as T
|
| 26 |
|
|
|
|
| 32 |
|
| 33 |
SAMPLE_RATE = 8000
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
#############################################################################################################
|
| 36 |
|
| 37 |
load_dotenv()
|
|
|
|
| 83 |
return None
|
| 84 |
|
| 85 |
|
| 86 |
+
def tts_inference(text: str, speaker_idx: str = None):
|
| 87 |
# synthesize
|
| 88 |
if synthesizer is None:
|
| 89 |
raise NameError("model not found")
|
| 90 |
t1 = time.time()
|
| 91 |
wavs = synthesizer.tts(text, speaker_idx)
|
| 92 |
+
# print(type(wavs))
|
| 93 |
+
wavs_den = wavs
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
# return output
|
| 96 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
| 97 |
# wavs must be a list of integers
|
| 98 |
+
synthesizer.save_wav(wavs_den, fp)
|
| 99 |
t2 = time.time() - t1
|
| 100 |
print(round(t2, 2))
|
| 101 |
output_audio = fp.name
|
| 102 |
|
| 103 |
+
return output_audio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
|
| 106 |
title = "🗣️ Catalan Multispeaker TTS Tester 🗣️"
|
|
|
|
| 182 |
"Submit",
|
| 183 |
variant="primary",
|
| 184 |
)
|
| 185 |
+
# use_denoise = gr.Radio(choices=[("Yes", 0), ("No", 1)], value=0)
|
| 186 |
with gr.Column(variant='panel'):
|
| 187 |
output_audio = gr.Audio(label="Output", type="filepath", autoplay=True, show_share_button=False)
|
| 188 |
+
# output_audio_den = gr.Audio(label="Output denoised", type="filepath", autoplay=False, show_share_button=False)
|
|
|
|
| 189 |
|
| 190 |
output_phonetic = gr.Textbox(label="Phonetic Transcription", readonly=True)
|
| 191 |
|
|
|
|
| 193 |
input_.change(fn=change_interactive, inputs=[input_], outputs=button)
|
| 194 |
|
| 195 |
# clear_btn.click(fn=clean, inputs=[], outputs=[input_, output_audio, output_phonetic], queue=False)
|
| 196 |
+
submit_btn.click(fn=submit_input, inputs=[input_, speaker_id], outputs=[output_audio,
|
| 197 |
output_audio_den,
|
| 198 |
output_phonetic])
|
| 199 |
|