Spaces:

AlexK-PL
/

vits-v2-8khz-inference

Runtime error

App Files Files Community

AlexK-PL committed on Jan 17, 2024

Commit

0860c4d

verified ·

1 Parent(s): d739d73

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -49

app.py CHANGED Viewed

@@ -15,16 +15,12 @@ from dotenv import load_dotenv
 torch.manual_seed(0)
 np.random.seed(0)
-# CleanUnet Dependencies
 import json
 from copy import deepcopy
 import numpy as np
 import torch
-# from util import print_size, sampling
 import torchaudio
 import torchaudio.transforms as T
@@ -36,33 +32,6 @@ np.random.seed(0)
 SAMPLE_RATE = 8000
-'''
-CONFIG = "configs/DNS-large-full.json"
-# CHECKPOINT = "./exp/DNS-large-full/checkpoint/pretrained.pkl"
-# Parse configs. Globals nicer in this case
-with open(CONFIG) as f:
-    data = f.read()
-    config = json.loads(data)
-    gen_config = config["gen_config"]
-    global network_config
-    network_config = config["network_config"]  # to define wavenet
-    global train_config
-    train_config = config["train_config"]  # train config
-    global trainset_config
-    trainset_config = config["trainset_config"]  # to read trainset configurations
-# global use_denoise
-# use_denoise = False
-# setup local experiment path
-exp_path = train_config["exp_path"]
-print('exp_path:', exp_path)
-# load data
-loader_config = deepcopy(trainset_config)
-loader_config["crop_length_sec"] = 0
-'''
 #############################################################################################################
 load_dotenv()
@@ -114,33 +83,24 @@ def get_phonetic_transcription(text: str):
         return None
-def tts_inference(text: str, speaker_idx: str = None, use_denoise: int = 0):
     # synthesize
     if synthesizer is None:
         raise NameError("model not found")
     t1 = time.time()
     wavs = synthesizer.tts(text, speaker_idx)
-    print(type(wavs))
-    if use_denoise == 0:
-        wavs_den = torch.Tensor(wavs).unsqueeze(0)  # one sample
-        # wavs_den = denoise(wavs_den).tolist()
-    else:
-        wavs_den = wavs
     # return output
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
         # wavs must be a list of integers
-        synthesizer.save_wav(wavs, fp)
         t2 = time.time() - t1
         print(round(t2, 2))
         output_audio = fp.name
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        # wavs must be a list of integers
-        synthesizer.save_wav(wavs_den, fp)
-        output_audio_den = fp.name
-    return output_audio, output_audio_den
 title = "🗣️ Catalan Multispeaker TTS Tester 🗣️"
@@ -222,11 +182,10 @@ with gr.Blocks(**AinaGradioTheme().get_kwargs()) as app:
                     "Submit",
                     variant="primary",
                 )
-                use_denoise = gr.Radio(choices=[("Yes", 0), ("No", 1)], value=0)
         with gr.Column(variant='panel'):
             output_audio = gr.Audio(label="Output", type="filepath", autoplay=True, show_share_button=False)
-            output_audio_den = gr.Audio(label="Output denoised", type="filepath", autoplay=False,
-                                        show_share_button=False)
             output_phonetic = gr.Textbox(label="Phonetic Transcription", readonly=True)
@@ -234,7 +193,7 @@ with gr.Blocks(**AinaGradioTheme().get_kwargs()) as app:
         input_.change(fn=change_interactive, inputs=[input_], outputs=button)
     # clear_btn.click(fn=clean, inputs=[], outputs=[input_, output_audio, output_phonetic], queue=False)
-    submit_btn.click(fn=submit_input, inputs=[input_, speaker_id, use_denoise], outputs=[output_audio,
                                                                                          output_audio_den,
                                                                                          output_phonetic])

 torch.manual_seed(0)
 np.random.seed(0)
 import json
 from copy import deepcopy
 import numpy as np
 import torch
 import torchaudio
 import torchaudio.transforms as T
 SAMPLE_RATE = 8000
 #############################################################################################################
 load_dotenv()
         return None
+def tts_inference(text: str, speaker_idx: str = None):
     # synthesize
     if synthesizer is None:
         raise NameError("model not found")
     t1 = time.time()
     wavs = synthesizer.tts(text, speaker_idx)
+    # print(type(wavs))
+    wavs_den = wavs
     # return output
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
         # wavs must be a list of integers
+        synthesizer.save_wav(wavs_den, fp)
         t2 = time.time() - t1
         print(round(t2, 2))
         output_audio = fp.name
+    return output_audio
 title = "🗣️ Catalan Multispeaker TTS Tester 🗣️"
                     "Submit",
                     variant="primary",
                 )
+                # use_denoise = gr.Radio(choices=[("Yes", 0), ("No", 1)], value=0)
         with gr.Column(variant='panel'):
             output_audio = gr.Audio(label="Output", type="filepath", autoplay=True, show_share_button=False)
+            # output_audio_den = gr.Audio(label="Output denoised", type="filepath", autoplay=False, show_share_button=False)
             output_phonetic = gr.Textbox(label="Phonetic Transcription", readonly=True)
         input_.change(fn=change_interactive, inputs=[input_], outputs=button)
     # clear_btn.click(fn=clean, inputs=[], outputs=[input_, output_audio, output_phonetic], queue=False)
+    submit_btn.click(fn=submit_input, inputs=[input_, speaker_id], outputs=[output_audio,
                                                                                          output_audio_den,
                                                                                          output_phonetic])