girumdom-tts / app.py
Kragelll's picture
Create app.py
e2edc99 verified
raw
history blame contribute delete
866 Bytes
import gradio as gr
import torch
from nemo.collections.tts.models import FastPitchModel
from nemo.collections.tts.models import HifiGanModel
# 🔹 Load pretrained models from NeMo
# Acoustic model: used by the app to turn input text into a mel spectrogram.
fastpitch = FastPitchModel.from_pretrained(model_name="nvidia/tts_en_fastpitch")

# Vocoder: used by the app to turn a mel spectrogram into an audio waveform.
hifigan = HifiGanModel.from_pretrained(model_name="nvidia/tts_hifigan")
# 🔹 TTS function
def tts(text):
    """Synthesize speech for ``text`` using FastPitch followed by HiFi-GAN.

    The pipeline has three stages: the text is tokenized by FastPitch, the
    tokens are rendered to a mel spectrogram by FastPitch, and the
    spectrogram is converted to a waveform by the HiFi-GAN vocoder.

    Args:
        text: The text to speak, as entered in the Gradio textbox.

    Returns:
        A ``(sample_rate, waveform)`` tuple in the form accepted by
        ``gr.Audio``, where ``waveform`` is a NumPy array.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace, so the
            user sees a message instead of an opaque model failure.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    with torch.no_grad():
        # Text -> token IDs. ``parse`` only tokenizes; it does not produce
        # a spectrogram.
        tokens = fastpitch.parse(text)
        # Token IDs -> mel spectrogram.
        spectrogram = fastpitch.generate_spectrogram(tokens=tokens)
        # Mel spectrogram -> audio waveform.
        audio = hifigan.convert_spectrogram_to_audio(spec=spectrogram)

    # The vocoder output carries a leading dimension (presumably the batch
    # axis; the NeMo usage example indexes ``[0]`` before writing the file).
    # Drop it so Gradio receives a plain sequence of samples.
    waveform = audio[0].detach().cpu().numpy()

    # 22050 is the sample rate this app has always reported; it matches the
    # rate used in the NeMo example for these checkpoints.
    return (22050, waveform)
# 🔹 Gradio UI
# One textbox in, one audio player out.
text_input = gr.Textbox(label="Enter text")
speech_output = gr.Audio(label="Generated Speech")

# Wire the synthesis function to the input/output components.
iface = gr.Interface(
    title="FastPitch + HiFiGAN (NeMo TTS)",
    description="Enter text and get speech synthesized using NVIDIA NeMo FastPitch and HiFiGAN.",
    fn=tts,
    inputs=text_input,
    outputs=speech_output,
)

# Start serving the interface.
iface.launch()