"""Gradio demo: text-to-speech with NVIDIA NeMo FastPitch + HiFi-GAN."""

import gradio as gr
import torch
from nemo.collections.tts.models import FastPitchModel
from nemo.collections.tts.models import HifiGanModel

# Sample rate (Hz) reported to Gradio for the generated waveform.
# NOTE(review): assumed to match the pretrained HiFi-GAN checkpoint's
# output rate -- confirm against the model card if the checkpoint changes.
SAMPLE_RATE = 22050

# Load pretrained models from NeMo once at start-up and put them in
# inference mode (disables dropout etc.).
fastpitch = FastPitchModel.from_pretrained("nvidia/tts_en_fastpitch")
hifigan = HifiGanModel.from_pretrained("nvidia/tts_hifigan")
fastpitch.eval()
hifigan.eval()


def tts(text: str):
    """Synthesize speech for *text*.

    Pipeline: text -> tokens (``fastpitch.parse``) -> mel spectrogram
    (``fastpitch.generate_spectrogram``) -> waveform
    (``hifigan.convert_spectrogram_to_audio``).

    Args:
        text: The text entered in the Gradio textbox.

    Returns:
        ``(SAMPLE_RATE, waveform)`` where ``waveform`` is a 1-D NumPy array,
        or ``None`` when *text* is empty/whitespace so the audio output is
        simply left empty instead of feeding an empty sequence to the model.
    """
    if not text or not text.strip():
        return None

    with torch.no_grad():
        # parse() only tokenizes; the spectrogram has to be generated
        # explicitly from those tokens.
        tokens = fastpitch.parse(text)
        # NeMo's type-checked methods accept keyword arguments only.
        spectrogram = fastpitch.generate_spectrogram(tokens=tokens)
        audio = hifigan.convert_spectrogram_to_audio(spec=spectrogram)

    # The vocoder output is batched; take the single item so Gradio gets a
    # (samples,) array rather than one it would read as (samples, channels).
    waveform = audio[0].cpu().numpy()
    return (SAMPLE_RATE, waveform)


# Gradio UI
iface = gr.Interface(
    fn=tts,
    inputs=gr.Textbox(label="Enter text"),
    outputs=gr.Audio(label="Generated Speech"),
    title="FastPitch + HiFiGAN (NeMo TTS)",
    description="Enter text and get speech synthesized using NVIDIA NeMo FastPitch and HiFiGAN.",
)

iface.launch()