Spaces:
Build error
Build error
| import gradio as gr | |
| import tensorflow as tf | |
| import numpy as np | |
| from tensorflow_tts.inference import TFAutoModel, AutoProcessor | |
# Load pre-trained models.
# The text processor and the acoustic model are fetched from the same repo id;
# the vocoder comes from its own repo (presumably multi-band MelGAN, going by
# the "mb_melgan" id — confirm against the TensorFlowTTS model hub).
_ACOUSTIC_REPO = "tensorspeech/tts-fastspeech2-ljspeech-en"
_VOCODER_REPO = "tensorspeech/tts-mb_melgan-ljspeech-en"

processor = AutoProcessor.from_pretrained(_ACOUSTIC_REPO)
fastspeech2 = TFAutoModel.from_pretrained(_ACOUSTIC_REPO)
melgan = TFAutoModel.from_pretrained(_VOCODER_REPO)
# Sample rate (Hz) reported to Gradio together with the synthesized waveform.
# NOTE(review): presumably the rate the loaded checkpoints were trained at — confirm.
SAMPLE_RATE = 22050


# Define inference function
def tts_inference(text):
    """Synthesize speech for ``text`` and return it in Gradio's audio format.

    The text is converted to token ids by the module-level ``processor``,
    turned into a mel spectrogram by ``fastspeech2`` and finally into a
    waveform by ``melgan``.

    Args:
        text: The sentence to synthesize.

    Returns:
        A ``(sample_rate, waveform)`` tuple, which is the order Gradio's
        ``"audio"`` output expects; ``waveform`` is a 1-D NumPy array.
    """
    # Convert text to a batch containing a single token-id sequence.
    token_ids = processor.text_to_sequence(text)
    input_ids = tf.expand_dims(
        tf.convert_to_tensor(token_ids, dtype=tf.int32), 0
    )

    # The FastSpeech2 inference signature takes the speed/f0/energy ratios in
    # addition to the ids; 1.0 leaves each of them unscaled.
    unit_ratio = tf.convert_to_tensor([1.0], dtype=tf.float32)
    outputs = fastspeech2.inference(
        input_ids=input_ids,
        speaker_ids=tf.convert_to_tensor([0], dtype=tf.int32),
        speed_ratios=unit_ratio,
        f0_ratios=unit_ratio,
        energy_ratios=unit_ratio,
    )

    # inference() returns a tuple (mel_before, mel_after, durations, ...);
    # the vocoder must be fed a single mel tensor, so pick the post-net one.
    mel_after = outputs[1]

    # Convert mel spectrogram to waveform: first batch item, single channel.
    audio = melgan.inference(mel_after)[0, :, 0].numpy()

    # Gradio expects (sample_rate, data), not (data, sample_rate).
    return SAMPLE_RATE, audio
# Create Gradio interface: one text box in, one audio player out, backed by
# tts_inference.
# NOTE(review): the title/description mention Vietnamese, while the models
# loaded in this file use "ljspeech-en" repo ids — confirm which is intended.
_interface_options = {
    "fn": tts_inference,
    "inputs": "text",
    "outputs": "audio",
    "title": "FastSpeech2_vi TTS",
    "description": "Enter Vietnamese text and generate speech using FastSpeech2",
}
iface = gr.Interface(**_interface_options)

# Start serving the demo.
iface.launch()