|
|
import gradio as gr |
|
|
from gtts import gTTS |
|
|
import io |
|
|
import os |
|
|
import tempfile |
|
|
|
|
|
def text_to_speech(text, language, pitch):
    """Synthesize *text* with gTTS, then pitch-shift the audio with ffmpeg.

    Args:
        text: The text to speak.
        language: Language code handed to gTTS as ``lang``.
        pitch: Pitch control. The audio sample rate is scaled by
            ``2 ** (pitch - 1)``, so ``1.0`` leaves it unchanged, values
            below ``1.0`` lower the pitch and values above raise it.

    Returns:
        Path to a temporary ``.mp3`` file containing the processed audio.
        The file is not deleted by this function.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Local import: keeps this block self-contained without touching the
    # file's top-level import section.
    import subprocess

    tts = gTTS(text=text, lang=language, slow=False)

    # Reserve both temp paths and close their handles right away. Writing to
    # a path whose handle is still open fails on Windows, and leaving the
    # handle of the output file dangling leaks a file descriptor.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        source_file = fp.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        output_file = fp.name

    try:
        tts.save(source_file)

        pitch_shift = pitch - 1
        # NOTE(review): the filter assumes a 44100 Hz input stream; the
        # sample rate gTTS actually produces is not visible here - confirm.
        audio_filter = f"asetrate=44100*{2 ** pitch_shift},aresample=44100"

        # "-y" is required: output_file already exists (it was just created
        # above), and without it ffmpeg refuses or prompts to overwrite.
        # An argument list avoids shell interpolation of the paths, and
        # check=True surfaces ffmpeg failures instead of returning a
        # silently empty file.
        subprocess.run(
            ["ffmpeg", "-y", "-i", source_file, "-af", audio_filter, output_file],
            check=True,
        )
    except Exception:
        # Do not leave a useless (empty/partial) output file behind.
        if os.path.exists(output_file):
            os.unlink(output_file)
        raise
    finally:
        # The intermediate gTTS file is never needed after conversion.
        if os.path.exists(source_file):
            os.unlink(source_file)

    return output_file
|
|
|
|
|
def gradio_tts_interface(text, language, pitch):
    """Gradio callback: forward the UI values to ``text_to_speech``.

    Args:
        text: Text entered by the user.
        language: Selected language code.
        pitch: Selected pitch value.

    Returns:
        Whatever ``text_to_speech`` returns for these arguments (the path of
        the generated audio file).
    """
    return text_to_speech(text, language, pitch)
|
|
|
|
|
# Build the demo UI: inputs on the left, generated audio on the right.
with gr.Blocks(theme="Hev832/Applio") as iface:
    gr.Markdown("# Text-to-Speech Demo")

    with gr.Row():
        # Input column: text, language, pitch and the trigger button.
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter text to convert to speech",
                lines=3,
            )
            language_input = gr.Dropdown(
                ["en", "fr", "es", "de", "it", "id", "ja"],
                label="Select Language",
            )
            pitch_input = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Pitch (0.5 for lower/male, 1.0 for normal, 2.0 for higher/female)",
            )
            submit_button = gr.Button("Convert to Speech")

        # Output column: player for the generated audio.
        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech")

    # Wire the button to the callback; the three inputs are passed
    # positionally as (text, language, pitch).
    submit_button.click(
        fn=gradio_tts_interface,
        inputs=[text_input, language_input, pitch_input],
        outputs=audio_output,
    )

# Start the Gradio server for this app.
iface.launch()