Update app.py

app.py CHANGED
@@ -30,7 +30,7 @@ setup_dependencies()
 
 import spaces
 import gradio as gr
-from util import Config, NemoAudioPlayer, KaniModel
+from util import Config, NemoAudioPlayer, KaniModel, Demo
 import numpy as np
 import torch
 
@@ -52,6 +52,7 @@ models_configs = {
 
 # Global variables for models (loaded once)
 player = NemoAudioPlayer(Config())
+demo_examples = Demo()()
 models = {}
 
 def initialize_models():
@@ -97,27 +98,26 @@ def generate_speech_gpu(text, model_choice):
         print(f"Generating speech with {model_choice}...")
         audio, _ = selected_model.run_model(text)
 
-
-        sample_rate = 22050 # Standard sample rate for NeMo
+        sample_rate = 22050
         print("Speech generation completed!")
 
-        return (sample_rate, audio)
+        return (sample_rate, audio) #, f"✅ Audio generated successfully using {model_choice} on {device}"
 
     except Exception as e:
         print(f"Error during generation: {str(e)}")
-        return None
+        return None #, f"❌ Error during generation: {str(e)}"
 
-def validate_input(text, model_choice):
-    """Quick validation without GPU"""
-    if not text.strip():
-        return "⚠️ Please enter text for speech generation."
-    if not model_choice:
-        return "⚠️ Please select a model."
-    return f"✅ Ready to generate with {model_choice}"
+# def validate_input(text, model_choice):
+#     """Quick validation without GPU"""
+#     if not text.strip():
+#         return "⚠️ Please enter text for speech generation."
+#     if not model_choice:
+#         return "⚠️ Please select a model."
+#     return f"✅ Ready to generate with {model_choice}"
 
 # Create Gradio interface
 with gr.Blocks(title="KaniTTS - Text to Speech", theme=gr.themes.Default()) as demo:
-    gr.Markdown("#
+    gr.Markdown("# KaniTTS: Fast and Expressive Speech Generation Model")
     gr.Markdown("Select a model and enter text to generate high-quality speech")
 
     with gr.Row():
@@ -139,7 +139,7 @@ with gr.Blocks(title="KaniTTS - Text to Speech", theme=gr.themes.Default()) as d
             generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
 
             # Quick validation button (CPU only)
-            validate_btn = gr.Button("🔍 Validate Input", variant="secondary")
+            # validate_btn = gr.Button("🔍 Validate Input", variant="secondary")
 
         with gr.Column(scale=1):
             audio_output = gr.Audio(
@@ -147,62 +147,79 @@ with gr.Blocks(title="KaniTTS - Text to Speech", theme=gr.themes.Default()) as d
                 type="numpy"
             )
 
-            status_text = gr.Textbox(
-                label="Status",
-                interactive=False,
-                value="Ready to generate speech"
-            )
+            # status_text = gr.Textbox(
+            #     label="Status",
+            #     interactive=False,
+            #     value="Ready to generate speech"
+            # )
 
     # GPU generation event
     generate_btn.click(
         fn=generate_speech_gpu,
         inputs=[text_input, model_dropdown],
-        outputs=[audio_output
+        outputs=[audio_output]
    )
 
-    # CPU validation event
-    validate_btn.click(
-        fn=validate_input,
-        inputs=[text_input, model_dropdown],
-        outputs=status_text
-    )
+
+
+    # Demo Examples
+    gr.Markdown("## 🎯 Demo Examples")
 
-    # Update status on input change
-    text_input.change(
-        fn=validate_input,
-        inputs=[text_input, model_dropdown],
-        outputs=status_text
-    )
+    def play_demo(text):
+        return demo_examples[text], f"Playing: {text}"
+
+    with gr.Row():
+        for text in list(demo_examples.keys())[:4]:
+            gr.Button(text).click(lambda t=text: play_demo(t), outputs=[audio_output])
+
+    with gr.Row():
+        for text in list(demo_examples.keys())[4:8]:
+            gr.Button(text).click(lambda t=text: play_demo(t), outputs=[audio_output])
+
+
+    # # CPU validation event
+    # validate_btn.click(
+    #     fn=validate_input,
+    #     inputs=[text_input, model_dropdown],
+    #     outputs=status_text
+    # )
+
+    # # Update status on input change
+    # text_input.change(
+    #     fn=validate_input,
+    #     inputs=[text_input, model_dropdown],
+    #     outputs=status_text
+    # )
 
     # Text examples
-    gr.Markdown("### 📝 Text Examples:")
-    examples = [
-        "Hello! How are you today?",
-        "Welcome to the world of artificial intelligence.",
-        "This is a demonstration of neural text-to-speech synthesis.",
-        "Zero GPU makes high-quality speech generation accessible to everyone!"
-    ]
+    # gr.Markdown("### 📝 Text Examples:")
+    # examples = [
+    #     "Hello! How are you today?",
+    #     "Welcome to the world of artificial intelligence.",
+    #     "This is a demonstration of neural text-to-speech synthesis.",
+    #     "Zero GPU makes high-quality speech generation accessible to everyone!"
+    # ]
 
-    gr.Examples(
-        examples=examples,
-        inputs=text_input,
-        label="Click on an example to use it"
-    )
+    # gr.Examples(
+    #     examples=examples,
+    #     inputs=text_input,
+    #     label="Click on an example to use it"
+    # )
 
-    # Information section
-    with gr.Accordion("ℹ️ Model Information", open=False):
-        gr.Markdown("""
-        **Available Models:**
-        - **Base Model**: Default pre-trained model for general use
-        - **Female Voice**: Optimized for female voice characteristics
-        - **Male Voice**: Optimized for male voice characteristics
+    # # Information section
+    # with gr.Accordion("ℹ️ Model Information", open=False):
+    #     gr.Markdown("""
+    #     **Available Models:**
+    #     - **Base Model**: Default pre-trained model for general use
+    #     - **Female Voice**: Optimized for female voice characteristics
+    #     - **Male Voice**: Optimized for male voice characteristics
 
-        **Features:**
-        - Powered by NVIDIA NeMo Toolkit
-        - High-quality 22kHz audio output
-        - Zero GPU acceleration for fast inference
-        - Support for long text sequences
-        """)
+    #     **Features:**
+    #     - Powered by NVIDIA NeMo Toolkit
+    #     - High-quality 22kHz audio output
+    #     - Zero GPU acceleration for fast inference
+    #     - Support for long text sequences
+    #     """)
 
 if __name__ == "__main__":
     demo.launch(
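Note on the new Demo import: app.py now builds its example buttons from demo_examples = Demo()(), indexes the result with demo_examples[text], and slices list(demo_examples.keys())[:4], so the diff assumes util.Demo is a callable object whose call returns a dict-like mapping from example prompt text to audio that gr.Audio(type="numpy") can play. The class below is only a minimal sketch of that assumed interface; the name Demo and the call pattern come from the diff, while the prompts, the 22050 Hz default, and the silent placeholder waveforms are invented for illustration and are not the Space's actual util.py.

import numpy as np


class Demo:
    """Hypothetical stand-in for util.Demo, matching how app.py consumes it:
    Demo()() yields a dict mapping prompt text -> (sample_rate, waveform),
    the tuple format accepted by gr.Audio(type="numpy")."""

    def __init__(self, sample_rate: int = 22050):
        self.sample_rate = sample_rate
        # Placeholder prompts; the real Space ships its own pre-rendered examples.
        self.prompts = [f"Example prompt {i}" for i in range(1, 9)]

    def __call__(self):
        examples = {}
        for prompt in self.prompts:
            # The real class would load pre-rendered audio from disk; a one-second
            # silent clip stands in for it in this sketch.
            examples[prompt] = (self.sample_rate, np.zeros(self.sample_rate, dtype=np.float32))
        return examples


# Mirrors the usage in app.py:
demo_examples = Demo()()
assert len(demo_examples) >= 8  # app.py slices the first eight keys into two button rows

One detail visible in the diff itself: play_demo returns an audio value plus a status string, while the demo buttons list only audio_output, presumably a leftover from the now commented-out status_text box.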