Simonlob committed on
Commit
46cf002
·
verified ·
1 Parent(s): 41423b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -57
app.py CHANGED
@@ -30,7 +30,7 @@ setup_dependencies()
30
 
31
  import spaces
32
  import gradio as gr
33
- from util import Config, NemoAudioPlayer, KaniModel
34
  import numpy as np
35
  import torch
36
 
@@ -52,6 +52,7 @@ models_configs = {
52
 
53
  # Global variables for models (loaded once)
54
  player = NemoAudioPlayer(Config())
 
55
  models = {}
56
 
57
  def initialize_models():
@@ -97,27 +98,26 @@ def generate_speech_gpu(text, model_choice):
97
  print(f"Generating speech with {model_choice}...")
98
  audio, _ = selected_model.run_model(text)
99
 
100
- # Convert to Gradio format (sample_rate, audio_data)
101
- sample_rate = 22050 # Standard sample rate for NeMo
102
  print("Speech generation completed!")
103
 
104
- return (sample_rate, audio), f"✅ Audio generated successfully using {model_choice} on {device}"
105
 
106
  except Exception as e:
107
  print(f"Error during generation: {str(e)}")
108
- return None, f"❌ Error during generation: {str(e)}"
109
 
110
- def validate_input(text, model_choice):
111
- """Quick validation without GPU"""
112
- if not text.strip():
113
- return "⚠️ Please enter text for speech generation."
114
- if not model_choice:
115
- return "⚠️ Please select a model."
116
- return f"✅ Ready to generate with {model_choice}"
117
 
118
  # Create Gradio interface
119
  with gr.Blocks(title="KaniTTS - Text to Speech", theme=gr.themes.Default()) as demo:
120
- gr.Markdown("# 🎤 KaniTTS - Text to Speech with Zero GPU")
121
  gr.Markdown("Select a model and enter text to generate high-quality speech")
122
 
123
  with gr.Row():
@@ -139,7 +139,7 @@ with gr.Blocks(title="KaniTTS - Text to Speech", theme=gr.themes.Default()) as d
139
  generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
140
 
141
  # Quick validation button (CPU only)
142
- validate_btn = gr.Button("🔍 Validate Input", variant="secondary")
143
 
144
  with gr.Column(scale=1):
145
  audio_output = gr.Audio(
@@ -147,62 +147,79 @@ with gr.Blocks(title="KaniTTS - Text to Speech", theme=gr.themes.Default()) as d
147
  type="numpy"
148
  )
149
 
150
- status_text = gr.Textbox(
151
- label="Status",
152
- interactive=False,
153
- value="Ready to generate speech"
154
- )
155
 
156
  # GPU generation event
157
  generate_btn.click(
158
  fn=generate_speech_gpu,
159
  inputs=[text_input, model_dropdown],
160
- outputs=[audio_output, status_text]
161
  )
162
 
163
- # CPU validation event
164
- validate_btn.click(
165
- fn=validate_input,
166
- inputs=[text_input, model_dropdown],
167
- outputs=status_text
168
- )
169
 
170
- # Update status on input change
171
- text_input.change(
172
- fn=validate_input,
173
- inputs=[text_input, model_dropdown],
174
- outputs=status_text
175
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
  # Text examples
178
- gr.Markdown("### 📝 Text Examples:")
179
- examples = [
180
- "Hello! How are you today?",
181
- "Welcome to the world of artificial intelligence.",
182
- "This is a demonstration of neural text-to-speech synthesis.",
183
- "Zero GPU makes high-quality speech generation accessible to everyone!"
184
- ]
185
 
186
- gr.Examples(
187
- examples=examples,
188
- inputs=text_input,
189
- label="Click on an example to use it"
190
- )
191
 
192
- # Information section
193
- with gr.Accordion("ℹ️ Model Information", open=False):
194
- gr.Markdown("""
195
- **Available Models:**
196
- - **Base Model**: Default pre-trained model for general use
197
- - **Female Voice**: Optimized for female voice characteristics
198
- - **Male Voice**: Optimized for male voice characteristics
199
 
200
- **Features:**
201
- - Powered by NVIDIA NeMo Toolkit
202
- - High-quality 22kHz audio output
203
- - Zero GPU acceleration for fast inference
204
- - Support for long text sequences
205
- """)
206
 
207
  if __name__ == "__main__":
208
  demo.launch(
 
30
 
31
  import spaces
32
  import gradio as gr
33
+ from util import Config, NemoAudioPlayer, KaniModel, Demo
34
  import numpy as np
35
  import torch
36
 
 
52
 
53
  # Global variables for models (loaded once)
54
  player = NemoAudioPlayer(Config())
55
+ demo_examples = Demo()()
56
  models = {}
57
 
58
  def initialize_models():
 
98
  print(f"Generating speech with {model_choice}...")
99
  audio, _ = selected_model.run_model(text)
100
 
101
+ sample_rate = 22050
 
102
  print("Speech generation completed!")
103
 
104
+ return (sample_rate, audio) #, f"✅ Audio generated successfully using {model_choice} on {device}"
105
 
106
  except Exception as e:
107
  print(f"Error during generation: {str(e)}")
108
+ return None #, f"❌ Error during generation: {str(e)}"
109
 
110
+ # def validate_input(text, model_choice):
111
+ # """Quick validation without GPU"""
112
+ # if not text.strip():
113
+ # return "⚠️ Please enter text for speech generation."
114
+ # if not model_choice:
115
+ # return "⚠️ Please select a model."
116
+ # return f"✅ Ready to generate with {model_choice}"
117
 
118
  # Create Gradio interface
119
  with gr.Blocks(title="KaniTTS - Text to Speech", theme=gr.themes.Default()) as demo:
120
+ gr.Markdown("# KaniTTS: Fast and Expressive Speech Generation Model")
121
  gr.Markdown("Select a model and enter text to generate high-quality speech")
122
 
123
  with gr.Row():
 
139
  generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
140
 
141
  # Quick validation button (CPU only)
142
+ # validate_btn = gr.Button("🔍 Validate Input", variant="secondary")
143
 
144
  with gr.Column(scale=1):
145
  audio_output = gr.Audio(
 
147
  type="numpy"
148
  )
149
 
150
+ # status_text = gr.Textbox(
151
+ # label="Status",
152
+ # interactive=False,
153
+ # value="Ready to generate speech"
154
+ # )
155
 
156
  # GPU generation event
157
  generate_btn.click(
158
  fn=generate_speech_gpu,
159
  inputs=[text_input, model_dropdown],
160
+ outputs=[audio_output]
161
  )
162
 
163
+
164
+
165
+ # Demo Examples
166
+ gr.Markdown("## 🎯 Demo Examples")
 
 
167
 
168
+ def play_demo(text):
169
+ return demo_examples[text], f"Playing: {text}"
170
+
171
+ with gr.Row():
172
+ for text in list(demo_examples.keys())[:4]:
173
+ gr.Button(text).click(lambda t=text: play_demo(t), outputs=[audio_output])
174
+
175
+ with gr.Row():
176
+ for text in list(demo_examples.keys())[4:8]:
177
+ gr.Button(text).click(lambda t=text: play_demo(t), outputs=[audio_output])
178
+
179
+
180
+ # # CPU validation event
181
+ # validate_btn.click(
182
+ # fn=validate_input,
183
+ # inputs=[text_input, model_dropdown],
184
+ # outputs=status_text
185
+ # )
186
+
187
+ # # Update status on input change
188
+ # text_input.change(
189
+ # fn=validate_input,
190
+ # inputs=[text_input, model_dropdown],
191
+ # outputs=status_text
192
+ # )
193
 
194
  # Text examples
195
+ # gr.Markdown("### 📝 Text Examples:")
196
+ # examples = [
197
+ # "Hello! How are you today?",
198
+ # "Welcome to the world of artificial intelligence.",
199
+ # "This is a demonstration of neural text-to-speech synthesis.",
200
+ # "Zero GPU makes high-quality speech generation accessible to everyone!"
201
+ # ]
202
 
203
+ # gr.Examples(
204
+ # examples=examples,
205
+ # inputs=text_input,
206
+ # label="Click on an example to use it"
207
+ # )
208
 
209
+ # # Information section
210
+ # with gr.Accordion("ℹ️ Model Information", open=False):
211
+ # gr.Markdown("""
212
+ # **Available Models:**
213
+ # - **Base Model**: Default pre-trained model for general use
214
+ # - **Female Voice**: Optimized for female voice characteristics
215
+ # - **Male Voice**: Optimized for male voice characteristics
216
 
217
+ # **Features:**
218
+ # - Powered by NVIDIA NeMo Toolkit
219
+ # - High-quality 22kHz audio output
220
+ # - Zero GPU acceleration for fast inference
221
+ # - Support for long text sequences
222
+ # """)
223
 
224
  if __name__ == "__main__":
225
  demo.launch(