nazdridoy committed on
Commit
a6a746c
·
verified ·
1 Parent(s): c663c5b

feat(tts): add Dia model support and examples

Browse files

- [feat] Add "Dia (Auto)" to `TTS_MODEL_PRESETS` (utils.py:97)
- [feat] Add `nari-labs/Dia-1.6B` configuration to `TTS_MODEL_CONFIGS` (utils.py:114-118)
- [feat] Add Dia-specific example text to `TTS_EXAMPLE_TEXTS` (utils.py:187-188)
- [update] Update `TTS_EXAMPLE_AUDIO_URLS` by removing generic samples and adding a Chatterbox demo (utils.py:192-194)

Files changed (1) hide show
  1. utils.py +10 -4
utils.py CHANGED
@@ -94,6 +94,7 @@ TTS_MODEL_PRESETS = [
94
  ("Kokoro (Fal.ai)", "hexgrad/Kokoro-82M", "fal-ai"),
95
  ("Kokoro (Replicate)", "hexgrad/Kokoro-82M", "replicate"),
96
  ("Chatterbox (Fal.ai)", "ResembleAI/chatterbox", "fal-ai"),
 
97
  ]
98
 
99
  # Model-specific configurations for TTS
@@ -109,6 +110,12 @@ TTS_MODEL_CONFIGS = {
109
  "supports_voice": False,
110
  "supports_speed": False,
111
  "extra_body_params": ["audio_url", "exaggeration", "temperature", "cfg"]
 
 
 
 
 
 
112
  }
113
  }
114
 
@@ -177,15 +184,14 @@ TTS_EXAMPLE_TEXTS = [
177
  "The future belongs to those who believe in the beauty of their dreams and have the courage to pursue them.",
178
  "Science is not only compatible with spirituality; it is a profound source of spirituality.",
179
  "The only way to do great work is to love what you do. If you haven't found it yet, keep looking.",
180
- "Life is what happens when you're busy making other plans. Embrace every moment with gratitude."
 
181
  ]
182
 
183
  # Example audio URLs for Chatterbox TTS
184
  TTS_EXAMPLE_AUDIO_URLS = [
185
  "https://github.com/nazdridoy/kokoro-tts/raw/main/previews/demo.mp3",
186
- "https://huggingface.co/datasets/hf-internal-testing/fixtures/resolve/main/audio/sample_audio_1.mp3",
187
- "https://huggingface.co/datasets/hf-internal-testing/fixtures/resolve/main/audio/sample_audio_2.mp3",
188
- "https://www.soundjay.com/misc/sounds/bell-ringing-05.wav"
189
  ]
190
 
191
 
 
94
  ("Kokoro (Fal.ai)", "hexgrad/Kokoro-82M", "fal-ai"),
95
  ("Kokoro (Replicate)", "hexgrad/Kokoro-82M", "replicate"),
96
  ("Chatterbox (Fal.ai)", "ResembleAI/chatterbox", "fal-ai"),
97
+ ("Dia (Auto)", "nari-labs/Dia-1.6B", "auto"),
98
  ]
99
 
100
  # Model-specific configurations for TTS
 
110
  "supports_voice": False,
111
  "supports_speed": False,
112
  "extra_body_params": ["audio_url", "exaggeration", "temperature", "cfg"]
113
+ },
114
+ "nari-labs/Dia-1.6B": {
115
+ "type": "dia",
116
+ "supports_voice": False,
117
+ "supports_speed": False,
118
+ "extra_body_params": []
119
  }
120
  }
121
 
 
184
  "The future belongs to those who believe in the beauty of their dreams and have the courage to pursue them.",
185
  "Science is not only compatible with spirituality; it is a profound source of spirituality.",
186
  "The only way to do great work is to love what you do. If you haven't found it yet, keep looking.",
187
+ "Life is what happens when you're busy making other plans. Embrace every moment with gratitude.",
188
+ "[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now."
189
  ]
190
 
191
  # Example audio URLs for Chatterbox TTS
192
  TTS_EXAMPLE_AUDIO_URLS = [
193
  "https://github.com/nazdridoy/kokoro-tts/raw/main/previews/demo.mp3",
194
+ "https://storage.googleapis.com/chatterbox-demo-samples/prompts/male_rickmorty.mp3"
 
 
195
  ]
196
 
197