Spaces:

nazdridoy
/

inferoxy-hub

Running

App Files Community

nazdridoy committed on Aug 22

Commit

a6a746c

verified ·

1 Parent(s): c663c5b

feat(tts): add Dia model support and examples

Browse files

- [feat] Add "Dia (Auto)" to `TTS_MODEL_PRESETS` (utils.py:97)
- [feat] Add `nari-labs/Dia-1.6B` configuration to `TTS_MODEL_CONFIGS` (utils.py:111-115)
- [feat] Add Dia-specific example text to `TTS_EXAMPLE_TEXTS` (utils.py:186-187)
- [update] Update `TTS_EXAMPLE_AUDIO_URLS` by removing generic samples and adding a Chatterbox demo (utils.py:192-194)

Files changed (1) hide show

utils.py +10 -4

utils.py CHANGED Viewed

@@ -94,6 +94,7 @@ TTS_MODEL_PRESETS = [
     ("Kokoro (Fal.ai)", "hexgrad/Kokoro-82M", "fal-ai"),
     ("Kokoro (Replicate)", "hexgrad/Kokoro-82M", "replicate"),
     ("Chatterbox (Fal.ai)", "ResembleAI/chatterbox", "fal-ai"),
 ]
 # Model-specific configurations for TTS
@@ -109,6 +110,12 @@ TTS_MODEL_CONFIGS = {
         "supports_voice": False,
         "supports_speed": False,
         "extra_body_params": ["audio_url", "exaggeration", "temperature", "cfg"]
     }
 }
@@ -177,15 +184,14 @@ TTS_EXAMPLE_TEXTS = [
     "The future belongs to those who believe in the beauty of their dreams and have the courage to pursue them.",
     "Science is not only compatible with spirituality; it is a profound source of spirituality.",
     "The only way to do great work is to love what you do. If you haven't found it yet, keep looking.",
-    "Life is what happens when you're busy making other plans. Embrace every moment with gratitude."
 ]
 # Example audio URLs for Chatterbox TTS
 TTS_EXAMPLE_AUDIO_URLS = [
     "https://github.com/nazdridoy/kokoro-tts/raw/main/previews/demo.mp3",
-    "https://huggingface.co/datasets/hf-internal-testing/fixtures/resolve/main/audio/sample_audio_1.mp3",
-    "https://huggingface.co/datasets/hf-internal-testing/fixtures/resolve/main/audio/sample_audio_2.mp3",
-    "https://www.soundjay.com/misc/sounds/bell-ringing-05.wav"
 ]

     ("Kokoro (Fal.ai)", "hexgrad/Kokoro-82M", "fal-ai"),
     ("Kokoro (Replicate)", "hexgrad/Kokoro-82M", "replicate"),
     ("Chatterbox (Fal.ai)", "ResembleAI/chatterbox", "fal-ai"),
+    ("Dia (Auto)", "nari-labs/Dia-1.6B", "auto"),
 ]
 # Model-specific configurations for TTS
         "supports_voice": False,
         "supports_speed": False,
         "extra_body_params": ["audio_url", "exaggeration", "temperature", "cfg"]
+    },
+    "nari-labs/Dia-1.6B": {
+        "type": "dia",
+        "supports_voice": False,
+        "supports_speed": False,
+        "extra_body_params": []
     }
 }
     "The future belongs to those who believe in the beauty of their dreams and have the courage to pursue them.",
     "Science is not only compatible with spirituality; it is a profound source of spirituality.",
     "The only way to do great work is to love what you do. If you haven't found it yet, keep looking.",
+    "Life is what happens when you're busy making other plans. Embrace every moment with gratitude.",
+    "[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now."
 ]
 # Example audio URLs for Chatterbox TTS
 TTS_EXAMPLE_AUDIO_URLS = [
     "https://github.com/nazdridoy/kokoro-tts/raw/main/previews/demo.mp3",
+    "https://storage.googleapis.com/chatterbox-demo-samples/prompts/male_rickmorty.mp3"
 ]