File size: 12,695 Bytes
c1bee18
c2e6d7e
c1bee18
 
 
 
074f3bf
c1bee18
 
 
 
 
43a0ca3
0d77564
6244d01
c663c5b
 
 
c1bee18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd78fc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1bee18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c94952
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28a1352
0c94952
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1bee18
0d77564
43333ad
 
 
 
 
 
 
 
 
 
 
 
 
a6a746c
 
 
 
 
 
43333ad
 
 
6244d01
 
 
 
 
 
 
 
 
 
 
 
 
0d77564
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf154be
 
 
 
 
 
 
 
 
 
 
 
 
 
c1bee18
 
 
 
 
 
 
 
 
 
 
 
43a0ca3
 
 
a9e86b4
43a0ca3
 
 
 
 
 
 
 
 
0d77564
 
 
 
 
 
 
 
 
a6a746c
 
0d77564
 
43333ad
 
 
a6a746c
43333ad
 
c1bee18
 
 
 
 
 
 
 
 
 
 
7d06e0a
 
 
 
 
 
 
 
 
 
 
 
 
c1bee18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a50492
 
074f3bf
 
 
 
 
 
 
bde6cbf
 
 
 
 
 
 
 
 
 
074f3bf
bde6cbf
074f3bf
 
bde6cbf
 
 
 
 
 
 
 
 
 
 
9aff9bb
 
bde6cbf
 
9aff9bb
 
 
 
 
 
 
bde6cbf
9aff9bb
 
 
 
 
 
bde6cbf
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
"""
Utility functions and constants for AI-Inferoxy AI Hub.
Contains configuration constants and helper functions.
"""

import os
import re


# Configuration constants
# Default model ids (HuggingFace Hub "org/name" format), one per task tab.
DEFAULT_CHAT_MODEL = "openai/gpt-oss-20b"
DEFAULT_IMAGE_MODEL = "Qwen/Qwen-Image"
DEFAULT_IMAGE_TO_IMAGE_MODEL = "Qwen/Qwen-Image-Edit"
DEFAULT_TTS_MODEL = "hexgrad/Kokoro-82M"
DEFAULT_VIDEO_MODEL = "Wan-AI/Wan2.2-T2V-A14B"

# Unified default provider used by all non-chat tasks
DEFAULT_PROVIDER = "auto"

# Chat configuration
# Default sampling parameters and system prompt for chat completions.
CHAT_CONFIG = {
    "max_tokens": 1024,
    "temperature": 0.7,
    "top_p": 0.95,
    "system_message": "You are a helpful and friendly AI assistant. Provide clear, accurate, and helpful responses."
}

# Image generation configuration
# NOTE(review): seed == -1 presumably means "use a random seed" — confirm against the caller.
IMAGE_CONFIG = {
    "width": 1024,
    "height": 1024,
    "num_inference_steps": 20,
    "guidance_scale": 7.5,
    "seed": -1,
    "negative_prompt": "blurry, low quality, distorted, deformed, ugly, bad anatomy"
}

# Supported providers (unified across tasks)
# "auto" lets the backend pick a provider; the rest are explicit provider slugs.
PROVIDERS_UNIFIED = [
    "auto",
    "cerebras",
    "cohere",
    "fal-ai",
    "featherless-ai",
    "fireworks-ai",
    "groq",
    "hf-inference",
    "hyperbolic",
    "nebius",
    "novita",
    "nscale",
    "replicate",
    "sambanova",
    "together",
]

# Backwards compatibility exported lists
# Both aliases reference the SAME list object — mutating one mutates all.
CHAT_PROVIDERS = PROVIDERS_UNIFIED
IMAGE_PROVIDERS = PROVIDERS_UNIFIED

# Popular models for quick access
POPULAR_CHAT_MODELS = [
    "openai/gpt-oss-20b",
    "meta-llama/Llama-2-7b-chat-hf", 
    "microsoft/DialoGPT-medium",
    "google/flan-t5-base"
]

POPULAR_IMAGE_MODELS = [
    "Qwen/Qwen-Image",
    "black-forest-labs/FLUX.1-dev",
    "stabilityai/stable-diffusion-xl-base-1.0",
    "runwayml/stable-diffusion-v1-5"
]

# Suggested model lists (users can still input any model id)
# These populate dropdown suggestions; they do not restrict free-text input.
SUGGESTED_CHAT_MODELS = [
    "openai/gpt-oss-20b",
    "openai/gpt-oss-120b",
    "deepseek-ai/DeepSeek-V3.1",
    "zai-org/GLM-4.5",
    "Qwen/Qwen3-8B",
    "meta-llama/Llama-3.1-8B-Instruct",
    "deepseek-ai/DeepSeek-R1",
    "moonshotai/Kimi-K2-Instruct",
    "Qwen/Qwen3-Coder-30B-A3B-Instruct",
    "CohereLabs/command-a-reasoning-08-2025",
]

SUGGESTED_IMAGE_MODELS = [
    "Qwen/Qwen-Image",
    "black-forest-labs/FLUX.1-dev",
    "black-forest-labs/FLUX.1-Krea-dev",
    "stabilityai/stable-diffusion-xl-base-1.0",
    "black-forest-labs/FLUX.1-schnell",
    "UmeAiRT/FLUX.1-dev-LoRA-Modern_Pixel_art",
    "xey/sldr_flux_nsfw_v2-studio",
    "HiDream-ai/HiDream-I1-Full",
    "Kwai-Kolors/Kolors",
]

SUGGESTED_IMAGE_TO_IMAGE_MODELS = [
    "Qwen/Qwen-Image-Edit",
    "Kontext-Style/Ghibli_lora",
    "black-forest-labs/FLUX.1-Kontext-dev",
    "fofr/kontext-make-person-real",
    "jerrrycans/watermark20000",
    "fal/Pencil-Drawing-Kontext-Dev-LoRA",
]

SUGGESTED_VIDEO_MODELS = [
    "Wan-AI/Wan2.2-T2V-A14B",
    "Wan-AI/Wan2.2-TI2V-5B",
    "tencent/HunyuanVideo",
    "Wan-AI/Wan2.2-T2V-A14B-Diffusers",
    "zai-org/CogVideoX-5b",
    "Wan-AI/Wan2.1-T2V-14B",
    "genmo/mochi-1-preview",
    "Wan-AI/Wan2.1-T2V-1.3B",
    "Lightricks/LTX-Video-0.9.7-dev",
    "Lightricks/LTX-Video-0.9.5",
    "Lightricks/LTX-Video-0.9.7-distilled",
]



# Model-specific configurations for TTS
# Per-model capability flags:
#   type               — internal family tag used to branch model-specific handling
#   supports_voice     — whether a voice id can be selected (see TTS_VOICES)
#   supports_speed     — whether a playback-speed parameter is accepted
#   extra_body_params  — extra request-body keys this model understands
TTS_MODEL_CONFIGS = {
    "hexgrad/Kokoro-82M": {
        "type": "kokoro",
        "supports_voice": True,
        "supports_speed": True,
        "extra_body_params": ["voice", "speed"]
    },
    "ResembleAI/chatterbox": {
        "type": "chatterbox", 
        "supports_voice": False,
        "supports_speed": False,
        "extra_body_params": ["audio_url", "exaggeration", "temperature", "cfg"]
    },
    "nari-labs/Dia-1.6B": {
        "type": "dia",
        "supports_voice": False,
        "supports_speed": False,
        "extra_body_params": []
    }
}

# -----------------------------
# Text-to-Video configuration
# -----------------------------


# Example prompts for text-to-video generation (shown as clickable examples in the UI)
VIDEO_EXAMPLE_PROMPTS = [
    "A young man walking on the street",
    "A corgi puppy running through a field of flowers, cinematic",
    "A futuristic city skyline at sunset with flying cars, 4k",
    "A serene beach with gentle waves and palm trees swaying",
]

# Voice options for Kokoro TTS (based on the reference app)
# Maps a human-readable label (flag + gender marker + name) to the Kokoro voice id.
# Id prefix convention: a=American / b=British, f=female / m=male — TODO confirm upstream.
TTS_VOICES = {
    'πŸ‡ΊπŸ‡Έ 🚺 Heart ❀️': 'af_heart',
    'πŸ‡ΊπŸ‡Έ 🚺 Bella πŸ”₯': 'af_bella',
    'πŸ‡ΊπŸ‡Έ 🚺 Nicole 🎧': 'af_nicole',
    'πŸ‡ΊπŸ‡Έ 🚺 Aoede': 'af_aoede',
    'πŸ‡ΊπŸ‡Έ 🚺 Kore': 'af_kore',
    'πŸ‡ΊπŸ‡Έ 🚺 Sarah': 'af_sarah',
    'πŸ‡ΊπŸ‡Έ 🚺 Nova': 'af_nova',
    'πŸ‡ΊπŸ‡Έ 🚺 Sky': 'af_sky',
    'πŸ‡ΊπŸ‡Έ 🚺 Alloy': 'af_alloy',
    'πŸ‡ΊπŸ‡Έ 🚺 Jessica': 'af_jessica',
    'πŸ‡ΊπŸ‡Έ 🚺 River': 'af_river',
    'πŸ‡ΊπŸ‡Έ 🚹 Michael': 'am_michael',
    'πŸ‡ΊπŸ‡Έ 🚹 Fenrir': 'am_fenrir',
    'πŸ‡ΊπŸ‡Έ 🚹 Puck': 'am_puck',
    'πŸ‡ΊπŸ‡Έ 🚹 Echo': 'am_echo',
    'πŸ‡ΊπŸ‡Έ 🚹 Eric': 'am_eric',
    'πŸ‡ΊπŸ‡Έ 🚹 Liam': 'am_liam',
    'πŸ‡ΊπŸ‡Έ 🚹 Onyx': 'am_onyx',
    'πŸ‡ΊπŸ‡Έ 🚹 Santa': 'am_santa',
    'πŸ‡ΊπŸ‡Έ 🚹 Adam': 'am_adam',
    'πŸ‡¬πŸ‡§ 🚺 Emma': 'bf_emma',
    'πŸ‡¬πŸ‡§ 🚺 Isabella': 'bf_isabella',
    'πŸ‡¬πŸ‡§ 🚺 Alice': 'bf_alice',
    'πŸ‡¬πŸ‡§ 🚺 Lily': 'bf_lily',
    'πŸ‡¬πŸ‡§ 🚹 George': 'bm_george',
    'πŸ‡¬πŸ‡§ 🚹 Fable': 'bm_fable',
    'πŸ‡¬πŸ‡§ 🚹 Lewis': 'bm_lewis',
    'πŸ‡¬πŸ‡§ 🚹 Daniel': 'bm_daniel',
}

# Example prompts for chat (shown as clickable examples in the UI)
CHAT_EXAMPLE_PROMPTS = [
    "What's a polite way to introduce myself at a networking event?",
    "Can you suggest a fun icebreaker question for a group chat?",
    "Explain the concept of entropy in simple terms suitable for a high school student.",
    "Summarize the main differences between classical and operant conditioning.",
    "Is it possible for artificial intelligence to possess consciousness? Discuss briefly.",
    "What does 'the map is not the territory' mean in philosophy?",
    "Write a Python function to reverse a linked list.",
    "How can I optimize a SQL query for faster performance?",
    "Suggest 3 imaginative prompts for generating images of futuristic cities.",
    "Give me 3 creative prompts for generating surreal animal portraits.",
]

# Example prompts for image generation
IMAGE_EXAMPLE_PROMPTS = [
    "A majestic dragon flying over a medieval castle, epic fantasy art, detailed, 8k",
    "A serene Japanese garden with cherry blossoms, zen atmosphere, peaceful, high quality",
    "A futuristic cityscape with flying cars and neon lights, cyberpunk style, cinematic",
    "A cute robot cat playing with yarn, adorable, cartoon style, vibrant colors",
    "A magical forest with glowing mushrooms and fairy lights, fantasy, ethereal beauty",
    "Portrait of a wise old wizard with flowing robes, magical aura, fantasy character art",
    "A cozy coffee shop on a rainy day, warm lighting, peaceful atmosphere, detailed",
    "An astronaut floating in space with Earth in background, photorealistic, stunning"
]

# Example prompts for image-to-image generation
IMAGE_TO_IMAGE_EXAMPLE_PROMPTS = [
    "Turn the cat into a tiger with stripes and fierce expression",
    "Turn this image into the Ghibli style.",
    "Make the background a magical forest with glowing mushrooms",
    "Change the style to vintage comic book with bold colors",
    "Add a superhero cape and mask to the person",
    "Transform the building into a futuristic skyscraper",
    "Make the flowers bloom and add butterflies around them",
    "Change the weather to a stormy night with lightning",
    "Add a magical portal in the background with sparkles"
]

# Example texts for text-to-speech generation
# The last entry uses [S1]/[S2] speaker tags — presumably Dia's dialogue format; verify.
TTS_EXAMPLE_TEXTS = [
    "Hello! Welcome to the amazing world of AI-powered text-to-speech technology.",
    "The quick brown fox jumps over the lazy dog. This pangram contains every letter of the alphabet.",
    "In a world where technology advances at lightning speed, artificial intelligence continues to reshape our future.",
    "Imagine a world where machines can understand and respond to human emotions with perfect clarity.",
    "The future belongs to those who believe in the beauty of their dreams and have the courage to pursue them.",
    "Science is not only compatible with spirituality; it is a profound source of spirituality.",
    "The only way to do great work is to love what you do. If you haven't found it yet, keep looking.",
    "Life is what happens when you're busy making other plans. Embrace every moment with gratitude.",
    "[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now."
]

# Example audio URLs for Chatterbox TTS (reference voices for voice cloning)
TTS_EXAMPLE_AUDIO_URLS = [
    "https://github.com/nazdridoy/kokoro-tts/raw/main/previews/demo.mp3",
    "https://storage.googleapis.com/chatterbox-demo-samples/prompts/male_rickmorty.mp3"
]


def get_proxy_key():
    """Return the proxy API key from the PROXY_KEY environment variable, or None."""
    return os.environ.get("PROXY_KEY")


def validate_proxy_key():
    """Check that the proxy API key is configured.

    Returns:
        tuple: ``(True, "")`` when PROXY_KEY is set, otherwise
        ``(False, <user-facing error message>)``.
    """
    if get_proxy_key():
        return True, ""
    return False, "❌ Error: PROXY_KEY not found in environment variables. Please set it in your HuggingFace Space secrets."


def get_proxy_url():
    """Return the proxy base URL from the PROXY_URL environment variable, or None."""
    return os.environ.get("PROXY_URL")


def validate_proxy_url():
    """Check that the proxy URL is configured.

    Returns:
        tuple: ``(True, "")`` when PROXY_URL is set, otherwise
        ``(False, <user-facing error message>)``.
    """
    if get_proxy_url():
        return True, ""
    return False, "❌ Error: PROXY_URL not found in environment variables. Please set it in your HuggingFace Space secrets."




def format_error_message(error_type, error_message):
    """Build a uniformly formatted error string: '❌ <type>: <message>'."""
    return "❌ {}: {}".format(error_type, error_message)


def format_success_message(operation, details=""):
    """Build a uniformly formatted success string.

    With *details*: 'βœ… <operation> completed successfully: <details>'.
    Without: 'βœ… <operation> completed successfully!'.
    """
    base = f"βœ… {operation} completed successfully"
    return f"{base}: {details}" if details else f"{base}!"


def get_gradio_theme():
    """Return the application's default Gradio theme (Soft), or None if gradio is unavailable."""
    try:
        import gradio
    except ImportError:
        return None
    return gradio.themes.Soft()


# -----------------------------
# Reasoning (<think>) utilities
# -----------------------------

def render_with_reasoning_toggle(text: str, show_reasoning: bool) -> str:
    """Render assistant text while optionally revealing content inside <think>...</think>.

    Behavior:
    - When show_reasoning is True:
      * Replace the opening <think> tag with a collapsible HTML <details> block and an opening
        fenced code block. Stream reasoning tokens inside this block as they arrive.
      * Replace the closing </think> tag with the closing fence and </details> when it appears.
    - When show_reasoning is False:
      * Remove complete <think>...</think> blocks.
      * For partial streams (no closing tag yet), trim everything from the first <think> onward.

    Safe to call on every streamed chunk; conversions are idempotent.
    """
    if not isinstance(text, str):
        return text

    # If we are NOT showing reasoning, remove it entirely. For partial streams, hide from <think> onwards.
    if not show_reasoning:
        if "<think>" not in text:
            return text
        if "</think>" not in text:
            return text.split("<think>", 1)[0]
        # Remove complete <think>...</think> blocks
        pattern_strip = re.compile(r"<think>[\s\S]*?</think>", re.IGNORECASE)
        return pattern_strip.sub("", text)

    # Show reasoning: stream it as it arrives by converting tags into a collapsible details block
    open_block_open = "<details open><summary>Reasoning</summary>\n\n```text\n"
    open_block_closed = "<details><summary>Reasoning</summary>\n\n```text\n"
    close_block = "\n```\n</details>\n"

    # If the closing tag is not present yet, keep the block expanded while streaming
    if "</think>" not in text:
        # Replace any raw <think> with an expanded details block
        text = re.sub(r"<think>", open_block_open, text, flags=re.IGNORECASE)
        # If for any reason a closed details opening exists, switch it to open (expanded)
        text = text.replace(open_block_closed, open_block_open)
        return text

    # If the closing tag is present, render a collapsed block by default
    # 1) Ensure opening is the closed variant
    text = re.sub(r"<think>", open_block_closed, text, flags=re.IGNORECASE)
    text = text.replace(open_block_open, open_block_closed)
    # 2) Close the block
    text = re.sub(r"</think>", close_block, text, flags=re.IGNORECASE)

    return text