Spaces: Runtime error

Update app.py

app.py (CHANGED)
Old version (lines removed by this commit are prefixed with "-"; several removed lines are truncated):

@@ -6,35 +6,67 @@ from datetime import datetime
  import os

  # --- Configuration ---
- #
  model_id = "Tesslate/Tessa-T1-14B"
-
-

  # --- Text Content ---
-
-

  description = f"""
-
-
-
  """

-
-
-
-
-
  """

- join_us = """
-
-
-
-
-
-
  """

  # --- Model and Tokenizer Loading ---

@@ -42,24 +74,26 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
  print(f"Using device: {device}")

  # Get the token from environment variables
- hf_token = os.getenv('
  if not hf_token:
      # Try to load from Hugging Face login if available, otherwise raise error
      try:
-         from huggingface_hub import HfApi
-         hf_token =
          if not hf_token:
-             raise ValueError("HF token not found. Please set
          print("Using token from Hugging Face login.")
      except ImportError:
-         raise ValueError("huggingface_hub not installed. Please set the
      except Exception as e:
-         raise ValueError(f"Please set the
-

  print(f"Loading Tokenizer: {model_id}")
  # Initialize tokenizer and model with token authentication
- #
  tokenizer = AutoTokenizer.from_pretrained(
      model_id,
      token=hf_token,

@@ -67,271 +101,218 @@ tokenizer = AutoTokenizer.from_pretrained(
  )

  print(f"Loading Model: {model_id}")
- # Load the model
- # device_map="auto" automatically distributes the model across available GPUs/CPU
  model = AutoModelForCausalLM.from_pretrained(
      model_id,
      token=hf_token,
      device_map="auto",
-     torch_dtype=torch.bfloat16,
      trust_remote_code=True
  )
  print("Model loaded successfully.")

- # Attempt to get config, handle potential errors
  try:
      config_json = model.config.to_dict()
  except Exception as e:
      print(f"Could not retrieve model config: {e}")
-
-
- # --- Helper Functions ---
- def format_model_info(config):
-     if "error" in config:
-         return f"**Error:** {config['error']}"
-
-     info = []
-     important_keys = [
-         "model_type", "vocab_size", "hidden_size", "num_attention_heads",
-         "num_hidden_layers", "max_position_embeddings", "torch_dtype"
-     ]
-     # Add other potential keys if needed based on common model architectures
-     potential_keys = ["intermediate_size", "rms_norm_eps", "rope_theta"]
-     all_keys_to_check = important_keys + potential_keys
-
-     for key in all_keys_to_check:
-         if key in config:
-             value = config[key]
-             # Convert torch_dtype to string representation if it exists
-             if key == "torch_dtype" and value is not None and hasattr(value, "name"):
-                 value = value.name
-             elif value is None:
-                 value = "Not specified"
-             info.append(f"**{key.replace('_', ' ').title()}:** {value}")
-
-     if not info:
-         return "Model configuration details not available or keys not found."
-
-     return "\n".join(info)

  def format_tokenizer_info(tokenizer_instance):
      try:
          info = [
-             f"**Tokenizer Class:** {tokenizer_instance.__class__.__name__}",
              f"**Vocabulary Size:** {tokenizer_instance.vocab_size}",
              f"**Model Max Length:** {tokenizer_instance.model_max_length}",
-             f"**
-             f"**
-             f"**BOS Token:** {tokenizer_instance.bos_token} (ID: {tokenizer_instance.bos_token_id})",
-             f"**UNK Token:** {tokenizer_instance.unk_token} (ID: {tokenizer_instance.unk_token_id})",
          ]
          return "\n".join(info)
      except Exception as e:
          print(f"Error getting tokenizer info: {e}")
          return f"Could not retrieve full tokenizer details. Vocab size: {getattr(tokenizer_instance, 'vocab_size', 'N/A')}"

-
-
-
-
-
- #
- #
- #
      messages = []
      if system_prompt and system_prompt.strip():
          messages.append({"role": "system", "content": system_prompt})
      messages.append({"role": "user", "content": user_prompt})

-     # Use the tokenizer's apply_chat_template method if available
      try:
-
-
      except Exception as e:
-         # Fallback
-         print(f"Could not use apply_chat_template (Error: {e}). Falling back to
          prompt_parts = []
          if system_prompt and system_prompt.strip():
              prompt_parts.append(f"System: {system_prompt}")
-         prompt_parts.append(f"
-         prompt_parts.append("
          full_prompt = "\n".join(prompt_parts)

-     print(f"\n--- Generating

-     #
-
-

-     # Check max_new_tokens against potential model limits (optional)
-     # max_possible_tokens = getattr(model.config, 'max_position_embeddings', 4096) - inputs['input_ids'].shape[1]
-     # if max_new_tokens > max_possible_tokens:
-     #     print(f"Warning: max_new_tokens ({max_new_tokens}) exceeds model's likely capacity ({max_possible_tokens}). Clamping.")
-     #     max_new_tokens = max(1, max_possible_tokens) # Ensure at least 1 token can be generated

      # Generate response
-     # Use torch.inference_mode() for efficiency
      with torch.inference_mode():
-         outputs = model.generate(
-             **inputs,
-             max_new_tokens=int(max_new_tokens), # Ensure it's an int
-             temperature=float(temperature),
-             top_p=float(top_p),
-             top_k=int(top_k),
-             repetition_penalty=float(repetition_penalty),
-             do_sample=True if temperature > 0 else False, # Only sample if temperature > 0
-             pad_token_id=tokenizer.eos_token_id,
-             eos_token_id=tokenizer.eos_token_id # Explicitly set EOS token ID
-         )

-     # Decode and
-     # We need to decode only the newly generated tokens
      input_length = inputs['input_ids'].shape[1]
      generated_tokens = outputs[0][input_length:]
      response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

-
-     # response = response.replace("<|end_of_turn|>", "").strip() # Example cleanup
-
-     # Ensure output doesn't exceed 10k tokens (already limited by max_new_tokens slider, but as a safeguard)
-     # This check is mostly symbolic here as max_new_tokens is the primary control.
-     response_tokens = tokenizer(response, return_tensors="pt").input_ids.shape[1]
-     if response_tokens > 10000:
-         print(f"Warning: Generated response exceeded 10k tokens ({response_tokens}). Truncating.")
-         # This part is tricky, as truncating mid-thought is bad.
-         # A better approach is to rely on max_new_tokens slider limit.
-         # For demonstration, we could truncate, but it's not ideal:
-         # truncated_ids = tokenizer(response, return_tensors="pt").input_ids[0, :10000]
-         # response = tokenizer.decode(truncated_ids, skip_special_tokens=True)
-         pass # Relying on max_new_tokens control instead of hard truncation here.
-
-
      return response.strip()

  # --- Gradio Interface ---
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
      gr.Markdown(Title)

      with gr.Row():
-         with gr.Column(scale=
-
-         with gr.Column(scale=1):
-             gr.Markdown(training) # Show generalized training info
-
-     with gr.Row():
-         with gr.Column(scale=1):
              with gr.Group():
-                 gr.
-
-
-
-
-
-                 gr.


-
-             with gr.Group():
-                 gr.Markdown(join_us) # Keep the community links section

-     with gr.Row():
          with gr.Column(scale=2):
-
-
-                 label="
-
-                 lines=
-
              )

-

-
-
-             output = gr.Textbox(
-                 label=f"{model_id.split('/')[-1]}", # Use model name in label
-                 lines=18, # Increased lines for output
-                 show_copy_button=True
-             )

-             with gr.Accordion("Advanced Parameters", open=False):
-                 temperature = gr.Slider(
-                     minimum=0.0, # Allow 0 for deterministic output
-                     maximum=2.0,
-                     value=0.6,
-                     step=0.1,
-                     label="Temperature",
-                     info="Lower values make output more deterministic, higher values increase randomness."
-                 )
-                 # Max length slider restricted to 2048 to fit typical context windows and prevent excessive generation
-                 max_new_tokens = gr.Slider(
-                     minimum=50,
-                     maximum=min(getattr(model.config, 'max_position_embeddings', 4096), 10000), # Set max based on model config or 10k limit
-                     value=512,
-                     step=16,
-                     label="Max New Tokens",
-                     info=f"Maximum number of tokens to generate. Max capped at {min(getattr(model.config, 'max_position_embeddings', 4096), 10000)}."
-                 )
-                 top_p = gr.Slider(
-                     minimum=0.1,
-                     maximum=1.0,
-                     value=0.9,
-                     step=0.05,
-                     label="Top-p (nucleus sampling)",
-                     info="Considers tokens with cumulative probability >= top_p."
-                 )
-                 top_k = gr.Slider(
-                     minimum=1,
-                     maximum=200,
-                     value=50,
-                     step=1,
-                     label="Top-k",
-                     info="Considers the top k most likely tokens."
-                 )
-                 repetition_penalty = gr.Slider(
-                     minimum=1.0,
-                     maximum=2.0,
-                     value=1.15,
-                     step=0.05,
-                     label="Repetition Penalty",
-                     info="Penalizes repeated tokens. 1.0 means no penalty."
-                 )

-     #
      gr.Examples(
          examples=[
-             #
              [
-                 "You are
-                 "Hello!
-                 0.
              ],
              [
-                 "You are an expert in
-                 "
-                 0.
              ],
              [
-                 "You are a
-                 "Write a short
-                 0.
              ],
              [
-                 "You are
-                 "
-                 0.
              ],
-
-                 "You are a helpful
-                 "
-                 0.
              ]
          ],
          inputs=[

@@ -341,22 +322,23 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
              max_new_tokens,
              top_p,
              repetition_penalty,
-             top_k
          ],
          outputs=output,
-         label="
      )

-     #
      generate_btn.click(
          fn=generate_response,
-         inputs=[system_prompt, user_prompt, temperature, max_new_tokens, top_p, repetition_penalty, top_k],
          outputs=output,
-         api_name="generate"
      )

  # Launch the demo
  if __name__ == "__main__":
-     # share=True creates a public link (useful for Colab/remote)
-     #
-     demo.queue().launch(debug=True, share=False
New version (lines added by this commit are prefixed with "+"):

@@ -6,35 +6,67 @@ from datetime import datetime
  import os

  # --- Configuration ---
+ # Updated model ID
  model_id = "Tesslate/Tessa-T1-14B"
+ creator_link = "https://huggingface.co/TesslateAI"
+ model_link = f"https://huggingface.co/{model_id}"
+ website_link = "https://tesslate.com"
+ discord_link = "https://discord.gg/DkzMzwBTaw"

  # --- Text Content ---
+ Title = f"""
+ <div style="text-align: center; margin-bottom: 20px;">
+ <img src="https://huggingface.co/Tesslate/Tessa-T1-14B/resolve/main/tesslate_logo_color.png?download=true" alt="Tesslate Logo" style="height: 80px; margin-bottom: 10px;">
+ <h1 style="margin-bottom: 5px;">Welcome to the Tessa-T1-14B Demo</h1>
+ <p style="font-size: 1.1em;">Experience the power of specialized React reasoning!</p>
+ <p>Model by <a href="{creator_link}" target="_blank">TesslateAI</a> | <a href="{model_link}" target="_blank">View on Hugging Face</a></p>
+ </div>
+ """

  description = f"""
+ Interact with **[{model_id}]({model_link})**, an innovative 14B parameter transformer model fine-tuned from Qwen2.5-Coder-14B-Instruct.
+ Tessa-T1 specializes in **React frontend development**, leveraging advanced reasoning to autonomously generate well-structured, semantic React components.
+ It's designed for integration into AI coding agents and autonomous frontend systems.
  """

+ about_tesslate = f"""
+ ## About Tesslate & Our Vision
+ <img src="https://huggingface.co/Tesslate/Tessa-T1-14B/resolve/main/tesslate_logo_notext.png?download=true" alt="Tesslate Icon" style="height: 40px; float: left; margin-right: 10px;">
+ Hi everyone, I'm Manav, founder of Tesslate, and we're on a mission to revolutionize AI by putting powerful reasoning models into your hands.
+
+ Today, the AI landscape is dominated by massive frontier models: large, costly, and slow. At Tesslate, we see things differently. The next wave of AI disruption won't come from sheer size; it'll be driven by **speed, specialization, and precision reasoning**. Smaller, specialized models aren't just faster; they're smarter and more efficient.
+
+ Our story began when we released a UI-generation model on Hugging Face that didn't just replicate patterns; it could reason through entire component hierarchies. It resonated instantly, hitting over 10,000 downloads in weeks. That early success validated our vision, and we doubled down.
+
+ At Tesslate, we build lean, intelligent models that:
+ * **Think** like human agents
+ * **Reason** through complex, real-world workflows
+ * **Execute** like elite developers, designers, and analysts
+
+ We've already delivered:
+ * **UIGEN-T1.5:** Creating stunning, editable interfaces (React, Tailwind, Three.js)
+ * **Tessa-T1:** A specialized reasoning engine optimized for React development and AI agents (You are here!)
+ * **Synthia S1:** Our flagship general-reasoning model, proving powerful reasoning capabilities beyond STEM into creativity and storytelling.
+
+ Our vision is bigger. We aim to be the **#1 trusted brand in fast, specialized AI**, covering training, inference, real-time agent actions, infrastructure, research, and innovative products. We're already piloting with industry-leading clients tackling everything from sophisticated design systems to real-time analytics.
+
+ **Join us!** We're seeking strategic advice, introductions, compute resources, and capital.
+ Visit **[tesslate.com]({website_link})** to learn more and connect.
  """

+ join_us = f"""
+ <div style="text-align: center;">
+ <h3 style="margin-bottom: 10px;">Connect with Tesslate</h3>
+ <a href="{discord_link}" target="_blank" style="text-decoration: none; margin: 0 10px;">
+ <img src="https://img.shields.io/discord/1225631184402124842?label=Discord&logo=discord&style=for-the-badge&color=5865F2" alt="Join us on Discord">
+ </a>
+ <a href="{website_link}" target="_blank" style="text-decoration: none; margin: 0 10px;">
+ <img src="https://img.shields.io/badge/Website-tesslate.com-blue?style=for-the-badge&logo=googlechrome&logoColor=white" alt="Visit tesslate.com">
+ </a>
+ <a href="{model_link}" target="_blank" style="text-decoration: none; margin: 0 10px;">
+ <img src="https://img.shields.io/badge/🤗%20Model-Tessa--T1--14B-yellow?style=for-the-badge&logo=huggingface" alt="Tessa-T1-14B on Hugging Face">
+ </a>
+ </div>
  """

  # --- Model and Tokenizer Loading ---

@@ -42,24 +74,26 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
  print(f"Using device: {device}")

  # Get the token from environment variables
+ hf_token = os.getenv('HF_TOKEN') # Standard env var name for HF token
  if not hf_token:
      # Try to load from Hugging Face login if available, otherwise raise error
      try:
+         from huggingface_hub import HfApi, HfFolder
+         hf_token = HfFolder.get_token() # Use HfFolder to get token saved by login
+         if not hf_token:
+             # If still not found, try HfApi (less common for user login token)
+             hf_token = HfApi().token
          if not hf_token:
+             raise ValueError("HF token not found. Please set HF_TOKEN env var or login via `huggingface-cli login`.")
          print("Using token from Hugging Face login.")
      except ImportError:
+         raise ValueError("huggingface_hub not installed. Please set the HF_TOKEN environment variable or install huggingface_hub.")
      except Exception as e:
+         raise ValueError(f"HF token acquisition failed. Please set the HF_TOKEN environment variable or login via `huggingface-cli login`. Error: {e}")

  print(f"Loading Tokenizer: {model_id}")
  # Initialize tokenizer and model with token authentication
+ # trust_remote_code=True is necessary for models with custom code (like Qwen2)
  tokenizer = AutoTokenizer.from_pretrained(
      model_id,
      token=hf_token,

@@ -67,271 +101,218 @@ tokenizer = AutoTokenizer.from_pretrained(
  )

  print(f"Loading Model: {model_id}")
+ # Load the model with bfloat16 and automatic device mapping
  model = AutoModelForCausalLM.from_pretrained(
      model_id,
      token=hf_token,
      device_map="auto",
+     torch_dtype=torch.bfloat16,
      trust_remote_code=True
  )
  print("Model loaded successfully.")

+ # Attempt to get config, handle potential errors
  try:
      config_json = model.config.to_dict()
+     model_config_info = f"""
+ **Model Type:** {config_json.get('model_type', 'N/A')}
+ **Architecture:** {config_json.get('architectures', ['N/A'])[0]}
+ **Vocab Size:** {config_json.get('vocab_size', 'N/A')}
+ **Hidden Size:** {config_json.get('hidden_size', 'N/A')}
+ **Num Hidden Layers:** {config_json.get('num_hidden_layers', 'N/A')}
+ **Num Attention Heads:** {config_json.get('num_attention_heads', 'N/A')}
+ **Max Position Embeddings:** {config_json.get('max_position_embeddings', 'N/A')}
+ **Torch Dtype:** {str(config_json.get('torch_dtype', 'N/A'))}
+ """
  except Exception as e:
      print(f"Could not retrieve model config: {e}")
+     model_config_info = f"**Error:** Could not load config for {model_id}. Check model files on Hugging Face."

+ # --- Helper Function for Tokenizer Info ---
  def format_tokenizer_info(tokenizer_instance):
      try:
          info = [
+             f"**Tokenizer Class:** `{tokenizer_instance.__class__.__name__}`",
              f"**Vocabulary Size:** {tokenizer_instance.vocab_size}",
              f"**Model Max Length:** {tokenizer_instance.model_max_length}",
+             f"**EOS Token:** `{tokenizer_instance.eos_token}` (ID: {tokenizer_instance.eos_token_id})",
+             f"**Special Tokens:** Check model card for specific template/tokens.", # Qwen2 has specific tokens
          ]
+         # Add BOS/PAD/UNK if they are commonly used and different from EOS
+         if hasattr(tokenizer_instance, 'pad_token') and tokenizer_instance.pad_token and tokenizer_instance.pad_token_id is not None:
+             info.append(f"**Padding Token:** `{tokenizer_instance.pad_token}` (ID: {tokenizer_instance.pad_token_id})")
+         if hasattr(tokenizer_instance, 'bos_token') and tokenizer_instance.bos_token and tokenizer_instance.bos_token_id is not None:
+             info.append(f"**BOS Token:** `{tokenizer_instance.bos_token}` (ID: {tokenizer_instance.bos_token_id})")
+         if hasattr(tokenizer_instance, 'unk_token') and tokenizer_instance.unk_token and tokenizer_instance.unk_token_id is not None:
+             info.append(f"**UNK Token:** `{tokenizer_instance.unk_token}` (ID: {tokenizer_instance.unk_token_id})")
          return "\n".join(info)
      except Exception as e:
          print(f"Error getting tokenizer info: {e}")
          return f"Could not retrieve full tokenizer details. Vocab size: {getattr(tokenizer_instance, 'vocab_size', 'N/A')}"

+ tokenizer_info = format_tokenizer_info(tokenizer)
+
+ # --- Generation Function ---
+ @spaces.GPU(duration=180) # Increased duration slightly
+ def generate_response(system_prompt, user_prompt, temperature, max_new_tokens, top_p, repetition_penalty, top_k, min_p):
+     # min_p is not directly supported by HF generate, it requires custom logit processing.
+     # We will ignore min_p for now but keep it in the UI if needed for future implementation.
+     # Note: Setting min_p typically involves filtering logits, which isn't done here.
+
+     # Use the tokenizer's chat template (Recommended for Qwen2 based models)
      messages = []
      if system_prompt and system_prompt.strip():
+         # Qwen2 template might prefer system prompt directly or integrated differently.
+         # Using the standard 'system' role here, assuming tokenizer handles it.
          messages.append({"role": "system", "content": system_prompt})
      messages.append({"role": "user", "content": user_prompt})

      try:
+         # Let the tokenizer handle the template - crucial for models like Qwen2
+         full_prompt = tokenizer.apply_chat_template(
+             messages,
+             tokenize=False,
+             add_generation_prompt=True # Adds the prompt for the assistant's turn
+         )
+         print("Applied tokenizer's chat template.")
      except Exception as e:
+         # Fallback only if template application fails catastrophically
+         print(f"Warning: Could not use apply_chat_template (Error: {e}). Falling back to basic format. This might degrade performance.")
          prompt_parts = []
          if system_prompt and system_prompt.strip():
              prompt_parts.append(f"System: {system_prompt}")
+         prompt_parts.append(f"\nUser: {user_prompt}")
+         prompt_parts.append("\nAssistant:") # Basic prompt end
          full_prompt = "\n".join(prompt_parts)

+     print(f"\n--- Generating ---")
+     # print(f"Prompt:\n{full_prompt}") # Optional: Print full prompt for debugging
+     print(f"Params: Temp={temperature}, TopK={top_k}, TopP={top_p}, RepPen={repetition_penalty}, MaxNew={max_new_tokens}, MinP={min_p} (MinP ignored by generate)")
+     print("-" * 20)
+
+     inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=4096).to(model.device) # Added truncation safeguard
+
+     # Generation arguments
+     generation_kwargs = dict(
+         **inputs,
+         max_new_tokens=int(max_new_tokens),
+         temperature=float(temperature) if float(temperature) > 0 else None, # Temp 0 means greedy search
+         top_p=float(top_p),
+         top_k=int(top_k),
+         repetition_penalty=float(repetition_penalty),
+         do_sample=True if float(temperature) > 0 else False,
+         pad_token_id=tokenizer.eos_token_id, # Use EOS for padding when generating
+         eos_token_id=tokenizer.eos_token_id
+         # min_p cannot be directly passed here.
+     )

+     if temperature == 0: # If temp is 0, disable sampling params
+         generation_kwargs.pop('top_p', None)
+         generation_kwargs.pop('top_k', None)
+         generation_kwargs['do_sample'] = False

      # Generate response
      with torch.inference_mode():
+         outputs = model.generate(**generation_kwargs)

+     # Decode response, skipping special tokens and the input prompt part
      input_length = inputs['input_ids'].shape[1]
      generated_tokens = outputs[0][input_length:]
      response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

+     print(f"--- Response ---\n{response}\n---------------\n")
      return response.strip()

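An aside on min_p: the comments in generate_response above note that the slider value is collected but ignored, because wiring it into model.generate() would require custom logit processing. A minimal sketch of what that custom logic could look like with a LogitsProcessor (not part of this commit; newer transformers releases also expose a min_p argument on generate directly):

import torch
from transformers import LogitsProcessor, LogitsProcessorList

class MinPLogitsProcessor(LogitsProcessor):
    """Keep only tokens whose probability is at least min_p times that of the most likely token."""
    def __init__(self, min_p: float):
        self.min_p = min_p

    def __call__(self, input_ids, scores):
        probs = torch.softmax(scores, dim=-1)
        top_prob, _ = probs.max(dim=-1, keepdim=True)
        # Mask everything below the scaled threshold so sampling can never pick it.
        below_threshold = probs < self.min_p * top_prob
        return scores.masked_fill(below_threshold, float("-inf"))

# Hypothetical hookup inside generate_response:
# outputs = model.generate(**generation_kwargs,
#                          logits_processor=LogitsProcessorList([MinPLogitsProcessor(float(min_p))]))
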
  # --- Gradio Interface ---
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), css=".gradio-container { max-width: 90% !important; }") as demo:
      gr.Markdown(Title)
+     gr.Markdown(description)

      with gr.Row():
+         with gr.Column(scale=3):
+             # Main Interaction Area
              with gr.Group():
+                 system_prompt = gr.Textbox(
+                     label="System Prompt (Persona & Instructions)",
+                     value="You are Tessa, an expert AI assistant specialized in React development. Generate clean, semantic React code based on user requests. If the request is not about React, answer as a general helpful assistant.",
+                     lines=3,
+                     info="Guide the model's overall behavior and expertise."
+                 )
+                 user_prompt = gr.Textbox(
+                     label="Your Request",
+                     placeholder="e.g., 'Create a React functional component for a simple counter with increment and decrement buttons using useState.' or 'Explain the concept of virtual DOM.'",
+                     lines=6
+                 )

+             with gr.Accordion("Generation Parameters", open=True):
+                 with gr.Row():
+                     temperature = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.05, label="Temperature", info="Controls randomness. 0 = deterministic, >0 = random.")
+                     max_new_tokens = gr.Slider(minimum=64, maximum=4096, value=1024, step=32, label="Max New Tokens", info="Max length of the generated response.")
+                 with gr.Row():
+                     top_k = gr.Slider(minimum=1, maximum=200, value=40, step=1, label="Top-k", info="Sample from top k likely tokens.")
+                     top_p = gr.Slider(minimum=0.05, maximum=1.0, value=0.95, step=0.01, label="Top-p (nucleus)", info="Sample from tokens with cumulative probability >= top_p.")
+                 with gr.Row():
+                     repetition_penalty = gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.01, label="Repetition Penalty", info="Penalizes repeating tokens ( > 1).")
+                     # Add min_p slider, but note it's not used in backend currently
+                     min_p = gr.Slider(minimum=0.0, maximum=0.5, value=0.05, step=0.01, label="Min-p (Not Active)", info="Filters tokens below this probability threshold (Requires custom logic - currently ignored).")

+             generate_btn = gr.Button("Generate Response", variant="primary", size="lg")

          with gr.Column(scale=2):
+             # Output Area
+             output = gr.Code(
+                 label=f"Tessa-T1-14B Output",
+                 language="markdown", # Use markdown for mixed text/code
+                 lines=25,
+                 show_copy_button=True,
              )

+             # Model & Tokenizer Info in an Accordion
+             with gr.Accordion("Model & Tokenizer Details", open=False):
+                 gr.Markdown("### Model Configuration")
+                 gr.Markdown(model_config_info)
+                 gr.Markdown("---")
+                 gr.Markdown("### Tokenizer Configuration")
+                 gr.Markdown(tokenizer_info)


+     # About Tesslate Section
+     with gr.Row():
+         with gr.Accordion("About Tesslate & Our Mission", open=False):
+             gr.Markdown(about_tesslate)

+     # Links Section
+     gr.Markdown(join_us)

+     # Examples (Updated for React/Coding focus)
      gr.Examples(
          examples=[
+             # [system_prompt, user_prompt, temperature, max_tokens, top_p, rep_penalty, top_k, min_p]
              [
+                 "You are Tessa, an expert AI assistant specialized in React development.",
+                 "Create a simple React functional component for a button that alerts 'Hello!' when clicked.",
+                 0.5, 512, 0.95, 1.1, 40, 0.05
              ],
              [
+                 "You are Tessa, an expert AI assistant specialized in React development.",
+                 "Explain the difference between `useState` and `useEffect` hooks in React with simple examples.",
+                 0.7, 1024, 0.95, 1.1, 40, 0.05
              ],
              [
+                 "You are a helpful AI assistant.",
+                 "Write a short explanation of how React's reconciliation algorithm works.",
+                 0.6, 768, 0.9, 1.15, 50, 0.05
              ],
              [
+                 "You are Tessa, an expert AI assistant specialized in React development. Use Tailwind CSS for styling.",
+                 "Generate a React component for a responsive card with an image, title, and description, using Tailwind CSS classes.",
+                 0.7, 1536, 0.95, 1.1, 40, 0.05
              ],
+             [
+                 "You are a helpful AI assistant.",
+                 "What are the pros and cons of using Next.js compared to Create React App?",
+                 0.8, 1024, 0.98, 1.05, 60, 0.05
              ]
          ],
          inputs=[

@@ -341,22 +322,23 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
              max_new_tokens,
              top_p,
              repetition_penalty,
+             top_k,
+             min_p # Include min_p here even if not used by backend, to match UI
          ],
          outputs=output,
+         label="Example Prompts (Click to Load)"
      )

+     # Connect button click to function
      generate_btn.click(
          fn=generate_response,
+         inputs=[system_prompt, user_prompt, temperature, max_new_tokens, top_p, repetition_penalty, top_k, min_p],
          outputs=output,
+         api_name="generate"
      )

  # Launch the demo
  if __name__ == "__main__":
+     # share=True creates a public link (useful for Colab/remote or HF Spaces)
+     # queue enables handling multiple users
+     demo.queue().launch(debug=True, share=False) # Set share=True if deploying on HF Spaces
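
Because the click handler registers api_name="generate", the updated Space can also be called programmatically once it is running. A minimal sketch using gradio_client; the Space id is a placeholder, and the positional arguments mirror the inputs list wired to generate_btn.click above:

from gradio_client import Client

client = Client("your-username/tessa-t1-demo")  # placeholder Space id, replace with the actual <user>/<space>

result = client.predict(
    "You are Tessa, an expert AI assistant specialized in React development.",  # system_prompt
    "Create a React hook that debounces a search input.",                       # user_prompt
    0.7,    # temperature
    1024,   # max_new_tokens
    0.95,   # top_p
    1.1,    # repetition_penalty
    40,     # top_k
    0.05,   # min_p (currently ignored by the backend)
    api_name="/generate",
)
print(result)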