smirki committed
Commit 1944655 · verified · 1 Parent(s): 4d93432

Update app.py

Files changed (1)
  1. app.py +210 -228
app.py CHANGED
@@ -6,35 +6,67 @@ from datetime import datetime
  import os
 
  # --- Configuration ---
- # Use the specified model ID
  model_id = "Tesslate/Tessa-T1-14B"
- # Note: As of writing, "Tesslate/Tessa-T1-7B" might be a hypothetical model name.
- # Replace with the actual model ID if different.
 
  # --- Text Content ---
- # Updated Title and Description for Tessa model
- Title = f"""# Welcome to 🌟Tonic's 🌠{model_id} Demo"""
 
  description = f"""
- This is a demonstration of [{model_id}](https://huggingface.co/{model_id}), a 7B parameter language model.
- Interact with the model below. Enter your prompt and adjust the generation parameters as needed.
- *Disclaimer: The model details below are placeholders assuming a standard 7B model structure, as specific details for '{model_id}' may vary.*
  """
 
- # Generalized Training Info Placeholder
- training = f"""
- ## Training details for {model_id}
- *(Specific training data for {model_id} is not available in this template. Fine-tuning details depend on the specific model version.)*
- Generally, instruction-tuned models like this are fine-tuned on a mix of synthetic and/or real-world instruction datasets to improve their ability to follow commands and converse.
  """
 
- join_us = """
- ## Join us:
- 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻
- [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP)
- On 🤗Huggingface: [MultiTransformer](https://huggingface.co/MultiTransformer)
- On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)
- 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
  """
 
  # --- Model and Tokenizer Loading ---
@@ -42,24 +74,26 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
  print(f"Using device: {device}")
 
  # Get the token from environment variables
- hf_token = os.getenv('READTOKEN')
  if not hf_token:
      # Try to load from Hugging Face login if available, otherwise raise error
      try:
-         from huggingface_hub import HfApi
-         hf_token = HfApi().token
          if not hf_token:
-             raise ValueError("HF token not found. Please set READTOKEN env var or login via huggingface-cli.")
          print("Using token from Hugging Face login.")
      except ImportError:
-         raise ValueError("huggingface_hub not installed. Please set the READTOKEN environment variable.")
      except Exception as e:
-         raise ValueError(f"Please set the READTOKEN environment variable or login via huggingface-cli. Error: {e}")
-
 
  print(f"Loading Tokenizer: {model_id}")
  # Initialize tokenizer and model with token authentication
- # Using trust_remote_code=True might be necessary depending on the model's implementation
  tokenizer = AutoTokenizer.from_pretrained(
      model_id,
      token=hf_token,
@@ -67,271 +101,218 @@ tokenizer = AutoTokenizer.from_pretrained(
  )
 
  print(f"Loading Model: {model_id}")
- # Load the model without any quantization (uses default or specified dtype like bfloat16)
- # device_map="auto" automatically distributes the model across available GPUs/CPU
  model = AutoModelForCausalLM.from_pretrained(
      model_id,
      token=hf_token,
      device_map="auto",
-     torch_dtype=torch.bfloat16, # Using bfloat16 for better performance on compatible GPUs
      trust_remote_code=True
  )
  print("Model loaded successfully.")
 
- # Attempt to get config, handle potential errors if config is not standard
  try:
      config_json = model.config.to_dict()
  except Exception as e:
      print(f"Could not retrieve model config: {e}")
-     config_json = {"error": f"Could not load config for {model_id}"}
-
- # --- Helper Functions ---
- def format_model_info(config):
-     if "error" in config:
-         return f"**Error:** {config['error']}"
-
-     info = []
-     important_keys = [
-         "model_type", "vocab_size", "hidden_size", "num_attention_heads",
-         "num_hidden_layers", "max_position_embeddings", "torch_dtype"
-     ]
-     # Add other potential keys if needed based on common model architectures
-     potential_keys = ["intermediate_size", "rms_norm_eps", "rope_theta"]
-     all_keys_to_check = important_keys + potential_keys
-
-     for key in all_keys_to_check:
-         if key in config:
-             value = config[key]
-             # Convert torch_dtype to string representation if it exists
-             if key == "torch_dtype" and value is not None and hasattr(value, "name"):
-                 value = value.name
-             elif value is None:
-                 value = "Not specified"
-             info.append(f"**{key.replace('_', ' ').title()}:** {value}")
-
-     if not info:
-         return "Model configuration details not available or keys not found."
-
-     return "\n".join(info)
 
  def format_tokenizer_info(tokenizer_instance):
      try:
          info = [
-             f"**Tokenizer Class:** {tokenizer_instance.__class__.__name__}",
              f"**Vocabulary Size:** {tokenizer_instance.vocab_size}",
              f"**Model Max Length:** {tokenizer_instance.model_max_length}",
-             f"**Padding Token:** {tokenizer_instance.pad_token} (ID: {tokenizer_instance.pad_token_id})",
-             f"**EOS Token:** {tokenizer_instance.eos_token} (ID: {tokenizer_instance.eos_token_id})",
-             f"**BOS Token:** {tokenizer_instance.bos_token} (ID: {tokenizer_instance.bos_token_id})",
-             f"**UNK Token:** {tokenizer_instance.unk_token} (ID: {tokenizer_instance.unk_token_id})",
          ]
          return "\n".join(info)
      except Exception as e:
          print(f"Error getting tokenizer info: {e}")
          return f"Could not retrieve full tokenizer details. Vocab size: {getattr(tokenizer_instance, 'vocab_size', 'N/A')}"
 
- # Define the generation function using @spaces.GPU decorator for hardware acceleration
- @spaces.GPU(duration=120) # Set a timeout for the GPU request
- def generate_response(system_prompt, user_prompt, temperature, max_new_tokens, top_p, repetition_penalty, top_k):
-     # Corrected Prompt Formatting: Using a standard instruction format.
-     # Adapt this format if Tesslate/Tessa-T1-7B requires a specific template.
-     # Common format: System Prompt, User Prompt, expect Assistant response.
-     # Using the format potentially used by models like Llama 3, Mistral Instruct v0.2/0.3 etc.
-     # Check the model card for Tesslate/Tessa-T1-7B for the official recommended format.
      messages = []
      if system_prompt and system_prompt.strip():
          messages.append({"role": "system", "content": system_prompt})
      messages.append({"role": "user", "content": user_prompt})
 
-     # Use the tokenizer's apply_chat_template method if available
      try:
-         full_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-         print("Using tokenizer's chat template.")
      except Exception as e:
-         # Fallback to a generic format if apply_chat_template fails or is not available
-         print(f"Could not use apply_chat_template (Error: {e}). Falling back to generic format.")
          prompt_parts = []
          if system_prompt and system_prompt.strip():
              prompt_parts.append(f"System: {system_prompt}")
-         prompt_parts.append(f"User: {user_prompt}")
-         prompt_parts.append("Assistant:")
          full_prompt = "\n".join(prompt_parts)
 
-     print(f"\n--- Generating with Prompt ---\n{full_prompt}\n-----------------------------\n")
 
-     # Prepare the input prompt
-     # Ensure the input is on the correct device (model's device)
-     inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
 
-     # Check max_new_tokens against potential model limits (optional)
-     # max_possible_tokens = getattr(model.config, 'max_position_embeddings', 4096) - inputs['input_ids'].shape[1]
-     # if max_new_tokens > max_possible_tokens:
-     #     print(f"Warning: max_new_tokens ({max_new_tokens}) exceeds model's likely capacity ({max_possible_tokens}). Clamping.")
-     #     max_new_tokens = max(1, max_possible_tokens) # Ensure at least 1 token can be generated
 
      # Generate response
-     # Use torch.inference_mode() for efficiency
      with torch.inference_mode():
-         outputs = model.generate(
-             **inputs,
-             max_new_tokens=int(max_new_tokens), # Ensure it's an int
-             temperature=float(temperature),
-             top_p=float(top_p),
-             top_k=int(top_k),
-             repetition_penalty=float(repetition_penalty),
-             do_sample=True if temperature > 0 else False, # Only sample if temperature > 0
-             pad_token_id=tokenizer.eos_token_id,
-             eos_token_id=tokenizer.eos_token_id # Explicitly set EOS token ID
-         )
 
-     # Decode and return the response
-     # We need to decode only the newly generated tokens
      input_length = inputs['input_ids'].shape[1]
      generated_tokens = outputs[0][input_length:]
      response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
 
-     # Optional: Clean up potential artifacts if needed, depending on the model's output behavior
-     # response = response.replace("<|end_of_turn|>", "").strip() # Example cleanup
-
-     # Ensure output doesn't exceed 10k tokens (already limited by max_new_tokens slider, but as a safeguard)
-     # This check is mostly symbolic here as max_new_tokens is the primary control.
-     response_tokens = tokenizer(response, return_tensors="pt").input_ids.shape[1]
-     if response_tokens > 10000:
-         print(f"Warning: Generated response exceeded 10k tokens ({response_tokens}). Truncating.")
-         # This part is tricky, as truncating mid-thought is bad.
-         # A better approach is to rely on max_new_tokens slider limit.
-         # For demonstration, we could truncate, but it's not ideal:
-         # truncated_ids = tokenizer(response, return_tensors="pt").input_ids[0, :10000]
-         # response = tokenizer.decode(truncated_ids, skip_special_tokens=True)
-         pass # Relying on max_new_tokens control instead of hard truncation here.
-
-
      return response.strip()
 
  # --- Gradio Interface ---
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
      gr.Markdown(Title)
 
      with gr.Row():
-         with gr.Column(scale=1):
-             gr.Markdown(description)
-         with gr.Column(scale=1):
-             gr.Markdown(training) # Show generalized training info
-
-     with gr.Row():
-         with gr.Column(scale=1):
              with gr.Group():
-                 gr.Markdown("### Model Configuration")
-                 gr.Markdown(format_model_info(config_json)) # Display formatted model config
-
-         with gr.Column(scale=1):
-             with gr.Group():
-                 gr.Markdown("### Tokenizer Configuration")
-                 gr.Markdown(format_tokenizer_info(tokenizer)) # Display formatted tokenizer info
 
-     with gr.Row():
-         with gr.Group():
-             gr.Markdown(join_us) # Keep the community links section
 
-     with gr.Row():
          with gr.Column(scale=2):
-             # System prompt (English default)
-             system_prompt = gr.Textbox(
-                 label="System Prompt",
-                 value="You are Tessa, a helpful and friendly AI assistant. Respond accurately and politely.",
-                 lines=3,
-                 info="Optional instruction for the model's persona or behavior."
              )
 
-             # User prompt (English default)
-             user_prompt = gr.Textbox(
-                 label="🗣️ Your Message",
-                 placeholder="Enter your text here...",
-                 lines=5
-             )
 
-             with gr.Accordion("🧪 Advanced Parameters", open=False):
-                 temperature = gr.Slider(
-                     minimum=0.0, # Allow 0 for deterministic output
-                     maximum=2.0,
-                     value=0.6,
-                     step=0.1,
-                     label="🌡️ Temperature",
-                     info="Lower values make output more deterministic, higher values increase randomness."
-                 )
-                 # Max length slider restricted to 2048 to fit typical context windows and prevent excessive generation
-                 max_new_tokens = gr.Slider(
-                     minimum=50,
-                     maximum=min(getattr(model.config, 'max_position_embeddings', 4096), 10000), # Set max based on model config or 10k limit
-                     value=512,
-                     step=16,
-                     label="📊 Max New Tokens",
-                     info=f"Maximum number of tokens to generate. Max capped at {min(getattr(model.config, 'max_position_embeddings', 4096), 10000)}."
-                 )
-                 top_p = gr.Slider(
-                     minimum=0.1,
-                     maximum=1.0,
-                     value=0.9,
-                     step=0.05,
-                     label="🏅 Top-p (nucleus sampling)",
-                     info="Considers tokens with cumulative probability >= top_p."
-                 )
-                 top_k = gr.Slider(
-                     minimum=1,
-                     maximum=200,
-                     value=50,
-                     step=1,
-                     label="🏆 Top-k",
-                     info="Considers the top k most likely tokens."
-                 )
-                 repetition_penalty = gr.Slider(
-                     minimum=1.0,
-                     maximum=2.0,
-                     value=1.15,
-                     step=0.05,
-                     label="🦜 Repetition Penalty",
-                     info="Penalizes repeated tokens. 1.0 means no penalty."
-                 )
 
-             generate_btn = gr.Button("🌠 Generate", variant="primary")
 
-         with gr.Column(scale=2):
-             # Output component labeled with model name
-             output = gr.Textbox(
-                 label=f"🌠 {model_id.split('/')[-1]}", # Use model name in label
-                 lines=18, # Increased lines for output
-                 show_copy_button=True
-             )
 
-     # Example prompts (updated to English and generic)
      gr.Examples(
          examples=[
-             # Format: [system_prompt, user_prompt, temperature, max_tokens, top_p, rep_penalty, top_k]
              [
-                 "You are a helpful AI assistant.",
-                 "Hello! How are you today?",
-                 0.7, 512, 0.9, 1.2, 50
              ],
              [
-                 "You are an expert in artificial intelligence.",
-                 "Can you explain what a Large Language Model (LLM) is in simple terms?",
-                 0.6, 1024, 0.9, 1.15, 40
              ],
              [
-                 "You are a creative writer.",
-                 "Write a short story about a robot discovering music.",
-                 0.9, 768, 0.95, 1.1, 60
              ],
              [
-                 "You are a knowledgeable historian.",
-                 "What were the main causes of World War I?",
-                 0.5, 1024, 0.85, 1.1, 30
              ],
-             [
-                 "You are a helpful coding assistant.",
-                 "Write a simple Python function to reverse a string.",
-                 0.4, 256, 0.9, 1.2, 50
              ]
          ],
          inputs=[
@@ -341,22 +322,23 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
              max_new_tokens,
              top_p,
              repetition_penalty,
-             top_k
          ],
          outputs=output,
-         label="Examples (Click to load)"
      )
 
-     # Set up the generation event
      generate_btn.click(
          fn=generate_response,
-         inputs=[system_prompt, user_prompt, temperature, max_new_tokens, top_p, repetition_penalty, top_k],
          outputs=output,
-         api_name="generate" # Add API name for programmatic access
      )
 
  # Launch the demo
  if __name__ == "__main__":
-     # share=True creates a public link (useful for Colab/remote)
-     # set ssr_mode=False if encountering issues with streaming or interactivity
-     demo.queue().launch(debug=True, share=False, ssr_mode=False)

app.py (updated version, right-hand pane of the diff):
  import os
 
  # --- Configuration ---
+ # Updated model ID
  model_id = "Tesslate/Tessa-T1-14B"
+ creator_link = "https://huggingface.co/TesslateAI"
+ model_link = f"https://huggingface.co/{model_id}"
+ website_link = "https://tesslate.com"
+ discord_link = "https://discord.gg/DkzMzwBTaw"
 
  # --- Text Content ---
+ Title = f"""
+ <div style="text-align: center; margin-bottom: 20px;">
+     <img src="https://huggingface.co/Tesslate/Tessa-T1-14B/resolve/main/tesslate_logo_color.png?download=true" alt="Tesslate Logo" style="height: 80px; margin-bottom: 10px;">
+     <h1 style="margin-bottom: 5px;">🚀 Welcome to the Tessa-T1-14B Demo 🚀</h1>
+     <p style="font-size: 1.1em;">Experience the power of specialized React reasoning!</p>
+     <p>Model by <a href="{creator_link}" target="_blank">TesslateAI</a> | <a href="{model_link}" target="_blank">View on Hugging Face</a></p>
+ </div>
+ """
 
  description = f"""
+ Interact with **[{model_id}]({model_link})**, an innovative 14B parameter transformer model fine-tuned from Qwen2.5-Coder-14B-Instruct.
+ Tessa-T1 specializes in **React frontend development**, leveraging advanced reasoning to autonomously generate well-structured, semantic React components.
+ It's designed for integration into AI coding agents and autonomous frontend systems.
  """
 
+ about_tesslate = f"""
+ ## About Tesslate & Our Vision
+ <img src="https://huggingface.co/Tesslate/Tessa-T1-14B/resolve/main/tesslate_logo_notext.png?download=true" alt="Tesslate Icon" style="height: 40px; float: left; margin-right: 10px;">
+ Hi everyone, I'm Manav, founder of Tesslate, and we're on a mission to revolutionize AI by putting powerful reasoning models into your hands.
+
+ Today, the AI landscape is dominated by massive frontier models: large, costly, and slow. At Tesslate, we see things differently. The next wave of AI disruption won't come from sheer size; it'll be driven by **speed, specialization, and precision reasoning**. Smaller, specialized models aren't just faster; they're smarter and more efficient.
+
+ Our story began when we released a UI-generation model on Hugging Face that didn't just replicate patterns; it could reason through entire component hierarchies. It resonated instantly, hitting over 10,000 downloads in weeks. That early success validated our vision, and we doubled down.
+
+ At Tesslate, we build lean, intelligent models that:
+ * 🧠 **Think** like human agents
+ * 💡 **Reason** through complex, real-world workflows
+ * 💻 **Execute** like elite developers, designers, and analysts
+
+ We've already delivered:
+ * **UIGEN-T1.5:** Creating stunning, editable interfaces (React, Tailwind, Three.js)
+ * **Tessa-T1:** A specialized reasoning engine optimized for React development and AI agents (You are here!)
+ * **Synthia S1:** Our flagship general-reasoning model, proving powerful reasoning capabilities beyond STEM into creativity and storytelling.
+
+ Our vision is bigger. We aim to be the **#1 trusted brand in fast, specialized AI**, covering training, inference, real-time agent actions, infrastructure, research, and innovative products. We're already piloting with industry-leading clients tackling everything from sophisticated design systems to real-time analytics.
+
+ **Join us!** We're seeking strategic advice, introductions, compute resources, and capital.
+ 👉 Visit **[tesslate.com]({website_link})** to learn more and connect.
  """
 
+ join_us = f"""
+ <div style="text-align: center;">
+     <h3 style="margin-bottom: 10px;">Connect with Tesslate</h3>
+     <a href="{discord_link}" target="_blank" style="text-decoration: none; margin: 0 10px;">
+         <img src="https://img.shields.io/discord/1225631184402124842?label=Discord&logo=discord&style=for-the-badge&color=5865F2" alt="Join us on Discord">
+     </a>
+     <a href="{website_link}" target="_blank" style="text-decoration: none; margin: 0 10px;">
+         <img src="https://img.shields.io/badge/Website-tesslate.com-blue?style=for-the-badge&logo=googlechrome&logoColor=white" alt="Visit tesslate.com">
+     </a>
+     <a href="{model_link}" target="_blank" style="text-decoration: none; margin: 0 10px;">
+         <img src="https://img.shields.io/badge/🤗%20Model-Tessa--T1--14B-yellow?style=for-the-badge&logo=huggingface" alt="Tessa-T1-14B on Hugging Face">
+     </a>
+ </div>
+ """
 
  # --- Model and Tokenizer Loading ---
  print(f"Using device: {device}")
 
  # Get the token from environment variables
+ hf_token = os.getenv('HF_TOKEN') # Standard env var name for HF token
  if not hf_token:
      # Try to load from Hugging Face login if available, otherwise raise error
      try:
+         from huggingface_hub import HfApi, HfFolder
+         hf_token = HfFolder.get_token() # Use HfFolder to get token saved by login
+         if not hf_token:
+             # If still not found, try HfApi (less common for user login token)
+             hf_token = HfApi().token
          if not hf_token:
+             raise ValueError("HF token not found. Please set HF_TOKEN env var or login via `huggingface-cli login`.")
          print("Using token from Hugging Face login.")
      except ImportError:
+         raise ValueError("huggingface_hub not installed. Please set the HF_TOKEN environment variable or install huggingface_hub.")
      except Exception as e:
+         raise ValueError(f"HF token acquisition failed. Please set the HF_TOKEN environment variable or login via `huggingface-cli login`. Error: {e}")
 
  print(f"Loading Tokenizer: {model_id}")
  # Initialize tokenizer and model with token authentication
+ # trust_remote_code=True is necessary for models with custom code (like Qwen2)
  tokenizer = AutoTokenizer.from_pretrained(
      model_id,
      token=hf_token,
  )
 
  print(f"Loading Model: {model_id}")
+ # Load the model with bfloat16 and automatic device mapping
  model = AutoModelForCausalLM.from_pretrained(
      model_id,
      token=hf_token,
      device_map="auto",
+     torch_dtype=torch.bfloat16,
      trust_remote_code=True
  )
  print("Model loaded successfully.")
 
+ # Attempt to get config, handle potential errors
  try:
      config_json = model.config.to_dict()
+     model_config_info = f"""
+ **Model Type:** {config_json.get('model_type', 'N/A')}
+ **Architecture:** {config_json.get('architectures', ['N/A'])[0]}
+ **Vocab Size:** {config_json.get('vocab_size', 'N/A')}
+ **Hidden Size:** {config_json.get('hidden_size', 'N/A')}
+ **Num Hidden Layers:** {config_json.get('num_hidden_layers', 'N/A')}
+ **Num Attention Heads:** {config_json.get('num_attention_heads', 'N/A')}
+ **Max Position Embeddings:** {config_json.get('max_position_embeddings', 'N/A')}
+ **Torch Dtype:** {str(config_json.get('torch_dtype', 'N/A'))}
+ """
  except Exception as e:
      print(f"Could not retrieve model config: {e}")
+     model_config_info = f"**Error:** Could not load config for {model_id}. Check model files on Hugging Face."
 
+ # --- Helper Function for Tokenizer Info ---
  def format_tokenizer_info(tokenizer_instance):
      try:
          info = [
+             f"**Tokenizer Class:** `{tokenizer_instance.__class__.__name__}`",
              f"**Vocabulary Size:** {tokenizer_instance.vocab_size}",
              f"**Model Max Length:** {tokenizer_instance.model_max_length}",
+             f"**EOS Token:** `{tokenizer_instance.eos_token}` (ID: {tokenizer_instance.eos_token_id})",
+             f"**Special Tokens:** Check model card for specific template/tokens.", # Qwen2 has specific tokens
          ]
+         # Add BOS/PAD/UNK if they are commonly used and different from EOS
+         if hasattr(tokenizer_instance, 'pad_token') and tokenizer_instance.pad_token and tokenizer_instance.pad_token_id is not None:
+             info.append(f"**Padding Token:** `{tokenizer_instance.pad_token}` (ID: {tokenizer_instance.pad_token_id})")
+         if hasattr(tokenizer_instance, 'bos_token') and tokenizer_instance.bos_token and tokenizer_instance.bos_token_id is not None:
+             info.append(f"**BOS Token:** `{tokenizer_instance.bos_token}` (ID: {tokenizer_instance.bos_token_id})")
+         if hasattr(tokenizer_instance, 'unk_token') and tokenizer_instance.unk_token and tokenizer_instance.unk_token_id is not None:
+             info.append(f"**UNK Token:** `{tokenizer_instance.unk_token}` (ID: {tokenizer_instance.unk_token_id})")
          return "\n".join(info)
      except Exception as e:
          print(f"Error getting tokenizer info: {e}")
          return f"Could not retrieve full tokenizer details. Vocab size: {getattr(tokenizer_instance, 'vocab_size', 'N/A')}"
 
+ tokenizer_info = format_tokenizer_info(tokenizer)
+
+ # --- Generation Function ---
+ @spaces.GPU(duration=180) # Increased duration slightly
+ def generate_response(system_prompt, user_prompt, temperature, max_new_tokens, top_p, repetition_penalty, top_k, min_p):
+     # min_p is not directly supported by HF generate; it requires custom logit processing.
+     # We will ignore min_p for now but keep it in the UI if needed for future implementation.
+     # Note: Setting min_p typically involves filtering logits, which isn't done here.
+
+     # Use the tokenizer's chat template (recommended for Qwen2-based models)
      messages = []
      if system_prompt and system_prompt.strip():
+         # Qwen2 template might prefer system prompt directly or integrated differently.
+         # Using the standard 'system' role here, assuming tokenizer handles it.
          messages.append({"role": "system", "content": system_prompt})
      messages.append({"role": "user", "content": user_prompt})
 
      try:
+         # Let the tokenizer handle the template - crucial for models like Qwen2
+         full_prompt = tokenizer.apply_chat_template(
+             messages,
+             tokenize=False,
+             add_generation_prompt=True # Adds the prompt for the assistant's turn
+         )
+         print("Applied tokenizer's chat template.")
      except Exception as e:
+         # Fallback only if template application fails catastrophically
+         print(f"Warning: Could not use apply_chat_template (Error: {e}). Falling back to basic format. This might degrade performance.")
          prompt_parts = []
          if system_prompt and system_prompt.strip():
              prompt_parts.append(f"System: {system_prompt}")
+         prompt_parts.append(f"\nUser: {user_prompt}")
+         prompt_parts.append("\nAssistant:") # Basic prompt end
          full_prompt = "\n".join(prompt_parts)
 
+     print(f"\n--- Generating ---")
+     # print(f"Prompt:\n{full_prompt}") # Optional: Print full prompt for debugging
+     print(f"Params: Temp={temperature}, TopK={top_k}, TopP={top_p}, RepPen={repetition_penalty}, MaxNew={max_new_tokens}, MinP={min_p} (MinP ignored by generate)")
+     print("-" * 20)
+
+     inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=4096).to(model.device) # Added truncation safeguard
+
+     # Generation arguments
+     generation_kwargs = dict(
+         **inputs,
+         max_new_tokens=int(max_new_tokens),
+         temperature=float(temperature) if float(temperature) > 0 else None, # Temp 0 means greedy search
+         top_p=float(top_p),
+         top_k=int(top_k),
+         repetition_penalty=float(repetition_penalty),
+         do_sample=True if float(temperature) > 0 else False,
+         pad_token_id=tokenizer.eos_token_id, # Use EOS for padding when generating
+         eos_token_id=tokenizer.eos_token_id
+         # min_p cannot be directly passed here.
+     )
 
+     if temperature == 0: # If temp is 0, disable sampling params
+         generation_kwargs.pop('top_p', None)
+         generation_kwargs.pop('top_k', None)
+         generation_kwargs['do_sample'] = False
 
      # Generate response
      with torch.inference_mode():
+         outputs = model.generate(**generation_kwargs)
 
+     # Decode response, skipping special tokens and the input prompt part
      input_length = inputs['input_ids'].shape[1]
      generated_tokens = outputs[0][input_length:]
      response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
 
+     print(f"--- Response ---\n{response}\n---------------\n")
      return response.strip()
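
For reference, a minimal sketch of what apply_chat_template typically yields for a Qwen2-style tokenizer. The exact template comes from the model's tokenizer_config.json, so the ChatML markers shown in the comments are an assumption, not something confirmed by this commit:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Tesslate/Tessa-T1-14B", trust_remote_code=True)
msgs = [
    {"role": "system", "content": "You are Tessa, a React expert."},
    {"role": "user", "content": "Create a counter component."},
]
print(tok.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True))
# Typically renders as ChatML, roughly:
# <|im_start|>system
# You are Tessa, a React expert.<|im_end|>
# <|im_start|>user
# Create a counter component.<|im_end|>
# <|im_start|>assistant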
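
The commit leaves min_p wired into the UI but unused. If min_p filtering were wanted, recent transformers releases accept a min_p argument on generate(); on older versions a custom LogitsProcessor along these lines could be used. This is an illustrative sketch, not part of the commit:

import torch
from transformers import LogitsProcessor, LogitsProcessorList

class MinPLogitsProcessor(LogitsProcessor):
    # Masks tokens whose probability is below min_p times the top token's probability.
    def __init__(self, min_p: float):
        self.min_p = min_p

    def __call__(self, input_ids, scores):
        probs = torch.softmax(scores, dim=-1)
        top_prob, _ = probs.max(dim=-1, keepdim=True)
        return scores.masked_fill(probs < self.min_p * top_prob, float("-inf"))

# Hypothetical wiring inside generate_response:
# outputs = model.generate(
#     **generation_kwargs,
#     logits_processor=LogitsProcessorList([MinPLogitsProcessor(float(min_p))]),
# )
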
  # --- Gradio Interface ---
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), css=".gradio-container { max-width: 90% !important; }") as demo:
      gr.Markdown(Title)
+     gr.Markdown(description)
 
      with gr.Row():
+         with gr.Column(scale=3):
+             # Main Interaction Area
              with gr.Group():
+                 system_prompt = gr.Textbox(
+                     label="System Prompt (Persona & Instructions)",
+                     value="You are Tessa, an expert AI assistant specialized in React development. Generate clean, semantic React code based on user requests. If the request is not about React, answer as a general helpful assistant.",
+                     lines=3,
+                     info="Guide the model's overall behavior and expertise."
+                 )
+                 user_prompt = gr.Textbox(
+                     label="💬 Your Request",
+                     placeholder="e.g., 'Create a React functional component for a simple counter with increment and decrement buttons using useState.' or 'Explain the concept of virtual DOM.'",
+                     lines=6
+                 )
 
+             with gr.Accordion("🛠️ Generation Parameters", open=True):
+                 with gr.Row():
+                     temperature = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.05, label="🌡️ Temperature", info="Controls randomness. 0 = deterministic, >0 = random.")
+                     max_new_tokens = gr.Slider(minimum=64, maximum=4096, value=1024, step=32, label="📊 Max New Tokens", info="Max length of the generated response.")
+                 with gr.Row():
+                     top_k = gr.Slider(minimum=1, maximum=200, value=40, step=1, label="🏆 Top-k", info="Sample from top k likely tokens.")
+                     top_p = gr.Slider(minimum=0.05, maximum=1.0, value=0.95, step=0.01, label="🏅 Top-p (nucleus)", info="Sample from tokens with cumulative probability >= top_p.")
+                 with gr.Row():
+                     repetition_penalty = gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.01, label="🦜 Repetition Penalty", info="Penalizes repeating tokens ( > 1).")
+                     # Add min_p slider, but note it's not used in backend currently
+                     min_p = gr.Slider(minimum=0.0, maximum=0.5, value=0.05, step=0.01, label="📉 Min-p (Not Active)", info="Filters tokens below this probability threshold (Requires custom logic - currently ignored).")
 
+             generate_btn = gr.Button("🚀 Generate Response", variant="primary", size="lg")
 
          with gr.Column(scale=2):
+             # Output Area
+             output = gr.Code(
+                 label=f"🌠 Tessa-T1-14B Output",
+                 language="markdown", # Use markdown for mixed text/code
+                 lines=25,
+                 show_copy_button=True,
              )
 
+             # Model & Tokenizer Info in an Accordion
+             with gr.Accordion("⚙️ Model & Tokenizer Details", open=False):
+                 gr.Markdown("### Model Configuration")
+                 gr.Markdown(model_config_info)
+                 gr.Markdown("---")
+                 gr.Markdown("### Tokenizer Configuration")
+                 gr.Markdown(tokenizer_info)
 
+     # About Tesslate Section
+     with gr.Row():
+         with gr.Accordion("💡 About Tesslate & Our Mission", open=False):
+             gr.Markdown(about_tesslate)
 
+     # Links Section
+     gr.Markdown(join_us)
 
+     # Examples (Updated for React/Coding focus)
      gr.Examples(
          examples=[
+             # [system_prompt, user_prompt, temperature, max_tokens, top_p, rep_penalty, top_k, min_p]
              [
+                 "You are Tessa, an expert AI assistant specialized in React development.",
+                 "Create a simple React functional component for a button that alerts 'Hello!' when clicked.",
+                 0.5, 512, 0.95, 1.1, 40, 0.05
              ],
              [
+                 "You are Tessa, an expert AI assistant specialized in React development.",
+                 "Explain the difference between `useState` and `useEffect` hooks in React with simple examples.",
+                 0.7, 1024, 0.95, 1.1, 40, 0.05
              ],
              [
+                 "You are a helpful AI assistant.",
+                 "Write a short explanation of how React's reconciliation algorithm works.",
+                 0.6, 768, 0.9, 1.15, 50, 0.05
              ],
              [
+                 "You are Tessa, an expert AI assistant specialized in React development. Use Tailwind CSS for styling.",
+                 "Generate a React component for a responsive card with an image, title, and description, using Tailwind CSS classes.",
+                 0.7, 1536, 0.95, 1.1, 40, 0.05
              ],
+             [
+                 "You are a helpful AI assistant.",
+                 "What are the pros and cons of using Next.js compared to Create React App?",
+                 0.8, 1024, 0.98, 1.05, 60, 0.05
              ]
          ],
          inputs=[
              max_new_tokens,
              top_p,
              repetition_penalty,
+             top_k,
+             min_p # Include min_p here even if not used by backend, to match UI
          ],
          outputs=output,
+         label="✨ Example Prompts (Click to Load)"
      )
 
+     # Connect button click to function
      generate_btn.click(
          fn=generate_response,
+         inputs=[system_prompt, user_prompt, temperature, max_new_tokens, top_p, repetition_penalty, top_k, min_p],
          outputs=output,
+         api_name="generate"
      )
 
  # Launch the demo
  if __name__ == "__main__":
+     # share=True creates a public link (useful for Colab/remote or HF Spaces)
+     # queue enables handling multiple users
+     demo.queue().launch(debug=True, share=False) # Set share=True if deploying on HF Spaces
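
Because the click handler keeps api_name="generate", the endpoint can be called programmatically. A minimal sketch using gradio_client; the Space id below is hypothetical and the positional arguments follow the input order defined above:

from gradio_client import Client

client = Client("Tonic/Tessa-T1-14B-demo")  # hypothetical Space id
result = client.predict(
    "You are Tessa, an expert AI assistant specialized in React development.",  # system_prompt
    "Create a React counter component using useState.",                         # user_prompt
    0.7,    # temperature
    1024,   # max_new_tokens
    0.95,   # top_p
    1.1,    # repetition_penalty
    40,     # top_k
    0.05,   # min_p (currently ignored by the backend)
    api_name="/generate",
)
print(result)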