bhardwaj08sarthak committed
Commit 1f32a04 · verified · 1 Parent(s): 423136d

Update app.py

Files changed (1)
  1. app.py +127 -64
app.py CHANGED
@@ -69,7 +69,6 @@ _backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2
 _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
 _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
 
-
 # ------------------------ Tool: classify and score ------------------------
 @tool
 def classify_and_score(
@@ -78,23 +77,7 @@ def classify_and_score(
     target_dok: str,
     agg: str = "max"
 ) -> dict:
-    """Classify a question against Bloom’s and DOK targets and return guidance.
-
-    Args:
-        question: The question text to evaluate for cognitive demand.
-        target_bloom: Target Bloom’s level or range. Accepts exact (e.g., "Analyze")
-            or plus form (e.g., "Apply+") meaning that level or higher.
-        target_dok: Target DOK level or range. Accepts exact (e.g., "DOK3")
-            or span (e.g., "DOK2-DOK3").
-        agg: Aggregation method over phrase similarities within a level
-            (choices: "mean", "max", "topk_mean").
-
-    Returns:
-        A dictionary with:
-            ok: True if both Bloom’s and DOK match the targets.
-            measured: Dict with best levels and per-level scores for Bloom’s and DOK.
-            feedback: Brief guidance describing how to adjust the question to hit targets.
-    """
+    """Classify a question against Bloom’s and DOK targets and return guidance."""
     res = classify_levels_phrases(
         question,
         BLOOMS_PHRASES,
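
Reviewer note: the docstring trim drops the documented return shape, so for reference, a minimal usage sketch (the sample question and targets are illustrative; this assumes smolagents `@tool` objects stay directly callable with keyword arguments):

    result = classify_and_score(
        question="Compare 3/4 and 5/8 and justify which is larger.",
        target_bloom="Analyze",
        target_dok="DOK2-DOK3",
        agg="max",
    )
    result["ok"]        # True only if both Bloom's and DOK targets are met
    result["measured"]  # best levels and per-level scores for Bloom's and DOK
    result["feedback"]  # "On target." or guidance for adjusting the question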
@@ -174,21 +157,62 @@ def classify_and_score(
         "feedback": " ".join(feedback_parts) if feedback_parts else "On target.",
     }
 
-
-# ------------------------ Agent setup with timeout ------------------------
-def make_agent(hf_token: str, model_id: str, timeout: int, temperature: float, max_tokens: int):
-    client = InferenceClient(
-        model=model_id,
-        timeout=timeout,
-        token=hf_token or None,
+# ------------------------ Backend selection + caching ------------------------
+_LOCAL_MODEL_CACHE = {
+    "model": None,
+    "model_id": None,
+}
+
+def _get_local_model(model_id: str):
+    """Lazy-load and cache a local Transformers model for smolagents."""
+    # Import here so Hosted mode doesn't require local deps.
+    try:
+        from smolagents import TransformersModel  # provided by smolagents
+    except Exception as e:
+        raise RuntimeError(
+            "Local backend requires 'TransformersModel' from smolagents. "
+            "Please ensure your smolagents version provides it."
+        ) from e
+
+    if (
+        _LOCAL_MODEL_CACHE["model"] is not None
+        and _LOCAL_MODEL_CACHE["model_id"] == model_id
+    ):
+        return _LOCAL_MODEL_CACHE["model"]
+
+    # Instantiate and cache
+    local_model = TransformersModel(
+        model_id=model_id,
+        device_map="auto"
     )
-    model = InferenceClientModel(client=client)
+    _LOCAL_MODEL_CACHE["model"] = local_model
+    _LOCAL_MODEL_CACHE["model_id"] = model_id
+    return local_model
+
+# ------------------------ Agent setup with timeout ------------------------
+def make_agent(
+    backend_choice: str,  # "Hosted API" | "Local GPU"
+    hf_token: str,
+    model_id: str,
+    timeout: int,
+    temperature: float,
+    max_tokens: int
+):
+    if backend_choice == "Local GPU":
+        model = _get_local_model(model_id)
+    else:
+        client = InferenceClient(
+            model=model_id,
+            timeout=timeout,
+            token=(hf_token or None),
+        )
+        model = InferenceClientModel(client=client)
+
     agent = CodeAgent(model=model, tools=[classify_and_score])
-    # Not used by the agent core, but helpful for debugging/visibility
+    # Not used by agent core; helpful for debugging
     agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}
     return agent
 
-
 # ------------------------ Agent task template -----------------------------
 TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".
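
Reviewer note: a quick sketch of what the new cache buys (hypothetical session; assumes a GPU with enough VRAM for the chosen model):

    # First call downloads/loads the model; the second is a cache hit and
    # returns the very same object instead of reloading.
    m1 = _get_local_model("swiss-ai/Apertus-70B-Instruct-2509")
    m2 = _get_local_model("swiss-ai/Apertus-70B-Instruct-2509")
    assert m1 is m2
    # A different model_id overwrites the single cache slot and loads anew.

One caveat: swapping model_id replaces the cache entry but never explicitly frees the previous model, so releasing GPU memory is left to garbage collection.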
 
@@ -209,35 +233,31 @@ Additionally, when you call classify_and_score, pass the exact question text you
 If you output JSON, ensure it is valid JSON (no trailing commas, use double quotes).
 '''
 
-
 # ------------------------ Utility: robust JSON extractor ------------------
 def extract_top_level_json(s: str) -> str:
-    """
-    Extract the first top-level JSON object from a string by tracking braces.
-    Returns the JSON string if found, else "".
-    """
+    """Extract the first top-level JSON object by tracking braces."""
     start = s.find("{")
     if start == -1:
         return ""
     depth = 0
     for i in range(start, len(s)):
-        if s[i] == "{":
+        ch = s[i]
+        if ch == "{":
             depth += 1
-        elif s[i] == "}":
+        elif ch == "}":
             depth -= 1
             if depth == 0:
                 candidate = s[start:i + 1]
                 try:
-                    # validate
-                    json.loads(candidate)
+                    json.loads(candidate)  # validate
                     return candidate
                 except Exception:
                     return ""
     return ""
 
-
 # ------------------------ Pipeline ---------------------------------------
 def run_pipeline(
+    backend_choice,
     hf_token,
     topic,
     grade,
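
Reviewer note: the extractor is easiest to check against a few inputs (illustrative strings):

    text = 'Answer: {"question": "What is 3/4 + 1/8?", "bloom": "Apply"} Done.'
    extract_top_level_json(text)
    # -> '{"question": "What is 3/4 + 1/8?", "bloom": "Apply"}'
    extract_top_level_json("no JSON here")    # -> ""
    extract_top_level_json('{"unclosed": 1')  # -> "" (depth never returns to 0)

The brace counter does not skip braces inside JSON string values, so a value containing "}" truncates the candidate, json.loads fails, and the function returns "".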
@@ -250,14 +270,19 @@
     temperature,
     max_tokens
 ):
-    # Build agent per run
-    agent = make_agent(
-        hf_token=(hf_token or "").strip(),
-        model_id=model_id,
-        timeout=int(timeout),
-        temperature=float(temperature),
-        max_tokens=int(max_tokens),
-    )
+    try:
+        agent = make_agent(
+            backend_choice=backend_choice,
+            hf_token=(hf_token or "").strip(),
+            model_id=model_id,
+            timeout=int(timeout),
+            temperature=float(temperature),
+            max_tokens=int(max_tokens),
+        )
+    except Exception as e:
+        # Surface backend/model setup errors directly
+        err = f"ERROR initializing backend '{backend_choice}': {e}"
+        return "", err
 
     task = TASK_TMPL.format(
         grade=grade,
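
Reviewer note: returning `("", err)` keeps the return shape aligned with the two Gradio outputs, so a backend initialization failure is rendered in the transcript box instead of raising inside the UI callback:

    # final_json (gr.Code)    <- ""
    # transcript (gr.Textbox) <- "ERROR initializing backend 'Local GPU': ..."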
@@ -268,13 +293,11 @@
         attempts=int(attempts)
     )
 
-    # The agent will internally call the tool
     try:
         result_text = agent.run(task, max_steps=int(attempts) * 4)
     except Exception as e:
-        result_text = f"ERROR: {e}"
+        result_text = f"ERROR while running the agent: {e}"
 
-    # Try to extract final JSON
     final_json = ""
     candidate = extract_top_level_json(result_text or "")
     if candidate:
@@ -285,25 +308,43 @@
 
     return final_json, result_text
 
+# ------------------------ Optional Spaces warmup --------------------------
+# If you deploy on HF Spaces and want to pre-allocate GPU for Local mode,
+# you can try to warm up the model at startup by setting:
+#   BACKEND_WARMUP=1 and BACKEND_WARMUP_MODEL=<model id>
+if (os.getenv("SYSTEM") == "spaces") and os.getenv("BACKEND_WARMUP") == "1":
+    try:
+        wm = os.getenv("BACKEND_WARMUP_MODEL", "swiss-ai/Apertus-70B-Instruct-2509")
+        _get_local_model(wm)
+        print(f"[Warmup] Local GPU model loaded: {wm}")
+    except Exception as e:
+        print(f"[Warmup] Skipped or failed: {e}")
 
 # ------------------------ Gradio UI --------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
     gr.Markdown(
-        "This app uses a **CodeAgent** that *calls the scoring tool* "
-        "(`classify_and_score`) after each proposal, and revises until it hits the target."
+        "Use a **CodeAgent** that calls the scoring tool (`classify_and_score`) after each proposal, "
+        "and revises until it hits your Bloom/DOK target."
     )
 
-    with gr.Accordion("API Settings", open=False):
-        hf_token = gr.Textbox(
-            label="Hugging Face Token (required if the endpoint needs auth)",
-            type="password"
+    with gr.Accordion("API / Backend Settings", open=True):
+        backend_choice = gr.Radio(
+            choices=["Hosted API", "Local GPU"],
+            value="Hosted API",
+            label="Inference Backend"
         )
-        model_id = gr.Textbox(
-            value="swiss-ai/Apertus-70B-Instruct-2509",
-            label="Model ID"
-        )
-        timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)")
+        with gr.Row():
+            hf_token = gr.Textbox(
+                label="Hugging Face Token (required for private/hosted endpoints)",
+                type="password",
+                visible=True
+            )
+            model_id = gr.Textbox(
+                value="swiss-ai/Apertus-70B-Instruct-2509",
+                label="Model ID (repo or local path)"
+            )
+        timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s, Hosted API only)")
 
     with gr.Row():
         topic = gr.Textbox(value="Fractions", label="Topic")
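
Reviewer note: the warmup block only fires when both variables are present before this module-level check runs; on Spaces, `SYSTEM=spaces` comes from the runtime itself. A hypothetical way to exercise the path locally:

    import os
    os.environ["SYSTEM"] = "spaces"     # mimic the Spaces runtime
    os.environ["BACKEND_WARMUP"] = "1"
    os.environ["BACKEND_WARMUP_MODEL"] = "swiss-ai/Apertus-70B-Instruct-2509"
    import app                          # the check runs at import time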
@@ -332,21 +373,43 @@ with gr.Blocks() as demo:
         )
     attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")
 
-    with gr.Accordion("Generation Controls", open=False):
+    with gr.Accordion("⚙️ Generation Controls", open=False):
         temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
         max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")
 
-    run_btn = gr.Button("Run Agent")
+    # Helpful hint text depending on backend
+    backend_tips = gr.Markdown(
+        "*Hosted API:* uses Hugging Face Inference endpoints. Provide a token if needed.\n\n"
+        "*Local GPU:* loads the model into the Space with `TransformersModel(device_map='auto')`. "
+        "Ensure your Space has a GPU and enough VRAM for the selected model."
+    )
+
+    run_btn = gr.Button("Run Agent 🚀")
 
     final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
     transcript = gr.Textbox(label="Agent Transcript", lines=18)
 
+    # Dynamically show/hide token & timeout based on backend
+    def _toggle_backend_fields(choice):
+        # Show token + timeout only for Hosted API
+        return (
+            gr.update(visible=(choice == "Hosted API")),
+            gr.update(visible=True),  # model_id always visible
+            gr.update(visible=(choice == "Hosted API"))
+        )
+
+    backend_choice.change(
+        _toggle_backend_fields,
+        inputs=[backend_choice],
+        outputs=[hf_token, model_id, timeout]
+    )
+
     run_btn.click(
         fn=run_pipeline,
         inputs=[
-            hf_token, topic, grade, subject,
-            target_bloom, target_dok, attempts,
-            model_id, timeout, temperature, max_tokens
+            backend_choice, hf_token, topic, grade, subject,
+            target_bloom, target_dok, attempts, model_id,
+            timeout, temperature, max_tokens
         ],
         outputs=[final_json, transcript]
     )
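
Reviewer note: `run_btn.click` maps `inputs` to `run_pipeline` positionally, so the list must stay in lockstep with the function's parameter order, which now leads with `backend_choice`; a mismatch would silently feed, say, the token in as the topic. Matching the new inputs list, the full signature presumably reads:

    def run_pipeline(
        backend_choice, hf_token, topic, grade, subject,
        target_bloom, target_dok, attempts, model_id,
        timeout, temperature, max_tokens
    ): ...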
 