bhardwaj08sarthak committed
Commit f296c60 · verified · 1 Parent(s): c1b36ab

Update app.py

Files changed (1): app.py +128 -339

app.py CHANGED
@@ -1,282 +1,109 @@
- import os
  import json
  import gradio as gr
- import spaces
  from huggingface_hub import InferenceClient
  from smolagents import CodeAgent, InferenceClientModel, tool
  from huggingface_hub import login
- from smolagents import TransformersModel
- import os
- token = os.getenv("HF_Token")
- login(token=token)
- from level_classifier_tool import (
      classify_levels_phrases,
      HFEmbeddingBackend,
      build_phrase_index
  )
-
- # ------------------------ Taxonomy phrases ------------------------
- BLOOMS_PHRASES = {
-     "Remember": [
-         "define", "list", "recall", "identify", "state", "label", "name", "recognize", "find",
-         "select", "match", "choose", "give", "write", "tell", "show"
-     ],
-     "Understand": [
-         "classify", "interpret", "summarize", "explain", "estimate", "describe", "discuss",
-         "predict", "paraphrase", "restate", "illustrate", "compare", "contrast", "report"
-     ],
-     "Apply": [
-         "apply", "solve", "use", "demonstrate", "calculate", "implement", "perform",
-         "execute", "carry out", "practice", "employ", "sketch"
-     ],
-     "Analyze": [
-         "analyze", "differentiate", "organize", "structure", "break down", "distinguish",
-         "dissect", "examine", "compare", "contrast", "attribute", "investigate"
-     ],
-     "Evaluate": [
-         "evaluate", "judge", "critique", "assess", "defend", "argue", "select", "support",
-         "appraise", "recommend", "conclude", "review"
-     ],
-     "Create": [
-         "create", "design", "compose", "plan", "construct", "produce", "devise", "generate",
-         "develop", "formulate", "invent", "build"
-     ]
- }
-
- DOK_PHRASES = {
-     "DOK1": [
-         "define", "list", "recall", "compute", "identify", "state", "label", "how many",
-         "name", "recognize", "find", "determine", "select", "match", "choose", "give",
-         "write", "tell", "show", "point out"
-     ],
-     "DOK2": [
-         "classify", "interpret", "estimate", "organise", "summarise", "explain", "solve",
-         "categorize", "group", "compare", "contrast", "distinguish", "make observations",
-         "collect data", "display data", "arrange", "sort", "paraphrase", "restate", "predict",
-         "approximate", "demonstrate", "illustrate", "describe", "analyze data"
-     ],
-     "DOK3": [
-         "justify", "analyze", "generalise", "compare", "construct", "investigate",
-         "support", "defend", "argue", "examine", "differentiate", "criticize", "debate",
-         "test", "experiment", "hypothesize", "draw conclusions", "break down", "dissect",
-         "probe", "explore", "develop", "formulate"
-     ],
-     "DOK4": [
-         "design", "synthesize", "model", "prove", "evaluate system", "critique", "create",
-         "compose", "plan", "invent", "devise", "generate", "build", "construct", "produce",
-         "formulate", "improve", "revise", "assess", "appraise", "judge", "recommend",
-         "predict outcome", "simulate"
-     ]
- }
-
- # ------------------------ Prebuild embeddings once ------------------------
  _backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
  _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
  _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
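For orientation, this is how the prebuilt indexes get consumed. A minimal sketch assembled only from the `classify_levels_phrases` call and result keys visible in this diff; the sample question is illustrative, and `BLOOMS_PHRASES`/`DOK_PHRASES` are the dicts defined above:

```python
from level_classifier_tool import (
    classify_levels_phrases, HFEmbeddingBackend, build_phrase_index
)

backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
bloom_index = build_phrase_index(backend, BLOOMS_PHRASES)
dok_index = build_phrase_index(backend, DOK_PHRASES)

# Score one question against both taxonomies, reusing the cached indexes.
res = classify_levels_phrases(
    "Compare two strategies for adding unlike fractions and justify the better one.",
    BLOOMS_PHRASES,
    DOK_PHRASES,
    backend=backend,
    prebuilt_bloom_index=bloom_index,
    prebuilt_dok_index=dok_index,
    agg="max",
    return_phrase_matches=True,
)
print(res["blooms"]["best_level"], res["dok"]["best_level"])  # e.g. "Analyze" / "DOK3"
```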
 
- # ------------------------ Tool: classify and score ------------------------
- @tool
- def classify_and_score(
-     question: str,
-     target_bloom: str,
-     target_dok: str,
-     agg: str = "max"
- ) -> dict:
-     """Classify a question against Bloom’s and DOK targets and return guidance.
-
-     Args:
-         question: The question text to evaluate for cognitive demand.
-         target_bloom: Target Bloom’s level or range. Accepts exact (e.g., "Analyze")
-             or plus form (e.g., "Apply+") meaning that level or higher.
-         target_dok: Target DOK level or range. Accepts exact (e.g., "DOK3")
-             or span (e.g., "DOK2-DOK3").
-         agg: Aggregation method over phrase similarities within a level
-             (choices: "mean", "max", "topk_mean").
-
-     Returns:
-         A dictionary with:
-             ok: True if both Bloom’s and DOK match the targets.
-             measured: Dict with best levels and per-level scores for Bloom’s and DOK.
-             feedback: Brief guidance describing how to adjust the question to hit targets.
-     """
-     res = classify_levels_phrases(
-         question,
-         BLOOMS_PHRASES,
-         DOK_PHRASES,
-         backend=_backend,
-         prebuilt_bloom_index=_BLOOM_INDEX,
-         prebuilt_dok_index=_DOK_INDEX,
-         agg=agg,
-         return_phrase_matches=True
-     )
-
-     def _parse_target_bloom(t: str):
-         order = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]
-         if t.endswith("+"):
-             base = t[:-1]
-             if base not in order:
-                 raise ValueError(f"Invalid Bloom target '{t}'")
-             return set(order[order.index(base):])
-         if t not in order:
-             raise ValueError(f"Invalid Bloom target '{t}'")
-         return {t}
-
-     def _parse_target_dok(t: str):
-         order = ["DOK1", "DOK2", "DOK3", "DOK4"]
-         if "-" in t:
-             lo, hi = t.split("-")
-             if lo not in order or hi not in order or order.index(lo) > order.index(hi):
-                 raise ValueError(f"Invalid DOK range '{t}'")
-             return set(order[order.index(lo):order.index(hi) + 1])
-         if t not in order:
-             raise ValueError(f"Invalid DOK target '{t}'")
-         return {t}
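The two helpers above expand a target spec into the set of acceptable levels: "Apply+" means that level or higher, and "DOK2-DOK3" is an inclusive span. A standalone check of that behavior (simplified copies of the helpers, with error handling dropped):

```python
def parse_bloom(t: str):
    order = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]
    if t.endswith("+"):                      # "Apply+" = Apply or higher
        return set(order[order.index(t[:-1]):])
    return {t}

def parse_dok(t: str):
    order = ["DOK1", "DOK2", "DOK3", "DOK4"]
    if "-" in t:                             # "DOK2-DOK3" = inclusive span
        lo, hi = t.split("-")
        return set(order[order.index(lo):order.index(hi) + 1])
    return {t}

assert parse_bloom("Apply+") == {"Apply", "Analyze", "Evaluate", "Create"}
assert parse_dok("DOK2-DOK3") == {"DOK2", "DOK3"}
```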
-
-     try:
-         bloom_target_set = _parse_target_bloom(target_bloom)
-         dok_target_set = _parse_target_dok(target_dok)
-     except Exception as e:
-         return {
-             "ok": False,
-             "measured": {},
-             "feedback": (
-                 f"Invalid targets: {e}. Use Bloom in "
-                 "{Remember, Understand, Apply, Analyze, Evaluate, Create} "
-                 "and DOK in {DOK1..DOK4} or ranges like 'DOK2-DOK3'."
-             ),
-         }
-
-     bloom_best = res["blooms"]["best_level"]
-     dok_best = res["dok"]["best_level"]
-
-     bloom_ok = bloom_best in bloom_target_set
-     dok_ok = dok_best in dok_target_set
-
-     top_bloom_phrases = res["blooms"].get("top_phrases", {})
-     top_dok_phrases = res["dok"].get("top_phrases", {})
-
-     feedback_parts = []
-     if not bloom_ok:
-         feedback_parts.append(
-             f"Shift Bloom’s from {bloom_best} toward {sorted(list(bloom_target_set))}. "
-             f"Top cues: {top_bloom_phrases.get(bloom_best, [])[:3]}"
-         )
-     if not dok_ok:
-         feedback_parts.append(
-             f"Shift DOK from {dok_best} toward {sorted(list(dok_target_set))}. "
-             f"Top cues: {top_dok_phrases.get(dok_best, [])[:3]}"
-         )
-
-     return {
-         "ok": bool(bloom_ok and dok_ok),
-         "measured": {
-             "bloom_best": bloom_best,
-             "bloom_scores": res["blooms"]["scores"],
-             "dok_best": dok_best,
-             "dok_scores": res["dok"]["scores"],
-         },
-         "feedback": " ".join(feedback_parts) if feedback_parts else "On target.",
-     }
-
- # ------------------------ Backend selection + GPU-wrapped local loader ------------------------
- _LOCAL_MODEL_CACHE = {"model": None, "model_id": None}
- @spaces.GPU(duration=30)  # request GPU only when loading/using local model
- def get_local_model_gpu(model_id: str):
-     """
-     Load and cache a local Transformers model for smolagents on GPU.
-     Decorated so Spaces knows this task needs a GPU.
-     """
-     # Import here to keep Hosted mode lightweight.
-     try:
-         from smolagents import TransformersModel  # provided by smolagents
-     except Exception as e:
-         raise RuntimeError(
-             "Local backend requires 'TransformersModel' from smolagents. "
-             "Please ensure your smolagents version provides it."
-         ) from e
-
-     if (
-         _LOCAL_MODEL_CACHE["model"] is not None
-         and _LOCAL_MODEL_CACHE["model_id"] == model_id
-     ):
-         return _LOCAL_MODEL_CACHE["model"]
-
-     local_model = TransformersModel(
-         model_id=model_id,
-         device_map="auto"  # lets accelerate pick the best device(s)
      )
-     _LOCAL_MODEL_CACHE["model"] = local_model
-     _LOCAL_MODEL_CACHE["model_id"] = model_id
-     return local_model
-
- def make_agent(
-     backend_choice: str,  # "Hosted API" | "Local GPU"
-     hf_token: str,
-     model_id: str,
-     timeout: int,
-     temperature: float,
-     max_tokens: int
- ):
-     if backend_choice == "Local GPU":
-         # This call is GPU-annotated; Spaces will allocate a GPU for it.
-         model = get_local_model_gpu(model_id)
-     else:
-         client = InferenceClient(
-             model=model_id,
-             timeout=timeout,
-             token=(hf_token or None),
-         )
-         model = InferenceClientModel(client=client)
-
-     agent = CodeAgent(model=model, tools=[classify_and_score])
-     agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}
  return agent

- # ------------------------ Agent task template -----------------------------
- TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".
-
- After you propose a candidate, you MUST immediately call:
- classify_and_score(
-     question=<just the question text>,
-     target_bloom="{target_bloom}",
-     target_dok="{target_dok}",
-     agg="max"
- )
-
- Use the returned dict:
- - If ok == True: print ONLY compact JSON {{"question": "...", "answer": "...", "reasoning": "..."}} and finish.
- - If ok == False: briefly explain the needed shift, revise the question, and call classify_and_score again.
- Repeat up to {attempts} attempts.
- Keep answers concise.
- Additionally, when you call classify_and_score, pass the exact question text you propose.
- If you output JSON, ensure it is valid JSON (no trailing commas, use double quotes).
- '''
-
- # ------------------------ Utility: robust JSON extractor ------------------
- def extract_top_level_json(s: str) -> str:
-     start = s.find("{")
-     if start == -1:
-         return ""
-     depth = 0
-     for i in range(start, len(s)):
-         ch = s[i]
-         if ch == "{":
-             depth += 1
-         elif ch == "}":
-             depth -= 1
-             if depth == 0:
-                 candidate = s[start:i + 1]
-                 try:
-                     json.loads(candidate)  # validate
-                     return candidate
-                 except Exception:
-                     return ""
-     return ""
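A quick check of the extractor's contract: it returns the first balanced top-level `{...}` block, and only if that block parses as JSON. A standalone copy with illustrative strings:

```python
import json

def extract_top_level_json(s: str) -> str:
    # Scan for the first balanced {...} block, then validate it as JSON.
    start = s.find("{")
    if start == -1:
        return ""
    depth = 0
    for i in range(start, len(s)):
        if s[i] == "{":
            depth += 1
        elif s[i] == "}":
            depth -= 1
            if depth == 0:
                candidate = s[start:i + 1]
                try:
                    json.loads(candidate)
                    return candidate
                except Exception:
                    return ""
    return ""

print(extract_top_level_json('noise {"question": "Q", "answer": "A"} trailing'))
# -> {"question": "Q", "answer": "A"}
print(extract_top_level_json("no json here"))  # -> ""
```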

- # ------------------------ Pipeline ---------------------------------------
  def run_pipeline(
-     backend_choice,
      hf_token,
      topic,
      grade,
@@ -285,24 +112,23 @@ def run_pipeline(
      target_dok,
      attempts,
      model_id,
      timeout,
      temperature,
-     max_tokens
  ):
-     try:
-         agent = make_agent(
-             backend_choice=backend_choice,
-             hf_token=(hf_token or "").strip(),
-             model_id=model_id,
-             timeout=int(timeout),
-             temperature=float(temperature),
-             max_tokens=int(max_tokens),
-         )
-     except Exception as e:
-         err = f"ERROR initializing backend '{backend_choice}': {e}"
-         return "", err
-
-     task = TASK_TMPL.format(
          grade=grade,
          topic=topic,
          subject=subject,
@@ -311,117 +137,80 @@ def run_pipeline(
          attempts=int(attempts)
      )

      try:
-         result_text = agent.run(task, max_steps=int(attempts) * 4)
      except Exception as e:
-         result_text = f"ERROR while running the agent: {e}"

      final_json = ""
-     candidate = extract_top_level_json(result_text or "")
-     if candidate:
-         try:
              final_json = json.dumps(json.loads(candidate), indent=2)
-         except Exception:
-             final_json = ""

      return final_json, result_text

- # ------------------------ Gradio UI --------------------------------------
  with gr.Blocks() as demo:
      gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
      gr.Markdown(
-         "Use a **CodeAgent** that calls the scoring tool (`classify_and_score`) after each proposal, "
-         "and revises until it hits your Bloom/DOK target."
      )

-     with gr.Accordion("API / Backend Settings", open=True):
-         backend_choice = gr.Radio(
-             choices=["Hosted API", "Local GPU"],
-             value="Hosted API",
-             label="Inference Backend"
-         )
-         with gr.Row():
-             hf_token = gr.Textbox(
-                 label="Hugging Face Token (required for private/hosted endpoints)",
-                 type="password",
-                 visible=True
-             )
-             model_id = gr.Textbox(
-                 value="swiss-ai/Apertus-70B-Instruct-2509",
-                 label="Model ID (repo id for Hosted, or local repo for GPU)"
-             )
-         timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s, Hosted API only)")

      with gr.Row():
          topic = gr.Textbox(value="Fractions", label="Topic")
          grade = gr.Dropdown(
-             choices=[
-                 "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5", "Grade 6",
-                 "Grade 7", "Grade 8", "Grade 9",
-                 "Grade 10", "Grade 11", "Grade 12",
-                 "Under Graduate", "Post Graduate"
-             ],
              value="Grade 7",
              label="Grade"
          )
-         subject = gr.Textbox(value="Math", label="Subject")

      with gr.Row():
          target_bloom = gr.Dropdown(
-             choices=["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"],
              value="Analyze",
              label="Target Bloom’s"
          )
          target_dok = gr.Dropdown(
-             choices=["DOK1", "DOK2", "DOK3", "DOK4", "DOK1-DOK2", "DOK2-DOK3", "DOK3-DOK4"],
              value="DOK2-DOK3",
-             label="Target Depth of Knowledge"
          )
          attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")

-     with gr.Accordion("⚙️ Generation Controls", open=False):
          temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
          max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")

-     backend_tips = gr.Markdown(
-         "*Hosted API:* uses Hugging Face Inference endpoints. Provide a token if needed.\n\n"
-         "*Local GPU:* loads the model into the Space with `TransformersModel (device_map='auto')`. "
-         "Ensure your Space has a GPU and enough VRAM for the selected model."
-     )
-
-     run_btn = gr.Button("Run Agent 🚀")

      final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
      transcript = gr.Textbox(label="Agent Transcript", lines=18)

-     def _toggle_backend_fields(choice):
-         return (
-             gr.update(visible=(choice == "Hosted API")),  # hf_token
-             gr.update(visible=True),                      # model_id always visible
-             gr.update(visible=(choice == "Hosted API"))   # timeout slider
-         )
-
-     backend_choice.change(
-         _toggle_backend_fields,
-         inputs=[backend_choice],
-         outputs=[hf_token, model_id, timeout]
-     )
-
      run_btn.click(
          fn=run_pipeline,
-         inputs=[
-             backend_choice, hf_token, topic, grade, subject,
-             target_bloom, target_dok, attempts, model_id,
-             timeout, temperature, max_tokens
-         ],
          outputs=[final_json, transcript]
      )

- if __name__ == "__main__" or os.getenv("SYSTEM") == "spaces":
-     try:
-         get_local_model_gpu(model_id)  # triggers GPU allocation during startup
-     except Exception as e:
-         # don't crash the app if warmup fails; logs will show details
-         print("Warmup failed:", e)
-
- demo.launch()
 
+ # Create a self-contained Gradio app that uses the agent-driven loop (Option A)
+ # It expects `level_classifier_tool.py` to be colocated (or installed on PYTHONPATH).
+ import sys
+ sys.path.append(r"C:\Users\Sarthak\OneDrive - UT Cloud\thesis\HF_Agent\src")  # use raw string because of spaces
  import json
  import gradio as gr
  from huggingface_hub import InferenceClient
  from smolagents import CodeAgent, InferenceClientModel, tool
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from llama_index.core import VectorStoreIndex, Document
  from huggingface_hub import login
+ from smolagents import tool
+ from all_datasets import *
+ from level_classifier_tool_2 import (
      classify_levels_phrases,
      HFEmbeddingBackend,
      build_phrase_index
  )
+ from task_temp import TASK_TMPL, CLASSIFY_TMPL, GEN_TMPL, RAG_TMPL
+ from all_tools import classify_and_score, QuestionRetrieverTool
+ from phrases import BLOOMS_PHRASES, DOK_PHRASES
+ # Prebuild embeddings once
  _backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
  _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
  _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
+ D = {
+     "GSM8k": GSM8k['question'],
+     "Olympiad": Olympiad_math['question'],
+     "Olympiad2": Olympiad_math2['question'],
+     "DeepMind Math": clean_math['question'],
+     "MMMLU": MMMLU['question'],
+     "MMMU": MMMU['question'],
+     "ScienceQA": ScienceQA['question'],
+     "PubmedQA": PubmedQA['question']
+ }
+ all_questions = (
+     list(D["GSM8k"]) +
+     list(D["Olympiad"]) +
+     list(D["MMMLU"]) +
+     list(D["MMMU"]) +
+     list(D["DeepMind Math"]) +
+     list(D["Olympiad2"]) +
+     list(D["ScienceQA"]) +
+     list(D["PubmedQA"])
+ )

+ emb = HuggingFaceEmbeddings(
+     model_name="google/embeddinggemma-300m",
+     encode_kwargs={"normalize_embeddings": True},
+ )
+ texts = all_questions
+ index = VectorStoreIndex.from_documents([Document(text=t) for t in texts], embed_model=emb)
54
+ # ------------------------ Scoring TOOL -----------------------------------
+ # (classify_and_score is imported from all_tools above.)
+ # ------------------------ Retriever TOOL -----------------------------------
+ # (QuestionRetrieverTool is imported from all_tools above.)

+ # ------------------------ Agent setup with timeout ------------------------
+ def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int):
+     client = InferenceClient(
+         model=model_id,
+         provider=provider,
+         timeout=timeout,
+         token=hf_token if hf_token else None,
      )

+     # Bind generation params by partially applying via model kwargs.
+     # smolagents InferenceClientModel currently accepts client only; we pass runtime params in task text.
+     model = InferenceClientModel(model_id=model_id, client=client)
+     agent = CodeAgent(model=model, tools=[classify_and_score, QuestionRetrieverTool])
+     agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}  # attach for reference
  return agent
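For reference, a call that mirrors the UI defaults further down (the token is a placeholder, not from the commit):

```python
# Placeholder token; model/provider/timeout values mirror the UI defaults below.
agent = make_agent(
    hf_token="hf_xxx",
    model_id="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    provider="novita",
    timeout=30,
    temperature=0.7,
    max_tokens=300,
)
```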

+ # ------------------------ Agent task template -----------------------------
+ # (TASK_TMPL, CLASSIFY_TMPL, GEN_TMPL, and RAG_TMPL are imported from task_temp above.)

+ # ------------------------ Gradio glue ------------------------------------
  def run_pipeline(
      hf_token,
      topic,
      grade,
      target_dok,
      attempts,
      model_id,
+     provider,
      timeout,
      temperature,
+     max_tokens,
+     task_type
  ):
+     # Build agent per run (or cache if you prefer)
+     agent = make_agent(
+         hf_token=hf_token.strip(),
+         model_id=model_id,
+         provider=provider,
+         timeout=int(timeout),
+         temperature=float(temperature),
+         max_tokens=int(max_tokens),
+     )

+     # The task_type dropdown supplies a template *name*; resolve it to the
+     # imported template object before formatting.
+     templates = {
+         "TASK_TMPL": TASK_TMPL,
+         "CLASSIFY_TMPL": CLASSIFY_TMPL,
+         "GEN_TMPL": GEN_TMPL,
+         "RAG_TMPL": RAG_TMPL,
+     }
+     task = templates[task_type].format(
          grade=grade,
          topic=topic,
          subject=subject,
          attempts=int(attempts)
      )

+     # The agent will internally call the tool
      try:
+         result_text = agent.run(task, max_steps=int(attempts) * 4)
      except Exception as e:
+         result_text = f"ERROR: {e}"

+     # Try to extract final JSON
      final_json = ""
+     try:
+         # find a JSON object in result_text (simple first-"{" / last-"}" heuristic)
+         start = result_text.find("{")
+         end = result_text.rfind("}")
+         if start != -1 and end != -1 and end > start:
+             candidate = result_text[start:end + 1]
              final_json = json.dumps(json.loads(candidate), indent=2)
+     except Exception:
+         final_json = ""

      return final_json, result_text


  with gr.Blocks() as demo:
      gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
      gr.Markdown(
+         "This app uses a **CodeAgent** that *calls the scoring tool* "
+         "(`classify_and_score`) after each proposal, and revises until it hits the target."
      )

+     with gr.Accordion("API Settings", open=False):
+         hf_token = gr.Textbox(label="Hugging Face Token (required)", type="password")
+         model_id = gr.Textbox(value="meta-llama/Llama-4-Scout-17B-16E-Instruct", label="Model ID")
+         provider = gr.Textbox(value="novita", label="Provider")
+         timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)")

      with gr.Row():
          topic = gr.Textbox(value="Fractions", label="Topic")
          grade = gr.Dropdown(
+             choices=["Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5", "Grade 6",
+                      "Grade 7", "Grade 8", "Grade 9", "Grade 10", "Grade 11", "Grade 12",
+                      "Under Graduate", "Post Graduate"],
              value="Grade 7",
              label="Grade"
          )
+         subject = gr.Textbox(value="Math", label="Subject")
+         task_type = gr.Dropdown(
+             choices=["TASK_TMPL", "CLASSIFY_TMPL", "GEN_TMPL", "RAG_TMPL"],
+             value="TASK_TMPL",
+             label="Task type"
+         )

      with gr.Row():
          target_bloom = gr.Dropdown(
+             choices=["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create",
+                      "Apply+", "Analyze+", "Evaluate+"],
              value="Analyze",
              label="Target Bloom’s"
          )
          target_dok = gr.Dropdown(
+             choices=["DOK1", "DOK2", "DOK3", "DOK4", "DOK1-DOK2", "DOK2-DOK3", "DOK3-DOK4"],
              value="DOK2-DOK3",
+             label="Target DOK"
          )
          attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")

+     with gr.Accordion("⚙️ Generation Controls", open=False):
          temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
          max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")

+     run_btn = gr.Button("Run Agent")

      final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
      transcript = gr.Textbox(label="Agent Transcript", lines=18)

      run_btn.click(
          fn=run_pipeline,
+         inputs=[hf_token, topic, grade, subject, target_bloom, target_dok, attempts,
+                 model_id, provider, timeout, temperature, max_tokens, task_type],
          outputs=[final_json, transcript]
      )

+ if __name__ == "__main__":
+     demo.launch(share=True)