Update app.py
app.py CHANGED
@@ -69,7 +69,6 @@ _backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2
_BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
_DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)

-
# ------------------------ Tool: classify and score ------------------------
@tool
def classify_and_score(
@@ -78,23 +77,7 @@ def classify_and_score(
    target_dok: str,
    agg: str = "max"
) -> dict:
-    """Classify a question against Bloom’s and DOK targets and return guidance.
-
-    Args:
-        question: The question text to evaluate for cognitive demand.
-        target_bloom: Target Bloom’s level or range. Accepts exact (e.g., "Analyze")
-            or plus form (e.g., "Apply+") meaning that level or higher.
-        target_dok: Target DOK level or range. Accepts exact (e.g., "DOK3")
-            or span (e.g., "DOK2-DOK3").
-        agg: Aggregation method over phrase similarities within a level
-            (choices: "mean", "max", "topk_mean").
-
-    Returns:
-        A dictionary with:
-            ok: True if both Bloom’s and DOK match the targets.
-            measured: Dict with best levels and per-level scores for Bloom’s and DOK.
-            feedback: Brief guidance describing how to adjust the question to hit targets.
-    """
+    """Classify a question against Bloom’s and DOK targets and return guidance."""
    res = classify_levels_phrases(
        question,
        BLOOMS_PHRASES,
@@ -174,21 +157,62 @@ def classify_and_score(
        "feedback": " ".join(feedback_parts) if feedback_parts else "On target.",
    }

+# ------------------------ Backend selection + caching ------------------------
+_LOCAL_MODEL_CACHE = {
+    "model": None,
+    "model_id": None,
+}

-
-
-
-
-
-
+def _get_local_model(model_id: str):
+    """Lazy-load and cache a local Transformers model for smolagents."""
+    # Import here so Hosted mode doesn't require local deps.
+    try:
+        from smolagents import TransformersModel  # provided by smolagents
+    except Exception as e:
+        raise RuntimeError(
+            "Local backend requires 'TransformersModel' from smolagents. "
+            "Please ensure your smolagents version provides it."
+        ) from e
+
+    if (
+        _LOCAL_MODEL_CACHE["model"] is not None
+        and _LOCAL_MODEL_CACHE["model_id"] == model_id
+    ):
+        return _LOCAL_MODEL_CACHE["model"]
+
+    # Instantiate and cache
+    local_model = TransformersModel(
+        model_id=model_id,
+        device_map="auto"
    )
-    model =
+    _LOCAL_MODEL_CACHE["model"] = local_model
+    _LOCAL_MODEL_CACHE["model_id"] = model_id
+    return local_model
+
+# ------------------------ Agent setup with timeout ------------------------
+def make_agent(
+    backend_choice: str,   # "Hosted API" | "Local GPU"
+    hf_token: str,
+    model_id: str,
+    timeout: int,
+    temperature: float,
+    max_tokens: int
+):
+    if backend_choice == "Local GPU":
+        model = _get_local_model(model_id)
+    else:
+        client = InferenceClient(
+            model=model_id,
+            timeout=timeout,
+            token=(hf_token or None),
+        )
+        model = InferenceClientModel(client=client)
+
    agent = CodeAgent(model=model, tools=[classify_and_score])
-    # Not used by
+    # Not used by agent core; helpful for debugging
    agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}
    return agent

-
# ------------------------ Agent task template -----------------------------
TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".

@@ -209,35 +233,31 @@ Additionally, when you call classify_and_score, pass the exact question text you
If you output JSON, ensure it is valid JSON (no trailing commas, use double quotes).
'''

-
# ------------------------ Utility: robust JSON extractor ------------------
def extract_top_level_json(s: str) -> str:
-    """
-    Extract the first top-level JSON object from a string by tracking braces.
-    Returns the JSON string if found, else "".
-    """
+    """Extract the first top-level JSON object by tracking braces."""
    start = s.find("{")
    if start == -1:
        return ""
    depth = 0
    for i in range(start, len(s)):
-
+        ch = s[i]
+        if ch == "{":
            depth += 1
-        elif
+        elif ch == "}":
            depth -= 1
        if depth == 0:
            candidate = s[start:i + 1]
            try:
-                # validate
-                json.loads(candidate)
+                json.loads(candidate)  # validate
                return candidate
            except Exception:
                return ""
    return ""

-
# ------------------------ Pipeline ---------------------------------------
def run_pipeline(
+    backend_choice,
    hf_token,
    topic,
    grade,
@@ -250,14 +270,19 @@ def run_pipeline(
    temperature,
    max_tokens
):
-
-
-
-
-
-
-
-
+    try:
+        agent = make_agent(
+            backend_choice=backend_choice,
+            hf_token=(hf_token or "").strip(),
+            model_id=model_id,
+            timeout=int(timeout),
+            temperature=float(temperature),
+            max_tokens=int(max_tokens),
+        )
+    except Exception as e:
+        # Surface backend/model setup errors directly
+        err = f"ERROR initializing backend '{backend_choice}': {e}"
+        return "", err

    task = TASK_TMPL.format(
        grade=grade,
@@ -268,13 +293,11 @@ def run_pipeline(
        attempts=int(attempts)
    )

-    # The agent will internally call the tool
    try:
        result_text = agent.run(task, max_steps=int(attempts) * 4)
    except Exception as e:
-        result_text = f"ERROR: {e}"
+        result_text = f"ERROR while running the agent: {e}"

-    # Try to extract final JSON
    final_json = ""
    candidate = extract_top_level_json(result_text or "")
    if candidate:
@@ -285,25 +308,43 @@ def run_pipeline(

    return final_json, result_text

+# ------------------------ Optional Spaces warmup --------------------------
+# If you deploy on HF Spaces and want to pre-allocate GPU for Local mode,
+# you can try to warm up the model at startup by setting:
+#   BACKEND_WARMUP=1 and BACKEND_WARMUP_MODEL=<model id>
+if (os.getenv("SYSTEM") == "spaces") and os.getenv("BACKEND_WARMUP") == "1":
+    try:
+        wm = os.getenv("BACKEND_WARMUP_MODEL", "swiss-ai/Apertus-70B-Instruct-2509")
+        _get_local_model(wm)
+        print(f"[Warmup] Local GPU model loaded: {wm}")
+    except Exception as e:
+        print(f"[Warmup] Skipped or failed: {e}")

# ------------------------ Gradio UI --------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
    gr.Markdown(
-        "
-        "
+        "Use a **CodeAgent** that calls the scoring tool (`classify_and_score`) after each proposal, "
+        "and revises until it hits your Bloom/DOK target."
    )

-    with gr.Accordion("API Settings", open=
-
-
-
+    with gr.Accordion("API / Backend Settings", open=True):
+        backend_choice = gr.Radio(
+            choices=["Hosted API", "Local GPU"],
+            value="Hosted API",
+            label="Inference Backend"
        )
-
-
-
-
-
+        with gr.Row():
+            hf_token = gr.Textbox(
+                label="Hugging Face Token (required for private/hosted endpoints)",
+                type="password",
+                visible=True
+            )
+            model_id = gr.Textbox(
+                value="swiss-ai/Apertus-70B-Instruct-2509",
+                label="Model ID (repo or local path)"
+            )
+        timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s, Hosted API only)")

    with gr.Row():
        topic = gr.Textbox(value="Fractions", label="Topic")
@@ -332,21 +373,43 @@ with gr.Blocks() as demo:
    )
    attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")

-    with gr.Accordion("Generation Controls", open=False):
+    with gr.Accordion("⚙️ Generation Controls", open=False):
        temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
        max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")

-
+    # Helpful hint text depending on backend
+    backend_tips = gr.Markdown(
+        "*Hosted API:* uses Hugging Face Inference endpoints. Provide a token if needed.\n\n"
+        "*Local GPU:* loads the model into the Space with `TransformersModel (device_map='auto')`. "
+        "Ensure your Space has a GPU and enough VRAM for the selected model."
+    )
+
+    run_btn = gr.Button("Run Agent 🚀")

    final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
    transcript = gr.Textbox(label="Agent Transcript", lines=18)

+    # Dynamically show/hide token & timeout based on backend
+    def _toggle_backend_fields(choice):
+        # Show token + timeout only for Hosted API
+        return (
+            gr.update(visible=(choice == "Hosted API")),
+            gr.update(visible=True),   # model_id always visible
+            gr.update(visible=(choice == "Hosted API"))
+        )
+
+    backend_choice.change(
+        _toggle_backend_fields,
+        inputs=[backend_choice],
+        outputs=[hf_token, model_id, timeout]
+    )
+
    run_btn.click(
        fn=run_pipeline,
        inputs=[
-            hf_token, topic, grade, subject,
-            target_bloom, target_dok, attempts,
-
+            backend_choice, hf_token, topic, grade, subject,
+            target_bloom, target_dok, attempts, model_id,
+            timeout, temperature, max_tokens
        ],
        outputs=[final_json, transcript]
    )
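As a usage illustration of the brace-tracking extraction in `extract_top_level_json`: a minimal standalone sketch of the same approach. The helper name, sample transcript string, and printed output below are illustrative only, not part of app.py.

import json

def extract_first_json_object(s: str) -> str:
    # Scan from the first "{" and track brace depth; validate before returning.
    start = s.find("{")
    if start == -1:
        return ""
    depth = 0
    for i in range(start, len(s)):
        ch = s[i]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                candidate = s[start:i + 1]
                try:
                    json.loads(candidate)  # reject text that is merely brace-balanced
                    return candidate
                except Exception:
                    return ""
    return ""

transcript = 'Final answer: {"question": "Compare 2/3 and 3/4", "bloom": "Analyze"} done'
print(extract_first_json_object(transcript))
# {"question": "Compare 2/3 and 3/4", "bloom": "Analyze"}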
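The new `_get_local_model` keeps a single model in a module-level dict and only reloads when the requested `model_id` changes. A tiny sketch of that single-slot cache pattern, with a stand-in loader and hypothetical names (`get_cached`, `fake_loader`) so it runs anywhere without a GPU or smolagents:

_CACHE = {"obj": None, "key": None}

def get_cached(key, loader):
    # Reuse the cached object while the key matches; otherwise load and replace it.
    if _CACHE["obj"] is not None and _CACHE["key"] == key:
        return _CACHE["obj"]
    _CACHE["obj"] = loader(key)
    _CACHE["key"] = key
    return _CACHE["obj"]

loads = []
def fake_loader(model_id):
    loads.append(model_id)
    return f"<model {model_id}>"

get_cached("swiss-ai/Apertus-70B-Instruct-2509", fake_loader)
get_cached("swiss-ai/Apertus-70B-Instruct-2509", fake_loader)  # cache hit, no reload
get_cached("some-other/model", fake_loader)                    # key changed, reloads
assert loads == ["swiss-ai/Apertus-70B-Instruct-2509", "some-other/model"]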