import json
import os

import gradio as gr
import spaces
from smolagents import CodeAgent, InferenceClientModel, tool

from level_classifier_tool import (
    HFEmbeddingBackend,
    build_phrase_index,
    classify_levels_phrases,
)

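# Cue-phrase lexicons: each level maps to verbs/phrases whose embeddings are
# compared against the question text to score that level.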
BLOOMS_PHRASES = {
    "Remember": [
        "define", "list", "recall", "identify", "state", "label", "name", "recognize", "find",
        "select", "match", "choose", "give", "write", "tell", "show"
    ],
    "Understand": [
        "classify", "interpret", "summarize", "explain", "estimate", "describe", "discuss",
        "predict", "paraphrase", "restate", "illustrate", "compare", "contrast", "report"
    ],
    "Apply": [
        "apply", "solve", "use", "demonstrate", "calculate", "implement", "perform",
        "execute", "carry out", "practice", "employ", "sketch"
    ],
    "Analyze": [
        "analyze", "differentiate", "organize", "structure", "break down", "distinguish",
        "dissect", "examine", "compare", "contrast", "attribute", "investigate"
    ],
    "Evaluate": [
        "evaluate", "judge", "critique", "assess", "defend", "argue", "select", "support",
        "appraise", "recommend", "conclude", "review"
    ],
    "Create": [
        "create", "design", "compose", "plan", "construct", "produce", "devise", "generate",
        "develop", "formulate", "invent", "build"
    ],
}

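# Webb's Depth of Knowledge cue phrases, same structure as BLOOMS_PHRASES.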
DOK_PHRASES = {
    "DOK1": [
        "define", "list", "recall", "compute", "identify", "state", "label", "how many",
        "name", "recognize", "find", "determine", "select", "match", "choose", "give",
        "write", "tell", "show", "point out"
    ],
    "DOK2": [
        "classify", "interpret", "estimate", "organise", "summarise", "explain", "solve",
        "categorize", "group", "compare", "contrast", "distinguish", "make observations",
        "collect data", "display data", "arrange", "sort", "paraphrase", "restate", "predict",
        "approximate", "demonstrate", "illustrate", "describe", "analyze data"
    ],
    "DOK3": [
        "justify", "analyze", "generalise", "compare", "construct", "investigate",
        "support", "defend", "argue", "examine", "differentiate", "criticize", "debate",
        "test", "experiment", "hypothesize", "draw conclusions", "break down", "dissect",
        "probe", "explore", "develop", "formulate"
    ],
    "DOK4": [
        "design", "synthesize", "model", "prove", "evaluate system", "critique", "create",
        "compose", "plan", "invent", "devise", "generate", "build", "construct", "produce",
        "formulate", "improve", "revise", "assess", "appraise", "judge", "recommend",
        "predict outcome", "simulate"
    ],
}

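# Embed the lexicons once at import time so each tool call only embeds the
# incoming question, not the phrase lists.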
_backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
_BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
_DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)

@tool
def classify_and_score(
    question: str,
    target_bloom: str,
    target_dok: str,
    agg: str = "max",
) -> dict:
    """
    Classify a question against Bloom’s and DOK targets and return guidance.

    Args:
        question: The question text to classify.
        target_bloom: Target Bloom’s level (e.g. "Analyze"), or a level with a
            trailing "+" (e.g. "Apply+") to accept that level and above.
        target_dok: Target DOK level ("DOK1".."DOK4") or an inclusive range
            such as "DOK2-DOK3".
        agg: How phrase similarities are aggregated per level (e.g. "max").
    """
    res = classify_levels_phrases(
        question,
        BLOOMS_PHRASES,
        DOK_PHRASES,
        backend=_backend,
        prebuilt_bloom_index=_BLOOM_INDEX,
        prebuilt_dok_index=_DOK_INDEX,
        agg=agg,
        return_phrase_matches=True,
    )

    def _parse_target_bloom(t: str):
        order = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]
        if t.endswith("+"):
            base = t[:-1]
            if base not in order:
                raise ValueError(f"Invalid Bloom target '{t}'")
            return set(order[order.index(base):])
        if t not in order:
            raise ValueError(f"Invalid Bloom target '{t}'")
        return {t}

    def _parse_target_dok(t: str):
        order = ["DOK1", "DOK2", "DOK3", "DOK4"]
        if "-" in t:
            lo, hi = t.split("-")
            if lo not in order or hi not in order or order.index(lo) > order.index(hi):
                raise ValueError(f"Invalid DOK range '{t}'")
            return set(order[order.index(lo):order.index(hi) + 1])
        if t not in order:
            raise ValueError(f"Invalid DOK target '{t}'")
        return {t}

    try:
        bloom_target_set = _parse_target_bloom(target_bloom)
        dok_target_set = _parse_target_dok(target_dok)
    except Exception as e:
        return {
            "ok": False,
            "measured": {},
            "feedback": (
                f"Invalid targets: {e}. Use Bloom in "
                "{Remember, Understand, Apply, Analyze, Evaluate, Create} "
                "and DOK in {DOK1..DOK4} or ranges like 'DOK2-DOK3'."
            ),
        }

    bloom_best = res["blooms"]["best_level"]
    dok_best = res["dok"]["best_level"]

    bloom_ok = bloom_best in bloom_target_set
    dok_ok = dok_best in dok_target_set

    top_bloom_phrases = res["blooms"].get("top_phrases", {})
    top_dok_phrases = res["dok"].get("top_phrases", {})

    feedback_parts = []
    if not bloom_ok:
        feedback_parts.append(
            f"Shift Bloom’s from {bloom_best} toward {sorted(bloom_target_set)}. "
            f"Top cues: {top_bloom_phrases.get(bloom_best, [])[:3]}"
        )
    if not dok_ok:
        feedback_parts.append(
            f"Shift DOK from {dok_best} toward {sorted(dok_target_set)}. "
            f"Top cues: {top_dok_phrases.get(dok_best, [])[:3]}"
        )

    return {
        "ok": bool(bloom_ok and dok_ok),
        "measured": {
            "bloom_best": bloom_best,
            "bloom_scores": res["blooms"]["scores"],
            "dok_best": dok_best,
            "dok_scores": res["dok"]["scores"],
        },
        "feedback": " ".join(feedback_parts) if feedback_parts else "On target.",
    }

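# Quick sanity check (smolagents Tool objects are callable directly; the
# result shape assumes level_classifier_tool's return format):
#   classify_and_score(
#       question="Compare two strategies for adding unlike fractions and justify which generalizes better.",
#       target_bloom="Analyze",
#       target_dok="DOK2-DOK3",
#   )
#   # -> {"ok": ..., "measured": {...}, "feedback": "..."}
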
# Process-level cache so repeated runs reuse an already-loaded local model
# instead of reloading weights on every request.
_LOCAL_MODEL_CACHE = {"model": None, "model_id": None}

@spaces.GPU(duration=30)
def get_local_model_gpu(model_id: str):
    """
    Load and cache a local Transformers model for smolagents on GPU.
    Decorated so Spaces knows this task needs a GPU.
    """
    try:
        from smolagents import TransformersModel
    except Exception as e:
        raise RuntimeError(
            "Local backend requires 'TransformersModel' from smolagents. "
            "Please ensure your smolagents version provides it."
        ) from e

    if (
        _LOCAL_MODEL_CACHE["model"] is not None
        and _LOCAL_MODEL_CACHE["model_id"] == model_id
    ):
        return _LOCAL_MODEL_CACHE["model"]

    local_model = TransformersModel(
        model_id=model_id,
        device_map="auto",
    )
    _LOCAL_MODEL_CACHE["model"] = local_model
    _LOCAL_MODEL_CACHE["model_id"] = model_id
    return local_model

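# Build a CodeAgent over either the local GPU model or the hosted Inference
# API, depending on the backend selected in the UI.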
def make_agent(
    backend_choice: str,
    hf_token: str,
    model_id: str,
    timeout: int,
    temperature: float,
    max_tokens: int,
):
    if backend_choice == "Local GPU":
        model = get_local_model_gpu(model_id)
    else:
        # InferenceClientModel builds its own InferenceClient from these
        # settings; extra keyword arguments such as temperature/max_tokens are
        # forwarded by smolagents to the chat-completion call.
        model = InferenceClientModel(
            model_id=model_id,
            token=(hf_token or None),
            timeout=timeout,
            temperature=temperature,
            max_tokens=max_tokens,
        )

    agent = CodeAgent(model=model, tools=[classify_and_score])
    # Stash the UI settings for debugging; smolagents does not read this
    # attribute, so the local backend currently ignores these controls.
    agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}
    return agent

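# Prompt template handed to the agent. Double braces ({{ }}) escape the
# literal JSON braces so str.format leaves them intact.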
TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".

After you propose a candidate, you MUST immediately call:
classify_and_score(
    question=<just the question text>,
    target_bloom="{target_bloom}",
    target_dok="{target_dok}",
    agg="max"
)

Use the returned dict:
- If ok == True: print ONLY compact JSON {{"question": "...", "answer": "...", "reasoning": "..."}} and finish.
- If ok == False: briefly explain the needed shift, revise the question, and call classify_and_score again.
Repeat up to {attempts} attempts.
Keep answers concise.
Additionally, when you call classify_and_score, pass the exact question text you propose.
If you output JSON, ensure it is valid JSON (no trailing commas, use double quotes).
'''

def extract_top_level_json(s: str) -> str:
    """Return the first balanced top-level {...} block in s if it parses as JSON, else ''."""
    start = s.find("{")
    if start == -1:
        return ""
    depth = 0
    for i in range(start, len(s)):
        ch = s[i]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                candidate = s[start:i + 1]
                try:
                    json.loads(candidate)
                    return candidate
                except Exception:
                    # Balanced braces but not valid JSON: give up rather than
                    # scanning further (the agent is told to emit one object).
                    return ""
    return ""

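# e.g. extract_top_level_json('noise {"q": 1} tail') -> '{"q": 1}'
# Note: the brace counting ignores braces inside JSON strings, which is fine
# for the compact single-object output the prompt requests.
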
def run_pipeline(
    backend_choice,
    hf_token,
    topic,
    grade,
    subject,
    target_bloom,
    target_dok,
    attempts,
    model_id,
    timeout,
    temperature,
    max_tokens,
):
    try:
        agent = make_agent(
            backend_choice=backend_choice,
            hf_token=(hf_token or "").strip(),
            model_id=model_id,
            timeout=int(timeout),
            temperature=float(temperature),
            max_tokens=int(max_tokens),
        )
    except Exception as e:
        err = f"ERROR initializing backend '{backend_choice}': {e}"
        return "", err

    task = TASK_TMPL.format(
        grade=grade,
        topic=topic,
        subject=subject,
        target_bloom=target_bloom,
        target_dok=target_dok,
        attempts=int(attempts),
    )

    try:
        # Allow a few agent steps (propose, call tool, revise) per attempt.
        result_text = agent.run(task, max_steps=int(attempts) * 4)
    except Exception as e:
        result_text = f"ERROR while running the agent: {e}"

    # agent.run may return a non-string final answer; normalize before parsing.
    result_text = "" if result_text is None else str(result_text)

    final_json = ""
    candidate = extract_top_level_json(result_text)
    if candidate:
        try:
            final_json = json.dumps(json.loads(candidate), indent=2)
        except Exception:
            final_json = ""

    return final_json, result_text

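# Gradio UI: collects backend, target, and generation settings, and wires the
# Run button to run_pipeline.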
with gr.Blocks() as demo:
    gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
    gr.Markdown(
        "Use a **CodeAgent** that calls the scoring tool (`classify_and_score`) after each proposal "
        "and revises until it hits your Bloom/DOK target."
    )

    with gr.Accordion("API / Backend Settings", open=True):
        backend_choice = gr.Radio(
            choices=["Hosted API", "Local GPU"],
            value="Hosted API",
            label="Inference Backend",
        )
        with gr.Row():
            hf_token = gr.Textbox(
                label="Hugging Face Token (required for private/hosted endpoints)",
                type="password",
                visible=True,
            )
            model_id = gr.Textbox(
                value="swiss-ai/Apertus-70B-Instruct-2509",
                label="Model ID (repo id for Hosted, or local repo for GPU)",
            )
        timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s, Hosted API only)")

    with gr.Row():
        topic = gr.Textbox(value="Fractions", label="Topic")
        grade = gr.Dropdown(
            choices=[
                "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5", "Grade 6",
                "Grade 7", "Grade 8", "Grade 9",
                "Grade 10", "Grade 11", "Grade 12",
                "Under Graduate", "Post Graduate",
            ],
            value="Grade 7",
            label="Grade",
        )
        subject = gr.Textbox(value="Math", label="Subject")

    with gr.Row():
        target_bloom = gr.Dropdown(
            choices=["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"],
            value="Analyze",
            label="Target Bloom’s",
        )
        target_dok = gr.Dropdown(
            choices=["DOK1", "DOK2", "DOK3", "DOK4", "DOK1-DOK2", "DOK2-DOK3", "DOK3-DOK4"],
            value="DOK2-DOK3",
            label="Target Depth of Knowledge",
        )
        attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")

    with gr.Accordion("⚙️ Generation Controls", open=False):
        temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
        max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")

    backend_tips = gr.Markdown(
        "*Hosted API:* uses Hugging Face Inference endpoints. Provide a token if needed.\n\n"
        "*Local GPU:* loads the model into the Space with `TransformersModel(device_map='auto')`. "
        "Ensure your Space has a GPU and enough VRAM for the selected model."
    )

    run_btn = gr.Button("Run Agent 🚀")

    final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
    transcript = gr.Textbox(label="Agent Transcript", lines=18)

    def _toggle_backend_fields(choice):
        # Hosted-only fields (token, timeout) hide when "Local GPU" is chosen;
        # the model id textbox stays visible for both backends.
        return (
            gr.update(visible=(choice == "Hosted API")),
            gr.update(visible=True),
            gr.update(visible=(choice == "Hosted API")),
        )

    backend_choice.change(
        _toggle_backend_fields,
        inputs=[backend_choice],
        outputs=[hf_token, model_id, timeout],
    )

    run_btn.click(
        fn=run_pipeline,
        inputs=[
            backend_choice, hf_token, topic, grade, subject,
            target_bloom, target_dok, attempts, model_id,
            timeout, temperature, max_tokens,
        ],
        outputs=[final_json, transcript],
    )

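# Launch when run directly, or inside a Hugging Face Space (which sets the
# SYSTEM environment variable to "spaces").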
if __name__ == "__main__" or os.getenv("SYSTEM") == "spaces":
    demo.launch()