|  | import os | 
					
						
						|  | import json | 
					
						
						|  | import gradio as gr | 
					
						
						|  | from huggingface_hub import InferenceClient | 
					
						
						|  | from smolagents import CodeAgent, InferenceClientModel, tool | 
					
						
						|  |  | 
					
						
						|  | from level_classifier_tool import ( | 
					
						
						|  | classify_levels_phrases, | 
					
						
						|  | HFEmbeddingBackend, | 
					
						
						|  | build_phrase_index | 
					
						
						|  | ) | 
					
						
# Cue verbs/phrases for each Bloom's level, keyed by level name.
# This mapping is handed to build_phrase_index() and classify_levels_phrases()
# further down in this module.
# NOTE: some cues are listed under more than one level ("select" under both
# Remember and Evaluate; "compare"/"contrast" under both Understand and Analyze).
BLOOMS_PHRASES = {
    "Remember": [
        "define", "list", "recall", "identify", "state", "label", "name", "recognize", "find", "select", "match", "choose", "give", "write", "tell", "show"
    ],
    "Understand": [
        "classify", "interpret", "summarize", "explain", "estimate", "describe", "discuss", "predict", "paraphrase", "restate", "illustrate", "compare", "contrast", "report"
    ],
    "Apply": [
        "apply", "solve", "use", "demonstrate", "calculate", "implement", "perform", "execute", "carry out", "practice", "employ", "sketch"
    ],
    "Analyze": [
        "analyze", "differentiate", "organize", "structure", "break down", "distinguish", "dissect", "examine", "compare", "contrast", "attribute", "investigate"
    ],
    "Evaluate": [
        "evaluate", "judge", "critique", "assess", "defend", "argue", "select", "support", "appraise", "recommend", "conclude", "review"
    ],
    "Create": [
        "create", "design", "compose", "plan", "construct", "produce", "devise", "generate", "develop", "formulate", "invent", "build"
    ]
}
					
						
						|  |  | 
					
						
# Cue verbs/phrases for each DOK level ("DOK1".."DOK4"), keyed by level name.
# This mapping is handed to build_phrase_index() and classify_levels_phrases()
# further down in this module.
# NOTE: some cues are listed under more than one level ("compare" under DOK2 and
# DOK3; "construct" and "formulate" under DOK3 and DOK4).
# NOTE(review): spelling is mixed ("organise", "summarise", "generalise" next to
# "categorize", "analyze") — confirm whether that is intentional before
# normalising, since these strings are the data that gets indexed.
DOK_PHRASES = {
    "DOK1": [
        "define", "list", "recall", "compute", "identify", "state", "label", "how many",
        "name", "recognize", "find", "determine", "select", "match", "choose", "give",
        "write", "tell", "show", "point out"
    ],
    "DOK2": [
        "classify", "interpret", "estimate", "organise", "summarise", "explain", "solve",
        "categorize", "group", "compare", "contrast", "distinguish", "make observations",
        "collect data", "display data", "arrange", "sort", "paraphrase", "restate", "predict",
        "approximate", "demonstrate", "illustrate", "describe", "analyze data"
    ],
    "DOK3": [
        "justify", "analyze", "generalise", "compare", "construct", "investigate",
        "support", "defend", "argue", "examine", "differentiate", "criticize", "debate",
        "test", "experiment", "hypothesize", "draw conclusions", "break down", "dissect",
        "probe", "explore", "develop", "formulate"
    ],
    "DOK4": [
        "design", "synthesize", "model", "prove", "evaluate system", "critique", "create",
        "compose", "plan", "invent", "devise", "generate", "build", "construct", "produce",
        "formulate", "improve", "revise", "assess", "appraise", "judge", "recommend",
        "predict outcome", "simulate"
    ]
}
					
						
						|  |  | 
					
						
						|  |  | 
					
						
# Shared embedding backend and phrase indexes, created once when this module is
# imported. classify_and_score() passes them to classify_levels_phrases() as
# `backend`, `prebuilt_bloom_index` and `prebuilt_dok_index`.
# NOTE(review): presumably building these here avoids re-embedding the phrase
# lists on every tool call — confirm against level_classifier_tool.
_backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
_BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
_DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
					
						
						|  |  | 
					
						
						|  | @tool | 
					
						
						|  | def classify_and_score( | 
					
						
						|  | question: str, | 
					
						
						|  | target_bloom: str, | 
					
						
						|  | target_dok: str, | 
					
						
						|  | agg: str = "max" | 
					
						
						|  | ) -> dict: | 
					
						
						|  | """Classify a question against Bloom’s and DOK targets and return guidance. | 
					
						
						|  |  | 
					
						
						|  | Args: | 
					
						
						|  | question: The question text to evaluate for cognitive demand. | 
					
						
						|  | target_bloom: Target Bloom’s level or range. Accepts exact (e.g., "Analyze") | 
					
						
						|  | or plus form (e.g., "Apply+") meaning that level or higher. | 
					
						
						|  | target_dok: Target DOK level or range. Accepts exact (e.g., "DOK3") | 
					
						
						|  | or span (e.g., "DOK2-DOK3"). | 
					
						
						|  | agg: Aggregation method over phrase similarities within a level | 
					
						
						|  | (choices: "mean", "max", "topk_mean"). | 
					
						
						|  |  | 
					
						
						|  | Returns: | 
					
						
						|  | A dictionary with: | 
					
						
						|  | ok: True if both Bloom’s and DOK match the targets. | 
					
						
						|  | measured: Dict with best levels and per-level scores for Bloom’s and DOK. | 
					
						
						|  | feedback: Brief guidance describing how to adjust the question to hit targets. | 
					
						
						|  | """ | 
					
						
						|  | res = classify_levels_phrases( | 
					
						
						|  | question, | 
					
						
						|  | BLOOMS_PHRASES, | 
					
						
						|  | DOK_PHRASES, | 
					
						
						|  | backend=_backend, | 
					
						
						|  | prebuilt_bloom_index=_BLOOM_INDEX, | 
					
						
						|  | prebuilt_dok_index=_DOK_INDEX, | 
					
						
						|  | agg=agg, | 
					
						
						|  | return_phrase_matches=True | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | def _parse_target_bloom(t: str): | 
					
						
						|  | order = ["Remember","Understand","Apply","Analyze","Evaluate","Create"] | 
					
						
						|  | if t.endswith("+"): | 
					
						
						|  | base = t[:-1] | 
					
						
						|  | return set(order[order.index(base):]) | 
					
						
						|  | return {t} | 
					
						
						|  |  | 
					
						
						|  | def _parse_target_dok(t: str): | 
					
						
						|  | order = ["DOK1","DOK2","DOK3","DOK4"] | 
					
						
						|  | if "-" in t: | 
					
						
						|  | lo, hi = t.split("-") | 
					
						
						|  | return set(order[order.index(lo):order.index(hi)+1]) | 
					
						
						|  | return {t} | 
					
						
						|  |  | 
					
						
						|  | bloom_target_set = _parse_target_bloom(target_bloom) | 
					
						
						|  | dok_target_set = _parse_target_dok(target_dok) | 
					
						
						|  |  | 
					
						
						|  | bloom_best = res["blooms"]["best_level"] | 
					
						
						|  | dok_best = res["dok"]["best_level"] | 
					
						
						|  |  | 
					
						
						|  | bloom_ok = bloom_best in bloom_target_set | 
					
						
						|  | dok_ok = dok_best in dok_target_set | 
					
						
						|  |  | 
					
						
						|  | feedback_parts = [] | 
					
						
						|  | if not bloom_ok: | 
					
						
						|  | feedback_parts.append( | 
					
						
						|  | f"Shift Bloom’s from {bloom_best} toward {sorted(bloom_target_set)}. " | 
					
						
						|  | f"Top cues: {res['blooms']['top_phrases'].get(bloom_best, [])[:3]}" | 
					
						
						|  | ) | 
					
						
						|  | if not dok_ok: | 
					
						
						|  | feedback_parts.append( | 
					
						
						|  | f"Shift DOK from {dok_best} toward {sorted(dok_target_set)}. " | 
					
						
						|  | f"Top cues: {res['dok']['top_phrases'].get(dok_best, [])[:3]}" | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | return { | 
					
						
						|  | "ok": bool(bloom_ok and dok_ok), | 
					
						
						|  | "measured": { | 
					
						
						|  | "bloom_best": bloom_best, | 
					
						
						|  | "bloom_scores": res["blooms"]["scores"], | 
					
						
						|  | "dok_best": dok_best, | 
					
						
						|  | "dok_scores": res["dok"]["scores"], | 
					
						
						|  | }, | 
					
						
						|  | "feedback": " ".join(feedback_parts) if feedback_parts else "On target.", | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int): | 
					
						
						|  | client = InferenceClient( | 
					
						
						|  | model=model_id, | 
					
						
						|  | provider=provider, | 
					
						
						|  | timeout=timeout, | 
					
						
						|  | token=hf_token if hf_token else None, | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | model = InferenceClientModel(client=client) | 
					
						
						|  | agent = CodeAgent(model=model, tools=[classify_and_score]) | 
					
						
						|  | agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens} | 
					
						
						|  | return agent | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
# Prompt template for the agent task. run_pipeline() fills the placeholders
# {subject}, {grade}, {topic}, {target_bloom}, {target_dok} and {attempts}
# with str.format(); the doubled braces around the JSON example are format
# escapes that render as literal "{" and "}".
TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".

After you propose a candidate, you MUST immediately call:
classify_and_score(
question=<just the question text>,
target_bloom="{target_bloom}",
target_dok="{target_dok}",
agg="max"
)

Use the returned dict:
- If ok == True: print ONLY compact JSON {{"question": "...", "answer": "...", "reasoning": "..."}} and finish.
- If ok == False: briefly explain the needed shift, revise the question, and call classify_and_score again.
Repeat up to {attempts} attempts.
Keep answers concise.
Additionally, when you call classify_and_score, pass the exact question text you propose.
If you output JSON, ensure it is valid JSON (no trailing commas, use double quotes).
'''
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def run_pipeline( | 
					
						
						|  | hf_token, | 
					
						
						|  | topic, | 
					
						
						|  | grade, | 
					
						
						|  | subject, | 
					
						
						|  | target_bloom, | 
					
						
						|  | target_dok, | 
					
						
						|  | attempts, | 
					
						
						|  | model_id, | 
					
						
						|  | provider, | 
					
						
						|  | timeout, | 
					
						
						|  | temperature, | 
					
						
						|  | max_tokens | 
					
						
						|  | ): | 
					
						
						|  |  | 
					
						
						|  | agent = make_agent( | 
					
						
						|  | hf_token=hf_token.strip(), | 
					
						
						|  | model_id=model_id, | 
					
						
						|  | provider=provider, | 
					
						
						|  | timeout=int(timeout), | 
					
						
						|  | temperature=float(temperature), | 
					
						
						|  | max_tokens=int(max_tokens), | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | task = TASK_TMPL.format( | 
					
						
						|  | grade=grade, | 
					
						
						|  | topic=topic, | 
					
						
						|  | subject=subject, | 
					
						
						|  | target_bloom=target_bloom, | 
					
						
						|  | target_dok=target_dok, | 
					
						
						|  | attempts=int(attempts) | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | try: | 
					
						
						|  | result_text = agent.run(task, max_steps=int(attempts)*4) | 
					
						
						|  | except Exception as e: | 
					
						
						|  | result_text = f"ERROR: {e}" | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | final_json = "" | 
					
						
						|  | try: | 
					
						
						|  |  | 
					
						
						|  | start = result_text.find("{") | 
					
						
						|  | end = result_text.rfind("}") | 
					
						
						|  | if start != -1 and end != -1 and end > start: | 
					
						
						|  | candidate = result_text[start:end+1] | 
					
						
						|  | final_json = json.dumps(json.loads(candidate), indent=2) | 
					
						
						|  | except Exception: | 
					
						
						|  | final_json = "" | 
					
						
						|  |  | 
					
						
						|  | return final_json, result_text | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | with gr.Blocks() as demo: | 
					
						
						|  | gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty") | 
					
						
						|  | gr.Markdown( | 
					
						
						|  | "This app uses a **CodeAgent** that *calls the scoring tool* " | 
					
						
						|  | "(`classify_and_score`) after each proposal, and revises until it hits the target." | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | with gr.Accordion("API Settings", open=False): | 
					
						
						|  | hf_token = gr.Textbox(label="Hugging Face Token (required if the endpoint needs auth)", type="password") | 
					
						
						|  | model_id = gr.Textbox(value="meta-llama/Llama-4-Scout-17B-16E-Instruct", label="Model ID") | 
					
						
						|  | provider = gr.Textbox(value="novita", label="Provider") | 
					
						
						|  | timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)") | 
					
						
						|  |  | 
					
						
						|  | with gr.Row(): | 
					
						
						|  | topic = gr.Textbox(value="Fractions", label="Topic") | 
					
						
						|  | grade = gr.Dropdown( | 
					
						
						|  | choices=["Grade 1","Grade 2","Grade 3","Grade4","Grade 5","Grade 6","Grade 7","Grade 8","Grade 9", | 
					
						
						|  | "Grade 10","Grade 11","Grade 12","Under Graduate","Post Graduate"], | 
					
						
						|  | value="Grade 7", | 
					
						
						|  | label="Grade" | 
					
						
						|  | ) | 
					
						
						|  | subject= gr.Textbox(value="Math", label="Subject") | 
					
						
						|  |  | 
					
						
						|  | with gr.Row(): | 
					
						
						|  | target_bloom = gr.Dropdown( | 
					
						
						|  | choices=["Remember","Understand","Apply","Analyze","Evaluate","Create"], | 
					
						
						|  | value="Analyze", | 
					
						
						|  | label="Target Bloom’s" | 
					
						
						|  | ) | 
					
						
						|  | target_dok = gr.Dropdown( | 
					
						
						|  | choices=["DOK1","DOK2","DOK3","DOK4","DOK1-DOK2","DOK2-DOK3","DOK3-DOK4"], | 
					
						
						|  | value="DOK2-DOK3", | 
					
						
						|  | label="Target Depth of Knowledge" | 
					
						
						|  | ) | 
					
						
						|  | attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts") | 
					
						
						|  |  | 
					
						
						|  | with gr.Accordion("⚙️ Generation Controls", open=False): | 
					
						
						|  | temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature") | 
					
						
						|  | max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens") | 
					
						
						|  |  | 
					
						
						|  | run_btn = gr.Button("Run Agent 🚀") | 
					
						
						|  |  | 
					
						
						|  | final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json") | 
					
						
						|  | transcript = gr.Textbox(label="Agent Transcript", lines=18) | 
					
						
						|  |  | 
					
						
						|  | run_btn.click( | 
					
						
						|  | fn=run_pipeline, | 
					
						
						|  | inputs=[hf_token, topic, grade, subject, target_bloom, target_dok, attempts, model_id, provider, timeout, temperature, max_tokens], | 
					
						
						|  | outputs=[final_json, transcript] | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | if __name__ == "__main__": | 
					
						
						|  | demo.launch() | 
					
						
						|  |  | 
					
						
						|  |  |