Update app.py
app.py CHANGED
@@ -1,282 +1,109 @@
-
+# Create a self-contained Gradio app that uses the agent-driven loop (Option A)
+# It expects `level_classifier_tool.py` to be colocated (or installed on PYTHONPATH).
+import sys
+sys.path.append(r"C:\Users\Sarthak\OneDrive - UT Cloud\thesis\HF_Agent\src")  # raw string because the path contains spaces
 import json
 import gradio as gr
-import spaces
 from huggingface_hub import InferenceClient
 from smolagents import CodeAgent, InferenceClientModel, tool
+from langchain.embeddings import HuggingFaceEmbeddings
+from llama_index.core import VectorStoreIndex, Document
 from huggingface_hub import login
-from smolagents import
-import
-
-login(token=token)
-from level_classifier_tool import (
+from smolagents import tool
+from all_datasets import *
+from level_classifier_tool_2 import (
     classify_levels_phrases,
     HFEmbeddingBackend,
     build_phrase_index
 )
-
-
-BLOOMS_PHRASES = {
-    "Remember": [
-        "define", "list", "recall", "identify", "state", "label", "name", "recognize", "find",
-        "select", "match", "choose", "give", "write", "tell", "show"
-    ],
-    "Understand": [
-        "classify", "interpret", "summarize", "explain", "estimate", "describe", "discuss",
-        "predict", "paraphrase", "restate", "illustrate", "compare", "contrast", "report"
-    ],
-    "Apply": [
-        "apply", "solve", "use", "demonstrate", "calculate", "implement", "perform",
-        "execute", "carry out", "practice", "employ", "sketch"
-    ],
-    "Analyze": [
-        "analyze", "differentiate", "organize", "structure", "break down", "distinguish",
-        "dissect", "examine", "compare", "contrast", "attribute", "investigate"
-    ],
-    "Evaluate": [
-        "evaluate", "judge", "critique", "assess", "defend", "argue", "select", "support",
-        "appraise", "recommend", "conclude", "review"
-    ],
-    "Create": [
-        "create", "design", "compose", "plan", "construct", "produce", "devise", "generate",
-        "develop", "formulate", "invent", "build"
-    ]
-}
-
-DOK_PHRASES = {
-    "DOK1": [
-        "define", "list", "recall", "compute", "identify", "state", "label", "how many",
-        "name", "recognize", "find", "determine", "select", "match", "choose", "give",
-        "write", "tell", "show", "point out"
-    ],
-    "DOK2": [
-        "classify", "interpret", "estimate", "organise", "summarise", "explain", "solve",
-        "categorize", "group", "compare", "contrast", "distinguish", "make observations",
-        "collect data", "display data", "arrange", "sort", "paraphrase", "restate", "predict",
-        "approximate", "demonstrate", "illustrate", "describe", "analyze data"
-    ],
-    "DOK3": [
-        "justify", "analyze", "generalise", "compare", "construct", "investigate",
-        "support", "defend", "argue", "examine", "differentiate", "criticize", "debate",
-        "test", "experiment", "hypothesize", "draw conclusions", "break down", "dissect",
-        "probe", "explore", "develop", "formulate"
-    ],
-    "DOK4": [
-        "design", "synthesize", "model", "prove", "evaluate system", "critique", "create",
-        "compose", "plan", "invent", "devise", "generate", "build", "construct", "produce",
-        "formulate", "improve", "revise", "assess", "appraise", "judge", "recommend",
-        "predict outcome", "simulate"
-    ]
-}
-
-# ------------------------ Prebuild embeddings once ------------------------
+from task_temp import TASK_TMPL, CLASSIFY_TMPL, GEN_TMPL, RAG_TMPL
+from all_tools import classify_and_score, QuestionRetrieverTool
+from phrases import BLOOMS_PHRASES, DOK_PHRASES
+# Prebuild embeddings once
 _backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
 _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
 _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
+D = {
+    "GSM8k": GSM8k['question'],
+    "Olympiad": Olympiad_math['question'],
+    "Olympiad2": Olympiad_math2['question'],
+    "DeepMind Math": clean_math['question'],
+    "MMMLU": MMMLU['question'],
+    "MMMU": MMMU['question'],
+    "ScienceQA": ScienceQA['question'],
+    "PubmedQA": PubmedQA['question']
+}
+all_questions = (
+    list(D["GSM8k"]) +
+    list(D["Olympiad"]) +
+    list(D["MMMLU"]) +
+    list(D["MMMU"]) +
+    list(D["DeepMind Math"]) +
+    list(D["Olympiad2"]) +
+    list(D["ScienceQA"]) +
+    list(D["PubmedQA"])
+)
 
+emb = HuggingFaceEmbeddings(
+    model_name="google/embeddinggemma-300m",
+    encode_kwargs={"normalize_embeddings": True},
+)
+texts = all_questions
+index = VectorStoreIndex.from_documents([Document(text=t) for t in texts], embed_model=emb)
 
-@tool
-def classify_and_score(
-    question: str,
-    target_bloom: str,
-    target_dok: str,
-    agg: str = "max"
-) -> dict:
-    """Classify a question against Bloom’s and DOK targets and return guidance.
-
-    Args:
-        question: The question text to evaluate for cognitive demand.
-        target_bloom: Target Bloom’s level or range. Accepts exact (e.g., "Analyze")
-            or plus form (e.g., "Apply+") meaning that level or higher.
-        target_dok: Target DOK level or range. Accepts exact (e.g., "DOK3")
-            or span (e.g., "DOK2-DOK3").
-        agg: Aggregation method over phrase similarities within a level
-            (choices: "mean", "max", "topk_mean").
-
-    Returns:
-        A dictionary with:
-            ok: True if both Bloom’s and DOK match the targets.
-            measured: Dict with best levels and per-level scores for Bloom’s and DOK.
-            feedback: Brief guidance describing how to adjust the question to hit targets.
-    """
-    res = classify_levels_phrases(
-        question,
-        BLOOMS_PHRASES,
-        DOK_PHRASES,
-        backend=_backend,
-        prebuilt_bloom_index=_BLOOM_INDEX,
-        prebuilt_dok_index=_DOK_INDEX,
-        agg=agg,
-        return_phrase_matches=True
-    )
-
-    def _parse_target_bloom(t: str):
-        order = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]
-        if t.endswith("+"):
-            base = t[:-1]
-            if base not in order:
-                raise ValueError(f"Invalid Bloom target '{t}'")
-            return set(order[order.index(base):])
-        if t not in order:
-            raise ValueError(f"Invalid Bloom target '{t}'")
-        return {t}
-
-    def _parse_target_dok(t: str):
-        order = ["DOK1", "DOK2", "DOK3", "DOK4"]
-        if "-" in t:
-            lo, hi = t.split("-")
-            if lo not in order or hi not in order or order.index(lo) > order.index(hi):
-                raise ValueError(f"Invalid DOK range '{t}'")
-            return set(order[order.index(lo):order.index(hi) + 1])
-        if t not in order:
-            raise ValueError(f"Invalid DOK target '{t}'")
-        return {t}
-
-    try:
-        bloom_target_set = _parse_target_bloom(target_bloom)
-        dok_target_set = _parse_target_dok(target_dok)
-    except Exception as e:
-        return {
-            "ok": False,
-            "measured": {},
-            "feedback": (
-                f"Invalid targets: {e}. Use Bloom in "
-                "{Remember, Understand, Apply, Analyze, Evaluate, Create} "
-                "and DOK in {DOK1..DOK4} or ranges like 'DOK2-DOK3'."
-            ),
-        }
-
-    bloom_best = res["blooms"]["best_level"]
-    dok_best = res["dok"]["best_level"]
-
-    bloom_ok = bloom_best in bloom_target_set
-    dok_ok = dok_best in dok_target_set
-
-    top_bloom_phrases = res["blooms"].get("top_phrases", {})
-    top_dok_phrases = res["dok"].get("top_phrases", {})
-
-    feedback_parts = []
-    if not bloom_ok:
-        feedback_parts.append(
-            f"Shift Bloom’s from {bloom_best} toward {sorted(list(bloom_target_set))}. "
-            f"Top cues: {top_bloom_phrases.get(bloom_best, [])[:3]}"
-        )
-    if not dok_ok:
-        feedback_parts.append(
-            f"Shift DOK from {dok_best} toward {sorted(list(dok_target_set))}. "
-            f"Top cues: {top_dok_phrases.get(dok_best, [])[:3]}"
-        )
-
-    return {
-        "ok": bool(bloom_ok and dok_ok),
-        "measured": {
-            "bloom_best": bloom_best,
-            "bloom_scores": res["blooms"]["scores"],
-            "dok_best": dok_best,
-            "dok_scores": res["dok"]["scores"],
-        },
-        "feedback": " ".join(feedback_parts) if feedback_parts else "On target.",
-    }
+# ------------------------ Scoring TOOL -----------------------------------
 
+# ------------------------ Retriever TOOL -----------------------------------
 
-
-def get_local_model_gpu(model_id: str):
-    """
-    Load and cache a local Transformers model for smolagents on GPU.
-    Decorated so Spaces knows this task needs a GPU.
-    """
-    # Import here to keep Hosted mode lightweight.
-    try:
-        from smolagents import TransformersModel  # provided by smolagents
-    except Exception as e:
-        raise RuntimeError(
-            "Local backend requires 'TransformersModel' from smolagents. "
-            "Please ensure your smolagents version provides it."
-        ) from e
-
-    if (
-        _LOCAL_MODEL_CACHE["model"] is not None
-        and _LOCAL_MODEL_CACHE["model_id"] == model_id
-    ):
-        return _LOCAL_MODEL_CACHE["model"]
-
-    local_model = TransformersModel(
-        model_id=model_id,
-        device_map="auto"
+# ------------------------ Agent setup with timeout ------------------------
+def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int):
+    client = InferenceClient(
+        model=model_id,
+        provider=provider,
+        timeout=timeout,
+        token=hf_token if hf_token else None,
     )
-    _LOCAL_MODEL_CACHE["model"] = local_model
-    _LOCAL_MODEL_CACHE["model_id"] = model_id
-    return local_model
-
-def make_agent(
-    backend_choice: str,          # "Hosted API" | "Local GPU"
-    hf_token: str,
-    model_id: str,
-    timeout: int,
-    temperature: float,
-    max_tokens: int
-):
-    if backend_choice == "Local GPU":
-        # This call is GPU-annotated; Spaces will allocate a GPU for it.
-        model = get_local_model_gpu(model_id)
-    else:
-        client = InferenceClient(
-            model=model_id,
-            timeout=timeout,
-            token=(hf_token or None),
-        )
-        model = InferenceClientModel(client=client)
-
-    agent = CodeAgent(model=model, tools=[classify_and_score])
+    # Bind generation params by partially applying via model kwargs.
+    # smolagents InferenceClientModel currently accepts client only; we pass runtime params in the task text.
+    model = InferenceClientModel(model_id=model_id, client=client)
+    agent = CodeAgent(model=model, tools=[classify_and_score, QuestionRetrieverTool])
+    agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}  # attach for reference
     return agent
 
-# ------------------------ Agent task template -----------------------------
-TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".
-
-After you propose a candidate, you MUST immediately call:
-classify_and_score(
-    question=<just the question text>,
-    target_bloom="{target_bloom}",
-    target_dok="{target_dok}",
-    agg="max"
-)
-
-- If ok == True: print ONLY compact JSON {{"question": "...", "answer": "...", "reasoning": "..."}} and finish.
-- If ok == False: briefly explain the needed shift, revise the question, and call classify_and_score again.
-Repeat up to {attempts} attempts.
-Keep answers concise.
-Additionally, when you call classify_and_score, pass the exact question text you propose.
-If you output JSON, ensure it is valid JSON (no trailing commas, use double quotes).
-'''
+# ------------------------ Agent task template -----------------------------
 
-# ------------------------ Utility: robust JSON extractor ------------------
-def extract_top_level_json(s: str) -> str:
-    start = s.find("{")
-    if start == -1:
-        return ""
-    depth = 0
-    for i in range(start, len(s)):
-        ch = s[i]
-        if ch == "{":
-            depth += 1
-        elif ch == "}":
-            depth -= 1
-            if depth == 0:
-                candidate = s[start:i + 1]
-                try:
-                    json.loads(candidate)  # validate
-                    return candidate
-                except Exception:
-                    return ""
-    return ""
 
-# ------------------------
+# ------------------------ Gradio glue ------------------------------------
 def run_pipeline(
-    backend_choice,
     hf_token,
     topic,
     grade,
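Note on the retrieval setup in this hunk: the commit builds `index` from the pooled datasets, but the retriever itself lives in `all_tools.QuestionRetrieverTool`, which is not part of this diff. Below is a minimal sketch of how such a tool can query the prebuilt index, assuming llama-index's standard retriever API (the helper name is hypothetical); note also that passing a LangChain `HuggingFaceEmbeddings` as `embed_model` may require the `llama-index-embeddings-langchain` integration to be installed.

    # Hypothetical sketch of the retrieval side; QuestionRetrieverTool is
    # assumed to wrap something like this.
    retriever = index.as_retriever(similarity_top_k=3)  # top-3 most similar stored questions

    def retrieve_similar_questions(query: str) -> list[str]:
        """Return the stored questions most similar to `query`, as exemplars for the agent."""
        nodes = retriever.retrieve(query)
        return [node.get_content() for node in nodes]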
@@ -285,24 +112,23 @@ def run_pipeline(
     target_dok,
     attempts,
     model_id,
+    provider,
     timeout,
     temperature,
-    max_tokens
+    max_tokens,
+    task_type
 ):
-    try:
-        agent = make_agent(
-            backend_choice=backend_choice,
-            hf_token=hf_token,
-            model_id=model_id,
-            timeout=int(timeout),
-            temperature=float(temperature),
-            max_tokens=int(max_tokens)
-        )
-    except Exception as e:
-        err = f"ERROR initializing backend '{backend_choice}': {e}"
-        return "", err
+    # Build agent per run (or cache if you prefer)
+    agent = make_agent(
+        hf_token=hf_token.strip(),
+        model_id=model_id,
+        provider=provider,
+        timeout=int(timeout),
+        temperature=float(temperature),
+        max_tokens=int(max_tokens),
+    )
 
-    task = TASK_TMPL.format(
+    # Map the template name chosen in the UI onto the imported template string.
+    templates = {"TASK_TMPL": TASK_TMPL, "CLASSIFY_TMPL": CLASSIFY_TMPL, "GEN_TMPL": GEN_TMPL, "RAG_TMPL": RAG_TMPL}
+    task = templates[task_type].format(
        grade=grade,
        topic=topic,
        subject=subject,
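The `templates` mapping above fixes the committed `task = task_type.format(...)`, which would have formatted the literal dropdown string ("TASK_TMPL") rather than the template it names. The deleted inline TASK_TMPL in the first hunk documents the placeholder contract ({subject}, {grade}, {topic}, {target_bloom}, {target_dok}, {attempts}); a small sanity check, assuming the templates imported from `task_temp` keep that contract:

    # Sketch: verify a template only references fields run_pipeline supplies.
    # Assumes task_temp's templates follow the deleted TASK_TMPL's placeholder set.
    import string

    SUPPLIED_FIELDS = {"subject", "grade", "topic", "target_bloom", "target_dok", "attempts"}

    def template_fields(tmpl: str) -> set:
        """Collect the named str.format placeholders in a template."""
        return {name for _, name, _, _ in string.Formatter().parse(tmpl) if name}

    assert template_fields(TASK_TMPL) <= SUPPLIED_FIELDS  # fails fast if a template needs more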
@@ -311,117 +137,80 @@ def run_pipeline(
         attempts=int(attempts)
     )
 
+    # The agent will internally call the tool
     try:
-        result_text = agent.run(task, max_steps=int(attempts)
+        result_text = agent.run(task, max_steps=int(attempts)*4)
     except Exception as e:
-        result_text = f"ERROR
+        result_text = f"ERROR: {e}"
 
+    # Try to extract final JSON
     final_json = ""
-    candidate = extract_top_level_json(result_text)
-
-    if candidate:
+    try:
+        # find JSON object in result_text (simple heuristic)
+        start = result_text.find("{")
+        end = result_text.rfind("}")
+        if start != -1 and end != -1 and end > start:
+            candidate = result_text[start:end+1]
             final_json = json.dumps(json.loads(candidate), indent=2)
+    except Exception:
+        final_json = ""
 
     return final_json, result_text
 
 with gr.Blocks() as demo:
     gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
     gr.Markdown(
-        "
-        "and revises until it hits
+        "This app uses a **CodeAgent** that *calls the scoring tool* "
+        "(`classify_and_score`) after each proposal, and revises until it hits the target."
     )
 
-    with gr.Accordion("API
-        backend_choice = gr.Radio(
-            choices=["Hosted API", "Local GPU"],
-            value="Hosted API",
-            label="Backend"
-        )
-        with gr.Row():
-            hf_token = gr.Textbox(
-                label="Hugging Face Token (required for private/hosted endpoints)",
-                type="password",
-                visible=True
-            )
-            model_id = gr.Textbox(
-                value="swiss-ai/Apertus-70B-Instruct-2509",
-                label="Model ID (repo id for Hosted, or local repo for GPU)"
-            )
-        timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s, Hosted API only)")
+    with gr.Accordion("API Settings", open=False):
+        hf_token = gr.Textbox(label="Hugging Face Token (required)", type="password")
+        model_id = gr.Textbox(value="meta-llama/Llama-4-Scout-17B-16E-Instruct", label="Model ID")
+        provider = gr.Textbox(value="novita", label="Provider")
+        timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)")
 
     with gr.Row():
         topic = gr.Textbox(value="Fractions", label="Topic")
         grade = gr.Dropdown(
-            choices=[
-                "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5", "Grade 6",
-                "Grade 7", "Grade 8", "Grade 9",
-                "Grade 10", "Grade 11", "Grade 12",
-                "Under Graduate", "Post Graduate"
-            ],
+            choices=["Grade 1","Grade 2","Grade 3","Grade 4","Grade 5","Grade 6","Grade 7","Grade 8","Grade 9",
+                     "Grade 10","Grade 11","Grade 12","Under Graduate","Post Graduate"],
             value="Grade 7",
             label="Grade"
         )
-        subject
+        subject = gr.Textbox(value="Math", label="Subject")
+        task_type = gr.Dropdown(
+            choices=["TASK_TMPL", "CLASSIFY_TMPL", "GEN_TMPL", "RAG_TMPL"],
+            label="Task Type"
+        )
 
     with gr.Row():
         target_bloom = gr.Dropdown(
-            choices=["Remember",
+            choices=["Remember","Understand","Apply","Analyze","Evaluate","Create","Apply+","Analyze+","Evaluate+"],
             value="Analyze",
             label="Target Bloom’s"
         )
         target_dok = gr.Dropdown(
-            choices=["DOK1",
+            choices=["DOK1","DOK2","DOK3","DOK4","DOK1-DOK2","DOK2-DOK3","DOK3-DOK4"],
             value="DOK2-DOK3",
-            label="Target
+            label="Target DOK"
         )
         attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")
 
-    with gr.Accordion("
+    with gr.Accordion("Generation Controls", open=False):
         temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
         max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")
 
-    gr.Markdown(
-        "*Hosted API:* uses Hugging Face Inference endpoints. Provide a token if needed.\n\n"
-        "*Local GPU:* loads the model into the Space with `TransformersModel (device_map='auto')`. "
-        "Ensure your Space has a GPU and enough VRAM for the selected model."
-    )
-
-    run_btn = gr.Button("Run Agent 🚀")
+    run_btn = gr.Button("Run Agent")
 
     final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
     transcript = gr.Textbox(label="Agent Transcript", lines=18)
 
-    def _toggle_backend_fields(choice):
-        return (
-            gr.update(visible=(choice == "Hosted API")),  # hf_token
-            gr.update(visible=True),                      # model_id always visible
-            gr.update(visible=(choice == "Hosted API"))   # timeout slider
-        )
-
-    backend_choice.change(
-        _toggle_backend_fields,
-        inputs=[backend_choice],
-        outputs=[hf_token, model_id, timeout]
-    )
-
     run_btn.click(
         fn=run_pipeline,
-        inputs=[
-            backend_choice, hf_token, topic, grade, subject,
-            target_bloom, target_dok, attempts, model_id,
-            timeout, temperature, max_tokens
-        ],
+        inputs=[hf_token, topic, grade, subject, target_bloom, target_dok, attempts, model_id, provider, timeout, temperature, max_tokens, task_type],
         outputs=[final_json, transcript]
     )
 
-if __name__ == "__main__":
-    try:
-        get_local_model_gpu(model_id)  # triggers GPU allocation during startup
-    except Exception as e:
-        # don't crash the app if warmup fails; logs will show details
-        print("Warmup failed:", e)
-
-    demo.launch()
+if __name__ == "__main__":
+    demo.launch(share=True)
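One behavioral regression in this hunk: the new extraction takes everything between the first `{` and the last `}` of the transcript, which fails whenever the agent prints more than one JSON object, or any stray brace after the final answer. The deleted `extract_top_level_json` handled exactly this case by brace counting and validating; it is cheap to keep as a fallback:

    # The deleted balanced-brace extractor, usable as a fallback for the
    # first-'{' ... last-'}' heuristic in run_pipeline.
    import json

    def extract_top_level_json(s: str) -> str:
        start = s.find("{")
        if start == -1:
            return ""
        depth = 0
        for i in range(start, len(s)):
            if s[i] == "{":
                depth += 1
            elif s[i] == "}":
                depth -= 1
                if depth == 0:
                    candidate = s[start:i + 1]
                    try:
                        json.loads(candidate)  # validate before returning
                        return candidate
                    except Exception:
                        return ""
        return ""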
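Finally, since `classify_and_score` and `QuestionRetrieverTool` moved out to `all_tools`, it is worth restating the smolagents contract they must satisfy: `@tool` functions need complete type hints and a docstring with an Args section, because smolagents derives the tool schema from them. A sketch of the expected export, mirroring the signature of the inline version this commit deletes (body elided):

    # Expected shape of all_tools.classify_and_score, per the deleted inline tool.
    from smolagents import tool

    @tool
    def classify_and_score(question: str, target_bloom: str, target_dok: str, agg: str = "max") -> dict:
        """Classify a question against Bloom’s and DOK targets and return guidance.

        Args:
            question: The question text to evaluate for cognitive demand.
            target_bloom: Target Bloom’s level, exact ("Analyze") or plus form ("Apply+").
            target_dok: Target DOK level ("DOK3") or span ("DOK2-DOK3").
            agg: Aggregation over phrase similarities ("mean", "max", or "topk_mean").
        """
        ...  # implementation as in the deleted inline version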