Update app.py
app.py CHANGED
@@ -69,7 +69,6 @@ _backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2
_BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
_DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)

-
# ------------------------ Tool: classify and score ------------------------
@tool
def classify_and_score(
@@ -78,23 +77,7 @@ def classify_and_score(
    target_dok: str,
    agg: str = "max"
) -> dict:
-    """Classify a question against Bloom’s and DOK targets and return guidance.
-
-    Args:
-        question: The question text to evaluate for cognitive demand.
-        target_bloom: Target Bloom’s level or range. Accepts exact (e.g., "Analyze")
-            or plus form (e.g., "Apply+") meaning that level or higher.
-        target_dok: Target DOK level or range. Accepts exact (e.g., "DOK3")
-            or span (e.g., "DOK2-DOK3").
-        agg: Aggregation method over phrase similarities within a level
-            (choices: "mean", "max", "topk_mean").
-
-    Returns:
-        A dictionary with:
-            ok: True if both Bloom’s and DOK match the targets.
-            measured: Dict with best levels and per-level scores for Bloom’s and DOK.
-            feedback: Brief guidance describing how to adjust the question to hit targets.
-    """
+    """Classify a question against Bloom’s and DOK targets and return guidance."""
    res = classify_levels_phrases(
        question,
        BLOOMS_PHRASES,
@@ -174,21 +157,62 @@ def classify_and_score(
        "feedback": " ".join(feedback_parts) if feedback_parts else "On target.",
    }

+# ------------------------ Backend selection + caching ------------------------
+_LOCAL_MODEL_CACHE = {
+    "model": None,
+    "model_id": None,
+}

-
-
-
-
-
-
+def _get_local_model(model_id: str):
+    """Lazy-load and cache a local Transformers model for smolagents."""
+    # Import here so Hosted mode doesn't require local deps.
+    try:
+        from smolagents import TransformersModel  # provided by smolagents
+    except Exception as e:
+        raise RuntimeError(
+            "Local backend requires 'TransformersModel' from smolagents. "
+            "Please ensure your smolagents version provides it."
+        ) from e
+
+    if (
+        _LOCAL_MODEL_CACHE["model"] is not None
+        and _LOCAL_MODEL_CACHE["model_id"] == model_id
+    ):
+        return _LOCAL_MODEL_CACHE["model"]
+
+    # Instantiate and cache
+    local_model = TransformersModel(
+        model_id=model_id,
+        device_map="auto"
    )
-    model =
+    _LOCAL_MODEL_CACHE["model"] = local_model
+    _LOCAL_MODEL_CACHE["model_id"] = model_id
+    return local_model
+
+# ------------------------ Agent setup with timeout ------------------------
+def make_agent(
+    backend_choice: str,   # "Hosted API" | "Local GPU"
+    hf_token: str,
+    model_id: str,
+    timeout: int,
+    temperature: float,
+    max_tokens: int
+):
+    if backend_choice == "Local GPU":
+        model = _get_local_model(model_id)
+    else:
+        client = InferenceClient(
+            model=model_id,
+            timeout=timeout,
+            token=(hf_token or None),
+        )
+        model = InferenceClientModel(client=client)
+
    agent = CodeAgent(model=model, tools=[classify_and_score])
-    # Not used by
+    # Not used by agent core; helpful for debugging
    agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}
    return agent

-
# ------------------------ Agent task template -----------------------------
TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".

@@ -209,35 +233,31 @@ Additionally, when you call classify_and_score, pass the exact question text you
If you output JSON, ensure it is valid JSON (no trailing commas, use double quotes).
'''

-
# ------------------------ Utility: robust JSON extractor ------------------
def extract_top_level_json(s: str) -> str:
-    """
-    Extract the first top-level JSON object from a string by tracking braces.
-    Returns the JSON string if found, else "".
-    """
+    """Extract the first top-level JSON object by tracking braces."""
    start = s.find("{")
    if start == -1:
        return ""
    depth = 0
    for i in range(start, len(s)):
-
+        ch = s[i]
+        if ch == "{":
            depth += 1
-        elif
+        elif ch == "}":
            depth -= 1
        if depth == 0:
            candidate = s[start:i + 1]
            try:
-                # validate
-                json.loads(candidate)
+                json.loads(candidate)  # validate
                return candidate
            except Exception:
                return ""
    return ""

-
# ------------------------ Pipeline ---------------------------------------
def run_pipeline(
+    backend_choice,
    hf_token,
    topic,
    grade,
@@ -250,14 +270,19 @@ def run_pipeline(
    temperature,
    max_tokens
):
-
-
-
-
-
-
-
-
+    try:
+        agent = make_agent(
+            backend_choice=backend_choice,
+            hf_token=(hf_token or "").strip(),
+            model_id=model_id,
+            timeout=int(timeout),
+            temperature=float(temperature),
+            max_tokens=int(max_tokens),
+        )
+    except Exception as e:
+        # Surface backend/model setup errors directly
+        err = f"ERROR initializing backend '{backend_choice}': {e}"
+        return "", err

    task = TASK_TMPL.format(
        grade=grade,
@@ -268,13 +293,11 @@ def run_pipeline(
        attempts=int(attempts)
    )

-    # The agent will internally call the tool
    try:
        result_text = agent.run(task, max_steps=int(attempts) * 4)
    except Exception as e:
-        result_text = f"ERROR: {e}"
+        result_text = f"ERROR while running the agent: {e}"

-    # Try to extract final JSON
    final_json = ""
    candidate = extract_top_level_json(result_text or "")
    if candidate:
@@ -285,25 +308,43 @@ def run_pipeline(

    return final_json, result_text

+# ------------------------ Optional Spaces warmup --------------------------
+# If you deploy on HF Spaces and want to pre-allocate GPU for Local mode,
+# you can try to warm up the model at startup by setting:
+#   BACKEND_WARMUP=1 and BACKEND_WARMUP_MODEL=<model id>
+if (os.getenv("SYSTEM") == "spaces") and os.getenv("BACKEND_WARMUP") == "1":
+    try:
+        wm = os.getenv("BACKEND_WARMUP_MODEL", "swiss-ai/Apertus-70B-Instruct-2509")
+        _get_local_model(wm)
+        print(f"[Warmup] Local GPU model loaded: {wm}")
+    except Exception as e:
+        print(f"[Warmup] Skipped or failed: {e}")

# ------------------------ Gradio UI --------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
    gr.Markdown(
-        "
-        "
+        "Use a **CodeAgent** that calls the scoring tool (`classify_and_score`) after each proposal, "
+        "and revises until it hits your Bloom/DOK target."
    )

-    with gr.Accordion("API Settings", open=
-
-
-
+    with gr.Accordion("API / Backend Settings", open=True):
+        backend_choice = gr.Radio(
+            choices=["Hosted API", "Local GPU"],
+            value="Hosted API",
+            label="Inference Backend"
        )
-
-
-
-
-
+        with gr.Row():
+            hf_token = gr.Textbox(
+                label="Hugging Face Token (required for private/hosted endpoints)",
+                type="password",
+                visible=True
+            )
+            model_id = gr.Textbox(
+                value="swiss-ai/Apertus-70B-Instruct-2509",
+                label="Model ID (repo or local path)"
+            )
+        timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s, Hosted API only)")

    with gr.Row():
        topic = gr.Textbox(value="Fractions", label="Topic")
@@ -332,21 +373,43 @@ with gr.Blocks() as demo:
    )
    attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")

-    with gr.Accordion("Generation Controls", open=False):
+    with gr.Accordion("⚙️ Generation Controls", open=False):
        temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
        max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")

-
+    # Helpful hint text depending on backend
+    backend_tips = gr.Markdown(
+        "*Hosted API:* uses Hugging Face Inference endpoints. Provide a token if needed.\n\n"
+        "*Local GPU:* loads the model into the Space with `TransformersModel (device_map='auto')`. "
+        "Ensure your Space has a GPU and enough VRAM for the selected model."
+    )
+
+    run_btn = gr.Button("Run Agent 🚀")

    final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
    transcript = gr.Textbox(label="Agent Transcript", lines=18)

+    # Dynamically show/hide token & timeout based on backend
+    def _toggle_backend_fields(choice):
+        # Show token + timeout only for Hosted API
+        return (
+            gr.update(visible=(choice == "Hosted API")),
+            gr.update(visible=True),   # model_id always visible
+            gr.update(visible=(choice == "Hosted API"))
+        )
+
+    backend_choice.change(
+        _toggle_backend_fields,
+        inputs=[backend_choice],
+        outputs=[hf_token, model_id, timeout]
+    )
+
    run_btn.click(
        fn=run_pipeline,
        inputs=[
-            hf_token, topic, grade, subject,
-            target_bloom, target_dok, attempts,
-
+            backend_choice, hf_token, topic, grade, subject,
+            target_bloom, target_dok, attempts, model_id,
+            timeout, temperature, max_tokens
        ],
        outputs=[final_json, transcript]
    )
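As a usage illustration of the brace-tracking extraction in `extract_top_level_json`: a minimal standalone sketch of the same approach. The helper name, sample transcript string, and printed output below are illustrative only, not part of app.py.

import json

def extract_first_json_object(s: str) -> str:
    # Scan from the first "{" and track brace depth; validate before returning.
    start = s.find("{")
    if start == -1:
        return ""
    depth = 0
    for i in range(start, len(s)):
        ch = s[i]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                candidate = s[start:i + 1]
                try:
                    json.loads(candidate)  # reject text that is merely brace-balanced
                    return candidate
                except Exception:
                    return ""
    return ""

transcript = 'Final answer: {"question": "Compare 2/3 and 3/4", "bloom": "Analyze"} done'
print(extract_first_json_object(transcript))
# {"question": "Compare 2/3 and 3/4", "bloom": "Analyze"}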
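The new `_get_local_model` keeps a single model in a module-level dict and only reloads when the requested `model_id` changes. A tiny sketch of that single-slot cache pattern, with a stand-in loader and hypothetical names (`get_cached`, `fake_loader`) so it runs anywhere without a GPU or smolagents:

_CACHE = {"obj": None, "key": None}

def get_cached(key, loader):
    # Reuse the cached object while the key matches; otherwise load and replace it.
    if _CACHE["obj"] is not None and _CACHE["key"] == key:
        return _CACHE["obj"]
    _CACHE["obj"] = loader(key)
    _CACHE["key"] = key
    return _CACHE["obj"]

loads = []
def fake_loader(model_id):
    loads.append(model_id)
    return f"<model {model_id}>"

get_cached("swiss-ai/Apertus-70B-Instruct-2509", fake_loader)
get_cached("swiss-ai/Apertus-70B-Instruct-2509", fake_loader)  # cache hit, no reload
get_cached("some-other/model", fake_loader)                    # key changed, reloads
assert loads == ["swiss-ai/Apertus-70B-Instruct-2509", "some-other/model"]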