import os
import json
import gradio as gr
import spaces
from huggingface_hub import InferenceClient
from smolagents import CodeAgent, InferenceClientModel, tool
from level_classifier_tool import (
classify_levels_phrases,
HFEmbeddingBackend,
build_phrase_index
)
# ------------------------ Taxonomy phrases ------------------------
BLOOMS_PHRASES = {
"Remember": [
"define", "list", "recall", "identify", "state", "label", "name", "recognize", "find",
"select", "match", "choose", "give", "write", "tell", "show"
],
"Understand": [
"classify", "interpret", "summarize", "explain", "estimate", "describe", "discuss",
"predict", "paraphrase", "restate", "illustrate", "compare", "contrast", "report"
],
"Apply": [
"apply", "solve", "use", "demonstrate", "calculate", "implement", "perform",
"execute", "carry out", "practice", "employ", "sketch"
],
"Analyze": [
"analyze", "differentiate", "organize", "structure", "break down", "distinguish",
"dissect", "examine", "compare", "contrast", "attribute", "investigate"
],
"Evaluate": [
"evaluate", "judge", "critique", "assess", "defend", "argue", "select", "support",
"appraise", "recommend", "conclude", "review"
],
"Create": [
"create", "design", "compose", "plan", "construct", "produce", "devise", "generate",
"develop", "formulate", "invent", "build"
]
}
DOK_PHRASES = {
"DOK1": [
"define", "list", "recall", "compute", "identify", "state", "label", "how many",
"name", "recognize", "find", "determine", "select", "match", "choose", "give",
"write", "tell", "show", "point out"
],
"DOK2": [
"classify", "interpret", "estimate", "organise", "summarise", "explain", "solve",
"categorize", "group", "compare", "contrast", "distinguish", "make observations",
"collect data", "display data", "arrange", "sort", "paraphrase", "restate", "predict",
"approximate", "demonstrate", "illustrate", "describe", "analyze data"
],
"DOK3": [
"justify", "analyze", "generalise", "compare", "construct", "investigate",
"support", "defend", "argue", "examine", "differentiate", "criticize", "debate",
"test", "experiment", "hypothesize", "draw conclusions", "break down", "dissect",
"probe", "explore", "develop", "formulate"
],
"DOK4": [
"design", "synthesize", "model", "prove", "evaluate system", "critique", "create",
"compose", "plan", "invent", "devise", "generate", "build", "construct", "produce",
"formulate", "improve", "revise", "assess", "appraise", "judge", "recommend",
"predict outcome", "simulate"
]
}
# ------------------------ Prebuild embeddings once ------------------------
_backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
_BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
_DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
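# The backend and both phrase indices are built once at module load, so each
# classify_and_score call only has to embed the incoming question rather than
# re-embedding the full taxonomy phrase lists.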
# ------------------------ Tool: classify and score ------------------------
@tool
def classify_and_score(
question: str,
target_bloom: str,
target_dok: str,
agg: str = "max"
) -> dict:
"""Classify a question against Bloom’s and DOK targets and return guidance."""
res = classify_levels_phrases(
question,
BLOOMS_PHRASES,
DOK_PHRASES,
backend=_backend,
prebuilt_bloom_index=_BLOOM_INDEX,
prebuilt_dok_index=_DOK_INDEX,
agg=agg,
return_phrase_matches=True
)
def _parse_target_bloom(t: str):
order = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]
if t.endswith("+"):
base = t[:-1]
if base not in order:
raise ValueError(f"Invalid Bloom target '{t}'")
return set(order[order.index(base):])
if t not in order:
raise ValueError(f"Invalid Bloom target '{t}'")
return {t}
def _parse_target_dok(t: str):
order = ["DOK1", "DOK2", "DOK3", "DOK4"]
if "-" in t:
lo, hi = t.split("-")
if lo not in order or hi not in order or order.index(lo) > order.index(hi):
raise ValueError(f"Invalid DOK range '{t}'")
return set(order[order.index(lo):order.index(hi) + 1])
if t not in order:
raise ValueError(f"Invalid DOK target '{t}'")
return {t}
try:
bloom_target_set = _parse_target_bloom(target_bloom)
dok_target_set = _parse_target_dok(target_dok)
except Exception as e:
return {
"ok": False,
"measured": {},
"feedback": (
f"Invalid targets: {e}. Use Bloom in "
"{Remember, Understand, Apply, Analyze, Evaluate, Create} "
"and DOK in {DOK1..DOK4} or ranges like 'DOK2-DOK3'."
),
}
bloom_best = res["blooms"]["best_level"]
dok_best = res["dok"]["best_level"]
bloom_ok = bloom_best in bloom_target_set
dok_ok = dok_best in dok_target_set
top_bloom_phrases = res["blooms"].get("top_phrases", {})
top_dok_phrases = res["dok"].get("top_phrases", {})
feedback_parts = []
if not bloom_ok:
feedback_parts.append(
f"Shift Bloom’s from {bloom_best} toward {sorted(list(bloom_target_set))}. "
f"Top cues: {top_bloom_phrases.get(bloom_best, [])[:3]}"
)
if not dok_ok:
feedback_parts.append(
f"Shift DOK from {dok_best} toward {sorted(list(dok_target_set))}. "
f"Top cues: {top_dok_phrases.get(dok_best, [])[:3]}"
)
return {
"ok": bool(bloom_ok and dok_ok),
"measured": {
"bloom_best": bloom_best,
"bloom_scores": res["blooms"]["scores"],
"dok_best": dok_best,
"dok_scores": res["dok"]["scores"],
},
"feedback": " ".join(feedback_parts) if feedback_parts else "On target.",
}
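# Illustrative shape of the tool's return value (the values and the exact score
# layout below are made up for illustration):
# {
#     "ok": False,
#     "measured": {
#         "bloom_best": "Remember",
#         "bloom_scores": {"Remember": 0.71, "Understand": 0.55, ...},
#         "dok_best": "DOK1",
#         "dok_scores": {"DOK1": 0.68, "DOK2": 0.51, ...},
#     },
#     "feedback": "Shift Bloom’s from Remember toward ['Analyze']. Top cues: [...]",
# }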
# ------------------------ Backend selection + GPU-wrapped local loader ------------------------
_LOCAL_MODEL_CACHE = {"model": None, "model_id": None}
@spaces.GPU(duration=30)  # request a GPU (for up to 30 s) only while this loader runs
def get_local_model_gpu(model_id: str):
"""
Load and cache a local Transformers model for smolagents on GPU.
Decorated so Spaces knows this task needs a GPU.
"""
# Import here to keep Hosted mode lightweight.
try:
from smolagents import TransformersModel # provided by smolagents
except Exception as e:
raise RuntimeError(
"Local backend requires 'TransformersModel' from smolagents. "
"Please ensure your smolagents version provides it."
) from e
if (
_LOCAL_MODEL_CACHE["model"] is not None
and _LOCAL_MODEL_CACHE["model_id"] == model_id
):
return _LOCAL_MODEL_CACHE["model"]
local_model = TransformersModel(
model_id=model_id,
device_map="auto" # lets accelerate pick the best device(s)
)
_LOCAL_MODEL_CACHE["model"] = local_model
_LOCAL_MODEL_CACHE["model_id"] = model_id
return local_model
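# The module-level cache above means repeated "Local GPU" runs with the same
# model_id reuse the already-loaded weights instead of reloading them on every call.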
def make_agent(
backend_choice: str, # "Hosted API" | "Local GPU"
hf_token: str,
model_id: str,
timeout: int,
temperature: float,
max_tokens: int
):
if backend_choice == "Local GPU":
# This call is GPU-annotated; Spaces will allocate a GPU for it.
model = get_local_model_gpu(model_id)
else:
client = InferenceClient(
model=model_id,
timeout=timeout,
token=(hf_token or None),
)
model = InferenceClientModel(client=client)
agent = CodeAgent(model=model, tools=[classify_and_score])
    # Stored for reference only; smolagents does not forward these values to the model automatically.
    agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}
return agent
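# Illustrative hosted-backend usage (assumes a valid token in the HF_TOKEN env var):
# agent = make_agent(
#     backend_choice="Hosted API",
#     hf_token=os.environ.get("HF_TOKEN", ""),
#     model_id="swiss-ai/Apertus-70B-Instruct-2509",
#     timeout=30,
#     temperature=0.7,
#     max_tokens=300,
# )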
# ------------------------ Agent task template -----------------------------
TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".
After you propose a candidate, you MUST immediately call:
classify_and_score(
question=<just the question text>,
target_bloom="{target_bloom}",
target_dok="{target_dok}",
agg="max"
)
Use the returned dict:
- If ok == True: print ONLY compact JSON {{"question": "...", "answer": "...", "reasoning": "..."}} and finish.
- If ok == False: briefly explain the needed shift, revise the question, and call classify_and_score again.
Repeat up to {attempts} attempts.
Keep answers concise.
Additionally, when you call classify_and_score, pass the exact question text you propose.
If you output JSON, ensure it is valid JSON (no trailing commas, use double quotes).
'''
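# Note: the doubled braces {{ }} in TASK_TMPL survive str.format() as literal braces,
# so the agent is shown a concrete JSON shape to emit rather than a format placeholder.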
# ------------------------ Utility: robust JSON extractor ------------------
def extract_top_level_json(s: str) -> str:
start = s.find("{")
if start == -1:
return ""
depth = 0
for i in range(start, len(s)):
ch = s[i]
if ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
candidate = s[start:i + 1]
try:
json.loads(candidate) # validate
return candidate
except Exception:
return ""
return ""
# ------------------------ Pipeline ---------------------------------------
def run_pipeline(
backend_choice,
hf_token,
topic,
grade,
subject,
target_bloom,
target_dok,
attempts,
model_id,
timeout,
temperature,
max_tokens
):
try:
agent = make_agent(
backend_choice=backend_choice,
hf_token=(hf_token or "").strip(),
model_id=model_id,
timeout=int(timeout),
temperature=float(temperature),
max_tokens=int(max_tokens),
)
except Exception as e:
err = f"ERROR initializing backend '{backend_choice}': {e}"
return "", err
task = TASK_TMPL.format(
grade=grade,
topic=topic,
subject=subject,
target_bloom=target_bloom,
target_dok=target_dok,
attempts=int(attempts)
)
try:
result_text = agent.run(task, max_steps=int(attempts) * 4)
except Exception as e:
result_text = f"ERROR while running the agent: {e}"
final_json = ""
candidate = extract_top_level_json(result_text or "")
if candidate:
try:
final_json = json.dumps(json.loads(candidate), indent=2)
except Exception:
final_json = ""
return final_json, result_text
# ------------------------ Gradio UI --------------------------------------
with gr.Blocks() as demo:
gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
gr.Markdown(
"Use a **CodeAgent** that calls the scoring tool (`classify_and_score`) after each proposal, "
"and revises until it hits your Bloom/DOK target."
)
with gr.Accordion("API / Backend Settings", open=True):
backend_choice = gr.Radio(
choices=["Hosted API", "Local GPU"],
value="Hosted API",
label="Inference Backend"
)
with gr.Row():
hf_token = gr.Textbox(
label="Hugging Face Token (required for private/hosted endpoints)",
type="password",
visible=True
)
model_id = gr.Textbox(
value="swiss-ai/Apertus-70B-Instruct-2509",
label="Model ID (repo id for Hosted, or local repo for GPU)"
)
timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s, Hosted API only)")
with gr.Row():
topic = gr.Textbox(value="Fractions", label="Topic")
grade = gr.Dropdown(
choices=[
"Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5", "Grade 6",
"Grade 7", "Grade 8", "Grade 9",
"Grade 10", "Grade 11", "Grade 12",
"Under Graduate", "Post Graduate"
],
value="Grade 7",
label="Grade"
)
subject = gr.Textbox(value="Math", label="Subject")
with gr.Row():
target_bloom = gr.Dropdown(
choices=["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"],
value="Analyze",
label="Target Bloom’s"
)
target_dok = gr.Dropdown(
choices=["DOK1", "DOK2", "DOK3", "DOK4", "DOK1-DOK2", "DOK2-DOK3", "DOK3-DOK4"],
value="DOK2-DOK3",
label="Target Depth of Knowledge"
)
attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")
with gr.Accordion("⚙️ Generation Controls", open=False):
temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")
backend_tips = gr.Markdown(
"*Hosted API:* uses Hugging Face Inference endpoints. Provide a token if needed.\n\n"
"*Local GPU:* loads the model into the Space with `TransformersModel (device_map='auto')`. "
"Ensure your Space has a GPU and enough VRAM for the selected model."
)
run_btn = gr.Button("Run Agent 🚀")
final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
transcript = gr.Textbox(label="Agent Transcript", lines=18)
def _toggle_backend_fields(choice):
return (
gr.update(visible=(choice == "Hosted API")), # hf_token
gr.update(visible=True), # model_id always visible
gr.update(visible=(choice == "Hosted API")) # timeout slider
)
backend_choice.change(
_toggle_backend_fields,
inputs=[backend_choice],
outputs=[hf_token, model_id, timeout]
)
run_btn.click(
fn=run_pipeline,
inputs=[
backend_choice, hf_token, topic, grade, subject,
target_bloom, target_dok, attempts, model_id,
timeout, temperature, max_tokens
],
outputs=[final_json, transcript]
)
if __name__ == "__main__" or os.getenv("SYSTEM") == "spaces":
demo.launch()