# Self-contained Gradio app that runs the agent-driven loop (Option A).
# It expects `level_classifier_tool_2.py` and the other local modules to be
# colocated (or installed on PYTHONPATH).
import sys
# Local-dev convenience: make the thesis src/ importable. The raw string avoids
# escaping the Windows backslashes; on other machines this path simply won't resolve.
sys.path.append(r"C:\Users\Sarthak\OneDrive - UT Cloud\thesis\HF_Agent\src")
import json
import gradio as gr
from huggingface_hub import InferenceClient
from smolagents import CodeAgent, InferenceClientModel, tool
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.core import VectorStoreIndex, Document
from all_datasets import *
from level_classifier_tool_2 import (
classify_levels_phrases,
HFEmbeddingBackend,
build_phrase_index
)
from task_temp import TASK_TMPL, CLASSIFY_TMPL, GEN_TMPL, RAG_TMPL
from all_tools import classify_and_score, QuestionRetrieverTool
from phrases import BLOOMS_PHRASES, DOK_PHRASES
# ------------------------ Scoring TOOL data --------------------------------
# Prebuild the Bloom/DOK phrase embeddings once at startup.
_backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
_BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
_DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
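# Quick sanity check of the phrase indexes (commented out; this assumes
# classify_levels_phrases takes a list of texts plus the prebuilt indexes --
# see level_classifier_tool_2.py for the actual signature):
# print(classify_levels_phrases(["What is 3/4 of 20?"], _BLOOM_INDEX, _DOK_INDEX))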
# ------------------------ Retriever TOOL data ------------------------------
D = {
"GSM8k": GSM8k['question'],
"Olympiad": Olympiad_math['question'],
"Olympiad2": Olympiad_math2['question'],
"DeepMind Math": clean_math['question'],
"MMMLU": MMMLU['question'],
"MMMU": MMMU['question'],
"ScienceQA": ScienceQA['question'],
"PubmedQA": PubmedQA['question']
}
all_questions = (
list(D["GSM8k"]) +
list(D["Olympiad"]) +
list(D["MMMLU"]) +
list(D["MMMU"]) +
list(D["DeepMind Math"]) +
list(D["Olympiad2"]) +
list(D["ScienceQA"]) +
list(D["PubmedQA"])
)
emb = HuggingFaceEmbeddings(
model_name="google/embeddinggemma-300m",
encode_kwargs={"normalize_embeddings": True},
)
index = VectorStoreIndex.from_documents(
    [Document(text=t) for t in all_questions], embed_model=emb
)
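# Minimal retrieval sketch over the seed-question index built above. This uses
# the standard llama_index retriever API (as_retriever / retrieve); k=3 is an
# arbitrary choice. QuestionRetrieverTool presumably does something similar.
def _example_retrieve(query: str, k: int = 3):
    """Return the k seed questions most similar to `query`."""
    retriever = index.as_retriever(similarity_top_k=k)
    return [n.node.get_text() for n in retriever.retrieve(query)]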
# ------------------------ Agent setup with timeout ------------------------
def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int):
client = InferenceClient(
model=model_id,
provider=provider,
timeout=timeout,
token=hf_token if hf_token else None,
)
    # smolagents' InferenceClientModel wraps the client; temperature/max_tokens
    # are not threaded through its constructor here, so we stash them on the
    # agent for reference (the task text is what actually steers generation).
    model = InferenceClientModel(model_id=model_id, client=client)
    agent = CodeAgent(model=model, tools=[classify_and_score, QuestionRetrieverTool])
    agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}  # attach for reference
return agent
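# Example usage (hypothetical token; other values mirror the UI defaults below):
#   _agent = make_agent(hf_token="hf_xxx",
#                       model_id="meta-llama/Llama-4-Scout-17B-16E-Instruct",
#                       provider="novita", timeout=30, temperature=0.7, max_tokens=300)
#   _agent.run("Generate one Grade 7 question on Fractions targeting DOK2-DOK3.")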
# ------------------------ Agent task templates -----------------------------
# The UI dropdown yields a template *name*; map names to the imported templates.
TEMPLATES = {
    "TASK_TMPL": TASK_TMPL,
    "CLASSIFY_TMPL": CLASSIFY_TMPL,
    "GEN_TMPL": GEN_TMPL,
    "RAG_TMPL": RAG_TMPL,
}
# ------------------------ Gradio glue ------------------------------------
def run_pipeline(
hf_token,
topic,
grade,
subject,
target_bloom,
target_dok,
attempts,
model_id,
provider,
timeout,
temperature,
max_tokens,
task_type
):
# Build agent per run (or cache if you prefer)
agent = make_agent(
hf_token=hf_token.strip(),
model_id=model_id,
provider=provider,
timeout=int(timeout),
temperature=float(temperature),
max_tokens=int(max_tokens),
)
    task = TEMPLATES[task_type].format(
grade=grade,
topic=topic,
subject=subject,
target_bloom=target_bloom,
target_dok=target_dok,
attempts=int(attempts)
)
# The agent will internally call the tool
try:
        # agent.run may return a non-string final answer; coerce for the parsing below.
        result_text = str(agent.run(task, max_steps=int(attempts) * 4))
except Exception as e:
result_text = f"ERROR: {e}"
# Try to extract final JSON
final_json = ""
try:
# find JSON object in result_text (simple heuristic)
start = result_text.find("{")
end = result_text.rfind("}")
if start != -1 and end != -1 and end > start:
candidate = result_text[start:end+1]
final_json = json.dumps(json.loads(candidate), indent=2)
except Exception:
final_json = ""
return final_json, result_text
with gr.Blocks() as demo:
gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
gr.Markdown(
"This app uses a **CodeAgent** that *calls the scoring tool* "
"(`classify_and_score`) after each proposal, and revises until it hits the target."
)
with gr.Accordion("API Settings", open=False):
hf_token = gr.Textbox(label="Hugging Face Token (required)", type="password")
model_id = gr.Textbox(value="meta-llama/Llama-4-Scout-17B-16E-Instruct", label="Model ID")
provider = gr.Textbox(value="novita", label="Provider")
timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)")
with gr.Row():
topic = gr.Textbox(value="Fractions", label="Topic")
grade = gr.Dropdown(
choices=["Grade 1","Grade 2","Grade 3","Grade4","Grade 5","Grade 6","Grade 7","Grade 8","Grade 9",
"Grade 10","Grade 11","Grade 12","Under Graduate","Post Graduate"],
value="Grade 7",
label="Grade"
)
        subject = gr.Textbox(value="Math", label="Subject")
        task_type = gr.Dropdown(
            choices=["TASK_TMPL", "CLASSIFY_TMPL", "GEN_TMPL", "RAG_TMPL"],
            value="TASK_TMPL",
            label="Task template"
        )
with gr.Row():
target_bloom = gr.Dropdown(
choices=["Remember","Understand","Apply","Analyze","Evaluate","Create","Apply+","Analyze+","Evaluate+"],
value="Analyze",
label="Target Bloom’s"
)
target_dok = gr.Dropdown(
choices=["DOK1","DOK2","DOK3","DOK4","DOK1-DOK2","DOK2-DOK3","DOK3-DOK4"],
value="DOK2-DOK3",
label="Target DOK"
)
attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")
with gr.Accordion("Generation Controls", open=False):
temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")
run_btn = gr.Button("Run Agent")
final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
transcript = gr.Textbox(label="Agent Transcript", lines=18)
run_btn.click(
fn=run_pipeline,
        inputs=[hf_token, topic, grade, subject, target_bloom, target_dok, attempts,
                model_id, provider, timeout, temperature, max_tokens, task_type],
outputs=[final_json, transcript]
)
if __name__ == "__main__":
demo.launch(share=True)