"""Agent tools: example-question retrieval and Bloom's/DOK scoring.

Importing this module builds an in-memory vector index over the question
columns of the source datasets and registers two smolagents tools:
``QuestionRetrieverTool`` and ``classify_and_score``.
"""
from llama_index.core import VectorStoreIndex, Document
from smolagents import tool

# The star import supplies the dataset objects referenced in ``D`` below.
from all_datasets import *  # noqa: F401,F403
from level_classifier_tool_2 import (
    classify_levels_phrases,
    HFEmbeddingBackend,
    build_phrase_index,
)
from phrases import BLOOMS_PHRASES, DOK_PHRASES

# Question column of every source dataset, keyed by a display name.
D = {
    "GSM8k": GSM8k['question'],
    "Olympiad": Olympiad_math['question'],
    "Olympiad2": Olympiad_math2['question'],
    "DeepMind Math": clean_math['question'],
    "MMMLU": MMMLU['question'],
    "MMMU": MMMU['question'],
    "ScienceQA": ScienceQA['question'],
    "PubmedQA": PubmedQA['question'],
}

# Order in which the datasets are concatenated into the retrieval corpus.
_CORPUS_ORDER = (
    "GSM8k",
    "Olympiad",
    "MMMLU",
    "MMMU",
    "DeepMind Math",
    "Olympiad2",
    "ScienceQA",
    "PubmedQA",
)

all_questions = [question for name in _CORPUS_ORDER for question in D[name]]

# NOTE(review): `HuggingFaceEmbeddings` has no explicit import in this file;
# presumably it is re-exported by `all_datasets` through the star import --
# confirm, or import it explicitly.
emb = HuggingFaceEmbeddings(
    model_name="google/embeddinggemma-300m",
    encode_kwargs={"normalize_embeddings": True},
)

texts = all_questions
index = VectorStoreIndex.from_documents(
    [Document(text=t) for t in texts],
    embed_model=emb,
)

# Maximum number of example questions returned by the retriever tool.
_TOP_K = 5

# Level names from lowest to highest cognitive demand.
_BLOOM_ORDER = ("Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create")
_DOK_ORDER = ("DOK1", "DOK2", "DOK3", "DOK4")


def _canonical_level(name, order):
    """Return the entry of *order* equal to *name* ignoring case and outer whitespace, else None."""
    wanted = name.strip().casefold()
    for level in order:
        if level.casefold() == wanted:
            return level
    return None


def _require_level(name, order, label):
    """Like ``_canonical_level`` but raise ``ValueError`` naming the valid choices."""
    level = _canonical_level(name, order)
    if level is None:
        raise ValueError(
            f"Unknown {label} level {name.strip()!r}; expected one of {list(order)}."
        )
    return level


def _parse_target_bloom(target):
    """Expand a Bloom's target ("Analyze" or "Apply+") into the set of accepted levels."""
    text = target.strip()
    if text.endswith("+"):
        base = _require_level(text[:-1], _BLOOM_ORDER, "Bloom's")
        return set(_BLOOM_ORDER[_BLOOM_ORDER.index(base):])
    level = _canonical_level(text, _BLOOM_ORDER)
    # An unrecognised exact target is kept as-is: it simply never matches,
    # which is reported through the feedback instead of an exception.
    return {level if level is not None else text}


def _parse_target_dok(target):
    """Expand a DOK target ("DOK3" or "DOK2-DOK3") into the set of accepted levels."""
    text = target.strip()
    if "-" in text:
        low_text, _, high_text = text.partition("-")
        low = _DOK_ORDER.index(_require_level(low_text, _DOK_ORDER, "DOK"))
        high = _DOK_ORDER.index(_require_level(high_text, _DOK_ORDER, "DOK"))
        if low > high:
            # A reversed span would otherwise produce an empty, unmatchable set.
            low, high = high, low
        return set(_DOK_ORDER[low:high + 1])
    level = _canonical_level(text, _DOK_ORDER)
    return {level if level is not None else text}


def _in_level_order(levels, order):
    """Sort *levels* by their position in *order*; unknown names go last, alphabetically."""
    rank = {level: position for position, level in enumerate(order)}
    return sorted(levels, key=lambda level: (rank.get(level, len(order)), level))


#Retriever tool
@tool
def QuestionRetrieverTool(subject: str, topic: str, grade: str) -> dict:
    """Retrieve up to 5 closely related example questions from the source datasets.

    Args:
        subject: The subject area (e.g., "Math", "Science").
        topic: The specific topic within the subject (e.g., "Algebra", "Biology").
        grade: The grade level (e.g., "5th", "8th").

    Returns:
        A dictionary with:
            closest questions found for: dict echoing subject, topic and grade.
            questions: List of up to 5 dicts, each holding the question under "text".
    """
    query = f"{topic} question for {grade} of the {subject}"
    hits = index.as_retriever(similarity_top_k=_TOP_K).retrieve(query)
    return {
        "closest questions found for": {
            "subject": subject,
            "topic": topic,
            "grade": grade,
        },
        "questions": [{"text": hit.node.text} for hit in hits],
    }


# Scoring tool
# NOTE(review): `_backend`, `_BLOOM_INDEX` and `_DOK_INDEX` are neither defined
# nor explicitly imported in this file, and `import *` does not bind
# underscore-prefixed names unless the source module lists them in `__all__`.
# `HFEmbeddingBackend` and `build_phrase_index` are imported above but unused,
# so the definitions were presumably meant to live here -- confirm and restore,
# otherwise `classify_and_score` raises NameError when called.
@tool
def classify_and_score(
    question: str,
    target_bloom: str,
    target_dok: str,
    agg: str = "max"
) -> dict:
    """Classify a question against Bloom’s and DOK targets and return guidance.

    Args:
        question: The question text to evaluate for cognitive demand.
        target_bloom: Target Bloom’s level or range. Accepts exact (e.g., "Analyze") or plus form (e.g., "Apply+") meaning that level or higher. Case and surrounding whitespace are ignored.
        target_dok: Target DOK level or range. Accepts exact (e.g., "DOK3") or span (e.g., "DOK2-DOK3"). Case and surrounding whitespace are ignored.
        agg: Aggregation method over phrase similarities within a level (choices: "mean", "max", "topk_mean").

    Returns:
        A dictionary with:
            ok: True if both Bloom’s and DOK match the targets.
            measured: Dict with best levels and per-level scores for Bloom’s and DOK.
            feedback: Brief guidance describing how to adjust the question to hit targets.

    Raises:
        ValueError: If a plus-form Bloom’s target or a DOK span names an unknown level.
    """
    # Parse the targets first so a malformed target fails before any
    # classification work is done.
    bloom_target_set = _parse_target_bloom(target_bloom)
    dok_target_set = _parse_target_dok(target_dok)

    res = classify_levels_phrases(
        question,
        BLOOMS_PHRASES,
        DOK_PHRASES,
        backend=_backend,
        prebuilt_bloom_index=_BLOOM_INDEX,
        prebuilt_dok_index=_DOK_INDEX,
        agg=agg,
        return_phrase_matches=True,
    )

    bloom_best = res["blooms"]["best_level"]
    dok_best = res["dok"]["best_level"]

    bloom_ok = bloom_best in bloom_target_set
    dok_ok = dok_best in dok_target_set

    feedback_parts = []
    if not bloom_ok:
        feedback_parts.append(
            f"Shift Bloom’s from {bloom_best} toward {_in_level_order(bloom_target_set, _BLOOM_ORDER)}. "
            f"Top cues: {res['blooms']['top_phrases'].get(bloom_best, [])[:3]}"
        )
    if not dok_ok:
        feedback_parts.append(
            f"Shift DOK from {dok_best} toward {_in_level_order(dok_target_set, _DOK_ORDER)}. "
            f"Top cues: {res['dok']['top_phrases'].get(dok_best, [])[:3]}"
        )

    return {
        "ok": bool(bloom_ok and dok_ok),
        "measured": {
            "bloom_best": bloom_best,
            "bloom_scores": res["blooms"]["scores"],
            "dok_best": dok_best,
            "dok_scores": res["dok"]["scores"],
        },
        "feedback": " ".join(feedback_parts) if feedback_parts else "On target.",
    }