# NOTE(review): removed non-Python extraction artifacts that preceded the code
# (file-size line, commit hash, and a dump of line numbers) — they were not
# part of the program and made the file unimportable.
from llama_index.core import VectorStoreIndex, Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from smolagents import tool

from all_datasets import *
from level_classifier_tool_2 import (
    classify_levels_phrases,
    HFEmbeddingBackend,
    build_phrase_index
)
from phrases import BLOOMS_PHRASES, DOK_PHRASES
# Dataset name -> iterable of question texts.
# NOTE(review): GSM8k, Olympiad_math, etc. come from `from all_datasets import *`
# — presumably dataframe/dataset columns; verify against all_datasets.
D = {
    "GSM8k": GSM8k['question'],
    "Olympiad": Olympiad_math['question'],
    "Olympiad2": Olympiad_math2['question'],
    "DeepMind Math": clean_math['question'],
    "MMMLU": MMMLU['question'],
    "MMMU": MMMU['question'],
    "ScienceQA": ScienceQA['question'],
    "PubmedQA": PubmedQA['question']
}

# Flatten every dataset into one list of question strings.
# The concatenation order below deliberately differs from the dict's insertion
# order — it reproduces the original script's ordering exactly.
_CONCAT_ORDER = (
    "GSM8k",
    "Olympiad",
    "MMMLU",
    "MMMU",
    "DeepMind Math",
    "Olympiad2",
    "ScienceQA",
    "PubmedQA",
)
all_questions = [q for name in _CONCAT_ORDER for q in D[name]]
# Embedding model used both to index and to query the questions.
# FIX: the original called `HuggingFaceEmbeddings` (LangChain's class), which
# is never imported anywhere in this file and would raise NameError at import
# time. Use llama_index's own HuggingFaceEmbedding wrapper, which plugs
# directly into VectorStoreIndex. It L2-normalizes embeddings by default,
# matching the original's intent (`normalize_embeddings=True`) — confirm
# against the installed llama-index-embeddings-huggingface version.
emb = HuggingFaceEmbedding(
    model_name="google/embeddinggemma-300m",
)

# One Document per question; the index embeds them with `emb` at build time.
texts = all_questions
index = VectorStoreIndex.from_documents([Document(text=t) for t in texts], embed_model=emb)
#Retriever tool
@tool
def QuestionRetrieverTool(subject: str, topic: str, grade: str) -> dict:
    """Retrieve up to 5 closely-related example questions from the source datasets.

    FIX: the original docstring contained a pasted-in ``description = (...)``
    assignment and a stray ``}`` — smolagents parses this docstring to build
    the tool's schema, so the garbled text corrupted the tool description.
    Runtime behavior is unchanged.

    Args:
        subject: The subject area (e.g., "Math", "Science").
        topic: The specific topic within the subject (e.g., "Algebra", "Biology").
        grade: The grade level (e.g., "5th", "8th").

    Returns:
        A dictionary with:
            "closest questions found for": dict echoing subject, topic and grade.
            "questions": list of up to 5 dicts, each of the form {"text": question}.
    """
    # Phrase the request as natural language and take the 5 nearest
    # neighbours from the module-level vector index.
    query = f"{topic} question for {grade} of the {subject}"
    results = index.as_retriever(similarity_top_k=5).retrieve(query)
    question_texts = [r.node.text for r in results]
    return {
        "closest questions found for": {
            "subject": subject,
            "topic": topic,
            "grade": grade,
        },
        "questions": [{"text": question} for question in question_texts],
    }
# Scoring tool
@tool
def classify_and_score(
    question: str,
    target_bloom: str,
    target_dok: str,
    agg: str = "max"
) -> dict:
    """Classify a question against Bloom’s and DOK targets and return guidance.

    Args:
        question: The question text to evaluate for cognitive demand.
        target_bloom: Target Bloom’s level or range. Accepts exact (e.g., "Analyze")
            or plus form (e.g., "Apply+") meaning that level or higher.
        target_dok: Target DOK level or range. Accepts exact (e.g., "DOK3")
            or span (e.g., "DOK2-DOK3"); a reversed span ("DOK3-DOK2") is
            normalized to the same inclusive range.
        agg: Aggregation method over phrase similarities within a level
            (choices: "mean", "max", "topk_mean").

    Returns:
        A dictionary with:
            ok: True if both Bloom’s and DOK match the targets.
            measured: Dict with best levels and per-level scores for Bloom’s and DOK.
            feedback: Brief guidance describing how to adjust the question to hit targets.

    Raises:
        ValueError: If a target names an unknown Bloom’s or DOK level.
    """
    _BLOOM_ORDER = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]
    _DOK_ORDER = ["DOK1", "DOK2", "DOK3", "DOK4"]

    def _parse_target_bloom(t: str) -> set:
        """Expand "Apply+" into {that level and every higher one}; else exact match."""
        t = t.strip()
        if t.endswith("+"):
            base = t[:-1]
            return set(_BLOOM_ORDER[_BLOOM_ORDER.index(base):])
        return {t}

    def _parse_target_dok(t: str) -> set:
        """Expand "DOK2-DOK3" into the inclusive span; tolerate reversed bounds."""
        t = t.strip()
        if "-" in t:
            lo, hi = (part.strip() for part in t.split("-", 1))
            i, j = _DOK_ORDER.index(lo), _DOK_ORDER.index(hi)
            if i > j:
                # FIX: the original returned an empty set for "DOK3-DOK2",
                # making the target silently unsatisfiable.
                i, j = j, i
            return set(_DOK_ORDER[i:j + 1])
        return {t}

    # FIX: parse targets before running the classifier so malformed targets
    # fail fast instead of after the (comparatively expensive) embedding pass.
    bloom_target_set = _parse_target_bloom(target_bloom)
    dok_target_set = _parse_target_dok(target_dok)

    res = classify_levels_phrases(
        question,
        BLOOMS_PHRASES,
        DOK_PHRASES,
        backend=_backend,
        prebuilt_bloom_index=_BLOOM_INDEX,
        prebuilt_dok_index=_DOK_INDEX,
        agg=agg,
        return_phrase_matches=True
    )
    bloom_best = res["blooms"]["best_level"]
    dok_best = res["dok"]["best_level"]
    bloom_ok = bloom_best in bloom_target_set
    dok_ok = dok_best in dok_target_set

    # Build actionable feedback only for the dimension(s) that missed.
    feedback_parts = []
    if not bloom_ok:
        feedback_parts.append(
            f"Shift Bloom’s from {bloom_best} toward {sorted(bloom_target_set)}. "
            f"Top cues: {res['blooms']['top_phrases'].get(bloom_best, [])[:3]}"
        )
    if not dok_ok:
        feedback_parts.append(
            f"Shift DOK from {dok_best} toward {sorted(dok_target_set)}. "
            f"Top cues: {res['dok']['top_phrases'].get(dok_best, [])[:3]}"
        )
    return {
        "ok": bool(bloom_ok and dok_ok),
        "measured": {
            "bloom_best": bloom_best,
            "bloom_scores": res["blooms"]["scores"],
            "dok_best": dok_best,
            "dok_scores": res["dok"]["scores"],
        },
        "feedback": " ".join(feedback_parts) if feedback_parts else "On target.",
    }