from llama_index.core import VectorStoreIndex, Document
from smolagents import tool
# HuggingFaceEmbeddings comes from LangChain; llama_index accepts it as
# embed_model when the llama-index-embeddings-langchain integration is installed.
from langchain_huggingface import HuggingFaceEmbeddings
from all_datasets import *
from level_classifier_tool_2 import (
    classify_levels_phrases,
    HFEmbeddingBackend,
    build_phrase_index,
)
from phrases import BLOOMS_PHRASES, DOK_PHRASES

# Map each dataset name to its question column (names come from all_datasets).
D = {
    "GSM8k": GSM8k['question'],
    "Olympiad": Olympiad_math['question'],
    "Olympiad2": Olympiad_math2['question'],
    "DeepMind Math": clean_math['question'],
    "MMMLU": MMMLU['question'],
    "MMMU": MMMU['question'],
    "ScienceQA": ScienceQA['question'],
    "PubmedQA": PubmedQA['question']
}

all_questions = (
    list(D["GSM8k"]) +
    list(D["Olympiad"]) +
    list(D["MMMLU"]) +
    list(D["MMMU"]) +
    list(D["DeepMind Math"]) +
    list(D["Olympiad2"]) +
    list(D["ScienceQA"]) +
    list(D["PubmedQA"])
)

# Normalized embeddings so retrieval scores behave like cosine similarities.
emb = HuggingFaceEmbeddings(
    model_name="google/embeddinggemma-300m",
    encode_kwargs={"normalize_embeddings": True},
)

# Embed every question and build an in-memory vector index for retrieval.
texts = all_questions
index = VectorStoreIndex.from_documents(
    [Document(text=t) for t in texts],
    embed_model=emb,
)
# Retriever tool
@tool
def QuestionRetrieverTool(subject: str, topic: str, grade: str) -> dict:
    """Retrieve up to 5 closely related example questions from the source datasets.

    Args:
        subject: The subject area (e.g., "Math", "Science").
        topic: The specific topic within the subject (e.g., "Algebra", "Biology").
        grade: The grade level (e.g., "5th", "8th").

    Returns:
        A dictionary with:
            "closest questions found for": the subject, topic, and grade queried.
            "questions": a list of up to 5 dicts, each holding one question text.
    """
    query = f"{grade} grade {subject} question about {topic}"
    results = index.as_retriever(similarity_top_k=5).retrieve(query)
    question_texts = [r.node.text for r in results]
    return {
        "closest questions found for": {
            "subject": subject,
            "topic": topic,
            "grade": grade,
        },
        "questions": [{"text": question} for question in question_texts],
    }
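
# Illustrative call (arguments are hypothetical, and results depend on which
# datasets loaded above); smolagents @tool objects remain directly callable:
#
#   QuestionRetrieverTool(subject="Math", topic="Algebra", grade="8th")
#   -> {"closest questions found for": {...}, "questions": [{"text": "..."}, ...]}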

# Prebuilt backend and phrase indexes used by the scoring tool below. The
# constructor and build_phrase_index signatures are assumed from their import
# above; adjust to match level_classifier_tool_2 if they differ.
_backend = HFEmbeddingBackend(model_name="google/embeddinggemma-300m")
_BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
_DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)

# Scoring tool
@tool
def classify_and_score(
    question: str,
    target_bloom: str,
    target_dok: str,
    agg: str = "max",
) -> dict:
    """Classify a question against Bloom’s and DOK targets and return guidance.

    

    Args:

        question: The question text to evaluate for cognitive demand.

        target_bloom: Target Bloom’s level or range. Accepts exact (e.g., "Analyze")

            or plus form (e.g., "Apply+") meaning that level or higher.

        target_dok: Target DOK level or range. Accepts exact (e.g., "DOK3")

            or span (e.g., "DOK2-DOK3").

        agg: Aggregation method over phrase similarities within a level

            (choices: "mean", "max", "topk_mean").

    

    Returns:

        A dictionary with:

            ok: True if both Bloom’s and DOK match the targets.

            measured: Dict with best levels and per-level scores for Bloom’s and DOK.

            feedback: Brief guidance describing how to adjust the question to hit targets.

    """
    res = classify_levels_phrases(
        question,
        BLOOMS_PHRASES,
        DOK_PHRASES,
        backend=_backend,
        prebuilt_bloom_index=_BLOOM_INDEX,
        prebuilt_dok_index=_DOK_INDEX,
        agg=agg,
        return_phrase_matches=True
    )

    def _parse_target_bloom(t: str):
        order = ["Remember","Understand","Apply","Analyze","Evaluate","Create"]
        if t.endswith("+"):
            base = t[:-1]
            return set(order[order.index(base):])
        return {t}

    def _parse_target_dok(t: str):
        order = ["DOK1","DOK2","DOK3","DOK4"]
        if "-" in t:
            lo, hi = t.split("-")
            return set(order[order.index(lo):order.index(hi)+1])
        return {t}
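
    # Worked examples (for reference):
    #   _parse_target_bloom("Apply+")  -> {"Apply", "Analyze", "Evaluate", "Create"}
    #   _parse_target_dok("DOK2-DOK3") -> {"DOK2", "DOK3"}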

    bloom_target_set = _parse_target_bloom(target_bloom)
    dok_target_set = _parse_target_dok(target_dok)

    bloom_best = res["blooms"]["best_level"]
    dok_best = res["dok"]["best_level"]

    bloom_ok = bloom_best in bloom_target_set
    dok_ok = dok_best in dok_target_set

    feedback_parts = []
    if not bloom_ok:
        feedback_parts.append(
            f"Shift Bloom’s from {bloom_best} toward {sorted(bloom_target_set)}. "
            f"Top cues: {res['blooms']['top_phrases'].get(bloom_best, [])[:3]}"
        )
    if not dok_ok:
        feedback_parts.append(
            f"Shift DOK from {dok_best} toward {sorted(dok_target_set)}. "
            f"Top cues: {res['dok']['top_phrases'].get(dok_best, [])[:3]}"
        )

    return {
        "ok": bool(bloom_ok and dok_ok),
        "measured": {
            "bloom_best": bloom_best,
            "bloom_scores": res["blooms"]["scores"],
            "dok_best": dok_best,
            "dok_scores": res["dok"]["scores"],
        },
        "feedback": " ".join(feedback_parts) if feedback_parts else "On target.",
    }
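

# Minimal smoke test; the sample arguments are illustrative only, and the run
# assumes the datasets and phrase tables above loaded successfully.
if __name__ == "__main__":
    hits = QuestionRetrieverTool(subject="Math", topic="Algebra", grade="8th")
    print(f"Retrieved {len(hits['questions'])} questions for {hits['closest questions found for']}")

    report = classify_and_score(
        question="Design an experiment to test whether fertilizer affects plant growth.",
        target_bloom="Create",
        target_dok="DOK3-DOK4",
    )
    print("On target:", report["ok"])
    print("Feedback:", report["feedback"])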