Spaces:
Running
Running
add gsm8k
Browse files
tlem.py
CHANGED
|
@@ -77,6 +77,14 @@ class ReasoningMetric(evaluate.Metric):
|
|
| 77 |
return results
|
| 78 |
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
class Suite(EvaluationSuite):
|
| 81 |
def run(
|
| 82 |
self,
|
|
@@ -110,6 +118,8 @@ class Suite(EvaluationSuite):
|
|
| 110 |
suite = MMLU.suite(chat=chat)
|
| 111 |
case _ if name.startswith("cmmlu"):
|
| 112 |
suite = CMMLU.suite(chat=chat)
|
|
|
|
|
|
|
| 113 |
match name:
|
| 114 |
case _ if "test" in name:
|
| 115 |
suite = suite["Test"]
|
|
@@ -120,20 +130,7 @@ class Suite(EvaluationSuite):
|
|
| 120 |
super().__init__(name)
|
| 121 |
self.cached_result = {}
|
| 122 |
|
| 123 |
-
match self.name:
|
| 124 |
-
case "cmmlu":
|
| 125 |
-
pass
|
| 126 |
-
|
| 127 |
self.suite = [
|
| 128 |
-
Task(
|
| 129 |
-
dataset_name=("gsm8k", "main"),
|
| 130 |
-
metric_name=("sustech/tlem", "gsm8k"),
|
| 131 |
-
input_column="question",
|
| 132 |
-
label_column="answer",
|
| 133 |
-
)
|
| 134 |
# TASK_REGISTRY["gsm8k"],
|
| 135 |
# TASK_REGISTRY["competition_math"],
|
| 136 |
]
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
# %%
|
|
|
|
| 77 |
return results
|
| 78 |
|
| 79 |
|
| 80 |
+
gsm8k = Task(
|
| 81 |
+
dataset_name=("gsm8k", "main"),
|
| 82 |
+
metric_name=("sustech/tlem", "gsm8k"),
|
| 83 |
+
input_column="question",
|
| 84 |
+
label_column="answer",
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
class Suite(EvaluationSuite):
|
| 89 |
def run(
|
| 90 |
self,
|
|
|
|
| 118 |
suite = MMLU.suite(chat=chat)
|
| 119 |
case _ if name.startswith("cmmlu"):
|
| 120 |
suite = CMMLU.suite(chat=chat)
|
| 121 |
+
case "gsm8k":
|
| 122 |
+
suite = [gsm8k]
|
| 123 |
match name:
|
| 124 |
case _ if "test" in name:
|
| 125 |
suite = suite["Test"]
|
|
|
|
| 130 |
super().__init__(name)
|
| 131 |
self.cached_result = {}
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
self.suite = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
# TASK_REGISTRY["gsm8k"],
|
| 135 |
# TASK_REGISTRY["competition_math"],
|
| 136 |
]
|
|
|
|
|
|
|
|
|