Spaces:
Running
Running
new
Browse files- all_results.json +40 -40
- app.py +5 -5
all_results.json
CHANGED
|
@@ -32277,70 +32277,70 @@
|
|
| 32277 |
},
|
| 32278 |
"flores_ind2eng": {
|
| 32279 |
"prompt_1": {
|
| 32280 |
-
"bleu_score": 0.
|
| 32281 |
},
|
| 32282 |
"prompt_2": {
|
| 32283 |
-
"bleu_score": 0.
|
| 32284 |
},
|
| 32285 |
"prompt_3": {
|
| 32286 |
-
"bleu_score": 0.
|
| 32287 |
},
|
| 32288 |
"prompt_4": {
|
| 32289 |
-
"bleu_score": 0.
|
| 32290 |
},
|
| 32291 |
"prompt_5": {
|
| 32292 |
-
"bleu_score": 0.
|
| 32293 |
}
|
| 32294 |
},
|
| 32295 |
"flores_vie2eng": {
|
| 32296 |
"prompt_1": {
|
| 32297 |
-
"bleu_score": 0.
|
| 32298 |
},
|
| 32299 |
"prompt_2": {
|
| 32300 |
-
"bleu_score": 0.
|
| 32301 |
},
|
| 32302 |
"prompt_3": {
|
| 32303 |
-
"bleu_score": 0.
|
| 32304 |
},
|
| 32305 |
"prompt_4": {
|
| 32306 |
-
"bleu_score": 0.
|
| 32307 |
},
|
| 32308 |
"prompt_5": {
|
| 32309 |
-
"bleu_score": 0.
|
| 32310 |
}
|
| 32311 |
},
|
| 32312 |
"flores_zho2eng": {
|
| 32313 |
"prompt_1": {
|
| 32314 |
-
"bleu_score": 0.
|
| 32315 |
},
|
| 32316 |
"prompt_2": {
|
| 32317 |
-
"bleu_score": 0.
|
| 32318 |
},
|
| 32319 |
"prompt_3": {
|
| 32320 |
-
"bleu_score": 0.
|
| 32321 |
},
|
| 32322 |
"prompt_4": {
|
| 32323 |
-
"bleu_score": 0.
|
| 32324 |
},
|
| 32325 |
"prompt_5": {
|
| 32326 |
-
"bleu_score": 0.
|
| 32327 |
}
|
| 32328 |
},
|
| 32329 |
"flores_zsm2eng": {
|
| 32330 |
"prompt_1": {
|
| 32331 |
-
"bleu_score": 0.
|
| 32332 |
},
|
| 32333 |
"prompt_2": {
|
| 32334 |
-
"bleu_score": 0.
|
| 32335 |
},
|
| 32336 |
"prompt_3": {
|
| 32337 |
-
"bleu_score": 0.
|
| 32338 |
},
|
| 32339 |
"prompt_4": {
|
| 32340 |
-
"bleu_score": 0.
|
| 32341 |
},
|
| 32342 |
"prompt_5": {
|
| 32343 |
-
"bleu_score": 0.
|
| 32344 |
}
|
| 32345 |
},
|
| 32346 |
"mmlu": {
|
|
@@ -67059,70 +67059,70 @@
|
|
| 67059 |
},
|
| 67060 |
"flores_ind2eng": {
|
| 67061 |
"prompt_1": {
|
| 67062 |
-
"bleu_score": 0.
|
| 67063 |
},
|
| 67064 |
"prompt_2": {
|
| 67065 |
-
"bleu_score": 0.
|
| 67066 |
},
|
| 67067 |
"prompt_3": {
|
| 67068 |
-
"bleu_score": 0.
|
| 67069 |
},
|
| 67070 |
"prompt_4": {
|
| 67071 |
-
"bleu_score": 0.
|
| 67072 |
},
|
| 67073 |
"prompt_5": {
|
| 67074 |
-
"bleu_score": 0.
|
| 67075 |
}
|
| 67076 |
},
|
| 67077 |
"flores_vie2eng": {
|
| 67078 |
"prompt_1": {
|
| 67079 |
-
"bleu_score": 0.
|
| 67080 |
},
|
| 67081 |
"prompt_2": {
|
| 67082 |
-
"bleu_score": 0.
|
| 67083 |
},
|
| 67084 |
"prompt_3": {
|
| 67085 |
-
"bleu_score": 0.
|
| 67086 |
},
|
| 67087 |
"prompt_4": {
|
| 67088 |
-
"bleu_score": 0.
|
| 67089 |
},
|
| 67090 |
"prompt_5": {
|
| 67091 |
-
"bleu_score": 0.
|
| 67092 |
}
|
| 67093 |
},
|
| 67094 |
"flores_zho2eng": {
|
| 67095 |
"prompt_1": {
|
| 67096 |
-
"bleu_score": 0.
|
| 67097 |
},
|
| 67098 |
"prompt_2": {
|
| 67099 |
-
"bleu_score": 0.
|
| 67100 |
},
|
| 67101 |
"prompt_3": {
|
| 67102 |
-
"bleu_score": 0.
|
| 67103 |
},
|
| 67104 |
"prompt_4": {
|
| 67105 |
-
"bleu_score": 0.
|
| 67106 |
},
|
| 67107 |
"prompt_5": {
|
| 67108 |
-
"bleu_score": 0.
|
| 67109 |
}
|
| 67110 |
},
|
| 67111 |
"flores_zsm2eng": {
|
| 67112 |
"prompt_1": {
|
| 67113 |
-
"bleu_score": 0.
|
| 67114 |
},
|
| 67115 |
"prompt_2": {
|
| 67116 |
-
"bleu_score": 0.
|
| 67117 |
},
|
| 67118 |
"prompt_3": {
|
| 67119 |
-
"bleu_score": 0.
|
| 67120 |
},
|
| 67121 |
"prompt_4": {
|
| 67122 |
-
"bleu_score": 0.
|
| 67123 |
},
|
| 67124 |
"prompt_5": {
|
| 67125 |
-
"bleu_score": 0.
|
| 67126 |
}
|
| 67127 |
},
|
| 67128 |
"mmlu": {
|
|
|
|
| 32277 |
},
|
| 32278 |
"flores_ind2eng": {
|
| 32279 |
"prompt_1": {
|
| 32280 |
+
"bleu_score": 0.3087387231733152
|
| 32281 |
},
|
| 32282 |
"prompt_2": {
|
| 32283 |
+
"bleu_score": 0.3094226547039261
|
| 32284 |
},
|
| 32285 |
"prompt_3": {
|
| 32286 |
+
"bleu_score": 0.3061124934874166
|
| 32287 |
},
|
| 32288 |
"prompt_4": {
|
| 32289 |
+
"bleu_score": 0.30135340693301044
|
| 32290 |
},
|
| 32291 |
"prompt_5": {
|
| 32292 |
+
"bleu_score": 0.30791510943643785
|
| 32293 |
}
|
| 32294 |
},
|
| 32295 |
"flores_vie2eng": {
|
| 32296 |
"prompt_1": {
|
| 32297 |
+
"bleu_score": 0.24226557595813872
|
| 32298 |
},
|
| 32299 |
"prompt_2": {
|
| 32300 |
+
"bleu_score": 0.24374681205197152
|
| 32301 |
},
|
| 32302 |
"prompt_3": {
|
| 32303 |
+
"bleu_score": 0.23865746431889961
|
| 32304 |
},
|
| 32305 |
"prompt_4": {
|
| 32306 |
+
"bleu_score": 0.24343786296993222
|
| 32307 |
},
|
| 32308 |
"prompt_5": {
|
| 32309 |
+
"bleu_score": 0.2496790676198905
|
| 32310 |
}
|
| 32311 |
},
|
| 32312 |
"flores_zho2eng": {
|
| 32313 |
"prompt_1": {
|
| 32314 |
+
"bleu_score": 0.18741482916807534
|
| 32315 |
},
|
| 32316 |
"prompt_2": {
|
| 32317 |
+
"bleu_score": 0.18861522471729936
|
| 32318 |
},
|
| 32319 |
"prompt_3": {
|
| 32320 |
+
"bleu_score": 0.1828941675772202
|
| 32321 |
},
|
| 32322 |
"prompt_4": {
|
| 32323 |
+
"bleu_score": 0.18500544495397628
|
| 32324 |
},
|
| 32325 |
"prompt_5": {
|
| 32326 |
+
"bleu_score": 0.19088057936700595
|
| 32327 |
}
|
| 32328 |
},
|
| 32329 |
"flores_zsm2eng": {
|
| 32330 |
"prompt_1": {
|
| 32331 |
+
"bleu_score": 0.31040973391193794
|
| 32332 |
},
|
| 32333 |
"prompt_2": {
|
| 32334 |
+
"bleu_score": 0.31410450445911836
|
| 32335 |
},
|
| 32336 |
"prompt_3": {
|
| 32337 |
+
"bleu_score": 0.30742063457580054
|
| 32338 |
},
|
| 32339 |
"prompt_4": {
|
| 32340 |
+
"bleu_score": 0.2954984182513215
|
| 32341 |
},
|
| 32342 |
"prompt_5": {
|
| 32343 |
+
"bleu_score": 0.3059634141807576
|
| 32344 |
}
|
| 32345 |
},
|
| 32346 |
"mmlu": {
|
|
|
|
| 67059 |
},
|
| 67060 |
"flores_ind2eng": {
|
| 67061 |
"prompt_1": {
|
| 67062 |
+
"bleu_score": 0.011674358088733964
|
| 67063 |
},
|
| 67064 |
"prompt_2": {
|
| 67065 |
+
"bleu_score": 0.34299290022800966
|
| 67066 |
},
|
| 67067 |
"prompt_3": {
|
| 67068 |
+
"bleu_score": 0.34235818894094877
|
| 67069 |
},
|
| 67070 |
"prompt_4": {
|
| 67071 |
+
"bleu_score": 0.344471697570177
|
| 67072 |
},
|
| 67073 |
"prompt_5": {
|
| 67074 |
+
"bleu_score": 0.34330146854458155
|
| 67075 |
}
|
| 67076 |
},
|
| 67077 |
"flores_vie2eng": {
|
| 67078 |
"prompt_1": {
|
| 67079 |
+
"bleu_score": 0.004016316923669523
|
| 67080 |
},
|
| 67081 |
"prompt_2": {
|
| 67082 |
+
"bleu_score": 0.28858215451103547
|
| 67083 |
},
|
| 67084 |
"prompt_3": {
|
| 67085 |
+
"bleu_score": 0.2874460615046707
|
| 67086 |
},
|
| 67087 |
"prompt_4": {
|
| 67088 |
+
"bleu_score": 0.2895463893365964
|
| 67089 |
},
|
| 67090 |
"prompt_5": {
|
| 67091 |
+
"bleu_score": 0.28765593996471855
|
| 67092 |
}
|
| 67093 |
},
|
| 67094 |
"flores_zho2eng": {
|
| 67095 |
"prompt_1": {
|
| 67096 |
+
"bleu_score": 0.004745747326013472
|
| 67097 |
},
|
| 67098 |
"prompt_2": {
|
| 67099 |
+
"bleu_score": 0.21742919083139323
|
| 67100 |
},
|
| 67101 |
"prompt_3": {
|
| 67102 |
+
"bleu_score": 0.21718999888377416
|
| 67103 |
},
|
| 67104 |
"prompt_4": {
|
| 67105 |
+
"bleu_score": 0.2171201069019555
|
| 67106 |
},
|
| 67107 |
"prompt_5": {
|
| 67108 |
+
"bleu_score": 0.21609843798223957
|
| 67109 |
}
|
| 67110 |
},
|
| 67111 |
"flores_zsm2eng": {
|
| 67112 |
"prompt_1": {
|
| 67113 |
+
"bleu_score": 0.010737551256522686
|
| 67114 |
},
|
| 67115 |
"prompt_2": {
|
| 67116 |
+
"bleu_score": 0.35662624808916016
|
| 67117 |
},
|
| 67118 |
"prompt_3": {
|
| 67119 |
+
"bleu_score": 0.35860534258636234
|
| 67120 |
},
|
| 67121 |
"prompt_4": {
|
| 67122 |
+
"bleu_score": 0.35739510518617695
|
| 67123 |
},
|
| 67124 |
"prompt_5": {
|
| 67125 |
+
"bleu_score": 0.3485870508300006
|
| 67126 |
}
|
| 67127 |
},
|
| 67128 |
"mmlu": {
|
app.py
CHANGED
|
@@ -2297,11 +2297,11 @@ with block:
|
|
| 2297 |
""")
|
| 2298 |
|
| 2299 |
|
| 2300 |
-
with gr.TabItem("Reasoning"):
|
| 2301 |
|
| 2302 |
|
| 2303 |
# dataset 12:
|
| 2304 |
-
with gr.TabItem("MMLU"):
|
| 2305 |
with gr.TabItem("Zero Shot"):
|
| 2306 |
with gr.TabItem("Overall"):
|
| 2307 |
with gr.Row():
|
|
@@ -2355,7 +2355,7 @@ with block:
|
|
| 2355 |
|
| 2356 |
|
| 2357 |
# dataset 14:
|
| 2358 |
-
with gr.TabItem("C_EVAL"):
|
| 2359 |
with gr.TabItem("Zero Shot"):
|
| 2360 |
with gr.TabItem("Overall"):
|
| 2361 |
with gr.Row():
|
|
@@ -2408,7 +2408,7 @@ with block:
|
|
| 2408 |
|
| 2409 |
|
| 2410 |
# dataset 16:
|
| 2411 |
-
with gr.TabItem("CMMLU"):
|
| 2412 |
with gr.TabItem("Zero Shot"):
|
| 2413 |
with gr.TabItem("Overall"):
|
| 2414 |
with gr.Row():
|
|
@@ -2622,7 +2622,7 @@ with block:
|
|
| 2622 |
with gr.TabItem("Emotion"):
|
| 2623 |
|
| 2624 |
# dataset 18:
|
| 2625 |
-
with gr.TabItem("
|
| 2626 |
with gr.TabItem("Zero Shot"):
|
| 2627 |
with gr.TabItem("Overall"):
|
| 2628 |
with gr.Row():
|
|
|
|
| 2297 |
""")
|
| 2298 |
|
| 2299 |
|
| 2300 |
+
with gr.TabItem("General Reasoning"):
|
| 2301 |
|
| 2302 |
|
| 2303 |
# dataset 12:
|
| 2304 |
+
with gr.TabItem("MMLU Subset"):
|
| 2305 |
with gr.TabItem("Zero Shot"):
|
| 2306 |
with gr.TabItem("Overall"):
|
| 2307 |
with gr.Row():
|
|
|
|
| 2355 |
|
| 2356 |
|
| 2357 |
# dataset 14:
|
| 2358 |
+
with gr.TabItem("C_EVAL Subset"):
|
| 2359 |
with gr.TabItem("Zero Shot"):
|
| 2360 |
with gr.TabItem("Overall"):
|
| 2361 |
with gr.Row():
|
|
|
|
| 2408 |
|
| 2409 |
|
| 2410 |
# dataset 16:
|
| 2411 |
+
with gr.TabItem("CMMLU Subset"):
|
| 2412 |
with gr.TabItem("Zero Shot"):
|
| 2413 |
with gr.TabItem("Overall"):
|
| 2414 |
with gr.Row():
|
|
|
|
| 2622 |
with gr.TabItem("Emotion"):
|
| 2623 |
|
| 2624 |
# dataset 18:
|
| 2625 |
+
with gr.TabItem("Indonesian Emotion Classification"):
|
| 2626 |
with gr.TabItem("Zero Shot"):
|
| 2627 |
with gr.TabItem("Overall"):
|
| 2628 |
with gr.Row():
|