Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- results/cross_lingual/zero_shot/cross_logiqa_no_prompt.csv +3 -0
- results/cross_lingual/zero_shot/cross_mmlu_no_prompt.csv +3 -0
- results/cross_lingual/zero_shot/cross_xquad_no_prompt.csv +3 -0
- results/cultural_reasoning/zero_shot/sg_eval_v2_mcq_no_prompt.csv +3 -0
- results/general_reasoning/zero_shot/indommlu_no_prompt.csv +5 -0
- results/general_reasoning/zero_shot/mmlu_no_prompt.csv +4 -0
results/cross_lingual/zero_shot/cross_logiqa_no_prompt.csv
CHANGED
|
@@ -1,3 +1,6 @@
|
|
| 1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
| 2 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.4375,0.2728896103896104,0.3361231717991198,0.4318181818181818,0.4772727272727273,0.4943181818181818,0.4034090909090909,0.4318181818181818,0.42613636363636365,0.3977272727272727
|
|
|
|
| 3 |
gemma2-9b-cpt-sea-lionv3-instruct,0.48214285714285715,0.3753246753246754,0.4220803807589114,0.5454545454545454,0.5227272727272727,0.4943181818181818,0.4431818181818182,0.45454545454545453,0.4318181818181818,0.48295454545454547
|
|
|
|
|
|
|
|
|
| 1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
| 2 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.4375,0.2728896103896104,0.3361231717991198,0.4318181818181818,0.4772727272727273,0.4943181818181818,0.4034090909090909,0.4318181818181818,0.42613636363636365,0.3977272727272727
|
| 3 |
+
Meta-Llama-3-8B-Instruct,0.41558441558441567,0.24577922077922076,0.3088830658319437,0.45454545454545453,0.4772727272727273,0.42045454545454547,0.3522727272727273,0.3977272727272727,0.42613636363636365,0.3806818181818182
|
| 4 |
gemma2-9b-cpt-sea-lionv3-instruct,0.48214285714285715,0.3753246753246754,0.4220803807589114,0.5454545454545454,0.5227272727272727,0.4943181818181818,0.4431818181818182,0.45454545454545453,0.4318181818181818,0.48295454545454547
|
| 5 |
+
cross_openhermes_llama3_8b_12288_inst,0.3896103896103896,0.23279220779220777,0.2914456435114937,0.4659090909090909,0.3806818181818182,0.4147727272727273,0.30113636363636365,0.3693181818181818,0.42613636363636365,0.3693181818181818
|
| 6 |
+
GPT4o_0513,0.575487012987013,0.4172077922077923,0.48372906728622567,0.6534090909090909,0.6079545454545454,0.6022727272727273,0.4659090909090909,0.5511363636363636,0.5965909090909091,0.5511363636363636
|
results/cross_lingual/zero_shot/cross_mmlu_no_prompt.csv
CHANGED
|
@@ -1,3 +1,6 @@
|
|
| 1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
| 2 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.6638095238095237,0.5363809523809523,0.5933304614797237,0.78,0.62,0.6933333333333334,0.64,0.66,0.6466666666666666,0.6066666666666667
|
|
|
|
| 3 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7809523809523808,0.7506666666666667,0.7655100940510849,0.8466666666666667,0.7866666666666666,0.7733333333333333,0.78,0.7933333333333333,0.7333333333333333,0.7533333333333333
|
|
|
|
|
|
|
|
|
| 1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
| 2 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.6638095238095237,0.5363809523809523,0.5933304614797237,0.78,0.62,0.6933333333333334,0.64,0.66,0.6466666666666666,0.6066666666666667
|
| 3 |
+
Meta-Llama-3-8B-Instruct,0.6428571428571429,0.49542857142857133,0.5595955249078094,0.7666666666666667,0.6533333333333333,0.7,0.6466666666666666,0.5733333333333334,0.5733333333333334,0.5866666666666667
|
| 4 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7809523809523808,0.7506666666666667,0.7655100940510849,0.8466666666666667,0.7866666666666666,0.7733333333333333,0.78,0.7933333333333333,0.7333333333333333,0.7533333333333333
|
| 5 |
+
cross_openhermes_llama3_8b_12288_inst,0.6066666666666667,0.4874285714285715,0.54055013922636,0.7266666666666667,0.6,0.6,0.5866666666666667,0.58,0.5733333333333334,0.58
|
| 6 |
+
GPT4o_0513,0.8819047619047619,0.8609523809523807,0.8713026281050943,0.9266666666666666,0.8866666666666667,0.9066666666666666,0.7933333333333333,0.88,0.9066666666666666,0.8733333333333333
|
results/cross_lingual/zero_shot/cross_xquad_no_prompt.csv
CHANGED
|
@@ -1,3 +1,6 @@
|
|
| 1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
| 2 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.9191176470588235,0.8352941176470587,0.87520339228777,0.9394957983193277,0.8932773109243698,0.9285714285714286,0.915126050420168,,,
|
|
|
|
| 3 |
gemma2-9b-cpt-sea-lionv3-instruct,0.9315126050420168,0.8716386554621849,0.9005815677746684,0.9453781512605042,0.9142857142857143,0.9369747899159664,0.9294117647058824,,,
|
|
|
|
|
|
|
|
|
| 1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
| 2 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.9191176470588235,0.8352941176470587,0.87520339228777,0.9394957983193277,0.8932773109243698,0.9285714285714286,0.915126050420168,,,
|
| 3 |
+
Meta-Llama-3-8B-Instruct,0.9060924369747899,0.8224789915966386,0.8622634639161603,0.9319327731092437,0.8932773109243698,0.9134453781512605,0.8857142857142857,,,
|
| 4 |
gemma2-9b-cpt-sea-lionv3-instruct,0.9315126050420168,0.8716386554621849,0.9005815677746684,0.9453781512605042,0.9142857142857143,0.9369747899159664,0.9294117647058824,,,
|
| 5 |
+
cross_openhermes_llama3_8b_12288_inst,0.9054621848739496,0.8298319327731092,0.8659989418997561,0.9285714285714286,0.892436974789916,0.9134453781512605,0.8873949579831932,,,
|
| 6 |
+
GPT4o_0513,0.8941176470588236,0.8014705882352942,0.8452629967360276,0.9302521008403362,0.8857142857142857,0.9168067226890756,0.8436974789915966,,,
|
results/cultural_reasoning/zero_shot/sg_eval_v2_mcq_no_prompt.csv
CHANGED
|
@@ -1,3 +1,6 @@
|
|
| 1 |
Model,Accuracy
|
| 2 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.8
|
|
|
|
| 3 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7818181818181819
|
|
|
|
|
|
|
|
|
| 1 |
Model,Accuracy
|
| 2 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.8
|
| 3 |
+
Meta-Llama-3-8B-Instruct,0.8054545454545454
|
| 4 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7818181818181819
|
| 5 |
+
cross_openhermes_llama3_8b_12288_inst,0.7945454545454546
|
| 6 |
+
GPT4o_0513,0.9072727272727272
|
results/general_reasoning/zero_shot/indommlu_no_prompt.csv
CHANGED
|
@@ -1 +1,6 @@
|
|
| 1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
Model,Accuracy
|
| 2 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.5561786501101542
|
| 3 |
+
Meta-Llama-3-8B-Instruct,0.5207957807597303
|
| 4 |
+
gemma2-9b-cpt-sea-lionv3-instruct,0.6258762267174044
|
| 5 |
+
cross_openhermes_llama3_8b_12288_inst,0.5279391147606649
|
| 6 |
+
GPT4o_0513,0.7599305694639161
|
results/general_reasoning/zero_shot/mmlu_no_prompt.csv
CHANGED
|
@@ -1 +1,5 @@
|
|
| 1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
Model,Accuracy
|
| 2 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.6551304969610297
|
| 3 |
+
Meta-Llama-3-8B-Instruct,0.6618519842688595
|
| 4 |
+
cross_openhermes_llama3_8b_12288_inst,0.6010010725777619
|
| 5 |
+
GPT4o_0513,0.871576689309975
|