Spaces:
Sleeping
Sleeping
new
Browse files- all_results.json +8 -531
all_results.json
CHANGED
|
@@ -32277,7 +32277,7 @@
|
|
| 32277 |
},
|
| 32278 |
"flores_ind2eng": {
|
| 32279 |
"prompt_1": {
|
| 32280 |
-
"bleu_score": 0.
|
| 32281 |
},
|
| 32282 |
"prompt_2": {
|
| 32283 |
"bleu_score": 0.08178489772334997
|
|
@@ -32294,24 +32294,24 @@
|
|
| 32294 |
},
|
| 32295 |
"flores_vie2eng": {
|
| 32296 |
"prompt_1": {
|
| 32297 |
-
"bleu_score": 0.
|
| 32298 |
},
|
| 32299 |
"prompt_2": {
|
| 32300 |
-
"bleu_score": 0.
|
| 32301 |
},
|
| 32302 |
"prompt_3": {
|
| 32303 |
-
"bleu_score": 0.
|
| 32304 |
},
|
| 32305 |
"prompt_4": {
|
| 32306 |
-
"bleu_score": 0.
|
| 32307 |
},
|
| 32308 |
"prompt_5": {
|
| 32309 |
-
"bleu_score": 0.
|
| 32310 |
}
|
| 32311 |
},
|
| 32312 |
"flores_zho2eng": {
|
| 32313 |
"prompt_1": {
|
| 32314 |
-
"bleu_score": 0.
|
| 32315 |
},
|
| 32316 |
"prompt_2": {
|
| 32317 |
"bleu_score": 0.05134705353375384
|
|
@@ -32328,7 +32328,7 @@
|
|
| 32328 |
},
|
| 32329 |
"flores_zsm2eng": {
|
| 32330 |
"prompt_1": {
|
| 32331 |
-
"bleu_score": 0.
|
| 32332 |
},
|
| 32333 |
"prompt_2": {
|
| 32334 |
"bleu_score": 0.08240154342677156
|
|
@@ -93866,528 +93866,5 @@
|
|
| 93866 |
"prompt_1": -1
|
| 93867 |
}
|
| 93868 |
}
|
| 93869 |
-
},
|
| 93870 |
-
"LLaMA_3_Merlion_8B": {
|
| 93871 |
-
"model_size": "8B",
|
| 93872 |
-
"model_link": "https://seaeval.github.io/",
|
| 93873 |
-
"zero_shot": {
|
| 93874 |
-
"cross_xquad": {
|
| 93875 |
-
"prompt_1": {
|
| 93876 |
-
"overall_acc": 0.8876050420168068,
|
| 93877 |
-
"language_acc": {
|
| 93878 |
-
"Spanish": 0.8890756302521008,
|
| 93879 |
-
"English": 0.9277310924369748,
|
| 93880 |
-
"Chinese": 0.8663865546218488,
|
| 93881 |
-
"Vietnamese": 0.8672268907563025
|
| 93882 |
-
},
|
| 93883 |
-
"consistency_score_2": 0.8428571428571429,
|
| 93884 |
-
"consistency_score_3": 0.7733193277310924,
|
| 93885 |
-
"consistency_score_4": 0.7235294117647059,
|
| 93886 |
-
"detailed_consistency_score": {
|
| 93887 |
-
"2_combine": {
|
| 93888 |
-
"Spanish,English": 0.880672268907563,
|
| 93889 |
-
"Spanish,Chinese": 0.8151260504201681,
|
| 93890 |
-
"Spanish,Vietnamese": 0.8369747899159664,
|
| 93891 |
-
"English,Chinese": 0.8571428571428571,
|
| 93892 |
-
"English,Vietnamese": 0.8512605042016806,
|
| 93893 |
-
"Chinese,Vietnamese": 0.8159663865546218
|
| 93894 |
-
},
|
| 93895 |
-
"3_combine": {
|
| 93896 |
-
"Spanish,English,Chinese": 0.7840336134453781,
|
| 93897 |
-
"Spanish,English,Vietnamese": 0.7907563025210084,
|
| 93898 |
-
"Spanish,Chinese,Vietnamese": 0.7453781512605042,
|
| 93899 |
-
"English,Chinese,Vietnamese": 0.773109243697479
|
| 93900 |
-
},
|
| 93901 |
-
"4_combine": {
|
| 93902 |
-
"Spanish,English,Chinese,Vietnamese": 0.7235294117647059
|
| 93903 |
-
}
|
| 93904 |
-
},
|
| 93905 |
-
"AC3_2": 0.8646525260084111,
|
| 93906 |
-
"AC3_3": 0.8265302705456998,
|
| 93907 |
-
"AC3_4": 0.7972126129915623
|
| 93908 |
-
},
|
| 93909 |
-
"prompt_2": {
|
| 93910 |
-
"overall_acc": 0.8831932773109243,
|
| 93911 |
-
"language_acc": {
|
| 93912 |
-
"Spanish": 0.8705882352941177,
|
| 93913 |
-
"English": 0.9218487394957983,
|
| 93914 |
-
"Chinese": 0.8705882352941177,
|
| 93915 |
-
"Vietnamese": 0.8697478991596639
|
| 93916 |
-
},
|
| 93917 |
-
"consistency_score_2": 0.838375350140056,
|
| 93918 |
-
"consistency_score_3": 0.7674369747899159,
|
| 93919 |
-
"consistency_score_4": 0.7151260504201681,
|
| 93920 |
-
"detailed_consistency_score": {
|
| 93921 |
-
"2_combine": {
|
| 93922 |
-
"Spanish,English": 0.865546218487395,
|
| 93923 |
-
"Spanish,Chinese": 0.8126050420168067,
|
| 93924 |
-
"Spanish,Vietnamese": 0.8168067226890756,
|
| 93925 |
-
"English,Chinese": 0.8613445378151261,
|
| 93926 |
-
"English,Vietnamese": 0.8621848739495799,
|
| 93927 |
-
"Chinese,Vietnamese": 0.8117647058823529
|
| 93928 |
-
},
|
| 93929 |
-
"3_combine": {
|
| 93930 |
-
"Spanish,English,Chinese": 0.7773109243697479,
|
| 93931 |
-
"Spanish,English,Vietnamese": 0.7823529411764706,
|
| 93932 |
-
"Spanish,Chinese,Vietnamese": 0.7336134453781512,
|
| 93933 |
-
"English,Chinese,Vietnamese": 0.7764705882352941
|
| 93934 |
-
},
|
| 93935 |
-
"4_combine": {
|
| 93936 |
-
"Spanish,English,Chinese,Vietnamese": 0.7151260504201681
|
| 93937 |
-
}
|
| 93938 |
-
},
|
| 93939 |
-
"AC3_2": 0.8602009368168079,
|
| 93940 |
-
"AC3_3": 0.8212562153038939,
|
| 93941 |
-
"AC3_4": 0.790323321708278
|
| 93942 |
-
},
|
| 93943 |
-
"prompt_3": {
|
| 93944 |
-
"overall_acc": 0.8493697478991598,
|
| 93945 |
-
"language_acc": {
|
| 93946 |
-
"Spanish": 0.8176470588235294,
|
| 93947 |
-
"English": 0.9008403361344538,
|
| 93948 |
-
"Chinese": 0.8403361344537815,
|
| 93949 |
-
"Vietnamese": 0.838655462184874
|
| 93950 |
-
},
|
| 93951 |
-
"consistency_score_2": 0.7768907563025209,
|
| 93952 |
-
"consistency_score_3": 0.6796218487394958,
|
| 93953 |
-
"consistency_score_4": 0.6067226890756302,
|
| 93954 |
-
"detailed_consistency_score": {
|
| 93955 |
-
"2_combine": {
|
| 93956 |
-
"Spanish,English": 0.7848739495798319,
|
| 93957 |
-
"Spanish,Chinese": 0.7411764705882353,
|
| 93958 |
-
"Spanish,Vietnamese": 0.7394957983193278,
|
| 93959 |
-
"English,Chinese": 0.8084033613445378,
|
| 93960 |
-
"English,Vietnamese": 0.8151260504201681,
|
| 93961 |
-
"Chinese,Vietnamese": 0.7722689075630252
|
| 93962 |
-
},
|
| 93963 |
-
"3_combine": {
|
| 93964 |
-
"Spanish,English,Chinese": 0.680672268907563,
|
| 93965 |
-
"Spanish,English,Vietnamese": 0.6840336134453782,
|
| 93966 |
-
"Spanish,Chinese,Vietnamese": 0.6453781512605042,
|
| 93967 |
-
"English,Chinese,Vietnamese": 0.7084033613445379
|
| 93968 |
-
},
|
| 93969 |
-
"4_combine": {
|
| 93970 |
-
"Spanish,English,Chinese,Vietnamese": 0.6067226890756302
|
| 93971 |
-
}
|
| 93972 |
-
},
|
| 93973 |
-
"AC3_2": 0.8115151343593726,
|
| 93974 |
-
"AC3_3": 0.7550731339032906,
|
| 93975 |
-
"AC3_4": 0.7078285476163063
|
| 93976 |
-
},
|
| 93977 |
-
"prompt_4": {
|
| 93978 |
-
"overall_acc": 0.8831932773109245,
|
| 93979 |
-
"language_acc": {
|
| 93980 |
-
"Spanish": 0.892436974789916,
|
| 93981 |
-
"English": 0.9277310924369748,
|
| 93982 |
-
"Chinese": 0.8638655462184874,
|
| 93983 |
-
"Vietnamese": 0.8487394957983193
|
| 93984 |
-
},
|
| 93985 |
-
"consistency_score_2": 0.8341736694677871,
|
| 93986 |
-
"consistency_score_3": 0.7602941176470588,
|
| 93987 |
-
"consistency_score_4": 0.7050420168067227,
|
| 93988 |
-
"detailed_consistency_score": {
|
| 93989 |
-
"2_combine": {
|
| 93990 |
-
"Spanish,English": 0.8798319327731092,
|
| 93991 |
-
"Spanish,Chinese": 0.8201680672268907,
|
| 93992 |
-
"Spanish,Vietnamese": 0.8142857142857143,
|
| 93993 |
-
"English,Chinese": 0.8495798319327731,
|
| 93994 |
-
"English,Vietnamese": 0.8428571428571429,
|
| 93995 |
-
"Chinese,Vietnamese": 0.7983193277310925
|
| 93996 |
-
},
|
| 93997 |
-
"3_combine": {
|
| 93998 |
-
"Spanish,English,Chinese": 0.780672268907563,
|
| 93999 |
-
"Spanish,English,Vietnamese": 0.7756302521008404,
|
| 94000 |
-
"Spanish,Chinese,Vietnamese": 0.7294117647058823,
|
| 94001 |
-
"English,Chinese,Vietnamese": 0.7554621848739496
|
| 94002 |
-
},
|
| 94003 |
-
"4_combine": {
|
| 94004 |
-
"Spanish,English,Chinese,Vietnamese": 0.7050420168067227
|
| 94005 |
-
}
|
| 94006 |
-
},
|
| 94007 |
-
"AC3_2": 0.8579838785447016,
|
| 94008 |
-
"AC3_3": 0.8171485287980964,
|
| 94009 |
-
"AC3_4": 0.7841260948330266
|
| 94010 |
-
},
|
| 94011 |
-
"prompt_5": {
|
| 94012 |
-
"overall_acc": 0.8684873949579832,
|
| 94013 |
-
"language_acc": {
|
| 94014 |
-
"Spanish": 0.8680672268907563,
|
| 94015 |
-
"English": 0.9168067226890756,
|
| 94016 |
-
"Chinese": 0.846218487394958,
|
| 94017 |
-
"Vietnamese": 0.8428571428571429
|
| 94018 |
-
},
|
| 94019 |
-
"consistency_score_2": 0.8084033613445377,
|
| 94020 |
-
"consistency_score_3": 0.7252100840336135,
|
| 94021 |
-
"consistency_score_4": 0.6638655462184874,
|
| 94022 |
-
"detailed_consistency_score": {
|
| 94023 |
-
"2_combine": {
|
| 94024 |
-
"Spanish,English": 0.8453781512605042,
|
| 94025 |
-
"Spanish,Chinese": 0.788235294117647,
|
| 94026 |
-
"Spanish,Vietnamese": 0.7907563025210084,
|
| 94027 |
-
"English,Chinese": 0.826890756302521,
|
| 94028 |
-
"English,Vietnamese": 0.8226890756302521,
|
| 94029 |
-
"Chinese,Vietnamese": 0.7764705882352941
|
| 94030 |
-
},
|
| 94031 |
-
"3_combine": {
|
| 94032 |
-
"Spanish,English,Chinese": 0.7420168067226891,
|
| 94033 |
-
"Spanish,English,Vietnamese": 0.7403361344537815,
|
| 94034 |
-
"Spanish,Chinese,Vietnamese": 0.6932773109243697,
|
| 94035 |
-
"English,Chinese,Vietnamese": 0.7252100840336134
|
| 94036 |
-
},
|
| 94037 |
-
"4_combine": {
|
| 94038 |
-
"Spanish,English,Chinese,Vietnamese": 0.6638655462184874
|
| 94039 |
-
}
|
| 94040 |
-
},
|
| 94041 |
-
"AC3_2": 0.8373689540463608,
|
| 94042 |
-
"AC3_3": 0.7904082486704036,
|
| 94043 |
-
"AC3_4": 0.7525144414280582
|
| 94044 |
-
}
|
| 94045 |
-
},
|
| 94046 |
-
"cross_mmlu": {
|
| 94047 |
-
"prompt_1": -1,
|
| 94048 |
-
"prompt_2": -1,
|
| 94049 |
-
"prompt_3": -1,
|
| 94050 |
-
"prompt_4": -1,
|
| 94051 |
-
"prompt_5": -1
|
| 94052 |
-
},
|
| 94053 |
-
"cross_logiqa": {
|
| 94054 |
-
"prompt_1": -1,
|
| 94055 |
-
"prompt_2": -1,
|
| 94056 |
-
"prompt_3": -1,
|
| 94057 |
-
"prompt_4": -1,
|
| 94058 |
-
"prompt_5": -1
|
| 94059 |
-
},
|
| 94060 |
-
"sg_eval": {
|
| 94061 |
-
"prompt_1": {
|
| 94062 |
-
"accuracy": 0.5533980582524272
|
| 94063 |
-
},
|
| 94064 |
-
"prompt_2": {
|
| 94065 |
-
"accuracy": 0.6213592233009708
|
| 94066 |
-
},
|
| 94067 |
-
"prompt_3": {
|
| 94068 |
-
"accuracy": 0.6310679611650486
|
| 94069 |
-
},
|
| 94070 |
-
"prompt_4": {
|
| 94071 |
-
"accuracy": 0.5436893203883495
|
| 94072 |
-
},
|
| 94073 |
-
"prompt_5": {
|
| 94074 |
-
"accuracy": 0.5631067961165048
|
| 94075 |
-
}
|
| 94076 |
-
},
|
| 94077 |
-
"cn_eval": {
|
| 94078 |
-
"prompt_1": -1,
|
| 94079 |
-
"prompt_2": -1,
|
| 94080 |
-
"prompt_3": -1,
|
| 94081 |
-
"prompt_4": -1,
|
| 94082 |
-
"prompt_5": -1
|
| 94083 |
-
},
|
| 94084 |
-
"us_eval": {
|
| 94085 |
-
"prompt_1": -1,
|
| 94086 |
-
"prompt_2": -1,
|
| 94087 |
-
"prompt_3": -1,
|
| 94088 |
-
"prompt_4": -1,
|
| 94089 |
-
"prompt_5": -1
|
| 94090 |
-
},
|
| 94091 |
-
"ph_eval": {
|
| 94092 |
-
"prompt_1": -1,
|
| 94093 |
-
"prompt_2": -1,
|
| 94094 |
-
"prompt_3": -1,
|
| 94095 |
-
"prompt_4": -1,
|
| 94096 |
-
"prompt_5": -1
|
| 94097 |
-
},
|
| 94098 |
-
"sing2eng": {
|
| 94099 |
-
"prompt_1": -1,
|
| 94100 |
-
"prompt_2": -1,
|
| 94101 |
-
"prompt_3": -1,
|
| 94102 |
-
"prompt_4": -1,
|
| 94103 |
-
"prompt_5": -1
|
| 94104 |
-
},
|
| 94105 |
-
"indommlu": {
|
| 94106 |
-
"prompt_1": -1,
|
| 94107 |
-
"prompt_2": -1,
|
| 94108 |
-
"prompt_3": -1,
|
| 94109 |
-
"prompt_4": -1,
|
| 94110 |
-
"prompt_5": -1
|
| 94111 |
-
},
|
| 94112 |
-
"flores_ind2eng": {
|
| 94113 |
-
"prompt_1": -1,
|
| 94114 |
-
"prompt_2": -1,
|
| 94115 |
-
"prompt_3": -1,
|
| 94116 |
-
"prompt_4": -1,
|
| 94117 |
-
"prompt_5": -1
|
| 94118 |
-
},
|
| 94119 |
-
"flores_vie2eng": {
|
| 94120 |
-
"prompt_1": -1,
|
| 94121 |
-
"prompt_2": -1,
|
| 94122 |
-
"prompt_3": -1,
|
| 94123 |
-
"prompt_4": -1,
|
| 94124 |
-
"prompt_5": -1
|
| 94125 |
-
},
|
| 94126 |
-
"flores_zho2eng": {
|
| 94127 |
-
"prompt_1": -1,
|
| 94128 |
-
"prompt_2": -1,
|
| 94129 |
-
"prompt_3": -1,
|
| 94130 |
-
"prompt_4": -1,
|
| 94131 |
-
"prompt_5": -1
|
| 94132 |
-
},
|
| 94133 |
-
"flores_zsm2eng": {
|
| 94134 |
-
"prompt_1": -1,
|
| 94135 |
-
"prompt_2": -1,
|
| 94136 |
-
"prompt_3": -1,
|
| 94137 |
-
"prompt_4": -1,
|
| 94138 |
-
"prompt_5": -1
|
| 94139 |
-
},
|
| 94140 |
-
"mmlu": {
|
| 94141 |
-
"prompt_1": -1,
|
| 94142 |
-
"prompt_2": -1,
|
| 94143 |
-
"prompt_3": -1,
|
| 94144 |
-
"prompt_4": -1,
|
| 94145 |
-
"prompt_5": -1
|
| 94146 |
-
},
|
| 94147 |
-
"mmlu_full": {
|
| 94148 |
-
"prompt_1": -1,
|
| 94149 |
-
"prompt_2": -1,
|
| 94150 |
-
"prompt_3": -1,
|
| 94151 |
-
"prompt_4": -1,
|
| 94152 |
-
"prompt_5": -1
|
| 94153 |
-
},
|
| 94154 |
-
"c_eval": {
|
| 94155 |
-
"prompt_1": -1,
|
| 94156 |
-
"prompt_2": -1,
|
| 94157 |
-
"prompt_3": -1,
|
| 94158 |
-
"prompt_4": -1,
|
| 94159 |
-
"prompt_5": -1
|
| 94160 |
-
},
|
| 94161 |
-
"c_eval_full": {
|
| 94162 |
-
"prompt_1": -1,
|
| 94163 |
-
"prompt_2": -1,
|
| 94164 |
-
"prompt_3": -1,
|
| 94165 |
-
"prompt_4": -1,
|
| 94166 |
-
"prompt_5": -1
|
| 94167 |
-
},
|
| 94168 |
-
"cmmlu": {
|
| 94169 |
-
"prompt_1": -1,
|
| 94170 |
-
"prompt_2": -1,
|
| 94171 |
-
"prompt_3": -1,
|
| 94172 |
-
"prompt_4": -1,
|
| 94173 |
-
"prompt_5": -1
|
| 94174 |
-
},
|
| 94175 |
-
"cmmlu_full": {
|
| 94176 |
-
"prompt_1": -1,
|
| 94177 |
-
"prompt_2": -1,
|
| 94178 |
-
"prompt_3": -1,
|
| 94179 |
-
"prompt_4": -1,
|
| 94180 |
-
"prompt_5": -1
|
| 94181 |
-
},
|
| 94182 |
-
"zbench": {
|
| 94183 |
-
"prompt_1": -1,
|
| 94184 |
-
"prompt_2": -1,
|
| 94185 |
-
"prompt_3": -1,
|
| 94186 |
-
"prompt_4": -1,
|
| 94187 |
-
"prompt_5": -1
|
| 94188 |
-
},
|
| 94189 |
-
"ind_emotion": {
|
| 94190 |
-
"prompt_1": -1,
|
| 94191 |
-
"prompt_2": -1,
|
| 94192 |
-
"prompt_3": -1,
|
| 94193 |
-
"prompt_4": -1,
|
| 94194 |
-
"prompt_5": -1
|
| 94195 |
-
},
|
| 94196 |
-
"ocnli": {
|
| 94197 |
-
"prompt_1": -1,
|
| 94198 |
-
"prompt_2": -1,
|
| 94199 |
-
"prompt_3": -1,
|
| 94200 |
-
"prompt_4": -1,
|
| 94201 |
-
"prompt_5": -1
|
| 94202 |
-
},
|
| 94203 |
-
"c3": {
|
| 94204 |
-
"prompt_1": -1,
|
| 94205 |
-
"prompt_2": -1,
|
| 94206 |
-
"prompt_3": -1,
|
| 94207 |
-
"prompt_4": -1,
|
| 94208 |
-
"prompt_5": -1
|
| 94209 |
-
},
|
| 94210 |
-
"dream": {
|
| 94211 |
-
"prompt_1": -1,
|
| 94212 |
-
"prompt_2": -1,
|
| 94213 |
-
"prompt_3": -1,
|
| 94214 |
-
"prompt_4": -1,
|
| 94215 |
-
"prompt_5": -1
|
| 94216 |
-
},
|
| 94217 |
-
"samsum": {
|
| 94218 |
-
"prompt_1": -1,
|
| 94219 |
-
"prompt_2": -1,
|
| 94220 |
-
"prompt_3": -1,
|
| 94221 |
-
"prompt_4": -1,
|
| 94222 |
-
"prompt_5": -1
|
| 94223 |
-
},
|
| 94224 |
-
"dialogsum": {
|
| 94225 |
-
"prompt_1": -1,
|
| 94226 |
-
"prompt_2": -1,
|
| 94227 |
-
"prompt_3": -1,
|
| 94228 |
-
"prompt_4": -1,
|
| 94229 |
-
"prompt_5": -1
|
| 94230 |
-
},
|
| 94231 |
-
"sst2": {
|
| 94232 |
-
"prompt_1": -1,
|
| 94233 |
-
"prompt_2": -1,
|
| 94234 |
-
"prompt_3": -1,
|
| 94235 |
-
"prompt_4": -1,
|
| 94236 |
-
"prompt_5": -1
|
| 94237 |
-
},
|
| 94238 |
-
"cola": {
|
| 94239 |
-
"prompt_1": -1,
|
| 94240 |
-
"prompt_2": -1,
|
| 94241 |
-
"prompt_3": -1,
|
| 94242 |
-
"prompt_4": -1,
|
| 94243 |
-
"prompt_5": -1
|
| 94244 |
-
},
|
| 94245 |
-
"qqp": {
|
| 94246 |
-
"prompt_1": -1,
|
| 94247 |
-
"prompt_2": -1,
|
| 94248 |
-
"prompt_3": -1,
|
| 94249 |
-
"prompt_4": -1,
|
| 94250 |
-
"prompt_5": -1
|
| 94251 |
-
},
|
| 94252 |
-
"mnli": {
|
| 94253 |
-
"prompt_1": -1,
|
| 94254 |
-
"prompt_2": -1,
|
| 94255 |
-
"prompt_3": -1,
|
| 94256 |
-
"prompt_4": -1,
|
| 94257 |
-
"prompt_5": -1
|
| 94258 |
-
},
|
| 94259 |
-
"qnli": {
|
| 94260 |
-
"prompt_1": -1,
|
| 94261 |
-
"prompt_2": -1,
|
| 94262 |
-
"prompt_3": -1,
|
| 94263 |
-
"prompt_4": -1,
|
| 94264 |
-
"prompt_5": -1
|
| 94265 |
-
},
|
| 94266 |
-
"wnli": {
|
| 94267 |
-
"prompt_1": -1,
|
| 94268 |
-
"prompt_2": -1,
|
| 94269 |
-
"prompt_3": -1,
|
| 94270 |
-
"prompt_4": -1,
|
| 94271 |
-
"prompt_5": -1
|
| 94272 |
-
},
|
| 94273 |
-
"rte": {
|
| 94274 |
-
"prompt_1": -1,
|
| 94275 |
-
"prompt_2": -1,
|
| 94276 |
-
"prompt_3": -1,
|
| 94277 |
-
"prompt_4": -1,
|
| 94278 |
-
"prompt_5": -1
|
| 94279 |
-
},
|
| 94280 |
-
"mrpc": {
|
| 94281 |
-
"prompt_1": -1,
|
| 94282 |
-
"prompt_2": -1,
|
| 94283 |
-
"prompt_3": -1,
|
| 94284 |
-
"prompt_4": -1,
|
| 94285 |
-
"prompt_5": -1
|
| 94286 |
-
}
|
| 94287 |
-
},
|
| 94288 |
-
"five_shot": {
|
| 94289 |
-
"cross_xquad": {
|
| 94290 |
-
"prompt_1": -1
|
| 94291 |
-
},
|
| 94292 |
-
"cross_mmlu": {
|
| 94293 |
-
"prompt_1": -1
|
| 94294 |
-
},
|
| 94295 |
-
"cross_logiqa": {
|
| 94296 |
-
"prompt_1": -1
|
| 94297 |
-
},
|
| 94298 |
-
"sg_eval": {
|
| 94299 |
-
"prompt_1": -1
|
| 94300 |
-
},
|
| 94301 |
-
"cn_eval": {
|
| 94302 |
-
"prompt_1": -1
|
| 94303 |
-
},
|
| 94304 |
-
"us_eval": {
|
| 94305 |
-
"prompt_1": -1
|
| 94306 |
-
},
|
| 94307 |
-
"ph_eval": {
|
| 94308 |
-
"prompt_1": -1
|
| 94309 |
-
},
|
| 94310 |
-
"sing2eng": {
|
| 94311 |
-
"prompt_1": -1
|
| 94312 |
-
},
|
| 94313 |
-
"indommlu": {
|
| 94314 |
-
"prompt_1": -1
|
| 94315 |
-
},
|
| 94316 |
-
"flores_ind2eng": {
|
| 94317 |
-
"prompt_1": -1
|
| 94318 |
-
},
|
| 94319 |
-
"flores_vie2eng": {
|
| 94320 |
-
"prompt_1": -1
|
| 94321 |
-
},
|
| 94322 |
-
"flores_zho2eng": {
|
| 94323 |
-
"prompt_1": -1
|
| 94324 |
-
},
|
| 94325 |
-
"flores_zsm2eng": {
|
| 94326 |
-
"prompt_1": -1
|
| 94327 |
-
},
|
| 94328 |
-
"mmlu": {
|
| 94329 |
-
"prompt_1": -1
|
| 94330 |
-
},
|
| 94331 |
-
"mmlu_full": {
|
| 94332 |
-
"prompt_1": -1
|
| 94333 |
-
},
|
| 94334 |
-
"c_eval": {
|
| 94335 |
-
"prompt_1": -1
|
| 94336 |
-
},
|
| 94337 |
-
"c_eval_full": {
|
| 94338 |
-
"prompt_1": -1
|
| 94339 |
-
},
|
| 94340 |
-
"cmmlu": {
|
| 94341 |
-
"prompt_1": -1
|
| 94342 |
-
},
|
| 94343 |
-
"cmmlu_full": {
|
| 94344 |
-
"prompt_1": -1
|
| 94345 |
-
},
|
| 94346 |
-
"zbench": {
|
| 94347 |
-
"prompt_1": -1
|
| 94348 |
-
},
|
| 94349 |
-
"ind_emotion": {
|
| 94350 |
-
"prompt_1": -1
|
| 94351 |
-
},
|
| 94352 |
-
"ocnli": {
|
| 94353 |
-
"prompt_1": -1
|
| 94354 |
-
},
|
| 94355 |
-
"c3": {
|
| 94356 |
-
"prompt_1": -1
|
| 94357 |
-
},
|
| 94358 |
-
"dream": {
|
| 94359 |
-
"prompt_1": -1
|
| 94360 |
-
},
|
| 94361 |
-
"samsum": {
|
| 94362 |
-
"prompt_1": -1
|
| 94363 |
-
},
|
| 94364 |
-
"dialogsum": {
|
| 94365 |
-
"prompt_1": -1
|
| 94366 |
-
},
|
| 94367 |
-
"sst2": {
|
| 94368 |
-
"prompt_1": -1
|
| 94369 |
-
},
|
| 94370 |
-
"cola": {
|
| 94371 |
-
"prompt_1": -1
|
| 94372 |
-
},
|
| 94373 |
-
"qqp": {
|
| 94374 |
-
"prompt_1": -1
|
| 94375 |
-
},
|
| 94376 |
-
"mnli": {
|
| 94377 |
-
"prompt_1": -1
|
| 94378 |
-
},
|
| 94379 |
-
"qnli": {
|
| 94380 |
-
"prompt_1": -1
|
| 94381 |
-
},
|
| 94382 |
-
"wnli": {
|
| 94383 |
-
"prompt_1": -1
|
| 94384 |
-
},
|
| 94385 |
-
"rte": {
|
| 94386 |
-
"prompt_1": -1
|
| 94387 |
-
},
|
| 94388 |
-
"mrpc": {
|
| 94389 |
-
"prompt_1": -1
|
| 94390 |
-
}
|
| 94391 |
-
}
|
| 94392 |
}
|
| 94393 |
}
|
|
|
|
| 32277 |
},
|
| 32278 |
"flores_ind2eng": {
|
| 32279 |
"prompt_1": {
|
| 32280 |
+
"bleu_score": 0.07998569558291352
|
| 32281 |
},
|
| 32282 |
"prompt_2": {
|
| 32283 |
"bleu_score": 0.08178489772334997
|
|
|
|
| 32294 |
},
|
| 32295 |
"flores_vie2eng": {
|
| 32296 |
"prompt_1": {
|
| 32297 |
+
"bleu_score": 0.06595796158495684
|
| 32298 |
},
|
| 32299 |
"prompt_2": {
|
| 32300 |
+
"bleu_score": 0.0659317494666598
|
| 32301 |
},
|
| 32302 |
"prompt_3": {
|
| 32303 |
+
"bleu_score": 0.06631225923987717
|
| 32304 |
},
|
| 32305 |
"prompt_4": {
|
| 32306 |
+
"bleu_score": 0.06562445607881094
|
| 32307 |
},
|
| 32308 |
"prompt_5": {
|
| 32309 |
+
"bleu_score": 0.07261211813311091
|
| 32310 |
}
|
| 32311 |
},
|
| 32312 |
"flores_zho2eng": {
|
| 32313 |
"prompt_1": {
|
| 32314 |
+
"bleu_score": 0.0502826099860938
|
| 32315 |
},
|
| 32316 |
"prompt_2": {
|
| 32317 |
"bleu_score": 0.05134705353375384
|
|
|
|
| 32328 |
},
|
| 32329 |
"flores_zsm2eng": {
|
| 32330 |
"prompt_1": {
|
| 32331 |
+
"bleu_score": 0.08113017898863324
|
| 32332 |
},
|
| 32333 |
"prompt_2": {
|
| 32334 |
"bleu_score": 0.08240154342677156
|
|
|
|
| 93866 |
"prompt_1": -1
|
| 93867 |
}
|
| 93868 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93869 |
}
|
| 93870 |
}
|