Spaces:
Running
Running
David Pomerenke
committed on
Commit
·
07dcc45
1
Parent(s):
3a698ab
Don't translate a language to itself
Browse files
This makes the language comparison fairer
- languagebench.py +8 -3
- results.json +56 -424
languagebench.py
CHANGED
|
@@ -96,12 +96,13 @@ languages["in_benchmark"] = languages["in_benchmark"].fillna(False)
|
|
| 96 |
languages = languages.sort_values(by="speakers", ascending=False)
|
| 97 |
|
| 98 |
# sample languages to translate from
|
|
|
|
| 99 |
original_languages = languages[languages["in_benchmark"]].sample(
|
| 100 |
-
n=n_sentences, weights="speakers", replace=True, random_state=42
|
| 101 |
)
|
| 102 |
# sample languages to analyze with all models
|
| 103 |
detailed_target_languages = languages[languages["in_benchmark"]].sample(
|
| 104 |
-
n=
|
| 105 |
)
|
| 106 |
|
| 107 |
|
|
@@ -176,9 +177,13 @@ async def main():
|
|
| 176 |
not in detailed_target_languages.language_code.values
|
| 177 |
):
|
| 178 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
original_sentences = [
|
| 180 |
load_sentences(lang)[i]
|
| 181 |
-
for i, lang in enumerate(
|
| 182 |
]
|
| 183 |
print(model)
|
| 184 |
predictions = [
|
|
|
|
| 96 |
languages = languages.sort_values(by="speakers", ascending=False)
|
| 97 |
|
| 98 |
# sample languages to translate from
|
| 99 |
+
# when translating e.g. to Mandarin, we drop Mandarin from the sample and use the next samples from the list instead; therefore we need to sample more than n_sentences
|
| 100 |
original_languages = languages[languages["in_benchmark"]].sample(
|
| 101 |
+
n=n_sentences * 2, weights="speakers", replace=True, random_state=42
|
| 102 |
)
|
| 103 |
# sample languages to analyze with all models
|
| 104 |
detailed_target_languages = languages[languages["in_benchmark"]].sample(
|
| 105 |
+
n=3, random_state=42
|
| 106 |
)
|
| 107 |
|
| 108 |
|
|
|
|
| 177 |
not in detailed_target_languages.language_code.values
|
| 178 |
):
|
| 179 |
continue
|
| 180 |
+
# drop the target language from the original languages sample
|
| 181 |
+
_original_languages = original_languages[
|
| 182 |
+
original_languages.language_code != language.language_code
|
| 183 |
+
].iloc[:n_sentences]
|
| 184 |
original_sentences = [
|
| 185 |
load_sentences(lang)[i]
|
| 186 |
+
for i, lang in enumerate(_original_languages.itertuples())
|
| 187 |
]
|
| 188 |
print(model)
|
| 189 |
predictions = [
|
results.json
CHANGED
|
@@ -6,10 +6,10 @@
|
|
| 6 |
"scores": [
|
| 7 |
{
|
| 8 |
"model": "anthropic/claude-3.5-sonnet",
|
| 9 |
-
"bleu": 0.
|
| 10 |
}
|
| 11 |
],
|
| 12 |
-
"bleu": 0.
|
| 13 |
},
|
| 14 |
{
|
| 15 |
"language_name": "Mandarin Chinese",
|
|
@@ -18,10 +18,10 @@
|
|
| 18 |
"scores": [
|
| 19 |
{
|
| 20 |
"model": "anthropic/claude-3.5-sonnet",
|
| 21 |
-
"bleu": 0.
|
| 22 |
}
|
| 23 |
],
|
| 24 |
-
"bleu": 0.
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"language_name": "Spanish",
|
|
@@ -30,10 +30,10 @@
|
|
| 30 |
"scores": [
|
| 31 |
{
|
| 32 |
"model": "anthropic/claude-3.5-sonnet",
|
| 33 |
-
"bleu": 0.
|
| 34 |
}
|
| 35 |
],
|
| 36 |
-
"bleu": 0.
|
| 37 |
},
|
| 38 |
{
|
| 39 |
"language_name": "Hindi",
|
|
@@ -42,10 +42,10 @@
|
|
| 42 |
"scores": [
|
| 43 |
{
|
| 44 |
"model": "anthropic/claude-3.5-sonnet",
|
| 45 |
-
"bleu": 0.
|
| 46 |
}
|
| 47 |
],
|
| 48 |
-
"bleu": 0.
|
| 49 |
},
|
| 50 |
{
|
| 51 |
"language_name": "Bengali",
|
|
@@ -54,10 +54,10 @@
|
|
| 54 |
"scores": [
|
| 55 |
{
|
| 56 |
"model": "anthropic/claude-3.5-sonnet",
|
| 57 |
-
"bleu": 0.
|
| 58 |
}
|
| 59 |
],
|
| 60 |
-
"bleu": 0.
|
| 61 |
},
|
| 62 |
{
|
| 63 |
"language_name": "Portuguese",
|
|
@@ -78,10 +78,10 @@
|
|
| 78 |
"scores": [
|
| 79 |
{
|
| 80 |
"model": "anthropic/claude-3.5-sonnet",
|
| 81 |
-
"bleu": 0.
|
| 82 |
}
|
| 83 |
],
|
| 84 |
-
"bleu": 0.
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"language_name": "Indonesian",
|
|
@@ -114,10 +114,10 @@
|
|
| 114 |
"scores": [
|
| 115 |
{
|
| 116 |
"model": "anthropic/claude-3.5-sonnet",
|
| 117 |
-
"bleu": 0.
|
| 118 |
}
|
| 119 |
],
|
| 120 |
-
"bleu": 0.
|
| 121 |
},
|
| 122 |
{
|
| 123 |
"language_name": "Eastern Punjabi",
|
|
@@ -126,10 +126,10 @@
|
|
| 126 |
"scores": [
|
| 127 |
{
|
| 128 |
"model": "anthropic/claude-3.5-sonnet",
|
| 129 |
-
"bleu": 0.
|
| 130 |
}
|
| 131 |
],
|
| 132 |
-
"bleu": 0.
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"language_name": "Standard German",
|
|
@@ -212,28 +212,12 @@
|
|
| 212 |
"language_code": "mar",
|
| 213 |
"speakers": 83100000.0,
|
| 214 |
"scores": [
|
| 215 |
-
{
|
| 216 |
-
"model": "openai/gpt-4o",
|
| 217 |
-
"bleu": 0.22522575757065674
|
| 218 |
-
},
|
| 219 |
{
|
| 220 |
"model": "anthropic/claude-3.5-sonnet",
|
| 221 |
"bleu": 0.2333407635240652
|
| 222 |
-
},
|
| 223 |
-
{
|
| 224 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 225 |
-
"bleu": 0.10203588314680621
|
| 226 |
-
},
|
| 227 |
-
{
|
| 228 |
-
"model": "mistralai/mistral-large",
|
| 229 |
-
"bleu": 0.16554398005927695
|
| 230 |
-
},
|
| 231 |
-
{
|
| 232 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 233 |
-
"bleu": 0.0743622715348743
|
| 234 |
}
|
| 235 |
],
|
| 236 |
-
"bleu": 0.
|
| 237 |
},
|
| 238 |
{
|
| 239 |
"language_name": "Swahili",
|
|
@@ -254,10 +238,10 @@
|
|
| 254 |
"scores": [
|
| 255 |
{
|
| 256 |
"model": "anthropic/claude-3.5-sonnet",
|
| 257 |
-
"bleu": 0.
|
| 258 |
}
|
| 259 |
],
|
| 260 |
-
"bleu": 0.
|
| 261 |
},
|
| 262 |
{
|
| 263 |
"language_name": "Telugu",
|
|
@@ -285,10 +269,10 @@
|
|
| 285 |
"scores": [
|
| 286 |
{
|
| 287 |
"model": "anthropic/claude-3.5-sonnet",
|
| 288 |
-
"bleu": 0.
|
| 289 |
}
|
| 290 |
],
|
| 291 |
-
"bleu": 0.
|
| 292 |
},
|
| 293 |
{
|
| 294 |
"language_name": "Vietnamese",
|
|
@@ -357,10 +341,10 @@
|
|
| 357 |
"scores": [
|
| 358 |
{
|
| 359 |
"model": "anthropic/claude-3.5-sonnet",
|
| 360 |
-
"bleu": 0.
|
| 361 |
}
|
| 362 |
],
|
| 363 |
-
"bleu": 0.
|
| 364 |
},
|
| 365 |
{
|
| 366 |
"language_name": "Bhojpuri",
|
|
@@ -414,28 +398,12 @@
|
|
| 414 |
"language_code": "apc",
|
| 415 |
"speakers": 44000000.0,
|
| 416 |
"scores": [
|
| 417 |
-
{
|
| 418 |
-
"model": "openai/gpt-4o",
|
| 419 |
-
"bleu": 0.3018366486807633
|
| 420 |
-
},
|
| 421 |
{
|
| 422 |
"model": "anthropic/claude-3.5-sonnet",
|
| 423 |
"bleu": 0.21013619903144296
|
| 424 |
-
},
|
| 425 |
-
{
|
| 426 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 427 |
-
"bleu": 0.1499005259127334
|
| 428 |
-
},
|
| 429 |
-
{
|
| 430 |
-
"model": "mistralai/mistral-large",
|
| 431 |
-
"bleu": 0.18169602550114305
|
| 432 |
-
},
|
| 433 |
-
{
|
| 434 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 435 |
-
"bleu": 0.02549625724666183
|
| 436 |
}
|
| 437 |
],
|
| 438 |
-
"bleu": 0.
|
| 439 |
},
|
| 440 |
{
|
| 441 |
"language_name": "Hausa",
|
|
@@ -515,10 +483,10 @@
|
|
| 515 |
"scores": [
|
| 516 |
{
|
| 517 |
"model": "anthropic/claude-3.5-sonnet",
|
| 518 |
-
"bleu": 0.
|
| 519 |
}
|
| 520 |
],
|
| 521 |
-
"bleu": 0.
|
| 522 |
},
|
| 523 |
{
|
| 524 |
"language_name": "Xiang Chinese",
|
|
@@ -653,10 +621,10 @@
|
|
| 653 |
"scores": [
|
| 654 |
{
|
| 655 |
"model": "anthropic/claude-3.5-sonnet",
|
| 656 |
-
"bleu": 0.
|
| 657 |
}
|
| 658 |
],
|
| 659 |
-
"bleu": 0.
|
| 660 |
},
|
| 661 |
{
|
| 662 |
"language_name": "Northern Uzbek",
|
|
@@ -699,28 +667,12 @@
|
|
| 699 |
"language_code": "apc",
|
| 700 |
"speakers": 24600000.0,
|
| 701 |
"scores": [
|
| 702 |
-
{
|
| 703 |
-
"model": "openai/gpt-4o",
|
| 704 |
-
"bleu": 0.3018366486807633
|
| 705 |
-
},
|
| 706 |
{
|
| 707 |
"model": "anthropic/claude-3.5-sonnet",
|
| 708 |
"bleu": 0.21013619903144296
|
| 709 |
-
},
|
| 710 |
-
{
|
| 711 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 712 |
-
"bleu": 0.1499005259127334
|
| 713 |
-
},
|
| 714 |
-
{
|
| 715 |
-
"model": "mistralai/mistral-large",
|
| 716 |
-
"bleu": 0.18169602550114305
|
| 717 |
-
},
|
| 718 |
-
{
|
| 719 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 720 |
-
"bleu": 0.02549625724666183
|
| 721 |
}
|
| 722 |
],
|
| 723 |
-
"bleu": 0.
|
| 724 |
},
|
| 725 |
{
|
| 726 |
"language_name": "Romanian",
|
|
@@ -917,28 +869,12 @@
|
|
| 917 |
"language_code": "zul",
|
| 918 |
"speakers": 15700000.0,
|
| 919 |
"scores": [
|
| 920 |
-
{
|
| 921 |
-
"model": "openai/gpt-4o",
|
| 922 |
-
"bleu": 0.22516622902472544
|
| 923 |
-
},
|
| 924 |
{
|
| 925 |
"model": "anthropic/claude-3.5-sonnet",
|
| 926 |
"bleu": 0.20245371733247658
|
| 927 |
-
},
|
| 928 |
-
{
|
| 929 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 930 |
-
"bleu": 0.0378198916279553
|
| 931 |
-
},
|
| 932 |
-
{
|
| 933 |
-
"model": "mistralai/mistral-large",
|
| 934 |
-
"bleu": 0.0515062321283951
|
| 935 |
-
},
|
| 936 |
-
{
|
| 937 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 938 |
-
"bleu": 0.01701042472693127
|
| 939 |
}
|
| 940 |
],
|
| 941 |
-
"bleu": 0.
|
| 942 |
},
|
| 943 |
{
|
| 944 |
"language_name": "Mesopotamian Arabic",
|
|
@@ -1104,28 +1040,12 @@
|
|
| 1104 |
"language_code": "kaz",
|
| 1105 |
"speakers": 13161980.0,
|
| 1106 |
"scores": [
|
| 1107 |
-
{
|
| 1108 |
-
"model": "openai/gpt-4o",
|
| 1109 |
-
"bleu": 0.2865687127063326
|
| 1110 |
-
},
|
| 1111 |
{
|
| 1112 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1113 |
"bleu": 0.25054345947985385
|
| 1114 |
-
},
|
| 1115 |
-
{
|
| 1116 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 1117 |
-
"bleu": 0.181288673149438
|
| 1118 |
-
},
|
| 1119 |
-
{
|
| 1120 |
-
"model": "mistralai/mistral-large",
|
| 1121 |
-
"bleu": 0.188276270677333
|
| 1122 |
-
},
|
| 1123 |
-
{
|
| 1124 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 1125 |
-
"bleu": 0.054576267347929
|
| 1126 |
}
|
| 1127 |
],
|
| 1128 |
-
"bleu": 0.
|
| 1129 |
},
|
| 1130 |
{
|
| 1131 |
"language_name": "Chittagonian",
|
|
@@ -1139,28 +1059,12 @@
|
|
| 1139 |
"language_code": "tso",
|
| 1140 |
"speakers": 13000000.0,
|
| 1141 |
"scores": [
|
| 1142 |
-
{
|
| 1143 |
-
"model": "openai/gpt-4o",
|
| 1144 |
-
"bleu": 0.17202786716122928
|
| 1145 |
-
},
|
| 1146 |
{
|
| 1147 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1148 |
"bleu": 0.1952038863089787
|
| 1149 |
-
},
|
| 1150 |
-
{
|
| 1151 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 1152 |
-
"bleu": 0.0866195039237461
|
| 1153 |
-
},
|
| 1154 |
-
{
|
| 1155 |
-
"model": "mistralai/mistral-large",
|
| 1156 |
-
"bleu": 0.06062634407793842
|
| 1157 |
-
},
|
| 1158 |
-
{
|
| 1159 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 1160 |
-
"bleu": 0.014720823522746878
|
| 1161 |
}
|
| 1162 |
],
|
| 1163 |
-
"bleu": 0.
|
| 1164 |
},
|
| 1165 |
{
|
| 1166 |
"language_name": "Hungarian",
|
|
@@ -1179,56 +1083,24 @@
|
|
| 1179 |
"language_code": "kin",
|
| 1180 |
"speakers": 12100000.0,
|
| 1181 |
"scores": [
|
| 1182 |
-
{
|
| 1183 |
-
"model": "openai/gpt-4o",
|
| 1184 |
-
"bleu": 0.22728268497187282
|
| 1185 |
-
},
|
| 1186 |
{
|
| 1187 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1188 |
"bleu": 0.21847668603031067
|
| 1189 |
-
},
|
| 1190 |
-
{
|
| 1191 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 1192 |
-
"bleu": 0.10074185841058973
|
| 1193 |
-
},
|
| 1194 |
-
{
|
| 1195 |
-
"model": "mistralai/mistral-large",
|
| 1196 |
-
"bleu": 0.059392901501730454
|
| 1197 |
-
},
|
| 1198 |
-
{
|
| 1199 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 1200 |
-
"bleu": 0.0074286483802763524
|
| 1201 |
}
|
| 1202 |
],
|
| 1203 |
-
"bleu": 0.
|
| 1204 |
},
|
| 1205 |
{
|
| 1206 |
"language_name": "Chichewa",
|
| 1207 |
"language_code": "nya",
|
| 1208 |
"speakers": 12000000.0,
|
| 1209 |
"scores": [
|
| 1210 |
-
{
|
| 1211 |
-
"model": "openai/gpt-4o",
|
| 1212 |
-
"bleu": 0.1895725455230325
|
| 1213 |
-
},
|
| 1214 |
{
|
| 1215 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1216 |
"bleu": 0.17401113784791736
|
| 1217 |
-
},
|
| 1218 |
-
{
|
| 1219 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 1220 |
-
"bleu": 0.09658218667531897
|
| 1221 |
-
},
|
| 1222 |
-
{
|
| 1223 |
-
"model": "mistralai/mistral-large",
|
| 1224 |
-
"bleu": 0.04964676481672043
|
| 1225 |
-
},
|
| 1226 |
-
{
|
| 1227 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 1228 |
-
"bleu": 0.02040785086927957
|
| 1229 |
}
|
| 1230 |
],
|
| 1231 |
-
"bleu": 0.
|
| 1232 |
},
|
| 1233 |
{
|
| 1234 |
"language_name": "Tunisian Arabic",
|
|
@@ -1394,28 +1266,12 @@
|
|
| 1394 |
"language_code": "hat",
|
| 1395 |
"speakers": 9600000.0,
|
| 1396 |
"scores": [
|
| 1397 |
-
{
|
| 1398 |
-
"model": "openai/gpt-4o",
|
| 1399 |
-
"bleu": 0.288129435659592
|
| 1400 |
-
},
|
| 1401 |
{
|
| 1402 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1403 |
-
"bleu": 0.
|
| 1404 |
-
},
|
| 1405 |
-
{
|
| 1406 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 1407 |
-
"bleu": 0.26523811773703404
|
| 1408 |
-
},
|
| 1409 |
-
{
|
| 1410 |
-
"model": "mistralai/mistral-large",
|
| 1411 |
-
"bleu": 0.20181127876225555
|
| 1412 |
-
},
|
| 1413 |
-
{
|
| 1414 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 1415 |
-
"bleu": 0.10821493612429332
|
| 1416 |
}
|
| 1417 |
],
|
| 1418 |
-
"bleu": 0.
|
| 1419 |
},
|
| 1420 |
{
|
| 1421 |
"language_name": "Dari",
|
|
@@ -1458,28 +1314,12 @@
|
|
| 1458 |
"language_code": "azj",
|
| 1459 |
"speakers": 9220610.0,
|
| 1460 |
"scores": [
|
| 1461 |
-
{
|
| 1462 |
-
"model": "openai/gpt-4o",
|
| 1463 |
-
"bleu": 0.20542741434113335
|
| 1464 |
-
},
|
| 1465 |
{
|
| 1466 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1467 |
"bleu": 0.24029548337141315
|
| 1468 |
-
},
|
| 1469 |
-
{
|
| 1470 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 1471 |
-
"bleu": 0.1985709579217795
|
| 1472 |
-
},
|
| 1473 |
-
{
|
| 1474 |
-
"model": "mistralai/mistral-large",
|
| 1475 |
-
"bleu": 0.16426254772371085
|
| 1476 |
-
},
|
| 1477 |
-
{
|
| 1478 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 1479 |
-
"bleu": 0.029108557467416214
|
| 1480 |
}
|
| 1481 |
],
|
| 1482 |
-
"bleu": 0.
|
| 1483 |
},
|
| 1484 |
{
|
| 1485 |
"language_name": "Congo Swahili",
|
|
@@ -1697,28 +1537,12 @@
|
|
| 1697 |
"language_code": "kas",
|
| 1698 |
"speakers": 6900000.0,
|
| 1699 |
"scores": [
|
| 1700 |
-
{
|
| 1701 |
-
"model": "openai/gpt-4o",
|
| 1702 |
-
"bleu": 0.058464446367472944
|
| 1703 |
-
},
|
| 1704 |
{
|
| 1705 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1706 |
"bleu": 0.06589195125918151
|
| 1707 |
-
},
|
| 1708 |
-
{
|
| 1709 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 1710 |
-
"bleu": 0.04553335750357841
|
| 1711 |
-
},
|
| 1712 |
-
{
|
| 1713 |
-
"model": "mistralai/mistral-large",
|
| 1714 |
-
"bleu": 0.0441481215103373
|
| 1715 |
-
},
|
| 1716 |
-
{
|
| 1717 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 1718 |
-
"bleu": 0.038143033479956526
|
| 1719 |
}
|
| 1720 |
],
|
| 1721 |
-
"bleu": 0.
|
| 1722 |
},
|
| 1723 |
{
|
| 1724 |
"language_name": "Armenian",
|
|
@@ -1739,26 +1563,26 @@
|
|
| 1739 |
"scores": [
|
| 1740 |
{
|
| 1741 |
"model": "openai/gpt-4o",
|
| 1742 |
-
"bleu": 0.
|
| 1743 |
},
|
| 1744 |
{
|
| 1745 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1746 |
-
"bleu": 0.
|
| 1747 |
},
|
| 1748 |
{
|
| 1749 |
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 1750 |
-
"bleu": 0.
|
| 1751 |
},
|
| 1752 |
{
|
| 1753 |
"model": "mistralai/mistral-large",
|
| 1754 |
-
"bleu": 0.
|
| 1755 |
},
|
| 1756 |
{
|
| 1757 |
"model": "qwen/qwen-2.5-72b-instruct",
|
| 1758 |
-
"bleu": 0.
|
| 1759 |
}
|
| 1760 |
],
|
| 1761 |
-
"bleu": 0.
|
| 1762 |
},
|
| 1763 |
{
|
| 1764 |
"language_name": "Central Pashto",
|
|
@@ -2133,28 +1957,12 @@
|
|
| 2133 |
"language_code": "lit",
|
| 2134 |
"speakers": 4000000.0,
|
| 2135 |
"scores": [
|
| 2136 |
-
{
|
| 2137 |
-
"model": "openai/gpt-4o",
|
| 2138 |
-
"bleu": 0.2826991552063554
|
| 2139 |
-
},
|
| 2140 |
{
|
| 2141 |
"model": "anthropic/claude-3.5-sonnet",
|
| 2142 |
"bleu": 0.30370845804188434
|
| 2143 |
-
},
|
| 2144 |
-
{
|
| 2145 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 2146 |
-
"bleu": 0.2787991559825463
|
| 2147 |
-
},
|
| 2148 |
-
{
|
| 2149 |
-
"model": "mistralai/mistral-large",
|
| 2150 |
-
"bleu": 0.22260546031191955
|
| 2151 |
-
},
|
| 2152 |
-
{
|
| 2153 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 2154 |
-
"bleu": 0.13004923167033042
|
| 2155 |
}
|
| 2156 |
],
|
| 2157 |
-
"bleu": 0.
|
| 2158 |
},
|
| 2159 |
{
|
| 2160 |
"language_name": "Tok Pisin",
|
|
@@ -2292,28 +2100,12 @@
|
|
| 2292 |
"language_code": "ace",
|
| 2293 |
"speakers": 3500032.0,
|
| 2294 |
"scores": [
|
| 2295 |
-
{
|
| 2296 |
-
"model": "openai/gpt-4o",
|
| 2297 |
-
"bleu": 0.025447626712218067
|
| 2298 |
-
},
|
| 2299 |
{
|
| 2300 |
"model": "anthropic/claude-3.5-sonnet",
|
| 2301 |
"bleu": 0.06711853873605253
|
| 2302 |
-
},
|
| 2303 |
-
{
|
| 2304 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 2305 |
-
"bleu": 0.002679704493921361
|
| 2306 |
-
},
|
| 2307 |
-
{
|
| 2308 |
-
"model": "mistralai/mistral-large",
|
| 2309 |
-
"bleu": 0.013442877254370728
|
| 2310 |
-
},
|
| 2311 |
-
{
|
| 2312 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 2313 |
-
"bleu": 0.005062086703614178
|
| 2314 |
}
|
| 2315 |
],
|
| 2316 |
-
"bleu": 0.
|
| 2317 |
},
|
| 2318 |
{
|
| 2319 |
"language_name": "Banjar",
|
|
@@ -2405,28 +2197,12 @@
|
|
| 2405 |
"language_code": "ewe",
|
| 2406 |
"speakers": 3000000.0,
|
| 2407 |
"scores": [
|
| 2408 |
-
{
|
| 2409 |
-
"model": "openai/gpt-4o",
|
| 2410 |
-
"bleu": 0.01735238801571977
|
| 2411 |
-
},
|
| 2412 |
{
|
| 2413 |
"model": "anthropic/claude-3.5-sonnet",
|
| 2414 |
"bleu": 0.08106169448483001
|
| 2415 |
-
},
|
| 2416 |
-
{
|
| 2417 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 2418 |
-
"bleu": 0.010779616104049154
|
| 2419 |
-
},
|
| 2420 |
-
{
|
| 2421 |
-
"model": "mistralai/mistral-large",
|
| 2422 |
-
"bleu": 0.037190408434750306
|
| 2423 |
-
},
|
| 2424 |
-
{
|
| 2425 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 2426 |
-
"bleu": 0.006988505107902836
|
| 2427 |
}
|
| 2428 |
],
|
| 2429 |
-
"bleu": 0.
|
| 2430 |
},
|
| 2431 |
{
|
| 2432 |
"language_name": "Tosk Albanian",
|
|
@@ -3158,28 +2934,12 @@
|
|
| 3158 |
"language_code": "srd",
|
| 3159 |
"speakers": 1300000.0,
|
| 3160 |
"scores": [
|
| 3161 |
-
{
|
| 3162 |
-
"model": "openai/gpt-4o",
|
| 3163 |
-
"bleu": 0.039786729911513496
|
| 3164 |
-
},
|
| 3165 |
{
|
| 3166 |
"model": "anthropic/claude-3.5-sonnet",
|
| 3167 |
"bleu": 0.01817225070836904
|
| 3168 |
-
},
|
| 3169 |
-
{
|
| 3170 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 3171 |
-
"bleu": 0.0
|
| 3172 |
-
},
|
| 3173 |
-
{
|
| 3174 |
-
"model": "mistralai/mistral-large",
|
| 3175 |
-
"bleu": 0.03789690570026145
|
| 3176 |
-
},
|
| 3177 |
-
{
|
| 3178 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 3179 |
-
"bleu": 0.028031956319831585
|
| 3180 |
}
|
| 3181 |
],
|
| 3182 |
-
"bleu": 0.
|
| 3183 |
},
|
| 3184 |
{
|
| 3185 |
"language_name": "Emilian",
|
|
@@ -3285,28 +3045,12 @@
|
|
| 3285 |
"language_code": "ekk",
|
| 3286 |
"speakers": 1164770.0,
|
| 3287 |
"scores": [
|
| 3288 |
-
{
|
| 3289 |
-
"model": "openai/gpt-4o",
|
| 3290 |
-
"bleu": 0.32857986618400864
|
| 3291 |
-
},
|
| 3292 |
{
|
| 3293 |
"model": "anthropic/claude-3.5-sonnet",
|
| 3294 |
"bleu": 0.3107627601397992
|
| 3295 |
-
},
|
| 3296 |
-
{
|
| 3297 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 3298 |
-
"bleu": 0.264677035091384
|
| 3299 |
-
},
|
| 3300 |
-
{
|
| 3301 |
-
"model": "mistralai/mistral-large",
|
| 3302 |
-
"bleu": 0.23351285522302628
|
| 3303 |
-
},
|
| 3304 |
-
{
|
| 3305 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 3306 |
-
"bleu": 0.1151818587731128
|
| 3307 |
}
|
| 3308 |
],
|
| 3309 |
-
"bleu": 0.
|
| 3310 |
},
|
| 3311 |
{
|
| 3312 |
"language_name": "Muong",
|
|
@@ -3548,28 +3292,12 @@
|
|
| 3548 |
"language_code": "quy",
|
| 3549 |
"speakers": 918200.0,
|
| 3550 |
"scores": [
|
| 3551 |
-
{
|
| 3552 |
-
"model": "openai/gpt-4o",
|
| 3553 |
-
"bleu": 0.04264750796981389
|
| 3554 |
-
},
|
| 3555 |
{
|
| 3556 |
"model": "anthropic/claude-3.5-sonnet",
|
| 3557 |
"bleu": 0.07871425240461387
|
| 3558 |
-
},
|
| 3559 |
-
{
|
| 3560 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 3561 |
-
"bleu": 0.016883187863719572
|
| 3562 |
-
},
|
| 3563 |
-
{
|
| 3564 |
-
"model": "mistralai/mistral-large",
|
| 3565 |
-
"bleu": 0.04439823175345493
|
| 3566 |
-
},
|
| 3567 |
-
{
|
| 3568 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 3569 |
-
"bleu": 0.013066506239359036
|
| 3570 |
}
|
| 3571 |
],
|
| 3572 |
-
"bleu": 0.
|
| 3573 |
},
|
| 3574 |
{
|
| 3575 |
"language_name": "Nuer",
|
|
@@ -4145,28 +3873,12 @@
|
|
| 4145 |
"language_code": "szl",
|
| 4146 |
"speakers": 522000.0,
|
| 4147 |
"scores": [
|
| 4148 |
-
{
|
| 4149 |
-
"model": "openai/gpt-4o",
|
| 4150 |
-
"bleu": 0.1560774913216495
|
| 4151 |
-
},
|
| 4152 |
{
|
| 4153 |
"model": "anthropic/claude-3.5-sonnet",
|
| 4154 |
"bleu": 0.17422519619712065
|
| 4155 |
-
},
|
| 4156 |
-
{
|
| 4157 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 4158 |
-
"bleu": 0.12400774609673239
|
| 4159 |
-
},
|
| 4160 |
-
{
|
| 4161 |
-
"model": "mistralai/mistral-large",
|
| 4162 |
-
"bleu": 0.09674030113339326
|
| 4163 |
-
},
|
| 4164 |
-
{
|
| 4165 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 4166 |
-
"bleu": 0.096266902845627
|
| 4167 |
}
|
| 4168 |
],
|
| 4169 |
-
"bleu": 0.
|
| 4170 |
},
|
| 4171 |
{
|
| 4172 |
"language_name": "Flaaitaal",
|
|
@@ -4408,28 +4120,12 @@
|
|
| 4408 |
"language_code": "smo",
|
| 4409 |
"speakers": 415720.0,
|
| 4410 |
"scores": [
|
| 4411 |
-
{
|
| 4412 |
-
"model": "openai/gpt-4o",
|
| 4413 |
-
"bleu": 0.21326629811705108
|
| 4414 |
-
},
|
| 4415 |
{
|
| 4416 |
"model": "anthropic/claude-3.5-sonnet",
|
| 4417 |
"bleu": 0.2024917287660747
|
| 4418 |
-
},
|
| 4419 |
-
{
|
| 4420 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 4421 |
-
"bleu": 0.16385599822945093
|
| 4422 |
-
},
|
| 4423 |
-
{
|
| 4424 |
-
"model": "mistralai/mistral-large",
|
| 4425 |
-
"bleu": 0.08705749106630675
|
| 4426 |
-
},
|
| 4427 |
-
{
|
| 4428 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 4429 |
-
"bleu": 0.021765197503505862
|
| 4430 |
}
|
| 4431 |
],
|
| 4432 |
-
"bleu": 0.
|
| 4433 |
},
|
| 4434 |
{
|
| 4435 |
"language_name": "Sranan Tongo",
|
|
@@ -4682,28 +4378,12 @@
|
|
| 4682 |
"language_code": "pap",
|
| 4683 |
"speakers": 321300.0,
|
| 4684 |
"scores": [
|
| 4685 |
-
{
|
| 4686 |
-
"model": "openai/gpt-4o",
|
| 4687 |
-
"bleu": 0.22785468046191032
|
| 4688 |
-
},
|
| 4689 |
{
|
| 4690 |
"model": "anthropic/claude-3.5-sonnet",
|
| 4691 |
"bleu": 0.28092666579128994
|
| 4692 |
-
},
|
| 4693 |
-
{
|
| 4694 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 4695 |
-
"bleu": 0.22002899232464
|
| 4696 |
-
},
|
| 4697 |
-
{
|
| 4698 |
-
"model": "mistralai/mistral-large",
|
| 4699 |
-
"bleu": 0.11261357394303675
|
| 4700 |
-
},
|
| 4701 |
-
{
|
| 4702 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 4703 |
-
"bleu": 0.06527147003401967
|
| 4704 |
}
|
| 4705 |
],
|
| 4706 |
-
"bleu": 0.
|
| 4707 |
},
|
| 4708 |
{
|
| 4709 |
"language_name": "Kiembu",
|
|
@@ -5196,28 +4876,12 @@
|
|
| 5196 |
"language_code": "ltg",
|
| 5197 |
"speakers": 200000.0,
|
| 5198 |
"scores": [
|
| 5199 |
-
{
|
| 5200 |
-
"model": "openai/gpt-4o",
|
| 5201 |
-
"bleu": 0.06220005456851305
|
| 5202 |
-
},
|
| 5203 |
{
|
| 5204 |
"model": "anthropic/claude-3.5-sonnet",
|
| 5205 |
"bleu": 0.16898752975227693
|
| 5206 |
-
},
|
| 5207 |
-
{
|
| 5208 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 5209 |
-
"bleu": 0.07373773186072476
|
| 5210 |
-
},
|
| 5211 |
-
{
|
| 5212 |
-
"model": "mistralai/mistral-large",
|
| 5213 |
-
"bleu": 0.04077981475688863
|
| 5214 |
-
},
|
| 5215 |
-
{
|
| 5216 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 5217 |
-
"bleu": 0.02988351640091225
|
| 5218 |
}
|
| 5219 |
],
|
| 5220 |
-
"bleu": 0.
|
| 5221 |
},
|
| 5222 |
{
|
| 5223 |
"language_name": "Dongxiang",
|
|
@@ -13903,28 +13567,12 @@
|
|
| 13903 |
"language_code": "arb",
|
| 13904 |
"speakers": 0.0,
|
| 13905 |
"scores": [
|
| 13906 |
-
{
|
| 13907 |
-
"model": "openai/gpt-4o",
|
| 13908 |
-
"bleu": 0.09526855781979184
|
| 13909 |
-
},
|
| 13910 |
{
|
| 13911 |
"model": "anthropic/claude-3.5-sonnet",
|
| 13912 |
"bleu": 0.1160671201312185
|
| 13913 |
-
},
|
| 13914 |
-
{
|
| 13915 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 13916 |
-
"bleu": 0.04096256526230796
|
| 13917 |
-
},
|
| 13918 |
-
{
|
| 13919 |
-
"model": "mistralai/mistral-large",
|
| 13920 |
-
"bleu": 0.07433538408053277
|
| 13921 |
-
},
|
| 13922 |
-
{
|
| 13923 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 13924 |
-
"bleu": 0.012873795681693922
|
| 13925 |
}
|
| 13926 |
],
|
| 13927 |
-
"bleu": 0.
|
| 13928 |
},
|
| 13929 |
{
|
| 13930 |
"language_name": "ocm",
|
|
@@ -22297,28 +21945,12 @@
|
|
| 22297 |
"language_code": "dik",
|
| 22298 |
"speakers": 0,
|
| 22299 |
"scores": [
|
| 22300 |
-
{
|
| 22301 |
-
"model": "openai/gpt-4o",
|
| 22302 |
-
"bleu": 0.0071179726075512725
|
| 22303 |
-
},
|
| 22304 |
{
|
| 22305 |
"model": "anthropic/claude-3.5-sonnet",
|
| 22306 |
"bleu": 0.04004591158378547
|
| 22307 |
-
},
|
| 22308 |
-
{
|
| 22309 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 22310 |
-
"bleu": 0.012071851713508174
|
| 22311 |
-
},
|
| 22312 |
-
{
|
| 22313 |
-
"model": "mistralai/mistral-large",
|
| 22314 |
-
"bleu": 0.021620964225193613
|
| 22315 |
-
},
|
| 22316 |
-
{
|
| 22317 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
| 22318 |
-
"bleu": 0.009725228948559986
|
| 22319 |
}
|
| 22320 |
],
|
| 22321 |
-
"bleu": 0.
|
| 22322 |
},
|
| 22323 |
{
|
| 22324 |
"language_name": "Dilling",
|
|
|
|
| 6 |
"scores": [
|
| 7 |
{
|
| 8 |
"model": "anthropic/claude-3.5-sonnet",
|
| 9 |
+
"bleu": 0.438607997913414
|
| 10 |
}
|
| 11 |
],
|
| 12 |
+
"bleu": 0.438607997913414
|
| 13 |
},
|
| 14 |
{
|
| 15 |
"language_name": "Mandarin Chinese",
|
|
|
|
| 18 |
"scores": [
|
| 19 |
{
|
| 20 |
"model": "anthropic/claude-3.5-sonnet",
|
| 21 |
+
"bleu": 0.28859709196576455
|
| 22 |
}
|
| 23 |
],
|
| 24 |
+
"bleu": 0.28859709196576455
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"language_name": "Spanish",
|
|
|
|
| 30 |
"scores": [
|
| 31 |
{
|
| 32 |
"model": "anthropic/claude-3.5-sonnet",
|
| 33 |
+
"bleu": 0.31529908667129014
|
| 34 |
}
|
| 35 |
],
|
| 36 |
+
"bleu": 0.31529908667129014
|
| 37 |
},
|
| 38 |
{
|
| 39 |
"language_name": "Hindi",
|
|
|
|
| 42 |
"scores": [
|
| 43 |
{
|
| 44 |
"model": "anthropic/claude-3.5-sonnet",
|
| 45 |
+
"bleu": 0.3063790221508274
|
| 46 |
}
|
| 47 |
],
|
| 48 |
+
"bleu": 0.3063790221508274
|
| 49 |
},
|
| 50 |
{
|
| 51 |
"language_name": "Bengali",
|
|
|
|
| 54 |
"scores": [
|
| 55 |
{
|
| 56 |
"model": "anthropic/claude-3.5-sonnet",
|
| 57 |
+
"bleu": 0.2932161392776923
|
| 58 |
}
|
| 59 |
],
|
| 60 |
+
"bleu": 0.2932161392776923
|
| 61 |
},
|
| 62 |
{
|
| 63 |
"language_name": "Portuguese",
|
|
|
|
| 78 |
"scores": [
|
| 79 |
{
|
| 80 |
"model": "anthropic/claude-3.5-sonnet",
|
| 81 |
+
"bleu": 0.4225557942193369
|
| 82 |
}
|
| 83 |
],
|
| 84 |
+
"bleu": 0.4225557942193369
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"language_name": "Indonesian",
|
|
|
|
| 114 |
"scores": [
|
| 115 |
{
|
| 116 |
"model": "anthropic/claude-3.5-sonnet",
|
| 117 |
+
"bleu": 0.3224776412158195
|
| 118 |
}
|
| 119 |
],
|
| 120 |
+
"bleu": 0.3224776412158195
|
| 121 |
},
|
| 122 |
{
|
| 123 |
"language_name": "Eastern Punjabi",
|
|
|
|
| 126 |
"scores": [
|
| 127 |
{
|
| 128 |
"model": "anthropic/claude-3.5-sonnet",
|
| 129 |
+
"bleu": 0.3441710075977166
|
| 130 |
}
|
| 131 |
],
|
| 132 |
+
"bleu": 0.3441710075977166
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"language_name": "Standard German",
|
|
|
|
| 212 |
"language_code": "mar",
|
| 213 |
"speakers": 83100000.0,
|
| 214 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
{
|
| 216 |
"model": "anthropic/claude-3.5-sonnet",
|
| 217 |
"bleu": 0.2333407635240652
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
}
|
| 219 |
],
|
| 220 |
+
"bleu": 0.2333407635240652
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"language_name": "Swahili",
|
|
|
|
| 238 |
"scores": [
|
| 239 |
{
|
| 240 |
"model": "anthropic/claude-3.5-sonnet",
|
| 241 |
+
"bleu": 0.31845792720586547
|
| 242 |
}
|
| 243 |
],
|
| 244 |
+
"bleu": 0.31845792720586547
|
| 245 |
},
|
| 246 |
{
|
| 247 |
"language_name": "Telugu",
|
|
|
|
| 269 |
"scores": [
|
| 270 |
{
|
| 271 |
"model": "anthropic/claude-3.5-sonnet",
|
| 272 |
+
"bleu": 0.28861844740070713
|
| 273 |
}
|
| 274 |
],
|
| 275 |
+
"bleu": 0.28861844740070713
|
| 276 |
},
|
| 277 |
{
|
| 278 |
"language_name": "Vietnamese",
|
|
|
|
| 341 |
"scores": [
|
| 342 |
{
|
| 343 |
"model": "anthropic/claude-3.5-sonnet",
|
| 344 |
+
"bleu": 0.27067019149599314
|
| 345 |
}
|
| 346 |
],
|
| 347 |
+
"bleu": 0.27067019149599314
|
| 348 |
},
|
| 349 |
{
|
| 350 |
"language_name": "Bhojpuri",
|
|
|
|
| 398 |
"language_code": "apc",
|
| 399 |
"speakers": 44000000.0,
|
| 400 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
{
|
| 402 |
"model": "anthropic/claude-3.5-sonnet",
|
| 403 |
"bleu": 0.21013619903144296
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 404 |
}
|
| 405 |
],
|
| 406 |
+
"bleu": 0.21013619903144296
|
| 407 |
},
|
| 408 |
{
|
| 409 |
"language_name": "Hausa",
|
|
|
|
| 483 |
"scores": [
|
| 484 |
{
|
| 485 |
"model": "anthropic/claude-3.5-sonnet",
|
| 486 |
+
"bleu": 0.4055817497511186
|
| 487 |
}
|
| 488 |
],
|
| 489 |
+
"bleu": 0.4055817497511186
|
| 490 |
},
|
| 491 |
{
|
| 492 |
"language_name": "Xiang Chinese",
|
|
|
|
| 621 |
"scores": [
|
| 622 |
{
|
| 623 |
"model": "anthropic/claude-3.5-sonnet",
|
| 624 |
+
"bleu": 0.17452025039334695
|
| 625 |
}
|
| 626 |
],
|
| 627 |
+
"bleu": 0.17452025039334695
|
| 628 |
},
|
| 629 |
{
|
| 630 |
"language_name": "Northern Uzbek",
|
|
|
|
| 667 |
"language_code": "apc",
|
| 668 |
"speakers": 24600000.0,
|
| 669 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 670 |
{
|
| 671 |
"model": "anthropic/claude-3.5-sonnet",
|
| 672 |
"bleu": 0.21013619903144296
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 673 |
}
|
| 674 |
],
|
| 675 |
+
"bleu": 0.21013619903144296
|
| 676 |
},
|
| 677 |
{
|
| 678 |
"language_name": "Romanian",
|
|
|
|
| 869 |
"language_code": "zul",
|
| 870 |
"speakers": 15700000.0,
|
| 871 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 872 |
{
|
| 873 |
"model": "anthropic/claude-3.5-sonnet",
|
| 874 |
"bleu": 0.20245371733247658
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 875 |
}
|
| 876 |
],
|
| 877 |
+
"bleu": 0.20245371733247658
|
| 878 |
},
|
| 879 |
{
|
| 880 |
"language_name": "Mesopotamian Arabic",
|
|
|
|
| 1040 |
"language_code": "kaz",
|
| 1041 |
"speakers": 13161980.0,
|
| 1042 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1043 |
{
|
| 1044 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1045 |
"bleu": 0.25054345947985385
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1046 |
}
|
| 1047 |
],
|
| 1048 |
+
"bleu": 0.25054345947985385
|
| 1049 |
},
|
| 1050 |
{
|
| 1051 |
"language_name": "Chittagonian",
|
|
|
|
| 1059 |
"language_code": "tso",
|
| 1060 |
"speakers": 13000000.0,
|
| 1061 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1062 |
{
|
| 1063 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1064 |
"bleu": 0.1952038863089787
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1065 |
}
|
| 1066 |
],
|
| 1067 |
+
"bleu": 0.1952038863089787
|
| 1068 |
},
|
| 1069 |
{
|
| 1070 |
"language_name": "Hungarian",
|
|
|
|
| 1083 |
"language_code": "kin",
|
| 1084 |
"speakers": 12100000.0,
|
| 1085 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1086 |
{
|
| 1087 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1088 |
"bleu": 0.21847668603031067
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1089 |
}
|
| 1090 |
],
|
| 1091 |
+
"bleu": 0.21847668603031067
|
| 1092 |
},
|
| 1093 |
{
|
| 1094 |
"language_name": "Chichewa",
|
| 1095 |
"language_code": "nya",
|
| 1096 |
"speakers": 12000000.0,
|
| 1097 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1098 |
{
|
| 1099 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1100 |
"bleu": 0.17401113784791736
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1101 |
}
|
| 1102 |
],
|
| 1103 |
+
"bleu": 0.17401113784791736
|
| 1104 |
},
|
| 1105 |
{
|
| 1106 |
"language_name": "Tunisian Arabic",
|
|
|
|
| 1266 |
"language_code": "hat",
|
| 1267 |
"speakers": 9600000.0,
|
| 1268 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1269 |
{
|
| 1270 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1271 |
+
"bleu": 0.2780257097562799
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1272 |
}
|
| 1273 |
],
|
| 1274 |
+
"bleu": 0.2780257097562799
|
| 1275 |
},
|
| 1276 |
{
|
| 1277 |
"language_name": "Dari",
|
|
|
|
| 1314 |
"language_code": "azj",
|
| 1315 |
"speakers": 9220610.0,
|
| 1316 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1317 |
{
|
| 1318 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1319 |
"bleu": 0.24029548337141315
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1320 |
}
|
| 1321 |
],
|
| 1322 |
+
"bleu": 0.24029548337141315
|
| 1323 |
},
|
| 1324 |
{
|
| 1325 |
"language_name": "Congo Swahili",
|
|
|
|
| 1537 |
"language_code": "kas",
|
| 1538 |
"speakers": 6900000.0,
|
| 1539 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1540 |
{
|
| 1541 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1542 |
"bleu": 0.06589195125918151
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1543 |
}
|
| 1544 |
],
|
| 1545 |
+
"bleu": 0.06589195125918151
|
| 1546 |
},
|
| 1547 |
{
|
| 1548 |
"language_name": "Armenian",
|
|
|
|
| 1563 |
"scores": [
|
| 1564 |
{
|
| 1565 |
"model": "openai/gpt-4o",
|
| 1566 |
+
"bleu": 0.0315377397347853
|
| 1567 |
},
|
| 1568 |
{
|
| 1569 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1570 |
+
"bleu": 0.09333641225693347
|
| 1571 |
},
|
| 1572 |
{
|
| 1573 |
"model": "meta-llama/llama-3.1-405b-instruct",
|
| 1574 |
+
"bleu": 0.030121023774013433
|
| 1575 |
},
|
| 1576 |
{
|
| 1577 |
"model": "mistralai/mistral-large",
|
| 1578 |
+
"bleu": 0.04230836190600749
|
| 1579 |
},
|
| 1580 |
{
|
| 1581 |
"model": "qwen/qwen-2.5-72b-instruct",
|
| 1582 |
+
"bleu": 0.027727350391206936
|
| 1583 |
}
|
| 1584 |
],
|
| 1585 |
+
"bleu": 0.04500617761258932
|
| 1586 |
},
|
| 1587 |
{
|
| 1588 |
"language_name": "Central Pashto",
|
|
|
|
| 1957 |
"language_code": "lit",
|
| 1958 |
"speakers": 4000000.0,
|
| 1959 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1960 |
{
|
| 1961 |
"model": "anthropic/claude-3.5-sonnet",
|
| 1962 |
"bleu": 0.30370845804188434
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1963 |
}
|
| 1964 |
],
|
| 1965 |
+
"bleu": 0.30370845804188434
|
| 1966 |
},
|
| 1967 |
{
|
| 1968 |
"language_name": "Tok Pisin",
|
|
|
|
| 2100 |
"language_code": "ace",
|
| 2101 |
"speakers": 3500032.0,
|
| 2102 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2103 |
{
|
| 2104 |
"model": "anthropic/claude-3.5-sonnet",
|
| 2105 |
"bleu": 0.06711853873605253
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2106 |
}
|
| 2107 |
],
|
| 2108 |
+
"bleu": 0.06711853873605253
|
| 2109 |
},
|
| 2110 |
{
|
| 2111 |
"language_name": "Banjar",
|
|
|
|
| 2197 |
"language_code": "ewe",
|
| 2198 |
"speakers": 3000000.0,
|
| 2199 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2200 |
{
|
| 2201 |
"model": "anthropic/claude-3.5-sonnet",
|
| 2202 |
"bleu": 0.08106169448483001
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2203 |
}
|
| 2204 |
],
|
| 2205 |
+
"bleu": 0.08106169448483001
|
| 2206 |
},
|
| 2207 |
{
|
| 2208 |
"language_name": "Tosk Albanian",
|
|
|
|
| 2934 |
"language_code": "srd",
|
| 2935 |
"speakers": 1300000.0,
|
| 2936 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2937 |
{
|
| 2938 |
"model": "anthropic/claude-3.5-sonnet",
|
| 2939 |
"bleu": 0.01817225070836904
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2940 |
}
|
| 2941 |
],
|
| 2942 |
+
"bleu": 0.01817225070836904
|
| 2943 |
},
|
| 2944 |
{
|
| 2945 |
"language_name": "Emilian",
|
|
|
|
| 3045 |
"language_code": "ekk",
|
| 3046 |
"speakers": 1164770.0,
|
| 3047 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3048 |
{
|
| 3049 |
"model": "anthropic/claude-3.5-sonnet",
|
| 3050 |
"bleu": 0.3107627601397992
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3051 |
}
|
| 3052 |
],
|
| 3053 |
+
"bleu": 0.3107627601397992
|
| 3054 |
},
|
| 3055 |
{
|
| 3056 |
"language_name": "Muong",
|
|
|
|
| 3292 |
"language_code": "quy",
|
| 3293 |
"speakers": 918200.0,
|
| 3294 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3295 |
{
|
| 3296 |
"model": "anthropic/claude-3.5-sonnet",
|
| 3297 |
"bleu": 0.07871425240461387
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3298 |
}
|
| 3299 |
],
|
| 3300 |
+
"bleu": 0.07871425240461387
|
| 3301 |
},
|
| 3302 |
{
|
| 3303 |
"language_name": "Nuer",
|
|
|
|
| 3873 |
"language_code": "szl",
|
| 3874 |
"speakers": 522000.0,
|
| 3875 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3876 |
{
|
| 3877 |
"model": "anthropic/claude-3.5-sonnet",
|
| 3878 |
"bleu": 0.17422519619712065
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3879 |
}
|
| 3880 |
],
|
| 3881 |
+
"bleu": 0.17422519619712065
|
| 3882 |
},
|
| 3883 |
{
|
| 3884 |
"language_name": "Flaaitaal",
|
|
|
|
| 4120 |
"language_code": "smo",
|
| 4121 |
"speakers": 415720.0,
|
| 4122 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4123 |
{
|
| 4124 |
"model": "anthropic/claude-3.5-sonnet",
|
| 4125 |
"bleu": 0.2024917287660747
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4126 |
}
|
| 4127 |
],
|
| 4128 |
+
"bleu": 0.2024917287660747
|
| 4129 |
},
|
| 4130 |
{
|
| 4131 |
"language_name": "Sranan Tongo",
|
|
|
|
| 4378 |
"language_code": "pap",
|
| 4379 |
"speakers": 321300.0,
|
| 4380 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4381 |
{
|
| 4382 |
"model": "anthropic/claude-3.5-sonnet",
|
| 4383 |
"bleu": 0.28092666579128994
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4384 |
}
|
| 4385 |
],
|
| 4386 |
+
"bleu": 0.28092666579128994
|
| 4387 |
},
|
| 4388 |
{
|
| 4389 |
"language_name": "Kiembu",
|
|
|
|
| 4876 |
"language_code": "ltg",
|
| 4877 |
"speakers": 200000.0,
|
| 4878 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4879 |
{
|
| 4880 |
"model": "anthropic/claude-3.5-sonnet",
|
| 4881 |
"bleu": 0.16898752975227693
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4882 |
}
|
| 4883 |
],
|
| 4884 |
+
"bleu": 0.16898752975227693
|
| 4885 |
},
|
| 4886 |
{
|
| 4887 |
"language_name": "Dongxiang",
|
|
|
|
| 13567 |
"language_code": "arb",
|
| 13568 |
"speakers": 0.0,
|
| 13569 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13570 |
{
|
| 13571 |
"model": "anthropic/claude-3.5-sonnet",
|
| 13572 |
"bleu": 0.1160671201312185
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13573 |
}
|
| 13574 |
],
|
| 13575 |
+
"bleu": 0.1160671201312185
|
| 13576 |
},
|
| 13577 |
{
|
| 13578 |
"language_name": "ocm",
|
|
|
|
| 21945 |
"language_code": "dik",
|
| 21946 |
"speakers": 0,
|
| 21947 |
"scores": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21948 |
{
|
| 21949 |
"model": "anthropic/claude-3.5-sonnet",
|
| 21950 |
"bleu": 0.04004591158378547
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21951 |
}
|
| 21952 |
],
|
| 21953 |
+
"bleu": 0.04004591158378547
|
| 21954 |
},
|
| 21955 |
{
|
| 21956 |
"language_name": "Dilling",
|