David Pomerenke
committed on
Commit
·
6b6f157
1
Parent(s):
86b8b3a
Display all languages and translate from multiple languages
Browse files- bibliography.bib +6 -0
- index.html +15 -18
- languagebench.py +131 -75
- pyproject.toml +5 -0
- results.json +0 -0
- results_summary.json +0 -1202
- uv.lock +119 -0
bibliography.bib
CHANGED
|
@@ -243,6 +243,12 @@
|
|
| 243 |
file = {/Users/david/Zotero/storage/VU6IFENR/Siminyu et al. - 2021 - AI4D -- African Language Program.pdf;/Users/david/Zotero/storage/7TV2PS8J/2104.html}
|
| 244 |
}
|
| 245 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
@misc{teamNoLanguageLeft2022,
|
| 247 |
title = {No {{Language Left Behind}}: {{Scaling Human-Centered Machine Translation}}},
|
| 248 |
shorttitle = {No {{Language Left Behind}}},
|
|
|
|
| 243 |
file = {/Users/david/Zotero/storage/VU6IFENR/Siminyu et al. - 2021 - AI4D -- African Language Program.pdf;/Users/david/Zotero/storage/7TV2PS8J/2104.html}
|
| 244 |
}
|
| 245 |
|
| 246 |
+
@misc{Tatoeba,
|
| 247 |
+
title = {Tatoeba},
|
| 248 |
+
urldate = {2024-11-03},
|
| 249 |
+
file = {/Users/david/Zotero/storage/4NDTCGWG/sentences_by_language.html}
|
| 250 |
+
}
|
| 251 |
+
|
| 252 |
@misc{teamNoLanguageLeft2022,
|
| 253 |
title = {No {{Language Left Behind}}: {{Scaling Human-Centered Machine Translation}}},
|
| 254 |
shorttitle = {No {{Language Left Behind}}},
|
index.html
CHANGED
|
@@ -39,42 +39,43 @@
|
|
| 39 |
const scoreName = "BLEU Score"
|
| 40 |
const chartsDiv = document.getElementById('charts');
|
| 41 |
|
| 42 |
-
const
|
| 43 |
-
const
|
| 44 |
// Format captions
|
| 45 |
-
const
|
|
|
|
| 46 |
|
| 47 |
// Create summary plot
|
| 48 |
const summaryPlot = Plot.plot({
|
| 49 |
-
width:
|
| 50 |
height: 400,
|
| 51 |
marginBottom: 100,
|
| 52 |
x: { label: "Number of speakers", axis: null },
|
| 53 |
y: { label: `${scoreName} (average across models)` },
|
| 54 |
// color: { scheme: "BrBG" },
|
| 55 |
marks: [
|
| 56 |
-
Plot.rectY(
|
| 57 |
x: "speakers",
|
| 58 |
order: scoreKey,
|
| 59 |
reverse: true,
|
| 60 |
y2: scoreKey, // y2 to avoid stacking by y
|
| 61 |
title: formatTitle,
|
| 62 |
tip: true,
|
| 63 |
-
|
| 64 |
})),
|
| 65 |
-
Plot.rectY(
|
| 66 |
x: "speakers",
|
| 67 |
order: scoreKey,
|
| 68 |
reverse: true,
|
| 69 |
y2: scoreKey, // y2 to avoid stacking by y
|
| 70 |
fill: "grey",
|
| 71 |
}))),
|
| 72 |
-
Plot.text(
|
| 73 |
x: "speakers",
|
| 74 |
y2: scoreKey,
|
| 75 |
order: scoreKey,
|
| 76 |
reverse: true,
|
| 77 |
-
text: "
|
| 78 |
frameAnchor: "bottom",
|
| 79 |
textAnchor: "end",
|
| 80 |
dy: 10,
|
|
@@ -87,14 +88,11 @@
|
|
| 87 |
// Add summary plot at the top
|
| 88 |
chartsDiv.insertBefore(summaryPlot, chartsDiv.firstChild);
|
| 89 |
|
| 90 |
-
const response = await fetch('results.json');
|
| 91 |
-
const results = await response.json();
|
| 92 |
-
|
| 93 |
// Get unique languages with their speaker counts
|
| 94 |
const languageMap = new Map();
|
| 95 |
-
|
| 96 |
-
if (!languageMap.has(r.
|
| 97 |
-
languageMap.set(r.
|
| 98 |
}
|
| 99 |
});
|
| 100 |
|
|
@@ -122,7 +120,7 @@
|
|
| 122 |
headerDiv.appendChild(speakerP);
|
| 123 |
chartsDiv.appendChild(headerDiv);
|
| 124 |
|
| 125 |
-
const languageData =
|
| 126 |
|
| 127 |
const descriptor = code => {
|
| 128 |
let [org, model] = code.split("/")
|
|
@@ -130,8 +128,7 @@
|
|
| 130 |
}
|
| 131 |
|
| 132 |
// Plot for how well the models perform on this language
|
| 133 |
-
if (languageData.length >
|
| 134 |
-
console.log(languageData);
|
| 135 |
const plot = Plot.plot({
|
| 136 |
width: 400,
|
| 137 |
height: 200,
|
|
|
|
| 39 |
const scoreName = "BLEU Score"
|
| 40 |
const chartsDiv = document.getElementById('charts');
|
| 41 |
|
| 42 |
+
const response = await fetch('results.json');
|
| 43 |
+
const data = await response.json();
|
| 44 |
// Format captions
|
| 45 |
+
const formatScore = (score) => score > 0 ? score.toFixed(2) : "No benchmark available!"
|
| 46 |
+
const formatTitle = d => (d.language_name + "\n" + parseInt(d.speakers / 1_000_00) / 10 + "M speakers\n" + scoreName + ": " + formatScore(d[scoreKey]))
|
| 47 |
|
| 48 |
// Create summary plot
|
| 49 |
const summaryPlot = Plot.plot({
|
| 50 |
+
width: chartsDiv.clientWidth,
|
| 51 |
height: 400,
|
| 52 |
marginBottom: 100,
|
| 53 |
x: { label: "Number of speakers", axis: null },
|
| 54 |
y: { label: `${scoreName} (average across models)` },
|
| 55 |
// color: { scheme: "BrBG" },
|
| 56 |
marks: [
|
| 57 |
+
Plot.rectY(data, Plot.stackX({
|
| 58 |
x: "speakers",
|
| 59 |
order: scoreKey,
|
| 60 |
reverse: true,
|
| 61 |
y2: scoreKey, // y2 to avoid stacking by y
|
| 62 |
title: formatTitle,
|
| 63 |
tip: true,
|
| 64 |
+
fill: d => d[scoreKey] > 0 ? "black" : "pink"
|
| 65 |
})),
|
| 66 |
+
Plot.rectY(data, Plot.pointerX(Plot.stackX({
|
| 67 |
x: "speakers",
|
| 68 |
order: scoreKey,
|
| 69 |
reverse: true,
|
| 70 |
y2: scoreKey, // y2 to avoid stacking by y
|
| 71 |
fill: "grey",
|
| 72 |
}))),
|
| 73 |
+
Plot.text(data, Plot.stackX({
|
| 74 |
x: "speakers",
|
| 75 |
y2: scoreKey,
|
| 76 |
order: scoreKey,
|
| 77 |
reverse: true,
|
| 78 |
+
text: "language_name",
|
| 79 |
frameAnchor: "bottom",
|
| 80 |
textAnchor: "end",
|
| 81 |
dy: 10,
|
|
|
|
| 88 |
// Add summary plot at the top
|
| 89 |
chartsDiv.insertBefore(summaryPlot, chartsDiv.firstChild);
|
| 90 |
|
|
|
|
|
|
|
|
|
|
| 91 |
// Get unique languages with their speaker counts
|
| 92 |
const languageMap = new Map();
|
| 93 |
+
data.forEach(r => {
|
| 94 |
+
if (!languageMap.has(r.language_name)) {
|
| 95 |
+
languageMap.set(r.language_name, r.speakers);
|
| 96 |
}
|
| 97 |
});
|
| 98 |
|
|
|
|
| 120 |
headerDiv.appendChild(speakerP);
|
| 121 |
chartsDiv.appendChild(headerDiv);
|
| 122 |
|
| 123 |
+
const languageData = data.filter(r => r.language_name === language)[0]["scores"];
|
| 124 |
|
| 125 |
const descriptor = code => {
|
| 126 |
let [org, model] = code.split("/")
|
|
|
|
| 128 |
}
|
| 129 |
|
| 130 |
// Plot for how well the models perform on this language
|
| 131 |
+
if (languageData && languageData.length > 1) {
|
|
|
|
| 132 |
const plot = Plot.plot({
|
| 133 |
width: 400,
|
| 134 |
height: 200,
|
languagebench.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
import asyncio
|
| 2 |
import json
|
| 3 |
import os
|
| 4 |
-
import random
|
| 5 |
from os import getenv
|
| 6 |
|
| 7 |
import evaluate
|
|
@@ -12,22 +11,19 @@ from dotenv import load_dotenv
|
|
| 12 |
from joblib.memory import Memory
|
| 13 |
from openai import AsyncOpenAI
|
| 14 |
from tqdm.asyncio import tqdm_asyncio
|
|
|
|
| 15 |
|
| 16 |
# config
|
| 17 |
models = [
|
| 18 |
-
"openai/gpt-4o
|
| 19 |
"anthropic/claude-3.5-sonnet",
|
| 20 |
-
"meta-llama/llama-3.1-
|
| 21 |
-
"mistralai/mistral-
|
| 22 |
# "google/gemini-flash-1.5", # very fast
|
| 23 |
"qwen/qwen-2.5-72b-instruct", # somewhat slow
|
| 24 |
]
|
| 25 |
fast_model = "anthropic/claude-3.5-sonnet"
|
| 26 |
-
|
| 27 |
-
dataset = "floresp-v2.0-rc.3/dev"
|
| 28 |
-
random.seed(42)
|
| 29 |
-
target_languages = [f.split(".")[1] for f in os.listdir(dataset)]
|
| 30 |
-
detailed_target_languages = random.choices(target_languages, k=5)
|
| 31 |
|
| 32 |
# setup
|
| 33 |
load_dotenv()
|
|
@@ -36,9 +32,10 @@ client = AsyncOpenAI(
|
|
| 36 |
api_key=getenv("OPENROUTER_API_KEY"),
|
| 37 |
)
|
| 38 |
cache = Memory(location=".cache", verbose=0).cache
|
| 39 |
-
bleu = evaluate.load("
|
| 40 |
bertscore = evaluate.load("bertscore")
|
| 41 |
-
|
|
|
|
| 42 |
|
| 43 |
|
| 44 |
def reorder(language_name):
|
|
@@ -47,10 +44,65 @@ def reorder(language_name):
|
|
| 47 |
return language_name
|
| 48 |
|
| 49 |
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
|
| 56 |
# utils
|
|
@@ -94,73 +146,77 @@ async def translate(model, target_language, target_script, sentence):
|
|
| 94 |
return reply.choices[0].message.content
|
| 95 |
|
| 96 |
|
| 97 |
-
def
|
| 98 |
-
|
| 99 |
-
script = script.split("_", 1)[0]
|
| 100 |
-
stats = language_stats[language_stats["iso639_3"] == lang]
|
| 101 |
-
if not stats.empty:
|
| 102 |
-
stats = stats.iloc[0].to_dict()
|
| 103 |
-
else:
|
| 104 |
-
stats = dict()
|
| 105 |
-
stats["script"] = script_names[script_names["Code"] == script]["English Name"].iloc[
|
| 106 |
-
0
|
| 107 |
-
]
|
| 108 |
-
name_series = language_names[language_names["LangID"] == lang]["Name"]
|
| 109 |
-
stats["name"] = (
|
| 110 |
-
name_series.iloc[0]
|
| 111 |
-
if not name_series.empty
|
| 112 |
-
else stats.get("itemLabel_en") or stats.get("itemLabel", lang)
|
| 113 |
-
)
|
| 114 |
-
return stats
|
| 115 |
|
| 116 |
|
| 117 |
-
def
|
| 118 |
-
return
|
|
|
|
|
|
|
| 119 |
|
| 120 |
|
| 121 |
# evaluation!
|
| 122 |
async def main():
|
| 123 |
-
n = 30
|
| 124 |
results = []
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
|
| 166 |
if __name__ == "__main__":
|
|
|
|
| 1 |
import asyncio
|
| 2 |
import json
|
| 3 |
import os
|
|
|
|
| 4 |
from os import getenv
|
| 5 |
|
| 6 |
import evaluate
|
|
|
|
| 11 |
from joblib.memory import Memory
|
| 12 |
from openai import AsyncOpenAI
|
| 13 |
from tqdm.asyncio import tqdm_asyncio
|
| 14 |
+
from transformers import NllbTokenizer
|
| 15 |
|
| 16 |
# config
|
| 17 |
models = [
|
| 18 |
+
"openai/gpt-4o",
|
| 19 |
"anthropic/claude-3.5-sonnet",
|
| 20 |
+
"meta-llama/llama-3.1-405b-instruct", # lots of slow repetitions for LRLs
|
| 21 |
+
"mistralai/mistral-large",
|
| 22 |
# "google/gemini-flash-1.5", # very fast
|
| 23 |
"qwen/qwen-2.5-72b-instruct", # somewhat slow
|
| 24 |
]
|
| 25 |
fast_model = "anthropic/claude-3.5-sonnet"
|
| 26 |
+
n_sentences = 30
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
# setup
|
| 29 |
load_dotenv()
|
|
|
|
| 32 |
api_key=getenv("OPENROUTER_API_KEY"),
|
| 33 |
)
|
| 34 |
cache = Memory(location=".cache", verbose=0).cache
|
| 35 |
+
bleu = evaluate.load("bleu")
|
| 36 |
bertscore = evaluate.load("bertscore")
|
| 37 |
+
tokenizer = NllbTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
|
| 38 |
+
rate_limit = AsyncLimiter(max_rate=20, time_period=1)
|
| 39 |
|
| 40 |
|
| 41 |
def reorder(language_name):
|
|
|
|
| 44 |
return language_name
|
| 45 |
|
| 46 |
|
| 47 |
+
# load benchmark languages and scripts
|
| 48 |
+
benchmark_dir = "floresp-v2.0-rc.3/dev"
|
| 49 |
+
benchmark_languages = pd.DataFrame(
|
| 50 |
+
[f.split(".")[1].split("_", 1) for f in os.listdir(benchmark_dir)],
|
| 51 |
+
columns=["language_code", "script_code"],
|
| 52 |
+
)
|
| 53 |
+
# hack: drop additional script codes for languages with multiple scripts
|
| 54 |
+
benchmark_languages = benchmark_languages.groupby("language_code").head(1)
|
| 55 |
+
benchmark_languages["in_benchmark"] = True
|
| 56 |
+
|
| 57 |
+
# load Ethnologue language names
|
| 58 |
+
language_names = (
|
| 59 |
+
pd.read_csv("LanguageCodes.tab", sep="\t")
|
| 60 |
+
.rename(columns={"LangID": "language_code", "Name": "language_name"})[
|
| 61 |
+
["language_code", "language_name"]
|
| 62 |
+
]
|
| 63 |
+
.assign(language_name=lambda df: df["language_name"].apply(reorder).str.strip())
|
| 64 |
+
)
|
| 65 |
+
|
| 66 |
+
# load Wikidata speaker stats
|
| 67 |
+
language_stats = (
|
| 68 |
+
pd.read_csv("languages.tsv", sep="\t")
|
| 69 |
+
.rename(columns={"iso639_3": "language_code", "maxSpeakers": "speakers"})[
|
| 70 |
+
["language_code", "speakers"]
|
| 71 |
+
]
|
| 72 |
+
.dropna(subset=["language_code"])
|
| 73 |
+
)
|
| 74 |
+
language_stats["speakers"] = pd.to_numeric(language_stats["speakers"], errors="coerce")
|
| 75 |
+
ignored_languages = [
|
| 76 |
+
"zho", # Chinese -> use Mandarin (cmn) instead
|
| 77 |
+
"ara", # Arabic -> use Standard Arabic (arb) instead
|
| 78 |
+
"pus", # Pashto -> use Northern / Central / Southern Pashto instead (pbu / pst / pbt)
|
| 79 |
+
"fas", # Persian -> use Iranian Persian (pes) instead
|
| 80 |
+
"msa", # Malay -> use Indonesian (ind) instead
|
| 81 |
+
]
|
| 82 |
+
language_stats = language_stats[
|
| 83 |
+
~language_stats["language_code"].isin(ignored_languages)
|
| 84 |
+
]
|
| 85 |
+
|
| 86 |
+
# load unicode script names
|
| 87 |
+
script_names = pd.read_csv("ScriptCodes.csv").rename(
|
| 88 |
+
columns={"Code": "script_code", "English Name": "script_name"}
|
| 89 |
+
)[["script_code", "script_name"]]
|
| 90 |
+
|
| 91 |
+
# merge data
|
| 92 |
+
languages = pd.merge(language_stats, language_names, on="language_code", how="outer")
|
| 93 |
+
languages = pd.merge(benchmark_languages, languages, on="language_code", how="outer")
|
| 94 |
+
languages = pd.merge(languages, script_names, on="script_code", how="left")
|
| 95 |
+
languages["in_benchmark"] = languages["in_benchmark"].fillna(False)
|
| 96 |
+
languages = languages.sort_values(by="speakers", ascending=False)
|
| 97 |
+
|
| 98 |
+
# sample languages to translate from
|
| 99 |
+
original_languages = languages[languages["in_benchmark"]].sample(
|
| 100 |
+
n=n_sentences, weights="speakers", replace=True, random_state=42
|
| 101 |
+
)
|
| 102 |
+
# sample languages to analyze with all models
|
| 103 |
+
detailed_target_languages = languages[languages["in_benchmark"]].sample(
|
| 104 |
+
n=25, random_state=42
|
| 105 |
+
)
|
| 106 |
|
| 107 |
|
| 108 |
# utils
|
|
|
|
| 146 |
return reply.choices[0].message.content
|
| 147 |
|
| 148 |
|
| 149 |
+
def mean(l):
|
| 150 |
+
return sum(l) / len(l) if l else 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
|
| 153 |
+
def load_sentences(language):
|
| 154 |
+
return open(
|
| 155 |
+
f"{benchmark_dir}/dev.{language.language_code}_{language.script_code}"
|
| 156 |
+
).readlines()
|
| 157 |
|
| 158 |
|
| 159 |
# evaluation!
|
| 160 |
async def main():
|
|
|
|
| 161 |
results = []
|
| 162 |
+
for language in languages.itertuples():
|
| 163 |
+
name = (
|
| 164 |
+
language.language_name
|
| 165 |
+
if not pd.isna(language.language_name)
|
| 166 |
+
else language.language_code
|
| 167 |
+
)
|
| 168 |
+
print(name)
|
| 169 |
+
scores = []
|
| 170 |
+
if language.in_benchmark:
|
| 171 |
+
target_sentences = load_sentences(language)[:n_sentences]
|
| 172 |
+
for model in models:
|
| 173 |
+
if (
|
| 174 |
+
model != fast_model
|
| 175 |
+
and language.language_code
|
| 176 |
+
not in detailed_target_languages.language_code.values
|
| 177 |
+
):
|
| 178 |
+
continue
|
| 179 |
+
original_sentences = [
|
| 180 |
+
load_sentences(lang)[i]
|
| 181 |
+
for i, lang in enumerate(original_languages.itertuples())
|
| 182 |
+
]
|
| 183 |
+
print(model)
|
| 184 |
+
predictions = [
|
| 185 |
+
translate(
|
| 186 |
+
model, language.language_name, language.script_name, sentence
|
| 187 |
+
)
|
| 188 |
+
for sentence in original_sentences
|
| 189 |
+
]
|
| 190 |
+
predictions = await tqdm_asyncio.gather(*predictions, miniters=1)
|
| 191 |
+
metrics_bleu = bleu.compute(
|
| 192 |
+
predictions=predictions,
|
| 193 |
+
references=target_sentences,
|
| 194 |
+
tokenizer=tokenizer.tokenize,
|
| 195 |
+
)
|
| 196 |
+
# metrics_bert = bertscore.compute(
|
| 197 |
+
# predictions=predictions,
|
| 198 |
+
# references=target_sentences,
|
| 199 |
+
# model_type="distilbert-base-uncased",
|
| 200 |
+
# )
|
| 201 |
+
scores.append(
|
| 202 |
+
{
|
| 203 |
+
"model": model,
|
| 204 |
+
"bleu": metrics_bleu["bleu"],
|
| 205 |
+
# "bert_score": mean(metrics_bert["f1"]),
|
| 206 |
+
}
|
| 207 |
+
)
|
| 208 |
+
results.append(
|
| 209 |
+
{
|
| 210 |
+
"language_name": name,
|
| 211 |
+
"language_code": language.language_code,
|
| 212 |
+
"speakers": language.speakers if not pd.isna(language.speakers) else 0,
|
| 213 |
+
"scores": scores,
|
| 214 |
+
"bleu": mean([s["bleu"] for s in scores]) or -0.02,
|
| 215 |
+
# "bert_score": mean([s["bert_score"] for s in scores]),
|
| 216 |
+
}
|
| 217 |
+
)
|
| 218 |
+
with open("results.json", "w") as f:
|
| 219 |
+
json.dump(results, f, indent=2, ensure_ascii=False)
|
| 220 |
|
| 221 |
|
| 222 |
if __name__ == "__main__":
|
pyproject.toml
CHANGED
|
@@ -9,9 +9,14 @@ dependencies = [
|
|
| 9 |
"bert-score>=0.3.13",
|
| 10 |
"evaluate>=0.4.3",
|
| 11 |
"joblib>=1.4.2",
|
|
|
|
| 12 |
"openai>=1.52.2",
|
| 13 |
"pandas>=2.2.3",
|
|
|
|
| 14 |
"python-dotenv>=1.0.1",
|
| 15 |
"sacrebleu>=2.4.3",
|
|
|
|
|
|
|
| 16 |
"tqdm>=4.66.6",
|
|
|
|
| 17 |
]
|
|
|
|
| 9 |
"bert-score>=0.3.13",
|
| 10 |
"evaluate>=0.4.3",
|
| 11 |
"joblib>=1.4.2",
|
| 12 |
+
"nltk>=3.9.1",
|
| 13 |
"openai>=1.52.2",
|
| 14 |
"pandas>=2.2.3",
|
| 15 |
+
"protobuf>=5.28.3",
|
| 16 |
"python-dotenv>=1.0.1",
|
| 17 |
"sacrebleu>=2.4.3",
|
| 18 |
+
"sentencepiece>=0.2.0",
|
| 19 |
+
"tiktoken>=0.8.0",
|
| 20 |
"tqdm>=4.66.6",
|
| 21 |
+
"transformers>=4.46.1",
|
| 22 |
]
|
results.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results_summary.json
DELETED
|
@@ -1,1202 +0,0 @@
|
|
| 1 |
-
[
|
| 2 |
-
{
|
| 3 |
-
"target_language_name":"Aceh",
|
| 4 |
-
"bleu":39.1659660901,
|
| 5 |
-
"bert_score":0.8998966595,
|
| 6 |
-
"speakers":3500032.0
|
| 7 |
-
},
|
| 8 |
-
{
|
| 9 |
-
"target_language_name":"Afrikaans",
|
| 10 |
-
"bleu":76.8900540777,
|
| 11 |
-
"bert_score":0.9481831173,
|
| 12 |
-
"speakers":10300000.0
|
| 13 |
-
},
|
| 14 |
-
{
|
| 15 |
-
"target_language_name":"Amharic",
|
| 16 |
-
"bleu":43.1544568697,
|
| 17 |
-
"bert_score":0.989116921,
|
| 18 |
-
"speakers":25000000.0
|
| 19 |
-
},
|
| 20 |
-
{
|
| 21 |
-
"target_language_name":"Armenian",
|
| 22 |
-
"bleu":64.6804400806,
|
| 23 |
-
"bert_score":0.9550812801,
|
| 24 |
-
"speakers":6700000.0
|
| 25 |
-
},
|
| 26 |
-
{
|
| 27 |
-
"target_language_name":"Assamese",
|
| 28 |
-
"bleu":47.0351331605,
|
| 29 |
-
"bert_score":0.928119574,
|
| 30 |
-
"speakers":15300000.0
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"target_language_name":"Asturian",
|
| 34 |
-
"bleu":71.3445623493,
|
| 35 |
-
"bert_score":0.931475842,
|
| 36 |
-
"speakers":450000.0
|
| 37 |
-
},
|
| 38 |
-
{
|
| 39 |
-
"target_language_name":"Awadhi",
|
| 40 |
-
"bleu":46.0797144146,
|
| 41 |
-
"bert_score":0.9333642821,
|
| 42 |
-
"speakers":22000000.0
|
| 43 |
-
},
|
| 44 |
-
{
|
| 45 |
-
"target_language_name":"Ayacucho Quechua",
|
| 46 |
-
"bleu":45.6534927028,
|
| 47 |
-
"bert_score":0.8731370111,
|
| 48 |
-
"speakers":918200.0
|
| 49 |
-
},
|
| 50 |
-
{
|
| 51 |
-
"target_language_name":"Bali (Indonesia)",
|
| 52 |
-
"bleu":52.8752419159,
|
| 53 |
-
"bert_score":0.8934772114,
|
| 54 |
-
"speakers":4000000.0
|
| 55 |
-
},
|
| 56 |
-
{
|
| 57 |
-
"target_language_name":"Bamanankan",
|
| 58 |
-
"bleu":38.6939091408,
|
| 59 |
-
"bert_score":0.8872043769,
|
| 60 |
-
"speakers":2700000.0
|
| 61 |
-
},
|
| 62 |
-
{
|
| 63 |
-
"target_language_name":"Banjar",
|
| 64 |
-
"bleu":46.5453977487,
|
| 65 |
-
"bert_score":0.91599799,
|
| 66 |
-
"speakers":3500000.0
|
| 67 |
-
},
|
| 68 |
-
{
|
| 69 |
-
"target_language_name":"Bashkort",
|
| 70 |
-
"bleu":57.5453842927,
|
| 71 |
-
"bert_score":0.9298217595,
|
| 72 |
-
"speakers":1200000.0
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"target_language_name":"Basque",
|
| 76 |
-
"bleu":65.8968721377,
|
| 77 |
-
"bert_score":0.9192741295,
|
| 78 |
-
"speakers":750000.0
|
| 79 |
-
},
|
| 80 |
-
{
|
| 81 |
-
"target_language_name":"Belarusian",
|
| 82 |
-
"bleu":54.5195166442,
|
| 83 |
-
"bert_score":0.9329862595,
|
| 84 |
-
"speakers":7900000.0
|
| 85 |
-
},
|
| 86 |
-
{
|
| 87 |
-
"target_language_name":"Bemba",
|
| 88 |
-
"bleu":47.8068548956,
|
| 89 |
-
"bert_score":0.889907831,
|
| 90 |
-
"speakers":3600000.0
|
| 91 |
-
},
|
| 92 |
-
{
|
| 93 |
-
"target_language_name":"Bengali",
|
| 94 |
-
"bleu":57.1417588816,
|
| 95 |
-
"bert_score":0.9483523647,
|
| 96 |
-
"speakers":300000000.0
|
| 97 |
-
},
|
| 98 |
-
{
|
| 99 |
-
"target_language_name":"Bhojpuri",
|
| 100 |
-
"bleu":44.5412337907,
|
| 101 |
-
"bert_score":0.9288184981,
|
| 102 |
-
"speakers":52200000.0
|
| 103 |
-
},
|
| 104 |
-
{
|
| 105 |
-
"target_language_name":"Bokm\u00e5l",
|
| 106 |
-
"bleu":77.4939513016,
|
| 107 |
-
"bert_score":0.9550971886,
|
| 108 |
-
"speakers":4000000.0
|
| 109 |
-
},
|
| 110 |
-
{
|
| 111 |
-
"target_language_name":"Boro (India)",
|
| 112 |
-
"bleu":36.1100474969,
|
| 113 |
-
"bert_score":0.925187854,
|
| 114 |
-
"speakers":1482929.0
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"target_language_name":"Bosnian",
|
| 118 |
-
"bleu":72.5488027131,
|
| 119 |
-
"bert_score":0.947693936,
|
| 120 |
-
"speakers":3500000.0
|
| 121 |
-
},
|
| 122 |
-
{
|
| 123 |
-
"target_language_name":"Bugis",
|
| 124 |
-
"bleu":44.8388170031,
|
| 125 |
-
"bert_score":0.8647923966,
|
| 126 |
-
"speakers":5017800.0
|
| 127 |
-
},
|
| 128 |
-
{
|
| 129 |
-
"target_language_name":"Bulgarian",
|
| 130 |
-
"bleu":72.9695925131,
|
| 131 |
-
"bert_score":0.9545443177,
|
| 132 |
-
"speakers":9000000.0
|
| 133 |
-
},
|
| 134 |
-
{
|
| 135 |
-
"target_language_name":"Burmese",
|
| 136 |
-
"bleu":55.7235911677,
|
| 137 |
-
"bert_score":0.9759751062,
|
| 138 |
-
"speakers":32900000.0
|
| 139 |
-
},
|
| 140 |
-
{
|
| 141 |
-
"target_language_name":"Catalan",
|
| 142 |
-
"bleu":74.4595007932,
|
| 143 |
-
"bert_score":0.9464139263,
|
| 144 |
-
"speakers":5100000.0
|
| 145 |
-
},
|
| 146 |
-
{
|
| 147 |
-
"target_language_name":"Cebuano",
|
| 148 |
-
"bleu":69.4557958655,
|
| 149 |
-
"bert_score":0.9321281234,
|
| 150 |
-
"speakers":15900000.0
|
| 151 |
-
},
|
| 152 |
-
{
|
| 153 |
-
"target_language_name":"Central Aymara",
|
| 154 |
-
"bleu":42.7698436669,
|
| 155 |
-
"bert_score":0.8625142018,
|
| 156 |
-
"speakers":0.0
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"target_language_name":"Central Kurdish",
|
| 160 |
-
"bleu":59.1927910692,
|
| 161 |
-
"bert_score":0.9332568824,
|
| 162 |
-
"speakers":7250000.0
|
| 163 |
-
},
|
| 164 |
-
{
|
| 165 |
-
"target_language_name":"Central Tibetan",
|
| 166 |
-
"bleu":51.349075274,
|
| 167 |
-
"bert_score":0.967157503,
|
| 168 |
-
"speakers":1200000.0
|
| 169 |
-
},
|
| 170 |
-
{
|
| 171 |
-
"target_language_name":"Chhattisgarhi",
|
| 172 |
-
"bleu":47.9797501304,
|
| 173 |
-
"bert_score":0.9363766015,
|
| 174 |
-
"speakers":16300000.0
|
| 175 |
-
},
|
| 176 |
-
{
|
| 177 |
-
"target_language_name":"Chichewa",
|
| 178 |
-
"bleu":59.7601680161,
|
| 179 |
-
"bert_score":0.9069253902,
|
| 180 |
-
"speakers":12000000.0
|
| 181 |
-
},
|
| 182 |
-
{
|
| 183 |
-
"target_language_name":"Chokwe",
|
| 184 |
-
"bleu":10.1864074161,
|
| 185 |
-
"bert_score":0.727788798,
|
| 186 |
-
"speakers":0.0
|
| 187 |
-
},
|
| 188 |
-
{
|
| 189 |
-
"target_language_name":"Chuvash",
|
| 190 |
-
"bleu":45.0546658723,
|
| 191 |
-
"bert_score":0.9203916192,
|
| 192 |
-
"speakers":1279650.0
|
| 193 |
-
},
|
| 194 |
-
{
|
| 195 |
-
"target_language_name":"Crimean Tatar",
|
| 196 |
-
"bleu":52.7050249448,
|
| 197 |
-
"bert_score":0.8972040812,
|
| 198 |
-
"speakers":552740.0
|
| 199 |
-
},
|
| 200 |
-
{
|
| 201 |
-
"target_language_name":"Croatian",
|
| 202 |
-
"bleu":69.5456983662,
|
| 203 |
-
"bert_score":0.9444877982,
|
| 204 |
-
"speakers":7000000.0
|
| 205 |
-
},
|
| 206 |
-
{
|
| 207 |
-
"target_language_name":"Czech",
|
| 208 |
-
"bleu":69.7112290599,
|
| 209 |
-
"bert_score":0.9384464244,
|
| 210 |
-
"speakers":10700000.0
|
| 211 |
-
},
|
| 212 |
-
{
|
| 213 |
-
"target_language_name":"Danish",
|
| 214 |
-
"bleu":78.0935433284,
|
| 215 |
-
"bert_score":0.9506490747,
|
| 216 |
-
"speakers":6000000.0
|
| 217 |
-
},
|
| 218 |
-
{
|
| 219 |
-
"target_language_name":"Dari",
|
| 220 |
-
"bleu":52.5539795795,
|
| 221 |
-
"bert_score":0.9466466506,
|
| 222 |
-
"speakers":9600000.0
|
| 223 |
-
},
|
| 224 |
-
{
|
| 225 |
-
"target_language_name":"Dholuo",
|
| 226 |
-
"bleu":46.4119479071,
|
| 227 |
-
"bert_score":0.8803233822,
|
| 228 |
-
"speakers":3000000.0
|
| 229 |
-
},
|
| 230 |
-
{
|
| 231 |
-
"target_language_name":"Dogri",
|
| 232 |
-
"bleu":44.9153535278,
|
| 233 |
-
"bert_score":0.934070154,
|
| 234 |
-
"speakers":2000000.0
|
| 235 |
-
},
|
| 236 |
-
{
|
| 237 |
-
"target_language_name":"Dutch",
|
| 238 |
-
"bleu":71.1849326315,
|
| 239 |
-
"bert_score":0.9376831949,
|
| 240 |
-
"speakers":23100000.0
|
| 241 |
-
},
|
| 242 |
-
{
|
| 243 |
-
"target_language_name":"Dzongkha",
|
| 244 |
-
"bleu":44.3573814017,
|
| 245 |
-
"bert_score":0.9664796074,
|
| 246 |
-
"speakers":237080.0
|
| 247 |
-
},
|
| 248 |
-
{
|
| 249 |
-
"target_language_name":"Eastern Punjabi",
|
| 250 |
-
"bleu":60.468441109,
|
| 251 |
-
"bert_score":0.988244007,
|
| 252 |
-
"speakers":125000000.0
|
| 253 |
-
},
|
| 254 |
-
{
|
| 255 |
-
"target_language_name":"Eastern Yiddish",
|
| 256 |
-
"bleu":47.5562009325,
|
| 257 |
-
"bert_score":0.9590989411,
|
| 258 |
-
"speakers":0.0
|
| 259 |
-
},
|
| 260 |
-
{
|
| 261 |
-
"target_language_name":"Egyptian Arabic",
|
| 262 |
-
"bleu":53.6818081038,
|
| 263 |
-
"bert_score":0.9394114673,
|
| 264 |
-
"speakers":100542400.0
|
| 265 |
-
},
|
| 266 |
-
{
|
| 267 |
-
"target_language_name":"English",
|
| 268 |
-
"bleu":75.3501173486,
|
| 269 |
-
"bert_score":0.8807334363,
|
| 270 |
-
"speakers":1132366680.0
|
| 271 |
-
},
|
| 272 |
-
{
|
| 273 |
-
"target_language_name":"Esperanto",
|
| 274 |
-
"bleu":69.6056577554,
|
| 275 |
-
"bert_score":0.9302131255,
|
| 276 |
-
"speakers":2000000.0
|
| 277 |
-
},
|
| 278 |
-
{
|
| 279 |
-
"target_language_name":"Faroese",
|
| 280 |
-
"bleu":65.9147902483,
|
| 281 |
-
"bert_score":0.9332413753,
|
| 282 |
-
"speakers":69150.0
|
| 283 |
-
},
|
| 284 |
-
{
|
| 285 |
-
"target_language_name":"Fijian",
|
| 286 |
-
"bleu":58.2892667246,
|
| 287 |
-
"bert_score":0.9183188617,
|
| 288 |
-
"speakers":341270.0
|
| 289 |
-
},
|
| 290 |
-
{
|
| 291 |
-
"target_language_name":"Filipino",
|
| 292 |
-
"bleu":70.1928498378,
|
| 293 |
-
"bert_score":0.9269425154,
|
| 294 |
-
"speakers":90000000.0
|
| 295 |
-
},
|
| 296 |
-
{
|
| 297 |
-
"target_language_name":"Finnish",
|
| 298 |
-
"bleu":70.9425029518,
|
| 299 |
-
"bert_score":0.9320579688,
|
| 300 |
-
"speakers":5413380.0
|
| 301 |
-
},
|
| 302 |
-
{
|
| 303 |
-
"target_language_name":"Fon",
|
| 304 |
-
"bleu":25.2797773666,
|
| 305 |
-
"bert_score":0.8664443592,
|
| 306 |
-
"speakers":1935500.0
|
| 307 |
-
},
|
| 308 |
-
{
|
| 309 |
-
"target_language_name":"French",
|
| 310 |
-
"bleu":79.3023871219,
|
| 311 |
-
"bert_score":0.9554367423,
|
| 312 |
-
"speakers":208157220.0
|
| 313 |
-
},
|
| 314 |
-
{
|
| 315 |
-
"target_language_name":"Friulian",
|
| 316 |
-
"bleu":66.5488092372,
|
| 317 |
-
"bert_score":0.9255799611,
|
| 318 |
-
"speakers":300000.0
|
| 319 |
-
},
|
| 320 |
-
{
|
| 321 |
-
"target_language_name":"Galician",
|
| 322 |
-
"bleu":68.7024786904,
|
| 323 |
-
"bert_score":0.9283550183,
|
| 324 |
-
"speakers":2500000.0
|
| 325 |
-
},
|
| 326 |
-
{
|
| 327 |
-
"target_language_name":"Ganda",
|
| 328 |
-
"bleu":45.8693322936,
|
| 329 |
-
"bert_score":0.88344028,
|
| 330 |
-
"speakers":4100000.0
|
| 331 |
-
},
|
| 332 |
-
{
|
| 333 |
-
"target_language_name":"Georgian",
|
| 334 |
-
"bleu":61.0166361442,
|
| 335 |
-
"bert_score":0.9546662311,
|
| 336 |
-
"speakers":3700000.0
|
| 337 |
-
},
|
| 338 |
-
{
|
| 339 |
-
"target_language_name":"Gikuyu",
|
| 340 |
-
"bleu":40.9288275291,
|
| 341 |
-
"bert_score":0.8850945433,
|
| 342 |
-
"speakers":6623000.0
|
| 343 |
-
},
|
| 344 |
-
{
|
| 345 |
-
"target_language_name":"Goan Konkani",
|
| 346 |
-
"bleu":47.1084017945,
|
| 347 |
-
"bert_score":0.9314287245,
|
| 348 |
-
"speakers":3633900.0
|
| 349 |
-
},
|
| 350 |
-
{
|
| 351 |
-
"target_language_name":"Greek",
|
| 352 |
-
"bleu":66.2347782153,
|
| 353 |
-
"bert_score":0.9577525119,
|
| 354 |
-
"speakers":15000000.0
|
| 355 |
-
},
|
| 356 |
-
{
|
| 357 |
-
"target_language_name":"Gujarati",
|
| 358 |
-
"bleu":55.5884513452,
|
| 359 |
-
"bert_score":0.9753397226,
|
| 360 |
-
"speakers":56400000.0
|
| 361 |
-
},
|
| 362 |
-
{
|
| 363 |
-
"target_language_name":"Haitian Creole",
|
| 364 |
-
"bleu":63.8532187591,
|
| 365 |
-
"bert_score":0.93236112,
|
| 366 |
-
"speakers":9600000.0
|
| 367 |
-
},
|
| 368 |
-
{
|
| 369 |
-
"target_language_name":"Halh Mongolian",
|
| 370 |
-
"bleu":58.5037789971,
|
| 371 |
-
"bert_score":0.9380823056,
|
| 372 |
-
"speakers":2704030.0
|
| 373 |
-
},
|
| 374 |
-
{
|
| 375 |
-
"target_language_name":"Hausa",
|
| 376 |
-
"bleu":56.3431957901,
|
| 377 |
-
"bert_score":0.9012877802,
|
| 378 |
-
"speakers":43900000.0
|
| 379 |
-
},
|
| 380 |
-
{
|
| 381 |
-
"target_language_name":"Hebrew",
|
| 382 |
-
"bleu":72.0702990513,
|
| 383 |
-
"bert_score":0.964064618,
|
| 384 |
-
"speakers":9303950.0
|
| 385 |
-
},
|
| 386 |
-
{
|
| 387 |
-
"target_language_name":"Hindi",
|
| 388 |
-
"bleu":64.9362166898,
|
| 389 |
-
"bert_score":0.9463364283,
|
| 390 |
-
"speakers":341000000.0
|
| 391 |
-
},
|
| 392 |
-
{
|
| 393 |
-
"target_language_name":"Hungarian",
|
| 394 |
-
"bleu":66.1301119408,
|
| 395 |
-
"bert_score":0.9249218643,
|
| 396 |
-
"speakers":12600000.0
|
| 397 |
-
},
|
| 398 |
-
{
|
| 399 |
-
"target_language_name":"Icelandic",
|
| 400 |
-
"bleu":54.4330055353,
|
| 401 |
-
"bert_score":0.9120460276,
|
| 402 |
-
"speakers":358000.0
|
| 403 |
-
},
|
| 404 |
-
{
|
| 405 |
-
"target_language_name":"Igbo",
|
| 406 |
-
"bleu":46.4017344934,
|
| 407 |
-
"bert_score":0.9137314638,
|
| 408 |
-
"speakers":27000000.0
|
| 409 |
-
},
|
| 410 |
-
{
|
| 411 |
-
"target_language_name":"Ilocano",
|
| 412 |
-
"bleu":62.6058864594,
|
| 413 |
-
"bert_score":0.9115280092,
|
| 414 |
-
"speakers":9100000.0
|
| 415 |
-
},
|
| 416 |
-
{
|
| 417 |
-
"target_language_name":"Indonesian",
|
| 418 |
-
"bleu":72.9087066262,
|
| 419 |
-
"bert_score":0.9301403503,
|
| 420 |
-
"speakers":198996550.0
|
| 421 |
-
},
|
| 422 |
-
{
|
| 423 |
-
"target_language_name":"Iranian Persian",
|
| 424 |
-
"bleu":57.6444169698,
|
| 425 |
-
"bert_score":0.9476486345,
|
| 426 |
-
"speakers":52800000.0
|
| 427 |
-
},
|
| 428 |
-
{
|
| 429 |
-
"target_language_name":"Irish",
|
| 430 |
-
"bleu":69.9725194524,
|
| 431 |
-
"bert_score":0.9440232972,
|
| 432 |
-
"speakers":1030000.0
|
| 433 |
-
},
|
| 434 |
-
{
|
| 435 |
-
"target_language_name":"Italian",
|
| 436 |
-
"bleu":69.1588343572,
|
| 437 |
-
"bert_score":0.9358606537,
|
| 438 |
-
"speakers":64819790.0
|
| 439 |
-
},
|
| 440 |
-
{
|
| 441 |
-
"target_language_name":"Japanese",
|
| 442 |
-
"bleu":49.9166135693,
|
| 443 |
-
"bert_score":0.9425287286,
|
| 444 |
-
"speakers":128000000.0
|
| 445 |
-
},
|
| 446 |
-
{
|
| 447 |
-
"target_language_name":"Javanese",
|
| 448 |
-
"bleu":60.440335299,
|
| 449 |
-
"bert_score":0.9125308077,
|
| 450 |
-
"speakers":84308740.0
|
| 451 |
-
},
|
| 452 |
-
{
|
| 453 |
-
"target_language_name":"Jingpho",
|
| 454 |
-
"bleu":43.5500581403,
|
| 455 |
-
"bert_score":0.8727998992,
|
| 456 |
-
"speakers":940000.0
|
| 457 |
-
},
|
| 458 |
-
{
|
| 459 |
-
"target_language_name":"Jula",
|
| 460 |
-
"bleu":29.5415180297,
|
| 461 |
-
"bert_score":0.822332112,
|
| 462 |
-
"speakers":2700000.0
|
| 463 |
-
},
|
| 464 |
-
{
|
| 465 |
-
"target_language_name":"Kabiy\u00e8",
|
| 466 |
-
"bleu":22.5498504655,
|
| 467 |
-
"bert_score":0.8587520639,
|
| 468 |
-
"speakers":1000000.0
|
| 469 |
-
},
|
| 470 |
-
{
|
| 471 |
-
"target_language_name":"Kabuverdianu",
|
| 472 |
-
"bleu":65.1106010391,
|
| 473 |
-
"bert_score":0.9213403026,
|
| 474 |
-
"speakers":871000.0
|
| 475 |
-
},
|
| 476 |
-
{
|
| 477 |
-
"target_language_name":"Kabyle",
|
| 478 |
-
"bleu":41.1442992587,
|
| 479 |
-
"bert_score":0.8803219795,
|
| 480 |
-
"speakers":5586000.0
|
| 481 |
-
},
|
| 482 |
-
{
|
| 483 |
-
"target_language_name":"Kamba",
|
| 484 |
-
"bleu":41.733489671,
|
| 485 |
-
"bert_score":0.8780206362,
|
| 486 |
-
"speakers":3893000.0
|
| 487 |
-
},
|
| 488 |
-
{
|
| 489 |
-
"target_language_name":"Kannada",
|
| 490 |
-
"bleu":60.0142028332,
|
| 491 |
-
"bert_score":0.9730932295,
|
| 492 |
-
"speakers":43600000.0
|
| 493 |
-
},
|
| 494 |
-
{
|
| 495 |
-
"target_language_name":"Kashmiri",
|
| 496 |
-
"bleu":22.3019416547,
|
| 497 |
-
"bert_score":0.8984790143,
|
| 498 |
-
"speakers":6900000.0
|
| 499 |
-
},
|
| 500 |
-
{
|
| 501 |
-
"target_language_name":"Kazakh",
|
| 502 |
-
"bleu":61.1251621375,
|
| 503 |
-
"bert_score":0.9379647116,
|
| 504 |
-
"speakers":13161980.0
|
| 505 |
-
},
|
| 506 |
-
{
|
| 507 |
-
"target_language_name":"Khmer",
|
| 508 |
-
"bleu":49.2098257043,
|
| 509 |
-
"bert_score":0.8907732884,
|
| 510 |
-
"speakers":16600000.0
|
| 511 |
-
},
|
| 512 |
-
{
|
| 513 |
-
"target_language_name":"Kimbundu",
|
| 514 |
-
"bleu":5.8523457224,
|
| 515 |
-
"bert_score":0.6849321783,
|
| 516 |
-
"speakers":0.0
|
| 517 |
-
},
|
| 518 |
-
{
|
| 519 |
-
"target_language_name":"Kinyarwanda",
|
| 520 |
-
"bleu":57.2410626756,
|
| 521 |
-
"bert_score":0.906923449,
|
| 522 |
-
"speakers":12100000.0
|
| 523 |
-
},
|
| 524 |
-
{
|
| 525 |
-
"target_language_name":"Kituba (Democratic Republic of the Congo)",
|
| 526 |
-
"bleu":52.8484601602,
|
| 527 |
-
"bert_score":0.9017938395,
|
| 528 |
-
"speakers":0.0
|
| 529 |
-
},
|
| 530 |
-
{
|
| 531 |
-
"target_language_name":"Korean",
|
| 532 |
-
"bleu":43.6872285974,
|
| 533 |
-
"bert_score":0.9579092761,
|
| 534 |
-
"speakers":77300000.0
|
| 535 |
-
},
|
| 536 |
-
{
|
| 537 |
-
"target_language_name":"Kyrgyz",
|
| 538 |
-
"bleu":57.0824422453,
|
| 539 |
-
"bert_score":0.9317750076,
|
| 540 |
-
"speakers":4568480.0
|
| 541 |
-
},
|
| 542 |
-
{
|
| 543 |
-
"target_language_name":"Lao",
|
| 544 |
-
"bleu":60.0210909677,
|
| 545 |
-
"bert_score":0.904438438,
|
| 546 |
-
"speakers":5225552.0
|
| 547 |
-
},
|
| 548 |
-
{
|
| 549 |
-
"target_language_name":"Latgalian",
|
| 550 |
-
"bleu":56.4843556524,
|
| 551 |
-
"bert_score":0.9078494012,
|
| 552 |
-
"speakers":200000.0
|
| 553 |
-
},
|
| 554 |
-
{
|
| 555 |
-
"target_language_name":"Levantine Arabic",
|
| 556 |
-
"bleu":56.0898634013,
|
| 557 |
-
"bert_score":0.9437467565,
|
| 558 |
-
"speakers":44000000.0
|
| 559 |
-
},
|
| 560 |
-
{
|
| 561 |
-
"target_language_name":"Ligurian",
|
| 562 |
-
"bleu":55.8530636302,
|
| 563 |
-
"bert_score":0.9047620773,
|
| 564 |
-
"speakers":500000.0
|
| 565 |
-
},
|
| 566 |
-
{
|
| 567 |
-
"target_language_name":"Limburgish",
|
| 568 |
-
"bleu":59.4485504982,
|
| 569 |
-
"bert_score":0.8987095455,
|
| 570 |
-
"speakers":1600000.0
|
| 571 |
-
},
|
| 572 |
-
{
|
| 573 |
-
"target_language_name":"Lingala",
|
| 574 |
-
"bleu":30.4322896531,
|
| 575 |
-
"bert_score":0.8553236572,
|
| 576 |
-
"speakers":20000000.0
|
| 577 |
-
},
|
| 578 |
-
{
|
| 579 |
-
"target_language_name":"Lithuanian",
|
| 580 |
-
"bleu":67.1625695571,
|
| 581 |
-
"bert_score":0.9154702902,
|
| 582 |
-
"speakers":4000000.0
|
| 583 |
-
},
|
| 584 |
-
{
|
| 585 |
-
"target_language_name":"Lombard",
|
| 586 |
-
"bleu":46.3884402674,
|
| 587 |
-
"bert_score":0.8643471499,
|
| 588 |
-
"speakers":3900000.0
|
| 589 |
-
},
|
| 590 |
-
{
|
| 591 |
-
"target_language_name":"Luba-Kasai",
|
| 592 |
-
"bleu":45.0655291655,
|
| 593 |
-
"bert_score":0.8749240279,
|
| 594 |
-
"speakers":6300000.0
|
| 595 |
-
},
|
| 596 |
-
{
|
| 597 |
-
"target_language_name":"Luxembourgish",
|
| 598 |
-
"bleu":70.8338190438,
|
| 599 |
-
"bert_score":0.9297492107,
|
| 600 |
-
"speakers":391200.0
|
| 601 |
-
},
|
| 602 |
-
{
|
| 603 |
-
"target_language_name":"Macedonian",
|
| 604 |
-
"bleu":72.2733471437,
|
| 605 |
-
"bert_score":0.9558346649,
|
| 606 |
-
"speakers":2000000.0
|
| 607 |
-
},
|
| 608 |
-
{
|
| 609 |
-
"target_language_name":"Magahi",
|
| 610 |
-
"bleu":58.5474221546,
|
| 611 |
-
"bert_score":0.9458349566,
|
| 612 |
-
"speakers":20700000.0
|
| 613 |
-
},
|
| 614 |
-
{
|
| 615 |
-
"target_language_name":"Maithili",
|
| 616 |
-
"bleu":54.6530071391,
|
| 617 |
-
"bert_score":0.9433513383,
|
| 618 |
-
"speakers":33900000.0
|
| 619 |
-
},
|
| 620 |
-
{
|
| 621 |
-
"target_language_name":"Malayalam",
|
| 622 |
-
"bleu":64.0655894091,
|
| 623 |
-
"bert_score":0.9803075671,
|
| 624 |
-
"speakers":37100000.0
|
| 625 |
-
},
|
| 626 |
-
{
|
| 627 |
-
"target_language_name":"Maltese",
|
| 628 |
-
"bleu":80.0866777263,
|
| 629 |
-
"bert_score":0.9520254652,
|
| 630 |
-
"speakers":570000.0
|
| 631 |
-
},
|
| 632 |
-
{
|
| 633 |
-
"target_language_name":"Mandarin Chinese",
|
| 634 |
-
"bleu":42.5300166785,
|
| 635 |
-
"bert_score":0.9634857118,
|
| 636 |
-
"speakers":1074000000.0
|
| 637 |
-
},
|
| 638 |
-
{
|
| 639 |
-
"target_language_name":"Maori",
|
| 640 |
-
"bleu":54.8319935643,
|
| 641 |
-
"bert_score":0.9185245017,
|
| 642 |
-
"speakers":160000.0
|
| 643 |
-
},
|
| 644 |
-
{
|
| 645 |
-
"target_language_name":"Marathi",
|
| 646 |
-
"bleu":57.4434090711,
|
| 647 |
-
"bert_score":0.9421781262,
|
| 648 |
-
"speakers":83100000.0
|
| 649 |
-
},
|
| 650 |
-
{
|
| 651 |
-
"target_language_name":"Meadow Mari",
|
| 652 |
-
"bleu":49.7911680582,
|
| 653 |
-
"bert_score":0.9295116961,
|
| 654 |
-
"speakers":482000.0
|
| 655 |
-
},
|
| 656 |
-
{
|
| 657 |
-
"target_language_name":"Meitei",
|
| 658 |
-
"bleu":41.2619945571,
|
| 659 |
-
"bert_score":0.9528288851,
|
| 660 |
-
"speakers":1470000.0
|
| 661 |
-
},
|
| 662 |
-
{
|
| 663 |
-
"target_language_name":"Merina Malagasy",
|
| 664 |
-
"bleu":61.0968434546,
|
| 665 |
-
"bert_score":0.9032936792,
|
| 666 |
-
"speakers":0.0
|
| 667 |
-
},
|
| 668 |
-
{
|
| 669 |
-
"target_language_name":"Mesopotamian Arabic",
|
| 670 |
-
"bleu":49.5184865297,
|
| 671 |
-
"bert_score":0.9382626355,
|
| 672 |
-
"speakers":15700000.0
|
| 673 |
-
},
|
| 674 |
-
{
|
| 675 |
-
"target_language_name":"Minangkabau",
|
| 676 |
-
"bleu":50.7407956197,
|
| 677 |
-
"bert_score":0.9252789746,
|
| 678 |
-
"speakers":5530000.0
|
| 679 |
-
},
|
| 680 |
-
{
|
| 681 |
-
"target_language_name":"Mizo",
|
| 682 |
-
"bleu":51.6558017488,
|
| 683 |
-
"bert_score":0.8875152906,
|
| 684 |
-
"speakers":500000.0
|
| 685 |
-
},
|
| 686 |
-
{
|
| 687 |
-
"target_language_name":"Moore",
|
| 688 |
-
"bleu":32.8458097983,
|
| 689 |
-
"bert_score":0.8583020627,
|
| 690 |
-
"speakers":7600000.0
|
| 691 |
-
},
|
| 692 |
-
{
|
| 693 |
-
"target_language_name":"Moroccan Arabic",
|
| 694 |
-
"bleu":49.3082976781,
|
| 695 |
-
"bert_score":0.9317501009,
|
| 696 |
-
"speakers":27500000.0
|
| 697 |
-
},
|
| 698 |
-
{
|
| 699 |
-
"target_language_name":"Najdi Arabic",
|
| 700 |
-
"bleu":46.4102430377,
|
| 701 |
-
"bert_score":0.9332984229,
|
| 702 |
-
"speakers":0.0
|
| 703 |
-
},
|
| 704 |
-
{
|
| 705 |
-
"target_language_name":"Nepali",
|
| 706 |
-
"bleu":55.2919347352,
|
| 707 |
-
"bert_score":0.9358912428,
|
| 708 |
-
"speakers":0.0
|
| 709 |
-
},
|
| 710 |
-
{
|
| 711 |
-
"target_language_name":"Nigerian Fulfulde",
|
| 712 |
-
"bleu":28.1761055913,
|
| 713 |
-
"bert_score":0.8343587597,
|
| 714 |
-
"speakers":14500000.0
|
| 715 |
-
},
|
| 716 |
-
{
|
| 717 |
-
"target_language_name":"North Azerbaijani",
|
| 718 |
-
"bleu":55.5265107063,
|
| 719 |
-
"bert_score":0.9145456314,
|
| 720 |
-
"speakers":9220610.0
|
| 721 |
-
},
|
| 722 |
-
{
|
| 723 |
-
"target_language_name":"Northern Kurdish",
|
| 724 |
-
"bleu":55.7965878227,
|
| 725 |
-
"bert_score":0.9104436457,
|
| 726 |
-
"speakers":14600000.0
|
| 727 |
-
},
|
| 728 |
-
{
|
| 729 |
-
"target_language_name":"Northern Sotho",
|
| 730 |
-
"bleu":62.8769401692,
|
| 731 |
-
"bert_score":0.9261207898,
|
| 732 |
-
"speakers":4100000.0
|
| 733 |
-
},
|
| 734 |
-
{
|
| 735 |
-
"target_language_name":"Northern Uzbek",
|
| 736 |
-
"bleu":63.205573851,
|
| 737 |
-
"bert_score":0.9120756924,
|
| 738 |
-
"speakers":26912410.0
|
| 739 |
-
},
|
| 740 |
-
{
|
| 741 |
-
"target_language_name":"Nuer",
|
| 742 |
-
"bleu":16.5796987951,
|
| 743 |
-
"bert_score":0.8528214693,
|
| 744 |
-
"speakers":900000.0
|
| 745 |
-
},
|
| 746 |
-
{
|
| 747 |
-
"target_language_name":"N\u2019Ko",
|
| 748 |
-
"bleu":32.483490799,
|
| 749 |
-
"bert_score":0.9823745767,
|
| 750 |
-
"speakers":0.0
|
| 751 |
-
},
|
| 752 |
-
{
|
| 753 |
-
"target_language_name":"Occitan",
|
| 754 |
-
"bleu":71.532740184,
|
| 755 |
-
"bert_score":0.9337525626,
|
| 756 |
-
"speakers":542000.0
|
| 757 |
-
},
|
| 758 |
-
{
|
| 759 |
-
"target_language_name":"Odia",
|
| 760 |
-
"bleu":57.3628096518,
|
| 761 |
-
"bert_score":0.9768644154,
|
| 762 |
-
"speakers":34500000.0
|
| 763 |
-
},
|
| 764 |
-
{
|
| 765 |
-
"target_language_name":"Pangasinan",
|
| 766 |
-
"bleu":56.0048183827,
|
| 767 |
-
"bert_score":0.8906280657,
|
| 768 |
-
"speakers":1100000.0
|
| 769 |
-
},
|
| 770 |
-
{
|
| 771 |
-
"target_language_name":"Papiamentu",
|
| 772 |
-
"bleu":69.7955328133,
|
| 773 |
-
"bert_score":0.9325902323,
|
| 774 |
-
"speakers":321300.0
|
| 775 |
-
},
|
| 776 |
-
{
|
| 777 |
-
"target_language_name":"Paraguayan Guaran\u00ed",
|
| 778 |
-
"bleu":41.7929863707,
|
| 779 |
-
"bert_score":0.8764786462,
|
| 780 |
-
"speakers":0.0
|
| 781 |
-
},
|
| 782 |
-
{
|
| 783 |
-
"target_language_name":"Polish",
|
| 784 |
-
"bleu":61.8768399674,
|
| 785 |
-
"bert_score":0.9179250948,
|
| 786 |
-
"speakers":40200000.0
|
| 787 |
-
},
|
| 788 |
-
{
|
| 789 |
-
"target_language_name":"Portuguese",
|
| 790 |
-
"bleu":77.4978074222,
|
| 791 |
-
"bert_score":0.9494876027,
|
| 792 |
-
"speakers":254300000.0
|
| 793 |
-
},
|
| 794 |
-
{
|
| 795 |
-
"target_language_name":"Romanian",
|
| 796 |
-
"bleu":76.4907159035,
|
| 797 |
-
"bert_score":0.9455295324,
|
| 798 |
-
"speakers":24300000.0
|
| 799 |
-
},
|
| 800 |
-
{
|
| 801 |
-
"target_language_name":"Rundi",
|
| 802 |
-
"bleu":48.943513629,
|
| 803 |
-
"bert_score":0.8933652222,
|
| 804 |
-
"speakers":10800000.0
|
| 805 |
-
},
|
| 806 |
-
{
|
| 807 |
-
"target_language_name":"Russian",
|
| 808 |
-
"bleu":71.1489441039,
|
| 809 |
-
"bert_score":0.9518508852,
|
| 810 |
-
"speakers":171428900.0
|
| 811 |
-
},
|
| 812 |
-
{
|
| 813 |
-
"target_language_name":"Samoan",
|
| 814 |
-
"bleu":56.7138831423,
|
| 815 |
-
"bert_score":0.9166683555,
|
| 816 |
-
"speakers":415720.0
|
| 817 |
-
},
|
| 818 |
-
{
|
| 819 |
-
"target_language_name":"Sango",
|
| 820 |
-
"bleu":34.8754222657,
|
| 821 |
-
"bert_score":0.8720244229,
|
| 822 |
-
"speakers":4600000.0
|
| 823 |
-
},
|
| 824 |
-
{
|
| 825 |
-
"target_language_name":"Sanskrit",
|
| 826 |
-
"bleu":32.7813249911,
|
| 827 |
-
"bert_score":0.8987655501,
|
| 828 |
-
"speakers":49736.0
|
| 829 |
-
},
|
| 830 |
-
{
|
| 831 |
-
"target_language_name":"Santhali",
|
| 832 |
-
"bleu":31.5119247269,
|
| 833 |
-
"bert_score":0.944095705,
|
| 834 |
-
"speakers":7200000.0
|
| 835 |
-
},
|
| 836 |
-
{
|
| 837 |
-
"target_language_name":"Sardinian",
|
| 838 |
-
"bleu":62.6903914771,
|
| 839 |
-
"bert_score":0.9118991812,
|
| 840 |
-
"speakers":1300000.0
|
| 841 |
-
},
|
| 842 |
-
{
|
| 843 |
-
"target_language_name":"Scottish Gaelic",
|
| 844 |
-
"bleu":62.6044371338,
|
| 845 |
-
"bert_score":0.9264988482,
|
| 846 |
-
"speakers":60130.0
|
| 847 |
-
},
|
| 848 |
-
{
|
| 849 |
-
"target_language_name":"Serbian",
|
| 850 |
-
"bleu":69.9691396176,
|
| 851 |
-
"bert_score":0.9582955678,
|
| 852 |
-
"speakers":9000000.0
|
| 853 |
-
},
|
| 854 |
-
{
|
| 855 |
-
"target_language_name":"Setswana",
|
| 856 |
-
"bleu":55.2288890228,
|
| 857 |
-
"bert_score":0.9117900888,
|
| 858 |
-
"speakers":4500000.0
|
| 859 |
-
},
|
| 860 |
-
{
|
| 861 |
-
"target_language_name":"Shan",
|
| 862 |
-
"bleu":29.2129948577,
|
| 863 |
-
"bert_score":0.9378574808,
|
| 864 |
-
"speakers":3000000.0
|
| 865 |
-
},
|
| 866 |
-
{
|
| 867 |
-
"target_language_name":"Shona",
|
| 868 |
-
"bleu":51.5592191405,
|
| 869 |
-
"bert_score":0.8798740129,
|
| 870 |
-
"speakers":9023000.0
|
| 871 |
-
},
|
| 872 |
-
{
|
| 873 |
-
"target_language_name":"Sicilian",
|
| 874 |
-
"bleu":58.5895359443,
|
| 875 |
-
"bert_score":0.90428345,
|
| 876 |
-
"speakers":4700000.0
|
| 877 |
-
},
|
| 878 |
-
{
|
| 879 |
-
"target_language_name":"Silesian",
|
| 880 |
-
"bleu":56.7836392069,
|
| 881 |
-
"bert_score":0.9106028736,
|
| 882 |
-
"speakers":522000.0
|
| 883 |
-
},
|
| 884 |
-
{
|
| 885 |
-
"target_language_name":"Sindhi",
|
| 886 |
-
"bleu":48.1876056648,
|
| 887 |
-
"bert_score":0.936702015,
|
| 888 |
-
"speakers":25000000.0
|
| 889 |
-
},
|
| 890 |
-
{
|
| 891 |
-
"target_language_name":"Sinhala",
|
| 892 |
-
"bleu":56.7567311796,
|
| 893 |
-
"bert_score":0.9713358581,
|
| 894 |
-
"speakers":15300000.0
|
| 895 |
-
},
|
| 896 |
-
{
|
| 897 |
-
"target_language_name":"Slovak",
|
| 898 |
-
"bleu":67.9284804086,
|
| 899 |
-
"bert_score":0.9360236605,
|
| 900 |
-
"speakers":6000000.0
|
| 901 |
-
},
|
| 902 |
-
{
|
| 903 |
-
"target_language_name":"Slovene",
|
| 904 |
-
"bleu":72.5691270757,
|
| 905 |
-
"bert_score":0.9432346245,
|
| 906 |
-
"speakers":2400000.0
|
| 907 |
-
},
|
| 908 |
-
{
|
| 909 |
-
"target_language_name":"Somali",
|
| 910 |
-
"bleu":55.3706496473,
|
| 911 |
-
"bert_score":0.908571593,
|
| 912 |
-
"speakers":16200000.0
|
| 913 |
-
},
|
| 914 |
-
{
|
| 915 |
-
"target_language_name":"South Azerbaijani",
|
| 916 |
-
"bleu":44.3712804302,
|
| 917 |
-
"bert_score":0.9420697371,
|
| 918 |
-
"speakers":15000000.0
|
| 919 |
-
},
|
| 920 |
-
{
|
| 921 |
-
"target_language_name":"Southern Pashto",
|
| 922 |
-
"bleu":38.3124819374,
|
| 923 |
-
"bert_score":0.921268179,
|
| 924 |
-
"speakers":10900000.0
|
| 925 |
-
},
|
| 926 |
-
{
|
| 927 |
-
"target_language_name":"Southern Sotho",
|
| 928 |
-
"bleu":56.735299554,
|
| 929 |
-
"bert_score":0.9102749407,
|
| 930 |
-
"speakers":6000000.0
|
| 931 |
-
},
|
| 932 |
-
{
|
| 933 |
-
"target_language_name":"Southwestern Dinka",
|
| 934 |
-
"bleu":17.5913281403,
|
| 935 |
-
"bert_score":0.8016291638,
|
| 936 |
-
"speakers":0.0
|
| 937 |
-
},
|
| 938 |
-
{
|
| 939 |
-
"target_language_name":"Spanish",
|
| 940 |
-
"bleu":63.8467073379,
|
| 941 |
-
"bert_score":0.9224406302,
|
| 942 |
-
"speakers":485000000.0
|
| 943 |
-
},
|
| 944 |
-
{
|
| 945 |
-
"target_language_name":"Standard Arabic",
|
| 946 |
-
"bleu":56.8831262708,
|
| 947 |
-
"bert_score":0.9168330083,
|
| 948 |
-
"speakers":0.0
|
| 949 |
-
},
|
| 950 |
-
{
|
| 951 |
-
"target_language_name":"Standard Estonian",
|
| 952 |
-
"bleu":67.4156919517,
|
| 953 |
-
"bert_score":0.9277306815,
|
| 954 |
-
"speakers":1164770.0
|
| 955 |
-
},
|
| 956 |
-
{
|
| 957 |
-
"target_language_name":"Standard German",
|
| 958 |
-
"bleu":77.1966515107,
|
| 959 |
-
"bert_score":0.9468763133,
|
| 960 |
-
"speakers":105000000.0
|
| 961 |
-
},
|
| 962 |
-
{
|
| 963 |
-
"target_language_name":"Standard Latvian",
|
| 964 |
-
"bleu":65.0833210037,
|
| 965 |
-
"bert_score":0.9217625757,
|
| 966 |
-
"speakers":0.0
|
| 967 |
-
},
|
| 968 |
-
{
|
| 969 |
-
"target_language_name":"Standard Malay",
|
| 970 |
-
"bleu":74.2657232798,
|
| 971 |
-
"bert_score":0.9445500493,
|
| 972 |
-
"speakers":0.0
|
| 973 |
-
},
|
| 974 |
-
{
|
| 975 |
-
"target_language_name":"Standard Moroccan Tamazight",
|
| 976 |
-
"bleu":35.6247648109,
|
| 977 |
-
"bert_score":0.9847298423,
|
| 978 |
-
"speakers":0.0
|
| 979 |
-
},
|
| 980 |
-
{
|
| 981 |
-
"target_language_name":"Sunda",
|
| 982 |
-
"bleu":56.4065999104,
|
| 983 |
-
"bert_score":0.9077177823,
|
| 984 |
-
"speakers":32400000.0
|
| 985 |
-
},
|
| 986 |
-
{
|
| 987 |
-
"target_language_name":"Swahili",
|
| 988 |
-
"bleu":73.5199042142,
|
| 989 |
-
"bert_score":0.9450787365,
|
| 990 |
-
"speakers":82300000.0
|
| 991 |
-
},
|
| 992 |
-
{
|
| 993 |
-
"target_language_name":"Swati",
|
| 994 |
-
"bleu":52.7746096439,
|
| 995 |
-
"bert_score":0.8899940272,
|
| 996 |
-
"speakers":2034200.0
|
| 997 |
-
},
|
| 998 |
-
{
|
| 999 |
-
"target_language_name":"Swedish",
|
| 1000 |
-
"bleu":77.421610247,
|
| 1001 |
-
"bert_score":0.9571870168,
|
| 1002 |
-
"speakers":9244250.0
|
| 1003 |
-
},
|
| 1004 |
-
{
|
| 1005 |
-
"target_language_name":"Tajik",
|
| 1006 |
-
"bleu":60.9783684158,
|
| 1007 |
-
"bert_score":0.9378365338,
|
| 1008 |
-
"speakers":14000000.0
|
| 1009 |
-
},
|
| 1010 |
-
{
|
| 1011 |
-
"target_language_name":"Tamasheq",
|
| 1012 |
-
"bleu":18.4319889721,
|
| 1013 |
-
"bert_score":0.8427422295,
|
| 1014 |
-
"speakers":500000.0
|
| 1015 |
-
},
|
| 1016 |
-
{
|
| 1017 |
-
"target_language_name":"Tamil",
|
| 1018 |
-
"bleu":65.7863221054,
|
| 1019 |
-
"bert_score":0.9536473691,
|
| 1020 |
-
"speakers":75000000.0
|
| 1021 |
-
},
|
| 1022 |
-
{
|
| 1023 |
-
"target_language_name":"Tatar",
|
| 1024 |
-
"bleu":60.3447467213,
|
| 1025 |
-
"bert_score":0.9364115715,
|
| 1026 |
-
"speakers":5427318.0
|
| 1027 |
-
},
|
| 1028 |
-
{
|
| 1029 |
-
"target_language_name":"Ta\u2019izzi-Adeni Arabic",
|
| 1030 |
-
"bleu":49.4139335281,
|
| 1031 |
-
"bert_score":0.9354432185,
|
| 1032 |
-
"speakers":10500000.0
|
| 1033 |
-
},
|
| 1034 |
-
{
|
| 1035 |
-
"target_language_name":"Telugu",
|
| 1036 |
-
"bleu":61.6352457629,
|
| 1037 |
-
"bert_score":0.9790697515,
|
| 1038 |
-
"speakers":82000000.0
|
| 1039 |
-
},
|
| 1040 |
-
{
|
| 1041 |
-
"target_language_name":"Thai",
|
| 1042 |
-
"bleu":62.8125360944,
|
| 1043 |
-
"bert_score":0.9225328485,
|
| 1044 |
-
"speakers":40000000.0
|
| 1045 |
-
},
|
| 1046 |
-
{
|
| 1047 |
-
"target_language_name":"Tigrigna",
|
| 1048 |
-
"bleu":32.8711961703,
|
| 1049 |
-
"bert_score":0.9852415164,
|
| 1050 |
-
"speakers":7507780.0
|
| 1051 |
-
},
|
| 1052 |
-
{
|
| 1053 |
-
"target_language_name":"Tok Pisin",
|
| 1054 |
-
"bleu":56.5407760367,
|
| 1055 |
-
"bert_score":0.9031182428,
|
| 1056 |
-
"speakers":4000000.0
|
| 1057 |
-
},
|
| 1058 |
-
{
|
| 1059 |
-
"target_language_name":"Tosk Albanian",
|
| 1060 |
-
"bleu":69.4218765092,
|
| 1061 |
-
"bert_score":0.9402680953,
|
| 1062 |
-
"speakers":3000000.0
|
| 1063 |
-
},
|
| 1064 |
-
{
|
| 1065 |
-
"target_language_name":"Tsonga",
|
| 1066 |
-
"bleu":58.3516573597,
|
| 1067 |
-
"bert_score":0.9134832978,
|
| 1068 |
-
"speakers":13000000.0
|
| 1069 |
-
},
|
| 1070 |
-
{
|
| 1071 |
-
"target_language_name":"Tumbuka",
|
| 1072 |
-
"bleu":44.0490017392,
|
| 1073 |
-
"bert_score":0.8865564326,
|
| 1074 |
-
"speakers":2680000.0
|
| 1075 |
-
},
|
| 1076 |
-
{
|
| 1077 |
-
"target_language_name":"Tunisian Arabic",
|
| 1078 |
-
"bleu":49.6714090744,
|
| 1079 |
-
"bert_score":0.9337966998,
|
| 1080 |
-
"speakers":11600000.0
|
| 1081 |
-
},
|
| 1082 |
-
{
|
| 1083 |
-
"target_language_name":"Turkish",
|
| 1084 |
-
"bleu":67.1600625676,
|
| 1085 |
-
"bert_score":0.9309494158,
|
| 1086 |
-
"speakers":82231620.0
|
| 1087 |
-
},
|
| 1088 |
-
{
|
| 1089 |
-
"target_language_name":"Turkmen",
|
| 1090 |
-
"bleu":60.5593705936,
|
| 1091 |
-
"bert_score":0.9125106474,
|
| 1092 |
-
"speakers":16000000.0
|
| 1093 |
-
},
|
| 1094 |
-
{
|
| 1095 |
-
"target_language_name":"Twi",
|
| 1096 |
-
"bleu":44.7976562068,
|
| 1097 |
-
"bert_score":0.8913615406,
|
| 1098 |
-
"speakers":3000000.0
|
| 1099 |
-
},
|
| 1100 |
-
{
|
| 1101 |
-
"target_language_name":"Ukrainian",
|
| 1102 |
-
"bleu":68.0976232544,
|
| 1103 |
-
"bert_score":0.9468558848,
|
| 1104 |
-
"speakers":34710100.0
|
| 1105 |
-
},
|
| 1106 |
-
{
|
| 1107 |
-
"target_language_name":"Umbundu",
|
| 1108 |
-
"bleu":21.0802775597,
|
| 1109 |
-
"bert_score":0.8461364289,
|
| 1110 |
-
"speakers":6000000.0
|
| 1111 |
-
},
|
| 1112 |
-
{
|
| 1113 |
-
"target_language_name":"Urdu",
|
| 1114 |
-
"bleu":61.1255457272,
|
| 1115 |
-
"bert_score":0.953888009,
|
| 1116 |
-
"speakers":94022900.0
|
| 1117 |
-
},
|
| 1118 |
-
{
|
| 1119 |
-
"target_language_name":"Uyghur",
|
| 1120 |
-
"bleu":53.5346877103,
|
| 1121 |
-
"bert_score":0.9397906005,
|
| 1122 |
-
"speakers":10400000.0
|
| 1123 |
-
},
|
| 1124 |
-
{
|
| 1125 |
-
"target_language_name":"Venetian",
|
| 1126 |
-
"bleu":60.6140876271,
|
| 1127 |
-
"bert_score":0.9080212533,
|
| 1128 |
-
"speakers":2000000.0
|
| 1129 |
-
},
|
| 1130 |
-
{
|
| 1131 |
-
"target_language_name":"Vietnamese",
|
| 1132 |
-
"bleu":70.3560749464,
|
| 1133 |
-
"bert_score":0.9527418713,
|
| 1134 |
-
"speakers":76000000.0
|
| 1135 |
-
},
|
| 1136 |
-
{
|
| 1137 |
-
"target_language_name":"Waray-Waray",
|
| 1138 |
-
"bleu":66.3850231243,
|
| 1139 |
-
"bert_score":0.920412008,
|
| 1140 |
-
"speakers":3100000.0
|
| 1141 |
-
},
|
| 1142 |
-
{
|
| 1143 |
-
"target_language_name":"Welsh",
|
| 1144 |
-
"bleu":83.3437724474,
|
| 1145 |
-
"bert_score":0.9662299534,
|
| 1146 |
-
"speakers":977366.0
|
| 1147 |
-
},
|
| 1148 |
-
{
|
| 1149 |
-
"target_language_name":"West Central Oromo",
|
| 1150 |
-
"bleu":46.9090350028,
|
| 1151 |
-
"bert_score":0.8845542371,
|
| 1152 |
-
"speakers":0.0
|
| 1153 |
-
},
|
| 1154 |
-
{
|
| 1155 |
-
"target_language_name":"Wolof",
|
| 1156 |
-
"bleu":42.6430127569,
|
| 1157 |
-
"bert_score":0.8762976408,
|
| 1158 |
-
"speakers":3700000.0
|
| 1159 |
-
},
|
| 1160 |
-
{
|
| 1161 |
-
"target_language_name":"Xhosa",
|
| 1162 |
-
"bleu":55.4688091009,
|
| 1163 |
-
"bert_score":0.9008744816,
|
| 1164 |
-
"speakers":11000000.0
|
| 1165 |
-
},
|
| 1166 |
-
{
|
| 1167 |
-
"target_language_name":"Yerwa Kanuri",
|
| 1168 |
-
"bleu":18.5081787556,
|
| 1169 |
-
"bert_score":0.839997381,
|
| 1170 |
-
"speakers":0.0
|
| 1171 |
-
},
|
| 1172 |
-
{
|
| 1173 |
-
"target_language_name":"Yoruba",
|
| 1174 |
-
"bleu":34.2642542268,
|
| 1175 |
-
"bert_score":0.9001545012,
|
| 1176 |
-
"speakers":40000000.0
|
| 1177 |
-
},
|
| 1178 |
-
{
|
| 1179 |
-
"target_language_name":"Yue Chinese",
|
| 1180 |
-
"bleu":34.5614651228,
|
| 1181 |
-
"bert_score":0.9634495397,
|
| 1182 |
-
"speakers":73100000.0
|
| 1183 |
-
},
|
| 1184 |
-
{
|
| 1185 |
-
"target_language_name":"Zulu",
|
| 1186 |
-
"bleu":59.1762078389,
|
| 1187 |
-
"bert_score":0.9099391103,
|
| 1188 |
-
"speakers":15700000.0
|
| 1189 |
-
},
|
| 1190 |
-
{
|
| 1191 |
-
"target_language_name":"nno",
|
| 1192 |
-
"bleu":71.8615646296,
|
| 1193 |
-
"bert_score":0.9335320314,
|
| 1194 |
-
"speakers":0.0
|
| 1195 |
-
},
|
| 1196 |
-
{
|
| 1197 |
-
"target_language_name":"\u00c9w\u00e9",
|
| 1198 |
-
"bleu":41.6614038791,
|
| 1199 |
-
"bert_score":0.8829316159,
|
| 1200 |
-
"speakers":3000000.0
|
| 1201 |
-
}
|
| 1202 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uv.lock
CHANGED
|
@@ -253,6 +253,18 @@ wheels = [
|
|
| 253 |
{ url = "https://files.pythonhosted.org/packages/bf/9b/08c0432272d77b04803958a4598a51e2a4b51c06640af8b8f0f908c18bf2/charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079", size = 49446 },
|
| 254 |
]
|
| 255 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
[[package]]
|
| 257 |
name = "colorama"
|
| 258 |
version = "0.4.6"
|
|
@@ -767,11 +779,16 @@ dependencies = [
|
|
| 767 |
{ name = "bert-score" },
|
| 768 |
{ name = "evaluate" },
|
| 769 |
{ name = "joblib" },
|
|
|
|
| 770 |
{ name = "openai" },
|
| 771 |
{ name = "pandas" },
|
|
|
|
| 772 |
{ name = "python-dotenv" },
|
| 773 |
{ name = "sacrebleu" },
|
|
|
|
|
|
|
| 774 |
{ name = "tqdm" },
|
|
|
|
| 775 |
]
|
| 776 |
|
| 777 |
[package.metadata]
|
|
@@ -780,11 +797,16 @@ requires-dist = [
|
|
| 780 |
{ name = "bert-score", specifier = ">=0.3.13" },
|
| 781 |
{ name = "evaluate", specifier = ">=0.4.3" },
|
| 782 |
{ name = "joblib", specifier = ">=1.4.2" },
|
|
|
|
| 783 |
{ name = "openai", specifier = ">=1.52.2" },
|
| 784 |
{ name = "pandas", specifier = ">=2.2.3" },
|
|
|
|
| 785 |
{ name = "python-dotenv", specifier = ">=1.0.1" },
|
| 786 |
{ name = "sacrebleu", specifier = ">=2.4.3" },
|
|
|
|
|
|
|
| 787 |
{ name = "tqdm", specifier = ">=4.66.6" },
|
|
|
|
| 788 |
]
|
| 789 |
|
| 790 |
[[package]]
|
|
@@ -1083,6 +1105,21 @@ wheels = [
|
|
| 1083 |
{ url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263 },
|
| 1084 |
]
|
| 1085 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1086 |
[[package]]
|
| 1087 |
name = "numpy"
|
| 1088 |
version = "2.1.2"
|
|
@@ -1491,6 +1528,20 @@ wheels = [
|
|
| 1491 |
{ url = "https://files.pythonhosted.org/packages/3d/b6/e6d98278f2d49b22b4d033c9f792eda783b9ab2094b041f013fc69bcde87/propcache-0.2.0-py3-none-any.whl", hash = "sha256:2ccc28197af5313706511fab3a8b66dcd6da067a1331372c82ea1cb74285e036", size = 11603 },
|
| 1492 |
]
|
| 1493 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1494 |
[[package]]
|
| 1495 |
name = "pyarrow"
|
| 1496 |
version = "18.0.0"
|
|
@@ -1878,6 +1929,38 @@ wheels = [
|
|
| 1878 |
{ url = "https://files.pythonhosted.org/packages/19/46/5d11dc300feaad285c2f1bd784ff3f689f5e0ab6be49aaf568f3a77019eb/safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:21742b391b859e67b26c0b2ac37f52c9c0944a879a25ad2f9f9f3cd61e7fda8f", size = 606660 },
|
| 1879 |
]
|
| 1880 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1881 |
[[package]]
|
| 1882 |
name = "setuptools"
|
| 1883 |
version = "75.3.0"
|
|
@@ -1926,6 +2009,42 @@ wheels = [
|
|
| 1926 |
{ url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252 },
|
| 1927 |
]
|
| 1928 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1929 |
[[package]]
|
| 1930 |
name = "tokenizers"
|
| 1931 |
version = "0.20.1"
|
|
|
|
| 253 |
{ url = "https://files.pythonhosted.org/packages/bf/9b/08c0432272d77b04803958a4598a51e2a4b51c06640af8b8f0f908c18bf2/charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079", size = 49446 },
|
| 254 |
]
|
| 255 |
|
| 256 |
+
[[package]]
|
| 257 |
+
name = "click"
|
| 258 |
+
version = "8.1.7"
|
| 259 |
+
source = { registry = "https://pypi.org/simple" }
|
| 260 |
+
dependencies = [
|
| 261 |
+
{ name = "colorama", marker = "platform_system == 'Windows'" },
|
| 262 |
+
]
|
| 263 |
+
sdist = { url = "https://files.pythonhosted.org/packages/96/d3/f04c7bfcf5c1862a2a5b845c6b2b360488cf47af55dfa79c98f6a6bf98b5/click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de", size = 336121 }
|
| 264 |
+
wheels = [
|
| 265 |
+
{ url = "https://files.pythonhosted.org/packages/00/2e/d53fa4befbf2cfa713304affc7ca780ce4fc1fd8710527771b58311a3229/click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", size = 97941 },
|
| 266 |
+
]
|
| 267 |
+
|
| 268 |
[[package]]
|
| 269 |
name = "colorama"
|
| 270 |
version = "0.4.6"
|
|
|
|
| 779 |
{ name = "bert-score" },
|
| 780 |
{ name = "evaluate" },
|
| 781 |
{ name = "joblib" },
|
| 782 |
+
{ name = "nltk" },
|
| 783 |
{ name = "openai" },
|
| 784 |
{ name = "pandas" },
|
| 785 |
+
{ name = "protobuf" },
|
| 786 |
{ name = "python-dotenv" },
|
| 787 |
{ name = "sacrebleu" },
|
| 788 |
+
{ name = "sentencepiece" },
|
| 789 |
+
{ name = "tiktoken" },
|
| 790 |
{ name = "tqdm" },
|
| 791 |
+
{ name = "transformers" },
|
| 792 |
]
|
| 793 |
|
| 794 |
[package.metadata]
|
|
|
|
| 797 |
{ name = "bert-score", specifier = ">=0.3.13" },
|
| 798 |
{ name = "evaluate", specifier = ">=0.4.3" },
|
| 799 |
{ name = "joblib", specifier = ">=1.4.2" },
|
| 800 |
+
{ name = "nltk", specifier = ">=3.9.1" },
|
| 801 |
{ name = "openai", specifier = ">=1.52.2" },
|
| 802 |
{ name = "pandas", specifier = ">=2.2.3" },
|
| 803 |
+
{ name = "protobuf", specifier = ">=5.28.3" },
|
| 804 |
{ name = "python-dotenv", specifier = ">=1.0.1" },
|
| 805 |
{ name = "sacrebleu", specifier = ">=2.4.3" },
|
| 806 |
+
{ name = "sentencepiece", specifier = ">=0.2.0" },
|
| 807 |
+
{ name = "tiktoken", specifier = ">=0.8.0" },
|
| 808 |
{ name = "tqdm", specifier = ">=4.66.6" },
|
| 809 |
+
{ name = "transformers", specifier = ">=4.46.1" },
|
| 810 |
]
|
| 811 |
|
| 812 |
[[package]]
|
|
|
|
| 1105 |
{ url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263 },
|
| 1106 |
]
|
| 1107 |
|
| 1108 |
+
[[package]]
|
| 1109 |
+
name = "nltk"
|
| 1110 |
+
version = "3.9.1"
|
| 1111 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1112 |
+
dependencies = [
|
| 1113 |
+
{ name = "click" },
|
| 1114 |
+
{ name = "joblib" },
|
| 1115 |
+
{ name = "regex" },
|
| 1116 |
+
{ name = "tqdm" },
|
| 1117 |
+
]
|
| 1118 |
+
sdist = { url = "https://files.pythonhosted.org/packages/3c/87/db8be88ad32c2d042420b6fd9ffd4a149f9a0d7f0e86b3f543be2eeeedd2/nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868", size = 2904691 }
|
| 1119 |
+
wheels = [
|
| 1120 |
+
{ url = "https://files.pythonhosted.org/packages/4d/66/7d9e26593edda06e8cb531874633f7c2372279c3b0f46235539fe546df8b/nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1", size = 1505442 },
|
| 1121 |
+
]
|
| 1122 |
+
|
| 1123 |
[[package]]
|
| 1124 |
name = "numpy"
|
| 1125 |
version = "2.1.2"
|
|
|
|
| 1528 |
{ url = "https://files.pythonhosted.org/packages/3d/b6/e6d98278f2d49b22b4d033c9f792eda783b9ab2094b041f013fc69bcde87/propcache-0.2.0-py3-none-any.whl", hash = "sha256:2ccc28197af5313706511fab3a8b66dcd6da067a1331372c82ea1cb74285e036", size = 11603 },
|
| 1529 |
]
|
| 1530 |
|
| 1531 |
+
[[package]]
|
| 1532 |
+
name = "protobuf"
|
| 1533 |
+
version = "5.28.3"
|
| 1534 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1535 |
+
sdist = { url = "https://files.pythonhosted.org/packages/74/6e/e69eb906fddcb38f8530a12f4b410699972ab7ced4e21524ece9d546ac27/protobuf-5.28.3.tar.gz", hash = "sha256:64badbc49180a5e401f373f9ce7ab1d18b63f7dd4a9cdc43c92b9f0b481cef7b", size = 422479 }
|
| 1536 |
+
wheels = [
|
| 1537 |
+
{ url = "https://files.pythonhosted.org/packages/d1/c5/05163fad52d7c43e124a545f1372d18266db36036377ad29de4271134a6a/protobuf-5.28.3-cp310-abi3-win32.whl", hash = "sha256:0c4eec6f987338617072592b97943fdbe30d019c56126493111cf24344c1cc24", size = 419624 },
|
| 1538 |
+
{ url = "https://files.pythonhosted.org/packages/9c/4c/4563ebe001ff30dca9d7ed12e471fa098d9759712980cde1fd03a3a44fb7/protobuf-5.28.3-cp310-abi3-win_amd64.whl", hash = "sha256:91fba8f445723fcf400fdbe9ca796b19d3b1242cd873907979b9ed71e4afe868", size = 431464 },
|
| 1539 |
+
{ url = "https://files.pythonhosted.org/packages/1c/f2/baf397f3dd1d3e4af7e3f5a0382b868d25ac068eefe1ebde05132333436c/protobuf-5.28.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a3f6857551e53ce35e60b403b8a27b0295f7d6eb63d10484f12bc6879c715687", size = 414743 },
|
| 1540 |
+
{ url = "https://files.pythonhosted.org/packages/85/50/cd61a358ba1601f40e7d38bcfba22e053f40ef2c50d55b55926aecc8fec7/protobuf-5.28.3-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:3fa2de6b8b29d12c61911505d893afe7320ce7ccba4df913e2971461fa36d584", size = 316511 },
|
| 1541 |
+
{ url = "https://files.pythonhosted.org/packages/5d/ae/3257b09328c0b4e59535e497b0c7537d4954038bdd53a2f0d2f49d15a7c4/protobuf-5.28.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:712319fbdddb46f21abb66cd33cb9e491a5763b2febd8f228251add221981135", size = 316624 },
|
| 1542 |
+
{ url = "https://files.pythonhosted.org/packages/ad/c3/2377c159e28ea89a91cf1ca223f827ae8deccb2c9c401e5ca233cd73002f/protobuf-5.28.3-py3-none-any.whl", hash = "sha256:cee1757663fa32a1ee673434fcf3bf24dd54763c79690201208bafec62f19eed", size = 169511 },
|
| 1543 |
+
]
|
| 1544 |
+
|
| 1545 |
[[package]]
|
| 1546 |
name = "pyarrow"
|
| 1547 |
version = "18.0.0"
|
|
|
|
| 1929 |
{ url = "https://files.pythonhosted.org/packages/19/46/5d11dc300feaad285c2f1bd784ff3f689f5e0ab6be49aaf568f3a77019eb/safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:21742b391b859e67b26c0b2ac37f52c9c0944a879a25ad2f9f9f3cd61e7fda8f", size = 606660 },
|
| 1930 |
]
|
| 1931 |
|
| 1932 |
+
[[package]]
|
| 1933 |
+
name = "sentencepiece"
|
| 1934 |
+
version = "0.2.0"
|
| 1935 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1936 |
+
sdist = { url = "https://files.pythonhosted.org/packages/c9/d2/b9c7ca067c26d8ff085d252c89b5f69609ca93fb85a00ede95f4857865d4/sentencepiece-0.2.0.tar.gz", hash = "sha256:a52c19171daaf2e697dc6cbe67684e0fa341b1248966f6aebb541de654d15843", size = 2632106 }
|
| 1937 |
+
wheels = [
|
| 1938 |
+
{ url = "https://files.pythonhosted.org/packages/f6/71/98648c3b64b23edb5403f74bcc906ad21766872a6e1ada26ea3f1eb941ab/sentencepiece-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:188779e1298a1c8b8253c7d3ad729cb0a9891e5cef5e5d07ce4592c54869e227", size = 2408979 },
|
| 1939 |
+
{ url = "https://files.pythonhosted.org/packages/77/9f/7efbaa6d4c0c718a9affbecc536b03ca62f99f421bdffb531c16030e2d2b/sentencepiece-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bed9cf85b296fa2b76fc2547b9cbb691a523864cebaee86304c43a7b4cb1b452", size = 1238845 },
|
| 1940 |
+
{ url = "https://files.pythonhosted.org/packages/1c/e4/c2541027a43ec6962ba9b601805d17ba3f86b38bdeae0e8ac65a2981e248/sentencepiece-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d7b67e724bead13f18db6e1d10b6bbdc454af574d70efbb36f27d90387be1ca3", size = 1181472 },
|
| 1941 |
+
{ url = "https://files.pythonhosted.org/packages/fd/46/316c1ba6c52b97de76aff7b9da678f7afbb52136afb2987c474d95630e65/sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fde4b08cfe237be4484c6c7c2e2c75fb862cfeab6bd5449ce4caeafd97b767a", size = 1259151 },
|
| 1942 |
+
{ url = "https://files.pythonhosted.org/packages/aa/5a/3c48738a0835d76dd06c62b6ac48d39c923cde78dd0f587353bdcbb99851/sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c378492056202d1c48a4979650981635fd97875a00eabb1f00c6a236b013b5e", size = 1355931 },
|
| 1943 |
+
{ url = "https://files.pythonhosted.org/packages/a6/27/33019685023221ca8ed98e8ceb7ae5e166032686fa3662c68f1f1edf334e/sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1380ce6540a368de2ef6d7e6ba14ba8f3258df650d39ba7d833b79ee68a52040", size = 1301537 },
|
| 1944 |
+
{ url = "https://files.pythonhosted.org/packages/ca/e4/55f97cef14293171fef5f96e96999919ab5b4d1ce95b53547ad653d7e3bf/sentencepiece-0.2.0-cp310-cp310-win32.whl", hash = "sha256:a1151d6a6dd4b43e552394aed0edfe9292820272f0194bd56c7c1660a0c06c3d", size = 936747 },
|
| 1945 |
+
{ url = "https://files.pythonhosted.org/packages/85/f4/4ef1a6e0e9dbd8a60780a91df8b7452ada14cfaa0e17b3b8dfa42cecae18/sentencepiece-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:d490142b0521ef22bc1085f061d922a2a6666175bb6b42e588ff95c0db6819b2", size = 991525 },
|
| 1946 |
+
{ url = "https://files.pythonhosted.org/packages/32/43/8f8885168a47a02eba1455bd3f4f169f50ad5b8cebd2402d0f5e20854d04/sentencepiece-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:17982700c4f6dbb55fa3594f3d7e5dd1c8659a274af3738e33c987d2a27c9d5c", size = 2409036 },
|
| 1947 |
+
{ url = "https://files.pythonhosted.org/packages/0f/35/e63ba28062af0a3d688a9f128e407a1a2608544b2f480cb49bf7f4b1cbb9/sentencepiece-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7c867012c0e8bcd5bdad0f791609101cb5c66acb303ab3270218d6debc68a65e", size = 1238921 },
|
| 1948 |
+
{ url = "https://files.pythonhosted.org/packages/de/42/ae30952c4a0bd773e90c9bf2579f5533037c886dfc8ec68133d5694f4dd2/sentencepiece-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fd6071249c74f779c5b27183295b9202f8dedb68034e716784364443879eaa6", size = 1181477 },
|
| 1949 |
+
{ url = "https://files.pythonhosted.org/packages/e3/ac/2f2ab1d60bb2d795d054eebe5e3f24b164bc21b5a9b75fba7968b3b91b5a/sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f90c55a65013cbb8f4d7aab0599bf925cde4adc67ae43a0d323677b5a1c6cb", size = 1259182 },
|
| 1950 |
+
{ url = "https://files.pythonhosted.org/packages/45/fb/14633c6ecf262c468759ffcdb55c3a7ee38fe4eda6a70d75ee7c7d63c58b/sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b293734059ef656dcd65be62ff771507bea8fed0a711b6733976e1ed3add4553", size = 1355537 },
|
| 1951 |
+
{ url = "https://files.pythonhosted.org/packages/fb/12/2f5c8d4764b00033cf1c935b702d3bb878d10be9f0b87f0253495832d85f/sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e58b47f933aca74c6a60a79dcb21d5b9e47416256c795c2d58d55cec27f9551d", size = 1301464 },
|
| 1952 |
+
{ url = "https://files.pythonhosted.org/packages/4e/b1/67afc0bde24f6dcb3acdea0dd8dcdf4b8b0db240f6bacd39378bd32d09f8/sentencepiece-0.2.0-cp311-cp311-win32.whl", hash = "sha256:c581258cf346b327c62c4f1cebd32691826306f6a41d8c4bec43b010dee08e75", size = 936749 },
|
| 1953 |
+
{ url = "https://files.pythonhosted.org/packages/a2/f6/587c62fd21fc988555b85351f50bbde43a51524caafd63bc69240ded14fd/sentencepiece-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:0993dbc665f4113017892f1b87c3904a44d0640eda510abcacdfb07f74286d36", size = 991520 },
|
| 1954 |
+
{ url = "https://files.pythonhosted.org/packages/27/5a/141b227ed54293360a9ffbb7bf8252b4e5efc0400cdeac5809340e5d2b21/sentencepiece-0.2.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ea5f536e32ea8ec96086ee00d7a4a131ce583a1b18d130711707c10e69601cb2", size = 2409370 },
|
| 1955 |
+
{ url = "https://files.pythonhosted.org/packages/2e/08/a4c135ad6fc2ce26798d14ab72790d66e813efc9589fd30a5316a88ca8d5/sentencepiece-0.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d0cb51f53b6aae3c36bafe41e86167c71af8370a039f542c43b0cce5ef24a68c", size = 1239288 },
|
| 1956 |
+
{ url = "https://files.pythonhosted.org/packages/49/0a/2fe387f825ac5aad5a0bfe221904882106cac58e1b693ba7818785a882b6/sentencepiece-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3212121805afc58d8b00ab4e7dd1f8f76c203ddb9dc94aa4079618a31cf5da0f", size = 1181597 },
|
| 1957 |
+
{ url = "https://files.pythonhosted.org/packages/cc/38/e4698ee2293fe4835dc033c49796a39b3eebd8752098f6bd0aa53a14af1f/sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a3149e3066c2a75e0d68a43eb632d7ae728c7925b517f4c05c40f6f7280ce08", size = 1259220 },
|
| 1958 |
+
{ url = "https://files.pythonhosted.org/packages/12/24/fd7ef967c9dad2f6e6e5386d0cadaf65cda8b7be6e3861a9ab3121035139/sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:632f3594d3e7ac8b367bca204cb3fd05a01d5b21455acd097ea4c0e30e2f63d7", size = 1355962 },
|
| 1959 |
+
{ url = "https://files.pythonhosted.org/packages/4f/d2/18246f43ca730bb81918f87b7e886531eda32d835811ad9f4657c54eee35/sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f295105c6bdbb05bd5e1b0cafbd78ff95036f5d3641e7949455a3f4e5e7c3109", size = 1301706 },
|
| 1960 |
+
{ url = "https://files.pythonhosted.org/packages/8a/47/ca237b562f420044ab56ddb4c278672f7e8c866e183730a20e413b38a989/sentencepiece-0.2.0-cp312-cp312-win32.whl", hash = "sha256:fb89f811e5efd18bab141afc3fea3de141c3f69f3fe9e898f710ae7fe3aab251", size = 936941 },
|
| 1961 |
+
{ url = "https://files.pythonhosted.org/packages/c6/97/d159c32642306ee2b70732077632895438867b3b6df282354bd550cf2a67/sentencepiece-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a673a72aab81fef5ebe755c6e0cc60087d1f3a4700835d40537183c1703a45f", size = 991994 },
|
| 1962 |
+
]
|
| 1963 |
+
|
| 1964 |
[[package]]
|
| 1965 |
name = "setuptools"
|
| 1966 |
version = "75.3.0"
|
|
|
|
| 2009 |
{ url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252 },
|
| 2010 |
]
|
| 2011 |
|
| 2012 |
+
[[package]]
|
| 2013 |
+
name = "tiktoken"
|
| 2014 |
+
version = "0.8.0"
|
| 2015 |
+
source = { registry = "https://pypi.org/simple" }
|
| 2016 |
+
dependencies = [
|
| 2017 |
+
{ name = "regex" },
|
| 2018 |
+
{ name = "requests" },
|
| 2019 |
+
]
|
| 2020 |
+
sdist = { url = "https://files.pythonhosted.org/packages/37/02/576ff3a6639e755c4f70997b2d315f56d6d71e0d046f4fb64cb81a3fb099/tiktoken-0.8.0.tar.gz", hash = "sha256:9ccbb2740f24542534369c5635cfd9b2b3c2490754a78ac8831d99f89f94eeb2", size = 35107 }
|
| 2021 |
+
wheels = [
|
| 2022 |
+
{ url = "https://files.pythonhosted.org/packages/c9/ba/a35fad753bbca8ba0cc1b0f3402a70256a110ced7ac332cf84ba89fc87ab/tiktoken-0.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b07e33283463089c81ef1467180e3e00ab00d46c2c4bbcef0acab5f771d6695e", size = 1039905 },
|
| 2023 |
+
{ url = "https://files.pythonhosted.org/packages/91/05/13dab8fd7460391c387b3e69e14bf1e51ff71fe0a202cd2933cc3ea93fb6/tiktoken-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9269348cb650726f44dd3bbb3f9110ac19a8dcc8f54949ad3ef652ca22a38e21", size = 982417 },
|
| 2024 |
+
{ url = "https://files.pythonhosted.org/packages/e9/98/18ec4a8351a6cf4537e40cd6e19a422c10cce1ef00a2fcb716e0a96af58b/tiktoken-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e13f37bc4ef2d012731e93e0fef21dc3b7aea5bb9009618de9a4026844e560", size = 1144915 },
|
| 2025 |
+
{ url = "https://files.pythonhosted.org/packages/2e/28/cf3633018cbcc6deb7805b700ccd6085c9a5a7f72b38974ee0bffd56d311/tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f13d13c981511331eac0d01a59b5df7c0d4060a8be1e378672822213da51e0a2", size = 1177221 },
|
| 2026 |
+
{ url = "https://files.pythonhosted.org/packages/57/81/8a5be305cbd39d4e83a794f9e80c7f2c84b524587b7feb27c797b2046d51/tiktoken-0.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6b2ddbc79a22621ce8b1166afa9f9a888a664a579350dc7c09346a3b5de837d9", size = 1237398 },
|
| 2027 |
+
{ url = "https://files.pythonhosted.org/packages/dc/da/8d1cc3089a83f5cf11c2e489332752981435280285231924557350523a59/tiktoken-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d8c2d0e5ba6453a290b86cd65fc51fedf247e1ba170191715b049dac1f628005", size = 884215 },
|
| 2028 |
+
{ url = "https://files.pythonhosted.org/packages/f6/1e/ca48e7bfeeccaf76f3a501bd84db1fa28b3c22c9d1a1f41af9fb7579c5f6/tiktoken-0.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d622d8011e6d6f239297efa42a2657043aaed06c4f68833550cac9e9bc723ef1", size = 1039700 },
|
| 2029 |
+
{ url = "https://files.pythonhosted.org/packages/8c/f8/f0101d98d661b34534769c3818f5af631e59c36ac6d07268fbfc89e539ce/tiktoken-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2efaf6199717b4485031b4d6edb94075e4d79177a172f38dd934d911b588d54a", size = 982413 },
|
| 2030 |
+
{ url = "https://files.pythonhosted.org/packages/ac/3c/2b95391d9bd520a73830469f80a96e3790e6c0a5ac2444f80f20b4b31051/tiktoken-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5637e425ce1fc49cf716d88df3092048359a4b3bbb7da762840426e937ada06d", size = 1144242 },
|
| 2031 |
+
{ url = "https://files.pythonhosted.org/packages/01/c4/c4a4360de845217b6aa9709c15773484b50479f36bb50419c443204e5de9/tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fb0e352d1dbe15aba082883058b3cce9e48d33101bdaac1eccf66424feb5b47", size = 1176588 },
|
| 2032 |
+
{ url = "https://files.pythonhosted.org/packages/f8/a3/ef984e976822cd6c2227c854f74d2e60cf4cd6fbfca46251199914746f78/tiktoken-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:56edfefe896c8f10aba372ab5706b9e3558e78db39dd497c940b47bf228bc419", size = 1237261 },
|
| 2033 |
+
{ url = "https://files.pythonhosted.org/packages/1e/86/eea2309dc258fb86c7d9b10db536434fc16420feaa3b6113df18b23db7c2/tiktoken-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:326624128590def898775b722ccc327e90b073714227175ea8febbc920ac0a99", size = 884537 },
|
| 2034 |
+
{ url = "https://files.pythonhosted.org/packages/c1/22/34b2e136a6f4af186b6640cbfd6f93400783c9ef6cd550d9eab80628d9de/tiktoken-0.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:881839cfeae051b3628d9823b2e56b5cc93a9e2efb435f4cf15f17dc45f21586", size = 1039357 },
|
| 2035 |
+
{ url = "https://files.pythonhosted.org/packages/04/d2/c793cf49c20f5855fd6ce05d080c0537d7418f22c58e71f392d5e8c8dbf7/tiktoken-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fe9399bdc3f29d428f16a2f86c3c8ec20be3eac5f53693ce4980371c3245729b", size = 982616 },
|
| 2036 |
+
{ url = "https://files.pythonhosted.org/packages/b3/a1/79846e5ef911cd5d75c844de3fa496a10c91b4b5f550aad695c5df153d72/tiktoken-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a58deb7075d5b69237a3ff4bb51a726670419db6ea62bdcd8bd80c78497d7ab", size = 1144011 },
|
| 2037 |
+
{ url = "https://files.pythonhosted.org/packages/26/32/e0e3a859136e95c85a572e4806dc58bf1ddf651108ae8b97d5f3ebe1a244/tiktoken-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2908c0d043a7d03ebd80347266b0e58440bdef5564f84f4d29fb235b5df3b04", size = 1175432 },
|
| 2038 |
+
{ url = "https://files.pythonhosted.org/packages/c7/89/926b66e9025b97e9fbabeaa59048a736fe3c3e4530a204109571104f921c/tiktoken-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:294440d21a2a51e12d4238e68a5972095534fe9878be57d905c476017bff99fc", size = 1236576 },
|
| 2039 |
+
{ url = "https://files.pythonhosted.org/packages/45/e2/39d4aa02a52bba73b2cd21ba4533c84425ff8786cc63c511d68c8897376e/tiktoken-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:d8f3192733ac4d77977432947d563d7e1b310b96497acd3c196c9bddb36ed9db", size = 883824 },
|
| 2040 |
+
{ url = "https://files.pythonhosted.org/packages/e3/38/802e79ba0ee5fcbf240cd624143f57744e5d411d2e9d9ad2db70d8395986/tiktoken-0.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:02be1666096aff7da6cbd7cdaa8e7917bfed3467cd64b38b1f112e96d3b06a24", size = 1039648 },
|
| 2041 |
+
{ url = "https://files.pythonhosted.org/packages/b1/da/24cdbfc302c98663fbea66f5866f7fa1048405c7564ab88483aea97c3b1a/tiktoken-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c94ff53c5c74b535b2cbf431d907fc13c678bbd009ee633a2aca269a04389f9a", size = 982763 },
|
| 2042 |
+
{ url = "https://files.pythonhosted.org/packages/e4/f0/0ecf79a279dfa41fc97d00adccf976ecc2556d3c08ef3e25e45eb31f665b/tiktoken-0.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b231f5e8982c245ee3065cd84a4712d64692348bc609d84467c57b4b72dcbc5", size = 1144417 },
|
| 2043 |
+
{ url = "https://files.pythonhosted.org/packages/ab/d3/155d2d4514f3471a25dc1d6d20549ef254e2aa9bb5b1060809b1d3b03d3a/tiktoken-0.8.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4177faa809bd55f699e88c96d9bb4635d22e3f59d635ba6fd9ffedf7150b9953", size = 1175108 },
|
| 2044 |
+
{ url = "https://files.pythonhosted.org/packages/19/eb/5989e16821ee8300ef8ee13c16effc20dfc26c777d05fbb6825e3c037b81/tiktoken-0.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5376b6f8dc4753cd81ead935c5f518fa0fbe7e133d9e25f648d8c4dabdd4bad7", size = 1236520 },
|
| 2045 |
+
{ url = "https://files.pythonhosted.org/packages/40/59/14b20465f1d1cb89cfbc96ec27e5617b2d41c79da12b5e04e96d689be2a7/tiktoken-0.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:18228d624807d66c87acd8f25fc135665617cab220671eb65b50f5d70fa51f69", size = 883849 },
|
| 2046 |
+
]
|
| 2047 |
+
|
| 2048 |
[[package]]
|
| 2049 |
name = "tokenizers"
|
| 2050 |
version = "0.20.1"
|