Spaces:
Running
Running
David Pomerenke
committed on
Commit
·
29c8ef6
1
Parent(s):
56081d8
Make a map
Browse files- app.py +215 -8
- evals.py +16 -2
- pyproject.toml +1 -0
- requirements.txt +2 -0
- results.json +571 -29
- uv.lock +11 -0
app.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
| 1 |
import json
|
| 2 |
|
| 3 |
import gradio as gr
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
import plotly.graph_objects as go
|
|
|
|
| 6 |
|
| 7 |
with open("results.json") as f:
|
| 8 |
results = json.load(f)
|
|
@@ -157,10 +159,17 @@ def create_model_comparison_plot(results):
|
|
| 157 |
fig = go.Figure(data=traces)
|
| 158 |
fig.update_layout(
|
| 159 |
title="BLEU Scores by Model and Language",
|
| 160 |
-
xaxis_title=
|
| 161 |
yaxis_title="BLEU Score",
|
| 162 |
barmode="group",
|
| 163 |
height=500,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
)
|
| 165 |
return fig
|
| 166 |
|
|
@@ -175,10 +184,18 @@ def create_language_stats_df(results):
|
|
| 175 |
lang["scores"] or [{"bleu": None, "model": None}], key=lambda x: x["bleu"]
|
| 176 |
)
|
| 177 |
|
| 178 |
-
model = best_score[
|
| 179 |
-
model_name = model.split(
|
| 180 |
-
model_link =
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
row = {
|
| 183 |
"Language": f"**{lang['language_name']}**",
|
| 184 |
"Speakers (M)": round(lang["speakers"] / 1_000_000, 1),
|
|
@@ -199,7 +216,15 @@ def create_language_stats_df(results):
|
|
| 199 |
value=df,
|
| 200 |
label="Language Results",
|
| 201 |
show_search="search",
|
| 202 |
-
datatype=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
)
|
| 204 |
|
| 205 |
|
|
@@ -224,7 +249,7 @@ def create_scatter_plot(results):
|
|
| 224 |
)
|
| 225 |
|
| 226 |
fig.update_layout(
|
| 227 |
-
title=
|
| 228 |
xaxis_title="Number of Speakers (Millions)",
|
| 229 |
yaxis_title="Average BLEU Score",
|
| 230 |
height=500,
|
|
@@ -237,6 +262,186 @@ def create_scatter_plot(results):
|
|
| 237 |
return fig
|
| 238 |
|
| 239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
# Create the visualization components
|
| 241 |
with gr.Blocks(title="AI Language Translation Benchmark") as demo:
|
| 242 |
gr.Markdown("# AI Language Translation Benchmark")
|
|
@@ -246,11 +451,13 @@ with gr.Blocks(title="AI Language Translation Benchmark") as demo:
|
|
| 246 |
|
| 247 |
bar_plot = create_model_comparison_plot(results)
|
| 248 |
scatter_plot = create_scatter_plot(results)
|
|
|
|
| 249 |
|
| 250 |
create_leaderboard_df(results)
|
| 251 |
gr.Plot(value=bar_plot, label="Model Comparison")
|
| 252 |
create_language_stats_df(results)
|
| 253 |
-
gr.Plot(value=scatter_plot, label="
|
|
|
|
| 254 |
|
| 255 |
gr.Markdown(
|
| 256 |
"""
|
|
|
|
| 1 |
import json
|
| 2 |
|
| 3 |
import gradio as gr
|
| 4 |
+
import numpy as np
|
| 5 |
import pandas as pd
|
| 6 |
import plotly.graph_objects as go
|
| 7 |
+
import pycountry
|
| 8 |
|
| 9 |
with open("results.json") as f:
|
| 10 |
results = json.load(f)
|
|
|
|
| 159 |
fig = go.Figure(data=traces)
|
| 160 |
fig.update_layout(
|
| 161 |
title="BLEU Scores by Model and Language",
|
| 162 |
+
xaxis_title=None,
|
| 163 |
yaxis_title="BLEU Score",
|
| 164 |
barmode="group",
|
| 165 |
height=500,
|
| 166 |
+
legend=dict(
|
| 167 |
+
orientation="h", # horizontal orientation
|
| 168 |
+
yanchor="bottom",
|
| 169 |
+
y=-0.3, # position below plot
|
| 170 |
+
xanchor="center",
|
| 171 |
+
x=0.5, # center horizontally
|
| 172 |
+
),
|
| 173 |
)
|
| 174 |
return fig
|
| 175 |
|
|
|
|
| 184 |
lang["scores"] or [{"bleu": None, "model": None}], key=lambda x: x["bleu"]
|
| 185 |
)
|
| 186 |
|
| 187 |
+
model = best_score["model"]
|
| 188 |
+
model_name = model.split("/")[-1] if model else "N/A"
|
| 189 |
+
model_link = (
|
| 190 |
+
f"<a href='https://openrouter.ai/{model}' style='text-decoration: none; color: inherit;'>{model_name}</a>"
|
| 191 |
+
if model
|
| 192 |
+
else "N/A"
|
| 193 |
+
)
|
| 194 |
+
commonvoice_link = (
|
| 195 |
+
f"<!--{lang['commonvoice_hours']:07} (for sorting)--> <a href='https://commonvoice.mozilla.org/{lang['commonvoice_locale']}/speak' style='text-decoration: none; color: inherit;'>🎙️ {lang['commonvoice_hours']}</a>"
|
| 196 |
+
if lang["commonvoice_hours"]
|
| 197 |
+
else "N/A"
|
| 198 |
+
)
|
| 199 |
row = {
|
| 200 |
"Language": f"**{lang['language_name']}**",
|
| 201 |
"Speakers (M)": round(lang["speakers"] / 1_000_000, 1),
|
|
|
|
| 216 |
value=df,
|
| 217 |
label="Language Results",
|
| 218 |
show_search="search",
|
| 219 |
+
datatype=[
|
| 220 |
+
"markdown",
|
| 221 |
+
"number",
|
| 222 |
+
"number",
|
| 223 |
+
"number",
|
| 224 |
+
"markdown",
|
| 225 |
+
"number",
|
| 226 |
+
"markdown",
|
| 227 |
+
],
|
| 228 |
)
|
| 229 |
|
| 230 |
|
|
|
|
| 249 |
)
|
| 250 |
|
| 251 |
fig.update_layout(
|
| 252 |
+
title=None,
|
| 253 |
xaxis_title="Number of Speakers (Millions)",
|
| 254 |
yaxis_title="Average BLEU Score",
|
| 255 |
height=500,
|
|
|
|
| 262 |
return fig
|
| 263 |
|
| 264 |
|
| 265 |
+
def format_number(n):
    """Format a count compactly with a K/M suffix.

    Values of at least one million are shown in millions with one decimal
    ("2.5M"), values of at least one thousand in whole thousands ("12K"),
    and anything smaller is returned via ``str(n)``.

    Values just below one million whose thousands figure rounds up to 1000
    are rendered as "1.0M" rather than the misleading "1000K".
    """
    if n >= 1_000_000:
        return f"{n / 1_000_000:.1f}M"
    if n >= 1_000:
        thousands = f"{n / 1_000:.0f}"
        # Rounding can push e.g. 999_999 up to "1000"; roll over to millions.
        if thousands == "1000":
            return f"{n / 1_000_000:.1f}M"
        return f"{thousands}K"
    return str(n)
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
def create_world_map(results):
    """Build a choropleth map of speaker-weighted BLEU scores per country.

    Each entry of ``results`` that has a ``"population"`` mapping (keyed by
    ISO 3166-1 alpha-2 country code, valued by number of speakers) and a
    non-null ``"bleu"`` score contributes to every country it lists. The
    per-country score is the average of the language BLEU scores weighted by
    the number of speakers in that country.

    Country codes that pycountry does not recognise are skipped, as are
    countries whose accumulated speaker count is zero (they would otherwise
    cause a division by zero).

    Args:
        results: iterable of per-language dicts; the keys read here are
            ``"population"``, ``"bleu"`` and ``"language_name"``.

    Returns:
        A ``plotly.graph_objects.Figure`` containing the choropleth.
    """

    def make_black_bar(value, max_width=10):
        """Render ``value`` (expected in 0..1) as a bar of black/white squares."""
        filled = int(value * max_width)
        # Clamp so out-of-range values cannot produce a malformed bar.
        filled = max(0, min(filled, max_width))
        return "⬛️" * filled + "⬜️" * (max_width - filled)

    def make_colored_bar(value, max_width=10):
        """Render a normalised value as a coloured bar of Unicode blocks.

        ``value`` is the already-normalised score passed by the caller (the
        callers below map BLEU 0.2-0.4 onto 0-1), and the colour thresholds
        apply to that normalised value:

        🟦 for high values (>0.35)
        🟨 for medium values (0.25-0.35)
        🟥 for low values (<=0.25)
        ⬜ for empty space
        """
        filled = int(value * max_width)
        filled = max(0, min(filled, max_width))
        empty = max_width - filled

        if value > 0.35:
            block = "🟦"
        elif value > 0.25:
            block = "🟨"
        else:
            block = "🟥"
        return block * filled + "⬜" * empty

    # First pass: accumulate speakers and speaker-weighted BLEU per country.
    country_data = {}
    for lang in results:
        if "population" not in lang or lang.get("bleu") is None:
            continue

        for country_code, speakers in lang["population"].items():
            # Convert alpha_2 (2-letter) to alpha_3 (3-letter) code; only the
            # lookup is guarded so unrelated errors are not silently hidden.
            try:
                country = pycountry.countries.get(alpha_2=country_code)
                iso3_code = country.alpha_3
            except (KeyError, AttributeError):
                # Skip invalid or unrecognized country codes
                continue

            entry = country_data.setdefault(
                iso3_code,
                {
                    "total_speakers": 0,
                    "weighted_bleu_sum": 0,
                    "languages": [],
                },
            )
            entry["total_speakers"] += speakers
            entry["weighted_bleu_sum"] += speakers * lang["bleu"]
            entry["languages"].append(
                {
                    "name": lang["language_name"],
                    "speakers": speakers,
                    "bleu": lang["bleu"],
                }
            )

    # Second pass: calculate final weighted averages and prepare hover text.
    countries = []
    bleu_scores = []
    hover_texts = []

    for country_code, data in country_data.items():
        total_speakers = data["total_speakers"]
        if not total_speakers:
            # No speakers recorded: a weighted average is undefined.
            continue
        weighted_avg = data["weighted_bleu_sum"] / total_speakers

        try:
            country_name = pycountry.countries.get(alpha_3=country_code).name
        except AttributeError:
            country_name = country_code

        # Sort languages by number of speakers
        langs = sorted(data["languages"], key=lambda x: x["speakers"], reverse=True)

        # Take top 5 languages and summarize the rest
        main_langs = langs[:5]
        other_langs = langs[5:]

        # Create language rows with bars
        lang_rows = []
        for item in main_langs:
            share = item["speakers"] / total_speakers
            speaker_bar = make_black_bar(share)
            # Scale BLEU from 0.2-0.4 to 0-1 for the bar.
            bleu_bar = make_colored_bar((item["bleu"] - 0.2) / 0.2)

            lang_rows.append(
                f"<b>{item['name']}</b><br>"
                f"{speaker_bar} {format_number(item['speakers'])} speakers<br>"
                f"{bleu_bar} {item['bleu']:.3f} BLEU<br>"
            )

        # Add summary for other languages if any
        if other_langs:
            other_speakers = sum(item["speakers"] for item in other_langs)
            # NOTE(review): this is an unweighted mean, unlike the
            # speaker-weighted country average above - confirm intent.
            other_avg_bleu = sum(item["bleu"] for item in other_langs) / len(
                other_langs
            )

            speaker_bar = make_black_bar(other_speakers / total_speakers)
            bleu_bar = make_colored_bar((other_avg_bleu - 0.2) / 0.2)

            lang_rows.append(
                f"<b>+{len(other_langs)} other languages</b><br>"
                f"{speaker_bar} {format_number(other_speakers)} speakers<br>"
                f"{bleu_bar} {other_avg_bleu:.3f} BLEU<br>"
            )

        # Create overall BLEU visualization
        bleu_percentage = (weighted_avg - 0.2) / 0.2  # Scale from 0.2-0.4 to 0-1
        overall_bleu_bar = make_colored_bar(bleu_percentage)

        hover_text = (
            f"<b>{country_name}</b><br><br>"
            f"{format_number(total_speakers)} speakers*<br>"
            f"{overall_bleu_bar} {weighted_avg:.3f} BLEU<br><br>"
            "<b>Languages:</b><br><br>"
            f"{'<br>'.join(lang_rows)}"
        )

        countries.append(country_code)
        bleu_scores.append(weighted_avg)
        hover_texts.append(hover_text)

    # Create the choropleth map
    fig = go.Figure(
        data=go.Choropleth(
            locations=countries,
            locationmode="ISO-3",
            z=bleu_scores,
            text=hover_texts,
            hoverinfo="text",
            colorscale=[[0, "#ff9999"], [1, "#99ccff"]],
            colorbar=dict(
                title="BLEU Score",
                orientation="h",  # horizontal orientation
                y=-0.2,  # position below map
                yanchor="bottom",
                len=0.5,  # length of colorbar
                x=0.5,  # center horizontally
                xanchor="center",
                thickness=20,  # make it a bit thicker when horizontal
            ),
            zmin=0.2,
            zmax=0.5,
        )
    )

    fig.update_layout(
        title=dict(text="BLEU Score by Country", x=0.5, xanchor="center"),
        geo=dict(
            showframe=True,
            showcoastlines=True,
            projection_type="equal earth",
            showland=True,
            landcolor="#f8f9fa",
            coastlinecolor="#e0e0e0",
            countrycolor="#e0e0e0",
        ),
        height=600,
        margin=dict(l=0, r=0, t=30, b=0),
        paper_bgcolor="white",
        hoverlabel=dict(
            bgcolor="beige",
            font_size=12,
        ),
    )

    return fig
|
| 443 |
+
|
| 444 |
+
|
| 445 |
# Create the visualization components
|
| 446 |
with gr.Blocks(title="AI Language Translation Benchmark") as demo:
|
| 447 |
gr.Markdown("# AI Language Translation Benchmark")
|
|
|
|
| 451 |
|
| 452 |
bar_plot = create_model_comparison_plot(results)
|
| 453 |
scatter_plot = create_scatter_plot(results)
|
| 454 |
+
world_map = create_world_map(results)
|
| 455 |
|
| 456 |
create_leaderboard_df(results)
|
| 457 |
gr.Plot(value=bar_plot, label="Model Comparison")
|
| 458 |
create_language_stats_df(results)
|
| 459 |
+
gr.Plot(value=scatter_plot, label="Speaker population vs BLEU")
|
| 460 |
+
gr.Plot(value=world_map, container=False, elem_classes="fullwidth-plot")
|
| 461 |
|
| 462 |
gr.Markdown(
|
| 463 |
"""
|
evals.py
CHANGED
|
@@ -62,6 +62,15 @@ scripts = pd.read_csv("data/ScriptCodes.csv").rename(
|
|
| 62 |
)
|
| 63 |
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
def script_name(iso15924):
|
| 66 |
return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
|
| 67 |
|
|
@@ -246,8 +255,13 @@ async def main():
|
|
| 246 |
"speakers": language.speakers,
|
| 247 |
"scores": results_for_language,
|
| 248 |
"bleu": mean([s["bleu"] for s in results_for_language]),
|
| 249 |
-
"commonvoice_hours": language.commonvoice_hours
|
| 250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
}
|
| 252 |
)
|
| 253 |
with open("results.json", "w") as f:
|
|
|
|
| 62 |
)
|
| 63 |
|
| 64 |
|
| 65 |
+
def population(bcp_47):
    """Return the speaker population per region for one language tag.

    Scans ``LANGUAGE_SPEAKING_POPULATION`` for keys of the form
    ``"<bcp_47>-<REGION>"``, where ``<REGION>`` is two uppercase ASCII
    letters, and returns a dict mapping each region code to its population
    value.

    The tag is regex-escaped before being interpolated into the pattern, and
    the region is taken from the match itself. A tag that contains a hyphen
    (e.g. a script subtag) therefore still yields the bare two-letter region
    as the key, instead of a key with the script subtag left in front.

    Args:
        bcp_47: language tag used as the key prefix.

    Returns:
        dict of region code -> population; empty if nothing matches.
    """
    # Compile once; the pattern does not change across the loop.
    region_pattern = re.compile(rf"^{re.escape(bcp_47)}-([A-Z]{{2}})$")
    items = {}
    for tag, pop in LANGUAGE_SPEAKING_POPULATION.items():
        found = region_pattern.match(tag)
        if found:
            items[found.group(1)] = pop
    return items
|
| 72 |
+
|
| 73 |
+
|
| 74 |
def script_name(iso15924):
|
| 75 |
return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
|
| 76 |
|
|
|
|
| 255 |
"speakers": language.speakers,
|
| 256 |
"scores": results_for_language,
|
| 257 |
"bleu": mean([s["bleu"] for s in results_for_language]),
|
| 258 |
+
"commonvoice_hours": language.commonvoice_hours
|
| 259 |
+
if not pd.isna(language.commonvoice_hours)
|
| 260 |
+
else None,
|
| 261 |
+
"commonvoice_locale": language.commonvoice_locale
|
| 262 |
+
if not pd.isna(language.commonvoice_locale)
|
| 263 |
+
else None,
|
| 264 |
+
"population": population(language.bcp_47),
|
| 265 |
}
|
| 266 |
)
|
| 267 |
with open("results.json", "w") as f:
|
pyproject.toml
CHANGED
|
@@ -8,6 +8,7 @@ dependencies = [
|
|
| 8 |
"gradio>=5.16.2",
|
| 9 |
"pandas>=2.2.3",
|
| 10 |
"plotly>=6.0.0",
|
|
|
|
| 11 |
]
|
| 12 |
|
| 13 |
[tool.uv]
|
|
|
|
| 8 |
"gradio>=5.16.2",
|
| 9 |
"pandas>=2.2.3",
|
| 10 |
"plotly>=6.0.0",
|
| 11 |
+
"pycountry>=24.6.1",
|
| 12 |
]
|
| 13 |
|
| 14 |
[tool.uv]
|
requirements.txt
CHANGED
|
@@ -88,6 +88,8 @@ pillow==11.1.0
|
|
| 88 |
# via gradio
|
| 89 |
plotly==6.0.0
|
| 90 |
# via languagebench (pyproject.toml)
|
|
|
|
|
|
|
| 91 |
pydantic==2.10.6
|
| 92 |
# via
|
| 93 |
# fastapi
|
|
|
|
| 88 |
# via gradio
|
| 89 |
plotly==6.0.0
|
| 90 |
# via languagebench (pyproject.toml)
|
| 91 |
+
pycountry==24.6.1
|
| 92 |
+
# via languagebench (pyproject.toml)
|
| 93 |
pydantic==2.10.6
|
| 94 |
# via
|
| 95 |
# fastapi
|
results.json
CHANGED
|
@@ -31,7 +31,164 @@
|
|
| 31 |
],
|
| 32 |
"bleu": 0.5035795595158651,
|
| 33 |
"commonvoice_hours": 2649.0,
|
| 34 |
-
"commonvoice_locale": "en"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"language_name": "Chinese",
|
|
@@ -45,7 +202,29 @@
|
|
| 45 |
],
|
| 46 |
"bleu": 0.35763875438716014,
|
| 47 |
"commonvoice_hours": 422.0,
|
| 48 |
-
"commonvoice_locale": "zh-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
},
|
| 50 |
{
|
| 51 |
"language_name": "Hindi",
|
|
@@ -59,7 +238,15 @@
|
|
| 59 |
],
|
| 60 |
"bleu": 0.33760351976648345,
|
| 61 |
"commonvoice_hours": 16.0,
|
| 62 |
-
"commonvoice_locale": "hi"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
},
|
| 64 |
{
|
| 65 |
"language_name": "Spanish",
|
|
@@ -73,7 +260,48 @@
|
|
| 73 |
],
|
| 74 |
"bleu": 0.3600460831160618,
|
| 75 |
"commonvoice_hours": 446.0,
|
| 76 |
-
"commonvoice_locale": "es"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
},
|
| 78 |
{
|
| 79 |
"language_name": "Arabic",
|
|
@@ -87,7 +315,47 @@
|
|
| 87 |
],
|
| 88 |
"bleu": 0.3046598747480405,
|
| 89 |
"commonvoice_hours": 91.0,
|
| 90 |
-
"commonvoice_locale": "ar"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
},
|
| 92 |
{
|
| 93 |
"language_name": "Urdu",
|
|
@@ -101,7 +369,14 @@
|
|
| 101 |
],
|
| 102 |
"bleu": 0.331647033312127,
|
| 103 |
"commonvoice_hours": 76.0,
|
| 104 |
-
"commonvoice_locale": "ur"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
},
|
| 106 |
{
|
| 107 |
"language_name": "French",
|
|
@@ -115,7 +390,71 @@
|
|
| 115 |
],
|
| 116 |
"bleu": 0.3141809404018014,
|
| 117 |
"commonvoice_hours": 1051.0,
|
| 118 |
-
"commonvoice_locale": "fr"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
},
|
| 120 |
{
|
| 121 |
"language_name": "Bangla",
|
|
@@ -129,7 +468,14 @@
|
|
| 129 |
],
|
| 130 |
"bleu": 0.27472181972977344,
|
| 131 |
"commonvoice_hours": 49.0,
|
| 132 |
-
"commonvoice_locale": "bn"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"language_name": "Portuguese",
|
|
@@ -163,7 +509,25 @@
|
|
| 163 |
],
|
| 164 |
"bleu": 0.367787171884892,
|
| 165 |
"commonvoice_hours": 176.0,
|
| 166 |
-
"commonvoice_locale": "pt"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
},
|
| 168 |
{
|
| 169 |
"language_name": "Punjabi",
|
|
@@ -197,7 +561,15 @@
|
|
| 197 |
],
|
| 198 |
"bleu": 0.31594664710428266,
|
| 199 |
"commonvoice_hours": 2.3,
|
| 200 |
-
"commonvoice_locale": "pa-IN"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
},
|
| 202 |
{
|
| 203 |
"language_name": "Russian",
|
|
@@ -211,7 +583,32 @@
|
|
| 211 |
],
|
| 212 |
"bleu": 0.2920291935463745,
|
| 213 |
"commonvoice_hours": 241.0,
|
| 214 |
-
"commonvoice_locale": "ru"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
},
|
| 216 |
{
|
| 217 |
"language_name": "Swahili",
|
|
@@ -245,7 +642,18 @@
|
|
| 245 |
],
|
| 246 |
"bleu": 0.3018786362743097,
|
| 247 |
"commonvoice_hours": 411.0,
|
| 248 |
-
"commonvoice_locale": "sw"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"language_name": "Indonesian",
|
|
@@ -279,7 +687,11 @@
|
|
| 279 |
],
|
| 280 |
"bleu": 0.31132422822400946,
|
| 281 |
"commonvoice_hours": 33.0,
|
| 282 |
-
"commonvoice_locale": "id"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
},
|
| 284 |
{
|
| 285 |
"language_name": "German",
|
|
@@ -313,7 +725,36 @@
|
|
| 313 |
],
|
| 314 |
"bleu": 0.3992689214831344,
|
| 315 |
"commonvoice_hours": 1357.0,
|
| 316 |
-
"commonvoice_locale": "de"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
},
|
| 318 |
{
|
| 319 |
"language_name": "Japanese",
|
|
@@ -327,7 +768,12 @@
|
|
| 327 |
],
|
| 328 |
"bleu": 0.2954810072264808,
|
| 329 |
"commonvoice_hours": 222.0,
|
| 330 |
-
"commonvoice_locale": "ja"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
},
|
| 332 |
{
|
| 333 |
"language_name": "Telugu",
|
|
@@ -341,7 +787,10 @@
|
|
| 341 |
],
|
| 342 |
"bleu": 0.37949545228579734,
|
| 343 |
"commonvoice_hours": 0.3,
|
| 344 |
-
"commonvoice_locale": "te"
|
|
|
|
|
|
|
|
|
|
| 345 |
},
|
| 346 |
{
|
| 347 |
"language_name": "Marathi",
|
|
@@ -355,7 +804,10 @@
|
|
| 355 |
],
|
| 356 |
"bleu": 0.2852384896861461,
|
| 357 |
"commonvoice_hours": 20.0,
|
| 358 |
-
"commonvoice_locale": "mr"
|
|
|
|
|
|
|
|
|
|
| 359 |
},
|
| 360 |
{
|
| 361 |
"language_name": "Javanese",
|
|
@@ -389,7 +841,11 @@
|
|
| 389 |
],
|
| 390 |
"bleu": 0.2505244065073906,
|
| 391 |
"commonvoice_hours": 0.0,
|
| 392 |
-
"commonvoice_locale": "jv"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
},
|
| 394 |
{
|
| 395 |
"language_name": "Vietnamese",
|
|
@@ -403,7 +859,13 @@
|
|
| 403 |
],
|
| 404 |
"bleu": 0.2956750563565745,
|
| 405 |
"commonvoice_hours": 5.9,
|
| 406 |
-
"commonvoice_locale": "vi"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
},
|
| 408 |
{
|
| 409 |
"language_name": "Tamil",
|
|
@@ -417,7 +879,17 @@
|
|
| 417 |
],
|
| 418 |
"bleu": 0.27547489589987734,
|
| 419 |
"commonvoice_hours": 234.0,
|
| 420 |
-
"commonvoice_locale": "ta"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 421 |
},
|
| 422 |
{
|
| 423 |
"language_name": "Persian",
|
|
@@ -431,7 +903,18 @@
|
|
| 431 |
],
|
| 432 |
"bleu": 0.2858012364771329,
|
| 433 |
"commonvoice_hours": 370.0,
|
| 434 |
-
"commonvoice_locale": "fa"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
},
|
| 436 |
{
|
| 437 |
"language_name": "Turkish",
|
|
@@ -465,7 +948,21 @@
|
|
| 465 |
],
|
| 466 |
"bleu": 0.30402386618673855,
|
| 467 |
"commonvoice_hours": 127.0,
|
| 468 |
-
"commonvoice_locale": "tr"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 469 |
},
|
| 470 |
{
|
| 471 |
"language_name": "Cantonese",
|
|
@@ -499,7 +996,12 @@
|
|
| 499 |
],
|
| 500 |
"bleu": 0.27975991005230577,
|
| 501 |
"commonvoice_hours": 203.0,
|
| 502 |
-
"commonvoice_locale": "yue"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
},
|
| 504 |
{
|
| 505 |
"language_name": "Korean",
|
|
@@ -513,7 +1015,16 @@
|
|
| 513 |
],
|
| 514 |
"bleu": 0.24501349273295708,
|
| 515 |
"commonvoice_hours": 1.7,
|
| 516 |
-
"commonvoice_locale": "ko"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 517 |
},
|
| 518 |
{
|
| 519 |
"language_name": "Italian",
|
|
@@ -527,7 +1038,24 @@
|
|
| 527 |
],
|
| 528 |
"bleu": 0.3273249067267197,
|
| 529 |
"commonvoice_hours": 362.0,
|
| 530 |
-
"commonvoice_locale": "it"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
},
|
| 532 |
{
|
| 533 |
"language_name": "Filipino",
|
|
@@ -561,7 +1089,12 @@
|
|
| 561 |
],
|
| 562 |
"bleu": 0.3353425581350746,
|
| 563 |
"commonvoice_hours": 0.0,
|
| 564 |
-
"commonvoice_locale": "tl"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 565 |
},
|
| 566 |
{
|
| 567 |
"language_name": "Egyptian Arabic",
|
|
@@ -574,8 +1107,11 @@
|
|
| 574 |
}
|
| 575 |
],
|
| 576 |
"bleu": 0.23431638822117362,
|
| 577 |
-
"commonvoice_hours":
|
| 578 |
-
"commonvoice_locale":
|
|
|
|
|
|
|
|
|
|
| 579 |
},
|
| 580 |
{
|
| 581 |
"language_name": "Gujarati",
|
|
@@ -589,6 +1125,12 @@
|
|
| 589 |
],
|
| 590 |
"bleu": 0.27834507803114356,
|
| 591 |
"commonvoice_hours": 0.0,
|
| 592 |
-
"commonvoice_locale": "gu-IN"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 593 |
}
|
| 594 |
]
|
|
|
|
| 31 |
],
|
| 32 |
"bleu": 0.5035795595158651,
|
| 33 |
"commonvoice_hours": 2649.0,
|
| 34 |
+
"commonvoice_locale": "en",
|
| 35 |
+
"population": {
|
| 36 |
+
"AC": 931,
|
| 37 |
+
"AE": 4996040,
|
| 38 |
+
"AG": 84434,
|
| 39 |
+
"AI": 17186,
|
| 40 |
+
"AQ": 300,
|
| 41 |
+
"AR": 3183537,
|
| 42 |
+
"AS": 47954,
|
| 43 |
+
"AT": 6467398,
|
| 44 |
+
"AU": 24447840,
|
| 45 |
+
"AW": 2986,
|
| 46 |
+
"BA": 1726016,
|
| 47 |
+
"BB": 294560,
|
| 48 |
+
"BD": 29277180,
|
| 49 |
+
"BE": 6915213,
|
| 50 |
+
"BG": 1741725,
|
| 51 |
+
"BI": 6289,
|
| 52 |
+
"BM": 66010,
|
| 53 |
+
"BN": 7896,
|
| 54 |
+
"BR": 16937280,
|
| 55 |
+
"BS": 337721,
|
| 56 |
+
"BT": 86055,
|
| 57 |
+
"BV": 1,
|
| 58 |
+
"BW": 1876956,
|
| 59 |
+
"BZ": 399598,
|
| 60 |
+
"CA": 32416926,
|
| 61 |
+
"CC": 101,
|
| 62 |
+
"CH": 5126434,
|
| 63 |
+
"CK": 8574,
|
| 64 |
+
"CL": 1727746,
|
| 65 |
+
"CM": 10543100,
|
| 66 |
+
"CN": 62731,
|
| 67 |
+
"CP": 1,
|
| 68 |
+
"CQ": 482,
|
| 69 |
+
"CX": 1389,
|
| 70 |
+
"CY": 924676,
|
| 71 |
+
"CZ": 2889675,
|
| 72 |
+
"DE": 51302208,
|
| 73 |
+
"DG": 495,
|
| 74 |
+
"DK": 5047693,
|
| 75 |
+
"DM": 69788,
|
| 76 |
+
"DO": 7980,
|
| 77 |
+
"DZ": 3008103,
|
| 78 |
+
"EE": 614310,
|
| 79 |
+
"EG": 36443400,
|
| 80 |
+
"ER": 3587908,
|
| 81 |
+
"ES": 12003792,
|
| 82 |
+
"ET": 46488590,
|
| 83 |
+
"FI": 3900169,
|
| 84 |
+
"FJ": 879816,
|
| 85 |
+
"FK": 2814,
|
| 86 |
+
"FM": 58389,
|
| 87 |
+
"FR": 26460798,
|
| 88 |
+
"GB": 64445878,
|
| 89 |
+
"GD": 108570,
|
| 90 |
+
"GG": 67052,
|
| 91 |
+
"GH": 6161442,
|
| 92 |
+
"GI": 23665,
|
| 93 |
+
"GM": 869600,
|
| 94 |
+
"GR": 5409621,
|
| 95 |
+
"GS": 20,
|
| 96 |
+
"GU": 153321,
|
| 97 |
+
"GY": 750204,
|
| 98 |
+
"HK": 3697454,
|
| 99 |
+
"HM": 1,
|
| 100 |
+
"HN": 40635,
|
| 101 |
+
"HR": 2071598,
|
| 102 |
+
"HU": 1954366,
|
| 103 |
+
"IE": 5073039,
|
| 104 |
+
"IL": 7374158,
|
| 105 |
+
"IM": 90499,
|
| 106 |
+
"IN": 251957100,
|
| 107 |
+
"IO": 3500,
|
| 108 |
+
"IQ": 13605445,
|
| 109 |
+
"IT": 21216918,
|
| 110 |
+
"JE": 96019,
|
| 111 |
+
"JM": 2752399,
|
| 112 |
+
"JO": 4869270,
|
| 113 |
+
"KE": 10170301,
|
| 114 |
+
"KI": 111796,
|
| 115 |
+
"KN": 52745,
|
| 116 |
+
"KY": 60705,
|
| 117 |
+
"KZ": 2863785,
|
| 118 |
+
"LB": 2187844,
|
| 119 |
+
"LC": 149838,
|
| 120 |
+
"LK": 2288920,
|
| 121 |
+
"LR": 4210839,
|
| 122 |
+
"LS": 531719,
|
| 123 |
+
"LT": 1037955,
|
| 124 |
+
"LU": 351893,
|
| 125 |
+
"LV": 865366,
|
| 126 |
+
"MA": 4978638,
|
| 127 |
+
"MG": 4852026,
|
| 128 |
+
"MH": 72463,
|
| 129 |
+
"MO": 14133,
|
| 130 |
+
"MP": 49890,
|
| 131 |
+
"MS": 3492,
|
| 132 |
+
"MT": 402395,
|
| 133 |
+
"MU": 993146,
|
| 134 |
+
"MV": 293928,
|
| 135 |
+
"MW": 13353858,
|
| 136 |
+
"MX": 16724500,
|
| 137 |
+
"MY": 6856941,
|
| 138 |
+
"NA": 184105,
|
| 139 |
+
"NF": 1678,
|
| 140 |
+
"NG": 113434840,
|
| 141 |
+
"NL": 15552360,
|
| 142 |
+
"NP": 909837,
|
| 143 |
+
"NR": 9350,
|
| 144 |
+
"NU": 1120,
|
| 145 |
+
"NZ": 4826970,
|
| 146 |
+
"PA": 545171,
|
| 147 |
+
"PG": 3629730,
|
| 148 |
+
"PH": 69875840,
|
| 149 |
+
"PK": 116750500,
|
| 150 |
+
"PL": 12633159,
|
| 151 |
+
"PM": 187,
|
| 152 |
+
"PN": 46,
|
| 153 |
+
"PR": 1562644,
|
| 154 |
+
"PT": 2781729,
|
| 155 |
+
"PW": 1887,
|
| 156 |
+
"RO": 6603899,
|
| 157 |
+
"RW": 1906860,
|
| 158 |
+
"SB": 685097,
|
| 159 |
+
"SC": 36473,
|
| 160 |
+
"SD": 27792576,
|
| 161 |
+
"SE": 8774150,
|
| 162 |
+
"SG": 5774984,
|
| 163 |
+
"SH": 5425,
|
| 164 |
+
"SI": 1240581,
|
| 165 |
+
"SK": 1414556,
|
| 166 |
+
"SL": 2318726,
|
| 167 |
+
"SS": 2851524,
|
| 168 |
+
"SX": 29816,
|
| 169 |
+
"SZ": 883584,
|
| 170 |
+
"TA": 272,
|
| 171 |
+
"TC": 54807,
|
| 172 |
+
"TH": 18623898,
|
| 173 |
+
"TK": 1285,
|
| 174 |
+
"TO": 29707,
|
| 175 |
+
"TR": 13942975,
|
| 176 |
+
"TT": 1063735,
|
| 177 |
+
"TV": 1066,
|
| 178 |
+
"TZ": 40401432,
|
| 179 |
+
"UG": 1686867,
|
| 180 |
+
"UM": 316,
|
| 181 |
+
"US": 319333440,
|
| 182 |
+
"VC": 97334,
|
| 183 |
+
"VG": 36633,
|
| 184 |
+
"VI": 79676,
|
| 185 |
+
"VU": 247616,
|
| 186 |
+
"WS": 4279,
|
| 187 |
+
"YE": 2689596,
|
| 188 |
+
"ZA": 17503716,
|
| 189 |
+
"ZM": 2788256,
|
| 190 |
+
"ZW": 6109446
|
| 191 |
+
}
|
| 192 |
},
|
| 193 |
{
|
| 194 |
"language_name": "Chinese",
|
|
|
|
| 202 |
],
|
| 203 |
"bleu": 0.35763875438716014,
|
| 204 |
"commonvoice_hours": 422.0,
|
| 205 |
+
"commonvoice_locale": "zh-TW",
|
| 206 |
+
"population": {
|
| 207 |
+
"AU": 534796,
|
| 208 |
+
"BN": 51093,
|
| 209 |
+
"CA": 678494,
|
| 210 |
+
"CN": 1254618000,
|
| 211 |
+
"GB": 197283,
|
| 212 |
+
"GF": 4988,
|
| 213 |
+
"HK": 7249910,
|
| 214 |
+
"ID": 2456639,
|
| 215 |
+
"MN": 44352,
|
| 216 |
+
"MO": 632892,
|
| 217 |
+
"MY": 5550857,
|
| 218 |
+
"PA": 5841,
|
| 219 |
+
"PF": 23019,
|
| 220 |
+
"PH": 797021,
|
| 221 |
+
"SG": 4781438,
|
| 222 |
+
"SR": 6705,
|
| 223 |
+
"TH": 1241593,
|
| 224 |
+
"TW": 22422850,
|
| 225 |
+
"US": 2295209,
|
| 226 |
+
"VN": 1085934
|
| 227 |
+
}
|
| 228 |
},
|
| 229 |
{
|
| 230 |
"language_name": "Hindi",
|
|
|
|
| 238 |
],
|
| 239 |
"bleu": 0.33760351976648345,
|
| 240 |
"commonvoice_hours": 16.0,
|
| 241 |
+
"commonvoice_locale": "hi-IN",
|
| 242 |
+
"population": {
|
| 243 |
+
"CA": 188470,
|
| 244 |
+
"FJ": 411829,
|
| 245 |
+
"IN": 545022990,
|
| 246 |
+
"NP": 127377,
|
| 247 |
+
"UG": 2206,
|
| 248 |
+
"ZA": 1129272
|
| 249 |
+
}
|
| 250 |
},
|
| 251 |
{
|
| 252 |
"language_name": "Spanish",
|
|
|
|
| 260 |
],
|
| 261 |
"bleu": 0.3600460831160618,
|
| 262 |
"commonvoice_hours": 446.0,
|
| 263 |
+
"commonvoice_locale": "es",
|
| 264 |
+
"population": {
|
| 265 |
+
"AD": 33110,
|
| 266 |
+
"AR": 45479100,
|
| 267 |
+
"BO": 7100339,
|
| 268 |
+
"BR": 76218,
|
| 269 |
+
"BZ": 111887,
|
| 270 |
+
"CA": 603106,
|
| 271 |
+
"CL": 17823064,
|
| 272 |
+
"CO": 45648864,
|
| 273 |
+
"CR": 4843090,
|
| 274 |
+
"CU": 11059100,
|
| 275 |
+
"CW": 5751,
|
| 276 |
+
"DE": 4809582,
|
| 277 |
+
"DO": 8189766,
|
| 278 |
+
"EA": 147000,
|
| 279 |
+
"EC": 16228704,
|
| 280 |
+
"ES": 49515642,
|
| 281 |
+
"FR": 8820266,
|
| 282 |
+
"GB": 5260888,
|
| 283 |
+
"GI": 14790,
|
| 284 |
+
"GQ": 727475,
|
| 285 |
+
"GT": 15952569,
|
| 286 |
+
"HN": 7203565,
|
| 287 |
+
"IC": 2056618,
|
| 288 |
+
"MA": 23115,
|
| 289 |
+
"MX": 106779500,
|
| 290 |
+
"NI": 4838683,
|
| 291 |
+
"PA": 2686915,
|
| 292 |
+
"PE": 23297950,
|
| 293 |
+
"PH": 33846110,
|
| 294 |
+
"PR": 2774491,
|
| 295 |
+
"PT": 1030270,
|
| 296 |
+
"PY": 230134,
|
| 297 |
+
"RO": 2130290,
|
| 298 |
+
"SV": 5768179,
|
| 299 |
+
"SX": 4823,
|
| 300 |
+
"TT": 4110,
|
| 301 |
+
"US": 31933344,
|
| 302 |
+
"UY": 2981097,
|
| 303 |
+
"VE": 23488572
|
| 304 |
+
}
|
| 305 |
},
|
| 306 |
{
|
| 307 |
"language_name": "Arabic",
|
|
|
|
| 315 |
],
|
| 316 |
"bleu": 0.3046598747480405,
|
| 317 |
"commonvoice_hours": 91.0,
|
| 318 |
+
"commonvoice_locale": "ar",
|
| 319 |
+
"population": {
|
| 320 |
+
"AE": 7793822,
|
| 321 |
+
"BH": 1309350,
|
| 322 |
+
"CA": 565412,
|
| 323 |
+
"CM": 108206,
|
| 324 |
+
"CY": 1267,
|
| 325 |
+
"DJ": 67292,
|
| 326 |
+
"DZ": 31799946,
|
| 327 |
+
"EG": 97876560,
|
| 328 |
+
"EH": 652271,
|
| 329 |
+
"ER": 297979,
|
| 330 |
+
"GB": 197283,
|
| 331 |
+
"IL": 1735096,
|
| 332 |
+
"IQ": 26433436,
|
| 333 |
+
"IR": 1698466,
|
| 334 |
+
"JO": 10820600,
|
| 335 |
+
"KE": 24623,
|
| 336 |
+
"KM": 558545,
|
| 337 |
+
"KW": 2993710,
|
| 338 |
+
"LB": 4703865,
|
| 339 |
+
"LY": 5099000,
|
| 340 |
+
"MA": 22048254,
|
| 341 |
+
"ML": 175981,
|
| 342 |
+
"MR": 3404658,
|
| 343 |
+
"NE": 47822,
|
| 344 |
+
"NG": 151960,
|
| 345 |
+
"OM": 3778520,
|
| 346 |
+
"PS": 4818260,
|
| 347 |
+
"QA": 2175311,
|
| 348 |
+
"SA": 34173500,
|
| 349 |
+
"SD": 27792576,
|
| 350 |
+
"SO": 3997414,
|
| 351 |
+
"SS": 2851524,
|
| 352 |
+
"SY": 15518720,
|
| 353 |
+
"TD": 2869158,
|
| 354 |
+
"TJ": 976,
|
| 355 |
+
"TN": 10549080,
|
| 356 |
+
"TR": 459298,
|
| 357 |
+
"YE": 22114456
|
| 358 |
+
}
|
| 359 |
},
|
| 360 |
{
|
| 361 |
"language_name": "Urdu",
|
|
|
|
| 369 |
],
|
| 370 |
"bleu": 0.331647033312127,
|
| 371 |
"commonvoice_hours": 76.0,
|
| 372 |
+
"commonvoice_locale": "ur",
|
| 373 |
+
"population": {
|
| 374 |
+
"CA": 286475,
|
| 375 |
+
"GB": 2301638,
|
| 376 |
+
"IN": 66304500,
|
| 377 |
+
"MU": 71727,
|
| 378 |
+
"PK": 221825950
|
| 379 |
+
}
|
| 380 |
},
|
| 381 |
{
|
| 382 |
"language_name": "French",
|
|
|
|
| 390 |
],
|
| 391 |
"bleu": 0.3141809404018014,
|
| 392 |
"commonvoice_hours": 1051.0,
|
| 393 |
+
"commonvoice_locale": "fr",
|
| 394 |
+
"population": {
|
| 395 |
+
"AD": 5775,
|
| 396 |
+
"AT": 974540,
|
| 397 |
+
"BE": 4453866,
|
| 398 |
+
"BF": 4583788,
|
| 399 |
+
"BI": 7000822,
|
| 400 |
+
"BJ": 4502610,
|
| 401 |
+
"BL": 6837,
|
| 402 |
+
"CA": 11308230,
|
| 403 |
+
"CD": 3867640,
|
| 404 |
+
"CF": 2935521,
|
| 405 |
+
"CG": 4446179,
|
| 406 |
+
"CH": 1764838,
|
| 407 |
+
"CI": 13465739,
|
| 408 |
+
"CM": 18866600,
|
| 409 |
+
"CY": 88668,
|
| 410 |
+
"DE": 14428746,
|
| 411 |
+
"DJ": 19358,
|
| 412 |
+
"DZ": 8594580,
|
| 413 |
+
"FR": 67169718,
|
| 414 |
+
"GA": 1405473,
|
| 415 |
+
"GB": 15125053,
|
| 416 |
+
"GF": 153622,
|
| 417 |
+
"GN": 3632946,
|
| 418 |
+
"GP": 407498,
|
| 419 |
+
"GQ": 73584,
|
| 420 |
+
"GR": 954639,
|
| 421 |
+
"HT": 520187,
|
| 422 |
+
"HU": 293155,
|
| 423 |
+
"IE": 880017,
|
| 424 |
+
"IT": 3931370,
|
| 425 |
+
"KM": 473917,
|
| 426 |
+
"LB": 20238,
|
| 427 |
+
"LU": 546691,
|
| 428 |
+
"MA": 7112340,
|
| 429 |
+
"MC": 38610,
|
| 430 |
+
"MF": 32556,
|
| 431 |
+
"MG": 18599433,
|
| 432 |
+
"ML": 8994564,
|
| 433 |
+
"MQ": 427408,
|
| 434 |
+
"MR": 680932,
|
| 435 |
+
"MT": 50299,
|
| 436 |
+
"MU": 41381,
|
| 437 |
+
"NC": 278409,
|
| 438 |
+
"NE": 6603996,
|
| 439 |
+
"NL": 5011316,
|
| 440 |
+
"PF": 180024,
|
| 441 |
+
"PM": 5133,
|
| 442 |
+
"PT": 1545405,
|
| 443 |
+
"RE": 700950,
|
| 444 |
+
"RO": 3621493,
|
| 445 |
+
"RW": 2288,
|
| 446 |
+
"SC": 57589,
|
| 447 |
+
"SN": 6137196,
|
| 448 |
+
"SY": 1144506,
|
| 449 |
+
"TD": 4388124,
|
| 450 |
+
"TF": 140,
|
| 451 |
+
"TG": 5251148,
|
| 452 |
+
"TN": 8673688,
|
| 453 |
+
"US": 1862778,
|
| 454 |
+
"VU": 149166,
|
| 455 |
+
"WF": 7610,
|
| 456 |
+
"YT": 110580
|
| 457 |
+
}
|
| 458 |
},
|
| 459 |
{
|
| 460 |
"language_name": "Bangla",
|
|
|
|
| 468 |
],
|
| 469 |
"bleu": 0.27472181972977344,
|
| 470 |
"commonvoice_hours": 49.0,
|
| 471 |
+
"commonvoice_locale": "bn",
|
| 472 |
+
"population": {
|
| 473 |
+
"BD": 159397980,
|
| 474 |
+
"CA": 90466,
|
| 475 |
+
"GB": 263044,
|
| 476 |
+
"IN": 107413290,
|
| 477 |
+
"NP": 28508
|
| 478 |
+
}
|
| 479 |
},
|
| 480 |
{
|
| 481 |
"language_name": "Portuguese",
|
|
|
|
| 509 |
],
|
| 510 |
"bleu": 0.367787171884892,
|
| 511 |
"commonvoice_hours": 176.0,
|
| 512 |
+
"commonvoice_locale": "pt",
|
| 513 |
+
"population": {
|
| 514 |
+
"AG": 1571,
|
| 515 |
+
"AO": 21789941,
|
| 516 |
+
"BR": 192661560,
|
| 517 |
+
"CA": 229934,
|
| 518 |
+
"CH": 285736,
|
| 519 |
+
"CV": 443274,
|
| 520 |
+
"FR": 882027,
|
| 521 |
+
"GB": 131522,
|
| 522 |
+
"GQ": 1,
|
| 523 |
+
"GW": 1927100,
|
| 524 |
+
"LU": 100541,
|
| 525 |
+
"MO": 30723,
|
| 526 |
+
"MZ": 8126514,
|
| 527 |
+
"PT": 9890592,
|
| 528 |
+
"ST": 179454,
|
| 529 |
+
"TL": 816395
|
| 530 |
+
}
|
| 531 |
},
|
| 532 |
{
|
| 533 |
"language_name": "Punjabi",
|
|
|
|
| 561 |
],
|
| 562 |
"bleu": 0.31594664710428266,
|
| 563 |
"commonvoice_hours": 2.3,
|
| 564 |
+
"commonvoice_locale": "pa-IN",
|
| 565 |
+
"population": {
|
| 566 |
+
"CA": 603106,
|
| 567 |
+
"GB": 2367400,
|
| 568 |
+
"IN": 37130520,
|
| 569 |
+
"KE": 10170,
|
| 570 |
+
"PK": 163450700,
|
| 571 |
+
"SG": 9314
|
| 572 |
+
}
|
| 573 |
},
|
| 574 |
{
|
| 575 |
"language_name": "Russian",
|
|
|
|
| 583 |
],
|
| 584 |
"bleu": 0.2920291935463745,
|
| 585 |
"commonvoice_hours": 241.0,
|
| 586 |
+
"commonvoice_locale": "ru",
|
| 587 |
+
"population": {
|
| 588 |
+
"BG": 1602387,
|
| 589 |
+
"BY": 1137350,
|
| 590 |
+
"CA": 211087,
|
| 591 |
+
"CN": 13940,
|
| 592 |
+
"DE": 4809582,
|
| 593 |
+
"EE": 688027,
|
| 594 |
+
"FI": 45131,
|
| 595 |
+
"GE": 359730,
|
| 596 |
+
"IL": 954303,
|
| 597 |
+
"KG": 2147364,
|
| 598 |
+
"KZ": 13746168,
|
| 599 |
+
"LT": 2185168,
|
| 600 |
+
"LV": 714867,
|
| 601 |
+
"MD": 100935,
|
| 602 |
+
"MN": 4118,
|
| 603 |
+
"PL": 6890814,
|
| 604 |
+
"RU": 133218680,
|
| 605 |
+
"SJ": 1200,
|
| 606 |
+
"TJ": 1064840,
|
| 607 |
+
"TM": 663436,
|
| 608 |
+
"UA": 20204534,
|
| 609 |
+
"US": 798334,
|
| 610 |
+
"UZ": 4279156
|
| 611 |
+
}
|
| 612 |
},
|
| 613 |
{
|
| 614 |
"language_name": "Swahili",
|
|
|
|
| 642 |
],
|
| 643 |
"bleu": 0.3018786362743097,
|
| 644 |
"commonvoice_hours": 411.0,
|
| 645 |
+
"commonvoice_locale": "sw",
|
| 646 |
+
"population": {
|
| 647 |
+
"BI": 6408,
|
| 648 |
+
"CD": 50890000,
|
| 649 |
+
"KE": 35328414,
|
| 650 |
+
"MZ": 9330,
|
| 651 |
+
"SO": 235142,
|
| 652 |
+
"TZ": 52697520,
|
| 653 |
+
"UG": 32439750,
|
| 654 |
+
"YT": 2716,
|
| 655 |
+
"ZA": 1016
|
| 656 |
+
}
|
| 657 |
},
|
| 658 |
{
|
| 659 |
"language_name": "Indonesian",
|
|
|
|
| 687 |
],
|
| 688 |
"bleu": 0.31132422822400946,
|
| 689 |
"commonvoice_hours": 33.0,
|
| 690 |
+
"commonvoice_locale": "id",
|
| 691 |
+
"population": {
|
| 692 |
+
"ID": 170896640,
|
| 693 |
+
"NL": 311047
|
| 694 |
+
}
|
| 695 |
},
|
| 696 |
{
|
| 697 |
"language_name": "German",
|
|
|
|
| 725 |
],
|
| 726 |
"bleu": 0.3992689214831344,
|
| 727 |
"commonvoice_hours": 1357.0,
|
| 728 |
+
"commonvoice_locale": "de",
|
| 729 |
+
"population": {
|
| 730 |
+
"AT": 8593666,
|
| 731 |
+
"BE": 2578554,
|
| 732 |
+
"BG": 557352,
|
| 733 |
+
"BR": 1778414,
|
| 734 |
+
"CA": 294014,
|
| 735 |
+
"CH": 6134913,
|
| 736 |
+
"CZ": 1605375,
|
| 737 |
+
"DE": 72945327,
|
| 738 |
+
"DK": 2758623,
|
| 739 |
+
"FI": 1002901,
|
| 740 |
+
"FR": 3392410,
|
| 741 |
+
"GB": 5918499,
|
| 742 |
+
"GR": 530355,
|
| 743 |
+
"HU": 1758929,
|
| 744 |
+
"IT": 998443,
|
| 745 |
+
"KZ": 1221882,
|
| 746 |
+
"LI": 39137,
|
| 747 |
+
"LT": 382404,
|
| 748 |
+
"LU": 395880,
|
| 749 |
+
"NA": 23671,
|
| 750 |
+
"NL": 12269084,
|
| 751 |
+
"PL": 7273637,
|
| 752 |
+
"PY": 208559,
|
| 753 |
+
"RO": 44736,
|
| 754 |
+
"SI": 883126,
|
| 755 |
+
"SK": 1196932,
|
| 756 |
+
"US": 1563403
|
| 757 |
+
}
|
| 758 |
},
|
| 759 |
{
|
| 760 |
"language_name": "Japanese",
|
|
|
|
| 768 |
],
|
| 769 |
"bleu": 0.2954810072264808,
|
| 770 |
"commonvoice_hours": 222.0,
|
| 771 |
+
"commonvoice_locale": "ja",
|
| 772 |
+
"population": {
|
| 773 |
+
"BR": 444604,
|
| 774 |
+
"CA": 52772,
|
| 775 |
+
"JP": 119231650
|
| 776 |
+
}
|
| 777 |
},
|
| 778 |
{
|
| 779 |
"language_name": "Telugu",
|
|
|
|
| 787 |
],
|
| 788 |
"bleu": 0.37949545228579734,
|
| 789 |
"commonvoice_hours": 0.3,
|
| 790 |
+
"commonvoice_locale": "te",
|
| 791 |
+
"population": {
|
| 792 |
+
"IN": 95478480
|
| 793 |
+
}
|
| 794 |
},
|
| 795 |
{
|
| 796 |
"language_name": "Marathi",
|
|
|
|
| 804 |
],
|
| 805 |
"bleu": 0.2852384896861461,
|
| 806 |
"commonvoice_hours": 20.0,
|
| 807 |
+
"commonvoice_locale": "mr",
|
| 808 |
+
"population": {
|
| 809 |
+
"IN": 92826300
|
| 810 |
+
}
|
| 811 |
},
|
| 812 |
{
|
| 813 |
"language_name": "Javanese",
|
|
|
|
| 841 |
],
|
| 842 |
"bleu": 0.2505244065073906,
|
| 843 |
"commonvoice_hours": 0.0,
|
| 844 |
+
"commonvoice_locale": "jv",
|
| 845 |
+
"population": {
|
| 846 |
+
"ID": 90788840,
|
| 847 |
+
"MY": 391825
|
| 848 |
+
}
|
| 849 |
},
|
| 850 |
{
|
| 851 |
"language_name": "Vietnamese",
|
|
|
|
| 859 |
],
|
| 860 |
"bleu": 0.2956750563565745,
|
| 861 |
"commonvoice_hours": 5.9,
|
| 862 |
+
"commonvoice_locale": "vi",
|
| 863 |
+
"population": {
|
| 864 |
+
"CA": 184701,
|
| 865 |
+
"CN": 6970,
|
| 866 |
+
"US": 1130973,
|
| 867 |
+
"VN": 84900318
|
| 868 |
+
}
|
| 869 |
},
|
| 870 |
{
|
| 871 |
"language_name": "Tamil",
|
|
|
|
| 879 |
],
|
| 880 |
"bleu": 0.27547489589987734,
|
| 881 |
"commonvoice_hours": 234.0,
|
| 882 |
+
"commonvoice_locale": "ta",
|
| 883 |
+
"population": {
|
| 884 |
+
"CA": 184701,
|
| 885 |
+
"GB": 2104355,
|
| 886 |
+
"IN": 78239310,
|
| 887 |
+
"LK": 3433380,
|
| 888 |
+
"MU": 34484,
|
| 889 |
+
"MY": 1371388,
|
| 890 |
+
"RE": 118138,
|
| 891 |
+
"SG": 130403
|
| 892 |
+
}
|
| 893 |
},
|
| 894 |
{
|
| 895 |
"language_name": "Persian",
|
|
|
|
| 903 |
],
|
| 904 |
"bleu": 0.2858012364771329,
|
| 905 |
"commonvoice_hours": 370.0,
|
| 906 |
+
"commonvoice_locale": "fa",
|
| 907 |
+
"population": {
|
| 908 |
+
"AE": 189850,
|
| 909 |
+
"AF": 18321900,
|
| 910 |
+
"CA": 245012,
|
| 911 |
+
"IQ": 338192,
|
| 912 |
+
"IR": 63692475,
|
| 913 |
+
"OM": 43849,
|
| 914 |
+
"PK": 1541107,
|
| 915 |
+
"QA": 268859,
|
| 916 |
+
"TJ": 69215
|
| 917 |
+
}
|
| 918 |
},
|
| 919 |
{
|
| 920 |
"language_name": "Turkish",
|
|
|
|
| 948 |
],
|
| 949 |
"bleu": 0.30402386618673855,
|
| 950 |
"commonvoice_hours": 127.0,
|
| 951 |
+
"commonvoice_locale": "tr",
|
| 952 |
+
"population": {
|
| 953 |
+
"BG": 766359,
|
| 954 |
+
"CA": 37694,
|
| 955 |
+
"CY": 291336,
|
| 956 |
+
"DE": 2003992,
|
| 957 |
+
"GB": 131522,
|
| 958 |
+
"GR": 127285,
|
| 959 |
+
"MK": 74409,
|
| 960 |
+
"NL": 207365,
|
| 961 |
+
"RO": 27694,
|
| 962 |
+
"TR": 76276275,
|
| 963 |
+
"UA": 184476,
|
| 964 |
+
"UZ": 232297
|
| 965 |
+
}
|
| 966 |
},
|
| 967 |
{
|
| 968 |
"language_name": "Cantonese",
|
|
|
|
| 996 |
],
|
| 997 |
"bleu": 0.27975991005230577,
|
| 998 |
"commonvoice_hours": 203.0,
|
| 999 |
+
"commonvoice_locale": "yue",
|
| 1000 |
+
"population": {
|
| 1001 |
+
"CA": 640800,
|
| 1002 |
+
"CN": 72489040,
|
| 1003 |
+
"HK": 6524919
|
| 1004 |
+
}
|
| 1005 |
},
|
| 1006 |
{
|
| 1007 |
"language_name": "Korean",
|
|
|
|
| 1015 |
],
|
| 1016 |
"bleu": 0.24501349273295708,
|
| 1017 |
"commonvoice_hours": 1.7,
|
| 1018 |
+
"commonvoice_locale": "ko",
|
| 1019 |
+
"population": {
|
| 1020 |
+
"BR": 44460,
|
| 1021 |
+
"CA": 169623,
|
| 1022 |
+
"CN": 2091030,
|
| 1023 |
+
"JP": 652636,
|
| 1024 |
+
"KP": 22566280,
|
| 1025 |
+
"KR": 51835100,
|
| 1026 |
+
"US": 997917
|
| 1027 |
+
}
|
| 1028 |
},
|
| 1029 |
{
|
| 1030 |
"language_name": "Italian",
|
|
|
|
| 1038 |
],
|
| 1039 |
"bleu": 0.3273249067267197,
|
| 1040 |
"commonvoice_hours": 362.0,
|
| 1041 |
+
"commonvoice_locale": "it",
|
| 1042 |
+
"population": {
|
| 1043 |
+
"AT": 797350,
|
| 1044 |
+
"AU": 483864,
|
| 1045 |
+
"BR": 592805,
|
| 1046 |
+
"CA": 343016,
|
| 1047 |
+
"CH": 361372,
|
| 1048 |
+
"DE": 5611179,
|
| 1049 |
+
"FR": 1153419,
|
| 1050 |
+
"GB": 131522,
|
| 1051 |
+
"HR": 67644,
|
| 1052 |
+
"IT": 59282565,
|
| 1053 |
+
"MT": 256070,
|
| 1054 |
+
"SI": 3995,
|
| 1055 |
+
"SM": 30466,
|
| 1056 |
+
"US": 1130973,
|
| 1057 |
+
"VA": 820
|
| 1058 |
+
}
|
| 1059 |
},
|
| 1060 |
{
|
| 1061 |
"language_name": "Filipino",
|
|
|
|
| 1089 |
],
|
| 1090 |
"bleu": 0.3353425581350746,
|
| 1091 |
"commonvoice_hours": 0.0,
|
| 1092 |
+
"commonvoice_locale": "tl",
|
| 1093 |
+
"population": {
|
| 1094 |
+
"CA": 565412,
|
| 1095 |
+
"PH": 65508600,
|
| 1096 |
+
"US": 1397084
|
| 1097 |
+
}
|
| 1098 |
},
|
| 1099 |
{
|
| 1100 |
"language_name": "Egyptian Arabic",
|
|
|
|
| 1107 |
}
|
| 1108 |
],
|
| 1109 |
"bleu": 0.23431638822117362,
|
| 1110 |
+
"commonvoice_hours": null,
|
| 1111 |
+
"commonvoice_locale": null,
|
| 1112 |
+
"population": {
|
| 1113 |
+
"EG": 66639360
|
| 1114 |
+
}
|
| 1115 |
},
|
| 1116 |
{
|
| 1117 |
"language_name": "Gujarati",
|
|
|
|
| 1125 |
],
|
| 1126 |
"bleu": 0.27834507803114356,
|
| 1127 |
"commonvoice_hours": 0.0,
|
| 1128 |
+
"commonvoice_locale": "gu-IN",
|
| 1129 |
+
"population": {
|
| 1130 |
+
"CA": 135699,
|
| 1131 |
+
"GB": 1907072,
|
| 1132 |
+
"IN": 59674050,
|
| 1133 |
+
"KE": 4978
|
| 1134 |
+
}
|
| 1135 |
}
|
| 1136 |
]
|
uv.lock
CHANGED
|
@@ -930,6 +930,7 @@ dependencies = [
|
|
| 930 |
{ name = "gradio" },
|
| 931 |
{ name = "pandas" },
|
| 932 |
{ name = "plotly" },
|
|
|
|
| 933 |
]
|
| 934 |
|
| 935 |
[package.dev-dependencies]
|
|
@@ -954,6 +955,7 @@ requires-dist = [
|
|
| 954 |
{ name = "gradio", specifier = ">=5.16.2" },
|
| 955 |
{ name = "pandas", specifier = ">=2.2.3" },
|
| 956 |
{ name = "plotly", specifier = ">=6.0.0" },
|
|
|
|
| 957 |
]
|
| 958 |
|
| 959 |
[package.metadata.requires-dev]
|
|
@@ -1871,6 +1873,15 @@ wheels = [
|
|
| 1871 |
{ url = "https://files.pythonhosted.org/packages/92/a2/81c1dd744b322c0c548f793deb521bf23500806d754128ddf6f978736dff/pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b46591222c864e7da7faa3b19455196416cd8355ff6c2cc2e65726a760a3c420", size = 40006508 },
|
| 1872 |
]
|
| 1873 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1874 |
[[package]]
|
| 1875 |
name = "pydantic"
|
| 1876 |
version = "2.9.2"
|
|
|
|
| 930 |
{ name = "gradio" },
|
| 931 |
{ name = "pandas" },
|
| 932 |
{ name = "plotly" },
|
| 933 |
+
{ name = "pycountry" },
|
| 934 |
]
|
| 935 |
|
| 936 |
[package.dev-dependencies]
|
|
|
|
| 955 |
{ name = "gradio", specifier = ">=5.16.2" },
|
| 956 |
{ name = "pandas", specifier = ">=2.2.3" },
|
| 957 |
{ name = "plotly", specifier = ">=6.0.0" },
|
| 958 |
+
{ name = "pycountry" },
|
| 959 |
]
|
| 960 |
|
| 961 |
[package.metadata.requires-dev]
|
|
|
|
| 1873 |
{ url = "https://files.pythonhosted.org/packages/92/a2/81c1dd744b322c0c548f793deb521bf23500806d754128ddf6f978736dff/pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b46591222c864e7da7faa3b19455196416cd8355ff6c2cc2e65726a760a3c420", size = 40006508 },
|
| 1874 |
]
|
| 1875 |
|
| 1876 |
+
[[package]]
|
| 1877 |
+
name = "pycountry"
|
| 1878 |
+
version = "24.6.1"
|
| 1879 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1880 |
+
sdist = { url = "https://files.pythonhosted.org/packages/76/57/c389fa68c50590881a75b7883eeb3dc15e9e73a0fdc001cdd45c13290c92/pycountry-24.6.1.tar.gz", hash = "sha256:b61b3faccea67f87d10c1f2b0fc0be714409e8fcdcc1315613174f6466c10221", size = 6043910 }
|
| 1881 |
+
wheels = [
|
| 1882 |
+
{ url = "https://files.pythonhosted.org/packages/b1/ec/1fb891d8a2660716aadb2143235481d15ed1cbfe3ad669194690b0604492/pycountry-24.6.1-py3-none-any.whl", hash = "sha256:f1a4fb391cd7214f8eefd39556d740adcc233c778a27f8942c8dca351d6ce06f", size = 6335189 },
|
| 1883 |
+
]
|
| 1884 |
+
|
| 1885 |
[[package]]
|
| 1886 |
name = "pydantic"
|
| 1887 |
version = "2.9.2"
|