Spaces:
Running
Running
David Pomerenke
committed on
Commit
·
7f54946
1
Parent(s):
086a421
Only show top languages in bar chart
Browse files
- app.py +12 -31
- language-chart.js +0 -68
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import gradio as gr
|
|
| 4 |
import numpy as np
|
| 5 |
import pandas as pd
|
| 6 |
import plotly.graph_objects as go
|
|
|
|
| 7 |
import pycountry
|
| 8 |
|
| 9 |
with open("results.json") as f:
|
|
@@ -127,36 +128,14 @@ def create_leaderboard_df(results):
|
|
| 127 |
|
| 128 |
|
| 129 |
def create_model_comparison_plot(results):
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
for
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
traces = []
|
| 139 |
-
for model in models:
|
| 140 |
-
x_vals = [] # languages
|
| 141 |
-
y_vals = [] # BLEU scores
|
| 142 |
-
|
| 143 |
-
for lang in results:
|
| 144 |
-
model_score = next(
|
| 145 |
-
(s["bleu"] for s in lang["scores"] if s["model"] == model), None
|
| 146 |
-
)
|
| 147 |
-
if model_score is not None:
|
| 148 |
-
x_vals.append(lang["language_name"])
|
| 149 |
-
y_vals.append(model_score)
|
| 150 |
-
|
| 151 |
-
traces.append(
|
| 152 |
-
go.Bar(
|
| 153 |
-
name=model.split("/")[-1],
|
| 154 |
-
x=x_vals,
|
| 155 |
-
y=y_vals,
|
| 156 |
-
)
|
| 157 |
-
)
|
| 158 |
-
|
| 159 |
-
fig = go.Figure(data=traces)
|
| 160 |
fig.update_layout(
|
| 161 |
title="BLEU Scores by Model and Language",
|
| 162 |
xaxis_title=None,
|
|
@@ -231,7 +210,9 @@ def create_language_stats_df(results):
|
|
| 231 |
def create_scatter_plot(results):
|
| 232 |
fig = go.Figure()
|
| 233 |
|
| 234 |
-
x_vals = [
|
|
|
|
|
|
|
| 235 |
y_vals = [lang["bleu"] for lang in results]
|
| 236 |
labels = [lang["language_name"] for lang in results]
|
| 237 |
|
|
|
|
| 4 |
import numpy as np
|
| 5 |
import pandas as pd
|
| 6 |
import plotly.graph_objects as go
|
| 7 |
+
import plotly.express as px
|
| 8 |
import pycountry
|
| 9 |
|
| 10 |
with open("results.json") as f:
|
|
|
|
| 128 |
|
| 129 |
|
| 130 |
def create_model_comparison_plot(results):
|
| 131 |
+
top_languages = sorted(results, key=lambda x: x["speakers"], reverse=True)[:10]
|
| 132 |
+
scores_flat = [
|
| 133 |
+
{"language": lang["language_name"], "model": score["model"], "bleu": score["bleu"]}
|
| 134 |
+
for lang in top_languages
|
| 135 |
+
for score in lang["scores"]
|
| 136 |
+
]
|
| 137 |
+
df = pd.DataFrame(scores_flat)
|
| 138 |
+
fig = px.bar(df, x="language", y="bleu", color="model", barmode="group")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
fig.update_layout(
|
| 140 |
title="BLEU Scores by Model and Language",
|
| 141 |
xaxis_title=None,
|
|
|
|
| 210 |
def create_scatter_plot(results):
|
| 211 |
fig = go.Figure()
|
| 212 |
|
| 213 |
+
x_vals = [
|
| 214 |
+
lang["speakers"] / 1_000_000 for lang in results if lang["speakers"] >= 10_000
|
| 215 |
+
] # Convert to millions
|
| 216 |
y_vals = [lang["bleu"] for lang in results]
|
| 217 |
labels = [lang["language_name"] for lang in results]
|
| 218 |
|
language-chart.js
DELETED
|
@@ -1,68 +0,0 @@
|
|
| 1 |
-
import * as Plot from "npm:@observablehq/plot";
|
| 2 |
-
|
| 3 |
-
export function languageChart(
|
| 4 |
-
languageData,
|
| 5 |
-
{ width, height, scoreKey, scoreName } = {}
|
| 6 |
-
) {
|
| 7 |
-
// Format captions
|
| 8 |
-
const formatScore = (score) =>
|
| 9 |
-
score > 0 ? score.toFixed(2) : "No benchmark available!";
|
| 10 |
-
const formatTitle = (d) =>
|
| 11 |
-
d.language_name +
|
| 12 |
-
"\n" +
|
| 13 |
-
parseInt(d.speakers / 1_000_00) / 10 +
|
| 14 |
-
"M speakers\n" +
|
| 15 |
-
scoreName +
|
| 16 |
-
": " +
|
| 17 |
-
formatScore(d[scoreKey]);
|
| 18 |
-
|
| 19 |
-
return Plot.plot({
|
| 20 |
-
width: width,
|
| 21 |
-
height: height,
|
| 22 |
-
marginBottom: 100,
|
| 23 |
-
x: { label: "Number of speakers", axis: null },
|
| 24 |
-
y: { label: `${scoreName} (average across models)` },
|
| 25 |
-
// color: { scheme: "BrBG" },
|
| 26 |
-
marks: [
|
| 27 |
-
Plot.rectY(
|
| 28 |
-
languageData,
|
| 29 |
-
Plot.stackX({
|
| 30 |
-
x: "speakers",
|
| 31 |
-
order: scoreKey,
|
| 32 |
-
reverse: true,
|
| 33 |
-
y2: scoreKey, // y2 to avoid stacking by y
|
| 34 |
-
title: formatTitle,
|
| 35 |
-
tip: true,
|
| 36 |
-
fill: (d) => (d[scoreKey] > 0 ? "black" : "pink"),
|
| 37 |
-
})
|
| 38 |
-
),
|
| 39 |
-
Plot.rectY(
|
| 40 |
-
languageData,
|
| 41 |
-
Plot.pointerX(
|
| 42 |
-
Plot.stackX({
|
| 43 |
-
x: "speakers",
|
| 44 |
-
order: scoreKey,
|
| 45 |
-
reverse: true,
|
| 46 |
-
y2: scoreKey, // y2 to avoid stacking by y
|
| 47 |
-
fill: "grey",
|
| 48 |
-
})
|
| 49 |
-
)
|
| 50 |
-
),
|
| 51 |
-
Plot.text(
|
| 52 |
-
languageData,
|
| 53 |
-
Plot.stackX({
|
| 54 |
-
x: "speakers",
|
| 55 |
-
y2: scoreKey,
|
| 56 |
-
order: scoreKey,
|
| 57 |
-
reverse: true,
|
| 58 |
-
text: "language_name",
|
| 59 |
-
frameAnchor: "bottom",
|
| 60 |
-
textAnchor: "end",
|
| 61 |
-
dy: 10,
|
| 62 |
-
rotate: 270,
|
| 63 |
-
opacity: (d) => (d.speakers > 50_000_000 ? 1 : 0),
|
| 64 |
-
})
|
| 65 |
-
),
|
| 66 |
-
],
|
| 67 |
-
});
|
| 68 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|