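"""FastAPI backend for the language-benchmark leaderboard.

Aggregates per-model and per-language evaluation scores into leaderboard
tables and serves them (plus country and dataset views) over a single POST
endpoint, optionally alongside the static frontend build.
"""
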
import json
import os

import numpy as np
import pandas as pd
import uvicorn
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles

from countries import make_country_table
from datasets_.util import load
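
# Shared data tables, loaded once at startup. `scores` is expected to hold
# rows of evaluation results with columns including model, bcp_47, task,
# metric, origin, and score (inferred from how it is used below).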
scores = load("results")
languages = load("languages")
models = load("models")
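

# Arithmetic mean that returns None instead of raising on an empty list.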
def mean(lst):
return sum(lst) / len(lst) if lst else None
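

# Task+metric column names that feed the aggregate "average" score.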
task_metrics = [
"translation_from_bleu",
"translation_to_bleu",
"classification_accuracy",
"mmlu_accuracy",
"arc_accuracy",
"truthfulqa_accuracy",
"mgsm_accuracy",
]


def compute_normalized_average(df, metrics):
"""Compute average of min-max normalized metric columns."""
normalized_df = df[metrics].copy()
for col in metrics:
if col in normalized_df.columns:
col_min = normalized_df[col].min()
col_max = normalized_df[col].max()
if col_max > col_min: # Avoid division by zero
normalized_df[col] = (normalized_df[col] - col_min) / (
col_max - col_min
)
else:
normalized_df[col] = 0 # If all values are the same, set to 0
return normalized_df.mean(axis=1, skipna=False)
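
# Note: the min-max step above maps each metric column onto [0, 1]
# (e.g. [0, 5, 10] -> [0.0, 0.5, 1.0]), so metrics on different scales
# contribute equally to the rowwise "average"; rows missing any metric
# average to NaN because skipna=False.
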
def make_model_table(scores_df, models):
scores_df = scores_df.copy()
# Create a combined task_metric for origin
scores_df["task_metric_origin"] = (
scores_df["task"] + "_" + scores_df["metric"] + "_" + scores_df["origin"]
)
# Pivot to get scores for each origin-specific metric
scores_pivot = scores_df.pivot_table(
index="model",
columns="task_metric_origin",
values="score",
aggfunc="mean",
)
# Create the regular task_metric for the main average calculation
scores_df["task_metric"] = scores_df["task"] + "_" + scores_df["metric"]
main_pivot = scores_df.pivot_table(
index="model", columns="task_metric", values="score", aggfunc="mean"
)
# Merge the two pivots
df = pd.merge(main_pivot, scores_pivot, on="model", how="outer")
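    # Guarantee every expected metric column exists so the normalized average
    # below is well-defined even when some tasks are missing.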
for metric in task_metrics:
if metric not in df.columns:
df[metric] = np.nan
df["average"] = compute_normalized_average(df, task_metrics)
# Add flag if any machine-origin data was used
    machine_presence = (
        scores_df[scores_df["origin"] == "machine"]
        .groupby(["model", "task_metric"])
        .size()
    )
    for metric in task_metrics:
        # Bind `metric` as a default argument so the closure is explicit.
        df[f"{metric}_contains_machine"] = df.index.map(
            lambda m, metric=metric: (m, metric) in machine_presence.index
        )
df = df.sort_values(by="average", ascending=False).reset_index()
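    # Attach model metadata (name, provider, size, license, ...) from `models`.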
df = pd.merge(df, models, left_on="model", right_on="id", how="left")
df["rank"] = df.index + 1
# Dynamically find all metric columns to include
    metric_cols = [c for c in df.columns if any(tm in c for tm in task_metrics)]
df["creation_date"] = df["creation_date"].apply(lambda x: x.isoformat() if x else None)
df = df[
[
"rank",
"model",
"name",
"provider_name",
"hf_id",
"creation_date",
"size",
"type",
"license",
"cost",
"average",
            *sorted(set(metric_cols)),
]
]
return df


def make_language_table(scores_df, languages):
scores_df = scores_df.copy()
scores_df["task_metric"] = scores_df["task"] + "_" + scores_df["metric"]
# Pivot scores
score_pivot = scores_df.pivot_table(
index="bcp_47", columns="task_metric", values="score", aggfunc="mean"
)
# Pivot origins (first origin since each task+lang combo has only one)
origin_pivot = scores_df.pivot_table(
index="bcp_47", columns="task_metric", values="origin", aggfunc="first"
)
origin_pivot = origin_pivot.add_suffix("_origin")
df = pd.merge(score_pivot, origin_pivot, on="bcp_47", how="outer")
for metric in task_metrics:
if metric not in df.columns:
df[metric] = np.nan
df["average"] = compute_normalized_average(df, task_metrics)
df = pd.merge(languages, df, on="bcp_47", how="outer")
df = df.sort_values(by="speakers", ascending=False)
# Dynamically find all metric columns to include
    metric_cols = [c for c in df.columns if any(tm in c for tm in task_metrics)]
df = df[
[
"bcp_47",
"language_name",
"autonym",
"speakers",
"family",
"average",
"in_benchmark",
            *sorted(set(metric_cols)),
]
]
return df
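

# FastAPI app with permissive CORS; gzip compresses the large JSON payloads.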
app = FastAPI()
app.add_middleware(CORSMiddleware, allow_origins=["*"])
app.add_middleware(GZipMiddleware, minimum_size=1000)


def serialize(df):
    """Convert a DataFrame to JSON-safe records (NaN becomes None)."""
    return df.replace({np.nan: None}).to_dict(orient="records")
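

# Single data endpoint: the frontend POSTs its language selection and receives
# all leaderboard tables in one response. Illustrative request (shape inferred
# from the handler below):
#
#   curl -X POST localhost:8000/api/data \
#        -H 'Content-Type: application/json' \
#        -d '{"selectedLanguages": [{"bcp_47": "en"}]}'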
@app.post("/api/data")
async def data(request: Request):
body = await request.body()
data = json.loads(body)
    # "selectedLanguages" is expected to be a list of {"bcp_47": ...} records
    # (shape inferred from the filtering below); default to an empty list.
    selected_languages = data.get("selectedLanguages", [])
    # Identify which task+metric combinations have machine-translated data
    machine_rows = scores[scores["origin"] == "machine"]
    machine_translated_metrics = set(
        machine_rows["task"] + "_" + machine_rows["metric"]
    )
    # Filter by the selected languages, if any were provided
    if selected_languages:
        selected_codes = [lang["bcp_47"] for lang in selected_languages]
        df = scores[scores["bcp_47"].isin(selected_codes)]
    else:
        df = scores
    if df.empty:
        model_table = pd.DataFrame()
        countries = pd.DataFrame()
    else:
        model_table = make_model_table(df, models)
        countries = make_country_table(make_language_table(df, languages))
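    # The language table is always built from the full score set, not the
    # filtered selection.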
language_table = make_language_table(scores, languages)
datasets_df = pd.read_json("data/datasets.json")
return JSONResponse(content={
"model_table": serialize(model_table),
"language_table": serialize(language_table),
"dataset_table": serialize(datasets_df),
"countries": serialize(countries),
"machine_translated_metrics": list(machine_translated_metrics),
})


# Serve the static frontend only if the build directory exists
if os.path.exists("frontend/build"):
    app.mount(
        "/", StaticFiles(directory="frontend/build", html=True), name="frontend"
    )


if __name__ == "__main__":
    # PORT is typically injected by the hosting environment; default to 8000.
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8000)))