Spaces:
Running
Running
David Pomerenke
committed on
Commit
·
9002fc2
1
Parent(s):
8274634
Add OpenRouter metadata to models
Browse files
And via the hf_slug attribute this also improves the HuggingFace metadata
- evals/backend.py +10 -5
- evals/models.py +49 -19
- frontend/src/components/ModelTable.js +60 -34
- results.json +66 -30
evals/backend.py
CHANGED
|
@@ -1,27 +1,30 @@
|
|
| 1 |
import json
|
| 2 |
import os
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
import pandas as pd
|
| 5 |
import uvicorn
|
|
|
|
| 6 |
from fastapi import FastAPI, Request
|
| 7 |
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
from fastapi.middleware.gzip import GZipMiddleware
|
| 9 |
from fastapi.responses import JSONResponse
|
| 10 |
from fastapi.staticfiles import StaticFiles
|
| 11 |
|
| 12 |
-
from countries import make_country_table
|
| 13 |
-
|
| 14 |
with open("results.json", "r") as f:
|
| 15 |
results = json.load(f)
|
| 16 |
scores = pd.DataFrame(results["scores"])
|
| 17 |
languages = pd.DataFrame(results["languages"])
|
| 18 |
models = pd.DataFrame(results["models"])
|
| 19 |
|
|
|
|
| 20 |
def mean(lst):
|
| 21 |
return sum(lst) / len(lst) if lst else None
|
| 22 |
|
|
|
|
| 23 |
task_metrics = ["translation_bleu", "classification_accuracy"]
|
| 24 |
|
|
|
|
| 25 |
def make_model_table(df, models):
|
| 26 |
df = (
|
| 27 |
df.groupby(["model", "task", "metric"])
|
|
@@ -39,11 +42,14 @@ def make_model_table(df, models):
|
|
| 39 |
[
|
| 40 |
"rank",
|
| 41 |
"model",
|
|
|
|
|
|
|
| 42 |
"hf_id",
|
| 43 |
"creation_date",
|
| 44 |
"size",
|
| 45 |
"type",
|
| 46 |
"license",
|
|
|
|
| 47 |
"average",
|
| 48 |
*task_metrics,
|
| 49 |
]
|
|
@@ -97,9 +103,7 @@ async def data(request: Request):
|
|
| 97 |
body = await request.body()
|
| 98 |
data = json.loads(body)
|
| 99 |
selected_languages = data.get("selectedLanguages", {})
|
| 100 |
-
df = (
|
| 101 |
-
scores.groupby(["model", "bcp_47", "task", "metric"]).mean().reset_index()
|
| 102 |
-
)
|
| 103 |
# lang_results = pd.merge(languages, lang_results, on="bcp_47", how="outer")
|
| 104 |
language_table = make_language_table(df, languages)
|
| 105 |
datasets_df = pd.read_json("datasets.json")
|
|
@@ -116,6 +120,7 @@ async def data(request: Request):
|
|
| 116 |
}
|
| 117 |
return JSONResponse(content=all_tables)
|
| 118 |
|
|
|
|
| 119 |
app.mount("/", StaticFiles(directory="frontend/build", html=True), name="frontend")
|
| 120 |
|
| 121 |
if __name__ == "__main__":
|
|
|
|
| 1 |
import json
|
| 2 |
import os
|
| 3 |
+
|
| 4 |
import numpy as np
|
| 5 |
import pandas as pd
|
| 6 |
import uvicorn
|
| 7 |
+
from countries import make_country_table
|
| 8 |
from fastapi import FastAPI, Request
|
| 9 |
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
from fastapi.middleware.gzip import GZipMiddleware
|
| 11 |
from fastapi.responses import JSONResponse
|
| 12 |
from fastapi.staticfiles import StaticFiles
|
| 13 |
|
|
|
|
|
|
|
| 14 |
with open("results.json", "r") as f:
|
| 15 |
results = json.load(f)
|
| 16 |
scores = pd.DataFrame(results["scores"])
|
| 17 |
languages = pd.DataFrame(results["languages"])
|
| 18 |
models = pd.DataFrame(results["models"])
|
| 19 |
|
| 20 |
+
|
| 21 |
def mean(lst):
|
| 22 |
return sum(lst) / len(lst) if lst else None
|
| 23 |
|
| 24 |
+
|
| 25 |
task_metrics = ["translation_bleu", "classification_accuracy"]
|
| 26 |
|
| 27 |
+
|
| 28 |
def make_model_table(df, models):
|
| 29 |
df = (
|
| 30 |
df.groupby(["model", "task", "metric"])
|
|
|
|
| 42 |
[
|
| 43 |
"rank",
|
| 44 |
"model",
|
| 45 |
+
"name",
|
| 46 |
+
"provider_name",
|
| 47 |
"hf_id",
|
| 48 |
"creation_date",
|
| 49 |
"size",
|
| 50 |
"type",
|
| 51 |
"license",
|
| 52 |
+
"cost",
|
| 53 |
"average",
|
| 54 |
*task_metrics,
|
| 55 |
]
|
|
|
|
| 103 |
body = await request.body()
|
| 104 |
data = json.loads(body)
|
| 105 |
selected_languages = data.get("selectedLanguages", {})
|
| 106 |
+
df = scores.groupby(["model", "bcp_47", "task", "metric"]).mean().reset_index()
|
|
|
|
|
|
|
| 107 |
# lang_results = pd.merge(languages, lang_results, on="bcp_47", how="outer")
|
| 108 |
language_table = make_language_table(df, languages)
|
| 109 |
datasets_df = pd.read_json("datasets.json")
|
|
|
|
| 120 |
}
|
| 121 |
return JSONResponse(content=all_tables)
|
| 122 |
|
| 123 |
+
|
| 124 |
app.mount("/", StaticFiles(directory="frontend/build", html=True), name="frontend")
|
| 125 |
|
| 126 |
if __name__ == "__main__":
|
evals/models.py
CHANGED
|
@@ -7,17 +7,17 @@ from elevenlabs import AsyncElevenLabs
|
|
| 7 |
from huggingface_hub import AsyncInferenceClient, HfApi
|
| 8 |
from joblib.memory import Memory
|
| 9 |
from openai import AsyncOpenAI
|
| 10 |
-
from requests import HTTPError
|
| 11 |
|
| 12 |
# for development purposes, all languages will be evaluated on the fast models
|
| 13 |
# and only a sample of languages will be evaluated on all models
|
| 14 |
models = [
|
| 15 |
"openai/gpt-4o-mini", # 0.6$/M tokens
|
| 16 |
# "anthropic/claude-3.5-haiku", # 4$/M tokens -> too expensive for dev
|
| 17 |
-
"meta-llama/llama-4-maverick",
|
| 18 |
"meta-llama/llama-3.3-70b-instruct", # 0.3$/M tokens
|
| 19 |
"meta-llama/llama-3.1-70b-instruct", # 0.3$/M tokens
|
| 20 |
-
"meta-llama/llama-3-70b-instruct",
|
| 21 |
"mistralai/mistral-small-3.1-24b-instruct", # 0.3$/M tokens
|
| 22 |
# "mistralai/mistral-saba", # 0.6$/M tokens
|
| 23 |
# "mistralai/mistral-nemo", # 0.08$/M tokens
|
|
@@ -25,10 +25,10 @@ models = [
|
|
| 25 |
# "google/gemini-2.0-flash-lite-001", # 0.3$/M tokens
|
| 26 |
"google/gemma-3-27b-it", # 0.2$/M tokens
|
| 27 |
# "qwen/qwen-turbo", # 0.2$/M tokens; recognizes "inappropriate content"
|
| 28 |
-
"qwen/qwq-32b",
|
| 29 |
"deepseek/deepseek-chat-v3-0324", # 1.1$/M tokens
|
| 30 |
# "microsoft/phi-4", # 0.07$/M tokens; only 16k tokens context
|
| 31 |
-
"microsoft/phi-4-multimodal-instruct",
|
| 32 |
"amazon/nova-micro-v1", # 0.09$/M tokens
|
| 33 |
# "openGPT-X/Teuken-7B-instruct-research-v0.4", # not on OpenRouter
|
| 34 |
]
|
|
@@ -94,10 +94,32 @@ async def transcribe(path, model="elevenlabs/scribe_v1"):
|
|
| 94 |
|
| 95 |
models = pd.DataFrame(models, columns=["id"])
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
api = HfApi()
|
| 98 |
|
|
|
|
| 99 |
@cache
|
| 100 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
try:
|
| 102 |
info = api.model_info(id)
|
| 103 |
license = info.card_data.license.replace("-", " ").replace("mit", "MIT").title()
|
|
@@ -109,17 +131,25 @@ def get_metadata(id):
|
|
| 109 |
"license": license,
|
| 110 |
}
|
| 111 |
except HTTPError:
|
| 112 |
-
return
|
| 113 |
-
"hf_id": None,
|
| 114 |
-
"creation_date": None,
|
| 115 |
-
"size": None,
|
| 116 |
-
"type": "Commercial",
|
| 117 |
-
"license": None,
|
| 118 |
-
}
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
from huggingface_hub import AsyncInferenceClient, HfApi
|
| 8 |
from joblib.memory import Memory
|
| 9 |
from openai import AsyncOpenAI
|
| 10 |
+
from requests import HTTPError, get
|
| 11 |
|
| 12 |
# for development purposes, all languages will be evaluated on the fast models
|
| 13 |
# and only a sample of languages will be evaluated on all models
|
| 14 |
models = [
|
| 15 |
"openai/gpt-4o-mini", # 0.6$/M tokens
|
| 16 |
# "anthropic/claude-3.5-haiku", # 4$/M tokens -> too expensive for dev
|
| 17 |
+
"meta-llama/llama-4-maverick", # 0.6$/M tokens
|
| 18 |
"meta-llama/llama-3.3-70b-instruct", # 0.3$/M tokens
|
| 19 |
"meta-llama/llama-3.1-70b-instruct", # 0.3$/M tokens
|
| 20 |
+
"meta-llama/llama-3-70b-instruct", # 0.4$/M tokens
|
| 21 |
"mistralai/mistral-small-3.1-24b-instruct", # 0.3$/M tokens
|
| 22 |
# "mistralai/mistral-saba", # 0.6$/M tokens
|
| 23 |
# "mistralai/mistral-nemo", # 0.08$/M tokens
|
|
|
|
| 25 |
# "google/gemini-2.0-flash-lite-001", # 0.3$/M tokens
|
| 26 |
"google/gemma-3-27b-it", # 0.2$/M tokens
|
| 27 |
# "qwen/qwen-turbo", # 0.2$/M tokens; recognizes "inappropriate content"
|
| 28 |
+
"qwen/qwq-32b", # 0.2$/M tokens
|
| 29 |
"deepseek/deepseek-chat-v3-0324", # 1.1$/M tokens
|
| 30 |
# "microsoft/phi-4", # 0.07$/M tokens; only 16k tokens context
|
| 31 |
+
"microsoft/phi-4-multimodal-instruct", # 0.1$/M tokens
|
| 32 |
"amazon/nova-micro-v1", # 0.09$/M tokens
|
| 33 |
# "openGPT-X/Teuken-7B-instruct-research-v0.4", # not on OpenRouter
|
| 34 |
]
|
|
|
|
| 94 |
|
| 95 |
models = pd.DataFrame(models, columns=["id"])
|
| 96 |
|
| 97 |
+
|
| 98 |
+
@cache
|
| 99 |
+
def get_or_metadata(id):
|
| 100 |
+
# get metadata from OpenRouter
|
| 101 |
+
response = cache(get)("https://openrouter.ai/api/frontend/models/")
|
| 102 |
+
models = response.json()["data"]
|
| 103 |
+
metadata = next((m for m in models if m["slug"] == id), None)
|
| 104 |
+
return metadata
|
| 105 |
+
|
| 106 |
+
|
| 107 |
api = HfApi()
|
| 108 |
|
| 109 |
+
|
| 110 |
@cache
|
| 111 |
+
def get_hf_metadata(row):
|
| 112 |
+
# get metadata from the HuggingFace API
|
| 113 |
+
empty = {
|
| 114 |
+
"hf_id": None,
|
| 115 |
+
"creation_date": None,
|
| 116 |
+
"size": None,
|
| 117 |
+
"type": "Commercial",
|
| 118 |
+
"license": None,
|
| 119 |
+
}
|
| 120 |
+
id = row["hf_slug"] or row["slug"]
|
| 121 |
+
if not id:
|
| 122 |
+
return empty
|
| 123 |
try:
|
| 124 |
info = api.model_info(id)
|
| 125 |
license = info.card_data.license.replace("-", " ").replace("mit", "MIT").title()
|
|
|
|
| 131 |
"license": license,
|
| 132 |
}
|
| 133 |
except HTTPError:
|
| 134 |
+
return empty
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
+
|
| 137 |
+
or_metadata = models["id"].apply(get_or_metadata)
|
| 138 |
+
hf_metadata = or_metadata.apply(get_hf_metadata)
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def get_cost(row):
|
| 142 |
+
cost = float(row["endpoint"]["pricing"]["completion"])
|
| 143 |
+
return round(cost * 1_000_000, 2)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
models = models.assign(
|
| 147 |
+
name=or_metadata.str["short_name"],
|
| 148 |
+
provider_name=or_metadata.str["name"].str.split(": ").str[0],
|
| 149 |
+
cost=or_metadata.apply(get_cost),
|
| 150 |
+
hf_id=hf_metadata.str["hf_id"],
|
| 151 |
+
creation_date=pd.to_datetime(hf_metadata.str["creation_date"]),
|
| 152 |
+
size=hf_metadata.str["size"],
|
| 153 |
+
type=hf_metadata.str["type"],
|
| 154 |
+
license=hf_metadata.str["license"],
|
| 155 |
+
)
|
frontend/src/components/ModelTable.js
CHANGED
|
@@ -10,7 +10,8 @@ import ScoreField from './ScoreField'
|
|
| 10 |
const ModelTable = ({ data }) => {
|
| 11 |
const [filters, setFilters] = useState({
|
| 12 |
type: { value: null, matchMode: FilterMatchMode.IN },
|
| 13 |
-
size: { value: null, matchMode: FilterMatchMode.BETWEEN }
|
|
|
|
| 14 |
})
|
| 15 |
const rankBodyTemplate = rowData => {
|
| 16 |
return <Medal rank={rowData.rank} />
|
|
@@ -36,6 +37,8 @@ const ModelTable = ({ data }) => {
|
|
| 36 |
const formatSize = size => {
|
| 37 |
if (size === null) {
|
| 38 |
return ''
|
|
|
|
|
|
|
| 39 |
} else if (size < 1000) {
|
| 40 |
return size.toFixed(0) + ''
|
| 41 |
} else if (size < 1000 * 1000) {
|
|
@@ -47,10 +50,8 @@ const ModelTable = ({ data }) => {
|
|
| 47 |
}
|
| 48 |
}
|
| 49 |
|
| 50 |
-
const SliderWithLabel = ({ value, onChange }) => {
|
| 51 |
const p = 10
|
| 52 |
-
const min = 8
|
| 53 |
-
const max = 12
|
| 54 |
const start = value === null ? min : Math.log(value[0]) / Math.log(p)
|
| 55 |
const stop = value === null ? max : Math.log(value[1]) / Math.log(p)
|
| 56 |
const [_value, _setValue] = useState([start, stop])
|
|
@@ -58,13 +59,14 @@ const ModelTable = ({ data }) => {
|
|
| 58 |
const timer = setTimeout(() => {
|
| 59 |
onChange({
|
| 60 |
value:
|
|
|
|
| 61 |
_value[0] <= min + 0.1 && _value[1] >= max - 0.1
|
| 62 |
? null
|
| 63 |
: [p ** _value[0], p ** _value[1]]
|
| 64 |
})
|
| 65 |
}, 1000)
|
| 66 |
return () => clearTimeout(timer)
|
| 67 |
-
}, [_value, onChange])
|
| 68 |
return (
|
| 69 |
<div style={{ minWidth: '20rem' }}>
|
| 70 |
<div>{formatSize(p ** _value[0])}</div>
|
|
@@ -87,6 +89,8 @@ const ModelTable = ({ data }) => {
|
|
| 87 |
return (
|
| 88 |
<SliderWithLabel
|
| 89 |
value={options.value}
|
|
|
|
|
|
|
| 90 |
onChange={e => {
|
| 91 |
options.filterApplyCallback(e.value)
|
| 92 |
setFilters(prevFilters => ({
|
|
@@ -98,35 +102,42 @@ const ModelTable = ({ data }) => {
|
|
| 98 |
)
|
| 99 |
}
|
| 100 |
|
| 101 |
-
const
|
| 102 |
-
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
}
|
| 105 |
|
| 106 |
-
const
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
}
|
| 121 |
|
| 122 |
-
const modelBodyTemplate = rowData =>
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
.split('-')
|
| 126 |
-
.map(capitalize)
|
| 127 |
-
.join(' ')
|
| 128 |
-
return <div style={{ fontWeight: 'bold', height: '100%' }}>{modelName}</div>
|
| 129 |
-
}
|
| 130 |
|
| 131 |
const typeBodyTemplate = rowData => {
|
| 132 |
return rowData.type === 'Open' ? (
|
|
@@ -136,6 +147,10 @@ const ModelTable = ({ data }) => {
|
|
| 136 |
)
|
| 137 |
}
|
| 138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
const scoreBodyTemplate = (field, options = {}) => {
|
| 140 |
const { minScore = 0, maxScore = 1 } = options
|
| 141 |
|
|
@@ -160,13 +175,12 @@ const ModelTable = ({ data }) => {
|
|
| 160 |
>
|
| 161 |
<Column field='rank' body={rankBodyTemplate} />
|
| 162 |
<Column
|
| 163 |
-
field='
|
| 164 |
header='Provider'
|
| 165 |
style={{ minWidth: '7rem' }}
|
| 166 |
-
body={providerBodyTemplate}
|
| 167 |
/>
|
| 168 |
<Column
|
| 169 |
-
field='
|
| 170 |
header='Model'
|
| 171 |
style={{ minWidth: '10rem' }}
|
| 172 |
body={modelBodyTemplate}
|
|
@@ -182,7 +196,8 @@ const ModelTable = ({ data }) => {
|
|
| 182 |
/>
|
| 183 |
<Column
|
| 184 |
field='size'
|
| 185 |
-
header=
|
|
|
|
| 186 |
filter
|
| 187 |
filterElement={sizeFilterTemplate}
|
| 188 |
showFilterMatchModes={false}
|
|
@@ -190,6 +205,17 @@ const ModelTable = ({ data }) => {
|
|
| 190 |
body={sizeBodyTemplate}
|
| 191 |
style={{ minWidth: '5rem' }}
|
| 192 |
/>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
<Column
|
| 194 |
field='average'
|
| 195 |
header='Average'
|
|
|
|
| 10 |
const ModelTable = ({ data }) => {
|
| 11 |
const [filters, setFilters] = useState({
|
| 12 |
type: { value: null, matchMode: FilterMatchMode.IN },
|
| 13 |
+
size: { value: null, matchMode: FilterMatchMode.BETWEEN },
|
| 14 |
+
cost: { value: null, matchMode: FilterMatchMode.BETWEEN }
|
| 15 |
})
|
| 16 |
const rankBodyTemplate = rowData => {
|
| 17 |
return <Medal rank={rowData.rank} />
|
|
|
|
| 37 |
const formatSize = size => {
|
| 38 |
if (size === null) {
|
| 39 |
return ''
|
| 40 |
+
} else if (size >= 0 && size <= 1) {
|
| 41 |
+
return size.toFixed(2) + ''
|
| 42 |
} else if (size < 1000) {
|
| 43 |
return size.toFixed(0) + ''
|
| 44 |
} else if (size < 1000 * 1000) {
|
|
|
|
| 50 |
}
|
| 51 |
}
|
| 52 |
|
| 53 |
+
const SliderWithLabel = ({ value, onChange, min, max }) => {
|
| 54 |
const p = 10
|
|
|
|
|
|
|
| 55 |
const start = value === null ? min : Math.log(value[0]) / Math.log(p)
|
| 56 |
const stop = value === null ? max : Math.log(value[1]) / Math.log(p)
|
| 57 |
const [_value, _setValue] = useState([start, stop])
|
|
|
|
| 59 |
const timer = setTimeout(() => {
|
| 60 |
onChange({
|
| 61 |
value:
|
| 62 |
+
// set to "no filter" when (almost) the whole range is selected
|
| 63 |
_value[0] <= min + 0.1 && _value[1] >= max - 0.1
|
| 64 |
? null
|
| 65 |
: [p ** _value[0], p ** _value[1]]
|
| 66 |
})
|
| 67 |
}, 1000)
|
| 68 |
return () => clearTimeout(timer)
|
| 69 |
+
}, [_value, onChange, min, max])
|
| 70 |
return (
|
| 71 |
<div style={{ minWidth: '20rem' }}>
|
| 72 |
<div>{formatSize(p ** _value[0])}</div>
|
|
|
|
| 89 |
return (
|
| 90 |
<SliderWithLabel
|
| 91 |
value={options.value}
|
| 92 |
+
min={8}
|
| 93 |
+
max={12}
|
| 94 |
onChange={e => {
|
| 95 |
options.filterApplyCallback(e.value)
|
| 96 |
setFilters(prevFilters => ({
|
|
|
|
| 102 |
)
|
| 103 |
}
|
| 104 |
|
| 105 |
+
const costFilterTemplate = options => {
|
| 106 |
+
return (
|
| 107 |
+
<SliderWithLabel
|
| 108 |
+
value={options.value}
|
| 109 |
+
min={-2}
|
| 110 |
+
max={2}
|
| 111 |
+
onChange={e => {
|
| 112 |
+
options.filterApplyCallback(e.value)
|
| 113 |
+
setFilters(prevFilters => ({
|
| 114 |
+
...prevFilters,
|
| 115 |
+
cost: { value: e.value, matchMode: FilterMatchMode.BETWEEN }
|
| 116 |
+
}))
|
| 117 |
+
}}
|
| 118 |
+
/>
|
| 119 |
+
)
|
| 120 |
}
|
| 121 |
|
| 122 |
+
const sizeBodyTemplate = rowData => {
|
| 123 |
+
const sizeStr = formatSize(rowData.size)
|
| 124 |
+
return (
|
| 125 |
+
<div style={{ textAlign: 'center' }}>
|
| 126 |
+
<a
|
| 127 |
+
href={`https://huggingface.co/${rowData.hf_id}`}
|
| 128 |
+
target='_blank'
|
| 129 |
+
rel='noopener noreferrer'
|
| 130 |
+
style={{ textDecoration: 'none', color: 'inherit' }}
|
| 131 |
+
>
|
| 132 |
+
{sizeStr}
|
| 133 |
+
</a>
|
| 134 |
+
</div>
|
| 135 |
+
)
|
| 136 |
}
|
| 137 |
|
| 138 |
+
const modelBodyTemplate = rowData => (
|
| 139 |
+
<div style={{ fontWeight: 'bold', height: '100%' }}>{rowData.name}</div>
|
| 140 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
const typeBodyTemplate = rowData => {
|
| 143 |
return rowData.type === 'Open' ? (
|
|
|
|
| 147 |
)
|
| 148 |
}
|
| 149 |
|
| 150 |
+
const costBodyTemplate = rowData => {
|
| 151 |
+
return <div style={{ textAlign: 'center' }}>${rowData.cost.toFixed(2)}</div>
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
const scoreBodyTemplate = (field, options = {}) => {
|
| 155 |
const { minScore = 0, maxScore = 1 } = options
|
| 156 |
|
|
|
|
| 175 |
>
|
| 176 |
<Column field='rank' body={rankBodyTemplate} />
|
| 177 |
<Column
|
| 178 |
+
field='provider_name'
|
| 179 |
header='Provider'
|
| 180 |
style={{ minWidth: '7rem' }}
|
|
|
|
| 181 |
/>
|
| 182 |
<Column
|
| 183 |
+
field='name'
|
| 184 |
header='Model'
|
| 185 |
style={{ minWidth: '10rem' }}
|
| 186 |
body={modelBodyTemplate}
|
|
|
|
| 196 |
/>
|
| 197 |
<Column
|
| 198 |
field='size'
|
| 199 |
+
header='Size'
|
| 200 |
+
headerTooltip='Number of parameters'
|
| 201 |
filter
|
| 202 |
filterElement={sizeFilterTemplate}
|
| 203 |
showFilterMatchModes={false}
|
|
|
|
| 205 |
body={sizeBodyTemplate}
|
| 206 |
style={{ minWidth: '5rem' }}
|
| 207 |
/>
|
| 208 |
+
<Column
|
| 209 |
+
field='cost'
|
| 210 |
+
header='Cost'
|
| 211 |
+
headerTooltip='Cost in USD per million completion tokens'
|
| 212 |
+
filter
|
| 213 |
+
filterElement={costFilterTemplate}
|
| 214 |
+
showFilterMatchModes={false}
|
| 215 |
+
sortable
|
| 216 |
+
body={costBodyTemplate}
|
| 217 |
+
style={{ minWidth: '5rem' }}
|
| 218 |
+
/>
|
| 219 |
<Column
|
| 220 |
field='average'
|
| 221 |
header='Average'
|
results.json
CHANGED
|
@@ -8,7 +8,7 @@
|
|
| 8 |
"family": "Indo-European",
|
| 9 |
"flores_path": "eng_Latn",
|
| 10 |
"fleurs_tag": "en_us",
|
| 11 |
-
"commonvoice_hours":
|
| 12 |
"commonvoice_locale": "en",
|
| 13 |
"in_benchmark": true
|
| 14 |
},
|
|
@@ -236,7 +236,7 @@
|
|
| 236 |
"family": "Austroasiatic",
|
| 237 |
"flores_path": "vie_Latn",
|
| 238 |
"fleurs_tag": "vi_vn",
|
| 239 |
-
"commonvoice_hours": 6.
|
| 240 |
"commonvoice_locale": "vi",
|
| 241 |
"in_benchmark": true
|
| 242 |
},
|
|
@@ -1004,7 +1004,7 @@
|
|
| 1004 |
"family": "Indo-European",
|
| 1005 |
"flores_path": "ces_Latn",
|
| 1006 |
"fleurs_tag": "cs_cz",
|
| 1007 |
-
"commonvoice_hours":
|
| 1008 |
"commonvoice_locale": "cs",
|
| 1009 |
"in_benchmark": true
|
| 1010 |
},
|
|
@@ -2156,7 +2156,7 @@
|
|
| 2156 |
"family": "Kartvelian",
|
| 2157 |
"flores_path": "kat_Geor",
|
| 2158 |
"fleurs_tag": "ka_ge",
|
| 2159 |
-
"commonvoice_hours":
|
| 2160 |
"commonvoice_locale": "ka",
|
| 2161 |
"in_benchmark": true
|
| 2162 |
},
|
|
@@ -2168,7 +2168,7 @@
|
|
| 2168 |
"family": "Indo-European",
|
| 2169 |
"flores_path": "glg_Latn",
|
| 2170 |
"fleurs_tag": "gl_es",
|
| 2171 |
-
"commonvoice_hours":
|
| 2172 |
"commonvoice_locale": "gl",
|
| 2173 |
"in_benchmark": true
|
| 2174 |
},
|
|
@@ -3560,7 +3560,7 @@
|
|
| 3560 |
"family": "Abkhaz-Adyge",
|
| 3561 |
"flores_path": null,
|
| 3562 |
"fleurs_tag": null,
|
| 3563 |
-
"commonvoice_hours":
|
| 3564 |
"commonvoice_locale": "kbd",
|
| 3565 |
"in_benchmark": false
|
| 3566 |
},
|
|
@@ -4352,7 +4352,7 @@
|
|
| 4352 |
"family": "Indo-European",
|
| 4353 |
"flores_path": null,
|
| 4354 |
"fleurs_tag": null,
|
| 4355 |
-
"commonvoice_hours":
|
| 4356 |
"commonvoice_locale": "br",
|
| 4357 |
"in_benchmark": false
|
| 4358 |
},
|
|
@@ -4616,7 +4616,7 @@
|
|
| 4616 |
"family": "Turkic",
|
| 4617 |
"flores_path": null,
|
| 4618 |
"fleurs_tag": null,
|
| 4619 |
-
"commonvoice_hours":
|
| 4620 |
"commonvoice_locale": "sah",
|
| 4621 |
"in_benchmark": false
|
| 4622 |
},
|
|
@@ -4652,7 +4652,7 @@
|
|
| 4652 |
"family": "Abkhaz-Adyge",
|
| 4653 |
"flores_path": null,
|
| 4654 |
"fleurs_tag": null,
|
| 4655 |
-
"commonvoice_hours":
|
| 4656 |
"commonvoice_locale": "ady",
|
| 4657 |
"in_benchmark": false
|
| 4658 |
},
|
|
@@ -7616,7 +7616,7 @@
|
|
| 7616 |
"family": "Indo-European",
|
| 7617 |
"flores_path": null,
|
| 7618 |
"fleurs_tag": null,
|
| 7619 |
-
"commonvoice_hours":
|
| 7620 |
"commonvoice_locale": "kw",
|
| 7621 |
"in_benchmark": false
|
| 7622 |
},
|
|
@@ -8140,6 +8140,9 @@
|
|
| 8140 |
"models": [
|
| 8141 |
{
|
| 8142 |
"id": "openai/gpt-4o-mini",
|
|
|
|
|
|
|
|
|
|
| 8143 |
"hf_id": null,
|
| 8144 |
"creation_date": "NaT",
|
| 8145 |
"size": null,
|
|
@@ -8148,14 +8151,20 @@
|
|
| 8148 |
},
|
| 8149 |
{
|
| 8150 |
"id": "meta-llama/llama-4-maverick",
|
| 8151 |
-
"
|
| 8152 |
-
"
|
| 8153 |
-
"
|
| 8154 |
-
"
|
| 8155 |
-
"
|
|
|
|
|
|
|
|
|
|
| 8156 |
},
|
| 8157 |
{
|
| 8158 |
"id": "meta-llama/llama-3.3-70b-instruct",
|
|
|
|
|
|
|
|
|
|
| 8159 |
"hf_id": "meta-llama/Llama-3.3-70B-Instruct",
|
| 8160 |
"creation_date": "2024-11-26T16:08:47+00:00",
|
| 8161 |
"size": 70553706496.0,
|
|
@@ -8164,6 +8173,9 @@
|
|
| 8164 |
},
|
| 8165 |
{
|
| 8166 |
"id": "meta-llama/llama-3.1-70b-instruct",
|
|
|
|
|
|
|
|
|
|
| 8167 |
"hf_id": "meta-llama/Llama-3.1-70B-Instruct",
|
| 8168 |
"creation_date": "2024-07-16T16:07:46+00:00",
|
| 8169 |
"size": 70553706496.0,
|
|
@@ -8172,22 +8184,31 @@
|
|
| 8172 |
},
|
| 8173 |
{
|
| 8174 |
"id": "meta-llama/llama-3-70b-instruct",
|
| 8175 |
-
"
|
| 8176 |
-
"
|
| 8177 |
-
"
|
| 8178 |
-
"
|
| 8179 |
-
"
|
|
|
|
|
|
|
|
|
|
| 8180 |
},
|
| 8181 |
{
|
| 8182 |
"id": "mistralai/mistral-small-3.1-24b-instruct",
|
| 8183 |
-
"
|
| 8184 |
-
"
|
| 8185 |
-
"
|
| 8186 |
-
"
|
| 8187 |
-
"
|
|
|
|
|
|
|
|
|
|
| 8188 |
},
|
| 8189 |
{
|
| 8190 |
"id": "google/gemini-2.0-flash-001",
|
|
|
|
|
|
|
|
|
|
| 8191 |
"hf_id": null,
|
| 8192 |
"creation_date": "NaT",
|
| 8193 |
"size": null,
|
|
@@ -8196,6 +8217,9 @@
|
|
| 8196 |
},
|
| 8197 |
{
|
| 8198 |
"id": "google/gemma-3-27b-it",
|
|
|
|
|
|
|
|
|
|
| 8199 |
"hf_id": "google/gemma-3-27b-it",
|
| 8200 |
"creation_date": "2025-03-01T19:10:19+00:00",
|
| 8201 |
"size": 27432406640.0,
|
|
@@ -8204,6 +8228,9 @@
|
|
| 8204 |
},
|
| 8205 |
{
|
| 8206 |
"id": "qwen/qwq-32b",
|
|
|
|
|
|
|
|
|
|
| 8207 |
"hf_id": "Qwen/QwQ-32B",
|
| 8208 |
"creation_date": "2025-03-05T14:16:59+00:00",
|
| 8209 |
"size": 32763876352.0,
|
|
@@ -8212,14 +8239,20 @@
|
|
| 8212 |
},
|
| 8213 |
{
|
| 8214 |
"id": "deepseek/deepseek-chat-v3-0324",
|
| 8215 |
-
"
|
| 8216 |
-
"
|
| 8217 |
-
"
|
| 8218 |
-
"
|
| 8219 |
-
"
|
|
|
|
|
|
|
|
|
|
| 8220 |
},
|
| 8221 |
{
|
| 8222 |
"id": "microsoft/phi-4-multimodal-instruct",
|
|
|
|
|
|
|
|
|
|
| 8223 |
"hf_id": "microsoft/Phi-4-multimodal-instruct",
|
| 8224 |
"creation_date": "2025-02-24T22:33:32+00:00",
|
| 8225 |
"size": 5574460384.0,
|
|
@@ -8228,6 +8261,9 @@
|
|
| 8228 |
},
|
| 8229 |
{
|
| 8230 |
"id": "amazon/nova-micro-v1",
|
|
|
|
|
|
|
|
|
|
| 8231 |
"hf_id": null,
|
| 8232 |
"creation_date": "NaT",
|
| 8233 |
"size": null,
|
|
|
|
| 8 |
"family": "Indo-European",
|
| 9 |
"flores_path": "eng_Latn",
|
| 10 |
"fleurs_tag": "en_us",
|
| 11 |
+
"commonvoice_hours": 2657.0,
|
| 12 |
"commonvoice_locale": "en",
|
| 13 |
"in_benchmark": true
|
| 14 |
},
|
|
|
|
| 236 |
"family": "Austroasiatic",
|
| 237 |
"flores_path": "vie_Latn",
|
| 238 |
"fleurs_tag": "vi_vn",
|
| 239 |
+
"commonvoice_hours": 6.1,
|
| 240 |
"commonvoice_locale": "vi",
|
| 241 |
"in_benchmark": true
|
| 242 |
},
|
|
|
|
| 1004 |
"family": "Indo-European",
|
| 1005 |
"flores_path": "ces_Latn",
|
| 1006 |
"fleurs_tag": "cs_cz",
|
| 1007 |
+
"commonvoice_hours": 75.0,
|
| 1008 |
"commonvoice_locale": "cs",
|
| 1009 |
"in_benchmark": true
|
| 1010 |
},
|
|
|
|
| 2156 |
"family": "Kartvelian",
|
| 2157 |
"flores_path": "kat_Geor",
|
| 2158 |
"fleurs_tag": "ka_ge",
|
| 2159 |
+
"commonvoice_hours": 162.0,
|
| 2160 |
"commonvoice_locale": "ka",
|
| 2161 |
"in_benchmark": true
|
| 2162 |
},
|
|
|
|
| 2168 |
"family": "Indo-European",
|
| 2169 |
"flores_path": "glg_Latn",
|
| 2170 |
"fleurs_tag": "gl_es",
|
| 2171 |
+
"commonvoice_hours": 114.0,
|
| 2172 |
"commonvoice_locale": "gl",
|
| 2173 |
"in_benchmark": true
|
| 2174 |
},
|
|
|
|
| 3560 |
"family": "Abkhaz-Adyge",
|
| 3561 |
"flores_path": null,
|
| 3562 |
"fleurs_tag": null,
|
| 3563 |
+
"commonvoice_hours": 30.0,
|
| 3564 |
"commonvoice_locale": "kbd",
|
| 3565 |
"in_benchmark": false
|
| 3566 |
},
|
|
|
|
| 4352 |
"family": "Indo-European",
|
| 4353 |
"flores_path": null,
|
| 4354 |
"fleurs_tag": null,
|
| 4355 |
+
"commonvoice_hours": 28.0,
|
| 4356 |
"commonvoice_locale": "br",
|
| 4357 |
"in_benchmark": false
|
| 4358 |
},
|
|
|
|
| 4616 |
"family": "Turkic",
|
| 4617 |
"flores_path": null,
|
| 4618 |
"fleurs_tag": null,
|
| 4619 |
+
"commonvoice_hours": 12.0,
|
| 4620 |
"commonvoice_locale": "sah",
|
| 4621 |
"in_benchmark": false
|
| 4622 |
},
|
|
|
|
| 4652 |
"family": "Abkhaz-Adyge",
|
| 4653 |
"flores_path": null,
|
| 4654 |
"fleurs_tag": null,
|
| 4655 |
+
"commonvoice_hours": 13.0,
|
| 4656 |
"commonvoice_locale": "ady",
|
| 4657 |
"in_benchmark": false
|
| 4658 |
},
|
|
|
|
| 7616 |
"family": "Indo-European",
|
| 7617 |
"flores_path": null,
|
| 7618 |
"fleurs_tag": null,
|
| 7619 |
+
"commonvoice_hours": 12.0,
|
| 7620 |
"commonvoice_locale": "kw",
|
| 7621 |
"in_benchmark": false
|
| 7622 |
},
|
|
|
|
| 8140 |
"models": [
|
| 8141 |
{
|
| 8142 |
"id": "openai/gpt-4o-mini",
|
| 8143 |
+
"name": "GPT-4o-mini",
|
| 8144 |
+
"provider_name": "OpenAI",
|
| 8145 |
+
"cost": 0.6,
|
| 8146 |
"hf_id": null,
|
| 8147 |
"creation_date": "NaT",
|
| 8148 |
"size": null,
|
|
|
|
| 8151 |
},
|
| 8152 |
{
|
| 8153 |
"id": "meta-llama/llama-4-maverick",
|
| 8154 |
+
"name": "Llama 4 Maverick",
|
| 8155 |
+
"provider_name": "Meta",
|
| 8156 |
+
"cost": 0.85,
|
| 8157 |
+
"hf_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
|
| 8158 |
+
"creation_date": "2025-04-01T22:17:20+00:00",
|
| 8159 |
+
"size": 401583781376.0,
|
| 8160 |
+
"type": "Open",
|
| 8161 |
+
"license": "Other"
|
| 8162 |
},
|
| 8163 |
{
|
| 8164 |
"id": "meta-llama/llama-3.3-70b-instruct",
|
| 8165 |
+
"name": "Llama 3.3 70B Instruct",
|
| 8166 |
+
"provider_name": "Meta",
|
| 8167 |
+
"cost": 0.3,
|
| 8168 |
"hf_id": "meta-llama/Llama-3.3-70B-Instruct",
|
| 8169 |
"creation_date": "2024-11-26T16:08:47+00:00",
|
| 8170 |
"size": 70553706496.0,
|
|
|
|
| 8173 |
},
|
| 8174 |
{
|
| 8175 |
"id": "meta-llama/llama-3.1-70b-instruct",
|
| 8176 |
+
"name": "Llama 3.1 70B Instruct",
|
| 8177 |
+
"provider_name": "Meta",
|
| 8178 |
+
"cost": 0.3,
|
| 8179 |
"hf_id": "meta-llama/Llama-3.1-70B-Instruct",
|
| 8180 |
"creation_date": "2024-07-16T16:07:46+00:00",
|
| 8181 |
"size": 70553706496.0,
|
|
|
|
| 8184 |
},
|
| 8185 |
{
|
| 8186 |
"id": "meta-llama/llama-3-70b-instruct",
|
| 8187 |
+
"name": "Llama 3 70B Instruct",
|
| 8188 |
+
"provider_name": "Meta",
|
| 8189 |
+
"cost": 0.4,
|
| 8190 |
+
"hf_id": "meta-llama/Meta-Llama-3-70B-Instruct",
|
| 8191 |
+
"creation_date": "2024-04-17T09:34:54+00:00",
|
| 8192 |
+
"size": 70553706496.0,
|
| 8193 |
+
"type": "Open",
|
| 8194 |
+
"license": "Llama3"
|
| 8195 |
},
|
| 8196 |
{
|
| 8197 |
"id": "mistralai/mistral-small-3.1-24b-instruct",
|
| 8198 |
+
"name": "Mistral Small 3.1 24B",
|
| 8199 |
+
"provider_name": "Mistral",
|
| 8200 |
+
"cost": 0.3,
|
| 8201 |
+
"hf_id": "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
|
| 8202 |
+
"creation_date": "2025-03-11T17:04:58+00:00",
|
| 8203 |
+
"size": 24011361280.0,
|
| 8204 |
+
"type": "Open",
|
| 8205 |
+
"license": "Apache 2.0"
|
| 8206 |
},
|
| 8207 |
{
|
| 8208 |
"id": "google/gemini-2.0-flash-001",
|
| 8209 |
+
"name": "Gemini 2.0 Flash",
|
| 8210 |
+
"provider_name": "Google",
|
| 8211 |
+
"cost": 0.4,
|
| 8212 |
"hf_id": null,
|
| 8213 |
"creation_date": "NaT",
|
| 8214 |
"size": null,
|
|
|
|
| 8217 |
},
|
| 8218 |
{
|
| 8219 |
"id": "google/gemma-3-27b-it",
|
| 8220 |
+
"name": "Gemma 3 27B",
|
| 8221 |
+
"provider_name": "Google",
|
| 8222 |
+
"cost": 0.2,
|
| 8223 |
"hf_id": "google/gemma-3-27b-it",
|
| 8224 |
"creation_date": "2025-03-01T19:10:19+00:00",
|
| 8225 |
"size": 27432406640.0,
|
|
|
|
| 8228 |
},
|
| 8229 |
{
|
| 8230 |
"id": "qwen/qwq-32b",
|
| 8231 |
+
"name": "QwQ 32B",
|
| 8232 |
+
"provider_name": "Qwen",
|
| 8233 |
+
"cost": 0.2,
|
| 8234 |
"hf_id": "Qwen/QwQ-32B",
|
| 8235 |
"creation_date": "2025-03-05T14:16:59+00:00",
|
| 8236 |
"size": 32763876352.0,
|
|
|
|
| 8239 |
},
|
| 8240 |
{
|
| 8241 |
"id": "deepseek/deepseek-chat-v3-0324",
|
| 8242 |
+
"name": "DeepSeek V3 0324",
|
| 8243 |
+
"provider_name": "DeepSeek",
|
| 8244 |
+
"cost": 1.1,
|
| 8245 |
+
"hf_id": "deepseek-ai/DeepSeek-V3-0324",
|
| 8246 |
+
"creation_date": "2025-03-24T09:28:22+00:00",
|
| 8247 |
+
"size": 684531386000.0,
|
| 8248 |
+
"type": "Open",
|
| 8249 |
+
"license": "Mit"
|
| 8250 |
},
|
| 8251 |
{
|
| 8252 |
"id": "microsoft/phi-4-multimodal-instruct",
|
| 8253 |
+
"name": "Phi 4 Multimodal Instruct",
|
| 8254 |
+
"provider_name": "Microsoft",
|
| 8255 |
+
"cost": 0.1,
|
| 8256 |
"hf_id": "microsoft/Phi-4-multimodal-instruct",
|
| 8257 |
"creation_date": "2025-02-24T22:33:32+00:00",
|
| 8258 |
"size": 5574460384.0,
|
|
|
|
| 8261 |
},
|
| 8262 |
{
|
| 8263 |
"id": "amazon/nova-micro-v1",
|
| 8264 |
+
"name": "Nova Micro 1.0",
|
| 8265 |
+
"provider_name": "Amazon",
|
| 8266 |
+
"cost": 0.14,
|
| 8267 |
"hf_id": null,
|
| 8268 |
"creation_date": "NaT",
|
| 8269 |
"size": null,
|