Spaces:

fair-forward
/

languagebench

Running

App Files Files Community

David Pomerenke committed on Apr 11

Commit

9002fc2

1 Parent(s): 8274634

Add OpenRouter metadata to models

Browse files

Via the hf_slug attribute, this also improves the HuggingFace metadata.

Files changed (4) hide show

evals/backend.py +10 -5
evals/models.py +49 -19
frontend/src/components/ModelTable.js +60 -34
results.json +66 -30

evals/backend.py CHANGED Viewed

@@ -1,27 +1,30 @@
 import json
 import os
 import numpy as np
 import pandas as pd
 import uvicorn
 from fastapi import FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.middleware.gzip import GZipMiddleware
 from fastapi.responses import JSONResponse
 from fastapi.staticfiles import StaticFiles
-from countries import make_country_table
 with open("results.json", "r") as f:
     results = json.load(f)
 scores = pd.DataFrame(results["scores"])
 languages = pd.DataFrame(results["languages"])
 models = pd.DataFrame(results["models"])
 def mean(lst):
     """Return the arithmetic mean of ``lst``.

     A falsy ``lst`` (for example an empty list or ``None``) yields ``None``
     instead of raising ``ZeroDivisionError``.
     """
     if not lst:
         return None
     return sum(lst) / len(lst)
 task_metrics = ["translation_bleu", "classification_accuracy"]
 def make_model_table(df, models):
     df = (
         df.groupby(["model", "task", "metric"])
@@ -39,11 +42,14 @@ def make_model_table(df, models):
         [
             "rank",
             "model",
             "hf_id",
             "creation_date",
             "size",
             "type",
             "license",
             "average",
             *task_metrics,
         ]
@@ -97,9 +103,7 @@ async def data(request: Request):
     body = await request.body()
     data = json.loads(body)
     selected_languages = data.get("selectedLanguages", {})
-    df = (
-        scores.groupby(["model", "bcp_47", "task", "metric"]).mean().reset_index()
-    )
     # lang_results = pd.merge(languages, lang_results, on="bcp_47", how="outer")
     language_table = make_language_table(df, languages)
     datasets_df = pd.read_json("datasets.json")
@@ -116,6 +120,7 @@ async def data(request: Request):
     }
     return JSONResponse(content=all_tables)
 app.mount("/", StaticFiles(directory="frontend/build", html=True), name="frontend")
 if __name__ == "__main__":

 import json
 import os
 import numpy as np
 import pandas as pd
 import uvicorn
+from countries import make_country_table
 from fastapi import FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.middleware.gzip import GZipMiddleware
 from fastapi.responses import JSONResponse
 from fastapi.staticfiles import StaticFiles
 with open("results.json", "r") as f:
     results = json.load(f)
 scores = pd.DataFrame(results["scores"])
 languages = pd.DataFrame(results["languages"])
 models = pd.DataFrame(results["models"])
 def mean(lst):
     """Return the arithmetic mean of ``lst``.

     A falsy ``lst`` (for example an empty list or ``None``) yields ``None``
     instead of raising ``ZeroDivisionError``.
     """
     if not lst:
         return None
     return sum(lst) / len(lst)
 task_metrics = ["translation_bleu", "classification_accuracy"]
 def make_model_table(df, models):
     df = (
         df.groupby(["model", "task", "metric"])
         [
             "rank",
             "model",
+            "name",
+            "provider_name",
             "hf_id",
             "creation_date",
             "size",
             "type",
             "license",
+            "cost",
             "average",
             *task_metrics,
         ]
     body = await request.body()
     data = json.loads(body)
     selected_languages = data.get("selectedLanguages", {})
+    df = scores.groupby(["model", "bcp_47", "task", "metric"]).mean().reset_index()
     # lang_results = pd.merge(languages, lang_results, on="bcp_47", how="outer")
     language_table = make_language_table(df, languages)
     datasets_df = pd.read_json("datasets.json")
     }
     return JSONResponse(content=all_tables)
 app.mount("/", StaticFiles(directory="frontend/build", html=True), name="frontend")
 if __name__ == "__main__":

evals/models.py CHANGED Viewed

@@ -7,17 +7,17 @@ from elevenlabs import AsyncElevenLabs
 from huggingface_hub import AsyncInferenceClient, HfApi
 from joblib.memory import Memory
 from openai import AsyncOpenAI
-from requests import HTTPError
 # for development purposes, all languages will be evaluated on the fast models
 # and only a sample of languages will be evaluated on all models
 models = [
     "openai/gpt-4o-mini",  # 0.6$/M tokens
     # "anthropic/claude-3.5-haiku", # 4$/M tokens -> too expensive for dev
-    "meta-llama/llama-4-maverick", # 0.6$/M tokens
     "meta-llama/llama-3.3-70b-instruct",  # 0.3$/M tokens
     "meta-llama/llama-3.1-70b-instruct",  # 0.3$/M tokens
-    "meta-llama/llama-3-70b-instruct", # 0.4$/M tokens
     "mistralai/mistral-small-3.1-24b-instruct",  # 0.3$/M tokens
     # "mistralai/mistral-saba", # 0.6$/M tokens
     # "mistralai/mistral-nemo", # 0.08$/M tokens
@@ -25,10 +25,10 @@ models = [
     # "google/gemini-2.0-flash-lite-001",  # 0.3$/M tokens
     "google/gemma-3-27b-it",  # 0.2$/M tokens
     # "qwen/qwen-turbo", # 0.2$/M tokens; recognizes "inappropriate content"
-    "qwen/qwq-32b", # 0.2$/M tokens
     "deepseek/deepseek-chat-v3-0324",  # 1.1$/M tokens
     # "microsoft/phi-4",  # 0.07$/M tokens; only 16k tokens context
-    "microsoft/phi-4-multimodal-instruct", # 0.1$/M tokens
     "amazon/nova-micro-v1",  # 0.09$/M tokens
     # "openGPT-X/Teuken-7B-instruct-research-v0.4",  # not on OpenRouter
 ]
@@ -94,10 +94,32 @@ async def transcribe(path, model="elevenlabs/scribe_v1"):
 models = pd.DataFrame(models, columns=["id"])
 api = HfApi()
 @cache
-def get_metadata(id):
     try:
         info = api.model_info(id)
         license = info.card_data.license.replace("-", " ").replace("mit", "MIT").title()
@@ -109,17 +131,25 @@ def get_metadata(id):
             "license": license,
         }
     except HTTPError:
-        return {
-            "hf_id": None,
-            "creation_date": None,
-            "size": None,
-            "type": "Commercial",
-            "license": None,
-        }
-models["hf_id"] = models["id"].apply(get_metadata).str["hf_id"]
-models["creation_date"] = models["id"].apply(get_metadata).str["creation_date"]
-models["creation_date"] = pd.to_datetime(models["creation_date"])
-models["size"] = models["id"].apply(get_metadata).str["size"]
-models["type"] = models["id"].apply(get_metadata).str["type"]
-models["license"] = models["id"].apply(get_metadata).str["license"]

 from huggingface_hub import AsyncInferenceClient, HfApi
 from joblib.memory import Memory
 from openai import AsyncOpenAI
+from requests import HTTPError, get
 # for development purposes, all languages will be evaluated on the fast models
 # and only a sample of languages will be evaluated on all models
 models = [
     "openai/gpt-4o-mini",  # 0.6$/M tokens
     # "anthropic/claude-3.5-haiku", # 4$/M tokens -> too expensive for dev
+    "meta-llama/llama-4-maverick",  # 0.6$/M tokens
     "meta-llama/llama-3.3-70b-instruct",  # 0.3$/M tokens
     "meta-llama/llama-3.1-70b-instruct",  # 0.3$/M tokens
+    "meta-llama/llama-3-70b-instruct",  # 0.4$/M tokens
     "mistralai/mistral-small-3.1-24b-instruct",  # 0.3$/M tokens
     # "mistralai/mistral-saba", # 0.6$/M tokens
     # "mistralai/mistral-nemo", # 0.08$/M tokens
     # "google/gemini-2.0-flash-lite-001",  # 0.3$/M tokens
     "google/gemma-3-27b-it",  # 0.2$/M tokens
     # "qwen/qwen-turbo", # 0.2$/M tokens; recognizes "inappropriate content"
+    "qwen/qwq-32b",  # 0.2$/M tokens
     "deepseek/deepseek-chat-v3-0324",  # 1.1$/M tokens
     # "microsoft/phi-4",  # 0.07$/M tokens; only 16k tokens context
+    "microsoft/phi-4-multimodal-instruct",  # 0.1$/M tokens
     "amazon/nova-micro-v1",  # 0.09$/M tokens
     # "openGPT-X/Teuken-7B-instruct-research-v0.4",  # not on OpenRouter
 ]
 models = pd.DataFrame(models, columns=["id"])
+@cache
+def get_or_metadata(id):
+    # get metadata from OpenRouter
+    response = cache(get)("https://openrouter.ai/api/frontend/models/")
+    models = response.json()["data"]
+    metadata = next((m for m in models if m["slug"] == id), None)
+    return metadata
 api = HfApi()
 @cache
+def get_hf_metadata(row):
+    # get metadata from the HuggingFace API
+    empty = {
+        "hf_id": None,
+        "creation_date": None,
+        "size": None,
+        "type": "Commercial",
+        "license": None,
+    }
+    id = row["hf_slug"] or row["slug"]
+    if not id:
+        return empty
     try:
         info = api.model_info(id)
         license = info.card_data.license.replace("-", " ").replace("mit", "MIT").title()
             "license": license,
         }
     except HTTPError:
+        return empty
+or_metadata = models["id"].apply(get_or_metadata)
+hf_metadata = or_metadata.apply(get_hf_metadata)
+def get_cost(row):
+    cost = float(row["endpoint"]["pricing"]["completion"])
+    return round(cost * 1_000_000, 2)
+models = models.assign(
+    name=or_metadata.str["short_name"],
+    provider_name=or_metadata.str["name"].str.split(": ").str[0],
+    cost=or_metadata.apply(get_cost),
+    hf_id=hf_metadata.str["hf_id"],
+    creation_date=pd.to_datetime(hf_metadata.str["creation_date"]),
+    size=hf_metadata.str["size"],
+    type=hf_metadata.str["type"],
+    license=hf_metadata.str["license"],
+)

frontend/src/components/ModelTable.js CHANGED Viewed

@@ -10,7 +10,8 @@ import ScoreField from './ScoreField'
 const ModelTable = ({ data }) => {
   const [filters, setFilters] = useState({
     type: { value: null, matchMode: FilterMatchMode.IN },
-    size: { value: null, matchMode: FilterMatchMode.BETWEEN }
   })
   const rankBodyTemplate = rowData => {
     return <Medal rank={rowData.rank} />
@@ -36,6 +37,8 @@ const ModelTable = ({ data }) => {
   const formatSize = size => {
     if (size === null) {
       return ''
     } else if (size < 1000) {
       return size.toFixed(0) + ''
     } else if (size < 1000 * 1000) {
@@ -47,10 +50,8 @@ const ModelTable = ({ data }) => {
     }
   }
-  const SliderWithLabel = ({ value, onChange }) => {
     const p = 10
-    const min = 8
-    const max = 12
     const start = value === null ? min : Math.log(value[0]) / Math.log(p)
     const stop = value === null ? max : Math.log(value[1]) / Math.log(p)
     const [_value, _setValue] = useState([start, stop])
@@ -58,13 +59,14 @@ const ModelTable = ({ data }) => {
       const timer = setTimeout(() => {
         onChange({
           value:
             _value[0] <= min + 0.1 && _value[1] >= max - 0.1
               ? null
               : [p ** _value[0], p ** _value[1]]
         })
       }, 1000)
       return () => clearTimeout(timer)
-    }, [_value, onChange])
     return (
       <div style={{ minWidth: '20rem' }}>
         <div>{formatSize(p ** _value[0])}</div>
@@ -87,6 +89,8 @@ const ModelTable = ({ data }) => {
     return (
       <SliderWithLabel
         value={options.value}
         onChange={e => {
           options.filterApplyCallback(e.value)
           setFilters(prevFilters => ({
@@ -98,35 +102,42 @@ const ModelTable = ({ data }) => {
     )
   }
-  const sizeBodyTemplate = rowData => {
-    const sizeStr = formatSize(rowData.size)
-    return <div style={{ textAlign: 'center' }}>{sizeStr}</div>
   }
-  const capitalize = s =>
-    (String(s).charAt(0).toUpperCase() + String(s).slice(1))
-      .replace(/gpt/i, 'GPT')
-      .replace(/qwq/i, 'QwQ')
-      .replace(/deepseek/i, 'DeepSeek')
-      .replace(/openai/i, 'OpenAI')
-  const providerBodyTemplate = rowData => {
-    const providerName = rowData.model
-      .split('/')[0]
-      .split('-')
-      .map(capitalize)
-      .join(' ')
-    return providerName
   }
-  const modelBodyTemplate = rowData => {
-    const modelName = rowData.model
-      .split('/')[1]
-      .split('-')
-      .map(capitalize)
-      .join(' ')
-    return <div style={{ fontWeight: 'bold', height: '100%' }}>{modelName}</div>
-  }
   const typeBodyTemplate = rowData => {
     return rowData.type === 'Open' ? (
@@ -136,6 +147,10 @@ const ModelTable = ({ data }) => {
     )
   }
   const scoreBodyTemplate = (field, options = {}) => {
     const { minScore = 0, maxScore = 1 } = options
@@ -160,13 +175,12 @@ const ModelTable = ({ data }) => {
     >
       <Column field='rank' body={rankBodyTemplate} />
       <Column
-        field='provider'
         header='Provider'
         style={{ minWidth: '7rem' }}
-        body={providerBodyTemplate}
       />
       <Column
-        field='model'
         header='Model'
         style={{ minWidth: '10rem' }}
         body={modelBodyTemplate}
@@ -182,7 +196,8 @@ const ModelTable = ({ data }) => {
       />
       <Column
         field='size'
-        header={null}
         filter
         filterElement={sizeFilterTemplate}
         showFilterMatchModes={false}
@@ -190,6 +205,17 @@ const ModelTable = ({ data }) => {
         body={sizeBodyTemplate}
         style={{ minWidth: '5rem' }}
       />
       <Column
         field='average'
         header='Average'

 const ModelTable = ({ data }) => {
   const [filters, setFilters] = useState({
     type: { value: null, matchMode: FilterMatchMode.IN },
+    size: { value: null, matchMode: FilterMatchMode.BETWEEN },
+    cost: { value: null, matchMode: FilterMatchMode.BETWEEN }
   })
   const rankBodyTemplate = rowData => {
     return <Medal rank={rowData.rank} />
   const formatSize = size => {
     if (size === null) {
       return ''
+    } else if (size >= 0 && size <= 1) {
+      return size.toFixed(2) + ''
     } else if (size < 1000) {
       return size.toFixed(0) + ''
     } else if (size < 1000 * 1000) {
     }
   }
+  const SliderWithLabel = ({ value, onChange, min, max }) => {
     const p = 10
     const start = value === null ? min : Math.log(value[0]) / Math.log(p)
     const stop = value === null ? max : Math.log(value[1]) / Math.log(p)
     const [_value, _setValue] = useState([start, stop])
       const timer = setTimeout(() => {
         onChange({
           value:
+            // set to "no filter" when (almost) the whole range is selected
             _value[0] <= min + 0.1 && _value[1] >= max - 0.1
               ? null
               : [p ** _value[0], p ** _value[1]]
         })
       }, 1000)
       return () => clearTimeout(timer)
+    }, [_value, onChange, min, max])
     return (
       <div style={{ minWidth: '20rem' }}>
         <div>{formatSize(p ** _value[0])}</div>
     return (
       <SliderWithLabel
         value={options.value}
+        min={8}
+        max={12}
         onChange={e => {
           options.filterApplyCallback(e.value)
           setFilters(prevFilters => ({
     )
   }
+  const costFilterTemplate = options => {
+    return (
+      <SliderWithLabel
+        value={options.value}
+        min={-2}
+        max={2}
+        onChange={e => {
+          options.filterApplyCallback(e.value)
+          setFilters(prevFilters => ({
+            ...prevFilters,
+            cost: { value: e.value, matchMode: FilterMatchMode.BETWEEN }
+          }))
+        }}
+      />
+    )
   }
+  const sizeBodyTemplate = rowData => {
+    const sizeStr = formatSize(rowData.size)
+    return (
+      <div style={{ textAlign: 'center' }}>
+        <a
+          href={`https://huggingface.co/${rowData.hf_id}`}
+          target='_blank'
+          rel='noopener noreferrer'
+          style={{ textDecoration: 'none', color: 'inherit' }}
+        >
+          {sizeStr}
+        </a>
+      </div>
+    )
   }
+  const modelBodyTemplate = rowData => (
+    <div style={{ fontWeight: 'bold', height: '100%' }}>{rowData.name}</div>
+  )
   const typeBodyTemplate = rowData => {
     return rowData.type === 'Open' ? (
     )
   }
+  const costBodyTemplate = rowData => {
+    return <div style={{ textAlign: 'center' }}>${rowData.cost.toFixed(2)}</div>
+  }
   const scoreBodyTemplate = (field, options = {}) => {
     const { minScore = 0, maxScore = 1 } = options
     >
       <Column field='rank' body={rankBodyTemplate} />
       <Column
+        field='provider_name'
         header='Provider'
         style={{ minWidth: '7rem' }}
       />
       <Column
+        field='name'
         header='Model'
         style={{ minWidth: '10rem' }}
         body={modelBodyTemplate}
       />
       <Column
         field='size'
+        header='Size'
+        headerTooltip='Number of parameters'
         filter
         filterElement={sizeFilterTemplate}
         showFilterMatchModes={false}
         body={sizeBodyTemplate}
         style={{ minWidth: '5rem' }}
       />
+      <Column
+        field='cost'
+        header='Cost'
+        headerTooltip='Cost in USD per million completion tokens'
+        filter
+        filterElement={costFilterTemplate}
+        showFilterMatchModes={false}
+        sortable
+        body={costBodyTemplate}
+        style={{ minWidth: '5rem' }}
+      />
       <Column
         field='average'
         header='Average'

results.json CHANGED Viewed

@@ -8,7 +8,7 @@
       "family": "Indo-European",
       "flores_path": "eng_Latn",
       "fleurs_tag": "en_us",
-      "commonvoice_hours": 2655.0,
       "commonvoice_locale": "en",
       "in_benchmark": true
     },
@@ -236,7 +236,7 @@
       "family": "Austroasiatic",
       "flores_path": "vie_Latn",
       "fleurs_tag": "vi_vn",
-      "commonvoice_hours": 6.0,
       "commonvoice_locale": "vi",
       "in_benchmark": true
     },
@@ -1004,7 +1004,7 @@
       "family": "Indo-European",
       "flores_path": "ces_Latn",
       "fleurs_tag": "cs_cz",
-      "commonvoice_hours": 74.0,
       "commonvoice_locale": "cs",
       "in_benchmark": true
     },
@@ -2156,7 +2156,7 @@
       "family": "Kartvelian",
       "flores_path": "kat_Geor",
       "fleurs_tag": "ka_ge",
-      "commonvoice_hours": 161.0,
       "commonvoice_locale": "ka",
       "in_benchmark": true
     },
@@ -2168,7 +2168,7 @@
       "family": "Indo-European",
       "flores_path": "glg_Latn",
       "fleurs_tag": "gl_es",
-      "commonvoice_hours": 111.0,
       "commonvoice_locale": "gl",
       "in_benchmark": true
     },
@@ -3560,7 +3560,7 @@
       "family": "Abkhaz-Adyge",
       "flores_path": null,
       "fleurs_tag": null,
-      "commonvoice_hours": 27.0,
       "commonvoice_locale": "kbd",
       "in_benchmark": false
     },
@@ -4352,7 +4352,7 @@
       "family": "Indo-European",
       "flores_path": null,
       "fleurs_tag": null,
-      "commonvoice_hours": 27.0,
       "commonvoice_locale": "br",
       "in_benchmark": false
     },
@@ -4616,7 +4616,7 @@
       "family": "Turkic",
       "flores_path": null,
       "fleurs_tag": null,
-      "commonvoice_hours": 11.0,
       "commonvoice_locale": "sah",
       "in_benchmark": false
     },
@@ -4652,7 +4652,7 @@
       "family": "Abkhaz-Adyge",
       "flores_path": null,
       "fleurs_tag": null,
-      "commonvoice_hours": 12.0,
       "commonvoice_locale": "ady",
       "in_benchmark": false
     },
@@ -7616,7 +7616,7 @@
       "family": "Indo-European",
       "flores_path": null,
       "fleurs_tag": null,
-      "commonvoice_hours": 11.0,
       "commonvoice_locale": "kw",
       "in_benchmark": false
     },
@@ -8140,6 +8140,9 @@
   "models": [
     {
       "id": "openai/gpt-4o-mini",
       "hf_id": null,
       "creation_date": "NaT",
       "size": null,
@@ -8148,14 +8151,20 @@
     },
     {
       "id": "meta-llama/llama-4-maverick",
-      "hf_id": null,
-      "creation_date": "NaT",
-      "size": null,
-      "type": "Commercial",
-      "license": null
     },
     {
       "id": "meta-llama/llama-3.3-70b-instruct",
       "hf_id": "meta-llama/Llama-3.3-70B-Instruct",
       "creation_date": "2024-11-26T16:08:47+00:00",
       "size": 70553706496.0,
@@ -8164,6 +8173,9 @@
     },
     {
       "id": "meta-llama/llama-3.1-70b-instruct",
       "hf_id": "meta-llama/Llama-3.1-70B-Instruct",
       "creation_date": "2024-07-16T16:07:46+00:00",
       "size": 70553706496.0,
@@ -8172,22 +8184,31 @@
     },
     {
       "id": "meta-llama/llama-3-70b-instruct",
-      "hf_id": null,
-      "creation_date": "NaT",
-      "size": null,
-      "type": "Commercial",
-      "license": null
     },
     {
       "id": "mistralai/mistral-small-3.1-24b-instruct",
-      "hf_id": null,
-      "creation_date": "NaT",
-      "size": null,
-      "type": "Commercial",
-      "license": null
     },
     {
       "id": "google/gemini-2.0-flash-001",
       "hf_id": null,
       "creation_date": "NaT",
       "size": null,
@@ -8196,6 +8217,9 @@
     },
     {
       "id": "google/gemma-3-27b-it",
       "hf_id": "google/gemma-3-27b-it",
       "creation_date": "2025-03-01T19:10:19+00:00",
       "size": 27432406640.0,
@@ -8204,6 +8228,9 @@
     },
     {
       "id": "qwen/qwq-32b",
       "hf_id": "Qwen/QwQ-32B",
       "creation_date": "2025-03-05T14:16:59+00:00",
       "size": 32763876352.0,
@@ -8212,14 +8239,20 @@
     },
     {
       "id": "deepseek/deepseek-chat-v3-0324",
-      "hf_id": null,
-      "creation_date": "NaT",
-      "size": null,
-      "type": "Commercial",
-      "license": null
     },
     {
       "id": "microsoft/phi-4-multimodal-instruct",
       "hf_id": "microsoft/Phi-4-multimodal-instruct",
       "creation_date": "2025-02-24T22:33:32+00:00",
       "size": 5574460384.0,
@@ -8228,6 +8261,9 @@
     },
     {
       "id": "amazon/nova-micro-v1",
       "hf_id": null,
       "creation_date": "NaT",
       "size": null,

       "family": "Indo-European",
       "flores_path": "eng_Latn",
       "fleurs_tag": "en_us",
+      "commonvoice_hours": 2657.0,
       "commonvoice_locale": "en",
       "in_benchmark": true
     },
       "family": "Austroasiatic",
       "flores_path": "vie_Latn",
       "fleurs_tag": "vi_vn",
+      "commonvoice_hours": 6.1,
       "commonvoice_locale": "vi",
       "in_benchmark": true
     },
       "family": "Indo-European",
       "flores_path": "ces_Latn",
       "fleurs_tag": "cs_cz",
+      "commonvoice_hours": 75.0,
       "commonvoice_locale": "cs",
       "in_benchmark": true
     },
       "family": "Kartvelian",
       "flores_path": "kat_Geor",
       "fleurs_tag": "ka_ge",
+      "commonvoice_hours": 162.0,
       "commonvoice_locale": "ka",
       "in_benchmark": true
     },
       "family": "Indo-European",
       "flores_path": "glg_Latn",
       "fleurs_tag": "gl_es",
+      "commonvoice_hours": 114.0,
       "commonvoice_locale": "gl",
       "in_benchmark": true
     },
       "family": "Abkhaz-Adyge",
       "flores_path": null,
       "fleurs_tag": null,
+      "commonvoice_hours": 30.0,
       "commonvoice_locale": "kbd",
       "in_benchmark": false
     },
       "family": "Indo-European",
       "flores_path": null,
       "fleurs_tag": null,
+      "commonvoice_hours": 28.0,
       "commonvoice_locale": "br",
       "in_benchmark": false
     },
       "family": "Turkic",
       "flores_path": null,
       "fleurs_tag": null,
+      "commonvoice_hours": 12.0,
       "commonvoice_locale": "sah",
       "in_benchmark": false
     },
       "family": "Abkhaz-Adyge",
       "flores_path": null,
       "fleurs_tag": null,
+      "commonvoice_hours": 13.0,
       "commonvoice_locale": "ady",
       "in_benchmark": false
     },
       "family": "Indo-European",
       "flores_path": null,
       "fleurs_tag": null,
+      "commonvoice_hours": 12.0,
       "commonvoice_locale": "kw",
       "in_benchmark": false
     },
   "models": [
     {
       "id": "openai/gpt-4o-mini",
+      "name": "GPT-4o-mini",
+      "provider_name": "OpenAI",
+      "cost": 0.6,
       "hf_id": null,
       "creation_date": "NaT",
       "size": null,
     },
     {
       "id": "meta-llama/llama-4-maverick",
+      "name": "Llama 4 Maverick",
+      "provider_name": "Meta",
+      "cost": 0.85,
+      "hf_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
+      "creation_date": "2025-04-01T22:17:20+00:00",
+      "size": 401583781376.0,
+      "type": "Open",
+      "license": "Other"
     },
     {
       "id": "meta-llama/llama-3.3-70b-instruct",
+      "name": "Llama 3.3 70B Instruct",
+      "provider_name": "Meta",
+      "cost": 0.3,
       "hf_id": "meta-llama/Llama-3.3-70B-Instruct",
       "creation_date": "2024-11-26T16:08:47+00:00",
       "size": 70553706496.0,
     },
     {
       "id": "meta-llama/llama-3.1-70b-instruct",
+      "name": "Llama 3.1 70B Instruct",
+      "provider_name": "Meta",
+      "cost": 0.3,
       "hf_id": "meta-llama/Llama-3.1-70B-Instruct",
       "creation_date": "2024-07-16T16:07:46+00:00",
       "size": 70553706496.0,
     },
     {
       "id": "meta-llama/llama-3-70b-instruct",
+      "name": "Llama 3 70B Instruct",
+      "provider_name": "Meta",
+      "cost": 0.4,
+      "hf_id": "meta-llama/Meta-Llama-3-70B-Instruct",
+      "creation_date": "2024-04-17T09:34:54+00:00",
+      "size": 70553706496.0,
+      "type": "Open",
+      "license": "Llama3"
     },
     {
       "id": "mistralai/mistral-small-3.1-24b-instruct",
+      "name": "Mistral Small 3.1 24B",
+      "provider_name": "Mistral",
+      "cost": 0.3,
+      "hf_id": "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
+      "creation_date": "2025-03-11T17:04:58+00:00",
+      "size": 24011361280.0,
+      "type": "Open",
+      "license": "Apache 2.0"
     },
     {
       "id": "google/gemini-2.0-flash-001",
+      "name": "Gemini 2.0 Flash",
+      "provider_name": "Google",
+      "cost": 0.4,
       "hf_id": null,
       "creation_date": "NaT",
       "size": null,
     },
     {
       "id": "google/gemma-3-27b-it",
+      "name": "Gemma 3 27B",
+      "provider_name": "Google",
+      "cost": 0.2,
       "hf_id": "google/gemma-3-27b-it",
       "creation_date": "2025-03-01T19:10:19+00:00",
       "size": 27432406640.0,
     },
     {
       "id": "qwen/qwq-32b",
+      "name": "QwQ 32B",
+      "provider_name": "Qwen",
+      "cost": 0.2,
       "hf_id": "Qwen/QwQ-32B",
       "creation_date": "2025-03-05T14:16:59+00:00",
       "size": 32763876352.0,
     },
     {
       "id": "deepseek/deepseek-chat-v3-0324",
+      "name": "DeepSeek V3 0324",
+      "provider_name": "DeepSeek",
+      "cost": 1.1,
+      "hf_id": "deepseek-ai/DeepSeek-V3-0324",
+      "creation_date": "2025-03-24T09:28:22+00:00",
+      "size": 684531386000.0,
+      "type": "Open",
+      "license": "Mit"
     },
     {
       "id": "microsoft/phi-4-multimodal-instruct",
+      "name": "Phi 4 Multimodal Instruct",
+      "provider_name": "Microsoft",
+      "cost": 0.1,
       "hf_id": "microsoft/Phi-4-multimodal-instruct",
       "creation_date": "2025-02-24T22:33:32+00:00",
       "size": 5574460384.0,
     },
     {
       "id": "amazon/nova-micro-v1",
+      "name": "Nova Micro 1.0",
+      "provider_name": "Amazon",
+      "cost": 0.14,
       "hf_id": null,
       "creation_date": "NaT",
       "size": null,