ZeroGPU-LLM-Inference

Running

Luigi committed on Oct 11

Commit

a4681bd

verified ·

1 Parent(s): 2cadf8a

use apriel 8bit

Files changed (1) hide show

app.py CHANGED Viewed

@@ -55,8 +55,8 @@ MODELS = {
     #     "description": "4-bit AWQ quantized dense causal language model with 32.8B total parameters (31.2B non-embedding), 64 layers, 64 query heads & 8 KV heads, native 32,768-token context (extendable to 131,072 via YaRN). Features seamless switching between thinking mode (for complex reasoning, math, coding) and non-thinking mode (for efficient dialogue), strong multilingual support (100+ languages), and leading open-source agent capabilities."
     # },
-    "Apriel-1.5-15b-Thinker-AWQ-4bit": {
-        "repo_id": "cpatonn/Apriel-1.5-15b-Thinker-AWQ-4bit",
         "description": "Multimodal reasoning model with 15B parameters, trained via extensive mid-training on text and image data, and fine-tuned only on text (no image SFT). Achieves competitive performance on reasoning benchmarks like Artificial Analysis (score: 52), Tau2 Bench Telecom (68), and IFBench (62). Supports both text and image understanding, fits on a single GPU, and includes structured reasoning output with tool and function calling capabilities."
     },

     #     "description": "4-bit AWQ quantized dense causal language model with 32.8B total parameters (31.2B non-embedding), 64 layers, 64 query heads & 8 KV heads, native 32,768-token context (extendable to 131,072 via YaRN). Features seamless switching between thinking mode (for complex reasoning, math, coding) and non-thinking mode (for efficient dialogue), strong multilingual support (100+ languages), and leading open-source agent capabilities."
     # },
+    "Apriel-1.5-15b-Thinker-AWQ-8bit": {
+        "repo_id": "cpatonn/Apriel-1.5-15b-Thinker-AWQ-8bit",
         "description": "Multimodal reasoning model with 15B parameters, trained via extensive mid-training on text and image data, and fine-tuned only on text (no image SFT). Achieves competitive performance on reasoning benchmarks like Artificial Analysis (score: 52), Tau2 Bench Telecom (68), and IFBench (62). Supports both text and image understanding, fits on a single GPU, and includes structured reasoning output with tool and function calling capabilities."
     },