feat(models): add Granite-4.0-Micro and Qwen3-4B-Instruct-2507 to MODELS registry
Added Granite-4.0-Micro (a 3B dense instruct model from IBM) under the ~3B parameter category
Added Qwen3-4B-Instruct-2507 (the updated non-thinking instruct variant with 4B parameters and 256K context) under the 4B–4.3B category
Included detailed descriptions based on the official Hugging Face model cards
Maintained consistent formatting and category structure
app.py
CHANGED
@@ -75,6 +75,10 @@ MODELS = {
         "repo_id": "PowerInfer/SmallThinker-4BA0.6B-Instruct",
         "description": "SmallThinker 4 B backbone with 0.6 B activated parameters, instruction‑tuned"
     },
+    "Qwen3-4B-Instruct-2507": {
+        "repo_id": "Qwen/Qwen3-4B-Instruct-2507",
+        "description": "Updated non-thinking instruct variant of Qwen3-4B with 4.0B parameters, featuring significant improvements in instruction following, logical reasoning, multilingualism, and 256K long-context understanding. Strong performance across knowledge, coding, alignment, and agent benchmarks."
+    },

     # Models with ~3B parameters
     "AI21-Jamba-Reasoning-3B": {
@@ -97,6 +101,10 @@ MODELS = {
         "repo_id": "Qwen/Qwen2.5-Omni-3B",
         "description": "Qwen2.5-Omni-3B"
     },
+    "Granite-4.0-Micro": {
+        "repo_id": "ibm-granite/granite-4.0-micro",
+        "description": "A 3B-parameter long-context instruct model from IBM, finetuned for enhanced instruction following and tool-calling. Supports 12 languages including English, Chinese, Arabic, and Japanese. Built on a dense Transformer with GQA, RoPE, SwiGLU, and 128K context length. Trained using SFT, RL alignment, and model merging techniques for enterprise applications."
+    },

     # Models with 2.6B parameters
     "LFM2-2.6B": {
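For reference, a minimal sketch of how the new registry entries might be consumed elsewhere in app.py, assuming the Space loads checkpoints with Hugging Face transformers. Only the MODELS dict shape and the repo_id values come from this diff; the load_model() helper, the truncated descriptions, and the transformers-based loading are illustrative assumptions, not part of the change.

# Minimal sketch (assumption): consuming a MODELS entry added in this commit.
# Only the dict shape and repo_id values come from the diff above; load_model()
# and the use of transformers here are hypothetical.
from transformers import AutoModelForCausalLM, AutoTokenizer

MODELS = {
    "Qwen3-4B-Instruct-2507": {
        "repo_id": "Qwen/Qwen3-4B-Instruct-2507",
        "description": "Updated non-thinking instruct variant of Qwen3-4B (truncated for the sketch)",
    },
    "Granite-4.0-Micro": {
        "repo_id": "ibm-granite/granite-4.0-micro",
        "description": "3B long-context instruct model from IBM (truncated for the sketch)",
    },
}

def load_model(name: str):
    """Resolve a registry key to its Hub repo_id and load tokenizer and weights."""
    entry = MODELS[name]
    tokenizer = AutoTokenizer.from_pretrained(entry["repo_id"])
    model = AutoModelForCausalLM.from_pretrained(entry["repo_id"])
    return tokenizer, model

# Example: load one of the newly registered models by its registry key.
tokenizer, model = load_model("Granite-4.0-Micro")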