Luigi committed on
Commit c30a7f7 · verified · 1 Parent(s): 3c22497

feat(models): add Granite-4.0-Micro and Qwen3-4B-Instruct-2507 to MODELS registry


Added Granite-4.0-Micro (3B dense instruct model from IBM) under ~3B parameter category
Added Qwen3-4B-Instruct-2507 (updated non-thinking instruct variant with 4B params and 256K context) under 4B–4.3B category
Included detailed descriptions based on official Hugging Face model cards
Maintained consistent formatting and category structure

Files changed (1)
  1. app.py +8 -0
app.py CHANGED
@@ -75,6 +75,10 @@ MODELS = {
         "repo_id": "PowerInfer/SmallThinker-4BA0.6B-Instruct",
         "description": "SmallThinker 4 B backbone with 0.6 B activated parameters, instruction‑tuned"
     },
+    "Qwen3-4B-Instruct-2507": {
+        "repo_id": "Qwen/Qwen3-4B-Instruct-2507",
+        "description": "Updated non-thinking instruct variant of Qwen3-4B with 4.0B parameters, featuring significant improvements in instruction following, logical reasoning, multilingualism, and 256K long-context understanding. Strong performance across knowledge, coding, alignment, and agent benchmarks."
+    },
 
     # Models with ~3B parameters
     "AI21-Jamba-Reasoning-3B": {
@@ -97,6 +101,10 @@ MODELS = {
         "repo_id": "Qwen/Qwen2.5-Omni-3B",
         "description": "Qwen2.5-Omni-3B"
     },
+    "Granite-4.0-Micro": {
+        "repo_id": "ibm-granite/granite-4.0-micro",
+        "description": "A 3B-parameter long-context instruct model from IBM, finetuned for enhanced instruction following and tool-calling. Supports 12 languages including English, Chinese, Arabic, and Japanese. Built on a dense Transformer with GQA, RoPE, SwiGLU, and 128K context length. Trained using SFT, RL alignment, and model merging techniques for enterprise applications."
+    },
 
     # Models with 2.6B parameters
     "LFM2-2.6B": {