feat(models): add Granite-4.0-Micro and Qwen3-4B-Instruct-2507 to MODELS registry
Added Granite-4.0-Micro (a 3B dense instruct model from IBM) under the ~3B parameter category
Added Qwen3-4B-Instruct-2507 (the updated non-thinking instruct variant with 4B parameters and 256K context) under the 4B–4.3B category
Included detailed descriptions based on the official Hugging Face model cards
Maintained consistent formatting and category structure
app.py
CHANGED
@@ -75,6 +75,10 @@ MODELS = {
         "repo_id": "PowerInfer/SmallThinker-4BA0.6B-Instruct",
         "description": "SmallThinker 4 B backbone with 0.6 B activated parameters, instruction‑tuned"
     },
+    "Qwen3-4B-Instruct-2507": {
+        "repo_id": "Qwen/Qwen3-4B-Instruct-2507",
+        "description": "Updated non-thinking instruct variant of Qwen3-4B with 4.0B parameters, featuring significant improvements in instruction following, logical reasoning, multilingualism, and 256K long-context understanding. Strong performance across knowledge, coding, alignment, and agent benchmarks."
+    },

     # Models with ~3B parameters
     "AI21-Jamba-Reasoning-3B": {
@@ -97,6 +101,10 @@ MODELS = {
         "repo_id": "Qwen/Qwen2.5-Omni-3B",
         "description": "Qwen2.5-Omni-3B"
     },
+    "Granite-4.0-Micro": {
+        "repo_id": "ibm-granite/granite-4.0-micro",
+        "description": "A 3B-parameter long-context instruct model from IBM, finetuned for enhanced instruction following and tool-calling. Supports 12 languages including English, Chinese, Arabic, and Japanese. Built on a dense Transformer with GQA, RoPE, SwiGLU, and 128K context length. Trained using SFT, RL alignment, and model merging techniques for enterprise applications."
+    },

     # Models with 2.6B parameters
     "LFM2-2.6B": {
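For reference, a minimal sketch of how the new registry entries might be consumed elsewhere in app.py, assuming the Space loads checkpoints with Hugging Face transformers. Only the MODELS dict shape and the repo_id values come from this diff; the load_model() helper, the truncated descriptions, and the transformers-based loading are illustrative assumptions, not part of the change.

# Minimal sketch (assumption): consuming a MODELS entry added in this commit.
# Only the dict shape and repo_id values come from the diff above; load_model()
# and the use of transformers here are hypothetical.
from transformers import AutoModelForCausalLM, AutoTokenizer

MODELS = {
    "Qwen3-4B-Instruct-2507": {
        "repo_id": "Qwen/Qwen3-4B-Instruct-2507",
        "description": "Updated non-thinking instruct variant of Qwen3-4B (truncated for the sketch)",
    },
    "Granite-4.0-Micro": {
        "repo_id": "ibm-granite/granite-4.0-micro",
        "description": "3B long-context instruct model from IBM (truncated for the sketch)",
    },
}

def load_model(name: str):
    """Resolve a registry key to its Hub repo_id and load tokenizer and weights."""
    entry = MODELS[name]
    tokenizer = AutoTokenizer.from_pretrained(entry["repo_id"])
    model = AutoModelForCausalLM.from_pretrained(entry["repo_id"])
    return tokenizer, model

# Example: load one of the newly registered models by its registry key.
tokenizer, model = load_model("Granite-4.0-Micro")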