Luigi committed
Commit 8eefe94 · 1 Parent(s): ac20174

add 5 models from liquid ai

Files changed (1): app.py (+67 -54)
app.py CHANGED
@@ -26,9 +26,17 @@ cancel_event = threading.Event()
 # Torch-Compatible Model Definitions with Adjusted Descriptions
 # ------------------------------
 MODELS = {
-    # your existing entries …
-    "Qwen2.5-Taiwan-1.5B-Instruct": {"repo_id": "benchang1110/Qwen2.5-Taiwan-1.5B-Instruct", "description": "Qwen2.5-Taiwan-1.5B-Instruct"},
+    # Models with ~135M parameters
     "SmolLM2-135M-multilingual-base": {"repo_id": "agentlans/SmolLM2-135M-multilingual-base", "description": "SmolLM2-135M-multilingual-base"},
+    "SmolLM-135M-Taiwan-Instruct-v1.0": {
+        "repo_id": "benchang1110/SmolLM-135M-Taiwan-Instruct-v1.0",
+        "description": "135-million-parameter F32 safetensors instruction-finetuned variant of SmolLM-135M-Taiwan, trained on the 416 k-example ChatTaiwan dataset for Traditional Chinese conversational and instruction-following tasks"
+    },
+    "SmolLM2_135M_Grpo_Gsm8k":{"repo_id":"prithivMLmods/SmolLM2_135M_Grpo_Gsm8k", "description":"SmolLM2_135M_Grpo_Gsm8k"},
+    "SmolLM2-135M-Instruct": {"repo_id": "HuggingFaceTB/SmolLM2-135M-Instruct", "description": "Original SmolLM2‑135M Instruct"},
+    "SmolLM2-135M-Instruct-TaiwanChat": {"repo_id": "Luigi/SmolLM2-135M-Instruct-TaiwanChat", "description": "SmolLM2‑135M Instruct fine-tuned on TaiwanChat"},
+
+    # Models with ~270M parameters
     "parser_model_ner_gemma_v0.1": {
         "repo_id": "myfi/parser_model_ner_gemma_v0.1",
         "description": "A lightweight named‑entity‑like (NER) parser fine‑tuned from Google’s **Gemma‑3‑270M** model. The base Gemma‑3‑270M is a 270 M‑parameter, hyper‑efficient LLM designed for on‑device inference, supporting >140 languages, a 128 k‑token context window, and instruction‑following capabilities [2][7]. This variant is further trained on standard NER corpora (e.g., CoNLL‑2003, OntoNotes) to extract PERSON, ORG, LOC, and MISC entities with high precision while keeping the memory footprint low (≈240 MB VRAM in BF16 quantized form) [1]. It is released under the Apache‑2.0 license and can be used for fast, cost‑effective entity extraction in low‑resource environments."
@@ -38,17 +46,18 @@ MODELS = {
         "description": "google/gemma-3-270m-it fintuned on Taiwan Chinese dataset"
     },
     "gemma-3-270m-it":{
-        "repo_id":"google/gemma-3-270m-it",
-        "description":"Gemma‑3‑270M‑IT is a compact, 270‑million‑parameter language model fine‑tuned for Italian, offering fast and efficient on‑device text generation and comprehension in the Italian language.",
-    },
-    "SmolLM-135M-Taiwan-Instruct-v1.0": {
-        "repo_id": "benchang1110/SmolLM-135M-Taiwan-Instruct-v1.0",
-        "description": "135-million-parameter F32 safetensors instruction-finetuned variant of SmolLM-135M-Taiwan, trained on the 416 k-example ChatTaiwan dataset for Traditional Chinese conversational and instruction-following tasks"
+        "repo_id":"google/gemma-3-270m-it",
+        "description":"Gemma‑3‑270M‑IT is a compact, 270‑million‑parameter language model fine‑tuned for Italian, offering fast and efficient on‑device text generation and comprehension in the Italian language.",
     },
-    "Llama-3.2-Taiwan-1B": {
-        "repo_id": "lianghsun/Llama-3.2-Taiwan-1B",
-        "description": "Llama-3.2-Taiwan base model with 1 B parameters"
+    "Taiwan-ELM-270M-Instruct": {"repo_id": "liswei/Taiwan-ELM-270M-Instruct", "description": "Taiwan-ELM-270M-Instruct"},
+
+    # Models with 350M-700M parameters
+    "LFM2-350M": {
+        "repo_id": "LiquidAI/LFM2-350M",
+        "description": "A compact 350M parameter hybrid model optimized for edge and on-device applications, offering significantly faster training and inference speeds compared to models like Qwen3."
     },
+    "SmolLM2-360M-Instruct-TaiwanChat": {"repo_id": "Luigi/SmolLM2-360M-Instruct-TaiwanChat", "description": "SmolLM2‑360M Instruct fine-tuned on TaiwanChat"},
+    "SmolLM2-360M-Instruct": {"repo_id": "HuggingFaceTB/SmolLM2-360M-Instruct", "description": "Original SmolLM2‑360M Instruct"},
     "Qwen2.5-0.5B-Taiwan-Instruct": {
         "repo_id": "ShengweiPeng/Qwen2.5-0.5B-Taiwan-Instruct",
         "description": "Qwen2.5-Taiwan model with 0.5 B parameters, instruction-tuned"
@@ -57,73 +66,77 @@ MODELS = {
         "repo_id": "ShengweiPeng/Qwen3-0.6B-Taiwan",
         "description": "Qwen3-Taiwan model with 0.6 B parameters"
     },
-
-    "Qwen2.5-Taiwan-3B-Reason-GRPO": {
-        "repo_id": "benchang1110/Qwen2.5-Taiwan-3B-Reason-GRPO",
-        "description":"Qwen2.5-Taiwan model with 3 B parameters, Reason-GRPO fine-tuned"
+    "Qwen3-0.6B": {"repo_id":"Qwen/Qwen3-0.6B","description":"Dense causal language model with 0.6 B total parameters (0.44 B non-embedding), 28 transformer layers, 16 query heads & 8 KV heads, native 32 768-token context window, dual-mode generation, full multilingual & agentic capabilities."},
+    "LFM2-700M": {
+        "repo_id": "LiquidAI/LFM2-700M",
+        "description": "A 700M parameter model from the LFM2 family, designed for high efficiency on edge devices with a hybrid architecture of multiplicative gates and short convolutions."
     },
+
+    # Models with 1B-2B parameters
     "Llama-3.2-Taiwan-1B": {
         "repo_id": "lianghsun/Llama-3.2-Taiwan-1B",
         "description":"Llama-3.2-Taiwan base model with 1 B parameters"
     },
-
-
-
-    # Gemma 3n “effective” variants (official Google repos)
+    "Taiwan-ELM-1_1B-Instruct": {"repo_id": "liswei/Taiwan-ELM-1_1B-Instruct", "description": "Taiwan-ELM-1_1B-Instruct"},
+    "LFM2-1.2B": {
+        "repo_id": "LiquidAI/LFM2-1.2B",
+        "description": "A 1.2B parameter hybrid language model from Liquid AI, designed for efficient on-device and edge AI deployment, outperforming larger models like Llama-2-7b-hf in specific tasks."
+    },
+    "Qwen2.5-Taiwan-1.5B-Instruct": {"repo_id": "benchang1110/Qwen2.5-Taiwan-1.5B-Instruct", "description": "Qwen2.5-Taiwan-1.5B-Instruct"},
+    "Falcon-H1-1.5B-Instruct": {
+        "repo_id": "tiiuae/Falcon-H1-1.5B-Instruct",
+        "description":"Falcon‑H1 model with 1.5 B parameters, instruction‑tuned"
+    },
+    "Nemotron-Research-Reasoning-Qwen-1.5B": {"repo_id": "nvidia/Nemotron-Research-Reasoning-Qwen-1.5B", "description": "Nemotron-Research-Reasoning-Qwen-1.5B"},
+    "Qwen3-1.7B": {"repo_id":"Qwen/Qwen3-1.7B","description":"Dense causal language model with 1.7 B total parameters (1.4 B non-embedding), 28 layers, 16 query heads & 8 KV heads, 32 768-token context, stronger reasoning vs. 0.6 B variant, dual-mode inference, instruction following across 100+ languages."},
     "Gemma-3n-E2B": {
         "repo_id": "google/gemma-3n-E2B",
         "description":"Gemma 3n base model with effective 2 B parameters (≈2 GB VRAM)"
     },
+
+    # Models with 2.6B-4B parameters
+    "LFM2-2.6B": {
+        "repo_id": "LiquidAI/LFM2-2.6B",
+        "description": "The 2.6B parameter model in the LFM2 series, it outperforms models in the 3B+ class and features a hybrid architecture for faster inference."
+    },
+    "Qwen2.5-Taiwan-3B-Reason-GRPO": {
+        "repo_id": "benchang1110/Qwen2.5-Taiwan-3B-Reason-GRPO",
+        "description":"Qwen2.5-Taiwan model with 3 B parameters, Reason-GRPO fine-tuned"
+    },
+    "Llama-3.2-Taiwan-3B-Instruct": {"repo_id": "lianghsun/Llama-3.2-Taiwan-3B-Instruct", "description": "Llama-3.2-Taiwan-3B-Instruct"},
+    "Qwen2.5-3B-Instruct": {"repo_id": "Qwen/Qwen2.5-3B-Instruct", "description": "Qwen2.5-3B-Instruct"},
+    "Qwen2.5-Omni-3B": {"repo_id": "Qwen/Qwen2.5-Omni-3B", "description": "Qwen2.5-Omni-3B"},
+    "Phi-4-mini-Reasoning": {"repo_id": "microsoft/Phi-4-mini-reasoning", "description": "Phi-4-mini-Reasoning (4.3B parameters)"},
+    "Phi-4-mini-Instruct": {"repo_id": "microsoft/Phi-4-mini-instruct", "description": "Phi-4-mini-Instruct (4.3B parameters)"},
     "Gemma-3n-E4B": {
         "repo_id": "google/gemma-3n-E4B",
         "description":"Gemma 3n base model with effective 4 B parameters (≈3 GB VRAM)"
     },
-
-    # PowerInfer SmallThinker (instruction‑tuned)
     "SmallThinker-4BA0.6B-Instruct": {
         "repo_id": "PowerInfer/SmallThinker-4BA0.6B-Instruct",
         "description":"SmallThinker 4 B backbone with 0.6 B activated parameters, instruction‑tuned"
     },
-    # TIIUAE Falcon‑H1 (instruction‑tuned)
-    "Falcon-H1-1.5B-Instruct": {
-        "repo_id": "tiiuae/Falcon-H1-1.5B-Instruct",
-        "description":"Falcon‑H1 model with 1.5 B parameters, instruction‑tuned"
-    },
-    "Qwen/Qwen3-14B-FP8": {"repo_id": "Qwen/Qwen3-14B-FP8", "description": "Qwen/Qwen3-14B-FP8"},
-    #"Qwen/Qwen3-32B-FP8": {"repo_id": "Qwen/Qwen3-32B-FP8", "description": "Qwen/Qwen3-32B-FP8"},
-    "DeepSeek-R1-0528-Qwen3-8B": {"repo_id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "description": "DeepSeek-R1-0528-Qwen3-8B"},
-    "Nemotron-Research-Reasoning-Qwen-1.5B": {"repo_id": "nvidia/Nemotron-Research-Reasoning-Qwen-1.5B", "description": "Nemotron-Research-Reasoning-Qwen-1.5B"},
-    "Taiwan-ELM-1_1B-Instruct": {"repo_id": "liswei/Taiwan-ELM-1_1B-Instruct", "description": "Taiwan-ELM-1_1B-Instruct"},
-    "Taiwan-ELM-270M-Instruct": {"repo_id": "liswei/Taiwan-ELM-270M-Instruct", "description": "Taiwan-ELM-270M-Instruct"},
-    # "Granite-4.0-Tiny-Preview": {"repo_id": "ibm-granite/granite-4.0-tiny-preview", "description": "Granite-4.0-Tiny-Preview"},
-    "Qwen3-0.6B": {"repo_id":"Qwen/Qwen3-0.6B","description":"Dense causal language model with 0.6 B total parameters (0.44 B non-embedding), 28 transformer layers, 16 query heads & 8 KV heads, native 32 768-token context window, dual-mode generation, full multilingual & agentic capabilities."},
-    "Qwen3-1.7B": {"repo_id":"Qwen/Qwen3-1.7B","description":"Dense causal language model with 1.7 B total parameters (1.4 B non-embedding), 28 layers, 16 query heads & 8 KV heads, 32 768-token context, stronger reasoning vs. 0.6 B variant, dual-mode inference, instruction following across 100+ languages."},
     "Qwen3-4B": {"repo_id":"Qwen/Qwen3-4B","description":"Dense causal language model with 4.0 B total parameters (3.6 B non-embedding), 36 layers, 32 query heads & 8 KV heads, native 32 768-token context (extendable to 131 072 via YaRN), balanced mid-range capacity & long-context reasoning."},
-    "Qwen3-8B": {"repo_id":"Qwen/Qwen3-8B","description":"Dense causal language model with 8.2 B total parameters (6.95 B non-embedding), 36 layers, 32 query heads & 8 KV heads, 32 768-token context (131 072 via YaRN), excels at multilingual instruction following & zero-shot tasks."},
-    "Qwen3-14B": {"repo_id":"Qwen/Qwen3-14B","description":"Dense causal language model with 14.8 B total parameters (13.2 B non-embedding), 40 layers, 40 query heads & 8 KV heads, 32 768-token context (131 072 via YaRN), enhanced human preference alignment & advanced agent integration."},
-    # "Qwen3-32B": {"repo_id":"Qwen/Qwen3-32B","description":"Dense causal language model with 32.8 B total parameters (31.2 B non-embedding), 64 layers, 64 query heads & 8 KV heads, 32 768-token context (131 072 via YaRN), flagship variant delivering state-of-the-art reasoning & instruction following."},
-    # "Qwen3-30B-A3B": {"repo_id":"Qwen/Qwen3-30B-A3B","description":"Mixture-of-Experts model with 30.5 B total parameters (29.9 B non-embedding, 3.3 B activated per token), 48 layers, 128 experts (8 activated per token), 32 query heads & 4 KV heads, 32 768-token context (131 072 via YaRN), MoE routing for scalable specialized reasoning."},
-    # "Qwen3-235B-A22B":{"repo_id":"Qwen/Qwen3-235B-A22B","description":"Mixture-of-Experts model with 235 B total parameters (234 B non-embedding, 22 B activated per token), 94 layers, 128 experts (8 activated per token), 64 query heads & 4 KV heads, 32 768-token context (131 072 via YaRN), ultra-scale reasoning & agentic workflows."},
     "Gemma-3-4B-IT": {"repo_id": "unsloth/gemma-3-4b-it", "description": "Gemma-3-4B-IT"},
-    "SmolLM2_135M_Grpo_Gsm8k":{"repo_id":"prithivMLmods/SmolLM2_135M_Grpo_Gsm8k", "desscription":"SmolLM2_135M_Grpo_Gsm8k"},
-    "SmolLM2-135M-Instruct-TaiwanChat": {"repo_id": "Luigi/SmolLM2-135M-Instruct-TaiwanChat", "description": "SmolLM2‑135M Instruct fine-tuned on TaiwanChat"},
-    "SmolLM2-135M-Instruct": {"repo_id": "HuggingFaceTB/SmolLM2-135M-Instruct", "description": "Original SmolLM2‑135M Instruct"},
-    "SmolLM2-360M-Instruct-TaiwanChat": {"repo_id": "Luigi/SmolLM2-360M-Instruct-TaiwanChat", "description": "SmolLM2‑360M Instruct fine-tuned on TaiwanChat"},
-    "SmolLM2-360M-Instruct": {"repo_id": "HuggingFaceTB/SmolLM2-360M-Instruct", "description": "Original SmolLM2‑360M Instruct"},
-    "Llama-3.2-Taiwan-3B-Instruct": {"repo_id": "lianghsun/Llama-3.2-Taiwan-3B-Instruct", "description": "Llama-3.2-Taiwan-3B-Instruct"},
     "MiniCPM3-4B": {"repo_id": "openbmb/MiniCPM3-4B", "description": "MiniCPM3-4B"},
-    "Qwen2.5-3B-Instruct": {"repo_id": "Qwen/Qwen2.5-3B-Instruct", "description": "Qwen2.5-3B-Instruct"},
+
+    # Models with 7B-8.3B parameters
     "Qwen2.5-7B-Instruct": {"repo_id": "Qwen/Qwen2.5-7B-Instruct", "description": "Qwen2.5-7B-Instruct"},
-    "Phi-4-mini-Reasoning": {"repo_id": "microsoft/Phi-4-mini-reasoning", "description": "Phi-4-mini-Reasoning"},
-    # "Phi-4-Reasoning": {"repo_id": "microsoft/Phi-4-reasoning", "description": "Phi-4-Reasoning"},
-    "Phi-4-mini-Instruct": {"repo_id": "microsoft/Phi-4-mini-instruct", "description": "Phi-4-mini-Instruct"},
-    "Meta-Llama-3.1-8B-Instruct": {"repo_id": "MaziyarPanahi/Meta-Llama-3.1-8B-Instruct", "description": "Meta-Llama-3.1-8B-Instruct"},
-    "DeepSeek-R1-Distill-Llama-8B": {"repo_id": "unsloth/DeepSeek-R1-Distill-Llama-8B", "description": "DeepSeek-R1-Distill-Llama-8B"},
-    "Mistral-7B-Instruct-v0.3": {"repo_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3", "description": "Mistral-7B-Instruct-v0.3"},
     "Qwen2.5-Coder-7B-Instruct": {"repo_id": "Qwen/Qwen2.5-Coder-7B-Instruct", "description": "Qwen2.5-Coder-7B-Instruct"},
-    "Qwen2.5-Omni-3B": {"repo_id": "Qwen/Qwen2.5-Omni-3B", "description": "Qwen2.5-Omni-3B"},
     "MiMo-7B-RL": {"repo_id": "XiaomiMiMo/MiMo-7B-RL", "description": "MiMo-7B-RL"},
+    "Mistral-7B-Instruct-v0.3": {"repo_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3", "description": "Mistral-7B-Instruct-v0.3"},
+    "DeepSeek-R1-0528-Qwen3-8B": {"repo_id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "description": "DeepSeek-R1-0528-Qwen3-8B"},
+    "Meta-Llama-3.1-8B-Instruct": {"repo_id": "MaziyarPanahi/Meta-Llama-3.1-8B-Instruct", "description": "Meta-Llama-3.1-8B-Instruct"},
+    "DeepSeek-R1-Distill-Llama-8B": {"repo_id": "unsloth/DeepSeek-R1-Distill-Llama-8B", "description": "DeepSeek-R1-Distill-Llama-8B"},
+    "Qwen3-8B": {"repo_id":"Qwen/Qwen3-8B","description":"Dense causal language model with 8.2 B total parameters (6.95 B non-embedding), 36 layers, 32 query heads & 8 KV heads, 32 768-token context (131 072 via YaRN), excels at multilingual instruction following & zero-shot tasks."},
+    "LFM2-8B-A1B": {
+        "repo_id": "LiquidAI/LFM2-8B-A1B",
+        "description": "A Mixture-of-Experts (MoE) model with 8.3B total parameters (1.5B active) designed for on-device use, providing the quality of larger models with the speed of a 1.5B-class model."
+    },
 
+    # Models with 14B+ parameters
+    "Qwen/Qwen3-14B-FP8": {"repo_id": "Qwen/Qwen3-14B-FP8", "description": "Qwen/Qwen3-14B-FP8"},
+    "Qwen3-14B": {"repo_id":"Qwen/Qwen3-14B","description":"Dense causal language model with 14.8 B total parameters (13.2 B non-embedding), 40 layers, 40 query heads & 8 KV heads, 32 768-token context (131 072 via YaRN), enhanced human preference alignment & advanced agent integration."},
 }
 
 # Global cache for pipelines to avoid re-loading.
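Editor's note: the trailing context line above mentions a global cache for pipelines. A minimal sketch of how a registry like MODELS is typically consumed together with such a cache follows, assuming the Hugging Face transformers pipeline API. The get_pipeline helper, the PIPELINES dict, the trimmed stand-in MODELS entry, and the dtype/device choices are illustrative assumptions, not the Space's actual code.

# Editorial sketch (not the committed app.py): look up a MODELS entry by name,
# load a text-generation pipeline for its repo_id, and cache it so re-selecting
# the same model does not reload the weights.
import torch
from transformers import pipeline

MODELS = {  # trimmed stand-in for the full registry shown in the diff above
    "LFM2-350M": {"repo_id": "LiquidAI/LFM2-350M", "description": "Compact 350M hybrid model"},
}

PIPELINES = {}  # global cache for pipelines to avoid re-loading, keyed by repo_id

def get_pipeline(model_name: str):
    """Return a cached text-generation pipeline for the chosen MODELS entry."""
    repo_id = MODELS[model_name]["repo_id"]
    if repo_id not in PIPELINES:
        PIPELINES[repo_id] = pipeline(
            "text-generation",
            model=repo_id,
            torch_dtype=torch.bfloat16,  # assumption; the app may pick its dtype differently
            device_map="auto",
        )
    return PIPELINES[repo_id]

if __name__ == "__main__":
    pipe = get_pipeline("LFM2-350M")
    print(pipe("Hello, world!", max_new_tokens=32)[0]["generated_text"])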