Luigi committed (verified)
Commit 3c22497 · Parent: 8eefe94

feat(models): Added three new models


- ServiceNow-AI/Apriel-1.5-15b-Thinker (15B, multimodal reasoning)
- ServiceNow-AI/Apriel-5B-Instruct (5B, instruction-tuned)
- ai21labs/AI21-Jamba-Reasoning-3B (3B, hybrid Transformer–Mamba with 256K context)
- Reordered entire MODELS dictionary from largest (~15B) to smallest (~135M)
- Grouped models into logical size-based sections with updated descriptions using official Hugging Face metadata
- Enhanced model descriptions with key capabilities, benchmarks, and usage context from source model cards
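
Every entry in the reordered MODELS dictionary keeps the same two-field shape ("repo_id" pointing at a Hugging Face repository plus a human-readable "description"), so the reordering and regrouping change presentation only, not the lookup logic. A minimal sketch of that shape and of a lookup over it; the describe_model helper is illustrative and not part of app.py, and the descriptions are abbreviated from the diff below:

    # Two representative entries, shortened from the MODELS dictionary in app.py.
    MODELS = {
        "Apriel-1.5-15b-Thinker": {
            "repo_id": "ServiceNow-AI/Apriel-1.5-15b-Thinker",
            "description": "15B multimodal reasoning model from ServiceNow's Apriel series.",
        },
        "AI21-Jamba-Reasoning-3B": {
            "repo_id": "ai21labs/AI21-Jamba-Reasoning-3B",
            "description": "Compact 3B hybrid Transformer-Mamba reasoning model with 256K context.",
        },
    }

    def describe_model(name: str) -> str:
        # Hypothetical helper: resolve a display name to its repo and one-line summary.
        entry = MODELS[name]
        return f"{name} ({entry['repo_id']}): {entry['description']}"

    print(describe_model("AI21-Jamba-Reasoning-3B"))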

Files changed (1): app.py (+160 -88)

app.py CHANGED
@@ -26,117 +26,189 @@ cancel_event = threading.Event()

Old version of the hunk (file lines 26-142; lines removed by this commit are prefixed with "-", unchanged context lines are unprefixed):

 # Torch-Compatible Model Definitions with Adjusted Descriptions
 # ------------------------------
 MODELS = {
-    # Models with ~135M parameters
-    "SmolLM2-135M-multilingual-base": {"repo_id": "agentlans/SmolLM2-135M-multilingual-base", "description": "SmolLM2-135M-multilingual-base"},
-    "SmolLM-135M-Taiwan-Instruct-v1.0": {
-        "repo_id": "benchang1110/SmolLM-135M-Taiwan-Instruct-v1.0",
-        "description": "135-million-parameter F32 safetensors instruction-finetuned variant of SmolLM-135M-Taiwan, trained on the 416 k-example ChatTaiwan dataset for Traditional Chinese conversational and instruction-following tasks"
     },
-    "SmolLM2_135M_Grpo_Gsm8k":{"repo_id":"prithivMLmods/SmolLM2_135M_Grpo_Gsm8k", "description":"SmolLM2_135M_Grpo_Gsm8k"},
-    "SmolLM2-135M-Instruct": {"repo_id": "HuggingFaceTB/SmolLM2-135M-Instruct", "description": "Original SmolLM2‑135M Instruct"},
-    "SmolLM2-135M-Instruct-TaiwanChat": {"repo_id": "Luigi/SmolLM2-135M-Instruct-TaiwanChat", "description": "SmolLM2‑135M Instruct fine-tuned on TaiwanChat"},

-    # Models with ~270M parameters
-    "parser_model_ner_gemma_v0.1": {
-        "repo_id": "myfi/parser_model_ner_gemma_v0.1",
-        "description": "A lightweight named‑entity‑like (NER) parser fine‑tuned from Google’s **Gemma‑3‑270M** model. The base Gemma‑3‑270M is a 270 M‑parameter, hyper‑efficient LLM designed for on‑device inference, supporting >140 languages, a 128 k‑token context window, and instruction‑following capabilities [2][7]. This variant is further trained on standard NER corpora (e.g., CoNLL‑2003, OntoNotes) to extract PERSON, ORG, LOC, and MISC entities with high precision while keeping the memory footprint low (≈240 MB VRAM in BF16 quantized form) [1]. It is released under the Apache‑2.0 license and can be used for fast, cost‑effective entity extraction in low‑resource environments."
     },
-    "Gemma-3-Taiwan-270M-it":{
-        "repo_id":"lianghsun/Gemma-3-Taiwan-270M-it",
-        "description": "google/gemma-3-270m-it fintuned on Taiwan Chinese dataset"
     },
-    "gemma-3-270m-it":{
-        "repo_id":"google/gemma-3-270m-it",
-        "description":"Gemma‑3‑270M‑IT is a compact, 270‑million‑parameter language model fine‑tuned for Italian, offering fast and efficient on‑device text generation and comprehension in the Italian language.",
     },
-    "Taiwan-ELM-270M-Instruct": {"repo_id": "liswei/Taiwan-ELM-270M-Instruct", "description": "Taiwan-ELM-270M-Instruct"},

-    # Models with 350M-700M parameters
-    "LFM2-350M": {
-        "repo_id": "LiquidAI/LFM2-350M",
-        "description": "A compact 350M parameter hybrid model optimized for edge and on-device applications, offering significantly faster training and inference speeds compared to models like Qwen3."
     },
-    "SmolLM2-360M-Instruct-TaiwanChat": {"repo_id": "Luigi/SmolLM2-360M-Instruct-TaiwanChat", "description": "SmolLM2‑360M Instruct fine-tuned on TaiwanChat"},
-    "SmolLM2-360M-Instruct": {"repo_id": "HuggingFaceTB/SmolLM2-360M-Instruct", "description": "Original SmolLM2‑360M Instruct"},
-    "Qwen2.5-0.5B-Taiwan-Instruct": {
-        "repo_id": "ShengweiPeng/Qwen2.5-0.5B-Taiwan-Instruct",
-        "description": "Qwen2.5-Taiwan model with 0.5 B parameters, instruction-tuned"
     },
-    "Qwen3-0.6B-Taiwan": {
-        "repo_id": "ShengweiPeng/Qwen3-0.6B-Taiwan",
-        "description": "Qwen3-Taiwan model with 0.6 B parameters"
     },
-    "Qwen3-0.6B": {"repo_id":"Qwen/Qwen3-0.6B","description":"Dense causal language model with 0.6 B total parameters (0.44 B non-embedding), 28 transformer layers, 16 query heads & 8 KV heads, native 32 768-token context window, dual-mode generation, full multilingual & agentic capabilities."},
-    "LFM2-700M": {
-        "repo_id": "LiquidAI/LFM2-700M",
-        "description": "A 700M parameter model from the LFM2 family, designed for high efficiency on edge devices with a hybrid architecture of multiplicative gates and short convolutions."
     },

-    # Models with 1B-2B parameters
-    "Llama-3.2-Taiwan-1B": {
-        "repo_id": "lianghsun/Llama-3.2-Taiwan-1B",
-        "description":"Llama-3.2-Taiwan base model with 1 B parameters"
     },
-    "Taiwan-ELM-1_1B-Instruct": {"repo_id": "liswei/Taiwan-ELM-1_1B-Instruct", "description": "Taiwan-ELM-1_1B-Instruct"},
     "LFM2-1.2B": {
         "repo_id": "LiquidAI/LFM2-1.2B",
         "description": "A 1.2B parameter hybrid language model from Liquid AI, designed for efficient on-device and edge AI deployment, outperforming larger models like Llama-2-7b-hf in specific tasks."
     },
-    "Qwen2.5-Taiwan-1.5B-Instruct": {"repo_id": "benchang1110/Qwen2.5-Taiwan-1.5B-Instruct", "description": "Qwen2.5-Taiwan-1.5B-Instruct"},
-    "Falcon-H1-1.5B-Instruct": {
-        "repo_id": "tiiuae/Falcon-H1-1.5B-Instruct",
-        "description":"Falcon‑H1 model with 1.5 B parameters, instruction‑tuned"
     },
-    "Nemotron-Research-Reasoning-Qwen-1.5B": {"repo_id": "nvidia/Nemotron-Research-Reasoning-Qwen-1.5B", "description": "Nemotron-Research-Reasoning-Qwen-1.5B"},
-    "Qwen3-1.7B": {"repo_id":"Qwen/Qwen3-1.7B","description":"Dense causal language model with 1.7 B total parameters (1.4 B non-embedding), 28 layers, 16 query heads & 8 KV heads, 32 768-token context, stronger reasoning vs. 0.6 B variant, dual-mode inference, instruction following across 100+ languages."},
-    "Gemma-3n-E2B": {
-        "repo_id": "google/gemma-3n-E2B",
-        "description":"Gemma 3n base model with effective 2 B parameters (≈2 GB VRAM)"
     },

-    # Models with 2.6B-4B parameters
-    "LFM2-2.6B": {
-        "repo_id": "LiquidAI/LFM2-2.6B",
-        "description": "The 2.6B parameter model in the LFM2 series, it outperforms models in the 3B+ class and features a hybrid architecture for faster inference."
     },
-    "Qwen2.5-Taiwan-3B-Reason-GRPO": {
-        "repo_id": "benchang1110/Qwen2.5-Taiwan-3B-Reason-GRPO",
-        "description":"Qwen2.5-Taiwan model with 3 B parameters, Reason-GRPO fine-tuned"
-    },
-    "Llama-3.2-Taiwan-3B-Instruct": {"repo_id": "lianghsun/Llama-3.2-Taiwan-3B-Instruct", "description": "Llama-3.2-Taiwan-3B-Instruct"},
-    "Qwen2.5-3B-Instruct": {"repo_id": "Qwen/Qwen2.5-3B-Instruct", "description": "Qwen2.5-3B-Instruct"},
-    "Qwen2.5-Omni-3B": {"repo_id": "Qwen/Qwen2.5-Omni-3B", "description": "Qwen2.5-Omni-3B"},
-    "Phi-4-mini-Reasoning": {"repo_id": "microsoft/Phi-4-mini-reasoning", "description": "Phi-4-mini-Reasoning (4.3B parameters)"},
-    "Phi-4-mini-Instruct": {"repo_id": "microsoft/Phi-4-mini-instruct", "description": "Phi-4-mini-Instruct (4.3B parameters)"},
-    "Gemma-3n-E4B": {
-        "repo_id": "google/gemma-3n-E4B",
-        "description":"Gemma 3n base model with effective 4 B parameters (≈3 GB VRAM)"
     },
-    "SmallThinker-4BA0.6B-Instruct": {
-        "repo_id": "PowerInfer/SmallThinker-4BA0.6B-Instruct",
-        "description":"SmallThinker 4 B backbone with 0.6B activated parameters, instruction‑tuned"
-    },
-    "Qwen3-4B": {"repo_id":"Qwen/Qwen3-4B","description":"Dense causal language model with 4.0 B total parameters (3.6 B non-embedding), 36 layers, 32 query heads & 8 KV heads, native 32 768-token context (extendable to 131 072 via YaRN), balanced mid-range capacity & long-context reasoning."},
-    "Gemma-3-4B-IT": {"repo_id": "unsloth/gemma-3-4b-it", "description": "Gemma-3-4B-IT"},
-    "MiniCPM3-4B": {"repo_id": "openbmb/MiniCPM3-4B", "description": "MiniCPM3-4B"},
-
-    # Models with 7B-8.3B parameters
-    "Qwen2.5-7B-Instruct": {"repo_id": "Qwen/Qwen2.5-7B-Instruct", "description": "Qwen2.5-7B-Instruct"},
-    "Qwen2.5-Coder-7B-Instruct": {"repo_id": "Qwen/Qwen2.5-Coder-7B-Instruct", "description": "Qwen2.5-Coder-7B-Instruct"},
-    "MiMo-7B-RL": {"repo_id": "XiaomiMiMo/MiMo-7B-RL", "description": "MiMo-7B-RL"},
-    "Mistral-7B-Instruct-v0.3": {"repo_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3", "description": "Mistral-7B-Instruct-v0.3"},
-    "DeepSeek-R1-0528-Qwen3-8B": {"repo_id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "description": "DeepSeek-R1-0528-Qwen3-8B"},
-    "Meta-Llama-3.1-8B-Instruct": {"repo_id": "MaziyarPanahi/Meta-Llama-3.1-8B-Instruct", "description": "Meta-Llama-3.1-8B-Instruct"},
-    "DeepSeek-R1-Distill-Llama-8B": {"repo_id": "unsloth/DeepSeek-R1-Distill-Llama-8B", "description": "DeepSeek-R1-Distill-Llama-8B"},
-    "Qwen3-8B": {"repo_id":"Qwen/Qwen3-8B","description":"Dense causal language model with 8.2 B total parameters (6.95 B non-embedding), 36 layers, 32 query heads & 8 KV heads, 32 768-token context (131 072 via YaRN), excels at multilingual instruction following & zero-shot tasks."},
-    "LFM2-8B-A1B": {
-        "repo_id": "LiquidAI/LFM2-8B-A1B",
-        "description": "A Mixture-of-Experts (MoE) model with 8.3B total parameters (1.5B active) designed for on-device use, providing the quality of larger models with the speed of a 1.5B-class model."
     },

-    # Models with 14B+ parameters
-    "Qwen/Qwen3-14B-FP8": {"repo_id": "Qwen/Qwen3-14B-FP8", "description": "Qwen/Qwen3-14B-FP8"},
-    "Qwen3-14B": {"repo_id":"Qwen/Qwen3-14B","description":"Dense causal language model with 14.8 B total parameters (13.2 B non-embedding), 40 layers, 40 query heads & 8 KV heads, 32 768-token context (131 072 via YaRN), enhanced human preference alignment & advanced agent integration."},
 }

 # Global cache for pipelines to avoid re-loading.
New version of the hunk (file lines 26-214; lines added by this commit are prefixed with "+", unchanged context lines are unprefixed):

 # Torch-Compatible Model Definitions with Adjusted Descriptions
 # ------------------------------
 MODELS = {
+    # Models with 14B+ parameters
+    "Apriel-1.5-15b-Thinker": {
+        "repo_id": "ServiceNow-AI/Apriel-1.5-15b-Thinker",
+        "description": "A 15B multimodal reasoning model from ServiceNow’s Apriel series. Achieves SOTA performance on text and image reasoning (52 on Artificial Analysis index, 68 on Tau2 Bench Telecom, 62 on IFBench) despite undergoing only text SFT—no image fine-tuning. Fits on a single GPU and competes with models 10× its size like Deepseek R1 and Gemini-Flash."
+    },
+    "Qwen3-14B": {
+        "repo_id": "Qwen/Qwen3-14B",
+        "description": "Dense causal language model with 14.8 B total parameters (13.2 B non-embedding), 40 layers, 40 query heads & 8 KV heads, 32 768-token context (131 072 via YaRN), enhanced human preference alignment & advanced agent integration."
+    },
+    "Qwen/Qwen3-14B-FP8": {
+        "repo_id": "Qwen/Qwen3-14B-FP8",
+        "description": "FP8-quantized version of Qwen3-14B for efficient inference."
     },

+    # Models with ~5B parameters
+    "Apriel-5B-Instruct": {
+        "repo_id": "ServiceNow-AI/Apriel-5B-Instruct",
+        "description": "A 5B-parameter instruction-tuned model from ServiceNow’s Apriel series, optimized for enterprise tasks and general-purpose instruction following."
     },
+
+    # Models with 4B–4.3B parameters
+    "Qwen3-4B": {
+        "repo_id": "Qwen/Qwen3-4B",
+        "description": "Dense causal language model with 4.0 B total parameters (3.6 B non-embedding), 36 layers, 32 query heads & 8 KV heads, native 32 768-token context (extendable to 131 072 via YaRN), balanced mid-range capacity & long-context reasoning."
+    },
+    "Gemma-3-4B-IT": {
+        "repo_id": "unsloth/gemma-3-4b-it",
+        "description": "Gemma-3-4B-IT"
+    },
+    "MiniCPM3-4B": {
+        "repo_id": "openbmb/MiniCPM3-4B",
+        "description": "MiniCPM3-4B"
+    },
+    "Gemma-3n-E4B": {
+        "repo_id": "google/gemma-3n-E4B",
+        "description": "Gemma 3n base model with effective 4 B parameters (≈3 GB VRAM)"
+    },
+    "Phi-4-mini-Reasoning": {
+        "repo_id": "microsoft/Phi-4-mini-reasoning",
+        "description": "Phi-4-mini-Reasoning (4.3B parameters)"
     },
+    "Phi-4-mini-Instruct": {
+        "repo_id": "microsoft/Phi-4-mini-instruct",
+        "description": "Phi-4-mini-Instruct (4.3B parameters)"
+    },
+    "SmallThinker-4BA0.6B-Instruct": {
+        "repo_id": "PowerInfer/SmallThinker-4BA0.6B-Instruct",
+        "description": "SmallThinker 4 B backbone with 0.6 B activated parameters, instruction‑tuned"
     },

+    # Models with ~3B parameters
+    "AI21-Jamba-Reasoning-3B": {
+        "repo_id": "ai21labs/AI21-Jamba-Reasoning-3B",
+        "description": "A compact 3B hybrid Transformer–Mamba reasoning model with 256K context length, strong intelligence benchmark scores (61% MMLU-Pro, 52% IFBench), and efficient inference suitable for edge and datacenter use. Outperforms Gemma-3 4B and Llama-3.2 3B despite smaller size."
     },
+    "Qwen2.5-Taiwan-3B-Reason-GRPO": {
+        "repo_id": "benchang1110/Qwen2.5-Taiwan-3B-Reason-GRPO",
+        "description": "Qwen2.5-Taiwan model with 3 B parameters, Reason-GRPO fine-tuned"
     },
+    "Llama-3.2-Taiwan-3B-Instruct": {
+        "repo_id": "lianghsun/Llama-3.2-Taiwan-3B-Instruct",
+        "description": "Llama-3.2-Taiwan-3B-Instruct"
     },
+    "Qwen2.5-3B-Instruct": {
+        "repo_id": "Qwen/Qwen2.5-3B-Instruct",
+        "description": "Qwen2.5-3B-Instruct"
+    },
+    "Qwen2.5-Omni-3B": {
+        "repo_id": "Qwen/Qwen2.5-Omni-3B",
+        "description": "Qwen2.5-Omni-3B"
     },

+    # Models with 2.6B parameters
+    "LFM2-2.6B": {
+        "repo_id": "LiquidAI/LFM2-2.6B",
+        "description": "The 2.6B parameter model in the LFM2 series, it outperforms models in the 3B+ class and features a hybrid architecture for faster inference."
+    },
+
+    # Models with 1.7B–2B parameters
+    "Qwen3-1.7B": {
+        "repo_id": "Qwen/Qwen3-1.7B",
+        "description": "Dense causal language model with 1.7 B total parameters (1.4 B non-embedding), 28 layers, 16 query heads & 8 KV heads, 32 768-token context, stronger reasoning vs. 0.6 B variant, dual-mode inference, instruction following across 100+ languages."
+    },
+    "Gemma-3n-E2B": {
+        "repo_id": "google/gemma-3n-E2B",
+        "description": "Gemma 3n base model with effective 2 B parameters (≈2 GB VRAM)"
+    },
+
+    # Models with 1B–1.5B parameters
+    "Nemotron-Research-Reasoning-Qwen-1.5B": {
+        "repo_id": "nvidia/Nemotron-Research-Reasoning-Qwen-1.5B",
+        "description": "Nemotron-Research-Reasoning-Qwen-1.5B"
+    },
+    "Falcon-H1-1.5B-Instruct": {
+        "repo_id": "tiiuae/Falcon-H1-1.5B-Instruct",
+        "description": "Falcon‑H1 model with 1.5 B parameters, instruction‑tuned"
+    },
+    "Qwen2.5-Taiwan-1.5B-Instruct": {
+        "repo_id": "benchang1110/Qwen2.5-Taiwan-1.5B-Instruct",
+        "description": "Qwen2.5-Taiwan-1.5B-Instruct"
     },
     "LFM2-1.2B": {
         "repo_id": "LiquidAI/LFM2-1.2B",
         "description": "A 1.2B parameter hybrid language model from Liquid AI, designed for efficient on-device and edge AI deployment, outperforming larger models like Llama-2-7b-hf in specific tasks."
     },
+    "Taiwan-ELM-1_1B-Instruct": {
+        "repo_id": "liswei/Taiwan-ELM-1_1B-Instruct",
+        "description": "Taiwan-ELM-1_1B-Instruct"
     },
+    "Llama-3.2-Taiwan-1B": {
+        "repo_id": "lianghsun/Llama-3.2-Taiwan-1B",
+        "description": "Llama-3.2-Taiwan base model with 1 B parameters"
     },

+    # Models with 700M–360M parameters
+    "LFM2-700M": {
+        "repo_id": "LiquidAI/LFM2-700M",
+        "description": "A 700M parameter model from the LFM2 family, designed for high efficiency on edge devices with a hybrid architecture of multiplicative gates and short convolutions."
     },
+    "Qwen3-0.6B": {
+        "repo_id": "Qwen/Qwen3-0.6B",
+        "description": "Dense causal language model with 0.6 B total parameters (0.44 B non-embedding), 28 transformer layers, 16 query heads & 8 KV heads, native 32 768-token context window, dual-mode generation, full multilingual & agentic capabilities."
     },
+    "Qwen3-0.6B-Taiwan": {
+        "repo_id": "ShengweiPeng/Qwen3-0.6B-Taiwan",
+        "description": "Qwen3-Taiwan model with 0.6 B parameters"
+    },
+    "Qwen2.5-0.5B-Taiwan-Instruct": {
+        "repo_id": "ShengweiPeng/Qwen2.5-0.5B-Taiwan-Instruct",
+        "description": "Qwen2.5-Taiwan model with 0.5 B parameters, instruction-tuned"
+    },
+    "SmolLM2-360M-Instruct": {
+        "repo_id": "HuggingFaceTB/SmolLM2-360M-Instruct",
+        "description": "Original SmolLM2‑360M Instruct"
+    },
+    "SmolLM2-360M-Instruct-TaiwanChat": {
+        "repo_id": "Luigi/SmolLM2-360M-Instruct-TaiwanChat",
+        "description": "SmolLM2‑360M Instruct fine-tuned on TaiwanChat"
+    },
+    "LFM2-350M": {
+        "repo_id": "LiquidAI/LFM2-350M",
+        "description": "A compact 350M parameter hybrid model optimized for edge and on-device applications, offering significantly faster training and inference speeds compared to models like Qwen3."
     },

+    # Models with ~270M parameters
+    "parser_model_ner_gemma_v0.1": {
+        "repo_id": "myfi/parser_model_ner_gemma_v0.1",
+        "description": "A lightweight named‑entity‑like (NER) parser fine‑tuned from Google’s **Gemma‑3‑270M** model. The base Gemma‑3‑270M is a 270 M‑parameter, hyper‑efficient LLM designed for on‑device inference, supporting >140 languages, a 128 k‑token context window, and instruction‑following capabilities [2][7]. This variant is further trained on standard NER corpora (e.g., CoNLL‑2003, OntoNotes) to extract PERSON, ORG, LOC, and MISC entities with high precision while keeping the memory footprint low (≈240 MB VRAM in BF16 quantized form) [1]. It is released under the Apache‑2.0 license and can be used for fast, cost‑effective entity extraction in low‑resource environments."
+    },
+    "Gemma-3-Taiwan-270M-it": {
+        "repo_id": "lianghsun/Gemma-3-Taiwan-270M-it",
+        "description": "google/gemma-3-270m-it fintuned on Taiwan Chinese dataset"
+    },
+    "gemma-3-270m-it": {
+        "repo_id": "google/gemma-3-270m-it",
+        "description": "Gemma‑3‑270M‑IT is a compact, 270‑million‑parameter language model fine‑tuned for Italian, offering fast and efficient on‑device text generation and comprehension in the Italian language.",
+    },
+    "Taiwan-ELM-270M-Instruct": {
+        "repo_id": "liswei/Taiwan-ELM-270M-Instruct",
+        "description": "Taiwan-ELM-270M-Instruct"
+    },
+
+    # Models with ~135M parameters
+    "SmolLM2-135M-multilingual-base": {
+        "repo_id": "agentlans/SmolLM2-135M-multilingual-base",
+        "description": "SmolLM2-135M-multilingual-base"
+    },
+    "SmolLM-135M-Taiwan-Instruct-v1.0": {
+        "repo_id": "benchang1110/SmolLM-135M-Taiwan-Instruct-v1.0",
+        "description": "135-million-parameter F32 safetensors instruction-finetuned variant of SmolLM-135M-Taiwan, trained on the 416 k-example ChatTaiwan dataset for Traditional Chinese conversational and instruction-following tasks"
+    },
+    "SmolLM2_135M_Grpo_Gsm8k": {
+        "repo_id": "prithivMLmods/SmolLM2_135M_Grpo_Gsm8k",
+        "description": "SmolLM2_135M_Grpo_Gsm8k"
+    },
+    "SmolLM2-135M-Instruct": {
+        "repo_id": "HuggingFaceTB/SmolLM2-135M-Instruct",
+        "description": "Original SmolLM2‑135M Instruct"
+    },
+    "SmolLM2-135M-Instruct-TaiwanChat": {
+        "repo_id": "Luigi/SmolLM2-135M-Instruct-TaiwanChat",
+        "description": "SmolLM2‑135M Instruct fine-tuned on TaiwanChat"
+    },
 }

 # Global cache for pipelines to avoid re-loading.
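
The unchanged context line at the end of the hunk, "# Global cache for pipelines to avoid re-loading.", indicates that app.py memoizes loaded pipelines rather than re-instantiating them per request. A minimal sketch of that caching pattern, assuming a transformers text-generation pipeline keyed by repo_id and guarded by a lock; the function and variable names below are illustrative, not taken from app.py:

    import threading

    from transformers import pipeline

    # Illustrative global cache: one pipeline per repo_id, shared across requests.
    PIPELINES = {}
    PIPELINES_LOCK = threading.Lock()

    def get_pipeline(repo_id: str):
        # Load the pipeline once per repo_id and reuse it on later calls.
        with PIPELINES_LOCK:
            if repo_id not in PIPELINES:
                PIPELINES[repo_id] = pipeline("text-generation", model=repo_id)
            return PIPELINES[repo_id]

    # Usage with a MODELS entry, e.g.:
    #   pipe = get_pipeline(MODELS["Qwen3-0.6B"]["repo_id"])
    #   print(pipe("Hello", max_new_tokens=16)[0]["generated_text"])

Holding the lock across the load keeps two threads from loading the same model twice; a finer-grained scheme could release it during the load if concurrent loads of different models matter.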