Spaces: Running on Zero
Upload llmdolphin.py
Browse files
- llmdolphin.py +16 -4
llmdolphin.py
CHANGED
|
@@ -28,11 +28,21 @@ llm_models = {
|
|
| 28 |
"Nemo-12B-Marlin-v7.Q4_K_M.gguf": ["mradermacher/Nemo-12B-Marlin-v7-GGUF", MessagesFormatterType.MISTRAL],
|
| 29 |
"NemoDori-v0.2-Upscaled.1-14B.Q4_K_M.gguf": ["mradermacher/NemoDori-v0.2-Upscaled.1-14B-GGUF", MessagesFormatterType.MISTRAL],
|
| 30 |
"Fireball-Mistral-Nemo-Base-2407-sft-v2.2a.Q4_K_M.gguf": ["mradermacher/Fireball-Mistral-Nemo-Base-2407-sft-v2.2a-GGUF", MessagesFormatterType.MISTRAL],
|
|
|
|
| 31 |
"Trinas_Nectar-8B-model_stock.i1-Q4_K_M.gguf": ["mradermacher/Trinas_Nectar-8B-model_stock-i1-GGUF", MessagesFormatterType.MISTRAL],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
"storiecreative-q5_k_m.gguf": ["ClaudioItaly/StorieCreative-Q5_K_M-GGUF", MessagesFormatterType.MISTRAL],
|
| 33 |
"L3.1-gramamax.Q5_K_M.gguf": ["mradermacher/L3.1-gramamax-GGUF", MessagesFormatterType.MISTRAL],
|
|
|
|
| 34 |
"sellen-8b-model_stock-q4_k_m.gguf": ["DreadPoor/Sellen-8B-model_stock-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
|
| 35 |
"nokstella_coder-8b-model_stock-q4_k_m.gguf": ["DreadPoor/Nokstella_coder-8B-model_stock-Q4_K_M-GGUF", MessagesFormatterType.LLAMA_3],
|
|
|
|
| 36 |
"L3.1-Sithamo-v0.4-8B.q5_k_m.gguf": ["kromquant/L3.1-Siithamo-v0.4-8B-GGUFs", MessagesFormatterType.MISTRAL],
|
| 37 |
"Berry-Spark-7B-Fix.Q5_K_M.gguf": ["mradermacher/Berry-Spark-7B-Fix-GGUF", MessagesFormatterType.OPEN_CHAT],
|
| 38 |
"llama3.1-gutenberg-8B.Q4_K_S.gguf": ["mradermacher/llama3.1-gutenberg-8B-GGUF", MessagesFormatterType.LLAMA_3],
|
|
@@ -682,9 +692,10 @@ def dolphin_respond(
|
|
| 682 |
llm = Llama(
|
| 683 |
model_path=str(Path(f"{llm_models_dir}/{model}")),
|
| 684 |
flash_attn=True,
|
| 685 |
-
n_gpu_layers=81,
|
| 686 |
n_batch=1024,
|
| 687 |
-
n_ctx=8192
|
|
|
|
| 688 |
)
|
| 689 |
provider = LlamaCppPythonProvider(llm)
|
| 690 |
|
|
@@ -776,9 +787,10 @@ def dolphin_respond_auto(
|
|
| 776 |
llm = Llama(
|
| 777 |
model_path=str(Path(f"{llm_models_dir}/{model}")),
|
| 778 |
flash_attn=True,
|
| 779 |
-
n_gpu_layers=81,
|
| 780 |
n_batch=1024,
|
| 781 |
-
n_ctx=8192
|
|
|
|
| 782 |
)
|
| 783 |
provider = LlamaCppPythonProvider(llm)
|
| 784 |
|
|
|
|
| 28 |
"Nemo-12B-Marlin-v7.Q4_K_M.gguf": ["mradermacher/Nemo-12B-Marlin-v7-GGUF", MessagesFormatterType.MISTRAL],
|
| 29 |
"NemoDori-v0.2-Upscaled.1-14B.Q4_K_M.gguf": ["mradermacher/NemoDori-v0.2-Upscaled.1-14B-GGUF", MessagesFormatterType.MISTRAL],
|
| 30 |
"Fireball-Mistral-Nemo-Base-2407-sft-v2.2a.Q4_K_M.gguf": ["mradermacher/Fireball-Mistral-Nemo-Base-2407-sft-v2.2a-GGUF", MessagesFormatterType.MISTRAL],
|
| 31 |
+
"Deutscher-Pantheon-12B.Q4_K_M.gguf": ["mradermacher/Deutscher-Pantheon-12B-GGUF", MessagesFormatterType.MISTRAL],
|
| 32 |
"Trinas_Nectar-8B-model_stock.i1-Q4_K_M.gguf": ["mradermacher/Trinas_Nectar-8B-model_stock-i1-GGUF", MessagesFormatterType.MISTRAL],
|
| 33 |
+
"NemoMix-Unleashed-12B-Q4_K_M.gguf": ["bartowski/NemoMix-Unleashed-12B-GGUF", MessagesFormatterType.MISTRAL],
|
| 34 |
+
"IceTea21EnergyDrinkRPV13.Q4_K_S.gguf": ["mradermacher/IceTea21EnergyDrinkRPV13-GGUF", MessagesFormatterType.MISTRAL],
|
| 35 |
+
"MegaBeam-Mistral-7B-512k-Q5_K_M.gguf": ["bartowski/MegaBeam-Mistral-7B-512k-GGUF", MessagesFormatterType.MISTRAL],
|
| 36 |
+
"Chronos-Gold-12B-1.0-Q4_K_M.gguf": ["bartowski/Chronos-Gold-12B-1.0-GGUF", MessagesFormatterType.MISTRAL],
|
| 37 |
+
"L3.1-Romes-Ninomos-Maxxing.Q5_K_M.gguf": ["mradermacher/L3.1-Romes-Ninomos-Maxxing-GGUF", MessagesFormatterType.LLAMA_3],
|
| 38 |
+
"mistral-nemo-minitron-8b-base-q4_k_m.gguf": ["Daemontatox/Mistral-NeMo-Minitron-8B-Base-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
|
| 39 |
+
"Nokstella_coder-8B-model_stock.i1-Q4_K_S.gguf": ["mradermacher/Nokstella_coder-8B-model_stock-i1-GGUF", MessagesFormatterType.LLAMA_3],
|
| 40 |
"storiecreative-q5_k_m.gguf": ["ClaudioItaly/StorieCreative-Q5_K_M-GGUF", MessagesFormatterType.MISTRAL],
|
| 41 |
"L3.1-gramamax.Q5_K_M.gguf": ["mradermacher/L3.1-gramamax-GGUF", MessagesFormatterType.MISTRAL],
|
| 42 |
+
"Evolutionstory128.Q5_K_M.gguf": ["mradermacher/Evolutionstory128-GGUF", MessagesFormatterType.CHATML],
|
| 43 |
"sellen-8b-model_stock-q4_k_m.gguf": ["DreadPoor/Sellen-8B-model_stock-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
|
| 44 |
"nokstella_coder-8b-model_stock-q4_k_m.gguf": ["DreadPoor/Nokstella_coder-8B-model_stock-Q4_K_M-GGUF", MessagesFormatterType.LLAMA_3],
|
| 45 |
+
"Ultra-Instruct-12B-Q4_K_M.gguf": ["bartowski/Ultra-Instruct-12B-GGUF", MessagesFormatterType.MISTRAL],
|
| 46 |
"L3.1-Sithamo-v0.4-8B.q5_k_m.gguf": ["kromquant/L3.1-Siithamo-v0.4-8B-GGUFs", MessagesFormatterType.MISTRAL],
|
| 47 |
"Berry-Spark-7B-Fix.Q5_K_M.gguf": ["mradermacher/Berry-Spark-7B-Fix-GGUF", MessagesFormatterType.OPEN_CHAT],
|
| 48 |
"llama3.1-gutenberg-8B.Q4_K_S.gguf": ["mradermacher/llama3.1-gutenberg-8B-GGUF", MessagesFormatterType.LLAMA_3],
|
|
|
|
| 692 |
llm = Llama(
|
| 693 |
model_path=str(Path(f"{llm_models_dir}/{model}")),
|
| 694 |
flash_attn=True,
|
| 695 |
+
n_gpu_layers=35, # 81
|
| 696 |
n_batch=1024,
|
| 697 |
+
n_ctx=4096, #8192
|
| 698 |
+
n_threads=8,
|
| 699 |
)
|
| 700 |
provider = LlamaCppPythonProvider(llm)
|
| 701 |
|
|
|
|
| 787 |
llm = Llama(
|
| 788 |
model_path=str(Path(f"{llm_models_dir}/{model}")),
|
| 789 |
flash_attn=True,
|
| 790 |
+
n_gpu_layers=35, # 81
|
| 791 |
n_batch=1024,
|
| 792 |
+
n_ctx=4096, #8192
|
| 793 |
+
n_threads=8,
|
| 794 |
)
|
| 795 |
provider = LlamaCppPythonProvider(llm)
|
| 796 |
|