Upload 3 files

- app.py +93 -45
- llmdolphin.py +83 -0
- requirements.txt +2 -1
app.py
CHANGED
@@ -7,54 +7,90 @@ from llmdolphin import (get_llm_formats, get_dolphin_model_format,
     get_dolphin_models, get_dolphin_model_info, select_dolphin_model,
     select_dolphin_format, add_dolphin_models, get_dolphin_sysprompt,
     get_dolphin_sysprompt_mode, select_dolphin_sysprompt, get_dolphin_languages,
-    select_dolphin_language, dolphin_respond, dolphin_parse)
+    select_dolphin_language, dolphin_respond, dolphin_parse, respond_playground)
 
+css = """
+.title { text-align: center; }
+"""
 
-with gr.Blocks(theme='NoCrypt/miku@>=1.2.2', fill_width=True, css="", delete_cache=(60, 3600)) as app:
-    gr.
-
-
-
-
-    chatbot = gr.Chatbot(likeable=False, show_copy_button=True, show_share_button=False, layout="bubble", container=True)
-    with gr.Row():
-        chat_msg = gr.Textbox(show_label=False, placeholder="Input text in English, Japanese, or any other languages and press Enter or click Send.", scale=4)
-        chat_submit = gr.Button("Send", scale=1)
-        chat_clear = gr.Button("Clear", scale=1)
-    with gr.Accordion("Additional inputs", open=False):
-        chat_format = gr.Dropdown(choices=get_llm_formats(), value=get_dolphin_model_format(get_dolphin_models()[0][1]), label="Message format")
-        chat_sysmsg = gr.Textbox(value=get_dolphin_sysprompt(), label="System message")
-        with gr.Row():
-            chat_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max tokens")
-            chat_temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
-            chat_topp = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
-            chat_topk = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k")
-            chat_rp = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")
-    with gr.Accordion("Add models", open=False):
-        chat_add_text = gr.Textbox(label="URL or Repo ID", placeholder="https://huggingface.co/mradermacher/MagnumChronos-i1-GGUF/blob/main/MagnumChronos.i1-Q4_K_M.gguf", lines=1)
-        chat_add_format = gr.Dropdown(choices=get_llm_formats(), value=get_llm_formats()[0], label="Message format")
-        chat_add_submit = gr.Button("Update lists of models")
-    with gr.Accordion("Modes", open=True):
-        chat_model = gr.Dropdown(choices=get_dolphin_models(), value=get_dolphin_models()[0][1], allow_custom_value=True, label="Model")
-        chat_model_info = gr.Markdown(value=get_dolphin_model_info(get_dolphin_models()[0][1]), label="Model info")
-        with gr.Row():
-            chat_mode = gr.Dropdown(choices=get_dolphin_sysprompt_mode(), value=get_dolphin_sysprompt_mode()[0], allow_custom_value=False, label="Mode")
-            chat_lang = gr.Dropdown(choices=get_dolphin_languages(), value="English", allow_custom_value=True, label="Output language")
-    with gr.Row():
-        with gr.Group():
-            output_text = gr.TextArea(label="Output tags", interactive=False, show_copy_button=True)
-            copy_btn = gr.Button(value="Copy to clipboard", size="sm", interactive=False)
+with gr.Blocks(theme='NoCrypt/miku@>=1.2.2', fill_width=True, css=css, delete_cache=(60, 3600)) as app:
+    with gr.Tab("Prompt Translator"):
+        with gr.Column():
+            gr.Markdown("""# Natural Text to SD Prompt Translator With LLM alpha
+            Text in natural language (English, Japanese, ...) => Prompt
+            """, elem_classes="title")
             with gr.Group():
-
-
-
-
-
-
-
-
-
-
+                chatbot = gr.Chatbot(likeable=False, show_copy_button=True, show_share_button=False, layout="bubble", container=True)
+                with gr.Row():
+                    chat_msg = gr.Textbox(show_label=False, placeholder="Input text in English, Japanese, or any other languages and press Enter or click Send.", lines=2, scale=4)
+                    chat_submit = gr.Button("Send", scale=1, variant="primary")
+                    chat_clear = gr.Button("Clear", scale=1, variant="secondary")
+            with gr.Accordion("Additional inputs", open=False):
+                chat_format = gr.Dropdown(choices=get_llm_formats(), value=get_dolphin_model_format(get_dolphin_models()[0][1]), label="Message format")
+                chat_sysmsg = gr.Textbox(value=get_dolphin_sysprompt(), label="System message")
+                with gr.Row():
+                    chat_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max tokens")
+                    chat_temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+                    chat_topp = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
+                    chat_topk = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k")
+                    chat_rp = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")
+            with gr.Accordion("Add models", open=False):
+                chat_add_text = gr.Textbox(label="URL or Repo ID", placeholder="https://huggingface.co/mradermacher/MagnumChronos-i1-GGUF/blob/main/MagnumChronos.i1-Q4_K_M.gguf", lines=1)
+                chat_add_format = gr.Dropdown(choices=get_llm_formats(), value=get_llm_formats()[0], label="Message format")
+                chat_add_submit = gr.Button("Update lists of models")
+            with gr.Accordion("Modes", open=True):
+                chat_model = gr.Dropdown(choices=get_dolphin_models(), value=get_dolphin_models()[0][1], allow_custom_value=True, label="Model")
+                chat_model_info = gr.Markdown(value=get_dolphin_model_info(get_dolphin_models()[0][1]), label="Model info")
+                with gr.Row():
+                    chat_mode = gr.Dropdown(choices=get_dolphin_sysprompt_mode(), value=get_dolphin_sysprompt_mode()[0], allow_custom_value=False, label="Mode")
+                    chat_lang = gr.Dropdown(choices=get_dolphin_languages(), value="English", allow_custom_value=True, label="Output language")
+            with gr.Row():
+                with gr.Group():
+                    output_text = gr.TextArea(label="Output tags", interactive=False, show_copy_button=True)
+                    copy_btn = gr.Button(value="Copy to clipboard", size="sm", interactive=False)
+                with gr.Group():
+                    output_text_pony = gr.TextArea(label="Output tags (Pony e621 style)", interactive=False, show_copy_button=True)
+                    copy_btn_pony = gr.Button(value="Copy to clipboard", size="sm", interactive=False)
+            with gr.Accordion(label="Advanced options", open=False, visible=False):
+                tag_type = gr.Radio(label="Output tag conversion", info="danbooru for Animagine, e621 for Pony.", choices=["danbooru", "e621"], value="e621", visible=False)
+                dummy_np = gr.Textbox(label="Negative prompt", value="", visible=False)
+                dummy_np_pony = gr.Textbox(label="Negative prompt", value="", visible=False)
+                recom_animagine = gr.Textbox(label="Animagine reccomended prompt", value="Animagine", visible=False)
+                recom_pony = gr.Textbox(label="Pony reccomended prompt", value="Pony", visible=False)
+            generate_image_btn = gr.Button(value="GENERATE IMAGE", size="lg", variant="primary")
+            with gr.Row():
+                result_image = gr.Gallery(label="Generated images", columns=1, object_fit="contain", container=True, preview=True, show_label=False, show_share_button=False, show_download_button=True, interactive=False, visible=True, format="png")
+    with gr.Tab("GGUF-Playground"):
+        gr.Markdown("""# Chat with lots of Models and LLMs using llama.cpp
+        This tab is copy of [CaioXapelaum/GGUF-Playground](https://huggingface.co/spaces/CaioXapelaum/GGUF-Playground).<br>
+        Don't worry about the strange appearance, **it's just a bug of Gradio!**""", elem_classes="title")
+        pg_chatbot = gr.Chatbot(scale=1, likeable=False, show_copy_button=True, show_share_button=False)
+        with gr.Accordion("Additional inputs", open=False):
+            pg_chat_model = gr.Dropdown(choices=get_dolphin_models(), value=get_dolphin_models()[0][1], allow_custom_value=True, label="Model")
+            pg_chat_model_info = gr.Markdown(value=get_dolphin_model_info(get_dolphin_models()[0][1]), label="Model info")
+            pg_chat_format = gr.Dropdown(choices=get_llm_formats(), value=get_dolphin_model_format(get_dolphin_models()[0][1]), label="Message format")
+            pg_chat_sysmsg = gr.Textbox(value="You are a helpful assistant.", label="System message")
+            with gr.Row():
+                pg_chat_tokens = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens")
+                pg_chat_temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+                pg_chat_topp = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
+                pg_chat_topk = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k")
+                pg_chat_rp = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")
+        with gr.Accordion("Add models", open=True):
+            pg_chat_add_text = gr.Textbox(label="URL or Repo ID", placeholder="https://huggingface.co/mradermacher/MagnumChronos-i1-GGUF/blob/main/MagnumChronos.i1-Q4_K_M.gguf", lines=1)
+            pg_chat_add_format = gr.Dropdown(choices=get_llm_formats(), value=get_llm_formats()[0], label="Message format")
+            pg_chat_add_submit = gr.Button("Update lists of models")
+        gr.ChatInterface(
+            fn=respond_playground,
+            #title="Chat with lots of Models and LLMs using llama.cpp",
+            retry_btn="Retry",
+            undo_btn="Undo",
+            clear_btn="Clear",
+            submit_btn="Send",
+            #additional_inputs_accordion='gr.Accordion(label="Additional Inputs", open=False)',
+            additional_inputs=[pg_chat_model, pg_chat_sysmsg, pg_chat_tokens, pg_chat_temperature, pg_chat_topp, pg_chat_topk, pg_chat_rp],
+            chatbot=pg_chatbot
+        )
     gr.LoginButton()
     gr.DuplicateButton(value="Duplicate Space for private use (This demo does not work on CPU. Requires GPU Space)")
 
@@ -91,6 +127,18 @@ with gr.Blocks(theme='NoCrypt/miku@>=1.2.2', fill_width=True, css="", delete_cac
 
     generate_image_btn.click(generate_image, [output_text, dummy_np], [result_image], show_progress="full")
 
+    pg_chat_model.change(select_dolphin_model, [pg_chat_model], [pg_chat_model, pg_chat_format, pg_chat_model_info], queue=True, show_progress="full")#\
+    #.success(lambda: None, None, pg_chatbot, queue=False)
+    pg_chat_format.change(select_dolphin_format, [pg_chat_format], [pg_chat_format], queue=False)#\
+    #.success(lambda: None, None, pg_chatbot, queue=False)
+    gr.on(
+        triggers=[pg_chat_add_text.submit, pg_chat_add_submit.click],
+        fn=add_dolphin_models,
+        inputs=[pg_chat_add_text, pg_chat_add_format],
+        outputs=[pg_chat_model],
+        queue=False,
+        trigger_mode="once",
+    )
 
 if __name__ == "__main__":
     app.queue()
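
The `gr.on(...)` call added in the second hunk binds one handler to several triggers: submitting the textbox and clicking the button both fire `add_dolphin_models`. A minimal self-contained sketch of the same pattern, where the `echo` handler and component names are hypothetical stand-ins, not part of this commit:

    import gradio as gr

    def echo(text):
        # Hypothetical handler standing in for add_dolphin_models.
        return f"received: {text}"

    with gr.Blocks() as demo:
        box = gr.Textbox(label="URL or Repo ID")
        btn = gr.Button("Update")
        out = gr.Textbox(label="Result")
        # One event handler bound to two triggers, as in the diff above.
        gr.on(
            triggers=[box.submit, btn.click],
            fn=echo,
            inputs=[box],
            outputs=[out],
            queue=False,          # lightweight UI update, skip the queue
            trigger_mode="once",  # ignore re-triggers while a run is pending
        )

    if __name__ == "__main__":
        demo.launch()

`trigger_mode="once"` suppresses duplicate firing when the user hits Enter and clicks Send in quick succession, and `queue=False` keeps this list update from waiting behind queued generation jobs.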
llmdolphin.py
CHANGED
@@ -7,6 +7,7 @@ from llama_cpp_agent.chat_history import BasicChatHistory
 from llama_cpp_agent.chat_history.messages import Roles
 from ja_to_danbooru.ja_to_danbooru import jatags_to_danbooru_tags
 import wrapt_timeout_decorator
+from pathlib import Path
 
 
 llm_models_dir = "./llm_models"
@@ -53,6 +54,16 @@ llm_models = {
     "Trinas_Nectar-8B-model_stock.i1-Q4_K_M.gguf": ["mradermacher/Trinas_Nectar-8B-model_stock-i1-GGUF", MessagesFormatterType.MISTRAL],
     "ChatWaifu_v1.4.Q5_K_M.gguf": ["mradermacher/ChatWaifu_v1.4-GGUF", MessagesFormatterType.MISTRAL],
     "ChatWaifu_v1.3.1.Q4_K_M.gguf": ["mradermacher/ChatWaifu_v1.3.1-GGUF", MessagesFormatterType.MISTRAL],
+    "experiment_x-wip-q4_k_m.gguf": ["DreadPoor/EXPERIMENT_X-WIP-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
+    "narrathinker12b-q4_k_m.gguf": ["ClaudioItaly/NarraThinker12B-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
+    "llama-3.1-8b-matrix-q5_k_m.gguf": ["bunnycore/LLama-3.1-8B-Matrix-Q5_K_M-GGUF", MessagesFormatterType.LLAMA_3],
+    "Barcenas-8b-Cartas.Q5_K_M.gguf": ["mradermacher/Barcenas-8b-Cartas-GGUF", MessagesFormatterType.LLAMA_3],
+    "HannaOpenHermes-2.5-Mistral-7B.Q5_K_M.gguf": ["mradermacher/HannaOpenHermes-2.5-Mistral-7B-GGUF", MessagesFormatterType.MISTRAL],
+    "IceDrinkNameGoesHereRP-7b-Model_Stock.i1-Q4_K_M.gguf": ["mradermacher/IceDrinkNameGoesHereRP-7b-Model_Stock-i1-GGUF", MessagesFormatterType.ALPACA],
+    "Llama-3.1-Literotica-8B.Q4_K_S.gguf": ["mradermacher/Llama-3.1-Literotica-8B-GGUF", MessagesFormatterType.LLAMA_3],
+    "project-12-q4_k_m.gguf": ["ClaudioItaly/Project-12-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
+    "L3.1-Celestial-Stone-2x8B.i1-Q4_K_M.gguf": ["mradermacher/L3.1-Celestial-Stone-2x8B-i1-GGUF", MessagesFormatterType.LLAMA_3],
+    "experiment_y-wip-q4_k_m.gguf": ["DreadPoor/EXPERIMENT_Y-WIP-Q4_K_M-GGUF", MessagesFormatterType.LLAMA_3],
     "L3.1-Vulca-Epith-Bluegrade-v0.2-8B.q8_0.gguf": ["kromquant/L3.1-Vulca-Epith-Bluegrade-v0.2-8B-GGUFs", MessagesFormatterType.LLAMA_3],
     "llama-3.1-8b-omnimatrix-iq4_nl-imat.gguf": ["bunnycore/Llama-3.1-8B-OmniMatrix-IQ4_NL-GGUF", MessagesFormatterType.LLAMA_3],
     "L3.1-Artemis-d-8B.i1-Q5_K_M.gguf": ["mradermacher/L3.1-Artemis-d-8B-i1-GGUF", MessagesFormatterType.LLAMA_3],
@@ -1087,3 +1098,75 @@ def dolphin_parse_simple(
     else:
         prompts = list_uniq(to_list(raw_prompt) + ["nsfw", "explicit", "rating_explicit"])
     return ", ".join(prompts)
+
+
+# https://huggingface.co/spaces/CaioXapelaum/GGUF-Playground
+import cv2
+cv2.setNumThreads(1)
+
+@spaces.GPU()
+def respond_playground(
+    message,
+    history: list[tuple[str, str]],
+    model,
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    top_k,
+    repeat_penalty,
+):
+    if override_llm_format:
+        chat_template = override_llm_format
+    else:
+        chat_template = llm_models[model][1]
+
+    llm = Llama(
+        model_path=str(Path(f"{llm_models_dir}/{model}")),
+        flash_attn=True,
+        n_gpu_layers=81,  # 81
+        n_batch=1024,
+        n_ctx=8192,  # 8192
+    )
+    provider = LlamaCppPythonProvider(llm)
+
+    agent = LlamaCppAgent(
+        provider,
+        system_prompt=f"{system_message}",
+        predefined_messages_formatter_type=chat_template,
+        debug_output=False
+    )
+
+    settings = provider.get_provider_default_settings()
+    settings.temperature = temperature
+    settings.top_k = top_k
+    settings.top_p = top_p
+    settings.max_tokens = max_tokens
+    settings.repeat_penalty = repeat_penalty
+    settings.stream = True
+
+    messages = BasicChatHistory()
+
+    # Add user and assistant messages to the history
+    for msn in history:
+        user = {'role': Roles.user, 'content': msn[0]}
+        assistant = {'role': Roles.assistant, 'content': msn[1]}
+        messages.add_message(user)
+        messages.add_message(assistant)
+
+    # Stream the response
+    try:
+        stream = agent.get_chat_response(
+            message,
+            llm_sampling_settings=settings,
+            chat_history=messages,
+            returns_streaming_generator=True,
+            print_output=False
+        )
+
+        outputs = ""
+        for output in stream:
+            outputs += output
+            yield outputs
+    except Exception as e:
+        yield f"Error during response generation: {str(e)}"
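
`respond_playground` is a generator: each `yield` emits the accumulated reply so far, which is the shape `gr.ChatInterface` expects for streaming output, redrawing the pending assistant message on every yield. A toy sketch of that pattern in isolation, where `fake_token_stream` is purely illustrative and stands in for `agent.get_chat_response(..., returns_streaming_generator=True)`:

    from typing import Iterator

    def fake_token_stream() -> Iterator[str]:
        # Illustrative stand-in for the llama-cpp-agent streaming generator.
        yield from ["Natural ", "text ", "to ", "prompt."]

    def respond(message: str) -> Iterator[str]:
        outputs = ""
        for chunk in fake_token_stream():
            outputs += chunk
            yield outputs  # emit the whole message so far; the UI re-renders it each time

    if __name__ == "__main__":
        for partial in respond("hi"):
            print(partial)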
requirements.txt
CHANGED
@@ -15,4 +15,5 @@ httpcore
 googletrans==4.0.0rc1
 git+https://github.com/huggingface/diffusers
 rapidfuzz
-wrapt-timeout-decorator
+wrapt-timeout-decorator
+opencv-python