link model attributions, use config.yml for some of the chat settings, increase context size

Files changed:
- chat.py (+3 -3)
- config.yml (+8 -2)
chat.py CHANGED

@@ -36,7 +36,7 @@ def chat(history, system_message):
                          for item in history])
 
     history[-1][1] = ""
-    for output in llm(messages,
+    for output in llm(messages, echo=False, stream=True, **config['chat']):
         answer = output['choices'][0]['text']
         history[-1][1] += answer
 
@@ -91,7 +91,7 @@ with blocks:
     stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event, message_submit_event], queue=False)
 
     gr.Markdown(f"""
-    - This is the {config["repo"]}/{config["
+    - This is the [{config["repo"]}](https://huggingface.co/{config["repo"]}) model file [{config["file"]}](https://huggingface.co/{config["repo"]}/blob/main/{config["file"]})
     - This Space uses GGML with GPU support, so it can run larger models on smaller GPUs & VRAM quickly.
     - This is running on a smaller, shared GPU, so it may take a few seconds to respond.
     - [Duplicate the Space](https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui?duplicate=true) to skip the queue and run in a private space or to use your own GGML models.
@@ -99,4 +99,4 @@ with blocks:
     - Contribute at [https://github.com/OpenAccess-AI-Collective/ggml-webui](https://github.com/OpenAccess-AI-Collective/ggml-webui)
     """)
 
-blocks.queue(max_size=
+blocks.queue(max_size=32, concurrency_count=4).launch(debug=True, server_name="0.0.0.0", server_port=7860)
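The first hunk is the substance of the chat.py change: generation options are no longer hard-coded in the call but pulled from the new `chat` section of config.yml and splatted into the llama-cpp-python call. Below is a minimal sketch of that streaming loop, assuming `llm` is an already-constructed `llama_cpp.Llama` and `config` is the parsed config.yml; the prompt assembly here is an approximation of the history join in chat.py, not the Space's exact code.

```python
# Sketch only, not the Space's exact chat.py: stream tokens from llama-cpp-python,
# taking generation kwargs (max_tokens, stop, ...) from config["chat"].
from typing import Iterator, List

def stream_reply(llm, config: dict, history: List[List[str]], system_message: str) -> Iterator[List[List[str]]]:
    # Approximate prompt assembly (assumed format based on the "### User:" stop string);
    # the real chat.py joins the system message with the (user, assistant) turns in history.
    messages = system_message + "\n" + "\n".join(
        f"### User: {user}\n### Assistant: {assistant or ''}"
        for user, assistant in history
    )

    history[-1][1] = ""  # reset the assistant slot for the newest turn
    for output in llm(messages, echo=False, stream=True, **config["chat"]):
        # each streamed chunk carries a partial completion under choices[0]["text"]
        history[-1][1] += output["choices"][0]["text"]
        yield history
```
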
config.yml CHANGED

@@ -4,5 +4,11 @@ file: wizard-vicuna-13B.ggml.q5_1.bin
 # if the repo above doesn't include the tokenizer set the base repo it was based on with a valid tokenizer model
 base_model: junelee/wizard-vicuna-13b
 llama_cpp:
-  n_ctx:
-  n_gpu_layers: 40 # llama 13b has 40 layers
+  n_ctx: 2048
+  n_gpu_layers: 40 # llama 13b has 40 layers
+chat:
+  max_tokens: 1024
+  stop:
+    - "</s>"
+    - "<unk>"
+    - "### User:"
|