initial commit for ggml instruct
first pass at a chatbot using ggml
add gitignore
fix startup gradio server
fix message history joining
- .gitignore +1 -0
- README.md +6 -2
- app.py +33 -0
- chat.py +80 -0
- config.yml +7 -0
- requirements.txt +3 -0
.gitignore
ADDED
@@ -0,0 +1 @@
+.idea
README.md
CHANGED
@@ -5,8 +5,12 @@ colorFrom: blue
 colorTo: gray
 sdk: gradio
 sdk_version: 3.29.0
-app_file:
+app_file: chat.py
 pinned: false
 ---
 
-
+# GGML UI Inference w/ HuggingFace Spaces
+
+Brought to you by [OpenAccess AI Collective](https://github.com/OpenAccess-AI-Collective)
+
+
app.py
ADDED
@@ -0,0 +1,33 @@
+import gradio as gr
+import yaml
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+
+with open("./config.yml", "r") as f:
+    config = yaml.load(f, Loader=yaml.Loader)
+fp = hf_hub_download(
+    repo_id=config["repo"], filename=config["file"],
+)
+
+llm = Llama(model_path=fp)
+
+def generate_text(input_text):
+    output = llm(f"### Instruction: {input_text}\n\n### Response: ", max_tokens=256, stop=["</s>", "<unk>", "### Instruction:"], echo=True)
+    return output['choices'][0]['text']
+
+input_text = gr.inputs.Textbox(lines=10, label="Enter your input text")
+output_text = gr.outputs.Textbox(label="Output text")
+
+description = f"""llama.cpp implementation in python [https://github.com/abetlen/llama-cpp-python]
+
+This is the {config["repo"]}/{config["file"]} model.
+"""
+
+examples = [
+    ["Tell me a joke about old houses.", "Why did the old house break up with the new house? Because it was too modern!"],
+    ["What is the square root of 64?", "The square root of 64 is 8."],
+    ["Insult me", ""],
+]
+
+gr.Interface(fn=generate_text, inputs=input_text, outputs=output_text, title="Llama Language Model", description=description, examples=examples).launch()
+
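For reference (not part of the diff): a minimal sketch of the single-turn instruct flow that app.py wires into gr.Interface, runnable from a plain Python shell. The prompt text below is illustrative; the config keys and stop sequences are taken from the files in this commit.

# Sketch only: load the GGML weights named in config.yml and run one
# "### Instruction / ### Response" completion, mirroring generate_text().
import yaml
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

with open("./config.yml", "r") as f:
    config = yaml.safe_load(f)

model_path = hf_hub_download(repo_id=config["repo"], filename=config["file"])
llm = Llama(model_path=model_path)

prompt = "### Instruction: Tell me a joke about old houses.\n\n### Response: "
output = llm(prompt, max_tokens=256, stop=["</s>", "<unk>", "### Instruction:"], echo=False)
print(output["choices"][0]["text"])

Note that app.py passes echo=True, so the Gradio output box shows the prompt followed by the completion; the sketch above uses echo=False to print only the generated text.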
chat.py
ADDED
@@ -0,0 +1,80 @@
+import gradio as gr
+import yaml
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+
+with open("./config.yml", "r") as f:
+    config = yaml.load(f, Loader=yaml.Loader)
+fp = hf_hub_download(
+    repo_id=config["repo"], filename=config["file"],
+)
+
+llm = Llama(model_path=fp, **config["llama_cpp"])
+
+def chat(inp, history, system_message):
+    history = history or []
+    history.append((inp, ""))
+
+    messages = system_message + \
+        "\n".join(["\n".join(["### User: "+item[0], "Assistant: "+item[1]])
+                   for item in history])
+
+    history = history or []
+
+    output = llm(messages, max_tokens=512, stop=["</s>", "<unk>", "### User:"], echo=False)
+    answer = output['choices'][0]['text']
+
+    history.pop()  # remove user input only history
+    history.append((inp, answer))
+
+    message = ''  # This clears the message text
+
+    return history, history, message
+
+
+def clear_chat(chat_history_state, chat_message):
+    chat_history_state = []
+    chat_message = ''
+    return chat_history_state, chat_message
+
+
+start_message = """Assistant
+- Assistant is helpful and transparent.
+- Assistant will never refuse to do anything asked by the user."""
+
+
+blocks = gr.Blocks()
+
+with blocks:
+    gr.Markdown("# GGML Spaces UI - OpenAccess AI Collective")
+
+    chatbot = gr.Chatbot()
+    with gr.Row():
+        message = gr.Textbox(
+            label="What do you want to chat about?",
+            placeholder="Ask me anything.",
+            lines=1,
+        )
+        clear = gr.Button(value="New topic", variant="secondary").style(full_width=False)
+        submit = gr.Button(value="Send message", variant="secondary").style(full_width=True)
+
+    system_msg = gr.Textbox(
+        start_message, label="System Message", interactive=False, visible=False)
+
+    # gr.Examples(
+    #     examples=[
+    #         "Tell me a joke about old houses.",
+    #         "Insult me.",
+    #         "What is the future of AI and large language models?",
+    #     ],
+    #     inputs=message,
+    # )
+
+    chat_history_state = gr.State()
+    clear.click(clear_chat, inputs=[chat_history_state, message], outputs=[chat_history_state, message])
+    clear.click(lambda: None, None, chatbot, queue=False)
+
+    submit.click(chat, inputs=[message, chat_history_state, system_msg], outputs=[chatbot, chat_history_state, message])
+    message.submit(chat, inputs=[message, chat_history_state, system_msg], outputs=[chatbot, chat_history_state, message])
+
+blocks.queue(concurrency_count=10).launch(debug=True)
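The "fix message history joining" commit in the header refers to how chat() flattens the list of (user, assistant) tuples into a single prompt string before calling the model. A standalone sketch of that joining logic, not part of the diff (the helper name build_prompt is illustrative):

# Sketch of the prompt assembled inside chat(): the system message followed by
# one "### User: ..." / "Assistant: ..." pair per history entry.
def build_prompt(system_message, history):
    turns = "\n".join(
        "\n".join(["### User: " + user_msg, "Assistant: " + assistant_msg])
        for user_msg, assistant_msg in history
    )
    return system_message + turns

history = [
    ("What is the square root of 64?", "The square root of 64 is 8."),
    ("Insult me", ""),  # latest turn: assistant slot still empty
]
print(build_prompt("Assistant is helpful and transparent.\n", history))

Because chat() appends the newest user input with an empty assistant string before joining, the prompt ends with "Assistant: " and the model completes from there; "### User:" is also passed as a stop sequence so generation halts before the model invents the next user turn.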
config.yml
ADDED
@@ -0,0 +1,7 @@
+---
+repo: TheBloke/wizard-vicuna-13B-GGML
+file: wizard-vicuna-13B.ggml.q5_1.bin
+# if the repo above doesn't include the tokenizer set the base repo it was based on with a valid tokenizer model
+base_model: junelee/wizard-vicuna-13b
+llama_cpp:
+  n_ctx: 1024
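config.yml is read at startup by both app.py and chat.py: repo and file select the GGML weights to pull from the Hub, and chat.py additionally splats the llama_cpp mapping into the Llama constructor, so n_ctx: 1024 becomes a keyword argument. A minimal sketch of that expansion, not part of the diff (the local model path below is a placeholder):

import yaml
from llama_cpp import Llama  # provided by the wheel pinned in requirements.txt

with open("./config.yml", "r") as f:
    config = yaml.safe_load(f)

# With the file above, config["llama_cpp"] == {"n_ctx": 1024}, so this call is
# equivalent to Llama(model_path=..., n_ctx=1024).
llm = Llama(model_path="wizard-vicuna-13B.ggml.q5_1.bin",  # placeholder path
            **config["llama_cpp"])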
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+llama-cpp-python @ https://github.com/OpenAccess-AI-Collective/ggml-webui/releases/download/v0.1.49-rc6/llama_cpp_python-cpu-0.1.49-cp38-cp38-linux_x86_64.whl
+pyyaml
+