from huggingface_hub import hf_hub_download
import logging
import sys
import gradio as gr
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
def download(model):
    repo_id = model_info[model]["repo_id"]
    filename = model_info[model]["filename"]
    model_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        resume_download=True,
        cache_dir=MODELS_PATH,
    )
    return model_path
MODELS_PATH = "./models"

models = ["Llama-2-13B-chat", "Mistral-7B-Instruct-v0.2", "zephyr-7B-beta", "vicuna-7B-v1.5", "CodeLlama-7B"]

model_info = {
    "Llama-2-13B-chat": {
        "repo_id": "TheBloke/Llama-2-13B-chat-GGUF",
        "filename": "llama-2-13b-chat.Q4_K_M.gguf",
    },
    "Mistral-7B-Instruct-v0.2": {
        "repo_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
        "filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    },
    "zephyr-7B-beta": {
        "repo_id": "TheBloke/zephyr-7B-beta-GGUF",
        "filename": "zephyr-7b-beta.Q4_K_M.gguf",
    },
    "vicuna-7B-v1.5": {
        "repo_id": "TheBloke/vicuna-7B-v1.5-GGUF",
        "filename": "vicuna-7b-v1.5.Q4_K_M.gguf",
    },
    "CodeLlama-7B": {
        "repo_id": "TheBloke/CodeLlama-7B-GGUF",
        "filename": "codellama-7b.Q4_K_M.gguf",
    },
    # "Falcon-7B-Instruct": {
    #     "repo_id": "TheBloke/Falcon-7B-Instruct-GGML",
    #     "filename": "falcon-7b-instruct.ggccv1.q4_1.bin",
    # },
}
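# Pre-download every quantized GGUF file in `models` into ./models so that
# switching models in the UI later only hits the local cache. Note: these files
# are several GB each, so the first run can take a while and needs disk space.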
for model_name in models:
    download(model_name)

mistral_model_path = hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
    filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    resume_download=True,
    cache_dir=MODELS_PATH,
)
"""Step 3: if you use a GPU, make sure "n_gpu_layers" is at least 1 (or -1 to offload all layers); increase or decrease it based on your GPU's capacity."""
llm = LlamaCPP(
    # You can pass in the URL to a GGUF model to download it automatically
    # model_url=model_url,
    # optionally, you can set the path to a pre-downloaded model instead of model_url
    model_path=mistral_model_path,
    temperature=0.1,
    max_new_tokens=256,
    # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
    context_window=3900,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set to at least 1 to use the GPU; -1 offloads all layers
    model_kwargs={"n_gpu_layers": -1},
    # transform inputs into Llama2 format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
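# Optional sanity check: assuming the Mistral weights downloaded correctly, a
# single non-streaming completion confirms that llama.cpp can load the model
# (uncomment to try it; the prompt is just an illustrative example).
# print(llm.complete("Hello, who are you?").text)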
def model_initialization(model):
    # Rebind the module-level `llm` so predict() uses the newly selected model.
    global llm
    if model != "":
        gr.Info("model downloading and configuration process has been started, please wait...")
    MODELS_PATH = "./models"
    repo_id = ""
    filename = ""
    if model == "Llama-2-13B-chat":
        repo_id = "TheBloke/Llama-2-13B-chat-GGUF"
        filename = "llama-2-13b-chat.Q4_K_M.gguf"
    elif model == "Mistral-7B-Instruct-v0.2":
        repo_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
        filename = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
    elif model == "zephyr-7B-beta":
        repo_id = "TheBloke/zephyr-7B-beta-GGUF"
        filename = "zephyr-7b-beta.Q4_K_M.gguf"
    elif model == "vicuna-7B-v1.5":
        repo_id = "TheBloke/vicuna-7B-v1.5-GGUF"
        filename = "vicuna-7b-v1.5.Q4_K_M.gguf"
    # elif model == "Falcon-7B-Instruct":
    #     repo_id = "TheBloke/Falcon-7B-Instruct-GGML"
    #     filename = "falcon-7b-instruct.ggccv1.q4_1.bin"
    elif model == "CodeLlama-7B":
        repo_id = "TheBloke/CodeLlama-7B-GGUF"
        filename = "codellama-7b.Q4_K_M.gguf"
    else:
        gr.Warning("please select a model")
        return "no model selected, current model is unchanged"

    downloaded_model_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        resume_download=True,
        cache_dir=MODELS_PATH,
    )
    llm = LlamaCPP(
        # You can pass in the URL to a GGUF model to download it automatically
        # model_url=model_url,
        # optionally, you can set the path to a pre-downloaded model instead of model_url
        model_path=downloaded_model_path,
        temperature=0.1,
        max_new_tokens=256,
        # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
        context_window=3900,
        # kwargs to pass to __call__()
        generate_kwargs={},
        # set to at least 1 to use the GPU; -1 offloads all layers
        model_kwargs={"n_gpu_layers": -1},
        # transform inputs into Llama2 format
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        verbose=True,
    )
    gr.Info("model has been configured and is ready to chat")
    return "model has been configured and is ready to chat; your current model is " + model
def predict(message, history):
    answer = []
    response = llm.stream_complete(message)
    for bot_response in response:
        token = bot_response.delta
        answer.append(token)
        # deltas already contain their own whitespace, so join without separators
        final_answer = "".join(answer)
        yield final_answer
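# Example of exercising predict() directly (outside Gradio) for debugging; it is
# a generator, so iterate over the progressively longer answers it yields:
# for partial in predict("What is llama.cpp?", []):
#     print(partial)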
with gr.Blocks() as UI:
    models = gr.Dropdown(
        ["CodeLlama-7B", "Llama-2-13B-chat", "Mistral-7B-Instruct-v0.2", "zephyr-7B-beta", "vicuna-7B-v1.5"],
        value="Mistral-7B-Instruct-v0.2",
        label="please select a model",
        info="default model is Mistral-7B-Instruct-v0.2",
    )
    textInfo = gr.Textbox(value="current model is Mistral-7B-Instruct-v0.2", label="Model Status")
    # Chatbot interface
    chatUI = gr.ChatInterface(
        predict,
        title="Open Source LLM ChatBot",
        description="Ask any question",
        theme="soft",
        examples=["Hello", "Are you an LLM model?", "How can I fine-tune a pre-trained LLM model?", "How can I build a chatbot using a local open-source LLM?"],
        cache_examples=False,
        submit_btn="Send Message",
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
    )
    models.change(fn=model_initialization, inputs=[models], outputs=[textInfo])
if __name__ == "__main__":
    UI.launch(debug=True)