Spaces: Runtime error
import os
import gradio as gr

# chatbot
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from textwrap import fill
DATA_PATH = 'data/'
DB_FAISS_PATH = 'vectorstore/db_faiss'
# Load the model
model_name = "TheBloke/Llama-2-13b-Chat-GPTQ"
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             device_map="auto",
                                             trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

gen_cfg = GenerationConfig.from_pretrained(model_name)
gen_cfg.max_new_tokens = 512
gen_cfg.temperature = 0.0000001  # near-zero for (almost) deterministic output; 0.0 is not accepted when do_sample=True
gen_cfg.return_full_text = True
gen_cfg.do_sample = True
gen_cfg.repetition_penalty = 1.11
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=gen_cfg
)
if gr.NO_RELOAD:  # skip this heavy setup when Gradio hot-reloads the script
    llm = HuggingFacePipeline(pipeline=pipe)
    embeddings = HuggingFaceEmbeddings()
    db = FAISS.load_local(DB_FAISS_PATH, embeddings)  # recent langchain versions also require allow_dangerous_deserialization=True
    print('everything ok')
# st.title('Flint, your FinanceBot')  # leftover from the Streamlit version, unused

Description = """
## Finance Bot: get instant insights from your finance documents
This chatbot is built with the Retrieval-Augmented Generation (RAG) framework.
"""
# DB_FAISS_PATH = os.path.join(local_path, 'vectorstore_docs/db_faiss')

prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know; don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else. Keep it short, at most 500 words.

Helpful answer:
"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
Chain_pdf = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # Similarity search is the default retrieval mode; set search_type="mmr" for maximal marginal relevance.
    # k controls how many documents are returned (default 4).
    # score_threshold sets a minimum relevance score when using the "similarity_score_threshold" search type.
    # retriever=db.as_retriever(search_type="similarity_score_threshold", search_kwargs={'k': 5, 'score_threshold': 0.8}),
    # return_source_documents=True,  # optionally return the source documents used to answer the question
    retriever=db.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)
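# For reference, the MMR variant mentioned above could be swapped in like this
# (a sketch, not part of the original Space; the k value is an arbitrary choice):
# retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 5})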
# query = "When was the solar system formed?"
# result = Chain_pdf.invoke(query)
# print(fill(result['result'].strip(), width=100))
def final_result(query, history):
    # history is required by gr.ChatInterface but is not used by the chain
    result = Chain_pdf.invoke(query)
    print(fill(result['result'].strip(), width=100))
    return result['result']  # ChatInterface expects the answer text, not the result dict
with gr.Blocks() as demo:
    system_prompt = gr.Textbox("You are helpful AI.", label="System Prompt")  # displayed, but not yet wired into the chain
    slider = gr.Slider(10, 100, render=False)  # unused placeholder
    gr.ChatInterface(
        final_result,  # additional_inputs must be Gradio components, so the chain is used via the global Chain_pdf instead
        description=Description,
    )
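    # If the System Prompt box should influence answers, it could be passed as an additional input
    # (sketch, not from the original Space; final_result would then need a matching third parameter):
    # gr.ChatInterface(final_result, additional_inputs=[system_prompt])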
demo.launch()
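The app only loads an existing index from vectorstore/db_faiss; the build step is not shown. Below is a minimal sketch of how such an index could be created from documents in data/, assuming a PDF corpus; the loader and chunking settings are illustrative, not taken from the Space.

# build_index.py -- one-off sketch for creating the FAISS index the app loads (assumptions noted)
from langchain.document_loaders import PyPDFDirectoryLoader         # assumes the corpus is PDFs under data/
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

DATA_PATH = 'data/'
DB_FAISS_PATH = 'vectorstore/db_faiss'

docs = PyPDFDirectoryLoader(DATA_PATH).load()                        # one Document per PDF page
chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(docs)

embeddings = HuggingFaceEmbeddings()                                 # same default embedding model the app uses
db = FAISS.from_documents(chunks, embeddings)
db.save_local(DB_FAISS_PATH)                                         # written to the path the app loads from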