# Importing Python libraries
import os
import asyncio
from dotenv import load_dotenv
import chainlit as cl
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Qdrant
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import tiktoken

# Load environment variables from a .env file
load_dotenv()
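# Assumption: the .env file supplies the OpenAI credentials picked up by the
# embedding and chat model clients below, e.g.
#   OPENAI_API_KEY=sk-...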
# Run the setup coroutine whenever a new chat session starts
@cl.on_chat_start
async def start_chat():
    # Notify the user that the system is setting up the vector store
    await cl.Message(content="Setting up Qdrant vector store. Please wait...").send()

    # Load documents using PyMuPDFLoader from the specified URL
    docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()

    # Define a function to calculate the token length using tiktoken
    def tiktoken_len(text):
        tokens = tiktoken.encoding_for_model("gpt-3.5-turbo").encode(text)
        return len(tokens)

    # Configure a text splitter that handles large documents
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=200,          # Chunk size measured in tokens via length_function
        chunk_overlap=0,         # No overlap between adjacent chunks
        length_function=tiktoken_len,
    )
    # Split the document into manageable chunks
    split_chunks = text_splitter.split_documents(docs)

    # Set up the embedding model for document encoding
    embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

    # Asynchronously create a Qdrant vector store with the document chunks
    qdrant_vectorstore = await cl.make_async(Qdrant.from_documents)(
        split_chunks,
        embedding_model,
        location=":memory:",         # Use in-memory storage for vectors
        collection_name="meta_10k",  # Name of the collection in Qdrant
    )

    # Initialize a retriever from the Qdrant vector store
    qdrant_retriever = qdrant_vectorstore.as_retriever()
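    # Note: as_retriever() uses the vector store's default search settings; the
    # number of retrieved chunks could be tuned if needed, e.g.
    # qdrant_vectorstore.as_retriever(search_kwargs={"k": 5}).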
    # Notify the user that setup is complete
    await cl.Message(content="Qdrant setup complete. You can now start asking questions!").send()

    # Initialize a message history to track the conversation
    message_history = ChatMessageHistory()

    # Set up memory to hold the conversation context and return answers
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    # Configure the LLM for generating responses
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True)
    # Create a retrieval chain combining the LLM and the retriever
    chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=qdrant_retriever,
        chain_type="stuff",  # "stuff" places all retrieved chunks into a single prompt
        memory=memory,
        return_source_documents=True,
    )

    # Store the configured chain in the user session so it persists across messages
    cl.user_session.set("chain", chain)
# Handle each incoming user message with the chain stored in the session
@cl.on_message
async def main(message: cl.Message):
    # Retrieve the conversational chain from the user session
    chain = cl.user_session.get("chain")

    # Define a callback handler for asynchronous operations
    cb = cl.AsyncLangchainCallbackHandler()

    # Process the incoming message using the conversational chain
    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]  # Extract the answer from the response

    # Send the processed answer back to the user
    await cl.Message(content=answer).send()
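The imports above assume chainlit, langchain, langchain-community, langchain-openai, qdrant-client, pymupdf, tiktoken, and python-dotenv are installed. Assuming the script is saved as app.py (the filename is an assumption, not part of the listing), the Chainlit UI can be started with:

chainlit run app.py -w

The -w flag watches the file and reloads the app on changes, which is convenient during development.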