# Importing Python libraries
import os
import asyncio
from dotenv import load_dotenv
import chainlit as cl
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Qdrant
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import tiktoken

# Load environment variables from a .env file
load_dotenv()
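# Assumption: the .env file supplies the OpenAI credentials picked up by the
# embedding and chat model clients below, e.g.
#   OPENAI_API_KEY=sk-...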
# Run the setup coroutine whenever a new chat session starts
@cl.on_chat_start
async def start_chat():
    # Notify the user that the system is setting up the vector store
    await cl.Message(content="Setting up Qdrant vector store. Please wait...").send()

    # Load documents using PyMuPDFLoader from the specified URL
    docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()

    # Define a function to calculate the token length using tiktoken
    def tiktoken_len(text):
        tokens = tiktoken.encoding_for_model("gpt-3.5-turbo").encode(text)
        return len(tokens)

    # Configure a text splitter that handles large documents
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=200,          # Chunk size measured in tokens via length_function
        chunk_overlap=0,         # No overlap between adjacent chunks
        length_function=tiktoken_len,
    )
    # Split the document into manageable chunks
    split_chunks = text_splitter.split_documents(docs)

    # Set up the embedding model for document encoding
    embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

    # Asynchronously create a Qdrant vector store with the document chunks
    qdrant_vectorstore = await cl.make_async(Qdrant.from_documents)(
        split_chunks,
        embedding_model,
        location=":memory:",         # Use in-memory storage for vectors
        collection_name="meta_10k",  # Name of the collection in Qdrant
    )

    # Initialize a retriever from the Qdrant vector store
    qdrant_retriever = qdrant_vectorstore.as_retriever()
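    # Note: as_retriever() uses the vector store's default search settings; the
    # number of retrieved chunks could be tuned if needed, e.g.
    # qdrant_vectorstore.as_retriever(search_kwargs={"k": 5}).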
    # Notify the user that setup is complete
    await cl.Message(content="Qdrant setup complete. You can now start asking questions!").send()

    # Initialize a message history to track the conversation
    message_history = ChatMessageHistory()

    # Set up memory to hold the conversation context and return answers
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    # Configure the LLM for generating responses
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True)
    # Create a retrieval chain combining the LLM and the retriever
    chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=qdrant_retriever,
        chain_type="stuff",  # "stuff" places all retrieved chunks into a single prompt
        memory=memory,
        return_source_documents=True,
    )

    # Store the configured chain in the user session so it persists across messages
    cl.user_session.set("chain", chain)
# Handle each incoming user message with the chain stored in the session
@cl.on_message
async def main(message: cl.Message):
    # Retrieve the conversational chain from the user session
    chain = cl.user_session.get("chain")

    # Define a callback handler for asynchronous operations
    cb = cl.AsyncLangchainCallbackHandler()

    # Process the incoming message using the conversational chain
    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]  # Extract the answer from the response

    # Send the processed answer back to the user
    await cl.Message(content=answer).send()
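The imports above assume chainlit, langchain, langchain-community, langchain-openai, qdrant-client, pymupdf, tiktoken, and python-dotenv are installed. Assuming the script is saved as app.py (the filename is an assumption, not part of the listing), the Chainlit UI can be started with:

chainlit run app.py -w

The -w flag watches the file and reloads the app on changes, which is convenient during development.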