Spaces:

hunterXdk
/

RagModels

Sleeping

App Files Files Community

hunterXdk committed on Nov 26, 2024

Commit

50e4be7

verified ·

1 Parent(s): 34001f9

Initial Commit With ❤

Browse files

Files changed (2) hide show

chatbot.py +79 -0
requirements.txt +7 -0

chatbot.py ADDED Viewed

	@@ -0,0 +1,79 @@

+def get_pdf_text(pdf_docs):
+    text = ""
+    for pdf in pdf_docs:
+        pdf_reader = PdfReader(pdf)
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+    return text
+# chuck_size = 1000, chunk_overlap = 200 (for shorted PDFs)
+def get_text_chunks(text):
+  text_splitter= RecursiveCharacterTextSplitter(
+    chunk_size=10000,
+    chunk_overlap=1000,
+    # length_function=len
+  )
+  chunks=text_splitter.split_text(text)
+  return chunks
+# Converting into Vector data/store (can also be stored)
+def get_vector_store(text_chunks):
+  # embeddings = GoogleGenerativeAIEmbeddings(model='embedding-gecko-001')
+  embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
+  vector_store = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+  vector_store.save_local("faiss_index")
+  # return vector_store
+def get_conversation_chain():
+  prompt_template="""Answer the query as detailed as possible from the provided context, make sure to provide all the details, if answeris not in
+  the provided context, just say, "Answer is not available in the provided documents", don't provide the wrong answer:\n {context}? \n Query: {query}? \n
+  Answer:
+  """
+  model=ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
+  prompt=PromptTemplate(template=prompt_template, input_variables=["context", "query"])
+  # chain=load_qa_chain(llm=model, chain_type="stuff", prompt=prompt)
+  chain=load_qa_chain(model, chain_type="stuff", prompt=prompt)
+  return chain
+def user_input(user_question):
+  # embeddings = GoogleGenerativeAIEmbeddings(model='embedding-gecko-001')
+  embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
+  # Loading the embeddings
+  new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
+  docs = new_db.similarity_search(user_question)
+  chain=get_conversation_chain()
+  response = chain(
+      {"input_documents": docs, "question": user_question}
+      , return_only_outputs=True)
+  print(response)
+  st.write("Reply: ", response["output_text"])
+# Frontend page Processor
+def main():
+  st.set_page_config(page_title="PDF Chatbot")
+  st.header("PDF Chatbot made with ❤")
+  user_question = st.text_input("Ask a question about your documents:")
+  if user_question:
+    user_input(user_question)
+  with st.sidebar:
+    st.title("Menu:")
+    pdf_docs = st.file_uploader(
+        "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
+    if st.button("Submit & Process"):
+      with st.spinner("Ruko Padh raha hu..."):
+        raw_text = get_pdf_text(pdf_docs)
+        text_chunks = get_text_chunks(raw_text)
+        get_vector_store(text_chunks)
+        st.success("Saare documents padh liya. Ab swaal pucho 😤")
+# Call main() only when this file is executed as a script, not when imported.
+if __name__ == '__main__':
+  main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+streamlit
+google-generativeai
+langchain
+PyPDF2
+chromadb
+faiss-cpu
+langchain_google_genai