Spaces:

DrishtiSharma
/

docqa-with-deepseek-r1

Build error

App Files Community

DrishtiSharma committed on Feb 15

Commit

bcd96c3

verified ·

1 Parent(s): a22e896

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -42

app.py CHANGED Viewed

@@ -43,47 +43,35 @@ st.title("Blah-2")
 # Step 1: Choose PDF Source
 pdf_source = st.radio("Upload or provide a link to a PDF:", ["Enter a PDF URL", "Upload a PDF file"], index=0, horizontal=True)
-# Function to download and process the PDF
-def download_pdf():
-    if st.session_state.pdf_url and not st.session_state.pdf_path:
         with st.spinner("Downloading PDF..."):
             try:
-                response = requests.get(st.session_state.pdf_url)
                 if response.status_code == 200:
                     st.session_state.pdf_path = "temp.pdf"
                     with open(st.session_state.pdf_path, "wb") as f:
                         f.write(response.content)
-                    # Reset processing state
                     st.session_state.pdf_loaded = False
                     st.session_state.chunked = False
                     st.session_state.vector_created = False
                     st.success("✅ PDF Downloaded Successfully!")
                 else:
                     st.error("❌ Failed to download PDF. Check the URL.")
             except Exception as e:
                 st.error(f"❌ Error downloading PDF: {e}")
-if pdf_source == "Upload a PDF file":
-    uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
-    if uploaded_file:
-        st.session_state.pdf_path = "temp.pdf"
-        with open(st.session_state.pdf_path, "wb") as f:
-            f.write(uploaded_file.getbuffer())
-        st.session_state.pdf_loaded = False
-        st.session_state.chunked = False
-        st.session_state.vector_created = False
-elif pdf_source == "Enter a PDF URL":
-    # ✅ Text input with Enter support
-    st.text_input("Enter PDF URL:", value="https://arxiv.org/pdf/2406.06998", key="pdf_url", on_change=download_pdf)
-    # ✅ Button support
-    if st.button("Download and Process PDF"):
-        download_pdf()
 # Step 2: Load & Process PDF (Only Once)
 if st.session_state.pdf_path and not st.session_state.pdf_loaded:
     with st.spinner("Loading PDF..."):
@@ -132,17 +120,7 @@ if st.session_state.pdf_loaded and not st.session_state.chunked:
 # Step 4: Setup Vectorstore
 def load_vector_store():
-    try:
-        vector_store = Chroma(
-            persist_directory=VECTOR_DB_PATH,
-            collection_name="deepseek_collection",
-            embedding_function=HuggingFaceEmbeddings(model_name="nomic-ai/modernbert-embed-base")
-        )
-        st.success("✅ Vector store loaded successfully!")
-        return vector_store
-    except Exception as e:
-        st.error(f"❌ Failed to load vector store: {e}")
-        return None  # Return None if there's an error
 if st.session_state.chunked and not st.session_state.vector_created:
     with st.spinner("Creating vector store..."):
@@ -169,11 +147,7 @@ st.write("📂 **Vector Store Created:**", st.session_state.vector_created)
 query = st.text_input("🔍 Ask a question about the document:")
 if query:
     with st.spinner("🔄 Retrieving relevant context..."):
-        if st.session_state.vector_store is None:
-            st.error("❌ Vector store is not initialized. Ensure document processing and chunking are completed.")
-        else:
-            retriever = st.session_state.vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
         contexts = retriever.invoke(query)
         # Debugging: Check what was retrieved
         st.write("Retrieved Contexts:", contexts)

 # Step 1: Choose PDF Source
 pdf_source = st.radio("Upload or provide a link to a PDF:", ["Enter a PDF URL", "Upload a PDF file"], index=0, horizontal=True)
+if pdf_source == "Upload a PDF file":
+    uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
+    if uploaded_file:
+        st.session_state.pdf_path = "temp.pdf"
+        with open(st.session_state.pdf_path, "wb") as f:
+            f.write(uploaded_file.getbuffer())
+        st.session_state.pdf_loaded = False
+        st.session_state.chunked = False
+        st.session_state.vector_created = False
+elif pdf_source == "Enter a PDF URL":
+    pdf_url = st.text_input("Enter PDF URL:", value = "https://arxiv.org/pdf/2406.06998")
+    if pdf_url and not st.session_state.pdf_path:
         with st.spinner("Downloading PDF..."):
             try:
+                response = requests.get(pdf_url)
                 if response.status_code == 200:
                     st.session_state.pdf_path = "temp.pdf"
                     with open(st.session_state.pdf_path, "wb") as f:
                         f.write(response.content)
                     st.session_state.pdf_loaded = False
                     st.session_state.chunked = False
                     st.session_state.vector_created = False
                     st.success("✅ PDF Downloaded Successfully!")
                 else:
                     st.error("❌ Failed to download PDF. Check the URL.")
             except Exception as e:
                 st.error(f"❌ Error downloading PDF: {e}")
 # Step 2: Load & Process PDF (Only Once)
 if st.session_state.pdf_path and not st.session_state.pdf_loaded:
     with st.spinner("Loading PDF..."):
 # Step 4: Setup Vectorstore
 def load_vector_store():
+    return Chroma(persist_directory=VECTOR_DB_PATH, collection_name="deepseek_collection", embedding_function=HuggingFaceEmbeddings(model_name="nomic-ai/modernbert-embed-base"))
 if st.session_state.chunked and not st.session_state.vector_created:
     with st.spinner("Creating vector store..."):
 query = st.text_input("🔍 Ask a question about the document:")
 if query:
     with st.spinner("🔄 Retrieving relevant context..."):
+        retriever = st.session_state.vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
         contexts = retriever.invoke(query)
         # Debugging: Check what was retrieved
         st.write("Retrieved Contexts:", contexts)