Spaces:

Vallabhpatil777
/

PDF_Search_Qdrant

Paused

PDF_Search_Qdrant / main.py

Upload 12 files (#1)

833b888 verified 3 months ago

1.58 kB

	import streamlit as st
	import tempfile
	from backend.pdf_utils import extract_chunks_with_langchain
	from backend.indexer import setup_collections, index_documents
	from backend.search import search_and_rerank

	# Streamlit front end: upload a PDF, index its chunks in Qdrant, then run a
	# query and show the dense, cross-encoder and ColBERT rankings one after another.
	#
	# Streamlit re-executes this script from the top on every widget interaction,
	# so anything that must survive a rerun is kept in st.session_state.

	# (section heading, key in the search_and_rerank() result) for each ranking,
	# in display order.
	RESULT_SECTIONS = (
	    ("Raw Dense Results", "raw"),
	    ("Cross-Encoder Reranked", "cross"),
	    ("ColBERT Reranked", "colbert"),
	)

	st.title("Qdrant PDF Search")

	# Remember across reruns whether an indexing run has completed.
	if "indexed" not in st.session_state:
	    st.session_state.indexed = False

	uploaded = st.file_uploader("Upload a PDF", type=["pdf"])

	if uploaded:
	    # The extractor is called with a filesystem path, so the upload is spilled
	    # to disk first. A TemporaryDirectory is removed when the `with` exits, so
	    # reruns no longer accumulate orphaned temp files (the previous
	    # NamedTemporaryFile(delete=False) was never deleted).
	    with tempfile.TemporaryDirectory() as tmp_dir:
	        pdf_path = f"{tmp_dir}/upload.pdf"
	        with open(pdf_path, "wb") as pdf_file:
	            # getvalue() returns the whole buffer regardless of the current
	            # stream position, unlike read().
	            pdf_file.write(uploaded.getvalue())

	        st.success("PDF uploaded!")
	        # NOTE(review): assumes extract_chunks_with_langchain reads the file
	        # eagerly and returns in-memory chunks; if it is lazy, consume the
	        # result before leaving this `with` block — confirm against backend.
	        chunks = extract_chunks_with_langchain(pdf_path)

	    if st.button("Index PDF in Qdrant Cloud"):
	        with st.spinner("Indexing..."):
	            setup_collections()
	            index_documents(chunks)
	        # Flip the flag only after indexing returned without raising.
	        st.session_state.indexed = True
	        st.success("Indexed successfully!")

	# Only show the query input after indexing is done.
	if st.session_state.indexed:
	    query = st.text_input("Enter your search query:")

	    if query:
	        results = search_and_rerank(query)

	        # Each section holds (chunk, score) pairs; show the score and the
	        # first 200 characters of the chunk.
	        for heading, key in RESULT_SECTIONS:
	            st.subheader(heading)
	            for chunk, score in results[key]:
	                st.markdown(f"{score:.3f} - {chunk[:200]}...")
	else:
	    st.info("Please upload and index a PDF before searching.")