|
|
import os
import tempfile

import streamlit as st

from backend.pdf_utils import extract_chunks_with_langchain
from backend.indexer import setup_collections, index_documents
from backend.search import search_and_rerank
|
|
|
|
|
# Page heading rendered at the top of the app.
st.title("Qdrant PDF Search")

# Seed the "indexed" flag the first time it is missing from session state;
# the search section below reads it to decide whether to show the query box.
if "indexed" not in st.session_state:
    st.session_state["indexed"] = False
|
|
|
|
|
# Upload step: accept a single PDF from the user.
uploaded = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded:
    # The extractor takes a file path, so the upload is spilled to disk first.
    # delete=False lets the file be closed (and flushed) before it is reopened
    # by path; the ".pdf" suffix keeps extension-sensitive loaders working.
    # getvalue() returns the whole payload regardless of the stream position,
    # whereas read() yields nothing once the buffer has already been consumed.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(uploaded.getvalue())
        pdf_path = tmp.name

    try:
        st.success("PDF uploaded!")
        chunks = extract_chunks_with_langchain(pdf_path)

        # Indexing only happens on an explicit click; the flag it sets is what
        # unlocks the search section further down the script.
        if st.button("Index PDF in Qdrant Cloud"):
            with st.spinner("Indexing..."):
                setup_collections()
                index_documents(chunks)
                st.session_state.indexed = True
            st.success("Indexed successfully!")
    finally:
        # Without this, every run of the script that has an upload leaves
        # another temporary copy behind. The file is kept until indexing is
        # done in case the extractor reads it lazily. Removal is best-effort:
        # a cleanup failure must not mask the outcome of the work above.
        try:
            os.unlink(pdf_path)
        except OSError:
            pass
|
|
|
|
|
|
|
|
# Search step: only offered once the indexing step has set the flag.
if not st.session_state.indexed:
    st.info("Please upload and index a PDF before searching.")
else:
    query = st.text_input("Enter your search query:")

    if query:
        results = search_and_rerank(query)

        # (heading, key into `results`) for each ranking, in display order.
        sections = (
            ("Raw Dense Results", "raw"),
            ("Cross-Encoder Reranked", "cross"),
            ("ColBERT Reranked", "colbert"),
        )
        for heading, result_key in sections:
            st.subheader(heading)
            # Each hit is a (chunk, score) pair; show the score and a
            # 200-character preview of the chunk.
            for chunk, score in results[result_key]:
                st.markdown(f"**{score:.3f}** - {chunk[:200]}...")
|
|
|