File size: 1,073 Bytes
833b888 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
from backend.qdrant import client
from backend.embed_models import dense_model, colbert_model, cross_encoder
def search_and_rerank(query, top_k: int = 5) -> dict:
    """Retrieve chunks for *query* and return three alternative rankings.

    Two independent searches are run and one reranking pass is applied:

    * ``"raw"``     - hits from the ``pdf_dense`` collection (named vector
      ``"embedding"``), in the order and with the scores the vector store
      returned.
    * ``"cross"``   - the same dense hits, rescored with
      ``cross_encoder.rerank`` and sorted by that score, highest first.
    * ``"colbert"`` - hits from a separate query against the
      ``pdf_colbert`` collection using the ColBERT query embedding. This is
      its own retrieval, not a rerank of the dense hits.

    Args:
        query: Query passed unchanged to both embedding models and to the
            cross-encoder (presumably a single text string - confirm
            against callers).
        top_k: Maximum number of points requested from each collection.

    Returns:
        A dict with keys ``"raw"``, ``"cross"`` and ``"colbert"``; each
        value is a list of ``(chunk, score)`` tuples, where ``chunk`` is
        the ``"chunk"`` field of the point payload.

    Raises:
        KeyError: If a returned point's payload has no ``"chunk"`` field.
    """
    # query_embed yields embeddings; only the first one is used.
    query_vec = list(dense_model.query_embed(query))[0]
    raw_results = client.query_points(
        collection_name="pdf_dense",
        using="embedding",
        query=query_vec,
        limit=top_k,
        with_payload=True,
    )

    # Walk the dense hits once; the chunk list is derived from the same pairs
    # so "raw" and the cross-encoder input can never disagree.
    raw_ranks = [(pt.payload["chunk"], pt.score) for pt in raw_results.points]
    chunks = [chunk for chunk, _ in raw_ranks]

    if chunks:
        cross_scores = list(cross_encoder.rerank(query, chunks))
        cross_ranks = sorted(
            zip(chunks, cross_scores),
            key=lambda pair: pair[1],
            reverse=True,
        )
    else:
        # Nothing was retrieved: skip the cross-encoder call instead of
        # handing it an empty document list.
        cross_ranks = []

    query_colbert = list(colbert_model.query_embed(query))[0]
    colbert_results = client.query_points(
        collection_name="pdf_colbert",
        query=query_colbert,
        limit=top_k,
        with_payload=True,
    )
    colbert_ranks = [
        (pt.payload["chunk"], pt.score) for pt in colbert_results.points
    ]

    return {
        "raw": raw_ranks,
        "cross": cross_ranks,
        "colbert": colbert_ranks,
    }
|