| from backend.qdrant import client | |
| from backend.embed_models import dense_model, colbert_model, cross_encoder | |
def search_and_rerank(query, top_k=5):
    """Run one query through three ranking paths and return all three.

    The query is embedded with ``dense_model`` and searched against the
    ``pdf_dense`` collection; the chunk texts of those hits are then scored
    with ``cross_encoder``. Separately, the query is embedded with
    ``colbert_model`` and searched against the ``pdf_colbert`` collection.
    Note that the ColBERT path is its own search, not a rerank of the dense
    hits.

    Args:
        query: The search query (presumably a string -- confirm with callers).
        top_k: Limit passed to both collection searches. Defaults to 5.

    Returns:
        A dict with three lists of ``(chunk, score)`` pairs:

        * ``"raw"``: dense hits in the order the client returned them, with
          the score reported on each point.
        * ``"cross"``: the same dense chunks paired with their cross-encoder
          score, sorted by that score in descending order.
        * ``"colbert"``: hits from ``pdf_colbert`` in the order the client
          returned them, with the score reported on each point.

    Raises:
        IndexError: If an embedding model yields nothing for the query.
        KeyError: If a returned point's payload has no ``"chunk"`` entry.
    """
    # --- Dense retrieval -------------------------------------------------
    # Only the first embedding yielded for the query is used.
    dense_embeddings = list(dense_model.query_embed(query))
    dense_hits = client.query_points(
        collection_name="pdf_dense",
        using="embedding",
        query=dense_embeddings[0],
        limit=top_k,
        with_payload=True,
    )

    # --- Cross-encoder scoring of the dense hits -------------------------
    passages = []
    for hit in dense_hits.points:
        passages.append(hit.payload["chunk"])
    relevance = list(cross_encoder.rerank(query, passages))
    reranked = list(zip(passages, relevance))
    # Stable sort: chunks with equal scores keep their dense-retrieval order.
    reranked.sort(key=lambda pair: pair[1], reverse=True)

    # --- ColBERT retrieval (independent search) --------------------------
    colbert_embeddings = list(colbert_model.query_embed(query))
    colbert_hits = client.query_points(
        collection_name="pdf_colbert",
        query=colbert_embeddings[0],
        limit=top_k,
        with_payload=True,
    )
    colbert_pairs = []
    for hit in colbert_hits.points:
        colbert_pairs.append((hit.payload["chunk"], hit.score))

    # --- Assemble the response -------------------------------------------
    dense_pairs = []
    for hit in dense_hits.points:
        dense_pairs.append((hit.payload["chunk"], hit.score))

    rankings = {}
    rankings["raw"] = dense_pairs
    rankings["cross"] = reranked
    rankings["colbert"] = colbert_pairs
    return rankings