File size: 1,073 Bytes
833b888
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from backend.qdrant import client
from backend.embed_models import dense_model, colbert_model, cross_encoder

def search_and_rerank(query, top_k=5):
    """Run one query through three rankings and return them side by side.

    The query is embedded with ``dense_model`` and searched against the
    "pdf_dense" collection; the returned chunks are then scored by
    ``cross_encoder`` and re-ordered. Independently, the query is embedded
    with ``colbert_model`` and searched against the "pdf_colbert"
    collection.

    Args:
        query: Query passed to both embedding models and to the reranker.
        top_k: Maximum number of points requested from each collection.

    Returns:
        A dict with three lists of ``(chunk, score)`` pairs:
        ``"raw"`` holds the dense hits in the order the client returned
        them, ``"cross"`` holds the same chunks paired with the reranker
        scores and sorted by score descending, and ``"colbert"`` holds the
        hits from the "pdf_colbert" collection in returned order.
    """
    # --- Dense retrieval -------------------------------------------------
    # query_embed is materialised in full; only its first item is used.
    dense_query = list(dense_model.query_embed(query))[0]
    dense_response = client.query_points(
        collection_name="pdf_dense",
        using="embedding",
        query=dense_query,
        limit=top_k,
        with_payload=True
    )
    dense_hits = dense_response.points
    passages = [hit.payload["chunk"] for hit in dense_hits]

    # --- Cross-encoder rerank of the dense candidates --------------------
    relevance = list(cross_encoder.rerank(query, passages))
    reranked = list(zip(passages, relevance))
    # Stable sort: chunks with equal scores keep their dense-retrieval order.
    reranked.sort(key=lambda pair: pair[1], reverse=True)

    # --- Second retrieval against the ColBERT collection ------------------
    colbert_query = list(colbert_model.query_embed(query))[0]
    colbert_response = client.query_points(
        collection_name="pdf_colbert",
        query=colbert_query,
        limit=top_k,
        with_payload=True
    )
    colbert_pairs = [
        (hit.payload["chunk"], hit.score) for hit in colbert_response.points
    ]

    dense_pairs = [(hit.payload["chunk"], hit.score) for hit in dense_hits]

    return {
        "raw": dense_pairs,
        "cross": reranked,
        "colbert": colbert_pairs
    }