# Vallabhpatil777's picture
# Upload 11 files
# ff58d3c verified
from backend.qdrant import client
from backend.embed_models import dense_model, colbert_model
from qdrant_client import models
def setup_collections(
    dense_model_name="sentence-transformers/all-MiniLM-L6-v2",
    colbert_dim=128,
):
    """Drop and re-create the ``pdf_dense`` and ``pdf_colbert`` collections.

    Any existing collection with either name is deleted first, so all
    previously indexed points in them are lost.

    Args:
        dense_model_name: Model name passed to ``client.get_embedding_size``
            to obtain the vector size of the ``pdf_dense`` collection.
            NOTE(review): this should name the same model that
            ``dense_model`` wraps -- confirm against ``backend.embed_models``.
        colbert_dim: Size of each individual vector stored in the
            ``pdf_colbert`` multivector collection.
            NOTE(review): presumably the per-token output width of
            ``colbert_model`` -- confirm.
    """
    dense_dim = client.get_embedding_size(dense_model_name)

    collection_configs = (
        (
            "pdf_dense",
            # Named vector: points must supply their vector under "embedding".
            {
                "embedding": models.VectorParams(
                    size=dense_dim,
                    distance=models.Distance.COSINE,
                ),
            },
        ),
        (
            "pdf_colbert",
            # Unnamed multivector: each point stores a list of vectors that
            # are compared with the MAX_SIM comparator.
            models.VectorParams(
                size=colbert_dim,
                distance=models.Distance.COSINE,
                multivector_config=models.MultiVectorConfig(
                    comparator=models.MultiVectorComparator.MAX_SIM,
                ),
            ),
        ),
    )

    for name, vectors_config in collection_configs:
        print("creating collection")
        # `recreate_collection` is deprecated in qdrant-client; the explicit
        # exists/delete/create sequence gives the same drop-and-create result.
        if client.collection_exists(name):
            client.delete_collection(name)
        client.create_collection(
            collection_name=name,
            vectors_config=vectors_config,
        )
def index_documents(chunks, start_id=0):
    """Embed ``chunks`` with both models and upload them to Qdrant.

    Each chunk is stored twice under the same point ID: once in
    ``pdf_dense`` (vector under the ``"embedding"`` name) and once in
    ``pdf_colbert`` (unnamed vector), with the chunk itself kept in the
    payload under the ``"chunk"`` key.

    Args:
        chunks: Iterable of chunks to index; it is passed as-is to
            ``dense_model.embed`` and ``colbert_model.embed``.
            NOTE(review): presumably text strings -- confirm against caller.
        start_id: ID assigned to the first chunk; subsequent chunks get
            consecutive IDs. With the default of 0, calling this function
            again reuses the same IDs, so pass an offset to index additional
            chunks without replacing the ones already stored.
    """
    # The input is iterated several times (once per model, once per point
    # list), so materialise it first; a generator would otherwise be
    # exhausted after the first embedding pass.
    texts = list(chunks)
    if not texts:
        return

    # Build both point lists before uploading anything, so an embedding
    # failure in either model leaves both collections untouched.
    dense_points = [
        models.PointStruct(
            id=point_id,
            payload={"chunk": text},
            vector={"embedding": vec},
        )
        for point_id, (text, vec) in enumerate(
            zip(texts, dense_model.embed(texts)), start=start_id
        )
    ]
    colbert_points = [
        models.PointStruct(id=point_id, payload={"chunk": text}, vector=vec)
        for point_id, (text, vec) in enumerate(
            zip(texts, colbert_model.embed(texts)), start=start_id
        )
    ]

    client.upload_points(collection_name="pdf_dense", points=dense_points)
    client.upload_points(collection_name="pdf_colbert", points=colbert_points)