from typing import Iterable

from backend.qdrant import client
from backend.embed_models import dense_model, colbert_model
from qdrant_client import models


def setup_collections():
    """Create the ``pdf_dense`` and ``pdf_colbert`` Qdrant collections.

    ``pdf_dense`` holds a single named vector (``"embedding"``) compared with
    cosine distance; its size is looked up from the client for the
    ``sentence-transformers/all-MiniLM-L6-v2`` model.  ``pdf_colbert`` holds
    an unnamed multivector of width 128 compared with MAX_SIM over cosine
    distance.

    Both collections are created with ``recreate_collection``, which per the
    qdrant-client documentation replaces an existing collection of the same
    name, so any points already stored in them are discarded.
    """
    # NOTE(review): the model name is hard-coded here; presumably it matches
    # the model behind ``dense_model`` -- confirm against backend.embed_models.
    dense_dim = client.get_embedding_size("sentence-transformers/all-MiniLM-L6-v2")

    # NOTE(review): recent qdrant-client releases mark ``recreate_collection``
    # as deprecated in favour of ``collection_exists`` + ``create_collection``;
    # kept as-is because the installed client version is not visible here.
    print("creating collection")
    client.recreate_collection(
        collection_name="pdf_dense",
        vectors_config={
            "embedding": models.VectorParams(
                size=dense_dim,
                distance=models.Distance.COSINE,
            ),
        },
    )

    print("creating collection")
    client.recreate_collection(
        collection_name="pdf_colbert",
        vectors_config=models.VectorParams(
            # NOTE(review): 128 is presumably the per-token dimension of
            # ``colbert_model`` -- confirm against the model actually loaded.
            size=128,
            distance=models.Distance.COSINE,
            multivector_config=models.MultiVectorConfig(
                comparator=models.MultiVectorComparator.MAX_SIM,
            ),
        ),
    )


def index_documents(chunks: Iterable[str]) -> None:
    """Embed ``chunks`` with both models and upload them to Qdrant.

    Each chunk becomes one point in ``pdf_dense`` (vector stored under the
    name ``"embedding"``) and one point in ``pdf_colbert`` (unnamed vector).
    Point ids are the chunk's position in ``chunks`` starting at 0, and the
    chunk itself is stored in the payload under ``"chunk"``.  Because ids
    always start at 0, a later call writes to the same ids as an earlier one.

    Args:
        chunks: The chunks to index (presumably text -- they are passed
            straight to the embedding models).  Any iterable is accepted,
            including one-shot generators; a bare ``str`` is treated as a
            single chunk.  Empty input is a no-op.
    """
    # A lone string would otherwise be iterated character by character.
    if isinstance(chunks, str):
        chunks = [chunks]

    # ``chunks`` is traversed three times below (two embed calls plus zip).
    # Materialise it once so a generator is not exhausted by the first embed
    # call, which would silently leave nothing to index.
    chunks = list(chunks)
    if not chunks:
        return

    dense_embs = list(dense_model.embed(chunks))
    colbert_embs = list(colbert_model.embed(chunks))

    dense_points = [
        models.PointStruct(id=idx, payload={"chunk": chunk}, vector={"embedding": vec})
        for idx, (chunk, vec) in enumerate(zip(chunks, dense_embs))
    ]
    colbert_points = [
        models.PointStruct(id=idx, payload={"chunk": chunk}, vector=vec)
        for idx, (chunk, vec) in enumerate(zip(chunks, colbert_embs))
    ]

    client.upload_points("pdf_dense", dense_points)
    client.upload_points("pdf_colbert", colbert_points)