Spaces:
Runtime error
Runtime error
| """ colbert_utils.py | |
| Utilities for building (and using) a ColBERT (retrieval) model. | |
| :author: Didier Guillevic | |
| :email: [email protected] | |
| :creation: 2024-12-21 | |
| """ | |
| import logging | |
| logger = logging.getLogger(__name__) | |
| logging.basicConfig(level=logging.INFO) | |
| from ragatouille import RAGPretrainedModel | |
def build_colbert_model(
    documents: list[str],
    metadatas: list[dict[str, str]],
    pretrained_model: str = 'antoinelouis/colbert-xm',
    index_name: str = 'colbert_index',
    *,
    max_document_length: int = 180,
    split_documents: bool = True,
    use_faiss: bool = True,
) -> RAGPretrainedModel:
    """Build a ColBERT model for retrieval.

    Loads the pretrained checkpoint named by ``pretrained_model`` and indexes
    ``documents`` (with their ``metadatas``) under ``index_name``.

    Args:
        documents: list of documents to index.
        metadatas: list of metadata, one entry per document (same order as
            ``documents``). ``None`` is forwarded unchanged to the indexer.
        pretrained_model: name of the pretrained model to use.
        index_name: name of the index built with the given documents.
        max_document_length: forwarded as-is to
            ``RAGPretrainedModel.index(max_document_length=...)``.
        split_documents: forwarded as-is to
            ``RAGPretrainedModel.index(split_documents=...)``.
        use_faiss: forwarded as-is to
            ``RAGPretrainedModel.index(use_faiss=...)``; set to ``False`` if
            faiss is not working properly in the current environment.

    Returns:
        The ColBERT retrieval model built with the given documents.

    Raises:
        ValueError: if ``metadatas`` is provided and its length differs from
            the length of ``documents``.
    """
    # Fail fast: validate the inputs before loading the pretrained model, so
    # a caller mistake is reported without paying for the model load first.
    if metadatas is not None and len(metadatas) != len(documents):
        raise ValueError(
            "documents and metadatas must have the same length "
            f"(got {len(documents)} documents and {len(metadatas)} metadatas)"
        )

    model = RAGPretrainedModel.from_pretrained(pretrained_model)
    model.index(
        collection=documents,
        # document_ids is intentionally not passed: the documents have no
        # unique IDs at the moment.
        document_metadatas=metadatas,
        index_name=index_name,
        max_document_length=max_document_length,
        split_documents=split_documents,
        use_faiss=use_faiss,
    )
    return model