"""Inspect a pretrained model2vec static model.

Loads ``minishlab/potion-multilingual-128M``, prints the shape of its
embedding matrix and its in-memory size in MiB, then prints the tokenizer's
serialized string form.
"""

from model2vec import StaticModel
from tokenizers import Tokenizer
import torch

model = StaticModel.from_pretrained("minishlab/potion-multilingual-128M")

# Wrap the NumPy embedding matrix as a tensor (shares memory, no copy).
embeddings = torch.from_numpy(model.embedding)
print("Embedding shape:", embeddings.shape)

# Derive the byte count from the tensor itself rather than assuming a 2-D
# float32 matrix: element_size() is the per-element width of the actual dtype,
# so the figure stays correct for float16 or otherwise quantized embeddings.
size_in_bytes = embeddings.numel() * embeddings.element_size()
print("MiB:", size_in_bytes / 1024 / 1024)

tokenizer: Tokenizer = model.tokenizer
print(tokenizer.to_str())