File size: 403 Bytes
f7fef32
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from model2vec import StaticModel
from tokenizers import Tokenizer
import torch

# Inspect a model2vec static model: report the size of its embedding matrix
# and dump its tokenizer configuration.
model = StaticModel.from_pretrained("minishlab/potion-multilingual-128M")

# torch.from_numpy wraps the NumPy embedding matrix without copying it.
embeddings = torch.from_numpy(model.embedding)

print("Embedding shape:", embeddings.shape)

# Derive the footprint from the tensor's real element count and dtype width
# instead of assuming a 2-D float32 matrix (4 bytes per element), so the
# figure stays correct for float16/int8 embeddings as well. The name also
# avoids shadowing the builtin `bytes` type.
embedding_bytes = embeddings.numel() * embeddings.element_size()

print("MiB:", embedding_bytes / 1024 / 1024)

# Print the tokenizer's serialized form as returned by `to_str()`.
tokenizer: Tokenizer = model.tokenizer
print(tokenizer.to_str())