|
|
---
library_name: transformers
tags: []
---
|
|
|
|
|
# Model Card for Super Tiny BERT
|
|
This is a super tiny BERT model intended for testing purposes only.
|
|
|
|
|
## Model Details
|
|
This model has been generated using the following script:
|
|
|
|
|
```python
|
|
import torch
from transformers import BertTokenizer, BertModel, BertConfig

# Define a tiny BERT configuration: 2 layers, 2 attention heads, hidden size 8.
# NOTE: vocab_size must equal the number of entries in the custom vocab below (30),
# and max_position_embeddings caps input length at 8 tokens (incl. [CLS]/[SEP]).
config = BertConfig(
    vocab_size=30,
    hidden_size=8,
    num_hidden_layers=2,
    num_attention_heads=2,
    intermediate_size=8,
    max_position_embeddings=8,
)

# Initialize a randomly weighted tiny BERT model with the custom configuration.
model = BertModel(config)

# Create a custom vocabulary: the five standard BERT special tokens first,
# followed by a handful of everyday words. Ids must be contiguous from 0.
vocab = {
    "[PAD]": 0,
    "[UNK]": 1,
    "[CLS]": 2,
    "[SEP]": 3,
    "[MASK]": 4,
    "hello": 5,
    "how": 6,
    "are": 7,
    "you": 8,
    "?": 9,
    "i": 10,
    "am": 11,
    "fine": 12,
    "thanks": 13,
    "and": 14,
    "good": 15,
    "morning": 16,
    "evening": 17,
    "night": 18,
    "yes": 19,
    "no": 20,
    "please": 21,
    "thank": 22,
    "welcome": 23,
    "sorry": 24,
    "bye": 25,
    "see": 26,
    "later": 27,
    "take": 28,
    "care": 29,
}

# Save the vocabulary to a file, one token per line ordered by id —
# BertTokenizer assigns each token the id of its line number.
# Explicit encoding: open()'s default is platform-dependent.
vocab_file = "vocab.txt"
with open(vocab_file, "w", encoding="utf-8") as f:
    f.writelines(f"{token}\n" for token, _ in sorted(vocab.items(), key=lambda kv: kv[1]))

# Initialize the tokenizer with the custom vocabulary
tokenizer = BertTokenizer(vocab_file=vocab_file)

# Example usage: tokenize input text (lowercased by default, so "Hello" → "hello";
# "," is not in the vocab and maps to [UNK])
text = "Hello, how are you?"
inputs = tokenizer(text, return_tensors="pt")

# Forward pass through the model — inference only, no gradients needed
with torch.no_grad():
    outputs = model(**inputs)

# Extract the last hidden states: shape (batch, seq_len, hidden_size)
last_hidden_states = outputs.last_hidden_state

print("Last hidden states shape:", last_hidden_states.shape)

# Save the tokenizer and model to the Hugging Face Hub
model_name = "flexsystems/flex-e2e-super-tiny-bert-model"
tokenizer.push_to_hub(model_name, private=False)
model.push_to_hub(model_name, private=False)

print(f"Tiny BERT model and tokenizer saved to the Hugging Face Hub as '{model_name}'.")
|
|
|
|
|
```
|
|
|