don't resize embeddings to multiples of 32x by default
- README.md +3 -0
- src/axolotl/utils/models.py +5 -1
README.md

@@ -322,6 +322,9 @@ tokenizer_type: AutoTokenizer
 trust_remote_code:
 # use_fast option for tokenizer loading from_pretrained, default to True
 tokenizer_use_fast:
+# resize the model embeddings when new tokens are added to multiples of 32
+# this is reported to improve training speed on some models
+resize_token_embeddings_to_32x:

 # whether you are training a 4-bit GPTQ quantized model
 gptq: true
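With this change the flag is off unless set explicitly. A minimal config excerpt showing how to opt back in to the old padding behaviour (only the key itself comes from this commit; the surrounding config is assumed):

# pad the embedding matrix up to the next multiple of 32
# after new tokens are added (the pre-commit default)
resize_token_embeddings_to_32x: true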
src/axolotl/utils/models.py

@@ -301,7 +301,11 @@ def load_model(
         **model_kwargs,
     )

-    embeddings_len = math.ceil(len(tokenizer) / 32) * 32
+    embeddings_len = (
+        math.ceil(len(tokenizer) / 32) * 32
+        if cfg.resize_token_embeddings_to_32x
+        else len(tokenizer)
+    )
     model.resize_token_embeddings(embeddings_len)

     if (
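For reference, a minimal standalone sketch of the rounding logic this commit puts behind the flag (the helper name and the 32001-token vocab are illustrative, not from the commit):

import math

def embeddings_len_for(vocab_size: int, resize_to_32x: bool) -> int:
    """Mirror of the embeddings-length expression above
    (illustrative helper, not part of the axolotl codebase)."""
    if resize_to_32x:
        # round the vocab size up to the next multiple of 32
        return math.ceil(vocab_size / 32) * 32
    return vocab_size

# e.g. a tokenizer with one special token added to a 32000-token vocab:
print(embeddings_len_for(32001, resize_to_32x=True))   # 32032 (old default)
print(embeddings_len_for(32001, resize_to_32x=False))  # 32001 (new default)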