new llama-2 default settings (#370)
* new default settings
* fix whitespace
* rm max packed sequence length
---------
Co-authored-by: Mads Henrichsen <[email protected]>
- examples/llama-2/lora.yml +3 -4
- examples/llama-2/qlora.yml +4 -4
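
Net effect across both example configs: the fixed `max_packed_sequence_len` cap is dropped in favor of `sample_packing`, flash attention is enabled by default, and the `pad_token` entry is removed from `special_tokens`. As an illustrative excerpt of the resulting attention and special-token settings (not a complete config; surrounding keys are unchanged):

```yaml
# Excerpt of the new llama-2 example defaults (illustrative, not a full config):
xformers_attention:    # left unset; flash attention is the preferred path
flash_attention: true  # now on by default for the llama-2 examples

special_tokens:
  bos_token: "<s>"
  eos_token: "</s>"
  unk_token: "<unk>"   # pad_token: "<pad>" is no longer set here
```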
examples/llama-2/lora.yml CHANGED

@@ -15,7 +15,7 @@ val_set_size: 0.01
 output_dir: ./lora-out
 
 sequence_len: 4096
-max_packed_sequence_len: 4096
+sample_packing: true
 
 adapter: lora
 lora_model_dir:
@@ -49,8 +49,8 @@ early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
 logging_steps: 1
-xformers_attention:
-flash_attention:
+xformers_attention:
+flash_attention: true
 
 warmup_steps: 10
 eval_steps: 20
@@ -64,4 +64,3 @@ special_tokens:
 bos_token: "<s>"
 eos_token: "</s>"
 unk_token: "<unk>"
-pad_token: "<pad>"
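
For context on the packing change above: with `sample_packing: true`, multiple short training examples are packed into each `sequence_len`-token window, which is what the removed fixed `max_packed_sequence_len` cap used to approximate. A minimal sketch of the pairing as it now appears in the config:

```yaml
# Packing is now a boolean tied to sequence_len, rather than a separate cap:
sequence_len: 4096    # window size used for truncation and for packing
sample_packing: true  # pack several short samples into each 4096-token window
```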
examples/llama-2/qlora.yml CHANGED

@@ -18,7 +18,8 @@ adapter: qlora
 lora_model_dir:
 
 sequence_len: 4096
-max_packed_sequence_len: 4096
+sample_packing: true
+
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
@@ -50,8 +51,8 @@ early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
 logging_steps: 1
-xformers_attention:
-flash_attention:
+xformers_attention:
+flash_attention: true
 
 warmup_steps: 10
 eval_steps: 20
@@ -65,4 +66,3 @@ special_tokens:
 bos_token: "<s>"
 eos_token: "</s>"
 unk_token: "<unk>"
-pad_token: "<pad>"
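
These are defaults, not requirements: a user who hits problems with packing, or whose hardware lacks flash-attention support, can flip them back in a local copy of either config. A hypothetical override (not part of this PR):

```yaml
# Hypothetical local override reverting the new defaults (not part of this PR):
sample_packing: false     # disable packing of multiple samples per sequence
flash_attention:          # leave unset to fall back to standard attention
xformers_attention: true  # or opt into xformers attention instead, if installed
```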