Merge pull request #350 from tmm1/group-len-false-examples
Files changed:
- README.md +3 -1
- examples/cerebras/qlora.yml +1 -1
- examples/gptj/qlora.yml +1 -1
- examples/llama-2/lora.yml +1 -1
- examples/llama-2/qlora.yml +1 -1
- examples/openllama-3b/qlora.yml +1 -1
README.md CHANGED

```diff
@@ -426,7 +426,9 @@ save_safetensors:
 
 # whether to mask out or include the human's prompt from the training labels
 train_on_inputs: false
-#
+# group similarly sized data to minimize padding
+# may be slower to start, as it must download and sort the entire dataset
+# note that training loss may have an oscillating pattern with this enabled
 group_by_length: false
 
 # Whether to use gradient checkpointing https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
```
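For context on the behavior the new README comments describe: `group_by_length` here maps onto the Hugging Face `Trainer` option of the same name, which (to a first approximation) shuffles the data and then sorts large chunks of it by sample length before batching. A minimal sketch of that idea, for illustration only (not axolotl's or transformers' actual code; `mega_factor` is a hypothetical knob):

```python
# Minimal sketch of length-grouped sampling. Illustration only, not the
# actual transformers implementation; mega_factor is a hypothetical knob.
import random

def length_grouped_indices(lengths, batch_size, mega_factor=50, seed=0):
    rng = random.Random(seed)
    indices = list(range(len(lengths)))
    rng.shuffle(indices)  # keep epoch-to-epoch ordering random
    chunk = batch_size * mega_factor
    chunks = [indices[i:i + chunk] for i in range(0, len(indices), chunk)]
    # Sorting each chunk by length puts similarly sized samples into the same
    # batch, which minimizes padding. It also makes average sequence length
    # vary from batch to batch, hence the oscillating training loss.
    return [i for c in chunks for i in sorted(c, key=lambda j: lengths[j])]

# Example: token lengths of eight samples, batch size 2.
print(length_grouped_indices([5, 120, 7, 118, 64, 60, 9, 90], batch_size=2, mega_factor=2))
```

The need to measure every sample before the first batch can be formed is also why the README note warns that training may be slower to start.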
examples/cerebras/qlora.yml CHANGED

```diff
@@ -35,7 +35,7 @@ torchdistx_path:
 lr_scheduler: cosine
 learning_rate: 0.0002
 train_on_inputs: false
-group_by_length:
+group_by_length: false
 bf16: true
 fp16: false
 tf32: true
```
examples/gptj/qlora.yml CHANGED

```diff
@@ -32,7 +32,7 @@ torchdistx_path:
 lr_scheduler: cosine
 learning_rate: 0.0001
 train_on_inputs: false
-group_by_length:
+group_by_length: false
 bf16: true
 fp16: false
 tf32: true
```
examples/llama-2/lora.yml CHANGED

```diff
@@ -38,7 +38,7 @@ lr_scheduler: cosine
 learning_rate: 0.0002
 
 train_on_inputs: false
-group_by_length:
+group_by_length: false
 bf16: true
 fp16: false
 tf32: false
```
examples/llama-2/qlora.yml CHANGED

```diff
@@ -39,7 +39,7 @@ lr_scheduler: cosine
 learning_rate: 0.0002
 
 train_on_inputs: false
-group_by_length:
+group_by_length: false
 bf16: true
 fp16: false
 tf32: false
```
examples/openllama-3b/qlora.yml CHANGED

```diff
@@ -34,7 +34,7 @@ torchdistx_path:
 lr_scheduler: cosine
 learning_rate: 0.0002
 train_on_inputs: false
-group_by_length:
+group_by_length: false
 bf16: true
 fp16: false
 tf32: true
```
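One more note on why the example configs now spell out `false` rather than leaving the key blank: YAML parses an empty value as null, not false, so a bare `group_by_length:` reaches the loader as `None` and relies on downstream falsy-handling. A quick check, assuming PyYAML:

```python
import yaml

# An empty YAML value parses as None, not False.
print(yaml.safe_load("group_by_length:"))        # {'group_by_length': None}
print(yaml.safe_load("group_by_length: false"))  # {'group_by_length': False}
```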