Fix and document test_datasets (#1228)
Browse files
* Make sure test_datasets are used and handle val_set_size correctly.
* Add test_datasets docs.
* Apply suggestions from code review
---------
Co-authored-by: Wing Lian <[email protected]>
- README.md +11 -0
- src/axolotl/core/trainer_builder.py +2 -1
- src/axolotl/utils/data.py +1 -1
README.md
CHANGED
|
@@ -607,6 +607,17 @@ datasets:
|
|
| 607 |
# For `completion` datasets only, uses the provided field instead of `text` column
|
| 608 |
field:
|
| 609 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 610 |
# use RL training: dpo, ipo, kto_pair
|
| 611 |
rl:
|
| 612 |
|
|
|
|
| 607 |
# For `completion` datasets only, uses the provided field instead of `text` column
|
| 608 |
field:
|
| 609 |
|
| 610 |
+
# A list of one or more datasets to eval the model with.
|
| 611 |
+
# You can use either test_datasets, or val_set_size, but not both.
|
| 612 |
+
test_datasets:
|
| 613 |
+
- path: /workspace/data/eval.jsonl
|
| 614 |
+
ds_type: json
|
| 615 |
+
# You need to specify a split. For "json" datasets the default split is called "train".
|
| 616 |
+
split: train
|
| 617 |
+
type: completion
|
| 618 |
+
data_files:
|
| 619 |
+
- /workspace/data/eval.jsonl
|
| 620 |
+
|
| 621 |
# use RL training: dpo, ipo, kto_pair
|
| 622 |
rl:
|
| 623 |
|
src/axolotl/core/trainer_builder.py
CHANGED
|
@@ -735,7 +735,7 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
|
|
| 735 |
elif self.cfg.sample_packing and self.cfg.eval_sample_packing is False:
|
| 736 |
training_arguments_kwargs["dataloader_drop_last"] = True
|
| 737 |
|
| 738 |
-
if self.cfg.val_set_size == 0:
|
| 739 |
# no eval set, so don't eval
|
| 740 |
training_arguments_kwargs["evaluation_strategy"] = "no"
|
| 741 |
elif self.cfg.eval_steps:
|
|
@@ -822,6 +822,7 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
|
|
| 822 |
self.cfg.load_best_model_at_end is not False
|
| 823 |
or self.cfg.early_stopping_patience
|
| 824 |
)
|
|
|
|
| 825 |
and self.cfg.val_set_size > 0
|
| 826 |
and self.cfg.save_steps
|
| 827 |
and self.cfg.eval_steps
|
|
|
|
| 735 |
elif self.cfg.sample_packing and self.cfg.eval_sample_packing is False:
|
| 736 |
training_arguments_kwargs["dataloader_drop_last"] = True
|
| 737 |
|
| 738 |
+
if not self.cfg.test_datasets and self.cfg.val_set_size == 0:
|
| 739 |
# no eval set, so don't eval
|
| 740 |
training_arguments_kwargs["evaluation_strategy"] = "no"
|
| 741 |
elif self.cfg.eval_steps:
|
|
|
|
| 822 |
self.cfg.load_best_model_at_end is not False
|
| 823 |
or self.cfg.early_stopping_patience
|
| 824 |
)
|
| 825 |
+
and not self.cfg.test_datasets
|
| 826 |
and self.cfg.val_set_size > 0
|
| 827 |
and self.cfg.save_steps
|
| 828 |
and self.cfg.eval_steps
|
src/axolotl/utils/data.py
CHANGED
|
@@ -440,7 +440,7 @@ def load_prepare_datasets(
|
|
| 440 |
split="train",
|
| 441 |
) -> Tuple[Dataset, Dataset, List[Prompter]]:
|
| 442 |
dataset, prompters = load_tokenized_prepared_datasets(
|
| 443 |
-
tokenizer, cfg, default_dataset_prepared_path
|
| 444 |
)
|
| 445 |
|
| 446 |
if cfg.dataset_shard_num and cfg.dataset_shard_idx is not None:
|
|
|
|
| 440 |
split="train",
|
| 441 |
) -> Tuple[Dataset, Dataset, List[Prompter]]:
|
| 442 |
dataset, prompters = load_tokenized_prepared_datasets(
|
| 443 |
+
tokenizer, cfg, default_dataset_prepared_path, split=split
|
| 444 |
)
|
| 445 |
|
| 446 |
if cfg.dataset_shard_num and cfg.dataset_shard_idx is not None:
|