improve how we set up eval/save strategies and steps (#547)
Browse files
* set up save and eval strategies to be consistent with trainer logic
* add comments
* better eval handling
- src/axolotl/utils/trainer.py +18 -6
src/axolotl/utils/trainer.py
CHANGED
|
@@ -567,21 +567,33 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
|
|
| 567 |
"sample_packing_efficiency"
|
| 568 |
] = cfg.sample_packing_eff_est
|
| 569 |
|
| 570 |
-
if cfg.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 571 |
training_arguments_kwargs["evaluation_strategy"] = "no"
|
|
|
|
|
|
|
|
|
|
| 572 |
elif cfg.eval_steps:
|
|
|
|
| 573 |
training_arguments_kwargs["evaluation_strategy"] = "steps"
|
| 574 |
training_arguments_kwargs["eval_steps"] = cfg.eval_steps
|
| 575 |
else:
|
| 576 |
-
# we have an eval set, but no steps defined, use epoch
|
| 577 |
training_arguments_kwargs["evaluation_strategy"] = "epoch"
|
| 578 |
|
| 579 |
-
if cfg.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 580 |
training_arguments_kwargs["save_strategy"] = cfg.save_strategy
|
| 581 |
else:
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
)
|
| 585 |
|
| 586 |
if cfg.do_bench_eval:
|
| 587 |
training_arguments_kwargs["do_bench_eval"] = cfg.do_bench_eval
|
|
|
|
| 567 |
"sample_packing_efficiency"
|
| 568 |
] = cfg.sample_packing_eff_est
|
| 569 |
|
| 570 |
+
if cfg.eval_steps and cfg.evaluation_strategy:
|
| 571 |
+
# assume if the user set both, they know what they're doing
|
| 572 |
+
training_arguments_kwargs["evaluation_strategy"] = cfg.evaluation_strategy
|
| 573 |
+
training_arguments_kwargs["eval_steps"] = cfg.eval_steps
|
| 574 |
+
elif cfg.val_set_size == 0:
|
| 575 |
+
# no eval set, so don't eval
|
| 576 |
training_arguments_kwargs["evaluation_strategy"] = "no"
|
| 577 |
+
elif cfg.evaluation_strategy and cfg.evaluation_strategy in ["epoch", "no"]:
|
| 578 |
+
# if explicitly set for epoch, just set, and eval steps don't matter
|
| 579 |
+
training_arguments_kwargs["evaluation_strategy"] = cfg.evaluation_strategy
|
| 580 |
elif cfg.eval_steps:
|
| 581 |
+
# steps isn't used w/ epochs
|
| 582 |
training_arguments_kwargs["evaluation_strategy"] = "steps"
|
| 583 |
training_arguments_kwargs["eval_steps"] = cfg.eval_steps
|
| 584 |
else:
|
| 585 |
+
# we have an eval set, but no steps defined, default to use epoch
|
| 586 |
training_arguments_kwargs["evaluation_strategy"] = "epoch"
|
| 587 |
|
| 588 |
+
if cfg.save_steps:
|
| 589 |
+
# save_steps implies save_strategy of steps
|
| 590 |
+
training_arguments_kwargs["save_strategy"] = "steps"
|
| 591 |
+
training_arguments_kwargs["save_steps"] = cfg.save_steps
|
| 592 |
+
elif cfg.save_strategy:
|
| 593 |
training_arguments_kwargs["save_strategy"] = cfg.save_strategy
|
| 594 |
else:
|
| 595 |
+
# default to saving each epoch if not defined
|
| 596 |
+
training_arguments_kwargs["save_strategy"] = "epoch"
|
|
|
|
| 597 |
|
| 598 |
if cfg.do_bench_eval:
|
| 599 |
training_arguments_kwargs["do_bench_eval"] = cfg.do_bench_eval
|