whoops, gt vs lt: the sequence-length filter compared with > where it should use <
Browse files
src/axolotl/utils/data.py
CHANGED
|
@@ -213,7 +213,7 @@ def load_prepare_datasets(tokenizer, cfg, default_dataset_prepared_path):
|
|
| 213 |
[
|
| 214 |
d
|
| 215 |
for d in dataset
|
| 216 |
-
if len(d["input_ids"]) > cfg.sequence_len
|
| 217 |
and len(d["input_ids"]) > 0
|
| 218 |
and len(d["input_ids"]) == len(d["attention_mask"])
|
| 219 |
and len(d["input_ids"]) == len(d["labels"])
|
|
|
|
| 213 |
[
|
| 214 |
d
|
| 215 |
for d in dataset
|
| 216 |
+
if len(d["input_ids"]) < cfg.sequence_len
|
| 217 |
and len(d["input_ids"]) > 0
|
| 218 |
and len(d["input_ids"]) == len(d["attention_mask"])
|
| 219 |
and len(d["input_ids"]) == len(d["labels"])
|