fix: revert local dir dataset load (#878)
Browse files
src/axolotl/utils/data.py
CHANGED
|
@@ -242,7 +242,14 @@ def load_tokenized_prepared_datasets(
|
|
| 242 |   local_path = Path(config_dataset.path)
| 243 |   if local_path.exists():
| 244 |       if local_path.is_dir():
| 245 | -         [removed line: its text is not present in this capture]
| 246 |       elif local_path.is_file():
| 247 |           ds_type = get_ds_type(config_dataset)
| 248 |
| 242 |   local_path = Path(config_dataset.path)
| 243 |   if local_path.exists():
| 244 |       if local_path.is_dir():
| 245 | +         # TODO dirs with arrow or parquet files could be loaded with `load_from_disk`
| 246 | +         ds = load_dataset(
| 247 | +             config_dataset.path,
| 248 | +             name=config_dataset.name,
| 249 | +             data_files=config_dataset.data_files,
| 250 | +             streaming=False,
| 251 | +             split=None,
| 252 | +         )
| 253 |       elif local_path.is_file():
| 254 |           ds_type = get_ds_type(config_dataset)
| 255 |