helpful info output
- configs/llama_65B_alpaca.yml +1 -1
- requirements.txt +1 -2
- scripts/finetune.py +3 -0
configs/llama_65B_alpaca.yml CHANGED

@@ -1,4 +1,4 @@
-base_model: huggyllama/llama-
+base_model: huggyllama/llama-65b
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 load_in_8bit: true
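
The fields touched here are ones the training script reads from cfg. Since requirements.txt lists PyYAML and attrdict, a config like this is plausibly parsed with yaml and wrapped for attribute access; the following is a minimal sketch of that pattern under that assumption, not the repository's actual loader:

import yaml
from attrdict import AttrDict

# Parse the YAML config and wrap it so fields read as attributes
# (cfg.base_model, cfg.load_in_8bit, ...), matching how finetune.py uses cfg.
with open("configs/llama_65B_alpaca.yml") as f:
    cfg = AttrDict(yaml.safe_load(f))

print(cfg.base_model)    # huggyllama/llama-65b
print(cfg.load_in_8bit)  # True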
requirements.txt CHANGED

@@ -1,5 +1,5 @@
-git+https://github.com/huggingface/transformers.git
 git+https://github.com/huggingface/peft.git
+git+https://github.com/huggingface/transformers.git
 attrdict
 fire
 PyYAML==6.0
@@ -12,4 +12,3 @@ wandb
 flash-attn
 deepspeed
 einops
-
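
The only change here is ordering: the peft VCS dependency now comes ahead of transformers, and a trailing blank line is dropped. Installation itself is unchanged:

pip install -r requirements.txt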
scripts/finetune.py CHANGED

@@ -258,7 +258,9 @@ def train(
     datasets = []
     if not isinstance(cfg.datasets, list) and isinstance(cfg.datasets, str):
         # assumption that we are loading a previously saved/cached dataset
+        print("Loading prepared dataset from disk...")
         dataset = load_from_disk(cfg.datasets)
+        print("Prepared dataset loaded from disk...")
     else:
         for d in cfg.datasets:
             ds: IterableDataset = load_dataset(
@@ -289,6 +291,7 @@ def train(
     dataset = Dataset.from_list(
         [_ for _ in constant_len_dataset]
     ).train_test_split(test_size=cfg.val_set_size, shuffle=True, seed=42)
+    print("Saving prepared dataset to disk...")
     dataset.save_to_disk("data/last_run")
 
     train_dataset = dataset["train"]
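
The new prints bracket the Hugging Face datasets disk round-trip. As a self-contained sketch of that flow (toy records; only the data/last_run path is taken from the diff):

from datasets import Dataset, load_from_disk

# Build a small dataset and split it, mirroring the train_test_split above.
dataset = Dataset.from_list([{"text": f"example {i}"} for i in range(10)])
dataset = dataset.train_test_split(test_size=0.2, shuffle=True, seed=42)

print("Saving prepared dataset to disk...")
dataset.save_to_disk("data/last_run")

print("Loading prepared dataset from disk...")
reloaded = load_from_disk("data/last_run")  # returns the saved DatasetDict
print("Prepared dataset loaded from disk...")

train_dataset = reloaded["train"]
print(train_dataset[0])

This mirrors the script's two branches: a later run can set cfg.datasets to the data/last_run path as a plain string to reuse the cached split, which appears to be what the str branch and its new status messages are for.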