menouar committed · commit 001fbb9 · 1 parent: 09bdd6c
Improve comments

utils/notebook_generator.py CHANGED (+20 -22)
@@ -6,7 +6,7 @@ from utils import FTDataSet, falcon
 
 
 def create_install_libraries_cells(cells: list):
-    text_cell = nbf.v4.new_markdown_cell("
+    text_cell = nbf.v4.new_markdown_cell("# Installing Required Libraries!")
     text_cell1 = nbf.v4.new_markdown_cell(
         "Installing required libraries, including trl, transformers, accelerate, peft, datasets, "
         "and bitsandbytes.")
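Note: the code cell that accompanies these markdown cells is outside this hunk; installing the five named libraries would boil down to a one-liner along these lines (version pins are not shown in the diff):

    %pip install -q trl transformers accelerate peft datasets bitsandbytes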
@@ -37,13 +37,13 @@ except ImportError:
 
 def create_install_flash_attention(cells: list):
     text_cell = nbf.v4.new_markdown_cell(
-        "
+        "# Installing Flash Attention")
     text_cell1 = nbf.v4.new_markdown_cell("Installing Flash Attention to reduce the memory "
                                           "and runtime cost of the attention layer, and improve the performance of "
                                           "the model training. Learn more at [FlashAttention]("
                                           "https://github.com/Dao-AILab/flash-attention/tree/main)."
                                           " Installing flash "
-                                          "attention from source can take quite a bit of time (
+                                          "attention from source can take quite a bit of time (~ "
                                           "minutes).")
     code = """
 import torch; assert torch.cuda.get_device_capability()[0] >= 8, 'Hardware not supported for Flash Attention'
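Note: the assert in the context line guards hardware support, since FlashAttention-2 targets GPUs with CUDA compute capability 8.0 or higher (Ampere and newer). The install the warning refers to is typically done like this sketch (the exact flags are an assumption, not shown in this hunk):

    # ninja parallelizes the source build; MAX_JOBS bounds its memory use
    %pip install ninja packaging
    !MAX_JOBS=4 pip install flash-attn --no-build-isolation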
@@ -59,7 +59,7 @@ import torch; assert torch.cuda.get_device_capability()[0] >= 8, 'Hardware not supported for Flash Attention'
 
 def create_login_hf_cells(cells: list, should_login: bool = False, model_name: Optional[str] = None):
     text_cell = nbf.v4.new_markdown_cell(
-        "
+        "# Login to HF")
 
     text_1 = "Login with our `HF_TOKEN` in order to push the finetuned model to `huggingface_hub`."
 
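Note: the `login(` visible in the next hunk's context suggests the generated cell calls `huggingface_hub.login`; a minimal sketch, assuming the token lives in the `HF_TOKEN` environment variable:

    import os
    from huggingface_hub import login

    # Authenticate so the finetuned model can later be pushed to the Hub
    login(token=os.environ["HF_TOKEN"])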
@@ -85,7 +85,7 @@ login(
 
 
 def create_datasets_cells(cells: list, dataset: FTDataSet, seed: int):
-    text_cell = nbf.v4.new_markdown_cell("
+    text_cell = nbf.v4.new_markdown_cell("# Load and Prepare the Dataset")
     text = 'The dataset is already formatted in a conversational format, which is supported by [trl](' \
            'https://huggingface.co/docs/trl/index/). '
     text_format = """
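Note: "conversational format" here means each record carries a `messages` list of role/content turns that trl can feed through the tokenizer's chat template. A minimal illustration (the record content is invented):

    # One record in the conversational format supported by trl
    example = {
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "What is LoRA?"},
            {"role": "assistant", "content": "A parameter-efficient finetuning method."},
        ]
    }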
@@ -119,7 +119,7 @@ def create_model_cells(cells: list, model_id: str, version: str, flash_attention
                        load_in_4bit: str, bnb_4bit_use_double_quant: bool, bnb_4bit_quant_type: str,
                        bnb_4bit_compute_dtype: str
                        ):
-    text_cell = nbf.v4.new_markdown_cell(f"
+    text_cell = nbf.v4.new_markdown_cell(f"# Load **{model_id}-{version}** for Finetuning")
     load_in_4bit_str = f"{load_in_4bit}=True"
 
     flash_attention_str = "attn_implementation='flash_attention_2',"
@@ -131,8 +131,10 @@ def create_model_cells(cells: list, model_id: str, version: str, flash_attention
     pad_value_str = ""
 
     auto_model_import = "AutoModelForCausalLM"
+    trust_code = "trust_remote_code=True,"
     if model_id == falcon.name:
         auto_model_import = "FalconForCausalLM"
+        trust_code = ""
 
     code = f"""
 import torch
@@ -152,7 +154,7 @@ bnb_config = BitsAndBytesConfig(
 model = {auto_model_import}.from_pretrained(
     model_id,
     device_map="auto",
-
+    {trust_code}
     {flash_attention_str}
     torch_dtype=torch.bfloat16,
     quantization_config=bnb_config
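Note: `trust_remote_code=True` is dropped for Falcon because `FalconForCausalLM` is a native transformers class, so no remote code needs to run. Rendered with typical placeholder values, the generated cell would come out roughly as this sketch (the model id and quantization values are assumptions):

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # 4-bit quantization (QLoRA-style) so the base model fits in GPU memory
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    model = AutoModelForCausalLM.from_pretrained(
        "some-org/some-model",                    # placeholder model id
        device_map="auto",
        trust_remote_code=True,                   # omitted for Falcon
        attn_implementation="flash_attention_2",
        torch_dtype=torch.bfloat16,
        quantization_config=bnb_config,
    )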
@@ -202,7 +204,7 @@ a 24GB GPU for fine-tuning.
 
 
 def create_lora_config_cells(cells: list, r: int, alpha: int, dropout: float, bias: str):
-    text_cell = nbf.v4.new_markdown_cell("
+    text_cell = nbf.v4.new_markdown_cell("# Setting LoraConfig")
     code = f"""
 from peft import LoraConfig
 
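Note: the function's `r`, `alpha`, `dropout` and `bias` parameters are substituted into the generated cell; with common defaults it would read like this sketch (the values are illustrative):

    from peft import LoraConfig

    peft_config = LoraConfig(
        r=16,                  # rank of the LoRA update matrices
        lora_alpha=16,         # scaling factor for the updates
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )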
@@ -232,7 +234,7 @@ def create_training_args_cells(cells: list, epochs, max_steps, logging_steps, pe
                                save_strategy, gradient_accumulation_steps, gradient_checkpointing,
                                learning_rate, max_grad_norm, warmup_ratio, lr_scheduler_type, output_dir,
                                report_to, seed):
-    text_cell = nbf.v4.new_markdown_cell("
+    text_cell = nbf.v4.new_markdown_cell("# Setting the TrainingArguments")
     to_install = None
     if report_to == "all":
         to_install = "azure_ml comet_ml mlflow tensorboard wandb"
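Note: when `report_to == "all"`, every supported tracking backend is installed up front. The function's parameters map one-to-one onto `transformers.TrainingArguments`; a sketch with illustrative values:

    from transformers import TrainingArguments

    args = TrainingArguments(
        output_dir="temp_my-model",      # placeholder
        num_train_epochs=3,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=2,
        gradient_checkpointing=True,
        learning_rate=2e-4,
        max_grad_norm=0.3,
        warmup_ratio=0.03,
        lr_scheduler_type="constant",
        logging_steps=10,
        save_strategy="epoch",
        report_to="tensorboard",
        seed=42,
    )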
@@ -281,7 +283,7 @@ args = TrainingArguments(
 
 def create_sft_trainer_cells(cells: list, max_seq_length, packing):
     text_cell = nbf.v4.new_markdown_cell(
-        """
+        """# Setting the Supervised Finetuning Trainer (`SFTTrainer`)
 
 This `SFTTrainer` is a wrapper around the `transformers.Trainer` class and inherits all of its attributes and methods.
 The trainer takes care of properly initializing the `PeftModel`.
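Note: because `SFTTrainer` wraps `transformers.Trainer`, passing `peft_config` is enough for it to build the `PeftModel` itself. A sketch of the wiring (argument names follow trl versions of this period; everything beyond what the hunk shows is an assumption):

    from trl import SFTTrainer

    trainer = SFTTrainer(
        model=model,
        args=args,
        train_dataset=dataset,
        peft_config=peft_config,
        max_seq_length=2048,     # template parameter
        packing=True,            # template parameter
        tokenizer=tokenizer,
    )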
@@ -311,7 +313,7 @@ trainer = SFTTrainer(
 
 def create_start_training_cells(cells: list, epochs, max_steps, push_to_hub, output_dir):
     if push_to_hub:
-        save_txt = f"and to the hub in 'User/{output_dir}'
+        save_txt = f"and to the hub in **'User/{output_dir}'**."
     else:
         save_txt = "."
 
@@ -323,7 +325,7 @@ def create_start_training_cells(cells: list, epochs, max_steps, push_to_hub, output_dir):
         f"""### Starting Training and Saving Model/Tokenizer
 
 We start training the model by calling the `train()` method on the trainer instance. This will start the training
-loop and train the model for `{epoch_str}`. The model will be automatically saved to the output directory ('temp_{output_dir}')
+loop and train the model for `{epoch_str}`. The model will be automatically saved to the output directory (**'temp_{output_dir}'**)
 {save_txt}
 
 """)
@@ -335,7 +337,7 @@ model.config.use_cache = False
 # start training
 trainer.train()
 
-# save the
+# save the peft model
 trainer.save_model()
 """
     code_cell = nbf.v4.new_code_cell(code)
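Note: the completed comment is accurate: with a `peft_config` in play, `trainer.save_model()` persists only the LoRA adapter weights, which is why the merge step later in this file exists. The `use_cache` line in the hunk's context also deserves a word; annotated:

    # KV caching conflicts with gradient checkpointing during training
    model.config.use_cache = False
    trainer.train()
    trainer.save_model()   # writes only the adapter weights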
@@ -345,7 +347,7 @@ trainer.save_model()
 
 def create_free_gpu_cells(cells: list):
     text_cell = nbf.v4.new_markdown_cell(
-        """
+        """# Free the GPU Memory to Prepare Merging Lora Adapters with the Base Model
 """)
 
     code = f"""
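Note: merging requires reloading the base model in higher precision, so the quantized training copy is released first. The `torch.cuda.empty_cache()` in the next hunk's context implies a cell along these lines (the `del`/`gc` part is an assumption):

    import gc
    import torch

    # Drop the training-time objects, then return cached blocks to the driver
    del model
    del trainer
    gc.collect()
    torch.cuda.empty_cache()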
@@ -362,7 +364,7 @@ torch.cuda.empty_cache()
 
 def create_merge_lora_cells(cells: list, output_dir):
     text_cell = nbf.v4.new_markdown_cell(
-        """
+        """# Merging LoRa Adapters into the Original Model
 
 While utilizing `LoRA`, we focus on training the adapters rather than the entire model. Consequently, during the
 model saving process, only the `adapter weights` are preserved, not the complete model. If we wish to save the
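Note: the `tokenizer.save_pretrained("{output_dir}")` in the next hunk's context fits the usual peft merge recipe; one common way to do it (whether this script uses `AutoPeftModelForCausalLM` is not visible in the hunk):

    import torch
    from peft import AutoPeftModelForCausalLM

    # Reload the adapter checkpoint and fold the LoRA weights into the base model
    model = AutoPeftModelForCausalLM.from_pretrained("temp_my-model", torch_dtype=torch.float16)
    merged_model = model.merge_and_unload()
    merged_model.save_pretrained("my-model", safe_serialization=True)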
@@ -394,7 +396,7 @@ tokenizer.save_pretrained("{output_dir}")
 
 def merge_model_cells(cells: list, output_dir):
     text_cell = nbf.v4.new_markdown_cell(
-        f"
+        f"# Copy all result folders from 'temp_{output_dir}' to '{output_dir}'")
 
     code = f"""
 import os
@@ -402,15 +404,11 @@ import shutil
 
 source_folder = "temp_{output_dir}"
 destination_folder = "{output_dir}"
-
 os.makedirs(destination_folder, exist_ok=True)
-
 for item in os.listdir(source_folder):
     item_path = os.path.join(source_folder, item)
-
     if os.path.isdir(item_path):
         destination_path = os.path.join(destination_folder, item)
-
         shutil.copytree(item_path, destination_path)
 """
 
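Note: `shutil.copytree` raises `FileExistsError` when the destination folder already exists, so the generated cell cannot be re-run as-is. On Python 3.8+ one extra flag would tolerate reruns (a suggestion, not part of this commit):

    # Allows re-running the cell without first deleting the destination
    shutil.copytree(item_path, destination_path, dirs_exist_ok=True)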
@@ -420,14 +418,14 @@ for item in os.listdir(source_folder):
 
 
 def push_to_hub_cells(cells: list, output_dir):
-    text = f"
+    text = f"# Pushing '{output_dir}' to our Hugging Face account."
     code = f"""
 from huggingface_hub import HfApi, HfFolder, Repository
 
 # Instantiate the HfApi class
 api = HfApi()
 
-#
+# Our Hugging Face repository
 repo_name = "{output_dir}"
 
 # Create a repository on the Hugging Face Hub
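Note: the hunk ends before showing how the repository is created and the files uploaded; with the plain `HfApi` import above, a typical continuation looks like this sketch (the method choice is an assumption):

    from huggingface_hub import HfApi

    api = HfApi()
    repo_name = "my-model"   # placeholder for {output_dir}

    # Create the repo if needed, then upload the merged model folder
    repo_url = api.create_repo(repo_id=repo_name, exist_ok=True)
    api.upload_folder(folder_path=repo_name, repo_id=repo_url.repo_id)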
|