menouar committed · commit 001fbb9 · 1 parent: 09bdd6c
Improve comments

utils/notebook_generator.py CHANGED (+20 -22)
@@ -6,7 +6,7 @@ from utils import FTDataSet, falcon
 
 
 def create_install_libraries_cells(cells: list):
-    text_cell = nbf.v4.new_markdown_cell("
+    text_cell = nbf.v4.new_markdown_cell("# Installing Required Libraries!")
     text_cell1 = nbf.v4.new_markdown_cell(
         "Installing required libraries, including trl, transformers, accelerate, peft, datasets, "
         "and bitsandbytes.")
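Note: the code cell that accompanies these markdown cells is outside this hunk; installing the five named libraries would boil down to a one-liner along these lines (version pins are not shown in the diff):

    %pip install -q trl transformers accelerate peft datasets bitsandbytes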
@@ -37,13 +37,13 @@ except ImportError:
 
 def create_install_flash_attention(cells: list):
     text_cell = nbf.v4.new_markdown_cell(
-        "
+        "# Installing Flash Attention")
     text_cell1 = nbf.v4.new_markdown_cell("Installing Flash Attention to reduce the memory "
                                           "and runtime cost of the attention layer, and improve the performance of "
                                           "the model training. Learn more at [FlashAttention]("
                                           "https://github.com/Dao-AILab/flash-attention/tree/main)."
                                           " Installing flash "
-                                          "attention from source can take quite a bit of time (
+                                          "attention from source can take quite a bit of time (~ "
                                           "minutes).")
     code = """
 import torch; assert torch.cuda.get_device_capability()[0] >= 8, 'Hardware not supported for Flash Attention'
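Note: the assert in the context line guards hardware support, since FlashAttention-2 targets GPUs with CUDA compute capability 8.0 or higher (Ampere and newer). The install the warning refers to is typically done like this sketch (the exact flags are an assumption, not shown in this hunk):

    # ninja parallelizes the source build; MAX_JOBS bounds its memory use
    %pip install ninja packaging
    !MAX_JOBS=4 pip install flash-attn --no-build-isolation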
@@ -59,7 +59,7 @@ import torch; assert torch.cuda.get_device_capability()[0] >= 8, 'Hardware not supported for Flash Attention'
 
 def create_login_hf_cells(cells: list, should_login: bool = False, model_name: Optional[str] = None):
     text_cell = nbf.v4.new_markdown_cell(
-        "
+        "# Login to HF")
 
     text_1 = "Login with our `HF_TOKEN` in order to push the finetuned model to `huggingface_hub`."
 
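Note: the `login(` visible in the next hunk's context suggests the generated cell calls `huggingface_hub.login`; a minimal sketch, assuming the token lives in the `HF_TOKEN` environment variable:

    import os
    from huggingface_hub import login

    # Authenticate so the finetuned model can later be pushed to the Hub
    login(token=os.environ["HF_TOKEN"])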
@@ -85,7 +85,7 @@ login(
 
 
 def create_datasets_cells(cells: list, dataset: FTDataSet, seed: int):
-    text_cell = nbf.v4.new_markdown_cell("
+    text_cell = nbf.v4.new_markdown_cell("# Load and Prepare the Dataset")
     text = 'The dataset is already formatted in a conversational format, which is supported by [trl](' \
            'https://huggingface.co/docs/trl/index/). '
     text_format = """
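Note: "conversational format" here means each record carries a `messages` list of role/content turns that trl can feed through the tokenizer's chat template. A minimal illustration (the record content is invented):

    # One record in the conversational format supported by trl
    example = {
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "What is LoRA?"},
            {"role": "assistant", "content": "A parameter-efficient finetuning method."},
        ]
    }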
@@ -119,7 +119,7 @@ def create_model_cells(cells: list, model_id: str, version: str, flash_attention
                        load_in_4bit: str, bnb_4bit_use_double_quant: bool, bnb_4bit_quant_type: str,
                        bnb_4bit_compute_dtype: str
                        ):
-    text_cell = nbf.v4.new_markdown_cell(f"
+    text_cell = nbf.v4.new_markdown_cell(f"# Load **{model_id}-{version}** for Finetuning")
     load_in_4bit_str = f"{load_in_4bit}=True"
 
     flash_attention_str = "attn_implementation='flash_attention_2',"
@@ -131,8 +131,10 @@ def create_model_cells(cells: list, model_id: str, version: str, flash_attention
     pad_value_str = ""
 
     auto_model_import = "AutoModelForCausalLM"
+    trust_code = "trust_remote_code=True,"
     if model_id == falcon.name:
         auto_model_import = "FalconForCausalLM"
+        trust_code = ""
 
     code = f"""
 import torch
@@ -152,7 +154,7 @@ bnb_config = BitsAndBytesConfig(
 model = {auto_model_import}.from_pretrained(
     model_id,
     device_map="auto",
-
+    {trust_code}
     {flash_attention_str}
     torch_dtype=torch.bfloat16,
     quantization_config=bnb_config
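Note: `trust_remote_code=True` is dropped for Falcon because `FalconForCausalLM` is a native transformers class, so no remote code needs to run. Rendered with typical placeholder values, the generated cell would come out roughly as this sketch (the model id and quantization values are assumptions):

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # 4-bit quantization (QLoRA-style) so the base model fits in GPU memory
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    model = AutoModelForCausalLM.from_pretrained(
        "some-org/some-model",                    # placeholder model id
        device_map="auto",
        trust_remote_code=True,                   # omitted for Falcon
        attn_implementation="flash_attention_2",
        torch_dtype=torch.bfloat16,
        quantization_config=bnb_config,
    )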
@@ -202,7 +204,7 @@ a 24GB GPU for fine-tuning.
 
 
 def create_lora_config_cells(cells: list, r: int, alpha: int, dropout: float, bias: str):
-    text_cell = nbf.v4.new_markdown_cell("
+    text_cell = nbf.v4.new_markdown_cell("# Setting LoraConfig")
     code = f"""
 from peft import LoraConfig
 
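Note: the function's `r`, `alpha`, `dropout` and `bias` parameters are substituted into the generated cell; with common defaults it would read like this sketch (the values are illustrative):

    from peft import LoraConfig

    peft_config = LoraConfig(
        r=16,                  # rank of the LoRA update matrices
        lora_alpha=16,         # scaling factor for the updates
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )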
@@ -232,7 +234,7 @@ def create_training_args_cells(cells: list, epochs, max_steps, logging_steps, pe
                                save_strategy, gradient_accumulation_steps, gradient_checkpointing,
                                learning_rate, max_grad_norm, warmup_ratio, lr_scheduler_type, output_dir,
                                report_to, seed):
-    text_cell = nbf.v4.new_markdown_cell("
+    text_cell = nbf.v4.new_markdown_cell("# Setting the TrainingArguments")
     to_install = None
     if report_to == "all":
         to_install = "azure_ml comet_ml mlflow tensorboard wandb"
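Note: when `report_to == "all"`, every supported tracking backend is installed up front. The function's parameters map one-to-one onto `transformers.TrainingArguments`; a sketch with illustrative values:

    from transformers import TrainingArguments

    args = TrainingArguments(
        output_dir="temp_my-model",      # placeholder
        num_train_epochs=3,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=2,
        gradient_checkpointing=True,
        learning_rate=2e-4,
        max_grad_norm=0.3,
        warmup_ratio=0.03,
        lr_scheduler_type="constant",
        logging_steps=10,
        save_strategy="epoch",
        report_to="tensorboard",
        seed=42,
    )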
@@ -281,7 +283,7 @@ args = TrainingArguments(
 
 def create_sft_trainer_cells(cells: list, max_seq_length, packing):
     text_cell = nbf.v4.new_markdown_cell(
-        """
+        """# Setting the Supervised Finetuning Trainer (`SFTTrainer`)
 
 This `SFTTrainer` is a wrapper around the `transformers.Trainer` class and inherits all of its attributes and methods.
 The trainer takes care of properly initializing the `PeftModel`.
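Note: because `SFTTrainer` wraps `transformers.Trainer`, passing `peft_config` is enough for it to build the `PeftModel` itself. A sketch of the wiring (argument names follow trl versions of this period; everything beyond what the hunk shows is an assumption):

    from trl import SFTTrainer

    trainer = SFTTrainer(
        model=model,
        args=args,
        train_dataset=dataset,
        peft_config=peft_config,
        max_seq_length=2048,     # template parameter
        packing=True,            # template parameter
        tokenizer=tokenizer,
    )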
@@ -311,7 +313,7 @@ trainer = SFTTrainer(
 
 def create_start_training_cells(cells: list, epochs, max_steps, push_to_hub, output_dir):
     if push_to_hub:
-        save_txt = f"and to the hub in 'User/{output_dir}'
+        save_txt = f"and to the hub in **'User/{output_dir}'**."
     else:
         save_txt = "."
 
@@ -323,7 +325,7 @@ def create_start_training_cells(cells: list, epochs, max_steps, push_to_hub, output_dir):
         f"""### Starting Training and Saving Model/Tokenizer
 
 We start training the model by calling the `train()` method on the trainer instance. This will start the training
-loop and train the model for `{epoch_str}`. The model will be automatically saved to the output directory ('temp_{output_dir}')
+loop and train the model for `{epoch_str}`. The model will be automatically saved to the output directory (**'temp_{output_dir}'**)
 {save_txt}
 
 """)
@@ -335,7 +337,7 @@ model.config.use_cache = False
 # start training
 trainer.train()
 
-# save the
+# save the peft model
 trainer.save_model()
 """
     code_cell = nbf.v4.new_code_cell(code)
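Note: the completed comment is accurate: with a `peft_config` in play, `trainer.save_model()` persists only the LoRA adapter weights, which is why the merge step later in this file exists. The `use_cache` line in the hunk's context also deserves a word; annotated:

    # KV caching conflicts with gradient checkpointing during training
    model.config.use_cache = False
    trainer.train()
    trainer.save_model()   # writes only the adapter weights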
@@ -345,7 +347,7 @@ trainer.save_model()
 
 def create_free_gpu_cells(cells: list):
     text_cell = nbf.v4.new_markdown_cell(
-        """
+        """# Free the GPU Memory to Prepare Merging Lora Adapters with the Base Model
 """)
 
     code = f"""
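Note: merging requires reloading the base model in higher precision, so the quantized training copy is released first. The `torch.cuda.empty_cache()` in the next hunk's context implies a cell along these lines (the `del`/`gc` part is an assumption):

    import gc
    import torch

    # Drop the training-time objects, then return cached blocks to the driver
    del model
    del trainer
    gc.collect()
    torch.cuda.empty_cache()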
@@ -362,7 +364,7 @@ torch.cuda.empty_cache()
 
 def create_merge_lora_cells(cells: list, output_dir):
     text_cell = nbf.v4.new_markdown_cell(
-        """
+        """# Merging LoRa Adapters into the Original Model
 
 While utilizing `LoRA`, we focus on training the adapters rather than the entire model. Consequently, during the
 model saving process, only the `adapter weights` are preserved, not the complete model. If we wish to save the
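Note: the `tokenizer.save_pretrained("{output_dir}")` in the next hunk's context fits the usual peft merge recipe; one common way to do it (whether this script uses `AutoPeftModelForCausalLM` is not visible in the hunk):

    import torch
    from peft import AutoPeftModelForCausalLM

    # Reload the adapter checkpoint and fold the LoRA weights into the base model
    model = AutoPeftModelForCausalLM.from_pretrained("temp_my-model", torch_dtype=torch.float16)
    merged_model = model.merge_and_unload()
    merged_model.save_pretrained("my-model", safe_serialization=True)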
@@ -394,7 +396,7 @@ tokenizer.save_pretrained("{output_dir}")
 
 def merge_model_cells(cells: list, output_dir):
     text_cell = nbf.v4.new_markdown_cell(
-        f"
+        f"# Copy all result folders from 'temp_{output_dir}' to '{output_dir}'")
 
     code = f"""
 import os
@@ -402,15 +404,11 @@ import shutil
 
 source_folder = "temp_{output_dir}"
 destination_folder = "{output_dir}"
-
 os.makedirs(destination_folder, exist_ok=True)
-
 for item in os.listdir(source_folder):
     item_path = os.path.join(source_folder, item)
-
     if os.path.isdir(item_path):
         destination_path = os.path.join(destination_folder, item)
-
         shutil.copytree(item_path, destination_path)
 """
 
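Note: `shutil.copytree` raises `FileExistsError` when the destination folder already exists, so the generated cell cannot be re-run as-is. On Python 3.8+ one extra flag would tolerate reruns (a suggestion, not part of this commit):

    # Allows re-running the cell without first deleting the destination
    shutil.copytree(item_path, destination_path, dirs_exist_ok=True)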
@@ -420,14 +418,14 @@ for item in os.listdir(source_folder):
 
 
 def push_to_hub_cells(cells: list, output_dir):
-    text = f"
+    text = f"# Pushing '{output_dir}' to our Hugging Face account."
     code = f"""
 from huggingface_hub import HfApi, HfFolder, Repository
 
 # Instantiate the HfApi class
 api = HfApi()
 
-#
+# Our Hugging Face repository
 repo_name = "{output_dir}"
 
 # Create a repository on the Hugging Face Hub
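Note: the hunk ends before showing how the repository is created and the files uploaded; with the plain `HfApi` import above, a typical continuation looks like this sketch (the method choice is an assumption):

    from huggingface_hub import HfApi

    api = HfApi()
    repo_name = "my-model"   # placeholder for {output_dir}

    # Create the repo if needed, then upload the merged model folder
    repo_url = api.create_repo(repo_id=repo_name, exist_ok=True)
    api.upload_folder(folder_path=repo_name, repo_id=repo_url.repo_id)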
|