from typing import Optional

import nbformat as nbf

from utils import FTDataSet, falcon, gemma

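# Builders for the cells of a generated QLoRA fine-tuning notebook. Each function
# appends markdown and code cells (library installation, Hugging Face login, dataset
# loading, model/quantization setup, LoRA and TrainingArguments configuration,
# SFTTrainer setup, training, adapter merging, and pushing to the Hub) to a shared
# `cells` list.
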
def create_install_libraries_cells(cells: list):
    text_cell = nbf.v4.new_markdown_cell("# Installing Required Libraries!")
    text_cell1 = nbf.v4.new_markdown_cell(
        "Installing required libraries, including trl, transformers, accelerate, peft, datasets, "
        "and bitsandbytes.")
    code = """
!pip install -q --upgrade "transformers==4.38.2"
!pip install -q --upgrade "datasets==2.16.1"
!pip install -q --upgrade "accelerate==0.26.1"
!pip install -q --upgrade "evaluate==0.4.1"
!pip install -q --upgrade "bitsandbytes==0.42.0"
!pip install -q --upgrade "trl==0.7.11"
!pip install -q --upgrade "peft==0.8.2"
"""
    code_pytorch = """
# Checks if PyTorch is installed and installs it if not.
try:
    import torch
    print("PyTorch is installed!")
except ImportError:
    print("PyTorch is not installed.")
    !pip install -q torch
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(nbf.v4.new_code_cell(code_pytorch))
    cells.append(code_cell)

def create_install_flash_attention(cells: list):
    text_cell = nbf.v4.new_markdown_cell(
        "## Installing Flash Attention")
    text_cell1 = nbf.v4.new_markdown_cell(
        "Installing Flash Attention to reduce the memory and runtime cost of the attention layer, "
        "and improve the performance of the model training. Learn more at "
        "[FlashAttention](https://github.com/Dao-AILab/flash-attention/tree/main). "
        "Installing Flash Attention from source can take quite a bit of time.")
    code = """
import torch; assert torch.cuda.get_device_capability()[0] >= 8, 'Hardware not supported for Flash Attention'
!pip install ninja packaging
!MAX_JOBS=4 pip install -q flash-attn --no-build-isolation --upgrade
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(code_cell)

def create_login_hf_cells(cells: list, should_login: bool = False, model_name: Optional[str] = None,
                          output_dir: Optional[str] = None):
    text_cell = nbf.v4.new_markdown_cell("## Login to HF")
    text_1 = f"Replace `HF_TOKEN` with a valid token in order to push **'{output_dir}'** to `huggingface_hub`."
    if should_login:
        text_1 = f"Replace `HF_TOKEN` with a valid token in order to load **'{model_name}'** from `huggingface_hub`."
    text_cell1 = nbf.v4.new_markdown_cell(text_1)
    code = """
# Install huggingface_hub
!pip install -q huggingface_hub
from huggingface_hub import login
login(
    token='HF_TOKEN',
    add_to_git_credential=True
)
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(code_cell)

def create_datasets_cells(cells: list, dataset: FTDataSet, seed: int):
    text_cell = nbf.v4.new_markdown_cell("# Load and Prepare the Dataset")
    text = 'The dataset is already formatted in a conversational format, which is supported by [trl](' \
           'https://huggingface.co/docs/trl/index/), and ready for supervised finetuning.'
    text_format = """
**Conversational format:**

```python
{"messages": [{"role": "system", "content": "You are..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
{"messages": [{"role": "system", "content": "You are..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
{"messages": [{"role": "system", "content": "You are..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
```
"""
    text_cell1 = nbf.v4.new_markdown_cell(text)
    text_cell2 = nbf.v4.new_markdown_cell(text_format)
    code = f"""
from datasets import load_dataset
# Load dataset from the hub
dataset = load_dataset("{dataset.path}", split="{dataset.dataset_split}")
dataset = dataset.shuffle(seed={seed})
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(text_cell2)
    cells.append(code_cell)

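# Note: create_model_cells expects `load_in_4bit` to be the literal argument name
# (e.g. "load_in_4bit"), which is rendered as "<name>=True" in the generated
# BitsAndBytesConfig call, and `bnb_4bit_compute_dtype` to be a code string
# (for example, "torch.bfloat16") that is emitted unquoted.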
def create_model_cells(cells: list, model_id: str, version: str, flash_attention: bool, pad_side: str, pad_value: str,
                       load_in_4bit: str, bnb_4bit_use_double_quant: bool, bnb_4bit_quant_type: str,
                       bnb_4bit_compute_dtype: str
                       ):
    text_cell = nbf.v4.new_markdown_cell(f"# Load **{model_id}-{version}** for Finetuning")

    load_in_4bit_str = f"{load_in_4bit}=True"

    flash_attention_str = "attn_implementation='flash_attention_2',"
    if not flash_attention:
        flash_attention_str = ''

    pad_value_str = "tokenizer.pad_token = tokenizer.eos_token"
    if pad_value is None:
        pad_value_str = ""

    auto_model_import = "AutoModelForCausalLM"
    trust_code = "trust_remote_code=True,"
    if model_id == falcon.name:
        auto_model_import = "FalconForCausalLM"
        trust_code = ""

    chat_ml = """
# Set chat template to OAI chatML
model, tokenizer = setup_chat_format(model, tokenizer)
"""
    note = f"""
> **Note:** For `{model_id}`, we will not use `setup_chat_format`. Instead, we will directly use this tokenizer, [philschmid/gemma-tokenizer-chatml](https://huggingface.co/philschmid/gemma-tokenizer-chatml), to fine-tune `{model_id}` with ChatML.
"""
    tokenizer_id = f"{model_id}-{version}"
    if model_id == gemma.name:
        tokenizer_id = "philschmid/gemma-tokenizer-chatml"
        chat_ml = ""
    else:
        note = ""

    code = f"""
import torch
from transformers import AutoTokenizer, {auto_model_import}, BitsAndBytesConfig
from trl import setup_chat_format
# Hugging Face model id
model_id = "{model_id}-{version}"
# BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
    {load_in_4bit_str}, bnb_4bit_use_double_quant={bnb_4bit_use_double_quant},
    bnb_4bit_quant_type="{bnb_4bit_quant_type}", bnb_4bit_compute_dtype={bnb_4bit_compute_dtype}
)
# Load model and tokenizer
model = {auto_model_import}.from_pretrained(
    model_id,
    device_map="auto",
    {trust_code}
    {flash_attention_str}
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config
)
tokenizer = AutoTokenizer.from_pretrained("{tokenizer_id}")
tokenizer.padding_side = "{pad_side}"
{pad_value_str}
{chat_ml}
"""
    text_1 = f"""
This process involves two key steps:

1. **LLM Quantization:**
    - We first load the selected large language model (LLM).
    - We then use the `bitsandbytes` library to quantize the model, which can significantly reduce its memory footprint.

    > **Note:** The memory requirements of the model scale with its size. For instance, a 7B parameter model may require
    a 24GB GPU for fine-tuning.

2. **Chat Model Preparation:**
    - To train a model for chat/conversational tasks, we need to prepare both the model and its tokenizer.
    - This involves adding special tokens to the tokenizer and the model itself. These tokens help the model
    understand the different roles within a conversation.
    - The **trl** library provides a convenient method called `setup_chat_format` for this purpose. This method performs the
    following actions:
        * Adds special tokens to the tokenizer, such as `<|im_start|>` and `<|im_end|>`, to mark the beginning and
        end of a conversation.
        * Resizes the model's embedding layer to accommodate the new tokens.
        * Sets the tokenizer's chat template, which defines the format used to convert input data into a chat-like
        structure. The default template is `chatml` from OpenAI.

{note}
"""
    code_cell = nbf.v4.new_code_cell(code)
    text_cell1 = nbf.v4.new_markdown_cell(text_1)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(code_cell)

def create_lora_config_cells(cells: list, r: int, alpha: int, dropout: float, bias: str):
    text_cell = nbf.v4.new_markdown_cell("## Setting LoRA Config")
    code = f"""
from peft import LoraConfig
peft_config = LoraConfig(
    lora_alpha={alpha},
    lora_dropout={dropout},
    r={r},
    bias="{bias}",
    target_modules="all-linear",
    task_type="CAUSAL_LM"
)
"""
    text = """The `SFTTrainer` provides native integration with `peft`, simplifying the process of efficiently tuning
large language models (LLMs) using techniques such as [LoRA](
https://magazine.sebastianraschka.com/p/practical-tips-for-finetuning-llms). The only requirement is to create
the `LoraConfig` and pass it to the `SFTTrainer`.
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(nbf.v4.new_markdown_cell(text))
    cells.append(code_cell)

def create_training_args_cells(cells: list, epochs, max_steps, logging_steps, per_device_train_batch_size,
                               save_strategy, gradient_accumulation_steps, gradient_checkpointing,
                               learning_rate, max_grad_norm, warmup_ratio, lr_scheduler_type, output_dir,
                               report_to, seed):
    text_cell = nbf.v4.new_markdown_cell("## Setting the TrainingArguments")

    to_install = None
    if report_to == "all":
        to_install = "azure_ml comet_ml mlflow tensorboard wandb"
    elif report_to != "none":
        to_install = report_to

    gradient_checkpointing_kwargs = {"use_reentrant": False}

    code_report = f"""
# Installing {to_install} to report the metrics
!pip install -q {to_install}
"""
    code = f"""
from transformers import TrainingArguments
args = TrainingArguments(
    output_dir="temp_{output_dir}",
    num_train_epochs={epochs},
    per_device_train_batch_size={per_device_train_batch_size},
    gradient_accumulation_steps={gradient_accumulation_steps},
    gradient_checkpointing={gradient_checkpointing},
    gradient_checkpointing_kwargs={gradient_checkpointing_kwargs},
    optim="adamw_torch_fused",
    logging_steps={logging_steps},
    save_strategy='{save_strategy}',
    learning_rate={learning_rate},
    bf16=True,
    max_grad_norm={max_grad_norm},
    warmup_ratio={warmup_ratio},
    lr_scheduler_type='{lr_scheduler_type}',
    report_to='{report_to}',
    max_steps={max_steps},
    seed={seed},
    overwrite_output_dir=True,
    remove_unused_columns=True
)
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    if to_install is not None:
        cells.append(nbf.v4.new_code_cell(code_report))
    cells.append(code_cell)

def create_sft_trainer_cells(cells: list, max_seq_length, packing):
    text_cell = nbf.v4.new_markdown_cell(
        """## Setting the Supervised Finetuning Trainer (`SFTTrainer`)

This `SFTTrainer` is a wrapper around the `transformers.Trainer` class and inherits all of its attributes and methods.
The trainer takes care of properly initializing the `PeftModel`.
""")
    dataset_kwargs = {
        "add_special_tokens": False,  # We template with special tokens
        "append_concat_token": False,  # No need to add additional separator token
    }
    code = f"""
from trl import SFTTrainer
trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    peft_config=peft_config,
    max_seq_length={max_seq_length},
    tokenizer=tokenizer,
    packing={packing},
    dataset_kwargs={dataset_kwargs}
)
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)

def create_start_training_cells(cells: list, epochs, max_steps, push_to_hub, output_dir):
    if push_to_hub:
        save_txt = f" and to the hub in **'User/{output_dir}'**."
    else:
        save_txt = "."

    epoch_str = f"{epochs} epochs"
    if max_steps > 0:
        epoch_str = f"{max_steps} steps"

    text_cell = nbf.v4.new_markdown_cell(
        f"""### Starting Training and Saving Model/Tokenizer

We start training the model by calling the `train()` method on the trainer instance. This will start the training
loop and train the model for `{epoch_str}`. The model will be automatically saved to the output directory
(**'temp_{output_dir}'**){save_txt}
""")
    code = """
model.config.use_cache = False
# start training
trainer.train()
# save the peft model
trainer.save_model()
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)

def create_free_gpu_cells(cells: list):
    text_cell = nbf.v4.new_markdown_cell(
        """### Free the GPU Memory to Prepare Merging `LoRA` Adapters with the Base Model
""")
    code = """
# Free the GPU memory
del model
del trainer
torch.cuda.empty_cache()
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)

def create_merge_lora_cells(cells: list, output_dir):
    text_cell = nbf.v4.new_markdown_cell(
        """## Merging LoRA Adapters into the Original Model

While utilizing `LoRA`, we focus on training the adapters rather than the entire model. Consequently, during the
model saving process, only the `adapter weights` are preserved, not the complete model. If we wish to save the
entire model for easier usage with Text Generation Inference, we can incorporate the adapter weights into the model
weights. This can be achieved using the `merge_and_unload` method. Following this, the model can be saved using the
`save_pretrained` method. The result is a default model that is ready for inference.
""")
    code = f"""
import torch
from peft import AutoPeftModelForCausalLM
# Load Peft model on CPU
model = AutoPeftModelForCausalLM.from_pretrained(
    "temp_{output_dir}",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)
# Merge LoRA with the base model and save
merged_model = model.merge_and_unload()
merged_model.save_pretrained("{output_dir}", safe_serialization=True, max_shard_size="2GB")
tokenizer.save_pretrained("{output_dir}")
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)

def merge_model_cells(cells: list, output_dir):
    text_cell = nbf.v4.new_markdown_cell(
        f"### Copy all result folders from 'temp_{output_dir}' to '{output_dir}'")
    code = f"""
import os
import shutil

source_folder = "temp_{output_dir}"
destination_folder = "{output_dir}"
os.makedirs(destination_folder, exist_ok=True)

for item in os.listdir(source_folder):
    item_path = os.path.join(source_folder, item)
    if os.path.isdir(item_path):
        destination_path = os.path.join(destination_folder, item)
        shutil.copytree(item_path, destination_path)
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)

def push_to_hub_cells(cells: list, output_dir):
    text = f"## Pushing '{output_dir}' to the Hugging Face account."
    code = f"""
from huggingface_hub import HfApi, HfFolder

# Instantiate the HfApi class
api = HfApi()

# Our Hugging Face repository
repo_name = "{output_dir}"

# Create a repository on the Hugging Face Hub
repo = api.create_repo(token=HfFolder.get_token(), repo_type="model", repo_id=repo_name)

api.upload_folder(
    folder_path="{output_dir}",
    repo_id=repo.repo_id
)
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(nbf.v4.new_markdown_cell(text))
    cells.append(code_cell)
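

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): one way the builders above might be composed
# into a complete notebook. All concrete values below (model id, dataset id,
# output directory, hyperparameters) are hypothetical, and the FTDataSet keyword
# arguments assume its constructor mirrors the `path` / `dataset_split`
# attributes read in create_datasets_cells; adjust to the real utils definitions.
# ---------------------------------------------------------------------------
def build_finetuning_notebook(output_path: str = "finetune.ipynb"):
    cells = []
    create_install_libraries_cells(cells)
    create_install_flash_attention(cells)
    create_login_hf_cells(cells, should_login=True, model_name="google/gemma", output_dir="gemma-chat")
    create_datasets_cells(cells, FTDataSet(path="HuggingFaceH4/ultrachat_200k", dataset_split="train_sft"), seed=42)
    # Whether the gemma/falcon branches fire depends on the `gemma.name` / `falcon.name` values in utils.
    create_model_cells(cells, model_id="google/gemma", version="7b", flash_attention=True,
                       pad_side="right", pad_value=None, load_in_4bit="load_in_4bit",
                       bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4",
                       bnb_4bit_compute_dtype="torch.bfloat16")
    create_lora_config_cells(cells, r=16, alpha=8, dropout=0.05, bias="none")
    create_training_args_cells(cells, epochs=3, max_steps=0, logging_steps=10, per_device_train_batch_size=2,
                               save_strategy="epoch", gradient_accumulation_steps=2, gradient_checkpointing=True,
                               learning_rate=2e-4, max_grad_norm=0.3, warmup_ratio=0.03,
                               lr_scheduler_type="constant", output_dir="gemma-chat", report_to="none", seed=42)
    create_sft_trainer_cells(cells, max_seq_length=2048, packing=True)
    create_start_training_cells(cells, epochs=3, max_steps=0, push_to_hub=False, output_dir="gemma-chat")
    create_free_gpu_cells(cells)
    create_merge_lora_cells(cells, output_dir="gemma-chat")
    merge_model_cells(cells, output_dir="gemma-chat")

    # Assemble the collected cells into a notebook and write it to disk.
    nb = nbf.v4.new_notebook()
    nb["cells"] = cells
    nbf.write(nb, output_path)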