# coding=utf-8
# Copyright 2023 Microsoft and the HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Weights conversion script for Phi

This script downloads both Phi-1 and Phi-1.5 checkpoints to "checkpoint_path" and then converts the weights to
the Hugging Face model format and saves them in "pytorch_dump_folder_path".
"""

import argparse
import gc
import os

import torch
from huggingface_hub import hf_hub_download

from modeling_phi import PhiConfig, PhiForCausalLM


_MODELS = {
    "microsoft/phi-1": "https://huggingface.co/microsoft/phi-1/blob/main/pytorch_model.bin",
    "microsoft/phi-1_5": "https://huggingface.co/microsoft/phi-1_5/blob/main/pytorch_model.bin",
}
PHI_MAPPING = {
    "layers.0.wte.weight": "model.embed_tokens.weight",
    "layers.25.linear.bias": "lm_head.bias",
    "layers.25.linear.weight": "lm_head.weight",
    "layers.25.ln.bias": "model.final_layernorm.bias",
    "layers.25.ln.weight": "model.final_layernorm.weight",
    "layers": "model.layers",
    "ln": "input_layernorm",
    "mixer": "self_attn",
    "Wqkv": "query_key_value",
    "out_proj": "dense",
}


def convert_weights(original_weights, mapping, config):
    converted_weights = {}
    original_weights_keys = sorted(original_weights.keys())

    # rename the transformer block indices: original layers (1-24) become model.layers (0-23)
    range_change = {
        f"layers.{k}.": f"layers.{v}."
        for k, v in zip(range(1, config.num_hidden_layers + 1), range(0, config.num_hidden_layers))
    }

    mapping.update(**range_change)

    for original_weights_key in original_weights_keys:
        new_key = original_weights_key

        # rotary embedding buffers are skipped and not copied into the converted state dict
        if "rotary_emb" in new_key:
            continue

        if "Wqkv" in new_key:
            if "weight" in new_key:
                # regroup the fused Wqkv weight from (q/k/v, head, head_dim, hidden) ordering
                # to (head, q/k/v, head_dim, hidden) before flattening back to its original shape
                weight = original_weights[new_key]
                weights_shape = weight.shape
                weight = (
                    weight.view(3, config.num_attention_heads, -1, config.hidden_size)
                    .transpose(0, 1)
                    .reshape(*weights_shape)
                )
                original_weights[new_key] = weight
            elif "bias" in new_key:
                # same per-head regrouping for the fused Wqkv bias
                bias = original_weights[new_key]
                bias_shape = bias.shape
                bias = bias.view(3, config.num_attention_heads, -1).transpose(0, 1).reshape(*bias_shape)
                original_weights[new_key] = bias

        # apply the substring replacements defined in PHI_MAPPING (plus range_change)
        for k, v in mapping.items():
            if k in new_key:
                new_key = new_key.replace(k, v)

        converted_weights[new_key] = original_weights.pop(original_weights_key)

    return converted_weights
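
# Illustration only (hypothetical key, derived from PHI_MAPPING and range_change above):
# convert_weights would rename, for example,
#   "layers.1.mixer.Wqkv.weight" -> "model.layers.0.self_attn.query_key_value.weight"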


def _download(url: str, root: str):
    repo_id = f"{url.split('/')[3]}/{url.split('/')[4]}"
    filename = f"{url.split('/')[-1]}"
    hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        force_filename=root,
        local_dir_use_symlinks=False,
    )


def convert_phi_weights(checkpoint_path, pytorch_dump_folder_path, use_cuda, save_weights_directly):
    device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"

    for each_model_name, each_model_url in _MODELS.items():
        converted_checkpoint = {}

        model_path = os.path.join(checkpoint_path, each_model_name + "_" + each_model_url.split("/")[-1])
        if not os.path.exists(model_path):
            print(f"\n{each_model_name} was not found! Downloading it to {model_path}")
            _download(url=each_model_url, root=model_path)
        model_checkpoint = torch.load(model_path, map_location=device)

        model_type = each_model_name.split("/")[1]  # phi-1 or phi-1_5
        config = PhiConfig.from_pretrained(f"susnato/{model_type}_dev")

        # Converting the weights
        converted_checkpoint.update(**convert_weights(model_checkpoint, PHI_MAPPING, config))

        # Save either the whole model or the converted weights
        if save_weights_directly:
            save_weights_path = os.path.join(
                pytorch_dump_folder_path, each_model_name.split("/")[-1] + "_" + each_model_url.split("/")[-1]
            )
            torch.save(converted_checkpoint, save_weights_path)
            print(f"Model weights saved at {save_weights_path}!")

        else:
            model = PhiForCausalLM(config).to(device)
            model.load_state_dict(converted_checkpoint, strict=True)
            save_model_path = os.path.join(pytorch_dump_folder_path, model_type)
            model.save_pretrained(save_model_path)
            print(f"Model saved at {save_model_path}!")

            # release GPU memory for the 2nd model if cuda was used.
            del config, model

        # release GPU memory for the 2nd model if cuda was used.
        del model_checkpoint, converted_checkpoint
        if use_cuda:
            torch.cuda.empty_cache()
        gc.collect()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--checkpoint_path", type=str, help="Path to the folder of downloaded checkpoints. (Please enter full path)"
    )
    parser.add_argument(
        "--pytorch_dump_folder_path",
        default=None,
        type=str,
        help="Path to the output PyTorch model. (Please enter full path)",
    )
    parser.add_argument(
        "--use_cuda",
        default=False,
        type=bool,
        help="Whether to load the weights on GPU during conversion or not, False by default",
    )
    parser.add_argument(
        "--save_weights_directly",
        default=True,
        type=bool,
        help="Whether to save the weights directly after conversion or load the weight to the Phi model and then save "
        "the Phi model along with weights. True by default",
    )
    args = parser.parse_args()

    convert_phi_weights(args.checkpoint_path, args.pytorch_dump_folder_path, args.use_cuda, args.save_weights_directly)
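
# A minimal sketch of how the converted output might be consumed afterwards (an assumption,
# not part of the script; it requires that the full model was saved via the else branch above,
# i.e. save_weights_directly=False, so that save_pretrained produced "<pytorch_dump_folder_path>/phi-1"):
#
#   from modeling_phi import PhiForCausalLM
#   model = PhiForCausalLM.from_pretrained("<pytorch_dump_folder_path>/phi-1")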