# coding=utf-8
# Copyright 2023 Microsoft and the HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Weights conversion script for Phi

This script downloads both Phi-1 and Phi-1.5 checkpoints to "checkpoint_path" and then converts the weights to
the Hugging Face model format and saves them in "pytorch_dump_folder_path".
"""

import argparse
import gc
import os

import torch
from huggingface_hub import hf_hub_download

from modeling_phi import PhiConfig, PhiForCausalLM


_MODELS = {
    "microsoft/phi-1": "https://huggingface.co/microsoft/phi-1/blob/main/pytorch_model.bin",
    "microsoft/phi-1_5": "https://huggingface.co/microsoft/phi-1_5/blob/main/pytorch_model.bin",
}
PHI_MAPPING = {
    "layers.0.wte.weight": "model.embed_tokens.weight",
    "layers.25.linear.bias": "lm_head.bias",
    "layers.25.linear.weight": "lm_head.weight",
    "layers.25.ln.bias": "model.final_layernorm.bias",
    "layers.25.ln.weight": "model.final_layernorm.weight",
    "layers": "model.layers",
    "ln": "input_layernorm",
    "mixer": "self_attn",
    "Wqkv": "query_key_value",
    "out_proj": "dense",
}


def convert_weights(original_weights, mapping, config):
    converted_weights = {}
    original_weights_keys = sorted(original_weights.keys())

    # rename the transformer block indices: original layers (1-24) become model.layers (0-23)
    range_change = {
        f"layers.{k}.": f"layers.{v}."
        for k, v in zip(range(1, config.num_hidden_layers + 1), range(0, config.num_hidden_layers))
    }

    mapping.update(**range_change)

    for original_weights_key in original_weights_keys:
        new_key = original_weights_key

        # rotary embedding buffers are skipped and not copied into the converted state dict
        if "rotary_emb" in new_key:
            continue

        if "Wqkv" in new_key:
            if "weight" in new_key:
                # regroup the fused Wqkv weight from (q/k/v, head, head_dim, hidden) ordering
                # to (head, q/k/v, head_dim, hidden) before flattening back to its original shape
                weight = original_weights[new_key]
                weights_shape = weight.shape
                weight = (
                    weight.view(3, config.num_attention_heads, -1, config.hidden_size)
                    .transpose(0, 1)
                    .reshape(*weights_shape)
                )
                original_weights[new_key] = weight
            elif "bias" in new_key:
                # same per-head regrouping for the fused Wqkv bias
                bias = original_weights[new_key]
                bias_shape = bias.shape
                bias = bias.view(3, config.num_attention_heads, -1).transpose(0, 1).reshape(*bias_shape)
                original_weights[new_key] = bias

        # apply the substring replacements defined in PHI_MAPPING (plus range_change)
        for k, v in mapping.items():
            if k in new_key:
                new_key = new_key.replace(k, v)

        converted_weights[new_key] = original_weights.pop(original_weights_key)

    return converted_weights
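
# Illustration only (hypothetical key, derived from PHI_MAPPING and range_change above):
# convert_weights would rename, for example,
#   "layers.1.mixer.Wqkv.weight" -> "model.layers.0.self_attn.query_key_value.weight"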


def _download(url: str, root: str):
    repo_id = f"{url.split('/')[3]}/{url.split('/')[4]}"
    filename = f"{url.split('/')[-1]}"
    hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        force_filename=root,
        local_dir_use_symlinks=False,
    )


def convert_phi_weights(checkpoint_path, pytorch_dump_folder_path, use_cuda, save_weights_directly):
    device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"

    for each_model_name, each_model_url in _MODELS.items():
        converted_checkpoint = {}

        model_path = os.path.join(checkpoint_path, each_model_name + "_" + each_model_url.split("/")[-1])
        if not os.path.exists(model_path):
            print(f"\n{each_model_name} was not found! Downloading it to {model_path}")
            _download(url=each_model_url, root=model_path)
        model_checkpoint = torch.load(model_path, map_location=device)

        model_type = each_model_name.split("/")[1]  # phi-1 or phi-1_5
        config = PhiConfig.from_pretrained(f"susnato/{model_type}_dev")

        # Converting the weights
        converted_checkpoint.update(**convert_weights(model_checkpoint, PHI_MAPPING, config))

        # Save either the whole model or the converted weights
        if save_weights_directly:
            save_weights_path = os.path.join(
                pytorch_dump_folder_path, each_model_name.split("/")[-1] + "_" + each_model_url.split("/")[-1]
            )
            torch.save(converted_checkpoint, save_weights_path)
            print(f"Model weights saved at {save_weights_path}!")

        else:
            model = PhiForCausalLM(config).to(device)
            model.load_state_dict(converted_checkpoint, strict=True)
            save_model_path = os.path.join(pytorch_dump_folder_path, model_type)
            model.save_pretrained(save_model_path)
            print(f"Model saved at {save_model_path}!")

            # release GPU memory for the 2nd model if cuda was used.
            del config, model

        # release GPU memory for the 2nd model if cuda was used.
        del model_checkpoint, converted_checkpoint
        if use_cuda:
            torch.cuda.empty_cache()
        gc.collect()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--checkpoint_path", type=str, help="Path to the folder of downloaded checkpoints. (Please enter full path)"
    )
    parser.add_argument(
        "--pytorch_dump_folder_path",
        default=None,
        type=str,
        help="Path to the output PyTorch model. (Please enter full path)",
    )
    parser.add_argument(
        "--use_cuda",
        default=False,
        type=bool,
        help="Whether to load the weights on GPU during conversion or not, False by default",
    )
    parser.add_argument(
        "--save_weights_directly",
        default=True,
        type=bool,
        help="Whether to save the weights directly after conversion or load the weight to the Phi model and then save "
        "the Phi model along with weights. True by default",
    )
    args = parser.parse_args()

    convert_phi_weights(args.checkpoint_path, args.pytorch_dump_folder_path, args.use_cuda, args.save_weights_directly)
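
# A minimal sketch of how the converted output might be consumed afterwards (an assumption,
# not part of the script; it requires that the full model was saved via the else branch above,
# i.e. save_weights_directly=False, so that save_pretrained produced "<pytorch_dump_folder_path>/phi-1"):
#
#   from modeling_phi import PhiForCausalLM
#   model = PhiForCausalLM.from_pretrained("<pytorch_dump_folder_path>/phi-1")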