# gemma-fine-tuning/model_utils.py
"""
Utility functions for handling Gemma models
"""
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from huggingface_hub import login, HfApi
def get_available_models():
"""
Returns a list of available Gemma models for fine-tuning.
"""
return [
"google/gemma-2-2b-it",
"google/gemma-2-9b-it",
"google/gemma-2-27b-it"
]
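
# Example (illustrative only): the helpers below expect one of the ids
# returned above, e.g.
#
#   model_name = get_available_models()[0]   # "google/gemma-2-2b-it"
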
def load_model(model_name, token=None):
"""
Loads a model from Hugging Face Hub.
Args:
model_name: Name of the model to load
token: Hugging Face token for access to gated models
Returns:
Tuple of (model, tokenizer)
"""
if token:
login(token)
# Set appropriate device
if torch.cuda.is_available():
device = "cuda"
elif torch.backends.mps.is_available():
device = "mps" # For Apple Silicon
else:
device = "cpu"
print(f"Loading model {model_name} on {device}...")
    # Choose loading parameters based on device and model size. For the Gemma 2
    # ids returned by get_available_models(), the size tag ("2b", "9b", "27b")
    # is the third dash-separated field of the model name.
    model_size = model_name.split("-")[2]
    if device == "cuda":
        # For CUDA devices, optimize based on model size and available memory
        if model_size in ["2b", "9b"]:
            # Smaller models can be loaded directly in BF16
model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.bfloat16,
device_map="auto"
)
        else:
            # Larger models are loaded in 8-bit to fit in GPU memory;
            # modules kept in higher precision stay in BF16
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                quantization_config=BitsAndBytesConfig(load_in_8bit=True)
            )
    elif device == "cpu":
        # On CPU the model is loaded in FP32; bitsandbytes 8-bit quantization
        # generally requires a CUDA GPU, so larger models rely on
        # low_cpu_mem_usage to keep peak memory down while loading
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map={"": device},
            low_cpu_mem_usage=True
        )
else: # MPS (Apple Silicon)
model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.float16,
device_map={"": device}
)
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
return model, tokenizer
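
# Example (illustrative only): load the smallest listed model with an access
# token read from an assumed "HF_TOKEN" environment variable, then run a quick
# generation to sanity-check the setup.
#
#   model, tokenizer = load_model(
#       "google/gemma-2-2b-it",
#       token=os.environ.get("HF_TOKEN"),
#   )
#   inputs = tokenizer("Hello, Gemma!", return_tensors="pt").to(model.device)
#   outputs = model.generate(**inputs, max_new_tokens=32)
#   print(tokenizer.decode(outputs[0], skip_special_tokens=True))
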
def export_model(model_path, output_dir, model_name, format="pytorch", quantization=None):
"""
Exports a fine-tuned model to the specified format.
Args:
model_path: Path to the fine-tuned model
output_dir: Directory to save the exported model
model_name: Name for the exported model
        format: Export format ("pytorch", "safetensors", or "gguf"; GGUF
            conversion is not implemented here and raises NotImplementedError)
        quantization: Quantization level for GGUF format (unused until GGUF
            export is implemented)
Returns:
Dictionary with export information
"""
if not os.path.exists(model_path):
raise ValueError(f"Model path '{model_path}' does not exist")
os.makedirs(output_dir, exist_ok=True)
export_path = os.path.join(output_dir, model_name)
os.makedirs(export_path, exist_ok=True)
    # Load the fine-tuned model and tokenizer (any LoRA adapters are expected
    # to have been merged into the base weights before exporting)
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Handle different export formats
if format.lower() == "pytorch":
# Export as PyTorch model
model.save_pretrained(export_path)
tokenizer.save_pretrained(export_path)
elif format.lower() == "safetensors":
# Export as safetensors
model.save_pretrained(export_path, safe_serialization=True)
tokenizer.save_pretrained(export_path)
    elif format.lower() == "gguf":
        # GGUF conversion is not performed in-process; it requires an external
        # converter such as llama.cpp's conversion/quantization scripts.
        # Raise instead of silently returning a success result for a file
        # that was never written.
        raise NotImplementedError(
            "GGUF export (with or without quantization) must be done with an "
            "external tool such as llama.cpp's conversion scripts"
        )
else:
raise ValueError(f"Unsupported export format: {format}")
# Calculate model size
model_size_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
model_size_gb = model_size_bytes / (1024**3)
return {
"format": format.lower(),
"quantization": quantization if format.lower() == "gguf" else "None",
"model_name": model_name,
"export_path": export_path,
"model_size": f"{model_size_gb:.2f} GB"
}
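
# Example (illustrative sketch; the paths and names below are placeholders,
# not files this module creates on its own):
#
#   info = export_model(
#       model_path="outputs/gemma-2-2b-finetuned",
#       output_dir="exports",
#       model_name="gemma-2-2b-custom",
#       format="safetensors",
#   )
#   print(info["export_path"], info["model_size"])
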
def push_to_hub(model_path, repo_name, token):
"""
Pushes a fine-tuned model to Hugging Face Hub.
Args:
model_path: Path to the fine-tuned model
        repo_name: Repository id on Hugging Face Hub, ideally in the full
            "username/model-name" form
token: Hugging Face token
Returns:
URL of the uploaded model
"""
if not os.path.exists(model_path):
raise ValueError(f"Model path '{model_path}' does not exist")
login(token)
    # Load the fine-tuned model and tokenizer (any LoRA adapters are expected
    # to have been merged into the base weights beforehand)
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
    # Push model and tokenizer to the Hub
    model.push_to_hub(repo_name)
    tokenizer.push_to_hub(repo_name)
    # Build the model URL; if repo_name has no namespace, prepend the
    # authenticated user's name so the returned link resolves correctly
    if "/" not in repo_name:
        api = HfApi()
        repo_name = f"{api.whoami(token=token)['name']}/{repo_name}"
    return f"https://huggingface.co/{repo_name}"
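
# Minimal end-to-end sketch, guarded so importing this module has no side
# effects. The output paths, repo name, and "HF_TOKEN" environment variable
# are assumptions for illustration, not requirements of the functions above.
if __name__ == "__main__":
    print("Available Gemma models:", get_available_models())
    # Uncomment to run the full flow (needs a valid Hugging Face token):
    # hf_token = os.environ.get("HF_TOKEN")
    # model, tokenizer = load_model("google/gemma-2-2b-it", token=hf_token)
    # info = export_model("outputs/gemma-2-2b-finetuned", "exports",
    #                     "gemma-2-2b-custom", format="safetensors")
    # print(push_to_hub("outputs/gemma-2-2b-finetuned",
    #                   "your-username/gemma-2-2b-custom", hf_token))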