# gemma-fine-tuning/model_utils.py
"""
Utility functions for handling Gemma models
"""
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from huggingface_hub import login, HfApi
def get_available_models():
"""
Returns a list of available Gemma models for fine-tuning.
"""
return [
"google/gemma-2-2b-it",
"google/gemma-2-9b-it",
"google/gemma-2-27b-it"
]
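
# Example (illustrative only): the helpers below expect one of the ids
# returned above, e.g.
#
#   model_name = get_available_models()[0]   # "google/gemma-2-2b-it"
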
def load_model(model_name, token=None):
"""
Loads a model from Hugging Face Hub.
Args:
model_name: Name of the model to load
token: Hugging Face token for access to gated models
Returns:
Tuple of (model, tokenizer)
"""
if token:
login(token)
# Set appropriate device
if torch.cuda.is_available():
device = "cuda"
elif torch.backends.mps.is_available():
device = "mps" # For Apple Silicon
else:
device = "cpu"
print(f"Loading model {model_name} on {device}...")
    # Choose loading parameters based on device and model size. For the Gemma 2
    # ids returned by get_available_models(), the size tag ("2b", "9b", "27b")
    # is the third dash-separated field of the model name.
    model_size = model_name.split("-")[2]
    if device == "cuda":
        # For CUDA devices, optimize based on model size and available memory
        if model_size in ["2b", "9b"]:
            # Smaller models can be loaded directly in BF16
model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.bfloat16,
device_map="auto"
)
        else:
            # Larger models are loaded in 8-bit to fit in GPU memory;
            # modules kept in higher precision stay in BF16
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                quantization_config=BitsAndBytesConfig(load_in_8bit=True)
            )
    elif device == "cpu":
        # On CPU the model is loaded in FP32; bitsandbytes 8-bit quantization
        # generally requires a CUDA GPU, so larger models rely on
        # low_cpu_mem_usage to keep peak memory down while loading
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map={"": device},
            low_cpu_mem_usage=True
        )
else: # MPS (Apple Silicon)
model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.float16,
device_map={"": device}
)
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
return model, tokenizer
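
# Example (illustrative only): load the smallest listed model with an access
# token read from an assumed "HF_TOKEN" environment variable, then run a quick
# generation to sanity-check the setup.
#
#   model, tokenizer = load_model(
#       "google/gemma-2-2b-it",
#       token=os.environ.get("HF_TOKEN"),
#   )
#   inputs = tokenizer("Hello, Gemma!", return_tensors="pt").to(model.device)
#   outputs = model.generate(**inputs, max_new_tokens=32)
#   print(tokenizer.decode(outputs[0], skip_special_tokens=True))
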
def export_model(model_path, output_dir, model_name, format="pytorch", quantization=None):
"""
Exports a fine-tuned model to the specified format.
Args:
model_path: Path to the fine-tuned model
output_dir: Directory to save the exported model
model_name: Name for the exported model
        format: Export format ("pytorch", "safetensors", or "gguf"; GGUF
            conversion is not implemented here and raises NotImplementedError)
        quantization: Quantization level for GGUF format (unused until GGUF
            export is implemented)
Returns:
Dictionary with export information
"""
if not os.path.exists(model_path):
raise ValueError(f"Model path '{model_path}' does not exist")
os.makedirs(output_dir, exist_ok=True)
export_path = os.path.join(output_dir, model_name)
os.makedirs(export_path, exist_ok=True)
    # Load the fine-tuned model and tokenizer (any LoRA adapters are expected
    # to have been merged into the base weights before exporting)
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Handle different export formats
if format.lower() == "pytorch":
# Export as PyTorch model
model.save_pretrained(export_path)
tokenizer.save_pretrained(export_path)
elif format.lower() == "safetensors":
# Export as safetensors
model.save_pretrained(export_path, safe_serialization=True)
tokenizer.save_pretrained(export_path)
    elif format.lower() == "gguf":
        # GGUF conversion is not performed in-process; it requires an external
        # converter such as llama.cpp's conversion/quantization scripts.
        # Raise instead of silently returning a success result for a file
        # that was never written.
        raise NotImplementedError(
            "GGUF export (with or without quantization) must be done with an "
            "external tool such as llama.cpp's conversion scripts"
        )
else:
raise ValueError(f"Unsupported export format: {format}")
# Calculate model size
model_size_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
model_size_gb = model_size_bytes / (1024**3)
return {
"format": format.lower(),
"quantization": quantization if format.lower() == "gguf" else "None",
"model_name": model_name,
"export_path": export_path,
"model_size": f"{model_size_gb:.2f} GB"
}
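
# Example (illustrative sketch; the paths and names below are placeholders,
# not files this module creates on its own):
#
#   info = export_model(
#       model_path="outputs/gemma-2-2b-finetuned",
#       output_dir="exports",
#       model_name="gemma-2-2b-custom",
#       format="safetensors",
#   )
#   print(info["export_path"], info["model_size"])
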
def push_to_hub(model_path, repo_name, token):
"""
Pushes a fine-tuned model to Hugging Face Hub.
Args:
model_path: Path to the fine-tuned model
        repo_name: Repository id on Hugging Face Hub, ideally in the full
            "username/model-name" form
token: Hugging Face token
Returns:
URL of the uploaded model
"""
if not os.path.exists(model_path):
raise ValueError(f"Model path '{model_path}' does not exist")
login(token)
    # Load the fine-tuned model and tokenizer (any LoRA adapters are expected
    # to have been merged into the base weights beforehand)
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
    # Push model and tokenizer to the Hub
    model.push_to_hub(repo_name)
    tokenizer.push_to_hub(repo_name)
    # Build the model URL; if repo_name has no namespace, prepend the
    # authenticated user's name so the returned link resolves correctly
    if "/" not in repo_name:
        api = HfApi()
        repo_name = f"{api.whoami(token=token)['name']}/{repo_name}"
    return f"https://huggingface.co/{repo_name}"
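
# Minimal end-to-end sketch, guarded so importing this module has no side
# effects. The output paths, repo name, and "HF_TOKEN" environment variable
# are assumptions for illustration, not requirements of the functions above.
if __name__ == "__main__":
    print("Available Gemma models:", get_available_models())
    # Uncomment to run the full flow (needs a valid Hugging Face token):
    # hf_token = os.environ.get("HF_TOKEN")
    # model, tokenizer = load_model("google/gemma-2-2b-it", token=hf_token)
    # info = export_model("outputs/gemma-2-2b-finetuned", "exports",
    #                     "gemma-2-2b-custom", format="safetensors")
    # print(push_to_hub("outputs/gemma-2-2b-finetuned",
    #                   "your-username/gemma-2-2b-custom", hf_token))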