| """ | |
| Utility functions for handling Gemma models | |
| """ | |
| import os | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from huggingface_hub import login, HfApi | |


def get_available_models():
    """
    Returns a list of available Gemma models for fine-tuning.
    """
    return [
        "google/gemma-2-2b-it",
        "google/gemma-2-9b-it",
        "google/gemma-2-27b-it"
    ]


def load_model(model_name, token=None):
    """
    Loads a model from Hugging Face Hub.

    Args:
        model_name: Name of the model to load
        token: Hugging Face token for access to gated models

    Returns:
        Tuple of (model, tokenizer)
    """
    if token:
        login(token)

    # Set appropriate device
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"  # For Apple Silicon
    else:
        device = "cpu"

    print(f"Loading model {model_name} on {device}...")
    # Load model with appropriate parameters based on device and model size.
    # The size tag is parsed from the checkpoint name, e.g. "google/gemma-2-9b-it" -> "9b".
    model_size = model_name.split("-")[2]

    if device == "cuda":
        # For CUDA devices, optimize based on model size and available memory
        if model_size in ["2b", "9b"]:
            # Smaller models can be loaded in BF16
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.bfloat16,
                device_map="auto"
            )
        else:
            # Larger models may need additional optimizations (8-bit loading)
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                quantization_config=BitsAndBytesConfig(load_in_8bit=True)
            )
| elif device == "cpu": | |
| # For CPU, use FP32 but load 8-bit for larger models to conserve memory | |
| if model_size in ["2b"]: | |
| model = AutoModelForCausalLM.from_pretrained( | |
| model_name, | |
| device_map={"": device} | |
| ) | |
| else: | |
| model = AutoModelForCausalLM.from_pretrained( | |
| model_name, | |
| device_map={"": device}, | |
| load_in_8bit=True | |
| ) | |
    else:  # MPS (Apple Silicon)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map={"": device}
        )

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    return model, tokenizer
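
# Example usage (a minimal sketch; assumes a Hugging Face token with access to
# the gated Gemma checkpoints is available in an HF_TOKEN environment variable):
#
#     model, tokenizer = load_model("google/gemma-2-2b-it", token=os.environ.get("HF_TOKEN"))
#     inputs = tokenizer("Hello, Gemma!", return_tensors="pt").to(model.device)
#     outputs = model.generate(**inputs, max_new_tokens=32)
#     print(tokenizer.decode(outputs[0], skip_special_tokens=True))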


def export_model(model_path, output_dir, model_name, format="pytorch", quantization=None):
    """
    Exports a fine-tuned model to the specified format.

    Args:
        model_path: Path to the fine-tuned model
        output_dir: Directory to save the exported model
        model_name: Name for the exported model
        format: Export format ("pytorch", "gguf", or "safetensors")
        quantization: Quantization level for GGUF format

    Returns:
        Dictionary with export information
    """
    if not os.path.exists(model_path):
        raise ValueError(f"Model path '{model_path}' does not exist")

    os.makedirs(output_dir, exist_ok=True)
    export_path = os.path.join(output_dir, model_name)
    os.makedirs(export_path, exist_ok=True)
    # Load the fine-tuned model and tokenizer (any LoRA adapters are assumed to
    # have already been merged into the base weights before export)
    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Handle different export formats
    if format.lower() == "pytorch":
        # Export as PyTorch model
        model.save_pretrained(export_path)
        tokenizer.save_pretrained(export_path)
    elif format.lower() == "safetensors":
        # Export as safetensors
        model.save_pretrained(export_path, safe_serialization=True)
        tokenizer.save_pretrained(export_path)
    elif format.lower() == "gguf":
        # For GGUF, we'd typically use a conversion script
        # This is simplified; in practice you'd use specific tools for GGUF conversion
        if quantization is not None and quantization.lower() != "none":
            # Command for quantized GGUF conversion would go here
            pass
        else:
            # Command for standard GGUF conversion would go here
            pass
    else:
        raise ValueError(f"Unsupported export format: {format}")

    # Calculate model size
    model_size_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
    model_size_gb = model_size_bytes / (1024**3)

    return {
        "format": format.lower(),
        "quantization": quantization if format.lower() == "gguf" else "None",
        "model_name": model_name,
        "export_path": export_path,
        "model_size": f"{model_size_gb:.2f} GB"
    }
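
# One possible way to fill in the GGUF branch of export_model above (a sketch
# only, not part of this module's tested path): save the Hugging Face weights
# first, then call llama.cpp's converter. The LLAMA_CPP_DIR location and the
# exact script name/flags depend on the local llama.cpp checkout and version,
# so treat the invocation below as an assumption:
#
#     import subprocess
#     model.save_pretrained(export_path)
#     tokenizer.save_pretrained(export_path)
#     LLAMA_CPP_DIR = "/path/to/llama.cpp"  # hypothetical local checkout
#     gguf_file = os.path.join(export_path, f"{model_name}.gguf")
#     subprocess.run(
#         ["python", os.path.join(LLAMA_CPP_DIR, "convert_hf_to_gguf.py"),
#          export_path, "--outfile", gguf_file, "--outtype", "f16"],
#         check=True,
#     )
#     # A quantized GGUF (e.g. Q4_K_M) can then be produced from the F16 file
#     # with llama.cpp's quantize tool.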


def push_to_hub(model_path, repo_name, token):
    """
    Pushes a fine-tuned model to Hugging Face Hub.

    Args:
        model_path: Path to the fine-tuned model
        repo_name: Name for the repository on Hugging Face Hub
        token: Hugging Face token

    Returns:
        URL of the uploaded model
    """
    if not os.path.exists(model_path):
        raise ValueError(f"Model path '{model_path}' does not exist")

    login(token)
    # Load the fine-tuned model and tokenizer (any LoRA adapters are assumed to
    # have already been merged into the base weights before uploading)
    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Push to hub
    model.push_to_hub(repo_name)
    tokenizer.push_to_hub(repo_name)
    # Build the model URL
    model_url = f"https://huggingface.co/{repo_name}"

    return model_url
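
# Example usage (a sketch; the checkpoint path and the "username/gemma-2-2b-finetuned"
# repository name are placeholders, not values defined elsewhere in this module):
#
#     url = push_to_hub(
#         "outputs/checkpoint-final",
#         "username/gemma-2-2b-finetuned",
#         token=os.environ["HF_TOKEN"],
#     )
#     print(f"Model available at {url}")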