mergekit-gui-plus

Paused

App Files Files Community

djuna committed on Nov 24, 2024

Commit

080e0ad

verified ·

1 Parent(s): 50ef1f9

Python Notebook

Browse files

@DreadPoor

@xi0v

The Python notebook is attached. Apologies for the delayed response.

Files changed (1) hide show

MergeKitPlus.ipynb +279 -0

MergeKitPlus.ipynb ADDED Viewed

	@@ -0,0 +1,279 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "JHRpOZ5g3Flv"
+   },
+   "source": [
+    "# Clone Mergekit and Install the dependencies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "x8548KdSbMs2"
+   },
+   "outputs": [],
+   "source": [
+    "!nvidia-smi"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "4alsYntU1gNU"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install -qqq git+https://github.com/arcee-ai/mergekit.git"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "DtGY8BAo3alb"
+   },
+   "source": [
+    "# Mergekit Config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "CmfbveTblP0F"
+   },
+   "outputs": [],
+   "source": [
+    "# @markdown What will your model's name be?\n",
+    "MODEL_NAME = 'SmolMoE' # @param {type:\"string\"}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "r2-rAjH93w8x"
+   },
+   "outputs": [],
+   "source": [
+    "mergekit_yaml = \"\"\"\n",
+    "base_model: BEE-spoke-data/smol_llama-220M-GQA\n",
+    "gate_mode: random\n",
+    "dtype: bfloat16\n",
+    "experts:\n",
+    "  - source_model: BEE-spoke-data/smol_llama-220M-GQA\n",
+    "  - source_model: BEE-spoke-data/smol_llama-220M-GQA\n",
+    "\"\"\" # @param {type:\"string\"}\n",
+    "with open('config.yaml', 'w', encoding=\"utf-8\") as f:\n",
+    "    f.write(mergekit_yaml)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "WiCGZXysn_mD"
+   },
+   "source": [
+    "# Mergekit Runtime"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "0scr7Ed_4GPe"
+   },
+   "outputs": [],
+   "source": [
+    "low_cpu_ram = True # @param {type:\"boolean\"}\n",
+    "runtime = \"GPU\" # @param [\"CPU\", \"GPU\"]\n",
+    "task = \"merge-mega\" # @param [\"merge\", \"merge-mega\", \"moe\", \"extract\"]\n",
+    "# @markdown ### Mergekit arguments\n",
+    "\n",
+    "trust_remote_code = False # @param {type:\"boolean\"}\n",
+    "clone_tensors = True # @param {type:\"boolean\"}\n",
+    "low_ram = True # @param {type:\"boolean\"}\n",
+    "out_shard_size = 500M # @param {type:\"string\"}\n",
+    "\n",
+    "# @markdown ### Extract LoRA (experimental)\n",
+    "base_model = \"unsloth/Llama-3.2-3B-Instruct\" # @param {type:\"string\"}\n",
+    "finetuned_model = \"theprint/ReWiz-Llama-3.2-3B\" # @param {type:\"string\"}\n",
+    "extract_rank = 32 # @param {type:\"number\"}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "QBhBgX7U52Xn"
+   },
+   "source": [
+    "## Run the program"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true,
+    "id": "3Y7aBJXL54GJ"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import shutil\n",
+    "\n",
+    "def empty_folder(folder_path):\n",
+    "  \"\"\"Delete `folder_path` with its contents and recreate it empty.\n",
+    "\n",
+    "  A folder that does not exist yet is left alone (it is not created).\n",
+    "  \"\"\"\n",
+    "  if os.path.exists(folder_path):\n",
+    "    shutil.rmtree(folder_path)\n",
+    "    os.makedirs(folder_path)\n",
+    "\n",
+    "# Clear leftovers of a previous run from the two output paths used below.\n",
+    "empty_folder('merge')\n",
+    "empty_folder('lora')\n",
+    "\n",
+    "# Choose the command-line entry point for the selected task.\n",
+    "if task == \"merge\":\n",
+    "    cli = \"mergekit-yaml\"\n",
+    "elif task == \"merge-mega\":\n",
+    "    cli = \"mergekit-mega\"\n",
+    "elif task == \"moe\":\n",
+    "    cli = \"mergekit-moe\"\n",
+    "elif task == \"extract\":\n",
+    "    if base_model == \"\" or finetuned_model == \"\":\n",
+    "        raise ValueError(\"base_model and finetuned_model cannot be empty\")\n",
+    "    !pip install -qqq bitsandbytes\n",
+    "    cli = f\"mergekit-extract-lora {finetuned_model} {base_model} lora --rank={extract_rank}\"\n",
+    "else:\n",
+    "    # Fail here with a clear message; otherwise `cli` stays undefined and\n",
+    "    # print(cli) below raises a confusing NameError.\n",
+    "    raise ValueError(f\"Unknown task: {task!r}\")\n",
+    "\n",
+    "# The config-driven tasks share the same positional arguments and option flags.\n",
+    "if task in [\"merge\", \"moe\", \"merge-mega\"]:\n",
+    "    cli += \" config.yaml merge --copy-tokenizer --allow-crimes\"\n",
+    "    if runtime == \"GPU\":\n",
+    "        if task in [\"merge\", \"merge-mega\"]:\n",
+    "            cli += \" --cuda\"\n",
+    "        elif task == \"moe\":\n",
+    "            cli += \" --device cuda --cuda\"\n",
+    "    else:\n",
+    "        cli += \" --no-cuda\"\n",
+    "\n",
+    "    if trust_remote_code:\n",
+    "        cli += \" --trust-remote-code\"\n",
+    "    if clone_tensors:\n",
+    "        cli += \" --clone-tensors\"\n",
+    "    if low_ram:\n",
+    "        cli += f\" --out-shard-size {out_shard_size} --lazy-unpickle\"\n",
+    "        # --low-cpu-memory is only added when low_ram is enabled as well.\n",
+    "        if low_cpu_ram:\n",
+    "            cli += \" --low-cpu-memory\"\n",
+    "\n",
+    "# Show the exact command before handing it to the shell.\n",
+    "print(cli)\n",
+    "!{cli}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "HyeGrtGrDn6S"
+   },
+   "source": [
+    "# Run Inference on the Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "wpy7Ahw6hghH"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install -qU transformers bitsandbytes accelerate\n",
+    "import torch\n",
+    "from transformers import AutoTokenizer, pipeline\n",
+    "\n",
+    "# Same path that was passed to the merge command as its output folder.\n",
+    "model = \"merge\"\n",
+    "\n",
+    "# Load in float16 with 4-bit loading explicitly switched off.\n",
+    "load_options = {\"torch_dtype\": torch.float16, \"load_in_4bit\": False}\n",
+    "\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model)\n",
+    "generator = pipeline(task=\"text-generation\", model=model, model_kwargs=load_options)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "f05D7q8wiF-5"
+   },
+   "outputs": [],
+   "source": [
+    "# Single-turn chat rendered to a prompt string, then sampled from the pipeline.\n",
+    "messages = [\n",
+    "    {\n",
+    "        \"role\": \"user\",\n",
+    "        \"content\": \"Explain what a Mixture of Experts is in less than 100 words.\",\n",
+    "    }\n",
+    "]\n",
+    "prompt = tokenizer.apply_chat_template(\n",
+    "    messages,\n",
+    "    tokenize=False,\n",
+    "    add_generation_prompt=True,\n",
+    ")\n",
+    "sampling_options = dict(max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)\n",
+    "outputs = generator(prompt, **sampling_options)\n",
+    "print(outputs[0][\"generated_text\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Upload to Hugging Face"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# @title ## Upload model to Hugging Face { display-mode: \"form\" }\n",
+    "# @markdown Enter your HF username and the name of the Colab secret that stores your [Hugging Face access token](https://huggingface.co/settings/tokens).\n",
+    "username = 'username' # @param {type:\"string\"}\n",
+    "token_env = 'hf_token' # @param {type:\"string\"}\n",
+    "\n",
+    "!pip install -qU huggingface_hub\n",
+    "\n",
+    "# Imported here so this cell does not depend on an earlier cell having run.\n",
+    "import os\n",
+    "import yaml\n",
+    "\n",
+    "from huggingface_hub import HfApi\n",
+    "from google.colab import userdata\n",
+    "\n",
+    "def _find_output_dir():\n",
+    "    \"\"\"Return the first result folder that exists and is non-empty.\n",
+    "\n",
+    "    'merge' is checked before 'lora'.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if neither folder exists with content.\n",
+    "    \"\"\"\n",
+    "    if os.path.exists('merge') and os.listdir('merge'):\n",
+    "        return \"merge\"\n",
+    "    if os.path.exists('lora') and os.listdir('lora'):\n",
+    "        return \"lora\"\n",
+    "    raise ValueError(\"Both folders are empty or do not exist.\")\n",
+    "\n",
+    "\n",
+    "# Defined in the secrets tab in Google Colab\n",
+    "api = HfApi(token=userdata.get(token_env))\n",
+    "try:\n",
+    "    # The helper has its own name so this assignment no longer replaces the\n",
+    "    # function with its result.\n",
+    "    output_dir = _find_output_dir()\n",
+    "    repo_id = f\"{username}/{MODEL_NAME}\"\n",
+    "    api.create_repo(\n",
+    "        repo_id=repo_id,\n",
+    "        repo_type=\"model\",\n",
+    "        exist_ok=True,\n",
+    "    )\n",
+    "    api.upload_folder(\n",
+    "        repo_id=repo_id,\n",
+    "        folder_path=output_dir,\n",
+    "    )\n",
+    "except ValueError as e:\n",
+    "    # Only the \"nothing to upload\" case (and other ValueErrors) is reported here.\n",
+    "    print(e)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}