Virt-io committed
Commit eeb0016
Parent: f9a24f6

Upload Imat_AutoGGUF.ipynb

Files changed (1)
  1. Imat_AutoGGUF.ipynb +127 -0
Imat_AutoGGUF.ipynb ADDED
@@ -0,0 +1,127 @@
+ {
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "gpuType": "T4"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "source": [
+ "# @title # ⚡ Imat-AutoGGUF\n",
+ "\n",
+ "# @markdown Made by https://huggingface.co/Virt-io\n",
+ "\n",
+ "# @markdown Adapted LazyMergekit from https://github.com/mlabonne/llm-course to work with Imatrix\n",
+ "\n",
+ "# @markdown\n",
+ "\n",
+ "# @markdown The `token` corresponds to the name of the secret that stores your [Hugging Face access token](https://huggingface.co/settings/tokens) in Colab.\n",
+ "\n",
+ "# @markdown ---\n",
+ "\n",
+ "# @markdown ### ⚡ Quantization parameters\n",
+ "MODEL_ID = \"TinyLlama/TinyLlama-1.1B-Chat-v1.0\" # @param {type:\"string\"}\n",
+ "IMATRIX_OPTION = 'Imatrix' # @param [\"Imatrix\", \"Imatrix-RP\"]\n",
+ "if IMATRIX_OPTION == \"Imatrix\":\n",
+ "    IMATRIX = \"Google-Colab-Imatrix-GGUF/Imatrix/imatrix.txt\"\n",
+ "elif IMATRIX_OPTION == \"Imatrix-RP\":\n",
+ "    IMATRIX = \"Google-Colab-Imatrix-GGUF/Imatrix/imatrix-with-rp-data.txt\"\n",
+ "print(IMATRIX)\n",
+ "QUANTIZATION_METHODS = \"IQ4_NL, Q8_0\" # @param {type:\"string\"}\n",
+ "QUANTIZATION_METHODS = QUANTIZATION_METHODS.replace(\" \", \"\").split(\",\")\n",
+ "\n",
+ "# @markdown ---\n",
+ "\n",
+ "# @markdown ### 🤗 Hugging Face Hub\n",
+ "username = \"Virt-io\" # @param {type:\"string\"}\n",
+ "token = \"HF_TOKEN\" # @param {type:\"string\"}\n",
+ "\n",
+ "MODEL_NAME = MODEL_ID.split('/')[-1]\n",
+ "\n",
+ "# Clone llama.cpp\n",
+ "!git clone https://github.com/ggerganov/llama.cpp\n",
+ "!cd llama.cpp && git pull\n",
+ "\n",
+ "# Download model\n",
+ "!git lfs install\n",
+ "!git clone https://huggingface.co/{MODEL_ID}\n",
+ "\n",
+ "# Download Imatrix calibration data\n",
+ "!git clone https://huggingface.co/Virt-io/Google-Colab-Imatrix-GGUF\n",
+ "\n",
+ "# Install Python dependencies\n",
+ "!pip install -r llama.cpp/requirements/requirements-convert.txt\n",
+ "\n",
+ "# Build llama.cpp with CUDA support\n",
+ "!cd llama.cpp && make clean && LLAMA_CUBLAS=1 LLAMA_CUDA_FORCE_MMQ=1 LLAMA_LTO=1 LLAMA_CUDA_DMMV_X=64 LLAMA_CUDA_MMV_Y=4 LLAMA_CUDA_KQUANTS_ITER=2 LLAMA_CUDA_F16=1 LLAMA_CUDA_DMMV_F16=1 make -j16\n",
+ "\n",
+ "# Convert to fp16\n",
+ "fp16 = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.gguf\"\n",
+ "!python llama.cpp/convert.py {MODEL_NAME} --outtype f16 --outfile {fp16}\n",
+ "\n",
+ "# Run imatrix\n",
+ "imat_dat = f\"{fp16}.{IMATRIX_OPTION}.dat\"\n",
+ "!./llama.cpp/imatrix -ngl 100 -c 512 -b 512 --model {fp16} -f {IMATRIX} -o {imat_dat}\n",
+ "\n",
+ "# Quantize the model for each method in the QUANTIZATION_METHODS list\n",
+ "for method in QUANTIZATION_METHODS:\n",
+ "    qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{method.upper()}.gguf\"\n",
+ "    !./llama.cpp/quantize --imatrix {imat_dat} {fp16} {qtype} {method}"
+ ],
+ "metadata": {
+ "id": "fD24jJxq7t3k"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# @markdown Upload to HF\n",
+ "!pip install -q huggingface_hub\n",
+ "from huggingface_hub import create_repo, HfApi\n",
+ "from google.colab import userdata, runtime\n",
+ "\n",
+ "# Defined in the secrets tab in Google Colab\n",
+ "hf_token = userdata.get(token)\n",
+ "api = HfApi()\n",
+ "\n",
+ "# Create empty repo\n",
+ "create_repo(\n",
+ "    repo_id=f\"{username}/{MODEL_NAME}-GGUF\",\n",
+ "    repo_type=\"model\",\n",
+ "    exist_ok=True,\n",
+ "    token=hf_token\n",
+ ")\n",
+ "\n",
+ "# Upload GGUF files\n",
+ "api.upload_folder(\n",
+ "    folder_path=MODEL_NAME,\n",
+ "    repo_id=f\"{username}/{MODEL_NAME}-GGUF\",\n",
+ "    allow_patterns=[\"*.gguf\", \"*.fp16.gguf\", \"*.dat\", \"*.md\"],\n",
+ "    token=hf_token\n",
+ ")\n",
+ "\n",
+ "# Kill runtime\n",
+ "# runtime.unassign()"
+ ],
+ "metadata": {
+ "id": "F7Q8_Y1_e3BX"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+ }
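
For context, the two cells above convert a model to fp16, compute an importance matrix on calibration text, quantize with that matrix (imatrix quantization weights the rounding error by observed activation statistics, which is why `imatrix` runs before `quantize`), and push the results to a `{username}/{MODEL_NAME}-GGUF` repo. A minimal sketch of how such an upload might be consumed afterwards, assuming the `llama-cpp-python` bindings are installed; the concrete repo id and filename below just follow the notebook's naming scheme and are illustrative, not part of this commit:

# Hypothetical follow-up (not part of the commit): download one of the
# uploaded quants and run it with llama-cpp-python.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Assumed names, per the {username}/{MODEL_NAME}-GGUF and
# {model}.{METHOD}.gguf patterns used in the notebook.
gguf_path = hf_hub_download(
    repo_id="Virt-io/TinyLlama-1.1B-Chat-v1.0-GGUF",
    filename="tinyllama-1.1b-chat-v1.0.IQ4_NL.gguf",
)

# Load the quantized model and run a short completion.
llm = Llama(model_path=gguf_path, n_ctx=512)
out = llm("Q: Why quantize with an importance matrix? A:", max_tokens=64)
print(out["choices"][0]["text"])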