neuralwebtech
/

mental_health_counseling_gemma_7b_4bit_q

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "UXKT8SDQQ1tI"
+      },
+      "outputs": [],
+      "source": [
+        "%%capture\n",
+        "# Setup cell: imports used later in the notebook, plus the unsloth install\n",
+        "# that matches the GPU generation of this runtime. Output is silenced by %%capture.\n",
+        "import re\n",
+        "from pprint import pprint\n",
+        "\n",
+        "import torch\n",
+        "\n",
+        "# CUDA compute capability of the current device; only the major number is used below.\n",
+        "major_version, minor_version = torch.cuda.get_device_capability()\n",
+        "if major_version < 8:\n",
+        "    # Older GPUs (V100, Tesla T4, RTX 20xx)\n",
+        "    !pip install \"unsloth[colab] @ git+https://github.com/unslothai/unsloth.git\"\n",
+        "else:\n",
+        "    # Newer GPUs such as Ampere and Hopper (RTX 30xx, RTX 40xx, A100, H100, L40)\n",
+        "    !pip install \"unsloth[colab-ampere] @ git+https://github.com/unslothai/unsloth.git\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from unsloth import FastLanguageModel\n",
+        "import torch\n",
+        "\n",
+        "# Settings handed to FastLanguageModel.from_pretrained when the model is loaded.\n",
+        "\n",
+        "# Load the checkpoint with 4-bit quantization.\n",
+        "load_in_4bit = True\n",
+        "# None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+        "dtype = None\n",
+        "# Any length may be chosen; RoPE scaling is supported automatically by unsloth.\n",
+        "max_seq_length = 2048"
+      ],
+      "metadata": {
+        "id": "Q6gVomWzQ7hU"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Prompt template with two positional slots: the context first, the response second.\n",
+        "# The response slot is left empty at generation time so the model fills it in.\n",
+        "alpaca_prompt = \"\"\"Below is an instruction that describes a task, paired with an input that provides further context.\n",
+        " Write a response that appropriately completes the request.\n",
+        "\n",
+        "### Context:\n",
+        "{}\n",
+        "\n",
+        "### Response:\n",
+        "{}\"\"\"\n",
+        "\n",
+        "# Load the fine-tuned checkpoint and its tokenizer using the loader settings defined earlier.\n",
+        "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+        "    model_name=\"neuralwebtech/mental_health_counseling_gemma_7b_4bit_q\",  # the model that was used for training\n",
+        "    max_seq_length=max_seq_length,\n",
+        "    dtype=dtype,\n",
+        "    load_in_4bit=load_in_4bit,\n",
+        ")\n",
+        "# Switch the model to unsloth's inference mode (advertised upstream as ~2x faster generation).\n",
+        "FastLanguageModel.for_inference(model)"
+      ],
+      "metadata": {
+        "id": "_ItV-FhgRC5t"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# The user's message that fills the \"### Context:\" slot of the prompt.\n",
+        "# Defined here so the cell runs on a fresh kernel; edit it to ask something else.\n",
+        "text = \"I have been feeling anxious lately and I am having trouble sleeping. What can I do?\"\n",
+        "\n",
+        "# Tokenize a single-prompt batch and move the tensors to the GPU.\n",
+        "inputs = tokenizer(\n",
+        "[\n",
+        "    alpaca_prompt.format(\n",
+        "        text, # context\n",
+        "        \"\", # response - leave this blank for generation!\n",
+        "    )\n",
+        "], return_tensors = \"pt\").to(\"cuda\")\n",
+        "\n",
+        "# Generate at most 128 new tokens after the prompt.\n",
+        "outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)\n",
+        "# skip_special_tokens keeps tokenizer special tokens (e.g. end-of-sequence markers)\n",
+        "# out of the decoded text; final_out is a list with one string per prompt.\n",
+        "final_out = tokenizer.batch_decode(outputs, skip_special_tokens = True)\n"
+      ],
+      "metadata": {
+        "id": "8eTx88KiRDiL"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def print_response(lines):\n",
+        "    \"\"\"Extract the model's reply from decoded generation output.\n",
+        "\n",
+        "    Despite the name, nothing is printed: the reply is returned to the caller.\n",
+        "\n",
+        "    Args:\n",
+        "        lines: iterable of decoded strings; they are joined with newlines before searching.\n",
+        "\n",
+        "    Returns:\n",
+        "        The text after the first '### Response:' marker, up to an '<eos>' marker or the\n",
+        "        end of the text, with surrounding whitespace removed. Returns 'No response'\n",
+        "        when the marker is not present.\n",
+        "    \"\"\"\n",
+        "    text = '\\n'.join(lines)\n",
+        "    # DOTALL lets '.' cross newlines so multi-line replies are kept whole; the lazy\n",
+        "    # capture stops at the first <eos> marker if one survived decoding.\n",
+        "    response_match = re.search(r'### Response:\\s*(.*?)(?:<eos>|\\Z)', text, re.DOTALL)\n",
+        "    if response_match is None:\n",
+        "        return \"No response\"\n",
+        "    return response_match.group(1).strip()"
+      ],
+      "metadata": {
+        "id": "z5s-5_0MRHPt"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Pull the reply text out of the decoded generation, then pretty-print it.\n",
+        "reply = print_response(final_out)\n",
+        "pprint(reply)"
+      ],
+      "metadata": {
+        "id": "_DlE2xjBRHUk"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "xHwuwJ-6RHck"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}