{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "4c409c3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"4\" \n",
    "os.environ[\"WORLD_SIZE\"] = \"1\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "411c59b3-f177-4a10-8925-d931ce572eaa",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/fsx/homes/afruchtman/.envs/ms_env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "2023-10-02 08:44:46.991778: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2023-10-02 08:44:48.824037: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipelineLegacy, DDIMScheduler, AutoencoderKL\n",
    "from PIL import Image\n",
    "\n",
    "from ip_adapter import IPAdapter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6b6dc69c-192d-4d74-8b1e-f0d9ccfbdb49",
   "metadata": {},
   "outputs": [],
   "source": [
    "base_model_path = \"runwayml/stable-diffusion-v1-5\"\n",
    "vae_model_path = \"stabilityai/sd-vae-ft-mse\"\n",
    "image_encoder_path = \"models/image_encoder/\"\n",
    "ip_ckpt = \"models/ip-adapter_sd15.bin\"\n",
    "device=\"cuda\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "63ec542f-8474-4f38-9457-073425578073",
   "metadata": {},
   "outputs": [],
   "source": [
    "def image_grid(imgs, rows, cols):\n",
    "    assert len(imgs) == rows*cols\n",
    "\n",
    "    w, h = imgs[0].size\n",
    "    grid = Image.new('RGB', size=(cols*w, rows*h))\n",
    "    grid_w, grid_h = grid.size\n",
    "    \n",
    "    for i, img in enumerate(imgs):\n",
    "        grid.paste(img, box=(i%cols*w, i//cols*h))\n",
    "    return grid\n",
    "\n",
    "# DDIM scheduler instance; the same object is passed as `scheduler=` to every\n",
    "# pipeline `from_pretrained` call later in the notebook.\n",
    "noise_scheduler = DDIMScheduler(\n",
    "    num_train_timesteps=1000,\n",
    "    beta_start=0.00085,\n",
    "    beta_end=0.012,\n",
    "    beta_schedule=\"scaled_linear\",\n",
    "    clip_sample=False,\n",
    "    set_alpha_to_one=False,\n",
    "    steps_offset=1,\n",
    ")\n",
    "# VAE loaded from `vae_model_path` and cast to float16; passed as `vae=` to\n",
    "# every pipeline below.\n",
    "vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d8081d92-8f42-4bcd-9f83-44aec3f549a9",
   "metadata": {},
   "source": [
    "## Image Variations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3849f9d0-5f68-4a49-9190-69dd50720cae",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the Stable Diffusion text-to-image pipeline in float16, substituting the\n",
    "# scheduler and VAE created earlier. The feature extractor and safety checker\n",
    "# are explicitly set to None.\n",
    "pipe = StableDiffusionPipeline.from_pretrained(\n",
    "    base_model_path,\n",
    "    torch_dtype=torch.float16,\n",
    "    scheduler=noise_scheduler,\n",
    "    vae=vae,\n",
    "    feature_extractor=None,\n",
    "    safety_checker=None\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "81b1ab06-d3ed-4a7e-a356-9ddf1a2eecd6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wrap the pipeline with IP-Adapter on the device chosen in the config cell\n",
    "# (`device`), instead of repeating the \"cuda\" literal here.\n",
    "ip_model = IPAdapter(pipe, image_encoder_path, ip_ckpt, device=device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ec09e937-3904-4d8e-a559-9066502ded36",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the image prompt.\n",
    "image = Image.open(\"assets/my_imgs/ceo.jpg\")\n",
    "# Preview only: the result of `resize` is shown as the cell output and is not\n",
    "# assigned back to `image`.\n",
    "image.resize((512, 512))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b77f52de-a9e4-44e1-aeec-8165414f1273",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate image variations from the image prompt alone.\n",
    "# `seed` is defined once and passed through, so changing it here actually\n",
    "# changes the generation.\n",
    "seed = 42\n",
    "images = ip_model.generate(pil_image=image, num_samples=4, num_inference_steps=50, seed=seed)\n",
    "grid = image_grid(images, 1, 4)\n",
    "grid"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf199405-7cb5-4f78-9973-5fe51c632a41",
   "metadata": {},
   "source": [
    "## Image-to-Image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f089ad0-4683-46d7-ab58-9e5fe8f34c67",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the SD Img2Img pipeline.\n",
    "# First drop the text-to-image pipeline and its adapter, then ask PyTorch to\n",
    "# release its cached CUDA memory before the next pipeline is loaded.\n",
    "del pipe, ip_model\n",
    "torch.cuda.empty_cache()\n",
    "# Same arguments as the text-to-image load: float16, shared scheduler and VAE,\n",
    "# feature extractor and safety checker set to None.\n",
    "pipe = StableDiffusionImg2ImgPipeline.from_pretrained(\n",
    "    base_model_path,\n",
    "    torch_dtype=torch.float16,\n",
    "    scheduler=noise_scheduler,\n",
    "    vae=vae,\n",
    "    feature_extractor=None,\n",
    "    safety_checker=None\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8db2b55-2f56-4eef-b2ca-c5126b14feb7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the image prompt (`image`, later passed as `pil_image=`) and the second\n",
    "# image (`g_image`, later passed as `image=` to `generate`).\n",
    "image = Image.open(\"assets/my_imgs/bruna_body.jpg\")\n",
    "g_image = Image.open(\"assets/my_imgs/she.jpg\")\n",
    "# Preview both side by side at 256x384; the originals are not modified.\n",
    "image_grid([image.resize((256, 384)), g_image.resize((256, 384))], 1, 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a501f284-f295-4673-96ab-e34378da62ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wrap the img2img pipeline with IP-Adapter on the configured device.\n",
    "ip_model = IPAdapter(pipe, image_encoder_path, ip_ckpt, device=device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f58fff74-9ff2-46e6-bc8a-2ad4ae1fbe0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate 4 samples: `pil_image` is the image prompt and `image` is the second\n",
    "# input image read above. `strength=0.6` is passed along with it (presumably\n",
    "# the img2img denoising strength -- confirm against the pipeline docs).\n",
    "images = ip_model.generate(pil_image=image, num_samples=4, num_inference_steps=50, seed=42, image=g_image, strength=0.6)\n",
    "# Show the 4 results in a single row.\n",
    "grid = image_grid(images, 1, 4)\n",
    "grid"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "420a7c45-8697-411f-8374-3c81d5d972e3",
   "metadata": {},
   "source": [
    "## Inpainting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "385cb339-3326-4523-a7db-b09e62d39c80",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the SD inpainting pipeline.\n",
    "# Drop the references to the previous pipeline and adapter first: while those\n",
    "# names are still bound, `empty_cache()` cannot hand their GPU memory back.\n",
    "# Rebinding to None (rather than `del`) also works when this section is run\n",
    "# without the earlier ones, where the names may not exist yet.\n",
    "pipe = ip_model = None\n",
    "torch.cuda.empty_cache()\n",
    "# Same arguments as the earlier loads: float16, shared scheduler and VAE,\n",
    "# feature extractor and safety checker set to None.\n",
    "pipe = StableDiffusionInpaintPipelineLegacy.from_pretrained(\n",
    "    base_model_path,\n",
    "    torch_dtype=torch.float16,\n",
    "    scheduler=noise_scheduler,\n",
    "    vae=vae,\n",
    "    feature_extractor=None,\n",
    "    safety_checker=None\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c47f8ce5-eed0-41ef-9dbb-2272ec4bc224",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the image prompt and force it to RGB.\n",
    "image = Image.open(\"assets/my_imgs/dress_masked.jpg\").convert(\"RGB\")\n",
    "# Preview only: the result of `resize` is shown as the cell output and is not\n",
    "# assigned back to `image`.\n",
    "image.resize((512, 768))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f9b77289-65f5-459b-ada5-5c7c265bb4a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the base image and its mask, both resized to 512x768; they are later\n",
    "# passed to `generate` as `image=` and `mask_image=` respectively.\n",
    "masked_image = Image.open(\"assets/my_imgs/raylane_outfit_img.jpg\").resize((512, 768))\n",
    "mask = Image.open(\"assets/my_imgs/raylane_outfit_mask.jpg\").resize((512, 768))\n",
    "# Preview both side by side at 256x384.\n",
    "image_grid([masked_image.resize((256, 384)), mask.resize((256, 384))], 1, 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e49dbdaa-58eb-4bcf-acab-fa5e08f96dcb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wrap the inpainting pipeline with IP-Adapter on the device chosen in the\n",
    "# config cell (`device`), instead of repeating the \"cuda\" literal here.\n",
    "ip_model = IPAdapter(pipe, image_encoder_path, ip_ckpt, device=device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "945f6800-18b8-4d95-9f5e-e7035166cbbd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run generation with the image prompt, the base image and its mask, then\n",
    "# show the 4 samples in a single row.\n",
    "images = ip_model.generate(\n",
    "    pil_image=image,\n",
    "    num_samples=4,\n",
    "    num_inference_steps=20,\n",
    "    seed=123456123,\n",
    "    image=masked_image,\n",
    "    mask_image=mask,\n",
    "    strength=0.65,\n",
    ")\n",
    "grid = image_grid(images, 1, 4)\n",
    "grid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bc430fba",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}