In [1]:
import os
# Process environment for this kernel, assigned ahead of the `import torch` in the next cell.
# NOTE(review): GPU index "4" looks specific to the authoring machine — adjust for your host.
# NOTE(review): WORLD_SIZE is presumably read by a distributed-aware library — confirm it is needed.
os.environ.update(
    CUDA_VISIBLE_DEVICES="4",
    WORLD_SIZE="1",
)

In [2]:
import torch
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipelineLegacy, DDIMScheduler, AutoencoderKL
from PIL import Image

from ip_adapter import IPAdapter

 from .autonotebook import tqdm as notebook_tqdm
2023-10-02 08:44:46.991778: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Device string handed to IPAdapter further down.
device = "cuda"

# Base Stable Diffusion weights and the VAE loaded separately via AutoencoderKL.
base_model_path = "runwayml/stable-diffusion-v1-5"
vae_model_path = "stabilityai/sd-vae-ft-mse"

# IP-Adapter assets (relative paths): adapter checkpoint and image-encoder directory.
ip_ckpt = "models/ip-adapter_sd15.bin"
image_encoder_path = "models/image_encoder/"

In [4]:
def image_grid(imgs, rows, cols):
    """Tile images into a single ``rows`` x ``cols`` sheet.

    Images are placed in row-major order: ``imgs[0]`` goes top-left and each
    row is filled left to right before moving down.

    Args:
        imgs: Sequence of PIL images. The size of ``imgs[0]`` is used as the
            cell size for every tile. An empty sequence is not supported,
            because ``imgs[0]`` is always read.
        rows: Number of rows in the grid.
        cols: Number of columns in the grid.

    Returns:
        A new RGB image of size ``(cols * w, rows * h)`` where ``(w, h)`` is
        the size of ``imgs[0]``.

    Raises:
        AssertionError: If ``len(imgs) != rows * cols``.
    """
    # Raised explicitly rather than via a bare `assert` so the check is not
    # stripped under `python -O`; the exception type stays AssertionError.
    if len(imgs) != rows * cols:
        raise AssertionError(
            f"expected rows*cols = {rows * cols} images, got {len(imgs)}"
        )

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols * w, rows * h))

    for i, img in enumerate(imgs):
        # Row-major placement: index -> (row, column) -> pixel offset.
        row, col = divmod(i, cols)
        grid.paste(img, box=(col * w, row * h))
    return grid

# DDIM scheduler instance that is passed as `scheduler=` to every pipeline below:
# "scaled_linear" beta schedule from 0.00085 to 0.012 over 1000 training timesteps.
noise_scheduler = DDIMScheduler(
    num_train_timesteps=1000,
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    clip_sample=False,
    set_alpha_to_one=False,
    steps_offset=1,
)

# VAE loaded from `vae_model_path`, cast to float16, and passed as `vae=` to every pipeline below.
vae = (
    AutoencoderKL
    .from_pretrained(vae_model_path)
    .to(dtype=torch.float16)
)

## Image Variations

In [None]:
# Text-to-image Stable Diffusion pipeline in float16, built on the shared scheduler and VAE.
# safety_checker and feature_extractor are explicitly passed as None.
pipe = StableDiffusionPipeline.from_pretrained(
    base_model_path,
    vae=vae,
    scheduler=noise_scheduler,
    torch_dtype=torch.float16,
    safety_checker=None,
    feature_extractor=None,
)

In [None]:
# load ip-adapter
# Use the notebook-level `device` from the config cell rather than a second hard-coded
# "cuda", so the target device is changed in one place for every section.
ip_model = IPAdapter(pipe, image_encoder_path, ip_ckpt, device=device)

In [None]:
# read image prompt
# Reference picture; passed as `pil_image` to ip_model.generate() in the next cell.
image = Image.open("assets/my_imgs/ceo.jpg")
# Trailing expression: shown as the cell output only. The resized result is not
# assigned, so `image` keeps referring to the object returned by Image.open().
image.resize((512, 512))

In [None]:
# generate image variations
# `seed` is now actually passed to generate(); previously it was assigned but the
# call repeated the literal, so editing `seed` had no effect on the output.
seed = 42
images = ip_model.generate(pil_image=image, num_samples=4, num_inference_steps=50, seed=seed)
# The four samples in a single row; the bare `grid` renders it as the cell output.
grid = image_grid(images, 1, 4)
grid

## Image-to-Image

In [None]:
# load SD Img2Img pipe
# Drop the previous section's pipeline and adapter before loading the next one.
# pop(..., None) replaces `del pipe, ip_model` so the cell stays re-runnable: it no
# longer raises NameError when either name is already gone (for example after a
# failed earlier attempt, or when the previous section was skipped).
globals().pop("pipe", None)
globals().pop("ip_model", None)
# Ask PyTorch to release cached GPU memory that is no longer in use.
torch.cuda.empty_cache()
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    base_model_path,
    torch_dtype=torch.float16,
    scheduler=noise_scheduler,
    vae=vae,
    feature_extractor=None,
    safety_checker=None,
)

In [None]:
# Inputs for the img2img section: `image` is later passed to generate() as
# `pil_image`, `g_image` as `image`.
image = Image.open("assets/my_imgs/bruna_body.jpg")
g_image = Image.open("assets/my_imgs/she.jpg")
# Side-by-side 256x384 previews as the cell output; the resized copies are not
# assigned back, so both names still refer to the opened files.
image_grid([src.resize((256, 384)) for src in (image, g_image)], 1, 2)

In [None]:
# Wrap the img2img pipeline loaded above in an IPAdapter on the configured device.
ip_model = IPAdapter(
    pipe,
    image_encoder_path,
    ip_ckpt,
    device=device,
)

In [None]:
# Img2img run: `image` goes in as pil_image and `g_image` as image, with strength 0.6.
# NOTE(review): presumably pil_image is the IP-Adapter prompt and image the init picture — confirm.
images = ip_model.generate(
    pil_image=image,
    image=g_image,
    strength=0.6,
    num_samples=4,
    num_inference_steps=50,
    seed=42,
)
# Four samples in one row, rendered as the cell output.
grid = image_grid(images, 1, 4)
grid

## Inpainting

In [None]:
# load SD Inpainting pipe
# Release the previous section's pipeline and adapter before loading this one. The
# cleanup used to be commented out, which left both objects referenced while the
# new pipeline was loaded and gave empty_cache() nothing of theirs to reclaim.
# pop(..., None) is used instead of `del` so the cell also works when those names
# do not exist (for example when this section is run on its own).
globals().pop("pipe", None)
globals().pop("ip_model", None)
# Ask PyTorch to release cached GPU memory that is no longer in use.
torch.cuda.empty_cache()
pipe = StableDiffusionInpaintPipelineLegacy.from_pretrained(
    base_model_path,
    torch_dtype=torch.float16,
    scheduler=noise_scheduler,
    vae=vae,
    feature_extractor=None,
    safety_checker=None,
)

In [None]:
# Picture later passed as `pil_image` to generate(); converted to RGB right after opening.
image = (
    Image.open("assets/my_imgs/dress_masked.jpg")
    .convert("RGB")
)
# Display only: the 512x768 result is not assigned back to `image`.
image.resize((512, 768))

In [None]:
# Picture to inpaint and its mask, both resized to 512x768 on load.
masked_image = (
    Image.open("assets/my_imgs/raylane_outfit_img.jpg")
    .resize((512, 768))
)
mask = (
    Image.open("assets/my_imgs/raylane_outfit_mask.jpg")
    .resize((512, 768))
)
# Half-size previews side by side as the cell output.
image_grid([pic.resize((256, 384)) for pic in (masked_image, mask)], 1, 2)

In [None]:
# load ip-adapter
# Use the notebook-level `device` from the config cell rather than a second hard-coded
# "cuda", matching how the img2img section constructs its adapter.
ip_model = IPAdapter(pipe, image_encoder_path, ip_ckpt, device=device)

In [None]:
# Inpainting run: `image` as pil_image, `masked_image` as image, `mask` as mask_image.
images = ip_model.generate(
    pil_image=image,
    image=masked_image,
    mask_image=mask,
    strength=0.65,
    num_samples=4,
    num_inference_steps=20,
    seed=123456123,
)
# Four samples in one row, rendered as the cell output.
grid = image_grid(images, 1, 4)
grid