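"""Gradio demo for LayerDiffuse (unofficial): generates images with transparency
by calling ComfyUI node classes (checkpoint loading, CLIP text encoding,
KSampler, VAE encode/decode, and the layerdiffuse custom nodes) directly from
Python."""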
import sys
import os
import torch
from PIL import Image, ImageSequence, ImageOps
from typing import Any, List
import numpy as np
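# Make a local ./ComfyUI checkout importable; ComfyUI's internal modules expect
# the repository root to be on sys.path.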
sys.path.append(os.path.dirname("./ComfyUI/"))
from ComfyUI.nodes import (
CheckpointLoaderSimple,
VAEDecode,
VAEEncode,
KSampler,
EmptyLatentImage,
CLIPTextEncode,
)
from ComfyUI.comfy_extras.nodes_compositing import JoinImageWithAlpha
from ComfyUI.comfy_extras.nodes_mask import InvertMask, MaskToImage
from ComfyUI.comfy import samplers
from ComfyUI.custom_nodes.layerdiffuse.layered_diffusion import (
LayeredDiffusionFG,
LayeredDiffusionDecode,
LayeredDiffusionCond,
)
import gradio as gr
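# Instantiate each ComfyUI node once at import time and keep a reference to its
# execution method. load_checkpoint returns a (model, clip, vae) tuple, which is
# indexed below as ckpt[0], ckpt[1], and ckpt[2].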
with torch.inference_mode():
ckpt_load_checkpoint = CheckpointLoaderSimple().load_checkpoint
ckpt = ckpt_load_checkpoint(ckpt_name="juggernautXL_v8Rundiffusion.safetensors")
cliptextencode = CLIPTextEncode().encode
emptylatentimage_generate = EmptyLatentImage().generate
ksampler_sample = KSampler().sample
vae_decode = VAEDecode().decode
vae_encode = VAEEncode().encode
ld_fg_apply_layered_diffusion = LayeredDiffusionFG().apply_layered_diffusion
ld_cond_apply_layered_diffusion = LayeredDiffusionCond().apply_layered_diffusion
ld_decode = LayeredDiffusionDecode().decode
mask_to_image = MaskToImage().mask_to_image
invert_mask = InvertMask().invert
join_image_with_alpha = JoinImageWithAlpha().join_image_with_alpha
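# Convert ComfyUI image tensors (batched, channels-last, values in [0, 1]) into
# a list of PIL images.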
def tensor_to_pil(images: torch.Tensor | List[torch.Tensor]) -> List[Image.Image]:
if not isinstance(images, list):
images = [images]
imgs = []
for image in images:
i = 255.0 * image.cpu().numpy()
img = Image.fromarray(np.clip(np.squeeze(i), 0, 255).astype(np.uint8))
imgs.append(img)
return imgs
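# Edge-pad an image so width and height are multiples of 64, then paste it
# centered onto a square canvas.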
def pad_image(input_image):
pad_w, pad_h = (
np.max(((2, 2), np.ceil(np.array(input_image.size) / 64).astype(int)), axis=0)
* 64
- input_image.size
)
im_padded = Image.fromarray(
np.pad(np.array(input_image), ((0, pad_h), (0, pad_w), (0, 0)), mode="edge")
)
w, h = im_padded.size
if w == h:
return im_padded
elif w > h:
new_image = Image.new(im_padded.mode, (w, w), (0, 0, 0))
new_image.paste(im_padded, (0, (w - h) // 2))
return new_image
else:
new_image = Image.new(im_padded.mode, (h, h), (0, 0, 0))
new_image.paste(im_padded, ((h - w) // 2, 0))
return new_image
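# Convert a PIL image into (image, mask) tensors, following the conventions of
# ComfyUI's LoadImage node: apply EXIF orientation, normalize RGB to [0, 1], and
# use the inverted alpha channel (if present) as the mask.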
def pil_to_tensor(image: Image.Image) -> tuple[torch.Tensor, torch.Tensor]:
output_images = []
output_masks = []
for i in ImageSequence.Iterator(image):
i = ImageOps.exif_transpose(i)
if i.mode == "I":
i = i.point(lambda i: i * (1 / 255))
image = i.convert("RGB")
image = np.array(image).astype(np.float32) / 255.0
image = torch.from_numpy(image)[None,]
if "A" in i.getbands():
mask = np.array(i.getchannel("A")).astype(np.float32) / 255.0
mask = 1.0 - torch.from_numpy(mask)
else:
mask = torch.zeros((64, 64), dtype=torch.float32, device="cpu")
output_images.append(image)
output_masks.append(mask.unsqueeze(0))
if len(output_images) > 1:
output_image = torch.cat(output_images, dim=0)
output_mask = torch.cat(output_masks, dim=0)
else:
output_image = output_images[0]
output_mask = output_masks[0]
return (output_image, output_mask)
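# Main pipeline. With an input image, LayeredDiffusionCond conditions generation
# on it as foreground or background and only the RGB decode is returned; without
# one, LayeredDiffusionFG generates a transparent foreground from the prompt and
# the decoded alpha layer is composited into an RGBA output.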
def predict(
prompt: str,
negative_prompt: str,
input_image: Image.Image | None,
cond_mode: str,
seed: int,
sampler_name: str,
scheduler: str,
steps: int,
cfg: float,
denoise: float,
):
with torch.inference_mode():
cliptextencode_prompt = cliptextencode(
text=prompt,
clip=ckpt[1],
)
cliptextencode_negative_prompt = cliptextencode(
text=negative_prompt,
clip=ckpt[1],
)
emptylatentimage_sample = emptylatentimage_generate(
width=1024, height=1024, batch_size=1
)
if input_image is not None:
img_tensor = pil_to_tensor(pad_image(input_image).resize((1024, 1024)))
img_latent = vae_encode(pixels=img_tensor[0], vae=ckpt[2])
layereddiffusionapply_sample = ld_cond_apply_layered_diffusion(
config=cond_mode,
weight=1,
model=ckpt[0],
cond=cliptextencode_prompt[0],
uncond=cliptextencode_negative_prompt[0],
latent=img_latent[0],
)
ksampler = ksampler_sample(
steps=steps,
cfg=cfg,
sampler_name=sampler_name,
scheduler=scheduler,
seed=seed,
model=layereddiffusionapply_sample[0],
positive=layereddiffusionapply_sample[1],
negative=layereddiffusionapply_sample[2],
latent_image=emptylatentimage_sample[0],
denoise=denoise,
)
vaedecode_sample = vae_decode(
samples=ksampler[0],
vae=ckpt[2],
)
layereddiffusiondecode_sample = ld_decode(
sd_version="SDXL",
sub_batch_size=16,
samples=ksampler[0],
images=vaedecode_sample[0],
)
rgb_img = tensor_to_pil(vaedecode_sample[0])
return flatten([rgb_img])
else:
layereddiffusionapply_sample = ld_fg_apply_layered_diffusion(
config="SDXL, Conv Injection", weight=1, model=ckpt[0]
)
ksampler = ksampler_sample(
steps=steps,
cfg=cfg,
sampler_name=sampler_name,
scheduler=scheduler,
seed=seed,
model=layereddiffusionapply_sample[0],
positive=cliptextencode_prompt[0],
negative=cliptextencode_negative_prompt[0],
latent_image=emptylatentimage_sample[0],
denoise=denoise,
)
vaedecode_sample = vae_decode(
samples=ksampler[0],
vae=ckpt[2],
)
layereddiffusiondecode_sample = ld_decode(
sd_version="SDXL",
sub_batch_size=16,
samples=ksampler[0],
images=vaedecode_sample[0],
)
mask = mask_to_image(mask=layereddiffusiondecode_sample[1])
ld_image = tensor_to_pil(layereddiffusiondecode_sample[0][0])
inverted_mask = invert_mask(mask=layereddiffusiondecode_sample[1])
rgba_img = join_image_with_alpha(
image=layereddiffusiondecode_sample[0], alpha=inverted_mask[0]
)
rgba_img = tensor_to_pil(rgba_img[0])
mask = tensor_to_pil(mask[0])
rgb_img = tensor_to_pil(vaedecode_sample[0])
return flatten([rgba_img, mask, rgb_img, ld_image])
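# Prompt-only examples surfaced through gr.Examples below.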
examples = [["An old man sitting on a chair looking at the sky", ""]]
def flatten(l: List[List[Any]]) -> List[Any]:
return [item for sublist in l for item in sublist]
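# Wrapper used by gr.Examples: runs predict with no input image and default
# sampler settings.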
def predict_examples(prompt, negative_prompt):
return predict(
prompt, negative_prompt, None, None, 0, "euler", "normal", 20, 8.0, 1.0
)
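# Cap the overall width of the Gradio app.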
css = """
.gradio-container{
max-width: 60rem;
}
"""
with gr.Blocks(css=css) as blocks:
gr.Markdown("""# LayerDiffuse (unofficial)
""")
with gr.Row():
with gr.Column():
prompt = gr.Text(label="Prompt")
negative_prompt = gr.Text(label="Negative Prompt")
button = gr.Button("Generate")
with gr.Accordion(open=False, label="Input Images (Optional)"):
cond_mode = gr.Radio(
value="SDXL, Foreground",
choices=["SDXL, Foreground", "SDXL, Background"],
info="Whether to use input image as foreground or background",
)
input_image = gr.Image(label="Input Image", type="pil")
with gr.Accordion(open=False, label="Advanced Options"):
seed = gr.Slider(
label="Seed",
value=0,
minimum=-1,
maximum=0xFFFFFFFFFFFFFFFF,
step=1,
randomize=True,
)
sampler_name = gr.Dropdown(
choices=samplers.KSampler.SAMPLERS,
label="Sampler Name",
value=samplers.KSampler.SAMPLERS[0],
)
scheduler = gr.Dropdown(
choices=samplers.KSampler.SCHEDULERS,
label="Scheduler",
value=samplers.KSampler.SCHEDULERS[0],
)
steps = gr.Number(
label="Steps", value=20, minimum=1, maximum=10000, step=1
)
cfg = gr.Number(
label="CFG", value=8.0, minimum=0.0, maximum=100.0, step=0.1
)
denoise = gr.Number(
label="Denoise", value=1.0, minimum=0.0, maximum=1.0, step=0.01
)
with gr.Column(scale=1.8):
gallery = gr.Gallery(
columns=[2], rows=[2], object_fit="contain", height="unset"
)
inputs = [
prompt,
negative_prompt,
input_image,
cond_mode,
seed,
sampler_name,
scheduler,
steps,
cfg,
denoise,
]
outputs = [gallery]
gr.Examples(
fn=predict_examples,
examples=examples,
inputs=[prompt, negative_prompt],
outputs=outputs,
cache_examples=False,
)
button.click(fn=predict, inputs=inputs, outputs=outputs)
if __name__ == "__main__":
blocks.launch()