from PIL import Image
import requests
import os
from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
from diffusers import DiffusionPipeline
import torch
from torch import autocast
import gradio as gr
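# Prefer an explicit API token from the environment; `True` falls back to the
# locally cached `huggingface-cli login` credentials.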
auth_token = os.environ.get("API_TOKEN") or True
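# Example image from the CLIPSeg repo (fetched at startup; not used by the demo below).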
url = "https://github.com/timojl/clipseg/blob/master/example_image.jpg?raw=true"
image = Image.open(requests.get(url, stream=True).raw)
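# CLIPSeg produces the text-conditioned segmentation mask consumed by the pipeline.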
processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")
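# Community "text_inpainting" pipeline: CLIPSeg generates the mask, Stable Diffusion inpaints it.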
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting",
    custom_pipeline="text_inpainting",
    segmentation_model=model,
    segmentation_processor=processor,
    use_auth_token=auth_token,
)
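# Run on GPU when available, otherwise fall back to CPU.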
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipe.to(device)
def pad_image(image):
    """Pad a non-square image to a square with black borders, keeping it centered."""
    w, h = image.size
    if w == h:
        return image
    elif w > h:
        # Landscape: pad top and bottom.
        new_image = Image.new(image.mode, (w, w), (0, 0, 0))
        new_image.paste(image, (0, (w - h) // 2))
        return new_image
    else:
        # Portrait: pad left and right.
        new_image = Image.new(image.mode, (h, h), (0, 0, 0))
        new_image.paste(image, ((h - w) // 2, 0))
        return new_image
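# e.g. a 640x480 input becomes a 640x640 image with the original centered vertically.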
def process_image(image, text, prompt):
    """Segment `text` in the image with CLIPSeg, then inpaint the masked region with `prompt`."""
    image = pad_image(image)
    # The Stable Diffusion inpainting checkpoint expects 512x512 inputs.
    image = image.resize((512, 512))
    # Only enable mixed precision when a GPU is actually available.
    with autocast("cuda", enabled=device == "cuda"):
        inpainted_image = pipe(image=image, text=text, prompt=prompt).images[0]
    return inpainted_image
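# Headless usage (hypothetical file names), bypassing the Gradio UI:
#   result = process_image(Image.open("example_image.png"), "a glass", "a cup")
#   result.save("inpainted.png")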
title = "Interactive demo: Text-based inpainting with CLIPSeg x Stable Diffusion"
description = "Demo for CLIPSeg, a CLIP-based model for zero- and one-shot image segmentation. CLIPSeg segments an image based on a text prompt, which provides the binary mask that Stable Diffusion needs for inpainting. To use the demo, upload an image, enter a text describing the object to replace and a text describing the replacement, then click 'Submit', or try one of the examples below. Results show up after a few seconds."
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2112.10003'>CLIPSeg: Image Segmentation Using Text and Image Prompts</a> | <a href='https://huggingface.co/docs/transformers/main/en/model_doc/clipseg'>Hugging Face docs</a></p>"
examples = [["example_image.png", "a glass", "a cup"]]
interface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(label="What's the thing you want to replace?"),
        gr.Textbox(label="What do you want as replacement?"),
    ],
    outputs=gr.Image(type="pil"),
    title=title,
    description=description,
    article=article,
    examples=examples,
)
interface.launch(debug=True)