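# Gradio app: generate images with IP-Adapter FaceID Plus and a depth ControlNet.
# A face reference image supplies the identity embedding; each depth map in the
# given directory conditions the layout of one generated image.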
import os
from datetime import datetime

import numpy as np
import torch
import gradio as gr
from diffusers import (
    DDIMScheduler,
    AutoencoderKL,
    StableDiffusionControlNetPipeline,
    ControlNetModel,
)
from diffusers.utils import load_image
from insightface.app import FaceAnalysis
from insightface.utils import face_align
from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus
def generate_image(
    prompt,
    negative_prompt,
    depth_map_dir,
    face_reference_image,
    s_scale,
    num_inference_steps,
    v2,
):
    # Timestamp used in output filenames
    now = datetime.now()
    date_time = now.strftime("%Y-%m-%d_%H-%M-%S")

    # Create the output directory if it doesn't exist
    output_dir = "/content/output"
    os.makedirs(output_dir, exist_ok=True)
    # Detect the face in the reference image and extract its identity embedding
    app = FaceAnalysis(
        name="buffalo_l", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
    )
    app.prepare(ctx_id=0, det_size=(640, 640))
    face_reference_image_np = np.array(face_reference_image)
    faces = app.get(face_reference_image_np)
    faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
    face_image = face_align.norm_crop(
        face_reference_image_np, landmark=faces[0].kps, image_size=224
    )  # aligned face crop; you can also segment the face instead
    base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
    vae_model_path = "stabilityai/sd-vae-ft-mse"
    image_encoder_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
    # v1 or v2 FaceID Plus checkpoint; both are assumed to live in /content
    ip_ckpt = (
        "/content/ip-adapter-faceid-plus_sd15.bin"
        if not v2
        else "/content/ip-adapter-faceid-plusv2_sd15.bin"
    )
    device = "cuda"
    # Depth ControlNet conditions the layout of each generated image
    controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
    controlnet = ControlNetModel.from_pretrained(
        controlnet_model_path, torch_dtype=torch.float16
    )
    noise_scheduler = DDIMScheduler(
        num_train_timesteps=1000,
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        clip_sample=False,
        set_alpha_to_one=False,
        steps_offset=1,
    )
    vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        base_model_path,
        torch_dtype=torch.float16,
        controlnet=controlnet,
        scheduler=noise_scheduler,
        vae=vae,
        feature_extractor=None,
        safety_checker=None,
    )
    # Load the IP-Adapter FaceID Plus weights on top of the pipeline
    ip_model = IPAdapterFaceIDPlus(pipe, image_encoder_path, ip_ckpt, device)

    # Collect the depth maps; one image is generated per file
    depth_map_files = [
        f for f in os.listdir(depth_map_dir) if f.endswith((".jpg", ".png"))
    ]
    images = []
    for idx, filename in enumerate(depth_map_files):
        depth_map_path = os.path.join(depth_map_dir, filename)
        depth_map = load_image(depth_map_path)
        image = ip_model.generate(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=depth_map,
            face_image=face_image,
            faceid_embeds=faceid_embeds,
            shortcut=v2,
            s_scale=s_scale,
            num_samples=1,  # generate one image per depth map
            width=512,
            height=512,
            num_inference_steps=num_inference_steps,
            seed=2023,
        )[0]
        # Save the image with the prompt name, date/time, and depth map index
        image_name = f"{prompt.replace(' ', '_')}_{date_time}_{idx}_0.png"
        image_path = os.path.join(output_dir, image_name)
        image.save(image_path)
        images.append(image)

    torch.cuda.empty_cache()
    return images
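
# Gradio UI: prompts, depth-map directory, face reference, and adapter option
# on the left; strength/steps sliders and the output gallery on the right.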
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt")
            negative_prompt = gr.Textbox(label="Negative Prompt")
            depth_map_dir = gr.Textbox(label="Depth Map Directory")
            face_reference_image = gr.Image(label="Face Reference Image", type="pil")
            v2 = gr.Checkbox(label="Use v2 Adapter", value=False)
        with gr.Column():
            s_scale = gr.Slider(
                label="Face Structure strength",
                value=0.6,
                step=0.1,
                minimum=0,
                maximum=3,
            )
            num_inference_steps = gr.Slider(
                label="steps", value=10, step=1, minimum=1, maximum=50
            )
            gallery = gr.Gallery(label="Generated Images")
    generate_btn = gr.Button("Generate Images")
    generate_btn.click(
        fn=generate_image,
        inputs=[
            prompt,
            negative_prompt,
            depth_map_dir,
            face_reference_image,
            s_scale,
            num_inference_steps,
            v2,
        ],
        outputs=gallery,
    )
demo.launch(share=True, debug=True)