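# Gradio Space: generate identity-preserving images for every depth map in a
# folder, using IP-Adapter FaceID Plus (v1/v2) on SD 1.5 with a depth ControlNet.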
import os
from datetime import datetime

import numpy as np
import torch
import gradio as gr

from insightface.app import FaceAnalysis
from insightface.utils import face_align
from diffusers import (
    DDIMScheduler,
    AutoencoderKL,
    StableDiffusionControlNetPipeline,
    ControlNetModel,
)
from diffusers.utils import load_image
from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus
def generate_image(
    prompt,
    negative_prompt,
    depth_map_dir,
    face_reference_image,
    s_scale,
    num_inference_steps,
    v2,
):
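    """Generate one image per depth map found in depth_map_dir.

    Identity comes from face_reference_image via IP-Adapter FaceID Plus;
    composition comes from each depth map via a depth ControlNet.
    """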
    # Timestamp used to make every saved filename unique
    now = datetime.now()
    date_time = now.strftime("%Y-%m-%d_%H-%M-%S")

    # Create the output directory if it doesn't exist
    output_dir = "/content/output"
    os.makedirs(output_dir, exist_ok=True)

    # Detect the face in the reference image and extract its identity embedding
    app = FaceAnalysis(
        name="buffalo_l", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
    )
    app.prepare(ctx_id=0, det_size=(640, 640))
    face_reference_image_np = np.array(face_reference_image)
    faces = app.get(face_reference_image_np)
    if not faces:
        raise gr.Error("No face detected in the reference image.")
    faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
    face_image = face_align.norm_crop(
        face_reference_image_np, landmark=faces[0].kps, image_size=224
    )  # aligned 224x224 crop; you could also segment the face instead
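    # Note: every click reloads all models below. Hoisting the FaceAnalysis,
    # ControlNet, and pipeline setup to module scope would make repeat runs
    # much faster, at the cost of keeping the models resident in memory.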
    base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
    vae_model_path = "stabilityai/sd-vae-ft-mse"
    image_encoder_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
    # v1 or v2 FaceID Plus checkpoint; both are assumed to live in /content
    ip_ckpt = (
        "/content/ip-adapter-faceid-plusv2_sd15.bin"
        if v2
        else "/content/ip-adapter-faceid-plus_sd15.bin"
    )
    device = "cuda"
    # Depth ControlNet keeps the composition aligned with each depth map
    controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
    controlnet = ControlNetModel.from_pretrained(
        controlnet_model_path, torch_dtype=torch.float16
    )
    noise_scheduler = DDIMScheduler(
        num_train_timesteps=1000,
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        clip_sample=False,
        set_alpha_to_one=False,
        steps_offset=1,
    )
    vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        base_model_path,
        torch_dtype=torch.float16,
        controlnet=controlnet,
        scheduler=noise_scheduler,
        vae=vae,
        feature_extractor=None,
        safety_checker=None,
    )

    # Load the IP-Adapter; it also moves the pipeline to the target device
    ip_model = IPAdapterFaceIDPlus(pipe, image_encoder_path, ip_ckpt, device)

    # Collect depth maps in a stable order so output filenames are reproducible
    depth_map_files = sorted(
        f for f in os.listdir(depth_map_dir) if f.endswith((".jpg", ".jpeg", ".png"))
    )
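    # shortcut=v2 selects the v2 residual path in FaceID Plus, and s_scale
    # weights how strongly the face structure conditions the output.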
    images = []
    for idx, filename in enumerate(depth_map_files):
        depth_map_path = os.path.join(depth_map_dir, filename)
        depth_map = load_image(depth_map_path)
        image = ip_model.generate(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=depth_map,
            face_image=face_image,
            faceid_embeds=faceid_embeds,
            shortcut=v2,
            s_scale=s_scale,
            num_samples=1,  # generate one image per depth map
            width=512,
            height=512,
            num_inference_steps=num_inference_steps,
            seed=2023,
        )[0]
        # Save the image with the prompt name, date/time, and depth map index
        image_name = f"{prompt.replace(' ', '_')}_{date_time}_{idx}_0.png"
        image_path = os.path.join(output_dir, image_name)
        image.save(image_path)
        images.append(image)

    torch.cuda.empty_cache()
    return images
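
# Minimal Gradio UI: prompt and reference inputs on the left, strength and
# step sliders on the right, generated images shown in a gallery below.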
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt")
            negative_prompt = gr.Textbox(label="Negative Prompt")
            depth_map_dir = gr.Textbox(label="Depth Map Directory")
            face_reference_image = gr.Image(label="Face Reference Image", type="pil")
            v2 = gr.Checkbox(label="Use v2 Adapter", value=False)
        with gr.Column():
            s_scale = gr.Slider(
                label="Face Structure Strength",
                value=0.6,
                step=0.1,
                minimum=0,
                maximum=3,
            )
            num_inference_steps = gr.Slider(
                label="Steps", value=10, step=1, minimum=1, maximum=50
            )
    gallery = gr.Gallery(label="Generated Images")
    generate_btn = gr.Button("Generate Images")

    generate_btn.click(
        fn=generate_image,
        inputs=[
            prompt,
            negative_prompt,
            depth_map_dir,
            face_reference_image,
            s_scale,
            num_inference_steps,
            v2,
        ],
        outputs=gallery,
    )

demo.launch(share=True, debug=True)
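# The FaceID Plus checkpoints are assumed to have been downloaded to /content
# beforehand, e.g. from the h94/IP-Adapter-FaceID repo on the Hugging Face Hub.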