waveydaveygravy committed on
Commit 8d3fbf3
1 Parent(s): 072e62d

Create app.py

Files changed (1)
  1. app.py +168 -0
app.py ADDED
@@ -0,0 +1,168 @@
+ from insightface.app import FaceAnalysis
+ from insightface.utils import face_align
+ import torch
+ import os
+ from datetime import datetime
+ import gradio as gr
+ from diffusers import (
+     DDIMScheduler,
+     AutoencoderKL,
+     StableDiffusionControlNetPipeline,
+     ControlNetModel,
+ )
+ from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus
+ from diffusers.utils import load_image
+ import numpy as np
+
+
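+ # NOTE: the hard-coded /content paths and share=True below suggest this app is
+ # intended to run in Google Colab with a CUDA GPU; the ip_adapter package is
+ # assumed to be the reference IP-Adapter implementation
+ # (https://github.com/tencent-ailab/IP-Adapter).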
+ def generate_image(
+     prompt,
+     negative_prompt,
+     depth_map_dir,
+     face_reference_image,
+     s_scale,
+     num_inference_steps,
+     v2,
+ ):
+     # Timestamp the run so every output file gets a unique name
+     now = datetime.now()
+     date_time = now.strftime("%Y-%m-%d_%H-%M-%S")
+
+     # Create the output directory if it doesn't exist
+     output_dir = "/content/output"
+     os.makedirs(output_dir, exist_ok=True)
+
+     # Detect the face in the reference image with InsightFace
+     app = FaceAnalysis(
+         name="buffalo_l", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
+     )
+     app.prepare(ctx_id=0, det_size=(640, 640))
+     face_reference_image_np = np.array(face_reference_image)
+     faces = app.get(face_reference_image_np)
+     if not faces:
+         raise gr.Error("No face detected in the reference image.")
+     faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
+     face_image = face_align.norm_crop(
+         face_reference_image_np, landmark=faces[0].kps, image_size=224
+     )  # you can also segment the face
+
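+     # IPAdapterFaceIDPlus consumes two face signals: faceid_embeds (the
+     # InsightFace identity embedding) and face_image (the aligned crop, fed
+     # through the CLIP image encoder and weighted by s_scale).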
+     base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
+     vae_model_path = "stabilityai/sd-vae-ft-mse"
+     image_encoder_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
+     ip_ckpt = (
+         "/content/ip-adapter-faceid-plus_sd15.bin"
+         if not v2
+         else "/content/ip-adapter-faceid-plusv2_sd15.bin"
+     )
+     device = "cuda"
+
+     # Depth ControlNet: conditions generation on each depth map
+     controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
+     controlnet = ControlNetModel.from_pretrained(
+         controlnet_model_path, torch_dtype=torch.float16
+     )
+
+     noise_scheduler = DDIMScheduler(
+         num_train_timesteps=1000,
+         beta_start=0.00085,
+         beta_end=0.012,
+         beta_schedule="scaled_linear",
+         clip_sample=False,
+         set_alpha_to_one=False,
+         steps_offset=1,
+     )
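+     # This DDIM configuration mirrors the scheduler settings used in the
+     # IP-Adapter FaceID examples.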
+
+     vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
+
+     pipe = StableDiffusionControlNetPipeline.from_pretrained(
+         base_model_path,
+         torch_dtype=torch.float16,
+         controlnet=controlnet,
+         scheduler=noise_scheduler,
+         vae=vae,
+         feature_extractor=None,
+         safety_checker=None,
+     )
+
+     # Load the IP-Adapter FaceID Plus weights on top of the pipeline
+     ip_model = IPAdapterFaceIDPlus(pipe, image_encoder_path, ip_ckpt, device)
+
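+     # The wrapper is expected to move the pipeline onto `device` itself in its
+     # constructor (as the reference IP-Adapter implementation does), so no
+     # explicit pipe.to(device) call is made here.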
+     # Generate one image per depth map in the directory, in a stable order
+     depth_map_files = sorted(
+         f for f in os.listdir(depth_map_dir) if f.endswith((".jpg", ".png"))
+     )
+     images = []
+
+     for idx, filename in enumerate(depth_map_files):
+         depth_map_path = os.path.join(depth_map_dir, filename)
+         depth_map = load_image(depth_map_path)
+
+         image = ip_model.generate(
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             image=depth_map,
+             face_image=face_image,
+             faceid_embeds=faceid_embeds,
+             shortcut=v2,
+             s_scale=s_scale,
+             num_samples=1,  # Generate one image per depth map
+             width=512,
+             height=512,
+             num_inference_steps=num_inference_steps,
+             seed=2023,
+         )[0]
+
+         # Save the image with the prompt name, date/time, and depth map index
+         image_name = f"{prompt.replace(' ', '_')}_{date_time}_{idx}_0.png"
+         image_path = os.path.join(output_dir, image_name)
+         image.save(image_path)
+         images.append(image)
+
+     torch.cuda.empty_cache()
+     return images
+
+
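+ # Minimal Gradio UI: prompts and the face reference on the left, sliders and
+ # the output gallery on the right.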
+ with gr.Blocks() as demo:
+     with gr.Row():
+         with gr.Column():
+             prompt = gr.Textbox(label="Prompt")
+             negative_prompt = gr.Textbox(label="Negative Prompt")
+             depth_map_dir = gr.Textbox(label="Depth Map Directory")
+             face_reference_image = gr.Image(label="Face Reference Image", type="pil")
+             v2 = gr.Checkbox(label="Use v2 Adapter", value=False)
+
+         with gr.Column():
+             s_scale = gr.Slider(
+                 label="Face Structure Strength",
+                 value=0.6,
+                 step=0.1,
+                 minimum=0,
+                 maximum=3,
+             )
+             num_inference_steps = gr.Slider(
+                 label="Steps", value=10, step=1, minimum=1, maximum=50
+             )
+             gallery = gr.Gallery(label="Generated Images")
+
+     generate_btn = gr.Button("Generate Images")
+     generate_btn.click(
+         fn=generate_image,
+         inputs=[
+             prompt,
+             negative_prompt,
+             depth_map_dir,
+             face_reference_image,
+             s_scale,
+             num_inference_steps,
+             v2,
+         ],
+         outputs=gallery,
+     )
+
+ demo.launch(share=True, debug=True)