from io import BytesIO

import requests
import torch
from PIL import Image

from lyrasd_model import LyraSdControlnetImg2ImgPipeline

# Path to the directory holding the model files; it should contain:
# 1. the CLIP model
# 2. the converted, optimized UNet model
# 3. the VAE model
# 4. the scheduler config
# The LyraSD C++ shared library, which implements the C++/CUDA compute kernels
lib_path = "./lyrasd_model/lyrasd_lib/libth_lyrasd_cu12_sm86.so"
model_path = "./models/lyrasd_rev_animated"
canny_controlnet_path = "./models/lyrasd_canny"

# Build the ControlNet Img2Img pipeline
model = LyraSdControlnetImg2ImgPipeline(model_path, lib_path)

# Load a ControlNet model; at most 3 can be loaded
model.load_controlnet_model("canny", canny_controlnet_path, "fp32")

control_img = Image.open("control_bird_canny.png")

# Prepare the inputs and hyperparameters
prompt = "a bird"
negative_prompt = "NSFW"
height, width = 512, 512
steps = 20
guidance_scale = 7.5
generator = torch.Generator().manual_seed(123)
num_images = 1

# Up to 3 ControlNets can be loaded at once for a multi-ControlNet effect; the
# parameter lists below must stay aligned index by index (see the sketch at the
# end of this file). The outer length of controlnet_images must match the
# lengths of controlnet_scale and controlnet_names, while each inner list's
# length must match the batch size.
controlnet_images = [[control_img]]
controlnet_scale = [0.5]
controlnet_names = ['canny']

# Fetch an image from COS to use as the init image
init_image_url = "https://chuangxin-research-1258344705.cos.ap-guangzhou.myqcloud.com/share/files/seaside_town.png?q-sign-algorithm=sha1&q-ak=AKIDBF6i7GCtKWS8ZkgOtACzX3MQDl37xYty&q-sign-time=1692601590;1865401590&q-key-time=1692601590;1865401590&q-header-list=&q-url-param-list=&q-signature=ca04ca92d990d94813029c0d9ef29537e5f4637c"
init_image = BytesIO(requests.get(init_image_url).content)
init_image = Image.open(init_image).convert('RGB')
init_image = init_image.resize((width, height), Image.Resampling.LANCZOS)

guess_mode = False
strength = 0.8

# Run inference
images = model(prompt,
               init_image,
               strength,
               height,
               width,
               steps,
               guidance_scale,
               negative_prompt,
               num_images,
               generator=generator,
               controlnet_images=controlnet_images,
               controlnet_scale=controlnet_scale,
               controlnet_names=controlnet_names,
               guess_mode=guess_mode)

# Save the generated images
for i, image in enumerate(images):
    image.save(f"outputs/res_controlnet_img2img_{i}.png")
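
# --- Optional multi-ControlNet sketch ---
# The pipeline accepts up to 3 ControlNets, and controlnet_images,
# controlnet_scale, and controlnet_names must stay aligned index by index.
# The sketch below is illustrative only and is left commented out: the depth
# ControlNet path, its name, and "control_bird_depth.png" are assumed
# placeholders, not files shipped with this example.
#
# depth_controlnet_path = "./models/lyrasd_depth"    # assumed path
# model.load_controlnet_model("depth", depth_controlnet_path, "fp32")
# depth_img = Image.open("control_bird_depth.png")   # assumed control image
#
# controlnet_images = [[control_img], [depth_img]]   # one outer entry per ControlNet
# controlnet_scale = [0.5, 0.7]                      # same length as controlnet_names
# controlnet_names = ['canny', 'depth']              # names passed to load_controlnet_model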