# Tencent-Hunyuan/HunyuanDiT-v1.2-ControlNet-Diffusers-Depth

from diffusers import HunyuanDiT2DControlNetModel, HunyuanDiTControlNetPipeline
import torch

# Load the ControlNet checkpoint with float16 weights.
controlnet = HunyuanDiT2DControlNetModel.from_pretrained(
    "Tencent-Hunyuan/HunyuanDiT-v1.2-ControlNet-Diffusers-Depth",
    torch_dtype=torch.float16,
)

# Build the pipeline from the base checkpoint, handing it the ControlNet
# loaded above and requesting the same float16 dtype.
pipe = HunyuanDiTControlNetPipeline.from_pretrained(
    "Tencent-Hunyuan/HunyuanDiT-v1.2-Diffusers-Distilled",
    controlnet=controlnet,
    torch_dtype=torch.float16,
)

# Run on the CUDA device (a CUDA-capable GPU must be available).
pipe.to("cuda")

from diffusers.utils import load_image
# Image later passed to the pipeline as `control_image`; fetched from the
# ControlNet model repository (depth.jpg).
depth_image_url = 'https://huggingface.co/Tencent-Hunyuan/HunyuanDiT-v1.2-ControlNet-Diffusers-Depth/resolve/main/depth.jpg?download=true'
cond_image = load_image(depth_image_url)

# HunyuanDiT supports both Chinese and English prompts. The Chinese prompt is
# used here; the commented-out line below is its English equivalent and can be
# swapped in instead.
prompt = (
    "在茂密的森林中，一只黑白相间的熊猫静静地坐在绿树红花中，周围是山川和海洋。背景是白天的森林，光线充足。照片采用特写、平视和居中构图的方式，呈现出写实的效果"
)
# prompt = "In the dense forest, a black and white panda sits quietly among the green trees and red flowers, surrounded by mountains and oceans. The background is a daytime forest with ample light. The photo uses a close-up, eye-level, and centered composition to create a realistic effect."

# Seed torch's global random number generator before sampling so repeated
# runs start from the same RNG state.
torch.manual_seed(42)

# Keyword arguments for the pipeline call, gathered in one place.
generation_kwargs = {
    # Things the model should steer away from (Chinese negative prompt).
    "negative_prompt": '错误的眼睛，糟糕的人脸，毁容，糟糕的艺术，变形，多余的肢体，模糊的颜色，模糊，重复，病态，残缺，',
    "height": 1024,
    "width": 1024,
    "guidance_scale": 6.0,
    # Conditioning image consumed by the ControlNet.
    "control_image": cond_image,
    "num_inference_steps": 50,
}

# Run the pipeline and keep the first generated image.
image = pipe(prompt, **generation_kwargs).images[0]

# Write the result next to the script.
image.save('./image.png')