UniPortrait

Runtime error

File size: 21,235 Bytes

#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time    : 2024-07-31
# @Author  : Junjie He
import gradio as gr

from src.process import (
    text_to_single_id_generation_process,
    text_to_multi_id_generation_process,
    image_to_single_id_generation_process,
)


def text_to_single_id_generation_block():
    """Lay out the "Text-to-Single-ID Generation" UI and wire up its Run button.

    Creates the heading, description and tips, a left column with the prompt
    and generation options, a right column with the ID / supplement / mix /
    style image inputs and the output gallery, and an examples table. The Run
    button is bound to ``text_to_single_id_generation_process`` with the
    gallery as its only output.

    Intended to be called from within a Gradio layout context (the script
    calls it inside a ``gr.TabItem``). The description and tips HTML are read
    from the module-level names ``text_to_single_id_description`` and
    ``text_to_single_id_tips``, which must exist before this is called.
    """

    def unit_slider(label, value):
        # All scale controls share the same 0.0-1.0 range with a 0.01 step.
        return gr.Slider(label=label, minimum=0.0, maximum=1.0, step=0.01, value=value)

    def mix_column(image_label, slider_label):
        # One "Mix ID" image input together with its "Mix Scale" slider.
        with gr.Column(scale=1, min_width=100):
            mix_image = gr.Image(type="pil", label=image_label)
            mix_weight = unit_slider(slider_label, 0.0)
        return mix_image, mix_weight

    # Each row: prompt, ID image, style image, example output image.
    example_rows = [
        [
            "A man with short black hair, was paired with a blue denim jacket with yellow details.",
            "assets/examples/1-newton.jpg",
            "assets/No-Image-Placeholder.png",
            "assets/examples/1-output-1.png",
        ],
        [
            "A little boy with short black hair, was paired with a blue denim jacket with yellow details.",
            "assets/examples/1-newton.jpg",
            "assets/No-Image-Placeholder.png",
            "assets/examples/1-output-4.png",
        ],
        [
            "A man with short black hair, was paired with a blue denim jacket with yellow details.",
            "assets/examples/1-newton.jpg",
            "assets/examples/1-style-1.jpg",
            "assets/examples/1-output-2.png",
        ],
        [
            "A man with short black hair, was paired with a blue denim jacket with yellow details.",
            "assets/examples/1-newton.jpg",
            "assets/examples/1-style-2.jpg",
            "assets/examples/1-output-5.png",
        ],
    ]

    gr.Markdown("## Text-to-Single-ID Generation")
    gr.HTML(text_to_single_id_description)
    gr.HTML(text_to_single_id_tips)

    with gr.Row():
        # Left column: text inputs and generation options.
        with gr.Column(scale=1, min_width=100):
            prompt_box = gr.Textbox(label='Prompt', value="", lines=2)
            negative_box = gr.Textbox(label='Negative Prompt', value="nsfw")
            resolution_menu = gr.Dropdown(
                label="Image Resolution (HxW)",
                choices=["768x512", "512x512", "512x768"],
                value="512x512",
            )
            generate_button = gr.Button(value="Run")

            with gr.Accordion("Advanced Options", open=True):
                seed_slider = gr.Slider(
                    label="Seed (-1 indicates random)",
                    minimum=-1,
                    maximum=2147483647,
                    step=1,
                    value=-1,
                )
                id_strength = unit_slider("Face ID Scale", 0.7)
                structure_strength = unit_slider("Face Structure Scale", 0.1)
                style_strength = unit_slider("style_scale", 0.7)
                upscale_toggle = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Right column: image inputs on top, results underneath.
        with gr.Column(scale=3, min_width=100):
            with gr.Row(equal_height=False):
                id_image = gr.Image(type="pil", label="ID Image")
                with gr.Accordion("ID Supplements", open=True):
                    with gr.Row():
                        extra_id_files = gr.File(
                            label="Additional ID Images",
                            file_count="multiple",
                            file_types=["image"],
                            type="filepath",
                        )
                    with gr.Row():
                        mix_image_a, mix_weight_a = mix_column("Mix ID 1", "Mix Scale 1")
                        mix_image_b, mix_weight_b = mix_column("Mix ID 2", "Mix Scale 2")
                style_image = gr.Image(type="pil", label="Style")

            with gr.Row():
                # Invisible image that only serves as a column of the examples table.
                hidden_example_output = gr.Image(type="pil", label="(Example Output)", visible=False)
                gallery = gr.Gallery(
                    label='Output',
                    show_label=True,
                    elem_id="gallery",
                    columns=4,
                    preview=True,
                    format="png",
                )

    with gr.Row():
        gr.Examples(
            label="Examples",
            examples=example_rows,
            inputs=[prompt_box, id_image, style_image, hidden_example_output],
        )

    # The order of inputs is the positional argument order passed to the
    # processing function.
    generate_button.click(
        fn=text_to_single_id_generation_process,
        inputs=[
            id_image, extra_id_files,
            mix_image_a, mix_weight_a,
            mix_image_b, mix_weight_b,
            id_strength, structure_strength,
            prompt_box, negative_box,
            style_image, style_strength,
            seed_slider, resolution_menu, upscale_toggle,
        ],
        outputs=[gallery],
    )


def text_to_multi_id_generation_block():
    """Lay out the "Text-to-Multi-ID Generation" UI and wire up its Run button.

    Creates the heading, description and tips, a left column with the prompt
    and generation options, a right column holding two identity columns
    ("First ID" and "Second ID", each with collapsed supplements), a style
    image and the output gallery, followed by two examples tables. The Run
    button is bound to ``text_to_multi_id_generation_process`` with the
    gallery as its only output.

    Intended to be called from within a Gradio layout context (the script
    calls it inside a ``gr.TabItem``). The description and tips HTML are read
    from the module-level names ``text_to_multi_id_description`` and
    ``text_to_multi_id_tips``, which must exist before this is called.
    """

    def unit_slider(label, value):
        # All scale controls share the same 0.0-1.0 range with a 0.01 step.
        return gr.Slider(label=label, minimum=0.0, maximum=1.0, step=0.01, value=value)

    def mix_column(image_label, slider_label):
        # One "Mix ID" image input together with its "Mix Scale" slider.
        with gr.Column(scale=1, min_width=100):
            mix_image = gr.Image(type="pil", label=image_label)
            mix_weight = unit_slider(slider_label, 0.0)
        return mix_image, mix_weight

    def identity_column(image_label, accordion_label):
        # One identity: its main image plus a collapsed accordion holding the
        # extra reference files and the two mix inputs.
        with gr.Column(scale=1, min_width=100):
            face_image = gr.Image(type="pil", label=image_label)
            with gr.Accordion(accordion_label, open=False):
                with gr.Row():
                    extra_files = gr.File(
                        label="Additional ID Images",
                        file_count="multiple",
                        file_types=["image"],
                        type="filepath",
                    )
                with gr.Row():
                    mix_image_a, mix_weight_a = mix_column("Mix ID 1", "Mix Scale 1")
                    mix_image_b, mix_weight_b = mix_column("Mix ID 2", "Mix Scale 2")
        return face_image, extra_files, mix_image_a, mix_weight_a, mix_image_b, mix_weight_b

    # Each row: prompt, first ID image, second ID image, style image,
    # example output image.
    styled_example_rows = [
        [
            "The two women are captured in laughter and joy, their faces brimming with sincere happiness against the backdrop of a peaceful beach at sunset. The painting, depicted in soft style, captures the warmth and tranquility of the moment.",
            "assets/examples/2-stylegan2-ffhq-0100.png",
            "assets/examples/2-stylegan2-ffhq-0293.png",
            "assets/No-Image-Placeholder.png",
            "assets/examples/2-output-1.png",
        ],
        [
            "The two female models are drinking coffee. The background was off-white.",
            "assets/examples/2-stylegan2-ffhq-0100.png",
            "assets/examples/2-stylegan2-ffhq-0293.png",
            "assets/examples/2-style-1.jpg",
            "assets/examples/2-output-2.png",
        ],
    ]

    # Each row: prompt, first ID image, first ID extra files, second ID image,
    # second ID extra files, example output image.
    multi_reference_example_rows = [
        [
            "Two men in an American poster.",
            "assets/examples/Trump-1.jpg",
            ["assets/examples/Trump-2.jpg", "assets/examples/Trump-3.jpg", "assets/examples/Trump-4.jpg"],
            "assets/examples/Biden-1.jpg",
            ["assets/examples/Biden-2.jpg", "assets/examples/Biden-3.jpg", "assets/examples/Biden-4.jpg"],
            "assets/examples/2-output-4.png",
        ],
        [
            "Two men engaged in a vigorous handshake, both wearing expressions of enthusiasm and determination, set against a backdrop of a bustling business district. The image is crafted in a sleek and modern digital art style, conveying the dynamic and competitive nature of their interaction.",
            "assets/examples/Trump-1.jpg",
            ["assets/examples/Trump-2.jpg", "assets/examples/Trump-3.jpg", "assets/examples/Trump-4.jpg"],
            "assets/examples/Biden-1.jpg",
            ["assets/examples/Biden-2.jpg", "assets/examples/Biden-3.jpg", "assets/examples/Biden-4.jpg"],
            "assets/examples/2-output-3.png",
        ],
    ]

    gr.Markdown("## Text-to-Multi-ID Generation")
    gr.HTML(text_to_multi_id_description)
    gr.HTML(text_to_multi_id_tips)

    with gr.Row():
        # Left column: text inputs and generation options.
        with gr.Column(scale=1, min_width=100):
            prompt_box = gr.Textbox(label='Prompt', value="", lines=2)
            negative_box = gr.Textbox(label='Negative Prompt', value="nsfw")
            resolution_menu = gr.Dropdown(
                label="Image Resolution (HxW)",
                choices=["768x512", "512x512", "512x768"],
                value="512x512",
            )
            generate_button = gr.Button(value="Run")

            with gr.Accordion("Advanced Options", open=True):
                seed_slider = gr.Slider(
                    label="Seed (-1 indicates random)",
                    minimum=-1,
                    maximum=2147483647,
                    step=1,
                    value=-1,
                )
                id_strength = unit_slider("Face ID Scale", 0.7)
                structure_strength = unit_slider("Face Structure Scale", 0.3)
                style_strength = unit_slider("style_scale", 0.7)
                upscale_toggle = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Right column: the two identities and the style image on top,
        # results underneath.
        with gr.Column(scale=3, min_width=100):
            with gr.Row(equal_height=False):
                (
                    first_face, first_extra_files,
                    first_mix_image_a, first_mix_weight_a,
                    first_mix_image_b, first_mix_weight_b,
                ) = identity_column("First ID", "First ID Supplements")
                (
                    second_face, second_extra_files,
                    second_mix_image_a, second_mix_weight_a,
                    second_mix_image_b, second_mix_weight_b,
                ) = identity_column("Second ID", "Second ID Supplements")
                with gr.Column(scale=1, min_width=100):
                    style_image = gr.Image(type="pil", label="Style")

            with gr.Row():
                # Invisible image that only serves as a column of the examples tables.
                hidden_example_output = gr.Image(type="pil", label="(Example Output)", visible=False)
                gallery = gr.Gallery(
                    label='Output',
                    show_label=True,
                    elem_id="gallery",
                    columns=4,
                    preview=True,
                    format="png",
                )

    with gr.Row():
        gr.Examples(
            label="Examples",
            examples=styled_example_rows,
            inputs=[prompt_box, first_face, second_face, style_image, hidden_example_output],
        )

    with gr.Row():
        gr.Examples(
            label="Examples (Multiple References)",
            examples=multi_reference_example_rows,
            inputs=[
                prompt_box,
                first_face, first_extra_files,
                second_face, second_extra_files,
                hidden_example_output,
            ],
        )

    # The order of inputs is the positional argument order passed to the
    # processing function.
    generate_button.click(
        fn=text_to_multi_id_generation_process,
        inputs=[
            first_face, first_extra_files,
            first_mix_image_a, first_mix_weight_a,
            first_mix_image_b, first_mix_weight_b,
            second_face, second_extra_files,
            second_mix_image_a, second_mix_weight_a,
            second_mix_image_b, second_mix_weight_b,
            id_strength, structure_strength,
            prompt_box, negative_box,
            style_image, style_strength,
            seed_slider, resolution_menu, upscale_toggle,
        ],
        outputs=[gallery],
    )


def image_to_single_id_generation_block():
    """Lay out the "Image-to-Single-ID Generation" UI and wire up its Run button.

    Creates the heading, description and tips, a left column with the
    resolution and generation options, a right column with the portrait
    reference, the ID / supplement / mix image inputs and the output gallery,
    and an examples table. The Run button is bound to
    ``image_to_single_id_generation_process`` with the gallery as its only
    output. This tab has no prompt fields.

    Intended to be called from within a Gradio layout context (the script
    calls it inside a ``gr.TabItem``). The description and tips HTML are read
    from the module-level names ``image_to_single_id_description`` and
    ``image_to_single_id_tips``, which must exist before this is called.
    """

    def unit_slider(label, value):
        # All scale controls share the same 0.0-1.0 range with a 0.01 step.
        return gr.Slider(label=label, minimum=0.0, maximum=1.0, step=0.01, value=value)

    def mix_column(image_label, slider_label):
        # One "Mix ID" image input together with its "Mix Scale" slider.
        with gr.Column(scale=1, min_width=100):
            mix_image = gr.Image(type="pil", label=image_label)
            mix_weight = unit_slider(slider_label, 0.0)
        return mix_image, mix_weight

    # Each row: portrait reference image, ID image, Face ID Scale,
    # Face Structure Scale, example output image.
    example_rows = [
        [
            "assets/examples/3-style-1.png",
            "assets/examples/3-stylegan2-ffhq-0293.png",
            0.7,
            0.3,
            "assets/examples/3-output-1.png",
        ],
        [
            "assets/examples/3-style-1.png",
            "assets/examples/3-stylegan2-ffhq-0293.png",
            0.6,
            0.0,
            "assets/examples/3-output-2.png",
        ],
        [
            "assets/examples/3-style-2.jpg",
            "assets/examples/3-stylegan2-ffhq-0381.png",
            0.7,
            0.3,
            "assets/examples/3-output-3.png",
        ],
        [
            "assets/examples/3-style-3.jpg",
            "assets/examples/3-stylegan2-ffhq-0381.png",
            0.6,
            0.0,
            "assets/examples/3-output-4.png",
        ],
    ]

    gr.Markdown("## Image-to-Single-ID Generation")
    gr.HTML(image_to_single_id_description)
    gr.HTML(image_to_single_id_tips)

    with gr.Row():
        # Left column: generation options.
        with gr.Column(scale=1, min_width=100):
            resolution_menu = gr.Dropdown(
                label="Image Resolution (HxW)",
                choices=["768x512", "512x512", "512x768"],
                value="512x512",
            )
            generate_button = gr.Button(value="Run")

            with gr.Accordion("Advanced Options", open=True):
                seed_slider = gr.Slider(
                    label="Seed (-1 indicates random)",
                    minimum=-1,
                    maximum=2147483647,
                    step=1,
                    value=-1,
                )
                reference_strength = unit_slider("Reference Scale", 0.7)
                id_strength = unit_slider("Face ID Scale", 0.7)
                structure_strength = unit_slider("Face Structure Scale", 0.3)
                upscale_toggle = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Right column: image inputs on top, results underneath.
        with gr.Column(scale=3, min_width=100):
            with gr.Row(equal_height=False):
                reference_image = gr.Image(type="pil", label="Portrait Reference")
                id_image = gr.Image(type="pil", label="ID Image")
                with gr.Accordion("ID Supplements", open=True):
                    with gr.Row():
                        extra_id_files = gr.File(
                            label="Additional ID Images",
                            file_count="multiple",
                            file_types=["image"],
                            type="filepath",
                        )
                    with gr.Row():
                        mix_image_a, mix_weight_a = mix_column("Mix ID 1", "Mix Scale 1")
                        mix_image_b, mix_weight_b = mix_column("Mix ID 2", "Mix Scale 2")

            with gr.Row():
                with gr.Column(scale=3, min_width=100):
                    # Invisible image that only serves as a column of the examples table.
                    hidden_example_output = gr.Image(type="pil", label="(Example Output)", visible=False)
                    gallery = gr.Gallery(
                        label='Output',
                        show_label=True,
                        elem_id="gallery",
                        columns=4,
                        preview=True,
                        format="png",
                    )

    with gr.Row():
        gr.Examples(
            label="Examples",
            examples=example_rows,
            inputs=[reference_image, id_image, id_strength, structure_strength, hidden_example_output],
        )

    # The order of inputs is the positional argument order passed to the
    # processing function.
    generate_button.click(
        fn=image_to_single_id_generation_process,
        inputs=[
            id_image, extra_id_files,
            mix_image_a, mix_weight_a,
            mix_image_b, mix_weight_b,
            id_strength, structure_strength,
            reference_image, reference_strength,
            seed_slider, resolution_menu, upscale_toggle,
        ],
        outputs=[gallery],
    )


# Script entry point: define the page texts, assemble the three tabs and
# launch the Gradio app.
#
# The names assigned below are module-level globals. The three
# *_generation_block() builders above read the matching *_description and
# *_tips names directly, so these assignments must run before the builders
# are called (and the builders raise NameError if this guard did not run,
# e.g. when the module is imported instead of executed).
if __name__ == "__main__":
    # Page header: title plus arXiv / project page / GitHub badge links.
    title = r"""
            <div style="text-align: center;">
                <h1> UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization </h1>
                <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
                    <a href="https://arxiv.org/pdf/2408.05939"><img src="https://img.shields.io/badge/arXiv-2408.05939-red"></a>
                    &nbsp;
                    <a href='https://aigcdesigngroup.github.io/UniPortrait-Page/'><img src='https://img.shields.io/badge/Project_Page-UniPortrait-green' alt='Project Page'></a>
                    &nbsp;
                    <a href="https://github.com/junjiehe96/UniPortrait"><img src="https://img.shields.io/badge/Github-Code-blue"></a>
                </div>
                </br>
            </div>
        """

    # Short introduction shown directly under the header.
    title_description = r"""
        This is the <b>official 🤗 Gradio demo</b> for <a href='https://arxiv.org/pdf/2408.05939' target='_blank'><b>UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization</b></a>.<br>
        The demo provides three capabilities: text-to-single-ID personalization, text-to-multi-ID personalization, and image-to-single-ID personalization. All of these are based on the Stable Diffusion v1-5 model. Feel free to give them a try! 😊
        """

    # Read by text_to_single_id_generation_block().
    text_to_single_id_description = r"""🚀🚀🚀Quick start:<br>
        1. Enter a text prompt (Chinese or English), Upload an image with a face, and Click the <b>Run</b> button.<br>
        2. (Optional) You can also upload an image as the style reference for the results. 🤗<br>
        """

    # Read by text_to_single_id_generation_block().
    text_to_single_id_tips = r"""💡💡💡Tips:<br>
        1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
        2. It's a good idea to upload multiple reference photos of your face to improve the prompt and ID consistency. Additional references can be uploaded in the "ID supplements".<br>
        3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the ID and text alignment. We recommend using "Face ID Scale" (0.5~0.7) and "Face Structure Scale" (0.0~0.4).<br>
        """

    # Read by text_to_multi_id_generation_block().
    text_to_multi_id_description = r"""🚀🚀🚀Quick start:<br>
        1. Enter a text prompt (Chinese or English), Upload an image with a face in "First ID" and "Second ID" blocks respectively, and Click the <b>Run</b> button.<br>
        2. (Optional) You can also upload an image as the style reference for the results. 🤗<br>
        """

    # Read by text_to_multi_id_generation_block().
    text_to_multi_id_tips = r"""💡💡💡Tips:<br>
        1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
        2. It's a good idea to upload multiple reference photos of your face to improve the prompt and ID consistency. Additional references can be uploaded in the "ID supplements".<br>
        3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the ID and text alignment. We recommend using "Face ID Scale" (0.3~0.7) and "Face Structure Scale" (0.0~0.4).<br>
        """

    # Read by image_to_single_id_generation_block().
    image_to_single_id_description = r"""🚀🚀🚀Quick start: Upload an image as the portrait reference (can be any style), Upload a face image, and Click the <b>Run</b> button. 🤗<br>"""

    # Read by image_to_single_id_generation_block().
    image_to_single_id_tips = r"""💡💡💡Tips:<br>
        1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
        2. It's a good idea to upload multiple reference photos of your face to improve ID consistency. Additional references can be uploaded in the "ID supplements".<br>
        3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the portrait reference and ID alignment. We recommend using "Face ID Scale" (0.5~0.7) and "Face Structure Scale" (0.0~0.4).<br>
        """

    # NOTE(review): `citation` is assigned but not rendered anywhere in the
    # code below. Its bibtex fence is empty and the contact address looks like
    # an e-mail obfuscation placeholder rather than a real address -- confirm
    # and restore both before displaying this text.
    citation = r"""
        ---
        📝 **Citation**
        <br>
        If our work is helpful for your research or applications, please cite us via:
        ```bibtex
        ```
        📧 **Contact**
        <br>
        If you have any questions, please feel free to open an issue or directly reach us out at <b>[email protected]</b>.
        """

    # Build the page: header, introduction, then one tab per capability.
    # Each builder creates its components inside the currently open tab.
    block = gr.Blocks(title="UniPortrait").queue()
    with block:
        gr.HTML(title)
        gr.HTML(title_description)

        with gr.TabItem("Text-to-Single-ID"):
            text_to_single_id_generation_block()

        with gr.TabItem("Text-to-Multi-ID"):
            text_to_multi_id_generation_block()

        with gr.TabItem("Image-to-Single-ID (Stylization)"):
            image_to_single_id_generation_block()

    # share=True asks Gradio for a public share link; max_threads=2 is passed
    # through to Gradio's launch() as its thread limit.
    block.launch(share=True, max_threads=2)
    # Alternative launch configurations with a fixed host/port and no share link:
    # block.launch(server_name='0.0.0.0', share=False, server_port=9999, allowed_paths=["/"])
    # block.launch(server_name='127.0.0.1', share=False, server_port=9999, allowed_paths=["/"])