import gradio as gr import spaces import torch from PIL import Image from compel import Compel, DiffusersTextualInversionManager from diffusers import DiffusionPipeline from diffusers.utils import make_image_grid from src.example import EXAMPLES DIFFUSERS_MODEL_IDS = [ # SD Models "stabilityai/stable-diffusion-3-medium-diffusers", "stabilityai/stable-diffusion-xl-base-1.0", "stabilityai/stable-diffusion-2-1", "runwayml/stable-diffusion-v1-5", # Other Models "Prgckwb/trpfrog-diffusion", ] EXTERNAL_MODEL_MAPPING = { "Beautiful Realistic Asians": "checkpoints/diffusers/Beautiful Realistic Asians v7", } MODEL_CHOICES = DIFFUSERS_MODEL_IDS + list(EXTERNAL_MODEL_MAPPING.keys()) device = "cuda" if torch.cuda.is_available() else "cpu" def load_pipeline(model_id, use_model_offload, safety_checker): # Diffusers リポジトリ内のモデル if model_id in DIFFUSERS_MODEL_IDS: pipe = DiffusionPipeline.from_pretrained( model_id, torch_dtype=torch.float16, ) # CIVITAI 系列由来のモデル else: pipe = DiffusionPipeline.from_pretrained( EXTERNAL_MODEL_MAPPING[model_id], torch_dtype=torch.float16, ) # Load Textual Inversion pipe.load_textual_inversion("checkpoints/embeddings/BadNegAnatomyV1 neg.pt", token='BadNegAnatomyV1-neg') pipe.load_textual_inversion("checkpoints/embeddings/Deep Negative V1 75T.pt", token='DeepNegative') pipe.load_textual_inversion("checkpoints/embeddings/easynegative.safetensors", token='EasyNegative') pipe.load_textual_inversion("checkpoints/embeddings/Negative Hand Embedding.pt", token='negative_hand-neg') # Load LoRA pipe.load_lora_weights("checkpoints/lora/detailed style SD1.5.safetensors", adapter_name='detail') pipe.load_lora_weights("checkpoints/lora/perfection style SD1.5.safetensors", adapter_name='perfection') pipe.load_lora_weights("checkpoints/lora/Hand v3 SD1.5.safetensors", adapter_name='hands') pipe.set_adapters(['detail', 'hands'], adapter_weights=[0.5, 0.5]) # VRAM が少ないとき用の対策 if use_model_offload: pipe.enable_model_cpu_offload() else: pipe = pipe.to(device) if not safety_checker: pipe.safety_checker = None return pipe @spaces.GPU(duration=120) @torch.inference_mode() def inference( prompt: str, model_id: str = "stabilityai/stable-diffusion-3-medium-diffusers", negative_prompt: str = "", width: int = 512, height: int = 512, guidance_scale: float = 7.5, num_inference_steps: int = 50, num_images: int = 4, safety_checker: bool = True, use_model_offload: bool = False, seed: int = 8888, progress=gr.Progress(track_tqdm=True), ) -> Image.Image: progress(0, 'Loading pipeline...') pipe = load_pipeline(model_id, use_model_offload, safety_checker) # For Compel textual_inversion_manager = DiffusersTextualInversionManager(pipe) compel_procs = Compel( tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder, textual_inversion_manager=textual_inversion_manager, truncate_long_prompts=False, ) prompt_embed = compel_procs(prompt) negative_prompt_embed = compel_procs(negative_prompt) prompt_embed, negative_prompt_embed = compel_procs.pad_conditioning_tensors_to_same_length( [prompt_embed, negative_prompt_embed] ) generator = torch.Generator(device=device).manual_seed(seed) progress(0.3, 'Generating images...') images = pipe( prompt_embeds=prompt_embed, negative_prompt_embeds=negative_prompt_embed, width=width, height=height, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps, num_images_per_prompt=num_images, generator=generator, ).images progress(0.9, f'Done generating {num_images} images') if num_images % 2 == 1: image = make_image_grid(images, rows=num_images, cols=1) else: image = make_image_grid(images, rows=2, cols=num_images // 2) return image def build_interface(): """Build Gradio Interface""" theme = gr.themes.Default(primary_hue=gr.themes.colors.emerald) with gr.Blocks(theme=theme) as interface: gr.Markdown(f"# Stable Diffusion Demo") with gr.Row(): with gr.Column(): prompt = gr.Text(label="Prompt", placeholder="Enter a prompt here") model_id = gr.Dropdown( label="Model ID", choices=MODEL_CHOICES, value="stabilityai/stable-diffusion-3-medium-diffusers", ) # Additional Input Settings with gr.Accordion("Additional Settings", open=False): negative_prompt = gr.Text(label="Negative Prompt", value="", ) with gr.Row(): width = gr.Number(label="Width", value=512, step=64, minimum=64, maximum=2048) height = gr.Number(label="Height", value=512, step=64, minimum=64, maximum=2048) num_images = gr.Number(label="Num Images", value=4, minimum=1, maximum=10, step=1) seed = gr.Number(label="Seed", value=8888, step=1) guidance_scale = gr.Slider(label="Guidance Scale", value=7.5, step=0.5, minimum=0, maximum=10) num_inference_step = gr.Slider( label="Num Inference Steps", value=50, minimum=1, maximum=100, step=2 ) with gr.Row(): use_safety_checker = gr.Checkbox(value=True, label='Use Safety Checker') use_model_offload = gr.Checkbox(value=False, label='Use Model Offload') with gr.Accordion(label='Notes', open=False): # language=HTML notes = gr.HTML( """
If you want to use negative embedding, use the following tokens in the prompt.
""" ) with gr.Column(): output_image = gr.Image(label="Image", type="pil") inputs = [ prompt, model_id, negative_prompt, width, height, guidance_scale, num_inference_step, num_images, use_safety_checker, use_model_offload, seed, ] btn = gr.Button("Generate", variant='primary') btn.click( fn=inference, inputs=inputs, outputs=output_image ) gr.Examples( examples=EXAMPLES, inputs=inputs, outputs=output_image, fn=inference, cache_examples='lazy' ) return interface if __name__ == "__main__": iface = build_interface() iface.queue().launch()