import gradio as gr import torch import open_clip import mediapy as media from optim_utils import * import argparse # load args args = argparse.Namespace() args.__dict__.update(read_json("sample_config.json")) args.print_step = None # load model device = "cuda" if torch.cuda.is_available() else "cpu" model, _, preprocess = open_clip.create_model_and_transforms(args.clip_model, pretrained=args.clip_pretrain, device=device) args.counter = 0 def inference(target_image, prompt_len, iter): args.counter += 1 print(args.counter) if prompt_len is not None: args.prompt_len = int(prompt_len) else: args.prompt_len = 8 if iter is not None: args.iter = int(iter) else: args.iter = 1000 learned_prompt = optimize_prompt(model, preprocess, args, device, target_images=[target_image]) return learned_prompt def inference_text(target_prompt, prompt_len, iter): args.counter += 1 print(args.counter) if prompt_len is not None: args.prompt_len = min(int(prompt_len), 75) else: args.prompt_len = 8 if iter is not None: args.iter = min(int(iter), 3000) else: args.iter = 1000 learned_prompt = optimize_prompt(model, preprocess, args, device, target_prompts=[target_prompt]) return learned_prompt gr.Progress(track_tqdm=True) demo = gr.Blocks() with demo: gr.Markdown("# PEZ Dispenser") gr.Markdown("## Hard Prompts Made Easy (PEZ)") gr.Markdown("*Want to generate a text prompt for your image that is useful for Stable Diffusion?*") gr.Markdown("This space can either generate a text fragment that describes your image, or it can shorten an existing text prompt. This space is using OpenCLIP-ViT/H, the same text encoder used by Stable Diffusion V2. After you generate a prompt, try it out on Stable Diffusion [here](https://huggingface.co/stabilityai/stable-diffusion-2-1-base), [here](https://huggingface.co/spaces/stabilityai/stable-diffusion) or on [Midjourney](https://docs.midjourney.com/). For a quick PEZ demo, try clicking on one of the examples at the bottom of this page.") gr.Markdown("For additional details, you can check out the [paper](https://arxiv.org/abs/2302.03668) and the code on [Github](https://github.com/YuxinWenRick/hard-prompts-made-easy).") gr.Markdown("Note: Generation with 1000 steps takes ~60 seconds with a T4. Don't want to wait? You can also run on [Google Colab](https://colab.research.google.com/drive/1VSFps4siwASXDwhK_o29dKA9COvTnG8A?usp=sharing). Or, you can reduce the number of steps.") gr.HTML("""
For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.