HowJanusSeesItself / gradio_interface.py
thomasgauthier's picture
trying to make imgs appear
bd0e8c7
raw
history blame
2.13 kB
import gradio as gr
from PIL import Image
# Example image pairs for the demo's examples widget: each row is
# [input image, output image] (presumably a source picture and the model's
# regeneration of it -- inferred from the in*/out* file names).
# NOTE: the files are opened when this module is imported, and the paths are
# relative to the current working directory of the process.
examples = [
    [Image.open(input_path), Image.open(output_path)]
    for input_path, output_path in (
        ("examples/in0.jpg", "examples/out0.webp"),
        ("examples/in1.webp", "examples/out1.png"),
        ("examples/in2.jpg", "examples/out2.png"),
        ("examples/in3.jpg", "examples/out3.png"),
    )
]
def create_gradio_interface(process_and_generate):
    """Build the "How Janus-1.3B sees itself" Gradio demo.

    The UI consists of an input image next to a gallery of generated images,
    an explanatory Markdown section, a prompt textbox, two sliders (number of
    images and CFG weight), a Generate button, and a set of clickable
    examples taken from the module-level ``examples`` list.

    Args:
        process_and_generate: Callable invoked as
            ``process_and_generate(input_image, prompt, num_images, cfg_weight)``
            when the Generate button is clicked. ``input_image`` comes from an
            image component configured with ``type="filepath"``. The return
            value is forwarded unchanged to the output gallery.

    Returns:
        The constructed ``gr.Blocks`` object. It is not launched here; the
        caller decides when and how to launch it.
    """

    def gradio_process_and_generate(input_image, prompt, num_images, cfg_weight):
        # Thin pass-through; kept as a separately named function so the
        # callback registered with the button stays the same object/name.
        return process_and_generate(input_image, prompt, num_images, cfg_weight)

    # Markdown (with inline HTML) shown beneath the image row.
    explanation = """[Janus 1.3B](https://huggingface.co/deepseek-ai/Janus-1.3B) uses different visual encoders for understanding and generation.
<img src="https://huggingface.co/spaces/thomasgauthier/HowJanusSeesItself/raw/main/images/janus_architecture.svg" alt="Janus Model Architecture">
Here, by feeding the model an image and then asking it to generate that same image, we visualize the model's ability to translate input (understanding) embedding space to generative embedding space."""

    with gr.Blocks() as demo:
        gr.Markdown("# How Janus-1.3B sees itself")

        # Hidden image component: it only exists so that each example row can
        # carry a second (output) image alongside the input image.
        dummy = gr.Image(type="filepath", label="Generated Image", visible=False)

        with gr.Row():
            input_image = gr.Image(type="filepath", label="Input Image")
            output_images = gr.Gallery(label="Generated Images", columns=2, rows=2)

        gr.Markdown(explanation)

        prompt = gr.Textbox(label="Prompt", value="Exactly what is shown in the image.")
        num_images = gr.Slider(minimum=1, maximum=12, value=12, step=1, label="Number of Images to Generate")
        cfg_weight = gr.Slider(minimum=1, maximum=10, value=5, step=0.1, label="CFG Weight")

        generate_btn = gr.Button("Generate", variant="primary", size="lg")
        generate_btn.click(
            fn=gradio_process_and_generate,
            inputs=[input_image, prompt, num_images, cfg_weight],
            outputs=output_images
        )

        # Each example row supplies [input image, output image]; the second
        # value is routed to the hidden ``dummy`` component.
        gr.Examples(
            examples=examples,
            inputs=[input_image, dummy]
        )

    return demo