Spaces:
Sleeping
Sleeping
import gradio as gr | |
from PIL import Image | |
# Example rows for the demo: each row is [first image, second image], opened
# from the local "examples/" directory when this module is imported.
# create_gradio_interface() passes these rows to gr.Examples, mapping the first
# column to the input image and the second to a hidden image component.
examples = [
    [Image.open(source_path), Image.open(paired_path)]
    for source_path, paired_path in (
        ("examples/in0.jpg", "examples/out0.webp"),
        ("examples/in1.webp", "examples/out1.png"),
        ("examples/in2.jpg", "examples/out2.png"),
        ("examples/in3.jpg", "examples/out3.png"),
    )
]
def create_gradio_interface(process_and_generate):
    """Build the Gradio Blocks UI for the "How Janus-1.3B sees itself" demo.

    Args:
        process_and_generate: Callable invoked as
            ``process_and_generate(input_image, prompt, num_images, cfg_weight)``
            when the "Generate" button is clicked. ``input_image`` comes from
            an image component configured with ``type="filepath"``; the return
            value is sent to the "Generated Images" gallery.

    Returns:
        The constructed ``gr.Blocks`` app. It is not launched here; the caller
        is expected to do that.
    """

    def gradio_process_and_generate(input_image, prompt, num_images, cfg_weight):
        # Thin forwarding wrapper: gives Gradio a callback whose signature is
        # exactly the four UI inputs, whatever the injected callable looks like.
        return process_and_generate(input_image, prompt, num_images, cfg_weight)

    # Markdown/HTML shown beneath the image row.
    explanation = (
        "[Janus 1.3B](https://huggingface.co/deepseek-ai/Janus-1.3B) uses different visual encoders for understanding and generation.\n"
        '<img src="https://huggingface.co/spaces/thomasgauthier/HowJanusSeesItself/raw/main/images/janus_architecture.svg" alt="Janus Model Architecture">\n'
        "Here, by feeding the model an image and then asking it to generate that same image, we visualize the model's ability to translate input (understanding) embedding space to generative embedding space."
    )

    with gr.Blocks() as demo:
        gr.Markdown("# How Janus-1.3B sees itself")

        # Hidden component: exists only so gr.Examples has a target for the
        # second column of each example row.
        dummy = gr.Image(type="filepath", label="Generated Image", visible=False)

        with gr.Row():
            input_image = gr.Image(type="filepath", label="Input Image")
            output_images = gr.Gallery(label="Generated Images", columns=2, rows=2)

        gr.Markdown(explanation)

        prompt = gr.Textbox(label="Prompt", value="Exactly what is shown in the image.")
        num_images = gr.Slider(minimum=1, maximum=12, value=12, step=1, label="Number of Images to Generate")
        cfg_weight = gr.Slider(minimum=1, maximum=10, value=5, step=0.1, label="CFG Weight")
        generate_btn = gr.Button("Generate", variant="primary", size="lg")

        generate_btn.click(
            fn=gradio_process_and_generate,
            inputs=[input_image, prompt, num_images, cfg_weight],
            outputs=output_images,
        )

        gr.Examples(
            examples=examples,
            inputs=[input_image, dummy],
        )

    return demo