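"""Gradio Space: describe a UI screenshot with Llama 3.2 Vision via the Together API.

Run locally (assumes a valid Together API key):
    TOGETHER_API_KEY=... python app.py
"""
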
import base64
import os

import gradio as gr
from gradio.data_classes import FileData
from gradio_multimodalchatbot import MultimodalChatbot
from together import Together
# Fail fast if the API key is missing, then initialize the Together client
# (the client reads TOGETHER_API_KEY from the environment).
if "TOGETHER_API_KEY" not in os.environ:
    raise ValueError("Please set the TOGETHER_API_KEY environment variable")

client = Together()

def encode_image(image_path):
    """Read an image file and return its contents as a base64-encoded string."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")
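# Example (hypothetical file): encode_image("mockup.png") -> "/9j/4AAQSkZJ..." (truncated).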

def call_llama_vision_api(prompt: str, image_path: str) -> str:
    """Send the prompt and image to Llama 3.2 Vision on Together and return the full reply."""
    description_prompt = (
        "You are a UX/UI designer. Describe the attached screenshot or UI mockup "
        "in detail. I will feed the output you give me to a coding model that will "
        "attempt to recreate this mockup, so please think step by step and describe "
        "the UI in detail. Pay close attention to background color, text color, font "
        "size, font family, padding, margin, border, etc. Match the colors and sizes "
        "exactly. Make sure to mention every part of the screenshot, including any "
        "headers, footers, etc. Use the exact text from the screenshot."
    )
    base64_image = encode_image(image_path)

    # Together's OpenAI-compatible chat format: the image is passed inline
    # as a base64 data URL alongside the text prompt.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": description_prompt + "\n\n" + prompt},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                },
            ],
        }
    ]

    stream = client.chat.completions.create(
        model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
        messages=messages,
        stream=True,
    )

    # Accumulate the streamed chunks into a single string.
    response = ""
    for chunk in stream:
        response += chunk.choices[0].delta.content or ""
    return response
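# Example call (hypothetical prompt and file path):
#     description = call_llama_vision_api("Focus on the navigation bar.", "mockup.png")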

def chat(message, image_path, history):
    """Handle one chat turn: require an image, call the vision model, append the exchange."""
    if image_path:
        response = call_llama_vision_api(message, image_path)
        user_files = [{"file": FileData(path=image_path)}]
    else:
        response = "I'm sorry, but I need an image to analyze. Please upload an image along with your question."
        user_files = []
    # MultimodalChatbot history is a list of [user_message, bot_message] pairs.
    history.append([{"text": message, "files": user_files}, {"text": response, "files": []}])
    return history, ""

with gr.Blocks() as demo:
    gr.Markdown("# Llama 3.2 Vision Multimodal Chatbot Demo")
    gr.Markdown("Upload an image and enter your message to analyze it with the Llama 3.2 Vision model.")
    chatbot = MultimodalChatbot(value=[], height=800)
    with gr.Row():
        image_input = gr.Image(type="filepath", label="Screenshot / UI mockup")
        text_input = gr.Textbox(label="Message", placeholder="Ask about the uploaded image...")
    send_btn = gr.Button("Send")
    # MultimodalChatbot is used here purely for display; the chat turn is wired
    # through a standard Blocks click event rather than a callback parameter.
    send_btn.click(chat, inputs=[text_input, image_input, chatbot], outputs=[chatbot, text_input])

if __name__ == "__main__":
    demo.launch()