Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -1,122 +1,70 @@
-import os
 import gradio as gr
+import os
 from together import Together
-from PIL import Image
-import io
-import base64
+from typing import List, Tuple
 
-# Initialize the Together client
-client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
-
-def encode_image(image_path):
-    try:
-        img = Image.open(image_path)
-        buffered = io.BytesIO()
-        img.save(buffered, format="PNG")
-        return base64.b64encode(buffered.getvalue()).decode('utf-8')
-    except Exception as e:
-        print(f"Error encoding image: {e}")
-        return None
-
-def chat_with_image(message, image, history):
-    messages = [{"role": "system", "content": "You are a helpful assistant that can analyze images and text."}]
-
-    for human, assistant in history:
-        if human.startswith("Image: "):
-            # This is an image message
-            image_path = human.split(": ", 1)[1]
-            encoded_image = encode_image(image_path)
-            if encoded_image:
-                messages.append({
-                    "role": "user",
-                    "content": f"[IMAGE]{encoded_image}[/IMAGE]\nWhat is in this image?"
-                })
-            else:
-                messages.append({"role": "user", "content": "I tried to upload an image, but there was an error."})
-        else:
-            # This is a text-only message
-            messages.append({"role": "user", "content": human})
-        messages.append({"role": "assistant", "content": assistant})
-
-    # Add the current message
-    if image:
-        encoded_image = encode_image(image)
-        if encoded_image:
-            messages.append({
-                "role": "user",
-                "content": f"[IMAGE]{encoded_image}[/IMAGE]\n{message or 'What is in this image?'}"
-            })
-        else:
-            messages.append({"role": "user", "content": "I tried to upload an image, but there was an error."})
-    else:
-        messages.append({"role": "user", "content": message})
-
-    # Call the Together AI API
-    try:
-        response = client.chat.completions.create(
-            model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-            messages=messages,
-            max_tokens=512,
-            temperature=0.7,
-            top_p=0.7,
-            top_k=50,
-            repetition_penalty=1,
-            stop=["<|eot_id|>", "<|eom_id|>"],
-            stream=True
-        )
-
-        # Accumulate the response
-        full_response = ""
-        for chunk in response:
-            if chunk.choices[0].delta.content is not None:
-                full_response += chunk.choices[0].delta.content
-                yield full_response
-    except Exception as e:
-        # Enhanced error handling
-        import traceback
-        traceback.print_exc()
-        if hasattr(e, 'response') and e.response is not None:
-            try:
-                error_content = e.response.json()
-                print("Error response JSON:", error_content)
-            except Exception:
-                print("Error response text:", e.response.text)
-        yield f"An error occurred: {str(e)}"
-
-with gr.Blocks() as demo:
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox()
-    image = gr.Image(type="filepath")
-    clear = gr.Button("Clear")
-
-    def user(user_message, image, history):
-        if image:
-            user_message = f"Image: {image}"
-        return "", None, history + [[user_message, None]]
-
-    def bot(history):
-        user_message = history[-1][0]
-        image = None
-        if user_message.startswith("Image: "):
-            image = user_message.split(": ", 1)[1]
-            user_message = "What's in this image?"
-
-        bot_message = chat_with_image(user_message, image, history[:-1])
-        history[-1][1] = ""
-        for character in bot_message:
-            history[-1][1] += character
-            yield history
-
-    msg.submit(user, [msg, image, chatbot], [msg, image, chatbot], queue=False).then(
-        bot, chatbot, chatbot
-    )
+# Initialize Together client
+client = Together()
+
+# Ensure API key is set
+if "TOGETHER_API_KEY" not in os.environ:
+    raise ValueError("Please set the TOGETHER_API_KEY environment variable")
+
+def call_llama_vision_api(prompt: str, image_url: str) -> str:
+    getDescriptionPrompt = "You are a UX/UI designer. Describe the attached screenshot or UI mockup in detail. I will feed in the output you give me to a coding model that will attempt to recreate this mockup, so please think step by step and describe the UI in detail. Pay close attention to background color, text color, font size, font family, padding, margin, border, etc. Match the colors and sizes exactly. Make sure to mention every part of the screenshot including any headers, footers, etc. Use the exact text from the screenshot."
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": getDescriptionPrompt + "\n\n" + prompt},
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": image_url,
+                    },
+                },
+            ],
+        }
+    ]
+
+    stream = client.chat.completions.create(
+        model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
+        messages=messages,
+        stream=True,
+    )
+
+    response = ""
+    for chunk in stream:
+        content = chunk.choices[0].delta.content or ""
+        response += content
+        yield response
+
+def chat(message: str, history: List[Tuple[str, str]], image_url: str) -> Tuple[str, List[Tuple[str, str]]]:
+    if not message:
+        return "", history
+
+    full_response = ""
+    for partial_response in call_llama_vision_api(message, image_url):
+        full_response = partial_response
+        yield "", history + [(message, full_response)]
+
+    history.append((message, full_response))
+    return "", history
+
+# Define the Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# Llama 3.2 Vision Chatbot Demo")
+    gr.Markdown("Enter your message and an image URL to analyze using the Llama 3.2 Vision model.")
+
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox(label="Your message")
+    image_url = gr.Textbox(label="Image URL", value="https://napkinsdev.s3.us-east-1.amazonaws.com/next-s3-uploads/d96a3145-472d-423a-8b79-bca3ad7978dd/trello-board.png")
+
+    clear = gr.Button("Clear")
+
+    msg.submit(chat, [msg, chatbot, image_url], [msg, chatbot])
     clear.click(lambda: None, None, chatbot, queue=False)
 
-demo.launch()
+if __name__ == "__main__":
+    demo.launch()
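Because the new call_llama_vision_api yields the accumulated text after each streamed chunk, it can also be driven outside Gradio. A minimal sketch, assuming the updated app.py above is importable as a module and TOGETHER_API_KEY is exported (the demo.launch() call is behind the __main__ guard, so importing is side-effect free apart from building the Blocks UI):

    from app import call_llama_vision_api

    url = "https://napkinsdev.s3.us-east-1.amazonaws.com/next-s3-uploads/d96a3145-472d-423a-8b79-bca3ad7978dd/trello-board.png"
    description = ""
    for partial in call_llama_vision_api("Describe this UI.", url):
        description = partial  # each yield is the full text so far, not a delta
    print(description)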
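The removed version inlined local uploads as base64 inside a [IMAGE]...[/IMAGE] text marker, which the structured image_url content format replaces. A local file could still be sent through the new code path by packing it into a data: URL, assuming the endpoint accepts data: URLs the way OpenAI-compatible vision APIs commonly do (an assumption, not confirmed by this commit); to_data_url is a hypothetical helper:

    import base64

    def to_data_url(image_path: str) -> str:
        # Hypothetical helper: inline a local PNG as a data: URL so it can be
        # passed wherever call_llama_vision_api expects an image URL.
        with open(image_path, "rb") as f:
            encoded = base64.b64encode(f.read()).decode("utf-8")
        return f"data:image/png;base64,{encoded}"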