File size: 3,453 Bytes
4d26ed5
7efd637
ca8dc25
4c02c40
4d26ed5
 
ca8dc25
4d26ed5
 
e98c6cb
4d26ed5
 
 
 
d107cdf
4d26ed5
d1e749a
 
 
 
7bc7ddc
d1e749a
7bc7ddc
d1e749a
 
 
 
7bc7ddc
d1e749a
 
 
 
 
 
 
 
4d26ed5
 
d1e749a
4d26ed5
 
 
 
 
d1e749a
4d26ed5
d1e749a
4d26ed5
 
 
d1e749a
4d26ed5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d107cdf
4d26ed5
9dc7fb7
4d26ed5
 
 
 
 
 
3807c9a
4d26ed5
74018c7
7bc7ddc
74018c7
7bc7ddc
82ee039
4d26ed5
7bc7ddc
 
 
 
 
 
 
74018c7
4d26ed5
74018c7
4d26ed5
5e6f5c8
4d26ed5
 
5e6f5c8
4d26ed5
7efd637
4d26ed5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
import gradio as gr
from together import Together
from PIL import Image
import io
import base64

# Initialize the Together AI client
# NOTE(review): constructed at import time with os.environ.get, so a missing
# TOGETHER_API_KEY passes api_key=None — confirm the SDK defers validation
# until the first request rather than failing here.
client = Together(api_key=os.environ.get('TOGETHER_API_KEY'))

def encode_image(image):
    """Serialize a PIL image to a base64-encoded PNG string (UTF-8 text)."""
    png_buffer = io.BytesIO()
    image.save(png_buffer, format="PNG")
    raw_png = png_buffer.getvalue()
    return base64.b64encode(raw_png).decode('utf-8')

def chat_with_image(message, image, history):
    """Stream a chat completion from Together AI, optionally attaching an image.

    Args:
        message: The current user message text.
        image: Filepath of the uploaded image, or None for a text-only turn.
        history: List of [human, assistant] pairs from previous turns.

    Yields:
        The accumulated assistant response text after each streamed chunk.
    """
    messages = [{"role": "system", "content": "You are a helpful assistant that can analyze images and text."}]

    # Encode the image once, outside the loop — the original re-encoded the
    # same file for every matching history turn.
    encoded_image = encode_image(Image.open(image)) if image is not None else None

    for human, assistant in history:
        if encoded_image is not None and human.startswith("Image uploaded: "):
            # Historical image turn: re-attach the image with that turn's text.
            # Bug fix: use the historical text (prefix stripped), not the
            # current `message`, which the original erroneously repeated here.
            # NOTE(review): only the current image filepath is available, so
            # every image turn reuses it — confirm this matches intended UX.
            messages.append({
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
                    {"type": "text", "text": human.split(": ", 1)[1]}
                ]
            })
        else:
            # Plain text-only turn.
            messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})

    # Add the current message, with the image attached when one was uploaded.
    if encoded_image is not None:
        messages.append({
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
                {"type": "text", "text": message}
            ]
        })
    else:
        messages.append({"role": "user", "content": message})

    # Call the Together AI API with streaming enabled.
    response = client.chat.completions.create(
        model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.7,
        top_k=50,
        repetition_penalty=1,
        stop=["<|eot_id|>", "<|eom_id|>"],
        stream=True
    )

    # Accumulate and yield the growing response so the UI updates per chunk.
    # (The original also had a dead `return full_response` after the loop; a
    # generator's return value is ignored by the `for` loop that consumes it.)
    full_response = ""
    for chunk in response:
        if chunk.choices[0].delta.content is not None:
            full_response += chunk.choices[0].delta.content
            yield full_response

# Create the Gradio interface: a chatbot pane, a text box, an image uploader
# (passed around as a filepath string), and a clear button.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    image = gr.Image(type="filepath")
    clear = gr.Button("Clear")

    def user(user_message, image, history):
        """Append the user's turn to history and clear the input widgets.

        Returns (new_textbox_value, new_image_value, new_history): the text
        box and image are reset, and the message — prefixed with
        "Image uploaded: " when an image was attached — is appended with a
        None placeholder for the pending assistant reply.

        NOTE(review): the image *filepath* itself is never stored in history,
        only the prefixed display text — see the matching note in `bot`.
        """
        if image is not None:
            return "", None, history + [["Image uploaded: " + user_message, None]]
        else:
            return "", None, history + [[user_message, None]]

    def bot(history):
        """Generate the assistant reply for the last history entry, streaming
        partial text into `history[-1][1]` and yielding the updated history.
        """
        user_message = history[-1][0]
        image = None
        if user_message.startswith("Image uploaded: "):
            # NOTE(review): this reads the *previous* turn's human text, but
            # `user` never stores a filepath in history — history[-2][0] is
            # ordinary message text, so Image.open on it downstream will
            # likely fail. Verify; the image path probably needs to be carried
            # in separate state rather than recovered from history.
            image = history[-2][0].split(": ", 1)[1]  # Get the image path from the previous message
            user_message = user_message.split(": ", 1)[1]  # Get the actual message
        
        # chat_with_image is a generator yielding progressively longer strings;
        # iterating it character-by-character here relies on each yielded value
        # being the full accumulated response so far.
        bot_message = chat_with_image(user_message, image, history[:-1])
        history[-1][1] = ""
        for character in bot_message:
            history[-1][1] += character
            yield history

    # Submit: record the user turn synchronously, then stream the bot reply.
    msg.submit(user, [msg, image, chatbot], [msg, image, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    # Clear resets the chatbot component to empty.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch()