File size: 4,039 Bytes
7efd637
b6ef90b
 
c33dbd2
ca8dc25
5ee7ec4
b6ef90b
 
bd796ec
ca8dc25
c33dbd2
c27316e
e98c6cb
c27316e
 
 
 
 
 
 
 
d107cdf
bd796ec
 
 
c0f2d6a
 
 
 
bd796ec
 
 
5ee7ec4
bd796ec
c27316e
 
 
b6ef90b
 
 
9dc7fb7
bd796ec
b6ef90b
 
 
 
 
 
 
 
5ee7ec4
b6ef90b
 
 
 
 
 
 
 
6a8b740
bd796ec
 
c671a2f
bd796ec
 
 
 
 
 
 
 
 
 
 
6719d1c
bd796ec
 
 
 
 
82ee039
b6ef90b
 
c671a2f
b6ef90b
 
dbab4b0
 
 
 
 
b6ef90b
 
 
 
 
 
 
 
 
c671a2f
b6ef90b
 
 
 
7efd637
c33dbd2
b6ef90b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import gradio as gr
from PIL import Image
import requests
import os
from together import Together
import base64
from threading import Thread
import time
import io

# Module-wide Together client. It stays None until initialize_client()
# assigns a real client instance to it.
client = None

def initialize_client(api_key=None):
    """Create the Together client and store it in the module-level ``client``.

    Args:
        api_key: Key passed explicitly to ``Together``. When falsy, the
            ``TOGETHER_API_KEY`` environment variable must be set and the
            client is built without arguments (the SDK is expected to pick
            the variable up on its own).

    Raises:
        ValueError: If no key is given and ``TOGETHER_API_KEY`` is not set.
    """
    global client
    # Guard clause: bail out early when no credential source is available.
    if not api_key and "TOGETHER_API_KEY" not in os.environ:
        raise ValueError("Please provide an API key or set the TOGETHER_API_KEY environment variable")
    client = Together(api_key=api_key) if api_key else Together()

def encode_image(image_path, max_size=(800, 800), quality=85):
    """Return the image at ``image_path`` as a base64-encoded JPEG string.

    The image is shrunk in place to fit within ``max_size`` (aspect ratio is
    kept by ``Image.thumbnail``) and re-encoded as JPEG.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        max_size: ``(width, height)`` bounding box for the thumbnail.
        quality: JPEG quality passed to ``Image.save``.

    Returns:
        The JPEG bytes, base64-encoded and decoded to a ``str``.
    """
    with Image.open(image_path) as img:
        img.thumbnail(max_size)

        has_alpha = img.mode in ('RGBA', 'LA') or (
            img.mode == 'P' and 'transparency' in img.info
        )
        if has_alpha:
            # JPEG has no alpha channel: flatten onto a white RGB background.
            # Going through RGBA avoids building an 'L' background with an
            # RGB colour tuple, which is invalid for a single-band image.
            rgba = img.convert('RGBA')
            background = Image.new('RGB', rgba.size, (255, 255, 255))
            background.paste(rgba, mask=rgba.split()[-1])
            img = background
        elif img.mode not in ('RGB', 'L', 'CMYK'):
            # Palette, 16-bit, float, ... modes cannot be written as JPEG.
            img = img.convert('RGB')

        buffered = io.BytesIO()
        img.save(buffered, format="JPEG", quality=quality)
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

def _text_part(text):
    """Build a text content part for a chat message."""
    return {"type": "text", "text": text}


def _image_part(image_path):
    """Build an image content part carrying the file as a JPEG data URL."""
    return {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path)}"}}


def _history_to_messages(history, max_history):
    """Convert the last ``max_history`` history entries into chat messages.

    Each entry is indexed as ``entry[0]`` (user part) and ``entry[1]``
    (assistant part). A user part that is a tuple/list is treated as an
    uploaded file whose first element is the path; when the next entry in the
    window is a text entry, the two are merged into a single image+text turn.
    """
    # history[-0:] would be the whole list, so handle non-positive limits.
    recent = list(history[-max_history:]) if history and max_history > 0 else []

    messages = []
    skip_next = False
    for idx, entry in enumerate(recent):
        if skip_next:
            # This text entry was already merged into the preceding image turn.
            skip_next = False
            continue

        user_part, reply = entry[0], entry[1]
        if isinstance(user_part, (tuple, list)):
            text = ""
            follow = recent[idx + 1] if idx + 1 < len(recent) else None
            if follow is not None and isinstance(follow[0], str):
                text, reply = follow[0], follow[1]
                skip_next = True
            content = [_text_part(text), _image_part(user_part[0])]
        elif isinstance(user_part, str):
            content = [_text_part(user_part)]
        else:
            continue

        messages.append({"role": "user", "content": content})
        if reply is not None:
            messages.append({"role": "assistant", "content": [_text_part(reply)]})
    return messages


def bot_streaming(message, history, max_new_tokens=250, api_key=None, max_history=5):
    """Stream a model reply for a multimodal chat message.

    Args:
        message: Dict with a ``"text"`` entry and a ``"files"`` list. A file
            is either a path string or a dict with a ``"path"`` entry.
        history: Previous chat entries, each indexable as
            ``[user_part, assistant_part]``.
        max_new_tokens: Passed to the API as ``max_tokens``.
        api_key: Together API key. When given, a client is built for this
            call with that key; otherwise the module-level ``client`` is used
            (and initialised on first use from the environment).
        max_history: Number of most recent history entries to send.

    Yields:
        The accumulated response text after each streamed chunk, or an error
        message string if the API rejects the request.

    Raises:
        ValueError: From ``initialize_client`` when no key is available.
    """
    # Imported here because the module only imports the ``Together`` class,
    # so the bare name ``together`` is not available to the except clause.
    from together.error import InvalidRequestError

    if api_key:
        # Use the caller's own key rather than whichever key happened to
        # initialise the shared client first.
        active_client = Together(api_key=api_key)
    else:
        if client is None:
            initialize_client()
        active_client = client

    txt = message["text"]
    messages = _history_to_messages(history, max_history)

    files = message.get("files") or []
    if len(files) == 1:
        first = files[0]
        # Examples arrive as plain path strings, regular uploads as dicts.
        image_path = first if isinstance(first, str) else first["path"]
        messages.append({"role": "user", "content": [_text_part(txt), _image_part(image_path)]})
    else:
        messages.append({"role": "user", "content": [_text_part(txt)]})

    try:
        stream = active_client.chat.completions.create(
            model="meta-llama/Llama-Vision-Free",
            messages=messages,
            max_tokens=max_new_tokens,
            stream=True,
        )

        buffer = ""
        for chunk in stream:
            if not chunk.choices:
                # Some stream chunks may carry no choices; nothing to append.
                continue
            piece = chunk.choices[0].delta.content
            if piece is not None:
                buffer += piece
                time.sleep(0.01)
                yield buffer

    except InvalidRequestError as e:
        if "Request Entity Too Large" in str(e):
            yield "The image is too large. Please try with a smaller image or compress the existing one."
        else:
            yield f"An error occurred: {str(e)}"

# Chat UI definition. The values of `additional_inputs` are handed to `fn`
# positionally after (message, history), so the list order must follow
# bot_streaming's signature: max_new_tokens first, then api_key.
demo = gr.ChatInterface(
    fn=bot_streaming,
    title="Meta Llama-3.2-11B-Vision-Instruct (FREE)",
    textbox=gr.MultimodalTextbox(),
    additional_inputs=[
        gr.Slider(
            minimum=10,
            maximum=500,
            value=250,
            step=10,
            label="Maximum number of new tokens to generate",
        ),
        # `required` is not a Textbox argument; an empty key is handled by
        # bot_streaming instead. The key is masked since it is a secret.
        gr.Textbox(
            label="Together API Key",
            placeholder="Enter your API key here.",
            type="password",
        ),
    ],
    cache_examples=False,
    description="Try the new Llama 3.2 11B Vision API by Meta for free through Together AI. Upload an image, and start chatting about it. Just paste in your Together AI API key and get started!",
    stop_btn="Stop Generation",
    fill_height=True,
    multimodal=True
)

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)