# app.py — "Update app.py" (commit dbab4b0, verified) by hassanelmghari; 4.04 kB
import gradio as gr
from PIL import Image
import requests
import os
from together import Together
import base64
from threading import Thread
import time
import io
# Module-level Together client shared by the handlers below.
# It stays None until initialize_client() creates the real client.
client = None
def initialize_client(api_key=None):
    """Create the module-level Together client.

    Args:
        api_key: Explicit API key. When empty or omitted, the client is
            built without arguments, which requires the
            ``TOGETHER_API_KEY`` environment variable to be set.

    Raises:
        ValueError: If no key is given and ``TOGETHER_API_KEY`` is not
            present in the environment.
    """
    global client

    # Guard clause: nothing to authenticate with.
    if not api_key and "TOGETHER_API_KEY" not in os.environ:
        raise ValueError("Please provide an API key or set the TOGETHER_API_KEY environment variable")

    client = Together(api_key=api_key) if api_key else Together()
def encode_image(image_path, max_size=(800, 800), quality=85):
    """Return the image at *image_path* as a base64-encoded JPEG string.

    The image is shrunk in place to fit within *max_size* (aspect ratio is
    preserved by ``Image.thumbnail``), flattened onto a white background if
    it carries transparency, and re-encoded as JPEG.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        max_size: ``(width, height)`` bounding box for the thumbnail.
        quality: JPEG quality passed to ``Image.save``.

    Returns:
        The JPEG bytes, base64-encoded and decoded to a UTF-8 ``str``.
    """
    with Image.open(image_path) as source:
        source.thumbnail(max_size)
        frame = source

        # Palette images signal transparency through ``info`` rather than
        # through an alpha band, so check both.
        has_alpha = frame.mode in ("RGBA", "LA", "PA") or (
            frame.mode == "P" and "transparency" in frame.info
        )
        if has_alpha:
            # JPEG has no alpha channel: composite onto white. Working in
            # RGBA/RGB keeps the (255, 255, 255) fill valid for every input
            # mode (a 3-tuple fill is not valid for a single-band "L" image).
            rgba = frame.convert("RGBA")
            background = Image.new("RGB", rgba.size, (255, 255, 255))
            background.paste(rgba, mask=rgba.split()[-1])
            frame = background
        elif frame.mode not in ("RGB", "L"):
            # Palette, CMYK, 16-bit, etc.: normalise to RGB so the JPEG
            # encoder accepts the image.
            frame = frame.convert("RGB")

        buffered = io.BytesIO()
        frame.save(buffered, format="JPEG", quality=quality)

    return base64.b64encode(buffered.getvalue()).decode('utf-8')
def _history_to_messages(history, max_history):
    """Convert the last *max_history* history entries into API chat messages."""
    recent = list(history or [])[-max_history:] if max_history > 0 else []

    messages = []
    skip_next = False  # True when the next entry was already sent with its image
    for idx, entry in enumerate(recent):
        if skip_next:
            skip_next = False
            continue

        user_part = entry[0]
        if isinstance(user_part, tuple):
            # An image upload is stored as its own entry; the text that was
            # sent with it (and the reply) live in the following entry.
            # Index into the same window we are iterating, and skip images
            # whose text turn is missing from it.
            if idx + 1 >= len(recent) or not isinstance(recent[idx + 1][0], str):
                continue
            follow_up = recent[idx + 1]
            messages.append({"role": "user", "content": [{"type": "text", "text": follow_up[0]}, {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(user_part[0])}"}}]})
            messages.append({"role": "assistant", "content": [{"type": "text", "text": follow_up[1]}]})
            skip_next = True
        elif isinstance(user_part, str):
            # Text-only turn.
            messages.append({"role": "user", "content": [{"type": "text", "text": user_part}]})
            messages.append({"role": "assistant", "content": [{"type": "text", "text": entry[1]}]})
    return messages


def bot_streaming(message, history, max_new_tokens=250, api_key=None, max_history=5):
    """Stream a reply from the Llama Vision model for a multimodal chat turn.

    Args:
        message: Dict with ``"text"`` (the prompt) and ``"files"`` (a list of
            attachments, each either a path string or a dict with a
            ``"path"`` key). An image is sent only when exactly one file is
            attached.
        history: Earlier turns as ``[user, assistant]`` pairs. An entry whose
            user part is a tuple holds an image path; its text and reply are
            read from the entry that follows it.
        max_new_tokens: Forwarded to the API as ``max_tokens``.
        api_key: Together API key for this request. When empty, the shared
            module-level client is used (created on demand from the
            environment).
        max_history: Number of trailing history entries replayed to the
            model; ``0`` replays none.

    Yields:
        The accumulated reply text after each streamed chunk, or a single
        error message when the API rejects the request.

    Raises:
        ValueError: From ``initialize_client`` when no API key is available.
    """
    # The module only imports the ``Together`` class, so the package name used
    # by the except clause below has to be bound here.
    import together.error

    if api_key:
        # Build a client per request so one visitor's key is never reused for
        # a later request that supplied a different key.
        active_client = Together(api_key=api_key)
    else:
        if client is None:
            initialize_client()
        active_client = client

    messages = _history_to_messages(history, max_history)

    prompt = message["text"]
    attachments = message["files"]
    content = [{"type": "text", "text": prompt}]
    if len(attachments) == 1:
        first = attachments[0]
        # Example rows provide a bare path; regular uploads provide a dict.
        image_path = first if isinstance(first, str) else first["path"]
        content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path)}"}})
    messages.append({"role": "user", "content": content})

    try:
        stream = active_client.chat.completions.create(
            model="meta-llama/Llama-Vision-Free",
            messages=messages,
            max_tokens=max_new_tokens,
            stream=True,
        )
        buffer = ""
        for chunk in stream:
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content
            if delta is not None:
                buffer += delta
                # Brief pause kept from the original; presumably paces UI updates.
                time.sleep(0.01)
                yield buffer
    except together.error.InvalidRequestError as e:
        if "Request Entity Too Large" in str(e):
            yield "The image is too large. Please try with a smaller image or compress the existing one."
        else:
            yield f"An error occurred: {str(e)}"
# Chat UI definition.
# ChatInterface passes the values of `additional_inputs` to `fn` positionally,
# right after (message, history). The list order must therefore match
# bot_streaming's parameters: max_new_tokens first, then api_key.
demo = gr.ChatInterface(
    fn=bot_streaming,
    title="Meta Llama-3.2-11B-Vision-Instruct (FREE)",
    textbox=gr.MultimodalTextbox(),
    additional_inputs=[
        gr.Slider(
            minimum=10,
            maximum=500,
            value=250,
            step=10,
            label="Maximum number of new tokens to generate",
        ),
        # NOTE(review): `required` does not appear among the documented
        # gr.Textbox parameters — confirm the installed Gradio accepts it.
        gr.Textbox(
            label="Together API Key",
            placeholder="Enter your API key here.",
            required=True
        ),
    ],
    cache_examples=False,
    description="Try the new Llama 3.2 11B Vision API by Meta for free through Together AI. Upload an image, and start chatting about it. Just paste in your Together AI API key and get started!",
    stop_btn="Stop Generation",
    fill_height=True,
    multimodal=True
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)