Spaces:
Runtime error
Runtime error
File size: 2,866 Bytes
7efd637 b6ef90b c33dbd2 ca8dc25 5ee7ec4 b6ef90b ca8dc25 c33dbd2 e98c6cb c33dbd2 d107cdf 5ee7ec4 b6ef90b 9dc7fb7 b6ef90b 5ee7ec4 b6ef90b 6a8b740 c33dbd2 c0f1215 c33dbd2 b6ef90b c33dbd2 6719d1c b6ef90b c33dbd2 b6ef90b 82ee039 b6ef90b c0f1215 b6ef90b c0f1215 b6ef90b 7efd637 c33dbd2 b6ef90b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import gradio as gr
from PIL import Image
import requests
import os
from together import Together
import base64
from threading import Thread
import time
# Fail fast with a clear message when the API key is missing. This check must
# run BEFORE the client is constructed: Together() reads TOGETHER_API_KEY
# itself and raises its own error when it is absent, which would otherwise
# make this friendlier message unreachable.
if "TOGETHER_API_KEY" not in os.environ:
    raise ValueError("Please set the TOGETHER_API_KEY environment variable")

# Initialize Together client (picks the key up from the environment).
client = Together()
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode and read in full, so the result is the
    base64 encoding of the exact bytes on disk (an empty file yields ``""``).

    Args:
        image_path: Path of the file to read; passed straight to ``open``.

    Returns:
        The base64-encoded file contents as a ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
def _image_part(image_path):
    """Build an ``image_url`` content part embedding the file as a data URL."""
    import mimetypes  # local import: only this helper needs it

    # Label the data URL with the file's real image type when the extension
    # reveals it; fall back to the previously hard-coded JPEG otherwise.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"
    return {
        "type": "image_url",
        "image_url": {"url": f"data:{mime_type};base64,{encode_image(image_path)}"},
    }


def _user_message(text, image_paths):
    """Build one user message from a text and the images uploaded with it."""
    # Never send a null text field: a missing/non-string text becomes "".
    content = [{"type": "text", "text": text if isinstance(text, str) else ""}]
    # An image is attached only when exactly one was uploaded; any other count
    # results in a text-only message.
    if len(image_paths) == 1:
        content.append(_image_part(image_paths[0]))
    return {"role": "user", "content": content}


def _history_to_messages(history):
    """Convert the chat history into a list of user/assistant messages."""
    messages = []
    pending_images = []
    for turn in history:
        user_part, bot_part = turn[0], turn[1]
        if isinstance(user_part, tuple):
            # A tuple user part is an uploaded file whose first item is its
            # path. It carries no reply of its own; hold it until the entry
            # that carries the accompanying text and reply.
            pending_images.append(user_part[0])
            continue
        messages.append(_user_message(user_part, pending_images))
        messages.append({
            "role": "assistant",
            # A reply may be None (NOTE(review): e.g. after an interrupted
            # generation - confirm); send "" rather than null.
            "content": [{"type": "text", "text": bot_part if isinstance(bot_part, str) else ""}],
        })
        pending_images = []
    # File entries left without a following text entry are ignored instead of
    # indexing past the end of the history.
    return messages


def bot_streaming(message, history, max_new_tokens=250):
    """Stream a reply to ``message`` from the Together chat-completions API.

    Args:
        message: Dict with a ``"text"`` entry and a ``"files"`` list. A file is
            either a path string or a dict with a ``"path"`` key. The image is
            sent only when exactly one file is present.
        history: Sequence of ``[user, assistant]`` pairs from earlier turns. A
            user part that is a tuple is an uploaded file (first item is its
            path); the following non-file entry supplies the text and reply
            for that upload.
        max_new_tokens: Upper bound passed to the API as ``max_tokens``.

    Yields:
        The reply accumulated so far, once per streamed chunk that carries
        text.
    """
    files = message.get("files") or []
    image_paths = []
    if len(files) == 1:
        first_file = files[0]
        # A plain string is already a path (examples); otherwise the path is
        # stored under the "path" key (regular input).
        image_paths.append(first_file if isinstance(first_file, str) else first_file["path"])

    messages = _history_to_messages(history or [])
    messages.append(_user_message(message.get("text"), image_paths))

    stream = client.chat.completions.create(
        model="meta-llama/Llama-Vision-Free",
        messages=messages,
        max_tokens=int(max_new_tokens),
        stream=True,
    )

    buffer = ""
    for chunk in stream:
        # Skip chunks that carry no choices instead of failing on choices[0].
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if delta_text is not None:
            buffer += delta_text
            time.sleep(0.01)
            yield buffer
# Chat UI definition: a multimodal chat box wired to bot_streaming, plus one
# slider whose value is supplied to the handler as an additional input.
demo = gr.ChatInterface(
    # --- behaviour ---
    fn=bot_streaming,
    multimodal=True,
    textbox=gr.MultimodalTextbox(),
    additional_inputs=[
        gr.Slider(
            label="Maximum number of new tokens to generate",
            minimum=10,
            maximum=500,
            step=10,
            value=250,
        ),
    ],
    # --- presentation ---
    title="Meta Llama 3.2 Vision 11B",
    description="Try Multimodal Llama by Meta with the Together API in this demo. Upload an image, and start chatting about it",
    stop_btn="Stop Generation",
    fill_height=True,
    cache_examples=False,
)
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch(debug=True)