Llama-3.2-Vision-Free

Runtime error

App Files Community

Llama-3.2-Vision-Free / app.py

akhaliq HF staff

Update app.py

6a8b740 verified about 1 month ago

raw

history blame

3.75 kB

	import os
	import gradio as gr
	from together import Together
	from PIL import Image
	import io
	import base64

	# Shared Together AI client; the API key is read from the TOGETHER_API_KEY
	# environment variable (None is passed through if it is not set).
	client = Together(api_key=os.getenv("TOGETHER_API_KEY"))

	def encode_image(image_path):
	    """Return the image at *image_path* re-encoded as PNG and base64 text.

	    The image is opened with PIL, written to an in-memory buffer in PNG
	    format, and the buffer contents are base64-encoded and decoded to a
	    UTF-8 ``str``.

	    Any exception raised along the way is printed and swallowed; the
	    function then returns ``None`` so callers can fall back gracefully.
	    """
	    try:
	        png_buffer = io.BytesIO()
	        with Image.open(image_path) as source:
	            source.save(png_buffer, format="PNG")
	        encoded_bytes = base64.b64encode(png_buffer.getvalue())
	        return encoded_bytes.decode('utf-8')
	    except Exception as exc:
	        # Best-effort: report the problem and signal failure with None.
	        print(f"Error encoding image: {exc}")
	        return None

	def _image_user_message(image_path, prompt):
	    """Build the user-role message for a turn that carries an image.

	    Args:
	        image_path: Path handed to ``encode_image``.
	        prompt: Text placed on the line after the inlined image data.

	    Returns:
	        A ``{"role": "user", "content": ...}`` dict. When ``encode_image``
	        returns a falsy value, the content is a fixed notice that the
	        upload failed instead of the image data.
	    """
	    encoded_image = encode_image(image_path)
	    if not encoded_image:
	        return {"role": "user", "content": "I tried to upload an image, but there was an error."}
	    # NOTE(review): the image is inlined as base64 text between [IMAGE] tags;
	    # confirm the endpoint accepts this rather than structured image content.
	    return {"role": "user", "content": f"[IMAGE]{encoded_image}[/IMAGE]\n{prompt}"}


	def chat_with_image(message, image, history):
	    """Stream the model's reply to the current turn, given the prior chat.

	    Args:
	        message: Text of the current user turn. When an image is attached
	            and this is empty/None, "What's in this image?" is used.
	        image: Path of an image to attach to the current turn, or a falsy
	            value for a text-only turn.
	        history: Iterable of ``(user_text, assistant_text)`` pairs for the
	            earlier turns. A user text starting with ``"Image: "`` is
	            treated as an image path and re-encoded for the request.

	    Yields:
	        The accumulated response text after each streamed chunk (every
	        value contains everything received so far). If the request or the
	        stream raises, a single ``"An error occurred: ..."`` string is
	        yielded instead of propagating the exception.
	    """
	    messages = [{"role": "system", "content": "You are a helpful assistant that can analyze images and text."}]

	    # Replay the earlier turns so the model sees the whole conversation.
	    for human, assistant in history:
	        if human.startswith("Image: "):
	            image_path = human.split(": ", 1)[1]
	            messages.append(_image_user_message(image_path, "What's in this image?"))
	        else:
	            messages.append({"role": "user", "content": human})
	        messages.append({"role": "assistant", "content": assistant})

	    # Add the current message.
	    if image:
	        # The fallback prompt is resolved here rather than inside the
	        # f-string: a backslash-escaped quote within an f-string expression
	        # is a SyntaxError before Python 3.12.
	        messages.append(_image_user_message(image, message or "What's in this image?"))
	    else:
	        messages.append({"role": "user", "content": message})

	    # Call the Together AI API; any failure is reported to the user as text.
	    try:
	        response = client.chat.completions.create(
	            model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
	            messages=messages,
	            max_tokens=512,
	            temperature=0.7,
	            top_p=0.7,
	            top_k=50,
	            repetition_penalty=1,
	            # Plain pipes: the escaped form "<\|eot_id\|>" keeps the
	            # backslashes and would not match the intended stop strings.
	            stop=["<|eot_id|>", "<|eom_id|>"],
	            stream=True,
	        )

	        full_response = ""
	        for chunk in response:
	            # Skip chunks without choices so the index below cannot fail.
	            if not chunk.choices:
	                continue
	            delta = chunk.choices[0].delta.content
	            if delta is not None:
	                full_response += delta
	                yield full_response
	    except Exception as e:
	        yield f"An error occurred: {e}"


	# Create the Gradio interface: a chat window, a text box, an image input
	# and a button that clears the chat.
	with gr.Blocks() as demo:
	    chatbot = gr.Chatbot()
	    msg = gr.Textbox()
	    image = gr.Image(type="filepath")
	    clear = gr.Button("Clear")

	    def user(user_message, image, history):
	        """Record the submitted turn in the history and reset both inputs.

	        When an image is attached, the turn is stored as ``"Image: <path>"``
	        and the typed text is not stored; otherwise the typed text is
	        stored as-is. The assistant slot of the new turn is left as None.

	        Returns:
	            ``("", None, new_history)`` -- cleared textbox value, cleared
	            image value, and the history with the new turn appended.
	        """
	        history = history or []  # tolerate a missing/None history
	        entry = f"Image: {image}" if image else user_message
	        return "", None, history + [[entry, None]]

	    def bot(history):
	        """Stream the assistant reply into the last history entry.

	        Reads the latest user entry, recovers the image path if the entry
	        is an ``"Image: <path>"`` marker (using a default question as the
	        prompt), and yields the updated history after every streamed
	        update so the chat window refreshes progressively.
	        """
	        user_message = history[-1][0]
	        image = None
	        if user_message.startswith("Image: "):
	            image = user_message.split(": ", 1)[1]
	            user_message = "What's in this image?"

	        history[-1][1] = ""
	        for partial_reply in chat_with_image(user_message, image, history[:-1]):
	            # chat_with_image yields the full text received so far, so each
	            # value replaces the reply; appending would repeat earlier text.
	            history[-1][1] = partial_reply
	            yield history

	    # First record the user turn (unqueued), then stream the bot reply.
	    msg.submit(user, [msg, image, chatbot], [msg, image, chatbot], queue=False).then(
	        bot, chatbot, chatbot
	    )
	    clear.click(lambda: None, None, chatbot, queue=False)

	demo.queue()
	demo.launch()