Llama-Vision-Together

Runtime error

App Files Files Community

Llama-Vision-Together / app.py

akhaliq HF staff

Update app.py

c0f1215 verified about 1 month ago

raw

history blame

2.87 kB

	import gradio as gr
	from PIL import Image
	import requests
	import os
	from together import Together
	import base64
	from threading import Thread
	import time

	# Ensure the API key is set *before* building the client. The client is
	# constructed without an explicit key, so it presumably looks the key up in
	# the environment itself; checking first guarantees this clearer error is
	# the one the operator sees.
	if "TOGETHER_API_KEY" not in os.environ:
	    raise ValueError("Please set the TOGETHER_API_KEY environment variable")

	# Initialize Together client (module-level; used by bot_streaming).
	client = Together()

	def encode_image(image_path):
	    """Return the contents of the file at *image_path* as base64 text.

	    The file is opened in binary mode, read in full, base64-encoded and the
	    resulting bytes are decoded to a ``str`` using UTF-8.
	    """
	    with open(image_path, "rb") as handle:
	        raw_bytes = handle.read()
	    encoded_bytes = base64.b64encode(raw_bytes)
	    return str(encoded_bytes, "utf-8")

	def _text_part(text):
	    """Build a text content part for a chat message."""
	    return {"type": "text", "text": text}


	def _image_part(image_path):
	    """Build an image content part embedding the file as a base64 data URL."""
	    # NOTE(review): the data URL always declares image/jpeg, whatever the
	    # real format of the uploaded file is.
	    return {
	        "type": "image_url",
	        "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path)}"},
	    }


	def _history_to_messages(history):
	    """Convert the chat history into a list of API message dicts.

	    Assumes tuple-style history: each entry is indexable as
	    ``(user, assistant)``, where a file upload is an entry whose user slot is
	    a tuple with the file path as its first item, and a text turn has a
	    ``str`` in the user slot.

	    Uploads are held until the next text turn. Exactly one pending upload is
	    attached to that turn as an image; any other count is dropped, matching
	    how the live turn treats ``message["files"]``. This avoids looking at
	    ``history[i - 1]`` / ``history[i + 1]``, which wrapped around at the
	    first entry and ran past the end when an upload was the last entry.
	    """
	    messages = []
	    pending_images = []
	    for turn in history:
	        user_msg, assistant_msg = turn[0], turn[1]
	        if isinstance(user_msg, tuple):
	            pending_images.append(user_msg[0])
	            continue
	        if not isinstance(user_msg, str):
	            # Neither an upload nor text: ignored, as before.
	            continue
	        content = [_text_part(user_msg)]
	        if len(pending_images) == 1:
	            content.append(_image_part(pending_images[0]))
	        pending_images = []
	        messages.append({"role": "user", "content": content})
	        messages.append({"role": "assistant", "content": [_text_part(assistant_msg)]})
	    return messages


	def bot_streaming(message, history, max_new_tokens=250):
	    """Stream a model reply for the current multimodal chat turn.

	    Args:
	        message: Mapping with a ``"text"`` entry (the prompt) and a
	            ``"files"`` list. A file is either a path string (examples) or a
	            mapping with a ``"path"`` key (regular input). The image is sent
	            only when exactly one file is present.
	        history: Previous turns; see ``_history_to_messages`` for the
	            assumed layout.
	        max_new_tokens: Passed to the API as ``max_tokens``.

	    Yields:
	        The reply text accumulated so far, once per chunk that carries
	        content.
	    """
	    messages = _history_to_messages(history)

	    content = [_text_part(message["text"])]
	    files = message["files"]
	    if len(files) == 1:
	        first_file = files[0]
	        # Examples arrive as plain paths; regular input as a dict with "path".
	        image_path = first_file if isinstance(first_file, str) else first_file["path"]
	        content.append(_image_part(image_path))
	    messages.append({"role": "user", "content": content})

	    stream = client.chat.completions.create(
	        model="meta-llama/Llama-Vision-Free",
	        messages=messages,
	        max_tokens=max_new_tokens,
	        stream=True,
	    )

	    buffer = ""
	    for chunk in stream:
	        # Skip chunks without any choice instead of failing on choices[0].
	        if not chunk.choices:
	            continue
	        piece = chunk.choices[0].delta.content
	        if piece is not None:
	            buffer += piece
	            time.sleep(0.01)
	            yield buffer

	# Extra control shown next to the chat box; bot_streaming accepts the
	# matching extra parameter (max_new_tokens, same default of 250).
	_max_tokens_slider = gr.Slider(
	    label="Maximum number of new tokens to generate",
	    minimum=10,
	    maximum=500,
	    step=10,
	    value=250,
	)

	# Chat UI wired to the streaming handler, with a multimodal textbox.
	demo = gr.ChatInterface(
	    fn=bot_streaming,
	    title="Meta Llama 3.2 Vision 11B",
	    description="Try Multimodal Llama by Meta with the Together API in this demo. Upload an image, and start chatting about it",
	    textbox=gr.MultimodalTextbox(),
	    multimodal=True,
	    additional_inputs=[_max_tokens_slider],
	    cache_examples=False,
	    stop_btn="Stop Generation",
	    fill_height=True,
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(debug=True)