Spaces:

cantuncok
/

meta-Llama-3.2-11B-Vision-Instruct

Running

App Files Files Community

meta-Llama-3.2-11B-Vision-Instruct / app.py

cantuncok

Update app.py

e26d2d9 verified about 4 hours ago

raw

history blame contribute delete

No virus

1.74 kB

	import gradio as gr
	import os
	import torch
	from transformers import AutoProcessor, MllamaForConditionalGeneration
	from PIL import Image

	# Read the Hugging Face access token from the environment. It is handed to
	# the model/processor loaders, so abort at startup when it is missing or empty.
	hf_token = os.environ.get("HF_TOKEN")
	if not hf_token:
	    missing_token_msg = "HF_TOKEN çevresel değişkeni ayarlanmamış. Lütfen Hugging Face token'ınızı ayarlayın."
	    raise ValueError(missing_token_msg)

	# Load the model and its processor from the Hugging Face Hub.
	# `token=` is the supported keyword for authenticated downloads; the older
	# `use_auth_token=` spelling is deprecated and scheduled for removal.
	model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
	model = MllamaForConditionalGeneration.from_pretrained(
	    model_name,
	    token=hf_token,
	    torch_dtype=torch.bfloat16,
	    device_map="auto",
	)
	processor = AutoProcessor.from_pretrained(model_name, token=hf_token)

	def predict(image, text: str, max_new_tokens: int = 100) -> str:
	    """Generate the model's reply to an image plus a text prompt.

	    Args:
	        image: The image to describe/query (the UI supplies a PIL image),
	            or None when the user submitted without uploading one.
	        text: The user's prompt.
	        max_new_tokens: Upper bound on the number of generated tokens.
	            Defaults to 100, the value that was previously hard-coded.

	    Returns:
	        The decoded reply only, without the echoed prompt.

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    # The chat message below always contains an image slot, so a missing
	    # image would otherwise fail deep inside the processor with an
	    # unhelpful message. Surface a readable error in the UI instead.
	    if image is None:
	        raise gr.Error("Please upload an image before submitting.")

	    # Build the single-turn chat message: one image slot plus the prompt.
	    messages = [
	        {
	            "role": "user",
	            "content": [
	                {"type": "image"},
	                {"type": "text", "text": text},
	            ],
	        }
	    ]
	    # Render the chat template into the prompt string.
	    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

	    # The rendered template already starts with the begin-of-text token, so
	    # tell the tokenizer not to prepend a second one.
	    inputs = processor(
	        images=image,
	        text=input_text,
	        add_special_tokens=False,
	        return_tensors="pt",
	    ).to(model.device)

	    # Generate the continuation.
	    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

	    # `generate` returns prompt + continuation; drop the prompt tokens so the
	    # reply does not echo the user's own text back.
	    prompt_length = inputs["input_ids"].shape[-1]
	    generated_tokens = outputs[0][prompt_length:]
	    return processor.decode(generated_tokens, skip_special_tokens=True)

	# Define the Gradio interface: an image and a text prompt in, text out.
	demo_inputs = [
	    gr.Image(type="pil", label="Image Input"),
	    gr.Textbox(label="Text Input"),
	]
	demo_output = gr.Textbox(label="Output")

	interface = gr.Interface(
	    fn=predict,
	    inputs=demo_inputs,
	    outputs=demo_output,
	    title="Llama 3.2 11B Vision Instruct Demo",
	    description="Meta's new model that generates a response based on an image and text input.",
	)

	# Start serving the demo.
	interface.launch()