Spaces:

cantuncok
/

meta-Llama-3.2-11B-Vision-Instruct

Running

App Files Files Community

meta-Llama-3.2-11B-Vision-Instruct / app.py

cantuncok

Update app.py

6205fd1 verified about 20 hours ago

raw

history blame

No virus

1.73 kB

	import gradio as gr
	import os
	import torch
	from transformers import AutoProcessor, MllamaForConditionalGeneration
	from PIL import Image

	# Read the Hugging Face access token from the environment; it is handed to
	# the from_pretrained() calls that load the model and processor.
	hf_token = os.getenv("HF_TOKEN")
	if not hf_token:
	    # Fail fast at startup when the variable is unset or empty, rather than
	    # failing later while the model is being loaded.
	    raise ValueError("HF_TOKEN çevresel değişkeni ayarlanmamış. Lütfen Hugging Face token'ınızı ayarlayın.")

	# Load the model and its processor.
	# NOTE(review): the hosting Space's name mentions "11B" while this checkpoint
	# id is the 90B variant — confirm which one is intended.
	model_name = "meta-llama/Llama-3.2-90B-Vision-Instruct"
	model = MllamaForConditionalGeneration.from_pretrained(
	    model_name,
	    # `token` replaces the deprecated `use_auth_token` keyword in transformers.
	    token=hf_token,
	    torch_dtype=torch.bfloat16,
	    # Let the library decide how to place the weights on the available devices.
	    device_map="auto",
	)
	processor = AutoProcessor.from_pretrained(model_name, token=hf_token)

	def predict(image, text):
	    """Generate the model's reply to a text prompt about an image.

	    Args:
	        image: The input image. The UI in this file supplies a PIL image;
	            Gradio passes None when no image was uploaded.
	        text: The user's prompt.

	    Returns:
	        The decoded text of the newly generated tokens only (the prompt is
	        not echoed back).

	    Raises:
	        gr.Error: If no image was provided, so the UI shows a readable
	            message instead of an internal processor error.
	    """
	    if image is None:
	        raise gr.Error("Lütfen bir görüntü yükleyin.")

	    # Single-turn conversation: one image placeholder followed by the prompt.
	    messages = [
	        {
	            "role": "user",
	            "content": [
	                {"type": "image"},
	                {"type": "text", "text": text},
	            ],
	        }
	    ]
	    # Render the conversation into the model's prompt format.
	    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
	    # The chat template already emits the begin-of-text token, so the
	    # tokenizer must not prepend a second one.
	    inputs = processor(
	        image,
	        input_text,
	        add_special_tokens=False,
	        return_tensors="pt",
	    ).to(model.device)
	    outputs = model.generate(**inputs, max_new_tokens=100)
	    # generate() returns prompt + continuation; keep only the continuation.
	    prompt_length = inputs["input_ids"].shape[-1]
	    response = processor.decode(outputs[0][prompt_length:], skip_special_tokens=True)
	    return response

	# Assemble the Gradio UI: an image and a text prompt go in, the reply text comes out.
	_image_input = gr.Image(type="pil", label="Görüntü Girdisi")
	_prompt_input = gr.Textbox(label="Metin Girdisi")
	_reply_output = gr.Textbox(label="Çıktı")

	interface = gr.Interface(
	    fn=predict,
	    inputs=[_image_input, _prompt_input],
	    outputs=_reply_output,
	    title="Llama 3.2 90B Vision Instruct Demo",
	    description="Bir görüntü ve metin girdisi alarak yanıt üreten model.",
	)

	# Launch the demo.
	interface.launch()