import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from typing import Dict, List, Any
class EndpointHandler():
    """Inference endpoint handler that generates image captions (alt text)
    with the Salesforce BLIP image-captioning model.

    Expects request payloads of the form
    ``{"images": [{"id": ..., "url": ..., "text": <optional prompt>}, ...]}``
    and returns one result dict per image.
    """

    # Seconds to wait for an image download before giving up; without a
    # timeout a single dead URL could hang the endpoint indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self, path=""):
        # NOTE(review): `path` is part of the standard handler interface but
        # the model is always pulled from the Hub by name, as in the original.
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
        self.model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)
        # Inference only: eval() disables dropout / train-mode layers.
        self.model.eval()

    def process_single_image(self, img_url, text=None):
        """Download one image and return a generated caption.

        Args:
            img_url: HTTP(S) URL of the image.
            text: Optional prompt prefix for conditional captioning.

        Returns:
            The decoded caption string.

        Raises:
            requests.RequestException: on download failure or timeout.
        """
        # Bound the download and fail fast on HTTP errors, instead of
        # handing a non-image error page to PIL.
        response = requests.get(img_url, stream=True, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        raw_image = Image.open(response.raw).convert('RGB')
        if text:
            inputs = self.processor(raw_image, text, return_tensors="pt").to(self.device)
        else:
            inputs = self.processor(raw_image, return_tensors="pt").to(self.device)
        # No gradients needed at inference time; avoids building an
        # autograd graph and cuts memory use.
        with torch.no_grad():
            out = self.model.generate(**inputs)
        return self.processor.decode(out[0], skip_special_tokens=True)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Handle one request payload; returns one result dict per image.

        A failure on a single image no longer aborts the whole batch: the
        failing image gets an ``{"error": ...}`` entry and the remaining
        images are still processed.
        """
        try:
            print(f"Received data: {data}")
            if not data or "images" not in data:
                return [{"error": "No images data provided in the request."}]
            images_data = data.get("images")
            alt_texts = []
            for image in images_data:
                img_id = image.get("id")
                img_url = image.get("url")
                text = image.get("text", None)
                try:
                    alt_text = self.process_single_image(img_url, text)
                except Exception as e:
                    # Per-image failure: record it and continue with the rest.
                    print(f"Error processing image {img_id}: {e}")
                    alt_texts.append({
                        "image_id": img_id,
                        "image_url": img_url,
                        "error": str(e),
                    })
                    continue
                alt_texts.append({
                    "image_id": img_id,
                    "image_url": img_url,
                    "alt_text": alt_text,
                })
            return alt_texts
        except Exception as e:
            # Endpoint boundary: never let an exception escape the handler;
            # surface it to the caller as an error payload instead.
            print(f"Error processing data: {e}")
            return [{"error": str(e)}]
def get_pipeline(model_dir, task):
    """Factory entry point used by the serving runtime.

    `task` is accepted to satisfy the expected signature but is unused:
    the handler only produces image captions.
    """
    handler = EndpointHandler(model_dir)
    return handler