import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from typing import Dict, List, Any

# Seconds to wait for the remote image host before giving up. Without an
# explicit timeout, requests will wait indefinitely and hang the endpoint.
REQUEST_TIMEOUT = 30


class EndpointHandler():
    """Inference endpoint that generates alt-text captions for images.

    Wraps Salesforce's BLIP image-captioning model. Images are fetched by
    URL, optionally conditioned on a text prompt, and captioned one at a
    time.
    """

    def __init__(self, path=""):
        # `path` is accepted for endpoint-framework compatibility but the
        # model is always pulled from the Hub by name.
        self.processor = BlipProcessor.from_pretrained(
            "Salesforce/blip-image-captioning-large"
        )
        self.model = BlipForConditionalGeneration.from_pretrained(
            "Salesforce/blip-image-captioning-large"
        )
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)
        # Inference only: disable dropout / batch-norm training behavior.
        self.model.eval()

    def process_single_image(self, img_url, text=None):
        """Fetch one image by URL and return a generated caption string.

        Args:
            img_url: HTTP(S) URL of the image to caption.
            text: Optional prompt for conditional captioning; when None,
                unconditional captioning is used.

        Raises:
            requests.RequestException: on network failure or timeout.
            requests.HTTPError: when the server returns an error status.
        """
        # Bounded fetch; fail fast on HTTP errors instead of handing an
        # error page to PIL (which would raise UnidentifiedImageError).
        response = requests.get(img_url, stream=True, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        raw_image = Image.open(response.raw).convert('RGB')

        if text:
            inputs = self.processor(raw_image, text, return_tensors="pt").to(self.device)
        else:
            inputs = self.processor(raw_image, return_tensors="pt").to(self.device)

        # No gradients needed for generation — saves memory and time.
        with torch.no_grad():
            out = self.model.generate(**inputs)
        return self.processor.decode(out[0], skip_special_tokens=True)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Caption every image in the request payload.

        Expects ``data["images"]`` to be a list of dicts with keys
        ``id``, ``url``, and optionally ``text``. Returns one result dict
        per image; a failing image yields an ``error`` entry for that
        image instead of aborting the whole batch.
        """
        try:
            print(f"Received data: {data}")
            if not data or "images" not in data:
                return [{"error": "No images data provided in the request."}]

            images_data = data.get("images")
            alt_texts = []
            for image in images_data:
                img_id = image.get("id")
                img_url = image.get("url")
                text = image.get("text", None)
                try:
                    alt_text = self.process_single_image(img_url, text)
                    alt_texts.append({
                        "image_id": img_id,
                        "image_url": img_url,
                        "alt_text": alt_text
                    })
                except Exception as e:
                    # Per-image failure: record it and continue with the
                    # rest of the batch rather than failing everything.
                    print(f"Error processing image {img_id}: {e}")
                    alt_texts.append({
                        "image_id": img_id,
                        "image_url": img_url,
                        "error": str(e)
                    })

            return alt_texts

        except Exception as e:
            # Top-level boundary: never let the handler raise to the
            # serving framework; surface the error in the response.
            print(f"Error processing data: {e}")
            return [{"error": str(e)}]


def get_pipeline(model_dir, task):
    """Factory used by the serving framework; `task` is unused."""
    return EndpointHandler(model_dir)