"""Custom inference-endpoint handler that upscales a base64-encoded image.

The request is expected to look like ``{"inputs": {"image": "<base64>"}}``
and the response is ``{"image": "<base64 JPEG>"}``.
"""
import base64
from io import BytesIO
from typing import Any, Dict, List

import torch
from diffusers import StableDiffusionPipeline, StableDiffusionUpscalePipeline
from PIL import Image
from torch import autocast
from transformers.utils import logging

logging.set_verbosity_info()
logger = logging.get_logger("transformers")
logger.info("INFO")
logger.warning("WARN")

# Prefer the GPU when one is visible; the CPU fallback works but is very slow.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class EndpointHandler():
    """Callable handler wrapping a Stable Diffusion x4 upscaling pipeline."""

    def __init__(self, path=""):
        """Load the upscaling pipeline and move it to the selected device.

        Args:
            path: Local directory (or hub id) holding the model weights. When
                empty, the public ``stabilityai/stable-diffusion-x4-upscaler``
                checkpoint is used instead.
        """
        self.path = path
        model_id = "stabilityai/stable-diffusion-x4-upscaler"
        # Half precision is only reliably supported on CUDA; many CPU kernels
        # are missing for float16, so fall back to float32 there.
        dtype = torch.float16 if device.type == 'cuda' else torch.float32
        # The upscaler checkpoint needs the dedicated upscale pipeline: the
        # plain text-to-image StableDiffusionPipeline has no `image` input.
        self.pipe = StableDiffusionUpscalePipeline.from_pretrained(
            path or model_id, torch_dtype=dtype
        )
        self.pipe = self.pipe.to(device)

    def __call__(self, data) -> Dict[str, Any]:
        """Upscale the image carried in the request payload.

        Args:
            data: Mapping with an ``"inputs"`` entry, itself a mapping whose
                ``"image"`` value is a base64-encoded image file.

        Returns:
            A dict ``{"image": <str>}`` where the value is the upscaled image
            encoded as JPEG and then base64 (ASCII text, JSON-serializable).

        Raises:
            ValueError: If the payload does not contain ``inputs["image"]``.
        """
        # Log only the keys: the payload holds a full base64 image and would
        # otherwise flood the logs on every request.
        logger.info('data received %s', list(data))
        inputs = data.get("inputs")
        try:
            encoded_image = inputs['image']
        except (TypeError, KeyError) as err:
            raise ValueError(
                'expected a payload of the form {"inputs": {"image": "<base64>"}}'
            ) from err
        logger.info('inputs received %s', list(inputs))

        # Normalise to RGB so palette/alpha inputs do not reach the pipeline.
        decoded_image = Image.open(BytesIO(base64.b64decode(encoded_image))).convert("RGB")

        with autocast(device.type):
            upscaled_image = self.pipe(prompt="", image=decoded_image).images[0]

        # JPEG cannot store alpha or palette modes.
        if upscaled_image.mode != "RGB":
            upscaled_image = upscaled_image.convert("RGB")

        buffered = BytesIO()
        upscaled_image.save(buffered, format="JPEG")
        # b64encode returns bytes, which JSON encoders reject; return text.
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

        # postprocess the prediction
        return {"image": img_str}