"""Custom inference handler that runs a Stable Diffusion pipeline on a base64 image."""
import base64
from io import BytesIO
from typing import Any, Dict, List

import torch
from PIL import Image
from diffusers import StableDiffusionPipeline
from torch import autocast
from transformers.utils import logging

logging.set_verbosity_info()
logger = logging.get_logger("transformers")
logger.info("INFO")
logger.warning("WARN")

# set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The pipeline is loaded in float16 below, so refuse to start without a GPU.
if device.type != 'cuda':
    raise ValueError("need to run on GPU")


class EndpointHandler:
    """Callable handler: decodes a base64 image, runs the pipeline, returns base64 JPEG."""

    def __init__(self, path=""):
        """Load the pipeline stored at ``path`` in float16 and move it to ``device``.

        Args:
            path: Directory (or model id) passed to ``from_pretrained``.
        """
        # load the optimized model
        # model_id = "stabilityai/stable-diffusion-x4-upscaler"
        # NOTE(review): the model id above is an upscaler checkpoint while the
        # class used here is the generic StableDiffusionPipeline, which is then
        # called with ``image=`` — confirm this matches the model at ``path``.
        self.pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
        self.pipe = self.pipe.to(device)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Run the pipeline on the image contained in the request payload.

        Args:
            data: Request payload. The value under ``"inputs"`` is used when
                present, otherwise ``data`` itself; either way it must provide
                an ``"image"`` entry holding a base64-encoded image.

        Returns:
            A dict ``{"image": <str>}`` whose value is the base64-encoded JPEG
            of the first image produced by the pipeline.

        Raises:
            KeyError: If the payload has no ``"image"`` entry.
        """
        inputs = data.pop("inputs", data)
        encoded_image = inputs['image']

        # Log only the payload shape: the base64 string can be megabytes long
        # and may contain user data, so it is kept out of the logs.
        logger.info("Received inputs with keys %s", list(inputs))
        logger.info("Encoded image length: %d", len(encoded_image))

        # decode base64 image to PIL
        decoded_image = Image.open(BytesIO(base64.b64decode(encoded_image)))
        logger.info("Printing loaded image into library %s", decoded_image)

        # run inference pipeline
        with autocast(device.type):
            upscaled_image = self.pipe(prompt="", image=decoded_image).images[0]

        # encode image as base 64
        buffered = BytesIO()
        upscaled_image.save(buffered, format="JPEG")

        # b64encode returns bytes, which cannot be JSON-serialized in the
        # response; decode to str before returning.
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

        # postprocess the prediction
        return {"image": img_str}