File size: 25,267 Bytes

import string
import warnings
import subprocess, io, os, sys, time
import random

# os.environ["XFORMERS_DISABLE_FLASH_ATTN"] = "1"
# result = subprocess.run(['pip', 'install', 'xformers'], check=True)

from entklei import get_nude
from scipy.ndimage import binary_dilation


is_production = True
install_stuff = True
os.environ['CUDA_HOME'] = '/usr/local/cuda-11.7/' if is_production else '/usr/local/cuda-12.1/'

run_gradio = False

if run_gradio and install_stuff:
    os.system("pip install gradio==3.50.2")

import gradio as gr

from loguru import logger

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

if is_production:
    os.chdir("/repository")
    sys.path.insert(0, '/repository')

if install_stuff:
    # result = subprocess.run(['pip', 'install', "-u", 'peft'], check=True)
    result = subprocess.run(['pip', 'install', '-e', 'GroundingDINO'], check=True)
    print(f'pip install GroundingDINO = {result}')

# result = subprocess.run(['pip', 'list'], check=True)
# print(f'pip list = {result}')

sys.path.insert(0, '/repository/GroundingDINO' if is_production else "./GroundingDINO")

import argparse
import copy

import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont, ImageOps

# Grounding DINO
import GroundingDINO.groundingdino.datasets.transforms as T
from GroundingDINO.groundingdino.models import build_model
from GroundingDINO.groundingdino.util import box_ops
from GroundingDINO.groundingdino.util.slconfig import SLConfig
from GroundingDINO.groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap

import cv2
import numpy as np
import matplotlib
matplotlib.use('AGG')
plt = matplotlib.pyplot
# import matplotlib.pyplot as plt

# <<<<<< AIINFERENCE

# >>>>>> AIINFERENCE

groundingdino_enable = True
sam_enable = True
inpainting_enable = True
ram_enable = True

lama_cleaner_enable = True

kosmos_enable = False

# qwen_enable = True
# from qwen_utils import *

# segment anything
from segment_anything import build_sam, SamPredictor, SamAutomaticMaskGenerator

# diffusers
import PIL
import requests
import torch
from io import BytesIO
from huggingface_hub import hf_hub_download

config_file = 'GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py'
ckpt_repo_id = "ShilongLiu/GroundingDINO"
ckpt_filenmae = "groundingdino_swint_ogc.pth"
sam_checkpoint = './sam_hq_vit_h.pth' 
output_dir = "outputs"

device = 'cpu'
sam_device = "cuda"


def get_sam_vit_h_4b8939():
    url = 'https://huggingface.co/Uminosachi/sam-hq/resolve/main/sam_hq_vit_h.pth'
    file_path = './sam_hq_vit_h.pth'

    if not os.path.exists(file_path):
        logger.info("Downloading sam_vit_h_4b8939.pth...")
        response = requests.get(url)
        with open(file_path, 'wb') as f:
            f.write(response.content)
        print('Downloaded sam_vit_h_4b8939.pth')

logger.info(f"initialize SAM model...")
sam_device = "cuda"

sd_model = None
lama_cleaner_model= None
ram_model = None
kosmos_model = None
kosmos_processor = None

get_sam_vit_h_4b8939()
sam_model = build_sam(checkpoint=sam_checkpoint).to(sam_device)
sam_predictor = SamPredictor(sam_model)
sam_mask_generator = SamAutomaticMaskGenerator(sam_model)

def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
    args = SLConfig.fromfile(model_config_path) 
    model = build_model(args)
    args.device = device

    cache_file = hf_hub_download(repo_id=repo_id, filename=filename)
    checkpoint = torch.load(cache_file, map_location=device)
    log = model.load_state_dict(clean_state_dict(checkpoint['model']), strict=False)
    print("Model loaded from {} \n => {}".format(cache_file, log))
    _ = model.eval()
    return model    

def plot_boxes_to_image(image_pil, tgt):
    H, W = tgt["size"]
    boxes = tgt["boxes"]
    labels = tgt["labels"]
    assert len(boxes) == len(labels), "boxes and labels must have same length"

    draw = ImageDraw.Draw(image_pil)
    mask = Image.new("L", image_pil.size, 0)
    mask_draw = ImageDraw.Draw(mask)

    # draw boxes and masks
    for box, label in zip(boxes, labels):
        # from 0..1 to 0..W, 0..H
        box = box * torch.Tensor([W, H, W, H])
        # from xywh to xyxy
        box[:2] -= box[2:] / 2
        box[2:] += box[:2]
        # random color
        color = tuple(np.random.randint(0, 255, size=3).tolist())
        # draw
        x0, y0, x1, y1 = box
        x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)

        draw.rectangle([x0, y0, x1, y1], outline=color, width=6)
        # draw.text((x0, y0), str(label), fill=color)

        font = ImageFont.load_default()
        if hasattr(font, "getbbox"):
            bbox = draw.textbbox((x0, y0), str(label), font)
        else:
            w, h = draw.textsize(str(label), font)
            bbox = (x0, y0, w + x0, y0 + h)
        # bbox = draw.textbbox((x0, y0), str(label))
        draw.rectangle(bbox, fill=color)

        try:
            font = os.path.join(cv2.__path__[0],'qt','fonts','DejaVuSans.ttf')
            font_size = 36
            new_font = ImageFont.truetype(font, font_size)

            draw.text((x0+2, y0+2), str(label), font=new_font, fill="white")
        except Exception as e:
            pass

        mask_draw.rectangle([x0, y0, x1, y1], fill=255, width=6)


    return image_pil, mask

def load_image(image_path):
    # # load image
    if isinstance(image_path, PIL.Image.Image):
        image_pil = image_path
    else:
        image_pil = Image.open(image_path).convert("RGB")  # load image

    transform = T.Compose(
        [
            T.RandomResize([800], max_size=1333),
            T.ToTensor(),
            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )
    image, _ = transform(image_pil, None)  # 3, h, w
    return image_pil, image

def get_grounding_output(model, image, caption, box_threshold, text_threshold, with_logits=True, device="cpu"):
    caption = caption.lower()
    caption = caption.strip()
    if not caption.endswith("."):
        caption = caption + "."
    model = model.to(device)
    image = image.to(device)
    with torch.no_grad():
        outputs = model(image[None], captions=[caption])
    logits = outputs["pred_logits"].cpu().sigmoid()[0]  # (nq, 256)
    boxes = outputs["pred_boxes"].cpu()[0]  # (nq, 4)
    logits.shape[0]

    # filter output
    logits_filt = logits.clone()
    boxes_filt = boxes.clone()
    filt_mask = logits_filt.max(dim=1)[0] > box_threshold
    logits_filt = logits_filt[filt_mask]  # num_filt, 256
    boxes_filt = boxes_filt[filt_mask]  # num_filt, 4
    logits_filt.shape[0]

    # get phrase
    tokenlizer = model.tokenizer
    tokenized = tokenlizer(caption)
    # build pred
    pred_phrases = []
    for logit, box in zip(logits_filt, boxes_filt):
        pred_phrase = get_phrases_from_posmap(logit > text_threshold, tokenized, tokenlizer)
        if with_logits:
            pred_phrases.append(pred_phrase + f"({str(logit.max().item())[:4]})")
        else:
            pred_phrases.append(pred_phrase)

    return boxes_filt, pred_phrases

def show_mask(mask, ax, random_color=False):
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        color = np.array([30/255, 144/255, 255/255, 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)

def show_box(box, ax, label):
    x0, y0 = box[0], box[1]
    w, h = box[2] - box[0], box[3] - box[1]
    ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2)) 
    ax.text(x0, y0, label)

def xywh_to_xyxy(box, sizeW, sizeH):
    if isinstance(box, list):
        box = torch.Tensor(box)
    box = box * torch.Tensor([sizeW, sizeH, sizeW, sizeH])
    box[:2] -= box[2:] / 2
    box[2:] += box[:2]
    box = box.numpy()
    return box

def mask_extend(img, box, extend_pixels=10, useRectangle=True):
    box[0] = int(box[0])
    box[1] = int(box[1])
    box[2] = int(box[2])
    box[3] = int(box[3])
    region = img.crop(tuple(box))
    new_width = box[2] - box[0] + 2*extend_pixels
    new_height = box[3] - box[1] + 2*extend_pixels

    region_BILINEAR = region.resize((int(new_width), int(new_height)))
    if useRectangle:
        region_draw = ImageDraw.Draw(region_BILINEAR)
        region_draw.rectangle((0, 0, new_width, new_height), fill=(255, 255, 255))    
    img.paste(region_BILINEAR, (int(box[0]-extend_pixels), int(box[1]-extend_pixels)))
    return img

def mix_masks(imgs):
    re_img =  1 - np.asarray(imgs[0].convert("1"))
    for i in range(len(imgs)-1):
        re_img = np.multiply(re_img, 1 - np.asarray(imgs[i+1].convert("1")))
    re_img =  1 - re_img
    return  Image.fromarray(np.uint8(255*re_img))

# visualization
def draw_selected_mask(mask, draw):
    color = (255, 0, 0, 153)
    nonzero_coords = np.transpose(np.nonzero(mask))
    for coord in nonzero_coords:
        draw.point(coord[::-1], fill=color)

def draw_object_mask(mask, draw):
    color = (0, 0, 255, 153)
    nonzero_coords = np.transpose(np.nonzero(mask))
    for coord in nonzero_coords:
        draw.point(coord[::-1], fill=color)

def create_title_image(word1, word2, word3, width, font_path='./assets/OpenSans-Bold.ttf'):
    # Define the colors to use for each word
    color_red = (255, 0, 0)
    color_black = (0, 0, 0)
    color_blue = (0, 0, 255)

    # Define the initial font size and spacing between words
    font_size = 40

    # Create a new image with the specified width and white background
    image = Image.new('RGB', (width, 60), (255, 255, 255))

    try:
        # Load the specified font
        font = ImageFont.truetype(font_path, font_size)

        # Keep increasing the font size until all words fit within the desired width
        while True:
            # Create a draw object for the image
            draw = ImageDraw.Draw(image)
            
            word_spacing = font_size / 2
            # Draw each word in the appropriate color
            x_offset = word_spacing
            draw.text((x_offset, 0), word1, color_red, font=font)
            x_offset += font.getsize(word1)[0] + word_spacing
            draw.text((x_offset, 0), word2, color_black, font=font)
            x_offset += font.getsize(word2)[0] + word_spacing
            draw.text((x_offset, 0), word3, color_blue, font=font)
            
            word_sizes = [font.getsize(word) for word in [word1, word2, word3]]
            total_width = sum([size[0] for size in word_sizes]) + word_spacing * 3

            # Stop increasing font size if the image is within the desired width
            if total_width <= width:
                break
                
            # Increase font size and reset the draw object
            font_size -= 1
            image = Image.new('RGB', (width, 50), (255, 255, 255))
            font = ImageFont.truetype(font_path, font_size)
            draw = None
    except Exception as e:
        pass

    return image

def concatenate_images_vertical(image1, image2):
    # Get the dimensions of the two images
    width1, height1 = image1.size
    width2, height2 = image2.size

    # Create a new image with the combined height and the maximum width
    new_image = Image.new('RGBA', (max(width1, width2), height1 + height2))

    # Paste the first image at the top of the new image
    new_image.paste(image1, (0, 0))

    # Paste the second image below the first image
    new_image.paste(image2, (0, height1))

    return new_image

def relate_anything(input_image, k):    
    logger.info(f'relate_anything_1_{input_image.size}_')
    w, h = input_image.size
    max_edge = 1500
    if w > max_edge or h > max_edge:
        ratio = max(w, h) / max_edge
        new_size = (int(w / ratio), int(h / ratio))
        input_image.thumbnail(new_size)
    
    logger.info(f'relate_anything_2_')
    # load image
    pil_image = input_image.convert('RGBA')
    image = np.array(input_image)
    sam_masks = sam_mask_generator.generate(image)
    filtered_masks = sort_and_deduplicate(sam_masks)

    logger.info(f'relate_anything_3_')
    feat_list = []
    for fm in filtered_masks:
        feat = torch.Tensor(fm['feat']).unsqueeze(0).unsqueeze(0).to(device)
        feat_list.append(feat)
    feat = torch.cat(feat_list, dim=1).to(device)
    matrix_output, rel_triplets = ram_model.predict(feat)

    logger.info(f'relate_anything_4_')
    pil_image_list = []
    for i, rel in enumerate(rel_triplets[:k]):
        s,o,r = int(rel[0]),int(rel[1]),int(rel[2])
        relation = relation_classes[r]

        mask_image = Image.new('RGBA', pil_image.size, color=(0, 0, 0, 0))
        mask_draw = ImageDraw.Draw(mask_image)
            
        draw_selected_mask(filtered_masks[s]['segmentation'], mask_draw)
        draw_object_mask(filtered_masks[o]['segmentation'], mask_draw)

        current_pil_image = pil_image.copy()
        current_pil_image.alpha_composite(mask_image)
                
        title_image = create_title_image('Red', relation, 'Blue', current_pil_image.size[0])
        concate_pil_image = concatenate_images_vertical(current_pil_image, title_image)
        pil_image_list.append(concate_pil_image)

    logger.info(f'relate_anything_5_{len(pil_image_list)}')
    return pil_image_list

mask_source_draw = "draw a mask on input image"
mask_source_segment = "type what to detect below"

def get_time_cost(run_task_time, time_cost_str):
    now_time = int(time.time()*1000)
    if run_task_time == 0:
        time_cost_str = 'start'
    else:
        if time_cost_str != '': 
            time_cost_str += f'-->'
        time_cost_str += f'{now_time - run_task_time}'
    run_task_time = now_time
    return run_task_time, time_cost_str

def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_threshold, text_threshold, 
            iou_threshold, inpaint_mode, mask_source_radio, remove_mode, remove_mask_extend, num_relation, kosmos_input, cleaner_size_limit=1080):
    run_task_time = 0
    time_cost_str = ''
    run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)

    text_prompt = text_prompt.strip()
    if not ((task_type == 'inpainting' or task_type == 'remove') and mask_source_radio == mask_source_draw):
        if text_prompt == '':
            return [], gr.Gallery.update(label='Detection prompt is not found!😂😂😂😂'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None

    if input_image is None:
            return [], gr.Gallery.update(label='Please upload a image!😂😂😂😂'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None

    file_temp = int(time.time())
    logger.info(f'run_anything_task_002/{device}_[{file_temp}]_{task_type}/{inpaint_mode}/[{mask_source_radio}]/{remove_mode}/{remove_mask_extend}_[{text_prompt}]/[{inpaint_prompt}]___1_')

    output_images = []

    image_pil, image = load_image(input_image.convert("RGB"))
    input_img = input_image
    run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)

    size = image_pil.size
    H, W = size[1], size[0]

    # run grounding dino model
    if (task_type == 'inpainting' or task_type == 'remove') and mask_source_radio == mask_source_draw:
        pass
    else:
        groundingdino_device = 'cpu'
        if device != 'cpu':
            try:
                from groundingdino import _C
                groundingdino_device = 'cuda:0'
            except:
                warnings.warn("Failed to load custom C++ ops. Running on CPU mode Only in groundingdino!")

        boxes_filt, pred_phrases = get_grounding_output(
            groundingdino_model, image, text_prompt, box_threshold, text_threshold, device=groundingdino_device
        )
        if boxes_filt.size(0) == 0:
            logger.info(f'run_anything_task_[{file_temp}]_{task_type}_[{text_prompt}]_1___{groundingdino_device}/[No objects detected, please try others.]_')
            return [], gr.Gallery.update(label='No objects detected, please try others.😂😂😂😂'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
        boxes_filt_ori = copy.deepcopy(boxes_filt)

    logger.info(f'run_anything_task_[{file_temp}]_{task_type}_2_')
    if task_type == 'segment' or ((task_type == 'inpainting' or task_type == 'remove') and mask_source_radio == mask_source_segment):
        image = np.array(input_img)
        if sam_predictor:
            sam_predictor.set_image(image)

        for i in range(boxes_filt.size(0)):
            boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H])
            boxes_filt[i][:2] -= boxes_filt[i][2:] / 2
            boxes_filt[i][2:] += boxes_filt[i][:2]

        if sam_predictor:
            boxes_filt = boxes_filt.to(sam_device)
            transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes_filt, image.shape[:2])

            masks, _, _, _ = sam_predictor.predict_torch(
                point_coords = None,
                point_labels = None,
                boxes = transformed_boxes,
                multimask_output = False,
            )
            # masks: [9, 1, 512, 512]
            assert sam_checkpoint, 'sam_checkpoint is not found!'
        else:
            masks = torch.zeros(len(boxes_filt), 1, H, W)   
            mask_count = 0         
            for box in boxes_filt:
                masks[mask_count, 0, int(box[1]):int(box[3]), int(box[0]):int(box[2])] = 1  
                mask_count += 1   
            masks = torch.where(masks > 0, True, False)      
            run_mode = "rectangle"

        # draw output image
        plt.figure(figsize=(10, 10))
        plt.imshow(image)
        for mask in masks:
            show_mask(mask.cpu().numpy(), plt.gca(), random_color=True)
        for box, label in zip(boxes_filt, pred_phrases):
            show_box(box.cpu().numpy(), plt.gca(), label)
        plt.axis('off')

        # Save the plot to a BytesIO object in memory
        buf = io.BytesIO()
        plt.savefig(buf, format='jpeg', bbox_inches='tight')
        buf.seek(0)

        # Convert the image in memory to a PIL Image
        segment_image_result = Image.open(buf).convert('RGB')
        output_images.append(segment_image_result)

        # Clearing memory
        buf.close()
        plt.clf()
        plt.close('all')     

    print(sam_predictor)

    if inpaint_prompt.strip() == '' and mask_source_radio == mask_source_segment:
        task_type = 'remove'

    logger.info(f'run_anything_task_[{file_temp}]_{task_type}_4_')  
    if mask_source_radio == mask_source_draw:
        mask_pil = input_mask_pil
        mask = input_mask          
    else:
        masks_ori = copy.deepcopy(masks)
        if inpaint_mode == 'merge':
            masks = torch.sum(masks, dim=0).unsqueeze(0)
        masks = torch.where(masks > 0, True, False)
        mask = masks[0][0].cpu().numpy()
        mask_pil = Image.fromarray(mask)
    output_images.append(mask_pil.convert("RGB"))
    return mask_pil

def change_radio_display(task_type, mask_source_radio):
    text_prompt_visible = True
    inpaint_prompt_visible = False
    mask_source_radio_visible = False
    num_relation_visible = False

    image_gallery_visible = True
    kosmos_input_visible = False
    kosmos_output_visible = False
    kosmos_text_output_visible = False

    if task_type == "Kosmos-2":
        if kosmos_enable:
            text_prompt_visible = False
            image_gallery_visible = False
            kosmos_input_visible = True
            kosmos_output_visible = True
            kosmos_text_output_visible = True        

    if task_type == "inpainting":
        inpaint_prompt_visible = True
    if task_type == "inpainting" or task_type == "remove":
        mask_source_radio_visible = True   
        if mask_source_radio == mask_source_draw:
            text_prompt_visible = False
    if task_type == "relate anything":
        text_prompt_visible = False
        num_relation_visible = True

    return  (gr.Textbox.update(visible=text_prompt_visible), 
            gr.Textbox.update(visible=inpaint_prompt_visible), 
            gr.Radio.update(visible=mask_source_radio_visible), 
            gr.Slider.update(visible=num_relation_visible),
            gr.Gallery.update(visible=image_gallery_visible),
            gr.Radio.update(visible=kosmos_input_visible),
            gr.Image.update(visible=kosmos_output_visible),
            gr.HighlightedText.update(visible=kosmos_text_output_visible))

def get_model_device(module):
    try:
        if module is None:
            return 'None'
        if isinstance(module, torch.nn.DataParallel):
            module = module.module
        for submodule in module.children():
            if hasattr(submodule, "_parameters"):
                parameters = submodule._parameters
                if "weight" in parameters:
                    return parameters["weight"].device
        return 'UnKnown'
    except Exception as e:
        return 'Error'


import signal
import json
from datetime import date, datetime, timedelta
from gevent import pywsgi
import base64 

def get_groundingdino_model(device):
    # initialize groundingdino model
    logger.info(f"initialize groundingdino model...")
    model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae, device=device)
    return model

groundingdino_model = get_groundingdino_model("cuda")

def expand_white_pixels(input_pil, expand_by=1):
    # Convert the input image to grayscale
    grayscale = input_pil.convert('L')

    # Create a binary mask where white pixels are represented by 1
    binary_mask = np.array(grayscale) > 245

    # Apply the dilation operation to the binary mask
    dilated_mask = binary_dilation(binary_mask, iterations=expand_by)

    # Create a new PIL image from the dilated mask
    expanded_image = Image.fromarray(np.uint8(dilated_mask * 255))

    return expanded_image

def just_fucking_get_sd_mask(input_pil, prompt, expand_by=10):
    raw_mask = run_anything_task(input_pil, prompt, "inpainting", "", 0.3, 0.25, 0.8, "merge", "type what to detect below", "segment", "10", 5, "Brief")
    expanded_mask = expand_white_pixels(raw_mask, expand_by=expand_by)

    return expanded_mask

S3_REGION = "fra1"
S3_ACCESS_ID = "0RN7BZXS59HYSBD3VB79"
S3_ACCESS_SECRET = "hfSPgBlWl5jsGHa2xuByVkSpancgVeA2CVQf2EMp"
S3_ENDPOINT_URL = "https://s3.solarcom.ch"
S3_BUCKET_NAME = "pissnelke"

import boto3

s3_session = boto3.session.Session()
s3 = s3_session.client(
    service_name="s3",
    region_name=S3_REGION,
    aws_access_key_id=S3_ACCESS_ID,
    aws_secret_access_key=S3_ACCESS_SECRET,
    endpoint_url=S3_ENDPOINT_URL,
)


class EndpointHandler():
    def __init__(self, path=""):
        # get_nude(Image.open("girl.png"))
        os.environ['path'] = path
        print("running apt-get update && apt-get install ffmpeg libsm6 libxext6 -y")
        command = "apt-get update && apt-get install ffmpeg libsm6 libxext6 -y"
        process = subprocess.Popen(
            command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = process.communicate()
        print("ran apt-get update && apt-get install ffmpeg libsm6 libxext6 -y")
        print("path", path)

    def __call__(self, data):
        original_image_res = requests.get(data.get("original_link"))
        original_pil = Image.open(BytesIO(original_image_res.content))

        with_small_tits = data.get("with_small_tits", False)

        with_big_tits = data.get("with_big_tits", False)

        nude_pils = []

        try:
            nude_pils = get_nude(get_mask_function=just_fucking_get_sd_mask, cfg_scale=data.get("cfg_scale"), generate_max_size=data.get("generate_max_size"), original_max_size=data.get(
            "original_max_size"), original_pil=original_pil, positive_prompt=data.get("positive_prompt"), steps=data.get("steps"), with_small_tits=with_small_tits, with_big_tits=with_big_tits)
        except RuntimeError as e:
            if 'out of memory' in str(e):
                torch.cuda.empty_cache()
                nude_pils = get_nude(get_mask_function=just_fucking_get_sd_mask, cfg_scale=data.get("cfg_scale"), generate_max_size=data.get("generate_max_size"), original_max_size=data.get(
                "original_max_size"), original_pil=original_pil, positive_prompt=data.get("positive_prompt"), steps=data.get("steps"), with_small_tits=with_small_tits, with_big_tits=with_big_tits)
                print("CUDA Out of Memory, clearing cache")
                # Optionally, you can retry your operation here, or handle the exception further
            else:
                raise

        filenames = []

        for image in nude_pils:
            byte_arr = io.BytesIO()
            image.save(byte_arr, format='PNG')
            byte_arr = byte_arr.getvalue()

            random_string = ''.join(random.choice(
                string.ascii_letters + string.digits) for i in range(20))
            image_filename = random_string + ".jpeg"

            s3.put_object(Body=byte_arr, Bucket=S3_BUCKET_NAME,
                          Key=image_filename)

            filenames.append(image_filename)

        return {
            "filenames": filenames
        }