import cv2
import torch

import numpy as np

from torch import nn
from transformers import AutoImageProcessor, Swinv2ForImageClassification, SegformerForSemanticSegmentation

from cam import ClassActivationMap
from utils import add_mask, simple_vcdr


class GlaucomaModel:
    """Glaucoma screening pipeline for fundus images.

    Bundles three components loaded in ``__init__``: a SwinV2 image
    classifier, a SegFormer semantic segmentation model for the optic
    disc and cup, and a ``ClassActivationMap`` helper built on the
    classifier. ``process`` runs all of them on a single image.
    """

    def __init__(self,
                 cls_model_path="pamixsun/swinv2_tiny_for_glaucoma_classification",
                 seg_model_path='pamixsun/segformer_for_optic_disc_cup_segmentation',
                 device=torch.device('cpu')):
        """Load both models and their image processors.

        Args:
            cls_model_path: name or path passed to ``from_pretrained`` for
                the classification model and its image processor.
            seg_model_path: name or path passed to ``from_pretrained`` for
                the segmentation model and its image processor.
            device: torch device both models are moved to.
        """
        # Device the models live on; inputs are moved here before inference.
        self.device = device

        # Classification model for glaucoma (put in eval mode for inference).
        self.cls_extractor = AutoImageProcessor.from_pretrained(cls_model_path)
        self.cls_model = Swinv2ForImageClassification.from_pretrained(cls_model_path).to(device).eval()

        # Segmentation model for optic disc and cup (eval mode as well).
        self.seg_extractor = AutoImageProcessor.from_pretrained(seg_model_path)
        self.seg_model = SegformerForSemanticSegmentation.from_pretrained(seg_model_path).to(device).eval()

        # Class activation map helper, sharing the classifier and its processor.
        self.cam = ClassActivationMap(self.cls_model, self.cls_extractor)

        # Index -> label mappings taken from the model configs.
        self.cls_id2label = self.cls_model.config.id2label
        self.seg_id2label = self.seg_model.config.id2label

        # Number of classes for classification and segmentation.
        self.num_diseases = len(self.cls_id2label)
        self.seg_classes = len(self.seg_id2label)

    def glaucoma_pred(self, image):
        """Classify a single image.

        Args:
            image: image array in RGB order.

        Returns:
            Index of the highest-scoring class (a NumPy integer); it can be
            looked up in ``self.cls_id2label``.
        """
        # The processor gets a copy so the caller's array is never touched.
        inputs = self.cls_extractor(images=image.copy(), return_tensors="pt")
        # Rebind to the value returned by ``.to`` instead of relying on the
        # call mutating ``inputs`` in place.
        inputs = inputs.to(self.device)
        with torch.no_grad():
            logits = self.cls_model(**inputs).logits
            # First (only) item of the batch; argmax over the class scores.
            disease_idx = logits.cpu()[0, :].detach().numpy().argmax()

        return disease_idx

    def optic_disc_cup_pred(self, image):
        """Segment a single image into per-pixel class ids.

        Args:
            image: image array in RGB order; its first two dimensions are
                used as the output height and width.

        Returns:
            ``np.uint8`` array of shape ``image.shape[:2]`` holding the
            predicted segmentation class id of every pixel (ids follow
            ``self.seg_id2label``).
        """
        inputs = self.seg_extractor(images=image.copy(), return_tensors="pt")
        # Rebind to the value returned by ``.to`` (see ``glaucoma_pred``).
        inputs = inputs.to(self.device)

        with torch.no_grad():
            outputs = self.seg_model(**inputs)
        logits = outputs.logits.cpu()

        # The model's logits are resized to the input image size before
        # taking the per-pixel argmax.
        upsampled_logits = nn.functional.interpolate(
            logits,
            size=image.shape[:2],
            mode="bilinear",
            align_corners=False,
        )

        # argmax over the class dimension, then drop the batch dimension.
        pred_disc_cup = upsampled_logits.argmax(dim=1)[0]

        return pred_disc_cup.numpy().astype(np.uint8)

    def process(self, image):
        """Run classification, CAM, segmentation and vCDR on one image.

        Args:
            image: image array in RGB order.

        Returns:
            Tuple ``(disease_idx, disc_cup_image, cam, vcdr)``:
            ``disease_idx`` is the predicted class index,
            ``disc_cup_image`` is the second value returned by ``add_mask``
            (presumably the image with the disc/cup mask blended in),
            ``cam`` is the class activation map resized to the image's
            height and width, and ``vcdr`` is the result of ``simple_vcdr``
            or the string ``'[None]'`` when that computation fails.
        """
        image_shape = image.shape[:2]
        disease_idx = self.glaucoma_pred(image)

        cam = self.cam.get_cam(image, disease_idx)
        # cv2.resize expects (width, height), hence the reversed shape.
        cam = cv2.resize(cam, image_shape[::-1])

        disc_cup = self.optic_disc_cup_pred(image)
        try:
            vcdr = simple_vcdr(disc_cup)
        except Exception:
            # Best effort: the ratio cannot always be computed from the mask.
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            vcdr = '[None]'

        # Class ids 0/1/2 are drawn in black/green/red (given the RGB input);
        # 0.2 is presumably the blending weight -- confirm against add_mask.
        _, disc_cup_image = add_mask(image, disc_cup, [0, 1, 2], [[0, 0, 0], [0, 255, 0], [255, 0, 0]], 0.2)

        return disease_idx, disc_cup_image, cam, vcdr