import numpy
import torch
import gradio as gr
from einops import rearrange
from  torchvision import transforms

from model import CANNet
## Building the network and restoring the saved weights onto the CPU
## NOTE: torch.load unpickles the file, so the checkpoint must come from a trusted source
model = CANNet()
checkpoint = torch.load(
    'part_B_pre.pth.tar',
    map_location=torch.device('cpu'),
)
model.load_state_dict(checkpoint['state_dict'])

## Switching the network to evaluation mode before it is used for prediction
model.eval()

## Preprocessing pipeline: convert the input to a tensor, then normalize each of the three channels
## NOTE(review): the mean/std values look like the usual ImageNet statistics - confirm against the training setup
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


def crowd(img):
    """Estimate the crowd count for a single image.

    The image is preprocessed with the module-level ``transform``, given a
    batch dimension, and cut into four quadrants at the integer midpoints of
    its height and width. Each quadrant is run through the module-level
    ``model`` separately; the sums of the four outputs are added together and
    rounded to an integer.

    Args:
        img: Image in a form accepted by ``transform``. The Gradio input
            declared in this file is configured with ``type="numpy"``.

    Returns:
        int: The summed model output over the four quadrants, rounded.

    Raises:
        ValueError: If ``img`` is ``None`` (presumably what the UI passes
            when it is submitted without an image - confirm).
    """
    ## Failing early with a clear message instead of an opaque error from the transform
    if img is None:
        raise ValueError("No image was provided; please supply an image.")

    ## Transforming the image and adding the batch dimension
    batch = rearrange(transform(img), "c h w -> 1 c h w")

    ## Slicing the image into four parts: top-left, top-right, bottom-left, bottom-right
    mid_h = batch.shape[2] // 2
    mid_w = batch.shape[3] // 2
    row_halves = (slice(None, mid_h), slice(mid_h, None))
    col_halves = (slice(None, mid_w), slice(mid_w, None))
    quadrants = [
        batch[:, :, rows, cols]
        for rows in row_halves
        for cols in col_halves
    ]

    ## Inputting each part into the model, converting it to a numpy array, and summing to get its density
    with torch.no_grad():
        densities = [model(quadrant).numpy().sum() for quadrant in quadrants]

    ## Adding the parts in order; starting from the first value (not the int 0)
    ## keeps the running total in the same numpy scalar type as the parts
    total = sum(densities[1:], densities[0])

    ## Rounding the result to get an integer
    return int(total.round())
    
## Output widget: a text box showing the value returned by crowd
outputs = gr.outputs.Textbox(
    type="text",
    label="Estimated crowd density:",
)

## Input widget: an image that is handed to crowd as a numpy array
inputs = gr.inputs.Image(
    type="numpy",
    label="Input the image here:",
)

## Wiring crowd to the widgets (flagging disabled) and launching the app in the browser
gr.Interface(
    fn=crowd,
    inputs=inputs,
    outputs=outputs,
    allow_flagging="never",
).launch(inbrowser=True)