franciszzj committed · c964d4c
Parent(s): c6b26ba

init

Changed files:
- README.md +31 -13
- assets/EU.png +0 -0
- assets/reset.png +0 -0
- datasets/__init__.py +0 -0
- datasets/crowd.py +268 -0
- demo.py +148 -0
- examples/IMG_101.jpg +0 -0
- examples/IMG_125.jpg +0 -0
- examples/IMG_138.jpg +0 -0
- examples/IMG_18.jpg +0 -0
- examples/IMG_180.jpg +0 -0
- examples/IMG_206.jpg +0 -0
- examples/IMG_223.jpg +0 -0
- examples/IMG_247.jpg +0 -0
- examples/IMG_270.jpg +0 -0
- examples/IMG_306.jpg +0 -0
- losses/__init__.py +1 -0
- losses/bregman_pytorch.py +484 -0
- losses/consistency_loss.py +294 -0
- losses/dm_loss.py +62 -0
- losses/multi_con_loss.py +41 -0
- losses/ot_loss.py +68 -0
- losses/ramps.py +41 -0
- losses/rank_loss.py +53 -0
- network/pvt_cls.py +623 -0
- requirements.txt +7 -0
- sample_imgs/overview.png +0 -0
- test.py +117 -0
- train.py +369 -0
- utils/__init__.py +0 -0
- utils/log_utils.py +24 -0
- utils/pytorch_utils.py +58 -0
README.md
CHANGED
@@ -1,13 +1,31 @@

# TreeFormer

This is the code base for the IEEE Transactions on Geoscience and Remote Sensing (TGRS 2023) paper ['TreeFormer: a Semi-Supervised Transformer-based Framework for Tree Counting from a Single High Resolution Image'](https://arxiv.org/abs/2307.06118).

<img src="sample_imgs/overview.png">

## Installation

Python ≥ 3.7.

To install the required packages, please run:

```bash
pip install -r requirements.txt
```

## Dataset

Download the dataset from [Google Drive](https://drive.google.com/file/d/1xcjv8967VvvzcDM4aqAi7Corkb11T0i2/view?usp=drive_link).

## Evaluation

Download our model trained on the [London](https://drive.google.com/file/d/14uuOF5758sxtM5EgeGcRtSln5lUXAHge/view?usp=sharing) dataset.

Modify the dataset and model paths in 'test.py', then run 'test.py'.

## Acknowledgements

- Parts of the code are borrowed from [PVT](https://github.com/whai362/PVT) and [DM Count](https://github.com/cvlab-stonybrook/DM-Count). Thanks for their great work!
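The evaluation instructions above assume a dataset folder whose layout is only implied by the loader added in datasets/crowd.py further down. A minimal sketch of loading the validation split follows; the root path and settings are placeholder values, not part of this commit:

```python
from torch.utils.data import DataLoader
from datasets.crowd import Crowd_TC

# Layout inferred from Crowd_TC.__getitem__ (paths below are placeholders):
#   <root>/images/IMG_1.jpg, ...
#   <root>/ground_truth/GT_IMG_1.mat          (keypoint annotations)
#   <root>/ground_truth/IMG_1_densitymap.npy  (Gaussian density map)
val_set = Crowd_TC('/path/to/TC_dataset/val', crop_size=256,
                   downsample_ratio=8, method='val')
val_loader = DataLoader(val_set, batch_size=1, shuffle=False, num_workers=2)

for img, gt_count, name, gauss_im in val_loader:
    print(name[0], int(gt_count))  # image name and ground-truth tree count
```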
assets/EU.png
ADDED
assets/reset.png
ADDED
datasets/__init__.py
ADDED
File without changes
datasets/crowd.py
ADDED
@@ -0,0 +1,268 @@

```python
from PIL import Image
import torch.utils.data as data
import os
from glob import glob
import torch
import torchvision.transforms.functional as F
from torchvision import transforms
import random
import numpy as np
import scipy.io as sio


def random_crop(im_h, im_w, crop_h, crop_w):
    res_h = im_h - crop_h
    res_w = im_w - crop_w
    i = random.randint(0, res_h)
    j = random.randint(0, res_w)
    return i, j, crop_h, crop_w


def gen_discrete_map(im_height, im_width, points):
    """
    func: generate the discrete map.
    points: [num_gt, 2], for each row: [width, height]
    """
    discrete_map = np.zeros([im_height, im_width], dtype=np.float32)
    h, w = discrete_map.shape[:2]
    num_gt = points.shape[0]
    if num_gt == 0:
        return discrete_map

    # fast create discrete map
    points_np = np.array(points).round().astype(int)
    p_h = np.minimum(points_np[:, 1], np.array([h - 1] * num_gt).astype(int))
    p_w = np.minimum(points_np[:, 0], np.array([w - 1] * num_gt).astype(int))
    p_index = torch.from_numpy(p_h * im_width + p_w).to(torch.int64)
    discrete_map = torch.zeros(im_width * im_height).scatter_add_(0, index=p_index, src=torch.ones(im_width * im_height)).view(im_height, im_width).numpy()

    ''' slow method
    for p in points:
        p = np.round(p).astype(int)
        p[0], p[1] = min(h - 1, p[1]), min(w - 1, p[0])
        discrete_map[p[0], p[1]] += 1
    '''
    assert np.sum(discrete_map) == num_gt
    return discrete_map


class Base(data.Dataset):
    def __init__(self, root_path, crop_size, downsample_ratio=8):
        self.root_path = root_path
        self.c_size = crop_size
        self.d_ratio = downsample_ratio
        assert self.c_size % self.d_ratio == 0
        self.dc_size = self.c_size // self.d_ratio
        self.trans = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    def __len__(self):
        pass

    def __getitem__(self, item):
        pass

    def train_transform(self, img, keypoints, gauss_im):
        wd, ht = img.size
        st_size = 1.0 * min(wd, ht)
        assert st_size >= self.c_size
        assert len(keypoints) >= 0
        i, j, h, w = random_crop(ht, wd, self.c_size, self.c_size)
        img = F.crop(img, i, j, h, w)
        # crop the density map with the same window as the image
        gauss_im = F.crop(gauss_im, i, j, h, w)
        if len(keypoints) > 0:
            keypoints = keypoints - [j, i]
            idx_mask = (keypoints[:, 0] >= 0) * (keypoints[:, 0] <= w) * \
                       (keypoints[:, 1] >= 0) * (keypoints[:, 1] <= h)
            keypoints = keypoints[idx_mask]
        else:
            keypoints = np.empty([0, 2])

        gt_discrete = gen_discrete_map(h, w, keypoints)
        down_w = w // self.d_ratio
        down_h = h // self.d_ratio
        gt_discrete = gt_discrete.reshape([down_h, self.d_ratio, down_w, self.d_ratio]).sum(axis=(1, 3))
        assert np.sum(gt_discrete) == len(keypoints)

        if len(keypoints) > 0:
            if random.random() > 0.5:
                img = F.hflip(img)
                gauss_im = F.hflip(gauss_im)
                gt_discrete = np.fliplr(gt_discrete)
                keypoints[:, 0] = w - keypoints[:, 0]
        else:
            if random.random() > 0.5:
                img = F.hflip(img)
                gauss_im = F.hflip(gauss_im)
                gt_discrete = np.fliplr(gt_discrete)
        gt_discrete = np.expand_dims(gt_discrete, 0)

        return self.trans(img), gauss_im, torch.from_numpy(keypoints.copy()).float(), torch.from_numpy(gt_discrete.copy()).float()


class Crowd_TC(Base):
    def __init__(self, root_path, crop_size, downsample_ratio=8, method='train'):
        super().__init__(root_path, crop_size, downsample_ratio)
        self.method = method
        if method not in ['train', 'val']:
            raise Exception("not implement")

        self.im_list = sorted(glob(os.path.join(self.root_path, 'images', '*.jpg')))

        print('number of img [{}]: {}'.format(method, len(self.im_list)))

    def __len__(self):
        return len(self.im_list)

    def __getitem__(self, item):
        img_path = self.im_list[item]
        name = os.path.basename(img_path).split('.')[0]
        gd_path = os.path.join(self.root_path, 'ground_truth', 'GT_{}.mat'.format(name))
        img = Image.open(img_path).convert('RGB')
        keypoints = sio.loadmat(gd_path)['image_info'][0][0][0][0][0]
        gauss_path = os.path.join(self.root_path, 'ground_truth', '{}_densitymap.npy'.format(name))
        gauss_im = torch.from_numpy(np.load(gauss_path)).float()

        if self.method == 'train':
            return self.train_transform(img, keypoints, gauss_im)
        elif self.method == 'val':
            wd, ht = img.size
            st_size = 1.0 * min(wd, ht)
            if st_size < self.c_size:
                rr = 1.0 * self.c_size / st_size
                wd = round(wd * rr)
                ht = round(ht * rr)
                st_size = 1.0 * min(wd, ht)
                img = img.resize((wd, ht), Image.BICUBIC)
            img = self.trans(img)

            return img, len(keypoints), name, gauss_im

    def train_transform(self, img, keypoints, gauss_im):
        wd, ht = img.size
        st_size = 1.0 * min(wd, ht)
        # resize the image to fit the crop size
        if st_size < self.c_size:
            rr = 1.0 * self.c_size / st_size
            wd = round(wd * rr)
            ht = round(ht * rr)
            st_size = 1.0 * min(wd, ht)
            img = img.resize((wd, ht), Image.BICUBIC)
            # gauss_im = gauss_im.resize((wd, ht), Image.BICUBIC)
            keypoints = keypoints * rr
        assert st_size >= self.c_size, print(wd, ht)
        assert len(keypoints) >= 0
        i, j, h, w = random_crop(ht, wd, self.c_size, self.c_size)
        img = F.crop(img, i, j, h, w)
        gauss_im = F.crop(gauss_im, i, j, h, w)
        if len(keypoints) > 0:
            keypoints = keypoints - [j, i]
            idx_mask = (keypoints[:, 0] >= 0) * (keypoints[:, 0] <= w) * \
                       (keypoints[:, 1] >= 0) * (keypoints[:, 1] <= h)
            keypoints = keypoints[idx_mask]
        else:
            keypoints = np.empty([0, 2])

        gt_discrete = gen_discrete_map(h, w, keypoints)
        down_w = w // self.d_ratio
        down_h = h // self.d_ratio
        gt_discrete = gt_discrete.reshape([down_h, self.d_ratio, down_w, self.d_ratio]).sum(axis=(1, 3))
        assert np.sum(gt_discrete) == len(keypoints)

        if len(keypoints) > 0:
            if random.random() > 0.5:
                img = F.hflip(img)
                gauss_im = F.hflip(gauss_im)
                gt_discrete = np.fliplr(gt_discrete)
                keypoints[:, 0] = w - keypoints[:, 0] - 1
        else:
            if random.random() > 0.5:
                img = F.hflip(img)
                gauss_im = F.hflip(gauss_im)
                gt_discrete = np.fliplr(gt_discrete)
        gt_discrete = np.expand_dims(gt_discrete, 0)

        return self.trans(img), gauss_im, torch.from_numpy(keypoints.copy()).float(), torch.from_numpy(gt_discrete.copy()).float()


class Base_UL(data.Dataset):
    def __init__(self, root_path, crop_size, downsample_ratio=8):
        self.root_path = root_path
        self.c_size = crop_size
        self.d_ratio = downsample_ratio
        assert self.c_size % self.d_ratio == 0
        self.dc_size = self.c_size // self.d_ratio
        self.trans = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    def __len__(self):
        pass

    def __getitem__(self, item):
        pass

    def train_transform_ul(self, img):
        wd, ht = img.size
        st_size = 1.0 * min(wd, ht)
        assert st_size >= self.c_size
        i, j, h, w = random_crop(ht, wd, self.c_size, self.c_size)
        img = F.crop(img, i, j, h, w)

        if random.random() > 0.5:
            img = F.hflip(img)

        return self.trans(img)


class Crowd_UL_TC(Base_UL):
    def __init__(self, root_path, crop_size, downsample_ratio=8, method='train_ul'):
        super().__init__(root_path, crop_size, downsample_ratio)
        self.method = method
        if method not in ['train_ul']:
            raise Exception("not implement")

        self.im_list = sorted(glob(os.path.join(self.root_path, 'images', '*.jpg')))
        print('number of img [{}]: {}'.format(method, len(self.im_list)))

    def __len__(self):
        return len(self.im_list)

    def __getitem__(self, item):
        img_path = self.im_list[item]
        name = os.path.basename(img_path).split('.')[0]
        img = Image.open(img_path).convert('RGB')

        return self.train_transform_ul(img)

    def train_transform_ul(self, img):
        wd, ht = img.size
        st_size = 1.0 * min(wd, ht)
        # resize the image to fit the crop size
        if st_size < self.c_size:
            rr = 1.0 * self.c_size / st_size
            wd = round(wd * rr)
            ht = round(ht * rr)
            st_size = 1.0 * min(wd, ht)
            img = img.resize((wd, ht), Image.BICUBIC)

        assert st_size >= self.c_size, print(wd, ht)

        i, j, h, w = random_crop(ht, wd, self.c_size, self.c_size)
        img = F.crop(img, i, j, h, w)
        if random.random() > 0.5:
            img = F.hflip(img)

        return self.trans(img), 1
```
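A short usage sketch for the classes above, with placeholder root paths and an illustrative crop size (not part of the commit). Keypoints vary in length per image, so batching labeled samples in a DataLoader generally needs a custom collate function:

```python
from datasets.crowd import Crowd_TC, Crowd_UL_TC

crop_size = 256  # must be divisible by downsample_ratio (asserted in Base.__init__)

labeled_set = Crowd_TC('/path/to/TC_dataset/train', crop_size, downsample_ratio=8, method='train')
unlabeled_set = Crowd_UL_TC('/path/to/TC_dataset/train_ul', crop_size, downsample_ratio=8, method='train_ul')

# one labeled sample: normalized crop, density map, keypoints, and the downsampled discrete count map
img, gauss_im, keypoints, gt_discrete = labeled_set[0]
# one unlabeled sample: train_transform_ul returns (crop, 1)
img_ul, _ = unlabeled_set[0]
print(img.shape, gt_discrete.shape, img_ul.shape)
```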
demo.py
ADDED
@@ -0,0 +1,148 @@

```python
import os
import sys
import numpy as np
import torch
import torch.nn.functional as F
from torchvision import transforms

from PIL import Image
from network import pvt_cls as TCN

import gradio as gr


def demo(img_path):
    # config
    batch_size = 8
    crop_size = 256
    model_path = '/users/k21163430/workspace/TreeFormer/models/best_model.pth'

    device = torch.device('cuda')

    # prepare model
    model = TCN.pvt_treeformer(pretrained=False)
    model.to(device)
    model.load_state_dict(torch.load(model_path, device))
    model.eval()

    # preprocess
    img = Image.open(img_path).convert('RGB')
    show_img = np.array(img)
    wd, ht = img.size
    st_size = 1.0 * min(wd, ht)
    if st_size < crop_size:
        rr = 1.0 * crop_size / st_size
        wd = round(wd * rr)
        ht = round(ht * rr)
        st_size = 1.0 * min(wd, ht)
        img = img.resize((wd, ht), Image.BICUBIC)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    img = transform(img)
    img = img.unsqueeze(0)

    # model forward
    with torch.no_grad():
        inputs = img.to(device)
        crop_imgs, crop_masks = [], []
        b, c, h, w = inputs.size()
        rh, rw = crop_size, crop_size

        for i in range(0, h, rh):
            gis, gie = max(min(h - rh, i), 0), min(h, i + rh)

            for j in range(0, w, rw):
                gjs, gje = max(min(w - rw, j), 0), min(w, j + rw)
                crop_imgs.append(inputs[:, :, gis:gie, gjs:gje])
                mask = torch.zeros([b, 1, h, w]).to(device)
                mask[:, :, gis:gie, gjs:gje].fill_(1.0)
                crop_masks.append(mask)
        crop_imgs, crop_masks = map(lambda x: torch.cat(
            x, dim=0), (crop_imgs, crop_masks))

        crop_preds = []
        nz, bz = crop_imgs.size(0), batch_size
        for i in range(0, nz, bz):
            gs, gt = i, min(nz, i + bz)
            crop_pred, _ = model(crop_imgs[gs:gt])
            crop_pred = crop_pred[0]

            _, _, h1, w1 = crop_pred.size()
            crop_pred = F.interpolate(crop_pred, size=(
                h1 * 4, w1 * 4), mode='bilinear', align_corners=True) / 16
            crop_preds.append(crop_pred)
        crop_preds = torch.cat(crop_preds, dim=0)

        # splice them to the original size
        idx = 0
        pred_map = torch.zeros([b, 1, h, w]).to(device)
        for i in range(0, h, rh):
            gis, gie = max(min(h - rh, i), 0), min(h, i + rh)
            for j in range(0, w, rw):
                gjs, gje = max(min(w - rw, j), 0), min(w, j + rw)
                pred_map[:, :, gis:gie, gjs:gje] += crop_preds[idx]
                idx += 1
        # for the overlapping area, compute average value
        mask = crop_masks.sum(dim=0).unsqueeze(0)
        outputs = pred_map / mask

        outputs = F.interpolate(outputs, size=(
            h, w), mode='bilinear', align_corners=True) / 4
        outputs = pred_map / mask
        model_output = round(torch.sum(outputs).item())

        print("{}: {}".format(img_path, model_output))
        outputs = outputs.squeeze().cpu().numpy()
        outputs = (outputs - np.min(outputs)) / \
            (np.max(outputs) - np.min(outputs))

        show_img = show_img / 255.0
        show_img = show_img * 0.2 + outputs[:, :, None] * 0.8

    return model_output, show_img


if __name__ == "__main__":
    # test
    # img_path = sys.argv[1]
    # demo(img)

    # Launch a gr.Interface
    gr_demo = gr.Interface(fn=demo,
                           inputs=gr.Image(source="upload",
                                           type="filepath",
                                           label="Input Image",
                                           width=768,
                                           height=768,
                                           ),
                           outputs=[
                               gr.Number(label="Predicted Tree Count"),
                               gr.Image(label="Density Map",
                                        width=768,
                                        height=768,
                                        )
                           ],
                           title="TreeFormer",
                           description="TreeFormer is a semi-supervised transformer-based framework for tree counting from a single high resolution image. Upload an image and TreeFormer will predict the number of trees in the image and generate a density map of the trees.",
                           article="This work has been developed as part of the ReSET project which has received funding from the European Union's Horizon 2020 FET Proactive Programme under grant agreement No 101017857. The contents of this publication are the sole responsibility of the ReSET consortium and do not necessarily reflect the opinion of the European Union.",
                           examples=[
                               ["./examples/IMG_101.jpg"],
                               ["./examples/IMG_125.jpg"],
                               ["./examples/IMG_138.jpg"],
                               ["./examples/IMG_180.jpg"],
                               ["./examples/IMG_18.jpg"],
                               ["./examples/IMG_206.jpg"],
                               ["./examples/IMG_223.jpg"],
                               ["./examples/IMG_247.jpg"],
                               ["./examples/IMG_270.jpg"],
                               ["./examples/IMG_306.jpg"],
                           ],
                           # cache_examples=True,
                           examples_per_page=10,
                           allow_flagging=False,
                           theme=gr.themes.Default(),
                           )
    gr_demo.launch(share=True, server_port=7861, favicon_path="./assets/reset.png")
```
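Running `python demo.py` launches the Gradio app on port 7861; the prediction function can also be called directly. A small sketch, assuming a CUDA device is available and that model_path inside demo() has been pointed at a downloaded checkpoint (both are assumptions, not guaranteed by this commit):

```python
from demo import demo

# overlay is the input image blended with the normalized predicted density map
count, overlay = demo("./examples/IMG_101.jpg")
print("predicted tree count:", count)
```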
examples/IMG_101.jpg
ADDED
examples/IMG_125.jpg
ADDED
examples/IMG_138.jpg
ADDED
examples/IMG_18.jpg
ADDED
examples/IMG_180.jpg
ADDED
examples/IMG_206.jpg
ADDED
examples/IMG_223.jpg
ADDED
examples/IMG_247.jpg
ADDED
examples/IMG_270.jpg
ADDED
examples/IMG_306.jpg
ADDED
losses/__init__.py
ADDED
@@ -0,0 +1 @@
losses/bregman_pytorch.py
ADDED
@@ -0,0 +1,484 @@

```python
# -*- coding: utf-8 -*-
"""
Rewrite ot.bregman.sinkhorn in Python Optimal Transport (https://pythonot.github.io/_modules/ot/bregman.html#sinkhorn)
using pytorch operations.
Bregman projections for regularized OT (Sinkhorn distance).
"""

import torch

M_EPS = 1e-16


def sinkhorn(a, b, C, reg=1e-1, method='sinkhorn', maxIter=1000, tau=1e3,
             stopThr=1e-9, verbose=False, log=True, warm_start=None, eval_freq=10, print_freq=200, **kwargs):
    """
    Solve the entropic regularization optimal transport problem.
    The inputs should be PyTorch tensors.
    The function solves the following optimization problem:

    .. math::
        \gamma = arg\min_\gamma <\gamma,C>_F + reg\cdot\Omega(\gamma)
        s.t. \gamma 1 = a
             \gamma^T 1 = b
             \gamma \geq 0
    where:
    - C is the (ns,nt) metric cost matrix
    - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
    - a and b are target and source measures (sum to 1)
    The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [1].

    Parameters
    ----------
    a : torch.tensor (na,)
        samples measure in the target domain
    b : torch.tensor (nb,)
        samples in the source domain
    C : torch.tensor (na,nb)
        loss matrix
    reg : float
        Regularization term > 0
    method : str
        method used for the solver, either 'sinkhorn', 'greenkhorn', 'sinkhorn_stabilized' or
        'sinkhorn_epsilon_scaling'; see those functions for specific parameters
    maxIter : int, optional
        Max number of iterations
    stopThr : float, optional
        Stop threshold on error ( > 0 )
    verbose : bool, optional
        Print information along iterations
    log : bool, optional
        record log if True

    Returns
    -------
    gamma : (na x nb) torch.tensor
        Optimal transportation matrix for the given parameters
    log : dict
        log dictionary returned only if log==True in parameters

    References
    ----------
    [1] M. Cuturi, Sinkhorn Distances: Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
    """

    if method.lower() == 'sinkhorn':
        return sinkhorn_knopp(a, b, C, reg, maxIter=maxIter,
                              stopThr=stopThr, verbose=verbose, log=log,
                              warm_start=warm_start, eval_freq=eval_freq, print_freq=print_freq,
                              **kwargs)
    elif method.lower() == 'sinkhorn_stabilized':
        return sinkhorn_stabilized(a, b, C, reg, maxIter=maxIter, tau=tau,
                                   stopThr=stopThr, verbose=verbose, log=log,
                                   warm_start=warm_start, eval_freq=eval_freq, print_freq=print_freq,
                                   **kwargs)
    elif method.lower() == 'sinkhorn_epsilon_scaling':
        return sinkhorn_epsilon_scaling(a, b, C, reg,
                                        maxIter=maxIter, maxInnerIter=100, tau=tau,
                                        scaling_base=0.75, scaling_coef=None, stopThr=stopThr,
                                        verbose=False, log=log, warm_start=warm_start, eval_freq=eval_freq,
                                        print_freq=print_freq, **kwargs)
    else:
        raise ValueError("Unknown method '%s'." % method)


def sinkhorn_knopp(a, b, C, reg=1e-1, maxIter=1000, stopThr=1e-9,
                   verbose=False, log=False, warm_start=None, eval_freq=10, print_freq=200, **kwargs):
    """
    Solve the entropic regularization optimal transport problem with the
    Sinkhorn-Knopp matrix scaling algorithm [1]. Same problem, parameters and
    return values as :func:`sinkhorn`, without the ``method`` argument.

    References
    ----------
    [1] M. Cuturi, Sinkhorn Distances: Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
    """

    device = a.device
    na, nb = C.shape

    assert na >= 1 and nb >= 1, 'C needs to be 2d'
    assert na == a.shape[0] and nb == b.shape[0], "Shape of a or b doesn't match that of C"
    assert reg > 0, 'reg should be greater than 0'
    assert a.min() >= 0. and b.min() >= 0., 'Elements in a or b less than 0'

    if log:
        log = {'err': []}

    if warm_start is not None:
        u = warm_start['u']
        v = warm_start['v']
    else:
        u = torch.ones(na, dtype=a.dtype).to(device) / na
        v = torch.ones(nb, dtype=b.dtype).to(device) / nb

    K = torch.empty(C.shape, dtype=C.dtype).to(device)
    torch.div(C, -reg, out=K)
    torch.exp(K, out=K)

    b_hat = torch.empty(b.shape, dtype=C.dtype).to(device)

    it = 1
    err = 1

    # allocate memory beforehand
    KTu = torch.empty(v.shape, dtype=v.dtype).to(device)
    Kv = torch.empty(u.shape, dtype=u.dtype).to(device)

    while (err > stopThr and it <= maxIter):
        upre, vpre = u, v
        torch.matmul(u, K, out=KTu)
        v = torch.div(b, KTu + M_EPS)
        torch.matmul(K, v, out=Kv)
        u = torch.div(a, Kv + M_EPS)

        if torch.any(torch.isnan(u)) or torch.any(torch.isnan(v)) or \
                torch.any(torch.isinf(u)) or torch.any(torch.isinf(v)):
            print('Warning: numerical errors at iteration', it)
            u, v = upre, vpre
            break

        if log and it % eval_freq == 0:
            # we can speed up the process by checking for the error only every
            # eval_freq iterations
            # below is equivalent to:
            # b_hat = torch.sum(u.reshape(-1, 1) * K * v.reshape(1, -1), 0)
            # but more memory efficient
            b_hat = torch.matmul(u, K) * v
            err = (b - b_hat).pow(2).sum().item()
            # err = (b - b_hat).abs().sum().item()
            log['err'].append(err)

        if verbose and it % print_freq == 0:
            print('iteration {:5d}, constraint error {:5e}'.format(it, err))

        it += 1

    if log:
        log['u'] = u
        log['v'] = v
        log['alpha'] = reg * torch.log(u + M_EPS)
        log['beta'] = reg * torch.log(v + M_EPS)

    # transport plan
    P = u.reshape(-1, 1) * K * v.reshape(1, -1)
    if log:
        return P, log
    else:
        return P


def sinkhorn_stabilized(a, b, C, reg=1e-1, maxIter=1000, tau=1e3, stopThr=1e-9,
                        verbose=False, log=False, warm_start=None, eval_freq=10, print_freq=200, **kwargs):
    """
    Solve the entropic regularization OT problem with log stabilization.
    The algorithm is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [1],
    but with the log stabilization proposed in [3] and defined in [2] (Algo 3.1).
    Same parameters and return values as :func:`sinkhorn`, plus:

    tau : float
        threshold on the max value of u or v before switching to log scaling

    References
    ----------
    [1] M. Cuturi, Sinkhorn Distances: Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
    [2] Bernhard Schmitzer. Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. SIAM Journal on Scientific Computing, 2019
    [3] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
    """

    device = a.device
    na, nb = C.shape

    assert na >= 1 and nb >= 1, 'C needs to be 2d'
    assert na == a.shape[0] and nb == b.shape[0], "Shape of a or b doesn't match that of C"
    assert reg > 0, 'reg should be greater than 0'
    assert a.min() >= 0. and b.min() >= 0., 'Elements in a or b less than 0'

    if log:
        log = {'err': []}

    if warm_start is not None:
        alpha = warm_start['alpha']
        beta = warm_start['beta']
    else:
        alpha = torch.zeros(na, dtype=a.dtype).to(device)
        beta = torch.zeros(nb, dtype=b.dtype).to(device)

    u = torch.ones(na, dtype=a.dtype).to(device) / na
    v = torch.ones(nb, dtype=b.dtype).to(device) / nb

    def update_K(alpha, beta):
        """log space computation"""
        """memory efficient"""
        torch.add(alpha.reshape(-1, 1), beta.reshape(1, -1), out=K)
        torch.add(K, -C, out=K)
        torch.div(K, reg, out=K)
        torch.exp(K, out=K)

    def update_P(alpha, beta, u, v, ab_updated=False):
        """log space P (gamma) computation"""
        torch.add(alpha.reshape(-1, 1), beta.reshape(1, -1), out=P)
        torch.add(P, -C, out=P)
        torch.div(P, reg, out=P)
        if not ab_updated:
            torch.add(P, torch.log(u + M_EPS).reshape(-1, 1), out=P)
            torch.add(P, torch.log(v + M_EPS).reshape(1, -1), out=P)
        torch.exp(P, out=P)

    K = torch.empty(C.shape, dtype=C.dtype).to(device)
    update_K(alpha, beta)

    b_hat = torch.empty(b.shape, dtype=C.dtype).to(device)

    it = 1
    err = 1
    ab_updated = False

    # allocate memory beforehand
    KTu = torch.empty(v.shape, dtype=v.dtype).to(device)
    Kv = torch.empty(u.shape, dtype=u.dtype).to(device)
    P = torch.empty(C.shape, dtype=C.dtype).to(device)

    while (err > stopThr and it <= maxIter):
        upre, vpre = u, v
        torch.matmul(u, K, out=KTu)
        v = torch.div(b, KTu + M_EPS)
        torch.matmul(K, v, out=Kv)
        u = torch.div(a, Kv + M_EPS)

        ab_updated = False
        # remove numerical problems and store them in K
        if u.abs().sum() > tau or v.abs().sum() > tau:
            alpha += reg * torch.log(u + M_EPS)
            beta += reg * torch.log(v + M_EPS)
            u.fill_(1. / na)
            v.fill_(1. / nb)
            update_K(alpha, beta)
            ab_updated = True

        if log and it % eval_freq == 0:
            # we can speed up the process by checking for the error only every
            # eval_freq iterations
            update_P(alpha, beta, u, v, ab_updated)
            b_hat = torch.sum(P, 0)
            err = (b - b_hat).pow(2).sum().item()
            log['err'].append(err)

        if verbose and it % print_freq == 0:
            print('iteration {:5d}, constraint error {:5e}'.format(it, err))

        it += 1

    if log:
        log['u'] = u
        log['v'] = v
        log['alpha'] = alpha + reg * torch.log(u + M_EPS)
        log['beta'] = beta + reg * torch.log(v + M_EPS)

    # transport plan
    update_P(alpha, beta, u, v, False)

    if log:
        return P, log
    else:
        return P


def sinkhorn_epsilon_scaling(a, b, C, reg=1e-1, maxIter=100, maxInnerIter=100, tau=1e3, scaling_base=0.75,
                             scaling_coef=None, stopThr=1e-9, verbose=False, log=False, warm_start=None, eval_freq=10,
                             print_freq=200, **kwargs):
    """
    Solve the entropic regularization OT problem with log stabilization and epsilon scaling.
    The algorithm is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [1],
    but with the log stabilization proposed in [3] and the epsilon scaling proposed in [2], algorithm 3.2.
    Same parameters and return values as :func:`sinkhorn`, plus:

    tau : float
        threshold on the max value of u or v before switching to log scaling

    References
    ----------
    [1] M. Cuturi, Sinkhorn Distances: Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
    [2] Bernhard Schmitzer. Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. SIAM Journal on Scientific Computing, 2019
    [3] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
    """

    na, nb = C.shape

    assert na >= 1 and nb >= 1, 'C needs to be 2d'
    assert na == a.shape[0] and nb == b.shape[0], "Shape of a or b doesn't match that of C"
    assert reg > 0, 'reg should be greater than 0'
    assert a.min() >= 0. and b.min() >= 0., 'Elements in a or b less than 0'

    def get_reg(it, reg, pre_reg):
        if it == 1:
            return scaling_coef
        else:
            if (pre_reg - reg) * scaling_base < M_EPS:
                return reg
            else:
                return (pre_reg - reg) * scaling_base + reg

    if scaling_coef is None:
        scaling_coef = C.max() + reg

    it = 1
    err = 1
    running_reg = scaling_coef

    if log:
        log = {'err': []}

    warm_start = None

    while (err > stopThr and it <= maxIter):
        running_reg = get_reg(it, reg, running_reg)
        P, _log = sinkhorn_stabilized(a, b, C, running_reg, maxIter=maxInnerIter, tau=tau,
                                      stopThr=stopThr, verbose=False, log=True,
                                      warm_start=warm_start, eval_freq=eval_freq, print_freq=print_freq,
                                      **kwargs)

        warm_start = {}
        warm_start['alpha'] = _log['alpha']
        warm_start['beta'] = _log['beta']

        primal_val = (C * P).sum() + reg * (P * torch.log(P)).sum() - reg * P.sum()
        dual_val = (_log['alpha'] * a).sum() + (_log['beta'] * b).sum() - reg * P.sum()
        err = primal_val - dual_val
        log['err'].append(err)

        if verbose and it % print_freq == 0:
            print('iteration {:5d}, constraint error {:5e}'.format(it, err))

        it += 1

    if log:
        log['alpha'] = _log['alpha']
        log['beta'] = _log['beta']
        return P, log
    else:
        return P
```
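A toy CPU sanity check for the solver above, with made-up marginals and cost matrix (not part of the commit):

```python
import torch
from losses.bregman_pytorch import sinkhorn

a = torch.ones(3) / 3   # target measure
b = torch.ones(4) / 4   # source measure
C = torch.rand(3, 4)    # illustrative cost matrix

P, log = sinkhorn(a, b, C, reg=0.1, maxIter=500, log=True)
print(P.sum(dim=1))     # rows of the transport plan should approximately match a
print(log['err'][-1])   # constraint error recorded every eval_freq iterations
```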
losses/consistency_loss.py
ADDED
@@ -0,0 +1,294 @@

```python
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn
from losses import ramps


class consistency_weight(object):
    """
    ramp_types = ['sigmoid_rampup', 'linear_rampup', 'cosine_rampup', 'log_rampup', 'exp_rampup']
    """
    def __init__(self, final_w, iters_per_epoch, rampup_starts=0, rampup_ends=7, ramp_type='sigmoid_rampup'):
        self.final_w = final_w
        self.iters_per_epoch = iters_per_epoch
        self.rampup_starts = rampup_starts * iters_per_epoch
        self.rampup_ends = rampup_ends * iters_per_epoch
        self.rampup_length = (self.rampup_ends - self.rampup_starts)
        self.rampup_func = getattr(ramps, ramp_type)
        self.current_rampup = 0

    def __call__(self, epoch, curr_iter):
        cur_total_iter = self.iters_per_epoch * epoch + curr_iter
        if cur_total_iter < self.rampup_starts:
            return 0
        self.current_rampup = self.rampup_func(cur_total_iter - self.rampup_starts, self.rampup_length)
        return self.final_w * self.current_rampup


def CE_loss(input_logits, target_targets, ignore_index, temperature=1):
    return F.cross_entropy(input_logits / temperature, target_targets, ignore_index=ignore_index)


# for FocalLoss
def softmax_helper(x):
    # copy from: https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/utilities/nd_softmax.py
    rpt = [1 for _ in range(len(x.size()))]
    rpt[1] = x.size(1)
    x_max = x.max(1, keepdim=True)[0].repeat(*rpt)
    e_x = torch.exp(x - x_max)
    return e_x / e_x.sum(1, keepdim=True).repeat(*rpt)


def get_alpha(supervised_loader):
    # get number of classes
    num_labels = 0
    for image_batch, label_batch in supervised_loader:
        label_batch.data[label_batch.data == 255] = 0  # pixels of ignore class added to background
        l_unique = torch.unique(label_batch.data)
        list_unique = [element.item() for element in l_unique.flatten()]
        num_labels = max(max(list_unique), num_labels)
    num_classes = num_labels + 1
    # count class occurrences
    alpha = [0 for i in range(num_classes)]
    for image_batch, label_batch in supervised_loader:
        label_batch.data[label_batch.data == 255] = 0  # pixels of ignore class added to background
        l_unique = torch.unique(label_batch.data)
        list_unique = [element.item() for element in l_unique.flatten()]
        l_unique_count = torch.stack([(label_batch.data == x_u).sum() for x_u in l_unique])  # e.g. tensor([65920, 36480])
        list_count = [count.item() for count in l_unique_count.flatten()]
        for index in list_unique:
            alpha[index] += list_count[list_unique.index(index)]
    return alpha


# for FocalLoss
def softmax_helper(x):
    # copy from: https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/utilities/nd_softmax.py
    rpt = [1 for _ in range(len(x.size()))]
    rpt[1] = x.size(1)
    x_max = x.max(1, keepdim=True)[0].repeat(*rpt)
    e_x = torch.exp(x - x_max)
    return e_x / e_x.sum(1, keepdim=True).repeat(*rpt)


class FocalLoss(nn.Module):
    """
    copy from: https://github.com/Hsuxu/Loss_ToolBox-PyTorch/blob/master/FocalLoss/FocalLoss.py
    This is an implementation of Focal Loss with smooth label cross entropy supported, as proposed in
    'Focal Loss for Dense Object Detection' (https://arxiv.org/abs/1708.02002)
    Focal_Loss = -1 * alpha * (1 - pt) * log(pt)
    :param num_class:
    :param alpha: (tensor) 3D or 4D the scalar factor for this criterion
    :param gamma: (float,double) gamma > 0 reduces the relative loss for well-classified examples (p>0.5), putting more
                  focus on hard, misclassified examples
    :param smooth: (float,double) smooth value when cross entropy
    :param balance_index: (int) balance class index, should be specific when alpha is float
    :param size_average: (bool, optional) By default, the losses are averaged over each loss element in the batch.
    """

    def __init__(self, apply_nonlin=None, ignore_index=None, alpha=None, gamma=2, balance_index=0, smooth=1e-5, size_average=True):
        super(FocalLoss, self).__init__()
        self.apply_nonlin = apply_nonlin
        self.ignore_index = ignore_index  # stored so that forward() can build the valid mask
        self.alpha = alpha
        self.gamma = gamma
        self.balance_index = balance_index
        self.smooth = smooth
        self.size_average = size_average

        if self.smooth is not None:
            if self.smooth < 0 or self.smooth > 1.0:
                raise ValueError('smooth value should be in [0,1]')

    def forward(self, logit, target):
        if self.apply_nonlin is not None:
            logit = self.apply_nonlin(logit)
        num_class = logit.shape[1]

        if logit.dim() > 2:
            # N,C,d1,d2 -> N,C,m (m=d1*d2*...)
            logit = logit.view(logit.size(0), logit.size(1), -1)
            logit = logit.permute(0, 2, 1).contiguous()
            logit = logit.view(-1, logit.size(-1))
        target = torch.squeeze(target, 1)
        target = target.view(-1, 1)

        valid_mask = None
        if self.ignore_index is not None:
            valid_mask = target != self.ignore_index
            target = target * valid_mask

        alpha = self.alpha

        if alpha is None:
            alpha = torch.ones(num_class, 1)
        elif isinstance(alpha, (list, np.ndarray)):
            assert len(alpha) == num_class
            alpha = torch.FloatTensor(alpha).view(num_class, 1)
            alpha = alpha / alpha.sum()
            alpha = 1 / alpha  # inverse of class frequency
        elif isinstance(alpha, float):
            alpha = torch.ones(num_class, 1)
            alpha = alpha * (1 - self.alpha)
            alpha[self.balance_index] = self.alpha
        else:
            raise TypeError('Not support alpha type')

        if alpha.device != logit.device:
            alpha = alpha.to(logit.device)

        idx = target.cpu().long()

        one_hot_key = torch.FloatTensor(target.size(0), num_class).zero_()

        # to resolve error in idx in scatter_
        idx[idx == 225] = 0

        one_hot_key = one_hot_key.scatter_(1, idx, 1)
        if one_hot_key.device != logit.device:
            one_hot_key = one_hot_key.to(logit.device)

        if self.smooth:
            one_hot_key = torch.clamp(
                one_hot_key, self.smooth / (num_class - 1), 1.0 - self.smooth)
        pt = (one_hot_key * logit).sum(1) + self.smooth
        logpt = pt.log()

        gamma = self.gamma

        alpha = alpha[idx]
        alpha = torch.squeeze(alpha)
        loss = -1 * alpha * torch.pow((1 - pt), gamma) * logpt

        if valid_mask is not None:
            loss = loss * valid_mask.squeeze()

        if self.size_average:
            loss = loss.mean()
        else:
            loss = loss.sum()
        return loss


class abCE_loss(nn.Module):
    """
    Annealed-Bootstrapped cross-entropy loss
    """
    def __init__(self, iters_per_epoch, epochs, num_classes, weight=None,
                 reduction='mean', thresh=0.7, min_kept=1, ramp_type='log_rampup'):
        super(abCE_loss, self).__init__()
        self.weight = torch.FloatTensor(weight) if weight is not None else weight
        self.reduction = reduction
        self.thresh = thresh
        self.min_kept = min_kept
        self.ramp_type = ramp_type

        if ramp_type is not None:
            self.rampup_func = getattr(ramps, ramp_type)
            self.iters_per_epoch = iters_per_epoch
            self.num_classes = num_classes
            self.start = 1 / num_classes
            self.end = 0.9
            self.total_num_iters = (epochs - (0.6 * epochs)) * iters_per_epoch

    def threshold(self, curr_iter, epoch):
        cur_total_iter = self.iters_per_epoch * epoch + curr_iter
        current_rampup = self.rampup_func(cur_total_iter, self.total_num_iters)
        return current_rampup * (self.end - self.start) + self.start

    def forward(self, predict, target, ignore_index, curr_iter, epoch):
        batch_kept = self.min_kept * target.size(0)
        prob_out = F.softmax(predict, dim=1)
        tmp_target = target.clone()
        tmp_target[tmp_target == ignore_index] = 0
        prob = prob_out.gather(1, tmp_target.unsqueeze(1))
        mask = target.contiguous().view(-1, ) != ignore_index
        sort_prob, sort_indices = prob.contiguous().view(-1, )[mask].contiguous().sort()

        if self.ramp_type is not None:
            thresh = self.threshold(curr_iter=curr_iter, epoch=epoch)
        else:
            thresh = self.thresh

        min_threshold = sort_prob[min(batch_kept, sort_prob.numel() - 1)] if sort_prob.numel() > 0 else 0.0
        threshold = max(min_threshold, thresh)
        loss_matrix = F.cross_entropy(predict, target,
                                      weight=self.weight.to(predict.device) if self.weight is not None else None,
                                      ignore_index=ignore_index, reduction='none')
        loss_matirx = loss_matrix.contiguous().view(-1, )
        sort_loss_matirx = loss_matirx[mask][sort_indices]
        select_loss_matrix = sort_loss_matirx[sort_prob < threshold]
        if self.reduction == 'sum' or select_loss_matrix.numel() == 0:
            return select_loss_matrix.sum()
        elif self.reduction == 'mean':
            return select_loss_matrix.mean()
        else:
            raise NotImplementedError('Reduction Error!')


def softmax_mse_loss(inputs, targets, conf_mask=False, threshold=None, use_softmax=False):
    assert inputs.requires_grad == True and targets.requires_grad == False
    assert inputs.size() == targets.size()  # (batch_size * num_classes * H * W)
    inputs = F.softmax(inputs, dim=1)
    if use_softmax:
        targets = F.softmax(targets, dim=1)

    if conf_mask:
        loss_mat = F.mse_loss(inputs, targets, reduction='none')
        mask = (targets.max(1)[0] > threshold)
        loss_mat = loss_mat[mask.unsqueeze(1).expand_as(loss_mat)]
        if loss_mat.shape.numel() == 0: loss_mat = torch.tensor([0.]).to(inputs.device)
        return loss_mat.mean()
    else:
        return F.mse_loss(inputs, targets, reduction='mean')  # take the mean over the batch_size


def softmax_kl_loss(inputs, targets, conf_mask=False, threshold=None, use_softmax=False):
    assert inputs.requires_grad == True and targets.requires_grad == False
    assert inputs.size() == targets.size()

    # log-probabilities of the inputs, expected by F.kl_div in the masked branch
    input_log_softmax = F.log_softmax(inputs, dim=1)
    if use_softmax:
        targets = F.softmax(targets, dim=1)
    if conf_mask:
        loss_mat = F.kl_div(input_log_softmax, targets, reduction='none')
        mask = (targets.max(1)[0] > threshold)
        loss_mat = loss_mat[mask.unsqueeze(1).expand_as(loss_mat)]
        if loss_mat.shape.numel() == 0: loss_mat = torch.tensor([0.]).to(inputs.device)
        return loss_mat.sum() / mask.shape.numel()
    else:
        return F.kl_div(inputs, targets, reduction='mean')


def softmax_js_loss(inputs, targets, **_):
    assert inputs.requires_grad == True and targets.requires_grad == False
    assert inputs.size() == targets.size()
    epsilon = 1e-5

    M = (F.softmax(inputs, dim=1) + targets) * 0.5
    kl1 = F.kl_div(F.log_softmax(inputs, dim=1), M, reduction='mean')
    kl2 = F.kl_div(torch.log(targets + epsilon), M, reduction='mean')
    return (kl1 + kl2) * 0.5


def pair_wise_loss(unsup_outputs, size_average=True, nbr_of_pairs=8):
    """
    Pair-wise loss in the sup. mat.
    """
    if isinstance(unsup_outputs, list):
        unsup_outputs = torch.stack(unsup_outputs)

    # Only for a subset of the aux outputs to reduce computation and memory
    unsup_outputs = unsup_outputs[torch.randperm(unsup_outputs.size(0))]
    unsup_outputs = unsup_outputs[:nbr_of_pairs]

    temp = torch.zeros_like(unsup_outputs)  # For grad purposes
    for i, u in enumerate(unsup_outputs):
        temp[i] = F.softmax(u, dim=1)
    mean_prediction = temp.mean(0).unsqueeze(0)  # Mean over the auxiliary outputs
    pw_loss = ((temp - mean_prediction) ** 2).mean(0)  # Variance
    pw_loss = pw_loss.sum(1)  # Sum over classes
    if size_average:
        return pw_loss.mean()
    return pw_loss.sum()
```
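A brief sketch of how the ramp-up weight above behaves. It assumes losses/ramps.py (also added in this commit) exposes sigmoid_rampup, as the class docstring suggests, and uses illustrative values for final_w and iters_per_epoch (not part of the commit):

```python
from losses.consistency_loss import consistency_weight

# sigmoid ramp-up of the unsupervised-loss weight over the first 7 epochs
cons_w = consistency_weight(final_w=1.0, iters_per_epoch=100,
                            rampup_starts=0, rampup_ends=7,
                            ramp_type='sigmoid_rampup')

for epoch in range(10):
    w = cons_w(epoch, curr_iter=0)  # grows from ~0 toward final_w, then stays there
    print(epoch, round(float(w), 3))
```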
losses/dm_loss.py
ADDED
@@ -0,0 +1,62 @@

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from losses.consistency_loss import *
from losses.ot_loss import OT_Loss


class DMLoss(nn.Module):
    def __init__(self):
        super(DMLoss, self).__init__()
        self.DMLoss = 0.0
        self.losses = {}

    def forward(self, results, points, gt_discrete):
        self.DMLoss = 0.0
        self.losses = {}

        if results is None:
            self.DMLoss = 0.0
        elif isinstance(results, list) and len(results) > 0:
            count = 0
            for i in range(len(results[0])):
                with torch.set_grad_enabled(False):
                    preds_mean = (results[0][i]) / len(results[0][0][0])

                for j in range(len(results)):
                    var_sel = softmax_kl_loss(results[j][i], preds_mean)
                    exp_var = torch.exp(-var_sel)
                    consistency_dist = (preds_mean - results[j][i]) ** 2
                    temploss = (torch.mean(consistency_dist * exp_var) / (exp_var + 1e-8) + var_sel)

                    self.losses.update({'unlabel_{}_loss'.format(str(i + 1)): temploss})
                    self.DMLoss += temploss

                # Compute counting loss.
                count_loss = self.mae(outputs_L[0].sum(1).sum(1).sum(1),
                                      torch.from_numpy(gd_count).float().to(self.device)) * self.args.reg
                epoch_count_loss.update(count_loss.item(), N)

                # Compute OT loss.
                ot_loss, wd, ot_obj_value = self.ot_loss(outputs_normed, outputs_L[0], points)

                ot_loss = ot_loss * self.args.ot
                ot_obj_value = ot_obj_value * self.args.ot
                epoch_ot_loss.update(ot_loss.item(), N)
                epoch_ot_obj_value.update(ot_obj_value.item(), N)
                epoch_wd.update(wd, N)

                gd_count_tensor = (torch.from_numpy(gd_count).float()
                                   .to(self.device).unsqueeze(1).unsqueeze(2).unsqueeze(3))

                gt_discrete_normed = gt_discrete / (gd_count_tensor + 1e-6)
                tv_loss = (self.tvloss(outputs_normed, gt_discrete_normed).sum(1).sum(1).sum(1) *
                           torch.from_numpy(gd_count).float().to(self.device)).mean(0) * self.args.tv
                epoch_tv_loss.update(tv_loss.item(), N)

                count += 1
            if count > 0:
                self.multiconloss = self.multiconloss / count

        return self.multiconloss
```
losses/multi_con_loss.py
ADDED
@@ -0,0 +1,41 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from losses.consistency_loss import *


class MultiConLoss(nn.Module):
    def __init__(self):
        super(MultiConLoss, self).__init__()
        self.countloss_criterion = nn.MSELoss(reduction='sum')
        self.multiconloss = 0.0
        self.losses = {}

    def forward(self, unlabeled_results):
        self.multiconloss = 0.0
        self.losses = {}

        if unlabeled_results is None:
            self.multiconloss = 0.0
        elif isinstance(unlabeled_results, list) and len(unlabeled_results) > 0:
            count = 0
            for i in range(len(unlabeled_results[0])):
                with torch.set_grad_enabled(False):
                    preds_mean = (unlabeled_results[0][i] + unlabeled_results[1][i] + unlabeled_results[2][i]) / len(unlabeled_results)
                for j in range(len(unlabeled_results)):
                    var_sel = softmax_kl_loss(unlabeled_results[j][i], preds_mean)
                    exp_var = torch.exp(-var_sel)
                    consistency_dist = (preds_mean - unlabeled_results[j][i]) ** 2
                    temploss = (torch.mean(consistency_dist * exp_var) / (exp_var + 1e-8) + var_sel)

                    self.losses.update({'unlabel_{}_loss'.format(str(i + 1)): temploss})
                    self.multiconloss += temploss

                count += 1
            if count > 0:
                self.multiconloss = self.multiconloss / count

        return self.multiconloss
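A rough usage sketch for `MultiConLoss`: it takes a list of three decoder branches, each holding several density-map views of the same unlabelled batch (mirroring the nested crops produced in `network/pvt_cls.py`). The tensors below are random stand-ins; the branch outputs must require gradients because `softmax_kl_loss` asserts this.

```python
import torch
from losses.multi_con_loss import MultiConLoss

criterion = MultiConLoss()

# Three decoder branches, each a list of 5 density-map views of the same batch.
branches = [[torch.rand(2, 1, 64, 64, requires_grad=True) for _ in range(5)]
            for _ in range(3)]

loss = criterion(branches)   # uncertainty-weighted consistency across branches
print(loss.item())
```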
losses/ot_loss.py
ADDED
@@ -0,0 +1,68 @@
import torch
from torch.nn import Module
from .bregman_pytorch import sinkhorn


class OT_Loss(Module):
    def __init__(self, c_size, stride, norm_cood, device, num_of_iter_in_ot=100, reg=10.0):
        super(OT_Loss, self).__init__()
        assert c_size % stride == 0

        self.c_size = c_size
        self.device = device
        self.norm_cood = norm_cood
        self.num_of_iter_in_ot = num_of_iter_in_ot
        self.reg = reg

        # coordinate is same to image space, set to constant since crop size is same
        self.cood = torch.arange(0, c_size, step=stride,
                                 dtype=torch.float32, device=device) + stride / 2
        self.density_size = self.cood.size(0)
        self.cood.unsqueeze_(0)  # [1, #cood]
        if self.norm_cood:
            self.cood = self.cood / c_size * 2 - 1  # map to [-1, 1]
        self.output_size = self.cood.size(1)

    def forward(self, normed_density, unnormed_density, points):
        batch_size = normed_density.size(0)
        assert len(points) == batch_size
        assert self.output_size == normed_density.size(2)
        loss = torch.zeros([1]).to(self.device)
        ot_obj_values = torch.zeros([1]).to(self.device)
        wd = 0  # wasserstein distance
        for idx, im_points in enumerate(points):
            if len(im_points) > 0:
                # compute l2 square distance, it should be source target distance. [#gt, #cood * #cood]
                if self.norm_cood:
                    im_points = im_points / self.c_size * 2 - 1  # map to [-1, 1]
                x = im_points[:, 0].unsqueeze_(1)  # [#gt, 1]
                y = im_points[:, 1].unsqueeze_(1)
                x_dis = -2 * torch.matmul(x, self.cood) + x * x + self.cood * self.cood  # [#gt, #cood]
                y_dis = -2 * torch.matmul(y, self.cood) + y * y + self.cood * self.cood
                y_dis.unsqueeze_(2)
                x_dis.unsqueeze_(1)
                dis = y_dis + x_dis
                dis = dis.view((dis.size(0), -1))  # size of [#gt, #cood * #cood]

                source_prob = normed_density[idx][0].view([-1]).detach()
                target_prob = (torch.ones([len(im_points)]) / len(im_points)).to(self.device)

                # use sinkhorn to solve OT, compute optimal beta.
                P, log = sinkhorn(target_prob, source_prob, dis, self.reg, maxIter=self.num_of_iter_in_ot, log=True)
                beta = log['beta']  # size is the same as source_prob: [#cood * #cood]
                ot_obj_values += torch.sum(normed_density[idx] * beta.view([1, self.output_size, self.output_size]))
                # compute the gradient of OT loss to predicted density (unnormed_density).
                # im_grad = beta / source_count - <beta, source_density> / (source_count)^2
                source_density = unnormed_density[idx][0].view([-1]).detach()
                source_count = source_density.sum()
                im_grad_1 = (source_count) / (source_count * source_count + 1e-8) * beta  # size of [#cood * #cood]
                im_grad_2 = (source_density * beta).sum() / (source_count * source_count + 1e-8)  # size of 1
                im_grad = im_grad_1 - im_grad_2
                im_grad = im_grad.detach().view([1, self.output_size, self.output_size])
                # Define loss = <im_grad, predicted density>. The gradient of loss w.r.t predicted density is im_grad.
                loss += torch.sum(unnormed_density[idx] * im_grad)
                wd += torch.sum(dis * P).item()

        return loss, wd, ot_obj_values
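A minimal sketch of calling `OT_Loss` on a single predicted density map with two annotated points. The crop size, stride and coordinates are made-up values; it assumes the companion `sinkhorn` solver in `losses/bregman_pytorch.py` is importable, as the class itself does.

```python
import torch
from losses.ot_loss import OT_Loss

device = torch.device('cpu')
crop_size, stride = 256, 4                      # density map is 64x64 here
ot = OT_Loss(crop_size, stride, norm_cood=0, device=device,
             num_of_iter_in_ot=50, reg=10.0)

pred = torch.rand(1, 1, 64, 64)                 # unnormalised predicted density
pred_normed = pred / (pred.sum() + 1e-6)        # normalised to a probability map
points = [torch.tensor([[120.0, 30.0], [200.0, 180.0]])]  # (x, y) per image

loss, wd, ot_obj = ot(pred_normed, pred, points)
print(loss.item(), wd, ot_obj.item())
```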
losses/ramps.py
ADDED
@@ -0,0 +1,41 @@
# Copyright (c) 2018, Curious AI Ltd. All rights reserved.
#
# This work is licensed under the Creative Commons Attribution-NonCommercial
# 4.0 International License. To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

"""Functions for ramping hyperparameters up or down

Each function takes the current training step or epoch, and the
ramp length in the same format, and returns a multiplier between
0 and 1.
"""


import numpy as np


def sigmoid_rampup(current, rampup_length):
    """Exponential rampup from https://arxiv.org/abs/1610.02242"""
    if rampup_length == 0:
        return 1.0
    else:
        current = np.clip(current, 0.0, rampup_length)
        phase = 1.0 - current / rampup_length
        return float(np.exp(-5.0 * phase * phase))


def linear_rampup(current, rampup_length):
    """Linear rampup"""
    assert current >= 0 and rampup_length >= 0
    if current >= rampup_length:
        return 1.0
    else:
        return current / rampup_length


def cosine_rampdown(current, rampdown_length):
    """Cosine rampdown from https://arxiv.org/abs/1608.03983"""
    assert 0 <= current <= rampdown_length
    return float(.5 * (np.cos(np.pi * current / rampdown_length) + 1))
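These ramps are typically used to scale the unsupervised loss weight over training. A small illustrative schedule using `sigmoid_rampup` (the ramp length and base weight are invented numbers):

```python
from losses import ramps

consistency, ramp_len = 1.0, 30   # illustrative values

for epoch in [0, 5, 15, 30, 100]:
    w = consistency * ramps.sigmoid_rampup(epoch, ramp_len)
    print(epoch, round(w, 4))
# the weight grows smoothly from about exp(-5) towards 1.0
# and stays at 1.0 once the ramp length is reached
```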
losses/rank_loss.py
ADDED
@@ -0,0 +1,53 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class MarginRankLoss(nn.Module):
    def __init__(self):
        super(MarginRankLoss, self).__init__()
        self.loss = 0.0

    def forward(self, img_list, margin=0):
        length = len(img_list)
        self.loss = 0.0
        B, C, H, W = img_list[0].shape
        for i in range(length - 1):
            for j in range(i + 1, length):
                self.loss = self.loss + torch.sum(F.relu(img_list[j].sum(-1).sum(-1).sum(-1) - img_list[i].sum(-1).sum(-1).sum(-1) + margin))

        self.loss = self.loss / (B * length * (length - 1) / 2)
        return self.loss


class RankLoss(nn.Module):
    def __init__(self):
        super(RankLoss, self).__init__()
        self.countloss_criterion = nn.MSELoss(reduction='sum')
        self.rankloss_criterion = MarginRankLoss()
        self.rankloss = 0.0
        self.losses = {}

    def forward(self, unlabeled_results):
        self.rankloss = 0.0
        self.losses = {}

        if unlabeled_results is None:
            self.rankloss = 0.0
        elif isinstance(unlabeled_results, tuple) and len(unlabeled_results) > 0:
            self.rankloss = self.rankloss_criterion(unlabeled_results)
        elif isinstance(unlabeled_results, list) and len(unlabeled_results) > 0:
            count = 0
            for i in range(len(unlabeled_results)):
                if isinstance(unlabeled_results[i], tuple) and len(unlabeled_results[i]) > 0:
                    temploss = self.rankloss_criterion(unlabeled_results[i])
                    self.losses.update({'unlabel_{}_loss'.format(str(i + 1)): temploss})
                    self.rankloss += temploss

                    count += 1
            if count > 0:
                self.rankloss = self.rankloss / count

        return self.rankloss
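`MarginRankLoss` penalises a nested (smaller) view whose predicted total count exceeds that of the larger view it was cropped from. A toy sketch with constant density maps, where the first call respects the ordering and the second violates it:

```python
import torch
from losses.rank_loss import MarginRankLoss

rank = MarginRankLoss()

big   = torch.ones(2, 1, 8, 8)         # larger view: count 64 per image
small = 0.5 * torch.ones(2, 1, 8, 8)   # nested view: count 32 per image

# img_list is ordered from largest to smallest view, so
# small.sum() - big.sum() is negative and the hinge term is zero.
print(rank([big, small]).item())   # 0.0
print(rank([small, big]).item())   # positive: ordering violated
```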
network/pvt_cls.py
ADDED
@@ -0,0 +1,623 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import partial

from timm.models.layers import DropPath, to_2tuple, trunc_normal_
from timm.models.registry import register_model
from timm.models.vision_transformer import _cfg

import math
from torch.distributions.uniform import Uniform
import numpy as np
import random

__all__ = [
    'pvt_tiny', 'pvt_small', 'pvt_medium', 'pvt_large'
]


class SELayer(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)


class Regression(nn.Module):
    def __init__(self):
        super(Regression, self).__init__()

        self.v1 = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(256, 128, 3, padding=1, dilation=1),
            nn.BatchNorm2d(128), nn.ReLU(inplace=True))

        self.v2 = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(512, 256, 3, padding=1, dilation=1),
            nn.BatchNorm2d(256), nn.ReLU(inplace=True))

        self.v3 = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(1024, 512, 3, padding=1, dilation=1), nn.BatchNorm2d(512),
            nn.ReLU(inplace=True))

        self.ca2 = nn.Sequential(ChannelAttention(512),
                                 nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
                                 nn.BatchNorm2d(512), nn.ReLU(inplace=True))

        self.ca1 = nn.Sequential(ChannelAttention(256),
                                 nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
                                 nn.BatchNorm2d(256), nn.ReLU(inplace=True))

        self.ca0 = nn.Sequential(ChannelAttention(128),
                                 nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
                                 nn.BatchNorm2d(128), nn.ReLU(inplace=True))

        self.res2 = nn.Sequential(
            nn.Conv2d(512, 256, 3, padding=1, dilation=1), nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 128, 3, padding=1, dilation=1), nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, 3, padding=1, dilation=1),
            nn.ReLU(inplace=True))

        self.res1 = nn.Sequential(
            nn.Conv2d(256, 128, 3, padding=1, dilation=1), nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 64, 3, padding=1, dilation=1), nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 1, 3, padding=1, dilation=1),
            nn.ReLU(inplace=True))

        self.res0 = nn.Sequential(
            nn.Conv2d(128, 64, 3, padding=1, dilation=1), nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 1, 3, padding=1, dilation=1),
            nn.ReLU(inplace=True))

        self.noise2 = DropOutDecoder(1, 512, 512)
        self.noise1 = FeatureDropDecoder(1, 256, 256)
        self.noise0 = FeatureNoiseDecoder(1, 128, 128)

        self.upsam2 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        self.upsam4 = nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True)

        self.conv1 = nn.Conv2d(1024, 512, kernel_size=1, bias=False)
        self.conv2 = nn.Conv2d(512, 256, kernel_size=1, bias=False)
        self.conv3 = nn.Conv2d(256, 128, kernel_size=1, bias=False)
        self.conv4 = nn.Conv2d(128, 1, kernel_size=1, bias=False)

        # cls2.view(8, 1024, 1, 1))

        self.init_param()

    def forward(self, x, cls):
        x0 = x[0]; x1 = x[1]; x2 = x[2]; x3 = x[3]
        cls0 = cls[0].view(cls[0].shape[0], cls[0].shape[1], 1, 1)
        cls1 = cls[1].view(cls[1].shape[0], cls[1].shape[1], 1, 1)
        cls2 = cls[2].view(cls[2].shape[0], cls[2].shape[1], 1, 1)

        x2_1 = self.ca2(x2) + self.v3(x3)
        x1_1 = self.ca1(x1) + self.v2(x2_1)
        x0_1 = self.ca0(x0) + self.v1(x1_1)

        if self.training:
            yc2 = self.conv4(self.conv3(self.conv2(self.noise2(self.conv1(cls2))))).squeeze()
            yc1 = self.conv4(self.conv3(self.noise1(self.conv2(cls1)))).squeeze()
            yc0 = self.conv4(self.noise0(self.conv3(cls0))).squeeze()

            y2 = self.res2(self.upsam4(self.noise2(x2_1)))
            y1 = self.res1(self.upsam2(self.noise1(x1_1)))
            y0 = self.res0(self.noise0(x0_1))

        else:
            yc2 = self.conv4(self.conv3(self.conv2(self.conv1(cls2)))).squeeze()
            yc1 = self.conv4(self.conv3(self.conv2(cls1))).squeeze()
            yc0 = self.conv4(self.conv3(cls0)).squeeze()

            y2 = self.res2(self.upsam4(x2_1))
            y1 = self.res1(self.upsam2(x1_1))
            y0 = self.res0(x0_1)

        return [y0, y1, y2], [yc0, yc1, yc2]

    def init_param(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)


class Mlp(nn.Module):
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


def upsample(in_channels, out_channels, upscale, kernel_size=3):
    # A series of x2 upsampling layers until we reach the requested upscale.
    # Note: PixelShuffle is only instantiated when upscale > 1 and is not
    # defined or imported in this file; all call sites here use upscale=1.
    layers = []
    conv1x1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
    nn.init.kaiming_normal_(conv1x1.weight.data, nonlinearity='relu')
    layers.append(conv1x1)
    for i in range(int(math.log(upscale, 2))):
        layers.append(PixelShuffle(out_channels, scale=2))
    return nn.Sequential(*layers)


class FeatureDropDecoder(nn.Module):
    def __init__(self, upscale, conv_in_ch, num_classes):
        super(FeatureDropDecoder, self).__init__()
        self.upsample = upsample(conv_in_ch, num_classes, upscale=upscale)

    def feature_dropout(self, x):
        attention = torch.mean(x, dim=1, keepdim=True)
        max_val, _ = torch.max(attention.view(x.size(0), -1), dim=1, keepdim=True)
        threshold = max_val * np.random.uniform(0.7, 0.9)
        threshold = threshold.view(x.size(0), 1, 1, 1).expand_as(attention)
        drop_mask = (attention < threshold).float()
        return x.mul(drop_mask)

    def forward(self, x):
        x = self.feature_dropout(x)
        return x


class FeatureNoiseDecoder(nn.Module):
    def __init__(self, upscale, conv_in_ch, num_classes, uniform_range=0.3):
        super(FeatureNoiseDecoder, self).__init__()
        self.upsample = upsample(conv_in_ch, num_classes, upscale=upscale)
        self.uni_dist = Uniform(-uniform_range, uniform_range)

    def feature_based_noise(self, x):
        noise_vector = self.uni_dist.sample(x.shape[1:]).to(x.device).unsqueeze(0)
        x_noise = x.mul(noise_vector) + x
        return x_noise

    def forward(self, x):
        x = self.feature_based_noise(x)
        return x


class DropOutDecoder(nn.Module):
    def __init__(self, upscale, conv_in_ch, num_classes, drop_rate=0.3, spatial_dropout=True):
        super(DropOutDecoder, self).__init__()
        self.dropout = nn.Dropout2d(p=drop_rate) if spatial_dropout else nn.Dropout(drop_rate)
        self.upsample = upsample(conv_in_ch, num_classes, upscale=upscale)

    def forward(self, x):
        x = self.dropout(x)
        return x


## ChannelAttention
class ChannelAttention(nn.Module):
    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)

        self.fc = nn.Sequential(
            nn.Linear(in_planes, in_planes // ratio, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(in_planes // ratio, in_planes, bias=False)
        )
        self.sigmoid = nn.Sigmoid()
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def forward(self, in_feature):
        x = in_feature
        b, c, _, _ = in_feature.size()
        avg_out = self.fc(self.avg_pool(x).view(b, c)).view(b, c, 1, 1)
        out = avg_out
        return self.sigmoid(out).expand_as(in_feature) * in_feature


class Attention(nn.Module):
    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1):
        super().__init__()
        assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."

        self.dim = dim
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = qk_scale or head_dim ** -0.5

        self.q = nn.Linear(dim, dim, bias=qkv_bias)
        self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        self.sr_ratio = sr_ratio
        if sr_ratio > 1:
            self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
            self.norm = nn.LayerNorm(dim)

    def forward(self, x, H, W):
        B, N, C = x.shape
        q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)

        if self.sr_ratio > 4:
            x_ = x.permute(0, 2, 1).reshape(B, C, H, W)
            x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1)
            x_ = self.norm(x_)
            kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        else:
            kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        k, v = kv[0], kv[1]

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)

        return x


class Block(nn.Module):

    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim,
            num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
            attn_drop=attn_drop, proj_drop=drop, sr_ratio=sr_ratio)
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)

    def forward(self, x, H, W):
        x = x + self.drop_path(self.attn(self.norm1(x), H, W))
        x = x + self.drop_path(self.mlp(self.norm2(x)))

        return x


class PatchEmbed(nn.Module):
    """ Image to Patch Embedding
    """

    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
        super().__init__()
        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)

        self.img_size = img_size
        self.patch_size = patch_size
        # assert img_size[0] % patch_size[0] == 0 and img_size[1] % patch_size[1] == 0, \
        #     f"img_size {img_size} should be divided by patch_size {patch_size}."
        self.H, self.W = img_size[0] // patch_size[0], img_size[1] // patch_size[1]
        self.num_patches = self.H * self.W
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        B, C, H, W = x.shape

        x = self.proj(x).flatten(2).transpose(1, 2)
        x = self.norm(x)
        H, W = H // self.patch_size[0], W // self.patch_size[1]

        return x, (H, W)


class PyramidVisionTransformer(nn.Module):
    def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512],
                 num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0.,
                 attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm,
                 depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], num_stages=4):
        super().__init__()
        self.num_classes = num_classes
        self.depths = depths
        self.num_stages = num_stages

        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
        cur = 0

        for i in range(num_stages):
            patch_embed = PatchEmbed(img_size=img_size if i == 0 else img_size // (2 ** (i + 1)),
                                     patch_size=patch_size if i == 0 else 2,
                                     in_chans=in_chans if i == 0 else embed_dims[i - 1],
                                     embed_dim=embed_dims[i])
            num_patches = patch_embed.num_patches if i == 0 else patch_embed.num_patches + 1
            pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dims[i]))
            pos_drop = nn.Dropout(p=drop_rate)

            block = nn.ModuleList([Block(
                dim=embed_dims[i], num_heads=num_heads[i], mlp_ratio=mlp_ratios[i], qkv_bias=qkv_bias,
                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + j],
                norm_layer=norm_layer, sr_ratio=sr_ratios[i])
                for j in range(depths[i])])
            cur += depths[i]

            setattr(self, f"patch_embed{i + 1}", patch_embed)
            setattr(self, f"pos_embed{i + 1}", pos_embed)
            setattr(self, f"pos_drop{i + 1}", pos_drop)
            setattr(self, f"block{i + 1}", block)

        self.norm = norm_layer(embed_dims[3])

        # cls_token
        self.cls_token_1 = nn.Parameter(torch.zeros(1, 1, embed_dims[1]))
        self.cls_token_2 = nn.Parameter(torch.zeros(1, 1, embed_dims[2]))
        self.cls_token_3 = nn.Parameter(torch.zeros(1, 1, embed_dims[3]))

        # classification head
        self.head = nn.Linear(embed_dims[3], num_classes) if num_classes > 0 else nn.Identity()

        self.regression = Regression()

        # init weights
        for i in range(num_stages):
            pos_embed = getattr(self, f"pos_embed{i + 1}")
            trunc_normal_(pos_embed, std=.02)
        trunc_normal_(self.cls_token_1, std=.02)
        trunc_normal_(self.cls_token_2, std=.02)
        trunc_normal_(self.cls_token_3, std=.02)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay(self):
        # return {'pos_embed', 'cls_token'} # has pos_embed may be better
        return {'cls_token'}

    def get_classifier(self):
        return self.head

    def reset_classifier(self, num_classes, global_pool=''):
        self.num_classes = num_classes
        self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()

    def _get_pos_embed(self, pos_embed, patch_embed, H, W):
        if H * W == self.patch_embed1.num_patches:
            return pos_embed
        else:
            return F.interpolate(
                pos_embed.reshape(1, patch_embed.H, patch_embed.W, -1).permute(0, 3, 1, 2),
                size=(H, W), mode="bilinear").reshape(1, -1, H * W).permute(0, 2, 1)

    def forward_features(self, x):
        B = x.shape[0]
        outputs = list()
        cls_output = list()

        for i in range(self.num_stages):
            patch_embed = getattr(self, f"patch_embed{i + 1}")
            pos_embed = getattr(self, f"pos_embed{i + 1}")
            pos_drop = getattr(self, f"pos_drop{i + 1}")
            block = getattr(self, f"block{i + 1}")
            x, (H, W) = patch_embed(x)

            if i == 0:
                pos_embed = self._get_pos_embed(pos_embed, patch_embed, H, W)
            elif i == 1:
                cls_tokens = self.cls_token_1.expand(B, -1, -1)
                x = torch.cat((cls_tokens, x), dim=1)
                pos_embed_ = self._get_pos_embed(pos_embed[:, 1:], patch_embed, H, W)
                pos_embed = torch.cat((pos_embed[:, 0:1], pos_embed_), dim=1)
            elif i == 2:
                cls_tokens = self.cls_token_2.expand(B, -1, -1)
                x = torch.cat((cls_tokens, x), dim=1)
                pos_embed_ = self._get_pos_embed(pos_embed[:, 1:], patch_embed, H, W)
                pos_embed = torch.cat((pos_embed[:, 0:1], pos_embed_), dim=1)
            elif i == 3:
                cls_tokens = self.cls_token_3.expand(B, -1, -1)
                x = torch.cat((cls_tokens, x), dim=1)
                pos_embed_ = self._get_pos_embed(pos_embed[:, 1:], patch_embed, H, W)
                pos_embed = torch.cat((pos_embed[:, 0:1], pos_embed_), dim=1)

            x = pos_drop(x + pos_embed)
            for blk in block:
                x = blk(x, H, W)

            if i == 0:
                x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
            else:
                x_cls = x[:, 1, :]
                x = x[:, 1:, :].reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
                cls_output.append(x_cls)

            outputs.append(x)
        return outputs, cls_output

    def forward(self, label_x, unlabel_x=None):

        if self.training:
            # labeled image processing
            label_x, l_cls = self.forward_features(label_x)
            out_label_x, out_cls_l = self.regression(label_x, l_cls)
            label_x_1, label_x_2, label_x_3 = out_label_x

            B, C, H, W = label_x_1.size()
            label_sum = label_x_1.view([B, -1]).sum(1).unsqueeze(1).unsqueeze(2).unsqueeze(3)
            label_normed = label_x_1 / (label_sum + 1e-6)

            # unlabeled image processing
            B, C, H, W = unlabel_x.shape
            unlabel_x, ul_cls = self.forward_features(unlabel_x)
            out_unlabel_x, out_cls_ul = self.regression(unlabel_x, ul_cls)
            y0, y1, y2 = out_unlabel_x

            unlabel_x_1 = self.generate_feature_patches(y0)
            unlabel_x_2 = self.generate_feature_patches(y1)
            unlabel_x_3 = self.generate_feature_patches(y2)

            assert unlabel_x_1.shape[0] == B * 5
            assert unlabel_x_2.shape[0] == B * 5
            assert unlabel_x_3.shape[0] == B * 5

            unlabel_x_1 = torch.split(unlabel_x_1, split_size_or_sections=B, dim=0)
            unlabel_x_2 = torch.split(unlabel_x_2, split_size_or_sections=B, dim=0)
            unlabel_x_3 = torch.split(unlabel_x_3, split_size_or_sections=B, dim=0)

            return [label_x_1, label_x_2, label_x_3], [unlabel_x_1, unlabel_x_2, unlabel_x_3], label_normed, out_cls_l, out_cls_ul

        else:
            label_x, l_cls = self.forward_features(label_x)
            out_label_x, out_cls_l = self.regression(label_x, l_cls)
            label_x_1, label_x_2, label_x_3 = out_label_x
            B, C, H, W = label_x_1.size()
            label_sum = label_x_1.view([B, -1]).sum(1).unsqueeze(1).unsqueeze(2).unsqueeze(3)
            label_normed = label_x_1 / (label_sum + 1e-6)

            return [label_x_1, label_x_2, label_x_3], label_normed

    def generate_feature_patches(self, unlabel_x, ratio=0.75):
        # unlabeled image processing: build 5 progressively smaller centred crops
        unlabel_x_1 = unlabel_x
        b, c, h, w = unlabel_x.shape

        center_x = random.randint(h // 2 - (h - h * ratio) // 2, h // 2 + (h - h * ratio) // 2)
        center_y = random.randint(w // 2 - (w - w * ratio) // 2, w // 2 + (w - w * ratio) // 2)

        new_h2 = int(h * ratio)
        new_w2 = int(w * ratio)  # 48*48
        unlabel_x_2 = unlabel_x[:, :, center_x - new_h2 // 2:center_x + new_h2 // 2,
                                center_y - new_w2 // 2:center_y + new_w2 // 2]

        new_h3 = int(new_h2 * ratio)
        new_w3 = int(new_w2 * ratio)
        unlabel_x_3 = unlabel_x[:, :, center_x - new_h3 // 2:center_x + new_h3 // 2,
                                center_y - new_w3 // 2:center_y + new_w3 // 2]

        new_h4 = int(new_h3 * ratio)
        new_w4 = int(new_w3 * ratio)
        unlabel_x_4 = unlabel_x[:, :, center_x - new_h4 // 2:center_x + new_h4 // 2,
                                center_y - new_w4 // 2:center_y + new_w4 // 2]

        new_h5 = int(new_h4 * ratio)
        new_w5 = int(new_w4 * ratio)
        unlabel_x_5 = unlabel_x[:, :, center_x - new_h5 // 2:center_x + new_h5 // 2,
                                center_y - new_w5 // 2:center_y + new_w5 // 2]

        unlabel_x_2 = nn.functional.interpolate(unlabel_x_2, size=(h, w), mode='bilinear')
        unlabel_x_3 = nn.functional.interpolate(unlabel_x_3, size=(h, w), mode='bilinear')
        unlabel_x_4 = nn.functional.interpolate(unlabel_x_4, size=(h, w), mode='bilinear')
        unlabel_x_5 = nn.functional.interpolate(unlabel_x_5, size=(h, w), mode='bilinear')

        unlabel_x = torch.cat([unlabel_x_1, unlabel_x_2, unlabel_x_3, unlabel_x_4, unlabel_x_5], dim=0)

        return unlabel_x


def _conv_filter(state_dict, patch_size=16):
    """ convert patch embedding weight from manual patchify + linear proj to conv"""
    out_dict = {}
    for k, v in state_dict.items():
        if 'patch_embed.proj.weight' in k:
            v = v.reshape((v.shape[0], 3, patch_size, patch_size))
        out_dict[k] = v

    return out_dict


@register_model
def pvt_tiny(pretrained=False, **kwargs):
    model = PyramidVisionTransformer(
        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1],
        **kwargs)
    model.default_cfg = _cfg()

    return model


@register_model
def pvt_small(pretrained=False, **kwargs):
    model = PyramidVisionTransformer(
        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], **kwargs)
    model.default_cfg = _cfg()

    return model


@register_model
def pvt_medium(pretrained=False, **kwargs):
    model = PyramidVisionTransformer(
        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1],
        **kwargs)
    model.default_cfg = _cfg()

    return model


@register_model
def pvt_large(pretrained=False, **kwargs):
    model = PyramidVisionTransformer(
        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 8, 27, 3], sr_ratios=[8, 4, 2, 1],
        **kwargs)
    model.default_cfg = _cfg()

    return model


@register_model
def pvt_treeformer(pretrained=False, **kwargs):
    model = PyramidVisionTransformer(
        patch_size=4, embed_dims=[128, 256, 512, 1024], num_heads=[4, 8, 16, 32], mlp_ratios=[4, 4, 4, 4], qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1],
        **kwargs)
    model.default_cfg = _cfg()

    return model
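A quick sketch of running `pvt_treeformer` in evaluation mode on a single 256x256 crop (the input size is illustrative). In this configuration the finest density map comes out at 1/4 of the input resolution, and its sum is the predicted tree count; training mode additionally requires an unlabelled batch, as shown in `train.py` below.

```python
import torch
from network import pvt_cls as TCN

model = TCN.pvt_treeformer(pretrained=False)
model.eval()

with torch.no_grad():
    imgs = torch.randn(1, 3, 256, 256)        # one RGB crop
    density_maps, normed = model(imgs)        # eval mode returns maps + normalised map
    # density_maps[0] is the finest density map (here 1x1x64x64)
    print(density_maps[0].shape, density_maps[0].sum().item())
```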
requirements.txt
ADDED
@@ -0,0 +1,7 @@
numpy==1.21.5
Pillow==9.4.0
scikit_learn==1.2.2
scipy==1.7.3
timm==0.4.12
torch==1.12.1
torchvision==0.13.1
sample_imgs/overview.png
ADDED
test.py
ADDED
@@ -0,0 +1,117 @@
import argparse
import torch
import os
import numpy as np
import datasets.crowd as crowd
from network import pvt_cls as TCN
import torch.nn.functional as F
from scipy.io import savemat
from sklearn.metrics import r2_score

parser = argparse.ArgumentParser(description='Test')
parser.add_argument('--device', default='0', help='assign device')
parser.add_argument('--batch-size', type=int, default=8, help='train batch size')
parser.add_argument('--crop-size', type=int, default=256, help='the crop size of the train image')
parser.add_argument('--model-path', type=str, default='/scratch/users/k2254235/ckpts/SEMI/Treeformer/best_model_mae-21.49_epoch-1759.pth', help='saved model path')
parser.add_argument('--data-path', type=str, default='/users/k2254235/Lab/TCT/Dataset/London_103050/', help='dataset path')
parser.add_argument('--dataset', type=str, default='TC')


def test(args, isSave=True):
    os.environ['CUDA_VISIBLE_DEVICES'] = args.device  # set vis gpu
    device = torch.device('cuda')

    model_path = args.model_path
    crop_size = args.crop_size
    data_path = args.data_path

    dataset = crowd.Crowd_TC(os.path.join(data_path, 'test_data'), crop_size, 1, method='val')
    dataloader = torch.utils.data.DataLoader(dataset, 1, shuffle=False, num_workers=1, pin_memory=True)

    model = TCN.pvt_treeformer(pretrained=False)
    model.to(device)
    model.load_state_dict(torch.load(model_path, device))
    model.eval()
    image_errs = []
    result = []
    R2_es = []
    R2_gt = []
    l = 0
    for inputs, count, name, imgauss in dataloader:
        with torch.no_grad():
            inputs = inputs.to(device)
            crop_imgs, crop_masks = [], []
            b, c, h, w = inputs.size()
            rh, rw = args.crop_size, args.crop_size

            for i in range(0, h, rh):
                gis, gie = max(min(h - rh, i), 0), min(h, i + rh)
                for j in range(0, w, rw):
                    gjs, gje = max(min(w - rw, j), 0), min(w, j + rw)
                    crop_imgs.append(inputs[:, :, gis:gie, gjs:gje])
                    mask = torch.zeros([b, 1, h, w]).to(device)
                    mask[:, :, gis:gie, gjs:gje].fill_(1.0)
                    crop_masks.append(mask)
            crop_imgs, crop_masks = map(lambda x: torch.cat(x, dim=0), (crop_imgs, crop_masks))

            crop_preds = []
            nz, bz = crop_imgs.size(0), args.batch_size
            for i in range(0, nz, bz):
                gs, gt = i, min(nz, i + bz)
                crop_pred, _ = model(crop_imgs[gs:gt])
                crop_pred = crop_pred[0]

                _, _, h1, w1 = crop_pred.size()
                crop_pred = F.interpolate(crop_pred, size=(h1 * 4, w1 * 4), mode='bilinear', align_corners=True) / 16
                crop_preds.append(crop_pred)
            crop_preds = torch.cat(crop_preds, dim=0)
            # import pdb; pdb.set_trace()

            # splice them to the original size
            idx = 0
            pred_map = torch.zeros([b, 1, h, w]).to(device)
            for i in range(0, h, rh):
                gis, gie = max(min(h - rh, i), 0), min(h, i + rh)
                for j in range(0, w, rw):
                    gjs, gje = max(min(w - rw, j), 0), min(w, j + rw)
                    pred_map[:, :, gis:gie, gjs:gje] += crop_preds[idx]
                    idx += 1
            # for the overlapping area, compute average value
            mask = crop_masks.sum(dim=0).unsqueeze(0)
            outputs = pred_map / mask

            # Note: the interpolation below is overwritten by the reassignment on
            # the following line and therefore has no effect on the final output.
            outputs = F.interpolate(outputs, size=(h, w), mode='bilinear', align_corners=True) / 4
            outputs = pred_map / mask

            img_err = count[0].item() - torch.sum(outputs).item()
            R2_gt.append(count[0].item())
            R2_es.append(torch.sum(outputs).item())

            print("Img name: ", name, "Error: ", img_err, "GT count: ", count[0].item(), "Model out: ", torch.sum(outputs).item())
            image_errs.append(img_err)
            result.append([name, count[0].item(), torch.sum(outputs).item(), img_err])

            savemat('predictions/' + name[0] + '.mat',
                    {'estimation': np.squeeze(outputs.cpu().data.numpy()),
                     'image': np.squeeze(inputs.cpu().data.numpy()),
                     'gt': np.squeeze(imgauss.cpu().data.numpy())})
            l = l + 1

    image_errs = np.array(image_errs)

    mse = np.sqrt(np.mean(np.square(image_errs)))
    mae = np.mean(np.abs(image_errs))
    R_2 = r2_score(R2_gt, R2_es)

    print('{}: mae {}, mse {}, R2 {}\n'.format(model_path, mae, mse, R_2))

    if isSave:
        with open("test.txt", "w") as f:
            for i in range(len(result)):
                f.write(str(result[i]).replace('[', '').replace(']', '').replace(',', ' ') + "\n")
        f.close()


if __name__ == '__main__':
    args = parser.parse_args()
    test(args, isSave=True)
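For reference, the aggregate metrics printed at the end of `test.py` reduce to the following computation over per-image counts; note that the value reported under the name `mse` is actually a root-mean-squared error. The numbers here are invented:

```python
import numpy as np
from sklearn.metrics import r2_score

# Per-image ground-truth and predicted tree counts (made-up values).
gt  = np.array([12.0, 40.0, 7.0])
est = np.array([10.5, 43.0, 6.0])

errs = gt - est
mae  = np.mean(np.abs(errs))
rmse = np.sqrt(np.mean(np.square(errs)))   # printed as "mse" in test.py
r2   = r2_score(gt, est)
print(mae, rmse, r2)
```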
train.py
ADDED
@@ -0,0 +1,369 @@
import os
import time
import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate
import numpy as np
from datetime import datetime
import torch.nn.functional as F
from datasets.crowd import Crowd_TC, Crowd_UL_TC

from network import pvt_cls as TCN
from losses.multi_con_loss import MultiConLoss

from utils.pytorch_utils import Save_Handle, AverageMeter
import utils.log_utils as log_utils
import argparse
from losses.rank_loss import RankLoss

from losses import ramps
from losses.ot_loss import OT_Loss
from losses.consistency_loss import *

parser = argparse.ArgumentParser(description='Train')
parser.add_argument('--data-dir', default='/users/k2254235/Lab/TCT/Dataset/London_103050/', help='data path')
parser.add_argument('--dataset', default='TC')
parser.add_argument('--lr', type=float, default=1e-5, help='the initial learning rate')
parser.add_argument('--weight-decay', type=float, default=1e-4, help='the weight decay')
parser.add_argument('--resume', default='', type=str, help='the path of resume training model')
parser.add_argument('--max-epoch', type=int, default=4000, help='max training epoch')
parser.add_argument('--val-epoch', type=int, default=1, help='the num of steps to log training information')
parser.add_argument('--val-start', type=int, default=0, help='the epoch start to val')
parser.add_argument('--batch-size', type=int, default=16, help='train batch size')
parser.add_argument('--batch-size-ul', type=int, default=16, help='train batch size for unlabeled data')
parser.add_argument('--device', default='0', help='assign device')
parser.add_argument('--num-workers', type=int, default=0, help='the num of training process')
parser.add_argument('--crop-size', type=int, default=256, help='the crop size of the train image')
parser.add_argument('--rl', type=float, default=1, help='weight of the ranking loss')
parser.add_argument('--reg', type=float, default=1, help='entropy regularization in sinkhorn / weight of the counting loss')
parser.add_argument('--ot', type=float, default=0.1, help='weight of the OT loss')
parser.add_argument('--tv', type=float, default=0.01, help='weight of the TV loss')
parser.add_argument('--num-of-iter-in-ot', type=int, default=100, help='sinkhorn iterations')
parser.add_argument('--norm-cood', type=int, default=0, help='whether to norm cood when computing distance')
parser.add_argument('--run-name', default='Treeformer_test', help='run name for wandb interface/logging')
parser.add_argument('--consistency', type=int, default=1, help='weight of the multi-level consistency loss')
args = parser.parse_args()


def train_collate(batch):
    transposed_batch = list(zip(*batch))
    images = torch.stack(transposed_batch[0], 0)
    gauss = torch.stack(transposed_batch[1], 0)
    points = transposed_batch[2]
    gt_discretes = torch.stack(transposed_batch[3], 0)
    return images, gauss, points, gt_discretes


def train_collate_UL(batch):
    transposed_batch = list(zip(*batch))
    images = torch.stack(transposed_batch[0], 0)
    return images


def get_current_consistency_weight(epoch):
    # Consistency ramp-up from https://arxiv.org/abs/1610.02242
    # Note: args.consistency_ramp is not defined in the argument list above.
    return args.consistency * ramps.sigmoid_rampup(epoch, args.consistency_ramp)


class Trainer(object):
    def __init__(self, args):
        self.args = args

    def setup(self):
        args = self.args
        sub_dir = (
            "SEMI/{}_12-1-input-{}_reg-{}_nIter-{}_normCood-{}".format(
                args.run_name, args.crop_size, args.reg,
                args.num_of_iter_in_ot, args.norm_cood))

        self.save_dir = os.path.join("/scratch/users/k2254235", "ckpts", sub_dir)
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)

        time_str = datetime.strftime(datetime.now(), "%m%d-%H%M%S")
        self.logger = log_utils.get_logger(
            os.path.join(self.save_dir, "train-{:s}.log".format(time_str)))

        log_utils.print_config(vars(args), self.logger)

        if torch.cuda.is_available():
            self.device = torch.device("cuda")
            self.device_count = torch.cuda.device_count()
            self.logger.info("using {} gpus".format(self.device_count))
        else:
            raise Exception("gpu is not available")

        downsample_ratio = 4
        self.datasets = {"train": Crowd_TC(os.path.join(args.data_dir, "train_data"), args.crop_size,
                                           downsample_ratio, "train"),
                         "val": Crowd_TC(os.path.join(args.data_dir, "valid_data"),
                                         args.crop_size, downsample_ratio, "val")}

        self.datasets_ul = {"train_ul": Crowd_UL_TC(os.path.join(args.data_dir, "train_data_ul"),
                                                    args.crop_size, downsample_ratio, "train_ul")}

        self.dataloaders = {
            x: DataLoader(self.datasets[x],
                          collate_fn=(train_collate if x == "train" else default_collate),
                          batch_size=(args.batch_size if x == "train" else 1),
                          shuffle=(True if x == "train" else False),
                          num_workers=args.num_workers * self.device_count,
                          pin_memory=(True if x == "train" else False))
            for x in ["train", "val"]}

        self.dataloaders_ul = {
            x: DataLoader(self.datasets_ul[x],
                          collate_fn=train_collate_UL,
                          batch_size=args.batch_size_ul,
                          shuffle=True,
                          num_workers=args.num_workers * self.device_count,
                          pin_memory=(True if x == "train" else False))
            for x in ["train_ul"]}

        self.model = TCN.pvt_treeformer(pretrained=False)
        self.model.to(self.device)
        self.optimizer = optim.AdamW(self.model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        self.start_epoch = 0

        if args.resume:
            self.logger.info("loading pretrained model from " + args.resume)
            suf = args.resume.rsplit(".", 1)[-1]
            if suf == "tar":
                checkpoint = torch.load(args.resume, self.device)
                self.model.load_state_dict(checkpoint["model_state_dict"])
                self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
                self.start_epoch = checkpoint["epoch"] + 1
            elif suf == "pth":
                self.model.load_state_dict(torch.load(args.resume, self.device))
        else:
            self.logger.info("random initialization")

        self.ot_loss = OT_Loss(args.crop_size, downsample_ratio, args.norm_cood,
                               self.device, args.num_of_iter_in_ot, args.reg)

        self.tvloss = nn.L1Loss(reduction="none").to(self.device)
        self.mse = nn.MSELoss().to(self.device)
        self.mae = nn.L1Loss().to(self.device)
        self.save_list = Save_Handle(max_num=1)
        self.best_mae = np.inf
        self.best_mse = np.inf
        self.rankloss = RankLoss().to(self.device)
        self.kl_distance = nn.KLDivLoss(reduction='none')
        self.multiconloss = MultiConLoss().to(self.device)

    def train(self):
        """training process"""
        args = self.args
        for epoch in range(self.start_epoch, args.max_epoch + 1):
            self.logger.info("-" * 5 + "Epoch {}/{}".format(epoch, args.max_epoch) + "-" * 5)
            self.epoch = epoch
            self.train_epoch()
            if epoch % args.val_epoch == 0 and epoch >= args.val_start:
                self.val_epoch()

    def train_epoch(self):
        epoch_ot_loss = AverageMeter()
        epoch_ot_obj_value = AverageMeter()
        epoch_wd = AverageMeter()
        epoch_tv_loss = AverageMeter()
        epoch_count_loss = AverageMeter()
        epoch_count_consistency_l = AverageMeter()
        epoch_count_consistency_ul = AverageMeter()
        epoch_loss = AverageMeter()
        epoch_mae = AverageMeter()
        epoch_mse = AverageMeter()
        epoch_start = time.time()
        epoch_rank_loss = AverageMeter()
        epoch_consistensy_loss = AverageMeter()

        self.model.train()  # Set model to training mode

        for step, (inputs, gausss, points, gt_discrete) in enumerate(self.dataloaders["train"]):
            inputs = inputs.to(self.device)
            gausss = gausss.to(self.device)
            gd_count = np.array([len(p) for p in points], dtype=np.float32)

            points = [p.to(self.device) for p in points]
            gt_discrete = gt_discrete.to(self.device)
            N = inputs.size(0)

            # Draw one unlabeled batch per labeled batch.
            for st, unlabel_data in enumerate(self.dataloaders_ul["train_ul"]):
                inputs_ul = unlabel_data.to(self.device)
                break

            with torch.set_grad_enabled(True):
                outputs_L, outputs_UL, outputs_normed, CLS_L, CLS_UL = self.model(inputs, inputs_ul)
                outputs_L = outputs_L[0]

                with torch.set_grad_enabled(False):
                    preds_UL = (outputs_UL[0][0] + outputs_UL[1][0] + outputs_UL[2][0]) / 3

                # Compute counting loss.
                count_loss = self.mae(outputs_L.sum(1).sum(1).sum(1),
                                      torch.from_numpy(gd_count).float().to(self.device)) * self.args.reg

                # Compute OT loss.
                ot_loss, wd, ot_obj_value = self.ot_loss(outputs_normed, outputs_L, points)
                ot_loss = ot_loss * self.args.ot
                ot_obj_value = ot_obj_value * self.args.ot

                gd_count_tensor = (torch.from_numpy(gd_count).float().to(self.device)
                                   .unsqueeze(1).unsqueeze(2).unsqueeze(3))
                gt_discrete_normed = gt_discrete / (gd_count_tensor + 1e-6)
                tv_loss = (self.tvloss(outputs_normed, gt_discrete_normed).sum(1).sum(1).sum(1) *
                           torch.from_numpy(gd_count).float().to(self.device)).mean(0) * self.args.tv

                epoch_ot_loss.update(ot_loss.item(), N)
                epoch_ot_obj_value.update(ot_obj_value.item(), N)
                epoch_wd.update(wd, N)
                epoch_count_loss.update(count_loss.item(), N)
                epoch_tv_loss.update(tv_loss.item(), N)

                # Compute ranking loss.
                rank_loss = self.rankloss(outputs_UL) * self.args.rl
                epoch_rank_loss.update(rank_loss.item(), N)

                # Compute multi level consistency loss.
                consistency_loss = args.consistency * self.multiconloss(outputs_UL)
                epoch_consistensy_loss.update(consistency_loss.item(), N)

                # Compute consistency count.
                Con_cls_UL = (CLS_UL[0] + CLS_UL[1] + CLS_UL[2]) / 3
                Con_cls_L = torch.from_numpy(gd_count).float().to(self.device)

                count_loss_l = self.mae(torch.stack((CLS_L[0], CLS_L[1], CLS_L[2])),
                                        torch.stack((Con_cls_L, Con_cls_L, Con_cls_L)))
                count_loss_ul = self.mae(torch.stack((CLS_UL[0], CLS_UL[1], CLS_UL[2])),
                                         torch.stack((Con_cls_UL, Con_cls_UL, Con_cls_UL)))
                epoch_count_consistency_l.update(count_loss_l.item(), N)
                epoch_count_consistency_ul.update(count_loss_ul.item(), N)

                loss = count_loss + ot_loss + tv_loss + rank_loss + count_loss_l + count_loss_ul + consistency_loss

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                pred_count = (torch.sum(outputs_L.view(N, -1), dim=1).detach().cpu().numpy())
                pred_err = pred_count - gd_count
                epoch_loss.update(loss.item(), N)
                epoch_mse.update(np.mean(pred_err * pred_err), N)
                epoch_mae.update(np.mean(abs(pred_err)), N)

        self.logger.info(
            "Epoch {} Train, Loss: {:.2f}, Count Loss: {:.2f}, OT Loss: {:.2e}, TV Loss: {:.2e}, Rank Loss: {:.2f},"
            "Consistensy Loss: {:.2f}, MSE: {:.2f}, MAE: {:.2f},LC Loss: {:.2f}, ULC Loss: {:.2f}, Cost {:.1f} sec".format(
                self.epoch, epoch_loss.get_avg(), epoch_count_loss.get_avg(), epoch_ot_loss.get_avg(), epoch_tv_loss.get_avg(), epoch_rank_loss.get_avg(),
                epoch_consistensy_loss.get_avg(), np.sqrt(epoch_mse.get_avg()), epoch_mae.get_avg(), epoch_count_consistency_l.get_avg(),
                epoch_count_consistency_ul.get_avg(), time.time() - epoch_start))

        model_state_dic = self.model.state_dict()
        save_path = os.path.join(self.save_dir, "{}_ckpt.tar".format(self.epoch))

        torch.save({"epoch": self.epoch, "optimizer_state_dict": self.optimizer.state_dict(),
                    "model_state_dict": model_state_dic}, save_path)
        self.save_list.append(save_path)

    def val_epoch(self):
        args = self.args
        epoch_start = time.time()
        self.model.eval()  # Set model to evaluate mode
        epoch_res = []
        for inputs, count, name, gauss_im in self.dataloaders["val"]:
            with torch.no_grad():
|
287 |
+
with torch.no_grad():
|
288 |
+
inputs = inputs.to(self.device)
|
289 |
+
crop_imgs, crop_masks = [], []
|
290 |
+
b, c, h, w = inputs.size()
|
291 |
+
rh, rw = args.crop_size, args.crop_size
|
292 |
+
for i in range(0, h, rh):
|
293 |
+
gis, gie = max(min(h - rh, i), 0), min(h, i + rh)
|
294 |
+
for j in range(0, w, rw):
|
295 |
+
gjs, gje = max(min(w - rw, j), 0), min(w, j + rw)
|
296 |
+
crop_imgs.append(inputs[:, :, gis:gie, gjs:gje])
|
297 |
+
mask = torch.zeros([b, 1, h, w]).to(self.device)
|
298 |
+
mask[:, :, gis:gie, gjs:gje].fill_(1.0)
|
299 |
+
crop_masks.append(mask)
|
300 |
+
crop_imgs, crop_masks = map(
|
301 |
+
lambda x: torch.cat(x, dim=0), (crop_imgs, crop_masks))
|
302 |
+
|
303 |
+
crop_preds = []
|
304 |
+
nz, bz = crop_imgs.size(0), args.batch_size
|
305 |
+
for i in range(0, nz, bz):
|
306 |
+
gs, gt = i, min(nz, i + bz)
|
307 |
+
|
308 |
+
crop_pred, _ = self.model(crop_imgs[gs:gt])
|
309 |
+
crop_pred = crop_pred[0]
|
310 |
+
_, _, h1, w1 = crop_pred.size()
|
311 |
+
crop_pred = (F.interpolate(crop_pred, size=(h1 * 4, w1 * 4),
|
312 |
+
mode="bilinear", align_corners=True) / 16 )
|
313 |
+
|
314 |
+
crop_preds.append(crop_pred)
|
315 |
+
crop_preds = torch.cat(crop_preds, dim=0)
|
316 |
+
|
317 |
+
# splice them to the original size
|
318 |
+
idx = 0
|
319 |
+
pred_map = torch.zeros([b, 1, h, w]).to(self.device)
|
320 |
+
for i in range(0, h, rh):
|
321 |
+
gis, gie = max(min(h - rh, i), 0), min(h, i + rh)
|
322 |
+
for j in range(0, w, rw):
|
323 |
+
gjs, gje = max(min(w - rw, j), 0), min(w, j + rw)
|
324 |
+
pred_map[:, :, gis:gie, gjs:gje] += crop_preds[idx]
|
325 |
+
idx += 1
|
326 |
+
# for the overlapping area, compute average value
|
327 |
+
mask = crop_masks.sum(dim=0).unsqueeze(0)
|
328 |
+
outputs = pred_map / mask
|
329 |
+
|
330 |
+
res = count[0].item() - torch.sum(outputs).item()
|
331 |
+
epoch_res.append(res)
|
332 |
+
epoch_res = np.array(epoch_res)
|
333 |
+
mse = np.sqrt(np.mean(np.square(epoch_res)))
|
334 |
+
mae = np.mean(np.abs(epoch_res))
|
335 |
+
|
336 |
+
self.logger.info("Epoch {} Val, MSE: {:.2f}, MAE: {:.2f}, Cost {:.1f} sec".format(
|
337 |
+
self.epoch, mse, mae, time.time() - epoch_start ))
|
338 |
+
|
339 |
+
|
340 |
+
model_state_dic = self.model.state_dict()
|
341 |
+
print("Comaprison", mae, self.best_mae)
|
342 |
+
if mae < self.best_mae:
|
343 |
+
self.best_mse = mse
|
344 |
+
self.best_mae = mae
|
345 |
+
self.logger.info(
|
346 |
+
"save best mse {:.2f} mae {:.2f} model epoch {}".format(
|
347 |
+
self.best_mse, self.best_mae, self.epoch))
|
348 |
+
|
349 |
+
print("Saving best model at {} epoch".format(self.epoch))
|
350 |
+
model_path = os.path.join(
|
351 |
+
self.save_dir, "best_model_mae-{:.2f}_epoch-{}.pth".format(
|
352 |
+
self.best_mae, self.epoch))
|
353 |
+
|
354 |
+
torch.save(model_state_dic, model_path)
|
355 |
+
|
356 |
+
|
357 |
+
if __name__ == "__main__":
|
358 |
+
import torch
|
359 |
+
torch.backends.cudnn.benchmark = True
|
360 |
+
trainer = Trainer(args)
|
361 |
+
trainer.setup()
|
362 |
+
trainer.train()
|
363 |
+
|
364 |
+
|
365 |
+
|
366 |
+
|
367 |
+
|
368 |
+
|
369 |
+
|
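For reference, the stitching step in `val_epoch` above sums overlapping crop predictions into a full-size map and divides by a coverage mask, so pixels covered by more than one crop end up averaged. Below is a minimal, self-contained sketch of that idea (not part of the commit); the image size, crop size, and the random tensor standing in for the model output are illustrative only.

```python
# Sketch of the overlap-averaging used in val_epoch (toy sizes, dummy predictions).
import torch

h, w, crop = 96, 96, 64                       # image larger than one crop -> crops overlap
pred_map = torch.zeros(1, 1, h, w)            # summed crop predictions
coverage = torch.zeros(1, 1, h, w)            # how many crops touched each pixel
for i in range(0, h, crop):
    gis, gie = max(min(h - crop, i), 0), min(h, i + crop)
    for j in range(0, w, crop):
        gjs, gje = max(min(w - crop, j), 0), min(w, j + crop)
        crop_pred = torch.rand(1, 1, gie - gis, gje - gjs)  # stands in for the model output
        pred_map[:, :, gis:gie, gjs:gje] += crop_pred
        coverage[:, :, gis:gie, gjs:gje] += 1.0
stitched = pred_map / coverage                # averaged wherever crops overlap
print(stitched.shape, float(stitched.sum()))  # summing the density map gives the count estimate
```

Summing the stitched density map is exactly how the per-image count estimate (and hence `res`) is obtained in `val_epoch`.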
utils/__init__.py
ADDED
File without changes
|
utils/log_utils.py
ADDED
@@ -0,0 +1,24 @@
import logging


def get_logger(log_file):
    logger = logging.getLogger(log_file)
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(log_file)
    fh.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    ch.setFormatter(formatter)
    fh.setFormatter(formatter)
    logger.addHandler(ch)
    logger.addHandler(fh)
    return logger


def print_config(config, logger):
    """
    Print configuration of the model
    """
    for k, v in config.items():
        logger.info("{}:\t{}".format(k.ljust(15), v))
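A short usage sketch for these helpers, assuming the repository layout above (`utils/log_utils.py`); the log-file name and config values here are made up.

```python
# Illustrative only: log to both console (INFO) and a file (DEBUG), then dump a config dict.
from utils.log_utils import get_logger, print_config

logger = get_logger("train.log")                      # hypothetical log-file name
print_config({"crop_size": 256, "lr": 1e-5}, logger)  # hypothetical hyper-parameters
logger.info("Epoch 0 started")
```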
utils/pytorch_utils.py
ADDED
@@ -0,0 +1,58 @@
import os


def adjust_learning_rate(optimizer, epoch, initial_lr=0.001, decay_epoch=10):
    """Sets the learning rate to the initial LR decayed by 10 every decay_epoch epochs"""
    lr = max(initial_lr * (0.1 ** (epoch // decay_epoch)), 1e-6)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


class Save_Handle(object):
    """Keep at most max_num saved checkpoint files on disk."""
    def __init__(self, max_num):
        self.save_list = []
        self.max_num = max_num

    def append(self, save_path):
        if len(self.save_list) < self.max_num:
            self.save_list.append(save_path)
        else:
            remove_path = self.save_list[0]
            del self.save_list[0]
            self.save_list.append(save_path)
            if os.path.exists(remove_path):
                os.remove(remove_path)


class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = 1.0 * self.sum / self.count

    def get_avg(self):
        return self.avg

    def get_count(self):
        return self.count


def set_trainable(model, requires_grad):
    for param in model.parameters():
        param.requires_grad = requires_grad


def get_num_params(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
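A brief usage sketch for the two classes above (the numbers and file names are illustrative, not from the repository):

```python
# Illustrative only: running weighted average and bounded checkpoint retention.
from utils.pytorch_utils import AverageMeter, Save_Handle

loss_meter = AverageMeter()
loss_meter.update(0.8, n=4)      # batch of 4 samples with mean loss 0.8
loss_meter.update(0.6, n=2)      # batch of 2 samples with mean loss 0.6
print(loss_meter.get_avg())      # (0.8*4 + 0.6*2) / 6 ~ 0.733

ckpts = Save_Handle(max_num=1)
ckpts.append("0_ckpt.tar")       # tracked
ckpts.append("1_ckpt.tar")       # "0_ckpt.tar" is deleted from disk if it exists
```

This is why the trainer constructs `Save_Handle(max_num=1)`: only the most recent per-epoch `*_ckpt.tar` is kept, while the best model is saved separately as a `.pth` file in `val_epoch`.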