from __future__ import division

import random

import cv2
import numpy as np
import torch
import torchvision.transforms.functional as F
from PIL import Image


class Compose(object):
    """Chain several sample transforms into a single callable."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, sample):
        for t in self.transforms:
            sample = t(sample)
        return sample


class ToTensor(object):
    """Convert numpy array to torch tensor.

    ``sample['left']`` and ``sample['right']`` are transposed with axes
    ``(2, 0, 1)`` and, unless ``no_normalize`` is set, divided by 255.
    ``sample['disp']`` is converted as-is when present.
    """

    def __init__(self, no_normalize=False):
        self.no_normalize = no_normalize

    def __call__(self, sample):
        for key in ('left', 'right'):
            # Channel-last -> channel-first: [3, H, W]
            img = torch.from_numpy(np.transpose(sample[key], (2, 0, 1)))
            sample[key] = img if self.no_normalize else img / 255.

        if 'disp' in sample:
            sample['disp'] = torch.from_numpy(sample['disp'])  # [H, W]

        return sample


class Normalize(object):
    """Normalize image, with type tensor.

    Each channel of ``sample['left']`` / ``sample['right']`` is modified
    in place as ``(channel - mean) / std``.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, sample):
        norm_keys = ['left', 'right']

        for key in norm_keys:
            # Images have been converted to tensor, with shape [C, H, W]
            for t, m, s in zip(sample[key], self.mean, self.std):
                t.sub_(m).div_(s)

        return sample


class RandomCrop(object):
    """Randomly crop left/right (and disp, if present) to a fixed size.

    Images smaller than the crop size are first padded at the top and on
    the right: the images with edge replication, the disparity with zeros.
    The offsets of the last crop are kept in ``offset_x`` / ``offset_y``.
    """

    def __init__(self, img_height, img_width):
        self.img_height = img_height
        self.img_width = img_width

    def __call__(self, sample):
        ori_height, ori_width = sample['left'].shape[:2]

        # Pad when the crop size is larger than the original image size
        if self.img_height > ori_height or self.img_width > ori_width:
            # max(..., 0) allows padding along only one side
            top_pad = max(self.img_height - ori_height, 0)
            right_pad = max(self.img_width - ori_width, 0)

            # np.pad rather than np.lib.pad: the latter is not exposed
            # anymore since the NumPy 2.0 `numpy.lib` namespace cleanup.
            img_pad = ((top_pad, 0), (0, right_pad), (0, 0))
            sample['left'] = np.pad(sample['left'], img_pad, mode='edge')
            sample['right'] = np.pad(sample['right'], img_pad, mode='edge')
            if 'disp' in sample:
                sample['disp'] = np.pad(sample['disp'],
                                        ((top_pad, 0), (0, right_pad)),
                                        mode='constant',
                                        constant_values=0)

            # update image resolution
            ori_height, ori_width = sample['left'].shape[:2]

        assert self.img_height <= ori_height and self.img_width <= ori_width

        # Training: random crop
        self.offset_x = np.random.randint(ori_width - self.img_width + 1)

        start_height = 0
        assert ori_height - start_height >= self.img_height

        self.offset_y = np.random.randint(start_height,
                                          ori_height - self.img_height + 1)

        sample['left'] = self.crop_img(sample['left'])
        sample['right'] = self.crop_img(sample['right'])
        if 'disp' in sample:
            sample['disp'] = self.crop_img(sample['disp'])

        return sample

    def crop_img(self, img):
        """Slice ``img`` with the offsets drawn by the last ``__call__``."""
        return img[self.offset_y:self.offset_y + self.img_height,
                   self.offset_x:self.offset_x + self.img_width]


class RandomVerticalFlip(object):
    """Randomly vertically flips left/right (and disp, if present)."""

    def __call__(self, sample):
        if np.random.random() < 0.5:
            sample['left'] = np.copy(np.flipud(sample['left']))
            sample['right'] = np.copy(np.flipud(sample['right']))
            # disp is optional, as in ToTensor / RandomCrop
            if 'disp' in sample:
                sample['disp'] = np.copy(np.flipud(sample['disp']))

        return sample


class ToPILImage(object):
    """Convert the left/right numpy images to uint8 PIL images."""

    def __call__(self, sample):
        sample['left'] = Image.fromarray(sample['left'].astype('uint8'))
        sample['right'] = Image.fromarray(sample['right'].astype('uint8'))

        return sample


class ToNumpyArray(object):
    """Convert the left/right images back to float32 numpy arrays."""

    def __call__(self, sample):
        sample['left'] = np.array(sample['left']).astype(np.float32)
        sample['right'] = np.array(sample['right']).astype(np.float32)

        return sample


# Random coloring
class _RandomPairColorAdjust(object):
    """Shared logic of the random colour transforms below.

    With probability 0.5 a factor is drawn uniformly from
    ``_factor_range`` and applied to both images through ``_adjust``.
    When ``asymmetric_color_aug`` is set, the right image gets a freshly
    drawn factor with probability 0.5; otherwise it shares the left one.
    """

    _factor_range = None  # (low, high), set by subclasses

    def __init__(self,
                 asymmetric_color_aug=True,
                 ):
        self.asymmetric_color_aug = asymmetric_color_aug

    def _adjust(self, img, factor):
        raise NotImplementedError

    def __call__(self, sample):
        if np.random.random() < 0.5:
            factor = np.random.uniform(*self._factor_range)
            sample['left'] = self._adjust(sample['left'], factor)

            if self.asymmetric_color_aug and np.random.random() < 0.5:
                factor = np.random.uniform(*self._factor_range)

            sample['right'] = self._adjust(sample['right'], factor)

        return sample


class RandomContrast(_RandomPairColorAdjust):
    """Random contrast"""

    _factor_range = (0.8, 1.2)
    _adjust = staticmethod(F.adjust_contrast)


class RandomGamma(_RandomPairColorAdjust):
    """Random gamma"""

    _factor_range = (0.7, 1.5)  # adopted from FlowNet
    _adjust = staticmethod(F.adjust_gamma)


class RandomBrightness(_RandomPairColorAdjust):
    """Random brightness"""

    _factor_range = (0.5, 2.0)
    _adjust = staticmethod(F.adjust_brightness)


class RandomHue(_RandomPairColorAdjust):
    """Random hue"""

    _factor_range = (-0.1, 0.1)
    _adjust = staticmethod(F.adjust_hue)


class RandomSaturation(_RandomPairColorAdjust):
    """Random saturation"""

    _factor_range = (0.8, 1.2)
    _adjust = staticmethod(F.adjust_saturation)


class RandomColor(object):
    """Apply either one random colour transform or all five, shuffled.

    The images are converted to PIL for the colour operations and back
    to float32 numpy arrays afterwards.
    """

    def __init__(self,
                 asymmetric_color_aug=True,
                 ):
        self.asymmetric_color_aug = asymmetric_color_aug

    def __call__(self, sample):
        transforms = [
            cls(asymmetric_color_aug=self.asymmetric_color_aug)
            for cls in (RandomContrast, RandomGamma, RandomBrightness,
                        RandomHue, RandomSaturation)
        ]

        sample = ToPILImage()(sample)

        if np.random.random() < 0.5:
            # A single transform
            t = random.choice(transforms)
            sample = t(sample)
        else:
            # Combination of transforms, in random order
            random.shuffle(transforms)
            for t in transforms:
                sample = t(sample)

        sample = ToNumpyArray()(sample)

        return sample


class RandomScale(object):
    """With probability 0.5, rescale the sample along the x axis only.

    The factor is ``2 ** uniform(min_scale, max_scale)``, clipped from
    below so the scaled width does not drop under ``crop_width``.
    Disparity values are multiplied by the same factor.
    """

    def __init__(self,
                 min_scale=-0.4,
                 max_scale=0.4,
                 crop_width=512,
                 nearest_interp=False,  # for sparse gt
                 ):
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.crop_width = crop_width
        self.nearest_interp = nearest_interp

    def __call__(self, sample):
        if np.random.rand() < 0.5:
            # Width is read from the image so that disp stays optional
            w = sample['left'].shape[1]

            scale_x = 2 ** np.random.uniform(self.min_scale, self.max_scale)
            # Plain Python float: multiplying a float32 map by an np.float64
            # scalar would promote it to float64 under NumPy 2 (NEP 50).
            scale_x = float(np.clip(scale_x, self.crop_width / float(w), None))

            # only random scale x axis
            sample['left'] = cv2.resize(sample['left'], None,
                                        fx=scale_x, fy=1.,
                                        interpolation=cv2.INTER_LINEAR)
            sample['right'] = cv2.resize(sample['right'], None,
                                         fx=scale_x, fy=1.,
                                         interpolation=cv2.INTER_LINEAR)

            if 'disp' in sample:
                disp_interp = (cv2.INTER_NEAREST if self.nearest_interp
                               else cv2.INTER_LINEAR)
                sample['disp'] = cv2.resize(
                    sample['disp'], None, fx=scale_x, fy=1.,
                    interpolation=disp_interp
                ) * scale_x

            if 'pseudo_disp' in sample and sample['pseudo_disp'] is not None:
                sample['pseudo_disp'] = cv2.resize(
                    sample['pseudo_disp'], None, fx=scale_x, fy=1.,
                    interpolation=cv2.INTER_LINEAR
                ) * scale_x

        return sample


class Resize(object):
    def __init__(self,
                 scale_x=1,
                 scale_y=1,
                 nearest_interp=True,  # for sparse gt
                 ):
        """
        Resize low-resolution data to high-res for mixed dataset training
        """
        self.scale_x = scale_x
        self.scale_y = scale_y
        self.nearest_interp = nearest_interp

    def __call__(self, sample):
        scale_x = self.scale_x
        scale_y = self.scale_y

        sample['left'] = cv2.resize(sample['left'], None,
                                    fx=scale_x, fy=scale_y,
                                    interpolation=cv2.INTER_LINEAR)
        sample['right'] = cv2.resize(sample['right'], None,
                                     fx=scale_x, fy=scale_y,
                                     interpolation=cv2.INTER_LINEAR)

        # disp is optional, as in ToTensor / RandomCrop
        if 'disp' in sample:
            disp_interp = (cv2.INTER_NEAREST if self.nearest_interp
                           else cv2.INTER_LINEAR)
            # Disparity values are multiplied by the x scale factor
            sample['disp'] = cv2.resize(
                sample['disp'], None, fx=scale_x, fy=scale_y,
                interpolation=disp_interp
            ) * scale_x

        return sample


class RandomGrayscale(object):
    """With probability ``p``, convert both images to 3-channel grayscale."""

    def __init__(self, p=0.2):
        self.p = p

    def __call__(self, sample):
        if np.random.random() < self.p:
            sample = ToPILImage()(sample)

            # only supported in higher version pytorch
            # default output channels is 1
            sample['left'] = F.rgb_to_grayscale(sample['left'],
                                                num_output_channels=3)
            sample['right'] = F.rgb_to_grayscale(sample['right'],
                                                 num_output_channels=3)

            sample = ToNumpyArray()(sample)

        return sample


class RandomRotateShiftRight(object):
    """With probability ``p``, slightly rotate and shift the right image.

    The rotation angle is drawn from [-0.1, 0.1] and passed to
    ``cv2.getRotationMatrix2D``; the translation of [-2, 2] is applied
    along the y axis. The left image and disparity are untouched.
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, sample):
        if np.random.random() < self.p:
            angle, pixel = 0.1, 2
            px = np.random.uniform(-pixel, pixel)
            ag = np.random.uniform(-angle, angle)

            right_img = sample['right']

            # NOTE(review): cv2.getRotationMatrix2D takes the center as
            # (x, y), but the first coordinate here is drawn from the row
            # count (shape[0]). Kept as-is to preserve the existing
            # augmentation distribution -- confirm intent.
            image_center = (
                np.random.uniform(0, right_img.shape[0]),
                np.random.uniform(0, right_img.shape[1])
            )

            rot_mat = cv2.getRotationMatrix2D(image_center, ag, 1.0)
            # shape[1::-1] is (width, height), the dsize order of warpAffine
            right_img = cv2.warpAffine(
                right_img, rot_mat, right_img.shape[1::-1],
                flags=cv2.INTER_LINEAR
            )

            trans_mat = np.float32([[1, 0, 0], [0, 1, px]])
            right_img = cv2.warpAffine(
                right_img, trans_mat, right_img.shape[1::-1],
                flags=cv2.INTER_LINEAR
            )

            sample['right'] = right_img

        return sample


class RandomOcclusion(object):
    """With probability ``p``, paint a random rectangle in the right image.

    The rectangle is filled with zeros when ``occlusion_mask_zero`` is
    set, otherwise with the per-channel mean of the right image. The
    right image array is modified in place.
    """

    def __init__(self, p=0.5,
                 occlusion_mask_zero=False):
        self.p = p
        self.occlusion_mask_zero = occlusion_mask_zero

    def __call__(self, sample):
        bounds = [50, 100]

        if np.random.random() < self.p:
            img2 = sample['right']
            ht, wd = img2.shape[:2]

            if self.occlusion_mask_zero:
                mean_color = 0
            else:
                mean_color = np.mean(img2.reshape(-1, 3), axis=0)

            # Top-left corner and size; slicing clips at the image border
            x0 = np.random.randint(0, wd)
            y0 = np.random.randint(0, ht)
            dx = np.random.randint(bounds[0], bounds[1])
            dy = np.random.randint(bounds[0], bounds[1])
            img2[y0:y0 + dy, x0:x0 + dx, :] = mean_color

            sample['right'] = img2

        return sample