"""Miscellaneous utilities."""

import cv2
import random
import matplotlib
import matplotlib.pyplot as plt
import meshcat
import meshcat.geometry as g
import meshcat.transformations as mtf
import PIL
import yaml
import numpy as np
from transforms3d import euler
import pybullet as p
import kornia
from omegaconf import OmegaConf

import os
import torch
import torchvision


# -----------------------------------------------------------------------------
# HEIGHTMAP UTILS
# -----------------------------------------------------------------------------

def get_heightmap(points, colors, bounds, pixel_size):
    """Get top-down (z-axis) orthographic heightmap image from 3D pointcloud.

    Args:
      points: HxWx3 float array of 3D points in world coordinates.
      colors: HxWx3 uint8 array of values in range 0-255 aligned with points.
      bounds: 3x2 float array of values (rows: X,Y,Z; columns: min,max) defining
        region in 3D space to generate heightmap in world coordinates.
      pixel_size: float defining size of each pixel in meters.

    Returns:
      heightmap: HxW float array of height (from lower z-bound) in meters.
      colormap: HxWx3 uint8 array of backprojected color aligned with heightmap.
    """
    width = int(np.round((bounds[0, 1] - bounds[0, 0]) / pixel_size))
    height = int(np.round((bounds[1, 1] - bounds[1, 0]) / pixel_size))
    heightmap = np.zeros((height, width), dtype=np.float32)
    colormap = np.zeros((height, width, colors.shape[-1]), dtype=np.uint8)

    # Filter out 3D points that are outside of the predefined bounds.
    ix = (points[Ellipsis, 0] >= bounds[0, 0]) & (points[Ellipsis, 0] < bounds[0, 1])
    iy = (points[Ellipsis, 1] >= bounds[1, 0]) & (points[Ellipsis, 1] < bounds[1, 1])
    iz = (points[Ellipsis, 2] >= bounds[2, 0]) & (points[Ellipsis, 2] < bounds[2, 1])
    valid = ix & iy & iz
    points = points[valid]
    colors = colors[valid]

    # Sort 3D points by z-value, which works with array assignment to simulate
    # z-buffering for rendering the heightmap image.
    iz = np.argsort(points[:, -1])
    points, colors = points[iz], colors[iz]
    px = np.int32(np.floor((points[:, 0] - bounds[0, 0]) / pixel_size))
    py = np.int32(np.floor((points[:, 1] - bounds[1, 0]) / pixel_size))
    px = np.clip(px, 0, width - 1)
    py = np.clip(py, 0, height - 1)
    heightmap[py, px] = points[:, 2] - bounds[2, 0]
    for c in range(colors.shape[-1]):
        colormap[py, px, c] = colors[:, c]
    return heightmap, colormap


def get_pointcloud(depth, intrinsics):
    """Get 3D pointcloud from perspective depth image.

    Args:
      depth: HxW float array of perspective depth in meters.
      intrinsics: 3x3 float array of camera intrinsics matrix.

    Returns:
      points: HxWx3 float array of 3D points in camera coordinates.
    """
    height, width = depth.shape
    xlin = np.linspace(0, width - 1, width)
    ylin = np.linspace(0, height - 1, height)
    px, py = np.meshgrid(xlin, ylin)
    px = (px - intrinsics[0, 2]) * (depth / intrinsics[0, 0])
    py = (py - intrinsics[1, 2]) * (depth / intrinsics[1, 1])
    points = np.float32([px, py, depth]).transpose(1, 2, 0)
    return points


def transform_pointcloud(points, transform):
    """Apply rigid transformation to 3D pointcloud.

    Note: `points` is overwritten in place and the same array is returned.

    Args:
      points: HxWx3 float array of 3D points in camera coordinates.
      transform: 4x4 float array representing a rigid transformation matrix.

    Returns:
      points: HxWx3 float array of transformed 3D points.
    """
    padding = ((0, 0), (0, 0), (0, 1))
    # Homogeneous copy of the input, so the in-place writes below always read
    # the untransformed coordinates.
    homogen_points = np.pad(points.copy(), padding,
                            'constant', constant_values=1)
    for i in range(3):
        points[Ellipsis, i] = np.sum(transform[i, :] * homogen_points, axis=-1)
    return points


def reconstruct_heightmaps(color, depth, configs, bounds, pixel_size):
    """Reconstruct top-down heightmap views from multiple 3D pointclouds.

    Args:
      color: sequence of per-camera color images.
      depth: sequence of per-camera depth images.
      configs: sequence of per-camera dicts with 'intrinsics', 'position' and
        'rotation' (quaternion passed to pybullet) entries.
      bounds: 3x2 workspace bounds forwarded to get_heightmap.
      pixel_size: heightmap pixel size forwarded to get_heightmap.

    Returns:
      (heightmaps, colormaps): two lists with one entry per camera.
    """
    heightmaps, colormaps = [], []
    # Distinct loop names so the `color` / `depth` arguments are not shadowed.
    for view_color, view_depth, config in zip(color, depth, configs):
        intrinsics = np.array(config['intrinsics']).reshape(3, 3)
        xyz = get_pointcloud(view_depth, intrinsics)
        position = np.array(config['position']).reshape(3, 1)
        rotation = p.getMatrixFromQuaternion(config['rotation'])
        rotation = np.array(rotation).reshape(3, 3)
        transform = np.eye(4)
        transform[:3, :] = np.hstack((rotation, position))
        xyz = transform_pointcloud(xyz, transform)
        heightmap, colormap = get_heightmap(xyz, view_color, bounds, pixel_size)
        heightmaps.append(heightmap)
        colormaps.append(colormap)
    return heightmaps, colormaps


def pix_to_xyz(pixel, height, bounds, pixel_size, skip_height=False):
    """Convert from pixel location on heightmap to 3D position.

    Args:
      pixel: (u, v) pixel location; u indexes rows (y), v indexes columns (x).
      height: heightmap indexed as height[u, v]; unused if skip_height is True.
      bounds: 3x2 array of workspace bounds (rows: X,Y,Z; columns: min,max).
      pixel_size: size of one pixel in world units.
      skip_height: if True, z is set to 0.0 instead of being looked up.

    Returns:
      (x, y, z) tuple.
    """
    u, v = pixel
    x = bounds[0, 0] + v * pixel_size
    y = bounds[1, 0] + u * pixel_size
    if not skip_height:
        z = bounds[2, 0] + height[u, v]
    else:
        z = 0.0
    return (x, y, z)


def xyz_to_pix(position, bounds, pixel_size):
    """Convert from 3D position to pixel location on heightmap.

    Returns:
      (u, v) integer pixel location, u from the y coordinate and v from x.
    """
    u = int(np.round((position[1] - bounds[1, 0]) / pixel_size))
    v = int(np.round((position[0] - bounds[0, 0]) / pixel_size))
    return (u, v)


def unproject_vectorized(uv_coordinates, depth_values, intrinsic, distortion):
    """Vectorized version of unproject(), for N points.

    Args:
      uv_coordinates: pixel coordinates to unproject of shape (n, 2).
      depth_values: depth values corresponding index-wise to the uv_coordinates
        of shape (n).
      intrinsic: array of shape (3, 3). This is typically the return value of
        intrinsics_to_matrix.
      distortion: camera distortion parameters of shape (5,).

    Returns:
      xyz coordinates in camera frame of shape (n, 3).
    """
    cam_mtx = intrinsic  # shape [3, 3]
    cam_dist = np.array(distortion)  # shape [5]

    # reshape(-1, 2) rather than squeeze(): squeeze() would also drop the point
    # axis when N == 1 and break the column indexing below.
    points_undistorted = cv2.undistortPoints(
        uv_coordinates.reshape((-1, 1, 2)), cam_mtx, cam_dist).reshape(-1, 2)

    x = points_undistorted[:, 0] * depth_values
    y = points_undistorted[:, 1] * depth_values

    xyz = np.vstack((x, y, depth_values)).T
    return xyz


def unproject_depth_vectorized(im_depth, depth_dist, camera_mtx, camera_dist):
    """Unproject depth image into 3D point cloud, using calibration.

    Args:
      im_depth: raw depth image, pre-calibration of shape (height, width).
      depth_dist: depth distortion parameters; only the first two entries
        (offset, scale) are used here.
      camera_mtx: intrinsics matrix of shape (3, 3). This is typically the
        return value of intrinsics_to_matrix.
      camera_dist: camera distortion parameters shape (5,).

    Returns:
      numpy array of shape [H*W, 3]. Each row is xyz coordinates.
    """
    h, w = im_depth.shape

    # shape of each u_map, v_map is [H, W].
    u_map, v_map = np.meshgrid(np.linspace(
        0, w - 1, w), np.linspace(0, h - 1, h))

    adjusted_depth = depth_dist[0] + im_depth * depth_dist[1]

    # shape after stack is [N, 2], where N = H * W.
    uv_coordinates = np.stack((u_map.reshape(-1), v_map.reshape(-1)), axis=-1)

    return unproject_vectorized(uv_coordinates, adjusted_depth.reshape(-1),
                                camera_mtx, camera_dist)


# -----------------------------------------------------------------------------
# MATH UTILS
# -----------------------------------------------------------------------------

def sample_distribution(prob, n_samples=1):
    """Sample data point from a custom distribution.

    Args:
      prob: array of non-negative weights; normalized by its sum before use.
      n_samples: number of distinct indices to draw (without replacement).

    Returns:
      int32 array of index coordinates into `prob`, squeezed.
    """
    flat_prob = prob.flatten() / np.sum(prob)
    rand_ind = np.random.choice(
        np.arange(len(flat_prob)), n_samples, p=flat_prob, replace=False)
    rand_ind_coords = np.array(np.unravel_index(rand_ind, prob.shape)).T
    return np.int32(rand_ind_coords.squeeze())


# -------------------------------------------------------------------------
# Transformation Helper Functions
# -------------------------------------------------------------------------

def invert(pose):
    """Invert a (position, quaternion) pose via pybullet."""
    return p.invertTransform(pose[0], pose[1])


def multiply(pose0, pose1):
    """Compose two (position, quaternion) poses via pybullet."""
    return p.multiplyTransforms(pose0[0], pose0[1], pose1[0], pose1[1])


def apply(pose, position):
    """Apply a (position, quaternion) pose to one or more 3D positions.

    The input is reshaped to (3, -1), rotated and translated, then reshaped
    back to its original shape and returned as a tuple.
    """
    position = np.float32(position)
    position_shape = position.shape
    position = position.reshape(3, -1)
    rotation = np.float32(p.getMatrixFromQuaternion(pose[1])).reshape(3, 3)
    translation = np.float32(pose[0]).reshape(3, 1)
    position = rotation @ position + translation
    return tuple(position.reshape(position_shape))


def eulerXYZ_to_quatXYZW(rotation):  # pylint: disable=invalid-name
    """Abstraction for converting from a 3-parameter rotation to quaternion.

    This will help us easily switch which rotation parameterization we use.
    Quaternion should be in xyzw order for pybullet.

    Args:
      rotation: a 3-parameter rotation, in xyz order tuple of 3 floats

    Returns:
      quaternion, in xyzw order, tuple of 4 floats
    """
    euler_zxy = (rotation[2], rotation[0], rotation[1])
    quaternion_wxyz = euler.euler2quat(*euler_zxy, axes='szxy')
    q = quaternion_wxyz
    quaternion_xyzw = (q[1], q[2], q[3], q[0])
    return quaternion_xyzw


def quatXYZW_to_eulerXYZ(quaternion_xyzw):  # pylint: disable=invalid-name
    """Abstraction for converting from quaternion to a 3-parameter rotation.

    This will help us easily switch which rotation parameterization we use.
    Quaternion should be in xyzw order for pybullet.

    Args:
      quaternion_xyzw: in xyzw order, tuple of 4 floats

    Returns:
      rotation: a 3-parameter rotation, in xyz order, tuple of 3 floats
    """
    q = quaternion_xyzw
    quaternion_wxyz = np.array([q[3], q[0], q[1], q[2]])
    euler_zxy = euler.quat2euler(quaternion_wxyz, axes='szxy')
    euler_xyz = (euler_zxy[1], euler_zxy[2], euler_zxy[0])
    return euler_xyz


def apply_transform(transform_to_from, points_from):
    r"""Transforms points (3D) into new frame.

    Using transform_to_from notation.

    Args:
      transform_to_from: numpy.ndarray of shape [B,4,4] (or [4,4]), SE3
      points_from: numpy.ndarray of shape [B,3,N] (or [3,N])

    Returns:
      points_to: numpy.ndarray of shape [B,3,N] (or [3,N])
    """
    num_points = points_from.shape[-1]

    # non-batched
    if len(transform_to_from.shape) == 2:
        ones = np.ones((1, num_points))

        # makes these each into homogenous vectors
        points_from = np.vstack((points_from, ones))  # [4,N]
        points_to = transform_to_from @ points_from  # [4,N]
        return points_to[0:3, :]  # [3,N]

    # batched
    else:
        assert len(transform_to_from.shape) == 3
        batch_size = transform_to_from.shape[0]
        ones = np.ones((batch_size, 1, num_points))
        points_from = np.concatenate((points_from, ones), axis=1)
        assert points_from.shape[1] == 4
        points_to = transform_to_from @ points_from
        return points_to[:, 0:3, :]


# -----------------------------------------------------------------------------
# IMAGE UTILS
# -----------------------------------------------------------------------------

def preprocess(img, dist='transporter'):
    """Pre-process input (subtract mean, divide by std).

    Args:
      img: either a torch tensor indexed as [B, C, H, W] or an array indexed
        as [H, W, C]; the first three channels are treated as color (divided
        by 255 before normalizing) and the remaining ones as depth.
      dist: which statistics to use: 'clip', 'mdetr', 'franka', or anything
        else for the 'transporter' statistics.

    Returns:
      The normalized image. Torch inputs are cloned first; non-torch inputs
      are modified in place.
    """
    transporter_color_mean = [0.18877631, 0.18877631, 0.18877631]
    transporter_color_std = [0.07276466, 0.07276466, 0.07276466]
    transporter_depth_mean = 0.00509261
    transporter_depth_std = 0.00903967

    franka_color_mean = [0.622291933, 0.628313992, 0.623031488]
    franka_color_std = [0.168154213, 0.17626014, 0.184527364]
    franka_depth_mean = 0.872146842
    franka_depth_std = 0.195743116

    clip_color_mean = [0.48145466, 0.4578275, 0.40821073]
    clip_color_std = [0.26862954, 0.26130258, 0.27577711]

    # choose distribution
    if dist == 'clip':
        color_mean = clip_color_mean
        color_std = clip_color_std
    elif dist == 'mdetr':
        color_mean = [0.485, 0.456, 0.406]
        color_std = [0.229, 0.224, 0.225]
    elif dist == 'franka':
        color_mean = franka_color_mean
        color_std = franka_color_std
    else:
        color_mean = transporter_color_mean
        color_std = transporter_color_std

    if dist == 'franka':
        depth_mean = franka_depth_mean
        depth_std = franka_depth_std
    else:
        depth_mean = transporter_depth_mean
        depth_std = transporter_depth_std

    # convert to pytorch tensor (if required)
    if isinstance(img, torch.Tensor):
        def cast_shape(stat, img):
            # Expand a scalar / per-channel statistic to [B, C, H, W].
            tensor = torch.from_numpy(np.array(stat)).to(
                device=img.device, dtype=img.dtype)
            tensor = tensor.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
            tensor = tensor.repeat(
                img.shape[0], 1, img.shape[-2], img.shape[-1])
            return tensor

        color_mean = cast_shape(color_mean, img)
        color_std = cast_shape(color_std, img)
        depth_mean = cast_shape(depth_mean, img)
        depth_std = cast_shape(depth_std, img)

        # normalize
        img = img.clone()
        img[:, :3, :, :] = ((img[:, :3, :, :] / 255 - color_mean) / color_std)
        img[:, 3:, :, :] = ((img[:, 3:, :, :] - depth_mean) / depth_std)
    else:
        # normalize
        img[:, :, :3] = (img[:, :, :3] / 255 - color_mean) / color_std
        img[:, :, 3:] = (img[:, :, 3:] - depth_mean) / depth_std

    return img


def deprocess(img):
    """Undo the 'transporter' normalization on an [H, W, C] array, in place.

    Both the color and the depth channels are rescaled by 255 and passed
    through np.uint8 before being written back into `img`.
    """
    color_mean = 0.18877631
    depth_mean = 0.00509261
    color_std = 0.07276466
    depth_std = 0.00903967

    img[:, :, :3] = np.uint8(((img[:, :, :3] * color_std) + color_mean) * 255)
    img[:, :, 3:] = np.uint8(((img[:, :, 3:] * depth_std) + depth_mean) * 255)
    return img


def get_fused_heightmap(obs, configs, bounds, pix_size):
    """Reconstruct orthographic heightmaps with segmentation masks.

    Args:
      obs: dict with 'color' and 'depth' entries (one image per camera).
      configs: per-camera configs forwarded to reconstruct_heightmaps.
      bounds: workspace bounds forwarded to reconstruct_heightmaps.
      pix_size: pixel size forwarded to reconstruct_heightmaps.

    Returns:
      (cmap, hmap): fused uint8 colormap and fused heightmap.
    """
    heightmaps, colormaps = reconstruct_heightmaps(
        obs['color'], obs['depth'], configs, bounds, pix_size)
    colormaps = np.float32(colormaps)
    heightmaps = np.float32(heightmaps)

    # Fuse maps from different views.
    valid = np.sum(colormaps, axis=3) > 0
    repeat = np.sum(valid, axis=0)
    repeat[repeat == 0] = 1  # avoid division by zero where no view has color
    cmap = np.sum(colormaps, axis=0) / repeat[Ellipsis, None]
    cmap = np.uint8(np.round(cmap))
    hmap = np.max(heightmaps, axis=0)  # Max to handle occlusions.
    return cmap, hmap


def get_image_transform(theta, trans, pivot=(0, 0)):
    """Compute composite 2D rigid transformation matrix."""
    # Get 2D rigid transformation matrix that rotates an image by theta (in
    # radians) around pivot (in pixels) and translates by trans vector (in
    # pixels)
    pivot_t_image = np.array([[1., 0., -pivot[0]],
                              [0., 1., -pivot[1]],
                              [0., 0., 1.]])
    image_t_pivot = np.array([[1., 0., pivot[0]],
                              [0., 1., pivot[1]],
                              [0., 0., 1.]])
    transform = np.array([[np.cos(theta), -np.sin(theta), trans[0]],
                          [np.sin(theta), np.cos(theta), trans[1]],
                          [0., 0., 1.]])
    return np.dot(image_t_pivot, np.dot(transform, pivot_t_image))


def check_transform(image, pixel, transform):
    """Valid transform only if pixel locations are still in FoV after transform.

    Returns:
      (valid, new_pixel): whether the transformed, rounded pixel lies inside
      `image`, and that pixel in (row, col) order.
    """
    new_pixel = np.flip(
        np.int32(
            np.round(
                np.dot(transform,
                       np.float32([pixel[1], pixel[0],
                                   1.]).reshape(3, 1))))[:2].squeeze())
    valid = np.all(
        new_pixel >= 0
    ) and new_pixel[0] < image.shape[0] and new_pixel[1] < image.shape[1]
    return valid, new_pixel


def get_se3_from_image_transform(theta, trans, pivot, heightmap, bounds,
                                 pixel_size):
    """Calculate SE3 from image transform.

    Returns:
      (t_world_center, t_world_centernew): two 4x4 matrices, the pose of the
      pivot point before and after the image transform.
    """
    position_center = pix_to_xyz(
        np.flip(np.int32(np.round(pivot))),
        heightmap,
        bounds,
        pixel_size,
        skip_height=False)
    new_position_center = pix_to_xyz(
        np.flip(np.int32(np.round(pivot + trans))),
        heightmap,
        bounds,
        pixel_size,
        skip_height=True)

    # Don't look up the z height, it might get augmented out of frame
    new_position_center = (new_position_center[0], new_position_center[1],
                           position_center[2])

    delta_position = np.array(new_position_center) - np.array(position_center)

    t_world_center = np.eye(4)
    t_world_center[0:3, 3] = np.array(position_center)

    t_centernew_center = np.eye(4)
    euler_zxy = (-theta, 0, 0)
    t_centernew_center[0:3, 0:3] = euler.euler2mat(
        *euler_zxy, axes='szxy')[0:3, 0:3]

    t_centernew_center_tonly = np.eye(4)
    t_centernew_center_tonly[0:3, 3] = -delta_position
    t_centernew_center = t_centernew_center @ t_centernew_center_tonly

    t_world_centernew = t_world_center @ np.linalg.inv(t_centernew_center)
    return t_world_center, t_world_centernew


def get_random_image_transform_params(image_size, theta_sigma=60):
    """Sample a random rotation (radians), translation and image-center pivot.

    Args:
      image_size: (height, width) of the image.
      theta_sigma: standard deviation of the rotation, in degrees.

    Returns:
      (theta, trans, pivot) with trans a length-2 array [x, y].
    """
    theta = np.random.normal(0, np.deg2rad(theta_sigma))

    trans_sigma = np.min(image_size) / 6
    trans = np.random.normal(0, trans_sigma, size=2)  # [x, y]
    pivot = (image_size[1] / 2, image_size[0] / 2)
    return theta, trans, pivot


def q_mult(q1, q2):
    """Hamilton product of two quaternions given in (w, x, y, z) order."""
    w1, x1, y1, z1 = q1
    w2, x2, y2, z2 = q2
    w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
    x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
    y = w1 * y2 + y1 * w2 + z1 * x2 - x1 * z2
    z = w1 * z2 + z1 * w2 + x1 * y2 - y1 * x2
    return (w, x, y, z)


def perturb(input_image, pixels, theta_sigma=60, add_noise=False):
    """Data augmentation on images.

    Random rigid transforms are re-sampled until every pixel in `pixels` stays
    inside the image after the transform.

    Args:
      input_image: [H, W, C] image; the first 3 channels are treated as color
        and the rest as depth.
      pixels: iterable of (row, col) pixel labels to transform with the image.
      theta_sigma: rotation standard deviation in degrees.
      add_noise: if True, add Gaussian noise to color (then clip to uint8) and
        to depth.

    Returns:
      (input_image, new_pixels, new_rounded_pixels, transform_params) where
      transform_params is [theta, trans_x, trans_y, pivot_x, pivot_y].
    """
    image_size = input_image.shape[:2]

    # Compute random rigid transform.
    while True:
        theta, trans, pivot = get_random_image_transform_params(
            image_size, theta_sigma=theta_sigma)
        transform = get_image_transform(theta, trans, pivot)

        # Ensure pixels remain in the image after transform.
        is_valid = True
        new_pixels = []
        new_rounded_pixels = []
        for pixel in pixels:
            pixel = np.float32([pixel[1], pixel[0], 1.]).reshape(3, 1)

            rounded_pixel = np.int32(np.round(transform @ pixel))[:2].squeeze()
            rounded_pixel = np.flip(rounded_pixel)

            pixel = (transform @ pixel)[:2].squeeze()
            pixel = np.flip(pixel)

            in_fov_rounded = rounded_pixel[0] < image_size[0] and rounded_pixel[
                1] < image_size[1]
            in_fov = pixel[0] < image_size[0] and pixel[1] < image_size[1]

            is_valid = is_valid and np.all(rounded_pixel >= 0) and np.all(
                pixel >= 0) and in_fov_rounded and in_fov

            new_pixels.append(pixel)
            new_rounded_pixels.append(rounded_pixel)
        if is_valid:
            break

    # Apply rigid transform to image and pixel labels.
    input_image = cv2.warpAffine(
        input_image,
        transform[:2, :], (image_size[1], image_size[0]),
        flags=cv2.INTER_LINEAR)

    # Apply noise
    color = np.int32(input_image[:, :, :3])
    depth = np.float32(input_image[:, :, 3:])

    if add_noise:
        color += np.int32(np.random.normal(0, 3, image_size + (3,)))
        color = np.uint8(np.clip(color, 0, 255))

        # Match the actual number of depth channels instead of assuming 3;
        # identical draw when there are exactly 3 depth channels.
        depth += np.float32(np.random.normal(0, 0.003, depth.shape))

    input_image = np.concatenate((color, depth), axis=2)

    # length of 5
    transform_params = np.array([theta, trans[0], trans[1], pivot[0], pivot[1]])
    return input_image, new_pixels, new_rounded_pixels, transform_params


def apply_perturbation(input_image, transform_params):
    """Apply data augmentation with specific transform params.

    Args:
      input_image: image to warp.
      transform_params: length-5 sequence
        [theta, trans_x, trans_y, pivot_x, pivot_y] as returned by perturb().

    Returns:
      The warped image, same height and width as the input.
    """
    image_size = input_image.shape[:2]

    # Apply rigid transform to image and pixel labels.
    theta, trans, pivot = transform_params[0], transform_params[1:3], transform_params[3:5]
    transform = get_image_transform(theta, trans, pivot)

    input_image = cv2.warpAffine(
        input_image,
        transform[:2, :], (image_size[1], image_size[0]),
        flags=cv2.INTER_LINEAR)
    return input_image


class ImageRotator:
    """Rotate for n rotations."""
    # Reference: https://kornia.readthedocs.io/en/latest/tutorials/warp_affine.html?highlight=rotate

    def __init__(self, n_rotations):
        # Evenly spaced angles in degrees covering a full turn.
        self.angles = [i * 2 * 180 / n_rotations for i in range(n_rotations)]

    def __call__(self, x_list, pivot, reverse=False):
        """Warp x_list[i] by the i-th angle around `pivot`.

        Args:
          x_list: sequence with one 4D tensor (unpacked as _, _, h, w) per
            angle.
          pivot: rotation center; its last axis is swapped ([1, 0]) before
            being handed to kornia.
          reverse: if True, rotate by the negated angles.

        Returns:
          List of warped tensors, one per angle.
        """
        rot_x_list = []
        for i, angle in enumerate(self.angles):
            x = x_list[i]

            # create transformation (rotation)
            size = len(x)
            alpha = angle if not reverse else (-1.0 * angle)  # in degrees
            angle = torch.ones(size) * alpha

            # define the rotation center
            if not isinstance(pivot, torch.Tensor):
                center = torch.FloatTensor(pivot)[..., [1, 0]]
                center = center.view(1, -1).repeat((size, 1))
            else:
                center = pivot[..., [1, 0]].view(1, -1).clone().to(angle.device)

            # define the scale factor
            scale = torch.ones(size, 2)

            # compute the transformation matrix
            M = kornia.geometry.get_rotation_matrix2d(center, angle, scale)

            # apply the transformation to original image
            _, _, h, w = x.shape
            x_warped = kornia.geometry.transform.warp_affine(
                x.float(), M.to(x.device), dsize=(h, w))
            rot_x_list.append(x_warped)

        return rot_x_list


# KD Tree Utils
# Construct K-D Tree to roughly estimate how many objects can fit inside the box.
class TreeNode:
    """Node of the box-splitting tree built by KDTree()."""

    def __init__(self, parent, children, bbox):
        self.parent = parent
        self.children = children
        self.bbox = bbox  # min x, min y, min z, max x, max y, max z


def KDTree(node, min_object_dim, margin, bboxes):
    """Recursively split node.bbox at random and collect the leaf boxes.

    Args:
      node: TreeNode whose bbox is split; its `children` are filled in.
      min_object_dim: an axis is only split if its extent exceeds twice this.
      margin: gap left between the two child boxes along the split axis.
      bboxes: list that receives the bbox of every leaf (mutated in place).
    """
    size = node.bbox[3:] - node.bbox[:3]

    # Choose which axis to split.
    split = size > 2 * min_object_dim
    if np.sum(split) == 0:
        bboxes.append(node.bbox)
        return
    split = np.float32(split) / np.sum(split)
    split_axis = np.random.choice(range(len(split)), 1, p=split)[0]

    # Split along chosen axis and create 2 children
    cut_ind = np.random.rand() * \
        (size[split_axis] - 2 * min_object_dim) + \
        node.bbox[split_axis] + min_object_dim
    child1_bbox = node.bbox.copy()
    child1_bbox[3 + split_axis] = cut_ind - margin / 2.
    child2_bbox = node.bbox.copy()
    child2_bbox[split_axis] = cut_ind + margin / 2.
    node.children = [
        TreeNode(node, [], bbox=child1_bbox),
        TreeNode(node, [], bbox=child2_bbox)
    ]
    KDTree(node.children[0], min_object_dim, margin, bboxes)
    KDTree(node.children[1], min_object_dim, margin, bboxes)


# -----------------------------------------------------------------------------
# Shape Name UTILS
# -----------------------------------------------------------------------------
google_seen_obj_shapes = {
    'train': [
        'alarm clock', 'android toy', 'black boot with leopard print',
        'black fedora', 'black razer mouse', 'black sandal',
        'black shoe with orange stripes', 'bull figure',
        'butterfinger chocolate', 'c clamp', 'can opener', 'crayon box',
        'dog statue', 'frypan', 'green and white striped towel',
        'grey soccer shoe with cleats', 'hard drive', 'honey dipper',
        'magnifying glass', 'mario figure', 'nintendo 3ds',
        'nintendo cartridge', 'office depot box', 'orca plush toy',
        'pepsi gold caffeine free box', 'pepsi wild cherry box',
        'porcelain cup', 'purple tape', 'red and white flashlight',
        'rhino figure', 'rocket racoon figure', 'scissors', 'silver tape',
        'spatula with purple head', 'spiderman figure', 'tablet',
        'toy school bus',
    ],
    'val': [
        'ball puzzle', 'black and blue sneakers',
        'black shoe with green stripes', 'brown fedora', 'dinosaur figure',
        'hammer', 'light brown boot with golden laces', 'lion figure',
        'pepsi max box', 'pepsi next box', 'porcelain salad plate',
        'porcelain spoon', 'red and white striped towel', 'red cup',
        'screwdriver', 'toy train', 'unicorn toy', 'white razer mouse',
        'yoshi figure'
    ],
    'test': [
        'ball puzzle', 'black and blue sneakers',
        'black shoe with green stripes', 'brown fedora', 'dinosaur figure',
        'hammer', 'light brown boot with golden laces', 'lion figure',
        'pepsi max box', 'pepsi next box', 'porcelain salad plate',
        'porcelain spoon', 'red and white striped towel', 'red cup',
        'screwdriver', 'toy train', 'unicorn toy', 'white razer mouse',
        'yoshi figure'
    ],
}

google_unseen_obj_shapes = {
    'train': [
        'alarm clock', 'android toy', 'black boot with leopard print',
        'black fedora', 'black razer mouse', 'black sandal',
        'black shoe with orange stripes', 'bull figure',
        'butterfinger chocolate', 'c clamp', 'can opener', 'crayon box',
        'dog statue', 'frypan', 'green and white striped towel',
        'grey soccer shoe with cleats', 'hard drive', 'honey dipper',
        'magnifying glass', 'mario figure', 'nintendo 3ds',
        'nintendo cartridge', 'office depot box', 'orca plush toy',
        'pepsi gold caffeine free box', 'pepsi wild cherry box',
        'porcelain cup', 'purple tape', 'red and white flashlight',
        'rhino figure', 'rocket racoon figure', 'scissors', 'silver tape',
        'spatula with purple head', 'spiderman figure', 'tablet',
        'toy school bus',
    ],
    'val': [
        'ball puzzle', 'black and blue sneakers',
        'black shoe with green stripes', 'brown fedora', 'dinosaur figure',
        'hammer', 'light brown boot with golden laces', 'lion figure',
        'pepsi max box', 'pepsi next box', 'porcelain salad plate',
        'porcelain spoon', 'red and white striped towel', 'red cup',
        'screwdriver', 'toy train', 'unicorn toy', 'white razer mouse',
        'yoshi figure'
    ],
    'test': [
        'ball puzzle', 'black and blue sneakers',
        'black shoe with green stripes', 'brown fedora', 'dinosaur figure',
        'hammer', 'light brown boot with golden laces', 'lion figure',
        'pepsi max box', 'pepsi next box', 'porcelain salad plate',
        'porcelain spoon', 'red and white striped towel', 'red cup',
        'screwdriver', 'toy train', 'unicorn toy', 'white razer mouse',
        'yoshi figure'
    ],
}

google_all_shapes = {
    'train': [
        'alarm clock', 'android toy', 'ball puzzle', 'black and blue sneakers',
        'black boot with leopard print', 'black fedora', 'black razer mouse',
        'black sandal', 'black shoe with green stripes',
        'black shoe with orange stripes', 'brown fedora', 'bull figure',
        'butterfinger chocolate', 'c clamp', 'can opener', 'crayon box',
        'dinosaur figure', 'dog statue', 'frypan',
        'green and white striped towel', 'grey soccer shoe with cleats',
        'hammer', 'hard drive', 'honey dipper',
        'light brown boot with golden laces', 'lion figure',
        'magnifying glass', 'mario figure', 'nintendo 3ds',
        'nintendo cartridge', 'office depot box', 'orca plush toy',
        'pepsi gold caffeine free box', 'pepsi max box', 'pepsi next box',
        'pepsi wild cherry box', 'porcelain cup', 'porcelain salad plate',
        'porcelain spoon', 'purple tape', 'red and white flashlight',
        'red and white striped towel', 'red cup', 'rhino figure',
        'rocket racoon figure', 'scissors', 'screwdriver', 'silver tape',
        'spatula with purple head', 'spiderman figure', 'tablet',
        'toy school bus', 'toy train', 'unicorn toy', 'white razer mouse',
        'yoshi figure',
    ],
    'val': [
        'alarm clock', 'android toy', 'ball puzzle', 'black and blue sneakers',
        'black boot with leopard print', 'black fedora', 'black razer mouse',
        'black sandal', 'black shoe with green stripes',
        'black shoe with orange stripes', 'brown fedora', 'bull figure',
        'butterfinger chocolate', 'c clamp', 'can opener', 'crayon box',
        'dinosaur figure', 'dog statue', 'frypan',
        'green and white striped towel', 'grey soccer shoe with cleats',
        'hammer', 'hard drive', 'honey dipper',
        'light brown boot with golden laces', 'lion figure',
        'magnifying glass', 'mario figure', 'nintendo 3ds',
        'nintendo cartridge', 'office depot box', 'orca plush toy',
        'pepsi gold caffeine free box', 'pepsi max box', 'pepsi next box',
        'pepsi wild cherry box', 'porcelain cup', 'porcelain salad plate',
        'porcelain spoon', 'purple tape', 'red and white flashlight',
        'red and white striped towel', 'red cup', 'rhino figure',
        'rocket racoon figure', 'scissors', 'screwdriver', 'silver tape',
        'spatula with purple head', 'spiderman figure', 'tablet',
        'toy school bus', 'toy train', 'unicorn toy', 'white razer mouse',
        'yoshi figure',
    ],
    'test': [
        'alarm clock', 'android toy', 'ball puzzle', 'black and blue sneakers',
        'black boot with leopard print', 'black fedora', 'black razer mouse',
        'black sandal', 'black shoe with green stripes',
        'black shoe with orange stripes', 'brown fedora', 'bull figure',
        'butterfinger chocolate', 'c clamp', 'can opener', 'crayon box',
        'dinosaur figure', 'dog statue', 'frypan',
        'green and white striped towel', 'grey soccer shoe with cleats',
        'hammer', 'hard drive', 'honey dipper',
        'light brown boot with golden laces', 'lion figure',
        'magnifying glass', 'mario figure', 'nintendo 3ds',
        'nintendo cartridge', 'office depot box', 'orca plush toy',
        'pepsi gold caffeine free box', 'pepsi max box', 'pepsi next box',
        'pepsi wild cherry box', 'porcelain cup', 'porcelain salad plate',
        'porcelain spoon', 'purple tape', 'red and white flashlight',
        'red and white striped towel', 'red cup', 'rhino figure',
        'rocket racoon figure', 'scissors', 'screwdriver', 'silver tape',
        'spatula with purple head', 'spiderman figure', 'tablet',
        'toy school bus', 'toy train', 'unicorn toy', 'white razer mouse',
        'yoshi figure',
    ],
}

assembling_kit_shapes = {
    0: "letter R shape",
    1: "letter A shape",
    2: "triangle",
    3: "square",
    4: "plus",
    5: "letter T shape",
    6: "diamond",
    7: "pentagon",
    8: "rectangle",
    9: "flower",
    10: "star",
    11: "circle",
    12: "letter G shape",
    13: "letter V shape",
    14: "letter E shape",
    15: "letter L shape",
    16: "ring",
    17: "hexagon",
    18: "heart",
    19: "letter M shape",
}


# -----------------------------------------------------------------------------
# COLOR AND PLOT UTILS
# -----------------------------------------------------------------------------

# Colors (Tableau palette).
COLORS = {
    'blue': [78.0 / 255.0, 121.0 / 255.0, 167.0 / 255.0],
    'red': [255.0 / 255.0, 87.0 / 255.0, 89.0 / 255.0],
    'green': [89.0 / 255.0, 169.0 / 255.0, 79.0 / 255.0],
    'orange': [242.0 / 255.0, 142.0 / 255.0, 43.0 / 255.0],
    'yellow': [237.0 / 255.0, 201.0 / 255.0, 72.0 / 255.0],
    'purple': [176.0 / 255.0, 122.0 / 255.0, 161.0 / 255.0],
    'pink': [255.0 / 255.0, 157.0 / 255.0, 167.0 / 255.0],
    'cyan': [118.0 / 255.0, 183.0 / 255.0, 178.0 / 255.0],
    'brown': [156.0 / 255.0, 117.0 / 255.0, 95.0 / 255.0],
    'white': [255.0 / 255.0, 255.0 / 255.0, 255.0 / 255.0],
    'gray': [186.0 / 255.0, 176.0 / 255.0, 172.0 / 255.0],
    'indigo': [75.0 / 255.0, 0.0 / 255.0, 130.0 / 255.0],
    'violet': [143.0 / 255.0, 0.0 / 255.0, 255.0 / 255.0],
    'black': [0.0 / 255.0, 0.0 / 255.0, 0.0 / 255.0],
    'silver': [192.0 / 255.0, 192.0 / 255.0, 192.0 / 255.0],
    'gold': [255.0 / 255.0, 215.0 / 255.0, 0.0 / 255.0],
}

COLORS_NAMES = list(COLORS.keys())
TRAIN_COLORS = ['blue', 'red', 'green', 'yellow', 'brown', 'gray', 'cyan']
EVAL_COLORS = ['blue', 'red', 'green', 'orange', 'purple', 'pink', 'white']


def get_colors(mode, n_colors=-1, **kwargs):
    """Return (rgb_values, names) for the colors available in `mode`.

    Args:
      mode: forwarded to get_colors_names().
      n_colors: -1 to return every color, otherwise the number of colors to
        draw with random.sample (without replacement).
      **kwargs: accepted and ignored.

    Returns:
      Tuple of (list of RGB triples from COLORS, list of color names).
    """
    all_color_names = get_colors_names(mode)
    if n_colors != -1:
        all_color_names = random.sample(all_color_names, n_colors)
    return [COLORS[cn] for cn in all_color_names], all_color_names


def get_colors_names(mode):
    """Return the color names for `mode`.

    NOTE(review): every mode currently maps to TRAIN_COLORS; EVAL_COLORS and
    COLORS_NAMES are not used here. Kept as-is since callers may rely on it.
    """
    if mode == 'train':
        return TRAIN_COLORS
    elif mode == 'full':
        return TRAIN_COLORS
    else:
        return TRAIN_COLORS


def get_random_color():
    """Return ([rgb], [name]) for one randomly sampled training color."""
    return get_colors(mode='train', n_colors=1)


def solve_hanoi_all(n_disks):
    """Return the Tower of Hanoi move list for `n_disks` disks.

    Disks are indexed 0..n_disks-1 and moved from rod 0 to rod 2 using rod 1
    as the spare.

    Returns:
      List of [object index, from rod, to rod] steps.
    """
    # Solve Hanoi sequence recursively.
    hanoi_steps = []  # [[object index, from rod, to rod], ...]

    def solve_hanoi(n, t0, t1, t2):
        if n == 0:
            hanoi_steps.append([n, t0, t1])
            return
        solve_hanoi(n - 1, t0, t2, t1)
        hanoi_steps.append([n, t0, t1])
        solve_hanoi(n - 1, t2, t1, t0)

    solve_hanoi(n_disks - 1, 0, 2, 1)
    return hanoi_steps


def plot(fname,  # pylint: disable=dangerous-default-value
         title,
         ylabel,
         xlabel,
         data,
         xlim=[-np.inf, 0],
         xticks=None,
         ylim=[np.inf, -np.inf],
         show_std=True):
    """Plot frame data.

    Args:
      fname: path the figure is saved to.
      title: plot title.
      ylabel: y-axis label.
      xlabel: x-axis label.
      data: dictionary that maps experiment names to tuples with 3 elements:
        x (size N array) and y (size N array) and y_std (size N array).
      xlim: initial [min, max] x-limits, widened from the data.
      xticks: optional tick labels, placed at positions 0..len(xticks)-1.
      ylim: initial [min, max] y-limits, widened from the data.
      show_std: if True, shade the y +/- std band.
    """
    # Work on copies: the limits are updated below, and mutating the default
    # lists (or a caller's list) would leak limits from one call into the next.
    xlim = list(xlim)
    ylim = list(ylim)

    # Get data limits.
    for x, y, _ in data.values():
        y = np.array(y)
        xlim[0] = max(xlim[0], np.min(x))
        xlim[1] = max(xlim[1], np.max(x))
        ylim[0] = min(ylim[0], np.min(y))
        ylim[1] = max(ylim[1], np.max(y))

    # Draw background.
    plt.title(title, fontsize=14)
    plt.ylim(ylim)
    plt.ylabel(ylabel, fontsize=14)
    plt.yticks(fontsize=14)
    plt.xlim(xlim)
    plt.xlabel(xlabel, fontsize=14)
    plt.grid(True, linestyle='-', color=[0.8, 0.8, 0.8])
    ax = plt.gca()
    for axis in ['top', 'bottom', 'left', 'right']:
        ax.spines[axis].set_color('#000000')
    plt.rcParams.update({'font.size': 14})
    plt.rcParams['mathtext.default'] = 'regular'
    matplotlib.rcParams['pdf.fonttype'] = 42
    matplotlib.rcParams['ps.fonttype'] = 42

    # Draw data.
    palette = list(COLORS.values())
    for color_iter, (x, y, std) in enumerate(data.values()):
        x, y, std = np.float32(x), np.float32(y), np.float32(std)
        upper = np.clip(y + std, ylim[0], ylim[1])
        lower = np.clip(y - std, ylim[0], ylim[1])
        # Wrap around so more series than palette entries does not raise.
        color = palette[color_iter % len(palette)]
        if show_std:
            plt.fill_between(x, upper, lower, color=color, linewidth=0, alpha=0.3)
        plt.plot(x, y, color=color, linewidth=2, marker='o', alpha=1.)

    if xticks:
        plt.xticks(ticks=range(len(xticks)), labels=xticks, fontsize=14)
    else:
        plt.xticks(fontsize=14)
    plt.legend(list(data), loc='lower right', fontsize=14)
    plt.tight_layout()
    plt.savefig(fname)
    plt.clf()


# -----------------------------------------------------------------------------
# MESHCAT UTILS
# -----------------------------------------------------------------------------

def create_visualizer(clear=True):
    """Connect to the meshcat server at tcp://127.0.0.1:6000.

    Args:
      clear: if True, delete everything currently in the visualizer.

    Returns:
      The meshcat Visualizer.
    """
    print('Waiting for meshcat server... have you started a server?')
    vis = meshcat.Visualizer(zmq_url='tcp://127.0.0.1:6000')
    if clear:
        vis.delete()
    return vis


def make_frame(vis, name, h, radius, o=1.0):
    """Add a red-green-blue triad to the Meshcat visualizer.

    Args:
      vis (MeshCat Visualizer): the visualizer
      name (string): name for this frame (should be unique)
      h (float): height of frame visualization
      radius (float): radius of frame visualization
      o (float): opacity
    """
    vis[name]['x'].set_object(
        g.Cylinder(height=h, radius=radius),
        g.MeshLambertMaterial(color=0xff0000, reflectivity=0.8, opacity=o))
    rotate_x = mtf.rotation_matrix(np.pi / 2.0, [0, 0, 1])
    rotate_x[0, 3] = h / 2
    vis[name]['x'].set_transform(rotate_x)

    vis[name]['y'].set_object(
        g.Cylinder(height=h, radius=radius),
        g.MeshLambertMaterial(color=0x00ff00, reflectivity=0.8, opacity=o))
    rotate_y = mtf.rotation_matrix(np.pi / 2.0, [0, 1, 0])
    rotate_y[1, 3] = h / 2
    vis[name]['y'].set_transform(rotate_y)

    vis[name]['z'].set_object(
        g.Cylinder(height=h, radius=radius),
        g.MeshLambertMaterial(color=0x0000ff, reflectivity=0.8, opacity=o))
    rotate_z = mtf.rotation_matrix(np.pi / 2.0, [1, 0, 0])
    rotate_z[2, 3] = h / 2
    vis[name]['z'].set_transform(rotate_z)


def meshcat_visualize(vis, obs, act, info):
    """Visualize data using meshcat.

    Args:
      vis: meshcat Visualizer.
      obs: dict with per-camera 'depth' and 'color' images.
      act: dict whose 'camera_config' entry lists per-camera dicts with
        'intrinsics', 'position' and 'rotation' (xyzw quaternion).
      info: dict mapping a key to a (position, xyzw quaternion) pose; one
        coordinate frame is drawn per entry.
    """
    for key in sorted(info.keys()):
        pose = info[key]
        pick_transform = np.eye(4)
        pick_transform[0:3, 3] = pose[0]
        # meshcat expects wxyz; poses are stored xyzw.
        quaternion_wxyz = np.asarray(
            [pose[1][3], pose[1][0], pose[1][1], pose[1][2]])
        pick_transform[0:3, 0:3] = mtf.quaternion_matrix(
            quaternion_wxyz)[0:3, 0:3]
        label = 'obj_' + str(key)
        make_frame(vis, label, h=0.05, radius=0.0012, o=1.0)
        vis[label].set_transform(pick_transform)

    for cam_index, cam_config in enumerate(act['camera_config']):
        verts = unproject_depth_vectorized(
            obs['depth'][cam_index], np.array([0, 1]),
            np.array(cam_config['intrinsics']).reshape(3, 3),
            np.zeros(5))

        # switch from [N,3] to [3,N]
        verts = verts.T

        cam_transform = np.eye(4)
        cam_transform[0:3, 3] = cam_config['position']
        quaternion_xyzw = cam_config['rotation']
        quaternion_wxyz = np.asarray([
            quaternion_xyzw[3], quaternion_xyzw[0], quaternion_xyzw[1],
            quaternion_xyzw[2]
        ])
        cam_transform[0:3, 0:3] = mtf.quaternion_matrix(
            quaternion_wxyz)[0:3, 0:3]
        verts = apply_transform(cam_transform, verts)

        colors = obs['color'][cam_index].reshape(-1, 3).T / 255.0

        vis['pointclouds/' + str(cam_index)].set_object(
            g.PointCloud(position=verts, color=colors))


# -----------------------------------------------------------------------------
# CONFIG UTILS
# -----------------------------------------------------------------------------

def set_seed(seed, torch=False):
    """Seed Python's `random`, PYTHONHASHSEED and NumPy; optionally torch.

    Args:
      seed: the seed value.
      torch: if truthy, also import torch and call torch.manual_seed(seed).
        (The parameter name shadows the module inside this function.)
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

    if torch:
        import torch
        torch.manual_seed(seed)


def load_cfg(yaml_path):
    """Load a YAML file with yaml.safe_load and return the parsed data."""
    with open(yaml_path, 'r') as f:
        data = yaml.safe_load(f)
    return data


def load_hydra_config(config_path):
    """Load a config file with OmegaConf.load."""
    return OmegaConf.load(config_path)