|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from functools import partial |
|
from typing import Optional, Tuple |
|
|
|
import numpy as np |
|
import torch |
|
|
|
from .image_util import get_tv_resample_method, resize_max_res |
|
|
|
|
|
def inter_distances(tensors: torch.Tensor):
    """
    Compute the element-wise difference between every unordered pair of depth maps.

    Given a batch of `B` maps, returns the `B*(B-1)/2` pairwise differences stacked
    along dimension 0, in `torch.combinations` order.
    """
    index_pairs = torch.combinations(torch.arange(tensors.shape[0]))
    pairwise = [tensors[a : a + 1] - tensors[b : b + 1] for a, b in index_pairs]
    return torch.concatenate(pairwise, dim=0)
|
|
|
|
|
def ensemble_depth(
    depth: torch.Tensor,
    scale_invariant: bool = True,
    shift_invariant: bool = True,
    output_uncertainty: bool = False,
    reduction: str = "median",
    regularizer_strength: float = 0.02,
    max_iter: int = 2,
    tol: float = 1e-3,
    max_res: int = 1024,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
    """
    Ensembles depth maps represented by the `depth` tensor with expected shape `(B, 1, H, W)`, where B is the
    number of ensemble members for a given prediction of size `(H x W)`. Even though the function is designed for
    depth maps, it can also be used with disparity maps as long as the input tensor values are non-negative. The
    alignment happens when the predictions have one or more degrees of freedom, that is when they are either
    affine-invariant (`scale_invariant=True` and `shift_invariant=True`), or just scale-invariant (only
    `scale_invariant=True`). For absolute predictions (`scale_invariant=False` and `shift_invariant=False`)
    alignment and normalization are skipped and only ensembling is performed.

    Args:
        depth (`torch.Tensor`):
            Input ensemble depth maps.
        scale_invariant (`bool`, *optional*, defaults to `True`):
            Whether to treat predictions as scale-invariant.
        shift_invariant (`bool`, *optional*, defaults to `True`):
            Whether to treat predictions as shift-invariant.
        output_uncertainty (`bool`, *optional*, defaults to `False`):
            Whether to output uncertainty map.
        reduction (`str`, *optional*, defaults to `"median"`):
            Reduction method used to ensemble aligned predictions. The accepted values are: `"mean"` and
            `"median"`.
        regularizer_strength (`float`, *optional*, defaults to `0.02`):
            Strength of the regularizer that pulls the aligned predictions to the unit range from 0 to 1.
        max_iter (`int`, *optional*, defaults to `2`):
            Maximum number of the alignment solver steps. Refer to `scipy.optimize.minimize` function, `options`
            argument.
        tol (`float`, *optional*, defaults to `1e-3`):
            Alignment solver tolerance. The solver stops when the tolerance is reached.
        max_res (`int`, *optional*, defaults to `1024`):
            Resolution at which the alignment is performed; `None` matches the `processing_resolution`.
    Returns:
        A tensor of aligned and ensembled depth maps and optionally a tensor of uncertainties of the same shape:
        `(1, 1, H, W)`.

    Raises:
        ValueError: If the input is not 4D `(B, 1, H, W)`, the reduction is unknown, or a pure shift-invariant
            alignment is requested.
    """
    if depth.dim() != 4 or depth.shape[1] != 1:
        raise ValueError(f"Expecting 4D tensor of shape [B,1,H,W]; got {depth.shape}.")
    if reduction not in ("mean", "median"):
        raise ValueError(f"Unrecognized reduction method: {reduction}.")
    if not scale_invariant and shift_invariant:
        raise ValueError("Pure shift-invariant ensembling is not supported.")

    def init_param(depth: torch.Tensor) -> np.ndarray:
        # Initial guess: per-member scale (and shift) that maps each member's value range into [0, 1].
        init_min = depth.reshape(ensemble_size, -1).min(dim=1).values
        init_max = depth.reshape(ensemble_size, -1).max(dim=1).values

        if scale_invariant and shift_invariant:
            # Affine alignment s*d + t: s = 1/(max-min), t = -s*min; clamp guards constant maps.
            init_s = 1.0 / (init_max - init_min).clamp(min=1e-6)
            init_t = -init_s * init_min
            param = torch.cat((init_s, init_t)).cpu().numpy()
        elif scale_invariant:
            init_s = 1.0 / init_max.clamp(min=1e-6)
            param = init_s.cpu().numpy()
        else:
            raise ValueError("Unrecognized alignment.")

        return param

    def align(depth: torch.Tensor, param: np.ndarray) -> torch.Tensor:
        # Apply the flat solver parameters (per-member scales, then optional shifts) to the ensemble.
        if scale_invariant and shift_invariant:
            s, t = np.split(param, 2)
            s = torch.from_numpy(s).to(depth).view(ensemble_size, 1, 1, 1)
            t = torch.from_numpy(t).to(depth).view(ensemble_size, 1, 1, 1)
            out = depth * s + t
        elif scale_invariant:
            s = torch.from_numpy(param).to(depth).view(ensemble_size, 1, 1, 1)
            out = depth * s
        else:
            raise ValueError("Unrecognized alignment.")
        return out

    def ensemble(
        depth_aligned: torch.Tensor, return_uncertainty: bool = False
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        # Reduce the ensemble dimension; uncertainty is std (mean) or MAD (median).
        uncertainty = None
        if reduction == "mean":
            prediction = torch.mean(depth_aligned, dim=0, keepdim=True)
            if return_uncertainty:
                uncertainty = torch.std(depth_aligned, dim=0, keepdim=True)
        elif reduction == "median":
            prediction = torch.median(depth_aligned, dim=0, keepdim=True).values
            if return_uncertainty:
                uncertainty = torch.median(
                    torch.abs(depth_aligned - prediction), dim=0, keepdim=True
                ).values
        else:
            raise ValueError(f"Unrecognized reduction method: {reduction}.")
        return prediction, uncertainty

    def cost_fn(param: np.ndarray, depth: torch.Tensor) -> float:
        # Sum of pairwise RMS differences between aligned members, plus a
        # regularizer pulling the ensembled prediction towards the unit range.
        cost = 0.0
        depth_aligned = align(depth, param)

        for i, j in torch.combinations(torch.arange(ensemble_size)):
            diff = depth_aligned[i] - depth_aligned[j]
            cost += (diff**2).mean().sqrt().item()

        if regularizer_strength > 0:
            prediction, _ = ensemble(depth_aligned, return_uncertainty=False)
            err_near = (0.0 - prediction.min()).abs().item()
            err_far = (1.0 - prediction.max()).abs().item()
            cost += (err_near + err_far) * regularizer_strength

        return cost

    def compute_param(depth: torch.Tensor):
        # Lazy import: scipy is only needed when alignment is performed. Importing the
        # submodule explicitly guarantees `scipy.optimize` is bound (a bare `import scipy`
        # relies on scipy's lazy submodule loading).
        import scipy.optimize

        # Optimize at a reduced resolution to keep the solver fast on large maps.
        depth_to_align = depth.to(torch.float32)
        if max_res is not None and max(depth_to_align.shape[2:]) > max_res:
            depth_to_align = resize_max_res(
                depth_to_align, max_res, get_tv_resample_method("nearest-exact")
            )

        param = init_param(depth_to_align)

        res = scipy.optimize.minimize(
            partial(cost_fn, depth=depth_to_align),
            param,
            method="BFGS",
            tol=tol,
            options={"maxiter": max_iter, "disp": False},
        )

        return res.x

    requires_aligning = scale_invariant or shift_invariant
    ensemble_size = depth.shape[0]

    if requires_aligning:
        param = compute_param(depth)
        depth = align(depth, param)

    depth, uncertainty = ensemble(depth, return_uncertainty=output_uncertainty)

    if not requires_aligning:
        # Absolute predictions: the documented behavior is ensembling only, without
        # normalization into the unit range (previously this path raised ValueError).
        return depth, uncertainty

    # Renormalize the ensembled prediction into [0, 1] using the degrees of freedom
    # admitted by the alignment type.
    depth_max = depth.max()
    if shift_invariant:
        # Affine-invariant: both ends of the range are free.
        depth_min = depth.min()
    else:
        # Scale-invariant only: zero must stay fixed.
        depth_min = 0
    depth_range = (depth_max - depth_min).clamp(min=1e-6)
    depth = (depth - depth_min) / depth_range
    if output_uncertainty:
        uncertainty /= depth_range

    return depth, uncertainty
|
|