Spaces:

akhaliq
/

depth-pro

Running on Zero

File size: 9,817 Bytes

de1b1de

from typing import Iterator, List, Tuple

import numpy as np


def connected_component(r: np.ndarray, c: np.ndarray) -> Iterator[List[int]]:
    """Yield runs of horizontally adjacent entries from paired row/column indices.

    Position ``i`` extends the current run when it lies in the same row as the
    previous position and exactly one column to its right; otherwise the
    current run is emitted and a new one starts at ``i``.

    Args:
    ----
        r (np.ndarray): Row indices.
        c (np.ndarray): Column indices, paired element-wise with ``r``.

    Yields:
    ------
        List[int]: Positions into ``r``/``c`` that form one run. Nothing is
        yielded when the input is empty.

    """
    # With no entries there is no position 0 to seed a run with, so emit
    # nothing instead of a bogus ``[0]`` that would index out of bounds.
    if r.size == 0:
        return
    run = [0]
    for i in range(1, r.size):
        prev = run[-1]
        if r[i] == r[prev] and c[i] == c[prev] + 1:
            run.append(i)
        else:
            yield run
            run = [i]
    # Emit the run that was still open when the input ended.
    yield run


def nms_horizontal(ratio: np.ndarray, threshold: float) -> np.ndarray:
    """Keep only the strongest response in each horizontal run above a threshold.

    Entries of ``ratio`` greater than ``threshold`` are grouped into runs of
    consecutive columns within the same row; in every run only the entry with
    the largest ratio is marked in the output.

    Args:
    ----
        ratio (np.ndarray): Input ratio matrix.
        threshold (float): Threshold for NMS.

    Returns:
    -------
        np.ndarray: Boolean mask with the same shape as ``ratio``, True at the
        maximum of each run.

    """
    keep = np.zeros_like(ratio, dtype=bool)
    rows, cols = np.nonzero(ratio > threshold)
    if rows.size > 0:
        for run in connected_component(rows, cols):
            # Gather the ratios of this run and locate its first maximum.
            run_values = [ratio[rows[k], cols[k]] for k in run]
            winner = run[np.argmax(run_values)]
            keep[rows[winner], cols[winner]] = True
    return keep


def nms_vertical(ratio: np.ndarray, threshold: float) -> np.ndarray:
    """Keep only the strongest response in each vertical run above a threshold.

    Implemented by transposing the input, applying the horizontal NMS and
    transposing the resulting mask back.

    Args:
    ----
        ratio (np.ndarray): Input ratio matrix.
        threshold (float): Threshold for NMS.

    Returns:
    -------
        np.ndarray: Binary mask after applying NMS.

    """
    transposed_ratio = np.transpose(ratio)
    transposed_mask = nms_horizontal(transposed_ratio, threshold)
    return np.transpose(transposed_mask)


def fgbg_depth(
    d: np.ndarray, t: float
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Compare every pixel with its four neighbours by depth ratio.

    For each pair of horizontally or vertically adjacent pixels the ratio of
    one value to the other is tested against ``t``. The horizontal outputs
    have one column fewer than ``d`` and the vertical outputs one row fewer.

    Args:
    ----
        d (np.ndarray): Depth matrix.
        t (float): Threshold the neighbour ratio must exceed.

    Returns:
    -------
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Boolean matrices
        in the order left, top, right, bottom.

    """
    # Views onto the two members of every horizontal / vertical neighbour pair.
    west, east = d[..., :, :-1], d[..., :, 1:]
    north, south = d[..., :-1, :], d[..., 1:, :]

    right = east / west > t
    left = west / east > t
    bottom = south / north > t
    top = north / south > t
    return left, top, right, bottom


def fgbg_depth_thinned(
    d: np.ndarray, t: float
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Compare neighbouring pixels by depth ratio and thin the result with NMS.

    The neighbour ratios are the same as in the un-thinned comparison, but
    instead of a plain threshold test each ratio map goes through horizontal
    (left/right) or vertical (top/bottom) non-maximum suppression.

    Args:
    ----
        d (np.ndarray): Depth matrix.
        t (float): Threshold for NMS.

    Returns:
    -------
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Boolean matrices
        in the order left, top, right, bottom, with NMS applied.

    """
    # Views onto the two members of every horizontal / vertical neighbour pair.
    west, east = d[..., :, :-1], d[..., :, 1:]
    north, south = d[..., :-1, :], d[..., 1:, :]

    right = nms_horizontal(east / west, t)
    left = nms_horizontal(west / east, t)
    bottom = nms_vertical(south / north, t)
    top = nms_vertical(north / south, t)
    return left, top, right, bottom


def fgbg_binary_mask(
    d: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Find transitions between neighbouring pixels of a boolean mask.

    Each output is True where one pixel of an adjacent pair is set and the
    other is not. The horizontal outputs have one column fewer than ``d`` and
    the vertical outputs one row fewer.

    Args:
    ----
        d (np.ndarray): Binary mask; must have dtype ``bool``.

    Returns:
    -------
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Boolean matrices
        in the order left, top, right, bottom.

    """
    assert d.dtype == bool
    # Views onto the two members of every horizontal / vertical neighbour pair.
    west, east = d[..., :, :-1], d[..., :, 1:]
    north, south = d[..., :-1, :], d[..., 1:, :]

    right = east & ~west
    left = west & ~east
    bottom = south & ~north
    top = north & ~south
    return left, top, right, bottom


def edge_recall_matting(pr: np.ndarray, gt: np.ndarray, t: float) -> float:
    """Calculate edge recall of a predicted depth map against a binary mask.

    For each of the four directions, recall is the number of positions flagged
    in both the thinned prediction and the mask divided by the number flagged
    in the mask (the divisor is floored at 1). The four recalls are averaged.

    Args:
    ----
        pr (np.ndarray): Predicted depth matrix.
        gt (np.ndarray): Ground truth binary mask; must have dtype ``bool``.
        t (float): Threshold for NMS.

    Returns:
    -------
        float: Edge recall value.

    """
    assert gt.dtype == bool
    predicted_edges = fgbg_depth_thinned(pr, t)
    reference_edges = fgbg_binary_mask(gt)

    recall_total = 0.0
    for predicted, reference in zip(predicted_edges, reference_edges):
        matched = np.count_nonzero(predicted & reference)
        # Floor the divisor at 1 so a direction without reference edges adds 0.
        recall_total += matched / max(np.count_nonzero(reference), 1)
    return 0.25 * recall_total


def boundary_f1(
    pr: np.ndarray,
    gt: np.ndarray,
    t: float,
    return_p: bool = False,
    return_r: bool = False,
) -> float:
    """Calculate the Boundary F1 score between two depth maps.

    Neighbour relations are extracted from both maps with the same threshold.
    Per direction, recall divides the matches by the ground-truth count and
    precision divides them by the predicted count (divisors floored at 1);
    both are averaged over the four directions.

    Args:
    ----
        pr (np.ndarray): Predicted depth matrix.
        gt (np.ndarray): Ground truth depth matrix.
        t (float): Threshold for comparison.
        return_p (bool, optional): If True, return precision. Defaults to False.
        return_r (bool, optional): If True, return recall. Defaults to False.

    Returns:
    -------
        float: 0.0 when precision and recall are both zero; otherwise precision
        if ``return_p`` is set, else recall if ``return_r`` is set, else the
        F1 score.

    """
    predicted_edges = fgbg_depth(pr, t)
    reference_edges = fgbg_depth(gt, t)

    recall_total = 0.0
    precision_total = 0.0
    for predicted, reference in zip(predicted_edges, reference_edges):
        matched = np.count_nonzero(predicted & reference)
        # Floor the divisors at 1 so an empty direction contributes 0.
        recall_total += matched / max(np.count_nonzero(reference), 1)
        precision_total += matched / max(np.count_nonzero(predicted), 1)
    r = 0.25 * recall_total
    p = 0.25 * precision_total

    if r + p == 0:
        return 0.0
    if return_p:
        return p
    if return_r:
        return r
    return 2 * (r * p) / (r + p)


def get_thresholds_and_weights(
    t_min: float, t_max: float, N: int
) -> Tuple[np.ndarray, np.ndarray]:
    """Build evenly spaced thresholds and weights proportional to them.

    Args:
    ----
        t_min (float): Minimum threshold.
        t_max (float): Maximum threshold.
        N (int): Number of thresholds.

    Returns:
    -------
        Tuple[np.ndarray, np.ndarray]: The ``N`` thresholds from ``t_min`` to
        ``t_max`` inclusive, and each threshold divided by the sum of all
        thresholds.

    """
    grid = np.linspace(t_min, t_max, num=N)
    normaliser = grid.sum()
    return grid, grid / normaliser


def invert_depth(depth: np.ndarray, eps: float = 1e-6) -> np.ndarray:
    """Return the element-wise reciprocal of a depth map.

    Values below ``eps`` are raised to ``eps`` before inversion so the
    division never hits zero.

    Args:
    ----
        depth (np.ndarray): Depth map to be inverted.
        eps (float): Lower bound applied to ``depth`` before inverting
            (default is 1e-6).

    Returns:
    -------
    np.ndarray: Inverted depth map.

    """
    floored = depth.clip(min=eps)
    return 1.0 / floored


def SI_boundary_F1(
    predicted_depth: np.ndarray,
    target_depth: np.ndarray,
    t_min: float = 1.05,
    t_max: float = 1.25,
    N: int = 10,
) -> float:
    """Calculate Scale-Invariant Boundary F1 Score for depth-based ground-truth.

    Both depth maps are inverted, the Boundary F1 score is evaluated at ``N``
    evenly spaced thresholds between ``t_min`` and ``t_max``, and the scores
    are combined with the weights returned alongside the thresholds.

    Args:
    ----
        predicted_depth (np.ndarray): Predicted depth matrix (2-D).
        target_depth (np.ndarray): Ground truth depth matrix (2-D).
        t_min (float, optional): Minimum threshold. Defaults to 1.05.
        t_max (float, optional): Maximum threshold. Defaults to 1.25.
        N (int, optional): Number of thresholds. Defaults to 10.

    Returns:
    -------
        float: Scale-Invariant Boundary F1 Score.

    """
    assert predicted_depth.ndim == target_depth.ndim == 2
    thresholds, weights = get_thresholds_and_weights(t_min, t_max, N)

    # The inverse-depth maps do not depend on the threshold, so build them
    # once instead of once per threshold.
    predicted_inverse = invert_depth(predicted_depth)
    target_inverse = invert_depth(target_depth)

    f1_scores = np.array(
        [boundary_f1(predicted_inverse, target_inverse, t) for t in thresholds]
    )
    return np.sum(f1_scores * weights)


def SI_boundary_Recall(
    predicted_depth: np.ndarray,
    target_mask: np.ndarray,
    t_min: float = 1.05,
    t_max: float = 1.25,
    N: int = 10,
    alpha_threshold: float = 0.1,
) -> float:
    """Calculate Scale-Invariant Boundary Recall Score for mask-based ground-truth.

    The target mask is binarised with ``alpha_threshold``, the predicted depth
    is inverted, edge recall is evaluated at ``N`` evenly spaced thresholds
    between ``t_min`` and ``t_max``, and the recalls are combined with the
    weights returned alongside the thresholds.

    Args:
    ----
        predicted_depth (np.ndarray): Predicted depth matrix (2-D).
        target_mask (np.ndarray): Ground truth mask (2-D); values greater than
            ``alpha_threshold`` count as set.
        t_min (float, optional): Minimum threshold. Defaults to 1.05.
        t_max (float, optional): Maximum threshold. Defaults to 1.25.
        N (int, optional): Number of thresholds. Defaults to 10.
        alpha_threshold (float, optional): Threshold for alpha masking. Defaults to 0.1.

    Returns:
    -------
        float: Scale-Invariant Boundary Recall Score.

    """
    assert predicted_depth.ndim == target_mask.ndim == 2
    thresholds, weights = get_thresholds_and_weights(t_min, t_max, N)
    thresholded_target = target_mask > alpha_threshold

    # The inverse-depth map does not depend on the threshold, so build it
    # once instead of once per threshold.
    predicted_inverse = invert_depth(predicted_depth)

    recall_scores = np.array(
        [
            edge_recall_matting(predicted_inverse, thresholded_target, t=float(t))
            for t in thresholds
        ]
    )
    weighted_recall = np.sum(recall_scores * weights)
    return weighted_recall