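# Spaces: Running on Zero
# NOTE(review): the line above looks like page-header residue from the site this
# file was copied from, not part of the module -- confirm and remove.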
from typing import Iterator, List, Tuple

import numpy as np
def connected_component(r: np.ndarray, c: np.ndarray) -> Iterator[List[int]]:
    """Group consecutive coordinate pairs into horizontal runs of adjacent pixels.

    The paired coordinates are walked in order. A new group is started whenever
    the current entry is not in the same row as, and exactly one column to the
    right of, the previous entry. The grouping therefore depends on the order of
    the input: row-major order (as returned by ``np.nonzero`` on a 2-D array)
    keeps every horizontal run contiguous.

    Args:
    ----
        r (np.ndarray): Row indices.
        c (np.ndarray): Column indices, paired element-wise with ``r``.

    Yields:
    ------
        List[int]: Positions into ``r``/``c`` that belong to one run. Nothing is
        yielded when the input is empty.

    """
    # Without this guard an empty input would yield ``[0]``, a position that
    # does not exist in ``r``/``c``.
    if r.size == 0:
        return

    run = [0]
    for i in range(1, r.size):
        prev = run[-1]
        if r[i] == r[prev] and c[i] == c[prev] + 1:
            run.append(i)
        else:
            # Adjacency broken: emit the finished run and start a new one.
            yield run
            run = [i]
    yield run
def nms_horizontal(ratio: np.ndarray, threshold: float) -> np.ndarray:
    """Keep only the strongest above-threshold response in each horizontal run.

    Entries of ``ratio`` greater than ``threshold`` are grouped into runs of
    horizontally adjacent pixels (via ``connected_component``); within each run
    only the entry with the largest ratio is kept.

    Args:
    ----
        ratio (np.ndarray): Input ratio matrix. Its above-threshold coordinates
            are unpacked into a row array and a column array, so it must be 2-D.
        threshold (float): Threshold for NMS.

    Returns:
    -------
        np.ndarray: Boolean mask with the same shape as ``ratio``; ``True``
        marks the retained maximum of each run.

    """
    keep = np.zeros_like(ratio, dtype=bool)
    rows, cols = np.nonzero(ratio > threshold)
    if rows.size == 0:
        # Nothing exceeds the threshold: the all-False mask is the answer.
        return keep

    for run in connected_component(rows, cols):
        responses = [ratio[rows[k], cols[k]] for k in run]
        # np.argmax returns the first maximum, so ties keep the leftmost pixel.
        best = run[np.argmax(responses)]
        keep[rows[best], cols[best]] = True
    return keep
def nms_vertical(ratio: np.ndarray, threshold: float) -> np.ndarray:
    """Apply Non-Maximum Suppression (NMS) along columns of the ratio matrix.

    Implemented by transposing the input, running ``nms_horizontal`` on it and
    transposing the resulting mask back.

    Args:
    ----
        ratio (np.ndarray): Input ratio matrix.
        threshold (float): Threshold for NMS.

    Returns:
    -------
        np.ndarray: Binary mask after applying NMS, in the orientation of
        ``ratio``.

    """
    transposed_ratio = np.transpose(ratio)
    transposed_mask = nms_horizontal(transposed_ratio, threshold)
    return np.transpose(transposed_mask)
def fgbg_depth(
    d: np.ndarray, t: float
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Compare each pixel with its direct neighbour using a ratio threshold.

    For every pair of horizontally or vertically adjacent entries, the ratio of
    one entry to the other is tested against ``t`` in both directions.

    Args:
    ----
        d (np.ndarray): Depth matrix. The last two axes are treated as rows and
            columns; any leading axes are carried through unchanged.
        t (float): Threshold that a neighbour ratio must exceed.

    Returns:
    -------
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Boolean arrays in
        the order (left, top, right, bottom):
        left is ``d[..., :, :-1] / d[..., :, 1:] > t``,
        top is ``d[..., :-1, :] / d[..., 1:, :] > t``,
        right is ``d[..., :, 1:] / d[..., :, :-1] > t``,
        bottom is ``d[..., 1:, :] / d[..., :-1, :] > t``.
        Left/right are one shorter than ``d`` along the last axis, top/bottom
        along the second-to-last axis.

    """
    # Views of each neighbouring pair along columns and along rows.
    left_px, right_px = d[..., :, :-1], d[..., :, 1:]
    upper_px, lower_px = d[..., :-1, :], d[..., 1:, :]

    left_mask = (left_px / right_px) > t
    top_mask = (upper_px / lower_px) > t
    right_mask = (right_px / left_px) > t
    bottom_mask = (lower_px / upper_px) > t
    return left_mask, top_mask, right_mask, bottom_mask
def fgbg_depth_thinned(
    d: np.ndarray, t: float
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Compare each pixel with its neighbour and thin the result with NMS.

    The same four neighbour ratios as in ``fgbg_depth`` are computed, but
    instead of a plain threshold each ratio map is passed through Non-Maximum
    Suppression: ``nms_horizontal`` for the left/right maps and ``nms_vertical``
    for the top/bottom maps.

    Args:
    ----
        d (np.ndarray): Depth matrix.
        t (float): Threshold for NMS.

    Returns:
    -------
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Boolean masks in
        the order (left, top, right, bottom) with NMS applied.

    """
    # Views of each neighbouring pair along columns and along rows.
    left_px, right_px = d[..., :, :-1], d[..., :, 1:]
    upper_px, lower_px = d[..., :-1, :], d[..., 1:, :]

    left_mask = nms_horizontal(left_px / right_px, t)
    top_mask = nms_vertical(upper_px / lower_px, t)
    right_mask = nms_horizontal(right_px / left_px, t)
    bottom_mask = nms_vertical(lower_px / upper_px, t)
    return left_mask, top_mask, right_mask, bottom_mask
def fgbg_binary_mask(
    d: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Find transitions between neighbouring pixels of a boolean mask.

    A transition is flagged where one pixel of an adjacent pair is ``True`` and
    the other is ``False``; the four outputs distinguish which side is ``True``.

    Args:
    ----
        d (np.ndarray): Binary depth matrix; its dtype must be ``bool``
            (checked with an ``assert``).

    Returns:
    -------
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Boolean arrays in
        the order (left, top, right, bottom):
        left is ``d[..., :, :-1] & ~d[..., :, 1:]``,
        top is ``d[..., :-1, :] & ~d[..., 1:, :]``,
        right is ``d[..., :, 1:] & ~d[..., :, :-1]``,
        bottom is ``d[..., 1:, :] & ~d[..., :-1, :]``.

    """
    assert d.dtype == bool

    # Views of each neighbouring pair along columns and along rows.
    left_px, right_px = d[..., :, :-1], d[..., :, 1:]
    upper_px, lower_px = d[..., :-1, :], d[..., 1:, :]

    return (
        left_px & ~right_px,
        upper_px & ~lower_px,
        right_px & ~left_px,
        lower_px & ~upper_px,
    )
def edge_recall_matting(pr: np.ndarray, gt: np.ndarray, t: float) -> float:
    """Calculate edge recall of predicted edges against a binary ground truth.

    Predicted edges come from ``fgbg_depth_thinned(pr, t)`` and reference edges
    from ``fgbg_binary_mask(gt)``. For each of the four directions the recall is
    the number of reference edge pixels that are also predicted, divided by the
    number of reference edge pixels; the result is the mean over the four
    directions.

    Args:
    ----
        pr (np.ndarray): Predicted depth matrix.
        gt (np.ndarray): Ground truth binary mask; its dtype must be ``bool``
            (checked with an ``assert``).
        t (float): Threshold for NMS.

    Returns:
    -------
        float: Edge recall value.

    """
    assert gt.dtype == bool
    predicted_edges = fgbg_depth_thinned(pr, t)
    reference_edges = fgbg_binary_mask(gt)

    recall_sum = 0.0
    for pred_dir, ref_dir in zip(predicted_edges, reference_edges):
        matched = np.count_nonzero(pred_dir & ref_dir)
        # max(..., 1) avoids dividing by zero when a direction has no
        # reference edges; that direction then contributes 0.
        recall_sum += matched / max(np.count_nonzero(ref_dir), 1)
    return 0.25 * recall_sum
def boundary_f1(
    pr: np.ndarray,
    gt: np.ndarray,
    t: float,
    return_p: bool = False,
    return_r: bool = False,
) -> float:
    """Calculate the Boundary F1 score between two depth matrices.

    Edges are extracted from both inputs with ``fgbg_depth`` at threshold ``t``.
    Recall and precision are computed per direction (left, top, right, bottom)
    and averaged over the four directions; the F1 score is their harmonic mean.

    Args:
    ----
        pr (np.ndarray): Predicted depth matrix.
        gt (np.ndarray): Ground truth depth matrix.
        t (float): Threshold for comparison.
        return_p (bool, optional): If True, return precision. Defaults to False.
        return_r (bool, optional): If True, return recall. Defaults to False.

    Returns:
    -------
        float: Boundary F1 score, or precision, or recall depending on the
        flags. ``return_p`` takes priority when both flags are set. ``0.0`` is
        returned whenever recall and precision are both zero.

    """
    predicted_edges = fgbg_depth(pr, t)
    reference_edges = fgbg_depth(gt, t)

    recall_sum = 0.0
    precision_sum = 0.0
    for pred_dir, ref_dir in zip(predicted_edges, reference_edges):
        matched = np.count_nonzero(pred_dir & ref_dir)
        # max(..., 1) avoids dividing by zero when a direction has no edges.
        recall_sum += matched / max(np.count_nonzero(ref_dir), 1)
        precision_sum += matched / max(np.count_nonzero(pred_dir), 1)
    r = 0.25 * recall_sum
    p = 0.25 * precision_sum

    if r + p == 0:
        return 0.0
    if return_p:
        return p
    if return_r:
        return r
    return 2 * (r * p) / (r + p)
def get_thresholds_and_weights(
    t_min: float, t_max: float, N: int
) -> Tuple[np.ndarray, np.ndarray]:
    """Generate evenly spaced thresholds and weights proportional to them.

    Args:
    ----
        t_min (float): Minimum threshold.
        t_max (float): Maximum threshold.
        N (int): Number of thresholds.

    Returns:
    -------
        Tuple[np.ndarray, np.ndarray]: ``N`` thresholds evenly spaced from
        ``t_min`` to ``t_max`` (both included), and weights equal to each
        threshold divided by the sum of all thresholds.

    """
    thresholds = np.linspace(t_min, t_max, N)
    threshold_total = thresholds.sum()
    weights = thresholds / threshold_total
    return thresholds, weights
def invert_depth(depth: np.ndarray, eps: float = 1e-6) -> np.ndarray:
    """Return the element-wise reciprocal of a depth map, clamped for stability.

    Values below ``eps`` are raised to ``eps`` before inversion, so no entry is
    ever divided by a number smaller than ``eps``.

    Args:
    ----
        depth (np.ndarray): Depth map to be inverted.
        eps (float): Minimum value to avoid division by zero (default is 1e-6).

    Returns:
    -------
        np.ndarray: Inverted depth map.

    """
    clamped_depth = depth.clip(min=eps)
    return 1.0 / clamped_depth
def SI_boundary_F1(
    predicted_depth: np.ndarray,
    target_depth: np.ndarray,
    t_min: float = 1.05,
    t_max: float = 1.25,
    N: int = 10,
) -> float:
    """Calculate Scale-Invariant Boundary F1 Score for depth-based ground-truth.

    Both depth maps are inverted with ``invert_depth``; ``boundary_f1`` is then
    evaluated on the inverse depths at ``N`` thresholds between ``t_min`` and
    ``t_max``, and the scores are combined using the weights from
    ``get_thresholds_and_weights``.

    Args:
    ----
        predicted_depth (np.ndarray): Predicted depth matrix (must be 2-D).
        target_depth (np.ndarray): Ground truth depth matrix (must be 2-D).
        t_min (float, optional): Minimum threshold. Defaults to 1.05.
        t_max (float, optional): Maximum threshold. Defaults to 1.25.
        N (int, optional): Number of thresholds. Defaults to 10.

    Returns:
    -------
        float: Scale-Invariant Boundary F1 Score.

    """
    assert predicted_depth.ndim == target_depth.ndim == 2
    thresholds, weights = get_thresholds_and_weights(t_min, t_max, N)

    # The inverse depths do not depend on the threshold, so compute them once
    # instead of once per threshold.
    predicted_inverse = invert_depth(predicted_depth)
    target_inverse = invert_depth(target_depth)

    f1_scores = np.array(
        [boundary_f1(predicted_inverse, target_inverse, t) for t in thresholds]
    )
    return np.sum(f1_scores * weights)
def SI_boundary_Recall(
    predicted_depth: np.ndarray,
    target_mask: np.ndarray,
    t_min: float = 1.05,
    t_max: float = 1.25,
    N: int = 10,
    alpha_threshold: float = 0.1,
) -> float:
    """Calculate Scale-Invariant Boundary Recall Score for mask-based ground-truth.

    The target mask is binarized with ``target_mask > alpha_threshold`` and the
    predicted depth is inverted with ``invert_depth``. ``edge_recall_matting``
    is then evaluated at ``N`` thresholds between ``t_min`` and ``t_max``, and
    the scores are combined using the weights from
    ``get_thresholds_and_weights``.

    Args:
    ----
        predicted_depth (np.ndarray): Predicted depth matrix (must be 2-D).
        target_mask (np.ndarray): Ground truth mask (must be 2-D); it is
            binarized with ``alpha_threshold`` before use.
        t_min (float, optional): Minimum threshold. Defaults to 1.05.
        t_max (float, optional): Maximum threshold. Defaults to 1.25.
        N (int, optional): Number of thresholds. Defaults to 10.
        alpha_threshold (float, optional): Threshold for alpha masking. Defaults to 0.1.

    Returns:
    -------
        float: Scale-Invariant Boundary Recall Score.

    """
    assert predicted_depth.ndim == target_mask.ndim == 2
    thresholds, weights = get_thresholds_and_weights(t_min, t_max, N)
    thresholded_target = target_mask > alpha_threshold

    # The inverse depth does not depend on the threshold, so compute it once
    # instead of once per threshold.
    predicted_inverse = invert_depth(predicted_depth)

    recall_scores = np.array(
        [
            edge_recall_matting(predicted_inverse, thresholded_target, t=float(t))
            for t in thresholds
        ]
    )
    weighted_recall = np.sum(recall_scores * weights)
    return weighted_recall