Ruicheng committed on
Commit
ec0c8fa
1 Parent(s): 119634a

first commit

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitignore +25 -0
  2. app.py +111 -0
  3. moge/model/__init__.py +1 -0
  4. moge/model/dinov2/__init__.py +6 -0
  5. moge/model/dinov2/hub/__init__.py +4 -0
  6. moge/model/dinov2/hub/backbones.py +156 -0
  7. moge/model/dinov2/hub/utils.py +39 -0
  8. moge/model/dinov2/layers/__init__.py +11 -0
  9. moge/model/dinov2/layers/attention.py +89 -0
  10. moge/model/dinov2/layers/block.py +259 -0
  11. moge/model/dinov2/layers/dino_head.py +58 -0
  12. moge/model/dinov2/layers/drop_path.py +34 -0
  13. moge/model/dinov2/layers/layer_scale.py +27 -0
  14. moge/model/dinov2/layers/mlp.py +40 -0
  15. moge/model/dinov2/layers/patch_embed.py +88 -0
  16. moge/model/dinov2/layers/swiglu_ffn.py +72 -0
  17. moge/model/dinov2/models/__init__.py +43 -0
  18. moge/model/dinov2/models/vision_transformer.py +396 -0
  19. moge/model/dinov2/utils/__init__.py +4 -0
  20. moge/model/dinov2/utils/cluster.py +95 -0
  21. moge/model/dinov2/utils/config.py +72 -0
  22. moge/model/dinov2/utils/dtype.py +37 -0
  23. moge/model/dinov2/utils/param_groups.py +103 -0
  24. moge/model/dinov2/utils/utils.py +95 -0
  25. moge/model/moge_model.py +376 -0
  26. moge/model/utils.py +38 -0
  27. moge/utils/__init__.py +0 -0
  28. moge/utils/blob.py +314 -0
  29. moge/utils/download.py +55 -0
  30. moge/utils/geometry_numpy.py +175 -0
  31. moge/utils/geometry_torch.py +231 -0
  32. moge/utils/io.py +347 -0
  33. moge/utils/pipeline.py +503 -0
  34. moge/utils/tools.py +240 -0
  35. moge/utils/vis.py +51 -0
  36. moge/utils/webfile.py +73 -0
  37. moge/utils/webzipfile.py +128 -0
  38. packages.txt +1 -0
  39. requirements.txt +5 -0
  40. utils3d/__init__.py +14 -0
  41. utils3d/io/__init__.py +4 -0
  42. utils3d/io/colmap.py +139 -0
  43. utils3d/io/glb.py +105 -0
  44. utils3d/io/ply.py +104 -0
  45. utils3d/io/wavefront_obj.py +146 -0
  46. utils3d/numpy/__init__.py +135 -0
  47. utils3d/numpy/_helpers.py +88 -0
  48. utils3d/numpy/mesh.py +355 -0
  49. utils3d/numpy/quadmesh.py +472 -0
  50. utils3d/numpy/rasterization.py +471 -0
.gitignore ADDED
@@ -0,0 +1,25 @@
+ /data
+ /download
+ /extract
+ /view_point_cloud
+ /view_depth_map
+ /blobcache
+ /snapshot
+ /reference_embeddings
+ /.gradio
+ /debug
+ /workspace
+ /mlruns
+ /infer_output
+ /video_output
+ /eval_output
+ /.blobcache
+ /test_images
+ /test_videos
+ /vis
+ /videos
+ /raid
+ /blobmnt
+ /eval_dump
+ /pretrained
+ __pycache__/
app.py ADDED
@@ -0,0 +1,111 @@
+ import os
+ import time
+ from pathlib import Path
+ import uuid
+ import tempfile
+ from typing import Union
+ import spaces
+ import atexit
+ from concurrent.futures import ThreadPoolExecutor
+
+ import gradio as gr
+ import cv2
+ import torch
+ import numpy as np
+
+ from moge.model import MoGeModel
+ from moge.utils.vis import colorize_depth
+ import utils3d
+
+ model = MoGeModel.from_pretrained('Ruicheng/moge-vitl').cuda().eval()
+ thread_pool_executor = ThreadPoolExecutor(max_workers=1)
+
+
+ def delete_later(path: Union[str, os.PathLike], delay: int = 300):
+     def _delete():
+         try:
+             os.remove(path)
+         except:
+             pass
+     def _wait_and_delete():
+         time.sleep(delay)
+         _delete()  # _delete takes no arguments; it closes over `path`
+     thread_pool_executor.submit(_wait_and_delete)
+     atexit.register(_delete)
+
+ @spaces.GPU
+ def run(image: np.ndarray, remove_edge: bool = True):
+     run_id = str(uuid.uuid4())
+
+     larger_size = max(image.shape[:2])
+     if larger_size > 1024:
+         scale = 1024 / larger_size
+         image = cv2.resize(image, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
+
+     image_tensor = torch.tensor(image, dtype=torch.float32, device=torch.device('cuda')).permute(2, 0, 1) / 255
+     output = model.infer(image_tensor, resolution_level=9, apply_mask=True)
+     points, depth, mask = output['points'].cpu().numpy(), output['depth'].cpu().numpy(), output['mask'].cpu().numpy()
+
+     if remove_edge:
+         mask = mask & ~utils3d.numpy.depth_edge(depth, mask=mask, rtol=0.02)
+     mask = mask & (depth > 0)
+
+     _, faces, indices = utils3d.numpy.image_mesh(width=image.shape[1], height=image.shape[0], mask=mask)
+     faces = utils3d.numpy.triangulate(faces)
+
+     tempdir = Path(tempfile.gettempdir(), 'moge')
+     tempdir.mkdir(exist_ok=True)
+
+     output_glb_path = Path(tempdir, f'{run_id}.glb')
+     output_glb_path.parent.mkdir(exist_ok=True)
+     tempfile.TemporaryFile()
+     utils3d.io.write_glb(
+         output_glb_path,
+         vertices=points.reshape(-1, 3)[indices] * [-1, -1, 1],
+         faces=faces,
+         vertex_colors=image.reshape(-1, 3)[indices] / 255,
+     )
+
+     output_ply_path = Path(tempdir, f'{run_id}.ply')
+     output_ply_path.parent.mkdir(exist_ok=True)
+     utils3d.io.write_ply(
+         output_ply_path,
+         vertices=points.reshape(-1, 3)[indices] * [-1, -1, 1],
+         faces=faces,
+         vertex_colors=image.reshape(-1, 3)[indices] / 255,
+     )
+
+     colorized_depth = colorize_depth(depth)
+
+     delete_later(output_glb_path, delay=300)
+     delete_later(output_ply_path, delay=300)
+
+     return colorized_depth, output_glb_path, output_ply_path.as_posix()
+
+
+ DESCRIPTION = """
+ MoGe turns 2D images into 3D point maps.
+
+ NOTE:
+ * If the image is larger than 1024 px on its longer side, it will be resized accordingly.
+ * Colors in the 3D viewer may look dark because of the viewer's rendering. You can download the model as a .glb or .ply file and open it in another 3D viewer.
+ """
+
+ if __name__ == '__main__':
+
+     gr.Interface(
+         fn=run,
+         inputs=[
+             gr.Image(type="numpy", image_mode="RGB"),
+             gr.Checkbox(True, label="Remove edges"),
+         ],
+         outputs=[
+             gr.Image(type="numpy", label="Depth map (colorized)"),
+             gr.Model3D(display_mode="solid", clear_color=[1.0, 1.0, 1.0, 1.0], label="3D Viewer"),
+             gr.File(type="filepath", label="Download the model as .ply file"),
+         ],
+         title="MoGe Live Demo",
+         description=DESCRIPTION,
+         clear_btn=None,
+         allow_flagging="never",
+     ).launch(share=False)
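
For reference, the same inference path can be exercised outside Gradio. This is a minimal sketch, not part of the commit; it assumes a CUDA device, downloads the checkpoint on first use, and 'example.jpg' is a placeholder path:

    import cv2, torch
    from moge.model import MoGeModel

    # Same checkpoint and call pattern as run() in app.py above.
    model = MoGeModel.from_pretrained('Ruicheng/moge-vitl').cuda().eval()
    image = cv2.cvtColor(cv2.imread('example.jpg'), cv2.COLOR_BGR2RGB)
    image_tensor = torch.tensor(image, dtype=torch.float32, device='cuda').permute(2, 0, 1) / 255
    output = model.infer(image_tensor, resolution_level=9, apply_mask=True)
    depth = output['depth'].cpu().numpy()  # 'points' and 'mask' are returned alongside, as used above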
moge/model/__init__.py ADDED
@@ -0,0 +1 @@
+ from .moge_model import MoGeModel
moge/model/dinov2/__init__.py ADDED
@@ -0,0 +1,6 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ __version__ = "0.0.1"
moge/model/dinov2/hub/__init__.py ADDED
@@ -0,0 +1,4 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
moge/model/dinov2/hub/backbones.py ADDED
@@ -0,0 +1,156 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ from enum import Enum
+ from typing import Union
+
+ import torch
+
+ from .utils import _DINOV2_BASE_URL, _make_dinov2_model_name
+
+
+ class Weights(Enum):
+     LVD142M = "LVD142M"
+
+
+ def _make_dinov2_model(
+     *,
+     arch_name: str = "vit_large",
+     img_size: int = 518,
+     patch_size: int = 14,
+     init_values: float = 1.0,
+     ffn_layer: str = "mlp",
+     block_chunks: int = 0,
+     num_register_tokens: int = 0,
+     interpolate_antialias: bool = False,
+     interpolate_offset: float = 0.1,
+     pretrained: bool = True,
+     weights: Union[Weights, str] = Weights.LVD142M,
+     **kwargs,
+ ):
+     from ..models import vision_transformer as vits
+
+     if isinstance(weights, str):
+         try:
+             weights = Weights[weights]
+         except KeyError:
+             raise AssertionError(f"Unsupported weights: {weights}")
+
+     model_base_name = _make_dinov2_model_name(arch_name, patch_size)
+     vit_kwargs = dict(
+         img_size=img_size,
+         patch_size=patch_size,
+         init_values=init_values,
+         ffn_layer=ffn_layer,
+         block_chunks=block_chunks,
+         num_register_tokens=num_register_tokens,
+         interpolate_antialias=interpolate_antialias,
+         interpolate_offset=interpolate_offset,
+     )
+     vit_kwargs.update(**kwargs)
+     model = vits.__dict__[arch_name](**vit_kwargs)
+
+     if pretrained:
+         model_full_name = _make_dinov2_model_name(arch_name, patch_size, num_register_tokens)
+         url = _DINOV2_BASE_URL + f"/{model_base_name}/{model_full_name}_pretrain.pth"
+         state_dict = torch.hub.load_state_dict_from_url(url, map_location="cpu")
+         model.load_state_dict(state_dict, strict=True)
+
+     return model
+
+
+ def dinov2_vits14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-S/14 model (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(arch_name="vit_small", pretrained=pretrained, weights=weights, **kwargs)
+
+
+ def dinov2_vitb14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-B/14 model (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(arch_name="vit_base", pretrained=pretrained, weights=weights, **kwargs)
+
+
+ def dinov2_vitl14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-L/14 model (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(arch_name="vit_large", pretrained=pretrained, weights=weights, **kwargs)
+
+
+ def dinov2_vitg14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-g/14 model (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(
+         arch_name="vit_giant2",
+         ffn_layer="swiglufused",
+         weights=weights,
+         pretrained=pretrained,
+         **kwargs,
+     )
+
+
+ def dinov2_vits14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-S/14 model with registers (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(
+         arch_name="vit_small",
+         pretrained=pretrained,
+         weights=weights,
+         num_register_tokens=4,
+         interpolate_antialias=True,
+         interpolate_offset=0.0,
+         **kwargs,
+     )
+
+
+ def dinov2_vitb14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-B/14 model with registers (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(
+         arch_name="vit_base",
+         pretrained=pretrained,
+         weights=weights,
+         num_register_tokens=4,
+         interpolate_antialias=True,
+         interpolate_offset=0.0,
+         **kwargs,
+     )
+
+
+ def dinov2_vitl14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-L/14 model with registers (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(
+         arch_name="vit_large",
+         pretrained=pretrained,
+         weights=weights,
+         num_register_tokens=4,
+         interpolate_antialias=True,
+         interpolate_offset=0.0,
+         **kwargs,
+     )
+
+
+ def dinov2_vitg14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-g/14 model with registers (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(
+         arch_name="vit_giant2",
+         ffn_layer="swiglufused",
+         weights=weights,
+         pretrained=pretrained,
+         num_register_tokens=4,
+         interpolate_antialias=True,
+         interpolate_offset=0.0,
+         **kwargs,
+     )
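
As a usage note (not part of the diff), the factory functions above can be called directly. A minimal sketch, assuming the repository root is on the Python path and skipping the pretrained-weight download:

    from moge.model.dinov2.hub.backbones import dinov2_vitl14

    # Build the ViT-L/14 architecture only; pretrained=True would fetch the
    # LVD-142M checkpoint from _DINOV2_BASE_URL via torch.hub.
    backbone = dinov2_vitl14(pretrained=False)
    print(backbone.embed_dim)  # 1024 for vit_large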
moge/model/dinov2/hub/utils.py ADDED
@@ -0,0 +1,39 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ import itertools
+ import math
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+
+ _DINOV2_BASE_URL = "https://dl.fbaipublicfiles.com/dinov2"
+
+
+ def _make_dinov2_model_name(arch_name: str, patch_size: int, num_register_tokens: int = 0) -> str:
+     compact_arch_name = arch_name.replace("_", "")[:4]
+     registers_suffix = f"_reg{num_register_tokens}" if num_register_tokens else ""
+     return f"dinov2_{compact_arch_name}{patch_size}{registers_suffix}"
+
+
+ class CenterPadding(nn.Module):
+     def __init__(self, multiple):
+         super().__init__()
+         self.multiple = multiple
+
+     def _get_pad(self, size):
+         new_size = math.ceil(size / self.multiple) * self.multiple
+         pad_size = new_size - size
+         pad_size_left = pad_size // 2
+         pad_size_right = pad_size - pad_size_left
+         return pad_size_left, pad_size_right
+
+     @torch.inference_mode()
+     def forward(self, x):
+         pads = list(itertools.chain.from_iterable(self._get_pad(m) for m in x.shape[:1:-1]))
+         output = F.pad(x, pads)
+         return output
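
CenterPadding pads the trailing spatial dimensions up to the next multiple of `multiple`, which keeps arbitrary image sizes compatible with the patch grid. A short sketch (not part of the commit):

    import torch
    from moge.model.dinov2.hub.utils import CenterPadding

    pad = CenterPadding(multiple=14)
    x = torch.randn(1, 3, 500, 700)
    y = pad(x)
    print(y.shape)  # torch.Size([1, 3, 504, 700]): 500 is rounded up to 504, 700 is already a multiple of 14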
moge/model/dinov2/layers/__init__.py ADDED
@@ -0,0 +1,11 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ from .dino_head import DINOHead
+ from .mlp import Mlp
+ from .patch_embed import PatchEmbed
+ from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused
+ from .block import NestedTensorBlock
+ from .attention import MemEffAttention
moge/model/dinov2/layers/attention.py ADDED
@@ -0,0 +1,89 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ # References:
+ #   https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
+ #   https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py
+
+ import logging
+ import os
+ import warnings
+
+ from torch import Tensor
+ from torch import nn
+
+
+ logger = logging.getLogger("dinov2")
+
+
+ XFORMERS_ENABLED = os.environ.get("XFORMERS_DISABLED") is None
+ try:
+     if XFORMERS_ENABLED:
+         from xformers.ops import memory_efficient_attention, unbind
+
+         XFORMERS_AVAILABLE = True
+         # warnings.warn("xFormers is available (Attention)")
+     else:
+         # warnings.warn("xFormers is disabled (Attention)")
+         raise ImportError
+ except ImportError:
+     XFORMERS_AVAILABLE = False
+     # warnings.warn("xFormers is not available (Attention)")
+
+
+ class Attention(nn.Module):
+     def __init__(
+         self,
+         dim: int,
+         num_heads: int = 8,
+         qkv_bias: bool = False,
+         proj_bias: bool = True,
+         attn_drop: float = 0.0,
+         proj_drop: float = 0.0,
+     ) -> None:
+         super().__init__()
+         self.num_heads = num_heads
+         head_dim = dim // num_heads
+         self.scale = head_dim**-0.5
+
+         self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+         self.attn_drop = nn.Dropout(attn_drop)
+         self.proj = nn.Linear(dim, dim, bias=proj_bias)
+         self.proj_drop = nn.Dropout(proj_drop)
+
+     def forward(self, x: Tensor, attn_bias=None) -> Tensor:
+         B, N, C = x.shape
+         qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
+
+         q, k, v = qkv[0] * self.scale, qkv[1], qkv[2]
+         attn = q @ k.transpose(-2, -1)
+
+         attn = attn.softmax(dim=-1)
+         attn = self.attn_drop(attn)
+
+         x = (attn @ v).transpose(1, 2).reshape(B, N, C)
+         x = self.proj(x)
+         x = self.proj_drop(x)
+         return x
+
+
+ class MemEffAttention(Attention):
+     def forward(self, x: Tensor, attn_bias=None) -> Tensor:
+         if not XFORMERS_AVAILABLE:
+             if attn_bias is not None:
+                 raise AssertionError("xFormers is required for using nested tensors")
+             return super().forward(x)
+
+         B, N, C = x.shape
+         qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)
+
+         q, k, v = unbind(qkv, 2)
+
+         x = memory_efficient_attention(q, k, v, attn_bias=attn_bias)
+         x = x.reshape([B, N, C])
+
+         x = self.proj(x)
+         x = self.proj_drop(x)
+         return x
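
Both attention classes share the same interface; MemEffAttention dispatches to xFormers' memory_efficient_attention when the package is importable and otherwise falls back to the plain implementation above. A quick shape check (not part of the commit; on a machine without xFormers this exercises the fallback path):

    import torch
    from moge.model.dinov2.layers.attention import MemEffAttention

    attn = MemEffAttention(dim=384, num_heads=6)
    tokens = torch.randn(2, 197, 384)  # (batch, sequence, embedding)
    out = attn(tokens)                 # same shape as the input: (2, 197, 384)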
moge/model/dinov2/layers/block.py ADDED
@@ -0,0 +1,259 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ # References:
+ #   https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
+ #   https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py
+
+ import logging
+ import os
+ from typing import Callable, List, Any, Tuple, Dict
+ import warnings
+
+ import torch
+ from torch import nn, Tensor
+
+ from .attention import Attention, MemEffAttention
+ from .drop_path import DropPath
+ from .layer_scale import LayerScale
+ from .mlp import Mlp
+
+
+ logger = logging.getLogger("dinov2")
+
+
+ XFORMERS_ENABLED = os.environ.get("XFORMERS_DISABLED") is None
+ try:
+     if XFORMERS_ENABLED:
+         from xformers.ops import fmha, scaled_index_add, index_select_cat
+
+         XFORMERS_AVAILABLE = True
+         # warnings.warn("xFormers is available (Block)")
+     else:
+         # warnings.warn("xFormers is disabled (Block)")
+         raise ImportError
+ except ImportError:
+     XFORMERS_AVAILABLE = False
+     # warnings.warn("xFormers is not available (Block)")
+
+
+ class Block(nn.Module):
+     def __init__(
+         self,
+         dim: int,
+         num_heads: int,
+         mlp_ratio: float = 4.0,
+         qkv_bias: bool = False,
+         proj_bias: bool = True,
+         ffn_bias: bool = True,
+         drop: float = 0.0,
+         attn_drop: float = 0.0,
+         init_values=None,
+         drop_path: float = 0.0,
+         act_layer: Callable[..., nn.Module] = nn.GELU,
+         norm_layer: Callable[..., nn.Module] = nn.LayerNorm,
+         attn_class: Callable[..., nn.Module] = Attention,
+         ffn_layer: Callable[..., nn.Module] = Mlp,
+     ) -> None:
+         super().__init__()
+         # print(f"biases: qkv: {qkv_bias}, proj: {proj_bias}, ffn: {ffn_bias}")
+         self.norm1 = norm_layer(dim)
+         self.attn = attn_class(
+             dim,
+             num_heads=num_heads,
+             qkv_bias=qkv_bias,
+             proj_bias=proj_bias,
+             attn_drop=attn_drop,
+             proj_drop=drop,
+         )
+         self.ls1 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
+         self.drop_path1 = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
+
+         self.norm2 = norm_layer(dim)
+         mlp_hidden_dim = int(dim * mlp_ratio)
+         self.mlp = ffn_layer(
+             in_features=dim,
+             hidden_features=mlp_hidden_dim,
+             act_layer=act_layer,
+             drop=drop,
+             bias=ffn_bias,
+         )
+         self.ls2 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
+         self.drop_path2 = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
+
+         self.sample_drop_ratio = drop_path
+
+     def forward(self, x: Tensor) -> Tensor:
+         def attn_residual_func(x: Tensor) -> Tensor:
+             return self.ls1(self.attn(self.norm1(x)))
+
+         def ffn_residual_func(x: Tensor) -> Tensor:
+             return self.ls2(self.mlp(self.norm2(x)))
+
+         if self.training and self.sample_drop_ratio > 0.1:
+             # the overhead is compensated only for a drop path rate larger than 0.1
+             x = drop_add_residual_stochastic_depth(
+                 x,
+                 residual_func=attn_residual_func,
+                 sample_drop_ratio=self.sample_drop_ratio,
+             )
+             x = drop_add_residual_stochastic_depth(
+                 x,
+                 residual_func=ffn_residual_func,
+                 sample_drop_ratio=self.sample_drop_ratio,
+             )
+         elif self.training and self.sample_drop_ratio > 0.0:
+             x = x + self.drop_path1(attn_residual_func(x))
+             x = x + self.drop_path1(ffn_residual_func(x))  # FIXME: drop_path2
+         else:
+             x = x + attn_residual_func(x)
+             x = x + ffn_residual_func(x)
+         return x
+
+
+ def drop_add_residual_stochastic_depth(
+     x: Tensor,
+     residual_func: Callable[[Tensor], Tensor],
+     sample_drop_ratio: float = 0.0,
+ ) -> Tensor:
+     # 1) extract subset using permutation
+     b, n, d = x.shape
+     sample_subset_size = max(int(b * (1 - sample_drop_ratio)), 1)
+     brange = (torch.randperm(b, device=x.device))[:sample_subset_size]
+     x_subset = x[brange]
+
+     # 2) apply residual_func to get residual
+     residual = residual_func(x_subset)
+
+     x_flat = x.flatten(1)
+     residual = residual.flatten(1)
+
+     residual_scale_factor = b / sample_subset_size
+
+     # 3) add the residual
+     x_plus_residual = torch.index_add(x_flat, 0, brange, residual.to(dtype=x.dtype), alpha=residual_scale_factor)
+     return x_plus_residual.view_as(x)
+
+
+ def get_branges_scales(x, sample_drop_ratio=0.0):
+     b, n, d = x.shape
+     sample_subset_size = max(int(b * (1 - sample_drop_ratio)), 1)
+     brange = (torch.randperm(b, device=x.device))[:sample_subset_size]
+     residual_scale_factor = b / sample_subset_size
+     return brange, residual_scale_factor
+
+
+ def add_residual(x, brange, residual, residual_scale_factor, scaling_vector=None):
+     if scaling_vector is None:
+         x_flat = x.flatten(1)
+         residual = residual.flatten(1)
+         x_plus_residual = torch.index_add(x_flat, 0, brange, residual.to(dtype=x.dtype), alpha=residual_scale_factor)
+     else:
+         x_plus_residual = scaled_index_add(
+             x, brange, residual.to(dtype=x.dtype), scaling=scaling_vector, alpha=residual_scale_factor
+         )
+     return x_plus_residual
+
+
+ attn_bias_cache: Dict[Tuple, Any] = {}
+
+
+ def get_attn_bias_and_cat(x_list, branges=None):
+     """
+     this will perform the index select, cat the tensors, and provide the attn_bias from cache
+     """
+     batch_sizes = [b.shape[0] for b in branges] if branges is not None else [x.shape[0] for x in x_list]
+     all_shapes = tuple((b, x.shape[1]) for b, x in zip(batch_sizes, x_list))
+     if all_shapes not in attn_bias_cache.keys():
+         seqlens = []
+         for b, x in zip(batch_sizes, x_list):
+             for _ in range(b):
+                 seqlens.append(x.shape[1])
+         attn_bias = fmha.BlockDiagonalMask.from_seqlens(seqlens)
+         attn_bias._batch_sizes = batch_sizes
+         attn_bias_cache[all_shapes] = attn_bias
+
+     if branges is not None:
+         cat_tensors = index_select_cat([x.flatten(1) for x in x_list], branges).view(1, -1, x_list[0].shape[-1])
+     else:
+         tensors_bs1 = tuple(x.reshape([1, -1, *x.shape[2:]]) for x in x_list)
+         cat_tensors = torch.cat(tensors_bs1, dim=1)
+
+     return attn_bias_cache[all_shapes], cat_tensors
+
+
+ def drop_add_residual_stochastic_depth_list(
+     x_list: List[Tensor],
+     residual_func: Callable[[Tensor, Any], Tensor],
+     sample_drop_ratio: float = 0.0,
+     scaling_vector=None,
+ ) -> Tensor:
+     # 1) generate random set of indices for dropping samples in the batch
+     branges_scales = [get_branges_scales(x, sample_drop_ratio=sample_drop_ratio) for x in x_list]
+     branges = [s[0] for s in branges_scales]
+     residual_scale_factors = [s[1] for s in branges_scales]
+
+     # 2) get attention bias and index+concat the tensors
+     attn_bias, x_cat = get_attn_bias_and_cat(x_list, branges)
+
+     # 3) apply residual_func to get residual, and split the result
+     residual_list = attn_bias.split(residual_func(x_cat, attn_bias=attn_bias))  # type: ignore
+
+     outputs = []
+     for x, brange, residual, residual_scale_factor in zip(x_list, branges, residual_list, residual_scale_factors):
+         outputs.append(add_residual(x, brange, residual, residual_scale_factor, scaling_vector).view_as(x))
+     return outputs
+
+
+ class NestedTensorBlock(Block):
+     def forward_nested(self, x_list: List[Tensor]) -> List[Tensor]:
+         """
+         x_list contains a list of tensors to nest together and run
+         """
+         assert isinstance(self.attn, MemEffAttention)
+
+         if self.training and self.sample_drop_ratio > 0.0:
+
+             def attn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
+                 return self.attn(self.norm1(x), attn_bias=attn_bias)
+
+             def ffn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
+                 return self.mlp(self.norm2(x))
+
+             x_list = drop_add_residual_stochastic_depth_list(
+                 x_list,
+                 residual_func=attn_residual_func,
+                 sample_drop_ratio=self.sample_drop_ratio,
+                 scaling_vector=self.ls1.gamma if isinstance(self.ls1, LayerScale) else None,
+             )
+             x_list = drop_add_residual_stochastic_depth_list(
+                 x_list,
+                 residual_func=ffn_residual_func,
+                 sample_drop_ratio=self.sample_drop_ratio,
+                 scaling_vector=self.ls2.gamma if isinstance(self.ls1, LayerScale) else None,
+             )
+             return x_list
+         else:
+
+             def attn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
+                 return self.ls1(self.attn(self.norm1(x), attn_bias=attn_bias))
+
+             def ffn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
+                 return self.ls2(self.mlp(self.norm2(x)))
+
+             attn_bias, x = get_attn_bias_and_cat(x_list)
+             x = x + attn_residual_func(x, attn_bias=attn_bias)
+             x = x + ffn_residual_func(x)
+             return attn_bias.split(x)
+
+     def forward(self, x_or_x_list):
+         if isinstance(x_or_x_list, Tensor):
+             return super().forward(x_or_x_list)
+         elif isinstance(x_or_x_list, list):
+             if not XFORMERS_AVAILABLE:
+                 raise AssertionError("xFormers is required for using nested tensors")
+             return self.forward_nested(x_or_x_list)
+         else:
+             raise AssertionError
moge/model/dinov2/layers/dino_head.py ADDED
@@ -0,0 +1,58 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ import torch
+ import torch.nn as nn
+ from torch.nn.init import trunc_normal_
+ from torch.nn.utils import weight_norm
+
+
+ class DINOHead(nn.Module):
+     def __init__(
+         self,
+         in_dim,
+         out_dim,
+         use_bn=False,
+         nlayers=3,
+         hidden_dim=2048,
+         bottleneck_dim=256,
+         mlp_bias=True,
+     ):
+         super().__init__()
+         nlayers = max(nlayers, 1)
+         self.mlp = _build_mlp(nlayers, in_dim, bottleneck_dim, hidden_dim=hidden_dim, use_bn=use_bn, bias=mlp_bias)
+         self.apply(self._init_weights)
+         self.last_layer = weight_norm(nn.Linear(bottleneck_dim, out_dim, bias=False))
+         self.last_layer.weight_g.data.fill_(1)
+
+     def _init_weights(self, m):
+         if isinstance(m, nn.Linear):
+             trunc_normal_(m.weight, std=0.02)
+             if isinstance(m, nn.Linear) and m.bias is not None:
+                 nn.init.constant_(m.bias, 0)
+
+     def forward(self, x):
+         x = self.mlp(x)
+         eps = 1e-6 if x.dtype == torch.float16 else 1e-12
+         x = nn.functional.normalize(x, dim=-1, p=2, eps=eps)
+         x = self.last_layer(x)
+         return x
+
+
+ def _build_mlp(nlayers, in_dim, bottleneck_dim, hidden_dim=None, use_bn=False, bias=True):
+     if nlayers == 1:
+         return nn.Linear(in_dim, bottleneck_dim, bias=bias)
+     else:
+         layers = [nn.Linear(in_dim, hidden_dim, bias=bias)]
+         if use_bn:
+             layers.append(nn.BatchNorm1d(hidden_dim))
+         layers.append(nn.GELU())
+         for _ in range(nlayers - 2):
+             layers.append(nn.Linear(hidden_dim, hidden_dim, bias=bias))
+             if use_bn:
+                 layers.append(nn.BatchNorm1d(hidden_dim))
+             layers.append(nn.GELU())
+         layers.append(nn.Linear(hidden_dim, bottleneck_dim, bias=bias))
+         return nn.Sequential(*layers)
moge/model/dinov2/layers/drop_path.py ADDED
@@ -0,0 +1,34 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ # References:
+ #   https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
+ #   https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/drop.py
+
+
+ from torch import nn
+
+
+ def drop_path(x, drop_prob: float = 0.0, training: bool = False):
+     if drop_prob == 0.0 or not training:
+         return x
+     keep_prob = 1 - drop_prob
+     shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
+     random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
+     if keep_prob > 0.0:
+         random_tensor.div_(keep_prob)
+     output = x * random_tensor
+     return output
+
+
+ class DropPath(nn.Module):
+     """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""
+
+     def __init__(self, drop_prob=None):
+         super(DropPath, self).__init__()
+         self.drop_prob = drop_prob
+
+     def forward(self, x):
+         return drop_path(x, self.drop_prob, self.training)
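
DropPath is the per-sample stochastic depth used by Block above: in eval mode it is a no-op, and in training mode it zeroes each sample's residual branch with probability drop_prob and rescales survivors by 1/keep_prob so the expectation is unchanged. A short sketch (not part of the commit):

    import torch
    from moge.model.dinov2.layers.drop_path import DropPath

    dp = DropPath(drop_prob=0.2)
    x = torch.ones(8, 4, 16)

    dp.eval()
    assert torch.equal(dp(x), x)  # identity at inference time

    dp.train()
    y = dp(x)                     # each sample is either zeroed or scaled by 1 / 0.8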
moge/model/dinov2/layers/layer_scale.py ADDED
@@ -0,0 +1,27 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ # Modified from: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L103-L110
+
+ from typing import Union
+
+ import torch
+ from torch import Tensor
+ from torch import nn
+
+
+ class LayerScale(nn.Module):
+     def __init__(
+         self,
+         dim: int,
+         init_values: Union[float, Tensor] = 1e-5,
+         inplace: bool = False,
+     ) -> None:
+         super().__init__()
+         self.inplace = inplace
+         self.gamma = nn.Parameter(init_values * torch.ones(dim))
+
+     def forward(self, x: Tensor) -> Tensor:
+         return x.mul_(self.gamma) if self.inplace else x * self.gamma
moge/model/dinov2/layers/mlp.py ADDED
@@ -0,0 +1,40 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ # References:
+ #   https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
+ #   https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/mlp.py
+
+
+ from typing import Callable, Optional
+
+ from torch import Tensor, nn
+
+
+ class Mlp(nn.Module):
+     def __init__(
+         self,
+         in_features: int,
+         hidden_features: Optional[int] = None,
+         out_features: Optional[int] = None,
+         act_layer: Callable[..., nn.Module] = nn.GELU,
+         drop: float = 0.0,
+         bias: bool = True,
+     ) -> None:
+         super().__init__()
+         out_features = out_features or in_features
+         hidden_features = hidden_features or in_features
+         self.fc1 = nn.Linear(in_features, hidden_features, bias=bias)
+         self.act = act_layer()
+         self.fc2 = nn.Linear(hidden_features, out_features, bias=bias)
+         self.drop = nn.Dropout(drop)
+
+     def forward(self, x: Tensor) -> Tensor:
+         x = self.fc1(x)
+         x = self.act(x)
+         x = self.drop(x)
+         x = self.fc2(x)
+         x = self.drop(x)
+         return x
moge/model/dinov2/layers/patch_embed.py ADDED
@@ -0,0 +1,88 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ # References:
+ #   https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
+ #   https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py
+
+ from typing import Callable, Optional, Tuple, Union
+
+ from torch import Tensor
+ import torch.nn as nn
+
+
+ def make_2tuple(x):
+     if isinstance(x, tuple):
+         assert len(x) == 2
+         return x
+
+     assert isinstance(x, int)
+     return (x, x)
+
+
+ class PatchEmbed(nn.Module):
+     """
+     2D image to patch embedding: (B,C,H,W) -> (B,N,D)
+
+     Args:
+         img_size: Image size.
+         patch_size: Patch token size.
+         in_chans: Number of input image channels.
+         embed_dim: Number of linear projection output channels.
+         norm_layer: Normalization layer.
+     """
+
+     def __init__(
+         self,
+         img_size: Union[int, Tuple[int, int]] = 224,
+         patch_size: Union[int, Tuple[int, int]] = 16,
+         in_chans: int = 3,
+         embed_dim: int = 768,
+         norm_layer: Optional[Callable] = None,
+         flatten_embedding: bool = True,
+     ) -> None:
+         super().__init__()
+
+         image_HW = make_2tuple(img_size)
+         patch_HW = make_2tuple(patch_size)
+         patch_grid_size = (
+             image_HW[0] // patch_HW[0],
+             image_HW[1] // patch_HW[1],
+         )
+
+         self.img_size = image_HW
+         self.patch_size = patch_HW
+         self.patches_resolution = patch_grid_size
+         self.num_patches = patch_grid_size[0] * patch_grid_size[1]
+
+         self.in_chans = in_chans
+         self.embed_dim = embed_dim
+
+         self.flatten_embedding = flatten_embedding
+
+         self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_HW, stride=patch_HW)
+         self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()
+
+     def forward(self, x: Tensor) -> Tensor:
+         _, _, H, W = x.shape
+         patch_H, patch_W = self.patch_size
+
+         assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}"
+         assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}"
+
+         x = self.proj(x)  # B C H W
+         H, W = x.size(2), x.size(3)
+         x = x.flatten(2).transpose(1, 2)  # B HW C
+         x = self.norm(x)
+         if not self.flatten_embedding:
+             x = x.reshape(-1, H, W, self.embed_dim)  # B H W C
+         return x
+
+     def flops(self) -> float:
+         Ho, Wo = self.patches_resolution
+         flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1])
+         if self.norm is not None:
+             flops += Ho * Wo * self.embed_dim
+         return flops
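
PatchEmbed is a strided Conv2d followed by flattening, so a (B, C, H, W) image becomes (B, H/p * W/p, embed_dim) tokens. A shape sketch (not part of the commit), using the ViT-L/14 settings from the hub entry points:

    import torch
    from moge.model.dinov2.layers.patch_embed import PatchEmbed

    embed = PatchEmbed(img_size=518, patch_size=14, in_chans=3, embed_dim=1024)
    x = torch.randn(1, 3, 518, 518)
    tokens = embed(x)
    print(tokens.shape)  # torch.Size([1, 1369, 1024]): a 37 x 37 grid of 14-pixel patches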
moge/model/dinov2/layers/swiglu_ffn.py ADDED
@@ -0,0 +1,72 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ import os
+ from typing import Callable, Optional
+ import warnings
+
+ from torch import Tensor, nn
+ import torch.nn.functional as F
+
+
+ class SwiGLUFFN(nn.Module):
+     def __init__(
+         self,
+         in_features: int,
+         hidden_features: Optional[int] = None,
+         out_features: Optional[int] = None,
+         act_layer: Callable[..., nn.Module] = None,
+         drop: float = 0.0,
+         bias: bool = True,
+     ) -> None:
+         super().__init__()
+         out_features = out_features or in_features
+         hidden_features = hidden_features or in_features
+         self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias)
+         self.w3 = nn.Linear(hidden_features, out_features, bias=bias)
+
+     def forward(self, x: Tensor) -> Tensor:
+         x12 = self.w12(x)
+         x1, x2 = x12.chunk(2, dim=-1)
+         hidden = F.silu(x1) * x2
+         return self.w3(hidden)
+
+
+ XFORMERS_ENABLED = os.environ.get("XFORMERS_DISABLED") is None
+ try:
+     if XFORMERS_ENABLED:
+         from xformers.ops import SwiGLU
+
+         XFORMERS_AVAILABLE = True
+         # warnings.warn("xFormers is available (SwiGLU)")
+     else:
+         # warnings.warn("xFormers is disabled (SwiGLU)")
+         raise ImportError
+ except ImportError:
+     SwiGLU = SwiGLUFFN
+     XFORMERS_AVAILABLE = False
+
+     # warnings.warn("xFormers is not available (SwiGLU)")
+
+
+ class SwiGLUFFNFused(SwiGLU):
+     def __init__(
+         self,
+         in_features: int,
+         hidden_features: Optional[int] = None,
+         out_features: Optional[int] = None,
+         act_layer: Callable[..., nn.Module] = None,
+         drop: float = 0.0,
+         bias: bool = True,
+     ) -> None:
+         out_features = out_features or in_features
+         hidden_features = hidden_features or in_features
+         hidden_features = (int(hidden_features * 2 / 3) + 7) // 8 * 8
+         super().__init__(
+             in_features=in_features,
+             hidden_features=hidden_features,
+             out_features=out_features,
+             bias=bias,
+         )
moge/model/dinov2/models/__init__.py ADDED
@@ -0,0 +1,43 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ import logging
+
+ from . import vision_transformer as vits
+
+
+ logger = logging.getLogger("dinov2")
+
+
+ def build_model(args, only_teacher=False, img_size=224):
+     args.arch = args.arch.removesuffix("_memeff")
+     if "vit" in args.arch:
+         vit_kwargs = dict(
+             img_size=img_size,
+             patch_size=args.patch_size,
+             init_values=args.layerscale,
+             ffn_layer=args.ffn_layer,
+             block_chunks=args.block_chunks,
+             qkv_bias=args.qkv_bias,
+             proj_bias=args.proj_bias,
+             ffn_bias=args.ffn_bias,
+             num_register_tokens=args.num_register_tokens,
+             interpolate_offset=args.interpolate_offset,
+             interpolate_antialias=args.interpolate_antialias,
+         )
+         teacher = vits.__dict__[args.arch](**vit_kwargs)
+         if only_teacher:
+             return teacher, teacher.embed_dim
+         student = vits.__dict__[args.arch](
+             **vit_kwargs,
+             drop_path_rate=args.drop_path_rate,
+             drop_path_uniform=args.drop_path_uniform,
+         )
+         embed_dim = student.embed_dim
+     return student, teacher, embed_dim
+
+
+ def build_model_from_cfg(cfg, only_teacher=False):
+     return build_model(cfg.student, only_teacher=only_teacher, img_size=cfg.crops.global_crops_size)
moge/model/dinov2/models/vision_transformer.py ADDED
@@ -0,0 +1,396 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ # References:
+ #   https://github.com/facebookresearch/dino/blob/main/vision_transformer.py
+ #   https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py
+
+ from functools import partial
+ import math
+ import logging
+ from typing import Sequence, Tuple, Union, Callable
+
+ import torch
+ import torch.nn as nn
+ import torch.utils.checkpoint
+ from torch.nn.init import trunc_normal_
+
+ from ..layers import Mlp, PatchEmbed, SwiGLUFFNFused, MemEffAttention, NestedTensorBlock as Block
+
+
+ logger = logging.getLogger("dinov2")
+
+
+ def named_apply(fn: Callable, module: nn.Module, name="", depth_first=True, include_root=False) -> nn.Module:
+     if not depth_first and include_root:
+         fn(module=module, name=name)
+     for child_name, child_module in module.named_children():
+         child_name = ".".join((name, child_name)) if name else child_name
+         named_apply(fn=fn, module=child_module, name=child_name, depth_first=depth_first, include_root=True)
+     if depth_first and include_root:
+         fn(module=module, name=name)
+     return module
+
+
+ class BlockChunk(nn.ModuleList):
+     def forward(self, x):
+         for b in self:
+             x = b(x)
+         return x
+
+
+ class DinoVisionTransformer(nn.Module):
+     def __init__(
+         self,
+         img_size=224,
+         patch_size=16,
+         in_chans=3,
+         embed_dim=768,
+         depth=12,
+         num_heads=12,
+         mlp_ratio=4.0,
+         qkv_bias=True,
+         ffn_bias=True,
+         proj_bias=True,
+         drop_path_rate=0.0,
+         drop_path_uniform=False,
+         init_values=None,  # for layerscale: None or 0 => no layerscale
+         embed_layer=PatchEmbed,
+         act_layer=nn.GELU,
+         block_fn=Block,
+         ffn_layer="mlp",
+         block_chunks=1,
+         num_register_tokens=0,
+         interpolate_antialias=False,
+         interpolate_offset=0.1,
+     ):
+         """
+         Args:
+             img_size (int, tuple): input image size
+             patch_size (int, tuple): patch size
+             in_chans (int): number of input channels
+             embed_dim (int): embedding dimension
+             depth (int): depth of transformer
+             num_heads (int): number of attention heads
+             mlp_ratio (int): ratio of mlp hidden dim to embedding dim
+             qkv_bias (bool): enable bias for qkv if True
+             proj_bias (bool): enable bias for proj in attn if True
+             ffn_bias (bool): enable bias for ffn if True
+             drop_path_rate (float): stochastic depth rate
+             drop_path_uniform (bool): apply uniform drop rate across blocks
+             weight_init (str): weight init scheme
+             init_values (float): layer-scale init values
+             embed_layer (nn.Module): patch embedding layer
+             act_layer (nn.Module): MLP activation layer
+             block_fn (nn.Module): transformer block class
+             ffn_layer (str): "mlp", "swiglu", "swiglufused" or "identity"
+             block_chunks: (int) split block sequence into block_chunks units for FSDP wrap
+             num_register_tokens: (int) number of extra cls tokens (so-called "registers")
+             interpolate_antialias: (str) flag to apply anti-aliasing when interpolating positional embeddings
+             interpolate_offset: (float) work-around offset to apply when interpolating positional embeddings
+         """
+         super().__init__()
+         norm_layer = partial(nn.LayerNorm, eps=1e-6)
+
+         self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
+         self.num_tokens = 1
+         self.n_blocks = depth
+         self.num_heads = num_heads
+         self.patch_size = patch_size
+         self.num_register_tokens = num_register_tokens
+         self.interpolate_antialias = interpolate_antialias
+         self.interpolate_offset = interpolate_offset
+
+         self.patch_embed = embed_layer(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
+         num_patches = self.patch_embed.num_patches
+
+         self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
+         self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + self.num_tokens, embed_dim))
+         assert num_register_tokens >= 0
+         self.register_tokens = (
+             nn.Parameter(torch.zeros(1, num_register_tokens, embed_dim)) if num_register_tokens else None
+         )
+
+         if drop_path_uniform is True:
+             dpr = [drop_path_rate] * depth
+         else:
+             dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]  # stochastic depth decay rule
+
+         if ffn_layer == "mlp":
+             logger.info("using MLP layer as FFN")
+             ffn_layer = Mlp
+         elif ffn_layer == "swiglufused" or ffn_layer == "swiglu":
+             logger.info("using SwiGLU layer as FFN")
+             ffn_layer = SwiGLUFFNFused
+         elif ffn_layer == "identity":
+             logger.info("using Identity layer as FFN")
+
+             def f(*args, **kwargs):
+                 return nn.Identity()
+
+             ffn_layer = f
+         else:
+             raise NotImplementedError
+
+         blocks_list = [
+             block_fn(
+                 dim=embed_dim,
+                 num_heads=num_heads,
+                 mlp_ratio=mlp_ratio,
+                 qkv_bias=qkv_bias,
+                 proj_bias=proj_bias,
+                 ffn_bias=ffn_bias,
+                 drop_path=dpr[i],
+                 norm_layer=norm_layer,
+                 act_layer=act_layer,
+                 ffn_layer=ffn_layer,
+                 init_values=init_values,
+             )
+             for i in range(depth)
+         ]
+         if block_chunks > 0:
+             self.chunked_blocks = True
+             chunked_blocks = []
+             chunksize = depth // block_chunks
+             for i in range(0, depth, chunksize):
+                 # this is to keep the block index consistent if we chunk the block list
+                 chunked_blocks.append([nn.Identity()] * i + blocks_list[i : i + chunksize])
+             self.blocks = nn.ModuleList([BlockChunk(p) for p in chunked_blocks])
+         else:
+             self.chunked_blocks = False
+             self.blocks = nn.ModuleList(blocks_list)
+
+         self.norm = norm_layer(embed_dim)
+         self.head = nn.Identity()
+
+         self.mask_token = nn.Parameter(torch.zeros(1, embed_dim))
+
+         self.init_weights()
+
+     def init_weights(self):
+         trunc_normal_(self.pos_embed, std=0.02)
+         nn.init.normal_(self.cls_token, std=1e-6)
+         if self.register_tokens is not None:
+             nn.init.normal_(self.register_tokens, std=1e-6)
+         named_apply(init_weights_vit_timm, self)
+
+     def interpolate_pos_encoding(self, x, w, h):
+         previous_dtype = x.dtype
+         npatch = x.shape[1] - 1
+         N = self.pos_embed.shape[1] - 1
+         if npatch == N and w == h:
+             return self.pos_embed
+         pos_embed = self.pos_embed.float()
+         class_pos_embed = pos_embed[:, 0]
+         patch_pos_embed = pos_embed[:, 1:]
+         dim = x.shape[-1]
+         w0 = w // self.patch_size
+         h0 = h // self.patch_size
+         M = int(math.sqrt(N))  # Recover the number of patches in each dimension
+         assert N == M * M
+         kwargs = {}
+         if self.interpolate_offset:
+             # Historical kludge: add a small number to avoid floating point error in the interpolation, see https://github.com/facebookresearch/dino/issues/8
+             # Note: still needed for backward-compatibility, the underlying operators are using both output size and scale factors
+             sx = float(w0 + self.interpolate_offset) / M
+             sy = float(h0 + self.interpolate_offset) / M
+             kwargs["scale_factor"] = (sx, sy)
+         else:
+             # Simply specify an output size instead of a scale factor
+             kwargs["size"] = (w0, h0)
+         patch_pos_embed = nn.functional.interpolate(
+             patch_pos_embed.reshape(1, M, M, dim).permute(0, 3, 1, 2),
+             mode="bicubic",
+             antialias=self.interpolate_antialias,
+             **kwargs,
+         )
+         assert (w0, h0) == patch_pos_embed.shape[-2:]
+         patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim)
+         return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1).to(previous_dtype)
+
+     def prepare_tokens_with_masks(self, x, masks=None):
+         B, nc, w, h = x.shape
+         x = self.patch_embed(x)
+         if masks is not None:
+             x = torch.where(masks.unsqueeze(-1), self.mask_token.to(x.dtype).unsqueeze(0), x)
+
+         x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1)
+         x = x + self.interpolate_pos_encoding(x, w, h)
+
+         if self.register_tokens is not None:
+             x = torch.cat(
+                 (
+                     x[:, :1],
+                     self.register_tokens.expand(x.shape[0], -1, -1),
+                     x[:, 1:],
+                 ),
+                 dim=1,
+             )
+
+         return x
+
+     def forward_features_list(self, x_list, masks_list):
+         x = [self.prepare_tokens_with_masks(x, masks) for x, masks in zip(x_list, masks_list)]
+         for blk in self.blocks:
+             x = blk(x)
+
+         all_x = x
+         output = []
+         for x, masks in zip(all_x, masks_list):
+             x_norm = self.norm(x)
+             output.append(
+                 {
+                     "x_norm_clstoken": x_norm[:, 0],
+                     "x_norm_regtokens": x_norm[:, 1 : self.num_register_tokens + 1],
+                     "x_norm_patchtokens": x_norm[:, self.num_register_tokens + 1 :],
+                     "x_prenorm": x,
+                     "masks": masks,
+                 }
+             )
+         return output
+
+     def forward_features(self, x, masks=None):
+         if isinstance(x, list):
+             return self.forward_features_list(x, masks)
+
+         x = self.prepare_tokens_with_masks(x, masks)
+
+         for blk in self.blocks:
+             x = blk(x)
+
+         x_norm = self.norm(x)
+         return {
+             "x_norm_clstoken": x_norm[:, 0],
+             "x_norm_regtokens": x_norm[:, 1 : self.num_register_tokens + 1],
+             "x_norm_patchtokens": x_norm[:, self.num_register_tokens + 1 :],
+             "x_prenorm": x,
+             "masks": masks,
+         }
+
+     def _get_intermediate_layers_not_chunked(self, x, n=1):
+         x = self.prepare_tokens_with_masks(x)
+         # If n is an int, take the n last blocks. If it's a list, take them
+         output, total_block_len = [], len(self.blocks)
+         blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n
+         for i, blk in enumerate(self.blocks):
+             x = blk(x)
+             if i in blocks_to_take:
+                 output.append(x)
+         assert len(output) == len(blocks_to_take), f"only {len(output)} / {len(blocks_to_take)} blocks found"
+         return output
+
+     def _get_intermediate_layers_chunked(self, x, n=1):
+         x = self.prepare_tokens_with_masks(x)
+         output, i, total_block_len = [], 0, len(self.blocks[-1])
+         # If n is an int, take the n last blocks. If it's a list, take them
+         blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n
+         for block_chunk in self.blocks:
+             for blk in block_chunk[i:]:  # Passing the nn.Identity()
+                 x = blk(x)
+                 if i in blocks_to_take:
+                     output.append(x)
+                 i += 1
+         assert len(output) == len(blocks_to_take), f"only {len(output)} / {len(blocks_to_take)} blocks found"
+         return output
+
+     def get_intermediate_layers(
+         self,
+         x: torch.Tensor,
+         n: Union[int, Sequence] = 1,  # Layers or n last layers to take
+         reshape: bool = False,
+         return_class_token: bool = False,
+         norm=True,
+     ) -> Tuple[Union[torch.Tensor, Tuple[torch.Tensor]]]:
+         if self.chunked_blocks:
+             outputs = self._get_intermediate_layers_chunked(x, n)
+         else:
+             outputs = self._get_intermediate_layers_not_chunked(x, n)
+         if norm:
+             outputs = [self.norm(out) for out in outputs]
+         class_tokens = [out[:, 0] for out in outputs]
+         outputs = [out[:, 1 + self.num_register_tokens :] for out in outputs]
+         if reshape:
+             B, _, w, h = x.shape
+             outputs = [
+                 out.reshape(B, w // self.patch_size, h // self.patch_size, -1).permute(0, 3, 1, 2).contiguous()
+                 for out in outputs
+             ]
+         if return_class_token:
+             return tuple(zip(outputs, class_tokens))
+         return tuple(outputs)
+
+     def forward(self, *args, is_training=False, **kwargs):
+         ret = self.forward_features(*args, **kwargs)
+         if is_training:
+             return ret
+         else:
+             return self.head(ret["x_norm_clstoken"])
+
+
+ def init_weights_vit_timm(module: nn.Module, name: str = ""):
+     """ViT weight initialization, original timm impl (for reproducibility)"""
+     if isinstance(module, nn.Linear):
+         trunc_normal_(module.weight, std=0.02)
+         if module.bias is not None:
+             nn.init.zeros_(module.bias)
+
+
+ def vit_small(patch_size=16, num_register_tokens=0, **kwargs):
+     model = DinoVisionTransformer(
+         patch_size=patch_size,
+         embed_dim=384,
+         depth=12,
+         num_heads=6,
+         mlp_ratio=4,
+         block_fn=partial(Block, attn_class=MemEffAttention),
+         num_register_tokens=num_register_tokens,
+         **kwargs,
+     )
+     return model
+
+
+ def vit_base(patch_size=16, num_register_tokens=0, **kwargs):
+     model = DinoVisionTransformer(
+         patch_size=patch_size,
+         embed_dim=768,
+         depth=12,
+         num_heads=12,
+         mlp_ratio=4,
+         block_fn=partial(Block, attn_class=MemEffAttention),
+         num_register_tokens=num_register_tokens,
+         **kwargs,
+     )
+     return model
+
+
+ def vit_large(patch_size=16, num_register_tokens=0, **kwargs):
+     model = DinoVisionTransformer(
+         patch_size=patch_size,
+         embed_dim=1024,
+         depth=24,
+         num_heads=16,
+         mlp_ratio=4,
+         block_fn=partial(Block, attn_class=MemEffAttention),
+         num_register_tokens=num_register_tokens,
+         **kwargs,
+     )
+     return model
+
+
+ def vit_giant2(patch_size=16, num_register_tokens=0, **kwargs):
+     """
+     Close to ViT-giant, with embed-dim 1536 and 24 heads => embed-dim per head 64
+     """
+     model = DinoVisionTransformer(
+         patch_size=patch_size,
+         embed_dim=1536,
+         depth=40,
+         num_heads=24,
+         mlp_ratio=4,
+         block_fn=partial(Block, attn_class=MemEffAttention),
+         num_register_tokens=num_register_tokens,
+         **kwargs,
+     )
+     return model
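
The entry point downstream code typically uses is get_intermediate_layers, which returns patch-token features from selected blocks, optionally reshaped into a feature map. A minimal sketch (not part of the commit; assumes a CPU build without xFormers, and the input size must be a multiple of the patch size):

    import torch
    from moge.model.dinov2.models.vision_transformer import vit_small

    model = vit_small(patch_size=14, img_size=518, block_chunks=0).eval()
    x = torch.randn(1, 3, 252, 252)  # 252 = 18 * 14
    with torch.no_grad():
        (feat,) = model.get_intermediate_layers(x, n=1, reshape=True)
    print(feat.shape)  # torch.Size([1, 384, 18, 18])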
moge/model/dinov2/utils/__init__.py ADDED
@@ -0,0 +1,4 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
moge/model/dinov2/utils/cluster.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ from enum import Enum
7
+ import os
8
+ from pathlib import Path
9
+ from typing import Any, Dict, Optional
10
+
11
+
12
+ class ClusterType(Enum):
13
+ AWS = "aws"
14
+ FAIR = "fair"
15
+ RSC = "rsc"
16
+
17
+
18
+ def _guess_cluster_type() -> ClusterType:
19
+ uname = os.uname()
20
+ if uname.sysname == "Linux":
21
+ if uname.release.endswith("-aws"):
22
+ # Linux kernel versions on AWS instances are of the form "5.4.0-1051-aws"
23
+ return ClusterType.AWS
24
+ elif uname.nodename.startswith("rsc"):
25
+ # Linux kernel versions on RSC instances are standard ones but hostnames start with "rsc"
26
+ return ClusterType.RSC
27
+
28
+ return ClusterType.FAIR
29
+
30
+
31
+ def get_cluster_type(cluster_type: Optional[ClusterType] = None) -> Optional[ClusterType]:
32
+ if cluster_type is None:
33
+ return _guess_cluster_type()
34
+
35
+ return cluster_type
36
+
37
+
38
+ def get_checkpoint_path(cluster_type: Optional[ClusterType] = None) -> Optional[Path]:
39
+ cluster_type = get_cluster_type(cluster_type)
40
+ if cluster_type is None:
41
+ return None
42
+
43
+ CHECKPOINT_DIRNAMES = {
44
+ ClusterType.AWS: "checkpoints",
45
+ ClusterType.FAIR: "checkpoint",
46
+ ClusterType.RSC: "checkpoint/dino",
47
+ }
48
+ return Path("/") / CHECKPOINT_DIRNAMES[cluster_type]
49
+
50
+
51
+ def get_user_checkpoint_path(cluster_type: Optional[ClusterType] = None) -> Optional[Path]:
52
+ checkpoint_path = get_checkpoint_path(cluster_type)
53
+ if checkpoint_path is None:
54
+ return None
55
+
56
+ username = os.environ.get("USER")
57
+ assert username is not None
58
+ return checkpoint_path / username
59
+
60
+
61
+ def get_slurm_partition(cluster_type: Optional[ClusterType] = None) -> Optional[str]:
62
+ cluster_type = get_cluster_type(cluster_type)
63
+ if cluster_type is None:
64
+ return None
65
+
66
+ SLURM_PARTITIONS = {
67
+ ClusterType.AWS: "learnlab",
68
+ ClusterType.FAIR: "learnlab",
69
+ ClusterType.RSC: "learn",
70
+ }
71
+ return SLURM_PARTITIONS[cluster_type]
72
+
73
+
74
+ def get_slurm_executor_parameters(
75
+ nodes: int, num_gpus_per_node: int, cluster_type: Optional[ClusterType] = None, **kwargs
76
+ ) -> Dict[str, Any]:
77
+ # create default parameters
78
+ params = {
79
+ "mem_gb": 0, # Requests all memory on a node, see https://slurm.schedmd.com/sbatch.html
80
+ "gpus_per_node": num_gpus_per_node,
81
+ "tasks_per_node": num_gpus_per_node, # one task per GPU
82
+ "cpus_per_task": 10,
83
+ "nodes": nodes,
84
+ "slurm_partition": get_slurm_partition(cluster_type),
85
+ }
86
+ # apply cluster-specific adjustments
87
+ cluster_type = get_cluster_type(cluster_type)
88
+ if cluster_type == ClusterType.AWS:
89
+ params["cpus_per_task"] = 12
90
+ del params["mem_gb"]
91
+ elif cluster_type == ClusterType.RSC:
92
+ params["cpus_per_task"] = 12
93
+ # set additional parameters / apply overrides
94
+ params.update(kwargs)
95
+ return params
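A brief, hypothetical sketch of how these helpers are combined when submitting jobs (the actual launcher lives in the upstream DINOv2 training code; partition names and checkpoint roots depend on which cluster `_guess_cluster_type` detects):

    # Hypothetical usage; the values in comments assume the FAIR-cluster defaults above.
    params = get_slurm_executor_parameters(nodes=2, num_gpus_per_node=8)
    print(params["slurm_partition"], params["tasks_per_node"])   # e.g. "learnlab", 8

    ckpt_root = get_user_checkpoint_path()                        # e.g. /checkpoint/<USER>
    print(ckpt_root)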
moge/model/dinov2/utils/config.py ADDED
@@ -0,0 +1,72 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ import math
7
+ import logging
8
+ import os
9
+
10
+ from omegaconf import OmegaConf
11
+
12
+ import dinov2.distributed as distributed
13
+ from dinov2.logging import setup_logging
14
+ from dinov2.utils import utils
15
+ from dinov2.configs import dinov2_default_config
16
+
17
+
18
+ logger = logging.getLogger("dinov2")
19
+
20
+
21
+ def apply_scaling_rules_to_cfg(cfg): # to fix
22
+ if cfg.optim.scaling_rule == "sqrt_wrt_1024":
23
+ base_lr = cfg.optim.base_lr
24
+ cfg.optim.lr = base_lr
25
+ cfg.optim.lr *= math.sqrt(cfg.train.batch_size_per_gpu * distributed.get_global_size() / 1024.0)
26
+ logger.info(f"sqrt scaling learning rate; base: {base_lr}, new: {cfg.optim.lr}")
27
+ else:
28
+ raise NotImplementedError
29
+ return cfg
30
+
31
+
32
+ def write_config(cfg, output_dir, name="config.yaml"):
33
+ logger.info(OmegaConf.to_yaml(cfg))
34
+ saved_cfg_path = os.path.join(output_dir, name)
35
+ with open(saved_cfg_path, "w") as f:
36
+ OmegaConf.save(config=cfg, f=f)
37
+ return saved_cfg_path
38
+
39
+
40
+ def get_cfg_from_args(args):
41
+ args.output_dir = os.path.abspath(args.output_dir)
42
+ args.opts += [f"train.output_dir={args.output_dir}"]
43
+ default_cfg = OmegaConf.create(dinov2_default_config)
44
+ cfg = OmegaConf.load(args.config_file)
45
+ cfg = OmegaConf.merge(default_cfg, cfg, OmegaConf.from_cli(args.opts))
46
+ return cfg
47
+
48
+
49
+ def default_setup(args):
50
+ distributed.enable(overwrite=True)
51
+ seed = getattr(args, "seed", 0)
52
+ rank = distributed.get_global_rank()
53
+
54
+ global logger
55
+ setup_logging(output=args.output_dir, level=logging.INFO)
56
+ logger = logging.getLogger("dinov2")
57
+
58
+ utils.fix_random_seeds(seed + rank)
59
+ logger.info("git:\n {}\n".format(utils.get_sha()))
60
+ logger.info("\n".join("%s: %s" % (k, str(v)) for k, v in sorted(dict(vars(args)).items())))
61
+
62
+
63
+ def setup(args):
64
+ """
65
+ Create configs and perform basic setups.
66
+ """
67
+ cfg = get_cfg_from_args(args)
68
+ os.makedirs(args.output_dir, exist_ok=True)
69
+ default_setup(args)
70
+ apply_scaling_rules_to_cfg(cfg)
71
+ write_config(cfg, args.output_dir)
72
+ return cfg
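These helpers assume the full upstream `dinov2` training package (its `distributed`, `logging` and `configs` modules). A minimal, hypothetical sketch of the OmegaConf merge that `get_cfg_from_args` performs, with a toy mapping standing in for `dinov2_default_config`:

    from omegaconf import OmegaConf

    # Toy default standing in for dinov2_default_config (assumption: any OmegaConf mapping works here).
    default_cfg = OmegaConf.create({"optim": {"base_lr": 4e-3, "scaling_rule": "sqrt_wrt_1024"},
                                    "train": {"batch_size_per_gpu": 32, "output_dir": "."}})
    file_cfg = OmegaConf.create({"train": {"batch_size_per_gpu": 64}})   # stands in for the YAML config file
    cli_cfg = OmegaConf.from_cli(["optim.base_lr=2e-3", "train.output_dir=/tmp/run"])
    cfg = OmegaConf.merge(default_cfg, file_cfg, cli_cfg)
    print(OmegaConf.to_yaml(cfg))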
moge/model/dinov2/utils/dtype.py ADDED
@@ -0,0 +1,37 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+
7
+ from typing import Dict, Union
8
+
9
+ import numpy as np
10
+ import torch
11
+
12
+
13
+ TypeSpec = Union[str, np.dtype, torch.dtype]
14
+
15
+
16
+ _NUMPY_TO_TORCH_DTYPE: Dict[np.dtype, torch.dtype] = {
17
+ np.dtype("bool"): torch.bool,
18
+ np.dtype("uint8"): torch.uint8,
19
+ np.dtype("int8"): torch.int8,
20
+ np.dtype("int16"): torch.int16,
21
+ np.dtype("int32"): torch.int32,
22
+ np.dtype("int64"): torch.int64,
23
+ np.dtype("float16"): torch.float16,
24
+ np.dtype("float32"): torch.float32,
25
+ np.dtype("float64"): torch.float64,
26
+ np.dtype("complex64"): torch.complex64,
27
+ np.dtype("complex128"): torch.complex128,
28
+ }
29
+
30
+
31
+ def as_torch_dtype(dtype: TypeSpec) -> torch.dtype:
32
+ if isinstance(dtype, torch.dtype):
33
+ return dtype
34
+ if isinstance(dtype, str):
35
+ dtype = np.dtype(dtype)
36
+ assert isinstance(dtype, np.dtype), f"Expected an instance of numpy dtype, got {type(dtype)}"
37
+ return _NUMPY_TO_TORCH_DTYPE[dtype]
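A quick sketch of `as_torch_dtype`, which accepts strings, NumPy dtypes, or torch dtypes (torch dtypes pass through unchanged; note it expects `np.dtype` instances rather than scalar types such as `np.float32`):

    import numpy as np
    import torch

    assert as_torch_dtype("float16") == torch.float16
    assert as_torch_dtype(np.dtype("int64")) == torch.int64
    assert as_torch_dtype(torch.bfloat16) == torch.bfloat16   # torch dtypes are returned as-is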
moge/model/dinov2/utils/param_groups.py ADDED
@@ -0,0 +1,103 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ from collections import defaultdict
7
+ import logging
8
+
9
+
10
+ logger = logging.getLogger("dinov2")
11
+
12
+
13
+ def get_vit_lr_decay_rate(name, lr_decay_rate=1.0, num_layers=12, force_is_backbone=False, chunked_blocks=False):
14
+ """
15
+ Calculate lr decay rate for different ViT blocks.
16
+ Args:
17
+ name (string): parameter name.
18
+ lr_decay_rate (float): base lr decay rate.
19
+ num_layers (int): number of ViT blocks.
20
+ Returns:
21
+ lr decay rate for the given parameter.
22
+ """
23
+ layer_id = num_layers + 1
24
+ if name.startswith("backbone") or force_is_backbone:
25
+ if (
26
+ ".pos_embed" in name
27
+ or ".patch_embed" in name
28
+ or ".mask_token" in name
29
+ or ".cls_token" in name
30
+ or ".register_tokens" in name
31
+ ):
32
+ layer_id = 0
33
+ elif force_is_backbone and (
34
+ "pos_embed" in name
35
+ or "patch_embed" in name
36
+ or "mask_token" in name
37
+ or "cls_token" in name
38
+ or "register_tokens" in name
39
+ ):
40
+ layer_id = 0
41
+ elif ".blocks." in name and ".residual." not in name:
42
+ layer_id = int(name[name.find(".blocks.") :].split(".")[2]) + 1
43
+ elif chunked_blocks and "blocks." in name and "residual." not in name:
44
+ layer_id = int(name[name.find("blocks.") :].split(".")[2]) + 1
45
+ elif "blocks." in name and "residual." not in name:
46
+ layer_id = int(name[name.find("blocks.") :].split(".")[1]) + 1
47
+
48
+ return lr_decay_rate ** (num_layers + 1 - layer_id)
49
+
50
+
51
+ def get_params_groups_with_decay(model, lr_decay_rate=1.0, patch_embed_lr_mult=1.0):
52
+ chunked_blocks = False
53
+ if hasattr(model, "n_blocks"):
54
+ logger.info("chunked fsdp")
55
+ n_blocks = model.n_blocks
56
+ chunked_blocks = model.chunked_blocks
57
+ elif hasattr(model, "blocks"):
58
+ logger.info("first code branch")
59
+ n_blocks = len(model.blocks)
60
+ elif hasattr(model, "backbone"):
61
+ logger.info("second code branch")
62
+ n_blocks = len(model.backbone.blocks)
63
+ else:
64
+ logger.info("else code branch")
65
+ n_blocks = 0
66
+ all_param_groups = []
67
+
68
+ for name, param in model.named_parameters():
69
+ name = name.replace("_fsdp_wrapped_module.", "")
70
+ if not param.requires_grad:
71
+ continue
72
+ decay_rate = get_vit_lr_decay_rate(
73
+ name, lr_decay_rate, num_layers=n_blocks, force_is_backbone=n_blocks > 0, chunked_blocks=chunked_blocks
74
+ )
75
+ d = {"params": param, "is_last_layer": False, "lr_multiplier": decay_rate, "wd_multiplier": 1.0, "name": name}
76
+
77
+ if "last_layer" in name:
78
+ d.update({"is_last_layer": True})
79
+
80
+ if name.endswith(".bias") or "norm" in name or "gamma" in name:
81
+ d.update({"wd_multiplier": 0.0})
82
+
83
+ if "patch_embed" in name:
84
+ d.update({"lr_multiplier": d["lr_multiplier"] * patch_embed_lr_mult})
85
+
86
+ all_param_groups.append(d)
87
+ logger.info(f"""{name}: lr_multiplier: {d["lr_multiplier"]}, wd_multiplier: {d["wd_multiplier"]}""")
88
+
89
+ return all_param_groups
90
+
91
+
92
+ def fuse_params_groups(all_params_groups, keys=("lr_multiplier", "wd_multiplier", "is_last_layer")):
93
+ fused_params_groups = defaultdict(lambda: {"params": []})
94
+ for d in all_params_groups:
95
+ identifier = ""
96
+ for k in keys:
97
+ identifier += k + str(d[k]) + "_"
98
+
99
+ for k in keys:
100
+ fused_params_groups[identifier][k] = d[k]
101
+ fused_params_groups[identifier]["params"].append(d["params"])
102
+
103
+ return fused_params_groups.values()
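A hedged sketch of feeding these groups to an optimizer. The `lr_multiplier`/`wd_multiplier`/`is_last_layer` fields are consumed by the upstream DINOv2 trainer; below they are simply folded into per-group AdamW hyperparameters for illustration (the backbone constructor is a hypothetical import from this repo's `models/vision_transformer.py`):

    import torch

    # Hypothetical: any module exposing .blocks (e.g. the DinoVisionTransformer in this repo) works.
    model = vit_small(patch_size=14)
    groups = get_params_groups_with_decay(model, lr_decay_rate=0.9, patch_embed_lr_mult=0.2)
    fused = fuse_params_groups(groups)

    base_lr, base_wd = 1e-3, 0.04
    optimizer = torch.optim.AdamW([
        {"params": g["params"], "lr": base_lr * g["lr_multiplier"], "weight_decay": base_wd * g["wd_multiplier"]}
        for g in fused
    ])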
moge/model/dinov2/utils/utils.py ADDED
@@ -0,0 +1,95 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ import logging
7
+ import os
8
+ import random
9
+ import subprocess
10
+ from urllib.parse import urlparse
11
+
12
+ import numpy as np
13
+ import torch
14
+ from torch import nn
15
+
16
+
17
+ logger = logging.getLogger("dinov2")
18
+
19
+
20
+ def load_pretrained_weights(model, pretrained_weights, checkpoint_key):
21
+ if urlparse(pretrained_weights).scheme: # If it looks like a URL
22
+ state_dict = torch.hub.load_state_dict_from_url(pretrained_weights, map_location="cpu")
23
+ else:
24
+ state_dict = torch.load(pretrained_weights, map_location="cpu")
25
+ if checkpoint_key is not None and checkpoint_key in state_dict:
26
+ logger.info(f"Take key {checkpoint_key} in provided checkpoint dict")
27
+ state_dict = state_dict[checkpoint_key]
28
+ # remove `module.` prefix
29
+ state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
30
+ # remove `backbone.` prefix induced by multicrop wrapper
31
+ state_dict = {k.replace("backbone.", ""): v for k, v in state_dict.items()}
32
+ msg = model.load_state_dict(state_dict, strict=False)
33
+ logger.info("Pretrained weights found at {} and loaded with msg: {}".format(pretrained_weights, msg))
34
+
35
+
36
+ def fix_random_seeds(seed=31):
37
+ """
38
+ Fix random seeds.
39
+ """
40
+ torch.manual_seed(seed)
41
+ torch.cuda.manual_seed_all(seed)
42
+ np.random.seed(seed)
43
+ random.seed(seed)
44
+
45
+
46
+ def get_sha():
47
+ cwd = os.path.dirname(os.path.abspath(__file__))
48
+
49
+ def _run(command):
50
+ return subprocess.check_output(command, cwd=cwd).decode("ascii").strip()
51
+
52
+ sha = "N/A"
53
+ diff = "clean"
54
+ branch = "N/A"
55
+ try:
56
+ sha = _run(["git", "rev-parse", "HEAD"])
57
+ subprocess.check_output(["git", "diff"], cwd=cwd)
58
+ diff = _run(["git", "diff-index", "HEAD"])
59
+ diff = "has uncommitted changes" if diff else "clean"
60
+ branch = _run(["git", "rev-parse", "--abbrev-ref", "HEAD"])
61
+ except Exception:
62
+ pass
63
+ message = f"sha: {sha}, status: {diff}, branch: {branch}"
64
+ return message
65
+
66
+
67
+ class CosineScheduler(object):
68
+ def __init__(self, base_value, final_value, total_iters, warmup_iters=0, start_warmup_value=0, freeze_iters=0):
69
+ super().__init__()
70
+ self.final_value = final_value
71
+ self.total_iters = total_iters
72
+
73
+ freeze_schedule = np.zeros((freeze_iters))
74
+
75
+ warmup_schedule = np.linspace(start_warmup_value, base_value, warmup_iters)
76
+
77
+ iters = np.arange(total_iters - warmup_iters - freeze_iters)
78
+ schedule = final_value + 0.5 * (base_value - final_value) * (1 + np.cos(np.pi * iters / len(iters)))
79
+ self.schedule = np.concatenate((freeze_schedule, warmup_schedule, schedule))
80
+
81
+ assert len(self.schedule) == self.total_iters
82
+
83
+ def __getitem__(self, it):
84
+ if it >= self.total_iters:
85
+ return self.final_value
86
+ else:
87
+ return self.schedule[it]
88
+
89
+
90
+ def has_batchnorms(model):
91
+ bn_types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)
92
+ for name, module in model.named_modules():
93
+ if isinstance(module, bn_types):
94
+ return True
95
+ return False
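A short sketch of `CosineScheduler`: it precomputes one value per iteration (optional freeze, linear warm-up, then cosine decay), and indexing past `total_iters` clamps to `final_value`:

    lr_schedule = CosineScheduler(
        base_value=1e-3, final_value=1e-5, total_iters=10_000,
        warmup_iters=500, start_warmup_value=0.0,
    )
    print(lr_schedule[0])        # 0.0 (start of warm-up)
    print(lr_schedule[500])      # 1e-3 (peak value, right after warm-up)
    print(lr_schedule[20_000])   # 1e-5 (clamped to final_value past total_iters)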
moge/model/moge_model.py ADDED
@@ -0,0 +1,376 @@
1
+ from typing import *
2
+ from numbers import Number
3
+ from functools import partial
4
+ from pathlib import Path
5
+ import importlib
6
+ import warnings
7
+ import json
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+ import torch.utils
13
+ import torch.utils.checkpoint
14
+ import torch.version
15
+ import utils3d
16
+ from huggingface_hub import hf_hub_download
17
+
18
+ from ..utils.geometry_torch import image_plane_uv, point_map_to_depth, gaussian_blur_2d
19
+ from .utils import wrap_dinov2_attention_with_sdpa, wrap_module_with_gradient_checkpointing, unwrap_module_with_gradient_checkpointing
20
+ from ..utils.tools import timeit
21
+
22
+
23
+ class ResidualConvBlock(nn.Module):
24
+ def __init__(self, in_channels: int, out_channels: int = None, hidden_channels: int = None, padding_mode: str = 'replicate', activation: Literal['relu', 'leaky_relu', 'silu', 'elu'] = 'relu', norm: Literal['group_norm', 'layer_norm'] = 'group_norm'):
25
+ super(ResidualConvBlock, self).__init__()
26
+ if out_channels is None:
27
+ out_channels = in_channels
28
+ if hidden_channels is None:
29
+ hidden_channels = in_channels
30
+
31
+ if activation == 'relu':
32
+ activation_cls = lambda: nn.ReLU(inplace=True)
33
+ elif activation == 'leaky_relu':
34
+ activation_cls = lambda: nn.LeakyReLU(negative_slope=0.2, inplace=True)
35
+ elif activation == 'silu':
36
+ activation_cls = lambda: nn.SiLU(inplace=True)
37
+ elif activation == 'elu':
38
+ activation_cls = lambda: nn.ELU(inplace=True)
39
+ else:
40
+ raise ValueError(f'Unsupported activation function: {activation}')
41
+
42
+ self.layers = nn.Sequential(
43
+ nn.GroupNorm(1, in_channels),
44
+ activation_cls(),
45
+ nn.Conv2d(in_channels, hidden_channels, kernel_size=3, padding=1, padding_mode=padding_mode),
46
+ nn.GroupNorm(hidden_channels // 32 if norm == 'group_norm' else 1, hidden_channels),
47
+ activation_cls(),
48
+ nn.Conv2d(hidden_channels, out_channels, kernel_size=3, padding=1, padding_mode=padding_mode)
49
+ )
50
+
51
+ self.skip_connection = nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0) if in_channels != out_channels else nn.Identity()
52
+
53
+ def forward(self, x):
54
+ skip = self.skip_connection(x)
55
+ x = self.layers(x)
56
+ x = x + skip
57
+ return x
58
+
59
+
60
+ class Head(nn.Module):
61
+ def __init__(
62
+ self,
63
+ num_features: int,
64
+ dim_in: int,
65
+ dim_out: List[int],
66
+ dim_proj: int = 512,
67
+ dim_upsample: List[int] = [256, 128, 128],
68
+ dim_times_res_block_hidden: int = 1,
69
+ num_res_blocks: int = 1,
70
+ res_block_norm: Literal['group_norm', 'layer_norm'] = 'group_norm',
71
+ last_res_blocks: int = 0,
72
+ last_conv_channels: int = 32,
73
+ last_conv_size: int = 1
74
+ ):
75
+ super().__init__()
76
+
77
+ self.projects = nn.ModuleList([
78
+ nn.Conv2d(in_channels=dim_in, out_channels=dim_proj, kernel_size=1, stride=1, padding=0,) for _ in range(num_features)
79
+ ])
80
+
81
+ self.upsample_blocks = nn.ModuleList([
82
+ nn.Sequential(
83
+ self._make_upsampler(in_ch + 2, out_ch),
84
+ *(ResidualConvBlock(out_ch, out_ch, dim_times_res_block_hidden * out_ch, activation="relu", norm=res_block_norm) for _ in range(num_res_blocks))
85
+ ) for in_ch, out_ch in zip([dim_proj] + dim_upsample[:-1], dim_upsample)
86
+ ])
87
+
88
+ self.output_block = nn.ModuleList([
89
+ self._make_output_block(
90
+ dim_upsample[-1] + 2, dim_out_, dim_times_res_block_hidden, last_res_blocks, last_conv_channels, last_conv_size, res_block_norm,
91
+ ) for dim_out_ in dim_out
92
+ ])
93
+
94
+ def _make_upsampler(self, in_channels: int, out_channels: int):
95
+ upsampler = nn.Sequential(
96
+ nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2),
97
+ nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, padding_mode='replicate')
98
+ )
99
+ # Copy the top-left value across the 2x2 transposed-conv kernel so it initially behaves like nearest-neighbor upsampling
+ upsampler[0].weight.data[:] = upsampler[0].weight.data[:, :, :1, :1]
100
+ return upsampler
101
+
102
+ def _make_output_block(self, dim_in: int, dim_out: int, dim_times_res_block_hidden: int, last_res_blocks: int, last_conv_channels: int, last_conv_size: int, res_block_norm: Literal['group_norm', 'layer_norm']):
103
+ return nn.Sequential(
104
+ nn.Conv2d(dim_in, last_conv_channels, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
105
+ *(ResidualConvBlock(last_conv_channels, last_conv_channels, dim_times_res_block_hidden * last_conv_channels, activation='relu', norm=res_block_norm) for _ in range(last_res_blocks)),
106
+ nn.ReLU(inplace=True),
107
+ nn.Conv2d(last_conv_channels, dim_out, kernel_size=last_conv_size, stride=1, padding=last_conv_size // 2, padding_mode='replicate'),
108
+ )
109
+
110
+ def forward(self, hidden_states: torch.Tensor, image: torch.Tensor):
111
+ img_h, img_w = image.shape[-2:]
112
+ patch_h, patch_w = img_h // 14, img_w // 14
113
+
114
+ # Process the hidden states
115
+ x = torch.stack([
116
+ proj(feat.permute(0, 2, 1).unflatten(2, (patch_h, patch_w)).contiguous())
117
+ for proj, (feat, clstoken) in zip(self.projects, hidden_states)
118
+ ], dim=1).sum(dim=1)
119
+
120
+ # Upsample stage
121
+ # (patch_h, patch_w) -> (patch_h * 2, patch_w * 2) -> (patch_h * 4, patch_w * 4) -> (patch_h * 8, patch_w * 8)
122
+ for i, block in enumerate(self.upsample_blocks):
123
+ # UV coordinates are concatenated so the decoder is aware of the image aspect ratio
124
+ uv = image_plane_uv(width=x.shape[-1], height=x.shape[-2], aspect_ratio=img_w / img_h, dtype=x.dtype, device=x.device)
125
+ uv = uv.permute(2, 0, 1).unsqueeze(0).expand(x.shape[0], -1, -1, -1)
126
+ x = torch.cat([x, uv], dim=1)
127
+ for layer in block:
128
+ x = torch.utils.checkpoint.checkpoint(layer, x, use_reentrant=False)
129
+
130
+ # (patch_h * 8, patch_w * 8) -> (img_h, img_w)
131
+ x = F.interpolate(x, (img_h, img_w), mode="bilinear", align_corners=False)
132
+ uv = image_plane_uv(width=x.shape[-1], height=x.shape[-2], aspect_ratio=img_w / img_h, dtype=x.dtype, device=x.device)
133
+ uv = uv.permute(2, 0, 1).unsqueeze(0).expand(x.shape[0], -1, -1, -1)
134
+ x = torch.cat([x, uv], dim=1)
135
+
136
+ if isinstance(self.output_block, nn.ModuleList):
137
+ output = [torch.utils.checkpoint.checkpoint(block, x, use_reentrant=False) for block in self.output_block]
138
+ else:
139
+ output = torch.utils.checkpoint.checkpoint(self.output_block, x, use_reentrant=False)
140
+
141
+ return output
142
+
143
+
144
+ class MoGeModel(nn.Module):
145
+ image_mean: torch.Tensor
146
+ image_std: torch.Tensor
147
+
148
+ def __init__(self,
149
+ encoder: str = 'dinov2_vitb14',
150
+ intermediate_layers: Union[int, List[int]] = 4,
151
+ dim_proj: int = 512,
152
+ dim_upsample: List[int] = [256, 128, 128],
153
+ dim_times_res_block_hidden: int = 1,
154
+ num_res_blocks: int = 1,
155
+ output_mask: bool = False,
156
+ split_head: bool = False,
157
+ remap_output: Literal[False, True, 'linear', 'sinh', 'exp', 'sinh_exp'] = 'linear',
158
+ res_block_norm: Literal['group_norm', 'layer_norm'] = 'group_norm',
159
+ trained_diagonal_size_range: Tuple[Number, Number] = (600, 900),
160
+ trained_area_range: Tuple[Number, Number] = (500 * 500, 700 * 700),
161
+ last_res_blocks: int = 0,
162
+ last_conv_channels: int = 32,
163
+ last_conv_size: int = 1,
164
+ **deprecated_kwargs
165
+ ):
166
+ super(MoGeModel, self).__init__()
167
+ if deprecated_kwargs:
168
+ warnings.warn(f"The following deprecated/invalid arguments are ignored: {deprecated_kwargs}")
169
+
170
+ self.encoder = encoder
171
+ self.remap_output = remap_output
172
+ self.intermediate_layers = intermediate_layers
173
+ self.trained_diagonal_size_range = trained_diagonal_size_range
174
+ self.trained_area_range = trained_area_range
175
+ self.output_mask = output_mask
176
+ self.split_head = split_head
177
+
178
+ # NOTE: We have copied the DINOv2 code in torchhub to this repository.
179
+ # Minimal modifications have been made: removing irrelevant code, unnecessary warnings and fixing importing issues.
180
+ hub_loader = getattr(importlib.import_module(".dinov2.hub.backbones", __package__), encoder)
181
+ self.backbone = hub_loader(pretrained=False)
182
+ dim_feature = self.backbone.blocks[0].attn.qkv.in_features
183
+
184
+ self.head = Head(
185
+ num_features=intermediate_layers if isinstance(intermediate_layers, int) else len(intermediate_layers),
186
+ dim_in=dim_feature,
187
+ dim_out=3 if not output_mask else 4 if output_mask and not split_head else [3, 1],
188
+ dim_proj=dim_proj,
189
+ dim_upsample=dim_upsample,
190
+ dim_times_res_block_hidden=dim_times_res_block_hidden,
191
+ num_res_blocks=num_res_blocks,
192
+ res_block_norm=res_block_norm,
193
+ last_res_blocks=last_res_blocks,
194
+ last_conv_channels=last_conv_channels,
195
+ last_conv_size=last_conv_size
196
+ )
197
+
198
+ image_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
199
+ image_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
200
+
201
+ self.register_buffer("image_mean", image_mean)
202
+ self.register_buffer("image_std", image_std)
203
+
204
+ if torch.__version__ >= '2.0':
205
+ self.enable_pytorch_native_sdpa()
206
+
207
+ @classmethod
208
+ def from_pretrained(cls, pretrained_model_name_or_path: Union[str, Path, IO[bytes]], model_kwargs: Optional[Dict[str, Any]] = None, **hf_kwargs) -> 'MoGeModel':
209
+ """
210
+ Load a model from a checkpoint file.
211
+
212
+ ### Parameters:
213
+ - `pretrained_model_name_or_path`: path to the checkpoint file or repo id.
214
+ - `model_kwargs`: additional keyword arguments to override the parameters in the checkpoint.
215
+ - `hf_kwargs`: additional keyword arguments to pass to the `hf_hub_download` function. Ignored if `pretrained_model_name_or_path` is a local path.
216
+
217
+ ### Returns:
218
+ - A new instance of `MoGeModel` with the parameters loaded from the checkpoint.
219
+ """
220
+ if Path(pretrained_model_name_or_path).exists():
221
+ checkpoint = torch.load(pretrained_model_name_or_path, map_location='cpu', weights_only=True)
222
+ else:
223
+ cached_checkpoint_path = hf_hub_download(
224
+ repo_id=pretrained_model_name_or_path,
225
+ repo_type="model",
226
+ filename="model.pt",
227
+ **hf_kwargs
228
+ )
229
+ checkpoint = torch.load(cached_checkpoint_path, map_location='cpu', weights_only=True)
230
+ model_config = checkpoint['model_config']
231
+ if model_kwargs is not None:
232
+ model_config.update(model_kwargs)
233
+ model = cls(**model_config)
234
+ model.load_state_dict(checkpoint['model'])
235
+ return model
236
+
237
+ @staticmethod
238
+ def cache_pretrained_backbone(encoder: str, pretrained: bool):
239
+ _ = torch.hub.load('facebookresearch/dinov2', encoder, pretrained=pretrained)
240
+
241
+ def load_pretrained_backbone(self):
242
+ "Load the backbone with pretrained dinov2 weights from torch hub"
243
+ state_dict = torch.hub.load('facebookresearch/dinov2', self.encoder, pretrained=True).state_dict()
244
+ self.backbone.load_state_dict(state_dict)
245
+
246
+ def enable_backbone_gradient_checkpointing(self):
247
+ for i in range(len(self.backbone.blocks)):
248
+ self.backbone.blocks[i] = wrap_module_with_gradient_checkpointing(self.backbone.blocks[i])
249
+
250
+ def enable_pytorch_native_sdpa(self):
251
+ for i in range(len(self.backbone.blocks)):
252
+ self.backbone.blocks[i].attn = wrap_dinov2_attention_with_sdpa(self.backbone.blocks[i].attn)
253
+
254
+ def forward(self, image: torch.Tensor, mixed_precision: bool = False) -> Dict[str, torch.Tensor]:
255
+ raw_img_h, raw_img_w = image.shape[-2:]
256
+ patch_h, patch_w = raw_img_h // 14, raw_img_w // 14
257
+
258
+ image = (image - self.image_mean) / self.image_std
259
+
260
+ # Apply image transformation for DINOv2
261
+ image_14 = F.interpolate(image, (patch_h * 14, patch_w * 14), mode="bilinear", align_corners=False, antialias=True)
262
+
263
+ # Get intermediate layers from the backbone
264
+ with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=mixed_precision):
265
+ features = self.backbone.get_intermediate_layers(image_14, self.intermediate_layers, return_class_token=True)
266
+
267
+ # Predict points (and mask)
268
+ output = self.head(features, image)
269
+ if self.output_mask:
270
+ if self.split_head:
271
+ points, mask = output
272
+ else:
273
+ points, mask = output.split([3, 1], dim=1)
274
+ points, mask = points.permute(0, 2, 3, 1), mask.squeeze(1)
275
+ else:
276
+ points = output.permute(0, 2, 3, 1)
277
+
278
+ if self.remap_output == 'linear' or self.remap_output == False:
279
+ pass
280
+ elif self.remap_output == 'sinh' or self.remap_output == True:
281
+ points = torch.sinh(points)
282
+ elif self.remap_output == 'exp':
283
+ xy, z = points.split([2, 1], dim=-1)
284
+ z = torch.exp(z)
285
+ points = torch.cat([xy * z, z], dim=-1)
286
+ elif self.remap_output == 'sinh_exp':
287
+ xy, z = points.split([2, 1], dim=-1)
288
+ points = torch.cat([torch.sinh(xy), torch.exp(z)], dim=-1)
289
+ else:
290
+ raise ValueError(f"Invalid remap output type: {self.remap_output}")
291
+
292
+ return_dict = {'points': points}
293
+ if self.output_mask:
294
+ return_dict['mask'] = mask
295
+ return return_dict
296
+
297
+ @torch.inference_mode()
298
+ def infer(
299
+ self,
300
+ image: torch.Tensor,
301
+ force_projection: bool = True,
302
+ resolution_level: int = 9,
303
+ apply_mask: bool = True,
304
+ ) -> Dict[str, torch.Tensor]:
305
+ """
306
+ User-friendly inference function
307
+
308
+ ### Parameters
309
+ - `image`: input image tensor of shape (B, 3, H, W) or (3, H, W)
310
+ - `resolution_level`: the resolution level to use for the output point map in 0-9. Default: 9 (highest)
311
+ - `force_projection`: if True, recompute the point map from the recovered depth and intrinsics so it satisfies a perspective projection. Default: True
+ - `apply_mask`: if True, set invalid pixels to inf using the predicted mask. Default: True
312
+
313
+ ### Returns
314
+
315
+ A dictionary containing the following keys:
316
+ - `points`: output tensor of shape (B, H, W, 3) or (H, W, 3).
317
+ - `depth`: tensor of shape (B, H, W) or (H, W) containing the depth map.
318
+ - `intrinsics`: tensor of shape (B, 3, 3) or (3, 3) containing the camera intrinsics.
+ - `mask`: boolean tensor of shape (B, H, W) or (H, W) marking valid pixels (present only when the model predicts a mask).
319
+ """
320
+ if image.dim() == 3:
321
+ omit_batch_dim = True
322
+ image = image.unsqueeze(0)
323
+ else:
324
+ omit_batch_dim = False
325
+
326
+ original_height, original_width = image.shape[-2:]
327
+ area = original_height * original_width
328
+
329
+ min_area, max_area = self.trained_area_range
330
+ expected_area = min_area + (max_area - min_area) * (resolution_level / 9)
331
+
332
+ if expected_area != area:
333
+ expected_width, expected_height = int(original_width * (expected_area / area) ** 0.5), int(original_height * (expected_area / area) ** 0.5)
334
+ image = F.interpolate(image, (expected_height, expected_width), mode="bicubic", align_corners=False, antialias=True)
335
+
336
+ output = self.forward(image)
337
+ points, mask = output['points'], output.get('mask', None)
338
+
339
+ # Get camera-origin-centered point map
340
+ depth, fov_x, fov_y, z_shift = point_map_to_depth(points, None if mask is None else mask > 0.5)
341
+ intrinsics = utils3d.torch.intrinsics_from_fov_xy(fov_x, fov_y)
342
+
343
+ # If the projection constraint is enforced, recompute the point map from the recovered depth map and intrinsics
344
+ if force_projection:
345
+ points = utils3d.torch.unproject_cv(utils3d.torch.image_uv(width=expected_width, height=expected_height, dtype=points.dtype, device=points.device), depth, extrinsics=None, intrinsics=intrinsics[..., None, :, :])
346
+ else:
347
+ points = points + torch.stack([torch.zeros_like(z_shift), torch.zeros_like(z_shift), z_shift], dim=-1)[..., None, None, :]
348
+
349
+ # Resize the output to the original resolution
350
+ if expected_area != area:
351
+ points = F.interpolate(points.permute(0, 3, 1, 2), (original_height, original_width), mode='bilinear', align_corners=False, antialias=False).permute(0, 2, 3, 1)
352
+ depth = F.interpolate(depth.unsqueeze(1), (original_height, original_width), mode='bilinear', align_corners=False, antialias=False).squeeze(1)
353
+ mask = None if mask is None else F.interpolate(mask.unsqueeze(1), (original_height, original_width), mode='bilinear', align_corners=False, antialias=False).squeeze(1)
354
+
355
+ # Apply mask if needed
356
+ if self.output_mask and apply_mask:
357
+ mask_binary = (depth > 0) & (mask > 0.5)
358
+ points = torch.where(mask_binary[..., None], points, torch.inf)
359
+ depth = torch.where(mask_binary, depth, torch.inf)
360
+
361
+ if omit_batch_dim:
362
+ points = points.squeeze(0)
363
+ intrinsics = intrinsics.squeeze(0)
364
+ depth = depth.squeeze(0)
365
+ if self.output_mask:
366
+ mask = mask.squeeze(0)
367
+
368
+ return_dict = {
369
+ 'points': points,
370
+ 'intrinsics': intrinsics,
371
+ 'depth': depth,
372
+ }
373
+ if self.output_mask:
374
+ return_dict['mask'] = mask > 0.5
375
+
376
+ return return_dict
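A hedged end-to-end sketch of `infer` (the checkpoint path and image file are placeholders; any MoGe checkpoint containing the `model_config` and `model` entries loads this way). The input is an RGB tensor with values in [0, 1]:

    import cv2
    import torch

    model = MoGeModel.from_pretrained("path/to/model.pt").cuda().eval()        # placeholder checkpoint path

    image = cv2.cvtColor(cv2.imread("example.jpg"), cv2.COLOR_BGR2RGB)         # placeholder image
    image = torch.from_numpy(image).float().permute(2, 0, 1).cuda() / 255.0    # (3, H, W), values in [0, 1]

    output = model.infer(image, resolution_level=9, apply_mask=True)
    points, depth, intrinsics = output["points"], output["depth"], output["intrinsics"]
    print(points.shape, depth.shape, intrinsics.shape)                          # (H, W, 3), (H, W), (3, 3)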
moge/model/utils.py ADDED
@@ -0,0 +1,38 @@
1
+ from typing import *
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+ def wrap_module_with_gradient_checkpointing(module: nn.Module):
8
+ from torch.utils.checkpoint import checkpoint
9
+ class _CheckpointingWrapper(module.__class__):
10
+ _restore_cls = module.__class__
11
+ def forward(self, *args, **kwargs):
12
+ return checkpoint(super().forward, *args, use_reentrant=False, **kwargs)
13
+
14
+ module.__class__ = _CheckpointingWrapper
15
+ return module
16
+
17
+
18
+ def unwrap_module_with_gradient_checkpointing(module: nn.Module):
19
+ module.__class__ = module.__class__._restore_cls
20
+
21
+
22
+ def wrap_dinov2_attention_with_sdpa(module: nn.Module):
23
+ assert torch.__version__ >= '2.0', "SDPA requires PyTorch 2.0 or later"
24
+ class _AttentionWrapper(module.__class__):
25
+ def forward(self, x: torch.Tensor, attn_bias=None) -> torch.Tensor:
26
+ B, N, C = x.shape
27
+ qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) # (3, B, H, N, C // H)
28
+
29
+ q, k, v = torch.unbind(qkv, 0) # (B, H, N, C // H)
30
+
31
+ x = F.scaled_dot_product_attention(q, k, v, attn_bias)
32
+ x = x.permute(0, 2, 1, 3).reshape(B, N, C)
33
+
34
+ x = self.proj(x)
35
+ x = self.proj_drop(x)
36
+ return x
37
+ module.__class__ = _AttentionWrapper
38
+ return module
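A small sketch of the wrapping helpers above. They swap the instance's class at runtime, so they apply to an already-constructed module without reloading weights (the toy MLP is only illustrative; checkpointing pays off for heavier blocks):

    import torch
    import torch.nn as nn

    block = nn.Sequential(nn.Linear(64, 64), nn.GELU(), nn.Linear(64, 64))
    block = wrap_module_with_gradient_checkpointing(block)    # forward now recomputes activations during backward

    x = torch.randn(8, 64, requires_grad=True)
    block(x).sum().backward()                                  # usual autograd, with lower peak activation memory

    unwrap_module_with_gradient_checkpointing(block)           # restores the original class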
moge/utils/__init__.py ADDED
File without changes
moge/utils/blob.py ADDED
@@ -0,0 +1,314 @@
1
+ from typing import IO, Generator, Tuple, Union, overload
2
+ from pathlib import Path, PosixPath, PurePosixPath
3
+ import io
4
+ import os
5
+ import re
6
+ import requests
7
+ import fnmatch
8
+
9
+ from azure.identity import DefaultAzureCredential
10
+ from azure.storage.blob import ContainerClient, BlobClient
11
+ import requests.adapters
12
+ import requests.packages
13
+ from urllib3.util.retry import Retry
14
+
15
+
16
+ __all__ = [
17
+ 'download_blob', 'upload_blob',
18
+ 'download_blob_with_cache',
19
+ 'open_blob', 'open_blob_with_cache',
20
+ 'blob_file_exists',
21
+ 'AzureBlobPath','SmartPath'
22
+ ]
23
+
24
+ DEFAULT_CREDENTIAL = DefaultAzureCredential()
25
+
26
+ BLOB_CACHE_DIR = './.blobcache'
27
+
28
+ def download_blob(blob: Union[str, BlobClient]) -> bytes:
29
+ if isinstance(blob, str):
30
+ blob_client = BlobClient.from_blob_url(blob)
31
+ else:
32
+ blob_client = blob
33
+ return blob_client.download_blob().read()
34
+
35
+
36
+ def upload_blob(blob: Union[str, BlobClient], data: Union[str, bytes]):
37
+ if isinstance(blob, str):
38
+ blob_client = BlobClient.from_blob_url(blob)
39
+ else:
40
+ blob_client = blob
41
+ blob_client.upload_blob(data, overwrite=True)
42
+
43
+
44
+ def download_blob_with_cache(container: Union[str, ContainerClient], blob_name: str, cache_dir: str = 'blobcache') -> bytes:
45
+ """
46
+ Download a blob file from a container and return its content as bytes.
47
+ If the file is already present in the cache, it is read from there.
48
+ """
49
+ cache_path = Path(cache_dir) / blob_name
50
+ if cache_path.exists():
51
+ return cache_path.read_bytes()
52
+ if isinstance(container, str):
+ container = ContainerClient.from_container_url(container)
+ data = download_blob(container.get_blob_client(blob_name))
53
+ cache_path.parent.mkdir(parents=True, exist_ok=True)
54
+ cache_path.write_bytes(data)
55
+ return data
56
+
57
+
58
+ def open_blob(container: Union[str, ContainerClient], blob_name: str) -> io.BytesIO:
59
+ """
60
+ Open a blob file for reading from a container and return its content as a BytesIO object.
61
+ """
62
+ if isinstance(container, str):
+ container = ContainerClient.from_container_url(container)
+ return io.BytesIO(download_blob(container.get_blob_client(blob_name)))
63
+
64
+
65
+ def open_blob_with_cache(container: Union[str, ContainerClient], blob_name: str, cache_dir: str = 'blobcache') -> io.BytesIO:
66
+ """
67
+ Open a blob file for reading from a container and return its content as a BytesIO object.
68
+ If the file is already present in the cache, it is read from there.
69
+ """
70
+ return io.BytesIO(download_blob_with_cache(container, blob_name, cache_dir=cache_dir))
71
+
72
+
73
+ def blob_file_exists(container: Union[str, ContainerClient], blob_name: str) -> bool:
74
+ """
75
+ Check if a blob file exists in a container.
76
+ """
77
+ if isinstance(container, str):
78
+ container = ContainerClient.from_container_url(container)
79
+ blob_client = container.get_blob_client(blob_name)
80
+ return blob_client.exists()
81
+
82
+ def is_blob_url(url: str) -> bool:
83
+ return re.match(r'https://[^/]+blob.core.windows.net/+', url) is not None
84
+
85
+
86
+ def split_blob_url(url: str) -> Tuple[str, str, str]:
87
+ match = re.match(r'(https://[^/]+blob.core.windows.net/[^/?]+)(/([^\?]*))?(\?.+)?', url)
88
+ if match:
89
+ container, _, path, sas = match.groups()
90
+ return container, path or '', sas or ''
91
+ raise ValueError(f'Not a valid blob URL: {url}')
92
+
93
+
94
+ def join_blob_path(url: str, *others: str) -> str:
95
+ container, path, sas = split_blob_url(url)
96
+ return container + '/' + os.path.join(path, *others) + sas
97
+
98
+
99
+ class AzureBlobStringWriter(io.StringIO):
100
+ def __init__(self, blob_client: BlobClient, encoding: str = 'utf-8', **kwargs):
101
+ self._encoding = encoding
102
+ self.blob_client = blob_client
103
+ self.kwargs = kwargs
104
+ super().__init__()
105
+
106
+ def close(self):
107
+ self.blob_client.upload_blob(self.getvalue().encode(self._encoding), blob_type='BlockBlob', overwrite=True, **self.kwargs)
108
+
109
+
110
+ class AzureBlobBytesWriter(io.BytesIO):
111
+ def __init__(self, blob_client: BlobClient, **kwargs):
112
+ super().__init__()
113
+ self.blob_client = blob_client
114
+ self.kwargs = kwargs
115
+
116
+ def close(self):
117
+ self.blob_client.upload_blob(self.getvalue(), blob_type='BlockBlob', overwrite=True, **self.kwargs)
118
+
119
+
120
+ def open_azure_blob(blob: Union[str, BlobClient], mode: str = 'r', encoding: str = 'utf-8', newline: str = None, cache_blob: bool = False, **kwargs) -> IO:
121
+ if isinstance(blob, str):
122
+ blob_client = BlobClient.from_blob_url(blob)
123
+ elif isinstance(blob, BlobClient):
124
+ blob_client = blob
125
+ else:
126
+ raise ValueError(f'Must be a blob URL or a BlobClient object: {blob}')
127
+
128
+ if cache_blob:
129
+ cache_path = Path(BLOB_CACHE_DIR, blob_client.account_name, blob_client.container_name, blob_client.blob_name)
130
+
131
+ if mode == 'r' or mode == 'rb':
132
+ if cache_blob:
133
+ if cache_path.exists():
134
+ data = cache_path.read_bytes()
135
+ else:
136
+ data = blob_client.download_blob(**kwargs).read()
137
+ cache_path.parent.mkdir(parents=True, exist_ok=True)
138
+ cache_path.write_bytes(data)
139
+ else:
140
+ data = blob_client.download_blob(**kwargs).read()
141
+ if mode == 'r':
142
+ return io.StringIO(data.decode(encoding), newline=newline)
143
+ else:
144
+ return io.BytesIO(data)
145
+ elif mode == 'w':
146
+ return AzureBlobStringWriter(blob_client, **kwargs)
147
+ elif mode == 'wb':
148
+ return AzureBlobBytesWriter(blob_client, **kwargs)
149
+ else:
150
+ raise ValueError(f'Unsupported mode: {mode}')
151
+
152
+
153
+ def smart_open(path_or_url: Union[Path, str], mode: str = 'r', encoding: str = 'utf-8') -> IO:
154
+ if is_blob_url(str(path_or_url)):
155
+ return open_azure_blob(str(path_or_url), mode, encoding)
156
+ return open(path_or_url, mode, encoding=None if 'b' in mode else encoding)
157
+
158
+
159
+ class AzureBlobPath(PurePosixPath):
160
+ """
161
+ Implementation of pathlib.Path like interface for Azure Blob Storage.
162
+ """
163
+ container_client: ContainerClient
164
+ _parse_path = PurePosixPath._parse_args if hasattr(PurePosixPath, '_parse_args') else PurePosixPath._parse_path
165
+
166
+ def __new__(cls, *args, **kwargs):
167
+ """Override the old __new__ method. Parts are parsed in __init__"""
168
+ return object.__new__(cls)
169
+
170
+ def __init__(self, root: Union[str, 'AzureBlobPath', ContainerClient], *others: Union[str, PurePosixPath], pool_maxsize: int = 256, retries: int = 3):
171
+ if isinstance(root, AzureBlobPath):
172
+ self.container_client = root.container_client
173
+ parts = root.parts + others
174
+ elif isinstance(root, str):
175
+ url = root
176
+ container, path, sas = split_blob_url(url)
177
+ session = self._get_session(pool_maxsize=pool_maxsize, retries=retries)
178
+ if sas:
179
+ self.container_client = ContainerClient.from_container_url(container + sas, session=session)
180
+ else:
181
+ self.container_client = ContainerClient.from_container_url(container, credential=DEFAULT_CREDENTIAL, session=session)
182
+ parts = (path, *others)
183
+ elif isinstance(root, ContainerClient):
184
+ self.container_client = root
185
+ parts = others
186
+ else:
187
+ raise ValueError(f'Invalid root: {root}')
188
+
189
+ if hasattr(PurePosixPath, '_parse_args'):
190
+ # For compatibility with Python 3.10
191
+ drv, root, parts = PurePosixPath._parse_args(parts)
192
+ self._drv = drv
193
+ self._root = root
194
+ self._parts = parts
195
+ else:
196
+ super().__init__(*parts)
197
+
198
+ def _get_session(self, pool_maxsize: int = 1024, retries: int = 3) -> requests.Session:
199
+ session = requests.Session()
200
+ retry_strategy = Retry(
201
+ total=retries,
202
+ status_forcelist=[429, 500, 502, 503, 504],
203
+ allowed_methods=["HEAD", "GET", "PUT", "DELETE"],
204
+ backoff_factor=1,
205
+ raise_on_status=False,
206
+ read=retries,
207
+ connect=retries,
208
+ redirect=retries,
209
+ )
210
+ adapter = requests.adapters.HTTPAdapter(pool_connections=pool_maxsize, pool_maxsize=pool_maxsize, max_retries=retry_strategy)
211
+ session.mount('http://', adapter)
212
+ session.mount('https://', adapter)
213
+ return session
214
+
215
+ def _from_parsed_parts(self, drv, root, parts):
216
+ "For compatibility with Python 3.10"
217
+ return AzureBlobPath(self.container_client, drv, root, *parts)
218
+
219
+ def with_segments(self, *pathsegments):
220
+ return AzureBlobPath(self.container_client, *pathsegments)
221
+
222
+ @property
223
+ def path(self) -> str:
224
+ return '/'.join(self.parts)
225
+
226
+ @property
227
+ def blob_client(self) -> BlobClient:
228
+ return self.container_client.get_blob_client(self.path)
229
+
230
+ @property
231
+ def url(self) -> str:
232
+ if len(self.parts) == 0:
233
+ return self.container_client.url
234
+ return self.container_client.get_blob_client(self.path).url
235
+
236
+ @property
237
+ def container_name(self) -> str:
238
+ return self.container_client.container_name
239
+
240
+ @property
241
+ def account_name(self) -> str:
242
+ return self.container_client.account_name
243
+
244
+ def __str__(self):
245
+ return self.url
246
+
247
+ def __repr__(self):
248
+ return self.url
249
+
250
+ def open(self, mode: str = 'r', encoding: str = 'utf-8', cache_blob: bool = False, **kwargs) -> IO:
251
+ return open_azure_blob(self.blob_client, mode, encoding, cache_blob=cache_blob, **kwargs)
252
+
253
+ def __truediv__(self, other: Union[str, Path]) -> 'AzureBlobPath':
254
+ return self.joinpath(other)
255
+
256
+ def mkdir(self, parents: bool = False, exist_ok: bool = False):
257
+ pass
258
+
259
+ def iterdir(self) -> Generator['AzureBlobPath', None, None]:
260
+ path = self.path
261
+ if not path.endswith('/'):
262
+ path += '/'
263
+ for item in self.container_client.walk_blobs(self.path):
264
+ yield AzureBlobPath(self.container_client, item.name)
265
+
266
+ def glob(self, pattern: str) -> Generator['AzureBlobPath', None, None]:
267
+ special_chars = ".^$+{}[]()|/"
268
+ for char in special_chars:
269
+ pattern = pattern.replace(char, "\\" + char)
270
+ pattern = pattern.replace('**', './/.')
271
+ pattern = pattern.replace('*', '[^/]*')
272
+ pattern = pattern.replace('.//.', '.*')
273
+ pattern = "^" + pattern + "$"
274
+ reg = re.compile(pattern)
275
+
276
+ for item in self.container_client.list_blobs(self.path):
277
+ if reg.match(os.path.relpath(item.name, self.path)):
278
+ yield AzureBlobPath(self.container_client, item.name)
279
+
280
+ def exists(self) -> bool:
281
+ return self.blob_client.exists()
282
+
283
+ def read_bytes(self, cache_blob: bool = False) -> bytes:
284
+ with self.open('rb', cache_blob=cache_blob) as f:
285
+ return f.read()
286
+
287
+ def read_text(self, encoding: str = 'utf-8', cache_blob: bool = False) -> str:
288
+ with self.open('r', encoding=encoding, cache_blob=cache_blob) as f:
289
+ return f.read()
290
+
291
+ def write_bytes(self, data: bytes):
292
+ self.blob_client.upload_blob(data, overwrite=True)
293
+
294
+ def write_text(self, data: str, encoding: str = 'utf-8'):
295
+ self.blob_client.upload_blob(data.encode(encoding), overwrite=True)
296
+
297
+ def unlink(self):
298
+ self.blob_client.delete_blob()
299
+
300
+ def new_client(self) -> 'AzureBlobPath':
301
+ return AzureBlobPath(self.container_client.url, self.path)
302
+
303
+
304
+ class SmartPath(Path, AzureBlobPath):
305
+ """
306
+ Supports both local file paths and Azure Blob Storage URLs.
307
+ """
308
+ def __new__(cls, first: Union[Path, str], *others: Union[str, PurePosixPath]) -> Union[Path, AzureBlobPath]:
309
+ if is_blob_url(str(first)):
310
+ return AzureBlobPath(str(first), *others)
311
+ return Path(first, *others)
312
+
313
+
314
+
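A hedged sketch of `SmartPath`, which returns a plain `pathlib.Path` for local paths and an `AzureBlobPath` for blob URLs (account, container and blob names below are placeholders):

    # Local paths behave exactly like pathlib.Path.
    p = SmartPath("./workspace/example.txt")
    p.parent.mkdir(parents=True, exist_ok=True)
    p.write_text("hello")

    # Blob URLs (optionally carrying a SAS token) behave like AzureBlobPath.
    b = SmartPath("https://myaccount.blob.core.windows.net/mycontainer/some/blob.txt")
    if b.exists():
        print(b.read_text(cache_blob=True))   # optionally caches the blob under ./.blobcache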
moge/utils/download.py ADDED
@@ -0,0 +1,55 @@
1
+ from pathlib import Path
2
+ from typing import *
3
+ import requests
4
+
5
+ from tqdm import tqdm
6
+
7
+
8
+ __all__ = ["download_file", "download_bytes"]
9
+
10
+
11
+ def download_file(url: str, filepath: Union[str, Path], headers: dict = None, resume: bool = True) -> None:
12
+ # Ensure headers is a dict if not provided
13
+ headers = headers or {}
14
+
15
+ # Initialize local variables
16
+ file_path = Path(filepath)
17
+ downloaded_bytes = 0
18
+
19
+ # Check if we should resume the download
20
+ if resume and file_path.exists():
21
+ downloaded_bytes = file_path.stat().st_size
22
+ headers['Range'] = f"bytes={downloaded_bytes}-"
23
+
24
+ # Make a GET request to fetch the file
25
+ with requests.get(url, stream=True, headers=headers) as response:
26
+ response.raise_for_status() # This will raise an HTTPError if the status is 4xx/5xx
27
+
28
+ # Calculate the total size to download
29
+ total_size = downloaded_bytes + int(response.headers.get('content-length', 0))
30
+
31
+ # Display a progress bar while downloading
32
+ with (
33
+ tqdm(desc=f"Downloading {file_path.name}", total=total_size, unit='B', unit_scale=True, leave=False) as pbar,
34
+ open(file_path, 'ab') as file,
35
+ ):
36
+ # Set the initial position of the progress bar
37
+ pbar.update(downloaded_bytes)
38
+
39
+ # Write the content to the file in chunks
40
+ for chunk in response.iter_content(chunk_size=4096):
41
+ file.write(chunk)
42
+ pbar.update(len(chunk))
43
+
44
+
45
+ def download_bytes(url: str, headers: dict = None) -> bytes:
46
+ # Ensure headers is a dict if not provided
47
+ headers = headers or {}
48
+
49
+ # Make a GET request to fetch the file
50
+ with requests.get(url, stream=True, headers=headers) as response:
51
+ response.raise_for_status() # This will raise an HTTPError if the status is 4xx/5xx
52
+
53
+ # Read the content of the response
54
+ return response.content
55
+
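A brief sketch of `download_file`; when `resume=True` and a partial file already exists, the remaining bytes are requested with an HTTP `Range` header (URL and paths are placeholders):

    from pathlib import Path

    Path("checkpoints").mkdir(parents=True, exist_ok=True)
    download_file(
        "https://example.com/files/model.pt",   # placeholder URL; the server must support Range requests to resume
        "checkpoints/model.pt",
        resume=True,
    )
    meta = download_bytes("https://example.com/files/meta.json")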
moge/utils/geometry_numpy.py ADDED
@@ -0,0 +1,175 @@
1
+ from typing import *
2
+ from functools import partial
3
+ import math
4
+
5
+ import numpy as np
6
+ import utils3d
7
+
8
+ from .tools import timeit
9
+
10
+ def weighted_mean_numpy(x: np.ndarray, w: np.ndarray = None, axis: Union[int, Tuple[int,...]] = None, keepdims: bool = False, eps: float = 1e-7) -> np.ndarray:
11
+ if w is None:
12
+ return np.mean(x, axis=axis)
13
+ else:
14
+ w = w.astype(x.dtype)
15
+ return (x * w).mean(axis=axis) / np.clip(w.mean(axis=axis), eps, None)
16
+
17
+
18
+ def harmonic_mean_numpy(x: np.ndarray, w: np.ndarray = None, axis: Union[int, Tuple[int,...]] = None, keepdims: bool = False, eps: float = 1e-7) -> np.ndarray:
19
+ if w is None:
20
+ return 1 / (1 / np.clip(x, eps, None)).mean(axis=axis)
21
+ else:
22
+ w = w.astype(x.dtype)
23
+ return 1 / (weighted_mean_numpy(1 / (x + eps), w, axis=axis, keepdims=keepdims, eps=eps) + eps)
24
+
25
+
26
+ def image_plane_uv_numpy(width: int, height: int, aspect_ratio: float = None, dtype: np.dtype = np.float32) -> np.ndarray:
27
+ "UV with left-top corner as (-width / diagonal, -height / diagonal) and right-bottom corner as (width / diagonal, height / diagonal)"
28
+ if aspect_ratio is None:
29
+ aspect_ratio = width / height
30
+
31
+ span_x = aspect_ratio / (1 + aspect_ratio ** 2) ** 0.5
32
+ span_y = 1 / (1 + aspect_ratio ** 2) ** 0.5
33
+
34
+ u = np.linspace(-span_x * (width - 1) / width, span_x * (width - 1) / width, width, dtype=dtype)
35
+ v = np.linspace(-span_y * (height - 1) / height, span_y * (height - 1) / height, height, dtype=dtype)
36
+ u, v = np.meshgrid(u, v, indexing='xy')
37
+ uv = np.stack([u, v], axis=-1)
38
+ return uv
39
+
40
+
41
+ def focal_to_fov_numpy(focal: np.ndarray):
42
+ return 2 * np.arctan(0.5 / focal)
43
+
44
+
45
+ def fov_to_focal_numpy(fov: np.ndarray):
46
+ return 0.5 / np.tan(fov / 2)
47
+
48
+
49
+ def intrinsics_to_fov_numpy(intrinsics: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
50
+ fov_x = focal_to_fov_numpy(intrinsics[..., 0, 0])
51
+ fov_y = focal_to_fov_numpy(intrinsics[..., 1, 1])
52
+ return fov_x, fov_y
53
+
54
+
55
+ def solve_optimal_shift_focal(uv: np.ndarray, xyz: np.ndarray, ransac_iters: int = None, ransac_hypothetical_size: float = 0.1, ransac_threshold: float = 0.1):
56
+ "Solve `min |focal * xy / (z + shift) - uv|` with respect to shift and focal"
57
+ from scipy.optimize import least_squares
58
+ uv, xy, z = uv.reshape(-1, 2), xyz[..., :2].reshape(-1, 2), xyz[..., 2].reshape(-1)
59
+
60
+ def fn(uv: np.ndarray, xy: np.ndarray, z: np.ndarray, shift: np.ndarray):
61
+ xy_proj = xy / (z + shift)[: , None]
62
+ f = (xy_proj * uv).sum() / np.square(xy_proj).sum()
63
+ err = (f * xy_proj - uv).ravel()
64
+ return err
65
+
66
+ initial_shift = 0 #-z.min(keepdims=True) + 1.0
67
+
68
+ if ransac_iters is None:
69
+ solution = least_squares(partial(fn, uv, xy, z), x0=initial_shift, ftol=1e-3, method='lm')
70
+ optim_shift = solution['x'].squeeze().astype(np.float32)
71
+ else:
72
+ best_err, best_shift = np.inf, None
73
+ for _ in range(ransac_iters):
74
+ maybe_inliers = np.random.choice(len(z), size=int(ransac_hypothetical_size * len(z)), replace=False)
75
+ solution = least_squares(partial(fn, uv[maybe_inliers], xy[maybe_inliers], z[maybe_inliers]), x0=initial_shift, ftol=1e-3, method='lm')
76
+ maybe_shift = solution['x'].squeeze().astype(np.float32)
77
+ confirmed_inliers = np.linalg.norm(fn(uv, xy, z, maybe_shift).reshape(-1, 2), axis=-1) < ransac_threshold
78
+ if confirmed_inliers.sum() > 10:
79
+ solution = least_squares(partial(fn, uv[confirmed_inliers], xy[confirmed_inliers], z[confirmed_inliers]), x0=maybe_shift, ftol=1e-3, method='lm')
80
+ better_shift = solution['x'].squeeze().astype(np.float32)
81
+ else:
82
+ better_shift = maybe_shift
83
+ err = np.linalg.norm(fn(uv, xy, z, better_shift).reshape(-1, 2), axis=-1).clip(max=ransac_threshold).mean()
84
+ if err < best_err:
85
+ best_err, best_shift = err, better_shift
86
+ initial_shift = best_shift
87
+
88
+ optim_shift = best_shift
89
+
90
+ xy_proj = xy / (z + optim_shift)[: , None]
91
+ optim_focal = (xy_proj * uv).sum() / (xy_proj * xy_proj).sum()
92
+
93
+ return optim_shift, optim_focal
94
+
95
+
96
+ def point_map_to_depth_numpy(points: np.ndarray, mask: np.ndarray = None, downsample_size: Tuple[int, int] = (64, 64)):
97
+ import cv2
98
+ assert points.shape[-1] == 3, "Points should be of shape (H, W, 3)"
99
+
100
+ height, width = points.shape[-3], points.shape[-2]
101
+ diagonal = (height ** 2 + width ** 2) ** 0.5
102
+
103
+ uv = image_plane_uv_numpy(width=width, height=height)
104
+
105
+ if mask is None:
106
+ points_lr = cv2.resize(points, downsample_size, interpolation=cv2.INTER_LINEAR).reshape(-1, 3)
107
+ uv_lr = cv2.resize(uv, downsample_size, interpolation=cv2.INTER_LINEAR).reshape(-1, 2)
108
+ else:
109
+ index, mask_lr = mask_aware_nearest_resize_numpy(mask, *downsample_size)
110
+ points_lr, uv_lr = points[index][mask_lr], uv[index][mask_lr]
111
+
112
+ if points_lr.size == 0:
113
+ return np.zeros((height, width)), 0, 0, 0
114
+
115
+ optim_shift, optim_focal = solve_optimal_shift_focal(uv_lr, points_lr, ransac_iters=None)
116
+
117
+ fov_x = 2 * np.arctan(width / diagonal / optim_focal)
118
+ fov_y = 2 * np.arctan(height / diagonal / optim_focal)
119
+
120
+ depth = points[:, :, 2] + optim_shift
121
+ return depth, fov_x, fov_y, optim_shift
122
+
123
+
124
+ def mask_aware_nearest_resize_numpy(mask: np.ndarray, target_width: int, target_height: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
125
+ """
126
+ Resize 2D map by nearest interpolation. Return the nearest neighbor index and mask of the resized map.
127
+
128
+ ### Parameters
129
+ - `mask`: Input 2D mask of shape (..., H, W)
130
+ - `target_width`: target width of the resized map
131
+ - `target_height`: target height of the resized map
132
+
133
+ ### Returns
134
+ - `nearest_idx`: tuple of index arrays (batch indices..., row index i, column index j), each of shape (..., target_height, target_width), suitable for advanced indexing into the original map.
135
+ - `target_mask`: Mask of the resized map of shape (..., target_height, target_width)
136
+ """
137
+ height, width = mask.shape[-2:]
138
+ filter_h_f, filter_w_f = max(1, height / target_height), max(1, width / target_width)
139
+ filter_h_i, filter_w_i = math.ceil(filter_h_f), math.ceil(filter_w_f)
140
+ filter_size = filter_h_i * filter_w_i
141
+ padding_h, padding_w = round(filter_h_f / 2), round(filter_w_f / 2)
142
+
143
+ # Window the original mask and uv
144
+ uv = utils3d.numpy.image_pixel_center(width=width, height=height, dtype=np.float32)
145
+ indices = np.arange(height * width, dtype=np.int32).reshape(height, width)
146
+ padded_uv = np.full((height + 2 * padding_h, width + 2 * padding_w, 2), 0, dtype=np.float32)
147
+ padded_uv[padding_h:padding_h + height, padding_w:padding_w + width] = uv
148
+ padded_mask = np.full((*mask.shape[:-2], height + 2 * padding_h, width + 2 * padding_w), False, dtype=bool)
149
+ padded_mask[..., padding_h:padding_h + height, padding_w:padding_w + width] = mask
150
+ padded_indices = np.full((height + 2 * padding_h, width + 2 * padding_w), 0, dtype=np.int32)
151
+ padded_indices[padding_h:padding_h + height, padding_w:padding_w + width] = indices
152
+ windowed_uv = utils3d.numpy.sliding_window_2d(padded_uv, (filter_h_i, filter_w_i), 1, axis=(0, 1))
153
+ windowed_mask = utils3d.numpy.sliding_window_2d(padded_mask, (filter_h_i, filter_w_i), 1, axis=(-2, -1))
154
+ windowed_indices = utils3d.numpy.sliding_window_2d(padded_indices, (filter_h_i, filter_w_i), 1, axis=(0, 1))
155
+
156
+ # Gather the target pixels's local window
157
+ target_uv = utils3d.numpy.image_uv(width=target_width, height=target_height, dtype=np.float32) * np.array([width, height], dtype=np.float32)
158
+ target_corner = target_uv - np.array((filter_w_f / 2, filter_h_f / 2), dtype=np.float32)
159
+ target_corner = np.round(target_corner - 0.5).astype(np.int32) + np.array((padding_w, padding_h), dtype=np.int32)
160
+
161
+ target_window_uv = windowed_uv[target_corner[..., 1], target_corner[..., 0], :, :, :].reshape(target_height, target_width, 2, filter_size) # (target_height, tgt_width, 2, filter_size)
162
+ target_window_mask = windowed_mask[..., target_corner[..., 1], target_corner[..., 0], :, :].reshape(*mask.shape[:-2], target_height, target_width, filter_size) # (..., target_height, tgt_width, filter_size)
163
+ target_window_indices = windowed_indices[target_corner[..., 1], target_corner[..., 0], :, :].reshape(target_height, target_width, filter_size) # (target_height, tgt_width, filter_size)
164
+
165
+ # Compute nearest neighbor in the local window for each pixel
166
+ dist = np.square(target_window_uv - target_uv[..., None])
167
+ dist = dist[..., 0, :] + dist[..., 1, :]
168
+ dist = np.where(target_window_mask, dist, np.inf) # (..., target_height, tgt_width, filter_size)
169
+ nearest_in_window = np.argmin(dist, axis=-1, keepdims=True) # (..., target_height, tgt_width, 1)
170
+ nearest_idx = np.take_along_axis(target_window_indices, nearest_in_window, axis=-1).squeeze(-1) # (..., target_height, tgt_width)
171
+ nearest_i, nearest_j = nearest_idx // width, nearest_idx % width
172
+ target_mask = np.any(target_window_mask, axis=-1)
173
+ batch_indices = [np.arange(n).reshape([1] * i + [n] + [1] * (mask.ndim - i - 1)) for i, n in enumerate(mask.shape[:-2])]
174
+
175
+ return (*batch_indices, nearest_i, nearest_j), target_mask
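A hedged sanity-check sketch of `point_map_to_depth_numpy`. It solves `min |focal * xy / (z + shift) - uv|` jointly over focal and shift on a downsampled grid, so for a point map synthesized from a known focal and shift it should recover them (and hence the FoV) up to solver tolerance:

    import numpy as np

    H, W = 240, 320
    uv = image_plane_uv_numpy(width=W, height=H)                      # (H, W, 2), aspect-normalized
    f_true, t_true = 1.2, 0.3
    depth_true = 2.0 + np.random.rand(H, W).astype(np.float32)        # ground-truth z in camera space

    xy = uv * (depth_true[..., None] / f_true)                        # consistent pinhole projection
    points = np.concatenate([xy, (depth_true - t_true)[..., None]], axis=-1)

    depth, fov_x, fov_y, shift = point_map_to_depth_numpy(points)
    print(abs(shift - t_true), np.abs(depth - depth_true).max())      # both should be close to 0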
moge/utils/geometry_torch.py ADDED
@@ -0,0 +1,231 @@
1
+ from typing import *
2
+ import math
3
+ from collections import namedtuple
4
+
5
+ import numpy as np
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+ import torch.types
10
+ import utils3d
11
+
12
+ from .tools import timeit
13
+ from .geometry_numpy import solve_optimal_shift_focal
14
+
15
+
16
+ def weighted_mean(x: torch.Tensor, w: torch.Tensor = None, dim: Union[int, torch.Size] = None, keepdim: bool = False, eps: float = 1e-7) -> torch.Tensor:
17
+ if w is None:
18
+ return x.mean(dim=dim, keepdim=keepdim)
19
+ else:
20
+ w = w.to(x.dtype)
21
+ return (x * w).mean(dim=dim, keepdim=keepdim) / w.mean(dim=dim, keepdim=keepdim).add(eps)
22
+
23
+
24
+ def harmonic_mean(x: torch.Tensor, w: torch.Tensor = None, dim: Union[int, torch.Size] = None, keepdim: bool = False, eps: float = 1e-7) -> torch.Tensor:
25
+ if w is None:
26
+ return x.add(eps).reciprocal().mean(dim=dim, keepdim=keepdim).reciprocal()
27
+ else:
28
+ w = w.to(x.dtype)
29
+ return weighted_mean(x.add(eps).reciprocal(), w, dim=dim, keepdim=keepdim, eps=eps).add(eps).reciprocal()
30
+
31
+
32
+ def geometric_mean(x: torch.Tensor, w: torch.Tensor = None, dim: Union[int, torch.Size] = None, keepdim: bool = False, eps: float = 1e-7) -> torch.Tensor:
33
+ if w is None:
34
+ return x.add(eps).log().mean(dim=dim).exp()
35
+ else:
36
+ w = w.to(x.dtype)
37
+ return weighted_mean(x.add(eps).log(), w, dim=dim, keepdim=keepdim, eps=eps).exp()
38
+
39
+
40
+ def image_plane_uv(width: int, height: int, aspect_ratio: float = None, dtype: torch.dtype = None, device: torch.device = None) -> torch.Tensor:
41
+ "UV with left-top corner as (-width / diagonal, -height / diagonal) and right-bottom corner as (width / diagonal, height / diagonal)"
42
+ if aspect_ratio is None:
43
+ aspect_ratio = width / height
44
+
45
+ span_x = aspect_ratio / (1 + aspect_ratio ** 2) ** 0.5
46
+ span_y = 1 / (1 + aspect_ratio ** 2) ** 0.5
47
+
48
+ u = torch.linspace(-span_x * (width - 1) / width, span_x * (width - 1) / width, width, dtype=dtype, device=device)
49
+ v = torch.linspace(-span_y * (height - 1) / height, span_y * (height - 1) / height, height, dtype=dtype, device=device)
50
+ u, v = torch.meshgrid(u, v, indexing='xy')
51
+ uv = torch.stack([u, v], dim=-1)
52
+ return uv
53
+
54
+
55
+ def gaussian_blur_2d(input: torch.Tensor, kernel_size: int, sigma: float) -> torch.Tensor:
56
+ kernel = torch.exp(-(torch.arange(-kernel_size // 2 + 1, kernel_size // 2 + 1, dtype=input.dtype, device=input.device) ** 2) / (2 * sigma ** 2))
57
+ kernel = kernel / kernel.sum()
58
+ kernel = (kernel[:, None] * kernel[None, :]).reshape(1, 1, kernel_size, kernel_size)
59
+ input = F.pad(input, (kernel_size // 2, kernel_size // 2, kernel_size // 2, kernel_size // 2), mode='replicate')
60
+ input = F.conv2d(input, kernel.repeat(input.shape[1], 1, 1, 1), groups=input.shape[1])  # depthwise blur, one kernel per channel
61
+ return input
62
+
63
+
64
+ def split_batch_fwd(fn: Callable, chunk_size: int, *args, **kwargs):
65
+ batch_size = next(x for x in (*args, *kwargs.values()) if isinstance(x, torch.Tensor)).shape[0]
66
+ n_chunks = batch_size // chunk_size + (batch_size % chunk_size > 0)
67
+ splited_args = tuple(arg.split(chunk_size, dim=0) if isinstance(arg, torch.Tensor) else [arg] * n_chunks for arg in args)
68
+ splited_kwargs = {k: v.split(chunk_size, dim=0) if isinstance(v, torch.Tensor) else [v] * n_chunks for k, v in kwargs.items()}
69
+ results = []
70
+ for i in range(n_chunks):
71
+ chunk_args = tuple(arg[i] for arg in splited_args)
72
+ chunk_kwargs = {k: v[i] for k, v in splited_kwargs.items()}
73
+ results.append(fn(*chunk_args, **chunk_kwargs))
74
+
75
+ if isinstance(results[0], tuple):
76
+ return tuple(torch.cat(r, dim=0) for r in zip(*results))
77
+ else:
78
+ return torch.cat(results, dim=0)
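A sketch of how this chunked-forward helper might be called to bound peak memory; `model` and `images` are placeholders:

images = torch.randn(100, 3, 518, 518)
# Equivalent to model(images), but runs the forward pass 8 samples at a time
# and concatenates the per-chunk outputs along dim 0.
features = split_batch_fwd(model, 8, images)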
79
+
80
+
81
+ def focal_to_fov(focal: torch.Tensor):
82
+ return 2 * torch.atan(0.5 / focal)
83
+
84
+
85
+ def fov_to_focal(fov: torch.Tensor):
86
+ return 0.5 / torch.tan(fov / 2)
87
+
88
+
89
+ def intrinsics_to_fov(intrinsics: torch.Tensor):
90
+ """
91
+ Returns field of view in radians from normalized intrinsics matrix.
92
+ ### Parameters:
93
+ - intrinsics: torch.Tensor of shape (..., 3, 3)
94
+
95
+ ### Returns:
96
+ - fov_x: torch.Tensor of shape (...)
97
+ - fov_y: torch.Tensor of shape (...)
98
+ """
99
+ focal_x = intrinsics[..., 0, 0]
100
+ focal_y = intrinsics[..., 1, 1]
101
+ return 2 * torch.atan(0.5 / focal_x), 2 * torch.atan(0.5 / focal_y)
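A small round-trip check of these conversions; focal lengths here are normalized, i.e. expressed in units of the corresponding image dimension:

fov = torch.deg2rad(torch.tensor(60.0))
focal = fov_to_focal(fov)                        # 0.5 / tan(30 deg), roughly 0.866
assert torch.allclose(focal_to_fov(focal), fov)  # round trip recovers the FoV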
102
+
103
+
104
+ def point_map_to_depth_legacy(points: torch.Tensor):
105
+ height, width = points.shape[-3:-1]
106
+ diagonal = (height ** 2 + width ** 2) ** 0.5
107
+ uv = image_plane_uv(width, height, dtype=points.dtype, device=points.device) # (H, W, 2)
108
+
109
+ # Solve least squares problem
110
+ b = (uv * points[..., 2:]).flatten(-3, -1) # (..., H * W * 2)
111
+ A = torch.stack([points[..., :2], -uv.expand_as(points[..., :2])], dim=-1).flatten(-4, -2) # (..., H * W * 2, 2)
112
+
113
+ M = A.transpose(-2, -1) @ A
114
+ solution = (torch.inverse(M + 1e-6 * torch.eye(2).to(A)) @ (A.transpose(-2, -1) @ b[..., None])).squeeze(-1)
115
+ focal, shift = solution.unbind(-1)
116
+
117
+ depth = points[..., 2] + shift[..., None, None]
118
+ fov_x = torch.atan(width / diagonal / focal) * 2
119
+ fov_y = torch.atan(height / diagonal / focal) * 2
120
+ return depth, fov_x, fov_y, shift
121
+
122
+
123
+ def point_map_to_depth(points: torch.Tensor, mask: torch.Tensor = None, downsample_size: Tuple[int, int] = (64, 64)):
124
+ """
125
+ Recover the depth map and FoV from a point map with unknown z shift and focal.
126
+
127
+ Note that it assumes:
128
+ - the optical center is at the center of the map
129
+ - the map is undistorted
130
+ - the map is isometric in the x and y directions
131
+
132
+ ### Parameters:
133
+ - `points: torch.Tensor` of shape (..., H, W, 3)
134
+ - `downsample_size: Tuple[int, int]` in (height, width), the size of the downsampled map. Downsampling produces an approximate solution but is efficient for large maps.
135
+
136
+ ### Returns:
137
+ - `depth: torch.Tensor` of shape (..., H, W)
138
+ - `fov_x: torch.Tensor` of shape (...)
139
+ - `fov_y: torch.Tensor` of shape (...)
140
+ - `shift: torch.Tensor` of shape (...), the z shift, making `depth = points[..., 2] + shift`
141
+ """
142
+ shape = points.shape
143
+ height, width = points.shape[-3], points.shape[-2]
144
+ diagonal = (height ** 2 + width ** 2) ** 0.5
145
+
146
+ points = points.reshape(-1, *shape[-3:])
147
+ mask = None if mask is None else mask.reshape(-1, *shape[-3:-1])
148
+ uv = image_plane_uv(width, height, dtype=points.dtype, device=points.device) # (H, W, 2)
149
+
150
+ points_lr = F.interpolate(points.permute(0, 3, 1, 2), downsample_size, mode='nearest').permute(0, 2, 3, 1)
151
+ uv_lr = F.interpolate(uv.unsqueeze(0).permute(0, 3, 1, 2), downsample_size, mode='nearest').squeeze(0).permute(1, 2, 0)
152
+ mask_lr = None if mask is None else F.interpolate(mask.to(torch.float32).unsqueeze(1), downsample_size, mode='nearest').squeeze(1) > 0
153
+
154
+ uv_lr_np = uv_lr.cpu().numpy()
155
+ points_lr_np = points_lr.detach().cpu().numpy()
156
+ mask_lr_np = None if mask is None else mask_lr.cpu().numpy()
157
+ optim_shift, optim_focal = [], []
158
+ for i in range(points.shape[0]):
159
+ points_lr_i_np = points_lr_np[i] if mask is None else points_lr_np[i][mask_lr_np[i]]
160
+ uv_lr_i_np = uv_lr_np if mask is None else uv_lr_np[mask_lr_np[i]]
161
+ optim_shift_i, optim_focal_i = solve_optimal_shift_focal(uv_lr_i_np, points_lr_i_np, ransac_iters=None)
162
+ optim_shift.append(float(optim_shift_i))
163
+ optim_focal.append(float(optim_focal_i))
164
+ optim_shift = torch.tensor(optim_shift, device=points.device, dtype=points.dtype)
165
+ optim_focal = torch.tensor(optim_focal, device=points.device, dtype=points.dtype)
166
+
167
+ fov_x = 2 * torch.atan(width / diagonal / optim_focal)
168
+ fov_y = 2 * torch.atan(height / diagonal / optim_focal)
169
+
170
+ depth = (points[..., 2] + optim_shift[:, None, None]).reshape(shape[:-1])
171
+ fov_x = fov_x.reshape(shape[:-3])
172
+ fov_y = fov_y.reshape(shape[:-3])
173
+ optim_shift = optim_shift.reshape(shape[:-3])
174
+
175
+ return depth, fov_x, fov_y, optim_shift
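A brief usage sketch; the shapes and the final intrinsics step are illustrative assumptions rather than part of this module:

# points: (B, H, W, 3) predicted point map, mask: (B, H, W) bool
depth, fov_x, fov_y, shift = point_map_to_depth(points, mask)
# depth equals points[..., 2] + shift; fov_x / fov_y are in radians.
fx = 0.5 / torch.tan(fov_x / 2)   # normalized focal lengths, if an intrinsics matrix is needed
fy = 0.5 / torch.tan(fov_y / 2)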
176
+
177
+
178
+ def mask_aware_nearest_resize(mask: torch.BoolTensor, target_width: int, target_height: int) -> Tuple[torch.LongTensor, torch.LongTensor, torch.BoolTensor]:
179
+ """
180
+ Resize 2D map by nearest interpolation. Return the nearest neighbor index and mask of the resized map.
181
+
182
+ ### Parameters
183
+ - `mask`: Input 2D mask of shape (..., H, W)
184
+ - `target_width`: target width of the resized map
185
+ - `target_height`: target height of the resized map
186
+
187
+ ### Returns
188
+ - `nearest_idx`: Nearest neighbor index of the resized map of shape (..., target_height, target_width) for each dimension
189
+ - `target_mask`: Mask of the resized map of shape (..., target_height, target_width)
190
+ """
191
+ height, width = mask.shape[-2:]
192
+ device = mask.device
193
+ filter_h_f, filter_w_f = max(1, height / target_height), max(1, width / target_width)
194
+ filter_h_i, filter_w_i = math.ceil(filter_h_f), math.ceil(filter_w_f)
195
+ filter_size = filter_h_i * filter_w_i
196
+ padding_h, padding_w = round(filter_h_f / 2), round(filter_w_f / 2)
197
+
198
+ # Window the original mask and uv
199
+ uv = utils3d.torch.image_pixel_center(width=width, height=height, dtype=torch.float32, device=device)
200
+ indices = torch.arange(height * width, dtype=torch.long, device=device).reshape(height, width)
201
+ padded_uv = torch.full((height + 2 * padding_h, width + 2 * padding_w, 2), 0, dtype=torch.float32, device=device)
202
+ padded_uv[padding_h:padding_h + height, padding_w:padding_w + width] = uv
203
+ padded_mask = torch.full((*mask.shape[:-2], height + 2 * padding_h, width + 2 * padding_w), False, dtype=torch.bool, device=device)
204
+ padded_mask[..., padding_h:padding_h + height, padding_w:padding_w + width] = mask
205
+ padded_indices = torch.full((height + 2 * padding_h, width + 2 * padding_w), 0, dtype=torch.long, device=device)
206
+ padded_indices[padding_h:padding_h + height, padding_w:padding_w + width] = indices
207
+ windowed_uv = utils3d.torch.sliding_window_2d(padded_uv, (filter_h_i, filter_w_i), 1, dim=(0, 1))
208
+ windowed_mask = utils3d.torch.sliding_window_2d(padded_mask, (filter_h_i, filter_w_i), 1, dim=(-2, -1))
209
+ windowed_indices = utils3d.torch.sliding_window_2d(padded_indices, (filter_h_i, filter_w_i), 1, dim=(0, 1))
210
+
211
+ # Gather each target pixel's local window
212
+ target_uv = utils3d.torch.image_uv(width=target_width, height=target_height, dtype=torch.float32, device=device) * torch.tensor([width, height], dtype=torch.float32, device=device)
213
+ target_corner = target_uv - torch.tensor((filter_w_f / 2, filter_h_f / 2), dtype=torch.float32, device=device)
214
+ target_corner = torch.round(target_corner - 0.5).long() + torch.tensor((padding_w, padding_h), dtype=torch.long, device=device)
215
+
216
+ target_window_uv = windowed_uv[target_corner[..., 1], target_corner[..., 0], :, :, :].reshape(target_height, target_width, 2, filter_size) # (target_height, tgt_width, 2, filter_size)
217
+ target_window_mask = windowed_mask[..., target_corner[..., 1], target_corner[..., 0], :, :].reshape(*mask.shape[:-2], target_height, target_width, filter_size) # (..., target_height, tgt_width, filter_size)
218
+ target_window_indices = windowed_indices[target_corner[..., 1], target_corner[..., 0], :, :].reshape(target_height, target_width, filter_size) # (target_height, tgt_width, filter_size)
219
+ target_window_indices = target_window_indices.expand_as(target_window_mask)
220
+
221
+ # Compute nearest neighbor in the local window for each pixel
222
+ dist = torch.where(target_window_mask, torch.norm(target_window_uv - target_uv[..., None], dim=-2), torch.inf) # (..., target_height, tgt_width, filter_size)
223
+ nearest = torch.argmin(dist, dim=-1, keepdim=True) # (..., target_height, tgt_width, 1)
224
+ nearest_idx = torch.gather(target_window_indices, index=nearest, dim=-1).squeeze(-1) # (..., target_height, tgt_width)
225
+ target_mask = torch.any(target_window_mask, dim=-1)
226
+ nearest_i, nearest_j = nearest_idx // width, nearest_idx % width
227
+ batch_indices = [torch.arange(n, device=device).reshape([1] * i + [n] + [1] * (mask.dim() - i - 1)) for i, n in enumerate(mask.shape[:-2])]
228
+
229
+ return (*batch_indices, nearest_i, nearest_j), target_mask
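A batched usage sketch mirroring the numpy version above, e.g. downsampling a point map and its mask together (shapes are illustrative):

# mask: (B, H, W) bool, points: (B, H, W, 3)
index, target_mask = mask_aware_nearest_resize(mask, target_width=64, target_height=64)
points_lr = points[index]     # (B, 64, 64, 3), advanced indexing with the returned batch/row/column indices
mask_lr = target_mask         # (B, 64, 64)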
230
+
231
+
moge/utils/io.py ADDED
@@ -0,0 +1,347 @@
1
+ import os
2
+ os.environ['OPENCV_IO_ENABLE_OPENEXR'] = '1'
3
+ from typing import IO
4
+ import zipfile
5
+ import json
6
+ import io
7
+ from typing import *
8
+ from pathlib import Path
9
+ import re
10
+
11
+ import numpy as np
12
+ import cv2
13
+
14
+ from .tools import timeit
15
+
16
+
17
+ LEGACY_SEGFORMER_CLASSES = [
18
+ 'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ',
19
+ 'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth',
20
+ 'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car',
21
+ 'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug',
22
+ 'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe',
23
+ 'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column',
24
+ 'signboard', 'chest of drawers', 'counter', 'sand', 'sink',
25
+ 'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path',
26
+ 'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door',
27
+ 'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table',
28
+ 'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove',
29
+ 'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar',
30
+ 'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower',
31
+ 'chandelier', 'awning', 'streetlight', 'booth', 'television receiver',
32
+ 'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister',
33
+ 'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van',
34
+ 'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything',
35
+ 'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent',
36
+ 'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank',
37
+ 'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake',
38
+ 'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce',
39
+ 'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen',
40
+ 'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass',
41
+ 'clock', 'flag'
42
+ ]
43
+ LEGACY_SEGFORMER_LABELS = {k: i for i, k in enumerate(LEGACY_SEGFORMER_CLASSES)}
44
+
45
+
46
+ def write_rgbd_zip(
47
+ file: Union[IO, os.PathLike],
48
+ image: Union[np.ndarray, bytes],
49
+ depth: Union[np.ndarray, bytes], mask: Union[np.ndarray, bytes],
50
+ segmentation_mask: Union[np.ndarray, bytes] = None, segmentation_labels: Union[Dict[str, int], bytes] = None,
51
+ intrinsics: np.ndarray = None,
52
+ normal: np.ndarray = None, normal_mask: np.ndarray = None,
53
+ meta: Union[Dict[str, Any], bytes] = None,
54
+ *, image_quality: int = 95, depth_type: Literal['linear', 'log', 'disparity'] = 'linear', depth_format: Literal['png', 'exr'] = 'png', depth_max_dynamic_range: float = 1e4, png_compression: int = 7
55
+ ):
56
+ """
57
+ Write RGBD data as zip archive containing the image, depth, mask, segmentation_mask, and meta data.
58
+ In the zip file there will be:
59
+ - `meta.json`: The meta data as a JSON file.
60
+ - `image.jpg`: The RGB image as a JPEG file.
61
+ - `depth.png/exr`: The depth map as a PNG or EXR file, depending on the `depth_type`.
62
+ - `mask.png` (optional): The mask as a uint8 PNG file.
63
+ - `segmentation_mask.png` (optional): The segformer mask as a uint8/uint16 PNG file.
64
+
65
+ You can provide the data either as np.ndarray or as bytes. If provided as np.ndarray, they will be properly processed and encoded.
66
+ If you provide them as bytes, they will be written as is, assuming they are already encoded.
67
+ """
68
+ if meta is None:
69
+ meta = {}
70
+ elif isinstance(meta, bytes):
71
+ meta = json.loads(meta.decode())
72
+
73
+ if isinstance(image, bytes):
74
+ image_bytes = image
75
+ elif isinstance(image, np.ndarray):
76
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
77
+ image_bytes = cv2.imencode('.jpg', image, [cv2.IMWRITE_JPEG_QUALITY, image_quality])[1].tobytes()
78
+
79
+ if isinstance(depth, bytes):
80
+ depth_bytes = depth
81
+ elif isinstance(depth, np.ndarray):
82
+ meta['depth_type'] = depth_type
83
+ if depth_type == 'linear':
84
+ if depth.dtype == np.float16:
85
+ depth_format = 'exr'
86
+ depth_bytes = cv2.imencode('.exr', depth.astype(np.float32), [cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_HALF])[1].tobytes()
87
+ elif np.issubdtype(depth.dtype, np.floating):
88
+ depth_format = 'exr'
89
+ depth_bytes = cv2.imencode('.exr', depth.astype(np.float32), [cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_FLOAT])[1].tobytes()
90
+ elif depth.dtype in [np.uint8, np.uint16]:
91
+ depth_format = 'png'
92
+ depth_bytes = cv2.imencode('.png', depth, [cv2.IMWRITE_PNG_COMPRESSION, png_compression])[1].tobytes()
93
+ elif depth_type == 'log':
94
+ depth_format = 'png'
95
+ depth = depth.astype(np.float32)
96
+ near = max(depth[mask].min(), 1e-3)
97
+ far = min(depth[mask].max(), near * depth_max_dynamic_range)
98
+ depth = ((np.log(depth.clip(near, far) / near) / np.log(far / near)).clip(0, 1) * 65535).astype(np.uint16)
99
+ depth_bytes = cv2.imencode('.png', depth, [cv2.IMWRITE_PNG_COMPRESSION, png_compression])[1].tobytes()
100
+ meta['depth_near'] = float(near)
101
+ meta['depth_far'] = float(far)
102
+ elif depth_type == 'disparity':
103
+ depth_format = 'png'
104
+ depth = depth.astype(np.float32)
105
+ depth = 1 / (depth + 1e-12)
106
+ depth = (depth / depth[mask].max()).clip(0, 1)
107
+ if len(np.unique(depth)) < 200:
108
+ depth = (depth * 255).astype(np.uint8)
109
+ else:
110
+ depth = (depth * 65535).astype(np.uint16)
111
+ depth_bytes = cv2.imencode('.png', depth, [cv2.IMWRITE_PNG_COMPRESSION, png_compression])[1].tobytes()
112
+
113
+ if isinstance(mask, bytes):
114
+ mask_bytes = mask
115
+ elif isinstance(mask, np.ndarray):
116
+ mask_bytes = cv2.imencode('.png', mask.astype(np.uint8) * 255)[1].tobytes()
117
+
118
+ if segmentation_mask is not None:
119
+ if isinstance(segmentation_mask, bytes):
120
+ segmentation_mask_bytes = segmentation_mask
121
+ else:
122
+ segmentation_mask_bytes = cv2.imencode('.png', segmentation_mask)[1].tobytes()
123
+ assert segmentation_labels is not None, "You provided a segmentation mask, but not the corresponding labels."
124
+ if isinstance(segmentation_labels, bytes):
125
+ segmentation_labels = json.loads(segmentation_labels)
126
+ meta['segmentation_labels'] = segmentation_labels
127
+
128
+ if intrinsics is not None:
129
+ meta['intrinsics'] = intrinsics.tolist()
130
+
131
+ if normal is not None:
132
+ if isinstance(normal, bytes):
133
+ normal_bytes = normal
134
+ elif isinstance(normal, np.ndarray):
135
+ normal = ((normal * [0.5, -0.5, -0.5] + 0.5).clip(0, 1) * 65535).astype(np.uint16)
136
+ normal = cv2.cvtColor(normal, cv2.COLOR_RGB2BGR)
137
+ normal_bytes = cv2.imencode('.png', normal, [cv2.IMWRITE_PNG_COMPRESSION, png_compression])[1].tobytes()
138
+ if normal_mask is None:
139
+ normal_mask = np.ones(image.shape[:2], dtype=bool)
140
+ normal_mask_bytes = cv2.imencode('.png', normal_mask.astype(np.uint8) * 255)[1].tobytes()
141
+
142
+ meta_bytes = meta if isinstance(meta, bytes) else json.dumps(meta).encode()
143
+
144
+ with zipfile.ZipFile(file, 'w') as z:
145
+ z.writestr('meta.json', meta_bytes)
146
+ z.writestr('image.jpg', image_bytes)
147
+ z.writestr(f'depth.{depth_format}', depth_bytes)
148
+ z.writestr('mask.png', mask_bytes)
149
+ if segmentation_mask is not None:
150
+ z.writestr('segmentation_mask.png', segmentation_mask_bytes)
151
+ if normal is not None:
152
+ z.writestr('normal.png', normal_bytes)
153
+ z.writestr('normal_mask.png', normal_mask_bytes)
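A usage sketch for writing one sample with log-encoded depth; the arrays and file name below are placeholders:

write_rgbd_zip(
    'sample.rgbd.zip',
    image=image,                    # (H, W, 3) uint8 RGB
    depth=depth, mask=depth > 0,    # (H, W) float32 depth and validity mask
    intrinsics=intrinsics,          # (3, 3) normalized intrinsics, stored in meta.json
    meta={'depth_unit': 'm'},
    depth_type='log',
)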
154
+
155
+
156
+ def read_rgbd_zip(file: Union[str, Path, IO], return_bytes: bool = False) -> Dict[str, Union[np.ndarray, Dict[str, Any], bytes]]:
157
+ """
158
+ Read an RGBD zip file and return the image, depth, mask, segmentation_mask, intrinsics, and meta data.
159
+
160
+ ### Parameters:
161
+ - `file: Union[str, Path, IO]`
162
+ The file path or file object to read from.
163
+ - `return_bytes: bool = False`
164
+ If True, return the image, depth, mask, and segmentation_mask as raw bytes.
165
+
166
+ ### Returns:
167
+ - `Dict[str, Union[np.ndarray, Dict[str, Any], bytes]]`
168
+ A dictionary with the following entries (missing items are omitted; if return_bytes is True, values are the raw encoded bytes):
169
+ - `image`: RGB numpy.ndarray of shape (H, W, 3).
170
+ - `depth`: float32 numpy.ndarray of shape (H, W).
171
+ - `mask`: bool numpy.ndarray of shape (H, W).
172
+ - `segmentation_mask`: uint8 numpy.ndarray of shape (H, W).
173
+ - `intrinsics`: float32 numpy.ndarray of shape (3, 3).
174
+ - `meta`: Dict[str, Any].
175
+ """
176
+ # Load & extract archive
177
+ with zipfile.ZipFile(file, 'r') as z:
178
+ meta = z.read('meta.json')
179
+ if not return_bytes:
180
+ meta = json.loads(z.read('meta.json'))
181
+
182
+ image = z.read('image.jpg')
183
+ if not return_bytes:
184
+ image = cv2.imdecode(np.frombuffer(z.read('image.jpg'), np.uint8), cv2.IMREAD_COLOR)
185
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
186
+
187
+ depth_name = next(s for s in z.namelist() if s.startswith('depth'))
188
+ depth = z.read(depth_name)
189
+ if not return_bytes:
190
+ depth = cv2.imdecode(np.frombuffer(z.read(depth_name), np.uint8), cv2.IMREAD_UNCHANGED)
191
+
192
+ if 'mask.png' in z.namelist():
193
+ mask = z.read('mask.png')
194
+ if not return_bytes:
195
+ mask = cv2.imdecode(np.frombuffer(z.read('mask.png'), np.uint8), cv2.IMREAD_UNCHANGED) > 0
196
+ else:
197
+ mask = None
198
+
199
+ if 'segformer_mask.png' in z.namelist():
200
+ # NOTE: Legacy support for segformer_mask.png
201
+ segmentation_mask = z.read('segformer_mask.png')
202
+ segmentation_labels = None
203
+ if not return_bytes:
204
+ segmentation_mask = cv2.imdecode(np.frombuffer(segmentation_mask, np.uint8), cv2.IMREAD_UNCHANGED)
205
+ segmentation_labels = LEGACY_SEGFORMER_LABELS
206
+ elif 'segmentation_mask.png' in z.namelist():
207
+ segmentation_mask = z.read('segmentation_mask.png')
208
+ segmentation_labels = None
209
+ if not return_bytes:
210
+ segmentation_mask = cv2.imdecode(np.frombuffer(segmentation_mask, np.uint8), cv2.IMREAD_UNCHANGED)
211
+ segmentation_labels = meta['segmentation_labels']
212
+ else:
213
+ segmentation_mask = None
214
+ segmentation_labels = None
215
+
216
+ if 'normal.png' in z.namelist():
217
+ normal = z.read('normal.png')
218
+ if not return_bytes:
219
+ normal = cv2.imdecode(np.frombuffer(z.read('normal.png'), np.uint8), cv2.IMREAD_UNCHANGED)
220
+ normal = cv2.cvtColor(normal, cv2.COLOR_BGR2RGB)
221
+ normal = (normal.astype(np.float32) / 65535 - 0.5) * [2.0, -2.0, -2.0]
222
+ normal = normal / np.linalg.norm(normal, axis=-1, keepdims=True)
223
+
224
+ if 'normal_mask.png' in z.namelist():
225
+ normal_mask = z.read('normal_mask.png')
226
+ normal_mask = cv2.imdecode(np.frombuffer(normal_mask, np.uint8), cv2.IMREAD_UNCHANGED) > 0
227
+ else:
228
+ normal_mask = np.ones(image.shape[:2], dtype=bool)
229
+ else:
230
+ normal, normal_mask = None, None
231
+
232
+ # recover linear depth
233
+ if not return_bytes:
234
+ if mask is None:
235
+ mask = np.ones(image.shape[:2], dtype=bool)
236
+ if meta['depth_type'] == 'linear':
237
+ depth = depth.astype(np.float32)
238
+ mask = mask & (depth > 0)
239
+ elif meta['depth_type'] == 'log':
240
+ near, far = meta['depth_near'], meta['depth_far']
241
+ if depth.dtype == np.uint16:
242
+ depth = depth.astype(np.float32) / 65535
243
+ elif depth.dtype == np.uint8:
244
+ depth = depth.astype(np.float32) / 255
245
+ depth = near ** (1 - depth) * far ** depth
246
+ mask = mask & ~np.isnan(depth)
247
+ elif meta['depth_type'] == 'disparity':
248
+ mask = mask & (depth > 0)
249
+ if depth.dtype == np.uint16:
250
+ depth = depth.astype(np.float32) / 65535
251
+ elif depth.dtype == np.uint8:
252
+ depth = depth.astype(np.float32) / 255
253
+ depth = 1 / (depth + 1e-12)
254
+
255
+ # intrinsics
256
+ if not return_bytes and 'intrinsics' in meta:
257
+ intrinsics = np.array(meta['intrinsics'], dtype=np.float32)
258
+ else:
259
+ intrinsics = None
260
+
261
+ # depth unit
262
+ if not return_bytes and 'depth_unit' in meta:
263
+ depth_unit_str = meta['depth_unit']
264
+ if r := re.match(r'([\d.]*)(\w*)', depth_unit_str):
265
+ digits, unit = r.groups()
266
+ depth_unit = float(digits or 1) * {'m': 1, 'cm': 0.01, 'mm': 0.001}[unit]
267
+ else:
268
+ depth_unit = None
269
+ else:
270
+ depth_unit = None
271
+
272
+ return_dict = {
273
+ 'image': image,
274
+ 'depth': depth,
275
+ 'mask': mask,
276
+ 'segmentation_mask': segmentation_mask,
277
+ 'segmentation_labels': segmentation_labels,
278
+ 'normal': normal,
279
+ 'normal_mask': normal_mask,
280
+ 'intrinsics': intrinsics,
281
+ 'depth_unit': depth_unit,
282
+ 'meta': meta,
283
+ }
284
+ return_dict = {k: v for k, v in return_dict.items() if v is not None}
285
+
286
+ return return_dict
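And the corresponding read-back; `depth_unit`, when present, converts the stored depth to meters (file name is the same placeholder as above):

sample = read_rgbd_zip('sample.rgbd.zip')
depth, valid = sample['depth'], sample['mask']
if 'depth_unit' in sample:
    depth = depth * sample['depth_unit']   # metric depth in meters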
287
+
288
+ def write_rgbxyz(file: Union[IO, Path], image: np.ndarray, points: np.ndarray, mask: np.ndarray = None, image_quality: int = 95):
289
+ if isinstance(image, bytes):
290
+ image_bytes = image
291
+ elif isinstance(image, np.ndarray):
292
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
293
+ image_bytes = cv2.imencode('.jpg', image, [cv2.IMWRITE_JPEG_QUALITY, image_quality])[1].tobytes()
294
+
295
+ if isinstance(points, bytes):
296
+ points_bytes = points
297
+ elif isinstance(points, np.ndarray):
298
+ points_bytes = cv2.imencode('.exr', points.astype(np.float32), [cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_FLOAT])[1].tobytes()
299
+
300
+ if mask is None:
301
+ mask = np.ones(image.shape[:2], dtype=bool)
302
+ if isinstance(mask, bytes):
303
+ mask_bytes = mask
304
+ elif isinstance(mask, np.ndarray):
305
+ mask_bytes = cv2.imencode('.png', mask.astype(np.uint8) * 255)[1].tobytes()
306
+
307
+ is_archive = hasattr(file, 'write') or Path(file).suffix == '.zip'
308
+ if is_archive:
309
+ with zipfile.ZipFile(file, 'w') as z:
310
+ z.writestr('image.jpg', image_bytes)
311
+ z.writestr('points.exr', points_bytes)
312
+ if mask is not None:
313
+ z.writestr('mask.png', mask_bytes)
314
+ else:
315
+ file = Path(file)
316
+ file.mkdir(parents=True, exist_ok=True)
317
+ with open(file / 'image.jpg', 'wb') as f:
318
+ f.write(image_bytes)
319
+ with open(file / 'points.exr', 'wb') as f:
320
+ f.write(points_bytes)
321
+ if mask is not None:
322
+ with open(file / 'mask.png', 'wb') as f:
323
+ f.write(mask_bytes)
324
+
325
+
326
+ def read_rgbxyz(file: Union[IO, str, Path]) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
327
+ is_archive = hasattr(file, 'read') or Path(file).suffix == '.zip'
328
+ if is_archive:
329
+ with zipfile.ZipFile(file, 'r') as z:
330
+ image = cv2.imdecode(np.frombuffer(z.read('image.jpg'), np.uint8), cv2.IMREAD_COLOR)
331
+ points = cv2.imdecode(np.frombuffer(z.read('points.exr'), np.uint8), cv2.IMREAD_UNCHANGED)
332
+ if 'mask.png' in z.namelist():
333
+ mask = cv2.imdecode(np.frombuffer(z.read('mask.png'), np.uint8), cv2.IMREAD_UNCHANGED) > 0
334
+ else:
335
+ mask = np.ones(image.shape[:2], dtype=bool)
336
+ else:
337
+ file = Path(file)
338
+ file.mkdir(parents=True, exist_ok=True)
339
+ image = cv2.imread(str(file / 'image.jpg'), cv2.IMREAD_COLOR)
340
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
341
+ points = cv2.imread(str(file / 'points.exr'), cv2.IMREAD_UNCHANGED)
342
+ if (file /'mask.png').exists():
343
+ mask = cv2.imread(str(file / 'mask.png'), cv2.IMREAD_UNCHANGED) > 0
344
+ else:
345
+ mask = np.ones(image.shape[:2], dtype=bool)
346
+
347
+ return image, points, mask
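A round-trip sketch for the point-map variant (file name is a placeholder):

write_rgbxyz('pred.zip', image, points, mask)     # stores image.jpg, points.exr and mask.png in one zip
image2, points2, mask2 = read_rgbxyz('pred.zip')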
moge/utils/pipeline.py ADDED
@@ -0,0 +1,503 @@
1
+ from typing import *
2
+ from abc import abstractmethod
3
+ from queue import Empty, Full
4
+ from threading import Thread
5
+ from queue import Queue
6
+ from multiprocessing import Process
7
+ from threading import Thread, Event
8
+ import multiprocessing
9
+ import threading
10
+ import inspect
11
+ import time
12
+ import uuid
13
+ from copy import deepcopy
14
+ import itertools
15
+ import functools
16
+
17
+ __all__ = [
18
+ 'Node',
19
+ 'Link',
20
+ 'ConcurrentNode',
21
+ 'Worker',
22
+ 'WorkerFunction',
23
+ 'Provider',
24
+ 'ProviderFunction',
25
+ 'Sequential',
26
+ 'Batch',
27
+ 'Unbatch',
28
+ 'Parallel',
29
+ 'Graph',
30
+ 'Buffer',
31
+ ]
32
+
33
+ TERMINATE_CHECK_INTERVAL = 0.5
34
+
35
+
36
+ class _ItemWrapper:
37
+ def __init__(self, data: Any, id: Union[int, List[int]] = None):
38
+ self.data = data
39
+ self.id = id
40
+
41
+
42
+ class Terminate(Exception):
43
+ pass
44
+
45
+
46
+ def _get_queue_item(queue: Queue, terminate_flag: Event, timeout: float = None) -> _ItemWrapper:
47
+ while True:
48
+ try:
49
+ item: _ItemWrapper = queue.get(block=True, timeout=TERMINATE_CHECK_INTERVAL if timeout is None else min(timeout, TERMINATE_CHECK_INTERVAL))
50
+ if terminate_flag.is_set():
51
+ raise Terminate()
52
+ return item
53
+ except Empty:
54
+ if terminate_flag.is_set():
55
+ raise Terminate()
56
+
57
+ if timeout is not None:
58
+ timeout -= TERMINATE_CHECK_INTERVAL
59
+ if timeout <= 0:
60
+ raise Empty()
61
+
62
+
63
+ def _put_queue_item(queue: Queue, item: _ItemWrapper, terminate_flag: Event):
64
+ while True:
65
+ try:
66
+ queue.put(item, block=True, timeout=TERMINATE_CHECK_INTERVAL)
67
+ if terminate_flag.is_set():
68
+ raise Terminate()
69
+ return
70
+ except Full:
71
+ if terminate_flag.is_set():
72
+ raise Terminate()
73
+
74
+ class Node:
75
+ def __init__(self, in_buffer_size: int = 1, out_buffer_size: int = 1) -> None:
76
+ self.input: Queue = Queue(maxsize=in_buffer_size)
77
+ self.output: Queue = Queue(maxsize=out_buffer_size)
78
+ self.in_buffer_size = in_buffer_size
79
+ self.out_buffer_size = out_buffer_size
80
+
81
+ @abstractmethod
82
+ def start(self):
83
+ pass
84
+
85
+ @abstractmethod
86
+ def terminate(self):
87
+ pass
88
+
89
+ def stop(self):
90
+ self.terminate()
91
+ self.join()
92
+
93
+ @abstractmethod
94
+ def join(self):
95
+ pass
96
+
97
+ def put(self, data: Any, key: str = None, block: bool = True) -> None:
98
+ item = _ItemWrapper(data)
99
+ self.input.put(item, block=block)
100
+
101
+ def get(self, key: str = None, block: bool = True) -> Any:
102
+ item: _ItemWrapper = self.output.get(block=block)
103
+ return item.data
104
+
105
+ def __enter__(self):
106
+ self.start()
107
+ return self
108
+
109
+ def __exit__(self, exc_type, exc_value, traceback):
110
+ self.terminate()
111
+ self.join()
112
+
113
+
114
+ class ConcurrentNode(Node):
115
+ job: Union[Thread, Process]
116
+
117
+ def __init__(self, running_as: Literal['thread', 'process'] = 'thread', in_buffer_size: int = 1, out_buffer_size: int = 1) -> None:
118
+ super().__init__(in_buffer_size, out_buffer_size)
119
+ self.running_as = running_as
120
+
121
+ @abstractmethod
122
+ def _loop_fn(self, input: Queue, output: Queue, terminate_flag: Event):
123
+ pass
124
+
125
+ def start(self):
126
+ if self.running_as == 'thread':
127
+ terminate_flag = threading.Event()
128
+ job = Thread(target=self._loop_fn, args=(self.input, self.output, terminate_flag))
129
+ elif self.running_as == 'process':
130
+ terminate_flag = multiprocessing.Event()
131
+ job = Process(target=self._loop_fn, args=(self.input, self.output, terminate_flag))
132
+ job.start()
133
+ self.job = job
134
+ self.terminate_flag = terminate_flag
135
+
136
+ def terminate(self):
137
+ self.terminate_flag.set()
138
+
139
+ def join(self):
140
+ self.job.join()
141
+
142
+
143
+ class Worker(ConcurrentNode):
144
+ def __init__(self, running_as: Literal['thread', 'process'] = 'thread', in_buffer_size: int = 0, out_buffer_size: int = 0) -> None:
145
+ super().__init__(running_as, in_buffer_size, out_buffer_size)
146
+
147
+ def init(self) -> None:
148
+ """
149
+ This method is called when the thread is started, to initialize any resources that are only held in the thread.
150
+ """
151
+ pass
152
+
153
+ @abstractmethod
154
+ def work(self, *args, **kwargs) -> Union[Any, Dict[str, Any]]:
155
+ """
156
+ This method defines the job that the node should do for each input item.
157
+ An item obtained from the input queue is passed as the argument to this method, and the result is placed in the output queue.
158
+ The method is executed concurrently with other nodes.
159
+ """
160
+ pass
161
+
162
+ def _loop_fn(self, input: Queue, output: Queue, terminate_flag: Event):
163
+ self.init()
164
+ try:
165
+ while True:
166
+ item = _get_queue_item(input, terminate_flag)
167
+ result = self.work(item.data)
168
+ _put_queue_item(output, _ItemWrapper(result, item.id), terminate_flag)
169
+
170
+ except Terminate:
171
+ return
172
+
173
+
174
+ class Provider(ConcurrentNode):
175
+ """
176
+ A node that provides data to successive nodes. It takes no input and provides data to the output queue.
177
+ """
178
+ def __init__(self, running_as: Literal['thread', 'process'], out_buffer_size: int = 1) -> None:
179
+ super().__init__(running_as, 0, out_buffer_size)
180
+
181
+ def init(self) -> None:
182
+ """
183
+ This method is called when the thread or process is started, to initialize any resources that are only held in the thread or process.
184
+ """
185
+ pass
186
+
187
+ @abstractmethod
188
+ def provide(self) -> Generator[Any, None, None]:
189
+ pass
190
+
191
+ def _loop_fn(self, input: Queue, output: Queue, terminate_flag: Event):
192
+ self.init()
193
+ try:
194
+ for data in self.provide():
195
+ _put_queue_item(output, _ItemWrapper(data), terminate_flag)
196
+ except Terminate:
197
+ return
198
+
199
+
200
+ class WorkerFunction(Worker):
201
+ def __init__(self, fn: Callable, running_as: Literal['thread', 'process'] = 'thread', in_buffer_size: int = 1, out_buffer_size: int = 1) -> None:
202
+ super().__init__(running_as, in_buffer_size, out_buffer_size)
203
+ self.fn = fn
204
+
205
+ def work(self, *args, **kwargs):
206
+ return self.fn(*args, **kwargs)
207
+
208
+
209
+ class ProviderFunction(Provider):
210
+ def __init__(self, fn: Callable, running_as: Literal['thread', 'process'] = 'thread', out_buffer_size: int = 1) -> None:
211
+ super().__init__(running_as, out_buffer_size)
212
+ self.fn = fn
213
+
214
+ def provide(self):
215
+ for item in self.fn():
216
+ yield item
217
+
218
+
219
+ class Link:
220
+ def __init__(self, src: Queue, dst: Queue):
221
+ self.src = src
222
+ self.dst = dst
223
+
224
+ def _thread_fn(self):
225
+ try:
226
+ while True:
227
+ item = _get_queue_item(self.src, self.terminate_flag)
228
+ _put_queue_item(self.dst, item, self.terminate_flag)
229
+ except Terminate:
230
+ return
231
+
232
+ def start(self):
233
+ self.terminate_flag = threading.Event()
234
+ self.thread = Thread(target=self._thread_fn)
235
+ self.thread.start()
236
+
237
+ def terminate(self):
238
+ self.terminate_flag.set()
239
+
240
+ def join(self):
241
+ self.thread.join()
242
+
243
+
244
+ class Graph(Node):
245
+ """
246
+ Graph pipeline of nodes and links
247
+ """
248
+ nodes: List[Node]
249
+ links: List[Link]
250
+
251
+ def __init__(self, in_buffer_size: int = 1, out_buffer_size: int = 1):
252
+ super().__init__(in_buffer_size, out_buffer_size)
253
+ self.nodes = []
254
+ self.links = []
255
+
256
+ def add(self, node: Node):
257
+ self.nodes.append(node)
258
+
259
+ def link(self, src: Union[Node, Tuple[Node, str]], dst: Union[Node, Tuple[Node, str]]):
260
+ """
261
+ Links the output of the source node to the input of the destination node.
262
+ If the source or destination node is None, the pipeline's input or output is used.
263
+ """
264
+ src_queue = self.input if src is None else src.output
265
+ dst_queue = self.output if dst is None else dst.input
266
+ self.links.append(Link(src_queue, dst_queue))
267
+
268
+ def chain(self, nodes: Iterable[Node]):
269
+ """
270
+ Link the output of each node to the input of the next node.
271
+ """
272
+ nodes = list(nodes)
273
+ for i in range(len(nodes) - 1):
274
+ self.link(nodes[i], nodes[i + 1])
275
+
276
+ def start(self):
277
+ for node in self.nodes:
278
+ node.start()
279
+ for link in self.links:
280
+ link.start()
281
+
282
+ def terminate(self):
283
+ for node in self.nodes:
284
+ node.terminate()
285
+ for link in self.links:
286
+ link.terminate()
287
+
288
+ def join(self):
289
+ for node in self.nodes:
290
+ node.join()
291
+ for link in self.links:
292
+ link.join()
293
+
294
+ def __iter__(self):
295
+ providers = [node for node in self.nodes if isinstance(node, Provider)]
296
+ if len(providers) == 0:
297
+ raise ValueError("No provider node found in the pipeline. If you want to iterate over the pipeline, the pipeline must be driven by a provider node.")
298
+ with self:
299
+ # while all(provider.job.is_alive() for provider in providers):
300
+ while True:
301
+ yield self.get()
302
+
303
+ def __call__(self, data: Any) -> Any:
304
+ """
305
+ Submit data to the pipeline's input queue, and return the output data asynchronously.
306
+ NOTE: The pipeline must be streamed (i.e., every output item is uniquely associated with an input item) for this to work.
307
+ """
308
+ # TODO
309
+
310
+
311
+ class Sequential(Graph):
312
+ """
313
+ Pipeline of nodes in sequential order, where each node takes the output of the previous node as input.
314
+ The order of input and output items is preserved (FIFO)
315
+ """
316
+ def __init__(self, nodes: List[Union[Node, Callable]], function_running_as: Literal['thread', 'process'] = 'thread', in_buffer_size: int = 1, out_buffer_size: int = 1):
317
+ """
318
+ Initialize the pipeline with a list of nodes to execute sequentially.
319
+ ### Parameters:
320
+ - nodes: List of nodes or functions to execute sequentially. Generator functions are wrapped in provider nodes, and other functions are wrapped in worker nodes.
321
+ - function_running_as: Whether to wrap the function as a thread or process worker. Default is 'thread'.
322
+ - in_buffer_size: Maximum size of the input queue of the pipeline. Default is 1.
323
+ - out_buffer_size: Maximum size of the output queue of the pipeline. Default is 1.
324
+ """
325
+ super().__init__(in_buffer_size, out_buffer_size)
326
+ for node in nodes:
327
+ if isinstance(node, Node):
328
+ pass
329
+ elif isinstance(node, Callable):
330
+ if inspect.isgeneratorfunction(node):
331
+ node = ProviderFunction(node, function_running_as)
332
+ else:
333
+ node = WorkerFunction(node, function_running_as)
334
+ else:
335
+ raise ValueError(f"Invalid node type: {type(node)}")
336
+ self.add(node)
337
+ self.chain([None, *self.nodes, None])
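A sketch of building a pipeline from plain functions; `load_image` and `predict` are hypothetical callables, and the generator becomes the driving Provider node:

def list_paths():                       # generator function -> Provider node
    for path in ['a.jpg', 'b.jpg']:
        yield path

pipeline = Sequential([list_paths, load_image, predict], function_running_as='thread')
for result in pipeline:                 # outputs are produced in FIFO order
    print(result)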
338
+
339
+
340
+ class Parallel(Node):
341
+ """
342
+ A FIFO node that runs multiple nodes in parallel to process the input items. Each input item is handed to whichever node becomes available.
343
+ NOTE: It is FIFO if and only if all the nested nodes are FIFO.
344
+ """
345
+ nodes: List[Node]
346
+
347
+ def __init__(self, nodes: Iterable[Node], in_buffer_size: int = 1, out_buffer_size: int = 1, function_running_as: Literal['thread', 'process'] = 'thread'):
348
+ super().__init__(in_buffer_size, out_buffer_size)
349
+ self.nodes = []
350
+ for node in nodes:
351
+ if isinstance(node, Node):
352
+ pass
353
+ elif isinstance(node, Callable):
354
+ if inspect.isgeneratorfunction(node):
355
+ node = ProviderFunction(node, function_running_as)
356
+ else:
357
+ node = WorkerFunction(node, function_running_as)
358
+ else:
359
+ raise ValueError(f"Invalid node type: {type(node)}")
360
+ self.nodes.append(node)
361
+ self.output_order = Queue()
362
+ self.lock = threading.Lock()
363
+
364
+ def _in_thread_fn(self, node: Node):
365
+ try:
366
+ while True:
367
+ with self.lock:
368
+ # A better idea: first make sure its node is vacant, then get it a new item.
369
+ # Currently we will not be able to know which node is busy until there is at least one item already waiting in the queue of the node.
370
+ # This could lead to suboptimal scheduling.
371
+ item = _get_queue_item(self.input, self.terminate_flag)
372
+ self.output_order.put(node.output)
373
+ _put_queue_item(node.input, item, self.terminate_flag)
374
+ except Terminate:
375
+ return
376
+
377
+ def _out_thread_fn(self):
378
+ try:
379
+ while True:
380
+ queue = _get_queue_item(self.output_order, self.terminate_flag)
381
+ item = _get_queue_item(queue, self.terminate_flag)
382
+ _put_queue_item(self.output, item, self.terminate_flag)
383
+ except Terminate:
384
+ return
385
+
386
+ def start(self):
387
+ self.terminate_flag = threading.Event()
388
+ self.in_threads = []
389
+ for node in self.nodes:
390
+ thread = Thread(target=self._in_thread_fn, args=(node,))
391
+ thread.start()
392
+ self.in_threads.append(thread)
393
+ thread = Thread(target=self._out_thread_fn)
394
+ thread.start()
395
+ self.out_thread = thread
396
+ for node in self.nodes:
397
+ node.start()
398
+
399
+ def terminate(self):
400
+ self.terminate_flag.set()
401
+ for node in self.nodes:
402
+ node.terminate()
403
+
404
+ def join(self):
405
+ for thread in self.in_threads:
406
+ thread.join()
407
+ self.out_thread.join()
408
+
409
+
410
+ class UnorderedParallel(Graph):
411
+ """
412
+ Pipeline of nodes in parallel, where each input item is handed to whichever node becomes available.
413
+ NOTE: The order of the output items is NOT guaranteed to be the same as the input items, depending on how fast the nodes handle their input.
414
+ """
415
+ def __init__(self, nodes: List[Union[Node, Callable]], function_running_as: Literal['thread', 'process'] = 'thread', in_buffer_size: int = 1, out_buffer_size: int = 1):
416
+ """
417
+ Initialize the pipeline with a list of nodes to execute in parallel. If a function is given, it is wrapped in a worker node.
418
+ ### Parameters:
419
+ - nodes: List of nodes or functions to execute in parallel. Generator functions are wrapped in provider nodes, and other functions are wrapped in worker nodes.
420
+ - function_running_as: Whether to wrap the function as a thread or process worker. Default is 'thread'.
421
+ - in_buffer_size: Maximum size of the input queue of the pipeline. Default is 1.
422
+ - out_buffer_size: Maximum size of the output queue of the pipeline. Default is 1.
423
+ """
424
+ super().__init__(in_buffer_size, out_buffer_size)
425
+ for node in nodes:
426
+ if isinstance(node, Node):
427
+ pass
428
+ elif isinstance(node, Callable):
429
+ if inspect.isgeneratorfunction(node):
430
+ node = ProviderFunction(node, function_running_as)
431
+ else:
432
+ node = WorkerFunction(node, function_running_as)
433
+ else:
434
+ raise ValueError(f"Invalid node type: {type(node)}")
435
+ self.add(node)
436
+ for i in range(len(nodes)):
437
+ self.chain([None, self.nodes[i], None])
438
+
439
+
440
+ class Batch(ConcurrentNode):
441
+ """
442
+ Groups every `batch_size` items into a batch (a list of items) and passes the batch to successive nodes.
443
+ The `patience` parameter specifies the maximum time to wait for a batch to be filled before sending it to the next node,
444
+ i.e., when the earliest item in the batch is out of `patience` seconds, the batch is sent regardless of its size.
445
+ """
446
+ def __init__(self, batch_size: int, patience: float = None, in_buffer_size: int = 1, out_buffer_size: int = 1):
447
+ assert batch_size > 0, "Batch size must be greater than 0."
448
+ super().__init__('thread', in_buffer_size, out_buffer_size)
449
+ self.batch_size = batch_size
450
+ self.patience = patience
451
+
452
+ def _loop_fn(self, input: Queue, output: Queue, terminate_flag: Event):
453
+ try:
454
+ while True:
455
+ batch_id, batch_data = [], []
456
+ # Try to fill the batch
457
+ for i in range(self.batch_size):
458
+ if i == 0 or self.patience is None:
459
+ timeout = None
460
+ else:
461
+ timeout = self.patience - (time.time() - earliest_time)
462
+ if timeout < 0:
463
+ break
464
+ try:
465
+ item = _get_queue_item(input, terminate_flag, timeout)
466
+ except Empty:
467
+ break
468
+
469
+ if i == 0:
470
+ earliest_time = time.time()
471
+ batch_data.append(item.data)
472
+ batch_id.append(item.id)
473
+
474
+ batch = _ItemWrapper(batch_data, batch_id)
475
+ _put_queue_item(output, batch, terminate_flag)
476
+ except Terminate:
477
+ return
478
+
479
+
480
+ class Unbatch(ConcurrentNode):
481
+ """
482
+ Ungroups every batch (a list of items) into individual items and passes them to successive nodes.
483
+ """
484
+ def __init__(self, in_buffer_size: int = 1, out_buffer_size: int = 1):
485
+ super().__init__('thread', in_buffer_size, out_buffer_size)
486
+
487
+ def _loop_fn(self, input: Queue, output: Queue, terminate_flag: Event):
488
+ try:
489
+ while True:
490
+ batch = _get_queue_item(input, terminate_flag)
491
+ for id, data in zip(batch.id or itertools.repeat(None), batch.data):
492
+ item = _ItemWrapper(data, id)
493
+ _put_queue_item(output, item, terminate_flag)
494
+ except Terminate:
495
+ return
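These two nodes are typically used as a pair around a worker that processes whole batches; `BatchedPredictor` below is a hypothetical Worker whose `work` method takes and returns a list:

pipeline = Sequential([
    Batch(batch_size=8, patience=0.5),   # wait at most 0.5 s to fill a batch of 8
    BatchedPredictor(),                  # work(List[item]) -> List[result]
    Unbatch(),                           # split each batch back into individual items
])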
496
+
497
+
498
+ class Buffer(Node):
499
+ "A FIFO node that buffers items in a queue. Useful for achieving better temporal balance when its successor node has variable processing time."
500
+ def __init__(self, size: int):
501
+ super().__init__(size, size)
502
+ self.size = size
503
+ self.input = self.output = Queue(maxsize=size)
moge/utils/tools.py ADDED
@@ -0,0 +1,240 @@
1
+ from typing import *
2
+ import time
3
+ from pathlib import Path
4
+ from numbers import Number
5
+
6
+
7
+ def catch_exception(fn):
8
+ def wrapper(*args, **kwargs):
9
+ try:
10
+ return fn(*args, **kwargs)
11
+ except Exception as e:
12
+ import traceback
13
+ print(f"Exception in {fn.__name__}({', '.join(repr(arg) for arg in args)}, {', '.join(f'{k}={v!r}' for k, v in kwargs.items())})")
14
+ traceback.print_exc(chain=False)
15
+ time.sleep(0.1)
16
+ return None
17
+ return wrapper
18
+
19
+
20
+ class CallbackOnException:
21
+ def __init__(self, callback: Callable, exception: type):
22
+ self.exception = exception
23
+ self.callback = callback
24
+
25
+ def __enter__(self):
26
+ return self
27
+
28
+ def __exit__(self, exc_type, exc_val, exc_tb):
29
+ if isinstance(exc_val, self.exception):
30
+ self.callback()
31
+ return True
32
+ return False
33
+
34
+ def traverse_nested_dict_keys(d: Dict[str, Dict]) -> Generator[Tuple[str, ...], None, None]:
35
+ for k, v in d.items():
36
+ if isinstance(v, dict):
37
+ for sub_key in traverse_nested_dict_keys(v):
38
+ yield (k, ) + sub_key
39
+ else:
40
+ yield (k, )
41
+
42
+
43
+ def get_nested_dict(d: Dict[str, Dict], keys: Tuple[str, ...], default: Any = None):
44
+ for k in keys:
45
+ d = d.get(k, default)
46
+ if d is None:
47
+ break
48
+ return d
49
+
50
+ def set_nested_dict(d: Dict[str, Dict], keys: Tuple[str, ...], value: Any):
51
+ for k in keys[:-1]:
52
+ d = d.setdefault(k, {})
53
+ d[keys[-1]] = value
54
+
55
+
56
+ def key_average(list_of_dicts: list) -> Dict[str, Any]:
57
+ """
58
+ Returns a dictionary with the average value of each key in the input list of dictionaries.
59
+ """
60
+ _nested_dict_keys = set()
61
+ for d in list_of_dicts:
62
+ _nested_dict_keys.update(traverse_nested_dict_keys(d))
63
+ _nested_dict_keys = sorted(_nested_dict_keys)
64
+ result = {}
65
+ for k in _nested_dict_keys:
66
+ values = [
67
+ get_nested_dict(d, k) for d in list_of_dicts
68
+ if get_nested_dict(d, k) is not None
69
+ ]
70
+ avg = sum(values) / len(values) if values else float('nan')
71
+ set_nested_dict(result, k, avg)
72
+ return result
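For example, averaging per-sample metric dictionaries, including nested keys:

metrics = [
    {'abs_rel': 0.10, 'delta': {'1.25': 0.90}},
    {'abs_rel': 0.20, 'delta': {'1.25': 0.80}},
]
key_average(metrics)   # -> {'abs_rel': 0.15, 'delta': {'1.25': 0.85}} (up to float rounding)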
73
+
74
+
75
+ def flatten_nested_dict(d: Dict[str, Any], parent_key: Tuple[str, ...] = None) -> Dict[Tuple[str, ...], Any]:
76
+ """
77
+ Flattens a nested dictionary into a single-level dictionary, with keys as tuples.
78
+ """
79
+ items = []
80
+ if parent_key is None:
81
+ parent_key = ()
82
+ for k, v in d.items():
83
+ new_key = parent_key + (k, )
84
+ if isinstance(v, MutableMapping):
85
+ items.extend(flatten_nested_dict(v, new_key).items())
86
+ else:
87
+ items.append((new_key, v))
88
+ return dict(items)
89
+
90
+
91
+ def unflatten_nested_dict(d: Dict[str, Any]) -> Dict[str, Any]:
92
+ """
93
+ Unflattens a single-level dictionary into a nested dictionary, with keys as tuples.
94
+ """
95
+ result = {}
96
+ for k, v in d.items():
97
+ sub_dict = result
98
+ for k_ in k[:-1]:
99
+ if k_ not in sub_dict:
100
+ sub_dict[k_] = {}
101
+ sub_dict = sub_dict[k_]
102
+ sub_dict[k[-1]] = v
103
+ return result
104
+
105
+
106
+ def read_jsonl(file):
107
+ import json
108
+ with open(file, 'r') as f:
109
+ data = f.readlines()
110
+ return [json.loads(line) for line in data]
111
+
112
+
113
+ def write_jsonl(data: List[dict], file):
114
+ import json
115
+ with open(file, 'w') as f:
116
+ for item in data:
117
+ f.write(json.dumps(item) + '\n')
118
+
119
+
120
+ def save_metrics(save_path: Union[str, Path], all_metrics: Dict[str, List[Dict]]):
121
+ import pandas as pd
122
+ import json
123
+
124
+ with open(save_path, 'w') as f:
125
+ json.dump(all_metrics, f, indent=4)
126
+
127
+
128
+ def to_hierachical_dataframe(data: List[Dict[Tuple[str, ...], Any]]):
129
+ import pandas as pd
130
+ data = [flatten_nested_dict(d) for d in data]
131
+ df = pd.DataFrame(data)
132
+ df = df.sort_index(axis=1)
133
+ df.columns = pd.MultiIndex.from_tuples(df.columns)
134
+ return df
135
+
136
+
137
+ def recursive_replace(d: Union[List, Dict, str], mapping: Dict[str, str]):
138
+ if isinstance(d, str):
139
+ for old, new in mapping.items():
140
+ d = d.replace(old, new)
141
+ elif isinstance(d, list):
142
+ for i, item in enumerate(d):
143
+ d[i] = recursive_replace(item, mapping)
144
+ elif isinstance(d, dict):
145
+ for k, v in d.items():
146
+ d[k] = recursive_replace(v, mapping)
147
+ return d
148
+
149
+
150
+ class timeit:
151
+ _history: Dict[str, List['timeit']] = {}
152
+
153
+ def __init__(self, name: str = None, verbose: bool = True, multiple: bool = False):
154
+ self.name = name
155
+ self.verbose = verbose
156
+ self.start = None
157
+ self.end = None
158
+ self.multiple = multiple
159
+ if multiple and name not in timeit._history:
160
+ timeit._history[name] = []
161
+
162
+ def __call__(self, func: Callable):
163
+ import inspect
164
+ if inspect.iscoroutinefunction(func):
165
+ async def wrapper(*args, **kwargs):
166
+ with timeit(self.name or func.__qualname__):
167
+ ret = await func(*args, **kwargs)
168
+ return ret
169
+ return wrapper
170
+ else:
171
+ def wrapper(*args, **kwargs):
172
+ with timeit(self.name or func.__qualname__):
173
+ ret = func(*args, **kwargs)
174
+ return ret
175
+ return wrapper
176
+
177
+ def __enter__(self):
178
+ self.start = time.time()
179
+
180
+ @property
181
+ def time(self) -> float:
182
+ assert self.start is not None, "Time not yet started."
183
+ assert self.end is not None, "Time not yet ended."
184
+ return self.end - self.start
185
+
186
+ @property
187
+ def history(self) -> List['timeit']:
188
+ return timeit._history.get(self.name, [])
189
+
190
+ def __exit__(self, exc_type, exc_val, exc_tb):
191
+ self.end = time.time()
192
+ if self.multiple:
193
+ timeit._history[self.name].append(self)
194
+ if self.verbose:
195
+ if self.multiple:
196
+ avg = sum(t.time for t in timeit._history[self.name]) / len(timeit._history[self.name])
197
+ print(f"{self.name or 'It'} took {avg} seconds on average.")
198
+ else:
199
+ print(f"{self.name or 'It'} took {self.time} seconds.")
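Both usages are supported, as a context manager or as a decorator:

with timeit('load'):
    data = read_jsonl('metrics.jsonl')

@timeit('inference')
def run():
    ...          # prints "inference took ... seconds." on each call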
200
+
201
+
202
+ def strip_common_prefix_suffix(strings: List[str]) -> List[str]:
203
+ first = strings[0]
204
+
205
+ for start in range(len(first)):
206
+ if any(s[start] != strings[0][start] for s in strings):
207
+ break
208
+
209
+ for end in range(1, min(len(s) for s in strings)):
210
+ if any(s[-end] != first[-end] for s in strings):
211
+ break
212
+
213
+ return [s[start:len(s) - end + 1] for s in strings]
214
+
215
+
216
+ def multithead_execute(inputs: List[Any], num_workers: int, pbar = None):
217
+ from concurrent.futures import ThreadPoolExecutor
218
+ from contextlib import nullcontext
219
+ from tqdm import tqdm
220
+
221
+ if pbar is not None:
222
+ pbar.total = len(inputs) if hasattr(inputs, '__len__') else None
223
+ else:
224
+ pbar = tqdm(total=len(inputs) if hasattr(inputs, '__len__') else None)
225
+
226
+ def decorator(fn: Callable):
227
+ with (
228
+ ThreadPoolExecutor(max_workers=num_workers) as executor,
229
+ pbar
230
+ ):
231
+ pbar.refresh()
232
+ @catch_exception
233
+ def _fn(input):
234
+ ret = fn(input)
235
+ pbar.update()
236
+ return ret
237
+ executor.map(_fn, inputs)
238
+ executor.shutdown(wait=True)
239
+
240
+ return decorator
moge/utils/vis.py ADDED
@@ -0,0 +1,51 @@
1
+ import numpy as np
2
+ import matplotlib
3
+
4
+
5
+ def colorize_depth(depth: np.ndarray, mask: np.ndarray = None, normalize: bool = True, cmap: str = 'Spectral') -> np.ndarray:
6
+ if mask is None:
7
+ depth = np.where(depth > 0, depth, np.nan)
8
+ else:
9
+ depth = np.where((depth > 0) & mask, depth, np.nan)
10
+ disp = 1 / depth
11
+ if normalize:
12
+ min_disp, max_disp = np.nanquantile(disp, 0.001), np.nanquantile(disp, 0.999)
13
+ disp = (disp - min_disp) / (max_disp - min_disp)
14
+ colored = np.nan_to_num(matplotlib.colormaps[cmap](1.0 - disp), 0)
15
+ colored = (colored.clip(0, 1) * 255).astype(np.uint8)[:, :, :3]
16
+ return colored
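A quick sketch of saving the visualization to disk; the cv2 round trip is only an assumed convenience, since the colorized output is RGB:

import cv2
vis = colorize_depth(depth, mask=depth > 0)     # (H, W, 3) uint8, RGB
cv2.imwrite('depth_vis.png', cv2.cvtColor(vis, cv2.COLOR_RGB2BGR))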
17
+
18
+
19
+ def colorize_depth_affine(depth: np.ndarray, mask: np.ndarray = None, cmap: str = 'Spectral') -> np.ndarray:
20
+ if mask is not None:
21
+ depth = np.where(mask, depth, np.nan)
22
+
23
+ min_depth, max_depth = np.nanquantile(depth, 0.001), np.nanquantile(depth, 0.999)
24
+ depth = (depth - min_depth) / (max_depth - min_depth)
25
+ colored = np.nan_to_num(matplotlib.colormaps[cmap](depth), 0)
26
+ colored = (colored.clip(0, 1) * 255).astype(np.uint8)[:, :, :3]
27
+ return colored
28
+
29
+
30
+ def colorize_disparity(disparity: np.ndarray, mask: np.ndarray = None, normalize: bool = True, cmap: str = 'Spectral') -> np.ndarray:
31
+ if mask is not None:
32
+ disparity = np.where(mask, disparity, np.nan)
33
+
34
+ if normalize:
35
+ min_disp, max_disp = np.nanquantile(disparity, 0.001), np.nanquantile(disparity, 0.999)
36
+ disparity = (disparity - min_disp) / (max_disp - min_disp)
37
+ colored = np.nan_to_num(matplotlib.colormaps[cmap](1.0 - disparity), 0)
38
+ colored = (colored.clip(0, 1) * 255).astype(np.uint8)[:, :, :3]
39
+ return colored
40
+
41
+
42
+ def colorize_segmentation(segmentation: np.ndarray, cmap: str = 'Set1') -> np.ndarray:
43
+ colored = matplotlib.colormaps[cmap]((segmentation % 20) / 20)
44
+ colored = (colored.clip(0, 1) * 255).astype(np.uint8)[:, :, :3]
45
+ return colored
46
+
47
+
48
+ def colorize_normal(normal: np.ndarray) -> np.ndarray:
49
+ normal = normal * [0.5, -0.5, -0.5] + 0.5
50
+ normal = (normal.clip(0, 1) * 255).astype(np.uint8)
51
+ return normal
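A minimal sketch of `colorize_depth` above (hypothetical depth map): masked or non-positive pixels come out black, valid pixels are colored by normalized disparity.

    import numpy as np
    from moge.utils.vis import colorize_depth

    depth = np.random.uniform(1.0, 10.0, (480, 640)).astype(np.float32)
    mask = depth < 8.0
    vis = colorize_depth(depth, mask)   # (480, 640, 3) uint8, 'Spectral' colormap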
moge/utils/webfile.py ADDED
@@ -0,0 +1,73 @@
1
+ import requests
2
+ from typing import *
3
+
4
+ __all__ = ["WebFile"]
5
+
6
+
7
+ class WebFile:
8
+ def __init__(self, url: str, session: Optional[requests.Session] = None, headers: Optional[Dict[str, str]] = None, size: Optional[int] = None):
9
+ self.url = url
10
+ self.session = session or requests.Session()
11
+ self.session.headers.update(headers or {})
12
+ self._offset = 0
13
+ self.size = size if size is not None else self._fetch_size()
14
+
15
+ def _fetch_size(self):
16
+ with self.session.get(self.url, stream=True) as response:
17
+ response.raise_for_status()
18
+ content_length = response.headers.get("Content-Length")
19
+ if content_length is None:
20
+ raise ValueError("Missing Content-Length in header")
21
+ return int(content_length)
22
+
23
+ def _fetch_data(self, offset: int, n: int) -> bytes:
24
+         headers = {"Range": f"bytes={offset}-{min(offset + n - 1, self.size - 1)}"}
25
+ response = self.session.get(self.url, headers=headers)
26
+ response.raise_for_status()
27
+ return response.content
28
+
29
+ def seekable(self) -> bool:
30
+ return True
31
+
32
+ def tell(self) -> int:
33
+ return self._offset
34
+
35
+ def available(self) -> int:
36
+ return self.size - self._offset
37
+
38
+ def seek(self, offset: int, whence: int = 0) -> None:
39
+ if whence == 0:
40
+ new_offset = offset
41
+ elif whence == 1:
42
+ new_offset = self._offset + offset
43
+ elif whence == 2:
44
+ new_offset = self.size + offset
45
+ else:
46
+ raise ValueError("Invalid value for whence")
47
+
48
+ self._offset = max(0, min(new_offset, self.size))
49
+
50
+ def read(self, n: Optional[int] = None) -> bytes:
51
+ if n is None or n < 0:
52
+ n = self.available()
53
+ else:
54
+ n = min(n, self.available())
55
+
56
+ if n == 0:
57
+ return b''
58
+
59
+ data = self._fetch_data(self._offset, n)
60
+ self._offset += len(data)
61
+
62
+ return data
63
+
64
+ def close(self) -> None:
65
+ pass
66
+
67
+ def __enter__(self):
68
+ return self
69
+
70
+ def __exit__(self, exc_type, exc_value, traceback):
71
+ pass
72
+
73
+
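A short sketch of how `WebFile` is meant to be used (the URL is a placeholder): it exposes a seekable, read-only file interface backed by HTTP Range requests.

    from moge.utils.webfile import WebFile

    f = WebFile('https://example.com/big.bin')   # placeholder URL
    f.seek(1024)
    chunk = f.read(4096)   # downloads only bytes 1024..5119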
moge/utils/webzipfile.py ADDED
@@ -0,0 +1,128 @@
1
+ from typing import *
2
+ import io
3
+ import os
4
+ from zipfile import (
5
+ ZipInfo, BadZipFile, ZipFile, ZipExtFile,
6
+ sizeFileHeader, structFileHeader, stringFileHeader,
7
+ _FH_SIGNATURE, _FH_FILENAME_LENGTH, _FH_EXTRA_FIELD_LENGTH, _FH_GENERAL_PURPOSE_FLAG_BITS,
8
+ _MASK_COMPRESSED_PATCH, _MASK_STRONG_ENCRYPTION, _MASK_UTF_FILENAME, _MASK_ENCRYPTED
9
+ )
10
+ import struct
11
+ from requests import Session
12
+
13
+ from .webfile import WebFile
14
+
15
+
16
+ class _SharedWebFile(WebFile):
17
+ def __init__(self, webfile: WebFile, pos: int):
18
+ super().__init__(webfile.url, webfile.session, size=webfile.size)
19
+ self.seek(pos)
20
+
21
+
22
+ class WebZipFile(ZipFile):
23
+ "Lock-free version of ZipFile that reads from a WebFile, allowing for concurrent reads."
24
+ def __init__(self, url: str, session: Optional[Session] = None, headers: Optional[Dict[str, str]] = None):
25
+ """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
26
+ or append 'a'."""
27
+ webf = WebFile(url, session=session, headers=headers)
28
+ super().__init__(webf, mode='r')
29
+
30
+ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
31
+ """Return file-like object for 'name'.
32
+
33
+ name is a string for the file name within the ZIP file, or a ZipInfo
34
+ object.
35
+
36
+ mode should be 'r' to read a file already in the ZIP file, or 'w' to
37
+ write to a file newly added to the archive.
38
+
39
+ pwd is the password to decrypt files (only used for reading).
40
+
41
+ When writing, if the file size is not known in advance but may exceed
42
+ 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
43
+ files. If the size is known in advance, it is best to pass a ZipInfo
44
+ instance for name, with zinfo.file_size set.
45
+ """
46
+ if mode not in {"r", "w"}:
47
+ raise ValueError('open() requires mode "r" or "w"')
48
+ if pwd and (mode == "w"):
49
+ raise ValueError("pwd is only supported for reading files")
50
+ if not self.fp:
51
+ raise ValueError(
52
+ "Attempt to use ZIP archive that was already closed")
53
+
54
+ assert mode == "r", "Only read mode is supported for now"
55
+
56
+ # Make sure we have an info object
57
+ if isinstance(name, ZipInfo):
58
+ # 'name' is already an info object
59
+ zinfo = name
60
+ elif mode == 'w':
61
+ zinfo = ZipInfo(name)
62
+ zinfo.compress_type = self.compression
63
+ zinfo._compresslevel = self.compresslevel
64
+ else:
65
+ # Get info object for name
66
+ zinfo = self.getinfo(name)
67
+
68
+ if mode == 'w':
69
+ return self._open_to_write(zinfo, force_zip64=force_zip64)
70
+
71
+ if self._writing:
72
+ raise ValueError("Can't read from the ZIP file while there "
73
+ "is an open writing handle on it. "
74
+ "Close the writing handle before trying to read.")
75
+
76
+ # Open for reading:
77
+ self._fileRefCnt += 1
78
+ zef_file = _SharedWebFile(self.fp, zinfo.header_offset)
79
+
80
+ try:
81
+ # Skip the file header:
82
+ fheader = zef_file.read(sizeFileHeader)
83
+ if len(fheader) != sizeFileHeader:
84
+ raise BadZipFile("Truncated file header")
85
+ fheader = struct.unpack(structFileHeader, fheader)
86
+ if fheader[_FH_SIGNATURE] != stringFileHeader:
87
+ raise BadZipFile("Bad magic number for file header")
88
+
89
+ fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
90
+ if fheader[_FH_EXTRA_FIELD_LENGTH]:
91
+ zef_file.seek(fheader[_FH_EXTRA_FIELD_LENGTH], whence=1)
92
+
93
+ if zinfo.flag_bits & _MASK_COMPRESSED_PATCH:
94
+ # Zip 2.7: compressed patched data
95
+ raise NotImplementedError("compressed patched data (flag bit 5)")
96
+
97
+ if zinfo.flag_bits & _MASK_STRONG_ENCRYPTION:
98
+ # strong encryption
99
+ raise NotImplementedError("strong encryption (flag bit 6)")
100
+
101
+ if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & _MASK_UTF_FILENAME:
102
+ # UTF-8 filename
103
+ fname_str = fname.decode("utf-8")
104
+ else:
105
+ fname_str = fname.decode(self.metadata_encoding or "cp437")
106
+
107
+ if fname_str != zinfo.orig_filename:
108
+ raise BadZipFile(
109
+ 'File name in directory %r and header %r differ.'
110
+ % (zinfo.orig_filename, fname))
111
+
112
+ # check for encrypted flag & handle password
113
+ is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
114
+ if is_encrypted:
115
+ if not pwd:
116
+ pwd = self.pwd
117
+ if pwd and not isinstance(pwd, bytes):
118
+ raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
119
+ if not pwd:
120
+ raise RuntimeError("File %r is encrypted, password "
121
+ "required for extraction" % name)
122
+ else:
123
+ pwd = None
124
+
125
+ return ZipExtFile(zef_file, mode, zinfo, pwd, True)
126
+ except:
127
+ zef_file.close()
128
+ raise
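A sketch of `WebZipFile` usage (placeholder URL): only the central directory and the requested member are fetched, so large remote archives can be read selectively.

    from moge.utils.webzipfile import WebZipFile

    zf = WebZipFile('https://example.com/archive.zip')   # placeholder URL
    names = zf.namelist()
    with zf.open(names[0]) as member:
        data = member.read()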
packages.txt ADDED
@@ -0,0 +1 @@
1
+ python3-opencv
requirements.txt ADDED
@@ -0,0 +1,5 @@
1
+ opencv-python
2
+ plyfile
3
+ pygltflib
4
+ transformers
5
+ scikit-learn
utils3d/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ """
2
+ A package of common utility functions for 3D computer graphics and vision. It provides NumPy utilities in `utils3d.numpy`, PyTorch utilities in `utils3d.torch`, and IO utilities in `utils3d.io`.
3
+ """
4
+ import importlib
5
+
6
+ __all__ = ['numpy', 'torch', 'io']
7
+
8
+ def __getattr__(module_name: str):
9
+ return importlib.import_module(f'.{module_name}', __package__)
10
+
11
+ if __name__ == '__main__':
12
+ from . import torch
13
+ from . import numpy
14
+ from . import io
utils3d/io/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .wavefront_obj import *
2
+ from .colmap import *
3
+ from .ply import *
4
+ from .glb import *
utils3d/io/colmap.py ADDED
@@ -0,0 +1,139 @@
1
+ from typing import *
2
+ from pathlib import Path
3
+
4
+ import numpy as np
5
+ from scipy.spatial.transform import Rotation
6
+
7
+
8
+ __all__ = ['read_extrinsics_from_colmap', 'read_intrinsics_from_colmap', 'write_extrinsics_as_colmap', 'write_intrinsics_as_colmap']
9
+
10
+
11
+ def write_extrinsics_as_colmap(file: Union[str, Path], extrinsics: np.ndarray, image_names: Union[str, List[str]] = 'image_{i:04d}.png', camera_ids: List[int] = None):
12
+ """
13
+ Write extrinsics to colmap `images.txt` file.
14
+ Args:
15
+ file: Path to `images.txt` file.
16
+ extrinsics: (N, 4, 4) array of extrinsics.
17
+ image_names: str or List of str, image names. Length is N.
18
+ If str, it should be a format string with `i` as the index. (i starts from 1, in correspondence with IMAGE_ID in colmap)
19
+ camera_ids: List of int, camera ids. Length is N.
20
+ If None, it will be set to [1, 2, ..., N].
21
+ """
22
+ assert extrinsics.shape[1:] == (4, 4) and extrinsics.ndim == 3 or extrinsics.shape == (4, 4)
23
+ if extrinsics.ndim == 2:
24
+ extrinsics = extrinsics[np.newaxis, ...]
25
+ quats = Rotation.from_matrix(extrinsics[:, :3, :3]).as_quat()
26
+ trans = extrinsics[:, :3, 3]
27
+ if camera_ids is None:
28
+ camera_ids = list(range(1, len(extrinsics) + 1))
29
+ if isinstance(image_names, str):
30
+ image_names = [image_names.format(i=i) for i in range(1, len(extrinsics) + 1)]
31
+ assert len(extrinsics) == len(image_names) == len(camera_ids), \
32
+ f'Number of extrinsics ({len(extrinsics)}), image_names ({len(image_names)}), and camera_ids ({len(camera_ids)}) must be the same'
33
+ with open(file, 'w') as fp:
34
+ print("# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME", file=fp)
35
+ for i, (quat, t, name, camera_id) in enumerate(zip(quats.tolist(), trans.tolist(), image_names, camera_ids)):
36
+             # COLMAP stores quaternions in wxyz order, while scipy.spatial.transform.Rotation uses xyzw.
37
+ qx, qy, qz, qw = quat
38
+ tx, ty, tz = t
39
+ print(f'{i + 1} {qw:f} {qx:f} {qy:f} {qz:f} {tx:f} {ty:f} {tz:f} {camera_id:d} {name}', file=fp)
40
+             print(file=fp)  # second line of each image entry (its 2D points) is left empty
41
+
42
+
43
+ def write_intrinsics_as_colmap(file: Union[str, Path], intrinsics: np.ndarray, width: int, height: int, normalized: bool = False):
44
+ """
45
+ Write intrinsics to colmap `cameras.txt` file. Currently only support PINHOLE model (no distortion)
46
+ Args:
47
+ file: Path to `cameras.txt` file.
48
+ intrinsics: (N, 3, 3) array of intrinsics.
49
+ width: Image width.
50
+ height: Image height.
51
+         normalized: Whether the intrinsics are normalized. If True, they will be unnormalized (rescaled to pixel units) before writing.
52
+ """
53
+ assert intrinsics.shape[1:] == (3, 3) and intrinsics.ndim == 3 or intrinsics.shape == (3, 3)
54
+ if intrinsics.ndim == 2:
55
+ intrinsics = intrinsics[np.newaxis, ...]
56
+ if normalized:
57
+ intrinsics = intrinsics * np.array([width, height, 1])[:, None]
58
+ with open(file, 'w') as fp:
59
+ print("# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]", file=fp)
60
+ for i, intr in enumerate(intrinsics):
61
+ fx, fy, cx, cy = intr[0, 0], intr[1, 1], intr[0, 2], intr[1, 2]
62
+ print(f'{i + 1} PINHOLE {width:d} {height:d} {fx:f} {fy:f} {cx:f} {cy:f}', file=fp)
63
+
64
+
65
+ def read_extrinsics_from_colmap(file: Union[str, Path]) -> Union[np.ndarray, List[int], List[str]]:
66
+ """
67
+ Read extrinsics from colmap `images.txt` file.
68
+ Args:
69
+ file: Path to `images.txt` file.
70
+ Returns:
71
+ extrinsics: (N, 4, 4) array of extrinsics.
72
+ camera_ids: List of int, camera ids. Length is N. Note that camera ids in colmap typically starts from 1.
73
+ image_names: List of str, image names. Length is N.
74
+ """
75
+ with open(file) as fp:
76
+ lines = fp.readlines()
77
+ image_names, quats, trans, camera_ids = [], [], [], []
78
+ i_line = 0
79
+ for line in lines:
80
+ line = line.strip()
81
+ if line.startswith('#'):
82
+ continue
83
+ i_line += 1
84
+ if i_line % 2 == 0:
85
+ continue
86
+ image_id, qw, qx, qy, qz, tx, ty, tz, camera_id, name = line.split()
87
+ quats.append([float(qx), float(qy), float(qz), float(qw)])
88
+ trans.append([float(tx), float(ty), float(tz)])
89
+ camera_ids.append(int(camera_id))
90
+ image_names.append(name)
91
+
92
+ quats = np.array(quats, dtype=np.float32)
93
+ trans = np.array(trans, dtype=np.float32)
94
+ rotation = Rotation.from_quat(quats).as_matrix()
95
+ extrinsics = np.concatenate([
96
+ np.concatenate([rotation, trans[..., None]], axis=-1),
97
+ np.array([0, 0, 0, 1], dtype=np.float32)[None, None, :].repeat(len(quats), axis=0)
98
+ ], axis=-2)
99
+
100
+ return extrinsics, camera_ids, image_names
101
+
102
+
103
+ def read_intrinsics_from_colmap(file: Union[str, Path], normalize: bool = False) -> Tuple[List[int], np.ndarray, np.ndarray]:
104
+ """
105
+ Read intrinsics from colmap `cameras.txt` file.
106
+ Args:
107
+ file: Path to `cameras.txt` file.
108
+ normalize: Whether to normalize the intrinsics. If True, the intrinsics will be normalized. (mapping coordinates to [0, 1] range)
109
+ Returns:
110
+ camera_ids: List of int, camera ids. Length is N. Note that camera ids in colmap typically starts from 1.
111
+ intrinsics: (N, 3, 3) array of intrinsics.
112
+ distortions: (N, 5) array of distortions.
113
+ """
114
+ with open(file) as fp:
115
+ lines = fp.readlines()
116
+ intrinsics, distortions, camera_ids = [], [], []
117
+ for line in lines:
118
+ line = line.strip()
119
+ if not line or line.startswith('#'):
120
+ continue
121
+ camera_id, model, width, height, *params = line.split()
122
+ camera_id, width, height = int(camera_id), int(width), int(height)
123
+ if model == 'PINHOLE':
124
+ fx, fy, cx, cy = map(float, params[:4])
125
+ k1 = k2 = k3 = p1 = p2 = 0.0
126
+ elif model == 'OPENCV':
127
+ fx, fy, cx, cy, k1, k2, p1, p2, k3 = *map(float, params[:8]), 0.0
128
+ elif model == 'SIMPLE_RADIAL':
129
+ f, cx, cy, k = map(float, params[:4])
130
+ fx = fy = f
131
+             k1, k2, p1, p2, k3 = k, 0.0, 0.0, 0.0, 0.0
+         else:
+             raise ValueError(f"Unsupported camera model: {model}")
132
+ camera_ids.append(camera_id)
133
+ if normalize:
134
+ fx, fy, cx, cy = fx / width, fy / height, cx / width, cy / height
135
+ intrinsics.append([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
136
+ distortions.append([k1, k2, p1, p2, k3])
137
+ intrinsics = np.array(intrinsics, dtype=np.float32)
138
+ distortions = np.array(distortions, dtype=np.float32)
139
+ return camera_ids, intrinsics, distortions
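A minimal sketch of the two writers above, using a single identity camera (hypothetical values):

    import numpy as np
    from utils3d.io.colmap import write_extrinsics_as_colmap, write_intrinsics_as_colmap

    extrinsics = np.eye(4, dtype=np.float32)[None]                                   # (1, 4, 4), world-to-camera
    intrinsics = np.array([[[500., 0., 320.], [0., 500., 240.], [0., 0., 1.]]])      # (1, 3, 3), in pixels
    write_extrinsics_as_colmap('images.txt', extrinsics)
    write_intrinsics_as_colmap('cameras.txt', intrinsics, width=640, height=480)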
utils3d/io/glb.py ADDED
@@ -0,0 +1,105 @@
1
+ from typing import *
2
+ from pathlib import Path
3
+
4
+ import numpy as np
5
+
6
+
7
+ def write_glb(path: Union[str, Path], vertices: np.ndarray, faces: np.ndarray, vertex_colors: np.ndarray = None, uv: np.ndarray = None):
8
+ import pygltflib
9
+
10
+ has_colors = vertex_colors is not None
11
+ has_uv = uv is not None
12
+
13
+ triangles_bytes = faces.astype(np.uint32).flatten().tobytes()
14
+ vertices_bytes = vertices.astype(np.float32).tobytes()
15
+ vertex_colors_bytes = vertex_colors.astype(np.float32).tobytes() if has_colors else None
16
+ uv_bytes = uv.astype(np.float32).tobytes() if has_uv else None
17
+
18
+
19
+ gltf = pygltflib.GLTF2(
20
+ scene=0,
21
+ scenes=[pygltflib.Scene(nodes=[0])],
22
+ nodes=[pygltflib.Node(mesh=0)],
23
+ meshes=[
24
+ pygltflib.Mesh(
25
+ primitives=[
26
+ pygltflib.Primitive(
27
+ attributes=pygltflib.Attributes(
28
+ POSITION=1,
29
+ COLOR_0=2 if has_colors else None,
30
+ TEXCOORD_0=2 + has_colors if has_uv else None
31
+ ),
32
+ indices=0
33
+ )
34
+ ]
35
+ )
36
+ ],
37
+ accessors=list(filter(None, [
38
+ pygltflib.Accessor( # triangles accessor
39
+ bufferView=0,
40
+ componentType=pygltflib.UNSIGNED_INT,
41
+ count=faces.size,
42
+ type=pygltflib.SCALAR,
43
+ max=[int(faces.max())],
44
+ min=[int(faces.min())],
45
+ ),
46
+ pygltflib.Accessor( # vertices accessor
47
+ bufferView=1,
48
+ componentType=pygltflib.FLOAT,
49
+ count=len(vertices),
50
+ type=pygltflib.VEC3,
51
+ max=vertices.max(axis=0).tolist(),
52
+ min=vertices.min(axis=0).tolist(),
53
+ ),
54
+ pygltflib.Accessor( # vertex colors accessor
55
+ bufferView=2,
56
+ componentType=pygltflib.FLOAT,
57
+ count=len(vertices),
58
+ type=pygltflib.VEC3,
59
+ max=vertex_colors.max(axis=0).tolist(),
60
+ min=vertex_colors.min(axis=0).tolist(),
61
+ ) if has_colors else None,
62
+ pygltflib.Accessor( # uv accessor
63
+ bufferView=3,
64
+ componentType=pygltflib.FLOAT,
65
+ count=len(uv),
66
+ type=pygltflib.VEC2,
67
+ max=uv.max(axis=0).tolist(),
68
+ min=uv.min(axis=0).tolist(),
69
+ ) if has_uv else None,
70
+ ])),
71
+ bufferViews=list(filter(None, [
72
+ pygltflib.BufferView( # triangles buffer view
73
+ buffer=0,
74
+ byteLength=len(triangles_bytes),
75
+ target=pygltflib.ELEMENT_ARRAY_BUFFER,
76
+ ),
77
+ pygltflib.BufferView( # vertices buffer view
78
+ buffer=0,
79
+ byteOffset=len(triangles_bytes),
80
+ byteLength=len(vertices_bytes),
81
+ target=pygltflib.ARRAY_BUFFER,
82
+ ),
83
+ pygltflib.BufferView( # vertex colors buffer view
84
+ buffer=0,
85
+ byteOffset=len(triangles_bytes) + len(vertices_bytes),
86
+ byteLength=len(vertex_colors_bytes),
87
+ target=pygltflib.ARRAY_BUFFER,
88
+ ) if has_colors else None,
89
+ pygltflib.BufferView( # uv buffer view
90
+ buffer=0,
91
+ byteOffset=len(triangles_bytes) + len(vertices_bytes) + (len(vertex_colors_bytes) if has_colors else 0),
92
+ byteLength=len(uv_bytes),
93
+ target=pygltflib.ARRAY_BUFFER,
94
+ ) if has_uv else None,
95
+ ])),
96
+ buffers=[
97
+ pygltflib.Buffer(
98
+ byteLength=len(triangles_bytes) + len(vertices_bytes) + (len(vertex_colors_bytes) if has_colors else 0) + (len(uv_bytes) if has_uv else 0),
99
+ )
100
+ ]
101
+ )
102
+ gltf.set_binary_blob(triangles_bytes + vertices_bytes + (vertex_colors_bytes or b'') + (uv_bytes or b''))
103
+ with open(path, 'wb') as f:
104
+ for chunk in gltf.save_to_bytes():
105
+ f.write(chunk)
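A minimal sketch of `write_glb` with a single vertex-colored triangle (hypothetical data):

    import numpy as np
    from utils3d.io.glb import write_glb

    vertices = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0]], dtype=np.float32)
    faces = np.array([[0, 1, 2]], dtype=np.uint32)
    colors = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
    write_glb('triangle.glb', vertices, faces, vertex_colors=colors)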
utils3d/io/ply.py ADDED
@@ -0,0 +1,104 @@
1
+ import numpy as np
2
+
3
+ from typing import *
4
+ from pathlib import Path
5
+
6
+
7
+ def read_ply(
8
+ file: Union[str, Path],
9
+ encoding: Union[str, None] = None,
10
+ ignore_unknown: bool = False
11
+ ) -> Tuple[np.ndarray, np.ndarray]:
12
+ """
13
+ Read .ply file, without preprocessing.
14
+
15
+ Args:
16
+ file (Any): filepath
17
+ encoding (str, optional):
18
+
19
+ Returns:
20
+ Tuple[np.ndarray, np.ndarray]: vertices, faces
21
+ """
22
+ import plyfile
23
+ plydata = plyfile.PlyData.read(file)
24
+ vertices = np.stack([plydata['vertex'][k] for k in ['x', 'y', 'z']], axis=-1)
25
+ if 'face' in plydata:
26
+ faces = np.array(plydata['face']['vertex_indices'].tolist())
27
+ else:
28
+ faces = None
29
+ return vertices, faces
30
+
31
+
32
+ def write_ply(
33
+ file: Union[str, Path],
34
+ vertices: np.ndarray,
35
+ faces: np.ndarray = None,
36
+ edges: np.ndarray = None,
37
+ vertex_colors: np.ndarray = None,
38
+ edge_colors: np.ndarray = None,
39
+ text: bool = False
40
+ ):
41
+ """
42
+ Write .ply file, without preprocessing.
43
+
44
+ Args:
45
+ file (Any): filepath
46
+ vertices (np.ndarray): [N, 3]
47
+ faces (np.ndarray): [T, E]
48
+ edges (np.ndarray): [E, 2]
49
+ vertex_colors (np.ndarray, optional): [N, 3]. Defaults to None.
50
+ edge_colors (np.ndarray, optional): [E, 3]. Defaults to None.
51
+ text (bool, optional): save data in text format. Defaults to False.
52
+ """
53
+ import plyfile
54
+ assert vertices.ndim == 2 and vertices.shape[1] == 3
55
+ vertices = vertices.astype(np.float32)
56
+ if faces is not None:
57
+ assert faces.ndim == 2
58
+ faces = faces.astype(np.int32)
59
+ if edges is not None:
60
+ assert edges.ndim == 2 and edges.shape[1] == 2
61
+ edges = edges.astype(np.int32)
62
+
63
+ if vertex_colors is not None:
64
+ assert vertex_colors.ndim == 2 and vertex_colors.shape[1] == 3
65
+ if vertex_colors.dtype in [np.float32, np.float64]:
66
+ vertex_colors = vertex_colors * 255
67
+ vertex_colors = np.clip(vertex_colors, 0, 255).astype(np.uint8)
68
+ vertices_data = np.zeros(len(vertices), dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')])
69
+ vertices_data['x'] = vertices[:, 0]
70
+ vertices_data['y'] = vertices[:, 1]
71
+ vertices_data['z'] = vertices[:, 2]
72
+ vertices_data['red'] = vertex_colors[:, 0]
73
+ vertices_data['green'] = vertex_colors[:, 1]
74
+ vertices_data['blue'] = vertex_colors[:, 2]
75
+ else:
76
+ vertices_data = np.array([tuple(v) for v in vertices], dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4')])
77
+
78
+ if faces is not None:
79
+ faces_data = np.zeros(len(faces), dtype=[('vertex_indices', 'i4', (faces.shape[1],))])
80
+ faces_data['vertex_indices'] = faces
81
+
82
+ if edges is not None:
83
+ if edge_colors is not None:
84
+ assert edge_colors.ndim == 2 and edge_colors.shape[1] == 3
85
+ if edge_colors.dtype in [np.float32, np.float64]:
86
+ edge_colors = edge_colors * 255
87
+ edge_colors = np.clip(edge_colors, 0, 255).astype(np.uint8)
88
+ edges_data = np.zeros(len(edges), dtype=[('vertex1', 'i4'), ('vertex2', 'i4'), ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')])
89
+ edges_data['vertex1'] = edges[:, 0]
90
+ edges_data['vertex2'] = edges[:, 1]
91
+ edges_data['red'] = edge_colors[:, 0]
92
+ edges_data['green'] = edge_colors[:, 1]
93
+ edges_data['blue'] = edge_colors[:, 2]
94
+ else:
95
+ edges_data = np.array([tuple(e) for e in edges], dtype=[('vertex1', 'i4'), ('vertex2', 'i4')])
96
+
97
+ ply_data = [plyfile.PlyElement.describe(vertices_data, 'vertex')]
98
+ if faces is not None:
99
+ ply_data.append(plyfile.PlyElement.describe(faces_data, 'face'))
100
+ if edges is not None:
101
+ ply_data.append(plyfile.PlyElement.describe(edges_data, 'edge'))
102
+
103
+ plyfile.PlyData(ply_data, text=text).write(file)
104
+
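A round-trip sketch for the PLY helpers above (hypothetical data):

    import numpy as np
    from utils3d.io.ply import write_ply, read_ply

    vertices = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0]], dtype=np.float32)
    faces = np.array([[0, 1, 2]], dtype=np.int32)
    write_ply('triangle.ply', vertices, faces)
    vertices2, faces2 = read_ply('triangle.ply')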
utils3d/io/wavefront_obj.py ADDED
@@ -0,0 +1,146 @@
1
+ from io import TextIOWrapper
2
+ from typing import Dict, Any, Union, Iterable
3
+ import numpy as np
4
+ from pathlib import Path
5
+
6
+ __all__ = [
7
+ 'read_obj',
8
+ 'write_obj',
9
+ 'simple_write_obj'
10
+ ]
11
+
12
+ def read_obj(
13
+ file : Union[str, Path, TextIOWrapper],
14
+ encoding: Union[str, None] = None,
15
+ ignore_unknown: bool = False
16
+ ):
17
+ """
18
+ Read wavefront .obj file, without preprocessing.
19
+
20
+     Why bother having this read_obj() when there are already libraries like `trimesh`?
+     This function reads the raw .obj content and keeps the original order of vertices and faces,
+     whereas trimesh applies modifications such as merging/splitting vertices, which can break that order.
+     Such libraries mainly target geometry processing and rendering and support many formats.
+     If you need mesh geometry processing, `trimesh` offers more features.
25
+
26
+ ### Parameters
27
+ `file` (str, Path, TextIOWrapper): filepath or file object
28
+ encoding (str, optional):
29
+
30
+ ### Returns
31
+ obj (dict): A dict containing .obj components
32
+ {
33
+ 'mtllib': [],
34
+ 'v': [[0,1, 0.2, 1.0], [1.2, 0.0, 0.0], ...],
35
+ 'vt': [[0.5, 0.5], ...],
36
+ 'vn': [[0., 0.7, 0.7], [0., -0.7, 0.7], ...],
37
+ 'f': [[0, 1, 2], [2, 3, 4],...],
38
+ 'usemtl': [{'name': 'mtl1', 'f': 7}]
39
+ }
40
+ """
41
+ if hasattr(file,'read'):
42
+ lines = file.read().splitlines()
43
+ else:
44
+ with open(file, 'r', encoding=encoding) as fp:
45
+ lines = fp.read().splitlines()
46
+ mtllib = []
47
+ v, vt, vn, vp = [], [], [], [] # Vertex coordinates, Vertex texture coordinate, Vertex normal, Vertex parameter
48
+ f, ft, fn = [], [], [] # Face indices, Face texture indices, Face normal indices
49
+ o = []
50
+ s = []
51
+ usemtl = []
52
+
53
+ def pad(l: list, n: Any):
54
+ return l + [n] * (3 - len(l))
55
+
56
+ for i, line in enumerate(lines):
57
+ sq = line.strip().split()
58
+ if len(sq) == 0:
59
+ continue
60
+ if sq[0] == 'v':
61
+ assert 4 <= len(sq) <= 5, f'Invalid format of line {i}: {line}'
62
+ v.append([float(e) for e in sq[1:]][:3])
63
+ elif sq[0] == 'vt':
64
+ assert 3 <= len(sq) <= 4, f'Invalid format of line {i}: {line}'
65
+ vt.append([float(e) for e in sq[1:]][:2])
66
+ elif sq[0] == 'vn':
67
+ assert len(sq) == 4, f'Invalid format of line {i}: {line}'
68
+ vn.append([float(e) for e in sq[1:]])
69
+ elif sq[0] == 'vp':
70
+ assert 2 <= len(sq) <= 4, f'Invalid format of line {i}: {line}'
71
+ vp.append(pad([float(e) for e in sq[1:]], 0))
72
+ elif sq[0] == 'f':
73
+ spliting = [pad([int(j) - 1 for j in e.split('/')], -1) for e in sq[1:]]
74
+ f.append([e[0] for e in spliting])
75
+ ft.append([e[1] for e in spliting])
76
+ fn.append([e[2] for e in spliting])
77
+ elif sq[0] == 'usemtl':
78
+ assert len(sq) == 2
79
+ usemtl.append((sq[1], len(f)))
80
+ elif sq[0] == 'o':
81
+ assert len(sq) == 2
82
+ o.append((sq[1], len(f)))
83
+ elif sq[0] == 's':
84
+ s.append((sq[1], len(f)))
85
+ elif sq[0] == 'mtllib':
86
+ assert len(sq) == 2
87
+ mtllib.append(sq[1])
88
+ elif sq[0][0] == '#':
89
+ continue
90
+ else:
91
+ if not ignore_unknown:
92
+ raise Exception(f'Unknown keyword {sq[0]}')
93
+
94
+ min_poly_vertices = min(len(f) for f in f)
95
+ max_poly_vertices = max(len(f) for f in f)
96
+
97
+ return {
98
+ 'mtllib': mtllib,
99
+ 'v': np.array(v, dtype=np.float32),
100
+ 'vt': np.array(vt, dtype=np.float32),
101
+ 'vn': np.array(vn, dtype=np.float32),
102
+ 'vp': np.array(vp, dtype=np.float32),
103
+ 'f': np.array(f, dtype=np.int32) if min_poly_vertices == max_poly_vertices else f,
104
+ 'ft': np.array(ft, dtype=np.int32) if min_poly_vertices == max_poly_vertices else ft,
105
+ 'fn': np.array(fn, dtype=np.int32) if min_poly_vertices == max_poly_vertices else fn,
106
+ 'o': o,
107
+ 's': s,
108
+ 'usemtl': usemtl,
109
+ }
110
+
111
+
112
+ def write_obj(
113
+ file: Union[str, Path],
114
+ obj: Dict[str, Any],
115
+ encoding: Union[str, None] = None
116
+ ):
117
+ with open(file, 'w', encoding=encoding) as fp:
118
+ for k in ['v', 'vt', 'vn', 'vp']:
119
+ if k not in obj:
120
+ continue
121
+ for v in obj[k]:
122
+ print(k, *map(float, v), file=fp)
123
+ for f in obj['f']:
124
+             print('f', *('/'.join(str(int(j) + 1) for j in i) if isinstance(i, Iterable) else str(int(i) + 1) for i in f), file=fp)  # .obj face indices are 1-based
125
+
126
+
127
+ def simple_write_obj(
128
+ file: Union[str, Path],
129
+ vertices: np.ndarray,
130
+ faces: np.ndarray,
131
+ encoding: Union[str, None] = None
132
+ ):
133
+ """
134
+ Write wavefront .obj file, without preprocessing.
135
+
136
+ Args:
137
+ vertices (np.ndarray): [N, 3]
138
+ faces (np.ndarray): [T, 3]
139
+ file (Any): filepath
140
+ encoding (str, optional):
141
+ """
142
+ with open(file, 'w', encoding=encoding) as fp:
143
+ for v in vertices:
144
+ print('v', *map(float, v), file=fp)
145
+ for f in faces:
146
+ print('f', *map(int, f + 1), file=fp)
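A round-trip sketch for the OBJ helpers above (hypothetical data); indices are converted between the 1-based .obj convention on disk and the 0-based arrays in memory:

    import numpy as np
    from utils3d.io.wavefront_obj import simple_write_obj, read_obj

    vertices = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0]], dtype=np.float32)
    faces = np.array([[0, 1, 2]], dtype=np.int32)
    simple_write_obj('triangle.obj', vertices, faces)
    obj = read_obj('triangle.obj')   # obj['v'] -> (3, 3) float32, obj['f'] -> (1, 3) int32, 0-based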
utils3d/numpy/__init__.py ADDED
@@ -0,0 +1,135 @@
1
+ """
2
+ 3D utility functions workings with NumPy.
3
+ """
4
+ import importlib
5
+ import itertools
6
+ import numpy
7
+
8
+
9
+ __modules_all__ = {
10
+ 'mesh':[
11
+ 'triangulate',
12
+ 'compute_face_normal',
13
+ 'compute_face_angle',
14
+ 'compute_vertex_normal',
15
+ 'compute_vertex_normal_weighted',
16
+ 'remove_corrupted_faces',
17
+ 'merge_duplicate_vertices',
18
+ 'remove_unreferenced_vertices',
19
+ 'subdivide_mesh_simple',
20
+ 'mesh_relations',
21
+ 'flatten_mesh_indices'
22
+ ],
23
+ 'quadmesh': [
24
+ 'calc_quad_candidates',
25
+ 'calc_quad_distortion',
26
+ 'calc_quad_direction',
27
+ 'calc_quad_smoothness',
28
+ 'sovle_quad',
29
+ 'sovle_quad_qp',
30
+ 'tri_to_quad'
31
+ ],
32
+ 'utils': [
33
+ 'sliding_window_1d',
34
+ 'sliding_window_nd',
35
+ 'sliding_window_2d',
36
+ 'max_pool_1d',
37
+ 'max_pool_2d',
38
+ 'max_pool_nd',
39
+ 'depth_edge',
40
+ 'depth_aliasing',
41
+ 'interpolate',
42
+ 'image_scrcoord',
43
+ 'image_uv',
44
+ 'image_pixel_center',
45
+ 'image_pixel',
46
+ 'image_mesh',
47
+ 'image_mesh_from_depth',
48
+ 'depth_to_normal',
49
+ 'point_to_normal',
50
+ 'chessboard',
51
+ 'cube',
52
+ 'square',
53
+ 'camera_frustum',
54
+ ],
55
+ 'transforms': [
56
+ 'perspective',
57
+ 'perspective_from_fov',
58
+ 'perspective_from_fov_xy',
59
+ 'intrinsics_from_focal_center',
60
+ 'intrinsics_from_fov',
61
+ 'view_look_at',
62
+ 'extrinsics_look_at',
63
+ 'perspective_to_intrinsics',
64
+ 'perspective_to_near_far',
65
+ 'intrinsics_to_perspective',
66
+ 'extrinsics_to_view',
67
+ 'view_to_extrinsics',
68
+ 'normalize_intrinsics',
69
+ 'crop_intrinsics',
70
+ 'pixel_to_uv',
71
+ 'pixel_to_ndc',
72
+ 'uv_to_pixel',
73
+ 'project_depth',
74
+ 'depth_buffer_to_linear',
75
+ 'unproject_cv',
76
+ 'unproject_gl',
77
+ 'project_cv',
78
+ 'project_gl',
79
+ 'quaternion_to_matrix',
80
+ 'axis_angle_to_matrix',
81
+ 'matrix_to_quaternion',
82
+ 'extrinsics_to_essential',
83
+ 'euler_axis_angle_rotation',
84
+ 'euler_angles_to_matrix',
85
+ 'skew_symmetric',
86
+ 'rotation_matrix_from_vectors',
87
+ 'ray_intersection',
88
+ 'se3_matrix',
89
+ 'slerp_quaternion',
90
+ 'slerp_vector',
91
+ 'lerp',
92
+ 'lerp_se3_matrix',
93
+ 'piecewise_lerp',
94
+ 'piecewise_lerp_se3_matrix',
95
+ 'apply_transform'
96
+ ],
97
+ 'spline': [
98
+ 'linear_spline_interpolate',
99
+ ],
100
+ 'rasterization': [
101
+ 'RastContext',
102
+ 'rasterize_triangle_faces',
103
+ 'rasterize_edges',
104
+ 'texture',
105
+ 'warp_image_by_depth',
106
+ ],
107
+ }
108
+
109
+
110
+ __all__ = list(itertools.chain(*__modules_all__.values()))
111
+
112
+ def __getattr__(name):
113
+ try:
114
+ return globals()[name]
115
+ except KeyError:
116
+ pass
117
+
118
+ try:
119
+ module_name = next(m for m in __modules_all__ if name in __modules_all__[m])
120
+ except StopIteration:
121
+ raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
122
+ module = importlib.import_module(f'.{module_name}', __name__)
123
+ for key in __modules_all__[module_name]:
124
+ globals()[key] = getattr(module, key)
125
+
126
+ return globals()[name]
127
+
128
+
129
+ if __name__ == '__main__':
130
+ from .quadmesh import *
131
+ from .transforms import *
132
+ from .mesh import *
133
+ from .utils import *
134
+ from .rasterization import *
135
+ from .spline import *
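The `__getattr__` above gives lazy, on-demand imports: a submodule is only imported the first time one of its exported names is accessed, and its names are then cached in this module's globals. A tiny sketch of the effect:

    import utils3d

    fn = utils3d.numpy.triangulate   # 'utils3d.numpy.mesh' is imported only now, on first access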
utils3d/numpy/_helpers.py ADDED
@@ -0,0 +1,88 @@
1
+ # decorator
2
+ import numpy as np
3
+ from numbers import Number
4
+ import inspect
5
+
6
+
7
+ def get_args_order(func, args, kwargs):
8
+ """
9
+ Get the order of the arguments of a function.
10
+ """
11
+ names = inspect.getfullargspec(func).args
12
+ names_idx = {name: i for i, name in enumerate(names)}
13
+ args_order = []
14
+ kwargs_order = {}
15
+ for name, arg in kwargs.items():
16
+ if name in names:
17
+ kwargs_order[name] = names_idx[name]
18
+ names.remove(name)
19
+ for i, arg in enumerate(args):
20
+ if i < len(names):
21
+ args_order.append(names_idx[names[i]])
22
+ return args_order, kwargs_order
23
+
24
+
25
+ def broadcast_args(args, kwargs, args_dim, kwargs_dim):
26
+ spatial = []
27
+ for arg, arg_dim in zip(args + list(kwargs.values()), args_dim + list(kwargs_dim.values())):
28
+ if isinstance(arg, np.ndarray) and arg_dim is not None:
29
+ arg_spatial = arg.shape[:arg.ndim-arg_dim]
30
+ if len(arg_spatial) > len(spatial):
31
+ spatial = [1] * (len(arg_spatial) - len(spatial)) + spatial
32
+ for j in range(len(arg_spatial)):
33
+ if spatial[-j] < arg_spatial[-j]:
34
+ if spatial[-j] == 1:
35
+ spatial[-j] = arg_spatial[-j]
36
+ else:
37
+ raise ValueError("Cannot broadcast arguments.")
38
+ for i, arg in enumerate(args):
39
+ if isinstance(arg, np.ndarray) and args_dim[i] is not None:
40
+ args[i] = np.broadcast_to(arg, [*spatial, *arg.shape[arg.ndim-args_dim[i]:]])
41
+ for key, arg in kwargs.items():
42
+ if isinstance(arg, np.ndarray) and kwargs_dim[key] is not None:
43
+ kwargs[key] = np.broadcast_to(arg, [*spatial, *arg.shape[arg.ndim-kwargs_dim[key]:]])
44
+ return args, kwargs, spatial
45
+
46
+
47
+ def batched(*dims):
48
+ """
49
+ Decorator that allows a function to be called with batched arguments.
50
+ """
51
+ def decorator(func):
52
+ def wrapper(*args, **kwargs):
53
+ args = list(args)
54
+ # get arguments dimensions
55
+ args_order, kwargs_order = get_args_order(func, args, kwargs)
56
+ args_dim = [dims[i] for i in args_order]
57
+ kwargs_dim = {key: dims[i] for key, i in kwargs_order.items()}
58
+ # convert to numpy array
59
+ for i, arg in enumerate(args):
60
+ if isinstance(arg, (Number, list, tuple)) and args_dim[i] is not None:
61
+ args[i] = np.array(arg)
62
+ for key, arg in kwargs.items():
63
+ if isinstance(arg, (Number, list, tuple)) and kwargs_dim[key] is not None:
64
+ kwargs[key] = np.array(arg)
65
+ # broadcast arguments
66
+ args, kwargs, spatial = broadcast_args(args, kwargs, args_dim, kwargs_dim)
67
+ for i, (arg, arg_dim) in enumerate(zip(args, args_dim)):
68
+ if isinstance(arg, np.ndarray) and arg_dim is not None:
69
+ args[i] = arg.reshape([-1, *arg.shape[arg.ndim-arg_dim:]])
70
+ for key, arg in kwargs.items():
71
+ if isinstance(arg, np.ndarray) and kwargs_dim[key] is not None:
72
+ kwargs[key] = arg.reshape([-1, *arg.shape[arg.ndim-kwargs_dim[key]:]])
73
+ # call function
74
+ results = func(*args, **kwargs)
75
+ type_results = type(results)
76
+ results = list(results) if isinstance(results, (tuple, list)) else [results]
77
+ # restore spatial dimensions
78
+ for i, result in enumerate(results):
79
+ results[i] = result.reshape([*spatial, *result.shape[1:]])
80
+ if type_results == tuple:
81
+ results = tuple(results)
82
+ elif type_results == list:
83
+ results = list(results)
84
+ else:
85
+ results = results[0]
86
+ return results
87
+ return wrapper
88
+ return decorator
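A sketch of what the `batched` decorator does (hypothetical function): array arguments are flattened to a single leading batch dimension before the call, and the original leading dimensions are restored on the result.

    import numpy as np
    from utils3d.numpy._helpers import batched

    @batched(2, None)
    def first_corner(vertices, faces):
        # vertices arrives as [B, N, 3]; faces (dim=None) is passed through unchanged
        return vertices[:, faces[0, 0], :]

    v = np.random.rand(5, 7, 100, 3)
    f = np.zeros((4, 3), dtype=np.int32)
    out = first_corner(v, f)    # shape (5, 7, 3)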
utils3d/numpy/mesh.py ADDED
@@ -0,0 +1,355 @@
1
+ import numpy as np
2
+ from typing import *
3
+ from ._helpers import batched
4
+
5
+
6
+ __all__ = [
7
+ 'triangulate',
8
+ 'compute_face_normal',
9
+ 'compute_face_angle',
10
+ 'compute_vertex_normal',
11
+ 'compute_vertex_normal_weighted',
12
+ 'remove_corrupted_faces',
13
+ 'merge_duplicate_vertices',
14
+ 'remove_unreferenced_vertices',
15
+ 'subdivide_mesh_simple',
16
+ 'mesh_relations',
17
+ 'flatten_mesh_indices'
18
+ ]
19
+
20
+
21
+ def triangulate(
22
+ faces: np.ndarray,
23
+ vertices: np.ndarray = None,
24
+ backslash: np.ndarray = None
25
+ ) -> np.ndarray:
26
+ """
27
+ Triangulate a polygonal mesh.
28
+
29
+ Args:
30
+ faces (np.ndarray): [L, P] polygonal faces
31
+ vertices (np.ndarray, optional): [N, 3] 3-dimensional vertices.
32
+ If given, the triangulation is performed according to the distance
33
+ between vertices. Defaults to None.
34
+ backslash (np.ndarray, optional): [L] boolean array indicating
35
+ how to triangulate the quad faces. Defaults to None.
36
+
37
+ Returns:
38
+ (np.ndarray): [L * (P - 2), 3] triangular faces
39
+ """
40
+ if faces.shape[-1] == 3:
41
+ return faces
42
+ P = faces.shape[-1]
43
+ if vertices is not None:
44
+ assert faces.shape[-1] == 4, "now only support quad mesh"
45
+ if backslash is None:
46
+ backslash = np.linalg.norm(vertices[faces[:, 0]] - vertices[faces[:, 2]], axis=-1) < \
47
+ np.linalg.norm(vertices[faces[:, 1]] - vertices[faces[:, 3]], axis=-1)
48
+ if backslash is None:
49
+ loop_indice = np.stack([
50
+ np.zeros(P - 2, dtype=int),
51
+ np.arange(1, P - 1, 1, dtype=int),
52
+ np.arange(2, P, 1, dtype=int)
53
+ ], axis=1)
54
+ return faces[:, loop_indice].reshape((-1, 3))
55
+ else:
56
+ assert faces.shape[-1] == 4, "now only support quad mesh"
57
+ faces = np.where(
58
+ backslash[:, None],
59
+ faces[:, [0, 1, 2, 0, 2, 3]],
60
+ faces[:, [0, 1, 3, 3, 1, 2]]
61
+ ).reshape((-1, 3))
62
+ return faces
63
+
64
+
65
+ @batched(2, None)
66
+ def compute_face_normal(
67
+ vertices: np.ndarray,
68
+ faces: np.ndarray
69
+ ) -> np.ndarray:
70
+ """
71
+ Compute face normals of a triangular mesh
72
+
73
+ Args:
74
+ vertices (np.ndarray): [..., N, 3] 3-dimensional vertices
75
+ faces (np.ndarray): [T, 3] triangular face indices
76
+
77
+ Returns:
78
+ normals (np.ndarray): [..., T, 3] face normals
79
+ """
80
+ normal = np.cross(
81
+ vertices[..., faces[:, 1], :] - vertices[..., faces[:, 0], :],
82
+ vertices[..., faces[:, 2], :] - vertices[..., faces[:, 0], :]
83
+ )
84
+ normal_norm = np.linalg.norm(normal, axis=-1, keepdims=True)
85
+ normal_norm[normal_norm == 0] = 1
86
+ normal /= normal_norm
87
+ return normal
88
+
89
+
90
+ @batched(2, None)
91
+ def compute_face_angle(
92
+ vertices: np.ndarray,
93
+ faces: np.ndarray,
94
+ eps: float = 1e-12
95
+ ) -> np.ndarray:
96
+ """
97
+ Compute face angles of a triangular mesh
98
+
99
+ Args:
100
+ vertices (np.ndarray): [..., N, 3] 3-dimensional vertices
101
+ faces (np.ndarray): [T, 3] triangular face indices
102
+
103
+ Returns:
104
+ angles (np.ndarray): [..., T, 3] face angles
105
+ """
106
+ face_angle = np.zeros_like(faces, dtype=vertices.dtype)
107
+ for i in range(3):
108
+ edge1 = vertices[..., faces[:, (i + 1) % 3], :] - vertices[..., faces[:, i], :]
109
+ edge2 = vertices[..., faces[:, (i + 2) % 3], :] - vertices[..., faces[:, i], :]
110
+ face_angle[..., i] = np.arccos(np.sum(
111
+ edge1 / np.clip(np.linalg.norm(edge1, axis=-1, keepdims=True), eps, None) *
112
+ edge2 / np.clip(np.linalg.norm(edge2, axis=-1, keepdims=True), eps, None),
113
+ axis=-1
114
+ ))
115
+ return face_angle
116
+
117
+
118
+ @batched(2, None, 2)
119
+ def compute_vertex_normal(
120
+ vertices: np.ndarray,
121
+ faces: np.ndarray,
122
+ face_normal: np.ndarray = None
123
+ ) -> np.ndarray:
124
+ """
125
+     Compute vertex normals of a triangular mesh by averaging neighboring face normals
126
+ TODO: can be improved.
127
+
128
+ Args:
129
+ vertices (np.ndarray): [..., N, 3] 3-dimensional vertices
130
+ faces (np.ndarray): [T, 3] triangular face indices
131
+ face_normal (np.ndarray, optional): [..., T, 3] face normals.
132
+ None to compute face normals from vertices and faces. Defaults to None.
133
+
134
+ Returns:
135
+ normals (np.ndarray): [..., N, 3] vertex normals
136
+ """
137
+ if face_normal is None:
138
+ face_normal = compute_face_normal(vertices, faces)
139
+ vertex_normal = np.zeros_like(vertices, dtype=vertices.dtype)
140
+ for n in range(vertices.shape[0]):
141
+ for i in range(3):
142
+ vertex_normal[n, :, 0] += np.bincount(faces[:, i], weights=face_normal[n, :, 0], minlength=vertices.shape[1])
143
+ vertex_normal[n, :, 1] += np.bincount(faces[:, i], weights=face_normal[n, :, 1], minlength=vertices.shape[1])
144
+ vertex_normal[n, :, 2] += np.bincount(faces[:, i], weights=face_normal[n, :, 2], minlength=vertices.shape[1])
145
+ vertex_normal_norm = np.linalg.norm(vertex_normal, axis=-1, keepdims=True)
146
+ vertex_normal_norm[vertex_normal_norm == 0] = 1
147
+ vertex_normal /= vertex_normal_norm
148
+ return vertex_normal
149
+
150
+
151
+ @batched(2, None, 2)
152
+ def compute_vertex_normal_weighted(
153
+ vertices: np.ndarray,
154
+ faces: np.ndarray,
155
+ face_normal: np.ndarray = None
156
+ ) -> np.ndarray:
157
+ """
158
+     Compute vertex normals of a triangular mesh by weighted sum of neighboring face normals
159
+ according to the angles
160
+
161
+ Args:
162
+ vertices (np.ndarray): [..., N, 3] 3-dimensional vertices
163
+ faces (np.ndarray): [..., T, 3] triangular face indices
164
+ face_normal (np.ndarray, optional): [..., T, 3] face normals.
165
+ None to compute face normals from vertices and faces. Defaults to None.
166
+
167
+ Returns:
168
+ normals (np.ndarray): [..., N, 3] vertex normals
169
+ """
170
+ if face_normal is None:
171
+ face_normal = compute_face_normal(vertices, faces)
172
+ face_angle = compute_face_angle(vertices, faces)
173
+ vertex_normal = np.zeros_like(vertices)
174
+ for n in range(vertices.shape[0]):
175
+ for i in range(3):
176
+ vertex_normal[n, :, 0] += np.bincount(faces[n, :, i], weights=face_normal[n, :, 0] * face_angle[n, :, i], minlength=vertices.shape[1])
177
+ vertex_normal[n, :, 1] += np.bincount(faces[n, :, i], weights=face_normal[n, :, 1] * face_angle[n, :, i], minlength=vertices.shape[1])
178
+ vertex_normal[n, :, 2] += np.bincount(faces[n, :, i], weights=face_normal[n, :, 2] * face_angle[n, :, i], minlength=vertices.shape[1])
179
+ vertex_normal_norm = np.linalg.norm(vertex_normal, axis=-1, keepdims=True)
180
+ vertex_normal_norm[vertex_normal_norm == 0] = 1
181
+ vertex_normal /= vertex_normal_norm
182
+ return vertex_normal
183
+
184
+
185
+ def remove_corrupted_faces(
186
+ faces: np.ndarray
187
+ ) -> np.ndarray:
188
+ """
189
+ Remove corrupted faces (faces with duplicated vertices)
190
+
191
+ Args:
192
+ faces (np.ndarray): [T, 3] triangular face indices
193
+
194
+ Returns:
195
+ np.ndarray: [T_, 3] triangular face indices
196
+ """
197
+ corrupted = (faces[:, 0] == faces[:, 1]) | (faces[:, 1] == faces[:, 2]) | (faces[:, 2] == faces[:, 0])
198
+ return faces[~corrupted]
199
+
200
+
201
+ def merge_duplicate_vertices(
202
+ vertices: np.ndarray,
203
+ faces: np.ndarray,
204
+ tol: float = 1e-6
205
+ ) -> Tuple[np.ndarray, np.ndarray]:
206
+ """
207
+ Merge duplicate vertices of a triangular mesh.
208
+     Duplicate vertices are merged by selecting one of them, and the face indices are updated accordingly.
209
+
210
+ Args:
211
+ vertices (np.ndarray): [N, 3] 3-dimensional vertices
212
+ faces (np.ndarray): [T, 3] triangular face indices
213
+ tol (float, optional): tolerance for merging. Defaults to 1e-6.
214
+
215
+ Returns:
216
+ vertices (np.ndarray): [N_, 3] 3-dimensional vertices
217
+ faces (np.ndarray): [T, 3] triangular face indices
218
+ """
219
+ vertices_round = np.round(vertices / tol)
220
+ _, uni_i, uni_inv = np.unique(vertices_round, return_index=True, return_inverse=True, axis=0)
221
+ vertices = vertices[uni_i]
222
+ faces = uni_inv[faces]
223
+ return vertices, faces
224
+
225
+
226
+ def remove_unreferenced_vertices(
227
+ faces: np.ndarray,
228
+ *vertice_attrs,
229
+ return_indices: bool = False
230
+ ) -> Tuple[np.ndarray, ...]:
231
+ """
232
+ Remove unreferenced vertices of a mesh.
233
+ Unreferenced vertices are removed, and the face indices are updated accordingly.
234
+
235
+ Args:
236
+ faces (np.ndarray): [T, P] face indices
237
+ *vertice_attrs: vertex attributes
238
+
239
+ Returns:
240
+ faces (np.ndarray): [T, P] face indices
241
+ *vertice_attrs: vertex attributes
242
+ indices (np.ndarray, optional): [N] indices of vertices that are kept. Defaults to None.
243
+ """
244
+ P = faces.shape[-1]
245
+ fewer_indices, inv_map = np.unique(faces, return_inverse=True)
246
+ faces = inv_map.astype(np.int32).reshape(-1, P)
247
+ ret = [faces]
248
+ for attr in vertice_attrs:
249
+ ret.append(attr[fewer_indices])
250
+ if return_indices:
251
+ ret.append(fewer_indices)
252
+ return tuple(ret)
253
+
254
+
255
+ def subdivide_mesh_simple(
256
+ vertices: np.ndarray,
257
+ faces: np.ndarray,
258
+ n: int = 1
259
+ ) -> Tuple[np.ndarray, np.ndarray]:
260
+ """
261
+ Subdivide a triangular mesh by splitting each triangle into 4 smaller triangles.
262
+ NOTE: All original vertices are kept, and new vertices are appended to the end of the vertex list.
263
+
264
+ Args:
265
+ vertices (np.ndarray): [N, 3] 3-dimensional vertices
266
+ faces (np.ndarray): [T, 3] triangular face indices
267
+ n (int, optional): number of subdivisions. Defaults to 1.
268
+
269
+ Returns:
270
+ vertices (np.ndarray): [N_, 3] subdivided 3-dimensional vertices
271
+ faces (np.ndarray): [4 * T, 3] subdivided triangular face indices
272
+ """
273
+ for _ in range(n):
274
+ edges = np.stack([faces[:, [0, 1]], faces[:, [1, 2]], faces[:, [2, 0]]], axis=0)
275
+ edges = np.sort(edges, axis=2)
276
+ uni_edges, uni_inv = np.unique(edges.reshape(-1, 2), return_inverse=True, axis=0)
277
+ uni_inv = uni_inv.reshape(3, -1)
278
+ midpoints = (vertices[uni_edges[:, 0]] + vertices[uni_edges[:, 1]]) / 2
279
+
280
+ n_vertices = vertices.shape[0]
281
+ vertices = np.concatenate([vertices, midpoints], axis=0)
282
+ faces = np.concatenate([
283
+ np.stack([faces[:, 0], n_vertices + uni_inv[0], n_vertices + uni_inv[2]], axis=1),
284
+ np.stack([faces[:, 1], n_vertices + uni_inv[1], n_vertices + uni_inv[0]], axis=1),
285
+ np.stack([faces[:, 2], n_vertices + uni_inv[2], n_vertices + uni_inv[1]], axis=1),
286
+ np.stack([n_vertices + uni_inv[0], n_vertices + uni_inv[1], n_vertices + uni_inv[2]], axis=1),
287
+ ], axis=0)
288
+ return vertices, faces
289
+
290
+
291
+ def mesh_relations(
292
+ faces: np.ndarray,
293
+ ) -> Tuple[np.ndarray, np.ndarray]:
294
+ """
295
+ Calculate the relation between vertices and faces.
296
+ NOTE: The input mesh must be a manifold triangle mesh.
297
+
298
+ Args:
299
+ faces (np.ndarray): [T, 3] triangular face indices
300
+
301
+ Returns:
302
+ edges (np.ndarray): [E, 2] edge indices
303
+ edge2face (np.ndarray): [E, 2] edge to face relation. The second column is -1 if the edge is boundary.
304
+ face2edge (np.ndarray): [T, 3] face to edge relation
305
+ face2face (np.ndarray): [T, 3] face to face relation
306
+ """
307
+ T = faces.shape[0]
308
+ edges = np.stack([faces[:, [0, 1]], faces[:, [1, 2]], faces[:, [2, 0]]], axis=1).reshape(-1, 2) # [3T, 2]
309
+ edges = np.sort(edges, axis=1) # [3T, 2]
310
+ edges, face2edge, occurence = np.unique(edges, axis=0, return_inverse=True, return_counts=True) # [E, 2], [3T], [E]
311
+ E = edges.shape[0]
312
+ assert np.all(occurence <= 2), "The input mesh is not a manifold mesh."
313
+
314
+ # Edge to face relation
315
+ padding = np.arange(E, dtype=np.int32)[occurence == 1]
316
+ padded_face2edge = np.concatenate([face2edge, padding], axis=0) # [2E]
317
+ edge2face = np.argsort(padded_face2edge, kind='stable').reshape(-1, 2) // 3 # [E, 2]
318
+ edge2face_valid = edge2face[:, 1] < T # [E]
319
+ edge2face[~edge2face_valid, 1] = -1
320
+
321
+ # Face to edge relation
322
+ face2edge = face2edge.reshape(-1, 3) # [T, 3]
323
+
324
+ # Face to face relation
325
+ face2face = edge2face[face2edge] # [T, 3, 2]
326
+ face2face = face2face[face2face != np.arange(T)[:, None, None]].reshape(T, 3) # [T, 3]
327
+
328
+ return edges, edge2face, face2edge, face2face
329
+
330
+
331
+ @overload
332
+ def flatten_mesh_indices(faces1: np.ndarray, attr1: np.ndarray, *other_faces_attrs_pairs: np.ndarray) -> Tuple[np.ndarray, ...]:
333
+ """
334
+ Rearrange the indices of a mesh to a flattened version. Vertices will be no longer shared.
335
+
336
+ ### Parameters:
337
+ - `faces1`: [T, P] face indices of the first attribute
338
+ - `attr1`: [N1, ...] attributes of the first mesh
339
+ - ...
340
+
341
+ ### Returns:
342
+     - `faces`: [T, P] flattened face indices, contiguous from 0 to T * P - 1
343
+ - `attr1`: [T * P, ...] attributes of the first mesh, where every P values correspond to a face
344
+ _ ...
345
+ """
346
+ def flatten_mesh_indices(*args: np.ndarray) -> Tuple[np.ndarray, ...]:
347
+ assert len(args) % 2 == 0, "The number of arguments must be even."
348
+ T, P = args[0].shape
349
+ assert all(arg.shape[0] == T and arg.shape[1] == P for arg in args[::2]), "The faces must have the same shape."
350
+ attr_flat = []
351
+ for faces_, attr_ in zip(args[::2], args[1::2]):
352
+ attr_flat_ = attr_[faces_].reshape(-1, *attr_.shape[1:])
353
+ attr_flat.append(attr_flat_)
354
+ faces_flat = np.arange(T * P, dtype=np.int32).reshape(T, P)
355
+ return faces_flat, *attr_flat
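A minimal sketch of `triangulate` on a quad mesh (hypothetical indices): each quad is fanned into two triangles.

    import numpy as np
    from utils3d.numpy.mesh import triangulate

    quads = np.array([[0, 1, 2, 3], [4, 5, 6, 7]], dtype=np.int32)
    tris = triangulate(quads)    # (4, 3): [[0,1,2],[0,2,3],[4,5,6],[4,6,7]]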
utils3d/numpy/quadmesh.py ADDED
@@ -0,0 +1,472 @@
1
+ import numpy as np
2
+ import scipy as sp
3
+ import scipy.optimize as spopt
4
+ import piqp
5
+ from typing import *
6
+
7
+
8
+ __all__ = [
9
+ 'calc_quad_candidates',
10
+ 'calc_quad_distortion',
11
+ 'calc_quad_direction',
12
+ 'calc_quad_smoothness',
13
+ 'sovle_quad',
14
+ 'sovle_quad_qp',
15
+ 'tri_to_quad'
16
+ ]
17
+
18
+
19
+ def calc_quad_candidates(
20
+ edges: np.ndarray,
21
+ face2edge: np.ndarray,
22
+ edge2face: np.ndarray,
23
+ ):
24
+ """
25
+ Calculate the candidate quad faces.
26
+
27
+ Args:
28
+ edges (np.ndarray): [E, 2] edge indices
29
+ face2edge (np.ndarray): [T, 3] face to edge relation
30
+ edge2face (np.ndarray): [E, 2] edge to face relation
31
+
32
+ Returns:
33
+ quads (np.ndarray): [Q, 4] quad candidate indices
34
+ quad2edge (np.ndarray): [Q, 4] edge to quad candidate relation
35
+ quad2adj (np.ndarray): [Q, 8] adjacent quad candidates of each quad candidate
36
+ quads_valid (np.ndarray): [E] whether the quad corresponding to the edge is valid
37
+ """
38
+ E = edges.shape[0]
39
+ T = face2edge.shape[0]
40
+
41
+ quads_valid = edge2face[:, 1] != -1
42
+ Q = quads_valid.sum()
43
+ quad2face = edge2face[quads_valid] # [Q, 2]
44
+ quad2edge = face2edge[quad2face] # [Q, 2, 3]
45
+ flag = quad2edge == np.arange(E)[quads_valid][:, None, None] # [Q, 2, 3]
46
+ flag = flag.argmax(axis=-1) # [Q, 2]
47
+ quad2edge = np.stack([
48
+ quad2edge[np.arange(Q)[:, None], np.arange(2)[None, :], (flag + 1) % 3],
49
+ quad2edge[np.arange(Q)[:, None], np.arange(2)[None, :], (flag + 2) % 3],
50
+ ], axis=-1).reshape(Q, 4) # [Q, 4]
51
+
52
+ quads = np.concatenate([
53
+ np.where(
54
+ (edges[quad2edge[:, 0:1], 1:] == edges[quad2edge[:, 1:2], :]).any(axis=-1),
55
+ edges[quad2edge[:, 0:1], [[0, 1]]],
56
+ edges[quad2edge[:, 0:1], [[1, 0]]],
57
+ ),
58
+ np.where(
59
+ (edges[quad2edge[:, 2:3], 1:] == edges[quad2edge[:, 3:4], :]).any(axis=-1),
60
+ edges[quad2edge[:, 2:3], [[0, 1]]],
61
+ edges[quad2edge[:, 2:3], [[1, 0]]],
62
+ ),
63
+ ], axis=1) # [Q, 4]
64
+
65
+ quad2adj = edge2face[quad2edge] # [Q, 4, 2]
66
+ quad2adj = quad2adj[quad2adj != quad2face[:, [0,0,1,1], None]].reshape(Q, 4) # [Q, 4]
67
+ quad2adj_valid = quad2adj != -1
68
+ quad2adj = face2edge[quad2adj] # [Q, 4, 3]
69
+ quad2adj[~quad2adj_valid, 0] = quad2edge[~quad2adj_valid]
70
+ quad2adj[~quad2adj_valid, 1:] = -1
71
+ quad2adj = quad2adj[quad2adj != quad2edge[..., None]].reshape(Q, 8) # [Q, 8]
72
+ edge_valid = -np.ones(E, dtype=np.int32)
73
+ edge_valid[quads_valid] = np.arange(Q)
74
+ quad2adj_valid = quad2adj != -1
75
+ quad2adj[quad2adj_valid] = edge_valid[quad2adj[quad2adj_valid]] # [Q, 8]
76
+
77
+ return quads, quad2edge, quad2adj, quads_valid
78
+
79
+
80
+ def calc_quad_distortion(
81
+ vertices: np.ndarray,
82
+ quads: np.ndarray,
83
+ ):
84
+ """
85
+ Calculate the distortion of each candidate quad face.
86
+
87
+ Args:
88
+ vertices (np.ndarray): [N, 3] 3-dimensional vertices
89
+ quads (np.ndarray): [Q, 4] quad face indices
90
+
91
+ Returns:
92
+ distortion (np.ndarray): [Q] distortion of each quad face
93
+ """
94
+ edge0 = vertices[quads[:, 1]] - vertices[quads[:, 0]] # [Q, 3]
95
+ edge1 = vertices[quads[:, 2]] - vertices[quads[:, 1]] # [Q, 3]
96
+ edge2 = vertices[quads[:, 3]] - vertices[quads[:, 2]] # [Q, 3]
97
+ edge3 = vertices[quads[:, 0]] - vertices[quads[:, 3]] # [Q, 3]
98
+ cross = vertices[quads[:, 0]] - vertices[quads[:, 2]] # [Q, 3]
99
+
100
+ len0 = np.maximum(np.linalg.norm(edge0, axis=-1), 1e-10) # [Q]
101
+ len1 = np.maximum(np.linalg.norm(edge1, axis=-1), 1e-10) # [Q]
102
+ len2 = np.maximum(np.linalg.norm(edge2, axis=-1), 1e-10) # [Q]
103
+ len3 = np.maximum(np.linalg.norm(edge3, axis=-1), 1e-10) # [Q]
104
+ len_cross = np.maximum(np.linalg.norm(cross, axis=-1), 1e-10) # [Q]
105
+
106
+ angle0 = np.arccos(np.clip(np.sum(-edge0 * edge1, axis=-1) / (len0 * len1), -1, 1)) # [Q]
107
+ angle1 = np.arccos(np.clip(np.sum(-edge1 * cross, axis=-1) / (len1 * len_cross), -1, 1)) \
108
+ + np.arccos(np.clip(np.sum(cross * edge2, axis=-1) / (len_cross * len2), -1, 1)) # [Q]
109
+ angle2 = np.arccos(np.clip(np.sum(-edge2 * edge3, axis=-1) / (len2 * len3), -1, 1)) # [Q]
110
+ angle3 = np.arccos(np.clip(np.sum(-edge3 * -cross, axis=-1) / (len3 * len_cross), -1, 1)) \
111
+ + np.arccos(np.clip(np.sum(-cross * edge0, axis=-1) / (len_cross * len0), -1, 1)) # [Q]
112
+
113
+ normal0 = np.cross(edge0, edge1) # [Q, 3]
114
+ normal1 = np.cross(edge2, edge3) # [Q, 3]
115
+ normal0 = normal0 / np.maximum(np.linalg.norm(normal0, axis=-1, keepdims=True), 1e-10) # [Q, 3]
116
+ normal1 = normal1 / np.maximum(np.linalg.norm(normal1, axis=-1, keepdims=True), 1e-10) # [Q, 3]
117
+ angle_normal = np.arccos(np.clip(np.sum(normal0 * normal1, axis=-1), -1, 1)) # [Q]
118
+
119
+ D90 = np.pi / 2
120
+ D180 = np.pi
121
+ D360 = np.pi * 2
122
+ ang_eng = (np.abs(angle0 - D90)**2 + np.abs(angle1 - D90)**2 + np.abs(angle2 - D90)**2 + np.abs(angle3 - D90)**2) / 4 # [Q]
123
+ dist_eng = np.abs(angle0 - angle2)**2 / np.minimum(np.maximum(np.minimum(angle0, angle2), 1e-10), np.maximum(D180 - np.maximum(angle0, angle2), 1e-10)) \
124
+ + np.abs(angle1 - angle3)**2 / np.minimum(np.maximum(np.minimum(angle1, angle3), 1e-10), np.maximum(D180 - np.maximum(angle1, angle3), 1e-10)) # [Q]
125
+ plane_eng = np.where(angle_normal < D90/2, np.abs(angle_normal)**2, 1e10) # [Q]
126
+ eng = ang_eng + 2 * dist_eng + 2 * plane_eng # [Q]
127
+
128
+ return eng
129
+
130
+
131
+ def calc_quad_direction(
132
+ vertices: np.ndarray,
133
+ quads: np.ndarray,
134
+ ):
135
+ """
136
+ Calculate the direction of each candidate quad face.
137
+
138
+ Args:
139
+ vertices (np.ndarray): [N, 3] 3-dimensional vertices
140
+ quads (np.ndarray): [Q, 4] quad face indices
141
+
142
+ Returns:
143
+ direction (np.ndarray): [Q, 4] direction of each quad face.
144
+ Represented by the angle between the crossing and each edge.
145
+ """
146
+ mid0 = (vertices[quads[:, 0]] + vertices[quads[:, 1]]) / 2 # [Q, 3]
147
+ mid1 = (vertices[quads[:, 1]] + vertices[quads[:, 2]]) / 2 # [Q, 3]
148
+ mid2 = (vertices[quads[:, 2]] + vertices[quads[:, 3]]) / 2 # [Q, 3]
149
+ mid3 = (vertices[quads[:, 3]] + vertices[quads[:, 0]]) / 2 # [Q, 3]
150
+
151
+ cross0 = mid2 - mid0 # [Q, 3]
152
+ cross1 = mid3 - mid1 # [Q, 3]
153
+ cross0 = cross0 / np.maximum(np.linalg.norm(cross0, axis=-1, keepdims=True), 1e-10) # [Q, 3]
154
+ cross1 = cross1 / np.maximum(np.linalg.norm(cross1, axis=-1, keepdims=True), 1e-10) # [Q, 3]
155
+
156
+ edge0 = vertices[quads[:, 1]] - vertices[quads[:, 0]] # [Q, 3]
157
+ edge1 = vertices[quads[:, 2]] - vertices[quads[:, 1]] # [Q, 3]
158
+ edge2 = vertices[quads[:, 3]] - vertices[quads[:, 2]] # [Q, 3]
159
+ edge3 = vertices[quads[:, 0]] - vertices[quads[:, 3]] # [Q, 3]
160
+ edge0 = edge0 / np.maximum(np.linalg.norm(edge0, axis=-1, keepdims=True), 1e-10) # [Q, 3]
161
+ edge1 = edge1 / np.maximum(np.linalg.norm(edge1, axis=-1, keepdims=True), 1e-10) # [Q, 3]
162
+ edge2 = edge2 / np.maximum(np.linalg.norm(edge2, axis=-1, keepdims=True), 1e-10) # [Q, 3]
163
+ edge3 = edge3 / np.maximum(np.linalg.norm(edge3, axis=-1, keepdims=True), 1e-10) # [Q, 3]
164
+
165
+ direction = np.stack([
166
+ np.arccos(np.clip(np.sum(cross0 * edge0, axis=-1), -1, 1)),
167
+ np.arccos(np.clip(np.sum(cross1 * edge1, axis=-1), -1, 1)),
168
+ np.arccos(np.clip(np.sum(-cross0 * edge2, axis=-1), -1, 1)),
169
+ np.arccos(np.clip(np.sum(-cross1 * edge3, axis=-1), -1, 1)),
170
+ ], axis=-1) # [Q, 4]
171
+
172
+ return direction
173
+
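For the same axis-aligned unit square, each midpoint connector is perpendicular to the edges it crosses, so all four direction angles come out as pi/2 (editorial sketch, same assumed import path as above):

import numpy as np
from utils3d.numpy.quadmesh import calc_quad_direction  # assumed module path

square = np.array([[0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0]], dtype=np.float32)
direction = calc_quad_direction(square, np.array([[0, 1, 2, 3]]))
assert np.allclose(direction, np.pi / 2)   # [1, 4], all right angles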
174
+
175
+ def calc_quad_smoothness(
176
+ quad2edge: np.ndarray,
177
+ quad2adj: np.ndarray,
178
+ quads_direction: np.ndarray,
179
+ ):
180
+ """
181
+ Calculate the smoothness of each candidate quad face connection.
182
+
183
+ Args:
184
+         quad2edge (np.ndarray): [Q, 4] edge indices of each quad face
+         quad2adj (np.ndarray): [Q, 8] adjacent quad faces of each quad face
185
+ quads_direction (np.ndarray): [Q, 4] direction of each quad face
186
+
187
+ Returns:
188
+ smoothness (np.ndarray): [Q, 8] smoothness of each quad face connection
189
+ """
190
+ Q = quad2adj.shape[0]
191
+ quad2adj_valid = quad2adj != -1
192
+ connections = np.stack([
193
+ np.arange(Q)[:, None].repeat(8, axis=1),
194
+ quad2adj,
195
+ ], axis=-1)[quad2adj_valid] # [C, 2]
196
+ shared_edge_idx_0 = np.array([[0, 0, 1, 1, 2, 2, 3, 3]]).repeat(Q, axis=0)[quad2adj_valid] # [C]
197
+ shared_edge_idx_1 = np.argmax(quad2edge[quad2adj][quad2adj_valid] == quad2edge[connections[:, 0], shared_edge_idx_0][:, None], axis=-1) # [C]
198
+ valid_smoothness = np.abs(quads_direction[connections[:, 0], shared_edge_idx_0] - quads_direction[connections[:, 1], shared_edge_idx_1])**2 # [C]
199
+ smoothness = np.zeros([Q, 8], dtype=np.float32)
200
+ smoothness[quad2adj_valid] = valid_smoothness
201
+ return smoothness
202
+
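A tiny worked example of the smoothness term (an editorial sketch, not part of the commit; the edge ids and directions are made up, and the import path assumes this module lives at utils3d/numpy/quadmesh.py): two candidate quads meet across global edge 5 through their first edge slot, so the connection cost is the squared difference of the corresponding direction angles.

import numpy as np
from utils3d.numpy.quadmesh import calc_quad_smoothness  # assumed module path

quad2edge = np.array([[5, 1, 2, 3],
                      [5, 6, 7, 8]])                      # per-quad edge ids (made up)
quad2adj = -np.ones((2, 8), dtype=np.int64)
quad2adj[0, 0] = 1                                        # quad 0 meets quad 1 across its edge slot 0
quad2adj[1, 0] = 0
direction = np.array([[0.5, 0.0, 0.0, 0.0],
                      [0.7, 0.0, 0.0, 0.0]])

s = calc_quad_smoothness(quad2edge, quad2adj, direction)  # [2, 8]
assert np.isclose(s[0, 0], 0.04) and np.isclose(s[1, 0], 0.04)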
203
+
204
+ def sovle_quad(
205
+ face2edge: np.ndarray,
206
+ edge2face: np.ndarray,
207
+ quad2adj: np.ndarray,
208
+ quads_distortion: np.ndarray,
209
+ quads_smoothness: np.ndarray,
210
+ quads_valid: np.ndarray,
211
+ ):
212
+ """
213
+ Solve the quad mesh from the candidate quad faces.
214
+
215
+ Args:
216
+ face2edge (np.ndarray): [T, 3] face to edge relation
217
+ edge2face (np.ndarray): [E, 2] edge to face relation
218
+ quad2adj (np.ndarray): [Q, 8] adjacent quad faces of each quad face
219
+ quads_distortion (np.ndarray): [Q] distortion of each quad face
220
+ quads_smoothness (np.ndarray): [Q, 8] smoothness of each quad face connection
221
+ quads_valid (np.ndarray): [E] whether the quad corresponding to the edge is valid
222
+
223
+ Returns:
224
+         quads_weight (np.ndarray): [Q] weight of each candidate quad face
+         conn_min_weight (np.ndarray): [C] auxiliary minimum weight of each quad face connection
+         conn_max_weight (np.ndarray): [C] auxiliary maximum weight of each quad face connection
225
+ """
226
+ T = face2edge.shape[0]
227
+ E = edge2face.shape[0]
228
+ Q = quads_distortion.shape[0]
229
+ edge_valid = -np.ones(E, dtype=np.int32)
230
+ edge_valid[quads_valid] = np.arange(Q)
231
+
232
+ quads_connection = np.stack([
233
+ np.arange(Q)[:, None].repeat(8, axis=1),
234
+ quad2adj,
235
+ ], axis=-1)[quad2adj != -1] # [C, 2]
236
+ quads_connection = np.sort(quads_connection, axis=-1) # [C, 2]
237
+ quads_connection, quads_connection_idx = np.unique(quads_connection, axis=0, return_index=True) # [C, 2], [C]
238
+ quads_smoothness = quads_smoothness[quad2adj != -1] # [C]
239
+ quads_smoothness = quads_smoothness[quads_connection_idx] # [C]
240
+ C = quads_connection.shape[0]
241
+
242
+ # Construct the linear programming problem
243
+
244
+ # Variables:
245
+ # quads_weight: [Q] weight of each quad face
246
+ # tri_min_weight: [T] minimum weight of each triangle face
247
+ # conn_min_weight: [C] minimum weight of each quad face connection
248
+ # conn_max_weight: [C] maximum weight of each quad face connection
249
+ # Objective:
250
+     #   minimize c^T x: per-quad distortion costs plus per-connection smoothness costs (see the construction of c below)
251
+
252
+ c = np.concatenate([
253
+ quads_distortion - 3,
254
+ quads_smoothness*4 - 2,
255
+ quads_smoothness*4,
256
+ ], axis=0) # [Q+C]
257
+
258
+ A_ub_triplet = np.concatenate([
259
+ np.stack([np.arange(T), edge_valid[face2edge[:, 0]], np.ones(T)], axis=1), # [T, 3]
260
+ np.stack([np.arange(T), edge_valid[face2edge[:, 1]], np.ones(T)], axis=1), # [T, 3]
261
+ np.stack([np.arange(T), edge_valid[face2edge[:, 2]], np.ones(T)], axis=1), # [T, 3]
262
+ np.stack([np.arange(T, T+C), np.arange(Q, Q+C), np.ones(C)], axis=1), # [C, 3]
263
+ np.stack([np.arange(T, T+C), quads_connection[:, 0], -np.ones(C)], axis=1), # [C, 3]
264
+ np.stack([np.arange(T, T+C), quads_connection[:, 1], -np.ones(C)], axis=1), # [C, 3]
265
+ np.stack([np.arange(T+C, T+2*C), np.arange(Q+C, Q+2*C), -np.ones(C)], axis=1), # [C, 3]
266
+ np.stack([np.arange(T+C, T+2*C), quads_connection[:, 0], np.ones(C)], axis=1), # [C, 3]
267
+ np.stack([np.arange(T+C, T+2*C), quads_connection[:, 1], np.ones(C)], axis=1), # [C, 3]
268
+ ], axis=0) # [3T+6C, 3]
269
+ A_ub_triplet = A_ub_triplet[A_ub_triplet[:, 1] != -1] # [3T', 3]
270
+     A_ub = sp.sparse.coo_matrix((A_ub_triplet[:, 2], (A_ub_triplet[:, 0], A_ub_triplet[:, 1])), shape=[T+2*C, Q+2*C])    # [T+2C, Q+2C]
271
+ b_ub = np.concatenate([np.ones(T), -np.ones(C), np.ones(C)], axis=0) # [T+2C]
272
+ bound = np.stack([
273
+ np.concatenate([np.zeros(Q), -np.ones(C), np.zeros(C)], axis=0),
274
+ np.concatenate([np.ones(Q), np.ones(C), np.ones(C)], axis=0),
275
+ ], axis=1) # [Q+2C, 2]
276
+ A_eq = None
277
+ b_eq = None
278
+
279
+ print('Solver statistics:')
280
+ print(f' #T = {T}')
281
+ print(f' #Q = {Q}')
282
+ print(f' #C = {C}')
283
+
284
+ # Solve the linear programming problem
285
+ last_num_valid = 0
286
+ for i in range(100):
287
+ res_ = spopt.linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=bound)
288
+ if not res_.success:
289
+ print(f' Iter {i} | Failed with {res_.message}')
290
+ break
291
+ res = res_
292
+ weights = res.x[:Q]
293
+ valid = (weights > 0.5)
294
+ num_valid = valid.sum()
295
+ print(f' Iter {i} | #Q_valid = {num_valid}')
296
+ if num_valid == last_num_valid:
297
+ break
298
+ last_num_valid = num_valid
299
+ A_eq_triplet = np.stack([
300
+ np.arange(num_valid),
301
+ np.arange(Q)[valid],
302
+ np.ones(num_valid),
303
+ ], axis=1) # [num_valid, 3]
304
+         A_eq = sp.sparse.coo_matrix((A_eq_triplet[:, 2], (A_eq_triplet[:, 0], A_eq_triplet[:, 1])), shape=[num_valid, Q+2*C])   # [num_valid, Q+2C]
305
+ b_eq = np.where(weights[valid] > 0.5, 1, 0) # [num_valid]
306
+
307
+ # Return the result
308
+ quads_weight = res.x[:Q]
309
+ conn_min_weight = res.x[Q:Q+C]
310
+ conn_max_weight = res.x[Q+C:Q+2*C]
311
+ return quads_weight, conn_min_weight, conn_max_weight
312
+
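The selection step above is a relaxed linear program: choose quad weights in [0, 1] so that no triangle is claimed by more than one quad, while rewarding low distortion and smooth neighbours. A self-contained toy instance of the same scipy.optimize.linprog call pattern (editorial; the numbers are made up for illustration):

import numpy as np
from scipy import optimize as spopt

# Two candidate quads compete for one shared triangle; quad 0 is less distorted.
c = np.array([0.2 - 3, 0.8 - 3])            # negative costs reward selecting a quad
A_ub = np.array([[1.0, 1.0]])               # the shared triangle: w0 + w1 <= 1
b_ub = np.array([1.0])

res = spopt.linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=[(0, 1), (0, 1)])
assert res.success and res.x[0] > 0.5 and res.x[1] < 0.5   # the better quad wins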
313
+
314
+ def sovle_quad_qp(
315
+ face2edge: np.ndarray,
316
+ edge2face: np.ndarray,
317
+ quad2adj: np.ndarray,
318
+ quads_distortion: np.ndarray,
319
+ quads_smoothness: np.ndarray,
320
+ quads_valid: np.ndarray,
321
+ ):
322
+ """
323
+ Solve the quad mesh from the candidate quad faces.
324
+
325
+ Args:
326
+ face2edge (np.ndarray): [T, 3] face to edge relation
327
+ edge2face (np.ndarray): [E, 2] edge to face relation
328
+ quad2adj (np.ndarray): [Q, 8] adjacent quad faces of each quad face
329
+ quads_distortion (np.ndarray): [Q] distortion of each quad face
330
+ quads_smoothness (np.ndarray): [Q, 8] smoothness of each quad face connection
331
+ quads_valid (np.ndarray): [E] whether the quad corresponding to the edge is valid
332
+
333
+ Returns:
334
+ weights (np.ndarray): [Q] weight of each valid quad face
335
+ """
336
+ T = face2edge.shape[0]
337
+ E = edge2face.shape[0]
338
+ Q = quads_distortion.shape[0]
339
+ edge_valid = -np.ones(E, dtype=np.int32)
340
+ edge_valid[quads_valid] = np.arange(Q)
341
+
342
+ # Construct the quadratic programming problem
343
+ C_smoothness_triplet = np.stack([
344
+ np.arange(Q)[:, None].repeat(8, axis=1)[quad2adj != -1],
345
+ quad2adj[quad2adj != -1],
346
+ 5 * quads_smoothness[quad2adj != -1],
347
+ ], axis=-1) # [C, 3]
348
+ # C_smoothness_triplet = np.concatenate([
349
+ # C_smoothness_triplet,
350
+ # np.stack([np.arange(Q), np.arange(Q), 20*np.ones(Q)], axis=1),
351
+ # ], axis=0) # [C+Q, 3]
352
+ C_smoothness = sp.sparse.coo_matrix((C_smoothness_triplet[:, 2], (C_smoothness_triplet[:, 0], C_smoothness_triplet[:, 1])), shape=[Q, Q]) # [Q, Q]
353
+ C_smoothness = C_smoothness.tocsc()
354
+ C_dist = quads_distortion - 20 # [Q]
355
+
356
+     A_eq = sp.sparse.coo_matrix((np.zeros(Q), (np.zeros(Q), np.arange(Q))), shape=[1, Q])  # [1, Q]
357
+ A_eq = A_eq.tocsc()
358
+ b_eq = np.array([0])
359
+
360
+ A_ub_triplet = np.concatenate([
361
+ np.stack([np.arange(T), edge_valid[face2edge[:, 0]], np.ones(T)], axis=1), # [T, 3]
362
+ np.stack([np.arange(T), edge_valid[face2edge[:, 1]], np.ones(T)], axis=1), # [T, 3]
363
+ np.stack([np.arange(T), edge_valid[face2edge[:, 2]], np.ones(T)], axis=1), # [T, 3]
364
+ ], axis=0) # [3T, 3]
365
+ A_ub_triplet = A_ub_triplet[A_ub_triplet[:, 1] != -1] # [3T', 3]
366
+ A_ub = sp.sparse.coo_matrix((A_ub_triplet[:, 2], (A_ub_triplet[:, 0], A_ub_triplet[:, 1])), shape=[T, Q]) # [T, Q]
367
+ A_ub = A_ub.tocsc()
368
+ b_ub = np.ones(T)
369
+
370
+ lb = np.zeros(Q)
371
+ ub = np.ones(Q)
372
+
373
+ solver = piqp.SparseSolver()
374
+ solver.settings.verbose = True
375
+ solver.settings.compute_timings = True
376
+ solver.setup(C_smoothness, C_dist, A_eq, b_eq, A_ub, b_ub, lb, ub)
377
+
378
+ status = solver.solve()
379
+
380
+ # x = cp.Variable(Q)
381
+ # prob = cp.Problem(
382
+ # cp.Minimize(cp.quad_form(x, C_smoothness) + C_dist.T @ x),
383
+ # [
384
+ # A_ub @ x <= b_ub,
385
+ # x >= 0, x <= 1,
386
+ # ]
387
+ # )
388
+
389
+ # # Solve the quadratic programming problem
390
+ # prob.solve(solver=cp.PIQP, verbose=True)
391
+
392
+ # Return the result
393
+ weights = solver.result.x
394
+ return weights
395
+
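The commented-out cvxpy formulation expresses the same quadratic program declaratively. A self-contained toy version (editorial sketch; cvxpy is an assumed optional dependency, and the matrices are made up and chosen positive semidefinite so quad_form accepts them):

import numpy as np
import cvxpy as cp

P = np.array([[1.0, 0.5],
              [0.5, 1.0]])                  # toy smoothness coupling between two quads
q = np.array([0.2 - 20, 0.8 - 20])          # shifted distortion costs reward selection
A_ub = np.array([[1.0, 1.0]])               # the two quads share a triangle
b_ub = np.array([1.0])

x = cp.Variable(2)
prob = cp.Problem(
    cp.Minimize(cp.quad_form(x, P) + q @ x),
    [A_ub @ x <= b_ub, x >= 0, x <= 1],
)
prob.solve()
assert prob.status == cp.OPTIMAL and x.value[0] > x.value[1]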
396
+
397
+ def tri_to_quad(
398
+ vertices: np.ndarray,
399
+ faces: np.ndarray,
400
+ ) -> Tuple[np.ndarray, np.ndarray]:
401
+ """
402
+ Convert a triangle mesh to a quad mesh.
403
+ NOTE: The input mesh must be a manifold mesh.
404
+
405
+ Args:
406
+ vertices (np.ndarray): [N, 3] 3-dimensional vertices
407
+ faces (np.ndarray): [T, 3] triangular face indices
408
+
409
+ Returns:
410
+ vertices (np.ndarray): [N_, 3] 3-dimensional vertices
411
+ faces (np.ndarray): [Q, 4] quad face indices
412
+ """
413
+ raise NotImplementedError
414
+
415
+
416
+ if __name__ == '__main__':
417
+ import os
418
+ import sys
419
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..')))
420
+ import utils3d
421
+ import numpy as np
422
+ import cv2
423
+ from vis import vis_edge_color
424
+
425
+ file = 'miku'
426
+
427
+ vertices, faces = utils3d.io.read_ply(f'test/assets/{file}.ply')
428
+ edges, edge2face, face2edge, face2face = calc_relations(faces)
429
+ quad_cands, quad2edge, quad2adj, quad_valid = calc_quad_candidates(edges, face2edge, edge2face)
430
+ distortion = calc_quad_distortion(vertices, quad_cands)
431
+ direction = calc_quad_direction(vertices, quad_cands)
432
+ smoothness = calc_quad_smoothness(quad2edge, quad2adj, direction)
433
+ boundary_edges = edges[edge2face[:, 1] == -1]
434
+ quads_weight, conn_min_weight, conn_max_weight = sovle_quad(face2edge, edge2face, quad2adj, distortion, smoothness, quad_valid)
435
+ quads = quad_cands[quads_weight > 0.5]
436
+ print('Mesh statistics')
437
+ print(f' #V = {vertices.shape[0]}')
438
+ print(f' #F = {faces.shape[0]}')
439
+ print(f' #E = {edges.shape[0]}')
440
+ print(f' #B = {boundary_edges.shape[0]}')
441
+ print(f' #Q_cand = {quad_cands.shape[0]}')
442
+ print(f' #Q = {quads.shape[0]}')
443
+
444
+ utils3d.io.write_ply(f'test/assets/{file}_boundary_edges.ply', vertices=vertices, edges=boundary_edges)
445
+ utils3d.io.write_ply(f'test/assets/{file}_quad_candidates.ply', vertices=vertices, faces=quads)
446
+
447
+ edge_colors = np.zeros([edges.shape[0], 3], dtype=np.uint8)
448
+ distortion = (distortion - distortion.min()) / (distortion.max() - distortion.min())
449
+ distortion = (distortion * 255).astype(np.uint8)
450
+ edge_colors[quad_valid] = cv2.cvtColor(cv2.applyColorMap(distortion, cv2.COLORMAP_JET), cv2.COLOR_BGR2RGB).reshape(-1, 3)
451
+ utils3d.io.write_ply(f'test/assets/{file}_quad_candidates_distortion.ply', **vis_edge_color(vertices, edges, edge_colors))
452
+
453
+ edge_colors = np.zeros([edges.shape[0], 3], dtype=np.uint8)
454
+ edge_colors[quad_valid] = cv2.cvtColor(cv2.applyColorMap((quads_weight * 255).astype(np.uint8), cv2.COLORMAP_JET), cv2.COLOR_BGR2RGB).reshape(-1, 3)
455
+ utils3d.io.write_ply(f'test/assets/{file}_quad_candidates_weights.ply', **vis_edge_color(vertices, edges, edge_colors))
456
+ utils3d.io.write_ply(f'test/assets/{file}_quad.ply', vertices=vertices, faces=quads)
457
+
458
+ quad_centers = vertices[quad_cands].mean(axis=1)
459
+ conns = np.stack([
460
+ np.arange(quad_cands.shape[0])[:, None].repeat(8, axis=1),
461
+ quad2adj,
462
+ ], axis=-1)[quad2adj != -1] # [C, 2]
463
+ conns, conns_idx = np.unique(np.sort(conns, axis=-1), axis=0, return_index=True) # [C, 2], [C]
464
+ smoothness = smoothness[quad2adj != -1][conns_idx] # [C]
465
+ conns_color = cv2.cvtColor(cv2.applyColorMap((smoothness * 255).astype(np.uint8), cv2.COLORMAP_JET), cv2.COLOR_BGR2RGB).reshape(-1, 3)
466
+ utils3d.io.write_ply(f'test/assets/{file}_quad_conn_smoothness.ply', **vis_edge_color(quad_centers, conns, conns_color))
467
+ conns_color = cv2.cvtColor(cv2.applyColorMap((conn_min_weight * 255).astype(np.uint8), cv2.COLORMAP_JET), cv2.COLOR_BGR2RGB).reshape(-1, 3)
468
+ utils3d.io.write_ply(f'test/assets/{file}_quad_conn_min.ply', **vis_edge_color(quad_centers, conns, conns_color))
469
+ conns_color = cv2.cvtColor(cv2.applyColorMap((conn_max_weight * 255).astype(np.uint8), cv2.COLORMAP_JET), cv2.COLOR_BGR2RGB).reshape(-1, 3)
470
+ utils3d.io.write_ply(f'test/assets/{file}_quad_conn_max.ply', **vis_edge_color(quad_centers, conns, conns_color))
471
+
472
+
utils3d/numpy/rasterization.py ADDED
@@ -0,0 +1,471 @@
1
+ import os
2
+ from typing import *
3
+
4
+ import numpy as np
5
+ import moderngl
6
+
7
+ from . import transforms, utils, mesh
8
+
9
+
10
+ __all__ = [
11
+ 'RastContext',
12
+ 'rasterize_triangle_faces',
13
+ 'rasterize_edges',
14
+ 'texture',
15
+ 'warp_image_by_depth',
16
+ ]
17
+
18
+
19
+ def map_np_dtype(dtype) -> str:
20
+ if dtype == int:
21
+ return 'i4'
22
+ elif dtype == np.uint8:
23
+ return 'u1'
24
+ elif dtype == np.uint32:
25
+         return 'u4'
26
+ elif dtype == np.float16:
27
+ return 'f2'
28
+ elif dtype == np.float32:
29
+ return 'f4'
30
+
31
+
32
+ def one_value(dtype):
33
+ if dtype == 'u1':
34
+ return 255
35
+ elif dtype == 'u2':
36
+ return 65535
37
+ else:
38
+ return 1
39
+
40
+
41
+ class RastContext:
42
+ def __init__(self, standalone: bool = True, backend: str = None, **kwargs):
43
+ """
44
+ Create a moderngl context.
45
+
46
+ Args:
47
+ standalone (bool, optional): whether to create a standalone context. Defaults to True.
48
+ backend (str, optional): backend to use. Defaults to None.
49
+
50
+ Keyword Args:
51
+ See moderngl.create_context
52
+ """
53
+ if backend is None:
54
+ self.mgl_ctx = moderngl.create_context(standalone=standalone, **kwargs)
55
+ else:
56
+ self.mgl_ctx = moderngl.create_context(standalone=standalone, backend=backend, **kwargs)
57
+
58
+ self.__prog_src = {}
59
+ self.__prog = {}
60
+
61
+ def __del__(self):
62
+ self.mgl_ctx.release()
63
+
64
+     def screen_quad(self) -> None:
65
+ self.screen_quad_vbo = self.mgl_ctx.buffer(np.array([[-1, -1], [1, -1], [1, 1], [-1, 1]], dtype='f4'))
66
+ self.screen_quad_ibo = self.mgl_ctx.buffer(np.array([0, 1, 2, 0, 2, 3], dtype=np.int32))
67
+
68
+ def program_vertex_attribute(self, n: int) -> moderngl.Program:
69
+ assert n in [1, 2, 3, 4], 'vertex attribute only supports channels 1, 2, 3, 4'
70
+
71
+ if 'vertex_attribute_vsh' not in self.__prog_src:
72
+ with open(os.path.join(os.path.dirname(__file__), 'shaders', 'vertex_attribute.vsh'), 'r') as f:
73
+ self.__prog_src['vertex_attribute_vsh'] = f.read()
74
+ if 'vertex_attribute_fsh' not in self.__prog_src:
75
+ with open(os.path.join(os.path.dirname(__file__), 'shaders', 'vertex_attribute.fsh'), 'r') as f:
76
+ self.__prog_src['vertex_attribute_fsh'] = f.read()
77
+
78
+ if f'vertex_attribute_{n}' not in self.__prog:
79
+ vsh = self.__prog_src['vertex_attribute_vsh'].replace('vecN', f'vec{n}')
80
+ fsh = self.__prog_src['vertex_attribute_fsh'].replace('vecN', f'vec{n}')
81
+ self.__prog[f'vertex_attribute_{n}'] = self.mgl_ctx.program(vertex_shader=vsh, fragment_shader=fsh)
82
+
83
+ return self.__prog[f'vertex_attribute_{n}']
84
+
85
+ def program_texture(self, n: int) -> moderngl.Program:
86
+ assert n in [1, 2, 3, 4], 'texture only supports channels 1, 2, 3, 4'
87
+
88
+ if 'texture_vsh' not in self.__prog_src:
89
+ with open(os.path.join(os.path.dirname(__file__), 'shaders', 'texture.vsh'), 'r') as f:
90
+ self.__prog_src['texture_vsh'] = f.read()
91
+ if 'texture_fsh' not in self.__prog_src:
92
+ with open(os.path.join(os.path.dirname(__file__), 'shaders', 'texture.fsh'), 'r') as f:
93
+ self.__prog_src['texture_fsh'] = f.read()
94
+
95
+ if f'texture_{n}' not in self.__prog:
96
+ vsh = self.__prog_src['texture_vsh'].replace('vecN', f'vec{n}')
97
+ fsh = self.__prog_src['texture_fsh'].replace('vecN', f'vec{n}')
98
+ self.__prog[f'texture_{n}'] = self.mgl_ctx.program(vertex_shader=vsh, fragment_shader=fsh)
99
+ self.__prog[f'texture_{n}']['tex'] = 0
100
+ self.__prog[f'texture_{n}']['uv'] = 1
101
+
102
+ return self.__prog[f'texture_{n}']
103
+
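A minimal way to obtain a context (editorial sketch, not part of the commit; EGL is the usual choice for headless rendering and its availability depends on the platform):

from utils3d.numpy.rasterization import RastContext

ctx = RastContext(standalone=True, backend='egl')   # off-screen rendering, e.g. on a server
# or RastContext(standalone=True) to let moderngl pick the default backend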
104
+
105
+ def rasterize_triangle_faces(
106
+ ctx: RastContext,
107
+ vertices: np.ndarray,
108
+ faces: np.ndarray,
109
+ attr: np.ndarray,
110
+ width: int,
111
+ height: int,
112
+ transform: np.ndarray = None,
113
+ cull_backface: bool = True,
114
+ return_depth: bool = False,
115
+ image: np.ndarray = None,
116
+ depth: np.ndarray = None
117
+ ) -> Tuple[np.ndarray, np.ndarray]:
118
+ """
119
+ Rasterize vertex attribute.
120
+
121
+ Args:
122
+ vertices (np.ndarray): [N, 3]
123
+ faces (np.ndarray): [T, 3]
124
+ attr (np.ndarray): [N, C]
125
+ width (int): width of rendered image
126
+ height (int): height of rendered image
127
+ transform (np.ndarray): [4, 4] model-view-projection transformation matrix.
128
+         cull_backface (bool): whether to cull backface
+         return_depth (bool): whether to return the screen space depth map
129
+         image (np.ndarray): [H, W, C] background image
130
+         depth (np.ndarray): [H, W] background depth
131
+
132
+ Returns:
133
+ image (np.ndarray): [H, W, C] rendered image
134
+ depth (np.ndarray): [H, W] screen space depth, ranging from 0 to 1. If return_depth is False, it is None.
135
+ """
136
+ assert vertices.ndim == 2 and vertices.shape[1] == 3
137
+ assert faces.ndim == 2 and faces.shape[1] == 3, f"Faces should be a 2D array with shape (T, 3), but got {faces.shape}"
138
+ assert attr.ndim == 2 and attr.shape[1] in [1, 2, 3, 4], f'Vertex attribute only supports channels 1, 2, 3, 4, but got {attr.shape}'
139
+ assert vertices.shape[0] == attr.shape[0]
140
+ assert vertices.dtype == np.float32
141
+ assert faces.dtype == np.uint32 or faces.dtype == np.int32
142
+ assert attr.dtype == np.float32, "Attribute should be float32"
143
+
144
+ C = attr.shape[1]
145
+ prog = ctx.program_vertex_attribute(C)
146
+
147
+     transform = np.eye(4, dtype=np.float32) if transform is None else transform
148
+
149
+ # Create buffers
150
+ ibo = ctx.mgl_ctx.buffer(np.ascontiguousarray(faces, dtype='i4'))
151
+ vbo_vertices = ctx.mgl_ctx.buffer(np.ascontiguousarray(vertices, dtype='f4'))
152
+ vbo_attr = ctx.mgl_ctx.buffer(np.ascontiguousarray(attr, dtype='f4'))
153
+ vao = ctx.mgl_ctx.vertex_array(
154
+ prog,
155
+ [
156
+ (vbo_vertices, '3f', 'i_position'),
157
+ (vbo_attr, f'{C}f', 'i_attr'),
158
+ ],
159
+ ibo,
160
+ mode=moderngl.TRIANGLES,
161
+ )
162
+
163
+ # Create framebuffer
164
+ image_tex = ctx.mgl_ctx.texture((width, height), C, dtype='f4', data=np.ascontiguousarray(image[::-1, :, :]) if image is not None else None)
165
+ depth_tex = ctx.mgl_ctx.depth_texture((width, height), data=np.ascontiguousarray(depth[::-1, :]) if depth is not None else None)
166
+ fbo = ctx.mgl_ctx.framebuffer(
167
+ color_attachments=[image_tex],
168
+ depth_attachment=depth_tex,
169
+ )
170
+
171
+ # Render
172
+ prog['u_mvp'].write(transform.transpose().copy().astype('f4'))
173
+ fbo.use()
174
+ fbo.viewport = (0, 0, width, height)
175
+ ctx.mgl_ctx.depth_func = '<'
176
+ ctx.mgl_ctx.enable(ctx.mgl_ctx.DEPTH_TEST)
177
+ if cull_backface:
178
+ ctx.mgl_ctx.enable(ctx.mgl_ctx.CULL_FACE)
179
+ else:
180
+ ctx.mgl_ctx.disable(ctx.mgl_ctx.CULL_FACE)
181
+ vao.render()
182
+ ctx.mgl_ctx.disable(ctx.mgl_ctx.DEPTH_TEST)
183
+
184
+ # Read
185
+ image = np.zeros((height, width, C), dtype='f4')
186
+ image_tex.read_into(image)
187
+ image = image[::-1, :, :]
188
+ if return_depth:
189
+ depth = np.zeros((height, width), dtype='f4')
190
+ depth_tex.read_into(depth)
191
+ depth = depth[::-1, :]
192
+ else:
193
+ depth = None
194
+
195
+ # Release
196
+ vao.release()
197
+ ibo.release()
198
+ vbo_vertices.release()
199
+ vbo_attr.release()
200
+ fbo.release()
201
+ image_tex.release()
202
+ depth_tex.release()
203
+
204
+ return image, depth
205
+
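A self-contained usage sketch (editorial, with made-up geometry): render one vertex-colored triangle into a 256x256 buffer. A far-plane background depth of 1.0 is passed explicitly so the '<' depth test starts from a defined value.

import numpy as np
from utils3d.numpy.rasterization import RastContext, rasterize_triangle_faces

ctx = RastContext(standalone=True)
vertices = np.array([[-0.5, -0.5, 0.0],
                     [ 0.5, -0.5, 0.0],
                     [ 0.0,  0.5, 0.0]], dtype=np.float32)      # already in clip space
faces = np.array([[0, 1, 2]], dtype=np.int32)
colors = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)

image, depth = rasterize_triangle_faces(
    ctx, vertices, faces, colors, 256, 256,
    transform=None,                                  # identity model-view-projection
    cull_backface=False,
    return_depth=True,
    depth=np.ones((256, 256), dtype=np.float32),     # far-plane background depth
)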
206
+
207
+ def rasterize_edges(
208
+ ctx: RastContext,
209
+ vertices: np.ndarray,
210
+ edges: np.ndarray,
211
+ attr: np.ndarray,
212
+ width: int,
213
+ height: int,
214
+ transform: np.ndarray = None,
215
+ line_width: float = 1.0,
216
+ return_depth: bool = False,
217
+ image: np.ndarray = None,
218
+ depth: np.ndarray = None
219
+ ) -> Tuple[np.ndarray, ...]:
220
+ """
221
+ Rasterize vertex attribute.
222
+
223
+ Args:
224
+ vertices (np.ndarray): [N, 3]
225
+         edges (np.ndarray): [E, 2]
226
+ attr (np.ndarray): [N, C]
227
+ width (int): width of rendered image
228
+ height (int): height of rendered image
229
+ transform (np.ndarray): [4, 4] model-view-projection matrix
230
+ line_width (float): width of line. Defaults to 1.0. NOTE: Values other than 1.0 may not work across all platforms.
231
+         return_depth (bool): whether to return the screen space depth map
232
+
233
+ Returns:
234
+ image (np.ndarray): [H, W, C] rendered image
235
+ depth (np.ndarray): [H, W] screen space depth, ranging from 0 to 1. If return_depth is False, it is None.
236
+ """
237
+ assert vertices.ndim == 2 and vertices.shape[1] == 3
238
+ assert edges.ndim == 2 and edges.shape[1] == 2, f"Edges should be a 2D array with shape (T, 2), but got {edges.shape}"
239
+ assert attr.ndim == 2 and attr.shape[1] in [1, 2, 3, 4], f'Vertex attribute only supports channels 1, 2, 3, 4, but got {attr.shape}'
240
+ assert vertices.shape[0] == attr.shape[0]
241
+ assert vertices.dtype == np.float32
242
+ assert edges.dtype == np.uint32 or edges.dtype == np.int32
243
+ assert attr.dtype == np.float32, "Attribute should be float32"
244
+
245
+ C = attr.shape[1]
246
+ prog = ctx.program_vertex_attribute(C)
247
+
248
+     transform = transform if transform is not None else np.eye(4, dtype=np.float32)
249
+
250
+ # Create buffers
251
+ ibo = ctx.mgl_ctx.buffer(np.ascontiguousarray(edges, dtype='i4'))
252
+ vbo_vertices = ctx.mgl_ctx.buffer(np.ascontiguousarray(vertices, dtype='f4'))
253
+ vbo_attr = ctx.mgl_ctx.buffer(np.ascontiguousarray(attr, dtype='f4'))
254
+ vao = ctx.mgl_ctx.vertex_array(
255
+ prog,
256
+ [
257
+ (vbo_vertices, '3f', 'i_position'),
258
+ (vbo_attr, f'{C}f', 'i_attr'),
259
+ ],
260
+ ibo,
261
+ mode=moderngl.LINES,
262
+ )
263
+
264
+ # Create framebuffer
265
+ image_tex = ctx.mgl_ctx.texture((width, height), C, dtype='f4', data=np.ascontiguousarray(image[::-1, :, :]) if image is not None else None)
266
+ depth_tex = ctx.mgl_ctx.depth_texture((width, height), data=np.ascontiguousarray(depth[::-1, :]) if depth is not None else None)
267
+ fbo = ctx.mgl_ctx.framebuffer(
268
+ color_attachments=[image_tex],
269
+ depth_attachment=depth_tex,
270
+ )
271
+
272
+ # Render
273
+ prog['u_mvp'].write(transform.transpose().copy().astype('f4'))
274
+ fbo.use()
275
+ fbo.viewport = (0, 0, width, height)
276
+ ctx.mgl_ctx.depth_func = '<'
277
+ ctx.mgl_ctx.enable(ctx.mgl_ctx.DEPTH_TEST)
278
+ ctx.mgl_ctx.line_width = line_width
279
+ vao.render()
280
+ ctx.mgl_ctx.disable(ctx.mgl_ctx.DEPTH_TEST)
281
+
282
+ # Read
283
+ image = np.zeros((height, width, C), dtype='f4')
284
+ image_tex.read_into(image)
285
+ image = image[::-1, :, :]
286
+ if return_depth:
287
+ depth = np.zeros((height, width), dtype='f4')
288
+ depth_tex.read_into(depth)
289
+ depth = depth[::-1, :]
290
+ else:
291
+ depth = None
292
+
293
+ # Release
294
+ vao.release()
295
+ ibo.release()
296
+ vbo_vertices.release()
297
+ vbo_attr.release()
298
+ fbo.release()
299
+ image_tex.release()
300
+ depth_tex.release()
301
+
302
+ return image, depth
303
+
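The edge rasterizer follows the same pattern; a short editorial sketch drawing a single colored segment on top of a far-plane depth background (made-up geometry):

import numpy as np
from utils3d.numpy.rasterization import RastContext, rasterize_edges

ctx = RastContext(standalone=True)
vertices = np.array([[-0.8, -0.8, 0.0],
                     [ 0.8,  0.8, 0.0]], dtype=np.float32)
edges = np.array([[0, 1]], dtype=np.int32)
colors = np.array([[1.0, 1.0, 0.0],
                   [0.0, 1.0, 1.0]], dtype=np.float32)

line_img, _ = rasterize_edges(
    ctx, vertices, edges, colors, 128, 128,
    depth=np.ones((128, 128), dtype=np.float32),     # background depth so the test passes
)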
304
+
305
+ def texture(
306
+ ctx: RastContext,
307
+ uv: np.ndarray,
308
+ texture: np.ndarray,
309
+ interpolation: str= 'linear',
310
+ wrap: str = 'clamp'
311
+ ) -> np.ndarray:
312
+ """
313
+ Given an UV image, texturing from the texture map
314
+     Given a UV coordinate image, sample colors from the texture map.
315
+ assert len(texture.shape) == 3 and 1 <= texture.shape[2] <= 4
316
+ assert uv.shape[2] == 2
317
+ height, width = uv.shape[:2]
318
+ texture_dtype = map_np_dtype(texture.dtype)
319
+
320
+ # Create VAO
321
+ screen_quad_vbo = ctx.mgl_ctx.buffer(np.array([[-1, -1], [1, -1], [1, 1], [-1, 1]], dtype='f4'))
322
+ screen_quad_ibo = ctx.mgl_ctx.buffer(np.array([0, 1, 2, 0, 2, 3], dtype=np.int32))
323
+ screen_quad_vao = ctx.mgl_ctx.vertex_array(ctx.program_texture(texture.shape[2]), [(screen_quad_vbo, '2f4', 'in_vert')], index_buffer=screen_quad_ibo, index_element_size=4)
324
+
325
+ # Create texture, set filter and bind. TODO: min mag filter, mipmap
326
+ texture_tex = ctx.mgl_ctx.texture((texture.shape[1], texture.shape[0]), texture.shape[2], dtype=texture_dtype, data=np.ascontiguousarray(texture))
327
+ if interpolation == 'linear':
328
+ texture_tex.filter = (moderngl.LINEAR, moderngl.LINEAR)
329
+ elif interpolation == 'nearest':
330
+ texture_tex.filter = (moderngl.NEAREST, moderngl.NEAREST)
331
+ texture_tex.use(location=0)
332
+ texture_uv = ctx.mgl_ctx.texture((width, height), 2, dtype='f4', data=np.ascontiguousarray(uv.astype('f4', copy=False)))
333
+ texture_uv.filter = (moderngl.NEAREST, moderngl.NEAREST)
334
+ texture_uv.use(location=1)
335
+
336
+ # Create render buffer and frame buffer
337
+ rb = ctx.mgl_ctx.renderbuffer((uv.shape[1], uv.shape[0]), texture.shape[2], dtype=texture_dtype)
338
+ fbo = ctx.mgl_ctx.framebuffer(color_attachments=[rb])
339
+
340
+ # Render
341
+ fbo.use()
342
+ fbo.viewport = (0, 0, width, height)
343
+ ctx.mgl_ctx.disable(ctx.mgl_ctx.BLEND)
344
+ screen_quad_vao.render()
345
+
346
+ # Read buffer
347
+ image_buffer = np.frombuffer(fbo.read(components=texture.shape[2], attachment=0, dtype=texture_dtype), dtype=texture_dtype).reshape((height, width, texture.shape[2]))
348
+
349
+ # Release
350
+ texture_tex.release()
351
+ rb.release()
352
+ fbo.release()
353
+
354
+ return image_buffer
355
+
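A small sketch of texture() (editorial, not part of the commit): resample a checkerboard with a regular UV grid, which should reproduce the pattern at the output resolution up to filtering.

import numpy as np
from utils3d.numpy.rasterization import RastContext, texture

ctx = RastContext(standalone=True)
checker = ((np.indices((64, 64)).sum(axis=0) % 2) * 255).astype(np.uint8)
checker = np.stack([checker] * 3, axis=-1)                       # [64, 64, 3] uint8
u, v = np.meshgrid(np.linspace(0, 1, 256), np.linspace(0, 1, 256))
uv = np.stack([u, v], axis=-1).astype(np.float32)                # [256, 256, 2]

resampled = texture(ctx, uv, checker, interpolation='nearest')   # [256, 256, 3]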
356
+
357
+ def warp_image_by_depth(
358
+ ctx: RastContext,
359
+ src_depth: np.ndarray,
360
+ src_image: np.ndarray = None,
361
+ width: int = None,
362
+ height: int = None,
363
+ *,
364
+ extrinsics_src: np.ndarray = None,
365
+ extrinsics_tgt: np.ndarray = None,
366
+ intrinsics_src: np.ndarray = None,
367
+ intrinsics_tgt: np.ndarray = None,
368
+ near: float = 0.1,
369
+ far: float = 100.0,
370
+ cull_backface: bool = True,
371
+ ssaa: int = 1,
372
+ return_depth: bool = False,
373
+ ) -> Tuple[np.ndarray, ...]:
374
+ """
375
+ Warp image by depth map.
376
+
377
+ Args:
378
+ ctx (RastContext): rasterizer context
379
+ src_depth (np.ndarray): [H, W]
380
+ src_image (np.ndarray, optional): [H, W, C]. The image to warp. Defaults to None (use uv coordinates).
381
+ width (int, optional): width of the output image. None to use depth map width. Defaults to None.
382
+ height (int, optional): height of the output image. None to use depth map height. Defaults to None.
383
+ extrinsics_src (np.ndarray, optional): extrinsics matrix of the source camera. Defaults to None (identity).
384
+ extrinsics_tgt (np.ndarray, optional): extrinsics matrix of the target camera. Defaults to None (identity).
385
+ intrinsics_src (np.ndarray, optional): intrinsics matrix of the source camera. Defaults to None (use the same as intrinsics_tgt).
386
+ intrinsics_tgt (np.ndarray, optional): intrinsics matrix of the target camera. Defaults to None (use the same as intrinsics_src).
387
+ cull_backface (bool, optional): whether to cull backface. Defaults to True.
388
+         near (float, optional): near plane of the target perspective projection. Defaults to 0.1.
+         far (float, optional): far plane of the target perspective projection. Defaults to 100.0.
+         cull_backface (bool, optional): whether to cull backface. Defaults to True.
389
+
390
+ Returns:
391
+ tgt_image (np.ndarray): [H, W, C] warped image (or uv coordinates if image is None).
392
+ tgt_depth (np.ndarray): [H, W] screen space depth, ranging from 0 to 1. If return_depth is False, it is None.
393
+ """
394
+ assert src_depth.ndim == 2
395
+
396
+ if width is None:
397
+ width = src_depth.shape[1]
398
+ if height is None:
399
+ height = src_depth.shape[0]
400
+ if src_image is not None:
401
+         assert src_image.shape[:2] == src_depth.shape[:2], f'Shape of source image {src_image.shape} does not match shape of source depth {src_depth.shape}'
402
+
403
+ # set up default camera parameters
404
+ extrinsics_src = np.eye(4) if extrinsics_src is None else extrinsics_src
405
+ extrinsics_tgt = np.eye(4) if extrinsics_tgt is None else extrinsics_tgt
406
+ intrinsics_src = intrinsics_tgt if intrinsics_src is None else intrinsics_src
407
+ intrinsics_tgt = intrinsics_src if intrinsics_tgt is None else intrinsics_tgt
408
+
409
+ assert all(x is not None for x in [extrinsics_src, extrinsics_tgt, intrinsics_src, intrinsics_tgt]), "Make sure you have provided all the necessary camera parameters."
410
+
411
+ # check shapes
412
+ assert extrinsics_src.shape == (4, 4) and extrinsics_tgt.shape == (4, 4)
413
+ assert intrinsics_src.shape == (3, 3) and intrinsics_tgt.shape == (3, 3)
414
+
415
+ # convert to view and perspective matrices
416
+ view_tgt = transforms.extrinsics_to_view(extrinsics_tgt)
417
+ perspective_tgt = transforms.intrinsics_to_perspective(intrinsics_tgt, near=near, far=far)
418
+
419
+ # unproject depth map
420
+ uv, faces = utils.image_mesh(*src_depth.shape[-2:])
421
+ pts = transforms.unproject_cv(uv, src_depth.reshape(-1), extrinsics_src, intrinsics_src)
422
+ faces = mesh.triangulate(faces, vertices=pts)
423
+
424
+ # rasterize attributes
425
+ if src_image is not None:
426
+ attr = src_image.reshape(-1, src_image.shape[-1])
427
+ else:
428
+ attr = uv
429
+
430
+ tgt_image, tgt_depth = rasterize_triangle_faces(
431
+ ctx,
432
+ pts,
433
+ faces,
434
+ attr,
435
+ width * ssaa,
436
+ height * ssaa,
437
+ transform=perspective_tgt @ view_tgt,
438
+ cull_backface=cull_backface,
439
+ return_depth=return_depth,
440
+ )
441
+
442
+ if ssaa > 1:
443
+ tgt_image = tgt_image.reshape(height, ssaa, width, ssaa, -1).mean(axis=(1, 3))
444
+ tgt_depth = tgt_depth.reshape(height, ssaa, width, ssaa, -1).mean(axis=(1, 3)) if return_depth else None
445
+
446
+ return tgt_image, tgt_depth
447
+
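A hedged usage sketch for warp_image_by_depth (editorial; the geometry is made up and the intrinsics are assumed to follow the normalized, UV-space convention used by the unprojection helpers): warp a constant-depth image into a camera shifted along +x.

import numpy as np
from utils3d.numpy.rasterization import RastContext, warp_image_by_depth

ctx = RastContext(standalone=True)
H, W = 128, 128
src_depth = np.full((H, W), 2.0, dtype=np.float32)            # a flat plane 2 units away
src_image = np.random.rand(H, W, 3).astype(np.float32)

intrinsics = np.array([[1.0, 0.0, 0.5],
                       [0.0, 1.0, 0.5],
                       [0.0, 0.0, 1.0]], dtype=np.float32)     # assumed normalized pinhole intrinsics
extrinsics_src = np.eye(4, dtype=np.float32)
extrinsics_tgt = np.eye(4, dtype=np.float32)
extrinsics_tgt[0, 3] = 0.1                                     # shift the target camera

tgt_image, _ = warp_image_by_depth(
    ctx, src_depth, src_image,
    extrinsics_src=extrinsics_src, extrinsics_tgt=extrinsics_tgt,
    intrinsics_src=intrinsics, intrinsics_tgt=intrinsics,
    cull_backface=False,
)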
448
+ def test():
449
+ """
450
+ Test if rasterization works. It will render a cube with random colors and save it as a CHECKME.png file.
451
+ """
452
+ ctx = RastContext(backend='egl')
453
+ vertices, faces = utils.cube(tri=True)
454
+ attr = np.random.rand(len(vertices), 3).astype(np.float32)
455
+ perspective = transforms.perspective(np.deg2rad(60), 1, 0.01, 100)
456
+ view = transforms.view_look_at(np.array([2, 2, 2]), np.array([0, 0, 0]), np.array([0, 1, 0]))
457
+ image, _ = rasterize_triangle_faces(
458
+ ctx,
459
+ vertices,
460
+ faces,
461
+ attr,
462
+ 512, 512,
463
+         transform=perspective @ view,
+         cull_backface=True,
467
+ return_depth=True,
468
+ )
469
+ import cv2
470
+ cv2.imwrite('CHECKME.png', cv2.cvtColor((image.clip(0, 1) * 255).astype(np.uint8), cv2.COLOR_RGB2BGR))
471
+