# File size: 3,696 Bytes | 864ec44
# Last modified: 2024-02-08
#
# Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# --------------------------------------------------------------------------
# If you find this code useful, we kindly ask you to cite our paper in your work.
# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
# If you use or adapt this code, please attribute to https://github.com/prs-eth/marigold.
# More information about the method can be found at https://marigoldmonodepth.github.io
# --------------------------------------------------------------------------
from .base_depth_dataset import BaseDepthDataset, DepthFileNameMode
import torch
from torchvision.transforms import InterpolationMode, Resize, CenterCrop
import torchvision.transforms as transforms
class DepthAnythingDataset(BaseDepthDataset):
    """Depth dataset whose depth files are used as stored and normalized from 0..255.

    The depth image is read without any unit decoding, and the training
    preprocessing maps the value 0 to -1 and the value 255 to +1.

    NOTE(review): judging by the class name, the depth maps are presumably
    produced by Depth Anything -- confirm against the data preparation code.
    """

    def __init__(
        self,
        **kwargs,
    ) -> None:
        """Fix the dataset-specific arguments and forward the rest to the base class.

        Args:
            **kwargs: Passed through unchanged to ``BaseDepthDataset.__init__``.
        """
        super().__init__(
            # Dataset-specific parameters.
            # NOTE(review): min_depth=-1 / max_depth=256 presumably bracket the
            # 0..255 value range so that no pixel is rejected by the base
            # class's validity check -- confirm in BaseDepthDataset.
            min_depth=-1,
            max_depth=256,
            has_filled_depth=False,
            name_mode=DepthFileNameMode.id,
            **kwargs,
        )

    def _read_depth_file(self, rel_path):
        """Read a depth file and return it without rescaling.

        Args:
            rel_path: Path of the depth file, handed to ``self._read_image``.

        Returns:
            Whatever ``self._read_image`` returns; no depth decoding is applied.
        """
        return self._read_image(rel_path)

    def _training_preprocess(self, rasters):
        """Augment, normalize, and optionally resize one training sample.

        Args:
            rasters: Dict of rasters for one sample. It must contain
                ``"depth_raw_linear"`` and ``"depth_filled_linear"``, and also
                ``"valid_mask_filled"`` when ``self.move_invalid_to_far_plane``
                is set.

        Returns:
            The raster dict with ``"depth_raw_norm"`` and
            ``"depth_filled_norm"`` added. When ``self.resize_to_hw`` is set,
            every entry is resized and center-cropped to that size.
        """
        # Augmentation
        if self.augm_args is not None:
            rasters = self._augment_data(rasters)

        # Normalization: 0 -> -1 and 255 -> +1.
        rasters["depth_raw_norm"] = rasters["depth_raw_linear"] / 255.0 * 2.0 - 1.0
        rasters["depth_filled_norm"] = rasters["depth_filled_linear"] / 255.0 * 2.0 - 1.0

        # Set invalid pixels to the far plane.
        # NOTE(review): assumes depth_transform.norm_min / norm_max agree with
        # the [-1, 1] range produced above -- confirm in the configuration.
        if self.move_invalid_to_far_plane:
            far_plane_value = (
                self.depth_transform.norm_max
                if self.depth_transform.far_plane_at_max
                else self.depth_transform.norm_min
            )
            rasters["depth_filled_norm"][~rasters["valid_mask_filled"]] = far_plane_value

        # Resize the shorter edge to the target height, then center-crop.
        if self.resize_to_hw is not None:
            center_crop = CenterCrop(self.resize_to_hw)
            resize_numeric = transforms.Compose(
                [
                    Resize(self.resize_to_hw[0]),
                    center_crop,
                ]
            )
            # Boolean masks must not go through the default bilinear filter:
            # the blended values are cast back to bool, so every partially
            # covered pixel turns True and the valid region grows. Nearest
            # sampling keeps the mask an exact subsample of the original.
            resize_mask = transforms.Compose(
                [
                    Resize(
                        self.resize_to_hw[0],
                        interpolation=InterpolationMode.NEAREST_EXACT,
                    ),
                    center_crop,
                ]
            )
            rasters = {
                key: (
                    resize_mask(value)
                    if getattr(value, "dtype", None) == torch.bool
                    else resize_numeric(value)
                )
                for key, value in rasters.items()
            }

        return rasters
# def _load_depth_data(self, depth_rel_path, filled_rel_path):
# # Read depth data
# outputs = {}
# depth_raw = self._read_depth_file(depth_rel_path).squeeze()
# depth_raw_linear = torch.from_numpy(depth_raw).float().unsqueeze(0) # [1, H, W] [0, 255]
# outputs["depth_raw_linear"] = depth_raw_linear.clone()
#
# if self.has_filled_depth:
# depth_filled = self._read_depth_file(filled_rel_path).squeeze()
# depth_filled_linear = torch.from_numpy(depth_filled).float().unsqueeze(0)
# outputs["depth_filled_linear"] = depth_filled_linear
# else:
# outputs["depth_filled_linear"] = depth_raw_linear.clone()
#
# return outputs