Vincentqyw committed commit c608946
Parent(s): a80d6bb
add: roma
This view is limited to 50 files because it contains too many changes. See raw diff.
- third_party/DKM/.gitignore +3 -0
- third_party/DKM/LICENSE +25 -0
- third_party/DKM/README.md +117 -0
- third_party/DKM/assets/ams_hom_A.jpg +3 -0
- third_party/DKM/assets/ams_hom_B.jpg +3 -0
- third_party/DKM/assets/dkmv3_warp.jpg +3 -0
- third_party/DKM/assets/mega_8_scenes_0008_0.1_0.3.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_0008_0.3_0.5.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_0019_0.1_0.3.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_0019_0.3_0.5.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_0021_0.1_0.3.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_0021_0.3_0.5.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_0024_0.1_0.3.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_0024_0.3_0.5.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_0025_0.1_0.3.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_0025_0.3_0.5.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_0032_0.1_0.3.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_0032_0.3_0.5.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_0063_0.1_0.3.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_0063_0.3_0.5.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_1589_0.1_0.3.npz +3 -0
- third_party/DKM/assets/mega_8_scenes_1589_0.3_0.5.npz +3 -0
- third_party/DKM/assets/mount_rushmore.mp4 +0 -0
- third_party/DKM/assets/sacre_coeur_A.jpg +3 -0
- third_party/DKM/assets/sacre_coeur_B.jpg +3 -0
- third_party/DKM/data/.gitignore +2 -0
- third_party/DKM/demo/.gitignore +1 -0
- third_party/DKM/demo/demo_fundamental.py +37 -0
- third_party/DKM/demo/demo_match.py +48 -0
- third_party/DKM/dkm/__init__.py +4 -0
- third_party/DKM/dkm/benchmarks/__init__.py +4 -0
- third_party/DKM/dkm/benchmarks/deprecated/hpatches_sequences_dense_benchmark.py +100 -0
- third_party/DKM/dkm/benchmarks/deprecated/yfcc100m_benchmark.py +119 -0
- third_party/DKM/dkm/benchmarks/hpatches_sequences_homog_benchmark.py +114 -0
- third_party/DKM/dkm/benchmarks/megadepth1500_benchmark.py +124 -0
- third_party/DKM/dkm/benchmarks/megadepth_dense_benchmark.py +86 -0
- third_party/DKM/dkm/benchmarks/scannet_benchmark.py +143 -0
- third_party/DKM/dkm/checkpointing/__init__.py +1 -0
- third_party/DKM/dkm/checkpointing/checkpoint.py +31 -0
- third_party/DKM/dkm/datasets/__init__.py +1 -0
- third_party/DKM/dkm/datasets/megadepth.py +177 -0
- third_party/DKM/dkm/datasets/scannet.py +151 -0
- third_party/DKM/dkm/losses/__init__.py +1 -0
- third_party/DKM/dkm/losses/depth_match_regression_loss.py +128 -0
- third_party/DKM/dkm/models/__init__.py +4 -0
- third_party/DKM/dkm/models/deprecated/build_model.py +787 -0
- third_party/DKM/dkm/models/deprecated/corr_channels.py +34 -0
- third_party/DKM/dkm/models/deprecated/local_corr.py +630 -0
- third_party/DKM/dkm/models/dkm.py +759 -0
- third_party/DKM/dkm/models/encoders.py +147 -0
third_party/DKM/.gitignore
ADDED
@@ -0,0 +1,3 @@
+*.egg-info*
+*.vscode*
+*__pycache__*
third_party/DKM/LICENSE
ADDED
@@ -0,0 +1,25 @@
+NOTE! Models trained on our synthetic dataset use datasets which are licensed under non-commercial licenses.
+Hence we cannot provide them under the MIT license. However, MegaDepth is under MIT license, hence we provide those models under MIT license, see below.
+
+
+License for Models Trained on MegaDepth ONLY below:
+
+Copyright (c) 2022 Johan Edstedt
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
third_party/DKM/README.md
ADDED
@@ -0,0 +1,117 @@
+# DKM: Dense Kernelized Feature Matching for Geometry Estimation
+### [Project Page](https://parskatt.github.io/DKM) | [Paper](https://arxiv.org/abs/2202.00667)
+<br/>
+
+> DKM: Dense Kernelized Feature Matching for Geometry Estimation
+> [Johan Edstedt](https://scholar.google.com/citations?user=Ul-vMR0AAAAJ), [Ioannis Athanasiadis](https://scholar.google.com/citations?user=RCAtJgUAAAAJ), [Mårten Wadenbäck](https://scholar.google.com/citations?user=6WRQpCQAAAAJ), [Michael Felsberg](https://scholar.google.com/citations?&user=lkWfR08AAAAJ)
+> CVPR 2023
+
+## How to Use?
+<details>
+Our model produces a dense (for all pixels) warp and certainty.
+
+Warp: [B,H,W,4] — for each image in a batch of size B and each pixel in the HxW grid, we output the input and matching coordinates in the normalized grids [-1,1]x[-1,1].
+
+Certainty: [B,H,W] — a number per pixel indicating its matchability.
+
+See [demo](dkm/demo/) for two demos of DKM.
+
+See [api.md](docs/api.md) for API.
+</details>
+
+## Qualitative Results
+<details>
+
+https://user-images.githubusercontent.com/22053118/223748279-0f0c21b4-376a-440a-81f5-7f9a5d87483f.mp4
+
+https://user-images.githubusercontent.com/22053118/223748512-1bca4a17-cffa-491d-a448-96aac1353ce9.mp4
+
+https://user-images.githubusercontent.com/22053118/223748518-4d475d9f-a933-4581-97ed-6e9413c4caca.mp4
+
+https://user-images.githubusercontent.com/22053118/223748522-39c20631-aa16-4954-9c27-95763b38f2ce.mp4
+
+</details>
+
+## Benchmark Results
+
+<details>
+
+### Megadepth1500
+
+| Method | @5 | @10 | @20 |
+|-------|-------|------|------|
+| DKMv1 | 54.5 | 70.7 | 82.3 |
+| DKMv2 | *56.8* | *72.3* | *83.2* |
+| DKMv3 (paper) | **60.5** | **74.9** | **85.1** |
+| DKMv3 (this repo) | **60.0** | **74.6** | **84.9** |
+
+### Megadepth 8 Scenes
+| Method | @5 | @10 | @20 |
+|-------|-------|------|------|
+| DKMv3 (paper) | **60.5** | **74.5** | **84.2** |
+| DKMv3 (this repo) | **60.4** | **74.6** | **84.3** |
+
+### ScanNet1500
+| Method | @5 | @10 | @20 |
+|-------|-------|------|------|
+| DKMv1 | 24.8 | 44.4 | 61.9 |
+| DKMv2 | *28.2* | *49.2* | *66.6* |
+| DKMv3 (paper) | **29.4** | **50.7** | **68.3** |
+| DKMv3 (this repo) | **29.8** | **50.8** | **68.3** |
+
+</details>
+
+## Navigating the Code
+* Code for models can be found in [dkm/models](dkm/models/)
+* Code for benchmarks can be found in [dkm/benchmarks](dkm/benchmarks/)
+* Code for reproducing experiments from our paper can be found in [experiments/](experiments/)
+
+## Install
+Run ``pip install -e .``
+
+## Demo
+
+A demonstration of our method can be run by:
+``` bash
+python demo_match.py
+```
+This runs our model trained on MegaDepth on two images taken from Sacre Coeur.
+
+## Benchmarks
+See [Benchmarks](docs/benchmarks.md) for details.
+## Training
+See [Training](docs/training.md) for details.
+## Reproducing Results
+Given that the required benchmark or training dataset has been downloaded and unpacked, results can be reproduced by running the experiments in the experiments folder.
+
+## Using DKM matches for estimation
+We recommend using the excellent Graph-Cut RANSAC algorithm: https://github.com/danini/graph-cut-ransac
+
+| Method | @5 | @10 | @20 |
+|-------|-------|------|------|
+| DKMv3 (RANSAC) | *60.5* | *74.9* | *85.1* |
+| DKMv3 (GC-RANSAC) | **65.5** | **78.0** | **86.7** |
+
+## Acknowledgements
+We have used code and been inspired by https://github.com/PruneTruong/DenseMatching, https://github.com/zju3dv/LoFTR, and https://github.com/GrumpyZhou/patch2pix. We additionally thank the authors of ECO-TR for providing their benchmark.
+
+## BibTeX
+If you find our models useful, please consider citing our paper!
+```
+@inproceedings{edstedt2023dkm,
+title={{DKM}: Dense Kernelized Feature Matching for Geometry Estimation},
+author={Edstedt, Johan and Athanasiadis, Ioannis and Wadenbäck, Mårten and Felsberg, Michael},
+booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
+year={2023}
+}
+```
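To make the warp/certainty interface described in the README concrete, here is a minimal usage sketch assembled from the README's API description and the demo scripts included in this diff; the asset paths are the demo images, and the `match`/`sample`/`to_pixel_coordinates` calls follow `demo/demo_fundamental.py` below, so treat it as a sketch rather than a canonical API reference.

```python
# Minimal DKM usage sketch (based on demo_fundamental.py in this diff).
from PIL import Image
import torch
from dkm import DKMv3_outdoor

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DKMv3_outdoor(device=device)

im_A_path, im_B_path = "assets/sacre_coeur_A.jpg", "assets/sacre_coeur_B.jpg"
W_A, H_A = Image.open(im_A_path).size
W_B, H_B = Image.open(im_B_path).size

# Dense warp ([B,H,W,4], normalized [-1,1] coords) and certainty ([B,H,W]).
warp, certainty = model.match(im_A_path, im_B_path, device=device)
# Sample sparse correspondences weighted by certainty.
matches, certainty = model.sample(warp, certainty)
# Convert normalized coordinates to pixel coordinates in each image.
kpts_A, kpts_B = model.to_pixel_coordinates(matches, H_A, W_A, H_B, W_B)
```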
third_party/DKM/assets/ams_hom_A.jpg
ADDED
Git LFS Details

third_party/DKM/assets/ams_hom_B.jpg
ADDED
Git LFS Details

third_party/DKM/assets/dkmv3_warp.jpg
ADDED
Git LFS Details

third_party/DKM/assets/mega_8_scenes_0008_0.1_0.3.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c902547181fc9b370fdd16272140be6803fe983aea978c68683db803ac70dd57
+size 906160

third_party/DKM/assets/mega_8_scenes_0008_0.3_0.5.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65ce02bd248988b42363ccd257abaa9b99a00d569d2779597b36ba6c4da35021
+size 906160

third_party/DKM/assets/mega_8_scenes_0019_0.1_0.3.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6104feb8807a4ebdd1266160e67b3c507c550012f54c23292d0ebf99b88753f
+size 368192

third_party/DKM/assets/mega_8_scenes_0019_0.3_0.5.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9600ba5c24d414f63728bf5ee7550a3b035d7c615461e357590890ae0e0f042e
+size 368192

third_party/DKM/assets/mega_8_scenes_0021_0.1_0.3.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de89e9ccf10515cc4196ba1e7172ec98b2fb92ff9f85d90db5df1af5b6503313
+size 167528

third_party/DKM/assets/mega_8_scenes_0021_0.3_0.5.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94c97b57beb10411b3b98a1e88c9e1e2f9db51994dce04580d2b7cfc8919dab3
+size 167528

third_party/DKM/assets/mega_8_scenes_0024_0.1_0.3.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f14a66dbbd7fa8f31756dd496bfabe4c3ea115c6914acad9365dd02e46ae674
+size 63909

third_party/DKM/assets/mega_8_scenes_0024_0.3_0.5.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dfaee333beccd1da0d920777cdc8f17d584b21ba20f675b39222c5a205acf72a
+size 63909

third_party/DKM/assets/mega_8_scenes_0025_0.1_0.3.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b446ca3cc2073c8a3963cf68cc450ef2ebf73d2b956b1f5ae6b37621bc67cce4
+size 200371

third_party/DKM/assets/mega_8_scenes_0025_0.3_0.5.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df1969fd94032562b5e8d916467101a878168d586b795770e64c108bab250c9e
+size 200371

third_party/DKM/assets/mega_8_scenes_0032_0.1_0.3.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37cbafa0b0f981f5d69aba202ddd37c5892bda0fa13d053a3ad27d6ddad51c16
+size 642823

third_party/DKM/assets/mega_8_scenes_0032_0.3_0.5.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:962b3fadc7c94ea8e4a1bb5e168e72b0b6cc474ae56b0aee70ba4e517553fbcf
+size 642823

third_party/DKM/assets/mega_8_scenes_0063_0.1_0.3.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50ed6b02dff2fa719e4e9ca216b4704b82cbbefd127355d3ba7120828407e723
+size 228647

third_party/DKM/assets/mega_8_scenes_0063_0.3_0.5.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edc97129d000f0478020495f646f2fa7667408247ccd11054e02efbbb38d1444
+size 228647

third_party/DKM/assets/mega_8_scenes_1589_0.1_0.3.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04b0b6c6adff812e12b66476f7ca2a6ed2564cdd8208ec0c775f7b922f160103
+size 177063

third_party/DKM/assets/mega_8_scenes_1589_0.3_0.5.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae931f8cac1b2168f699c70efe42c215eaff27d3f0617d59afb3db183c9b1848
+size 177063

third_party/DKM/assets/mount_rushmore.mp4
ADDED
Binary file (986 kB)

third_party/DKM/assets/sacre_coeur_A.jpg
ADDED
Git LFS Details

third_party/DKM/assets/sacre_coeur_B.jpg
ADDED
Git LFS Details
third_party/DKM/data/.gitignore
ADDED
@@ -0,0 +1,2 @@
+*
+!.gitignore

third_party/DKM/demo/.gitignore
ADDED
@@ -0,0 +1 @@
+*.jpg
third_party/DKM/demo/demo_fundamental.py
ADDED
@@ -0,0 +1,37 @@
+from PIL import Image
+import torch
+import torch.nn.functional as F
+import numpy as np
+from dkm.utils.utils import tensor_to_pil
+import cv2
+from dkm import DKMv3_outdoor
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+
+if __name__ == "__main__":
+    from argparse import ArgumentParser
+    parser = ArgumentParser()
+    parser.add_argument("--im_A_path", default="assets/sacre_coeur_A.jpg", type=str)
+    parser.add_argument("--im_B_path", default="assets/sacre_coeur_B.jpg", type=str)
+
+    args, _ = parser.parse_known_args()
+    im1_path = args.im_A_path
+    im2_path = args.im_B_path
+
+    # Create model
+    dkm_model = DKMv3_outdoor(device=device)
+
+    W_A, H_A = Image.open(im1_path).size
+    W_B, H_B = Image.open(im2_path).size
+
+    # Match
+    warp, certainty = dkm_model.match(im1_path, im2_path, device=device)
+    # Sample matches for estimation
+    matches, certainty = dkm_model.sample(warp, certainty)
+    kpts1, kpts2 = dkm_model.to_pixel_coordinates(matches, H_A, W_A, H_B, W_B)
+    F, mask = cv2.findFundamentalMat(
+        kpts1.cpu().numpy(), kpts2.cpu().numpy(), ransacReprojThreshold=0.2,
+        method=cv2.USAC_MAGSAC, confidence=0.999999, maxIters=10000
+    )
+    # TODO: some better visualization
third_party/DKM/demo/demo_match.py
ADDED
@@ -0,0 +1,48 @@
+from PIL import Image
+import torch
+import torch.nn.functional as F
+import numpy as np
+from dkm.utils.utils import tensor_to_pil
+
+from dkm import DKMv3_outdoor
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+
+if __name__ == "__main__":
+    from argparse import ArgumentParser
+    parser = ArgumentParser()
+    parser.add_argument("--im_A_path", default="assets/sacre_coeur_A.jpg", type=str)
+    parser.add_argument("--im_B_path", default="assets/sacre_coeur_B.jpg", type=str)
+    parser.add_argument("--save_path", default="demo/dkmv3_warp_sacre_coeur.jpg", type=str)
+
+    args, _ = parser.parse_known_args()
+    im1_path = args.im_A_path
+    im2_path = args.im_B_path
+    save_path = args.save_path
+
+    # Create model
+    dkm_model = DKMv3_outdoor(device=device)
+
+    H, W = 864, 1152
+
+    im1 = Image.open(im1_path).resize((W, H))
+    im2 = Image.open(im2_path).resize((W, H))
+
+    # Match
+    warp, certainty = dkm_model.match(im1_path, im2_path, device=device)
+    # Sampling not needed, but can be done with model.sample(warp, certainty)
+    dkm_model.sample(warp, certainty)
+    x1 = (torch.tensor(np.array(im1)) / 255).to(device).permute(2, 0, 1)
+    x2 = (torch.tensor(np.array(im2)) / 255).to(device).permute(2, 0, 1)
+
+    im2_transfer_rgb = F.grid_sample(
+        x2[None], warp[:, :W, 2:][None], mode="bilinear", align_corners=False
+    )[0]
+    im1_transfer_rgb = F.grid_sample(
+        x1[None], warp[:, W:, :2][None], mode="bilinear", align_corners=False
+    )[0]
+    warp_im = torch.cat((im2_transfer_rgb, im1_transfer_rgb), dim=2)
+    white_im = torch.ones((H, 2 * W), device=device)
+    vis_im = certainty * warp_im + (1 - certainty) * white_im
+    tensor_to_pil(vis_im, unnormalize=False).save(save_path)
third_party/DKM/dkm/__init__.py
ADDED
@@ -0,0 +1,4 @@
+from .models import (
+    DKMv3_outdoor,
+    DKMv3_indoor,
+)
third_party/DKM/dkm/benchmarks/__init__.py
ADDED
@@ -0,0 +1,4 @@
+from .hpatches_sequences_homog_benchmark import HpatchesHomogBenchmark
+from .scannet_benchmark import ScanNetBenchmark
+from .megadepth1500_benchmark import Megadepth1500Benchmark
+from .megadepth_dense_benchmark import MegadepthDenseBenchmark
third_party/DKM/dkm/benchmarks/deprecated/hpatches_sequences_dense_benchmark.py
ADDED
@@ -0,0 +1,100 @@
+from PIL import Image
+import numpy as np
+
+import os
+
+import torch
+from tqdm import tqdm
+
+from dkm.utils import *
+
+
+class HpatchesDenseBenchmark:
+    """WARNING: HPATCHES grid goes from [0,n-1] instead of [0.5,n-0.5]"""
+
+    def __init__(self, dataset_path) -> None:
+        seqs_dir = "hpatches-sequences-release"
+        self.seqs_path = os.path.join(dataset_path, seqs_dir)
+        self.seq_names = sorted(os.listdir(self.seqs_path))
+
+    def convert_coordinates(self, query_coords, query_to_support, wq, hq, wsup, hsup):
+        # Get matches in output format on the grid [0, n] where the center of the top-left coordinate is [0.5, 0.5]
+        offset = 0.5  # Hpatches assumes that the center of the top-left pixel is at [0,0]
+        query_coords = (
+            torch.stack(
+                (
+                    wq * (query_coords[..., 0] + 1) / 2,
+                    hq * (query_coords[..., 1] + 1) / 2,
+                ),
+                axis=-1,
+            )
+            - offset
+        )
+        query_to_support = (
+            torch.stack(
+                (
+                    wsup * (query_to_support[..., 0] + 1) / 2,
+                    hsup * (query_to_support[..., 1] + 1) / 2,
+                ),
+                axis=-1,
+            )
+            - offset
+        )
+        return query_coords, query_to_support
+
+    def inside_image(self, x, w, h):
+        return torch.logical_and(
+            x[:, 0] < (w - 1),
+            torch.logical_and(x[:, 1] < (h - 1), (x > 0).prod(dim=-1)),
+        )
+
+    def benchmark(self, model):
+        use_cuda = torch.cuda.is_available()
+        device = torch.device("cuda:0" if use_cuda else "cpu")
+        aepes = []
+        pcks = []
+        for seq_idx, seq_name in tqdm(
+            enumerate(self.seq_names), total=len(self.seq_names)
+        ):
+            if seq_name[0] == "i":
+                continue
+            im1_path = os.path.join(self.seqs_path, seq_name, "1.ppm")
+            im1 = Image.open(im1_path)
+            w1, h1 = im1.size
+            for im_idx in range(2, 7):
+                im2_path = os.path.join(self.seqs_path, seq_name, f"{im_idx}.ppm")
+                im2 = Image.open(im2_path)
+                w2, h2 = im2.size
+                matches, certainty = model.match(im2, im1, do_pred_in_og_res=True)
+                matches, certainty = matches.reshape(-1, 4), certainty.reshape(-1)
+                inv_homography = torch.from_numpy(
+                    np.loadtxt(
+                        os.path.join(self.seqs_path, seq_name, "H_1_" + str(im_idx))
+                    )
+                ).to(device)
+                homography = torch.linalg.inv(inv_homography)
+                pos_a, pos_b = self.convert_coordinates(
+                    matches[:, :2], matches[:, 2:], w2, h2, w1, h1
+                )
+                pos_a, pos_b = pos_a.double(), pos_b.double()
+                pos_a_h = torch.cat(
+                    [pos_a, torch.ones([pos_a.shape[0], 1], device=device)], dim=1
+                )
+                pos_b_proj_h = (homography @ pos_a_h.t()).t()
+                pos_b_proj = pos_b_proj_h[:, :2] / pos_b_proj_h[:, 2:]
+                mask = self.inside_image(pos_b_proj, w1, h1)
+                residual = pos_b - pos_b_proj
+                dist = (residual**2).sum(dim=1).sqrt()[mask]
+                aepes.append(torch.mean(dist).item())
+                pck1 = (dist < 1.0).float().mean().item()
+                pck3 = (dist < 3.0).float().mean().item()
+                pck5 = (dist < 5.0).float().mean().item()
+                pcks.append([pck1, pck3, pck5])
+        m_pcks = np.mean(np.array(pcks), axis=0)
+        return {
+            "hp_pck1": m_pcks[0],
+            "hp_pck3": m_pcks[1],
+            "hp_pck5": m_pcks[2],
+        }
third_party/DKM/dkm/benchmarks/deprecated/yfcc100m_benchmark.py
ADDED
@@ -0,0 +1,119 @@
+import pickle
+import h5py
+import numpy as np
+import torch
+from dkm.utils import *
+from PIL import Image
+from tqdm import tqdm
+
+
+class Yfcc100mBenchmark:
+    def __init__(self, data_root="data/yfcc100m_test") -> None:
+        self.scenes = [
+            "buckingham_palace",
+            "notre_dame_front_facade",
+            "reichstag",
+            "sacre_coeur",
+        ]
+        self.data_root = data_root
+
+    def benchmark(self, model, r=2):
+        model.train(False)
+        with torch.no_grad():
+            data_root = self.data_root
+            meta_info = open(
+                f"{data_root}/yfcc_test_pairs_with_gt.txt", "r"
+            ).readlines()
+            tot_e_t, tot_e_R, tot_e_pose = [], [], []
+            for scene_ind in range(len(self.scenes)):
+                scene = self.scenes[scene_ind]
+                pairs = np.array(
+                    pickle.load(
+                        open(f"{data_root}/pairs/{scene}-te-1000-pairs.pkl", "rb")
+                    )
+                )
+                scene_dir = f"{data_root}/yfcc100m/{scene}/test/"
+                calibs = open(scene_dir + "calibration.txt", "r").read().split("\n")
+                images = open(scene_dir + "images.txt", "r").read().split("\n")
+                pair_inds = np.random.choice(
+                    range(len(pairs)), size=len(pairs), replace=False
+                )
+                for pairind in tqdm(pair_inds):
+                    idx1, idx2 = pairs[pairind]
+                    params = meta_info[1000 * scene_ind + pairind].split()
+                    rot1, rot2 = int(params[2]), int(params[3])
+                    calib1 = h5py.File(scene_dir + calibs[idx1], "r")
+                    K1, R1, t1, _, _ = get_pose(calib1)
+                    calib2 = h5py.File(scene_dir + calibs[idx2], "r")
+                    K2, R2, t2, _, _ = get_pose(calib2)
+
+                    R, t = compute_relative_pose(R1, t1, R2, t2)
+                    im1 = images[idx1]
+                    im2 = images[idx2]
+                    im1 = Image.open(scene_dir + im1).rotate(rot1 * 90, expand=True)
+                    w1, h1 = im1.size
+                    im2 = Image.open(scene_dir + im2).rotate(rot2 * 90, expand=True)
+                    w2, h2 = im2.size
+                    K1 = rotate_intrinsic(K1, rot1)
+                    K2 = rotate_intrinsic(K2, rot2)
+
+                    dense_matches, dense_certainty = model.match(im1, im2)
+                    dense_certainty = dense_certainty ** (1 / r)
+                    sparse_matches, sparse_confidence = model.sample(
+                        dense_matches, dense_certainty, 10000
+                    )
+                    scale1 = 480 / min(w1, h1)
+                    scale2 = 480 / min(w2, h2)
+                    w1, h1 = scale1 * w1, scale1 * h1
+                    w2, h2 = scale2 * w2, scale2 * h2
+                    K1 = K1 * scale1
+                    K2 = K2 * scale2
+
+                    kpts1 = sparse_matches[:, :2]
+                    kpts1 = np.stack(
+                        (w1 * kpts1[:, 0] / 2, h1 * kpts1[:, 1] / 2), axis=-1
+                    )
+                    kpts2 = sparse_matches[:, 2:]
+                    kpts2 = np.stack(
+                        (w2 * kpts2[:, 0] / 2, h2 * kpts2[:, 1] / 2), axis=-1
+                    )
+                    try:
+                        threshold = 1.0
+                        norm_threshold = threshold / (
+                            np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2]))
+                        )
+                        R_est, t_est, mask = estimate_pose(
+                            kpts1,
+                            kpts2,
+                            K1[:2, :2],
+                            K2[:2, :2],
+                            norm_threshold,
+                            conf=0.9999999,
+                        )
+                        T1_to_2 = np.concatenate((R_est, t_est), axis=-1)
+                        e_t, e_R = compute_pose_error(T1_to_2, R, t)
+                        e_pose = max(e_t, e_R)
+                    except Exception:
+                        e_t, e_R = 90, 90
+                        e_pose = max(e_t, e_R)
+                    tot_e_t.append(e_t)
+                    tot_e_R.append(e_R)
+                    tot_e_pose.append(e_pose)
+            tot_e_pose = np.array(tot_e_pose)
+            thresholds = [5, 10, 20]
+            auc = pose_auc(tot_e_pose, thresholds)
+            acc_5 = (tot_e_pose < 5).mean()
+            acc_10 = (tot_e_pose < 10).mean()
+            acc_15 = (tot_e_pose < 15).mean()
+            acc_20 = (tot_e_pose < 20).mean()
+            map_5 = acc_5
+            map_10 = np.mean([acc_5, acc_10])
+            map_20 = np.mean([acc_5, acc_10, acc_15, acc_20])
+            return {
+                "auc_5": auc[0],
+                "auc_10": auc[1],
+                "auc_20": auc[2],
+                "map_5": map_5,
+                "map_10": map_10,
+                "map_20": map_20,
+            }
third_party/DKM/dkm/benchmarks/hpatches_sequences_homog_benchmark.py
ADDED
@@ -0,0 +1,114 @@
+from PIL import Image
+import numpy as np
+
+import os
+
+from tqdm import tqdm
+from dkm.utils import pose_auc
+import cv2
+
+
+class HpatchesHomogBenchmark:
+    """Hpatches grid goes from [0,n-1] instead of [0.5,n-0.5]"""
+
+    def __init__(self, dataset_path) -> None:
+        seqs_dir = "hpatches-sequences-release"
+        self.seqs_path = os.path.join(dataset_path, seqs_dir)
+        self.seq_names = sorted(os.listdir(self.seqs_path))
+        # Ignored seqs are the same as in LoFTR.
+        self.ignore_seqs = set(
+            [
+                "i_contruction",
+                "i_crownnight",
+                "i_dc",
+                "i_pencils",
+                "i_whitebuilding",
+                "v_artisans",
+                "v_astronautis",
+                "v_talent",
+            ]
+        )
+
+    def convert_coordinates(self, query_coords, query_to_support, wq, hq, wsup, hsup):
+        offset = 0.5  # Hpatches assumes that the center of the top-left pixel is at [0,0] (I think)
+        query_coords = (
+            np.stack(
+                (
+                    wq * (query_coords[..., 0] + 1) / 2,
+                    hq * (query_coords[..., 1] + 1) / 2,
+                ),
+                axis=-1,
+            )
+            - offset
+        )
+        query_to_support = (
+            np.stack(
+                (
+                    wsup * (query_to_support[..., 0] + 1) / 2,
+                    hsup * (query_to_support[..., 1] + 1) / 2,
+                ),
+                axis=-1,
+            )
+            - offset
+        )
+        return query_coords, query_to_support
+
+    def benchmark(self, model, model_name=None):
+        n_matches = []
+        homog_dists = []
+        for seq_idx, seq_name in tqdm(
+            enumerate(self.seq_names), total=len(self.seq_names)
+        ):
+            if seq_name in self.ignore_seqs:
+                continue
+            im1_path = os.path.join(self.seqs_path, seq_name, "1.ppm")
+            im1 = Image.open(im1_path)
+            w1, h1 = im1.size
+            for im_idx in range(2, 7):
+                im2_path = os.path.join(self.seqs_path, seq_name, f"{im_idx}.ppm")
+                im2 = Image.open(im2_path)
+                w2, h2 = im2.size
+                H = np.loadtxt(
+                    os.path.join(self.seqs_path, seq_name, "H_1_" + str(im_idx))
+                )
+                dense_matches, dense_certainty = model.match(
+                    im1_path, im2_path
+                )
+                good_matches, _ = model.sample(dense_matches, dense_certainty, 5000)
+                pos_a, pos_b = self.convert_coordinates(
+                    good_matches[:, :2], good_matches[:, 2:], w1, h1, w2, h2
+                )
+                try:
+                    H_pred, inliers = cv2.findHomography(
+                        pos_a,
+                        pos_b,
+                        method=cv2.RANSAC,
+                        confidence=0.99999,
+                        ransacReprojThreshold=3 * min(w2, h2) / 480,
+                    )
+                except Exception:
+                    H_pred = None
+                if H_pred is None:
+                    H_pred = np.zeros((3, 3))
+                    H_pred[2, 2] = 1.0
+                corners = np.array(
+                    [[0, 0, 1], [0, h1 - 1, 1], [w1 - 1, 0, 1], [w1 - 1, h1 - 1, 1]]
+                )
+                real_warped_corners = np.dot(corners, np.transpose(H))
+                real_warped_corners = (
+                    real_warped_corners[:, :2] / real_warped_corners[:, 2:]
+                )
+                warped_corners = np.dot(corners, np.transpose(H_pred))
+                warped_corners = warped_corners[:, :2] / warped_corners[:, 2:]
+                mean_dist = np.mean(
+                    np.linalg.norm(real_warped_corners - warped_corners, axis=1)
+                ) / (min(w2, h2) / 480.0)
+                homog_dists.append(mean_dist)
+        n_matches = np.array(n_matches)
+        thresholds = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        auc = pose_auc(np.array(homog_dists), thresholds)
+        return {
+            "hpatches_homog_auc_3": auc[2],
+            "hpatches_homog_auc_5": auc[4],
+            "hpatches_homog_auc_10": auc[9],
+        }
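A side note on the coordinate convention handled by `convert_coordinates` in both HPatches benchmarks: DKM matches live in normalized [-1,1] coordinates (pixel centers at half-integers, as in `grid_sample` with `align_corners=False`), while the HPatches homographies place the center of the top-left pixel at [0,0], hence the 0.5 offset. A minimal numpy sketch of the same mapping; the helper name `to_hpatches_pixels` is hypothetical, introduced only for illustration:

```python
import numpy as np

def to_hpatches_pixels(xy_normalized, w, h):
    # Map [-1,1] -> [0,n] (pixel centers at half-integers), then shift by 0.5
    # so pixel centers land on the integers 0..n-1, matching HPatches.
    x = w * (xy_normalized[..., 0] + 1) / 2 - 0.5
    y = h * (xy_normalized[..., 1] + 1) / 2 - 0.5
    return np.stack((x, y), axis=-1)

# The image edges (-1 and +1) map to -0.5 and w-0.5 (resp. h-0.5):
print(to_hpatches_pixels(np.array([[-1.0, -1.0], [1.0, 1.0]]), w=640, h=480))
# -> [[ -0.5  -0.5]
#     [639.5 479.5]]
```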
third_party/DKM/dkm/benchmarks/megadepth1500_benchmark.py
ADDED
@@ -0,0 +1,124 @@
+import numpy as np
+import torch
+from dkm.utils import *
+from PIL import Image
+from tqdm import tqdm
+import torch.nn.functional as F
+
+class Megadepth1500Benchmark:
+    def __init__(self, data_root="data/megadepth", scene_names=None) -> None:
+        if scene_names is None:
+            self.scene_names = [
+                "0015_0.1_0.3.npz",
+                "0015_0.3_0.5.npz",
+                "0022_0.1_0.3.npz",
+                "0022_0.3_0.5.npz",
+                "0022_0.5_0.7.npz",
+            ]
+        else:
+            self.scene_names = scene_names
+        self.scenes = [
+            np.load(f"{data_root}/{scene}", allow_pickle=True)
+            for scene in self.scene_names
+        ]
+        self.data_root = data_root
+
+    def benchmark(self, model):
+        with torch.no_grad():
+            data_root = self.data_root
+            tot_e_t, tot_e_R, tot_e_pose = [], [], []
+            for scene_ind in range(len(self.scenes)):
+                scene = self.scenes[scene_ind]
+                pairs = scene["pair_infos"]
+                intrinsics = scene["intrinsics"]
+                poses = scene["poses"]
+                im_paths = scene["image_paths"]
+                pair_inds = range(len(pairs))
+                for pairind in tqdm(pair_inds):
+                    idx1, idx2 = pairs[pairind][0]
+                    K1 = intrinsics[idx1].copy()
+                    T1 = poses[idx1].copy()
+                    R1, t1 = T1[:3, :3], T1[:3, 3]
+                    K2 = intrinsics[idx2].copy()
+                    T2 = poses[idx2].copy()
+                    R2, t2 = T2[:3, :3], T2[:3, 3]
+                    R, t = compute_relative_pose(R1, t1, R2, t2)
+                    im1_path = f"{data_root}/{im_paths[idx1]}"
+                    im2_path = f"{data_root}/{im_paths[idx2]}"
+                    im1 = Image.open(im1_path)
+                    w1, h1 = im1.size
+                    im2 = Image.open(im2_path)
+                    w2, h2 = im2.size
+                    scale1 = 1200 / max(w1, h1)
+                    scale2 = 1200 / max(w2, h2)
+                    w1, h1 = scale1 * w1, scale1 * h1
+                    w2, h2 = scale2 * w2, scale2 * h2
+                    K1[:2] = K1[:2] * scale1
+                    K2[:2] = K2[:2] * scale2
+                    dense_matches, dense_certainty = model.match(im1_path, im2_path)
+                    sparse_matches, _ = model.sample(
+                        dense_matches, dense_certainty, 5000
+                    )
+                    kpts1 = sparse_matches[:, :2]
+                    kpts1 = torch.stack(
+                        (
+                            w1 * (kpts1[:, 0] + 1) / 2,
+                            h1 * (kpts1[:, 1] + 1) / 2,
+                        ),
+                        axis=-1,
+                    )
+                    kpts2 = sparse_matches[:, 2:]
+                    kpts2 = torch.stack(
+                        (
+                            w2 * (kpts2[:, 0] + 1) / 2,
+                            h2 * (kpts2[:, 1] + 1) / 2,
+                        ),
+                        axis=-1,
+                    )
+                    for _ in range(5):
+                        shuffling = np.random.permutation(np.arange(len(kpts1)))
+                        kpts1 = kpts1[shuffling]
+                        kpts2 = kpts2[shuffling]
+                        try:
+                            norm_threshold = 0.5 / (
+                                np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2]))
+                            )
+                            R_est, t_est, mask = estimate_pose(
+                                kpts1.cpu().numpy(),
+                                kpts2.cpu().numpy(),
+                                K1,
+                                K2,
+                                norm_threshold,
+                                conf=0.99999,
+                            )
+                            T1_to_2_est = np.concatenate((R_est, t_est), axis=-1)
+                            e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
+                            e_pose = max(e_t, e_R)
+                        except Exception as e:
+                            print(repr(e))
+                            e_t, e_R = 90, 90
+                            e_pose = max(e_t, e_R)
+                        tot_e_t.append(e_t)
+                        tot_e_R.append(e_R)
+                        tot_e_pose.append(e_pose)
+            tot_e_pose = np.array(tot_e_pose)
+            thresholds = [5, 10, 20]
+            auc = pose_auc(tot_e_pose, thresholds)
+            acc_5 = (tot_e_pose < 5).mean()
+            acc_10 = (tot_e_pose < 10).mean()
+            acc_15 = (tot_e_pose < 15).mean()
+            acc_20 = (tot_e_pose < 20).mean()
+            map_5 = acc_5
+            map_10 = np.mean([acc_5, acc_10])
+            map_20 = np.mean([acc_5, acc_10, acc_15, acc_20])
+            return {
+                "auc_5": auc[0],
+                "auc_10": auc[1],
+                "auc_20": auc[2],
+                "map_5": map_5,
+                "map_10": map_10,
+                "map_20": map_20,
+            }
third_party/DKM/dkm/benchmarks/megadepth_dense_benchmark.py
ADDED
@@ -0,0 +1,86 @@
+import torch
+import numpy as np
+import tqdm
+from dkm.datasets import MegadepthBuilder
+from dkm.utils import warp_kpts
+from torch.utils.data import ConcatDataset
+
+
+class MegadepthDenseBenchmark:
+    def __init__(self, data_root="data/megadepth", h=384, w=512, num_samples=2000, device=None) -> None:
+        mega = MegadepthBuilder(data_root=data_root)
+        self.dataset = ConcatDataset(
+            mega.build_scenes(split="test_loftr", ht=h, wt=w)
+        )  # fixed resolution of 384,512
+        self.num_samples = num_samples
+        if device is None:
+            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.device = device
+
+    def geometric_dist(self, depth1, depth2, T_1to2, K1, K2, dense_matches):
+        b, h1, w1, d = dense_matches.shape
+        with torch.no_grad():
+            x1 = dense_matches[..., :2].reshape(b, h1 * w1, 2)
+            # x1 = torch.stack((2*x1[...,0]/w1-1,2*x1[...,1]/h1-1),dim=-1)
+            mask, x2 = warp_kpts(
+                x1.double(),
+                depth1.double(),
+                depth2.double(),
+                T_1to2.double(),
+                K1.double(),
+                K2.double(),
+            )
+            x2 = torch.stack(
+                (w1 * (x2[..., 0] + 1) / 2, h1 * (x2[..., 1] + 1) / 2), dim=-1
+            )
+            prob = mask.float().reshape(b, h1, w1)
+        x2_hat = dense_matches[..., 2:]
+        x2_hat = torch.stack(
+            (w1 * (x2_hat[..., 0] + 1) / 2, h1 * (x2_hat[..., 1] + 1) / 2), dim=-1
+        )
+        gd = (x2_hat - x2.reshape(b, h1, w1, 2)).norm(dim=-1)
+        gd = gd[prob == 1]
+        pck_1 = (gd < 1.0).float().mean()
+        pck_3 = (gd < 3.0).float().mean()
+        pck_5 = (gd < 5.0).float().mean()
+        gd = gd.mean()
+        return gd, pck_1, pck_3, pck_5
+
+    def benchmark(self, model, batch_size=8):
+        model.train(False)
+        with torch.no_grad():
+            gd_tot = 0.0
+            pck_1_tot = 0.0
+            pck_3_tot = 0.0
+            pck_5_tot = 0.0
+            sampler = torch.utils.data.WeightedRandomSampler(
+                torch.ones(len(self.dataset)), replacement=False, num_samples=self.num_samples
+            )
+            dataloader = torch.utils.data.DataLoader(
+                self.dataset, batch_size=batch_size, num_workers=8, sampler=sampler
+            )
+            for data in tqdm.tqdm(dataloader):
+                im1, im2, depth1, depth2, T_1to2, K1, K2 = (
+                    data["query"],
+                    data["support"],
+                    data["query_depth"].to(self.device),
+                    data["support_depth"].to(self.device),
+                    data["T_1to2"].to(self.device),
+                    data["K1"].to(self.device),
+                    data["K2"].to(self.device),
+                )
+                matches, certainty = model.match(im1, im2, batched=True)
+                gd, pck_1, pck_3, pck_5 = self.geometric_dist(
+                    depth1, depth2, T_1to2, K1, K2, matches
+                )
+                gd_tot, pck_1_tot, pck_3_tot, pck_5_tot = (
+                    gd_tot + gd,
+                    pck_1_tot + pck_1,
+                    pck_3_tot + pck_3,
+                    pck_5_tot + pck_5,
+                )
+            return {
+                "mega_pck_1": pck_1_tot.item() / len(dataloader),
+                "mega_pck_3": pck_3_tot.item() / len(dataloader),
+                "mega_pck_5": pck_5_tot.item() / len(dataloader),
+            }
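For reference, the quantities computed by `geometric_dist` above can be restated compactly. With $\hat{x}_2^{(i)}$ the predicted match of pixel $i$, $x_2^{(i)}$ its depth-consistent ground-truth warp from `warp_kpts`, and $\mathcal{V}$ the set of pixels where the warp mask is valid, this is a restatement of the code (distances in pixels of the $h \times w$ match grid):

```latex
\mathrm{EPE}
  = \frac{1}{|\mathcal{V}|} \sum_{i \in \mathcal{V}}
    \bigl\| \hat{x}_2^{(i)} - x_2^{(i)} \bigr\|_2,
\qquad
\mathrm{PCK}@t
  = \frac{1}{|\mathcal{V}|} \sum_{i \in \mathcal{V}}
    \mathbf{1}\!\left[ \bigl\| \hat{x}_2^{(i)} - x_2^{(i)} \bigr\|_2 < t \right],
\quad t \in \{1, 3, 5\}.
```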
third_party/DKM/dkm/benchmarks/scannet_benchmark.py
ADDED
@@ -0,0 +1,143 @@
+import os.path as osp
+import numpy as np
+import torch
+from dkm.utils import *
+from PIL import Image
+from tqdm import tqdm
+
+
+class ScanNetBenchmark:
+    def __init__(self, data_root="data/scannet") -> None:
+        self.data_root = data_root
+
+    def benchmark(self, model, model_name=None):
+        model.train(False)
+        with torch.no_grad():
+            data_root = self.data_root
+            tmp = np.load(osp.join(data_root, "test.npz"))
+            pairs, rel_pose = tmp["name"], tmp["rel_pose"]
+            tot_e_t, tot_e_R, tot_e_pose = [], [], []
+            pair_inds = np.random.choice(
+                range(len(pairs)), size=len(pairs), replace=False
+            )
+            for pairind in tqdm(pair_inds, smoothing=0.9):
+                scene = pairs[pairind]
+                scene_name = f"scene0{scene[0]}_00"
+                im1_path = osp.join(
+                    self.data_root,
+                    "scans_test",
+                    scene_name,
+                    "color",
+                    f"{scene[2]}.jpg",
+                )
+                im1 = Image.open(im1_path)
+                im2_path = osp.join(
+                    self.data_root,
+                    "scans_test",
+                    scene_name,
+                    "color",
+                    f"{scene[3]}.jpg",
+                )
+                im2 = Image.open(im2_path)
+                T_gt = rel_pose[pairind].reshape(3, 4)
+                R, t = T_gt[:3, :3], T_gt[:3, 3]
+                K = np.stack(
+                    [
+                        np.array([float(i) for i in r.split()])
+                        for r in open(
+                            osp.join(
+                                self.data_root,
+                                "scans_test",
+                                scene_name,
+                                "intrinsic",
+                                "intrinsic_color.txt",
+                            ),
+                            "r",
+                        )
+                        .read()
+                        .split("\n")
+                        if r
+                    ]
+                )
+                w1, h1 = im1.size
+                w2, h2 = im2.size
+                K1 = K.copy()
+                K2 = K.copy()
+                dense_matches, dense_certainty = model.match(im1_path, im2_path)
+                sparse_matches, sparse_certainty = model.sample(
+                    dense_matches, dense_certainty, 5000
+                )
+                scale1 = 480 / min(w1, h1)
+                scale2 = 480 / min(w2, h2)
+                w1, h1 = scale1 * w1, scale1 * h1
+                w2, h2 = scale2 * w2, scale2 * h2
+                K1 = K1 * scale1
+                K2 = K2 * scale2
+
+                offset = 0.5
+                kpts1 = sparse_matches[:, :2]
+                kpts1 = np.stack(
+                    (
+                        w1 * (kpts1[:, 0] + 1) / 2 - offset,
+                        h1 * (kpts1[:, 1] + 1) / 2 - offset,
+                    ),
+                    axis=-1,
+                )
+                kpts2 = sparse_matches[:, 2:]
+                kpts2 = np.stack(
+                    (
+                        w2 * (kpts2[:, 0] + 1) / 2 - offset,
+                        h2 * (kpts2[:, 1] + 1) / 2 - offset,
+                    ),
+                    axis=-1,
+                )
+                for _ in range(5):
+                    shuffling = np.random.permutation(np.arange(len(kpts1)))
+                    kpts1 = kpts1[shuffling]
+                    kpts2 = kpts2[shuffling]
+                    try:
+                        norm_threshold = 0.5 / (
+                            np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2]))
+                        )
+                        R_est, t_est, mask = estimate_pose(
+                            kpts1,
+                            kpts2,
+                            K1,
+                            K2,
+                            norm_threshold,
+                            conf=0.99999,
+                        )
+                        T1_to_2_est = np.concatenate((R_est, t_est), axis=-1)
+                        e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
+                        e_pose = max(e_t, e_R)
+                    except Exception as e:
+                        print(repr(e))
+                        e_t, e_R = 90, 90
+                        e_pose = max(e_t, e_R)
+                    tot_e_t.append(e_t)
+                    tot_e_R.append(e_R)
+                    tot_e_pose.append(e_pose)
+            tot_e_pose = np.array(tot_e_pose)
+            thresholds = [5, 10, 20]
+            auc = pose_auc(tot_e_pose, thresholds)
+            acc_5 = (tot_e_pose < 5).mean()
+            acc_10 = (tot_e_pose < 10).mean()
+            acc_15 = (tot_e_pose < 15).mean()
+            acc_20 = (tot_e_pose < 20).mean()
+            map_5 = acc_5
+            map_10 = np.mean([acc_5, acc_10])
+            map_20 = np.mean([acc_5, acc_10, acc_15, acc_20])
+            return {
+                "auc_5": auc[0],
+                "auc_10": auc[1],
+                "auc_20": auc[2],
+                "map_5": map_5,
+                "map_10": map_10,
+                "map_20": map_20,
+            }
third_party/DKM/dkm/checkpointing/__init__.py
ADDED
@@ -0,0 +1 @@
+from .checkpoint import CheckPoint
third_party/DKM/dkm/checkpointing/checkpoint.py
ADDED
@@ -0,0 +1,31 @@
+import os
+import torch
+from torch.nn.parallel.data_parallel import DataParallel
+from torch.nn.parallel.distributed import DistributedDataParallel
+from loguru import logger
+
+
+class CheckPoint:
+    def __init__(self, dir=None, name="tmp"):
+        self.name = name
+        self.dir = dir
+        os.makedirs(self.dir, exist_ok=True)
+
+    def __call__(
+        self,
+        model,
+        optimizer,
+        lr_scheduler,
+        n,
+    ):
+        assert model is not None
+        if isinstance(model, (DataParallel, DistributedDataParallel)):
+            model = model.module
+        states = {
+            "model": model.state_dict(),
+            "n": n,
+            "optimizer": optimizer.state_dict(),
+            "lr_scheduler": lr_scheduler.state_dict(),
+        }
+        torch.save(states, self.dir + self.name + f"_latest.pth")
+        logger.info(f"Saved states {list(states.keys())}, at step {n}")
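A hypothetical usage sketch for the `CheckPoint` class above; the model, optimizer, and scheduler here are placeholders, and `dir` is passed with a trailing slash since the save path is built by plain string concatenation:

```python
import torch
from dkm.checkpointing import CheckPoint

# Placeholder training objects, for illustration only.
model = torch.nn.Linear(2, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000)

checkpoint = CheckPoint(dir="workspace/checkpoints/", name="dkm_run")
for n in range(3):
    # ... training step would go here ...
    # Overwrites workspace/checkpoints/dkm_run_latest.pth each call.
    checkpoint(model, optimizer, lr_scheduler, n)
```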
third_party/DKM/dkm/datasets/__init__.py
ADDED
@@ -0,0 +1 @@
+from .megadepth import MegadepthBuilder
third_party/DKM/dkm/datasets/megadepth.py
ADDED
@@ -0,0 +1,177 @@
+import os
+import random
+from PIL import Image
+import h5py
+import numpy as np
+import torch
+from torch.utils.data import Dataset, DataLoader, ConcatDataset
+
+from dkm.utils import get_depth_tuple_transform_ops, get_tuple_transform_ops
+import torchvision.transforms.functional as tvf
+from dkm.utils.transforms import GeometricSequential
+import kornia.augmentation as K
+
+
+class MegadepthScene:
+    def __init__(
+        self,
+        data_root,
+        scene_info,
+        ht=384,
+        wt=512,
+        min_overlap=0.0,
+        shake_t=0,
+        rot_prob=0.0,
+        normalize=True,
+    ) -> None:
+        self.data_root = data_root
+        self.image_paths = scene_info["image_paths"]
+        self.depth_paths = scene_info["depth_paths"]
+        self.intrinsics = scene_info["intrinsics"]
+        self.poses = scene_info["poses"]
+        self.pairs = scene_info["pairs"]
+        self.overlaps = scene_info["overlaps"]
+        threshold = self.overlaps > min_overlap
+        self.pairs = self.pairs[threshold]
+        self.overlaps = self.overlaps[threshold]
+        if len(self.pairs) > 100000:
+            pairinds = np.random.choice(
+                np.arange(0, len(self.pairs)), 100000, replace=False
+            )
+            self.pairs = self.pairs[pairinds]
+            self.overlaps = self.overlaps[pairinds]
+        # counts, bins = np.histogram(self.overlaps,20)
+        # print(counts)
+        self.im_transform_ops = get_tuple_transform_ops(
+            resize=(ht, wt), normalize=normalize
+        )
+        self.depth_transform_ops = get_depth_tuple_transform_ops(
+            resize=(ht, wt), normalize=False
+        )
+        self.wt, self.ht = wt, ht
+        self.shake_t = shake_t
+        self.H_generator = GeometricSequential(K.RandomAffine(degrees=90, p=rot_prob))
+
+    def load_im(self, im_ref, crop=None):
+        im = Image.open(im_ref)
+        return im
+
+    def load_depth(self, depth_ref, crop=None):
+        depth = np.array(h5py.File(depth_ref, "r")["depth"])
+        return torch.from_numpy(depth)
+
+    def __len__(self):
+        return len(self.pairs)
+
+    def scale_intrinsic(self, K, wi, hi):
+        sx, sy = self.wt / wi, self.ht / hi
+        sK = torch.tensor([[sx, 0, 0], [0, sy, 0], [0, 0, 1]])
+        return sK @ K
+
+    def rand_shake(self, *things):
+        t = np.random.choice(range(-self.shake_t, self.shake_t + 1), size=2)
+        return [
+            tvf.affine(thing, angle=0.0, translate=list(t), scale=1.0, shear=[0.0, 0.0])
+            for thing in things
+        ], t
+
+    def __getitem__(self, pair_idx):
+        # read intrinsics of original size
+        idx1, idx2 = self.pairs[pair_idx]
+        K1 = torch.tensor(self.intrinsics[idx1].copy(), dtype=torch.float).reshape(3, 3)
+        K2 = torch.tensor(self.intrinsics[idx2].copy(), dtype=torch.float).reshape(3, 3)
+
+        # read and compute relative poses
+        T1 = self.poses[idx1]
+        T2 = self.poses[idx2]
+        T_1to2 = torch.tensor(np.matmul(T2, np.linalg.inv(T1)), dtype=torch.float)[
+            :4, :4
+        ]  # (4, 4)
+
+        # Load positive pair data
+        im1, im2 = self.image_paths[idx1], self.image_paths[idx2]
+        depth1, depth2 = self.depth_paths[idx1], self.depth_paths[idx2]
+        im_src_ref = os.path.join(self.data_root, im1)
+        im_pos_ref = os.path.join(self.data_root, im2)
+        depth_src_ref = os.path.join(self.data_root, depth1)
+        depth_pos_ref = os.path.join(self.data_root, depth2)
+        # return torch.randn((1000,1000))
+        im_src = self.load_im(im_src_ref)
+        im_pos = self.load_im(im_pos_ref)
+        depth_src = self.load_depth(depth_src_ref)
+        depth_pos = self.load_depth(depth_pos_ref)
+
+        # Recompute camera intrinsic matrix due to the resize
+        K1 = self.scale_intrinsic(K1, im_src.width, im_src.height)
+        K2 = self.scale_intrinsic(K2, im_pos.width, im_pos.height)
+        # Process images
+        im_src, im_pos = self.im_transform_ops((im_src, im_pos))
+        depth_src, depth_pos = self.depth_transform_ops(
+            (depth_src[None, None], depth_pos[None, None])
+        )
+        [im_src, im_pos, depth_src, depth_pos], t = self.rand_shake(
+            im_src, im_pos, depth_src, depth_pos
+        )
+        im_src, Hq = self.H_generator(im_src[None])
+        depth_src = self.H_generator.apply_transform(depth_src, Hq)
+        K1[:2, 2] += t
+        K2[:2, 2] += t
+        K1 = Hq[0] @ K1
+        data_dict = {
+            "query": im_src[0],
+            "query_identifier": self.image_paths[idx1].split("/")[-1].split(".jpg")[0],
+            "support": im_pos,
+            "support_identifier": self.image_paths[idx2]
+            .split("/")[-1]
+            .split(".jpg")[0],
+            "query_depth": depth_src[0, 0],
+            "support_depth": depth_pos[0, 0],
+            "K1": K1,
+            "K2": K2,
+            "T_1to2": T_1to2,
+        }
+        return data_dict
+
+
+class MegadepthBuilder:
+    def __init__(self, data_root="data/megadepth") -> None:
+        self.data_root = data_root
+        self.scene_info_root = os.path.join(data_root, "prep_scene_info")
+        self.all_scenes = os.listdir(self.scene_info_root)
+        self.test_scenes = ["0017.npy", "0004.npy", "0048.npy", "0013.npy"]
+        self.test_scenes_loftr = ["0015.npy", "0022.npy"]
+
+    def build_scenes(self, split="train", min_overlap=0.0, **kwargs):
+        if split == "train":
+            scene_names = set(self.all_scenes) - set(self.test_scenes)
+        elif split == "train_loftr":
+            scene_names = set(self.all_scenes) - set(self.test_scenes_loftr)
+        elif split == "test":
+            scene_names = self.test_scenes
+        elif split == "test_loftr":
+            scene_names = self.test_scenes_loftr
+        else:
+            raise ValueError(f"Split {split} not available")
+        scenes = []
+        for scene_name in scene_names:
+            scene_info = np.load(
+                os.path.join(self.scene_info_root, scene_name), allow_pickle=True
+            ).item()
+            scenes.append(
+                MegadepthScene(
+                    self.data_root, scene_info, min_overlap=min_overlap, **kwargs
+                )
+            )
+        return scenes
+
+    def weight_scenes(self, concat_dataset, alpha=0.5):
+        ns = []
+        for d in concat_dataset.datasets:
+            ns.append(len(d))
+        ws = torch.cat([torch.ones(n) / n**alpha for n in ns])
+        return ws
+
+
+if __name__ == "__main__":
+    mega_test = ConcatDataset(MegadepthBuilder().build_scenes(split="train"))
+    mega_test[0]
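Restating the intrinsic rescaling performed by `scale_intrinsic` above (the ScanNet dataset below uses the same formula): resizing an image from $(w_i, h_i)$ to the training resolution $(w_t, h_t)$ rescales both the focal lengths and the principal point,

```latex
K' = S\,K,
\qquad
S = \begin{pmatrix} s_x & 0 & 0 \\ 0 & s_y & 0 \\ 0 & 0 & 1 \end{pmatrix},
\qquad
s_x = \frac{w_t}{w_i}, \quad s_y = \frac{h_t}{h_i}.
```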
third_party/DKM/dkm/datasets/scannet.py
ADDED
@@ -0,0 +1,151 @@
import os
import random
from PIL import Image
import cv2
import h5py
import numpy as np
import torch
from torch.utils.data import (
    Dataset,
    DataLoader,
    ConcatDataset)

import torchvision.transforms.functional as tvf
import kornia.augmentation as K
import os.path as osp
import matplotlib.pyplot as plt
from dkm.utils import get_depth_tuple_transform_ops, get_tuple_transform_ops
from dkm.utils.transforms import GeometricSequential

from tqdm import tqdm


class ScanNetScene:
    def __init__(self, data_root, scene_info, ht=384, wt=512, min_overlap=0.0, shake_t=0, rot_prob=0.0) -> None:
        self.scene_root = osp.join(data_root, "scans", "scans_train")
        self.data_names = scene_info['name']
        self.overlaps = scene_info['score']
        # Only sample pairs where both frame indices are multiples of 10
        valid = (self.data_names[:, -2:] % 10).sum(axis=-1) == 0
        self.overlaps = self.overlaps[valid]
        self.data_names = self.data_names[valid]
        if len(self.data_names) > 10000:
            pairinds = np.random.choice(np.arange(0, len(self.data_names)), 10000, replace=False)
            self.data_names = self.data_names[pairinds]
            self.overlaps = self.overlaps[pairinds]
        self.im_transform_ops = get_tuple_transform_ops(resize=(ht, wt), normalize=True)
        self.depth_transform_ops = get_depth_tuple_transform_ops(resize=(ht, wt), normalize=False)
        self.wt, self.ht = wt, ht
        self.shake_t = shake_t
        self.H_generator = GeometricSequential(K.RandomAffine(degrees=90, p=rot_prob))

    def load_im(self, im_ref, crop=None):
        im = Image.open(im_ref)
        return im

    def load_depth(self, depth_ref, crop=None):
        depth = cv2.imread(str(depth_ref), cv2.IMREAD_UNCHANGED)
        depth = depth / 1000
        depth = torch.from_numpy(depth).float()  # (h, w)
        return depth

    def __len__(self):
        return len(self.data_names)

    def scale_intrinsic(self, K, wi, hi):
        sx, sy = self.wt / wi, self.ht / hi
        sK = torch.tensor([[sx, 0, 0],
                           [0, sy, 0],
                           [0, 0, 1]])
        return sK @ K

    def read_scannet_pose(self, path):
        """Read ScanNet's Camera2World pose and transform it to World2Camera.

        Returns:
            pose_w2c (np.ndarray): (4, 4)
        """
        cam2world = np.loadtxt(path, delimiter=' ')
        world2cam = np.linalg.inv(cam2world)
        return world2cam

    def read_scannet_intrinsic(self, path):
        """Read ScanNet's intrinsic matrix and return the 3x3 matrix."""
        intrinsic = np.loadtxt(path, delimiter=' ')
        return intrinsic[:-1, :-1]

    def __getitem__(self, pair_idx):
        # read intrinsics of original size
        data_name = self.data_names[pair_idx]
        scene_name, scene_sub_name, stem_name_1, stem_name_2 = data_name
        scene_name = f'scene{scene_name:04d}_{scene_sub_name:02d}'

        # read the intrinsics of the depth map
        K1 = K2 = self.read_scannet_intrinsic(osp.join(self.scene_root,
                                                       scene_name,
                                                       'intrinsic', 'intrinsic_color.txt'))  # the depth K is slightly different, but that doesn't really matter here
        # read and compute relative poses
        T1 = self.read_scannet_pose(osp.join(self.scene_root,
                                             scene_name,
                                             'pose', f'{stem_name_1}.txt'))
        T2 = self.read_scannet_pose(osp.join(self.scene_root,
                                             scene_name,
                                             'pose', f'{stem_name_2}.txt'))
        T_1to2 = torch.tensor(np.matmul(T2, np.linalg.inv(T1)), dtype=torch.float)[:4, :4]  # (4, 4)

        # Load positive pair data
        im_src_ref = os.path.join(self.scene_root, scene_name, 'color', f'{stem_name_1}.jpg')
        im_pos_ref = os.path.join(self.scene_root, scene_name, 'color', f'{stem_name_2}.jpg')
        depth_src_ref = os.path.join(self.scene_root, scene_name, 'depth', f'{stem_name_1}.png')
        depth_pos_ref = os.path.join(self.scene_root, scene_name, 'depth', f'{stem_name_2}.png')

        im_src = self.load_im(im_src_ref)
        im_pos = self.load_im(im_pos_ref)
        depth_src = self.load_depth(depth_src_ref)
        depth_pos = self.load_depth(depth_pos_ref)

        # Recompute camera intrinsic matrix due to the resize
        K1 = self.scale_intrinsic(K1, im_src.width, im_src.height)
        K2 = self.scale_intrinsic(K2, im_pos.width, im_pos.height)
        # Process images
        im_src, im_pos = self.im_transform_ops((im_src, im_pos))
        depth_src, depth_pos = self.depth_transform_ops((depth_src[None, None], depth_pos[None, None]))

        data_dict = {'query': im_src,
                     'support': im_pos,
                     'query_depth': depth_src[0, 0],
                     'support_depth': depth_pos[0, 0],
                     'K1': K1,
                     'K2': K2,
                     'T_1to2': T_1to2,
                     }
        return data_dict


class ScanNetBuilder:
    def __init__(self, data_root='data/scannet') -> None:
        self.data_root = data_root
        self.scene_info_root = os.path.join(data_root, 'scannet_indices')
        self.all_scenes = os.listdir(self.scene_info_root)

    def build_scenes(self, split='train', min_overlap=0.0, **kwargs):
        # Note: split doesn't matter here as we always use the same scannet_train scenes
        scene_names = self.all_scenes
        scenes = []
        for scene_name in tqdm(scene_names):
            scene_info = np.load(os.path.join(self.scene_info_root, scene_name), allow_pickle=True)
            scenes.append(ScanNetScene(self.data_root, scene_info, min_overlap=min_overlap, **kwargs))
        return scenes

    def weight_scenes(self, concat_dataset, alpha=0.5):
        ns = []
        for d in concat_dataset.datasets:
            ns.append(len(d))
        ws = torch.cat([torch.ones(n) / n**alpha for n in ns])
        return ws


if __name__ == "__main__":
    scannet_test = ConcatDataset(ScanNetBuilder("data/scannet").build_scenes(split='train'))
    scannet_test[0]
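For reference, the two pieces of geometry in __getitem__ above, written out. scale_intrinsic rescales the intrinsics for the resize from the original (w_i, h_i) to the target (w_t, h_t), and T_1to2 composes the two world-to-camera poses read from the pose files:

    K' = \begin{pmatrix} s_x & 0 & 0 \\ 0 & s_y & 0 \\ 0 & 0 & 1 \end{pmatrix} K,
    \qquad s_x = \frac{w_t}{w_i}, \quad s_y = \frac{h_t}{h_i},
    \qquad T_{1 \to 2} = T_2 \, T_1^{-1}

This is a restatement of the code, with T_1, T_2 the world-to-camera transforms returned by read_scannet_pose.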
third_party/DKM/dkm/losses/__init__.py
ADDED
@@ -0,0 +1 @@
from .depth_match_regression_loss import DepthRegressionLoss
third_party/DKM/dkm/losses/depth_match_regression_loss.py
ADDED
@@ -0,0 +1,128 @@
from einops.einops import rearrange
import torch
import torch.nn as nn
import torch.nn.functional as F
from dkm.utils.utils import warp_kpts


class DepthRegressionLoss(nn.Module):
    def __init__(
        self,
        robust=True,
        center_coords=False,
        scale_normalize=False,
        ce_weight=0.01,
        local_loss=True,
        local_dist=4.0,
        local_largest_scale=8,
    ):
        super().__init__()
        self.robust = robust  # measured in pixels
        self.center_coords = center_coords
        self.scale_normalize = scale_normalize
        self.ce_weight = ce_weight
        self.local_loss = local_loss
        self.local_dist = local_dist
        self.local_largest_scale = local_largest_scale

    def geometric_dist(self, depth1, depth2, T_1to2, K1, K2, dense_matches, scale):
        """Warp a regular grid from the query image into the support image using
        depth, intrinsics and relative pose, and return the distance between the
        predicted and depth-induced matches (gd) together with a covisibility
        probability map (prob).
        """
        b, h1, w1, d = dense_matches.shape
        with torch.no_grad():
            x1_n = torch.meshgrid(
                *[
                    torch.linspace(
                        -1 + 1 / n, 1 - 1 / n, n, device=dense_matches.device
                    )
                    for n in (b, h1, w1)
                ]
            )
            x1_n = torch.stack((x1_n[2], x1_n[1]), dim=-1).reshape(b, h1 * w1, 2)
            mask, x2 = warp_kpts(
                x1_n.double(),
                depth1.double(),
                depth2.double(),
                T_1to2.double(),
                K1.double(),
                K2.double(),
            )
            prob = mask.float().reshape(b, h1, w1)
        gd = (dense_matches - x2.reshape(b, h1, w1, 2)).norm(dim=-1)  # *scale?
        return gd, prob

    def dense_depth_loss(self, dense_certainty, prob, gd, scale, eps=1e-8):
        """Return the certainty (binary cross-entropy) and match regression
        losses at a given scale.
        """
        smooth_prob = prob
        ce_loss = F.binary_cross_entropy_with_logits(dense_certainty[:, 0], smooth_prob)
        depth_loss = gd[prob > 0]
        if not torch.any(prob > 0).item():
            depth_loss = (gd * 0.0).mean()  # Prevent issues where prob is 0 everywhere
        return {
            f"ce_loss_{scale}": ce_loss.mean(),
            f"depth_loss_{scale}": depth_loss.mean(),
        }

    def forward(self, dense_corresps, batch):
        """Accumulate the certainty and regression losses over all scales in
        dense_corresps, masking fine scales by the coarser geometric error when
        local_loss is enabled.
        """
        scales = list(dense_corresps.keys())
        tot_loss = 0.0
        prev_gd = 0.0
        for scale in scales:
            dense_scale_corresps = dense_corresps[scale]
            dense_scale_certainty, dense_scale_coords = (
                dense_scale_corresps["dense_certainty"],
                dense_scale_corresps["dense_flow"],
            )
            dense_scale_coords = rearrange(dense_scale_coords, "b d h w -> b h w d")
            b, h, w, d = dense_scale_coords.shape
            gd, prob = self.geometric_dist(
                batch["query_depth"],
                batch["support_depth"],
                batch["T_1to2"],
                batch["K1"],
                batch["K2"],
                dense_scale_coords,
                scale,
            )
            if (
                scale <= self.local_largest_scale and self.local_loss
            ):  # The fine matching loss should not be punished by coarse mistakes, but should still identify wrong matches
                prob = prob * (
                    F.interpolate(prev_gd[:, None], size=(h, w), mode="nearest")[:, 0]
                    < (2 / 512) * (self.local_dist * scale)
                )
            depth_losses = self.dense_depth_loss(dense_scale_certainty, prob, gd, scale)
            scale_loss = (
                self.ce_weight * depth_losses[f"ce_loss_{scale}"]
                + depth_losses[f"depth_loss_{scale}"]
            )  # scale ce loss for coarser scales
            if self.scale_normalize:
                scale_loss = scale_loss * 1 / scale
            tot_loss = tot_loss + scale_loss
            prev_gd = gd.detach()
        return tot_loss
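Nothing in this commit exercises the loss directly, so here is a minimal hypothetical smoke test. The batch keys mirror the ScanNet data_dict above; the dummy shapes, identity poses/intrinsics, and the two-scale pyramid are all assumptions for illustration:

    import torch
    from dkm.losses import DepthRegressionLoss

    b, H, W = 2, 384, 512
    batch = {
        "query_depth": torch.rand(b, H, W) + 0.5,    # strictly positive dummy depth
        "support_depth": torch.rand(b, H, W) + 0.5,
        "T_1to2": torch.eye(4)[None].repeat(b, 1, 1),
        "K1": torch.eye(3)[None].repeat(b, 1, 1),
        "K2": torch.eye(3)[None].repeat(b, 1, 1),
    }
    # dense_flow holds normalized (-1, 1) coordinates; dense_certainty holds logits.
    dense_corresps = {
        scale: {
            "dense_flow": torch.randn(b, 2, H // scale, W // scale).clamp(-1, 1),
            "dense_certainty": torch.randn(b, 1, H // scale, W // scale),
        }
        for scale in (32, 16)
    }
    loss = DepthRegressionLoss()(dense_corresps, batch)
    print(loss)  # scalar combining BCE certainty and regression terms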
third_party/DKM/dkm/models/__init__.py
ADDED
@@ -0,0 +1,4 @@
from .model_zoo import (
    DKMv3_outdoor,
    DKMv3_indoor,
)
third_party/DKM/dkm/models/deprecated/build_model.py
ADDED
@@ -0,0 +1,787 @@
import torch
import torch.nn as nn
from dkm import *
from .local_corr import LocalCorr
from .corr_channels import NormedCorr
from torchvision.models import resnet as tv_resnet

dkm_pretrained_urls = {
    "DKM": {
        "mega_synthetic": "https://github.com/Parskatt/storage/releases/download/dkm_mega_synthetic/dkm_mega_synthetic.pth",
        "mega": "https://github.com/Parskatt/storage/releases/download/dkm_mega/dkm_mega.pth",
    },
    "DKMv2": {
        "outdoor": "https://github.com/Parskatt/storage/releases/download/dkmv2/dkm_v2_outdoor.pth",
        "indoor": "https://github.com/Parskatt/storage/releases/download/dkmv2/dkm_v2_indoor.pth",
    },
}


def DKM(pretrained=True, version="mega_synthetic", device=None):
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    gp_dim = 256
    dfn_dim = 384
    feat_dim = 256
    coordinate_decoder = DFN(
        internal_dim=dfn_dim,
        feat_input_modules=nn.ModuleDict(
            {
                "32": nn.Conv2d(512, feat_dim, 1, 1),
                "16": nn.Conv2d(512, feat_dim, 1, 1),
            }
        ),
        pred_input_modules=nn.ModuleDict(
            {
                "32": nn.Identity(),
                "16": nn.Identity(),
            }
        ),
        rrb_d_dict=nn.ModuleDict(
            {
                "32": RRB(gp_dim + feat_dim, dfn_dim),
                "16": RRB(gp_dim + feat_dim, dfn_dim),
            }
        ),
        cab_dict=nn.ModuleDict(
            {
                "32": CAB(2 * dfn_dim, dfn_dim),
                "16": CAB(2 * dfn_dim, dfn_dim),
            }
        ),
        rrb_u_dict=nn.ModuleDict(
            {
                "32": RRB(dfn_dim, dfn_dim),
                "16": RRB(dfn_dim, dfn_dim),
            }
        ),
        terminal_module=nn.ModuleDict(
            {
                "32": nn.Conv2d(dfn_dim, 3, 1, 1, 0),
                "16": nn.Conv2d(dfn_dim, 3, 1, 1, 0),
            }
        ),
    )
    dw = True
    hidden_blocks = 8
    kernel_size = 5
    conv_refiner = nn.ModuleDict(
        {
            "16": ConvRefiner(
                2 * 512,
                1024,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "8": ConvRefiner(
                2 * 512,
                1024,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "4": ConvRefiner(
                2 * 256,
                512,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "2": ConvRefiner(
                2 * 64,
                128,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "1": ConvRefiner(
                2 * 3,
                24,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
        }
    )
    kernel_temperature = 0.2
    learn_temperature = False
    no_cov = True
    kernel = CosKernel
    only_attention = False
    basis = "fourier"
    gp32 = GP(
        kernel,
        T=kernel_temperature,
        learn_temperature=learn_temperature,
        only_attention=only_attention,
        gp_dim=gp_dim,
        basis=basis,
        no_cov=no_cov,
    )
    gp16 = GP(
        kernel,
        T=kernel_temperature,
        learn_temperature=learn_temperature,
        only_attention=only_attention,
        gp_dim=gp_dim,
        basis=basis,
        no_cov=no_cov,
    )
    gps = nn.ModuleDict({"32": gp32, "16": gp16})
    proj = nn.ModuleDict(
        {"16": nn.Conv2d(1024, 512, 1, 1), "32": nn.Conv2d(2048, 512, 1, 1)}
    )
    decoder = Decoder(coordinate_decoder, gps, proj, conv_refiner, detach=True)
    h, w = 384, 512
    encoder = Encoder(
        tv_resnet.resnet50(pretrained=not pretrained),
    )  # only load pretrained weights if not loading a pretrained matcher ;)
    matcher = RegressionMatcher(encoder, decoder, h=h, w=w).to(device)
    if pretrained:
        weights = torch.hub.load_state_dict_from_url(
            dkm_pretrained_urls["DKM"][version]
        )
        matcher.load_state_dict(weights)
    return matcher


def DKMv2(pretrained=True, version="outdoor", resolution="low", **kwargs):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    gp_dim = 256
    dfn_dim = 384
    feat_dim = 256
    coordinate_decoder = DFN(
        internal_dim=dfn_dim,
        feat_input_modules=nn.ModuleDict(
            {
                "32": nn.Conv2d(512, feat_dim, 1, 1),
                "16": nn.Conv2d(512, feat_dim, 1, 1),
            }
        ),
        pred_input_modules=nn.ModuleDict(
            {
                "32": nn.Identity(),
                "16": nn.Identity(),
            }
        ),
        rrb_d_dict=nn.ModuleDict(
            {
                "32": RRB(gp_dim + feat_dim, dfn_dim),
                "16": RRB(gp_dim + feat_dim, dfn_dim),
            }
        ),
        cab_dict=nn.ModuleDict(
            {
                "32": CAB(2 * dfn_dim, dfn_dim),
                "16": CAB(2 * dfn_dim, dfn_dim),
            }
        ),
        rrb_u_dict=nn.ModuleDict(
            {
                "32": RRB(dfn_dim, dfn_dim),
                "16": RRB(dfn_dim, dfn_dim),
            }
        ),
        terminal_module=nn.ModuleDict(
            {
                "32": nn.Conv2d(dfn_dim, 3, 1, 1, 0),
                "16": nn.Conv2d(dfn_dim, 3, 1, 1, 0),
            }
        ),
    )
    dw = True
    hidden_blocks = 8
    kernel_size = 5
    displacement_emb = "linear"
    conv_refiner = nn.ModuleDict(
        {
            "16": ConvRefiner(
                2 * 512 + 128,
                1024 + 128,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
                displacement_emb=displacement_emb,
                displacement_emb_dim=128,
            ),
            "8": ConvRefiner(
                2 * 512 + 64,
                1024 + 64,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
                displacement_emb=displacement_emb,
                displacement_emb_dim=64,
            ),
            "4": ConvRefiner(
                2 * 256 + 32,
                512 + 32,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
                displacement_emb=displacement_emb,
                displacement_emb_dim=32,
            ),
            "2": ConvRefiner(
                2 * 64 + 16,
                128 + 16,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
                displacement_emb=displacement_emb,
                displacement_emb_dim=16,
            ),
            "1": ConvRefiner(
                2 * 3 + 6,
                24,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
                displacement_emb=displacement_emb,
                displacement_emb_dim=6,
            ),
        }
    )
    kernel_temperature = 0.2
    learn_temperature = False
    no_cov = True
    kernel = CosKernel
    only_attention = False
    basis = "fourier"
    gp32 = GP(
        kernel,
        T=kernel_temperature,
        learn_temperature=learn_temperature,
        only_attention=only_attention,
        gp_dim=gp_dim,
        basis=basis,
        no_cov=no_cov,
    )
    gp16 = GP(
        kernel,
        T=kernel_temperature,
        learn_temperature=learn_temperature,
        only_attention=only_attention,
        gp_dim=gp_dim,
        basis=basis,
        no_cov=no_cov,
    )
    gps = nn.ModuleDict({"32": gp32, "16": gp16})
    proj = nn.ModuleDict(
        {"16": nn.Conv2d(1024, 512, 1, 1), "32": nn.Conv2d(2048, 512, 1, 1)}
    )
    decoder = Decoder(coordinate_decoder, gps, proj, conv_refiner, detach=True)
    if resolution == "low":
        h, w = 384, 512
    elif resolution == "high":
        h, w = 480, 640
    encoder = Encoder(
        tv_resnet.resnet50(pretrained=not pretrained),
    )  # only load pretrained weights if not loading a pretrained matcher ;)
    matcher = RegressionMatcher(encoder, decoder, h=h, w=w, **kwargs).to(device)
    if pretrained:
        try:
            weights = torch.hub.load_state_dict_from_url(
                dkm_pretrained_urls["DKMv2"][version]
            )
        except Exception:
            # fall back to treating the stored entry as a local checkpoint path
            weights = torch.load(
                dkm_pretrained_urls["DKMv2"][version]
            )
        matcher.load_state_dict(weights)
    return matcher


def local_corr(pretrained=True, version="mega_synthetic"):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    gp_dim = 256
    dfn_dim = 384
    feat_dim = 256
    coordinate_decoder = DFN(
        internal_dim=dfn_dim,
        feat_input_modules=nn.ModuleDict(
            {
                "32": nn.Conv2d(512, feat_dim, 1, 1),
                "16": nn.Conv2d(512, feat_dim, 1, 1),
            }
        ),
        pred_input_modules=nn.ModuleDict(
            {
                "32": nn.Identity(),
                "16": nn.Identity(),
            }
        ),
        rrb_d_dict=nn.ModuleDict(
            {
                "32": RRB(gp_dim + feat_dim, dfn_dim),
                "16": RRB(gp_dim + feat_dim, dfn_dim),
            }
        ),
        cab_dict=nn.ModuleDict(
            {
                "32": CAB(2 * dfn_dim, dfn_dim),
                "16": CAB(2 * dfn_dim, dfn_dim),
            }
        ),
        rrb_u_dict=nn.ModuleDict(
            {
                "32": RRB(dfn_dim, dfn_dim),
                "16": RRB(dfn_dim, dfn_dim),
            }
        ),
        terminal_module=nn.ModuleDict(
            {
                "32": nn.Conv2d(dfn_dim, 3, 1, 1, 0),
                "16": nn.Conv2d(dfn_dim, 3, 1, 1, 0),
            }
        ),
    )
    dw = True
    hidden_blocks = 8
    kernel_size = 5
    conv_refiner = nn.ModuleDict(
        {
            "16": LocalCorr(
                81,
                81 * 12,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "8": LocalCorr(
                81,
                81 * 12,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "4": LocalCorr(
                81,
                81 * 6,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "2": LocalCorr(
                81,
                81,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "1": ConvRefiner(
                2 * 3,
                24,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
        }
    )
    kernel_temperature = 0.2
    learn_temperature = False
    no_cov = True
    kernel = CosKernel
    only_attention = False
    basis = "fourier"
    gp32 = GP(
        kernel,
        T=kernel_temperature,
        learn_temperature=learn_temperature,
        only_attention=only_attention,
        gp_dim=gp_dim,
        basis=basis,
        no_cov=no_cov,
    )
    gp16 = GP(
        kernel,
        T=kernel_temperature,
        learn_temperature=learn_temperature,
        only_attention=only_attention,
        gp_dim=gp_dim,
        basis=basis,
        no_cov=no_cov,
    )
    gps = nn.ModuleDict({"32": gp32, "16": gp16})
    proj = nn.ModuleDict(
        {"16": nn.Conv2d(1024, 512, 1, 1), "32": nn.Conv2d(2048, 512, 1, 1)}
    )
    decoder = Decoder(coordinate_decoder, gps, proj, conv_refiner, detach=True)
    h, w = 384, 512
    encoder = Encoder(
        tv_resnet.resnet50(pretrained=not pretrained)
    )  # only load pretrained weights if not loading a pretrained matcher ;)
    matcher = RegressionMatcher(encoder, decoder, h=h, w=w).to(device)
    if pretrained:
        weights = torch.hub.load_state_dict_from_url(
            dkm_pretrained_urls["local_corr"][version]  # note: no such entry is defined above
        )
        matcher.load_state_dict(weights)
    return matcher


def corr_channels(pretrained=True, version="mega_synthetic"):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    h, w = 384, 512
    gp_dim = (h // 32) * (w // 32), (h // 16) * (w // 16)
    dfn_dim = 384
    feat_dim = 256
    coordinate_decoder = DFN(
        internal_dim=dfn_dim,
        feat_input_modules=nn.ModuleDict(
            {
                "32": nn.Conv2d(512, feat_dim, 1, 1),
                "16": nn.Conv2d(512, feat_dim, 1, 1),
            }
        ),
        pred_input_modules=nn.ModuleDict(
            {
                "32": nn.Identity(),
                "16": nn.Identity(),
            }
        ),
        rrb_d_dict=nn.ModuleDict(
            {
                "32": RRB(gp_dim[0] + feat_dim, dfn_dim),
                "16": RRB(gp_dim[1] + feat_dim, dfn_dim),
            }
        ),
        cab_dict=nn.ModuleDict(
            {
                "32": CAB(2 * dfn_dim, dfn_dim),
                "16": CAB(2 * dfn_dim, dfn_dim),
            }
        ),
        rrb_u_dict=nn.ModuleDict(
            {
                "32": RRB(dfn_dim, dfn_dim),
                "16": RRB(dfn_dim, dfn_dim),
            }
        ),
        terminal_module=nn.ModuleDict(
            {
                "32": nn.Conv2d(dfn_dim, 3, 1, 1, 0),
                "16": nn.Conv2d(dfn_dim, 3, 1, 1, 0),
            }
        ),
    )
    dw = True
    hidden_blocks = 8
    kernel_size = 5
    conv_refiner = nn.ModuleDict(
        {
            "16": ConvRefiner(
                2 * 512,
                1024,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "8": ConvRefiner(
                2 * 512,
                1024,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "4": ConvRefiner(
                2 * 256,
                512,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "2": ConvRefiner(
                2 * 64,
                128,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "1": ConvRefiner(
                2 * 3,
                24,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
        }
    )
    gp32 = NormedCorr()
    gp16 = NormedCorr()
    gps = nn.ModuleDict({"32": gp32, "16": gp16})
    proj = nn.ModuleDict(
        {"16": nn.Conv2d(1024, 512, 1, 1), "32": nn.Conv2d(2048, 512, 1, 1)}
    )
    decoder = Decoder(coordinate_decoder, gps, proj, conv_refiner, detach=True)
    h, w = 384, 512
    encoder = Encoder(
        tv_resnet.resnet50(pretrained=not pretrained)
    )  # only load pretrained weights if not loading a pretrained matcher ;)
    matcher = RegressionMatcher(encoder, decoder, h=h, w=w).to(device)
    if pretrained:
        weights = torch.hub.load_state_dict_from_url(
            dkm_pretrained_urls["corr_channels"][version]  # note: no such entry is defined above
        )
        matcher.load_state_dict(weights)
    return matcher


def baseline(pretrained=True, version="mega_synthetic"):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    h, w = 384, 512
    gp_dim = (h // 32) * (w // 32), (h // 16) * (w // 16)
    dfn_dim = 384
    feat_dim = 256
    coordinate_decoder = DFN(
        internal_dim=dfn_dim,
        feat_input_modules=nn.ModuleDict(
            {
                "32": nn.Conv2d(512, feat_dim, 1, 1),
                "16": nn.Conv2d(512, feat_dim, 1, 1),
            }
        ),
        pred_input_modules=nn.ModuleDict(
            {
                "32": nn.Identity(),
                "16": nn.Identity(),
            }
        ),
        rrb_d_dict=nn.ModuleDict(
            {
                "32": RRB(gp_dim[0] + feat_dim, dfn_dim),
                "16": RRB(gp_dim[1] + feat_dim, dfn_dim),
            }
        ),
        cab_dict=nn.ModuleDict(
            {
                "32": CAB(2 * dfn_dim, dfn_dim),
                "16": CAB(2 * dfn_dim, dfn_dim),
            }
        ),
        rrb_u_dict=nn.ModuleDict(
            {
                "32": RRB(dfn_dim, dfn_dim),
                "16": RRB(dfn_dim, dfn_dim),
            }
        ),
        terminal_module=nn.ModuleDict(
            {
                "32": nn.Conv2d(dfn_dim, 3, 1, 1, 0),
                "16": nn.Conv2d(dfn_dim, 3, 1, 1, 0),
            }
        ),
    )
    dw = True
    hidden_blocks = 8
    kernel_size = 5
    conv_refiner = nn.ModuleDict(
        {
            "16": LocalCorr(
                81,
                81 * 12,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "8": LocalCorr(
                81,
                81 * 12,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "4": LocalCorr(
                81,
                81 * 6,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "2": LocalCorr(
                81,
                81,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "1": ConvRefiner(
                2 * 3,
                24,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
        }
    )
    gp32 = NormedCorr()
    gp16 = NormedCorr()
    gps = nn.ModuleDict({"32": gp32, "16": gp16})
    proj = nn.ModuleDict(
        {"16": nn.Conv2d(1024, 512, 1, 1), "32": nn.Conv2d(2048, 512, 1, 1)}
    )
    decoder = Decoder(coordinate_decoder, gps, proj, conv_refiner, detach=True)
    h, w = 384, 512
    encoder = Encoder(
        tv_resnet.resnet50(pretrained=not pretrained)
    )  # only load pretrained weights if not loading a pretrained matcher ;)
    matcher = RegressionMatcher(encoder, decoder, h=h, w=w).to(device)
    if pretrained:
        weights = torch.hub.load_state_dict_from_url(
            dkm_pretrained_urls["baseline"][version]  # note: no such entry is defined above
        )
        matcher.load_state_dict(weights)
    return matcher


def linear(pretrained=True, version="mega_synthetic"):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    gp_dim = 256
    dfn_dim = 384
    feat_dim = 256
    coordinate_decoder = DFN(
        internal_dim=dfn_dim,
        feat_input_modules=nn.ModuleDict(
            {
                "32": nn.Conv2d(512, feat_dim, 1, 1),
                "16": nn.Conv2d(512, feat_dim, 1, 1),
            }
        ),
        pred_input_modules=nn.ModuleDict(
            {
                "32": nn.Identity(),
                "16": nn.Identity(),
            }
        ),
        rrb_d_dict=nn.ModuleDict(
            {
                "32": RRB(gp_dim + feat_dim, dfn_dim),
                "16": RRB(gp_dim + feat_dim, dfn_dim),
            }
        ),
        cab_dict=nn.ModuleDict(
            {
                "32": CAB(2 * dfn_dim, dfn_dim),
                "16": CAB(2 * dfn_dim, dfn_dim),
            }
        ),
        rrb_u_dict=nn.ModuleDict(
            {
                "32": RRB(dfn_dim, dfn_dim),
                "16": RRB(dfn_dim, dfn_dim),
            }
        ),
        terminal_module=nn.ModuleDict(
            {
                "32": nn.Conv2d(dfn_dim, 3, 1, 1, 0),
                "16": nn.Conv2d(dfn_dim, 3, 1, 1, 0),
            }
        ),
    )
    dw = True
    hidden_blocks = 8
    kernel_size = 5
    conv_refiner = nn.ModuleDict(
        {
            "16": ConvRefiner(
                2 * 512,
                1024,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "8": ConvRefiner(
                2 * 512,
                1024,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "4": ConvRefiner(
                2 * 256,
                512,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "2": ConvRefiner(
                2 * 64,
                128,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
            "1": ConvRefiner(
                2 * 3,
                24,
                3,
                kernel_size=kernel_size,
                dw=dw,
                hidden_blocks=hidden_blocks,
            ),
        }
    )
    kernel_temperature = 0.2
    learn_temperature = False
    no_cov = True
    kernel = CosKernel
    only_attention = False
    basis = "linear"
    gp32 = GP(
        kernel,
        T=kernel_temperature,
        learn_temperature=learn_temperature,
        only_attention=only_attention,
        gp_dim=gp_dim,
        basis=basis,
        no_cov=no_cov,
    )
    gp16 = GP(
        kernel,
        T=kernel_temperature,
        learn_temperature=learn_temperature,
        only_attention=only_attention,
        gp_dim=gp_dim,
        basis=basis,
        no_cov=no_cov,
    )
    gps = nn.ModuleDict({"32": gp32, "16": gp16})
    proj = nn.ModuleDict(
        {"16": nn.Conv2d(1024, 512, 1, 1), "32": nn.Conv2d(2048, 512, 1, 1)}
    )
    decoder = Decoder(coordinate_decoder, gps, proj, conv_refiner, detach=True)
    h, w = 384, 512
    encoder = Encoder(
        tv_resnet.resnet50(pretrained=not pretrained)
    )  # only load pretrained weights if not loading a pretrained matcher ;)
    matcher = RegressionMatcher(encoder, decoder, h=h, w=w).to(device)
    if pretrained:
        weights = torch.hub.load_state_dict_from_url(
            dkm_pretrained_urls["linear"][version]  # note: no such entry is defined above
        )
        matcher.load_state_dict(weights)
    return matcher
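A minimal sketch of constructing one of the matchers defined above. It only exercises the DKMv2 builder; it assumes the deprecated module still imports cleanly against the current dkm package exports (DFN, GP, RegressionMatcher, etc.) and that the pretrained weight URLs are still live:

    from dkm.models.deprecated.build_model import DKMv2

    matcher = DKMv2(pretrained=True, version="outdoor", resolution="low")
    matcher.eval()  # RegressionMatcher is an nn.Module; disable train-time behaviour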
third_party/DKM/dkm/models/deprecated/corr_channels.py
ADDED
@@ -0,0 +1,34 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange


class NormedCorrelationKernel(nn.Module):  # similar to softmax kernel
    def __init__(self):
        super().__init__()

    def __call__(self, x, y, eps=1e-6):
        c = torch.einsum("bnd,bmd->bnm", x, y) / (
            x.norm(dim=-1)[..., None] * y.norm(dim=-1)[:, None] + eps
        )
        return c


class NormedCorr(nn.Module):
    def __init__(
        self,
    ):
        super().__init__()
        self.corr = NormedCorrelationKernel()

    def reshape(self, x):
        return rearrange(x, "b d h w -> b (h w) d")

    def forward(self, x, y, **kwargs):
        b, c, h, w = y.shape
        assert x.shape == y.shape
        x, y = self.reshape(x), self.reshape(y)
        corr_xy = self.corr(x, y)
        corr_xy_flat = rearrange(corr_xy, "b (h w) c -> b c h w", h=h, w=w)
        return corr_xy_flat
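A quick shape check for NormedCorr (a hypothetical snippet, not part of the commit): the flattened global correlation volume has one output channel per support-image location, i.e. h*w channels.

    import torch
    from dkm.models.deprecated.corr_channels import NormedCorr

    corr = NormedCorr()
    x = torch.randn(2, 128, 8, 8)  # (b, d, h, w) query features
    y = torch.randn(2, 128, 8, 8)  # support features, same shape required
    out = corr(x, y)
    assert out.shape == (2, 64, 8, 8)  # 64 = 8 * 8 correlation channels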
third_party/DKM/dkm/models/deprecated/local_corr.py
ADDED
@@ -0,0 +1,630 @@
import torch
import torch.nn.functional as F

try:
    import cupy
except ImportError:
    print("Cupy not found, local correlation will not work")
import re
from ..dkm import ConvRefiner


class Stream:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if device.type == 'cuda':
        stream = torch.cuda.current_stream(device=device).cuda_stream
    else:
        stream = None


kernel_Correlation_rearrange = """
extern "C" __global__ void kernel_Correlation_rearrange(
    const int n,
    const float* input,
    float* output
) {
  int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x;
  if (intIndex >= n) {
    return;
  }
  int intSample = blockIdx.z;
  int intChannel = blockIdx.y;
  float dblValue = input[(((intSample * SIZE_1(input)) + intChannel) * SIZE_2(input) * SIZE_3(input)) + intIndex];
  __syncthreads();
  int intPaddedY = (intIndex / SIZE_3(input)) + 4;
  int intPaddedX = (intIndex % SIZE_3(input)) + 4;
  int intRearrange = ((SIZE_3(input) + 8) * intPaddedY) + intPaddedX;
  output[(((intSample * SIZE_1(output) * SIZE_2(output)) + intRearrange) * SIZE_1(input)) + intChannel] = dblValue;
}
"""

kernel_Correlation_updateOutput = """
extern "C" __global__ void kernel_Correlation_updateOutput(
    const int n,
    const float* rbot0,
    const float* rbot1,
    float* top
) {
  extern __shared__ char patch_data_char[];
  float *patch_data = (float *)patch_data_char;
  // First (upper left) position of kernel upper-left corner in current center position of neighborhood in image 1
  int x1 = blockIdx.x + 4;
  int y1 = blockIdx.y + 4;
  int item = blockIdx.z;
  int ch_off = threadIdx.x;
  // Load 3D patch into shared memory
  for (int j = 0; j < 1; j++) { // HEIGHT
    for (int i = 0; i < 1; i++) { // WIDTH
      int ji_off = (j + i) * SIZE_3(rbot0);
      for (int ch = ch_off; ch < SIZE_3(rbot0); ch += 32) { // CHANNELS
        int idx1 = ((item * SIZE_1(rbot0) + y1+j) * SIZE_2(rbot0) + x1+i) * SIZE_3(rbot0) + ch;
        int idxPatchData = ji_off + ch;
        patch_data[idxPatchData] = rbot0[idx1];
      }
    }
  }
  __syncthreads();
  __shared__ float sum[32];
  // Compute correlation
  for (int top_channel = 0; top_channel < SIZE_1(top); top_channel++) {
    sum[ch_off] = 0;
    int s2o = top_channel % 9 - 4;
    int s2p = top_channel / 9 - 4;
    for (int j = 0; j < 1; j++) { // HEIGHT
      for (int i = 0; i < 1; i++) { // WIDTH
        int ji_off = (j + i) * SIZE_3(rbot0);
        for (int ch = ch_off; ch < SIZE_3(rbot0); ch += 32) { // CHANNELS
          int x2 = x1 + s2o;
          int y2 = y1 + s2p;
          int idxPatchData = ji_off + ch;
          int idx2 = ((item * SIZE_1(rbot0) + y2+j) * SIZE_2(rbot0) + x2+i) * SIZE_3(rbot0) + ch;
          sum[ch_off] += patch_data[idxPatchData] * rbot1[idx2];
        }
      }
    }
    __syncthreads();
    if (ch_off == 0) {
      float total_sum = 0;
      for (int idx = 0; idx < 32; idx++) {
        total_sum += sum[idx];
      }
      const int sumelems = SIZE_3(rbot0);
      const int index = ((top_channel*SIZE_2(top) + blockIdx.y)*SIZE_3(top))+blockIdx.x;
      top[index + item*SIZE_1(top)*SIZE_2(top)*SIZE_3(top)] = total_sum / (float)sumelems;
    }
  }
}
"""

kernel_Correlation_updateGradFirst = """
#define ROUND_OFF 50000
extern "C" __global__ void kernel_Correlation_updateGradFirst(
    const int n,
    const int intSample,
    const float* rbot0,
    const float* rbot1,
    const float* gradOutput,
    float* gradFirst,
    float* gradSecond
) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {
  int n = intIndex % SIZE_1(gradFirst); // channels
  int l = (intIndex / SIZE_1(gradFirst)) % SIZE_3(gradFirst) + 4; // w-pos
  int m = (intIndex / SIZE_1(gradFirst) / SIZE_3(gradFirst)) % SIZE_2(gradFirst) + 4; // h-pos
  // round_off is a trick to enable integer division with ceil, even for negative numbers
  // We use a large offset, for the inner part not to become negative.
  const int round_off = ROUND_OFF;
  const int round_off_s1 = round_off;
  // We add round_off before the int division and subtract it after, to ensure the formula matches ceil behavior:
  int xmin = (l - 4 + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4)
  int ymin = (m - 4 + round_off_s1 - 1) + 1 - round_off; // ceil (m - 4)
  // Same here:
  int xmax = (l - 4 + round_off_s1) - round_off; // floor (l - 4)
  int ymax = (m - 4 + round_off_s1) - round_off; // floor (m - 4)
  float sum = 0;
  if (xmax>=0 && ymax>=0 && (xmin<=SIZE_3(gradOutput)-1) && (ymin<=SIZE_2(gradOutput)-1)) {
    xmin = max(0,xmin);
    xmax = min(SIZE_3(gradOutput)-1,xmax);
    ymin = max(0,ymin);
    ymax = min(SIZE_2(gradOutput)-1,ymax);
    for (int p = -4; p <= 4; p++) {
      for (int o = -4; o <= 4; o++) {
        // Get rbot1 data:
        int s2o = o;
        int s2p = p;
        int idxbot1 = ((intSample * SIZE_1(rbot0) + (m+s2p)) * SIZE_2(rbot0) + (l+s2o)) * SIZE_3(rbot0) + n;
        float bot1tmp = rbot1[idxbot1]; // rbot1[l+s2o,m+s2p,n]
        // Index offset for gradOutput in following loops:
        int op = (p+4) * 9 + (o+4); // index[o,p]
        int idxopoffset = (intSample * SIZE_1(gradOutput) + op);
        for (int y = ymin; y <= ymax; y++) {
          for (int x = xmin; x <= xmax; x++) {
            int idxgradOutput = (idxopoffset * SIZE_2(gradOutput) + y) * SIZE_3(gradOutput) + x; // gradOutput[x,y,o,p]
            sum += gradOutput[idxgradOutput] * bot1tmp;
          }
        }
      }
    }
  }
  const int sumelems = SIZE_1(gradFirst);
  const int bot0index = ((n * SIZE_2(gradFirst)) + (m-4)) * SIZE_3(gradFirst) + (l-4);
  gradFirst[bot0index + intSample*SIZE_1(gradFirst)*SIZE_2(gradFirst)*SIZE_3(gradFirst)] = sum / (float)sumelems;
} }
"""

kernel_Correlation_updateGradSecond = """
#define ROUND_OFF 50000
extern "C" __global__ void kernel_Correlation_updateGradSecond(
    const int n,
    const int intSample,
    const float* rbot0,
    const float* rbot1,
    const float* gradOutput,
    float* gradFirst,
    float* gradSecond
) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {
  int n = intIndex % SIZE_1(gradSecond); // channels
  int l = (intIndex / SIZE_1(gradSecond)) % SIZE_3(gradSecond) + 4; // w-pos
  int m = (intIndex / SIZE_1(gradSecond) / SIZE_3(gradSecond)) % SIZE_2(gradSecond) + 4; // h-pos
  // round_off is a trick to enable integer division with ceil, even for negative numbers
  // We use a large offset, for the inner part not to become negative.
  const int round_off = ROUND_OFF;
  const int round_off_s1 = round_off;
  float sum = 0;
  for (int p = -4; p <= 4; p++) {
    for (int o = -4; o <= 4; o++) {
      int s2o = o;
      int s2p = p;
      //Get X,Y ranges and clamp
      // We add round_off before the int division and subtract it after, to ensure the formula matches ceil behavior:
      int xmin = (l - 4 - s2o + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4 - s2o)
      int ymin = (m - 4 - s2p + round_off_s1 - 1) + 1 - round_off; // ceil (m - 4 - s2p)
      // Same here:
      int xmax = (l - 4 - s2o + round_off_s1) - round_off; // floor (l - 4 - s2o)
      int ymax = (m - 4 - s2p + round_off_s1) - round_off; // floor (m - 4 - s2p)
      if (xmax>=0 && ymax>=0 && (xmin<=SIZE_3(gradOutput)-1) && (ymin<=SIZE_2(gradOutput)-1)) {
        xmin = max(0,xmin);
        xmax = min(SIZE_3(gradOutput)-1,xmax);
        ymin = max(0,ymin);
        ymax = min(SIZE_2(gradOutput)-1,ymax);
        // Get rbot0 data:
        int idxbot0 = ((intSample * SIZE_1(rbot0) + (m-s2p)) * SIZE_2(rbot0) + (l-s2o)) * SIZE_3(rbot0) + n;
        float bot0tmp = rbot0[idxbot0]; // rbot0[l-s2o,m-s2p,n]
        // Index offset for gradOutput in following loops:
        int op = (p+4) * 9 + (o+4); // index[o,p]
        int idxopoffset = (intSample * SIZE_1(gradOutput) + op);
        for (int y = ymin; y <= ymax; y++) {
          for (int x = xmin; x <= xmax; x++) {
            int idxgradOutput = (idxopoffset * SIZE_2(gradOutput) + y) * SIZE_3(gradOutput) + x; // gradOutput[x,y,o,p]
            sum += gradOutput[idxgradOutput] * bot0tmp;
          }
        }
      }
    }
  }
  const int sumelems = SIZE_1(gradSecond);
  const int bot1index = ((n * SIZE_2(gradSecond)) + (m-4)) * SIZE_3(gradSecond) + (l-4);
  gradSecond[bot1index + intSample*SIZE_1(gradSecond)*SIZE_2(gradSecond)*SIZE_3(gradSecond)] = sum / (float)sumelems;
} }
"""


def cupy_kernel(strFunction, objectVariables):
    strKernel = globals()[strFunction]

    while True:
        objectMatch = re.search(r"(SIZE_)([0-4])(\()([^\)]*)(\))", strKernel)

        if objectMatch is None:
            break

        intArg = int(objectMatch.group(2))

        strTensor = objectMatch.group(4)
        intSizes = objectVariables[strTensor].size()

        strKernel = strKernel.replace(objectMatch.group(), str(intSizes[intArg]))

    while True:
        objectMatch = re.search(r"(VALUE_)([0-4])(\()([^\)]+)(\))", strKernel)

        if objectMatch is None:
            break

        intArgs = int(objectMatch.group(2))
        strArgs = objectMatch.group(4).split(",")

        strTensor = strArgs[0]
        intStrides = objectVariables[strTensor].stride()
        strIndex = [
            "(("
            + strArgs[intArg + 1].replace("{", "(").replace("}", ")").strip()
            + ")*"
            + str(intStrides[intArg])
            + ")"
            for intArg in range(intArgs)
        ]

        strKernel = strKernel.replace(
            objectMatch.group(0), strTensor + "[" + str.join("+", strIndex) + "]"
        )

    return strKernel
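To make the SIZE_ macro substitution concrete, a small hypothetical check (it must run in this module's namespace, since cupy_kernel looks the kernel source up via globals()):

    import torch

    # cupy_kernel specializes the CUDA source for concrete tensor shapes:
    # every SIZE_k(name) macro is replaced by name.size(k) of the passed tensor.
    first = torch.zeros(2, 64, 48, 64)
    rbot0 = torch.zeros(2, 56, 72, 64)  # (b, h + 8, w + 8, c) padded layout used above
    src = cupy_kernel("kernel_Correlation_rearrange", {"input": first, "output": rbot0})
    assert "SIZE_" not in src  # all shape macros resolved to integer literals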
try:

    @cupy.memoize(for_each_device=True)
    def cupy_launch(strFunction, strKernel):
        return cupy.RawModule(code=strKernel).get_function(strFunction)

except Exception:
    pass


class _FunctionCorrelation(torch.autograd.Function):
    @staticmethod
    def forward(self, first, second):
        rbot0 = first.new_zeros(
            [first.size(0), first.size(2) + 8, first.size(3) + 8, first.size(1)]
        )
        rbot1 = first.new_zeros(
            [first.size(0), first.size(2) + 8, first.size(3) + 8, first.size(1)]
        )

        self.save_for_backward(first, second, rbot0, rbot1)

        first = first.contiguous()
        second = second.contiguous()

        output = first.new_zeros([first.size(0), 81, first.size(2), first.size(3)])

        if first.is_cuda:
            n = first.size(2) * first.size(3)
            cupy_launch(
                "kernel_Correlation_rearrange",
                cupy_kernel(
                    "kernel_Correlation_rearrange", {"input": first, "output": rbot0}
                ),
            )(
                grid=tuple([int((n + 16 - 1) / 16), first.size(1), first.size(0)]),
                block=tuple([16, 1, 1]),
                args=[n, first.data_ptr(), rbot0.data_ptr()],
                stream=Stream,
            )

            n = second.size(2) * second.size(3)
            cupy_launch(
                "kernel_Correlation_rearrange",
                cupy_kernel(
                    "kernel_Correlation_rearrange", {"input": second, "output": rbot1}
                ),
            )(
                grid=tuple([int((n + 16 - 1) / 16), second.size(1), second.size(0)]),
                block=tuple([16, 1, 1]),
                args=[n, second.data_ptr(), rbot1.data_ptr()],
                stream=Stream,
            )

            n = output.size(1) * output.size(2) * output.size(3)
            cupy_launch(
                "kernel_Correlation_updateOutput",
                cupy_kernel(
                    "kernel_Correlation_updateOutput",
                    {"rbot0": rbot0, "rbot1": rbot1, "top": output},
                ),
            )(
                grid=tuple([output.size(3), output.size(2), output.size(0)]),
                block=tuple([32, 1, 1]),
                shared_mem=first.size(1) * 4,
                args=[n, rbot0.data_ptr(), rbot1.data_ptr(), output.data_ptr()],
                stream=Stream,
            )

        else:
            raise NotImplementedError()

        return output

    @staticmethod
    def backward(self, gradOutput):
        first, second, rbot0, rbot1 = self.saved_tensors

        gradOutput = gradOutput.contiguous()

        assert gradOutput.is_contiguous()

        gradFirst = (
            first.new_zeros(
                [first.size(0), first.size(1), first.size(2), first.size(3)]
            )
            if self.needs_input_grad[0]
            else None
        )
        gradSecond = (
            first.new_zeros(
                [first.size(0), first.size(1), first.size(2), first.size(3)]
            )
            if self.needs_input_grad[1]
            else None
        )

        if first.is_cuda:
            if gradFirst is not None:
                for intSample in range(first.size(0)):
                    n = first.size(1) * first.size(2) * first.size(3)
                    cupy_launch(
                        "kernel_Correlation_updateGradFirst",
                        cupy_kernel(
                            "kernel_Correlation_updateGradFirst",
                            {
                                "rbot0": rbot0,
                                "rbot1": rbot1,
                                "gradOutput": gradOutput,
                                "gradFirst": gradFirst,
                                "gradSecond": None,
                            },
                        ),
                    )(
                        grid=tuple([int((n + 512 - 1) / 512), 1, 1]),
                        block=tuple([512, 1, 1]),
                        args=[
                            n,
                            intSample,
                            rbot0.data_ptr(),
                            rbot1.data_ptr(),
                            gradOutput.data_ptr(),
                            gradFirst.data_ptr(),
                            None,
                        ],
                        stream=Stream,
                    )

            if gradSecond is not None:
                for intSample in range(first.size(0)):
                    n = first.size(1) * first.size(2) * first.size(3)
                    cupy_launch(
                        "kernel_Correlation_updateGradSecond",
                        cupy_kernel(
                            "kernel_Correlation_updateGradSecond",
                            {
                                "rbot0": rbot0,
                                "rbot1": rbot1,
                                "gradOutput": gradOutput,
                                "gradFirst": None,
                                "gradSecond": gradSecond,
                            },
                        ),
                    )(
                        grid=tuple([int((n + 512 - 1) / 512), 1, 1]),
                        block=tuple([512, 1, 1]),
                        args=[
                            n,
                            intSample,
                            rbot0.data_ptr(),
                            rbot1.data_ptr(),
                            gradOutput.data_ptr(),
                            None,
                            gradSecond.data_ptr(),
                        ],
                        stream=Stream,
                    )

        else:
            raise NotImplementedError()

        return gradFirst, gradSecond


class _FunctionCorrelationTranspose(torch.autograd.Function):
    @staticmethod
    def forward(self, input, second):
        rbot0 = second.new_zeros(
            [second.size(0), second.size(2) + 8, second.size(3) + 8, second.size(1)]
        )
        rbot1 = second.new_zeros(
            [second.size(0), second.size(2) + 8, second.size(3) + 8, second.size(1)]
        )

        self.save_for_backward(input, second, rbot0, rbot1)

        input = input.contiguous()
        second = second.contiguous()

        output = second.new_zeros(
            [second.size(0), second.size(1), second.size(2), second.size(3)]
        )

        if second.is_cuda:
            n = second.size(2) * second.size(3)
            cupy_launch(
                "kernel_Correlation_rearrange",
                cupy_kernel(
                    "kernel_Correlation_rearrange", {"input": second, "output": rbot1}
                ),
            )(
                grid=tuple([int((n + 16 - 1) / 16), second.size(1), second.size(0)]),
                block=tuple([16, 1, 1]),
                args=[n, second.data_ptr(), rbot1.data_ptr()],
                stream=Stream,
            )

            for intSample in range(second.size(0)):
                n = second.size(1) * second.size(2) * second.size(3)
                cupy_launch(
                    "kernel_Correlation_updateGradFirst",
                    cupy_kernel(
                        "kernel_Correlation_updateGradFirst",
                        {
                            "rbot0": rbot0,
                            "rbot1": rbot1,
                            "gradOutput": input,
                            "gradFirst": output,
                            "gradSecond": None,
                        },
                    ),
                )(
                    grid=tuple([int((n + 512 - 1) / 512), 1, 1]),
                    block=tuple([512, 1, 1]),
                    args=[
                        n,
                        intSample,
                        rbot0.data_ptr(),
                        rbot1.data_ptr(),
                        input.data_ptr(),
                        output.data_ptr(),
                        None,
                    ],
                    stream=Stream,
                )

        else:
            raise NotImplementedError()

        return output

    @staticmethod
    def backward(self, gradOutput):
        input, second, rbot0, rbot1 = self.saved_tensors

        gradOutput = gradOutput.contiguous()

        gradInput = (
            input.new_zeros(
                [input.size(0), input.size(1), input.size(2), input.size(3)]
            )
            if self.needs_input_grad[0]
            else None
        )
        gradSecond = (
            second.new_zeros(
                [second.size(0), second.size(1), second.size(2), second.size(3)]
            )
            if self.needs_input_grad[1]
            else None
        )

        if second.is_cuda:
            if gradInput is not None or gradSecond is not None:
                n = second.size(2) * second.size(3)
                cupy_launch(
                    "kernel_Correlation_rearrange",
                    cupy_kernel(
                        "kernel_Correlation_rearrange",
                        {"input": gradOutput, "output": rbot0},
                    ),
                )(
                    grid=tuple(
                        [int((n + 16 - 1) / 16), gradOutput.size(1), gradOutput.size(0)]
                    ),
                    block=tuple([16, 1, 1]),
                    args=[n, gradOutput.data_ptr(), rbot0.data_ptr()],
                    stream=Stream,
                )

            if gradInput is not None:
                n = gradInput.size(1) * gradInput.size(2) * gradInput.size(3)
                cupy_launch(
                    "kernel_Correlation_updateOutput",
                    cupy_kernel(
                        "kernel_Correlation_updateOutput",
                        {"rbot0": rbot0, "rbot1": rbot1, "top": gradInput},
                    ),
                )(
                    grid=tuple(
                        [gradInput.size(3), gradInput.size(2), gradInput.size(0)]
                    ),
                    block=tuple([32, 1, 1]),
                    shared_mem=gradOutput.size(1) * 4,
                    args=[n, rbot0.data_ptr(), rbot1.data_ptr(), gradInput.data_ptr()],
                    stream=Stream,
                )

            if gradSecond is not None:
                for intSample in range(second.size(0)):
                    n = second.size(1) * second.size(2) * second.size(3)
                    cupy_launch(
                        "kernel_Correlation_updateGradSecond",
                        cupy_kernel(
                            "kernel_Correlation_updateGradSecond",
                            {
                                "rbot0": rbot0,
                                "rbot1": rbot1,
                                "gradOutput": input,
                                "gradFirst": None,
                                "gradSecond": gradSecond,
                            },
                        ),
                    )(
                        grid=tuple([int((n + 512 - 1) / 512), 1, 1]),
|
559 |
+
block=tuple([512, 1, 1]),
|
560 |
+
args=[
|
561 |
+
n,
|
562 |
+
intSample,
|
563 |
+
rbot0.data_ptr(),
|
564 |
+
rbot1.data_ptr(),
|
565 |
+
input.data_ptr(),
|
566 |
+
None,
|
567 |
+
gradSecond.data_ptr(),
|
568 |
+
],
|
569 |
+
stream=Stream,
|
570 |
+
)
|
571 |
+
|
572 |
+
elif second.is_cuda == False:
|
573 |
+
raise NotImplementedError()
|
574 |
+
|
575 |
+
return gradInput, gradSecond
|
576 |
+
|
577 |
+
|
578 |
+
def FunctionCorrelation(reference_features, query_features):
|
579 |
+
return _FunctionCorrelation.apply(reference_features, query_features)
|
580 |
+
|
581 |
+
|
582 |
+
class ModuleCorrelation(torch.nn.Module):
|
583 |
+
def __init__(self):
|
584 |
+
super(ModuleCorrelation, self).__init__()
|
585 |
+
|
586 |
+
def forward(self, tensorFirst, tensorSecond):
|
587 |
+
return _FunctionCorrelation.apply(tensorFirst, tensorSecond)
|
588 |
+
|
589 |
+
|
590 |
+
def FunctionCorrelationTranspose(reference_features, query_features):
|
591 |
+
return _FunctionCorrelationTranspose.apply(reference_features, query_features)
|
592 |
+
|
593 |
+
|
594 |
+
class ModuleCorrelationTranspose(torch.nn.Module):
|
595 |
+
def __init__(self):
|
596 |
+
super(ModuleCorrelationTranspose, self).__init__()
|
597 |
+
|
598 |
+
def forward(self, tensorFirst, tensorSecond):
|
599 |
+
return _FunctionCorrelationTranspose.apply(tensorFirst, tensorSecond)
|
600 |
+
|
601 |
+
|
602 |
+
class LocalCorr(ConvRefiner):
|
603 |
+
def forward(self, x, y, flow):
|
604 |
+
"""Computes the relative refining displacement in pixels for a given image x,y and a coarse flow-field between them
|
605 |
+
|
606 |
+
Args:
|
607 |
+
x ([type]): [description]
|
608 |
+
y ([type]): [description]
|
609 |
+
flow ([type]): [description]
|
610 |
+
|
611 |
+
Returns:
|
612 |
+
[type]: [description]
|
613 |
+
"""
|
614 |
+
with torch.no_grad():
|
615 |
+
x_hat = F.grid_sample(y, flow.permute(0, 2, 3, 1), align_corners=False)
|
616 |
+
corr = FunctionCorrelation(x, x_hat)
|
617 |
+
d = self.block1(corr)
|
618 |
+
d = self.hidden_blocks(d)
|
619 |
+
d = self.out_conv(d)
|
620 |
+
certainty, displacement = d[:, :-2], d[:, -2:]
|
621 |
+
return certainty, displacement
|
622 |
+
|
623 |
+
|
624 |
+
if __name__ == "__main__":
|
625 |
+
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
626 |
+
x = torch.randn(2, 128, 32, 32).to(device)
|
627 |
+
y = torch.randn(2, 128, 32, 32).to(device)
|
628 |
+
local_corr = LocalCorr(in_dim=81, hidden_dim=81 * 4)
|
629 |
+
z = local_corr(x, y)
|
630 |
+
print("hej")
|
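
For reference, the custom op above produces one correlation channel per offset in a 9x9 window (displacements of up to 4 pixels), i.e. an 81-channel cost volume, which is why LocalCorr is built with in_dim=81. A minimal usage sketch, assuming a CUDA device, cupy installed, and the package importable as `dkm` (the CPU path raises NotImplementedError):

    import torch
    from dkm.models.deprecated.local_corr import FunctionCorrelation

    feats_a = torch.randn(1, 128, 32, 32, device="cuda")
    feats_b = torch.randn(1, 128, 32, 32, device="cuda")
    cost_volume = FunctionCorrelation(feats_a, feats_b)  # one channel per 9x9 offset
    print(cost_volume.shape)  # expected: (1, 81, 32, 32)
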
third_party/DKM/dkm/models/dkm.py
ADDED
@@ -0,0 +1,759 @@
import math
import os
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..utils import get_tuple_transform_ops
from einops import rearrange
from ..utils.local_correlation import local_correlation


class ConvRefiner(nn.Module):
    def __init__(
        self,
        in_dim=6,
        hidden_dim=16,
        out_dim=2,
        dw=False,
        kernel_size=5,
        hidden_blocks=3,
        displacement_emb=None,
        displacement_emb_dim=None,
        local_corr_radius=None,
        corr_in_other=None,
        no_support_fm=False,
    ):
        super().__init__()
        self.block1 = self.create_block(
            in_dim, hidden_dim, dw=dw, kernel_size=kernel_size
        )
        self.hidden_blocks = nn.Sequential(
            *[
                self.create_block(
                    hidden_dim,
                    hidden_dim,
                    dw=dw,
                    kernel_size=kernel_size,
                )
                for hb in range(hidden_blocks)
            ]
        )
        self.out_conv = nn.Conv2d(hidden_dim, out_dim, 1, 1, 0)
        if displacement_emb:
            self.has_displacement_emb = True
            self.disp_emb = nn.Conv2d(2, displacement_emb_dim, 1, 1, 0)
        else:
            self.has_displacement_emb = False
        self.local_corr_radius = local_corr_radius
        self.corr_in_other = corr_in_other
        self.no_support_fm = no_support_fm

    def create_block(
        self,
        in_dim,
        out_dim,
        dw=False,
        kernel_size=5,
    ):
        num_groups = 1 if not dw else in_dim
        if dw:
            assert (
                out_dim % in_dim == 0
            ), "outdim must be divisible by indim for depthwise"
        conv1 = nn.Conv2d(
            in_dim,
            out_dim,
            kernel_size=kernel_size,
            stride=1,
            padding=kernel_size // 2,
            groups=num_groups,
        )
        norm = nn.BatchNorm2d(out_dim)
        relu = nn.ReLU(inplace=True)
        conv2 = nn.Conv2d(out_dim, out_dim, 1, 1, 0)
        return nn.Sequential(conv1, norm, relu, conv2)

    def forward(self, x, y, flow):
        """Computes the relative refining displacement in pixels for a given image pair x, y and a coarse flow field between them.

        Args:
            x (torch.Tensor): query feature map, shape (B, C, H, W)
            y (torch.Tensor): support feature map, shape (B, C, H, W)
            flow (torch.Tensor): coarse flow in normalized [-1, 1] coordinates, shape (B, 2, H, W)

        Returns:
            tuple: certainty logits of shape (B, out_dim - 2, H, W) and displacement of shape (B, 2, H, W)
        """
        device = x.device
        b, c, hs, ws = x.shape
        with torch.no_grad():
            x_hat = F.grid_sample(y, flow.permute(0, 2, 3, 1), align_corners=False)
        if self.has_displacement_emb:
            query_coords = torch.meshgrid(
                (
                    torch.linspace(-1 + 1 / hs, 1 - 1 / hs, hs, device=device),
                    torch.linspace(-1 + 1 / ws, 1 - 1 / ws, ws, device=device),
                )
            )
            query_coords = torch.stack((query_coords[1], query_coords[0]))
            query_coords = query_coords[None].expand(b, 2, hs, ws)
            in_displacement = flow - query_coords
            emb_in_displacement = self.disp_emb(in_displacement)
            if self.local_corr_radius:
                # TODO: should corr have gradient?
                if self.corr_in_other:
                    # Corr in other means take a kxk grid around the predicted coordinate in the other image
                    local_corr = local_correlation(x, y, local_radius=self.local_corr_radius, flow=flow)
                else:
                    # Otherwise we use the warp to sample in the first image.
                    # These are actually different operations, especially for large viewpoint changes.
                    local_corr = local_correlation(x, x_hat, local_radius=self.local_corr_radius)
                if self.no_support_fm:
                    x_hat = torch.zeros_like(x)
                d = torch.cat((x, x_hat, emb_in_displacement, local_corr), dim=1)
            else:
                d = torch.cat((x, x_hat, emb_in_displacement), dim=1)
        else:
            if self.no_support_fm:
                x_hat = torch.zeros_like(x)
            d = torch.cat((x, x_hat), dim=1)
        d = self.block1(d)
        d = self.hidden_blocks(d)
        d = self.out_conv(d)
        certainty, displacement = d[:, :-2], d[:, -2:]
        return certainty, displacement


class CosKernel(nn.Module):  # similar to softmax kernel
    def __init__(self, T, learn_temperature=False):
        super().__init__()
        self.learn_temperature = learn_temperature
        if self.learn_temperature:
            self.T = nn.Parameter(torch.tensor(T))
        else:
            self.T = T

    def __call__(self, x, y, eps=1e-6):
        c = torch.einsum("bnd,bmd->bnm", x, y) / (
            x.norm(dim=-1)[..., None] * y.norm(dim=-1)[:, None] + eps
        )
        if self.learn_temperature:
            T = self.T.abs() + 0.01
        else:
            T = torch.tensor(self.T, device=c.device)
        K = ((c - 1.0) / T).exp()
        return K


class CAB(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(CAB, self).__init__()
        self.global_pooling = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, kernel_size=1, stride=1, padding=0
        )
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(
            out_channels, out_channels, kernel_size=1, stride=1, padding=0
        )
        self.sigmod = nn.Sigmoid()  # sic: attribute name kept for checkpoint compatibility

    def forward(self, x):
        x1, x2 = x  # high, low (old, new)
        x = torch.cat([x1, x2], dim=1)
        x = self.global_pooling(x)
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.sigmod(x)
        x2 = x * x2
        res = x2 + x1
        return res


class RRB(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3):
        super(RRB, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, kernel_size=1, stride=1, padding=0
        )
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=kernel_size // 2,
        )
        self.relu = nn.ReLU()
        self.bn = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(
            out_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=kernel_size // 2,
        )

    def forward(self, x):
        x = self.conv1(x)
        res = self.conv2(x)
        res = self.bn(res)
        res = self.relu(res)
        res = self.conv3(res)
        return self.relu(x + res)


class DFN(nn.Module):
    def __init__(
        self,
        internal_dim,
        feat_input_modules,
        pred_input_modules,
        rrb_d_dict,
        cab_dict,
        rrb_u_dict,
        use_global_context=False,
        global_dim=None,
        terminal_module=None,
        upsample_mode="bilinear",
        align_corners=False,
    ):
        super().__init__()
        if use_global_context:
            assert (
                global_dim is not None
            ), "Global dim must be provided when using global context"
        self.align_corners = align_corners
        self.internal_dim = internal_dim
        self.feat_input_modules = feat_input_modules
        self.pred_input_modules = pred_input_modules
        self.rrb_d = rrb_d_dict
        self.cab = cab_dict
        self.rrb_u = rrb_u_dict
        self.use_global_context = use_global_context
        if use_global_context:
            self.global_to_internal = nn.Conv2d(global_dim, self.internal_dim, 1, 1, 0)
            self.global_pooling = nn.AdaptiveAvgPool2d(1)
        self.terminal_module = (
            terminal_module if terminal_module is not None else nn.Identity()
        )
        self.upsample_mode = upsample_mode
        # terminal_module is expected to be an nn.ModuleDict keyed by scale
        self._scales = [int(key) for key in self.terminal_module.keys()]

    def scales(self):
        return self._scales.copy()

    def forward(self, embeddings, feats, context, key):
        feats = self.feat_input_modules[str(key)](feats)
        embeddings = torch.cat([feats, embeddings], dim=1)
        embeddings = self.rrb_d[str(key)](embeddings)
        context = self.cab[str(key)]([context, embeddings])
        context = self.rrb_u[str(key)](context)
        preds = self.terminal_module[str(key)](context)
        pred_coord = preds[:, -2:]
        pred_certainty = preds[:, :-2]
        return pred_coord, pred_certainty, context


class GP(nn.Module):
    def __init__(
        self,
        kernel,
        T=1,
        learn_temperature=False,
        only_attention=False,
        gp_dim=64,
        basis="fourier",
        covar_size=5,
        only_nearest_neighbour=False,
        sigma_noise=0.1,
        no_cov=False,
        predict_features=False,
    ):
        super().__init__()
        self.K = kernel(T=T, learn_temperature=learn_temperature)
        self.sigma_noise = sigma_noise
        self.covar_size = covar_size
        self.pos_conv = torch.nn.Conv2d(2, gp_dim, 1, 1)
        self.only_attention = only_attention
        self.only_nearest_neighbour = only_nearest_neighbour
        self.basis = basis
        self.no_cov = no_cov
        self.dim = gp_dim
        self.predict_features = predict_features

    def get_local_cov(self, cov):
        K = self.covar_size
        b, h, w, h, w = cov.shape
        hw = h * w
        cov = F.pad(cov, 4 * (K // 2,))  # pad v_q
        delta = torch.stack(
            torch.meshgrid(
                torch.arange(-(K // 2), K // 2 + 1), torch.arange(-(K // 2), K // 2 + 1)
            ),
            dim=-1,
        )
        positions = torch.stack(
            torch.meshgrid(
                torch.arange(K // 2, h + K // 2), torch.arange(K // 2, w + K // 2)
            ),
            dim=-1,
        )
        neighbours = positions[:, :, None, None, :] + delta[None, :, :]
        points = torch.arange(hw)[:, None].expand(hw, K**2)
        local_cov = cov.reshape(b, hw, h + K - 1, w + K - 1)[
            :,
            points.flatten(),
            neighbours[..., 0].flatten(),
            neighbours[..., 1].flatten(),
        ].reshape(b, h, w, K**2)
        return local_cov

    def reshape(self, x):
        return rearrange(x, "b d h w -> b (h w) d")

    def project_to_basis(self, x):
        if self.basis == "fourier":
            return torch.cos(8 * math.pi * self.pos_conv(x))
        elif self.basis == "linear":
            return self.pos_conv(x)
        else:
            raise ValueError(
                "No bases other than fourier and linear are currently supported in the public release"
            )

    def get_pos_enc(self, y):
        b, c, h, w = y.shape
        coarse_coords = torch.meshgrid(
            (
                torch.linspace(-1 + 1 / h, 1 - 1 / h, h, device=y.device),
                torch.linspace(-1 + 1 / w, 1 - 1 / w, w, device=y.device),
            )
        )

        coarse_coords = torch.stack((coarse_coords[1], coarse_coords[0]), dim=-1)[
            None
        ].expand(b, h, w, 2)
        coarse_coords = rearrange(coarse_coords, "b h w d -> b d h w")
        coarse_embedded_coords = self.project_to_basis(coarse_coords)
        return coarse_embedded_coords

    def forward(self, x, y, **kwargs):
        b, c, h1, w1 = x.shape
        b, c, h2, w2 = y.shape
        f = self.get_pos_enc(y)
        if self.predict_features:
            f = f + y[:, : self.dim]  # simple additive way to predict features
        b, d, h2, w2 = f.shape
        # assert x.shape == y.shape
        x, y, f = self.reshape(x), self.reshape(y), self.reshape(f)
        K_xx = self.K(x, x)
        K_yy = self.K(y, y)
        K_xy = self.K(x, y)
        K_yx = K_xy.permute(0, 2, 1)
        sigma_noise = self.sigma_noise * torch.eye(h2 * w2, device=x.device)[None, :, :]
        # Due to https://github.com/pytorch/pytorch/issues/16963, invert per sample when N is large to avoid warnings
        if len(K_yy[0]) > 2000:
            K_yy_inv = torch.cat([torch.linalg.inv(K_yy[k : k + 1] + sigma_noise[k : k + 1]) for k in range(b)])
        else:
            K_yy_inv = torch.linalg.inv(K_yy + sigma_noise)

        mu_x = K_xy.matmul(K_yy_inv.matmul(f))
        mu_x = rearrange(mu_x, "b (h w) d -> b d h w", h=h1, w=w1)
        if not self.no_cov:
            cov_x = K_xx - K_xy.matmul(K_yy_inv.matmul(K_yx))
            cov_x = rearrange(cov_x, "b (h w) (r c) -> b h w r c", h=h1, w=w1, r=h1, c=w1)
            local_cov_x = self.get_local_cov(cov_x)
            local_cov_x = rearrange(local_cov_x, "b h w K -> b K h w")
            gp_feats = torch.cat((mu_x, local_cov_x), dim=1)
        else:
            gp_feats = mu_x
        return gp_feats


class Encoder(nn.Module):
    def __init__(self, resnet):
        super().__init__()
        self.resnet = resnet

    def forward(self, x):
        x0 = x
        b, c, h, w = x.shape
        x = self.resnet.conv1(x)
        x = self.resnet.bn1(x)
        x1 = self.resnet.relu(x)

        x = self.resnet.maxpool(x1)
        x2 = self.resnet.layer1(x)

        x3 = self.resnet.layer2(x2)

        x4 = self.resnet.layer3(x3)

        x5 = self.resnet.layer4(x4)
        feats = {32: x5, 16: x4, 8: x3, 4: x2, 2: x1, 1: x0}
        return feats

    def train(self, mode=True):
        super().train(mode)
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()


class Decoder(nn.Module):
    def __init__(
        self, embedding_decoder, gps, proj, conv_refiner, transformers=None, detach=False, scales="all", pos_embeddings=None,
    ):
        super().__init__()
        self.embedding_decoder = embedding_decoder
        self.gps = gps
        self.proj = proj
        self.conv_refiner = conv_refiner
        self.detach = detach
        if scales == "all":
            self.scales = ["32", "16", "8", "4", "2", "1"]
        else:
            self.scales = scales

    def upsample_preds(self, flow, certainty, query, support):
        b, hs, ws, d = flow.shape
        b, c, h, w = query.shape
        flow = flow.permute(0, 3, 1, 2)
        certainty = F.interpolate(
            certainty, size=(h, w), align_corners=False, mode="bilinear"
        )
        flow = F.interpolate(
            flow, size=(h, w), align_corners=False, mode="bilinear"
        )
        delta_certainty, delta_flow = self.conv_refiner["1"](query, support, flow)
        flow = torch.stack(
            (
                flow[:, 0] + delta_flow[:, 0] / (4 * w),
                flow[:, 1] + delta_flow[:, 1] / (4 * h),
            ),
            dim=1,
        )
        flow = flow.permute(0, 2, 3, 1)
        certainty = certainty + delta_certainty
        return flow, certainty

    def get_placeholder_flow(self, b, h, w, device):
        coarse_coords = torch.meshgrid(
            (
                torch.linspace(-1 + 1 / h, 1 - 1 / h, h, device=device),
                torch.linspace(-1 + 1 / w, 1 - 1 / w, w, device=device),
            )
        )
        coarse_coords = torch.stack((coarse_coords[1], coarse_coords[0]), dim=-1)[
            None
        ].expand(b, h, w, 2)
        coarse_coords = rearrange(coarse_coords, "b h w d -> b d h w")
        return coarse_coords

    def forward(self, f1, f2, upsample=False, dense_flow=None, dense_certainty=None):
        coarse_scales = self.embedding_decoder.scales()
        all_scales = self.scales if not upsample else ["8", "4", "2", "1"]
        sizes = {scale: f1[scale].shape[-2:] for scale in f1}
        h, w = sizes[1]
        b = f1[1].shape[0]
        device = f1[1].device
        coarsest_scale = int(all_scales[0])
        old_stuff = torch.zeros(
            b, self.embedding_decoder.internal_dim, *sizes[coarsest_scale], device=f1[coarsest_scale].device
        )
        dense_corresps = {}
        if not upsample:
            dense_flow = self.get_placeholder_flow(b, *sizes[coarsest_scale], device)
            dense_certainty = 0.0
        else:
            dense_flow = F.interpolate(
                dense_flow,
                size=sizes[coarsest_scale],
                align_corners=False,
                mode="bilinear",
            )
            dense_certainty = F.interpolate(
                dense_certainty,
                size=sizes[coarsest_scale],
                align_corners=False,
                mode="bilinear",
            )
        for new_scale in all_scales:
            ins = int(new_scale)
            f1_s, f2_s = f1[ins], f2[ins]
            if new_scale in self.proj:
                f1_s, f2_s = self.proj[new_scale](f1_s), self.proj[new_scale](f2_s)
            b, c, hs, ws = f1_s.shape
            if ins in coarse_scales:
                old_stuff = F.interpolate(
                    old_stuff, size=sizes[ins], mode="bilinear", align_corners=False
                )
                new_stuff = self.gps[new_scale](f1_s, f2_s, dense_flow=dense_flow)
                dense_flow, dense_certainty, old_stuff = self.embedding_decoder(
                    new_stuff, f1_s, old_stuff, new_scale
                )

            if new_scale in self.conv_refiner:
                delta_certainty, displacement = self.conv_refiner[new_scale](
                    f1_s, f2_s, dense_flow
                )
                dense_flow = torch.stack(
                    (
                        dense_flow[:, 0] + ins * displacement[:, 0] / (4 * w),
                        dense_flow[:, 1] + ins * displacement[:, 1] / (4 * h),
                    ),
                    dim=1,
                )
                dense_certainty = (
                    dense_certainty + delta_certainty
                )  # predict both certainty and displacement

            dense_corresps[ins] = {
                "dense_flow": dense_flow,
                "dense_certainty": dense_certainty,
            }

            if new_scale != "1":
                dense_flow = F.interpolate(
                    dense_flow,
                    size=sizes[ins // 2],
                    align_corners=False,
                    mode="bilinear",
                )

                dense_certainty = F.interpolate(
                    dense_certainty,
                    size=sizes[ins // 2],
                    align_corners=False,
                    mode="bilinear",
                )
                if self.detach:
                    dense_flow = dense_flow.detach()
                    dense_certainty = dense_certainty.detach()
        return dense_corresps


class RegressionMatcher(nn.Module):
    def __init__(
        self,
        encoder,
        decoder,
        h=384,
        w=512,
        use_contrastive_loss=False,
        alpha=1,
        beta=0,
        sample_mode="threshold",
        upsample_preds=False,
        symmetric=False,
        name=None,
        use_soft_mutual_nearest_neighbours=False,
    ):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.w_resized = w
        self.h_resized = h
        self.og_transforms = get_tuple_transform_ops(resize=None, normalize=True)
        self.use_contrastive_loss = use_contrastive_loss
        self.alpha = alpha
        self.beta = beta
        self.sample_mode = sample_mode
        self.upsample_preds = upsample_preds
        self.symmetric = symmetric
        self.name = name
        self.sample_thresh = 0.05
        self.upsample_res = (864, 1152)
        if use_soft_mutual_nearest_neighbours:
            assert symmetric, "MNS requires symmetric inference"
        self.use_soft_mutual_nearest_neighbours = use_soft_mutual_nearest_neighbours

    def extract_backbone_features(self, batch, batched=True, upsample=True):
        # TODO: only extract stride [1, 2, 4, 8] for upsample = True
        x_q = batch["query"]
        x_s = batch["support"]
        if batched:
            X = torch.cat((x_q, x_s))
            feature_pyramid = self.encoder(X)
        else:
            feature_pyramid = self.encoder(x_q), self.encoder(x_s)
        return feature_pyramid

    def sample(
        self,
        dense_matches,
        dense_certainty,
        num=10000,
    ):
        if "threshold" in self.sample_mode:
            upper_thresh = self.sample_thresh
            dense_certainty = dense_certainty.clone()
            dense_certainty[dense_certainty > upper_thresh] = 1
        elif "pow" in self.sample_mode:
            dense_certainty = dense_certainty ** (1 / 3)
        elif "naive" in self.sample_mode:
            dense_certainty = torch.ones_like(dense_certainty)
        matches, certainty = (
            dense_matches.reshape(-1, 4),
            dense_certainty.reshape(-1),
        )
        expansion_factor = 4 if "balanced" in self.sample_mode else 1
        good_samples = torch.multinomial(
            certainty,
            num_samples=min(expansion_factor * num, len(certainty)),
            replacement=False,
        )
        good_matches, good_certainty = matches[good_samples], certainty[good_samples]
        if "balanced" not in self.sample_mode:
            return good_matches, good_certainty

        from ..utils.kde import kde
        density = kde(good_matches, std=0.1)
        p = 1 / (density + 1)
        p[density < 10] = 1e-7  # should have at least 10 perfect neighbours, or around 100 ok ones
        balanced_samples = torch.multinomial(
            p,
            num_samples=min(num, len(good_certainty)),
            replacement=False,
        )
        return good_matches[balanced_samples], good_certainty[balanced_samples]

    def forward(self, batch, batched=True, upsample=False):
        # `upsample` mirrors forward_symmetric, so match() can also refine
        # upsampled predictions in the non-symmetric case (it calls with upsample=True).
        feature_pyramid = self.extract_backbone_features(batch, batched=batched, upsample=upsample)
        if batched:
            f_q_pyramid = {
                scale: f_scale.chunk(2)[0] for scale, f_scale in feature_pyramid.items()
            }
            f_s_pyramid = {
                scale: f_scale.chunk(2)[1] for scale, f_scale in feature_pyramid.items()
            }
        else:
            f_q_pyramid, f_s_pyramid = feature_pyramid
        dense_corresps = self.decoder(f_q_pyramid, f_s_pyramid, upsample=upsample, **(batch["corresps"] if "corresps" in batch else {}))
        if self.training and self.use_contrastive_loss:
            return dense_corresps, (f_q_pyramid, f_s_pyramid)
        else:
            return dense_corresps

    def forward_symmetric(self, batch, upsample=False, batched=True):
        feature_pyramid = self.extract_backbone_features(batch, upsample=upsample, batched=batched)
        f_q_pyramid = feature_pyramid
        f_s_pyramid = {
            scale: torch.cat((f_scale.chunk(2)[1], f_scale.chunk(2)[0]))
            for scale, f_scale in feature_pyramid.items()
        }
        dense_corresps = self.decoder(f_q_pyramid, f_s_pyramid, upsample=upsample, **(batch["corresps"] if "corresps" in batch else {}))
        return dense_corresps

    def to_pixel_coordinates(self, matches, H_A, W_A, H_B, W_B):
        kpts_A, kpts_B = matches[..., :2], matches[..., 2:]
        kpts_A = torch.stack((W_A / 2 * (kpts_A[..., 0] + 1), H_A / 2 * (kpts_A[..., 1] + 1)), axis=-1)
        kpts_B = torch.stack((W_B / 2 * (kpts_B[..., 0] + 1), H_B / 2 * (kpts_B[..., 1] + 1)), axis=-1)
        return kpts_A, kpts_B

    def match(
        self,
        im1_path,
        im2_path,
        *args,
        batched=False,
        device=None,
    ):
        assert not (batched and self.upsample_preds), "Cannot upsample preds in batch mode (no access to the high-res images). You can turn off upsampling via model.upsample_preds = False"
        if isinstance(im1_path, (str, os.PathLike)):
            im1, im2 = Image.open(im1_path), Image.open(im2_path)
        else:  # assume it is a PIL Image
            im1, im2 = im1_path, im2_path
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        symmetric = self.symmetric
        self.train(False)
        with torch.no_grad():
            if not batched:
                b = 1
                w, h = im1.size
                w2, h2 = im2.size
                # Get images in good format
                ws = self.w_resized
                hs = self.h_resized

                test_transform = get_tuple_transform_ops(
                    resize=(hs, ws), normalize=True
                )
                query, support = test_transform((im1, im2))
                batch = {"query": query[None].to(device), "support": support[None].to(device)}
            else:
                b, c, h, w = im1.shape
                b, c, h2, w2 = im2.shape
                assert w == w2 and h == h2, "For batched images we assume same size"
                batch = {"query": im1.to(device), "support": im2.to(device)}
                hs, ws = self.h_resized, self.w_resized
            finest_scale = 1
            # Run matcher
            if symmetric:
                dense_corresps = self.forward_symmetric(batch, batched=True)
            else:
                dense_corresps = self.forward(batch, batched=True)

            low_res_certainty = 0  # only used when predictions are upsampled below
            if self.upsample_preds:
                hs, ws = self.upsample_res
                low_res_certainty = F.interpolate(
                    dense_corresps[16]["dense_certainty"], size=(hs, ws), align_corners=False, mode="bilinear"
                )
                cert_clamp = 0
                factor = 0.5
                low_res_certainty = factor * low_res_certainty * (low_res_certainty < cert_clamp)

                test_transform = get_tuple_transform_ops(
                    resize=(hs, ws), normalize=True
                )
                query, support = test_transform((im1, im2))
                query, support = query[None].to(device), support[None].to(device)
                batch = {"query": query, "support": support, "corresps": dense_corresps[finest_scale]}
                if symmetric:
                    dense_corresps = self.forward_symmetric(batch, upsample=True, batched=True)
                else:
                    dense_corresps = self.forward(batch, batched=True, upsample=True)
            query_to_support = dense_corresps[finest_scale]["dense_flow"]
            dense_certainty = dense_corresps[finest_scale]["dense_certainty"]

            # Subtract the (clamped, scaled) low-res certainty so it is not double counted
            dense_certainty = dense_certainty - low_res_certainty
            query_to_support = query_to_support.permute(0, 2, 3, 1)
            # Create im1 meshgrid
            query_coords = torch.meshgrid(
                (
                    torch.linspace(-1 + 1 / hs, 1 - 1 / hs, hs, device=device),
                    torch.linspace(-1 + 1 / ws, 1 - 1 / ws, ws, device=device),
                )
            )
            query_coords = torch.stack((query_coords[1], query_coords[0]))
            query_coords = query_coords[None].expand(b, 2, hs, ws)
            dense_certainty = dense_certainty.sigmoid()  # logits -> probs
            query_coords = query_coords.permute(0, 2, 3, 1)
            if (query_to_support.abs() > 1).any():
                wrong = (query_to_support.abs() > 1).sum(dim=-1) > 0
                dense_certainty[wrong[:, None]] = 0

            query_to_support = torch.clamp(query_to_support, -1, 1)
            if symmetric:
                support_coords = query_coords
                qts, stq = query_to_support.chunk(2)
                q_warp = torch.cat((query_coords, qts), dim=-1)
                s_warp = torch.cat((stq, support_coords), dim=-1)
                warp = torch.cat((q_warp, s_warp), dim=2)
                dense_certainty = torch.cat(dense_certainty.chunk(2), dim=3)[:, 0]
            else:
                warp = torch.cat((query_coords, query_to_support), dim=-1)
            if batched:
                return (
                    warp,
                    dense_certainty
                )
            else:
                return (
                    warp[0],
                    dense_certainty[0],
                )
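
A short sketch of the inference flow these methods define (assumptions: `matcher` is a constructed RegressionMatcher, e.g. from the model builders elsewhere in this repo, and "imA.jpg"/"imB.jpg" are placeholder paths):

    warp, certainty = matcher.match("imA.jpg", "imB.jpg")       # dense warp in [-1, 1] coords, per-pixel certainty
    sparse, sparse_certainty = matcher.sample(warp, certainty)  # (N, 4) normalized correspondences
    kpts_A, kpts_B = matcher.to_pixel_coordinates(sparse, H_A, W_A, H_B, W_B)

Here H_A, W_A, H_B, W_B are the original image sizes; to_pixel_coordinates simply rescales from the normalized [-1, 1] grid.
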
third_party/DKM/dkm/models/encoders.py
ADDED
@@ -0,0 +1,147 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as tvm


class ResNet18(nn.Module):
    def __init__(self, pretrained=False) -> None:
        super().__init__()
        self.net = tvm.resnet18(pretrained=pretrained)

    def forward(self, x):
        net = self.net
        x1 = x
        x = net.conv1(x1)
        x = net.bn1(x)
        x2 = net.relu(x)
        x = net.maxpool(x2)
        x4 = net.layer1(x)
        x8 = net.layer2(x4)
        x16 = net.layer3(x8)
        x32 = net.layer4(x16)
        return {32: x32, 16: x16, 8: x8, 4: x4, 2: x2, 1: x1}

    def train(self, mode=True):
        super().train(mode)
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()


class ResNet50(nn.Module):
    def __init__(self, pretrained=False, high_res=False, weights=None, dilation=None, freeze_bn=True, anti_aliased=False) -> None:
        super().__init__()
        if dilation is None:
            dilation = [False, False, False]
        if anti_aliased:
            # The anti-aliased backbone variant is not included in this release.
            raise NotImplementedError("anti_aliased ResNet50 is not available in this release")
        else:
            if weights is not None:
                self.net = tvm.resnet50(weights=weights, replace_stride_with_dilation=dilation)
            else:
                self.net = tvm.resnet50(pretrained=pretrained, replace_stride_with_dilation=dilation)

        self.high_res = high_res
        self.freeze_bn = freeze_bn

    def forward(self, x):
        net = self.net
        feats = {1: x}
        x = net.conv1(x)
        x = net.bn1(x)
        x = net.relu(x)
        feats[2] = x
        x = net.maxpool(x)
        x = net.layer1(x)
        feats[4] = x
        x = net.layer2(x)
        feats[8] = x
        x = net.layer3(x)
        feats[16] = x
        x = net.layer4(x)
        feats[32] = x
        return feats

    def train(self, mode=True):
        super().train(mode)
        if self.freeze_bn:
            for m in self.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eval()


class ResNet101(nn.Module):
    def __init__(self, pretrained=False, high_res=False, weights=None) -> None:
        super().__init__()
        if weights is not None:
            self.net = tvm.resnet101(weights=weights)
        else:
            self.net = tvm.resnet101(pretrained=pretrained)
        self.high_res = high_res
        self.scale_factor = 1 if not high_res else 1.5

    def forward(self, x):
        net = self.net
        feats = {1: x}
        sf = self.scale_factor
        if self.high_res:
            x = F.interpolate(x, scale_factor=sf, align_corners=False, mode="bicubic")
        x = net.conv1(x)
        x = net.bn1(x)
        x = net.relu(x)
        feats[2] = x if not self.high_res else F.interpolate(x, scale_factor=1 / sf, align_corners=False, mode="bilinear")
        x = net.maxpool(x)
        x = net.layer1(x)
        feats[4] = x if not self.high_res else F.interpolate(x, scale_factor=1 / sf, align_corners=False, mode="bilinear")
        x = net.layer2(x)
        feats[8] = x if not self.high_res else F.interpolate(x, scale_factor=1 / sf, align_corners=False, mode="bilinear")
        x = net.layer3(x)
        feats[16] = x if not self.high_res else F.interpolate(x, scale_factor=1 / sf, align_corners=False, mode="bilinear")
        x = net.layer4(x)
        feats[32] = x if not self.high_res else F.interpolate(x, scale_factor=1 / sf, align_corners=False, mode="bilinear")
        return feats

    def train(self, mode=True):
        super().train(mode)
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()


class WideResNet50(nn.Module):
    def __init__(self, pretrained=False, high_res=False, weights=None) -> None:
        super().__init__()
        if weights is not None:
            self.net = tvm.wide_resnet50_2(weights=weights)
        else:
            self.net = tvm.wide_resnet50_2(pretrained=pretrained)
        self.high_res = high_res
        self.scale_factor = 1 if not high_res else 1.5

    def forward(self, x):
        net = self.net
        feats = {1: x}
        sf = self.scale_factor
        if self.high_res:
            x = F.interpolate(x, scale_factor=sf, align_corners=False, mode="bicubic")
        x = net.conv1(x)
        x = net.bn1(x)
        x = net.relu(x)
        feats[2] = x if not self.high_res else F.interpolate(x, scale_factor=1 / sf, align_corners=False, mode="bilinear")
        x = net.maxpool(x)
        x = net.layer1(x)
        feats[4] = x if not self.high_res else F.interpolate(x, scale_factor=1 / sf, align_corners=False, mode="bilinear")
        x = net.layer2(x)
        feats[8] = x if not self.high_res else F.interpolate(x, scale_factor=1 / sf, align_corners=False, mode="bilinear")
        x = net.layer3(x)
        feats[16] = x if not self.high_res else F.interpolate(x, scale_factor=1 / sf, align_corners=False, mode="bilinear")
        x = net.layer4(x)
        feats[32] = x if not self.high_res else F.interpolate(x, scale_factor=1 / sf, align_corners=False, mode="bilinear")
        return feats

    def train(self, mode=True):
        super().train(mode)
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
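
To illustrate the contract these backbones share, a minimal sketch (the import path is assumed from this repo layout): each encoder returns a dict keyed by stride, which is what the Encoder/Decoder in dkm.py consume.

    import torch
    from dkm.models.encoders import ResNet50

    backbone = ResNet50(pretrained=False)
    feats = backbone(torch.randn(1, 3, 384, 512))
    print({stride: tuple(f.shape) for stride, f in feats.items()})
    # strides {1, 2, 4, 8, 16, 32}; e.g. feats[8] is (1, 512, 48, 64) for a 384x512 input
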