Spaces: No application file

Vincentqyw committed · Commit 9223079
Parent(s): 71bbcb3

add: files

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- .gitattributes +1 -0
- .gitignore +19 -0
- .gitmodules +45 -0
- README.md +107 -12
- app.py +291 -0
- assets/demo.gif +3 -0
- assets/gui.jpg +0 -0
- datasets/.gitignore +0 -0
- datasets/lines/terrace0.JPG +0 -0
- datasets/lines/terrace1.JPG +0 -0
- datasets/sacre_coeur/README.md +3 -0
- datasets/sacre_coeur/mapping/02928139_3448003521.jpg +0 -0
- datasets/sacre_coeur/mapping/03903474_1471484089.jpg +0 -0
- datasets/sacre_coeur/mapping/10265353_3838484249.jpg +0 -0
- datasets/sacre_coeur/mapping/17295357_9106075285.jpg +0 -0
- datasets/sacre_coeur/mapping/32809961_8274055477.jpg +0 -0
- datasets/sacre_coeur/mapping/44120379_8371960244.jpg +0 -0
- datasets/sacre_coeur/mapping/51091044_3486849416.jpg +0 -0
- datasets/sacre_coeur/mapping/60584745_2207571072.jpg +0 -0
- datasets/sacre_coeur/mapping/71295362_4051449754.jpg +0 -0
- datasets/sacre_coeur/mapping/93341989_396310999.jpg +0 -0
- extra_utils/__init__.py +0 -0
- extra_utils/plotting.py +504 -0
- extra_utils/utils.py +182 -0
- extra_utils/visualize_util.py +642 -0
- hloc/__init__.py +31 -0
- hloc/extract_features.py +516 -0
- hloc/extractors/__init__.py +0 -0
- hloc/extractors/alike.py +52 -0
- hloc/extractors/cosplace.py +44 -0
- hloc/extractors/d2net.py +57 -0
- hloc/extractors/darkfeat.py +57 -0
- hloc/extractors/dedode.py +102 -0
- hloc/extractors/dir.py +76 -0
- hloc/extractors/disk.py +32 -0
- hloc/extractors/dog.py +131 -0
- hloc/extractors/example.py +58 -0
- hloc/extractors/fire.py +73 -0
- hloc/extractors/fire_local.py +90 -0
- hloc/extractors/lanet.py +53 -0
- hloc/extractors/netvlad.py +147 -0
- hloc/extractors/openibl.py +26 -0
- hloc/extractors/r2d2.py +61 -0
- hloc/extractors/rekd.py +53 -0
- hloc/extractors/superpoint.py +44 -0
- hloc/match_dense.py +384 -0
- hloc/match_features.py +389 -0
- hloc/matchers/__init__.py +3 -0
- hloc/matchers/adalam.py +69 -0
- hloc/matchers/aspanformer.py +76 -0
.gitattributes
CHANGED
```diff
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
```
.gitignore
ADDED
@@ -0,0 +1,19 @@
```
build/

lib/
bin/

cmake_modules/
cmake-build-debug/
.idea/
.vscode/
*.pyc
flagged
.ipynb_checkpoints
__pycache__
Untitled*
experiments
third_party/REKD
Dockerfile
hloc/matchers/dedode.py
gradio_cached_examples
```
.gitmodules
ADDED
@@ -0,0 +1,45 @@
```ini
[submodule "third_party/Roma"]
    path = third_party/Roma
    url = https://github.com/Vincentqyw/RoMa.git
[submodule "third_party/SuperGluePretrainedNetwork"]
    path = third_party/SuperGluePretrainedNetwork
    url = https://github.com/magicleap/SuperGluePretrainedNetwork.git
[submodule "third_party/SOLD2"]
    path = third_party/SOLD2
    url = https://github.com/cvg/SOLD2.git
[submodule "third_party/GlueStick"]
    path = third_party/GlueStick
    url = https://github.com/cvg/GlueStick.git
[submodule "third_party/ASpanFormer"]
    path = third_party/ASpanFormer
    url = https://github.com/Vincentqyw/ml-aspanformer.git
[submodule "third_party/TopicFM"]
    path = third_party/TopicFM
    url = https://github.com/Vincentqyw/TopicFM.git
[submodule "third_party/d2net"]
    path = third_party/d2net
    url = https://github.com/Vincentqyw/d2-net.git
[submodule "third_party/r2d2"]
    path = third_party/r2d2
    url = https://github.com/naver/r2d2.git
[submodule "third_party/DKM"]
    path = third_party/DKM
    url = https://github.com/Vincentqyw/DKM.git
[submodule "third_party/ALIKE"]
    path = third_party/ALIKE
    url = https://github.com/Shiaoming/ALIKE.git
[submodule "third_party/lanet"]
    path = third_party/lanet
    url = https://github.com/wangch-g/lanet.git
[submodule "third_party/LightGlue"]
    path = third_party/LightGlue
    url = https://github.com/cvg/LightGlue.git
[submodule "third_party/SGMNet"]
    path = third_party/SGMNet
    url = https://github.com/vdvchen/SGMNet.git
[submodule "third_party/DarkFeat"]
    path = third_party/DarkFeat
    url = https://github.com/THU-LYJ-Lab/DarkFeat.git
[submodule "third_party/DeDoDe"]
    path = third_party/DeDoDe
    url = https://github.com/Parskatt/DeDoDe.git
```
README.md
CHANGED
@@ -1,12 +1,107 @@
[![Contributors][contributors-shield]][contributors-url]
[![Forks][forks-shield]][forks-url]
[![Stargazers][stars-shield]][stars-url]
[![Issues][issues-shield]][issues-url]

<p align="center">
  <h1 align="center"><br><ins>Image Matching WebUI</ins><br>find matches between 2 images</h1>
</p>

## Description

This simple tool matches image pairs with a range of well-known image matching algorithms. It features a graphical user interface (GUI) built with [gradio](https://gradio.app/): select two images and a matching algorithm, and obtain a matching result.
**Note**: the image source can be either local files or the webcam.

Here is a demo of the tool:

![demo](assets/demo.gif)

The tool currently supports various popular image matching algorithms, namely:
- [x] [LightGlue](https://github.com/cvg/LightGlue), ICCV 2023
- [x] [DeDoDe](https://github.com/Parskatt/DeDoDe), TBD
- [x] [DarkFeat](https://github.com/THU-LYJ-Lab/DarkFeat), AAAI 2023
- [ ] [ASTR](https://github.com/ASTR2023/ASTR), CVPR 2023
- [ ] [SEM](https://github.com/SEM2023/SEM), CVPR 2023
- [ ] [DeepLSD](https://github.com/cvg/DeepLSD), CVPR 2023
- [x] [GlueStick](https://github.com/cvg/GlueStick), ArXiv 2023
- [ ] [ConvMatch](https://github.com/SuhZhang/ConvMatch), AAAI 2023
- [x] [SOLD2](https://github.com/cvg/SOLD2), CVPR 2021
- [ ] [LineTR](https://github.com/yosungho/LineTR), RA-L 2021
- [x] [DKM](https://github.com/Parskatt/DKM), CVPR 2023
- [x] [RoMa](https://github.com/Vincentqyw/RoMa), ArXiv 2023
- [ ] [NCMNet](https://github.com/xinliu29/NCMNet), CVPR 2023
- [x] [TopicFM](https://github.com/Vincentqyw/TopicFM), AAAI 2023
- [x] [ASpanFormer](https://github.com/Vincentqyw/ml-aspanformer), ECCV 2022
- [x] [LANet](https://github.com/wangch-g/lanet), ACCV 2022
- [ ] [LISRD](https://github.com/rpautrat/LISRD), ECCV 2022
- [ ] [REKD](https://github.com/bluedream1121/REKD), CVPR 2022
- [x] [ALIKE](https://github.com/Shiaoming/ALIKE), ArXiv 2022
- [x] [SGMNet](https://github.com/vdvchen/SGMNet), ICCV 2021
- [x] [SuperPoint](https://github.com/magicleap/SuperPointPretrainedNetwork), CVPRW 2018
- [x] [SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork), CVPR 2020
- [x] [D2Net](https://github.com/Vincentqyw/d2-net), CVPR 2019
- [x] [R2D2](https://github.com/naver/r2d2), NeurIPS 2019
- [x] [DISK](https://github.com/cvlab-epfl/disk), NeurIPS 2020
- [ ] [Key.Net](https://github.com/axelBarroso/Key.Net), ICCV 2019
- [ ] [OANet](https://github.com/zjhthu/OANet), ICCV 2019
- [ ] [SOSNet](https://github.com/scape-research/SOSNet), CVPR 2019
- [x] [SIFT](https://docs.opencv.org/4.x/da/df5/tutorial_py_sift_intro.html), IJCV 2004

## How to use

### requirements
``` bash
git clone --recursive https://github.com/Vincentqyw/image-matching-webui.git
cd image-matching-webui
conda env create -f environment.yaml
conda activate imw
```

### run demo
``` bash
python3 ./app.py
```
then open http://localhost:7860 in your browser.

![](assets/gui.jpg)

### Add your own feature / matcher

An example of adding a local feature is provided in [hloc/extractors/example.py](hloc/extractors/example.py). Then add the feature settings to `confs` in [hloc/extract_features.py](hloc/extract_features.py). The last step is adding the corresponding entry to `matcher_zoo` in [extra_utils/utils.py](extra_utils/utils.py).

## Contributions welcome!

External contributions are very welcome. Please follow the [PEP8 style guidelines](https://www.python.org/dev/peps/pep-0008/) using a linter like flake8 (reformat with `python -m black .`). This is a non-exhaustive list of features that might be valuable additions:

- [x] add webcam support
- [x] add [line feature matching](https://github.com/Vincentqyw/LineSegmentsDetection) algorithms
- [x] example to add a new feature extractor / matcher
- [ ] ransac to filter outliers
- [ ] support exporting matches to colmap ([#issue 6](https://github.com/Vincentqyw/image-matching-webui/issues/6))
- [ ] add a config file to set default parameters
- [ ] dynamically load models and reduce GPU overload

Adding local features / matchers as submodules is very easy. For example, to add [GlueStick](https://github.com/cvg/GlueStick):

``` bash
git submodule add https://github.com/cvg/GlueStick.git third_party/GlueStick
```

If remote submodule repositories are updated, don't forget to pull submodules with `git submodule update --remote`. To update only one submodule, use `git submodule update --remote third_party/GlueStick`.

## Resources
- [Image Matching: Local Features & Beyond](https://image-matching-workshop.github.io)
- [Long-term Visual Localization](https://www.visuallocalization.net)

## Acknowledgement

This code is built on top of [Hierarchical-Localization](https://github.com/cvg/Hierarchical-Localization). We express our gratitude to the authors for their valuable source code.

[contributors-shield]: https://img.shields.io/github/contributors/Vincentqyw/image-matching-webui.svg?style=for-the-badge
[contributors-url]: https://github.com/Vincentqyw/image-matching-webui/graphs/contributors
[forks-shield]: https://img.shields.io/github/forks/Vincentqyw/image-matching-webui.svg?style=for-the-badge
[forks-url]: https://github.com/Vincentqyw/image-matching-webui/network/members
[stars-shield]: https://img.shields.io/github/stars/Vincentqyw/image-matching-webui.svg?style=for-the-badge
[stars-url]: https://github.com/Vincentqyw/image-matching-webui/stargazers
[issues-shield]: https://img.shields.io/github/issues/Vincentqyw/image-matching-webui.svg?style=for-the-badge
[issues-url]: https://github.com/Vincentqyw/image-matching-webui/issues
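
The "Add your own feature / matcher" section of the README above lists three steps. The sketch below is a minimal, hypothetical illustration of those steps, assuming the `BaseModel` interface used by `hloc/extractors/example.py` (`_init`/`_forward`) and the config layout visible in `extract_features.confs` and `matcher_zoo` in this commit; the `RandomFeature` class and the `"randomfeature"` entries are illustrative names only, not part of this commit.

```python
import torch
from hloc.utils.base_model import BaseModel  # same base class as hloc/extractors/example.py


class RandomFeature(BaseModel):
    """Toy extractor: random keypoints with constant descriptors (illustration only)."""

    default_conf = {"max_keypoints": 1024, "keypoint_threshold": 0.0}
    required_inputs = ["image"]

    def _init(self, conf):
        # a real extractor would load its network weights here
        self.dim = 128  # descriptor dimension of this toy model

    def _forward(self, data):
        image = data["image"]  # (B, C, H, W) tensor
        b, _, h, w = image.shape
        n = self.conf["max_keypoints"]
        kpts = torch.rand(b, n, 2) * torch.tensor([w - 1.0, h - 1.0])  # (x, y) pixels
        scores = torch.ones(b, n)
        descs = torch.zeros(b, self.dim, n)
        return {"keypoints": kpts, "scores": scores, "descriptors": descs}


# Step 2 (sketch): add an entry to `confs` in hloc/extract_features.py,
# mirroring the existing entries (check the exact keys in your copy):
# confs["randomfeature"] = {
#     "output": "feats-randomfeature",
#     "model": {"name": "randomfeature", "max_keypoints": 1024},
#     "preprocessing": {"grayscale": False, "resize_max": 1600},
# }
#
# Step 3 (sketch): register it in `matcher_zoo` in extra_utils/utils.py:
# matcher_zoo["randomfeature+mnn"] = {
#     "config": match_features.confs["NN-mutual"],
#     "config_feature": extract_features.confs["randomfeature"],
#     "dense": False,
# }
```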
app.py
ADDED
@@ -0,0 +1,291 @@
```python
import argparse
import gradio as gr

from hloc import extract_features
from extra_utils.utils import (
    matcher_zoo,
    device,
    match_dense,
    match_features,
    get_model,
    get_feature_model,
    display_matches
)

def run_matching(
    match_threshold, extract_max_keypoints, keypoint_threshold, key, image0, image1
):
    # image0 and image1 are RGB images
    if image0 is None or image1 is None:
        raise gr.Error("Error: No images found! Please upload two images.")

    model = matcher_zoo[key]
    match_conf = model["config"]
    # update match config
    match_conf["model"]["match_threshold"] = match_threshold
    match_conf["model"]["max_keypoints"] = extract_max_keypoints

    matcher = get_model(match_conf)
    if model["dense"]:
        pred = match_dense.match_images(
            matcher, image0, image1, match_conf["preprocessing"], device=device
        )
        del matcher
        extract_conf = None
    else:
        extract_conf = model["config_feature"]
        # update extract config
        extract_conf["model"]["max_keypoints"] = extract_max_keypoints
        extract_conf["model"]["keypoint_threshold"] = keypoint_threshold
        extractor = get_feature_model(extract_conf)
        pred0 = extract_features.extract(
            extractor, image0, extract_conf["preprocessing"]
        )
        pred1 = extract_features.extract(
            extractor, image1, extract_conf["preprocessing"]
        )
        pred = match_features.match_images(matcher, pred0, pred1)
        del extractor
    fig, num_inliers = display_matches(pred)
    del pred
    return (
        fig,
        {"matches number": num_inliers},
        {"match_conf": match_conf, "extractor_conf": extract_conf},
    )


def ui_change_imagebox(choice):
    return {"value": None, "source": choice, "__type__": "update"}


def ui_reset_state(
    match_threshold, extract_max_keypoints, keypoint_threshold, key, image0, image1
):
    match_threshold = 0.2
    extract_max_keypoints = 1000
    keypoint_threshold = 0.015
    key = list(matcher_zoo.keys())[0]
    image0 = None
    image1 = None
    return (
        match_threshold,
        extract_max_keypoints,
        keypoint_threshold,
        key,
        image0,
        image1,
        {"value": None, "source": "upload", "__type__": "update"},
        {"value": None, "source": "upload", "__type__": "update"},
        "upload",
        None,
        {},
        {},
    )


def run(config):
    with gr.Blocks(
        theme=gr.themes.Monochrome(), css="footer {visibility: hidden}"
    ) as app:
        gr.Markdown(
            """
            <p align="center">
            <h1 align="center">Image Matching WebUI</h1>
            </p>
            """
        )

        with gr.Row(equal_height=False):
            with gr.Column():
                with gr.Row():
                    matcher_list = gr.Dropdown(
                        choices=list(matcher_zoo.keys()),
                        value="disk+lightglue",
                        label="Matching Model",
                        interactive=True,
                    )
                    match_image_src = gr.Radio(
                        ["upload", "webcam", "canvas"],
                        label="Image Source",
                        value="upload",
                    )

                with gr.Row():
                    match_setting_threshold = gr.Slider(
                        minimum=0.0,
                        maximum=1,
                        step=0.001,
                        label="Match threshold",
                        value=0.1,
                    )
                    match_setting_max_features = gr.Slider(
                        minimum=10,
                        maximum=10000,
                        step=10,
                        label="Max number of features",
                        value=1000,
                    )
                # TODO: add line settings
                with gr.Row():
                    detect_keypoints_threshold = gr.Slider(
                        minimum=0,
                        maximum=1,
                        step=0.001,
                        label="Keypoint threshold",
                        value=0.015,
                    )
                    detect_line_threshold = gr.Slider(
                        minimum=0.1,
                        maximum=1,
                        step=0.01,
                        label="Line threshold",
                        value=0.2,
                    )
                # matcher_lists = gr.Radio(
                #     ["NN-mutual", "Dual-Softmax"],
                #     label="Matcher mode",
                #     value="NN-mutual",
                # )
                with gr.Row():
                    input_image0 = gr.Image(
                        label="Image 0",
                        type="numpy",
                        interactive=True,
                        image_mode="RGB",
                    )
                    input_image1 = gr.Image(
                        label="Image 1",
                        type="numpy",
                        interactive=True,
                        image_mode="RGB",
                    )

                with gr.Row():
                    button_reset = gr.Button(label="Reset", value="Reset")
                    button_run = gr.Button(
                        label="Run Match", value="Run Match", variant="primary"
                    )

                with gr.Accordion("Open for More!", open=False):
                    gr.Markdown(
                        f"""
                        <h3>Supported Algorithms</h3>
                        {", ".join(matcher_zoo.keys())}
                        """
                    )

                # collect inputs
                inputs = [
                    match_setting_threshold,
                    match_setting_max_features,
                    detect_keypoints_threshold,
                    matcher_list,
                    input_image0,
                    input_image1,
                ]

                # Add some examples
                with gr.Row():
                    examples = [
                        [
                            0.1,
                            2000,
                            0.015,
                            "disk+lightglue",
                            "datasets/sacre_coeur/mapping/71295362_4051449754.jpg",
                            "datasets/sacre_coeur/mapping/93341989_396310999.jpg",
                        ],
                        [
                            0.1,
                            2000,
                            0.015,
                            "loftr",
                            "datasets/sacre_coeur/mapping/03903474_1471484089.jpg",
                            "datasets/sacre_coeur/mapping/02928139_3448003521.jpg",
                        ],
                        [
                            0.1,
                            2000,
                            0.015,
                            "disk",
                            "datasets/sacre_coeur/mapping/10265353_3838484249.jpg",
                            "datasets/sacre_coeur/mapping/51091044_3486849416.jpg",
                        ],
                        [
                            0.1,
                            2000,
                            0.015,
                            "topicfm",
                            "datasets/sacre_coeur/mapping/44120379_8371960244.jpg",
                            "datasets/sacre_coeur/mapping/93341989_396310999.jpg",
                        ],
                        [
                            0.1,
                            2000,
                            0.015,
                            "superpoint+superglue",
                            "datasets/sacre_coeur/mapping/17295357_9106075285.jpg",
                            "datasets/sacre_coeur/mapping/44120379_8371960244.jpg",
                        ],
                    ]
                    # Example inputs
                    gr.Examples(
                        examples=examples,
                        inputs=inputs,
                        outputs=[],
                        fn=run_matching,
                        cache_examples=False,
                        label="Examples (click one of the images below to Run Match)",
                    )

            with gr.Column():
                output_mkpts = gr.Image(label="Keypoints Matching", type="numpy")
                matches_result_info = gr.JSON(label="Matches Statistics")
                matcher_info = gr.JSON(label="Match info")

        # callbacks
        match_image_src.change(
            fn=ui_change_imagebox, inputs=match_image_src, outputs=input_image0
        )
        match_image_src.change(
            fn=ui_change_imagebox, inputs=match_image_src, outputs=input_image1
        )

        # collect outputs
        outputs = [
            output_mkpts,
            matches_result_info,
            matcher_info,
        ]
        # button callbacks
        button_run.click(fn=run_matching, inputs=inputs, outputs=outputs)

        # Reset images
        reset_outputs = [
            match_setting_threshold,
            match_setting_max_features,
            detect_keypoints_threshold,
            matcher_list,
            input_image0,
            input_image1,
            input_image0,
            input_image1,
            match_image_src,
            output_mkpts,
            matches_result_info,
            matcher_info,
        ]
        button_reset.click(fn=ui_reset_state, inputs=inputs, outputs=reset_outputs)

    app.launch(share=True)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config_path", type=str, default="config.yaml", help="configuration file path"
    )
    args = parser.parse_args()
    config = None
    run(config)
```
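
The `run_matching` callback above takes the same arguments as the Gradio `inputs` list, so it can also be driven without the UI. A minimal sketch, assuming it is executed from the repository root with the environment described in the README:

```python
# Sketch: call run_matching() directly with two of the bundled demo images.
import cv2
from app import run_matching

# run_matching expects RGB numpy arrays; cv2 loads BGR, so convert.
img0 = cv2.cvtColor(
    cv2.imread("datasets/sacre_coeur/mapping/71295362_4051449754.jpg"), cv2.COLOR_BGR2RGB
)
img1 = cv2.cvtColor(
    cv2.imread("datasets/sacre_coeur/mapping/93341989_396310999.jpg"), cv2.COLOR_BGR2RGB
)

fig, stats, confs = run_matching(
    match_threshold=0.1,
    extract_max_keypoints=1000,
    keypoint_threshold=0.015,
    key="disk+lightglue",   # any key from matcher_zoo
    image0=img0,
    image1=img1,
)
print(stats)  # {"matches number": ...}
```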
assets/demo.gif
ADDED
Git LFS Details
assets/gui.jpg
ADDED
datasets/.gitignore
ADDED
File without changes
datasets/lines/terrace0.JPG
ADDED
datasets/lines/terrace1.JPG
ADDED
datasets/sacre_coeur/README.md
ADDED
@@ -0,0 +1,3 @@
# Sacre Coeur demo

We provide here a subset of images depicting the Sacre Coeur. These images were obtained from the [Image Matching Challenge 2021](https://www.cs.ubc.ca/research/image-matching-challenge/2021/data/) and originally come from the [Yahoo Flickr Creative Commons 100M (YFCC) dataset](https://multimediacommons.wordpress.com/yfcc100m-core-dataset/).
datasets/sacre_coeur/mapping/02928139_3448003521.jpg
ADDED
datasets/sacre_coeur/mapping/03903474_1471484089.jpg
ADDED
datasets/sacre_coeur/mapping/10265353_3838484249.jpg
ADDED
datasets/sacre_coeur/mapping/17295357_9106075285.jpg
ADDED
datasets/sacre_coeur/mapping/32809961_8274055477.jpg
ADDED
datasets/sacre_coeur/mapping/44120379_8371960244.jpg
ADDED
datasets/sacre_coeur/mapping/51091044_3486849416.jpg
ADDED
datasets/sacre_coeur/mapping/60584745_2207571072.jpg
ADDED
datasets/sacre_coeur/mapping/71295362_4051449754.jpg
ADDED
datasets/sacre_coeur/mapping/93341989_396310999.jpg
ADDED
extra_utils/__init__.py
ADDED
File without changes
extra_utils/plotting.py
ADDED
@@ -0,0 +1,504 @@
```python
import bisect
import numpy as np
import matplotlib.pyplot as plt
import matplotlib, os, cv2
import matplotlib.cm as cm
from PIL import Image
import torch.nn.functional as F
import torch


def _compute_conf_thresh(data):
    dataset_name = data["dataset_name"][0].lower()
    if dataset_name == "scannet":
        thr = 5e-4
    elif dataset_name == "megadepth":
        thr = 1e-4
    else:
        raise ValueError(f"Unknown dataset: {dataset_name}")
    return thr


# --- VISUALIZATION --- #


def make_matching_figure(
    img0,
    img1,
    mkpts0,
    mkpts1,
    color,
    titles=None,
    kpts0=None,
    kpts1=None,
    text=[],
    dpi=75,
    path=None,
    pad=0,
):
    # draw image pair
    # assert mkpts0.shape[0] == mkpts1.shape[0], f'mkpts0: {mkpts0.shape[0]} v.s. mkpts1: {mkpts1.shape[0]}'
    fig, axes = plt.subplots(1, 2, figsize=(10, 6), dpi=dpi)
    axes[0].imshow(img0)  # , cmap='gray')
    axes[1].imshow(img1)  # , cmap='gray')
    for i in range(2):  # clear all frames
        axes[i].get_yaxis().set_ticks([])
        axes[i].get_xaxis().set_ticks([])
        for spine in axes[i].spines.values():
            spine.set_visible(False)
        if titles is not None:
            axes[i].set_title(titles[i])

    plt.tight_layout(pad=pad)

    if kpts0 is not None:
        assert kpts1 is not None
        axes[0].scatter(kpts0[:, 0], kpts0[:, 1], c="w", s=5)
        axes[1].scatter(kpts1[:, 0], kpts1[:, 1], c="w", s=5)

    # draw matches
    if mkpts0.shape[0] != 0 and mkpts1.shape[0] != 0:
        fig.canvas.draw()
        transFigure = fig.transFigure.inverted()
        fkpts0 = transFigure.transform(axes[0].transData.transform(mkpts0))
        fkpts1 = transFigure.transform(axes[1].transData.transform(mkpts1))
        fig.lines = [
            matplotlib.lines.Line2D(
                (fkpts0[i, 0], fkpts1[i, 0]),
                (fkpts0[i, 1], fkpts1[i, 1]),
                transform=fig.transFigure,
                c=color[i],
                linewidth=2,
            )
            for i in range(len(mkpts0))
        ]

        # freeze the axes to prevent the transform to change
        axes[0].autoscale(enable=False)
        axes[1].autoscale(enable=False)

        axes[0].scatter(mkpts0[:, 0], mkpts0[:, 1], c=color[..., :3], s=4)
        axes[1].scatter(mkpts1[:, 0], mkpts1[:, 1], c=color[..., :3], s=4)

    # put txts
    txt_color = "k" if img0[:100, :200].mean() > 200 else "w"
    fig.text(
        0.01,
        0.99,
        "\n".join(text),
        transform=fig.axes[0].transAxes,
        fontsize=15,
        va="top",
        ha="left",
        color=txt_color,
    )

    # save or return figure
    if path:
        plt.savefig(str(path), bbox_inches="tight", pad_inches=0)
        plt.close()
    else:
        return fig


def _make_evaluation_figure(data, b_id, alpha="dynamic"):
    b_mask = data["m_bids"] == b_id
    conf_thr = _compute_conf_thresh(data)

    img0 = (data["image0"][b_id][0].cpu().numpy() * 255).round().astype(np.int32)
    img1 = (data["image1"][b_id][0].cpu().numpy() * 255).round().astype(np.int32)
    kpts0 = data["mkpts0_f"][b_mask].cpu().numpy()
    kpts1 = data["mkpts1_f"][b_mask].cpu().numpy()

    # for megadepth, we visualize matches on the resized image
    if "scale0" in data:
        kpts0 = kpts0 / data["scale0"][b_id].cpu().numpy()[[1, 0]]
        kpts1 = kpts1 / data["scale1"][b_id].cpu().numpy()[[1, 0]]

    epi_errs = data["epi_errs"][b_mask].cpu().numpy()
    correct_mask = epi_errs < conf_thr
    precision = np.mean(correct_mask) if len(correct_mask) > 0 else 0
    n_correct = np.sum(correct_mask)
    n_gt_matches = int(data["conf_matrix_gt"][b_id].sum().cpu())
    recall = 0 if n_gt_matches == 0 else n_correct / (n_gt_matches)
    # recall might be larger than 1, since the calculation of conf_matrix_gt
    # uses groundtruth depths and camera poses, but epipolar distance is used here.

    # matching info
    if alpha == "dynamic":
        alpha = dynamic_alpha(len(correct_mask))
    color = error_colormap(epi_errs, conf_thr, alpha=alpha)

    text = [
        f"#Matches {len(kpts0)}",
        f"Precision({conf_thr:.2e}) ({100 * precision:.1f}%): {n_correct}/{len(kpts0)}",
        f"Recall({conf_thr:.2e}) ({100 * recall:.1f}%): {n_correct}/{n_gt_matches}",
    ]

    # make the figure
    figure = make_matching_figure(img0, img1, kpts0, kpts1, color, text=text)
    return figure


def _make_confidence_figure(data, b_id):
    # TODO: Implement confidence figure
    raise NotImplementedError()


def make_matching_figures(data, config, mode="evaluation"):
    """Make matching figures for a batch.

    Args:
        data (Dict): a batch updated by PL_LoFTR.
        config (Dict): matcher config
    Returns:
        figures (Dict[str, List[plt.figure]]
    """
    assert mode in ["evaluation", "confidence"]  # 'confidence'
    figures = {mode: []}
    for b_id in range(data["image0"].size(0)):
        if mode == "evaluation":
            fig = _make_evaluation_figure(
                data, b_id, alpha=config.TRAINER.PLOT_MATCHES_ALPHA
            )
        elif mode == "confidence":
            fig = _make_confidence_figure(data, b_id)
        else:
            raise ValueError(f"Unknown plot mode: {mode}")
        figures[mode].append(fig)
    return figures


def dynamic_alpha(
    n_matches, milestones=[0, 300, 1000, 2000], alphas=[1.0, 0.8, 0.4, 0.2]
):
    if n_matches == 0:
        return 1.0
    ranges = list(zip(alphas, alphas[1:] + [None]))
    loc = bisect.bisect_right(milestones, n_matches) - 1
    _range = ranges[loc]
    if _range[1] is None:
        return _range[0]
    return _range[1] + (milestones[loc + 1] - n_matches) / (
        milestones[loc + 1] - milestones[loc]
    ) * (_range[0] - _range[1])


def error_colormap(err, thr, alpha=1.0):
    assert alpha <= 1.0 and alpha > 0, f"Invalid alpha value: {alpha}"
    x = 1 - np.clip(err / (thr * 2), 0, 1)
    return np.clip(
        np.stack([2 - x * 2, x * 2, np.zeros_like(x), np.ones_like(x) * alpha], -1),
        0,
        1,
    )


np.random.seed(1995)
color_map = np.arange(100)
np.random.shuffle(color_map)


def draw_topics(
    data, img0, img1, saved_folder="viz_topics", show_n_topics=8, saved_name=None
):

    topic0, topic1 = data["topic_matrix"]["img0"], data["topic_matrix"]["img1"]
    hw0_c, hw1_c = data["hw0_c"], data["hw1_c"]
    hw0_i, hw1_i = data["hw0_i"], data["hw1_i"]
    # print(hw0_i, hw1_i)
    scale0, scale1 = hw0_i[0] // hw0_c[0], hw1_i[0] // hw1_c[0]
    if "scale0" in data:
        scale0 *= data["scale0"][0]
    else:
        scale0 = (scale0, scale0)
    if "scale1" in data:
        scale1 *= data["scale1"][0]
    else:
        scale1 = (scale1, scale1)

    n_topics = topic0.shape[-1]
    # mask0_nonzero = topic0[0].sum(dim=-1, keepdim=True) > 0
    # mask1_nonzero = topic1[0].sum(dim=-1, keepdim=True) > 0
    theta0 = topic0[0].sum(dim=0)
    theta0 /= theta0.sum().float()
    theta1 = topic1[0].sum(dim=0)
    theta1 /= theta1.sum().float()
    # top_topic0 = torch.argsort(theta0, descending=True)[:show_n_topics]
    # top_topic1 = torch.argsort(theta1, descending=True)[:show_n_topics]
    top_topics = torch.argsort(theta0 * theta1, descending=True)[:show_n_topics]
    # print(sum_topic0, sum_topic1)

    topic0 = topic0[0].argmax(
        dim=-1, keepdim=True
    )  # .float() / (n_topics - 1) #* 255 + 1 #
    # topic0[~mask0_nonzero] = -1
    topic1 = topic1[0].argmax(
        dim=-1, keepdim=True
    )  # .float() / (n_topics - 1) #* 255 + 1
    # topic1[~mask1_nonzero] = -1
    label_img0, label_img1 = torch.zeros_like(topic0) - 1, torch.zeros_like(topic1) - 1
    for i, k in enumerate(top_topics):
        label_img0[topic0 == k] = color_map[k]
        label_img1[topic1 == k] = color_map[k]

    # print(hw0_c, scale0)
    # print(hw1_c, scale1)
    # map_topic0 = F.fold(label_img0.unsqueeze(0), hw0_i, kernel_size=scale0, stride=scale0)
    map_topic0 = (
        label_img0.float().view(hw0_c).cpu().numpy()
    )  # map_topic0.squeeze(0).squeeze(0).cpu().numpy()
    map_topic0 = cv2.resize(
        map_topic0, (int(hw0_c[1] * scale0[0]), int(hw0_c[0] * scale0[1]))
    )
    # map_topic1 = F.fold(label_img1.unsqueeze(0), hw1_i, kernel_size=scale1, stride=scale1)
    map_topic1 = (
        label_img1.float().view(hw1_c).cpu().numpy()
    )  # map_topic1.squeeze(0).squeeze(0).cpu().numpy()
    map_topic1 = cv2.resize(
        map_topic1, (int(hw1_c[1] * scale1[0]), int(hw1_c[0] * scale1[1]))
    )

    # show image0
    if saved_name is None:
        return map_topic0, map_topic1

    if not os.path.exists(saved_folder):
        os.makedirs(saved_folder)
    path_saved_img0 = os.path.join(saved_folder, "{}_0.png".format(saved_name))
    plt.imshow(img0)
    masked_map_topic0 = np.ma.masked_where(map_topic0 < 0, map_topic0)
    plt.imshow(
        masked_map_topic0,
        cmap=plt.cm.jet,
        vmin=0,
        vmax=n_topics - 1,
        alpha=0.3,
        interpolation="bilinear",
    )
    # plt.show()
    plt.axis("off")
    plt.savefig(path_saved_img0, bbox_inches="tight", pad_inches=0, dpi=250)
    plt.close()

    path_saved_img1 = os.path.join(saved_folder, "{}_1.png".format(saved_name))
    plt.imshow(img1)
    masked_map_topic1 = np.ma.masked_where(map_topic1 < 0, map_topic1)
    plt.imshow(
        masked_map_topic1,
        cmap=plt.cm.jet,
        vmin=0,
        vmax=n_topics - 1,
        alpha=0.3,
        interpolation="bilinear",
    )
    plt.axis("off")
    plt.savefig(path_saved_img1, bbox_inches="tight", pad_inches=0, dpi=250)
    plt.close()


def draw_topicfm_demo(
    data,
    img0,
    img1,
    mkpts0,
    mkpts1,
    mcolor,
    text,
    show_n_topics=8,
    topic_alpha=0.3,
    margin=5,
    path=None,
    opencv_display=False,
    opencv_title="",
):
    topic_map0, topic_map1 = draw_topics(data, img0, img1, show_n_topics=show_n_topics)

    mask_tm0, mask_tm1 = np.expand_dims(topic_map0 >= 0, axis=-1), np.expand_dims(
        topic_map1 >= 0, axis=-1
    )

    topic_cm0, topic_cm1 = cm.jet(topic_map0 / 99.0), cm.jet(topic_map1 / 99.0)
    topic_cm0 = cv2.cvtColor(topic_cm0[..., :3].astype(np.float32), cv2.COLOR_RGB2BGR)
    topic_cm1 = cv2.cvtColor(topic_cm1[..., :3].astype(np.float32), cv2.COLOR_RGB2BGR)
    overlay0 = (mask_tm0 * topic_cm0 + (1 - mask_tm0) * img0).astype(np.float32)
    overlay1 = (mask_tm1 * topic_cm1 + (1 - mask_tm1) * img1).astype(np.float32)

    cv2.addWeighted(overlay0, topic_alpha, img0, 1 - topic_alpha, 0, overlay0)
    cv2.addWeighted(overlay1, topic_alpha, img1, 1 - topic_alpha, 0, overlay1)

    overlay0, overlay1 = (overlay0 * 255).astype(np.uint8), (overlay1 * 255).astype(
        np.uint8
    )

    h0, w0 = img0.shape[:2]
    h1, w1 = img1.shape[:2]
    h, w = h0 * 2 + margin * 2, w0 * 2 + margin
    out_fig = 255 * np.ones((h, w, 3), dtype=np.uint8)
    out_fig[:h0, :w0] = overlay0
    if h0 >= h1:
        start = (h0 - h1) // 2
        out_fig[start : (start + h1), (w0 + margin) : (w0 + margin + w1)] = overlay1
    else:
        start = (h1 - h0) // 2
        out_fig[:h0, (w0 + margin) : (w0 + margin + w1)] = overlay1[
            start : (start + h0)
        ]

    step_h = h0 + margin * 2
    out_fig[step_h : step_h + h0, :w0] = (img0 * 255).astype(np.uint8)
    if h0 >= h1:
        start = step_h + (h0 - h1) // 2
        out_fig[start : start + h1, (w0 + margin) : (w0 + margin + w1)] = (
            img1 * 255
        ).astype(np.uint8)
    else:
        start = (h1 - h0) // 2
        out_fig[step_h : step_h + h0, (w0 + margin) : (w0 + margin + w1)] = (
            img1[start : start + h0] * 255
        ).astype(np.uint8)

    # draw matching lines, this is inspired from https://raw.githubusercontent.com/magicleap/SuperGluePretrainedNetwork/master/models/utils.py
    mkpts0, mkpts1 = np.round(mkpts0).astype(int), np.round(mkpts1).astype(int)
    mcolor = (np.array(mcolor[:, [2, 1, 0]]) * 255).astype(int)

    for (x0, y0), (x1, y1), c in zip(mkpts0, mkpts1, mcolor):
        c = c.tolist()
        cv2.line(
            out_fig,
            (x0, y0 + step_h),
            (x1 + margin + w0, y1 + step_h + (h0 - h1) // 2),
            color=c,
            thickness=1,
            lineType=cv2.LINE_AA,
        )
        # display line end-points as circles
        cv2.circle(out_fig, (x0, y0 + step_h), 2, c, -1, lineType=cv2.LINE_AA)
        cv2.circle(
            out_fig,
            (x1 + margin + w0, y1 + step_h + (h0 - h1) // 2),
            2,
            c,
            -1,
            lineType=cv2.LINE_AA,
        )

    # Scale factor for consistent visualization across scales.
    sc = min(h / 960.0, 2.0)

    # Big text.
    Ht = int(30 * sc)  # text height
    txt_color_fg = (255, 255, 255)
    txt_color_bg = (0, 0, 0)
    for i, t in enumerate(text):
        cv2.putText(
            out_fig,
            t,
            (int(8 * sc), Ht + step_h * i),
            cv2.FONT_HERSHEY_DUPLEX,
            1.0 * sc,
            txt_color_bg,
            2,
            cv2.LINE_AA,
        )
        cv2.putText(
            out_fig,
            t,
            (int(8 * sc), Ht + step_h * i),
            cv2.FONT_HERSHEY_DUPLEX,
            1.0 * sc,
            txt_color_fg,
            1,
            cv2.LINE_AA,
        )

    if path is not None:
        cv2.imwrite(str(path), out_fig)

    if opencv_display:
        cv2.imshow(opencv_title, out_fig)
        cv2.waitKey(1)

    return out_fig


def fig2im(fig):
    fig.canvas.draw()
    w, h = fig.canvas.get_width_height()
    buf_ndarray = np.frombuffer(fig.canvas.tostring_rgb(), dtype="u1")
    im = buf_ndarray.reshape(h, w, 3)
    return im


def draw_matches(
    mkpts0, mkpts1, img0, img1, conf, titles=None, dpi=150, path=None, pad=0.5
):
    thr = 5e-4
    thr = 0.5
    color = error_colormap(conf, thr, alpha=0.1)
    text = [
        f"image name",
        f"#Matches: {len(mkpts0)}",
    ]
    if path:
        fig2im(
            make_matching_figure(
                img0,
                img1,
                mkpts0,
                mkpts1,
                color,
                titles=titles,
                text=text,
                path=path,
                dpi=dpi,
                pad=pad,
            )
        )
    else:
        return fig2im(
            make_matching_figure(
                img0,
                img1,
                mkpts0,
                mkpts1,
                color,
                titles=titles,
                text=text,
                pad=pad,
                dpi=dpi,
            )
        )


def draw_image_pairs(img0, img1, text=[], dpi=75, path=None, pad=0.5):
    # draw image pair
    fig, axes = plt.subplots(1, 2, figsize=(10, 6), dpi=dpi)
    axes[0].imshow(img0)  # , cmap='gray')
    axes[1].imshow(img1)  # , cmap='gray')
    for i in range(2):  # clear all frames
        axes[i].get_yaxis().set_ticks([])
        axes[i].get_xaxis().set_ticks([])
        for spine in axes[i].spines.values():
            spine.set_visible(False)
    plt.tight_layout(pad=pad)

    # put txts
    txt_color = "k" if img0[:100, :200].mean() > 200 else "w"
    fig.text(
        0.01,
        0.99,
        "\n".join(text),
        transform=fig.axes[0].transAxes,
        fontsize=15,
        va="top",
        ha="left",
        color=txt_color,
    )

    # save or return figure
    if path:
        plt.savefig(str(path), bbox_inches="tight", pad_inches=0)
        plt.close()
    else:
        return fig2im(fig)
```
extra_utils/utils.py
ADDED
@@ -0,0 +1,182 @@
```python
import torch
import numpy as np
import cv2
from hloc import matchers, extractors
from hloc.utils.base_model import dynamic_load
from hloc import match_dense, match_features, extract_features
from .plotting import draw_matches, fig2im
from .visualize_util import plot_images, plot_color_line_matches

device = "cuda" if torch.cuda.is_available() else "cpu"


def get_model(match_conf):
    Model = dynamic_load(matchers, match_conf["model"]["name"])
    model = Model(match_conf["model"]).eval().to(device)
    return model


def get_feature_model(conf):
    Model = dynamic_load(extractors, conf["model"]["name"])
    model = Model(conf["model"]).eval().to(device)
    return model


def display_matches(pred: dict):
    img0 = pred["image0_orig"]
    img1 = pred["image1_orig"]

    num_inliers = 0
    if "keypoints0_orig" in pred.keys() and "keypoints1_orig" in pred.keys():
        mkpts0 = pred["keypoints0_orig"]
        mkpts1 = pred["keypoints1_orig"]
        num_inliers = len(mkpts0)
        if "mconf" in pred.keys():
            mconf = pred["mconf"]
        else:
            mconf = np.ones(len(mkpts0))
        fig_mkpts = draw_matches(
            mkpts0,
            mkpts1,
            img0,
            img1,
            mconf,
            dpi=300,
            titles=["Image 0 - matched keypoints", "Image 1 - matched keypoints"],
        )
        fig = fig_mkpts
    if "line0_orig" in pred.keys() and "line1_orig" in pred.keys():
        # lines
        mtlines0 = pred["line0_orig"]
        mtlines1 = pred["line1_orig"]
        num_inliers = len(mtlines0)
        fig_lines = plot_images(
            [img0.squeeze(), img1.squeeze()],
            ["Image 0 - matched lines", "Image 1 - matched lines"],
            dpi=300,
        )
        fig_lines = plot_color_line_matches([mtlines0, mtlines1], lw=2)
        fig_lines = fig2im(fig_lines)

        # keypoints
        mkpts0 = pred["line_keypoints0_orig"]
        mkpts1 = pred["line_keypoints1_orig"]

        if mkpts0 is not None and mkpts1 is not None:
            num_inliers = len(mkpts0)
            if "mconf" in pred.keys():
                mconf = pred["mconf"]
            else:
                mconf = np.ones(len(mkpts0))
            fig_mkpts = draw_matches(mkpts0, mkpts1, img0, img1, mconf, dpi=300)
            fig_lines = cv2.resize(fig_lines, (fig_mkpts.shape[1], fig_mkpts.shape[0]))
            fig = np.concatenate([fig_mkpts, fig_lines], axis=0)
        else:
            fig = fig_lines
    return fig, num_inliers


# Matchers collections
matcher_zoo = {
    "gluestick": {"config": match_dense.confs["gluestick"], "dense": True},
    "sold2": {"config": match_dense.confs["sold2"], "dense": True},
    # 'dedode-sparse': {
    #     'config': match_dense.confs['dedode_sparse'],
    #     'dense': True  # dense mode, we need 2 images
    # },
    "loftr": {"config": match_dense.confs["loftr"], "dense": True},
    "topicfm": {"config": match_dense.confs["topicfm"], "dense": True},
    "aspanformer": {"config": match_dense.confs["aspanformer"], "dense": True},
    "dedode": {
        "config": match_features.confs["Dual-Softmax"],
        "config_feature": extract_features.confs["dedode"],
        "dense": False,
    },
    "superpoint+superglue": {
        "config": match_features.confs["superglue"],
        "config_feature": extract_features.confs["superpoint_max"],
        "dense": False,
    },
    "superpoint+lightglue": {
        "config": match_features.confs["superpoint-lightglue"],
        "config_feature": extract_features.confs["superpoint_max"],
        "dense": False,
    },
    "disk": {
        "config": match_features.confs["NN-mutual"],
        "config_feature": extract_features.confs["disk"],
        "dense": False,
    },
    "disk+dualsoftmax": {
        "config": match_features.confs["Dual-Softmax"],
        "config_feature": extract_features.confs["disk"],
        "dense": False,
    },
    "superpoint+dualsoftmax": {
        "config": match_features.confs["Dual-Softmax"],
        "config_feature": extract_features.confs["superpoint_max"],
        "dense": False,
    },
    "disk+lightglue": {
        "config": match_features.confs["disk-lightglue"],
        "config_feature": extract_features.confs["disk"],
        "dense": False,
    },
    "superpoint+mnn": {
        "config": match_features.confs["NN-mutual"],
        "config_feature": extract_features.confs["superpoint_max"],
        "dense": False,
    },
    "sift+sgmnet": {
        "config": match_features.confs["sgmnet"],
        "config_feature": extract_features.confs["sift"],
        "dense": False,
    },
    "sosnet": {
        "config": match_features.confs["NN-mutual"],
        "config_feature": extract_features.confs["sosnet"],
        "dense": False,
    },
    "hardnet": {
        "config": match_features.confs["NN-mutual"],
        "config_feature": extract_features.confs["hardnet"],
        "dense": False,
    },
    "d2net": {
        "config": match_features.confs["NN-mutual"],
        "config_feature": extract_features.confs["d2net-ss"],
        "dense": False,
    },
    "d2net-ms": {
        "config": match_features.confs["NN-mutual"],
        "config_feature": extract_features.confs["d2net-ms"],
        "dense": False,
    },
    "alike": {
        "config": match_features.confs["NN-mutual"],
        "config_feature": extract_features.confs["alike"],
        "dense": False,
    },
    "lanet": {
        "config": match_features.confs["NN-mutual"],
        "config_feature": extract_features.confs["lanet"],
        "dense": False,
    },
    "r2d2": {
        "config": match_features.confs["NN-mutual"],
        "config_feature": extract_features.confs["r2d2"],
        "dense": False,
    },
    "darkfeat": {
        "config": match_features.confs["NN-mutual"],
        "config_feature": extract_features.confs["darkfeat"],
        "dense": False,
    },
    "sift": {
        "config": match_features.confs["NN-mutual"],
        "config_feature": extract_features.confs["sift"],
        "dense": False,
    },
    "roma": {"config": match_dense.confs["roma"], "dense": True},
    "DKMv3": {"config": match_dense.confs["dkm"], "dense": True},
}
```
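
As a usage note for the module above: each `matcher_zoo` entry either points at a dense matcher config or at a sparse matcher plus a feature-extractor config, and `get_model` / `get_feature_model` resolve those names with hloc's `dynamic_load`. A minimal sketch, assuming the corresponding hloc model weights can be downloaded on first use:

```python
# Sketch: inspect the zoo and instantiate one sparse pipeline by hand,
# mirroring the path run_matching() takes in app.py.
from extra_utils.utils import matcher_zoo, get_model, get_feature_model, device

sparse = {k: v for k, v in matcher_zoo.items() if not v["dense"]}
print(sorted(sparse))  # sparse entries, e.g. 'disk+lightglue', 'superpoint+superglue', ...

entry = matcher_zoo["superpoint+superglue"]
matcher = get_model(entry["config"])                    # dynamic_load -> hloc.matchers.*
extractor = get_feature_model(entry["config_feature"])  # dynamic_load -> hloc.extractors.*
print(type(matcher).__name__, type(extractor).__name__, "on", device)
```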
extra_utils/visualize_util.py
ADDED
@@ -0,0 +1,642 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
""" Organize some frequently used visualization functions. """
|
2 |
+
import cv2
|
3 |
+
import numpy as np
|
4 |
+
import matplotlib
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import copy
|
7 |
+
import seaborn as sns
|
8 |
+
|
9 |
+
|
10 |
+
# Plot junctions onto the image (return a separate copy)
|
11 |
+
def plot_junctions(input_image, junctions, junc_size=3, color=None):
|
12 |
+
"""
|
13 |
+
input_image: can be 0~1 float or 0~255 uint8.
|
14 |
+
junctions: Nx2 or 2xN np array.
|
15 |
+
junc_size: the size of the plotted circles.
|
16 |
+
"""
|
17 |
+
# Create image copy
|
18 |
+
image = copy.copy(input_image)
|
19 |
+
# Make sure the image is converted to 255 uint8
|
20 |
+
if image.dtype == np.uint8:
|
21 |
+
pass
|
22 |
+
# A float type image ranging from 0~1
|
23 |
+
elif image.dtype in [np.float32, np.float64, np.float] and image.max() <= 2.0:
|
24 |
+
image = (image * 255.0).astype(np.uint8)
|
25 |
+
# A float type image ranging from 0.~255.
|
26 |
+
elif image.dtype in [np.float32, np.float64, np.float] and image.mean() > 10.0:
|
27 |
+
image = image.astype(np.uint8)
|
28 |
+
else:
|
29 |
+
raise ValueError(
|
30 |
+
"[Error] Unknown image data type. Expect 0~1 float or 0~255 uint8."
|
31 |
+
)
|
32 |
+
|
33 |
+
# Check whether the image is single channel
|
34 |
+
if len(image.shape) == 2 or ((len(image.shape) == 3) and (image.shape[-1] == 1)):
|
35 |
+
# Squeeze to H*W first
|
36 |
+
image = image.squeeze()
|
37 |
+
|
38 |
+
# Stack to channle 3
|
39 |
+
image = np.concatenate([image[..., None] for _ in range(3)], axis=-1)
|
40 |
+
|
41 |
+
# Junction dimensions should be N*2
|
42 |
+
if not len(junctions.shape) == 2:
|
43 |
+
raise ValueError("[Error] junctions should be 2-dim array.")
|
44 |
+
|
45 |
+
# Always convert to N*2
|
46 |
+
if junctions.shape[-1] != 2:
|
47 |
+
if junctions.shape[0] == 2:
|
48 |
+
junctions = junctions.T
|
49 |
+
else:
|
50 |
+
raise ValueError("[Error] At least one of the two dims should be 2.")
|
51 |
+
|
52 |
+
# Round and convert junctions to int (and check the boundary)
|
53 |
+
H, W = image.shape[:2]
|
54 |
+
junctions = (np.round(junctions)).astype(np.int)
|
55 |
+
junctions[junctions < 0] = 0
|
56 |
+
junctions[junctions[:, 0] >= H, 0] = H - 1 # (first dim) max bounded by H-1
|
57 |
+
junctions[junctions[:, 1] >= W, 1] = W - 1 # (second dim) max bounded by W-1
|
58 |
+
|
59 |
+
# Iterate through all the junctions
|
60 |
+
num_junc = junctions.shape[0]
|
61 |
+
if color is None:
|
62 |
+
color = (0, 255.0, 0)
|
63 |
+
for idx in range(num_junc):
|
64 |
+
# Fetch one junction
|
65 |
+
junc = junctions[idx, :]
|
66 |
+
cv2.circle(
|
67 |
+
image, tuple(np.flip(junc)), radius=junc_size, color=color, thickness=3
|
68 |
+
)
|
69 |
+
|
70 |
+
return image
|
71 |
+
|
72 |
+
|
73 |
+
# Plot line segements given junctions and line adjecent map
|
74 |
+
def plot_line_segments(
|
75 |
+
input_image,
|
76 |
+
junctions,
|
77 |
+
line_map,
|
78 |
+
junc_size=3,
|
79 |
+
color=(0, 255.0, 0),
|
80 |
+
line_width=1,
|
81 |
+
plot_survived_junc=True,
|
82 |
+
):
|
83 |
+
"""
|
84 |
+
input_image: can be 0~1 float or 0~255 uint8.
|
85 |
+
junctions: Nx2 or 2xN np array.
|
86 |
+
line_map: NxN np array
|
87 |
+
junc_size: the size of the plotted circles.
|
88 |
+
color: color of the line segments (can be string "random")
|
89 |
+
line_width: width of the drawn segments.
|
90 |
+
plot_survived_junc: whether we only plot the survived junctions.
|
91 |
+
"""
|
92 |
+
# Create image copy
|
93 |
+
image = copy.copy(input_image)
|
94 |
+
# Make sure the image is converted to 255 uint8
|
95 |
+
if image.dtype == np.uint8:
|
96 |
+
pass
|
97 |
+
# A float type image ranging from 0~1
|
98 |
+
elif image.dtype in [np.float32, np.float64] and image.max() <= 2.0:
|
99 |
+
image = (image * 255.0).astype(np.uint8)
|
100 |
+
# A float type image ranging from 0.~255.
|
101 |
+
elif image.dtype in [np.float32, np.float64] and image.mean() > 10.0:
|
102 |
+
image = image.astype(np.uint8)
|
103 |
+
else:
|
104 |
+
raise ValueError(
|
105 |
+
"[Error] Unknown image data type. Expect 0~1 float or 0~255 uint8."
|
106 |
+
)
|
107 |
+
|
108 |
+
# Check whether the image is single channel
|
109 |
+
if len(image.shape) == 2 or ((len(image.shape) == 3) and (image.shape[-1] == 1)):
|
110 |
+
# Squeeze to H*W first
|
111 |
+
image = image.squeeze()
|
112 |
+
|
113 |
+
# Stack to 3 channels
|
114 |
+
image = np.concatenate([image[..., None] for _ in range(3)], axis=-1)
|
115 |
+
|
116 |
+
# Junction dimensions should be 2
|
117 |
+
if not len(junctions.shape) == 2:
|
118 |
+
raise ValueError("[Error] junctions should be 2-dim array.")
|
119 |
+
|
120 |
+
# Always convert to N*2
|
121 |
+
if junctions.shape[-1] != 2:
|
122 |
+
if junctions.shape[0] == 2:
|
123 |
+
junctions = junctions.T
|
124 |
+
else:
|
125 |
+
raise ValueError("[Error] At least one of the two dims should be 2.")
|
126 |
+
|
127 |
+
# line_map dimension should be 2
|
128 |
+
if not len(line_map.shape) == 2:
|
129 |
+
raise ValueError("[Error] line_map should be 2-dim array.")
|
130 |
+
|
131 |
+
# Color should be "random" or a list or tuple with length 3
|
132 |
+
if color != "random":
|
133 |
+
if not (isinstance(color, tuple) or isinstance(color, list)):
|
134 |
+
raise ValueError("[Error] color should have type list or tuple.")
|
135 |
+
else:
|
136 |
+
if len(color) != 3:
|
137 |
+
raise ValueError(
|
138 |
+
"[Error] color should be a list or tuple with length 3."
|
139 |
+
)
|
140 |
+
|
141 |
+
# Make a copy of the line_map
|
142 |
+
line_map_tmp = copy.copy(line_map)
|
143 |
+
|
144 |
+
# Parse line_map back to segment pairs
|
145 |
+
segments = np.zeros([0, 4])
|
146 |
+
for idx in range(junctions.shape[0]):
|
147 |
+
# if no connectivity, just skip it
|
148 |
+
if line_map_tmp[idx, :].sum() == 0:
|
149 |
+
continue
|
150 |
+
# record the line segment
|
151 |
+
else:
|
152 |
+
for idx2 in np.where(line_map_tmp[idx, :] == 1)[0]:
|
153 |
+
p1 = np.flip(junctions[idx, :]) # Convert to xy format
|
154 |
+
p2 = np.flip(junctions[idx2, :]) # Convert to xy format
|
155 |
+
segments = np.concatenate(
|
156 |
+
(segments, np.array([p1[0], p1[1], p2[0], p2[1]])[None, ...]),
|
157 |
+
axis=0,
|
158 |
+
)
|
159 |
+
|
160 |
+
# Update line_map
|
161 |
+
line_map_tmp[idx, idx2] = 0
|
162 |
+
line_map_tmp[idx2, idx] = 0
|
163 |
+
|
164 |
+
# Draw segment pairs
|
165 |
+
for idx in range(segments.shape[0]):
|
166 |
+
seg = np.round(segments[idx, :]).astype(np.int32)
|
167 |
+
# Decide the color
|
168 |
+
if color != "random":
|
169 |
+
color = tuple(color)
|
170 |
+
else:
|
171 |
+
color = tuple(
|
172 |
+
np.random.rand(
|
173 |
+
3,
|
174 |
+
)
|
175 |
+
)
|
176 |
+
cv2.line(
|
177 |
+
image, tuple(seg[:2]), tuple(seg[2:]), color=color, thickness=line_width
|
178 |
+
)
|
179 |
+
|
180 |
+
# Also draw the junctions
|
181 |
+
if not plot_survived_junc:
|
182 |
+
num_junc = junctions.shape[0]
|
183 |
+
for idx in range(num_junc):
|
184 |
+
# Fetch one junction
|
185 |
+
junc = junctions[idx, :]
|
186 |
+
cv2.circle(
|
187 |
+
image,
|
188 |
+
tuple(np.flip(junc)),
|
189 |
+
radius=junc_size,
|
190 |
+
color=(0, 255.0, 0),
|
191 |
+
thickness=3,
|
192 |
+
)
|
193 |
+
# Only plot the junctions which are part of a line segment
|
194 |
+
else:
|
195 |
+
for idx in range(segments.shape[0]):
|
196 |
+
seg = np.round(segments[idx, :]).astype(np.int32)  # Already in HW format.
|
197 |
+
cv2.circle(
|
198 |
+
image,
|
199 |
+
tuple(seg[:2]),
|
200 |
+
radius=junc_size,
|
201 |
+
color=(0, 255.0, 0),
|
202 |
+
thickness=3,
|
203 |
+
)
|
204 |
+
cv2.circle(
|
205 |
+
image,
|
206 |
+
tuple(seg[2:]),
|
207 |
+
radius=junc_size,
|
208 |
+
color=(0, 255.0, 0),
|
209 |
+
thickness=3,
|
210 |
+
)
|
211 |
+
|
212 |
+
return image
|
213 |
+
|
214 |
+
|
215 |
+
# Plot line segments given Nx4 or Nx2x2 line segments
|
216 |
+
def plot_line_segments_from_segments(
|
217 |
+
input_image, line_segments, junc_size=3, color=(0, 255.0, 0), line_width=1
|
218 |
+
):
|
219 |
+
# Create image copy
|
220 |
+
image = copy.copy(input_image)
|
221 |
+
# Make sure the image is converted to 255 uint8
|
222 |
+
if image.dtype == np.uint8:
|
223 |
+
pass
|
224 |
+
# A float type image ranging from 0~1
|
225 |
+
elif image.dtype in [np.float32, np.float64] and image.max() <= 2.0:
|
226 |
+
image = (image * 255.0).astype(np.uint8)
|
227 |
+
# A float type image ranging from 0.~255.
|
228 |
+
elif image.dtype in [np.float32, np.float64] and image.mean() > 10.0:
|
229 |
+
image = image.astype(np.uint8)
|
230 |
+
else:
|
231 |
+
raise ValueError(
|
232 |
+
"[Error] Unknown image data type. Expect 0~1 float or 0~255 uint8."
|
233 |
+
)
|
234 |
+
|
235 |
+
# Check whether the image is single channel
|
236 |
+
if len(image.shape) == 2 or ((len(image.shape) == 3) and (image.shape[-1] == 1)):
|
237 |
+
# Squeeze to H*W first
|
238 |
+
image = image.squeeze()
|
239 |
+
|
240 |
+
# Stack to 3 channels
|
241 |
+
image = np.concatenate([image[..., None] for _ in range(3)], axis=-1)
|
242 |
+
|
243 |
+
# Check if line_segments are in (1) Nx4 or (2) Nx2x2 format.
|
244 |
+
H, W, _ = image.shape
|
245 |
+
# (1) Nx4 format
|
246 |
+
if len(line_segments.shape) == 2 and line_segments.shape[-1] == 4:
|
247 |
+
# Round to int32
|
248 |
+
line_segments = line_segments.astype(np.int32)
|
249 |
+
|
250 |
+
# Clip H dimension
|
251 |
+
line_segments[:, 0] = np.clip(line_segments[:, 0], a_min=0, a_max=H - 1)
|
252 |
+
line_segments[:, 2] = np.clip(line_segments[:, 2], a_min=0, a_max=H - 1)
|
253 |
+
|
254 |
+
# Clip W dimension
|
255 |
+
line_segments[:, 1] = np.clip(line_segments[:, 1], a_min=0, a_max=W - 1)
|
256 |
+
line_segments[:, 3] = np.clip(line_segments[:, 3], a_min=0, a_max=W - 1)
|
257 |
+
|
258 |
+
# Convert to Nx2x2 format
|
259 |
+
line_segments = np.concatenate(
|
260 |
+
[
|
261 |
+
np.expand_dims(line_segments[:, :2], axis=1),
|
262 |
+
np.expand_dims(line_segments[:, 2:], axis=1),
|
263 |
+
],
|
264 |
+
axis=1,
|
265 |
+
)
|
266 |
+
|
267 |
+
# (2) Nx2x2 format
|
268 |
+
elif len(line_segments.shape) == 3 and line_segments.shape[-1] == 2:
|
269 |
+
# Round to int32
|
270 |
+
line_segments = line_segments.astype(np.int32)
|
271 |
+
|
272 |
+
# Clip H dimension
|
273 |
+
line_segments[:, :, 0] = np.clip(line_segments[:, :, 0], a_min=0, a_max=H - 1)
|
274 |
+
line_segments[:, :, 1] = np.clip(line_segments[:, :, 1], a_min=0, a_max=W - 1)
|
275 |
+
|
276 |
+
else:
|
277 |
+
raise ValueError(
|
278 |
+
"[Error] line_segments should be either Nx4 or Nx2x2 in HW format."
|
279 |
+
)
|
280 |
+
|
281 |
+
# Draw segment pairs (all segments should be in HW format)
|
282 |
+
image = image.copy()
|
283 |
+
for idx in range(line_segments.shape[0]):
|
284 |
+
seg = np.round(line_segments[idx, :, :]).astype(np.int32)
|
285 |
+
# Decide the color
|
286 |
+
if color != "random":
|
287 |
+
color = tuple(color)
|
288 |
+
else:
|
289 |
+
color = tuple(
|
290 |
+
np.random.rand(
|
291 |
+
3,
|
292 |
+
)
|
293 |
+
)
|
294 |
+
cv2.line(
|
295 |
+
image,
|
296 |
+
tuple(np.flip(seg[0, :])),
|
297 |
+
tuple(np.flip(seg[1, :])),
|
298 |
+
color=color,
|
299 |
+
thickness=line_width,
|
300 |
+
)
|
301 |
+
|
302 |
+
# Also draw the junctions
|
303 |
+
cv2.circle(
|
304 |
+
image,
|
305 |
+
tuple(np.flip(seg[0, :])),
|
306 |
+
radius=junc_size,
|
307 |
+
color=(0, 255.0, 0),
|
308 |
+
thickness=3,
|
309 |
+
)
|
310 |
+
cv2.circle(
|
311 |
+
image,
|
312 |
+
tuple(np.flip(seg[1, :])),
|
313 |
+
radius=junc_size,
|
314 |
+
color=(0, 255.0, 0),
|
315 |
+
thickness=3,
|
316 |
+
)
|
317 |
+
|
318 |
+
return image
|
319 |
+
|
320 |
+
|
321 |
+
# Additional functions to visualize multiple images at the same time,
|
322 |
+
# e.g. for line matching
|
323 |
+
def plot_images(imgs, titles=None, cmaps="gray", dpi=100, size=5, pad=0.5):
|
324 |
+
"""Plot a set of images horizontally.
|
325 |
+
Args:
|
326 |
+
imgs: a list of NumPy or PyTorch images, RGB (H, W, 3) or mono (H, W).
|
327 |
+
titles: a list of strings, as titles for each image.
|
328 |
+
cmaps: colormaps for monochrome images.
|
329 |
+
"""
|
330 |
+
n = len(imgs)
|
331 |
+
if not isinstance(cmaps, (list, tuple)):
|
332 |
+
cmaps = [cmaps] * n
|
333 |
+
# figsize = (size*n, size*3/4) if size is not None else None
|
334 |
+
figsize = (size * n, size * 6 / 5) if size is not None else None
|
335 |
+
fig, ax = plt.subplots(1, n, figsize=figsize, dpi=dpi)
|
336 |
+
|
337 |
+
if n == 1:
|
338 |
+
ax = [ax]
|
339 |
+
for i in range(n):
|
340 |
+
ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i]))
|
341 |
+
ax[i].get_yaxis().set_ticks([])
|
342 |
+
ax[i].get_xaxis().set_ticks([])
|
343 |
+
ax[i].set_axis_off()
|
344 |
+
for spine in ax[i].spines.values(): # remove frame
|
345 |
+
spine.set_visible(False)
|
346 |
+
if titles:
|
347 |
+
ax[i].set_title(titles[i])
|
348 |
+
fig.tight_layout(pad=pad)
|
349 |
+
return fig
|
350 |
+
|
351 |
+
|
352 |
+
def plot_keypoints(kpts, colors="lime", ps=4):
|
353 |
+
"""Plot keypoints for existing images.
|
354 |
+
Args:
|
355 |
+
kpts: list of ndarrays of size (N, 2).
|
356 |
+
colors: string, or list of list of tuples (one for each keypoint).
|
357 |
+
ps: size of the keypoints as float.
|
358 |
+
"""
|
359 |
+
if not isinstance(colors, list):
|
360 |
+
colors = [colors] * len(kpts)
|
361 |
+
axes = plt.gcf().axes
|
362 |
+
for a, k, c in zip(axes, kpts, colors):
|
363 |
+
a.scatter(k[:, 0], k[:, 1], c=c, s=ps, linewidths=0)
|
364 |
+
|
365 |
+
|
366 |
+
def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, indices=(0, 1), a=1.0):
|
367 |
+
"""Plot matches for a pair of existing images.
|
368 |
+
Args:
|
369 |
+
kpts0, kpts1: corresponding keypoints of size (N, 2).
|
370 |
+
color: color of each match, string or RGB tuple. Random if not given.
|
371 |
+
lw: width of the lines.
|
372 |
+
ps: size of the end points (no endpoint if ps=0)
|
373 |
+
indices: indices of the images to draw the matches on.
|
374 |
+
a: alpha opacity of the match lines.
|
375 |
+
"""
|
376 |
+
fig = plt.gcf()
|
377 |
+
ax = fig.axes
|
378 |
+
assert len(ax) > max(indices)
|
379 |
+
ax0, ax1 = ax[indices[0]], ax[indices[1]]
|
380 |
+
fig.canvas.draw()
|
381 |
+
|
382 |
+
assert len(kpts0) == len(kpts1)
|
383 |
+
if color is None:
|
384 |
+
color = matplotlib.cm.hsv(np.random.rand(len(kpts0))).tolist()
|
385 |
+
elif len(color) > 0 and not isinstance(color[0], (tuple, list)):
|
386 |
+
color = [color] * len(kpts0)
|
387 |
+
|
388 |
+
if lw > 0:
|
389 |
+
# transform the points into the figure coordinate system
|
390 |
+
transFigure = fig.transFigure.inverted()
|
391 |
+
fkpts0 = transFigure.transform(ax0.transData.transform(kpts0))
|
392 |
+
fkpts1 = transFigure.transform(ax1.transData.transform(kpts1))
|
393 |
+
fig.lines += [
|
394 |
+
matplotlib.lines.Line2D(
|
395 |
+
(fkpts0[i, 0], fkpts1[i, 0]),
|
396 |
+
(fkpts0[i, 1], fkpts1[i, 1]),
|
397 |
+
zorder=1,
|
398 |
+
transform=fig.transFigure,
|
399 |
+
c=color[i],
|
400 |
+
linewidth=lw,
|
401 |
+
alpha=a,
|
402 |
+
)
|
403 |
+
for i in range(len(kpts0))
|
404 |
+
]
|
405 |
+
|
406 |
+
# freeze the axes to prevent the transform from changing
|
407 |
+
ax0.autoscale(enable=False)
|
408 |
+
ax1.autoscale(enable=False)
|
409 |
+
|
410 |
+
if ps > 0:
|
411 |
+
ax0.scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps, zorder=2)
|
412 |
+
ax1.scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps, zorder=2)
|
413 |
+
|
414 |
+
|
415 |
+
def plot_lines(
|
416 |
+
lines, line_colors="orange", point_colors="cyan", ps=4, lw=2, indices=(0, 1)
|
417 |
+
):
|
418 |
+
"""Plot lines and endpoints for existing images.
|
419 |
+
Args:
|
420 |
+
lines: list of ndarrays of size (N, 2, 2).
|
421 |
+
line_colors, point_colors: string, or list of colors (one per image).
|
422 |
+
ps: size of the keypoints as float pixels.
|
423 |
+
lw: line width as float pixels.
|
424 |
+
indices: indices of the images to draw the matches on.
|
425 |
+
"""
|
426 |
+
if not isinstance(line_colors, list):
|
427 |
+
line_colors = [line_colors] * len(lines)
|
428 |
+
if not isinstance(point_colors, list):
|
429 |
+
point_colors = [point_colors] * len(lines)
|
430 |
+
|
431 |
+
fig = plt.gcf()
|
432 |
+
ax = fig.axes
|
433 |
+
assert len(ax) > max(indices)
|
434 |
+
axes = [ax[i] for i in indices]
|
435 |
+
fig.canvas.draw()
|
436 |
+
|
437 |
+
# Plot the lines and junctions
|
438 |
+
for a, l, lc, pc in zip(axes, lines, line_colors, point_colors):
|
439 |
+
for i in range(len(l)):
|
440 |
+
line = matplotlib.lines.Line2D(
|
441 |
+
(l[i, 0, 0], l[i, 1, 0]),
|
442 |
+
(l[i, 0, 1], l[i, 1, 1]),
|
443 |
+
zorder=1,
|
444 |
+
c=lc,
|
445 |
+
linewidth=lw,
|
446 |
+
)
|
447 |
+
a.add_line(line)
|
448 |
+
pts = l.reshape(-1, 2)
|
449 |
+
a.scatter(pts[:, 0], pts[:, 1], c=pc, s=ps, linewidths=0, zorder=2)
|
450 |
+
|
451 |
+
return fig
|
452 |
+
|
453 |
+
|
454 |
+
def plot_line_matches(kpts0, kpts1, color=None, lw=1.5, indices=(0, 1), a=1.0):
|
455 |
+
"""Plot matches for a pair of existing images, parametrized by their middle point.
|
456 |
+
Args:
|
457 |
+
kpts0, kpts1: corresponding middle points of the lines of size (N, 2).
|
458 |
+
color: color of each match, string or RGB tuple. Random if not given.
|
459 |
+
lw: width of the lines.
|
460 |
+
indices: indices of the images to draw the matches on.
|
461 |
+
a: alpha opacity of the match lines.
|
462 |
+
"""
|
463 |
+
fig = plt.gcf()
|
464 |
+
ax = fig.axes
|
465 |
+
assert len(ax) > max(indices)
|
466 |
+
ax0, ax1 = ax[indices[0]], ax[indices[1]]
|
467 |
+
fig.canvas.draw()
|
468 |
+
|
469 |
+
assert len(kpts0) == len(kpts1)
|
470 |
+
if color is None:
|
471 |
+
color = matplotlib.cm.hsv(np.random.rand(len(kpts0))).tolist()
|
472 |
+
elif len(color) > 0 and not isinstance(color[0], (tuple, list)):
|
473 |
+
color = [color] * len(kpts0)
|
474 |
+
|
475 |
+
if lw > 0:
|
476 |
+
# transform the points into the figure coordinate system
|
477 |
+
transFigure = fig.transFigure.inverted()
|
478 |
+
fkpts0 = transFigure.transform(ax0.transData.transform(kpts0))
|
479 |
+
fkpts1 = transFigure.transform(ax1.transData.transform(kpts1))
|
480 |
+
fig.lines += [
|
481 |
+
matplotlib.lines.Line2D(
|
482 |
+
(fkpts0[i, 0], fkpts1[i, 0]),
|
483 |
+
(fkpts0[i, 1], fkpts1[i, 1]),
|
484 |
+
zorder=1,
|
485 |
+
transform=fig.transFigure,
|
486 |
+
c=color[i],
|
487 |
+
linewidth=lw,
|
488 |
+
alpha=a,
|
489 |
+
)
|
490 |
+
for i in range(len(kpts0))
|
491 |
+
]
|
492 |
+
|
493 |
+
# freeze the axes to prevent the transform from changing
|
494 |
+
ax0.autoscale(enable=False)
|
495 |
+
ax1.autoscale(enable=False)
|
496 |
+
|
497 |
+
|
498 |
+
def plot_color_line_matches(lines, correct_matches=None, lw=2, indices=(0, 1)):
|
499 |
+
"""Plot line matches for existing images with multiple colors.
|
500 |
+
Args:
|
501 |
+
lines: list of ndarrays of size (N, 2, 2).
|
502 |
+
correct_matches: bool array of size (N,) indicating correct matches.
|
503 |
+
lw: line width as float pixels.
|
504 |
+
indices: indices of the images to draw the matches on.
|
505 |
+
"""
|
506 |
+
n_lines = len(lines[0])
|
507 |
+
colors = sns.color_palette("husl", n_colors=n_lines)
|
508 |
+
np.random.shuffle(colors)
|
509 |
+
alphas = np.ones(n_lines)
|
510 |
+
# If correct_matches is not None, display wrong matches with a low alpha
|
511 |
+
if correct_matches is not None:
|
512 |
+
alphas[~np.array(correct_matches)] = 0.2
|
513 |
+
|
514 |
+
fig = plt.gcf()
|
515 |
+
ax = fig.axes
|
516 |
+
assert len(ax) > max(indices)
|
517 |
+
axes = [ax[i] for i in indices]
|
518 |
+
fig.canvas.draw()
|
519 |
+
|
520 |
+
# Plot the lines
|
521 |
+
for a, l in zip(axes, lines):
|
522 |
+
# Transform the points into the figure coordinate system
|
523 |
+
transFigure = fig.transFigure.inverted()
|
524 |
+
endpoint0 = transFigure.transform(a.transData.transform(l[:, 0]))
|
525 |
+
endpoint1 = transFigure.transform(a.transData.transform(l[:, 1]))
|
526 |
+
fig.lines += [
|
527 |
+
matplotlib.lines.Line2D(
|
528 |
+
(endpoint0[i, 0], endpoint1[i, 0]),
|
529 |
+
(endpoint0[i, 1], endpoint1[i, 1]),
|
530 |
+
zorder=1,
|
531 |
+
transform=fig.transFigure,
|
532 |
+
c=colors[i],
|
533 |
+
alpha=alphas[i],
|
534 |
+
linewidth=lw,
|
535 |
+
)
|
536 |
+
for i in range(n_lines)
|
537 |
+
]
|
538 |
+
|
539 |
+
return fig
|
540 |
+
|
541 |
+
|
542 |
+
def plot_color_lines(lines, correct_matches, wrong_matches, lw=2, indices=(0, 1)):
|
543 |
+
"""Plot line matches for existing images with multiple colors:
|
544 |
+
green for correct matches, red for wrong ones, and blue for the rest.
|
545 |
+
Args:
|
546 |
+
lines: list of ndarrays of size (N, 2, 2).
|
547 |
+
correct_matches: list of bool arrays of size N with correct matches.
|
548 |
+
wrong_matches: list of bool arrays of size (N,) with wrong matches.
|
549 |
+
lw: line width as float pixels.
|
550 |
+
indices: indices of the images to draw the matches on.
|
551 |
+
"""
|
552 |
+
# palette = sns.color_palette()
|
553 |
+
palette = sns.color_palette("hls", 8)
|
554 |
+
blue = palette[5] # palette[0]
|
555 |
+
red = palette[0] # palette[3]
|
556 |
+
green = palette[2] # palette[2]
|
557 |
+
colors = [np.array([blue] * len(l)) for l in lines]
|
558 |
+
for i, c in enumerate(colors):
|
559 |
+
c[np.array(correct_matches[i])] = green
|
560 |
+
c[np.array(wrong_matches[i])] = red
|
561 |
+
|
562 |
+
fig = plt.gcf()
|
563 |
+
ax = fig.axes
|
564 |
+
assert len(ax) > max(indices)
|
565 |
+
axes = [ax[i] for i in indices]
|
566 |
+
fig.canvas.draw()
|
567 |
+
|
568 |
+
# Plot the lines
|
569 |
+
for a, l, c in zip(axes, lines, colors):
|
570 |
+
# Transform the points into the figure coordinate system
|
571 |
+
transFigure = fig.transFigure.inverted()
|
572 |
+
endpoint0 = transFigure.transform(a.transData.transform(l[:, 0]))
|
573 |
+
endpoint1 = transFigure.transform(a.transData.transform(l[:, 1]))
|
574 |
+
fig.lines += [
|
575 |
+
matplotlib.lines.Line2D(
|
576 |
+
(endpoint0[i, 0], endpoint1[i, 0]),
|
577 |
+
(endpoint0[i, 1], endpoint1[i, 1]),
|
578 |
+
zorder=1,
|
579 |
+
transform=fig.transFigure,
|
580 |
+
c=c[i],
|
581 |
+
linewidth=lw,
|
582 |
+
)
|
583 |
+
for i in range(len(l))
|
584 |
+
]
|
585 |
+
|
586 |
+
|
587 |
+
def plot_subsegment_matches(lines, subsegments, lw=2, indices=(0, 1)):
|
588 |
+
"""Plot line matches for existing images with multiple colors and
|
589 |
+
highlight the actually matched subsegments.
|
590 |
+
Args:
|
591 |
+
lines: list of ndarrays of size (N, 2, 2).
|
592 |
+
subsegments: list of ndarrays of size (N, 2, 2).
|
593 |
+
lw: line width as float pixels.
|
594 |
+
indices: indices of the images to draw the matches on.
|
595 |
+
"""
|
596 |
+
n_lines = len(lines[0])
|
597 |
+
colors = sns.cubehelix_palette(
|
598 |
+
start=2, rot=-0.2, dark=0.3, light=0.7, gamma=1.3, hue=1, n_colors=n_lines
|
599 |
+
)
|
600 |
+
|
601 |
+
fig = plt.gcf()
|
602 |
+
ax = fig.axes
|
603 |
+
assert len(ax) > max(indices)
|
604 |
+
axes = [ax[i] for i in indices]
|
605 |
+
fig.canvas.draw()
|
606 |
+
|
607 |
+
# Plot the lines
|
608 |
+
for a, l, ss in zip(axes, lines, subsegments):
|
609 |
+
# Transform the points into the figure coordinate system
|
610 |
+
transFigure = fig.transFigure.inverted()
|
611 |
+
|
612 |
+
# Draw full line
|
613 |
+
endpoint0 = transFigure.transform(a.transData.transform(l[:, 0]))
|
614 |
+
endpoint1 = transFigure.transform(a.transData.transform(l[:, 1]))
|
615 |
+
fig.lines += [
|
616 |
+
matplotlib.lines.Line2D(
|
617 |
+
(endpoint0[i, 0], endpoint1[i, 0]),
|
618 |
+
(endpoint0[i, 1], endpoint1[i, 1]),
|
619 |
+
zorder=1,
|
620 |
+
transform=fig.transFigure,
|
621 |
+
c="red",
|
622 |
+
alpha=0.7,
|
623 |
+
linewidth=lw,
|
624 |
+
)
|
625 |
+
for i in range(n_lines)
|
626 |
+
]
|
627 |
+
|
628 |
+
# Draw matched subsegment
|
629 |
+
endpoint0 = transFigure.transform(a.transData.transform(ss[:, 0]))
|
630 |
+
endpoint1 = transFigure.transform(a.transData.transform(ss[:, 1]))
|
631 |
+
fig.lines += [
|
632 |
+
matplotlib.lines.Line2D(
|
633 |
+
(endpoint0[i, 0], endpoint1[i, 0]),
|
634 |
+
(endpoint0[i, 1], endpoint1[i, 1]),
|
635 |
+
zorder=1,
|
636 |
+
transform=fig.transFigure,
|
637 |
+
c=colors[i],
|
638 |
+
alpha=1,
|
639 |
+
linewidth=lw,
|
640 |
+
)
|
641 |
+
for i in range(n_lines)
|
642 |
+
]
|
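The matplotlib helpers above are meant to be chained: plot_images creates the figure and one axis per image, and plot_keypoints, plot_matches, plot_lines and the line-match variants then draw onto that same figure. A minimal usage sketch with placeholder data (the arrays and output file name below are illustrative only, not part of this commit; it assumes the repository root is on sys.path):

import numpy as np
import matplotlib.pyplot as plt
from extra_utils.visualize_util import plot_images, plot_matches

# Two dummy grayscale images and some fake correspondences, for illustration only.
img0, img1 = np.random.rand(240, 320), np.random.rand(240, 320)
kpts0 = np.random.rand(50, 2) * np.array([320, 240])
kpts1 = kpts0 + np.random.randn(50, 2) * 2.0

fig = plot_images([img0, img1], titles=["query", "reference"])  # creates the two axes
plot_matches(kpts0, kpts1, ps=6)  # draws match lines on the figure created above
fig.savefig("matches.png")
plt.close(fig)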
hloc/__init__.py
ADDED
@@ -0,0 +1,31 @@
1 |
+
import logging
|
2 |
+
from packaging import version
|
3 |
+
|
4 |
+
__version__ = "1.3"
|
5 |
+
|
6 |
+
formatter = logging.Formatter(
|
7 |
+
fmt="[%(asctime)s %(name)s %(levelname)s] %(message)s", datefmt="%Y/%m/%d %H:%M:%S"
|
8 |
+
)
|
9 |
+
handler = logging.StreamHandler()
|
10 |
+
handler.setFormatter(formatter)
|
11 |
+
handler.setLevel(logging.INFO)
|
12 |
+
|
13 |
+
logger = logging.getLogger("hloc")
|
14 |
+
logger.setLevel(logging.INFO)
|
15 |
+
logger.addHandler(handler)
|
16 |
+
logger.propagate = False
|
17 |
+
|
18 |
+
try:
|
19 |
+
import pycolmap
|
20 |
+
except ImportError:
|
21 |
+
logger.warning("pycolmap is not installed, some features may not work.")
|
22 |
+
else:
|
23 |
+
minimal_version = version.parse("0.3.0")
|
24 |
+
found_version = version.parse(getattr(pycolmap, "__version__"))
|
25 |
+
if found_version < minimal_version:
|
26 |
+
logger.warning(
|
27 |
+
"hloc now requires pycolmap>=%s but found pycolmap==%s, "
|
28 |
+
"please upgrade with `pip install --upgrade pycolmap`",
|
29 |
+
minimal_version,
|
30 |
+
found_version,
|
31 |
+
)
|
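The module above only configures a package-wide "hloc" logger and warns when pycolmap is missing or outdated; the other hloc modules reuse that logger instead of configuring their own. A short sketch of the intended usage (illustrative, not part of the commit):

from hloc import logger

# All hloc messages share the handler configured above, e.g.
# [2023/09/01 12:00:00 hloc INFO] Found 10 images in root datasets/sacre_coeur/mapping.
logger.info("Found %d images in root %s.", 10, "datasets/sacre_coeur/mapping")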
hloc/extract_features.py
ADDED
@@ -0,0 +1,516 @@
1 |
+
import argparse
|
2 |
+
import torch
|
3 |
+
from pathlib import Path
|
4 |
+
from typing import Dict, List, Union, Optional
|
5 |
+
import h5py
|
6 |
+
from types import SimpleNamespace
|
7 |
+
import cv2
|
8 |
+
import numpy as np
|
9 |
+
from tqdm import tqdm
|
10 |
+
import pprint
|
11 |
+
import collections.abc as collections
|
12 |
+
import PIL.Image
|
13 |
+
import torchvision.transforms.functional as F
|
14 |
+
from . import extractors, logger
|
15 |
+
from .utils.base_model import dynamic_load
|
16 |
+
from .utils.parsers import parse_image_lists
|
17 |
+
from .utils.io import read_image, list_h5_names
|
18 |
+
|
19 |
+
|
20 |
+
"""
|
21 |
+
A set of standard configurations that can be directly selected from the command
|
22 |
+
line using their name. Each is a dictionary with the following entries:
|
23 |
+
- output: the name of the feature file that will be generated.
|
24 |
+
- model: the model configuration, as passed to a feature extractor.
|
25 |
+
- preprocessing: how to preprocess the images read from disk.
|
26 |
+
"""
|
27 |
+
confs = {
|
28 |
+
"superpoint_aachen": {
|
29 |
+
"output": "feats-superpoint-n4096-r1024",
|
30 |
+
"model": {
|
31 |
+
"name": "superpoint",
|
32 |
+
"nms_radius": 3,
|
33 |
+
"max_keypoints": 4096,
|
34 |
+
"keypoint_threshold": 0.005,
|
35 |
+
},
|
36 |
+
"preprocessing": {
|
37 |
+
"grayscale": True,
|
38 |
+
"force_resize": True,
|
39 |
+
"resize_max": 1600,
|
40 |
+
"width": 640,
|
41 |
+
"height": 480,
|
42 |
+
"dfactor": 8,
|
43 |
+
},
|
44 |
+
},
|
45 |
+
# Resize images to 1600px even if they are originally smaller.
|
46 |
+
# Improves the keypoint localization if the images are of good quality.
|
47 |
+
"superpoint_max": {
|
48 |
+
"output": "feats-superpoint-n4096-rmax1600",
|
49 |
+
"model": {
|
50 |
+
"name": "superpoint",
|
51 |
+
"nms_radius": 3,
|
52 |
+
"max_keypoints": 4096,
|
53 |
+
"keypoint_threshold": 0.005,
|
54 |
+
},
|
55 |
+
"preprocessing": {
|
56 |
+
"grayscale": True,
|
57 |
+
"force_resize": True,
|
58 |
+
"resize_max": 1600,
|
59 |
+
"width": 640,
|
60 |
+
"height": 480,
|
61 |
+
"dfactor": 8,
|
62 |
+
},
|
63 |
+
},
|
64 |
+
"superpoint_inloc": {
|
65 |
+
"output": "feats-superpoint-n4096-r1600",
|
66 |
+
"model": {
|
67 |
+
"name": "superpoint",
|
68 |
+
"nms_radius": 4,
|
69 |
+
"max_keypoints": 4096,
|
70 |
+
"keypoint_threshold": 0.005,
|
71 |
+
},
|
72 |
+
"preprocessing": {
|
73 |
+
"grayscale": True,
|
74 |
+
"resize_max": 1600,
|
75 |
+
},
|
76 |
+
},
|
77 |
+
"r2d2": {
|
78 |
+
"output": "feats-r2d2-n5000-r1024",
|
79 |
+
"model": {
|
80 |
+
"name": "r2d2",
|
81 |
+
"max_keypoints": 5000,
|
82 |
+
"reliability_threshold": 0.7,
|
83 |
+
"repetability_threshold": 0.7,
|
84 |
+
},
|
85 |
+
"preprocessing": {
|
86 |
+
"grayscale": False,
|
87 |
+
"force_resize": True,
|
88 |
+
"resize_max": 1600,
|
89 |
+
"width": 640,
|
90 |
+
"height": 480,
|
91 |
+
"dfactor": 8,
|
92 |
+
},
|
93 |
+
},
|
94 |
+
"d2net-ss": {
|
95 |
+
"output": "feats-d2net-ss",
|
96 |
+
"model": {
|
97 |
+
"name": "d2net",
|
98 |
+
"multiscale": False,
|
99 |
+
},
|
100 |
+
"preprocessing": {
|
101 |
+
"grayscale": False,
|
102 |
+
"resize_max": 1600,
|
103 |
+
},
|
104 |
+
},
|
105 |
+
"d2net-ms": {
|
106 |
+
"output": "feats-d2net-ms",
|
107 |
+
"model": {
|
108 |
+
"name": "d2net",
|
109 |
+
"multiscale": True,
|
110 |
+
},
|
111 |
+
"preprocessing": {
|
112 |
+
"grayscale": False,
|
113 |
+
"resize_max": 1600,
|
114 |
+
},
|
115 |
+
},
|
116 |
+
"rootsift": {
|
117 |
+
"output": "feats-sift",
|
118 |
+
"model": {
|
119 |
+
"name": "dog",
|
120 |
+
"max_keypoints": 5000,
|
121 |
+
},
|
122 |
+
"preprocessing": {
|
123 |
+
"grayscale": True,
|
124 |
+
"force_resize": True,
|
125 |
+
"resize_max": 1600,
|
126 |
+
"width": 640,
|
127 |
+
"height": 480,
|
128 |
+
"dfactor": 8,
|
129 |
+
},
|
130 |
+
},
|
131 |
+
"sift": {
|
132 |
+
"output": "feats-sift",
|
133 |
+
"model": {
|
134 |
+
"name": "dog",
|
135 |
+
"descriptor": "sift",
|
136 |
+
"max_keypoints": 5000,
|
137 |
+
},
|
138 |
+
"preprocessing": {
|
139 |
+
"grayscale": True,
|
140 |
+
"force_resize": True,
|
141 |
+
"resize_max": 1600,
|
142 |
+
"width": 640,
|
143 |
+
"height": 480,
|
144 |
+
"dfactor": 8,
|
145 |
+
},
|
146 |
+
},
|
147 |
+
"sosnet": {
|
148 |
+
"output": "feats-sosnet",
|
149 |
+
"model": {"name": "dog", "descriptor": "sosnet"},
|
150 |
+
"preprocessing": {
|
151 |
+
"grayscale": True,
|
152 |
+
"resize_max": 1600,
|
153 |
+
"force_resize": True,
|
154 |
+
"width": 640,
|
155 |
+
"height": 480,
|
156 |
+
"dfactor": 8,
|
157 |
+
},
|
158 |
+
},
|
159 |
+
"hardnet": {
|
160 |
+
"output": "feats-hardnet",
|
161 |
+
"model": {"name": "dog", "descriptor": "hardnet"},
|
162 |
+
"preprocessing": {
|
163 |
+
"grayscale": True,
|
164 |
+
"resize_max": 1600,
|
165 |
+
"force_resize": True,
|
166 |
+
"width": 640,
|
167 |
+
"height": 480,
|
168 |
+
"dfactor": 8,
|
169 |
+
},
|
170 |
+
},
|
171 |
+
"disk": {
|
172 |
+
"output": "feats-disk",
|
173 |
+
"model": {
|
174 |
+
"name": "disk",
|
175 |
+
"max_keypoints": 5000,
|
176 |
+
},
|
177 |
+
"preprocessing": {
|
178 |
+
"grayscale": False,
|
179 |
+
"resize_max": 1600,
|
180 |
+
},
|
181 |
+
},
|
182 |
+
"alike": {
|
183 |
+
"output": "feats-alike",
|
184 |
+
"model": {
|
185 |
+
"name": "alike",
|
186 |
+
"max_keypoints": 5000,
|
187 |
+
"use_relu": True,
|
188 |
+
"multiscale": False,
|
189 |
+
"detection_threshold": 0.5,
|
190 |
+
"top_k": -1,
|
191 |
+
"sub_pixel": False,
|
192 |
+
},
|
193 |
+
"preprocessing": {
|
194 |
+
"grayscale": False,
|
195 |
+
"resize_max": 1600,
|
196 |
+
},
|
197 |
+
},
|
198 |
+
"lanet": {
|
199 |
+
"output": "feats-lanet",
|
200 |
+
"model": {
|
201 |
+
"name": "lanet",
|
202 |
+
"keypoint_threshold": 0.1,
|
203 |
+
"max_keypoints": 5000,
|
204 |
+
},
|
205 |
+
"preprocessing": {
|
206 |
+
"grayscale": False,
|
207 |
+
"resize_max": 1600,
|
208 |
+
},
|
209 |
+
},
|
210 |
+
"darkfeat": {
|
211 |
+
"output": "feats-darkfeat-n5000-r1024",
|
212 |
+
"model": {
|
213 |
+
"name": "darkfeat",
|
214 |
+
"max_keypoints": 5000,
|
215 |
+
"reliability_threshold": 0.7,
|
216 |
+
"repetability_threshold": 0.7,
|
217 |
+
},
|
218 |
+
"preprocessing": {
|
219 |
+
"grayscale": False,
|
220 |
+
"force_resize": True,
|
221 |
+
"resize_max": 1600,
|
222 |
+
"width": 640,
|
223 |
+
"height": 480,
|
224 |
+
"dfactor": 8,
|
225 |
+
},
|
226 |
+
},
|
227 |
+
"dedode": {
|
228 |
+
"output": "feats-dedode-n5000-r1024",
|
229 |
+
"model": {
|
230 |
+
"name": "dedode",
|
231 |
+
"max_keypoints": 5000,
|
232 |
+
},
|
233 |
+
"preprocessing": {
|
234 |
+
"grayscale": False,
|
235 |
+
"force_resize": True,
|
236 |
+
"resize_max": 1024,
|
237 |
+
"width": 768,
|
238 |
+
"height": 768,
|
239 |
+
"dfactor": 8,
|
240 |
+
},
|
241 |
+
},
|
242 |
+
"example": {
|
243 |
+
"output": "feats-example-n5000-r1024",
|
244 |
+
"model": {
|
245 |
+
"name": "example",
|
246 |
+
"keypoint_threshold": 0.1,
|
247 |
+
"max_keypoints": 2000,
|
248 |
+
"model_name": "model.pth",
|
249 |
+
},
|
250 |
+
"preprocessing": {
|
251 |
+
"grayscale": False,
|
252 |
+
"force_resize": True,
|
253 |
+
"resize_max": 1024,
|
254 |
+
"width": 768,
|
255 |
+
"height": 768,
|
256 |
+
"dfactor": 8,
|
257 |
+
},
|
258 |
+
},
|
259 |
+
# Global descriptors
|
260 |
+
"dir": {
|
261 |
+
"output": "global-feats-dir",
|
262 |
+
"model": {"name": "dir"},
|
263 |
+
"preprocessing": {"resize_max": 1024},
|
264 |
+
},
|
265 |
+
"netvlad": {
|
266 |
+
"output": "global-feats-netvlad",
|
267 |
+
"model": {"name": "netvlad"},
|
268 |
+
"preprocessing": {"resize_max": 1024},
|
269 |
+
},
|
270 |
+
"openibl": {
|
271 |
+
"output": "global-feats-openibl",
|
272 |
+
"model": {"name": "openibl"},
|
273 |
+
"preprocessing": {"resize_max": 1024},
|
274 |
+
},
|
275 |
+
"cosplace": {
|
276 |
+
"output": "global-feats-cosplace",
|
277 |
+
"model": {"name": "cosplace"},
|
278 |
+
"preprocessing": {"resize_max": 1024},
|
279 |
+
},
|
280 |
+
}
|
281 |
+
|
282 |
+
|
283 |
+
def resize_image(image, size, interp):
|
284 |
+
if interp.startswith("cv2_"):
|
285 |
+
interp = getattr(cv2, "INTER_" + interp[len("cv2_") :].upper())
|
286 |
+
h, w = image.shape[:2]
|
287 |
+
if interp == cv2.INTER_AREA and (w < size[0] or h < size[1]):
|
288 |
+
interp = cv2.INTER_LINEAR
|
289 |
+
resized = cv2.resize(image, size, interpolation=interp)
|
290 |
+
elif interp.startswith("pil_"):
|
291 |
+
interp = getattr(PIL.Image, interp[len("pil_") :].upper())
|
292 |
+
resized = PIL.Image.fromarray(image.astype(np.uint8))
|
293 |
+
resized = resized.resize(size, resample=interp)
|
294 |
+
resized = np.asarray(resized, dtype=image.dtype)
|
295 |
+
else:
|
296 |
+
raise ValueError(f"Unknown interpolation {interp}.")
|
297 |
+
return resized
|
298 |
+
|
299 |
+
|
300 |
+
class ImageDataset(torch.utils.data.Dataset):
|
301 |
+
default_conf = {
|
302 |
+
"globs": ["*.jpg", "*.png", "*.jpeg", "*.JPG", "*.PNG"],
|
303 |
+
"grayscale": False,
|
304 |
+
"resize_max": None,
|
305 |
+
"force_resize": False,
|
306 |
+
"interpolation": "cv2_area", # pil_linear is more accurate but slower
|
307 |
+
}
|
308 |
+
|
309 |
+
def __init__(self, root, conf, paths=None):
|
310 |
+
self.conf = conf = SimpleNamespace(**{**self.default_conf, **conf})
|
311 |
+
self.root = root
|
312 |
+
|
313 |
+
if paths is None:
|
314 |
+
paths = []
|
315 |
+
for g in conf.globs:
|
316 |
+
paths += list(Path(root).glob("**/" + g))
|
317 |
+
if len(paths) == 0:
|
318 |
+
raise ValueError(f"Could not find any image in root: {root}.")
|
319 |
+
paths = sorted(list(set(paths)))
|
320 |
+
self.names = [i.relative_to(root).as_posix() for i in paths]
|
321 |
+
logger.info(f"Found {len(self.names)} images in root {root}.")
|
322 |
+
else:
|
323 |
+
if isinstance(paths, (Path, str)):
|
324 |
+
self.names = parse_image_lists(paths)
|
325 |
+
elif isinstance(paths, collections.Iterable):
|
326 |
+
self.names = [p.as_posix() if isinstance(p, Path) else p for p in paths]
|
327 |
+
else:
|
328 |
+
raise ValueError(f"Unknown format for path argument {paths}.")
|
329 |
+
|
330 |
+
for name in self.names:
|
331 |
+
if not (root / name).exists():
|
332 |
+
raise ValueError(f"Image {name} does not exist in root: {root}.")
|
333 |
+
|
334 |
+
def __getitem__(self, idx):
|
335 |
+
name = self.names[idx]
|
336 |
+
image = read_image(self.root / name, self.conf.grayscale)
|
337 |
+
image = image.astype(np.float32)
|
338 |
+
size = image.shape[:2][::-1]
|
339 |
+
|
340 |
+
if self.conf.resize_max and (
|
341 |
+
self.conf.force_resize or max(size) > self.conf.resize_max
|
342 |
+
):
|
343 |
+
scale = self.conf.resize_max / max(size)
|
344 |
+
size_new = tuple(int(round(x * scale)) for x in size)
|
345 |
+
image = resize_image(image, size_new, self.conf.interpolation)
|
346 |
+
|
347 |
+
if self.conf.grayscale:
|
348 |
+
image = image[None]
|
349 |
+
else:
|
350 |
+
image = image.transpose((2, 0, 1)) # HxWxC to CxHxW
|
351 |
+
image = image / 255.0
|
352 |
+
|
353 |
+
data = {
|
354 |
+
"image": image,
|
355 |
+
"original_size": np.array(size),
|
356 |
+
}
|
357 |
+
return data
|
358 |
+
|
359 |
+
def __len__(self):
|
360 |
+
return len(self.names)
|
361 |
+
|
362 |
+
|
363 |
+
def extract(model, image_0, conf):
|
364 |
+
default_conf = {
|
365 |
+
"grayscale": True,
|
366 |
+
"resize_max": 1024,
|
367 |
+
"dfactor": 8,
|
368 |
+
"cache_images": False,
|
369 |
+
"force_resize": False,
|
370 |
+
"width": 320,
|
371 |
+
"height": 240,
|
372 |
+
"interpolation": "cv2_area",
|
373 |
+
}
|
374 |
+
conf = SimpleNamespace(**{**default_conf, **conf})
|
375 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
376 |
+
|
377 |
+
def preprocess(image: np.ndarray, conf: SimpleNamespace):
|
378 |
+
image = image.astype(np.float32, copy=False)
|
379 |
+
size = image.shape[:2][::-1]
|
380 |
+
scale = np.array([1.0, 1.0])
|
381 |
+
if conf.resize_max:
|
382 |
+
scale = conf.resize_max / max(size)
|
383 |
+
if scale < 1.0:
|
384 |
+
size_new = tuple(int(round(x * scale)) for x in size)
|
385 |
+
image = resize_image(image, size_new, "cv2_area")
|
386 |
+
scale = np.array(size) / np.array(size_new)
|
387 |
+
if conf.force_resize:
|
388 |
+
image = resize_image(image, (conf.width, conf.height), "cv2_area")
|
389 |
+
size_new = (conf.width, conf.height)
|
390 |
+
scale = np.array(size) / np.array(size_new)
|
391 |
+
if conf.grayscale:
|
392 |
+
assert image.ndim == 2, image.shape
|
393 |
+
image = image[None]
|
394 |
+
else:
|
395 |
+
image = image.transpose((2, 0, 1)) # HxWxC to CxHxW
|
396 |
+
image = torch.from_numpy(image / 255.0).float()
|
397 |
+
|
398 |
+
# ensure that the size is divisible by dfactor
|
399 |
+
size_new = tuple(
|
400 |
+
map(lambda x: int(x // conf.dfactor * conf.dfactor), image.shape[-2:])
|
401 |
+
)
|
402 |
+
image = F.resize(image, size=size_new, antialias=True)
|
403 |
+
input_ = image.to(device, non_blocking=True)[None]
|
404 |
+
data = {
|
405 |
+
"image": input_,
|
406 |
+
"image_orig": image_0,
|
407 |
+
"original_size": np.array(size),
|
408 |
+
"size": np.array(image.shape[1:][::-1]),
|
409 |
+
}
|
410 |
+
return data
|
411 |
+
|
412 |
+
# convert to grayscale if needed
|
413 |
+
if len(image_0.shape) == 3 and conf.grayscale:
|
414 |
+
image0 = cv2.cvtColor(image_0, cv2.COLOR_RGB2GRAY)
|
415 |
+
else:
|
416 |
+
image0 = image_0
|
417 |
+
# The following lines are commented out because the input image is always in RGB mode.
|
418 |
+
# if not conf.grayscale and len(image_0.shape) == 3:
|
419 |
+
# image0 = image_0[:, :, ::-1] # BGR to RGB
|
420 |
+
data = preprocess(image0, conf)
|
421 |
+
pred = model({"image": data["image"]})
|
422 |
+
pred["image_size"] = original_size = data["original_size"]
|
423 |
+
pred = {**pred, **data}
|
424 |
+
return pred
|
425 |
+
|
426 |
+
|
427 |
+
@torch.no_grad()
|
428 |
+
def main(
|
429 |
+
conf: Dict,
|
430 |
+
image_dir: Path,
|
431 |
+
export_dir: Optional[Path] = None,
|
432 |
+
as_half: bool = True,
|
433 |
+
image_list: Optional[Union[Path, List[str]]] = None,
|
434 |
+
feature_path: Optional[Path] = None,
|
435 |
+
overwrite: bool = False,
|
436 |
+
) -> Path:
|
437 |
+
logger.info(
|
438 |
+
"Extracting local features with configuration:" f"\n{pprint.pformat(conf)}"
|
439 |
+
)
|
440 |
+
|
441 |
+
dataset = ImageDataset(image_dir, conf["preprocessing"], image_list)
|
442 |
+
if feature_path is None:
|
443 |
+
feature_path = Path(export_dir, conf["output"] + ".h5")
|
444 |
+
feature_path.parent.mkdir(exist_ok=True, parents=True)
|
445 |
+
skip_names = set(
|
446 |
+
list_h5_names(feature_path) if feature_path.exists() and not overwrite else ()
|
447 |
+
)
|
448 |
+
dataset.names = [n for n in dataset.names if n not in skip_names]
|
449 |
+
if len(dataset.names) == 0:
|
450 |
+
logger.info("Skipping the extraction.")
|
451 |
+
return feature_path
|
452 |
+
|
453 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
454 |
+
Model = dynamic_load(extractors, conf["model"]["name"])
|
455 |
+
model = Model(conf["model"]).eval().to(device)
|
456 |
+
|
457 |
+
loader = torch.utils.data.DataLoader(
|
458 |
+
dataset, num_workers=1, shuffle=False, pin_memory=True
|
459 |
+
)
|
460 |
+
for idx, data in enumerate(tqdm(loader)):
|
461 |
+
name = dataset.names[idx]
|
462 |
+
pred = model({"image": data["image"].to(device, non_blocking=True)})
|
463 |
+
pred = {k: v[0].cpu().numpy() for k, v in pred.items()}
|
464 |
+
|
465 |
+
pred["image_size"] = original_size = data["original_size"][0].numpy()
|
466 |
+
if "keypoints" in pred:
|
467 |
+
size = np.array(data["image"].shape[-2:][::-1])
|
468 |
+
scales = (original_size / size).astype(np.float32)
|
469 |
+
pred["keypoints"] = (pred["keypoints"] + 0.5) * scales[None] - 0.5
|
470 |
+
if "scales" in pred:
|
471 |
+
pred["scales"] *= scales.mean()
|
472 |
+
# add keypoint uncertainties scaled to the original resolution
|
473 |
+
uncertainty = getattr(model, "detection_noise", 1) * scales.mean()
|
474 |
+
|
475 |
+
if as_half:
|
476 |
+
for k in pred:
|
477 |
+
dt = pred[k].dtype
|
478 |
+
if (dt == np.float32) and (dt != np.float16):
|
479 |
+
pred[k] = pred[k].astype(np.float16)
|
480 |
+
|
481 |
+
with h5py.File(str(feature_path), "a", libver="latest") as fd:
|
482 |
+
try:
|
483 |
+
if name in fd:
|
484 |
+
del fd[name]
|
485 |
+
grp = fd.create_group(name)
|
486 |
+
for k, v in pred.items():
|
487 |
+
grp.create_dataset(k, data=v)
|
488 |
+
if "keypoints" in pred:
|
489 |
+
grp["keypoints"].attrs["uncertainty"] = uncertainty
|
490 |
+
except OSError as error:
|
491 |
+
if "No space left on device" in error.args[0]:
|
492 |
+
logger.error(
|
493 |
+
"Out of disk space: storing features on disk can take "
|
494 |
+
"significant space, did you enable the as_half flag?"
|
495 |
+
)
|
496 |
+
del grp, fd[name]
|
497 |
+
raise error
|
498 |
+
|
499 |
+
del pred
|
500 |
+
|
501 |
+
logger.info("Finished exporting features.")
|
502 |
+
return feature_path
|
503 |
+
|
504 |
+
|
505 |
+
if __name__ == "__main__":
|
506 |
+
parser = argparse.ArgumentParser()
|
507 |
+
parser.add_argument("--image_dir", type=Path, required=True)
|
508 |
+
parser.add_argument("--export_dir", type=Path, required=True)
|
509 |
+
parser.add_argument(
|
510 |
+
"--conf", type=str, default="superpoint_aachen", choices=list(confs.keys())
|
511 |
+
)
|
512 |
+
parser.add_argument("--as_half", action="store_true")
|
513 |
+
parser.add_argument("--image_list", type=Path)
|
514 |
+
parser.add_argument("--feature_path", type=Path)
|
515 |
+
args = parser.parse_args()
|
516 |
+
main(confs[args.conf], args.image_dir, args.export_dir, args.as_half)
|
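Besides the CLI entry point above, extract_features.main can be called directly from Python with one of the presets in confs; it writes the features of every image in the folder into a single HDF5 file named after conf["output"]. A minimal sketch (the output directory is hypothetical; the image folder is the sample data shipped with this commit):

from pathlib import Path
from hloc import extract_features

conf = extract_features.confs["superpoint_aachen"]
image_dir = Path("datasets/sacre_coeur/mapping")   # sample images from this commit
export_dir = Path("outputs/sacre_coeur")           # hypothetical output location

# Writes outputs/sacre_coeur/feats-superpoint-n4096-r1024.h5 and returns its path.
feature_path = extract_features.main(conf, image_dir, export_dir, as_half=True)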
hloc/extractors/__init__.py
ADDED
File without changes
|
hloc/extractors/alike.py
ADDED
@@ -0,0 +1,52 @@
1 |
+
import sys
|
2 |
+
from pathlib import Path
|
3 |
+
import subprocess
|
4 |
+
import torch
|
5 |
+
|
6 |
+
from ..utils.base_model import BaseModel
|
7 |
+
|
8 |
+
alike_path = Path(__file__).parent / "../../third_party/ALIKE"
|
9 |
+
sys.path.append(str(alike_path))
|
10 |
+
from alike import ALike as Alike_
|
11 |
+
from alike import configs
|
12 |
+
|
13 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
14 |
+
|
15 |
+
|
16 |
+
class Alike(BaseModel):
|
17 |
+
default_conf = {
|
18 |
+
"model_name": "alike-t", # 'alike-t', 'alike-s', 'alike-n', 'alike-l'
|
19 |
+
"use_relu": True,
|
20 |
+
"multiscale": False,
|
21 |
+
"max_keypoints": 1000,
|
22 |
+
"detection_threshold": 0.5,
|
23 |
+
"top_k": -1,
|
24 |
+
"sub_pixel": False,
|
25 |
+
}
|
26 |
+
|
27 |
+
required_inputs = ["image"]
|
28 |
+
|
29 |
+
def _init(self, conf):
|
30 |
+
self.net = Alike_(
|
31 |
+
**configs[conf["model_name"]],
|
32 |
+
device=device,
|
33 |
+
top_k=conf["top_k"],
|
34 |
+
scores_th=conf["detection_threshold"],
|
35 |
+
n_limit=conf["max_keypoints"],
|
36 |
+
)
|
37 |
+
|
38 |
+
def _forward(self, data):
|
39 |
+
image = data["image"]
|
40 |
+
image = image.permute(0, 2, 3, 1).squeeze()
|
41 |
+
image = image.cpu().numpy() * 255.0
|
42 |
+
pred = self.net(image, sub_pixel=self.conf["sub_pixel"])
|
43 |
+
|
44 |
+
keypoints = pred["keypoints"]
|
45 |
+
descriptors = pred["descriptors"]
|
46 |
+
scores = pred["scores"]
|
47 |
+
|
48 |
+
return {
|
49 |
+
"keypoints": torch.from_numpy(keypoints)[None],
|
50 |
+
"scores": torch.from_numpy(scores)[None],
|
51 |
+
"descriptors": torch.from_numpy(descriptors.T)[None],
|
52 |
+
}
|
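Alike follows the same extractor contract as the other wrappers under hloc/extractors: a BaseModel subclass declaring default_conf and required_inputs, with _init loading the backbone and _forward mapping {"image": BxCxHxW tensor} to keypoints, scores and descriptors. A sketch of how extract_features drives such a model (illustrative only; it assumes the third_party/ALIKE submodule and its weights are available, and that BaseModel merges a partial conf with default_conf):

import torch
from hloc import extractors
from hloc.utils.base_model import dynamic_load

Model = dynamic_load(extractors, "alike")      # resolves hloc/extractors/alike.py
model = Model({"max_keypoints": 1000}).eval()  # remaining keys come from default_conf

image = torch.rand(1, 3, 480, 640)             # N x C x H x W, RGB in [0, 1]
pred = model({"image": image})
# pred["keypoints"]: 1 x N x 2 pixel coordinates, pred["descriptors"]: 1 x D x N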
hloc/extractors/cosplace.py
ADDED
@@ -0,0 +1,44 @@
1 |
+
"""
|
2 |
+
Code for loading models trained with CosPlace as a global features extractor
|
3 |
+
for geolocalization through image retrieval.
|
4 |
+
Multiple models are available with different backbones. Below is a summary of
|
5 |
+
the available models (backbone : list of available output descriptor
|
6 |
+
dimensionalities). For example, you can use a ResNet50-based model with
|
7 |
+
descriptor dimensionality 1024.
|
8 |
+
ResNet18: [32, 64, 128, 256, 512]
|
9 |
+
ResNet50: [32, 64, 128, 256, 512, 1024, 2048]
|
10 |
+
ResNet101: [32, 64, 128, 256, 512, 1024, 2048]
|
11 |
+
ResNet152: [32, 64, 128, 256, 512, 1024, 2048]
|
12 |
+
VGG16: [ 64, 128, 256, 512]
|
13 |
+
|
14 |
+
CosPlace paper: https://arxiv.org/abs/2204.02287
|
15 |
+
"""
|
16 |
+
|
17 |
+
import torch
|
18 |
+
import torchvision.transforms as tvf
|
19 |
+
|
20 |
+
from ..utils.base_model import BaseModel
|
21 |
+
|
22 |
+
|
23 |
+
class CosPlace(BaseModel):
|
24 |
+
default_conf = {"backbone": "ResNet50", "fc_output_dim": 2048}
|
25 |
+
required_inputs = ["image"]
|
26 |
+
|
27 |
+
def _init(self, conf):
|
28 |
+
self.net = torch.hub.load(
|
29 |
+
"gmberton/CosPlace",
|
30 |
+
"get_trained_model",
|
31 |
+
backbone=conf["backbone"],
|
32 |
+
fc_output_dim=conf["fc_output_dim"],
|
33 |
+
).eval()
|
34 |
+
|
35 |
+
mean = [0.485, 0.456, 0.406]
|
36 |
+
std = [0.229, 0.224, 0.225]
|
37 |
+
self.norm_rgb = tvf.Normalize(mean=mean, std=std)
|
38 |
+
|
39 |
+
def _forward(self, data):
|
40 |
+
image = self.norm_rgb(data["image"])
|
41 |
+
desc = self.net(image)
|
42 |
+
return {
|
43 |
+
"global_descriptor": desc,
|
44 |
+
}
|
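The backbone table in the docstring above determines which (backbone, fc_output_dim) pairs are valid. To use a lighter model than the ResNet50/2048 default, one can override the preset from extract_features before running it (a sketch; the override itself is not part of the commit):

from hloc import extract_features

conf = dict(extract_features.confs["cosplace"])    # shallow copy of the preset
conf["model"] = {"name": "cosplace", "backbone": "ResNet18", "fc_output_dim": 512}
# conf can now be passed to extract_features.main(...) like any other preset.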
hloc/extractors/d2net.py
ADDED
@@ -0,0 +1,57 @@
1 |
+
import sys
|
2 |
+
from pathlib import Path
|
3 |
+
import subprocess
|
4 |
+
import torch
|
5 |
+
|
6 |
+
from ..utils.base_model import BaseModel
|
7 |
+
|
8 |
+
d2net_path = Path(__file__).parent / "../../third_party/d2net"
|
9 |
+
sys.path.append(str(d2net_path))
|
10 |
+
from lib.model_test import D2Net as _D2Net
|
11 |
+
from lib.pyramid import process_multiscale
|
12 |
+
|
13 |
+
|
14 |
+
class D2Net(BaseModel):
|
15 |
+
default_conf = {
|
16 |
+
"model_name": "d2_tf.pth",
|
17 |
+
"checkpoint_dir": d2net_path / "models",
|
18 |
+
"use_relu": True,
|
19 |
+
"multiscale": False,
|
20 |
+
}
|
21 |
+
required_inputs = ["image"]
|
22 |
+
|
23 |
+
def _init(self, conf):
|
24 |
+
model_file = conf["checkpoint_dir"] / conf["model_name"]
|
25 |
+
if not model_file.exists():
|
26 |
+
model_file.parent.mkdir(exist_ok=True)
|
27 |
+
cmd = [
|
28 |
+
"wget",
|
29 |
+
"https://dsmn.ml/files/d2-net/" + conf["model_name"],
|
30 |
+
"-O",
|
31 |
+
str(model_file),
|
32 |
+
]
|
33 |
+
subprocess.run(cmd, check=True)
|
34 |
+
|
35 |
+
self.net = _D2Net(
|
36 |
+
model_file=model_file, use_relu=conf["use_relu"], use_cuda=False
|
37 |
+
)
|
38 |
+
|
39 |
+
def _forward(self, data):
|
40 |
+
image = data["image"]
|
41 |
+
image = image.flip(1) # RGB -> BGR
|
42 |
+
norm = image.new_tensor([103.939, 116.779, 123.68])
|
43 |
+
image = image * 255 - norm.view(1, 3, 1, 1) # caffe normalization
|
44 |
+
|
45 |
+
if self.conf["multiscale"]:
|
46 |
+
keypoints, scores, descriptors = process_multiscale(image, self.net)
|
47 |
+
else:
|
48 |
+
keypoints, scores, descriptors = process_multiscale(
|
49 |
+
image, self.net, scales=[1]
|
50 |
+
)
|
51 |
+
keypoints = keypoints[:, [1, 0]] # (x, y) and remove the scale
|
52 |
+
|
53 |
+
return {
|
54 |
+
"keypoints": torch.from_numpy(keypoints)[None],
|
55 |
+
"scores": torch.from_numpy(scores)[None],
|
56 |
+
"descriptors": torch.from_numpy(descriptors.T)[None],
|
57 |
+
}
|
hloc/extractors/darkfeat.py
ADDED
@@ -0,0 +1,57 @@
1 |
+
import sys
|
2 |
+
from pathlib import Path
|
3 |
+
import subprocess
|
4 |
+
import logging
|
5 |
+
|
6 |
+
from ..utils.base_model import BaseModel
|
7 |
+
|
8 |
+
logger = logging.getLogger(__name__)
|
9 |
+
|
10 |
+
darkfeat_path = Path(__file__).parent / "../../third_party/DarkFeat"
|
11 |
+
sys.path.append(str(darkfeat_path))
|
12 |
+
from darkfeat import DarkFeat as DarkFeat_
|
13 |
+
|
14 |
+
|
15 |
+
class DarkFeat(BaseModel):
|
16 |
+
default_conf = {
|
17 |
+
"model_name": "DarkFeat.pth",
|
18 |
+
"max_keypoints": 1000,
|
19 |
+
"detection_threshold": 0.5,
|
20 |
+
"sub_pixel": False,
|
21 |
+
}
|
22 |
+
weight_urls = {
|
23 |
+
"DarkFeat.pth": "https://drive.google.com/uc?id=1Thl6m8NcmQ7zSAF-1_xaFs3F4H8UU6HX&confirm=t",
|
24 |
+
}
|
25 |
+
proxy = "http://localhost:1080"
|
26 |
+
required_inputs = ["image"]
|
27 |
+
|
28 |
+
def _init(self, conf):
|
29 |
+
model_path = darkfeat_path / "checkpoints" / conf["model_name"]
|
30 |
+
link = self.weight_urls[conf["model_name"]]
|
31 |
+
if not model_path.exists():
|
32 |
+
model_path.parent.mkdir(exist_ok=True)
|
33 |
+
cmd_wo_proxy = ["gdown", link, "-O", str(model_path)]
|
34 |
+
cmd = ["gdown", link, "-O", str(model_path), "--proxy", self.proxy]
|
35 |
+
logger.info(f"Downloading the DarkFeat model with `{cmd_wo_proxy}`.")
|
36 |
+
try:
|
37 |
+
subprocess.run(cmd_wo_proxy, check=True)
|
38 |
+
except subprocess.CalledProcessError as e:
|
39 |
+
logger.info(f"Downloading the DarkFeat model with `{cmd}`.")
|
40 |
+
try:
|
41 |
+
subprocess.run(cmd, check=True)
|
42 |
+
except subprocess.CalledProcessError as e:
|
43 |
+
logger.error(f"Failed to download the DarkFeat model.")
|
44 |
+
raise e
|
45 |
+
|
46 |
+
self.net = DarkFeat_(model_path)
|
47 |
+
|
48 |
+
def _forward(self, data):
|
49 |
+
pred = self.net({"image": data["image"]})
|
50 |
+
keypoints = pred["keypoints"]
|
51 |
+
descriptors = pred["descriptors"]
|
52 |
+
scores = pred["scores"]
|
53 |
+
return {
|
54 |
+
"keypoints": keypoints[None], # 1 x N x 2
|
55 |
+
"scores": scores[None], # 1 x N
|
56 |
+
"descriptors": descriptors[None], # 1 x 128 x N
|
57 |
+
}
|
hloc/extractors/dedode.py
ADDED
@@ -0,0 +1,102 @@
1 |
+
import sys
|
2 |
+
from pathlib import Path
|
3 |
+
import subprocess
|
4 |
+
import logging
|
5 |
+
import torch
|
6 |
+
from PIL import Image
|
7 |
+
from ..utils.base_model import BaseModel
|
8 |
+
import torchvision.transforms as transforms
|
9 |
+
|
10 |
+
dedode_path = Path(__file__).parent / "../../third_party/DeDoDe"
|
11 |
+
sys.path.append(str(dedode_path))
|
12 |
+
|
13 |
+
from DeDoDe import dedode_detector_L, dedode_descriptor_B
|
14 |
+
from DeDoDe.utils import to_pixel_coords
|
15 |
+
|
16 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
17 |
+
logger = logging.getLogger(__name__)
|
18 |
+
|
19 |
+
|
20 |
+
class DeDoDe(BaseModel):
|
21 |
+
default_conf = {
|
22 |
+
"name": "dedode",
|
23 |
+
"model_detector_name": "dedode_detector_L.pth",
|
24 |
+
"model_descriptor_name": "dedode_descriptor_B.pth",
|
25 |
+
"max_keypoints": 2000,
|
26 |
+
"match_threshold": 0.2,
|
27 |
+
"dense": False, # Now fixed to be false
|
28 |
+
}
|
29 |
+
required_inputs = [
|
30 |
+
"image",
|
31 |
+
]
|
32 |
+
weight_urls = {
|
33 |
+
"dedode_detector_L.pth": "https://github.com/Parskatt/DeDoDe/releases/download/dedode_pretrained_models/dedode_detector_L.pth",
|
34 |
+
"dedode_descriptor_B.pth": "https://github.com/Parskatt/DeDoDe/releases/download/dedode_pretrained_models/dedode_descriptor_B.pth",
|
35 |
+
}
|
36 |
+
|
37 |
+
# Initialize the line matcher
|
38 |
+
def _init(self, conf):
|
39 |
+
model_detector_path = dedode_path / "pretrained" / conf["model_detector_name"]
|
40 |
+
model_descriptor_path = (
|
41 |
+
dedode_path / "pretrained" / conf["model_descriptor_name"]
|
42 |
+
)
|
43 |
+
|
44 |
+
self.normalizer = transforms.Normalize(
|
45 |
+
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
|
46 |
+
)
|
47 |
+
# Download the model.
|
48 |
+
if not model_detector_path.exists():
|
49 |
+
model_detector_path.parent.mkdir(exist_ok=True)
|
50 |
+
link = self.weight_urls[conf["model_detector_name"]]
|
51 |
+
cmd = ["wget", link, "-O", str(model_detector_path)]
|
52 |
+
logger.info(f"Downloading the DeDoDe detector model with `{cmd}`.")
|
53 |
+
subprocess.run(cmd, check=True)
|
54 |
+
|
55 |
+
if not model_descriptor_path.exists():
|
56 |
+
model_descriptor_path.parent.mkdir(exist_ok=True)
|
57 |
+
link = self.weight_urls[conf["model_descriptor_name"]]
|
58 |
+
cmd = ["wget", link, "-O", str(model_descriptor_path)]
|
59 |
+
logger.info(f"Downloading the DeDoDe descriptor model with `{cmd}`.")
|
60 |
+
subprocess.run(cmd, check=True)
|
61 |
+
|
62 |
+
logger.info(f"Loading DeDoDe model...")
|
63 |
+
|
64 |
+
# load the model
|
65 |
+
weights_detector = torch.load(model_detector_path, map_location="cpu")
|
66 |
+
weights_descriptor = torch.load(model_descriptor_path, map_location="cpu")
|
67 |
+
self.detector = dedode_detector_L(weights=weights_detector)
|
68 |
+
self.descriptor = dedode_descriptor_B(weights=weights_descriptor)
|
69 |
+
logger.info("DeDoDe model loaded.")
|
70 |
+
|
71 |
+
def _forward(self, data):
|
72 |
+
"""
|
73 |
+
data: dict, keys: {'image'}
|
74 |
+
image shape: N x C x H x W
|
75 |
+
color mode: RGB
|
76 |
+
"""
|
77 |
+
img0 = self.normalizer(data["image"].squeeze()).float()[None]
|
78 |
+
H_A, W_A = img0.shape[2:]
|
79 |
+
|
80 |
+
# step 1: detect keypoints
|
81 |
+
detections_A = None
|
82 |
+
batch_A = {"image": img0}
|
83 |
+
if self.conf["dense"]:
|
84 |
+
detections_A = self.detector.detect_dense(batch_A)
|
85 |
+
else:
|
86 |
+
detections_A = self.detector.detect(
|
87 |
+
batch_A, num_keypoints=self.conf["max_keypoints"]
|
88 |
+
)
|
89 |
+
keypoints_A, P_A = detections_A["keypoints"], detections_A["confidence"]
|
90 |
+
|
91 |
+
# step 2: describe keypoints
|
92 |
+
# dim: 1 x N x 256
|
93 |
+
description_A = self.descriptor.describe_keypoints(batch_A, keypoints_A)[
|
94 |
+
"descriptions"
|
95 |
+
]
|
96 |
+
keypoints_A = to_pixel_coords(keypoints_A, H_A, W_A)
|
97 |
+
|
98 |
+
return {
|
99 |
+
"keypoints": keypoints_A, # 1 x N x 2
|
100 |
+
"descriptors": description_A.permute(0, 2, 1), # 1 x 256 x N
|
101 |
+
"scores": P_A, # 1 x N
|
102 |
+
}
|
hloc/extractors/dir.py
ADDED
@@ -0,0 +1,76 @@
1 |
+
import sys
|
2 |
+
from pathlib import Path
|
3 |
+
import torch
|
4 |
+
from zipfile import ZipFile
|
5 |
+
import os
|
6 |
+
import sklearn
|
7 |
+
import gdown
|
8 |
+
|
9 |
+
from ..utils.base_model import BaseModel
|
10 |
+
|
11 |
+
sys.path.append(str(Path(__file__).parent / "../../third_party/deep-image-retrieval"))
|
12 |
+
os.environ["DB_ROOT"] = "" # required by dirtorch
|
13 |
+
|
14 |
+
from dirtorch.utils import common # noqa: E402
|
15 |
+
from dirtorch.extract_features import load_model # noqa: E402
|
16 |
+
|
17 |
+
# The DIR model checkpoints (pickle files) include sklearn.decomposition.pca,
|
18 |
+
# which has been deprecated in sklearn v0.24
|
19 |
+
# and must be explicitly imported with `from sklearn.decomposition import PCA`.
|
20 |
+
# This is a hacky workaround to maintain forward compatibility.
|
21 |
+
sys.modules["sklearn.decomposition.pca"] = sklearn.decomposition._pca
|
22 |
+
|
23 |
+
|
24 |
+
class DIR(BaseModel):
|
25 |
+
default_conf = {
|
26 |
+
"model_name": "Resnet-101-AP-GeM",
|
27 |
+
"whiten_name": "Landmarks_clean",
|
28 |
+
"whiten_params": {
|
29 |
+
"whitenp": 0.25,
|
30 |
+
"whitenv": None,
|
31 |
+
"whitenm": 1.0,
|
32 |
+
},
|
33 |
+
"pooling": "gem",
|
34 |
+
"gemp": 3,
|
35 |
+
}
|
36 |
+
required_inputs = ["image"]
|
37 |
+
|
38 |
+
dir_models = {
|
39 |
+
"Resnet-101-AP-GeM": "https://docs.google.com/uc?export=download&id=1UWJGDuHtzaQdFhSMojoYVQjmCXhIwVvy",
|
40 |
+
}
|
41 |
+
|
42 |
+
def _init(self, conf):
|
43 |
+
checkpoint = Path(torch.hub.get_dir(), "dirtorch", conf["model_name"] + ".pt")
|
44 |
+
if not checkpoint.exists():
|
45 |
+
checkpoint.parent.mkdir(exist_ok=True, parents=True)
|
46 |
+
link = self.dir_models[conf["model_name"]]
|
47 |
+
gdown.download(str(link), str(checkpoint) + ".zip", quiet=False)
|
48 |
+
zf = ZipFile(str(checkpoint) + ".zip", "r")
|
49 |
+
zf.extractall(checkpoint.parent)
|
50 |
+
zf.close()
|
51 |
+
os.remove(str(checkpoint) + ".zip")
|
52 |
+
|
53 |
+
self.net = load_model(checkpoint, False) # first load on CPU
|
54 |
+
if conf["whiten_name"]:
|
55 |
+
assert conf["whiten_name"] in self.net.pca
|
56 |
+
|
57 |
+
def _forward(self, data):
|
58 |
+
image = data["image"]
|
59 |
+
assert image.shape[1] == 3
|
60 |
+
mean = self.net.preprocess["mean"]
|
61 |
+
std = self.net.preprocess["std"]
|
62 |
+
image = image - image.new_tensor(mean)[:, None, None]
|
63 |
+
image = image / image.new_tensor(std)[:, None, None]
|
64 |
+
|
65 |
+
desc = self.net(image)
|
66 |
+
desc = desc.unsqueeze(0) # batch dimension
|
67 |
+
if self.conf["whiten_name"]:
|
68 |
+
pca = self.net.pca[self.conf["whiten_name"]]
|
69 |
+
desc = common.whiten_features(
|
70 |
+
desc.cpu().numpy(), pca, **self.conf["whiten_params"]
|
71 |
+
)
|
72 |
+
desc = torch.from_numpy(desc)
|
73 |
+
|
74 |
+
return {
|
75 |
+
"global_descriptor": desc,
|
76 |
+
}
|
hloc/extractors/disk.py
ADDED
@@ -0,0 +1,32 @@
+import kornia
+
+from ..utils.base_model import BaseModel
+
+
+class DISK(BaseModel):
+    default_conf = {
+        "weights": "depth",
+        "max_keypoints": None,
+        "nms_window_size": 5,
+        "detection_threshold": 0.0,
+        "pad_if_not_divisible": True,
+    }
+    required_inputs = ["image"]
+
+    def _init(self, conf):
+        self.model = kornia.feature.DISK.from_pretrained(conf["weights"])
+
+    def _forward(self, data):
+        image = data["image"]
+        features = self.model(
+            image,
+            n=self.conf["max_keypoints"],
+            window_size=self.conf["nms_window_size"],
+            score_threshold=self.conf["detection_threshold"],
+            pad_if_not_divisible=self.conf["pad_if_not_divisible"],
+        )
+        return {
+            "keypoints": [f.keypoints for f in features],
+            "scores": [f.detection_scores for f in features],
+            "descriptors": [f.descriptors.t() for f in features],
+        }
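The wrapper above is a thin shim over kornia's pretrained DISK. A standalone sketch of the same call, assuming kornia is installed and able to download the pretrained weights; the random input image is a placeholder:

import torch
import kornia

disk = kornia.feature.DISK.from_pretrained("depth").eval()  # downloads weights on first use
image = torch.rand(1, 3, 480, 640)  # placeholder RGB batch in [0, 1]
with torch.no_grad():
    feats = disk(
        image,
        n=2000,
        window_size=5,
        score_threshold=0.0,
        pad_if_not_divisible=True,
    )
f = feats[0]  # one DISKFeatures object per image in the batch
print(f.keypoints.shape, f.descriptors.shape, f.detection_scores.shape)
# at most 2000 keypoints, 128-D descriptors, one score per keypoint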
hloc/extractors/dog.py
ADDED
@@ -0,0 +1,131 @@
+import kornia
+from kornia.feature.laf import laf_from_center_scale_ori, extract_patches_from_pyramid
+import numpy as np
+import torch
+import pycolmap
+
+from ..utils.base_model import BaseModel
+
+
+EPS = 1e-6
+
+
+def sift_to_rootsift(x):
+    x = x / (np.linalg.norm(x, ord=1, axis=-1, keepdims=True) + EPS)
+    x = np.sqrt(x.clip(min=EPS))
+    x = x / (np.linalg.norm(x, axis=-1, keepdims=True) + EPS)
+    return x
+
+
+class DoG(BaseModel):
+    default_conf = {
+        "options": {
+            "first_octave": 0,
+            "peak_threshold": 0.01,
+        },
+        "descriptor": "rootsift",
+        "max_keypoints": -1,
+        "patch_size": 32,
+        "mr_size": 12,
+    }
+    required_inputs = ["image"]
+    detection_noise = 1.0
+    max_batch_size = 1024
+
+    def _init(self, conf):
+        if conf["descriptor"] == "sosnet":
+            self.describe = kornia.feature.SOSNet(pretrained=True)
+        elif conf["descriptor"] == "hardnet":
+            self.describe = kornia.feature.HardNet(pretrained=True)
+        elif conf["descriptor"] not in ["sift", "rootsift"]:
+            raise ValueError(f'Unknown descriptor: {conf["descriptor"]}')
+
+        self.sift = None  # lazily instantiated on the first image
+        self.device = torch.device("cpu")
+
+    def to(self, *args, **kwargs):
+        device = kwargs.get("device")
+        if device is None:
+            match = [a for a in args if isinstance(a, (torch.device, str))]
+            if len(match) > 0:
+                device = match[0]
+        if device is not None:
+            self.device = torch.device(device)
+        return super().to(*args, **kwargs)
+
+    def _forward(self, data):
+        image = data["image"]
+        image_np = image.cpu().numpy()[0, 0]
+        assert image.shape[1] == 1
+        assert image_np.min() >= -EPS and image_np.max() <= 1 + EPS
+
+        if self.sift is None:
+            use_gpu = pycolmap.has_cuda and self.device.type == "cuda"
+            options = {**self.conf["options"]}
+            if self.conf["descriptor"] == "rootsift":
+                options["normalization"] = pycolmap.Normalization.L1_ROOT
+            else:
+                options["normalization"] = pycolmap.Normalization.L2
+            self.sift = pycolmap.Sift(
+                options=pycolmap.SiftExtractionOptions(options),
+                device=getattr(pycolmap.Device, "cuda" if use_gpu else "cpu"),
+            )
+
+        keypoints, scores, descriptors = self.sift.extract(image_np)
+        scales = keypoints[:, 2]
+        oris = np.rad2deg(keypoints[:, 3])
+
+        if self.conf["descriptor"] in ["sift", "rootsift"]:
+            # We still renormalize because COLMAP does not normalize well,
+            # maybe due to numerical errors
+            if self.conf["descriptor"] == "rootsift":
+                descriptors = sift_to_rootsift(descriptors)
+            descriptors = torch.from_numpy(descriptors)
+        elif self.conf["descriptor"] in ("sosnet", "hardnet"):
+            center = keypoints[:, :2] + 0.5
+            laf_scale = scales * self.conf["mr_size"] / 2
+            laf_ori = -oris
+            lafs = laf_from_center_scale_ori(
+                torch.from_numpy(center)[None],
+                torch.from_numpy(laf_scale)[None, :, None, None],
+                torch.from_numpy(laf_ori)[None, :, None],
+            ).to(image.device)
+            patches = extract_patches_from_pyramid(
+                image, lafs, PS=self.conf["patch_size"]
+            )[0]
+            descriptors = patches.new_zeros((len(patches), 128))
+            if len(patches) > 0:
+                for start_idx in range(0, len(patches), self.max_batch_size):
+                    end_idx = min(len(patches), start_idx + self.max_batch_size)
+                    descriptors[start_idx:end_idx] = self.describe(
+                        patches[start_idx:end_idx]
+                    )
+        else:
+            raise ValueError(f'Unknown descriptor: {self.conf["descriptor"]}')
+
+        keypoints = torch.from_numpy(keypoints[:, :2])  # keep only x, y
+        scales = torch.from_numpy(scales)
+        oris = torch.from_numpy(oris)
+        scores = torch.from_numpy(scores)
+        if self.conf["max_keypoints"] != -1:
+            # TODO: check that the scores from PyCOLMAP are 100% correct,
+            # follow https://github.com/mihaidusmanu/pycolmap/issues/8
+            max_number = (
+                scores.shape[0]
+                if scores.shape[0] < self.conf["max_keypoints"]
+                else self.conf["max_keypoints"]
+            )
+            values, indices = torch.topk(scores, max_number)
+            keypoints = keypoints[indices]
+            scales = scales[indices]
+            oris = oris[indices]
+            scores = scores[indices]
+            descriptors = descriptors[indices]
+
+        return {
+            "keypoints": keypoints[None],
+            "scales": scales[None],
+            "oris": oris[None],
+            "scores": scores[None],
+            "descriptors": descriptors.T[None],
+        }
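`sift_to_rootsift` converts SIFT descriptors to RootSIFT: L1-normalize, take the element-wise square root, then L2-normalize. The same function run on dummy data, purely for illustration:

import numpy as np

EPS = 1e-6

def sift_to_rootsift(x):
    x = x / (np.linalg.norm(x, ord=1, axis=-1, keepdims=True) + EPS)
    x = np.sqrt(x.clip(min=EPS))
    x = x / (np.linalg.norm(x, axis=-1, keepdims=True) + EPS)
    return x

desc = np.random.rand(5, 128).astype(np.float32)  # dummy SIFT descriptors
rootsift = sift_to_rootsift(desc)
print(np.linalg.norm(rootsift, axis=-1))  # each row is approximately unit L2 norm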
hloc/extractors/example.py
ADDED
@@ -0,0 +1,58 @@
+import sys
+from pathlib import Path
+import subprocess
+import torch
+import logging
+
+from ..utils.base_model import BaseModel
+
+example_path = Path(__file__).parent / "../../third_party/example"
+sys.path.append(str(example_path))
+
+# import some modules here
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+logger = logging.getLogger(__name__)
+
+
+class Example(BaseModel):
+    # change to your default configs
+    default_conf = {
+        "name": "example",
+        "keypoint_threshold": 0.1,
+        "max_keypoints": 2000,
+        "model_name": "model.pth",
+    }
+    required_inputs = ["image"]
+
+    def _init(self, conf):
+
+        # set checkpoints paths if needed
+        model_path = example_path / "checkpoints" / f'{conf["model_name"]}'
+        if not model_path.exists():
+            logger.info(f"No model found at {model_path}")
+
+        # init model
+        self.net = callable
+        # self.net = ExampleNet(is_test=True)
+        state_dict = torch.load(model_path, map_location="cpu")
+        self.net.load_state_dict(state_dict["model_state"])
+        logger.info(f"Load example model done.")
+
+    def _forward(self, data):
+        # data: dict, keys: 'image'
+        # image color mode: RGB
+        # image value range in [0, 1]
+        image = data["image"]
+
+        # B: batch size, N: number of keypoints
+        # keypoints shape: B x N x 2, type: torch tensor
+        # scores shape: B x N, type: torch tensor
+        # descriptors shape: B x 128 x N, type: torch tensor
+        keypoints, scores, descriptors = self.net(image)
+
+        return {
+            "keypoints": keypoints,
+            "scores": scores,
+            "descriptors": descriptors,
+        }
hloc/extractors/fire.py
ADDED
@@ -0,0 +1,73 @@
+from pathlib import Path
+import subprocess
+import logging
+import sys
+import torch
+import torchvision.transforms as tvf
+
+from ..utils.base_model import BaseModel
+
+logger = logging.getLogger(__name__)
+fire_path = Path(__file__).parent / "../../third_party/fire"
+sys.path.append(str(fire_path))
+
+
+import fire_network
+
+
+class FIRe(BaseModel):
+    default_conf = {
+        "global": True,
+        "asmk": False,
+        "model_name": "fire_SfM_120k.pth",
+        "scales": [2.0, 1.414, 1.0, 0.707, 0.5, 0.353, 0.25],  # default params
+        "features_num": 1000,  # TODO:not supported now
+        "asmk_name": "asmk_codebook.bin",  # TODO:not supported now
+        "config_name": "eval_fire.yml",
+    }
+    required_inputs = ["image"]
+
+    # Models exported using
+    fire_models = {
+        "fire_SfM_120k.pth": "http://download.europe.naverlabs.com/ComputerVision/FIRe/official/fire.pth",
+        "fire_imagenet.pth": "http://download.europe.naverlabs.com/ComputerVision/FIRe/pretraining/fire_imagenet.pth",
+    }
+
+    def _init(self, conf):
+
+        assert conf["model_name"] in self.fire_models.keys()
+        # Config paths
+        model_path = fire_path / "model" / conf["model_name"]
+
+        # Download the model.
+        if not model_path.exists():
+            model_path.parent.mkdir(exist_ok=True)
+            link = self.fire_models[conf["model_name"]]
+            cmd = ["wget", link, "-O", str(model_path)]
+            logger.info(f"Downloading the FIRe model with `{cmd}`.")
+            subprocess.run(cmd, check=True)
+
+        logger.info(f"Loading fire model...")
+
+        # Load net
+        state = torch.load(model_path)
+        state["net_params"]["pretrained"] = None
+        net = fire_network.init_network(**state["net_params"])
+        net.load_state_dict(state["state_dict"])
+        self.net = net
+
+        self.norm_rgb = tvf.Normalize(
+            **dict(zip(["mean", "std"], net.runtime["mean_std"]))
+        )
+
+        # params
+        self.scales = conf["scales"]
+
+    def _forward(self, data):
+
+        image = self.norm_rgb(data["image"])
+
+        # Feature extraction.
+        desc = self.net.forward_global(image, scales=self.scales)
+
+        return {"global_descriptor": desc}
hloc/extractors/fire_local.py
ADDED
@@ -0,0 +1,90 @@
+from pathlib import Path
+import subprocess
+import logging
+import sys
+import torch
+import torchvision.transforms as tvf
+
+from ..utils.base_model import BaseModel
+
+logger = logging.getLogger(__name__)
+fire_path = Path(__file__).parent / "../../third_party/fire"
+
+sys.path.append(str(fire_path))
+
+
+import fire_network
+from lib.how.how.stages.evaluate import eval_asmk_fire, load_dataset_fire
+
+from lib.asmk import asmk
+from asmk import io_helpers, asmk_method, kernel as kern_pkg
+
+EPS = 1e-6
+
+
+class FIRe(BaseModel):
+    default_conf = {
+        "global": True,
+        "asmk": False,
+        "model_name": "fire_SfM_120k.pth",
+        "scales": [2.0, 1.414, 1.0, 0.707, 0.5, 0.353, 0.25],  # default params
+        "features_num": 1000,
+        "asmk_name": "asmk_codebook.bin",
+        "config_name": "eval_fire.yml",
+    }
+    required_inputs = ["image"]
+
+    # Models exported using
+    fire_models = {
+        "fire_SfM_120k.pth": "http://download.europe.naverlabs.com/ComputerVision/FIRe/official/fire.pth",
+        "fire_imagenet.pth": "http://download.europe.naverlabs.com/ComputerVision/FIRe/pretraining/fire_imagenet.pth",
+    }
+
+    def _init(self, conf):
+
+        assert conf["model_name"] in self.fire_models.keys()
+
+        # Config paths
+        model_path = fire_path / "model" / conf["model_name"]
+        config_path = fire_path / conf["config_name"]
+        asmk_bin_path = fire_path / "model" / conf["asmk_name"]
+
+        # Download the model.
+        if not model_path.exists():
+            model_path.parent.mkdir(exist_ok=True)
+            link = self.fire_models[conf["model_name"]]
+            cmd = ["wget", link, "-O", str(model_path)]
+            logger.info(f"Downloading the FIRe model with `{cmd}`.")
+            subprocess.run(cmd, check=True)
+
+        logger.info(f"Loading fire model...")
+
+        # Load net
+        state = torch.load(model_path)
+        state["net_params"]["pretrained"] = None
+        net = fire_network.init_network(**state["net_params"])
+        net.load_state_dict(state["state_dict"])
+        self.net = net
+
+        self.norm_rgb = tvf.Normalize(
+            **dict(zip(["mean", "std"], net.runtime["mean_std"]))
+        )
+
+        # params
+        self.scales = conf["scales"]
+        self.features_num = conf["features_num"]
+
+    def _forward(self, data):
+
+        image = self.norm_rgb(data["image"])
+
+        local_desc = self.net.forward_local(
+            image, features_num=self.features_num, scales=self.scales
+        )
+
+        logger.info(f"output[0].shape = {local_desc[0].shape}\n")
+
+        return {
+            # 'global_descriptor': desc
+            "local_descriptor": local_desc
+        }
hloc/extractors/lanet.py
ADDED
@@ -0,0 +1,53 @@
+import sys
+from pathlib import Path
+import subprocess
+import torch
+
+from ..utils.base_model import BaseModel
+
+lanet_path = Path(__file__).parent / "../../third_party/lanet"
+sys.path.append(str(lanet_path))
+from network_v0.model import PointModel
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+class LANet(BaseModel):
+    default_conf = {
+        "model_name": "v0",
+        "keypoint_threshold": 0.1,
+    }
+    required_inputs = ["image"]
+
+    def _init(self, conf):
+        model_path = lanet_path / "checkpoints" / f'PointModel_{conf["model_name"]}.pth'
+        if not model_path.exists():
+            print(f"No model found at {model_path}")
+        self.net = PointModel(is_test=True)
+        state_dict = torch.load(model_path, map_location="cpu")
+        self.net.load_state_dict(state_dict["model_state"])
+
+    def _forward(self, data):
+        image = data["image"]
+        keypoints, scores, descriptors = self.net(image)
+        _, _, Hc, Wc = descriptors.shape
+
+        # Scores & Descriptors
+        kpts_score = (
+            torch.cat([keypoints, scores], dim=1).view(3, -1).t().cpu().detach().numpy()
+        )
+        descriptors = (
+            descriptors.view(256, Hc, Wc).view(256, -1).t().cpu().detach().numpy()
+        )
+
+        # Filter based on confidence threshold
+        descriptors = descriptors[kpts_score[:, 0] > self.conf["keypoint_threshold"], :]
+        kpts_score = kpts_score[kpts_score[:, 0] > self.conf["keypoint_threshold"], :]
+        keypoints = kpts_score[:, 1:]
+        scores = kpts_score[:, 0]
+
+        return {
+            "keypoints": torch.from_numpy(keypoints)[None],
+            "scores": torch.from_numpy(scores)[None],
+            "descriptors": torch.from_numpy(descriptors.T)[None],
+        }
hloc/extractors/netvlad.py
ADDED
@@ -0,0 +1,147 @@
+from pathlib import Path
+import subprocess
+import logging
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchvision.models as models
+from scipy.io import loadmat
+
+from ..utils.base_model import BaseModel
+
+logger = logging.getLogger(__name__)
+
+EPS = 1e-6
+
+
+class NetVLADLayer(nn.Module):
+    def __init__(self, input_dim=512, K=64, score_bias=False, intranorm=True):
+        super().__init__()
+        self.score_proj = nn.Conv1d(input_dim, K, kernel_size=1, bias=score_bias)
+        centers = nn.parameter.Parameter(torch.empty([input_dim, K]))
+        nn.init.xavier_uniform_(centers)
+        self.register_parameter("centers", centers)
+        self.intranorm = intranorm
+        self.output_dim = input_dim * K
+
+    def forward(self, x):
+        b = x.size(0)
+        scores = self.score_proj(x)
+        scores = F.softmax(scores, dim=1)
+        diff = x.unsqueeze(2) - self.centers.unsqueeze(0).unsqueeze(-1)
+        desc = (scores.unsqueeze(1) * diff).sum(dim=-1)
+        if self.intranorm:
+            # From the official MATLAB implementation.
+            desc = F.normalize(desc, dim=1)
+        desc = desc.view(b, -1)
+        desc = F.normalize(desc, dim=1)
+        return desc
+
+
+class NetVLAD(BaseModel):
+    default_conf = {"model_name": "VGG16-NetVLAD-Pitts30K", "whiten": True}
+    required_inputs = ["image"]
+
+    # Models exported using
+    # https://github.com/uzh-rpg/netvlad_tf_open/blob/master/matlab/net_class2struct.m.
+    dir_models = {
+        "VGG16-NetVLAD-Pitts30K": "https://cvg-data.inf.ethz.ch/hloc/netvlad/Pitts30K_struct.mat",
+        "VGG16-NetVLAD-TokyoTM": "https://cvg-data.inf.ethz.ch/hloc/netvlad/TokyoTM_struct.mat",
+    }
+
+    def _init(self, conf):
+        assert conf["model_name"] in self.dir_models.keys()
+
+        # Download the checkpoint.
+        checkpoint = Path(torch.hub.get_dir(), "netvlad", conf["model_name"] + ".mat")
+        if not checkpoint.exists():
+            checkpoint.parent.mkdir(exist_ok=True, parents=True)
+            link = self.dir_models[conf["model_name"]]
+            cmd = ["wget", link, "-O", str(checkpoint)]
+            logger.info(f"Downloading the NetVLAD model with `{cmd}`.")
+            subprocess.run(cmd, check=True)
+
+        # Create the network.
+        # Remove classification head.
+        backbone = list(models.vgg16().children())[0]
+        # Remove last ReLU + MaxPool2d.
+        self.backbone = nn.Sequential(*list(backbone.children())[:-2])
+
+        self.netvlad = NetVLADLayer()
+
+        if conf["whiten"]:
+            self.whiten = nn.Linear(self.netvlad.output_dim, 4096)
+
+        # Parse MATLAB weights using https://github.com/uzh-rpg/netvlad_tf_open
+        mat = loadmat(checkpoint, struct_as_record=False, squeeze_me=True)
+
+        # CNN weights.
+        for layer, mat_layer in zip(self.backbone.children(), mat["net"].layers):
+            if isinstance(layer, nn.Conv2d):
+                w = mat_layer.weights[0]  # Shape: S x S x IN x OUT
+                b = mat_layer.weights[1]  # Shape: OUT
+                # Prepare for PyTorch - enforce float32 and right shape.
+                # w should have shape: OUT x IN x S x S
+                # b should have shape: OUT
+                w = torch.tensor(w).float().permute([3, 2, 0, 1])
+                b = torch.tensor(b).float()
+                # Update layer weights.
+                layer.weight = nn.Parameter(w)
+                layer.bias = nn.Parameter(b)
+
+        # NetVLAD weights.
+        score_w = mat["net"].layers[30].weights[0]  # D x K
+        # centers are stored as opposite in official MATLAB code
+        center_w = -mat["net"].layers[30].weights[1]  # D x K
+        # Prepare for PyTorch - make sure it is float32 and has right shape.
+        # score_w should have shape K x D x 1
+        # center_w should have shape D x K
+        score_w = torch.tensor(score_w).float().permute([1, 0]).unsqueeze(-1)
+        center_w = torch.tensor(center_w).float()
+        # Update layer weights.
+        self.netvlad.score_proj.weight = nn.Parameter(score_w)
+        self.netvlad.centers = nn.Parameter(center_w)
+
+        # Whitening weights.
+        if conf["whiten"]:
+            w = mat["net"].layers[33].weights[0]  # Shape: 1 x 1 x IN x OUT
+            b = mat["net"].layers[33].weights[1]  # Shape: OUT
+            # Prepare for PyTorch - make sure it is float32 and has right shape
+            w = torch.tensor(w).float().squeeze().permute([1, 0])  # OUT x IN
+            b = torch.tensor(b.squeeze()).float()  # Shape: OUT
+            # Update layer weights.
+            self.whiten.weight = nn.Parameter(w)
+            self.whiten.bias = nn.Parameter(b)
+
+        # Preprocessing parameters.
+        self.preprocess = {
+            "mean": mat["net"].meta.normalization.averageImage[0, 0],
+            "std": np.array([1, 1, 1], dtype=np.float32),
+        }
+
+    def _forward(self, data):
+        image = data["image"]
+        assert image.shape[1] == 3
+        assert image.min() >= -EPS and image.max() <= 1 + EPS
+        image = torch.clamp(image * 255, 0.0, 255.0)  # Input should be 0-255.
+        mean = self.preprocess["mean"]
+        std = self.preprocess["std"]
+        image = image - image.new_tensor(mean).view(1, -1, 1, 1)
+        image = image / image.new_tensor(std).view(1, -1, 1, 1)
+
+        # Feature extraction.
+        descriptors = self.backbone(image)
+        b, c, _, _ = descriptors.size()
+        descriptors = descriptors.view(b, c, -1)
+
+        # NetVLAD layer.
+        descriptors = F.normalize(descriptors, dim=1)  # Pre-normalization.
+        desc = self.netvlad(descriptors)
+
+        # Whiten if needed.
+        if hasattr(self, "whiten"):
+            desc = self.whiten(desc)
+            desc = F.normalize(desc, dim=1)  # Final L2 normalization.
+
+        return {"global_descriptor": desc}
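For reference, a self-contained shape walk-through of the VLAD aggregation implemented by `NetVLADLayer` above, using random weights (D = 512 VGG channels, K = 64 clusters, so the raw descriptor is 32768-D before the optional 4096-D whitening); the tensors are placeholders, not the pretrained model:

import torch
import torch.nn as nn
import torch.nn.functional as F

b, d, k, m = 1, 512, 64, 30 * 40  # batch, channels, clusters, spatial locations
x = F.normalize(torch.rand(b, d, m), dim=1)  # pre-normalized backbone features
score_proj = nn.Conv1d(d, k, kernel_size=1, bias=False)  # random weights
centers = torch.rand(d, k)

scores = F.softmax(score_proj(x), dim=1)           # b x K x m soft assignments
diff = x.unsqueeze(2) - centers[None, :, :, None]  # b x D x K x m residuals
desc = (scores.unsqueeze(1) * diff).sum(dim=-1)    # b x D x K aggregated residuals
desc = F.normalize(desc, dim=1)                    # intra-normalization
desc = F.normalize(desc.view(b, -1), dim=1)        # b x (D*K) = b x 32768
print(desc.shape)  # the optional whitening layer then maps 32768 -> 4096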
hloc/extractors/openibl.py
ADDED
@@ -0,0 +1,26 @@
+import torch
+import torchvision.transforms as tvf
+
+from ..utils.base_model import BaseModel
+
+
+class OpenIBL(BaseModel):
+    default_conf = {
+        "model_name": "vgg16_netvlad",
+    }
+    required_inputs = ["image"]
+
+    def _init(self, conf):
+        self.net = torch.hub.load(
+            "yxgeee/OpenIBL", conf["model_name"], pretrained=True
+        ).eval()
+        mean = [0.48501960784313836, 0.4579568627450961, 0.4076039215686255]
+        std = [0.00392156862745098, 0.00392156862745098, 0.00392156862745098]
+        self.norm_rgb = tvf.Normalize(mean=mean, std=std)
+
+    def _forward(self, data):
+        image = self.norm_rgb(data["image"])
+        desc = self.net(image)
+        return {
+            "global_descriptor": desc,
+        }
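A standalone sketch of the `torch.hub` call wrapped above; it downloads the OpenIBL weights on first use, and the normalization constants are the ones hard-coded in the wrapper. The exact descriptor dimensionality depends on the released model, so the print is only indicative:

import torch
import torchvision.transforms as tvf

net = torch.hub.load("yxgeee/OpenIBL", "vgg16_netvlad", pretrained=True).eval()
norm_rgb = tvf.Normalize(
    mean=[0.48501960784313836, 0.4579568627450961, 0.4076039215686255],
    std=[0.00392156862745098, 0.00392156862745098, 0.00392156862745098],
)
image = torch.rand(1, 3, 480, 640)  # placeholder RGB batch in [0, 1]
with torch.no_grad():
    desc = net(norm_rgb(image))
print(desc.shape)  # one global descriptor per image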
hloc/extractors/r2d2.py
ADDED
@@ -0,0 +1,61 @@
+import sys
+from pathlib import Path
+import torchvision.transforms as tvf
+
+from ..utils.base_model import BaseModel
+
+r2d2_path = Path(__file__).parent / "../../third_party/r2d2"
+sys.path.append(str(r2d2_path))
+from extract import load_network, NonMaxSuppression, extract_multiscale
+
+
+class R2D2(BaseModel):
+    default_conf = {
+        "model_name": "r2d2_WASF_N16.pt",
+        "max_keypoints": 5000,
+        "scale_factor": 2**0.25,
+        "min_size": 256,
+        "max_size": 1024,
+        "min_scale": 0,
+        "max_scale": 1,
+        "reliability_threshold": 0.7,
+        "repetability_threshold": 0.7,
+    }
+    required_inputs = ["image"]
+
+    def _init(self, conf):
+        model_fn = r2d2_path / "models" / conf["model_name"]
+        self.norm_rgb = tvf.Normalize(
+            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+        )
+        self.net = load_network(model_fn)
+        self.detector = NonMaxSuppression(
+            rel_thr=conf["reliability_threshold"],
+            rep_thr=conf["repetability_threshold"],
+        )
+
+    def _forward(self, data):
+        img = data["image"]
+        img = self.norm_rgb(img)
+
+        xys, desc, scores = extract_multiscale(
+            self.net,
+            img,
+            self.detector,
+            scale_f=self.conf["scale_factor"],
+            min_size=self.conf["min_size"],
+            max_size=self.conf["max_size"],
+            min_scale=self.conf["min_scale"],
+            max_scale=self.conf["max_scale"],
+        )
+        idxs = scores.argsort()[-self.conf["max_keypoints"] or None :]
+        xy = xys[idxs, :2]
+        desc = desc[idxs].t()
+        scores = scores[idxs]
+
+        pred = {
+            "keypoints": xy[None],
+            "descriptors": desc[None],
+            "scores": scores[None],
+        }
+        return pred
hloc/extractors/rekd.py
ADDED
@@ -0,0 +1,53 @@
+import sys
+from pathlib import Path
+import subprocess
+import torch
+
+from ..utils.base_model import BaseModel
+
+rekd_path = Path(__file__).parent / "../../third_party/REKD"
+sys.path.append(str(rekd_path))
+from training.model.REKD import REKD as REKD_
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+class REKD(BaseModel):
+    default_conf = {
+        "model_name": "v0",
+        "keypoint_threshold": 0.1,
+    }
+    required_inputs = ["image"]
+
+    def _init(self, conf):
+        model_path = rekd_path / "checkpoints" / f'PointModel_{conf["model_name"]}.pth'
+        if not model_path.exists():
+            print(f"No model found at {model_path}")
+        self.net = REKD_(is_test=True)
+        state_dict = torch.load(model_path, map_location="cpu")
+        self.net.load_state_dict(state_dict["model_state"])
+
+    def _forward(self, data):
+        image = data["image"]
+        keypoints, scores, descriptors = self.net(image)
+        _, _, Hc, Wc = descriptors.shape
+
+        # Scores & Descriptors
+        kpts_score = (
+            torch.cat([keypoints, scores], dim=1).view(3, -1).t().cpu().detach().numpy()
+        )
+        descriptors = (
+            descriptors.view(256, Hc, Wc).view(256, -1).t().cpu().detach().numpy()
+        )
+
+        # Filter based on confidence threshold
+        descriptors = descriptors[kpts_score[:, 0] > self.conf["keypoint_threshold"], :]
+        kpts_score = kpts_score[kpts_score[:, 0] > self.conf["keypoint_threshold"], :]
+        keypoints = kpts_score[:, 1:]
+        scores = kpts_score[:, 0]
+
+        return {
+            "keypoints": torch.from_numpy(keypoints)[None],
+            "scores": torch.from_numpy(scores)[None],
+            "descriptors": torch.from_numpy(descriptors.T)[None],
+        }
hloc/extractors/superpoint.py
ADDED
@@ -0,0 +1,44 @@
+import sys
+from pathlib import Path
+import torch
+
+from ..utils.base_model import BaseModel
+
+sys.path.append(str(Path(__file__).parent / "../../third_party"))
+from SuperGluePretrainedNetwork.models import superpoint  # noqa E402
+
+
+# The original keypoint sampling is incorrect. We patch it here but
+# we don't fix it upstream to not impact exisiting evaluations.
+def sample_descriptors_fix_sampling(keypoints, descriptors, s: int = 8):
+    """Interpolate descriptors at keypoint locations"""
+    b, c, h, w = descriptors.shape
+    keypoints = (keypoints + 0.5) / (keypoints.new_tensor([w, h]) * s)
+    keypoints = keypoints * 2 - 1  # normalize to (-1, 1)
+    descriptors = torch.nn.functional.grid_sample(
+        descriptors, keypoints.view(b, 1, -1, 2), mode="bilinear", align_corners=False
+    )
+    descriptors = torch.nn.functional.normalize(
+        descriptors.reshape(b, c, -1), p=2, dim=1
+    )
+    return descriptors
+
+
+class SuperPoint(BaseModel):
+    default_conf = {
+        "nms_radius": 4,
+        "keypoint_threshold": 0.005,
+        "max_keypoints": -1,
+        "remove_borders": 4,
+        "fix_sampling": False,
+    }
+    required_inputs = ["image"]
+    detection_noise = 2.0
+
+    def _init(self, conf):
+        if conf["fix_sampling"]:
+            superpoint.sample_descriptors = sample_descriptors_fix_sampling
+        self.net = superpoint.SuperPoint(conf)
+
+    def _forward(self, data):
+        return self.net(data)
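The patched sampler maps a pixel keypoint (x, y) to the descriptor-map cell (x/s, y/s) and then to the [-1, 1] coordinates expected by `grid_sample`, with the +0.5 offset accounting for pixel centers. A tiny sketch of that mapping; the helper name is illustrative only:

import torch

def to_grid_coords(keypoints, w, h, s=8):
    # keypoints are (x, y) in full-resolution pixels; (w, h) is the size of the
    # descriptor map, i.e. the image size divided by the stride s.
    kpts = (keypoints + 0.5) / (keypoints.new_tensor([w, h]) * s)
    return kpts * 2 - 1

corners = torch.tensor([[0.0, 0.0], [639.0, 479.0]])  # corners of a 640 x 480 image
print(to_grid_coords(corners, w=80, h=60))  # close to (-1, -1) and (+1, +1)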
hloc/match_dense.py
ADDED
@@ -0,0 +1,384 @@
+import numpy as np
+import torch
+import torchvision.transforms.functional as F
+from types import SimpleNamespace
+from .extract_features import read_image, resize_image
+import cv2
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+confs = {
+    # Best quality but loads of points. Only use for small scenes
+    "loftr": {
+        "output": "matches-loftr",
+        "model": {
+            "name": "loftr",
+            "weights": "outdoor",
+            "max_keypoints": 2000,
+            "match_threshold": 0.2,
+        },
+        "preprocessing": {
+            "grayscale": True,
+            "resize_max": 1024,
+            "dfactor": 8,
+            "width": 640,
+            "height": 480,
+            "force_resize": True,
+        },
+        "max_error": 1,  # max error for assigned keypoints (in px)
+        "cell_size": 1,  # size of quantization patch (max 1 kp/patch)
+    },
+    # Semi-scalable loftr which limits detected keypoints
+    "loftr_aachen": {
+        "output": "matches-loftr_aachen",
+        "model": {
+            "name": "loftr",
+            "weights": "outdoor",
+            "max_keypoints": 2000,
+            "match_threshold": 0.2,
+        },
+        "preprocessing": {"grayscale": True, "resize_max": 1024, "dfactor": 8},
+        "max_error": 2,  # max error for assigned keypoints (in px)
+        "cell_size": 8,  # size of quantization patch (max 1 kp/patch)
+    },
+    # Use for matching superpoint feats with loftr
+    "loftr_superpoint": {
+        "output": "matches-loftr_aachen",
+        "model": {
+            "name": "loftr",
+            "weights": "outdoor",
+            "max_keypoints": 2000,
+            "match_threshold": 0.2,
+        },
+        "preprocessing": {"grayscale": True, "resize_max": 1024, "dfactor": 8},
+        "max_error": 4,  # max error for assigned keypoints (in px)
+        "cell_size": 4,  # size of quantization patch (max 1 kp/patch)
+    },
+    # Use topicfm for matching feats
+    "topicfm": {
+        "output": "matches-topicfm",
+        "model": {
+            "name": "topicfm",
+            "weights": "outdoor",
+            "max_keypoints": 2000,
+            "match_threshold": 0.2,
+        },
+        "preprocessing": {
+            "grayscale": True,
+            "force_resize": True,
+            "resize_max": 1024,
+            "dfactor": 8,
+            "width": 640,
+            "height": 480,
+        },
+    },
+    # Use topicfm for matching feats
+    "aspanformer": {
+        "output": "matches-aspanformer",
+        "model": {
+            "name": "aspanformer",
+            "weights": "outdoor",
+            "max_keypoints": 2000,
+            "match_threshold": 0.2,
+        },
+        "preprocessing": {
+            "grayscale": True,
+            "force_resize": True,
+            "resize_max": 1024,
+            "width": 640,
+            "height": 480,
+            "dfactor": 8,
+        },
+    },
+    "dkm": {
+        "output": "matches-dkm",
+        "model": {
+            "name": "dkm",
+            "weights": "outdoor",
+            "max_keypoints": 2000,
+            "match_threshold": 0.2,
+        },
+        "preprocessing": {
+            "grayscale": False,
+            "force_resize": True,
+            "resize_max": 1024,
+            "width": 80,
+            "height": 60,
+            "dfactor": 8,
+        },
+    },
+    "roma": {
+        "output": "matches-roma",
+        "model": {
+            "name": "roma",
+            "weights": "outdoor",
+            "max_keypoints": 2000,
+            "match_threshold": 0.2,
+        },
+        "preprocessing": {
+            "grayscale": False,
+            "force_resize": True,
+            "resize_max": 1024,
+            "width": 320,
+            "height": 240,
+            "dfactor": 8,
+        },
+    },
+    "dedode_sparse": {
+        "output": "matches-dedode",
+        "model": {
+            "name": "dedode",
+            "max_keypoints": 2000,
+            "match_threshold": 0.2,
+            "dense": False,
+        },
+        "preprocessing": {
+            "grayscale": False,
+            "force_resize": True,
+            "resize_max": 1024,
+            "width": 768,
+            "height": 768,
+            "dfactor": 8,
+        },
+    },
+    "sold2": {
+        "output": "matches-sold2",
+        "model": {
+            "name": "sold2",
+            "max_keypoints": 2000,
+            "match_threshold": 0.2,
+        },
+        "preprocessing": {
+            "grayscale": True,
+            "force_resize": True,
+            "resize_max": 1024,
+            "width": 640,
+            "height": 480,
+            "dfactor": 8,
+        },
+    },
+    "gluestick": {
+        "output": "matches-gluestick",
+        "model": {
+            "name": "gluestick",
+            "use_lines": True,
+            "max_keypoints": 1000,
+            "max_lines": 300,
+            "force_num_keypoints": False,
+        },
+        "preprocessing": {
+            "grayscale": True,
+            "force_resize": True,
+            "resize_max": 1024,
+            "width": 640,
+            "height": 480,
+            "dfactor": 8,
+        },
+    },
+}
+
+
+def scale_keypoints(kpts, scale):
+    if np.any(scale != 1.0):
+        kpts *= kpts.new_tensor(scale)
+    return kpts
+
+
+def scale_lines(lines, scale):
+    if np.any(scale != 1.0):
+        lines *= lines.new_tensor(scale)
+    return lines
+
+
+def match(model, path_0, path_1, conf):
+    default_conf = {
+        "grayscale": True,
+        "resize_max": 1024,
+        "dfactor": 8,
+        "cache_images": False,
+        "force_resize": False,
+        "width": 320,
+        "height": 240,
+    }
+
+    def preprocess(image: np.ndarray):
+        image = image.astype(np.float32, copy=False)
+        size = image.shape[:2][::-1]
+        scale = np.array([1.0, 1.0])
+        if conf.resize_max:
+            scale = conf.resize_max / max(size)
+            if scale < 1.0:
+                size_new = tuple(int(round(x * scale)) for x in size)
+                image = resize_image(image, size_new, "cv2_area")
+                scale = np.array(size) / np.array(size_new)
+        if conf.force_resize:
+            size = image.shape[:2][::-1]
+            image = resize_image(image, (conf.width, conf.height), "cv2_area")
+            size_new = (conf.width, conf.height)
+            scale = np.array(size) / np.array(size_new)
+        if conf.grayscale:
+            assert image.ndim == 2, image.shape
+            image = image[None]
+        else:
+            image = image.transpose((2, 0, 1))  # HxWxC to CxHxW
+        image = torch.from_numpy(image / 255.0).float()
+        # assure that the size is divisible by dfactor
+        size_new = tuple(
+            map(lambda x: int(x // conf.dfactor * conf.dfactor), image.shape[-2:])
+        )
+        image = F.resize(image, size=size_new, antialias=True)
+        scale = np.array(size) / np.array(size_new)[::-1]
+        return image, scale
+
+    conf = SimpleNamespace(**{**default_conf, **conf})
+    image0 = read_image(path_0, conf.grayscale)
+    image1 = read_image(path_1, conf.grayscale)
+    image0, scale0 = preprocess(image0)
+    image1, scale1 = preprocess(image1)
+    image0 = image0.to(device)[None]
+    image1 = image1.to(device)[None]
+    pred = model({"image0": image0, "image1": image1})
+
+    # Rescale keypoints and move to cpu
+    kpts0, kpts1 = pred["keypoints0"], pred["keypoints1"]
+    kpts0 = scale_keypoints(kpts0 + 0.5, scale0) - 0.5
+    kpts1 = scale_keypoints(kpts1 + 0.5, scale1) - 0.5
+
+    ret = {
+        "image0": image0.squeeze().cpu().numpy(),
+        "image1": image1.squeeze().cpu().numpy(),
+        "keypoints0": kpts0.cpu().numpy(),
+        "keypoints1": kpts1.cpu().numpy(),
+    }
+    if "mconf" in pred.keys():
+        ret["mconf"] = pred["mconf"].cpu().numpy()
+    return ret
+
+
+@torch.no_grad()
+def match_images(model, image_0, image_1, conf, device="cpu"):
+    default_conf = {
+        "grayscale": True,
+        "resize_max": 1024,
+        "dfactor": 8,
+        "cache_images": False,
+        "force_resize": False,
+        "width": 320,
+        "height": 240,
+    }
+
+    def preprocess(image: np.ndarray):
+        image = image.astype(np.float32, copy=False)
+        size = image.shape[:2][::-1]
+        scale = np.array([1.0, 1.0])
+        if conf.resize_max:
+            scale = conf.resize_max / max(size)
+            if scale < 1.0:
+                size_new = tuple(int(round(x * scale)) for x in size)
+                image = resize_image(image, size_new, "cv2_area")
+                scale = np.array(size) / np.array(size_new)
+        if conf.force_resize:
+            size = image.shape[:2][::-1]
+            image = resize_image(image, (conf.width, conf.height), "cv2_area")
+            size_new = (conf.width, conf.height)
+            scale = np.array(size) / np.array(size_new)
+        if conf.grayscale:
+            assert image.ndim == 2, image.shape
+            image = image[None]
+        else:
+            image = image.transpose((2, 0, 1))  # HxWxC to CxHxW
+        image = torch.from_numpy(image / 255.0).float()
+
+        # assure that the size is divisible by dfactor
+        size_new = tuple(
+            map(lambda x: int(x // conf.dfactor * conf.dfactor), image.shape[-2:])
+        )
+        image = F.resize(image, size=size_new)
+        scale = np.array(size) / np.array(size_new)[::-1]
+        return image, scale
+
+    conf = SimpleNamespace(**{**default_conf, **conf})
+
+    if len(image_0.shape) == 3 and conf.grayscale:
+        image0 = cv2.cvtColor(image_0, cv2.COLOR_RGB2GRAY)
+    else:
+        image0 = image_0
+    if len(image_0.shape) == 3 and conf.grayscale:
+        image1 = cv2.cvtColor(image_1, cv2.COLOR_RGB2GRAY)
+    else:
+        image1 = image_1
+
+    # comment following lines, image is always RGB mode
+    # if not conf.grayscale and len(image0.shape) == 3:
+    #     image0 = image0[:, :, ::-1]  # BGR to RGB
+    # if not conf.grayscale and len(image1.shape) == 3:
+    #     image1 = image1[:, :, ::-1]  # BGR to RGB
+
+    image0, scale0 = preprocess(image0)
+    image1, scale1 = preprocess(image1)
+    image0 = image0.to(device)[None]
+    image1 = image1.to(device)[None]
+    pred = model({"image0": image0, "image1": image1})
+
+    s0 = np.array(image_0.shape[:2][::-1]) / np.array(image0.shape[-2:][::-1])
+    s1 = np.array(image_1.shape[:2][::-1]) / np.array(image1.shape[-2:][::-1])
+
+    # Rescale keypoints and move to cpu
+    if "keypoints0" in pred.keys() and "keypoints1" in pred.keys():
+        kpts0, kpts1 = pred["keypoints0"], pred["keypoints1"]
+        kpts0_origin = scale_keypoints(kpts0 + 0.5, s0) - 0.5
+        kpts1_origin = scale_keypoints(kpts1 + 0.5, s1) - 0.5
+
+        ret = {
+            "image0": image0.squeeze().cpu().numpy(),
+            "image1": image1.squeeze().cpu().numpy(),
+            "image0_orig": image_0,
+            "image1_orig": image_1,
+            "keypoints0": kpts0.cpu().numpy(),
+            "keypoints1": kpts1.cpu().numpy(),
+            "keypoints0_orig": kpts0_origin.cpu().numpy(),
+            "keypoints1_orig": kpts1_origin.cpu().numpy(),
+            "original_size0": np.array(image_0.shape[:2][::-1]),
+            "original_size1": np.array(image_1.shape[:2][::-1]),
+            "new_size0": np.array(image0.shape[-2:][::-1]),
+            "new_size1": np.array(image1.shape[-2:][::-1]),
+            "scale0": s0,
+            "scale1": s1,
+        }
+        if "mconf" in pred.keys():
+            ret["mconf"] = pred["mconf"].cpu().numpy()
+    if "lines0" in pred.keys() and "lines1" in pred.keys():
+        if "keypoints0" in pred.keys() and "keypoints1" in pred.keys():
+            kpts0, kpts1 = pred["keypoints0"], pred["keypoints1"]
+            kpts0_origin = scale_keypoints(kpts0 + 0.5, s0) - 0.5
+            kpts1_origin = scale_keypoints(kpts1 + 0.5, s1) - 0.5
+            kpts0_origin = kpts0_origin.cpu().numpy()
+            kpts1_origin = kpts1_origin.cpu().numpy()
+        else:
+            kpts0_origin, kpts1_origin = None, None  # np.zeros([0]), np.zeros([0])
+        lines0, lines1 = pred["lines0"], pred["lines1"]
+        lines0_raw, lines1_raw = pred["raw_lines0"], pred["raw_lines1"]
+
+        lines0_raw = torch.from_numpy(lines0_raw.copy())
+        lines1_raw = torch.from_numpy(lines1_raw.copy())
+        lines0_raw = scale_lines(lines0_raw + 0.5, s0) - 0.5
+        lines1_raw = scale_lines(lines1_raw + 0.5, s1) - 0.5
+
+        lines0 = torch.from_numpy(lines0.copy())
+        lines1 = torch.from_numpy(lines1.copy())
+        lines0 = scale_lines(lines0 + 0.5, s0) - 0.5
+        lines1 = scale_lines(lines1 + 0.5, s1) - 0.5
+
+        ret = {
+            "image0_orig": image_0,
+            "image1_orig": image_1,
+            "line0": lines0_raw.cpu().numpy(),
+            "line1": lines1_raw.cpu().numpy(),
+            "line0_orig": lines0.cpu().numpy(),
+            "line1_orig": lines1.cpu().numpy(),
+            "line_keypoints0_orig": kpts0_origin,
+            "line_keypoints1_orig": kpts1_origin,
+        }
+    del pred
+    torch.cuda.empty_cache()
+    return ret
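A hedged usage sketch of `match_images` above, building a dense matcher the same way `hloc.match_features.match_from_paths` below does (via `dynamic_load`); the image paths are placeholders and the LoFTR weights must be available under `third_party`:

import cv2
import torch
from hloc import match_dense, matchers
from hloc.utils.base_model import dynamic_load

conf = match_dense.confs["loftr"]
device = "cuda" if torch.cuda.is_available() else "cpu"
Model = dynamic_load(matchers, conf["model"]["name"])  # same pattern as match_features
matcher = Model(conf["model"]).eval().to(device)

# placeholder paths; any two overlapping RGB images will do
img0 = cv2.cvtColor(cv2.imread("path/to/image0.jpg"), cv2.COLOR_BGR2RGB)
img1 = cv2.cvtColor(cv2.imread("path/to/image1.jpg"), cv2.COLOR_BGR2RGB)
pred = match_dense.match_images(matcher, img0, img1, conf["preprocessing"], device=device)
print(pred["keypoints0_orig"].shape, pred["keypoints1_orig"].shape)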
hloc/match_features.py
ADDED
@@ -0,0 +1,389 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
from typing import Union, Optional, Dict, List, Tuple
|
3 |
+
from pathlib import Path
|
4 |
+
import pprint
|
5 |
+
from queue import Queue
|
6 |
+
from threading import Thread
|
7 |
+
from functools import partial
|
8 |
+
from tqdm import tqdm
|
9 |
+
import h5py
|
10 |
+
import torch
|
11 |
+
|
12 |
+
from . import matchers, logger
|
13 |
+
from .utils.base_model import dynamic_load
|
14 |
+
from .utils.parsers import names_to_pair, names_to_pair_old, parse_retrieval
|
15 |
+
import numpy as np
|
16 |
+
|
17 |
+
"""
|
18 |
+
A set of standard configurations that can be directly selected from the command
|
19 |
+
line using their name. Each is a dictionary with the following entries:
|
20 |
+
- output: the name of the match file that will be generated.
|
21 |
+
- model: the model configuration, as passed to a feature matcher.
|
22 |
+
"""
|
23 |
+
confs = {
|
24 |
+
"superglue": {
|
25 |
+
"output": "matches-superglue",
|
26 |
+
"model": {
|
27 |
+
"name": "superglue",
|
28 |
+
"weights": "outdoor",
|
29 |
+
"sinkhorn_iterations": 50,
|
30 |
+
"match_threshold": 0.2,
|
31 |
+
},
|
32 |
+
"preprocessing": {
|
33 |
+
"grayscale": True,
|
34 |
+
"resize_max": 1024,
|
35 |
+
"dfactor": 8,
|
36 |
+
"force_resize": False,
|
37 |
+
},
|
38 |
+
},
|
39 |
+
"superglue-fast": {
|
40 |
+
"output": "matches-superglue-it5",
|
41 |
+
"model": {
|
42 |
+
"name": "superglue",
|
43 |
+
"weights": "outdoor",
|
44 |
+
"sinkhorn_iterations": 5,
|
45 |
+
"match_threshold": 0.2,
|
46 |
+
},
|
47 |
+
},
|
48 |
+
"superpoint-lightglue": {
|
49 |
+
"output": "matches-lightglue",
|
50 |
+
"model": {
|
51 |
+
"name": "lightglue",
|
52 |
+
"match_threshold": 0.2,
|
53 |
+
"width_confidence": 0.99, # for point pruning
|
54 |
+
"depth_confidence": 0.95, # for early stopping,
|
55 |
+
"features": "superpoint",
|
56 |
+
"model_name": "superpoint_lightglue.pth",
|
57 |
+
},
|
58 |
+
"preprocessing": {
|
59 |
+
"grayscale": True,
|
60 |
+
"resize_max": 1024,
|
61 |
+
"dfactor": 8,
|
62 |
+
"force_resize": False,
|
63 |
+
},
|
64 |
+
},
|
65 |
+
"disk-lightglue": {
|
66 |
+
"output": "matches-lightglue",
|
67 |
+
"model": {
|
68 |
+
"name": "lightglue",
|
69 |
+
"match_threshold": 0.2,
|
70 |
+
"width_confidence": 0.99, # for point pruning
|
71 |
+
"depth_confidence": 0.95, # for early stopping,
|
72 |
+
"features": "disk",
|
73 |
+
"model_name": "disk_lightglue.pth",
|
74 |
+
},
|
75 |
+
"preprocessing": {
|
76 |
+
"grayscale": True,
|
77 |
+
"resize_max": 1024,
|
78 |
+
"dfactor": 8,
|
79 |
+
"force_resize": False,
|
80 |
+
},
|
81 |
+
},
|
82 |
+
"sgmnet": {
|
83 |
+
"output": "matches-sgmnet",
|
84 |
+
"model": {
|
85 |
+
"name": "sgmnet",
|
86 |
+
"seed_top_k": [256, 256],
|
87 |
+
"seed_radius_coe": 0.01,
|
88 |
+
"net_channels": 128,
|
89 |
+
"layer_num": 9,
|
90 |
+
"head": 4,
|
91 |
+
"seedlayer": [0, 6],
|
92 |
+
"use_mc_seeding": True,
|
93 |
+
"use_score_encoding": False,
|
94 |
+
"conf_bar": [1.11, 0.1],
|
95 |
+
"sink_iter": [10, 100],
|
96 |
+
"detach_iter": 1000000,
|
97 |
+
"match_threshold": 0.2,
|
98 |
+
},
|
99 |
+
"preprocessing": {
|
100 |
+
"grayscale": True,
|
101 |
+
"resize_max": 1024,
|
102 |
+
"dfactor": 8,
|
103 |
+
"force_resize": False,
|
104 |
+
},
|
105 |
+
},
|
106 |
+
"NN-superpoint": {
|
107 |
+
"output": "matches-NN-mutual-dist.7",
|
108 |
+
"model": {
|
109 |
+
"name": "nearest_neighbor",
|
110 |
+
"do_mutual_check": True,
|
111 |
+
"distance_threshold": 0.7,
|
112 |
+
"match_threshold": 0.2,
|
113 |
+
},
|
114 |
+
},
|
115 |
+
"NN-ratio": {
|
116 |
+
"output": "matches-NN-mutual-ratio.8",
|
117 |
+
"model": {
|
118 |
+
"name": "nearest_neighbor",
|
119 |
+
"do_mutual_check": True,
|
120 |
+
"ratio_threshold": 0.8,
|
121 |
+
"match_threshold": 0.2,
|
122 |
+
},
|
123 |
+
},
|
124 |
+
"NN-mutual": {
|
125 |
+
"output": "matches-NN-mutual",
|
126 |
+
"model": {
|
127 |
+
"name": "nearest_neighbor",
|
128 |
+
"do_mutual_check": True,
|
129 |
+
"match_threshold": 0.2,
|
130 |
+
},
|
131 |
+
},
|
132 |
+
"Dual-Softmax": {
        "output": "matches-Dual-Softmax",
        "model": {
            "name": "dual_softmax",
            "do_mutual_check": True,
            "match_threshold": 0.2,  # TODO
        },
    },
    "adalam": {
        "output": "matches-adalam",
        "model": {
            "name": "adalam",
            "match_threshold": 0.2,
        },
    },
}


class WorkQueue:
    def __init__(self, work_fn, num_threads=1):
        self.queue = Queue(num_threads)
        self.threads = [
            Thread(target=self.thread_fn, args=(work_fn,)) for _ in range(num_threads)
        ]
        for thread in self.threads:
            thread.start()

    def join(self):
        for thread in self.threads:
            self.queue.put(None)
        for thread in self.threads:
            thread.join()

    def thread_fn(self, work_fn):
        item = self.queue.get()
        while item is not None:
            work_fn(item)
            item = self.queue.get()

    def put(self, data):
        self.queue.put(data)


class FeaturePairsDataset(torch.utils.data.Dataset):
    def __init__(self, pairs, feature_path_q, feature_path_r):
        self.pairs = pairs
        self.feature_path_q = feature_path_q
        self.feature_path_r = feature_path_r

    def __getitem__(self, idx):
        name0, name1 = self.pairs[idx]
        data = {}
        with h5py.File(self.feature_path_q, "r") as fd:
            grp = fd[name0]
            for k, v in grp.items():
                data[k + "0"] = torch.from_numpy(v.__array__()).float()
            # some matchers might expect an image but only use its size
            data["image0"] = torch.empty((1,) + tuple(grp["image_size"])[::-1])
        with h5py.File(self.feature_path_r, "r") as fd:
            grp = fd[name1]
            for k, v in grp.items():
                data[k + "1"] = torch.from_numpy(v.__array__()).float()
            data["image1"] = torch.empty((1,) + tuple(grp["image_size"])[::-1])
        return data

    def __len__(self):
        return len(self.pairs)


def writer_fn(inp, match_path):
    pair, pred = inp
    with h5py.File(str(match_path), "a", libver="latest") as fd:
        if pair in fd:
            del fd[pair]
        grp = fd.create_group(pair)
        matches = pred["matches0"][0].cpu().short().numpy()
        grp.create_dataset("matches0", data=matches)
        if "matching_scores0" in pred:
            scores = pred["matching_scores0"][0].cpu().half().numpy()
            grp.create_dataset("matching_scores0", data=scores)


def main(
    conf: Dict,
    pairs: Path,
    features: Union[Path, str],
    export_dir: Optional[Path] = None,
    matches: Optional[Path] = None,
    features_ref: Optional[Path] = None,
    overwrite: bool = False,
) -> Path:
    if isinstance(features, Path) or Path(features).exists():
        features_q = features
        if matches is None:
            raise ValueError(
                "Either provide both features and matches as Path or both as names."
            )
    else:
        if export_dir is None:
            raise ValueError(
                f"Provide an export_dir if features is not a file path: {features}."
            )
        features_q = Path(export_dir, features + ".h5")
        if matches is None:
            matches = Path(export_dir, f'{features}_{conf["output"]}_{pairs.stem}.h5')

    if features_ref is None:
        features_ref = features_q
    match_from_paths(conf, pairs, matches, features_q, features_ref, overwrite)

    return matches


def find_unique_new_pairs(pairs_all: List[Tuple[str]], match_path: Path = None):
    """Avoid recomputing duplicate pairs to save time."""
    pairs = set()
    for i, j in pairs_all:
        if (j, i) not in pairs:
            pairs.add((i, j))
    pairs = list(pairs)
    if match_path is not None and match_path.exists():
        with h5py.File(str(match_path), "r", libver="latest") as fd:
            pairs_filtered = []
            for i, j in pairs:
                if (
                    names_to_pair(i, j) in fd
                    or names_to_pair(j, i) in fd
                    or names_to_pair_old(i, j) in fd
                    or names_to_pair_old(j, i) in fd
                ):
                    continue
                pairs_filtered.append((i, j))
        return pairs_filtered
    return pairs


@torch.no_grad()
def match_from_paths(
    conf: Dict,
    pairs_path: Path,
    match_path: Path,
    feature_path_q: Path,
    feature_path_ref: Path,
    overwrite: bool = False,
) -> Path:
    logger.info(
        f"Matching local features with configuration:\n{pprint.pformat(conf)}"
    )

    if not feature_path_q.exists():
        raise FileNotFoundError(f"Query feature file {feature_path_q}.")
    if not feature_path_ref.exists():
        raise FileNotFoundError(f"Reference feature file {feature_path_ref}.")
    match_path.parent.mkdir(exist_ok=True, parents=True)

    assert pairs_path.exists(), pairs_path
    pairs = parse_retrieval(pairs_path)
    pairs = [(q, r) for q, rs in pairs.items() for r in rs]
    pairs = find_unique_new_pairs(pairs, None if overwrite else match_path)
    if len(pairs) == 0:
        logger.info("Skipping the matching.")
        return

    device = "cuda" if torch.cuda.is_available() else "cpu"
    Model = dynamic_load(matchers, conf["model"]["name"])
    model = Model(conf["model"]).eval().to(device)

    dataset = FeaturePairsDataset(pairs, feature_path_q, feature_path_ref)
    loader = torch.utils.data.DataLoader(
        dataset, num_workers=5, batch_size=1, shuffle=False, pin_memory=True
    )
    writer_queue = WorkQueue(partial(writer_fn, match_path=match_path), 5)

    for idx, data in enumerate(tqdm(loader, smoothing=0.1)):
        data = {
            k: v if k.startswith("image") else v.to(device, non_blocking=True)
            for k, v in data.items()
        }
        pred = model(data)
        pair = names_to_pair(*pairs[idx])
        writer_queue.put((pair, pred))
    writer_queue.join()
    logger.info("Finished exporting matches.")


def scale_keypoints(kpts, scale):
    if np.any(scale != 1.0):
        kpts *= kpts.new_tensor(scale)
    return kpts


@torch.no_grad()
def match_images(model, feat0, feat1):
    # forward pass to match keypoints
    desc0 = feat0["descriptors"][0]
    desc1 = feat1["descriptors"][0]
    if len(desc0.shape) == 2:
        desc0 = desc0.unsqueeze(0)
    if len(desc1.shape) == 2:
        desc1 = desc1.unsqueeze(0)
    pred = model(
        {
            "image0": feat0["image"],
            "keypoints0": feat0["keypoints"][0],
            "scores0": feat0["scores"][0].unsqueeze(0),
            "descriptors0": desc0,
            "image1": feat1["image"],
            "keypoints1": feat1["keypoints"][0],
            "scores1": feat1["scores"][0].unsqueeze(0),
            "descriptors1": desc1,
        }
    )
    pred = {
        k: v.cpu().detach()[0] if isinstance(v, torch.Tensor) else v
        for k, v in pred.items()
    }
    kpts0, kpts1 = (
        feat0["keypoints"][0].cpu().numpy(),
        feat1["keypoints"][0].cpu().numpy(),
    )
    matches, confid = pred["matches0"], pred["matching_scores0"]
    # Keep the matching keypoints.
    valid = matches > -1
    mkpts0 = kpts0[valid]
    mkpts1 = kpts1[matches[valid]]
    mconfid = confid[valid]
    # rescale the keypoints to their original size
    s0 = feat0["original_size"] / feat0["size"]
    s1 = feat1["original_size"] / feat1["size"]
    kpts0_origin = scale_keypoints(torch.from_numpy(mkpts0 + 0.5), s0) - 0.5
    kpts1_origin = scale_keypoints(torch.from_numpy(mkpts1 + 0.5), s1) - 0.5
    ret = {
        "image0_orig": feat0["image_orig"],
        "image1_orig": feat1["image_orig"],
        "keypoints0": kpts0,
        "keypoints1": kpts1,
        "keypoints0_orig": kpts0_origin.numpy(),
        "keypoints1_orig": kpts1_origin.numpy(),
        "mconf": mconfid,
    }
    del feat0, feat1, desc0, desc1, kpts0, kpts1, kpts0_origin, kpts1_origin
    torch.cuda.empty_cache()

    return ret


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--pairs", type=Path, required=True)
    parser.add_argument("--export_dir", type=Path)
    parser.add_argument("--features", type=str, default="feats-superpoint-n4096-r1024")
    parser.add_argument("--matches", type=Path)
    parser.add_argument(
        "--conf", type=str, default="superglue", choices=list(confs.keys())
    )
    args = parser.parse_args()
    main(confs[args.conf], args.pairs, args.features, args.export_dir)
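For orientation, here is a minimal sketch of how this module is typically driven, based on the `main()` signature and the CLI flags above. The paths, the pairs file, and the feature name are placeholders; the feature file is expected to be the HDF5 output of `hloc.extract_features`, and the pairs file lists one query/reference image pair per line.

    from pathlib import Path
    from hloc import match_features

    export_dir = Path("outputs/")                 # placeholder output directory
    pairs = Path("outputs/pairs-netvlad.txt")     # placeholder pairs file
    match_path = match_features.main(
        conf=match_features.confs["superglue"],   # or "adalam", "Dual-Softmax", ...
        pairs=pairs,
        features="feats-superpoint-n4096-r1024",  # feature file name, without .h5
        export_dir=export_dir,
    )
    # roughly equivalent from the command line (assuming hloc is importable):
    #   python -m hloc.match_features --pairs outputs/pairs-netvlad.txt \
    #       --export_dir outputs --conf superglue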
hloc/matchers/__init__.py
ADDED
@@ -0,0 +1,3 @@
def get_matcher(matcher):
    mod = __import__(f"{__name__}.{matcher}", fromlist=[""])
    return getattr(mod, "Model")
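A short, purely hypothetical illustration of how `get_matcher` resolves a matcher by module name; the `my_matcher` module and its `Model` alias below are invented for this example (the matchers added in this commit are normally loaded through `dynamic_load` in `match_from_paths` instead):

    # hloc/matchers/my_matcher.py (hypothetical module)
    #
    #     from ..utils.base_model import BaseModel
    #
    #     class MyMatcher(BaseModel):
    #         ...
    #
    #     Model = MyMatcher  # the attribute that get_matcher() looks up
    #
    # elsewhere:
    from hloc.matchers import get_matcher

    MatcherClass = get_matcher("my_matcher")  # imports hloc.matchers.my_matcher
    matcher = MatcherClass({})                # conf dict, merged with default_conf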
hloc/matchers/adalam.py
ADDED
@@ -0,0 +1,69 @@
import torch

from ..utils.base_model import BaseModel

from kornia.feature.adalam import AdalamFilter
from kornia.utils.helpers import get_cuda_device_if_available


class AdaLAM(BaseModel):
    # See https://kornia.readthedocs.io/en/latest/_modules/kornia/feature/adalam/adalam.html.
    default_conf = {
        "area_ratio": 100,
        "search_expansion": 4,
        "ransac_iters": 128,
        "min_inliers": 6,
        "min_confidence": 200,
        "orientation_difference_threshold": 30,
        "scale_rate_threshold": 1.5,
        "detected_scale_rate_threshold": 5,
        "refit": True,
        "force_seed_mnn": True,
        "device": get_cuda_device_if_available(),
    }
    required_inputs = [
        "image0",
        "image1",
        "descriptors0",
        "descriptors1",
        "keypoints0",
        "keypoints1",
        "scales0",
        "scales1",
        "oris0",
        "oris1",
    ]

    def _init(self, conf):
        self.adalam = AdalamFilter(conf)

    def _forward(self, data):
        assert data["keypoints0"].size(0) == 1
        if data["keypoints0"].size(1) < 2 or data["keypoints1"].size(1) < 2:
            matches = torch.zeros(
                (0, 2), dtype=torch.int64, device=data["keypoints0"].device
            )
        else:
            matches = self.adalam.match_and_filter(
                data["keypoints0"][0],
                data["keypoints1"][0],
                data["descriptors0"][0].T,
                data["descriptors1"][0].T,
                data["image0"].shape[2:],
                data["image1"].shape[2:],
                data["oris0"][0],
                data["oris1"][0],
                data["scales0"][0],
                data["scales1"][0],
            )
        matches_new = torch.full(
            (data["keypoints0"].size(1),),
            -1,
            dtype=torch.int64,
            device=data["keypoints0"].device,
        )
        matches_new[matches[:, 0]] = matches[:, 1]
        return {
            "matches0": matches_new.unsqueeze(0),
            "matching_scores0": torch.zeros(matches_new.size(0)).unsqueeze(0),
        }
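For reference, a sketch of the tensor layout this wrapper expects, assuming the usual hloc convention of instantiating a `BaseModel` subclass with a conf dict and calling it like a module (as `match_from_paths` does); the shapes, sizes, and random values below are arbitrary dummies:

    import torch
    from hloc.matchers.adalam import AdaLAM

    n0, n1, dim = 512, 480, 128
    data = {
        "image0": torch.empty(1, 1, 480, 640),   # only .shape[2:] is used
        "image1": torch.empty(1, 1, 480, 640),
        "keypoints0": torch.rand(1, n0, 2) * 480,
        "keypoints1": torch.rand(1, n1, 2) * 480,
        "descriptors0": torch.rand(1, dim, n0),  # transposed to (n, dim) internally
        "descriptors1": torch.rand(1, dim, n1),
        "scales0": torch.ones(1, n0),
        "scales1": torch.ones(1, n1),
        "oris0": torch.zeros(1, n0),
        "oris1": torch.zeros(1, n1),
    }
    model = AdaLAM({}).eval()
    pred = model(data)
    # pred["matches0"]: (1, n0) indices into keypoints1, -1 for unmatched keypoints
    # pred["matching_scores0"]: zeros here, since AdaLAM does not score matches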
hloc/matchers/aspanformer.py
ADDED
@@ -0,0 +1,76 @@
import sys
import torch
from ..utils.base_model import BaseModel
from ..utils import do_system
from pathlib import Path
import subprocess
import logging

logger = logging.getLogger(__name__)

sys.path.append(str(Path(__file__).parent / "../../third_party"))
from ASpanFormer.src.ASpanFormer.aspanformer import ASpanFormer as _ASpanFormer
from ASpanFormer.src.config.default import get_cfg_defaults
from ASpanFormer.src.utils.misc import lower_config
from ASpanFormer.demo import demo_utils

aspanformer_path = Path(__file__).parent / "../../third_party/ASpanFormer"


class ASpanFormer(BaseModel):
    default_conf = {
        "weights": "outdoor",
        "match_threshold": 0.2,
        "config_path": aspanformer_path / "configs/aspan/outdoor/aspan_test.py",
        "model_name": "weights_aspanformer.tar",
    }
    required_inputs = ["image0", "image1"]
    proxy = "http://localhost:1080"
    aspanformer_models = {
        "weights_aspanformer.tar": "https://drive.google.com/uc?id=1eavM9dTkw9nbc-JqlVVfGPU5UvTTfc6k&confirm=t"
    }

    def _init(self, conf):
        model_path = aspanformer_path / "weights" / Path(conf["weights"] + ".ckpt")
        # Download the model.
        if not model_path.exists():
            # model_path.parent.mkdir(exist_ok=True)
            tar_path = aspanformer_path / conf["model_name"]
            if not tar_path.exists():
                link = self.aspanformer_models[conf["model_name"]]
                cmd = ["gdown", link, "-O", str(tar_path), "--proxy", self.proxy]
                cmd_wo_proxy = ["gdown", link, "-O", str(tar_path)]
                logger.info(f"Downloading the Aspanformer model with `{cmd_wo_proxy}`.")
                try:
                    subprocess.run(cmd_wo_proxy, check=True)
                except subprocess.CalledProcessError as e:
                    logger.info(f"Downloading the Aspanformer model with `{cmd}`.")
                    try:
                        subprocess.run(cmd, check=True)
                    except subprocess.CalledProcessError as e:
                        logger.error("Failed to download the Aspanformer model.")
                        raise e

            do_system(f"cd {str(aspanformer_path)} & tar -xvf {str(tar_path)}")

        logger.info("Loading Aspanformer model...")

        config = get_cfg_defaults()
        config.merge_from_file(conf["config_path"])
        _config = lower_config(config)
        self.net = _ASpanFormer(config=_config["aspan"])
        weight_path = model_path
        state_dict = torch.load(str(weight_path), map_location="cpu")["state_dict"]
        self.net.load_state_dict(state_dict, strict=False)

    def _forward(self, data):
        data_ = {
            "image0": data["image0"],
            "image1": data["image1"],
        }
        self.net(data_, online_resize=True)
        corr0 = data_["mkpts0_f"]
        corr1 = data_["mkpts1_f"]
        pred = {}
        pred["keypoints0"], pred["keypoints1"] = corr0, corr1
        return pred
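Similarly, a sketch of how this dense matcher could be exercised directly, assuming the `third_party/ASpanFormer` submodule and its weights are already in place (otherwise `_init` first tries to download them with gdown, as shown above). Unlike the sparse matchers, it consumes image tensors and returns matched pixel coordinates rather than per-keypoint match indices:

    import torch
    from hloc.matchers.aspanformer import ASpanFormer

    model = ASpanFormer({"weights": "outdoor"}).eval()  # merged into default_conf
    data = {
        "image0": torch.rand(1, 1, 480, 640),  # grayscale images in [0, 1]
        "image1": torch.rand(1, 1, 480, 640),
    }
    with torch.no_grad():
        pred = model(data)
    mkpts0 = pred["keypoints0"]  # matched coordinates in image0 (mkpts0_f)
    mkpts1 = pred["keypoints1"]  # matched coordinates in image1 (mkpts1_f)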