Spaces:

nccr-catalysis
/

atom-detection

Sleeping

App Files Files Community

Romain Graux committed on Sep 17

Commit

60fece7

•

1 Parent(s): 0bb9cde

New extractor for physical metadata

Browse files

Files changed (3) hide show

app.py +2 -2
app/dl_inference.py +7 -5
app/tiff_utils.py +75 -16

app.py CHANGED Viewed

@@ -85,10 +85,10 @@ def batch_fn(
         try:
             physical_metadata = extract_physical_metadata(file.name)
             if physical_metadata.unit != "nm":
-                raise ValueError(f"Unit of {file.name} is not nm, cannot process it")
         except Exception as e:
             error_messages.append(f"Error processing {file.name}: {str(e)}")
-            continue  # Skip to the next file
         original_file_name = os.path.basename(file.name)
         sanitized_file_name = original_file_name.replace(" ", "_")

         try:
             physical_metadata = extract_physical_metadata(file.name)
             if physical_metadata.unit != "nm":
+                raise gr.Error(f"Unit of {file.name} is not nm, cannot process it")
         except Exception as e:
             error_messages.append(f"Error processing {file.name}: {str(e)}")
+            raise gr.Error(f"Error processing {file.name}: {str(e)}")
         original_file_name = os.path.basename(file.name)
         sanitized_file_name = original_file_name.replace(" ", "_")

app/dl_inference.py CHANGED Viewed

@@ -11,6 +11,7 @@ from functools import lru_cache
 import sys
 from .tiff_utils import tiff_to_png
 if ".." not in sys.path:
     sys.path.append("..")
@@ -78,7 +79,7 @@ def multimers_classification(
     for e in range(epochs):
         trainer.step(train_loader, scale_factor=scale_factor)
         trainer.print_statistics()
     # Extract latent space (only mean) from VAE
     z_mean, _ = rvae.encode(torch_crops)
@@ -150,7 +151,9 @@ def inference_fn(
     # if img.max() <= 1:
     #     raise ValueError("Gradio seems to preprocess badly the tiff images. Did you adapt the preprocessing function as mentioned in the app.py file comments?")
     prepro_img, _, pred_map = detection.image_to_pred_map(img, return_intermediate=True)
-    center_coords_list, likelihood_list = (np.array(x) for x in detection.pred_map_to_atoms(pred_map))
     results = (
         multimers_classification(
             img=prepro_img,
@@ -161,7 +164,6 @@ def inference_fn(
         if n_species > 1
         else {
             0: {
                 "coords": center_coords_list,
                 "likelihood": likelihood_list,
                 "confidence": np.ones(len(center_coords_list)),
@@ -173,8 +175,8 @@ def inference_fn(
             Evaluation.center_coords_to_bbox(center_coords)
             for center_coords in v["coords"]
         ]
-    return tiff_to_png(image), {
-        "image": tiff_to_png(image),
         "pred_map": pred_map,
         "species": results,
     }

 import sys
 from .tiff_utils import tiff_to_png
 if ".." not in sys.path:
     sys.path.append("..")
     for e in range(epochs):
         trainer.step(train_loader, scale_factor=scale_factor)
         trainer.print_statistics()
     # Extract latent space (only mean) from VAE
     z_mean, _ = rvae.encode(torch_crops)
     # if img.max() <= 1:
     #     raise ValueError("Gradio seems to preprocess badly the tiff images. Did you adapt the preprocessing function as mentioned in the app.py file comments?")
     prepro_img, _, pred_map = detection.image_to_pred_map(img, return_intermediate=True)
+    center_coords_list, likelihood_list = (
+        np.array(x) for x in detection.pred_map_to_atoms(pred_map)
+    )
     results = (
         multimers_classification(
             img=prepro_img,
         if n_species > 1
         else {
             0: {
                 "coords": center_coords_list,
                 "likelihood": likelihood_list,
                 "confidence": np.ones(len(center_coords_list)),
             Evaluation.center_coords_to_bbox(center_coords)
             for center_coords in v["coords"]
         ]
+    return tiff_to_png(Image.fromarray(prepro_img)), {
+        "image": tiff_to_png(Image.fromarray(prepro_img)),
         "pred_map": pred_map,
         "species": results,
     }

app/tiff_utils.py CHANGED Viewed

@@ -6,34 +6,93 @@
 @last modified : 2023 September 19, 11:18:36
 """
 import re
 import imageio
-import numpy as np
 from collections import namedtuple
-physical_metadata = namedtuple("physical_metadata", ["width", "height", "pixel_width", "pixel_height", "unit"])
-def extract_physical_metadata(image_path : str, strict:bool=True) -> physical_metadata:
     """
-    Extracts the physical metadata of an image (only tiff for now)
     """
     with open(image_path, "rb") as f:
         data = f.read()
-        reader = imageio.get_reader(data, format=".tif")
         metadata = reader.get_meta_data()
-    if strict and not metadata['is_imagej']:
-        for key, value in metadata.items():
-            if key.startswith("is_") and value == True: # Force bool to be True, because it can also pass the condition while being an random object
-                raise ValueError(f"The image is not TIFF image, but it seems to be a {key[3:]} image")
-        raise ValueError("Impossible to extract metadata from the image (ImageJ)")
     h, w = reader.get_next_data().shape
-    ipw, iph, _ = metadata['resolution']
-    result = re.search(r"unit=(.+)", metadata['description'])
-    if strict and not result:
-        raise ValueError(f"No scale unit found in the image description : {metadata['description']}")
-    unit = result and result.group(1)
-    return physical_metadata(w, h, 1. / ipw, 1. / iph, unit)
 def tiff_to_png(image, inplace=True):
     img = image if inplace else image.copy()

 @last modified : 2023 September 19, 11:18:36
 """
+from typing import Callable, Optional
 import re
 import imageio
 from collections import namedtuple
+import numpy as np
+PhysicalMetadata = namedtuple(
+    "PhysicalMetadata", ["width", "height", "pixel_width", "pixel_height", "unit"]
+)
+MetadataExtractor = Callable[[dict, int, int], Optional[PhysicalMetadata]]
+def extract_imagej_metadata(
+    metadata: dict, width: int, height: int
+) -> Optional[PhysicalMetadata]:
+    try:
+        ipw, iph, _ = metadata["resolution"]
+        result = re.search(r"unit=(.+)", metadata["description"])
+        if not result:
+            return None
+        unit = result.group(1)
+        return PhysicalMetadata(width, height, 1.0 / ipw, 1.0 / iph, unit.lower())
+    except (KeyError, AttributeError):
+        return None
+def extract_resolution_metadata(
+    metadata: dict, width: int, height: int
+) -> Optional[PhysicalMetadata]:
+    try:
+        ipw, iph, _ = metadata["resolution"]
+        # It looks like the resolution unit is not really reliable, so let's just assume nm
+        unit = "nm"
+        return PhysicalMetadata(width, height, 1.0 / ipw, 1.0 / iph, unit)
+    except (KeyError, AttributeError):
+        return None
+METADATA_EXTRACTORS: list[MetadataExtractor] = [
+    extract_imagej_metadata,
+    extract_resolution_metadata,
+]
+def normalize_metadata(metadata: PhysicalMetadata) -> PhysicalMetadata:
+    conversion_factor = {
+        "inch": 2.54e7,
+        "m": 1e9,
+        "dm": 1e8,
+        "cm": 1e7,
+        "mm": 1e6,
+        "µm": 1e3,
+        "nm": 1,
+    }
+    if metadata.unit not in conversion_factor:
+        raise ValueError(f"Unknown unit: {metadata.unit}")
+    factor = conversion_factor[metadata.unit]
+    return PhysicalMetadata(
+        metadata.width,
+        metadata.height,
+        metadata.pixel_width * factor,
+        metadata.pixel_height * factor,
+        "nm",
+    )
+def extract_physical_metadata(image_path: str, strict: bool = True) -> PhysicalMetadata:
     """
+    Extracts the physical metadata of an image by trying all available extractors.
+    Raises ValueError if no extractor succeeds.
     """
     with open(image_path, "rb") as f:
         data = f.read()
+        reader = imageio.get_reader(data)
         metadata = reader.get_meta_data()
     h, w = reader.get_next_data().shape
+    for extractor in METADATA_EXTRACTORS:
+        result = extractor(metadata, w, h)
+        if result is not None:
+            return normalize_metadata(result)
+    raise ValueError(
+        "Failed to extract metadata from the image using any available method."
+    )
 def tiff_to_png(image, inplace=True):
     img = image if inplace else image.copy()