Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 45 additions & 2 deletions src/tracksdata/nodes/_regionprops.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,22 @@ class RegionPropsNodes(BaseNodesOperator):
Physical spacing between pixels. If provided, affects distance-based
measurements. Should be (row_spacing, col_spacing) for 2D or
(depth_spacing, row_spacing, col_spacing) for 3D.
separate_arrays : bool, optional
If True, array-like properties (e.g. ``inertia_tensor`` or multi-channel
``intensity_mean``) are flattened into multiple scalar attributes instead
of being stored as a single array attribute. The new attributes are named
``<prop>_<index>`` (e.g. ``intensity_mean_0``, ``intensity_mean_1``) using
the same convention as ``node_attrs(unpack=True)``. This makes individual
components filterable. Defaults to False.

Attributes
----------
_extra_properties : list
List of additional properties to compute.
_spacing : tuple[float, float] | None
Physical spacing between pixels.
_separate_arrays : bool
Whether array-like properties are flattened into scalar attributes.

Examples
--------
Expand Down Expand Up @@ -92,6 +101,7 @@ def __init__(
self,
extra_properties: list[str | Callable[[RegionProperties], Any]] | None = None,
spacing: tuple[float, float] | None = None,
separate_arrays: bool = False,
):
super().__init__()
self._extra_properties = extra_properties or []
Expand All @@ -102,6 +112,7 @@ def __init__(
if "bbox" in self._extra_properties:
raise ValueError("`bbox` is not supported as an extra property. It's already included by default.")
self._spacing = spacing
self._separate_arrays = separate_arrays

def _axis_names(self, labels: NDArray[np.integer]) -> list[str]:
"""
Expand All @@ -124,6 +135,37 @@ def _axis_names(self, labels: NDArray[np.integer]) -> list[str]:
else:
raise ValueError(f"`labels` must be 't + 2D' or 't + 3D', got '{labels.ndim}' dimensions.")

def _attr_items(self, key: str, value: Any) -> list[tuple[str, Any]]:
    """
    Normalize a single property value into one or more node attribute items.

    Tuple/list/array-like numeric values are converted to numpy arrays so they
    are stored consistently as fixed-shape array attributes. When
    ``separate_arrays`` is enabled, such values are instead flattened into
    scalar attributes named ``<key>_<index>`` (row-major, one index component
    per dimension, e.g. ``<key>_0_1`` for a 2D value), matching the
    ``node_attrs(unpack=True)`` naming convention.

    Anything that cannot be represented as a regular numeric array with at
    least one dimension (scalars, 0-d arrays, strings, object arrays, ragged
    nested sequences) is returned unchanged under ``key``.

    Parameters
    ----------
    key : str
        The base attribute name.
    value : Any
        The property value returned by regionprops or a custom callable.

    Returns
    -------
    list[tuple[str, Any]]
        The (name, value) pairs to add to the node attributes.
    """
    if not isinstance(value, (np.ndarray, tuple, list)):
        return [(key, value)]

    try:
        arr = np.asarray(value)
    except ValueError:
        # Ragged nested sequences (e.g. ``[[1, 2], [3]]`` from a custom
        # callable) cannot form a regular array; recent NumPy raises here
        # instead of building an object array. Store the value untouched,
        # like any other non-numeric property.
        return [(key, value)]

    # Only bool/int/uint/float/complex arrays with >= 1 dimension are normalized.
    if arr.dtype.kind not in "biufc" or arr.ndim < 1:
        return [(key, value)]

    if not self._separate_arrays:
        return [(key, arr)]

    # ``np.ndindex`` walks the indices in row-major order, one tuple per element.
    return [("_".join((key, *map(str, idx))), arr[idx]) for idx in np.ndindex(arr.shape)]

def _init_node_attrs(self, graph: BaseGraph, node_attrs: dict[str, Any]) -> None:
"""
Initialize the node attributes for the graph.
Expand Down Expand Up @@ -302,9 +344,10 @@ def _nodes_per_time(

for prop in self._extra_properties:
if callable(prop):
attrs[prop.__name__] = prop(obj)
key, value = prop.__name__, prop(obj)
else:
attrs[prop] = getattr(obj, prop)
key, value = prop, getattr(obj, prop)
attrs.update(self._attr_items(key, value))

attrs[DEFAULT_ATTR_KEYS.MASK] = Mask(obj.image, obj.bbox)
attrs[DEFAULT_ATTR_KEYS.BBOX] = np.asarray(obj.bbox, dtype=int)
Expand Down
88 changes: 88 additions & 0 deletions src/tracksdata/nodes/_test/test_regionprops.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import numpy as np
import polars as pl
import pytest
from skimage.measure._regionprops import RegionProperties

from tracksdata.attrs import NodeAttr
from tracksdata.constants import DEFAULT_ATTR_KEYS
from tracksdata.graph import RustWorkXGraph
from tracksdata.nodes import Mask, RegionPropsNodes
Expand Down Expand Up @@ -334,3 +336,89 @@ def test_regionprops_multiprocessing_isolation() -> None:
"""Test that multiprocessing options don't affect subsequent tests."""
# Verify default n_workers is 1
assert get_options().n_workers == 1


def test_regionprops_multichannel_intensity_array() -> None:
    """Multi-channel intensity props are stored as fixed-shape array attributes (#195)."""
    # Single time point, 3x3 image with two labelled regions.
    labels = np.array([[[1, 1, 0], [1, 0, 2], [0, 2, 2]]], dtype=np.int32)

    # Two intensity channels stacked on the last axis -> shape (1, 3, 3, 2).
    channel_0 = np.array([[10, 20, 0], [30, 0, 40], [0, 50, 60]], dtype=np.float32)
    channel_1 = np.array([[1, 2, 0], [3, 0, 4], [0, 5, 6]], dtype=np.float32)
    intensity = np.stack([channel_0, channel_1], axis=-1)[np.newaxis]

    graph = RustWorkXGraph()
    RegionPropsNodes(extra_properties=["intensity_max"]).add_nodes(
        graph,
        labels=labels,
        intensity_image=intensity,
    )

    nodes_df = graph.node_attrs()
    column_dtype = nodes_df.schema["intensity_max"]

    # One value per channel, stored as a fixed-width polars Array column.
    assert isinstance(column_dtype, pl.Array)
    assert nodes_df["intensity_max"].dtype.shape == (2,)


def test_regionprops_tuple_property_stored_as_array() -> None:
    """Tuple-returning props (e.g. centroid_weighted) are normalized to array attributes (#191)."""
    # Single time point, 3x3 image with two labelled regions and one intensity channel.
    labels = np.array([[[1, 1, 0], [1, 0, 2], [0, 2, 2]]], dtype=np.int32)
    intensity = np.array(
        [[[10, 20, 0], [30, 0, 40], [0, 50, 60]]],
        dtype=np.float32,
    )

    graph = RustWorkXGraph()
    RegionPropsNodes(extra_properties=["centroid_weighted"]).add_nodes(
        graph,
        labels=labels,
        intensity_image=intensity,
    )

    # tuple props must become fixed-shape arrays (not pl.List) so they are unpackable
    packed_schema = graph.node_attrs().schema
    assert isinstance(packed_schema["centroid_weighted"], pl.Array)

    unpacked_columns = graph.node_attrs(unpack=True).columns
    assert "centroid_weighted_0" in unpacked_columns
    assert "centroid_weighted_1" in unpacked_columns


def test_regionprops_separate_arrays() -> None:
    """`separate_arrays=True` flattens array props into filterable scalar columns (#269)."""
    # Single time point, 3x3 image with two labelled regions.
    labels = np.array([[[1, 1, 0], [1, 0, 2], [0, 2, 2]]], dtype=np.int32)

    # Two intensity channels stacked on the last axis -> shape (1, 3, 3, 2).
    channel_0 = np.array([[10, 20, 0], [30, 0, 40], [0, 50, 60]], dtype=np.float32)
    channel_1 = np.array([[1, 2, 0], [3, 0, 4], [0, 5, 6]], dtype=np.float32)
    intensity = np.stack([channel_0, channel_1], axis=-1)[np.newaxis]

    graph = RustWorkXGraph()
    RegionPropsNodes(
        extra_properties=["intensity_max", "inertia_tensor"],
        separate_arrays=True,
    ).add_nodes(graph, labels=labels, intensity_image=intensity)

    nodes_df = graph.node_attrs()

    # 1D property -> single index suffix; 2D property -> row-major double index suffix
    expected_columns = (
        "intensity_max_0",
        "intensity_max_1",
        "inertia_tensor_0_0",
        "inertia_tensor_1_1",
    )
    for name in expected_columns:
        assert name in nodes_df.columns
        assert nodes_df[name].dtype == pl.Float64

    # the array column itself must not exist when separated
    assert "intensity_max" not in nodes_df.columns

    # separated columns are now filterable: only label 2 has a channel-0 max above 30
    filtered = graph.filter(NodeAttr("intensity_max_0") > 30).node_attrs()
    assert len(filtered) == 1
    assert filtered["intensity_max_0"][0] == 60.0


def test_regionprops_separate_arrays_matches_unpack() -> None:
    """`separate_arrays=True` column names match `node_attrs(unpack=True)`."""
    prefixes = ("intensity_max", "inertia_tensor")

    def _matching_columns(df: pl.DataFrame) -> set[str]:
        # Keep only the columns derived from the two extra properties under test.
        return set(filter(lambda name: name.startswith(prefixes), df.columns))

    # Single time point, 3x3 image with two labelled regions.
    labels = np.array([[[1, 1, 0], [1, 0, 2], [0, 2, 2]]], dtype=np.int32)

    # Two intensity channels stacked on the last axis -> shape (1, 3, 3, 2).
    channel_0 = np.array([[10, 20, 0], [30, 0, 40], [0, 50, 60]], dtype=np.float32)
    channel_1 = np.array([[1, 2, 0], [3, 0, 4], [0, 5, 6]], dtype=np.float32)
    intensity = np.stack([channel_0, channel_1], axis=-1)[np.newaxis]

    extra = ["intensity_max", "inertia_tensor"]

    # Graph populated with the array properties already split into scalar columns.
    sep_graph = RustWorkXGraph()
    sep_operator = RegionPropsNodes(extra_properties=extra, separate_arrays=True)
    sep_operator.add_nodes(sep_graph, labels=labels, intensity_image=intensity)

    # Graph populated with packed array columns, unpacked only when read back.
    packed_graph = RustWorkXGraph()
    packed_operator = RegionPropsNodes(extra_properties=extra)
    packed_operator.add_nodes(packed_graph, labels=labels, intensity_image=intensity)

    separated_names = _matching_columns(sep_graph.node_attrs())
    unpacked_names = _matching_columns(packed_graph.node_attrs(unpack=True))
    assert separated_names == unpacked_names
Loading