Skip to content
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
29177f3
added private metadata machinery
yfukai Feb 17, 2026
d8292f1
before adding private
yfukai Feb 17, 2026
cff5898
added private metadata view
yfukai Feb 17, 2026
68b01d4
renamed func
yfukai Feb 17, 2026
1ae2426
implementation of saving and loading dtypes as metadata
yfukai Feb 17, 2026
c50a07b
lint
yfukai Feb 17, 2026
e9bf28f
restricted dtype metadata to sqlgraph
yfukai Feb 18, 2026
9aa9c3a
udpated serialization strategies
yfukai Feb 18, 2026
7e61ac3
solved failing tests
yfukai Feb 18, 2026
e5968bf
added test for shape-less pl.Array (xfail)
yfukai Feb 18, 2026
b4acde3
working
yfukai Feb 19, 2026
cc55976
simplified code
yfukai Feb 19, 2026
e76d8e5
initial try
yfukai Feb 19, 2026
7bec369
saving private metadata
yfukai Feb 20, 2026
852f717
rustworkx reviewed
yfukai Feb 26, 2026
4c151bb
Merge branch 'from_other_roundtrip' into struct_attr
yfukai Feb 26, 2026
4af9904
working with clean code?
yfukai Feb 26, 2026
19055ab
Merge branch 'main' into struct_attr
JoOkuma Feb 27, 2026
6c69e76
updated impl
yfukai Apr 10, 2026
d9bee26
removed codex config wrongly added
yfukai Apr 10, 2026
cc0beb4
issue fixes
yfukai Apr 14, 2026
007d4c7
Merge branch 'main' into struct_attr
yfukai Apr 14, 2026
ffea2ec
rolled back unncessary change
yfukai Apr 14, 2026
0ad6c60
Merge remote-tracking branch 'upstream/main' into struct_attr
yfukai May 28, 2026
5e7331f
additional comments
yfukai May 28, 2026
7e07801
Merge branch 'main' into struct_attr
JoOkuma Jun 1, 2026
37f8fc9
Fix lint: remove whitespace from blank lines
JoOkuma Jun 1, 2026
1842c55
fixes
yfukai Jun 4, 2026
db35287
refactor aligning main
yfukai Jun 4, 2026
3759d9c
Restore scratch-table machinery and tests from main
yfukai Jun 5, 2026
f5b7cc0
Merge branch 'main' of https://github.com/royerlab/tracksdata into st…
yfukai Jun 8, 2026
0d76262
ignored the devcontaienr
yfukai Jun 8, 2026
87707d0
bugfix
yfukai Jun 8, 2026
4705182
Merge branch 'main' into struct_attr
JoOkuma Jun 10, 2026
ae4d563
Merge remote-tracking branch 'upstream/main' into struct_attr
JoOkuma Jun 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/tracksdata/_test/test_attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,23 @@ def test_attr_expr_method_delegation() -> None:
assert result.to_list() == expected.to_list()


def test_attr_expr_struct_field_method_delegation() -> None:
    """`.struct.field(...)` on a NodeAttr stays a NodeAttr and evaluates to the field values."""
    struct_schema = {"s": pl.Struct({"x": pl.Int64})}
    rows = [{"x": x} for x in (1, 2, 3)]
    df = pl.DataFrame({"s": rows}, schema=struct_schema)

    expr = NodeAttr("s").struct.field("x")
    values = expr.evaluate(df).to_list()

    # The wrapper must keep the concrete Attr subclass, not fall back to a bare polars Expr.
    assert isinstance(expr, NodeAttr)
    assert values == [1, 2, 3]


def test_attr_comparison_struct_field() -> None:
    """Comparing a struct field records the parent column and the nested field path."""
    struct_schema = {"s": pl.Struct({"x": pl.Int64})}
    rows = [{"x": x} for x in (1, 2, 1)]
    df = pl.DataFrame({"s": rows}, schema=struct_schema)

    field_attr = NodeAttr("s").struct.field("x")
    comp = field_attr == 1
    mask = comp.to_attr().evaluate(df).to_list()

    # The comparison points at the storage column, with the drill-down kept separately.
    assert comp.column == "s"
    assert comp.attr.field_path == ("x",)
    assert mask == [True, False, True]


def test_attr_expr_complex_expression() -> None:
df = pl.DataFrame({"iou": [0.5, 0.7, 0.9], "distance": [10, 20, 30]})
expr = (1 - Attr("iou")) * Attr("distance")
Expand Down
105 changes: 99 additions & 6 deletions src/tracksdata/attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,10 @@ def __init__(self, attr: "Attr", op: Callable, other: ExprInput | MembershipExpr
raise ValueError(f"Comparison operators are not supported for multiple columns. Found {columns}.")

self.attr = attr
self.column = columns[0]
# Prefer the explicitly tracked root_column so struct-field comparisons
# (e.g. `NodeAttr("m").struct.field("x") == 1`) record the parent storage
# column ("m"), letting backends remap to their physical layout via field_path.
self.column = attr.root_column if attr.root_column is not None else columns[0]
self.op = op

# casting numpy scalars to python scalars
Expand All @@ -144,14 +147,18 @@ def __init__(self, attr: "Attr", op: Callable, other: ExprInput | MembershipExpr
self.other = other

def __repr__(self) -> str:
return f"{type(self.attr).__name__}({self.column}) {_OPS_MATH_SYMBOLS[self.op]} {self.other}"
if self.attr.field_path:
column = ".".join([str(self.column), *self.attr.field_path])
else:
column = str(self.column)
return f"{type(self.attr).__name__}({column}) {_OPS_MATH_SYMBOLS[self.op]} {self.other}"

def to_attr(self) -> "Attr":
"""
Transform the comparison back to an [Attr][tracksdata.attrs.Attr] object.
This is useful for evaluating the expression on a DataFrame.
"""
return Attr(self.op(pl.col(self.column), self.other))
return Attr(self.op(self.attr.expr, self.other))

def __getattr__(self, attr: str) -> Any:
return getattr(self.to_attr(), attr)
Expand Down Expand Up @@ -198,6 +205,39 @@ def __ge__(self, other: ExprInput) -> "Attr": ...
def __rge__(self, other: ExprInput) -> "Attr": ...


class _StructNamespace:
    """Wrapper around polars struct namespace that preserves Attr semantics.

    Polars' own ``Expr.struct.field(name)`` only updates the underlying expression;
    it loses the parent column identity, which backends need to map a filter back
    to its physical storage (e.g. SQL flat columns, dict lookups in rustworkx).
    This wrapper proxies the namespace while threading ``root_column`` and
    ``field_path`` through ``.field(...)`` calls.
    """

    def __init__(self, attr: "Attr") -> None:
        self._attr = attr
        self._namespace = attr.expr.struct

    def field(self, name: str) -> "Attr":
        """Select struct field ``name`` and record it on the returned Attr's field path."""
        # preserve_field_path keeps the existing root/path before appending the new field.
        # `_wrap` is handed the polars expression produced by the namespace, so the
        # result is an Attr and the path can be extended unconditionally.
        out = self._attr._wrap(self._namespace.field(name), preserve_field_path=True)
        out._append_field_path(name)
        return out

    def __getattr__(self, name: str) -> Any:
        """Proxy any other public namespace member, re-wrapping callable results via ``_wrap``."""
        # Never proxy private/dunder lookups. `__getattr__` only runs when normal
        # lookup fails, so reaching here for `_namespace`/`_attr` means the instance
        # is not initialised yet (e.g. mid copy/unpickle); reading `self._namespace`
        # below would then re-enter this method forever.
        if name.startswith("_"):
            raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

        namespace_attr = getattr(self._namespace, name)
        if callable(namespace_attr):

            @functools.wraps(namespace_attr)
            def _wrapped(*args, **kwargs):
                return self._attr._wrap(namespace_attr(*args, **kwargs))

            return _wrapped
        return namespace_attr


class Attr:
"""
A class to compose an attribute expression for attribute filtering or value evaluation.
Expand All @@ -222,30 +262,43 @@ class Attr:
def __init__(self, value: ExprInput) -> None:
    """
    Build an attribute expression from a column name, another Attr, an
    AttrComparison, a polars expression, or a literal value.
    """
    # Infinity bookkeeping: expressions multiplied by +inf / -inf respectively.
    self._inf_exprs = []
    self._neg_inf_exprs = []
    # Path tracking used by backend filters:
    # - _root_column: top-level column the value is stored under (None when unknown).
    # - _field_path: nested struct path below that root column.
    self._root_column: str | None = None
    self._field_path: tuple[str, ...] = ()

    if isinstance(value, str):
        # A plain string names a column, which is also the root of any later struct access.
        self.expr = pl.col(value)
        self._root_column = value
        return

    # A comparison is first turned back into an Attr, then copied like any other Attr.
    source = value.to_attr() if isinstance(value, AttrComparison) else value

    if isinstance(source, Attr):
        self.expr = source.expr
        # Carry over infinity tracking and path tracking from the source Attr.
        self._inf_exprs = source.inf_exprs
        self._neg_inf_exprs = source.neg_inf_exprs
        self._root_column = source.root_column
        self._field_path = source.field_path
    elif isinstance(source, Expr):
        # Raw polars expression: no root column can be attributed to it here.
        self.expr = source
    else:
        # Anything else is treated as a literal value.
        self.expr = pl.lit(source)

def _wrap(self, expr: ExprInput) -> Union["Attr", Any]:
def _wrap(self, expr: ExprInput, *, preserve_field_path: bool = False) -> Union["Attr", Any]:
if isinstance(expr, Expr):
result = Attr(expr)
result = type(self)(expr)
# Propagate infinity tracking
result._inf_exprs = self._inf_exprs.copy()
result._neg_inf_exprs = self._neg_inf_exprs.copy()
if preserve_field_path:
result._root_column = self._root_column
result._field_path = self._field_path
return result
return expr

Expand Down Expand Up @@ -377,6 +430,33 @@ def evaluate(self, df: DataFrame) -> Series:
def columns(self) -> list[str]:
return list(dict.fromkeys(self.expr_columns + self.inf_columns + self.neg_inf_columns))

@property
def root_column(self) -> str | None:
"""
Top-level column name from which this expression originates.

Examples
--------
`Attr("t").root_column == "t"`
`NodeAttr("measurements").struct.field("score").root_column == "measurements"`
"""
return self._root_column

@property
def field_path(self) -> tuple[str, ...]:
    """
    Struct-field names leading from [root_column][tracksdata.attrs.Attr.root_column]
    down to the value this expression refers to.

    An empty tuple means the expression does not drill into a struct.

    Examples
    --------
    `Attr("t").field_path == ()`
    `NodeAttr("measurements").struct.field("score").field_path == ("score",)`
    `NodeAttr("meta").struct.field("det").struct.field("conf").field_path == ("det", "conf")`
    """
    path = self._field_path
    return path

@property
def inf_exprs(self) -> list["Attr"]:
"""Get the expressions multiplied by positive infinity."""
Expand Down Expand Up @@ -464,6 +544,9 @@ def __getattr__(self, attr: str) -> Any:
if attr.startswith("_"):
raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'")

if attr == "struct":
return _StructNamespace(self)

# Auto-generate operator methods such as `.log()` by delegating to the polars expression.
expr_attr = getattr(self.expr, attr)
if callable(expr_attr):
Expand All @@ -475,6 +558,12 @@ def _wrapped(*args, **kwargs):
return _wrapped
return expr_attr

def _append_field_path(self, field_name: str) -> None:
    """Extend the tracked struct path by ``field_name``; without a root column the path is cleared."""
    # A path is only meaningful relative to a known root column.
    has_root = self._root_column is not None
    self._field_path = (*self._field_path, field_name) if has_root else ()

def __repr__(self) -> str:
return f"Attr({self.expr})"

Expand Down Expand Up @@ -733,4 +822,8 @@ def polars_reduce_attr_comps(
# With no comparisons there is nothing to reduce, so fail loudly instead of guessing a result.
raise ValueError("No attribute comparisons provided.")

return pl.reduce(reduce_op, [attr_comp.op(df[str(attr_comp.column)], attr_comp.other) for attr_comp in attr_comps])
# Apply each comparison against the full Attr expression rather than the bare
# column from the dataframe. This matters for struct-field accesses: the
# expression already drills into the struct (e.g. `pl.col("m").struct.field("x")`),
# while `df[column]` would yield the whole struct and the comparison would fail.
return pl.reduce(reduce_op, [attr_comp.op(attr_comp.attr.expr, attr_comp.other) for attr_comp in attr_comps])
26 changes: 26 additions & 0 deletions src/tracksdata/graph/_rustworkx_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,35 @@ def _create_filter_func(
) -> Callable[[dict[str, Any]], bool]:
LOG.info(f"Creating filter function for {attr_comps}")

def _extract_field_path(value: Any, field_path: tuple[str, ...]) -> Any:
    """
    Walk ``field_path`` into ``value`` and return what is found there, or
    ``None`` as soon as a step cannot be resolved.
    """
    # Rustworkx keeps attributes as plain Python objects (typically dicts for
    # struct attrs) instead of polars columns, so a struct-field filter cannot be
    # pushed into an expression and the path is resolved by hand. Item access and
    # attribute access are both tried so nested dataclasses or tuples stored in
    # the attr dict keep working.
    current = value
    for key in field_path:
        if current is None:
            return None

        if isinstance(current, dict):
            current = current.get(key)
            continue

        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            # Not subscriptable by this key: fall back to attribute access. A missing
            # attribute yields None, which ends the walk with a None result.
            current = getattr(current, key, None)

    return current

def _filter(attrs: dict[str, Any]) -> bool:
    """Return True only when ``attrs`` satisfies every comparison in ``attr_comps``."""
    for comp in attr_comps:
        # Attributes missing from the dict fall back to the schema's default value.
        fallback = schema[comp.column].default_value
        candidate = attrs.get(comp.column, fallback)

        # Struct-field comparisons drill into the stored object before comparing.
        path = comp.attr.field_path
        if path:
            candidate = _extract_field_path(candidate, path)

        if not comp.op(candidate, comp.other):
            return False
    return True
Expand Down
Loading
Loading