Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 36 additions & 3 deletions docs/source/user_guide/tensor.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

# Tensors

Quadrants offers two underlying tensor implementations, `qd.field` and
`qd.ndarray`. They have different runtime/compile-time trade-offs, and
different physical memory layouts can suit different kernels.
Quadrants offers two underlying tensor implementations, [`qd.field`](tensor_types.md#global-field)
and [`qd.ndarray`](tensor_types.md#ndarray). They have different runtime/compile-time
trade-offs, and different physical memory layouts can suit different kernels.

The tensor API lets you pick both the **backend** and (in a future
release) the **physical layout** on a per-tensor basis at allocation time.
Expand Down Expand Up @@ -138,5 +138,38 @@ The returned object is interchangeable with its direct equivalent:
This mirrors the one-liner Genesis already uses to switch backends; the
helper just makes the pattern first-class.

## Gradients

`needs_grad=True` works on every tensor factory and on every backend;
the keyword is passed through to the underlying
`qd.field` / `qd.ndarray` call:

```python
import quadrants as qd

qd.init(arch=qd.x64)

# Field-backed primal + grad.
a = qd.tensor(qd.f32, shape=(4,), needs_grad=True)
assert a.grad is not None

# Same on the ndarray backend.
b = qd.tensor(qd.f32, shape=(4,), backend=qd.Backend.NDARRAY, needs_grad=True)
assert b.grad is not None

# Kernels write through canonical indices on both primal and grad.
@qd.kernel
def write_grad(x: qd.template()):
for i in range(4):
x.grad[i] = i * 100.0

write_grad(a)
print(a.grad.to_numpy()) # [0., 100., 200., 300.]
```

Gradient buffers always share the canonical shape of the primal, on both
backends. The `needs_grad` keyword also passes through `qd.tensor_vec` and
`qd.tensor_mat` for compound element types.

Subsequent releases will add a `layout=` keyword for per-tensor physical-memory
layout.
34 changes: 30 additions & 4 deletions python/quadrants/lang/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,7 @@ def field(

@classmethod
@python_scope
def ndarray(cls, n, m, dtype, shape):
def ndarray(cls, n, m, dtype, shape, needs_grad=False):
"""Defines a Quadrants ndarray with matrix elements.
This function must be called in Python scope, and after `qd.init` is called.

Expand All @@ -984,6 +984,11 @@ def ndarray(cls, n, m, dtype, shape):
m (int): Number of columns of the matrix.
dtype (DataType): Data type of each value.
shape (Union[int, tuple[int]]): Shape of the ndarray.
needs_grad (bool, optional): If True, allocate a companion grad
ndarray of the same shape and dtype, accessible via
``arr.grad``. Requires ``dtype`` to be a real (floating-point)
type. Defaults to False. Silently ignored on the python
backend (matches the scalar ``qd.ndarray`` behaviour).

Example::

Expand All @@ -1001,7 +1006,15 @@ def ndarray(cls, n, m, dtype, shape):
batch_ndim = len(shape)
shape = (*shape, m, n)
return py_tensor.create_tensor(shape, dtype_to_torch_dtype(dtype), batch_ndim=batch_ndim)
return MatrixNdarray(n, m, dtype, shape)
arr = MatrixNdarray(n, m, dtype, shape)
if needs_grad:
dt = cook_dtype(dtype)
if not qd_python_core.is_real(dt):
raise QuadrantsRuntimeError(
f"{dtype} is not supported for Matrix.ndarray with needs_grad=True; element dtype must be real (floating-point)."
)
arr._set_grad(cls.ndarray(n, m, dtype, shape, needs_grad=False))
return arr

@classmethod
def tensor(cls, n, m, dtype, shape, **kwargs):
Expand Down Expand Up @@ -1186,13 +1199,18 @@ def field(cls, n, dtype, *args, **kwargs):

@classmethod
@python_scope
def ndarray(cls, n, dtype, shape):
def ndarray(cls, n, dtype, shape, needs_grad=False):
"""Defines a Quadrants ndarray with vector elements.

Args:
n (int): Size of the vector.
dtype (DataType): Data type of each value.
shape (Union[int, tuple[int]]): Shape of the ndarray.
needs_grad (bool, optional): If True, allocate a companion grad
ndarray of the same shape and dtype, accessible via
``arr.grad``. Requires ``dtype`` to be a real (floating-point)
type. Defaults to False. Silently ignored on the python
backend (matches the scalar ``qd.ndarray`` behaviour).

Example:
The code below shows how a Quadrants ndarray with vector elements can be declared and defined::
Expand All @@ -1208,7 +1226,15 @@ def ndarray(cls, n, dtype, shape):
batch_ndim = len(shape)
shape = (*shape, n)
return py_tensor.create_tensor(shape, dtype_to_torch_dtype(dtype), batch_ndim=batch_ndim)
return VectorNdarray(n, dtype, shape)
arr = VectorNdarray(n, dtype, shape)
if needs_grad:
dt = cook_dtype(dtype)
if not qd_python_core.is_real(dt):
raise QuadrantsRuntimeError(
f"{dtype} is not supported for Vector.ndarray with needs_grad=True; element dtype must be real (floating-point)."
)
arr._set_grad(cls.ndarray(n, dtype, shape, needs_grad=False))
return arr

@classmethod
def tensor(cls, n, dtype, shape, **kwargs):
Expand Down
1 change: 1 addition & 0 deletions tests/python/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def _get_expected_matrix_apis():
"outer_product",
"rows",
"sum",
"tensor",
"to_list",
"to_numpy",
"trace",
Expand Down
199 changes: 199 additions & 0 deletions tests/python/test_tensor_grad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
"""Tests for ``needs_grad=True`` on the tensor factories.

The factories pass ``needs_grad`` through to ``qd.field`` / ``qd.ndarray``
via ``**kwargs``; ``qd.Vector.ndarray`` / ``qd.Matrix.ndarray`` accept
``needs_grad`` and allocate a companion grad ndarray of matching shape and
element type (real-only). These tests lock that behaviour as part of the
public contract on every (factory, backend) combination.

Behavioural tests are parametrized over both backends. The int-dtype
rejection check is NDARRAY-only because the FIELD path goes through the
older Vector/Matrix.field code which raises a different error (out of
scope for this branch's contract).
"""

import pytest

import quadrants as qd

from tests import test_utils

# Backends every behavioural test below is parametrized over; the ids keep
# pytest's parametrized test names readable ("field" / "ndarray").
BACKENDS = [qd.Backend.FIELD, qd.Backend.NDARRAY]
BACKEND_IDS = ["field", "ndarray"]


# ----------------------------------------------------------------------------
# Scalar qd.tensor()
# ----------------------------------------------------------------------------


@pytest.mark.parametrize("backend", BACKENDS, ids=BACKEND_IDS)
@test_utils.test(arch=qd.cpu)
def test_tensor_needs_grad_allocates_grad(backend):
    """Scalar ``qd.tensor(needs_grad=True)`` allocates a grad of matching shape."""
    primal = qd.tensor(qd.f32, shape=(4,), needs_grad=True, backend=backend)
    grad = primal.grad
    assert grad is not None
    assert grad.shape == primal.shape


@pytest.mark.parametrize("backend", BACKENDS, ids=BACKEND_IDS)
@test_utils.test(arch=qd.cpu)
def test_tensor_grad_kernel_roundtrip(backend):
    """Write to primal and grad through a kernel; read back canonically."""
    a = qd.tensor(qd.f32, shape=(4,), backend=backend, needs_grad=True)

    # The kernel argument annotation differs per backend: field-backed
    # tensors are passed as qd.template(), ndarray-backed as
    # qd.types.ndarray(). The kernel bodies are otherwise identical.
    if backend is qd.Backend.FIELD:

        @qd.kernel
        def write_primal(x: qd.template()):
            for i in range(4):
                x[i] = i * 10.0

        @qd.kernel
        def write_grad(x: qd.template()):
            for i in range(4):
                x.grad[i] = i * 100.0

    else:

        @qd.kernel
        def write_primal(x: qd.types.ndarray()):
            for i in range(4):
                x[i] = i * 10.0

        @qd.kernel
        def write_grad(x: qd.types.ndarray()):
            for i in range(4):
                x.grad[i] = i * 100.0

    write_primal(a)
    write_grad(a)
    # Primal and grad are written independently; both must read back with
    # the values the kernels stored at each canonical index.
    assert list(a.to_numpy()) == [0.0, 10.0, 20.0, 30.0]
    assert list(a.grad.to_numpy()) == [0.0, 100.0, 200.0, 300.0]


# ----------------------------------------------------------------------------
# Vector / Matrix tensor factories
# ----------------------------------------------------------------------------


@pytest.mark.parametrize("backend", BACKENDS, ids=BACKEND_IDS)
@test_utils.test(arch=qd.cpu)
def test_tensor_vec_needs_grad_allocates_grad(backend):
    """``qd.Vector.tensor(needs_grad=True)`` yields a grad matching the primal's shape."""
    vec = qd.Vector.tensor(3, qd.f32, shape=(2,), needs_grad=True, backend=backend)
    grad = vec.grad
    assert grad is not None
    assert tuple(grad.shape) == tuple(vec.shape)


@pytest.mark.parametrize("backend", BACKENDS, ids=BACKEND_IDS)
@test_utils.test(arch=qd.cpu)
def test_tensor_mat_needs_grad_allocates_grad(backend):
    """``qd.Matrix.tensor(needs_grad=True)`` yields a grad matching the primal's shape."""
    mat = qd.Matrix.tensor(2, 2, qd.f32, shape=(3,), needs_grad=True, backend=backend)
    grad = mat.grad
    assert grad is not None
    assert tuple(grad.shape) == tuple(mat.shape)


@pytest.mark.parametrize("backend", BACKENDS, ids=BACKEND_IDS)
@test_utils.test(arch=qd.cpu)
def test_tensor_vec_grad_kernel_roundtrip(backend):
    """Kernel writes to a vector tensor's primal and grad read back correctly."""
    v = qd.Vector.tensor(3, qd.f32, shape=(2,), backend=backend, needs_grad=True)

    # Kernel argument annotation differs per backend (qd.template() for
    # fields, qd.types.ndarray() for ndarrays); bodies are identical.
    # The inner component loop is qd.static so the vector index is
    # unrolled at compile time.
    if backend is qd.Backend.FIELD:

        @qd.kernel
        def write_primal(x: qd.template()):
            for i in range(2):
                for j in qd.static(range(3)):
                    x[i][j] = i * 10.0 + j

        @qd.kernel
        def write_grad(x: qd.template()):
            for i in range(2):
                for j in qd.static(range(3)):
                    x.grad[i][j] = i * 100.0 + j * 10.0

    else:

        @qd.kernel
        def write_primal(x: qd.types.ndarray()):
            for i in range(2):
                for j in qd.static(range(3)):
                    x[i][j] = i * 10.0 + j

        @qd.kernel
        def write_grad(x: qd.types.ndarray()):
            for i in range(2):
                for j in qd.static(range(3)):
                    x.grad[i][j] = i * 100.0 + j * 10.0

    write_primal(v)
    write_grad(v)
    primal = v.to_numpy()
    grad = v.grad.to_numpy()
    # Spot-check first and last canonical (batch, component) entries:
    # primal[i, j] == i*10 + j, grad[i, j] == i*100 + j*10.
    assert primal[0, 0] == 0.0 and primal[1, 2] == 12.0
    assert grad[0, 0] == 0.0 and grad[1, 2] == 120.0


@pytest.mark.parametrize("backend", BACKENDS, ids=BACKEND_IDS)
@test_utils.test(arch=qd.cpu)
def test_tensor_mat_grad_kernel_roundtrip(backend):
    """Kernel writes to a matrix tensor's primal and grad read back correctly."""
    m = qd.Matrix.tensor(2, 2, qd.f32, shape=(3,), backend=backend, needs_grad=True)

    # Kernel argument annotation differs per backend (qd.template() for
    # fields, qd.types.ndarray() for ndarrays); bodies are identical.
    # The row/column loops are qd.static so matrix indices are unrolled
    # at compile time.
    if backend is qd.Backend.FIELD:

        @qd.kernel
        def write_primal(x: qd.template()):
            for i in range(3):
                for r in qd.static(range(2)):
                    for c in qd.static(range(2)):
                        x[i][r, c] = i * 10.0 + r * 2.0 + c

        @qd.kernel
        def write_grad(x: qd.template()):
            for i in range(3):
                for r in qd.static(range(2)):
                    for c in qd.static(range(2)):
                        x.grad[i][r, c] = i * 100.0 + r * 20.0 + c * 10.0

    else:

        @qd.kernel
        def write_primal(x: qd.types.ndarray()):
            for i in range(3):
                for r in qd.static(range(2)):
                    for c in qd.static(range(2)):
                        x[i][r, c] = i * 10.0 + r * 2.0 + c

        @qd.kernel
        def write_grad(x: qd.types.ndarray()):
            for i in range(3):
                for r in qd.static(range(2)):
                    for c in qd.static(range(2)):
                        x.grad[i][r, c] = i * 100.0 + r * 20.0 + c * 10.0

    write_primal(m)
    write_grad(m)
    primal = m.to_numpy()
    grad = m.grad.to_numpy()
    # Spot-check the last (batch, row, col) entry:
    # primal[2, 1, 1] == 2*10 + 1*2 + 1; grad[2, 1, 1] == 2*100 + 1*20 + 1*10.
    assert primal[2, 1, 1] == 23.0
    assert grad[2, 1, 1] == 230.0


# ----------------------------------------------------------------------------
# Negative path: int dtype + needs_grad on the NDARRAY backend.
# Kept NDARRAY-only because the FIELD path raises through the legacy
# create_field machinery with a different error class / message; the
# Vector.ndarray / Matrix.ndarray rejection added in this branch is the
# focused contract.
# ----------------------------------------------------------------------------


@test_utils.test(arch=qd.cpu)
def test_tensor_vec_ndarray_needs_grad_rejects_int_dtype():
    """Integer element dtype with ``needs_grad=True`` must raise on NDARRAY."""
    with pytest.raises(qd.QuadrantsRuntimeError, match="needs_grad"):
        qd.Vector.tensor(3, qd.i32, shape=(2,), needs_grad=True, backend=qd.Backend.NDARRAY)


@test_utils.test(arch=qd.cpu)
def test_tensor_mat_ndarray_needs_grad_rejects_int_dtype():
    """Integer element dtype with ``needs_grad=True`` must raise on NDARRAY."""
    with pytest.raises(qd.QuadrantsRuntimeError, match="needs_grad"):
        qd.Matrix.tensor(2, 2, qd.i32, shape=(3,), needs_grad=True, backend=qd.Backend.NDARRAY)
Loading