Genesis-Embodied-AI · Kashu7100 · Jun 7, 2026 · chatgpt-codex-connector · Jun 9, 2026
@@ -244,6 +244,12 @@ class RaycasterSharedMetadata(KinematicSensorMetadataMixin, SimpleSensorMetadata
     sensor_cache_offsets: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int)
     sensor_point_offsets: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int)
     sensor_point_counts: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int)
+    # Size (in cache slots) of each sensor's leading point region: num_rays*3 when return_points, else 0. The cast
+    # kernel adds this to the cache offset to locate the distance block, so a distances-only sensor packs distances
+    # at the front of its (4x smaller) cache block with no gap.
+    sensor_point_region: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int)
+    # 1 when the sensor stores per-ray hit points, 0 for distances-only. Gates the point writes in write_ray_hit.
+    sensor_return_points: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int)
 
 
 class RaycasterReturnType(NamedTuple):
@@ -299,16 +305,25 @@ def build(self):
         num_rays = math.prod(self._options.pattern.return_shape)
         self._shared_metadata.sensors_ray_start_idx.append(self._shared_metadata.total_n_rays)
 
-        # These fields are used to properly index into the big cache tensor in kernel_cast_rays
+        # These fields are used to properly index into the big cache tensor in kernel_cast_rays. The offset of the
+        # next sensor's block is this sensor's start plus its own cache size — a running cumulative sum, so sensors
+        # with different cache sizes (e.g. a points lidar next to a distances-only depth camera) pack correctly.
+        prev_offset = int(self._shared_metadata.sensor_cache_offsets[-1].item())
         self._shared_metadata.sensor_cache_offsets = concat_with_tensor(
-            self._shared_metadata.sensor_cache_offsets, self._cache_size * (self._idx + 1)
+            self._shared_metadata.sensor_cache_offsets, prev_offset + self._cache_size
         )
         self._shared_metadata.sensor_point_offsets = concat_with_tensor(
             self._shared_metadata.sensor_point_offsets, self._shared_metadata.total_n_rays
         )
         self._shared_metadata.sensor_point_counts = concat_with_tensor(
             self._shared_metadata.sensor_point_counts, num_rays
         )
+        self._shared_metadata.sensor_point_region = concat_with_tensor(
+            self._shared_metadata.sensor_point_region, num_rays * 3 if self._options.return_points else 0
+        )
+        self._shared_metadata.sensor_return_points = concat_with_tensor(
+            self._shared_metadata.sensor_return_points, int(self._options.return_points)
+        )
         self._shared_metadata.total_n_rays += num_rays
 
         self._shared_metadata.points_to_sensor_idx = concat_with_tensor(
@@ -333,8 +348,20 @@ def build(self):
 
     def _get_return_format(self) -> tuple[tuple[int, ...], ...]:
         shape = self._options.pattern.return_shape
+        # Distances-only: drop the (*shape, 3) points field so the cache holds just the distances.
+        if not self._options.return_points:
+            return (shape,)
         return ((*shape, 3), shape)
 
+    def _get_formatted_data(self, tensor: torch.Tensor, envs_idx=None):
+        # Keep the RaycasterReturnType(points, distances) NamedTuple contract regardless of return_points: when
+        # points are disabled the base class sees a single return field and would hand back a bare distances
+        # tensor, so re-wrap it with points=None. Consumers that only read .distances are unaffected.
+        data = super()._get_formatted_data(tensor, envs_idx)
+        if self._options.return_points:
+            return data
+        return RaycasterReturnType(points=None, distances=data)
+
     @classmethod
     def _get_cache_dtype(cls) -> torch.dtype:
         return gs.tc_float
@@ -390,6 +417,8 @@ def _update_raw_data(
                 shared_metadata.sensor_cache_offsets,
                 shared_metadata.sensor_point_offsets,
                 shared_metadata.sensor_point_counts,
+                shared_metadata.sensor_point_region,
+                shared_metadata.sensor_return_points,
                 raw_data_T,
                 gs.EPS,
                 i > 0,

@@ -530,6 +530,12 @@ class Raycaster(KinematicSensorOptionsMixin["RaycasterSensor"], SimpleSensorOpti
         The value to return for no hit. Defaults to max_range if not specified.
     return_world_frame : bool, optional
         Whether to return points in the world frame. Defaults to False (local frame).
+    return_points : bool, optional
+        Whether to compute and store the per-ray hit points (the ``points`` field). Defaults to True.
+        Set False for a distances-only sensor (e.g. a depth image whose consumer reads only
+        ``.distances``): the output cache then holds just the ``H*W`` distances instead of
+        ``H*W*(3+1)``, cutting the cache memory and per-ray write bandwidth ~4x. ``read().points``
+        is ``None`` when disabled.
     debug_sphere_radius: float, optional
         The radius of each debug sphere drawn in the scene. Defaults to 0.02.
     debug_ray_start_color: array-like[float, float, float, float], optional
@@ -543,6 +549,7 @@ class Raycaster(KinematicSensorOptionsMixin["RaycasterSensor"], SimpleSensorOpti
     max_range: PositiveFloat = 20.0
     no_hit_value: float | None = None
     return_world_frame: StrictBool = False
+    return_points: StrictBool = True
 
     debug_sphere_radius: PositiveFloat = 0.02
     debug_ray_start_color: Vec4FType = (0.5, 0.5, 1.0, 1.0)

@@ -508,6 +508,7 @@ def write_ray_hit(
     i_p_dist: int,
     is_world_frame: qd.types.ndarray(ndim=1),
     no_hit_values: qd.types.ndarray(ndim=1),
+    return_points_i: int,
     output_hits: qd.types.ndarray(ndim=2),
     eps: float,
     is_merge: qd.template(),
@@ -518,26 +519,31 @@ def write_ray_hit(
     no_hit_value), initializing the cache. When True the function only writes when it found a closer hit than what
     is already in the cache, so multiple BVH casts can be composed by chaining calls (first with is_merge=False,
     subsequent with is_merge=True) into the same output buffer with no scratch storage.
+
+    `return_points_i` (1/0) gates the per-ray hit-point writes: a distances-only sensor (0) has no point region in
+    its cache block, so i_p_dist already points at the front of the block and the point stores are skipped.
     """
     if hit_face >= 0 and (not is_merge or hit_distance < output_hits[i_p_dist, i_b]):
-        # Store distance at: cache_offset + (num_points_in_sensor * 3) + point_idx_in_sensor
+        # Store distance at: cache_offset + point_region + point_idx_in_sensor
         output_hits[i_p_dist, i_b] = hit_distance
 
-        hit_point = qd.math.vec3(0.0, 0.0, 0.0)
-        if is_world_frame[i_s]:
-            hit_point = ray_start_world + hit_distance * ray_direction_world
-        else:
-            # Local frame output along provided local ray direction
-            hit_point = hit_distance * gu.qd_normalize(ray_dir_local, eps)
-        # Store points at: cache_offset + point_idx_in_sensor * 3
-        output_hits[i_p_offset + i_p_sensor * 3 + 0, i_b] = hit_point.x
-        output_hits[i_p_offset + i_p_sensor * 3 + 1, i_b] = hit_point.y
-        output_hits[i_p_offset + i_p_sensor * 3 + 2, i_b] = hit_point.z
+        if return_points_i != 0:
+            hit_point = qd.math.vec3(0.0, 0.0, 0.0)
+            if is_world_frame[i_s]:
+                hit_point = ray_start_world + hit_distance * ray_direction_world
+            else:
+                # Local frame output along provided local ray direction
+                hit_point = hit_distance * gu.qd_normalize(ray_dir_local, eps)
+            # Store points at: cache_offset + point_idx_in_sensor * 3
+            output_hits[i_p_offset + i_p_sensor * 3 + 0, i_b] = hit_point.x
+            output_hits[i_p_offset + i_p_sensor * 3 + 1, i_b] = hit_point.y
+            output_hits[i_p_offset + i_p_sensor * 3 + 2, i_b] = hit_point.z
     elif not is_merge:
         # No hit
-        output_hits[i_p_offset + i_p_sensor * 3 + 0, i_b] = 0.0
-        output_hits[i_p_offset + i_p_sensor * 3 + 1, i_b] = 0.0
-        output_hits[i_p_offset + i_p_sensor * 3 + 2, i_b] = 0.0
+        if return_points_i != 0:
+            output_hits[i_p_offset + i_p_sensor * 3 + 0, i_b] = 0.0
+            output_hits[i_p_offset + i_p_sensor * 3 + 1, i_b] = 0.0
+            output_hits[i_p_offset + i_p_sensor * 3 + 2, i_b] = 0.0
         output_hits[i_p_dist, i_b] = no_hit_values[i_s]
 
 
@@ -560,6 +566,8 @@ def kernel_cast_rays(
     sensor_cache_offsets: qd.types.ndarray(ndim=1),  # [n_sensors] - cache start index for each sensor
     sensor_point_offsets: qd.types.ndarray(ndim=1),  # [n_sensors] - point start index for each sensor
     sensor_point_counts: qd.types.ndarray(ndim=1),  # [n_sensors] - number of points for each sensor
+    sensor_point_region: qd.types.ndarray(ndim=1),  # [n_sensors] - cache slots for the point block (count*3 or 0)
+    sensor_return_points: qd.types.ndarray(ndim=1),  # [n_sensors] - 1 to store hit points, 0 for distances-only
     output_hits: qd.types.ndarray(ndim=2),  # [total_cache_size, n_env]
     eps: float,
     is_merge: qd.template(),
@@ -618,7 +626,7 @@ def kernel_cast_rays(
 
         i_p_sensor = i_p - sensor_point_offsets[i_s]
         i_p_offset = sensor_cache_offsets[i_s]
-        i_p_dist = i_p_offset + sensor_point_counts[i_s] * 3 + i_p_sensor
+        i_p_dist = i_p_offset + sensor_point_region[i_s] + i_p_sensor
         write_ray_hit(
             hit_face,
             hit_distance,
@@ -632,6 +640,7 @@ def kernel_cast_rays(
             i_p_dist,
             is_world_frame,
             no_hit_values,
+            sensor_return_points[i_s],
             output_hits,
             eps,
             is_merge,
@@ -657,6 +666,8 @@ def kernel_cast_rays_visual(
     sensor_cache_offsets: qd.types.ndarray(ndim=1),
     sensor_point_offsets: qd.types.ndarray(ndim=1),
     sensor_point_counts: qd.types.ndarray(ndim=1),
+    sensor_point_region: qd.types.ndarray(ndim=1),
+    sensor_return_points: qd.types.ndarray(ndim=1),
     output_hits: qd.types.ndarray(ndim=2),
     eps: float,
     is_merge: qd.template(),
@@ -702,7 +713,7 @@ def kernel_cast_rays_visual(
 
         i_p_sensor = i_p - sensor_point_offsets[i_s]
         i_p_offset = sensor_cache_offsets[i_s]
-        i_p_dist = i_p_offset + sensor_point_counts[i_s] * 3 + i_p_sensor
+        i_p_dist = i_p_offset + sensor_point_region[i_s] + i_p_sensor
         write_ray_hit(
             hit_face,
             hit_distance,
@@ -716,6 +727,7 @@ def kernel_cast_rays_visual(
             i_p_dist,
             is_world_frame,
             no_hit_values,
+            sensor_return_points[i_s],
             output_hits,
             eps,
             is_merge,

@@ -1174,6 +1174,46 @@ def test_raycaster_hits(show_viewer, n_envs):
     assert_allclose(grid_distances, grid_distances_ref, tol=1e-3)
 
 
+@pytest.mark.required
+@pytest.mark.parametrize("n_envs", [0, 2])
+def test_raycaster_return_points_false(show_viewer, n_envs):
+    # A distances-only DepthCamera (return_points=False) must yield exactly the same hit distances as the default
+    # points+distances sensor, while skipping the per-ray hit-point storage so read().points is None. A points-on
+    # sensor sharing the scene must be unaffected, which exercises the cumulative cache-offset packing for two
+    # sensors of differing per-sensor cache sizes.
+    scene = gs.Scene(show_viewer=show_viewer)
+    scene.add_entity(gs.morphs.Plane())
+    scene.add_entity(gs.morphs.Box(size=(0.4, 0.4, 0.4), pos=(0.0, 0.0, 0.5), fixed=True))
+
+    common = dict(pos_offset=(0.0, 0.0, 2.0))
+    cam_pts = scene.add_sensor(
+        gs.sensors.DepthCamera(
+            pattern=gs.sensors.raycaster.DepthCameraPattern(res=(8, 8)), return_points=True, **common
+        )
+    )
+    cam_nopts = scene.add_sensor(
+        gs.sensors.DepthCamera(
+            pattern=gs.sensors.raycaster.DepthCameraPattern(res=(8, 8)), return_points=False, **common
+        )
+    )
+    scene.build(n_envs=n_envs)
+    scene.step()
+
+    data_pts = cam_pts.read()
+    data_nopts = cam_nopts.read()
+
+    # distances-only keeps the RaycasterReturnType NamedTuple but with points=None; distances stay finite.
+    assert data_nopts.points is None
+    assert data_pts.points is not None
+    assert torch.isfinite(data_nopts.distances).all()
+    # Storing points must not change the computed distances at all.
+    assert torch.equal(data_pts.distances, data_nopts.distances)
+    # The points-on sensor stays self-consistent (local-frame points, so ||hit_point|| == distance for rays that
+    # hit) even though a distances-only sensor is packed after it in the shared cache.
+    hit = data_pts.distances < cam_pts._options.max_range
+    assert_allclose(data_pts.points.norm(dim=-1)[hit], data_pts.distances[hit], tol=1e-4)
+
+
 @pytest.mark.required
 @pytest.mark.parametrize("n_envs", [0, 2])
 @pytest.mark.parametrize("kin_raycastable", [True, False])