Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 86 additions & 10 deletions genesis/engine/sensors/raycaster.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,17 @@ class BVHContext:
# node loads instead of scattering over n_env identical trees. Recomputed on every rebuild.
shared_across_envs: bool = False

# Tree routing. ``env_bvh_idx`` (n_env,) gives the BVH batch each env casts against; ``batch_repr_env``
# (n_batches,) gives the env whose geometry builds each tree slot. Together they express all three layouts:
# per-env : n_batches=n_env, env_bvh_idx=arange, batch_repr_env=arange
# shared : n_batches=1, env_bvh_idx=0, batch_repr_env=[0]
# N grouped : n_batches=N, env_bvh_idx=group_id, batch_repr_env=one env per group (N distinct << n_env)
env_bvh_idx: "torch.Tensor | None" = None
batch_repr_env: "torch.Tensor | None" = None
# Compile-time thread-mapping selector for the cast: env-major (coalesced) when a tree is shared by a contiguous
# block of envs (shared / grouped), ray-major otherwise (distinct per-env trees).
env_major: bool = False


class RaycastContext(SharedSensorContext):
"""
Expand Down Expand Up @@ -101,6 +112,36 @@ def _compute_visual_raycast_mask(solver: "KinematicSolver") -> np.ndarray:
vface_vgeom_idx = qd_to_numpy(solver.vfaces_info.vgeom_idx)
return vgeom_enabled[vface_vgeom_idx].astype(np.int8)

@staticmethod
def _static_geometry_groups(solver: "RigidSolver"):
    """Group envs by identical static collision geometry.

    Heterogeneity in Genesis is expressed via per-env active-geom ranges in batched ``links_info`` (one variant
    block per env, see ``_balanced_variant_mapping``). Two envs cast against the same geometry iff every link's
    ``(geom_start, geom_end)`` matches, so grouping by that per-env signature yields the distinct geometries:
    a single group when the solver isn't batched (homogeneous), and N groups for N variants across the envs.
    Caller guarantees, via ``shared_static_raycast_bvh``, that the static geometry stays grouped as built (no
    per-env ``set_pos`` divergence), so the cheap range signature suffices.

    Group ids are numbered by ascending representative env (first occurrence), NOT by the sorted order of the
    signatures that ``torch.unique`` produces. This keeps the routing canonical: when every env has its own
    geometry (``n_groups == B``) the result is exactly the per-env identity layout
    (``env_bvh_idx == batch_repr_env == arange(B)``), which is what code that resets a full-size allocation to
    ``arange`` routing relies on.

    Parameters
    ----------
    solver : RigidSolver
        Solver whose ``_B``, ``_options.batch_links_info`` and ``links_info.geom_start`` / ``geom_end`` are read.

    Returns
    -------
    n_groups : int
        Number of distinct static geometries.
    env_bvh_idx : torch.Tensor
        ``(B,)`` tensor of dtype ``gs.tc_int``; ``env_bvh_idx[b]`` is env ``b``'s group.
    batch_repr_env : torch.Tensor
        ``(n_groups,)`` tensor of dtype ``gs.tc_int``; ``batch_repr_env[g]`` is the lowest env index in group
        ``g`` (its verts/ranges build that tree). Strictly increasing in ``g``.
    """
    B = solver._B
    if not solver._options.batch_links_info:
        # Unbatched link info → every env shares one geom range → a single shared tree.
        env_bvh_idx = torch.zeros(B, dtype=gs.tc_int, device=gs.device)
        return 1, env_bvh_idx, torch.zeros(1, dtype=gs.tc_int, device=gs.device)

    gstart = qd_to_torch(solver.links_info.geom_start)  # (n_links, B)
    gend = qd_to_torch(solver.links_info.geom_end)
    sig = torch.cat([gstart, gend], dim=0).t().contiguous()  # (B, 2*n_links) per-env geometry signature
    uniq, inv = torch.unique(sig, dim=0, return_inverse=True)
    n_groups = int(uniq.shape[0])
    inv = inv.to(torch.int64)

    # Lowest env index per group (deterministic; any group member is a valid representative since the group is
    # geometry-identical, but a stable choice keeps builds reproducible).
    env_ids = torch.arange(B, dtype=torch.int64, device=inv.device)
    repr_env = torch.full((n_groups,), B, dtype=torch.int64, device=inv.device)
    repr_env.scatter_reduce_(0, inv, env_ids, reduce="amin", include_self=True)

    # ``torch.unique`` labels groups in sorted-signature order, which is unrelated to env order. Relabel so that
    # group ``g`` is the g-th group by representative env: ``order`` lists old labels by ascending representative,
    # ``rank`` is its inverse permutation (old label -> new label). Representatives are distinct, so the sort is
    # unambiguous.
    order = torch.argsort(repr_env)
    rank = torch.empty_like(order)
    rank[order] = torch.arange(n_groups, dtype=torch.int64, device=inv.device)
    env_bvh_idx = rank[inv].to(gs.tc_int)
    batch_repr_env = repr_env[order].to(gs.tc_int)
    return n_groups, env_bvh_idx, batch_repr_env

def activate(self):
"""
Build the per-(solver, mesh-type) BVHs on first activation; idempotent. Rigid solvers get a collision BVH
Expand All @@ -120,16 +161,39 @@ def activate(self):
maybe_static = all(link.is_fixed for link in solver.links)
if isinstance(solver, RigidSolver):
n_faces = solver.faces_info.geom_idx.shape[0]
aabb = AABB(n_batches=n_envs, n_aabbs=n_faces)
# A static collision mesh whose geometry is env-identical (or shared by groups of envs) produces
# bit-identical per-env BVHs, so one tree per distinct geometry serves its envs — dropping the
# n_envs-fold replication of nodes / aabbs / morton codes / radix-sort scratch, which dominates GPU
# memory for a high-poly terrain. Opt-in (shared_static_raycast_bvh) because env-identity is a
# runtime property (per-env set_pos on a fixed body diverges geometry after build) that can't be
# auto-proven at allocation time; the flag is a caller guarantee the static geometry stays grouped
# as built. With N distinct geometries across n_envs (N << n_envs, e.g. a terrain curriculum), the
# BVH collapses from n_envs trees to N. Without the flag, the per-env path keeps the runtime
# shared-across-envs detection below.
if maybe_static and solver._options.shared_static_raycast_bvh:
n_groups, env_bvh_idx, batch_repr_env = self._static_geometry_groups(solver)
else:
n_groups = n_envs
env_bvh_idx = torch.arange(n_envs, dtype=gs.tc_int, device=gs.device)
batch_repr_env = env_bvh_idx
aabb = AABB(n_batches=n_groups, n_aabbs=n_faces)
bvh = LBVH(aabb, max_n_query_result_per_aabb=0, n_radix_sort_groups=64)
self._bvh_contexts.append(BVHContext(solver, bvh, aabb, None, maybe_static))
entry = BVHContext(solver, bvh, aabb, None, maybe_static)
entry.env_bvh_idx = env_bvh_idx
entry.batch_repr_env = batch_repr_env
entry.env_major = n_groups < n_envs
self._bvh_contexts.append(entry)
n_vfaces = solver.vfaces_info.vgeom_idx.shape[0]
if n_vfaces > 0:
mask = self._compute_visual_raycast_mask(solver)
if mask.any():
aabb = AABB(n_batches=n_envs, n_aabbs=n_vfaces)
bvh = LBVH(aabb, max_n_query_result_per_aabb=0, n_radix_sort_groups=64)
self._bvh_contexts.append(BVHContext(solver, bvh, aabb, mask, maybe_static))
entry = BVHContext(solver, bvh, aabb, mask, maybe_static)
# Visual BVH stays per-env (the shared-static optimization targets the collision terrain).
entry.env_bvh_idx = torch.arange(n_envs, dtype=gs.tc_int, device=gs.device)
entry.batch_repr_env = entry.env_bvh_idx
self._bvh_contexts.append(entry)

# Lazily watch each static BVH (collision or visual) for GEOMETRY changes. ``update`` polls its
# rebuild_subscriber so an explicit set_pos / set_quat / set_vverts on the otherwise-immovable geometry forces
Expand Down Expand Up @@ -168,6 +232,7 @@ def update(self):
free_verts_state=entry.solver.free_verts_state,
fixed_verts_state=entry.solver.fixed_verts_state,
links_info=entry.solver.links_info,
batch_repr_env=entry.batch_repr_env,
static_rigid_sim_config=entry.solver._static_rigid_sim_config,
aabb_state=entry.aabb,
)
Expand All @@ -189,19 +254,29 @@ def update(self):
)
entry.bvh.build()
entry.needs_rebuild = False
# The per-env trees are bit-identical - so the cast can read one shared copy (batch 0) - exactly when the
# per-face AABBs they are built from match across envs. Comparing that build input directly (rather than a
# proxy like link poses or raw verts) captures per-env pose, batched verts, and any per-env geometry
# selection at once - so it stays correct whatever feeds the AABBs. A single-env solver gains nothing.
if entry.maybe_static and entry.aabb.n_batches > 1:
# Tree routing. A grouped/shared allocation (n_batches < n_envs) fixed its routing at activate time and
# is left as-is. For the per-env allocation (n_batches == n_envs) we recover the runtime shared-geometry
# optimization: if the freshly-built per-env trees are bit-identical, route every env to batch 0 (a
# coalesced read, no extra memory). Comparing the built AABBs directly captures per-env pose, batched
# verts and per-env geometry selection at once. Only static geometry can be shared this way.
n_envs = entry.solver._B
if entry.maybe_static and entry.aabb.n_batches == n_envs and n_envs > 1:
aabb_min = qd_to_torch(entry.aabb.aabbs.min)
aabb_max = qd_to_torch(entry.aabb.aabbs.max)
entry.shared_across_envs = bool(
torch.equal(aabb_min, aabb_min[:1].expand_as(aabb_min))
and torch.equal(aabb_max, aabb_max[:1].expand_as(aabb_max))
)
if entry.shared_across_envs:
entry.env_bvh_idx = torch.zeros(n_envs, dtype=gs.tc_int, device=gs.device)
entry.env_major = True
else:
entry.env_bvh_idx = torch.arange(n_envs, dtype=gs.tc_int, device=gs.device)
entry.env_major = False
else:
entry.shared_across_envs = False
# Grouped (N<n_envs, routing set at activate) or a single shared tree (n_batches==1, shared by
# construction). The non-static per-env case keeps its activate-time identity routing.
entry.shared_across_envs = entry.aabb.n_batches < n_envs or n_envs <= 1

def reset(self, envs_idx):
# A reset may change otherwise-static geometry (re-randomized terrain, teleported obstacles), so force every
Expand Down Expand Up @@ -390,10 +465,11 @@ def _update_raw_data(
shared_metadata.sensor_cache_offsets,
shared_metadata.sensor_point_offsets,
shared_metadata.sensor_point_counts,
entry.env_bvh_idx,
raw_data_T,
gs.EPS,
i > 0,
entry.shared_across_envs,
entry.env_major,
)
if entry.raycast_mask is None:
kernel_cast_rays(
Expand Down
16 changes: 16 additions & 0 deletions genesis/options/solvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,18 @@ class RigidOptions(Options):
Broadphase traversal strategy. ``SAP`` (sweep-and-prune) or ``ALL_VS_ALL`` (parallel pair iteration). Defaults
to ``None`` (auto: ``SAP`` on CPU or when hibernation/heterogeneous entities are enabled, ``ALL_VS_ALL`` on GPU
otherwise). See ``gs.broadphase_traversal`` for details on each strategy.
shared_static_raycast_bvh : bool, optional
Optimization for raycast sensors (``Raycaster`` / ``DepthCamera``) over large static scenes. When True, the
raycast BVH built over the solver's static (fully fixed) collision geometry is allocated **per distinct
geometry** rather than per env: envs with identical static geometry share one tree (each reads the tree for
its geometry). A homogeneous scene collapses to a single shared tree; a heterogeneous scene with N variants
across the envs (N << n_envs, e.g. a terrain curriculum) collapses from n_envs trees to N. For a high-poly
static terrain the per-env tree replication is the dominant raycast memory cost, so this cuts BVH memory
by a factor of roughly n_envs/N and raises the env-count ceiling. It is a caller guarantee that each env's static
collision geometry stays as built: do not enable it if you give fixed entities per-env poses (e.g. per-env
``set_pos`` on a fixed body) after build, as the shared tree would not reflect the divergence. Defaults to
False, which keeps per-env trees and the runtime "shared across envs" auto-detection. Only affects
raycasting, not physics.

Warning
-------
Expand All @@ -504,6 +516,10 @@ class RigidOptions(Options):
batch_joints_info: StrictBool = False
batch_dofs_info: StrictBool = False

# raycast: allocate one static-geometry BVH per distinct geometry instead of one per env (caller guarantees each
# env's static collision geometry stays as built). See the class docstring.
shared_static_raycast_bvh: StrictBool = False

# constraint solver
constraint_solver: gs.constraint_solver = gs.constraint_solver.Newton
iterations: PositiveInt = 50
Expand Down
Loading
Loading