Skip to content
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 94 additions & 25 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,22 @@ def _skip_reason(reason):


def is_mem_monitoring_supported():
    """Report whether per-process GPU memory monitoring can run on this host.

    Returns:
        tuple[bool, str | None]: ``(True, None)`` when a supported GPU
        management utility runs successfully, otherwise ``(False, reason)``
        with a human-readable explanation usable as a pytest skip reason.
    """
    # Monitoring relies on Linux-only driver interfaces and vendor CLI tools.
    if not sys.platform.startswith("linux"):
        return False, "mem-monitoring only supported on linux"

    # Probe the NVIDIA and AMD management utilities in turn; the first one
    # that exits successfully (code 0) wins.
    utilities = ("nvidia-smi", "rocm-smi")
    for exe in utilities:
        try:
            subprocess.check_output([exe], stderr=subprocess.STDOUT, timeout=10)
            return True, None
        except (OSError, subprocess.TimeoutExpired, subprocess.CalledProcessError):
            # OSError covers FileNotFoundError (tool not installed) and other
            # exec failures; the remaining types cover hangs and non-zero exits.
            pass

    return False, f"None of the executables {utilities} exited successfully (code 0)."


def pytest_make_parametrize_id(config, val, argname):
Expand Down Expand Up @@ -224,19 +234,81 @@ def _get_gpu_indices():
return tuple(map(int, cuda_visible_devices.split(",")))

if sys.platform == "linux":
nvidia_gpu_interface_path = "/proc/driver/nvidia/gpus/"
try:
return tuple(range(len(os.listdir(nvidia_gpu_interface_path))))
except FileNotFoundError:
warnings.warn(
f"'{nvidia_gpu_interface_path}' is not available. Multi-GPU support will be disabled. This is expected "
"on WSL2 where the NVIDIA proc interface is not mounted.",
stacklevel=2,
)
if NVIDIA_GPU_INTERFACE_PATH.is_dir():
return tuple(i for i, _ in enumerate(NVIDIA_GPU_INTERFACE_PATH.iterdir()))
if KFD_SYSFS_PATH.is_dir():
return tuple(i for i, _ in enumerate(get_kfd_gpu_nodes_properties()))

warnings.warn(
f"'{NVIDIA_GPU_INTERFACE_PATH!s}' or '{KFD_SYSFS_PATH!s}' is not available. Multi-GPU support will be disabled. This is expected "
"on WSL2 where the NVIDIA proc interface is not mounted.",
stacklevel=2,
)

return (0,)


def parse_kfd_node_properties(kfd_properties_str: str):
    """Parse the contents of a KFD topology node ``properties`` file.

    Each non-empty line has the form ``<name> <integer value>``; the result
    maps every property name to its integer value.
    """
    properties = {}
    for raw_line in kfd_properties_str.split("\n"):
        line = raw_line.strip()
        if not line:
            # Blank lines carry no property; skip them.
            continue
        name, value = line.split()
        properties[name] = int(value)
    return properties


KFD_SYSFS_PATH = Path("/sys/devices/virtual/kfd/kfd/topology")


def get_kfd_gpu_nodes_properties():
    """Collect the properties of every GPU node exposed by the KFD driver.

    Returns:
        dict[int, dict[str, int]]: mapping from KFD node id to its parsed
        properties, restricted to GPU nodes (nodes reporting a non-zero
        ``cpu_cores_count`` are CPU nodes and are skipped). Nodes are visited
        in ascending numeric order so that enumerating the result yields a
        deterministic node ranking.
    """
    # KFD_SYSFS_PATH is already a Path; no need to re-wrap it.
    kfd_sysfs_path_nodes = KFD_SYSFS_PATH / "nodes"

    gpu_nodes_properties = {}

    # Sort numerically: iterdir() yields entries in arbitrary order, and a
    # lexicographic sort would place node "10" before node "2".
    for node_path in sorted(kfd_sysfs_path_nodes.iterdir(), key=lambda p: int(p.name)):
        node_props = parse_kfd_node_properties((node_path / "properties").read_text())

        # GPU nodes advertise zero CPU cores.
        if node_props["cpu_cores_count"] == 0:
            gpu_nodes_properties[int(node_path.name)] = node_props

    return gpu_nodes_properties


def amdgpu_get_node_rank_from_uuid(device_uuid):
    """Map a torch/HIP device UUID to the rank of the matching KFD GPU node.

    The HIP UUID reported by torch is itself the hex encoding of a 16-char
    hex string (a "hex of hex"): decoding the UUID bytes yields ASCII hex
    digits, which are then parsed as the node's ``unique_id`` integer.

    Returns:
        int: 0-based rank of the GPU node whose ``unique_id`` matches, or -1
        when no node matches.
    """
    device_uuid = device_uuid.replace("-", "")
    # Each hex-digit pair encodes one character of the inner hex string
    # (e.g. "36" -> "6"); latin-1 maps every byte value 1:1 to a character,
    # matching the previous per-byte chr() conversion exactly.
    hip_uuid = bytes.fromhex(device_uuid).decode("latin-1")
    unique_id = int(hip_uuid, 16)

    gpu_nodes_properties = get_kfd_gpu_nodes_properties()

    for node_rank, gpu_props in enumerate(gpu_nodes_properties.values()):
        if gpu_props["unique_id"] == unique_id:
            return node_rank

    return -1


NVIDIA_GPU_INTERFACE_PATH = Path("/proc/driver/nvidia/gpus/")


def nvidia_get_node_rank_from_uuid(device_uuid):
    """Map a CUDA device UUID to the index of the matching /proc GPU entry.

    Scans each ``/proc/driver/nvidia/gpus/<bus-id>/information`` file for a
    ``GPU UUID: GPU-<uuid>`` line.

    Returns:
        int: 0-based index of the matching entry, or -1 when none matches.
    """
    # Sort the entries so index assignment is deterministic across calls:
    # iterdir() yields directory entries in arbitrary order.
    for device_idx, device_path in enumerate(sorted(NVIDIA_GPU_INTERFACE_PATH.iterdir())):
        device_info = (device_path / "information").read_text()
        if re.search(rf"GPU UUID:\s+GPU-{device_uuid}", device_info):
            return device_idx

    return -1
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is starting to look very fragile. Is there any library out there that could be used to hide all this logic?



def _torch_get_gpu_idx(device):
if sys.platform == "darwin":
return 0
Expand All @@ -247,19 +319,16 @@ def _torch_get_gpu_idx(device):
device_property = torch.cuda.get_device_properties(device)
device_uuid = str(device_property.uuid)

nvidia_gpu_interface_path = "/proc/driver/nvidia/gpus/"
try:
for device_idx, device_path in enumerate(os.listdir(nvidia_gpu_interface_path)):
with open(os.path.join(nvidia_gpu_interface_path, device_path, "information"), "r") as f:
device_info = f.read()
if re.search(rf"GPU UUID:\s+GPU-{device_uuid}", device_info):
return device_idx
except FileNotFoundError:
warnings.warn(
f"'{nvidia_gpu_interface_path}' is not available. Multi-GPU support will be disabled. This is expected "
"on WSL2 where the NVIDIA proc interface is not mounted.",
stacklevel=2,
)
if NVIDIA_GPU_INTERFACE_PATH.is_dir():
return nvidia_get_node_rank_from_uuid(device_uuid)
if KFD_SYSFS_PATH.is_dir():
return amdgpu_get_node_rank_from_uuid(device_uuid)

warnings.warn(
f"'{NVIDIA_GPU_INTERFACE_PATH!s}' or '{KFD_SYSFS_PATH!s}' is not available. Multi-GPU support will be disabled. This is expected "
"on WSL2 where the NVIDIA proc interface is not mounted.",
stacklevel=2,
)

return -1

Expand Down
64 changes: 62 additions & 2 deletions tests/monitor_test_mem.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ def parse_test_name(test_name: str) -> dict[str, str]:
return filtered_params


def get_cuda_usage() -> dict[int, int]:
output = subprocess.check_output(["nvidia-smi"]).decode("utf-8")
def parse_nvidia_smi_output_for_mem_per_process(output: str) -> dict[int, int]:
section = 0
subsec = 0
res = {}
Expand All @@ -70,6 +69,67 @@ def get_cuda_usage() -> dict[int, int]:
return res


def parse_rocm_smi_output_for_mem_per_process(output: str) -> dict[int, int]:
    """Parse ``rocm-smi --showpids`` output into a ``{pid: VRAM in MB}`` map.

    rocm-smi reports VRAM in bytes; values are converted to MB (2**20 bytes)
    so the result matches the units produced by the nvidia-smi parser.

    Raises:
        RuntimeError: if the expected PID/VRAM table header cannot be found,
            i.e. the rocm-smi output format has changed.
    """
    NO_PIDS = "No KFD PIDs currently running"

    if NO_PIDS in output:
        return {}

    PID = "PID"
    VRAM_USED = "VRAM USED"
    TABLE_BORDER_STR = "===="

    # Drop everything before the table header line starting with "PID".
    pid_idx = output.find(PID)
    output = output[pid_idx:]

    lines = output.split("\n")
    header = lines[0]

    # the header line looks like:
    #
    # PID PROCESS NAME GPU(s) VRAM USED SDMA USED CU OCCUPANCY
    #
    # we extract the span from each of them (it assumes they end with at least
    # 2 whitespaces)
    header_to_span = {header_m.group(0).strip(): header_m.span() for header_m in re.finditer(r"(\S+( |$))+ *", header)}

    # Fail loudly even under `python -O` (unlike a bare assert) when the
    # table layout no longer matches what this parser expects.
    if PID not in header_to_span or VRAM_USED not in header_to_span:
        raise RuntimeError("The way rocm-smi presents information has changed and could not be parsed anymore")

    pid_to_mem_use = {}
    for raw_line in lines[1:]:
        line = raw_line.strip()
        if not line or line.startswith(TABLE_BORDER_STR):
            continue

        # Slice each row using the header column spans, then strip padding.
        header_to_info_str = {h: line[start:end].strip() for h, (start, end) in header_to_span.items()}

        pid = int(header_to_info_str[PID])
        mem_bytes = int(header_to_info_str[VRAM_USED])

        # Use MB as does nvidia-smi
        pid_to_mem_use[pid] = mem_bytes >> 20

    return pid_to_mem_use


def get_cuda_usage() -> dict[int, int]:
    """Return per-process GPU memory usage in MB, keyed by PID.

    Probes nvidia-smi first, then rocm-smi, mirroring the order used by the
    support check in conftest.py.

    Raises:
        RuntimeError: when neither utility can be executed successfully.
    """
    # OSError covers a missing or unrunnable executable; CalledProcessError
    # covers a tool that is installed but exits non-zero (e.g. nvidia-smi on
    # a ROCm host), so the fallback below still gets a chance to run.
    probe_errors = (OSError, subprocess.CalledProcessError)

    try:
        output = subprocess.check_output(["nvidia-smi"]).decode("utf-8")
        return parse_nvidia_smi_output_for_mem_per_process(output)
    except probe_errors:
        pass

    try:
        output = subprocess.check_output(["rocm-smi", "--showpids"]).decode("utf-8")
        return parse_rocm_smi_output_for_mem_per_process(output)
    except probe_errors:
        pass

    raise RuntimeError("Neither nvidia-smi nor rocm-smi available.")


def get_test_name_by_pid() -> dict[int, str]:
test_by_psid = {}
for proc in psutil.process_iter(["pid", "cmdline"]):
Expand Down
37 changes: 37 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from genesis.utils.urdf import compose_inertial_properties

from .utils import assert_allclose
from . import monitor_test_mem


TOL = 1e-7
Expand Down Expand Up @@ -518,3 +519,39 @@ def test_polar_decomposition_batched_pure_rotation(side, tol):
np_reconstructed = np_P @ np_U

assert_allclose(np_A, np_reconstructed, tol=tol)


def test_parse_rocm_smi_output_for_mem_per_process():
    """Check the rocm-smi PID-table parser against a captured sample output.

    The expected values are the sampled VRAM byte counts converted to MB
    (right-shifted by 20), keyed by PID.
    """

    output = """


============================ ROCm System Management Interface ============================
===================================== KFD Processes ======================================
KFD process information:
PID PROCESS NAME GPU(s) VRAM USED SDMA USED CU OCCUPANCY
19191 [pytest-xdist r 1 2680758272 0 0
19188 [pytest-xdist r 1 2680078336 0 0
19206 [pytest-xdist r 1 2867257344 0 0
19194 [pytest-xdist r 1 2680664064 0 0
19200 [pytest-xdist r 1 2866966528 0 0
19209 [pytest-xdist r 1 2680922112 0 0
19197 [pytest-xdist r 1 2686627840 0 0
19203 [pytest-xdist r 1 2681561088 0 0
==========================================================================================
================================== End of ROCm SMI Log ===================================
"""

    expected_output = {
        19191: 2556,
        19188: 2555,
        19206: 2734,
        19194: 2556,
        19200: 2734,
        19209: 2556,
        19197: 2562,
        19203: 2557,
    }
    results = monitor_test_mem.parse_rocm_smi_output_for_mem_per_process(output)
    assert results == expected_output