lablup · rapsealk · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/changes/11222.enhance.md b/changes/11222.enhance.md
@@ -0,0 +1 @@
+Pin container memory to the CPU's NUMA node via `CpusetMems` when the allocation is node-local.
diff --git a/src/ai/backend/agent/docker/intrinsic.py b/src/ai/backend/agent/docker/intrinsic.py
@@ -433,12 +433,29 @@ async def generate_docker_args(
     ) -> Mapping[str, Any]:
         cores = [*map(int, device_alloc[SlotName("cpu")].keys())]
         sorted_core_ids = [*map(str, sorted(cores))]
+        host_config: dict[str, Any] = {
+            "Cpus": len(cores),
+            "CpusetCpus": ",".join(sorted_core_ids),
+        }
+        # Skip CpusetMems entirely when NUMA is unsupported (non-Linux hosts,
+        # Linux without libnuma.so, Docker Desktop, WSL, etc.) or when the host
+        # exposes a single node; libnuma.node_of_cpu would otherwise fall back
+        # to 0 and cause every container to be pinned to "CpusetMems": "0".
+        if libnuma.num_nodes() > 1:
+            allocated_nodes: set[int] = set()
+            for core in cores:
+                node = libnuma.node_of_cpu(core)
+                if node < 0:
+                    allocated_nodes.clear()
+                    break
+                allocated_nodes.add(node)
+            # Pin memory only when the CPU allocation is fully node-local.
+            # For multi-node CPU allocations, intentionally leave CpusetMems unset
+            # so Docker/kernel default NUMA memory placement policy can apply.
+            if len(allocated_nodes) == 1:
+                host_config["CpusetMems"] = str(next(iter(allocated_nodes)))
         return {
-            "HostConfig": {
-                "Cpus": len(cores),
-                "CpusetCpus": ",".join(sorted_core_ids),
-                # 'CpusetMems': f'{resource_spec.numa_node}',
-            },
+            "HostConfig": host_config,
         }
 
     async def restore_from_container(

diff --git a/src/ai/backend/agent/dummy/intrinsic.py b/src/ai/backend/agent/dummy/intrinsic.py
@@ -145,7 +145,6 @@ async def generate_docker_args(
                 "CpuQuota": int(100_000 * len(cores)),
                 "Cpus": ",".join(sorted_core_ids),
                 "CpusetCpus": ",".join(sorted_core_ids),
-                # 'CpusetMems': f'{resource_spec.numa_node}',
             },
         }
 

diff --git a/tests/unit/agent/test_docker_intrinsic.py b/tests/unit/agent/test_docker_intrinsic.py
@@ -4,6 +4,7 @@
 from collections.abc import Generator
 from contextlib import contextmanager
 from dataclasses import dataclass
+from decimal import Decimal
 from pathlib import Path
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch
@@ -17,6 +18,7 @@
     read_proc_net_dev,
 )
 from ai.backend.agent.stats import StatModes
+from ai.backend.common.types import DeviceId, SlotName
 
 
 class BaseDockerIntrinsicTest:
@@ -610,3 +612,137 @@ def test_raises_oserror_for_nonexistent_pid(self) -> None:
         """Raises OSError when /proc/[pid]/net/dev does not exist."""
         with pytest.raises(OSError):
             read_proc_net_dev(999999999)
+
+
+class TestCPUPluginGenerateDockerArgsNumaLocality:
+    """Tests for CPUPlugin.generate_docker_args() NUMA-locality CpusetMems logic."""
+
+    @pytest.fixture
+    def cpu_plugin(self) -> CPUPlugin:
+        return CPUPlugin.__new__(CPUPlugin)
+
+    @staticmethod
+    def _device_alloc(core_ids: list[int]) -> dict[SlotName, dict[DeviceId, Decimal]]:
+        return {
+            SlotName("cpu"): {DeviceId(str(cid)): Decimal("1") for cid in core_ids},
+        }
+
+    @staticmethod
+    @contextmanager
+    def _patch_node_of_cpu(
+        core_to_node: dict[int, int],
+        *,
+        num_nodes: int = 2,
+    ) -> Generator[None, None, None]:
+        """Patch libnuma.node_of_cpu; return -1 for any core missing from the map
+        (matches real libnuma's behavior for unknown cores when NUMA info is
+        unavailable).
+
+        Also patches libnuma.num_nodes to report a multi-node host by default
+        so the NUMA-aware branch is exercised. Tests covering the non-NUMA
+        short-circuit can pass ``num_nodes=1``.
+        """
+        with (
+            patch(
+                "ai.backend.agent.docker.intrinsic.libnuma.num_nodes",
+                return_value=num_nodes,
+            ),
+            patch(
+                "ai.backend.agent.docker.intrinsic.libnuma.node_of_cpu",
+                side_effect=lambda core: core_to_node.get(core, -1),
+            ),
+        ):
+            yield
+
+    async def test_single_node_allocation_sets_cpuset_mems(
+        self,
+        cpu_plugin: CPUPlugin,
+    ) -> None:
+        """When all allocated cores are on the same NUMA node, CpusetMems is pinned
+        to that node as a string."""
+        with self._patch_node_of_cpu({0: 0, 1: 0, 2: 1, 3: 1}):
+            result = await cpu_plugin.generate_docker_args(
+                AsyncMock(),
+                self._device_alloc([0, 1]),
+            )
+
+        host_config = result["HostConfig"]
+        assert host_config["CpusetMems"] == "0"
+        # Sanity: core-list plumbing still works.
+        assert host_config["Cpus"] == 2
+        assert host_config["CpusetCpus"] == "0,1"
+
+    async def test_multi_node_allocation_omits_cpuset_mems(
+        self,
+        cpu_plugin: CPUPlugin,
+    ) -> None:
+        """When cores span multiple NUMA nodes, CpusetMems must be omitted so that
+        the Docker/kernel default NUMA memory placement policy can apply."""
+        with self._patch_node_of_cpu({0: 0, 1: 0, 2: 1, 3: 1}):
+            result = await cpu_plugin.generate_docker_args(
+                AsyncMock(),
+                self._device_alloc([0, 2]),
+            )
+
+        host_config = result["HostConfig"]
+        assert "CpusetMems" not in host_config
+        # Sanity: core-list plumbing still works.
+        assert host_config["Cpus"] == 2
+        assert host_config["CpusetCpus"] == "0,2"
+
+    @pytest.mark.parametrize(
+        "core_to_node",
+        [
+            {0: 0},
+            {0: 0, 1: -1},
+        ],
+        ids=["unknown_node", "negative_node"],
+    )
+    async def test_unknown_or_negative_node_omits_cpuset_mems(
+        self,
+        cpu_plugin: CPUPlugin,
+        core_to_node: dict[int, int],
+    ) -> None:
+        """When any allocated core maps to an unknown (libnuma returns -1) or
+        explicitly negative NUMA node, CpusetMems must be omitted.
+
+        `unknown_node` covers the case where libnuma cannot resolve a core (the
+        patched side_effect returns -1 for unmapped cores). `negative_node` covers
+        the case where libnuma reports -1 for a known core (NUMA info unavailable).
+        Both collapse to the same `node < 0` branch in the SUT.
+        """
+        with self._patch_node_of_cpu(core_to_node):
+            result = await cpu_plugin.generate_docker_args(
+                AsyncMock(),
+                self._device_alloc([0, 1]),
+            )
+
+        host_config = result["HostConfig"]
+        assert "CpusetMems" not in host_config
+        # Sanity: core-list plumbing still works.
+        assert host_config["Cpus"] == 2
+        assert host_config["CpusetCpus"] == "0,1"
+
+    async def test_non_numa_host_omits_cpuset_mems(
+        self,
+        cpu_plugin: CPUPlugin,
+    ) -> None:
+        """On non-NUMA / non-Linux hosts (macOS, Docker Desktop, WSL, Linux
+        without libnuma.so) libnuma.num_nodes() reports 1 and node_of_cpu()
+        hardcodes 0. The plugin must short-circuit before inspecting per-core
+        nodes so containers are not unconditionally pinned to CpusetMems="0".
+        """
+        # node_of_cpu would return 0 for every core on a non-NUMA host; assert
+        # we never reach that branch by mapping cores to a bogus node that
+        # would otherwise produce a stale CpusetMems assignment.
+        with self._patch_node_of_cpu({0: 0, 1: 0}, num_nodes=1):
+            result = await cpu_plugin.generate_docker_args(
+                AsyncMock(),
+                self._device_alloc([0, 1]),
+            )
+
+        host_config = result["HostConfig"]
+        assert "CpusetMems" not in host_config
+        # Sanity: core-list plumbing still works.
+        assert host_config["Cpus"] == 2
+        assert host_config["CpusetCpus"] == "0,1"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Pin container memory to the CPU's NUMA node via `CpusetMems` when the allocation is node-local.