ROCm · eble-amd · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026
diff --git a/.buildkite/ci_config_rocm.yaml b/.buildkite/ci_config_rocm.yaml
@@ -0,0 +1,23 @@
+name: vllm_rocm_ci
+job_dirs:  # NOTE(review): presumably the directories scanned for job definitions; confirm against the pipeline generator
+  - ".buildkite/hardware_tests"
+run_all_patterns:  # NOTE(review): presumably a change under any of these paths runs every job; confirm
+  - "docker/Dockerfile.rocm"
+  - "docker/Dockerfile.rocm_base"
+  - "docker/ci-rocm.hcl"
+  - "docker/docker-bake-rocm.hcl"
+  - ".buildkite/hardware_tests/amd.yaml"
+  - ".buildkite/scripts/ci-bake-rocm.sh"
+  - ".buildkite/scripts/hardware_ci/run-amd-test.py"
+  - ".buildkite/scripts/hardware_ci/run-amd-test.sh"
+  - "CMakeLists.txt"
+  - "requirements/common.txt"
+  - "requirements/rocm.txt"
+  - "requirements/build/rocm.txt"
+  - "requirements/test/rocm.txt"
+  - "setup.py"
+  - "csrc/"
+  - "cmake/"
+run_all_exclude_patterns:  # CPU-named paths under "csrc/" and "cmake/"; presumably exempt from the run-all rule (confirm)
+  - "csrc/cpu/"
+  - "cmake/cpu_extension.cmake"
diff --git a/.buildkite/hardware_tests/amd.yaml b/.buildkite/hardware_tests/amd.yaml
@@ -1,42 +1,73 @@
-group: Hardware - AMD Build 
+group: Hardware - AMD Build
 steps:
-  - label: "AMD: :docker: build image"
-    key: image-build-amd
+  # Ensure ci_base is up to date before building the test image.
+  # Compares a content hash of the ci_base-affecting files against the remote
+  # image label. If the hashes match, the build is skipped (< 30 s); if they
+  # differ, ci_base is rebuilt and pushed automatically.
+  - label: "AMD: :docker: ensure ci_base"
+    key: ensure-ci-base-amd
     depends_on: []
     device: amd_cpu
     no_plugin: true
     commands:
-    - >
-      docker build
-      --build-arg max_jobs=16
-      --build-arg REMOTE_VLLM=1
-      --build-arg ARG_PYTORCH_ROCM_ARCH='gfx90a;gfx942;gfx950'
-      --build-arg VLLM_BRANCH=$BUILDKITE_COMMIT
-      --tag "rocm/vllm-ci:${BUILDKITE_COMMIT}"
-      -f docker/Dockerfile.rocm
-      --target test
-      --no-cache
-      --progress plain .
-    - |
-      docker run --rm --network=none --entrypoint /bin/bash "rocm/vllm-ci:${BUILDKITE_COMMIT}" -ec '
-        if [ ! -d /vllm-workspace ]; then echo Missing directory: /vllm-workspace >&2; exit 1; fi
-        if [ ! -d /vllm-workspace/tests ]; then echo Missing directory: /vllm-workspace/tests >&2; exit 1; fi
-        if [ ! -d /vllm-workspace/src/vllm ]; then echo Missing directory: /vllm-workspace/src/vllm >&2; exit 1; fi
-        if [ ! -x /vllm-workspace/src/vllm/vllm-rs ]; then echo Missing executable: /vllm-workspace/src/vllm/vllm-rs >&2; exit 1; fi
-        command -v python3
-        command -v uv
-        command -v pytest
-        if ! command -v amd-smi >/dev/null 2>&1 && ! command -v rocminfo >/dev/null 2>&1; then
-          echo No ROCm CLI found in image >&2
-          exit 1
+      - bash .buildkite/scripts/ci-bake-rocm.sh ci-base-rocm-ci-with-deps
+    env:  # NOTE(review): presumably read by ci-bake-rocm.sh and the bake file; confirm
+      DOCKER_BUILDKIT: "1"
+      VLLM_BAKE_FILE: "docker/docker-bake-rocm.hcl"
+      PYTORCH_ROCM_ARCH: "gfx90a;gfx942;gfx950"
+      REMOTE_VLLM: "1"
+      VLLM_BRANCH: "$BUILDKITE_COMMIT"
+    retry:  # retry automatically, at most once per exit status listed below
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 1
+        - exit_status: -10  # Agent was lost
+          limit: 1
+
+  - label: "AMD: :docker: build test image and artifacts"
+    key: image-build-amd
+    depends_on:  # runs only after the ci_base step above
+      - ensure-ci-base-amd
+    device: amd_cpu
+    no_plugin: true
+    commands:  # NOTE(review): the smoke-test `docker run` below also runs when ROCM_CI_ARTIFACT_ONLY=1 (built with IMAGE_TAG=""); confirm that image exists in that mode
+      - |
+        if [[ "${ROCM_CI_ARTIFACT_ONLY:-0}" == "1" ]]; then
+          echo "ROCM_CI_ARTIFACT_ONLY=1; building ROCm wheel artifact only"
+          IMAGE_TAG="" bash .buildkite/scripts/ci-bake-rocm.sh test-rocm-ci-with-artifacts
+        else
+          bash .buildkite/scripts/ci-bake-rocm.sh test-rocm-ci-with-wheel
         fi
-        python3 - <<PY
-      import torch, vllm
-      print(torch.__version__)
-      print(vllm.__version__)
-      PY
-        echo AMD image smoke OK
-      '
-    - docker push "rocm/vllm-ci:${BUILDKITE_COMMIT}"
+      - |
+        docker run --rm --network=none --entrypoint /bin/bash "rocm/vllm-ci:${BUILDKITE_COMMIT}" -ec '
+          if [ ! -d /vllm-workspace ]; then echo Missing directory: /vllm-workspace >&2; exit 1; fi
+          if [ ! -d /vllm-workspace/tests ]; then echo Missing directory: /vllm-workspace/tests >&2; exit 1; fi
+          if [ ! -d /vllm-workspace/src/vllm ]; then echo Missing directory: /vllm-workspace/src/vllm >&2; exit 1; fi
+          if [ ! -x /vllm-workspace/src/vllm/vllm-rs ]; then echo Missing executable: /vllm-workspace/src/vllm/vllm-rs >&2; exit 1; fi
+          command -v python3
+          command -v uv
+          command -v pytest
+          if ! command -v amd-smi >/dev/null 2>&1 && ! command -v rocminfo >/dev/null 2>&1; then
+            echo No ROCm CLI found in image >&2
+            exit 1
+          fi
+          python3 - <<PY
+        import torch, vllm
+        print(torch.__version__)
+        print(vllm.__version__)
+        PY
+          echo AMD image smoke OK
+        '
     env:
       DOCKER_BUILDKIT: "1"
+      VLLM_BAKE_FILE: "docker/docker-bake-rocm.hcl"
+      PYTORCH_ROCM_ARCH: "gfx90a;gfx942;gfx950"
+      IMAGE_TAG: "rocm/vllm-ci:$BUILDKITE_COMMIT"  # overridden with "" for the artifact-only build in the first command
+      REMOTE_VLLM: "1"
+      VLLM_BRANCH: "$BUILDKITE_COMMIT"
+    retry:  # retry automatically, at most once per exit status listed below
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 1
+        - exit_status: -10  # Agent was lost
+          limit: 1
diff --git a/.buildkite/hardware_tests/cpu.yaml b/.buildkite/hardware_tests/cpu.yaml
@@ -28,18 +28,19 @@ steps:
       pytest -x -v -s tests/kernels/quantization/test_cpu_fp8_scaled_mm.py
       pytest -x -v -s tests/kernels/mamba/cpu/test_cpu_gdn_ops.py"
 
-- label: CPU-Compatibility Tests
-  depends_on: []
-  device: intel_cpu
-  no_plugin: true
-  source_file_dependencies:
-  - cmake/cpu_extension.cmake
-  - setup.py
-  - vllm/platforms/cpu.py
-  commands:
-    - |
-      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
-      bash .buildkite/scripts/hardware_ci/run-cpu-compatibility-test.sh"
+# NOTE: Step disabled because SDE can't be downloaded from the CI host (blocked by AWS WAF).
+# - label: CPU-Compatibility Tests
+#   depends_on: []
+#   device: intel_cpu
+#   no_plugin: true
+#   source_file_dependencies:
+#   - cmake/cpu_extension.cmake
+#   - setup.py
+#   - vllm/platforms/cpu.py
+#   commands:
+#     - |
+#       bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
+#       bash .buildkite/scripts/hardware_ci/run-cpu-compatibility-test.sh"
 
 - label: CPU-Language Generation and Pooling Model Tests
   depends_on: []

diff --git a/.buildkite/intel_jobs/basic_correctness.yaml b/.buildkite/intel_jobs/basic_correctness.yaml
@@ -0,0 +1,22 @@
+group: Basic Correctness
+depends_on:  # every step in this group waits for the XPU image build
+  - image-build-xpu
+steps:
+- label: XPU Sleep Mode
+  timeout_in_minutes: 30
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:  # NOTE(review): presumably path prefixes whose changes trigger this step; confirm
+  - vllm/
+  - tests/basic_correctness/test_cumem.py
+  commands:  # one folded scalar: the single-quoted string is the one argument passed to run-intel-test.sh
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      export VLLM_WORKER_MULTIPROC_METHOD=spawn &&
+      pytest -v -s basic_correctness/test_cumem.py::test_end_to_end'
diff --git a/.buildkite/intel_jobs/expert_parallelism_intel.yaml b/.buildkite/intel_jobs/expert_parallelism_intel.yaml
@@ -0,0 +1,23 @@
+group: Expert Parallelism
+depends_on:
+  - image-build-xpu
+steps:
+- label: EPLB Algorithm
+  key: eplb-algorithm
+  timeout_in_minutes: 45
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:  # NOTE(review): presumably path prefixes whose changes trigger this step; confirm
+  - vllm/distributed/eplb
+  - tests/distributed/test_eplb_algo.py
+  - tests/distributed/test_eplb_utils.py  # NOTE(review): listed as a trigger but not run by the command below; confirm intent
+  commands:  # one folded scalar: the single-quoted string is the one argument passed to run-intel-test.sh
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      pytest -v -s distributed/test_eplb_algo.py'
diff --git a/.buildkite/intel_jobs/misc_intel.yaml b/.buildkite/intel_jobs/misc_intel.yaml
@@ -38,7 +38,17 @@ steps:
     REPO: "vllm-ci-test-repo"
     VLLM_TEST_DEVICE: "xpu"
   source_file_dependencies:
-    - vllm/
+    - vllm/config/
+    - vllm/distributed/
+    - vllm/engine/
+    - vllm/inputs/
+    - vllm/logger.py
+    - vllm/model_executor/
+    - vllm/platforms/
+    - vllm/sampling_params.py
+    - vllm/transformers_utils/
+    - vllm/utils/
+    - vllm/v1/
     - tests/v1/sample
     - tests/v1/logits_processors
     - tests/v1/test_oracle.py
@@ -52,4 +62,126 @@ steps:
       pytest -v -s v1/logits_processors --ignore=v1/logits_processors/test_custom_online.py --ignore=v1/logits_processors/test_custom_offline.py &&
       pytest -v -s v1/test_oracle.py &&
       pytest -v -s v1/test_request.py &&
-      pytest -v -s v1/test_outputs.py'
+      pytest -v -s v1/test_outputs.py &&
+      pytest -v -s v1/sample/test_topk_topp_sampler.py'
+
+- label: XPU CPU Offload
+  timeout_in_minutes: 60
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:  # NOTE(review): presumably path prefixes whose changes trigger this step; confirm
+    - vllm/  # NOTE(review): if these are prefix matches, this entry already covers the two vllm/v1/... entries below
+    - vllm/v1/kv_offload/
+    - vllm/v1/kv_connector/  # NOTE(review): confirm this directory exists in the repository
+    - tests/v1/kv_offload/
+    - tests/v1/kv_connector/unit/test_offloading_connector.py
+  commands:  # one folded scalar: the single-quoted string is the one argument passed to run-intel-test.sh
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'export VLLM_WORKER_MULTIPROC_METHOD=spawn &&
+      cd tests &&
+      pytest -v -s v1/kv_offload &&
+      pytest -v -s v1/kv_connector/unit/test_offloading_connector.py'
+
+- label: Regression
+  key: regression
+  timeout_in_minutes: 30
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:  # NOTE(review): presumably path prefixes whose changes trigger this step; confirm
+    - vllm/config/
+    - vllm/distributed/
+    - vllm/engine/
+    - vllm/inputs/
+    - vllm/model_executor/
+    - vllm/multimodal/
+    - vllm/platforms/
+    - vllm/sampling_params.py
+    - vllm/transformers_utils/
+    - vllm/utils/
+    - vllm/v1/
+    - tests/test_regression
+  commands:  # one folded scalar: the single-quoted string is the one argument passed to run-intel-test.sh
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'pip install modelscope &&
+      cd tests &&
+      pytest -v -s test_regression.py'
+
+- label: Metrics, Tracing (2 GPUs)
+  key: metrics-tracing-2-gpus
+  timeout_in_minutes: 30
+  num_devices: 2
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:  # NOTE(review): presumably path prefixes whose changes trigger this step; confirm
+    - vllm/config/
+    - vllm/distributed/
+    - vllm/engine/
+    - vllm/inputs/
+    - vllm/model_executor/
+    - vllm/multimodal/
+    - vllm/platforms/
+    - vllm/sampling_params.py
+    - vllm/tracing/
+    - vllm/transformers_utils/
+    - vllm/utils/
+    - vllm/v1/
+    - tests/v1/tracing
+  commands:  # one folded scalar: the single-quoted string is the one argument passed to run-intel-test.sh
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'pip install opentelemetry-sdk\>=1.26.0 opentelemetry-api\>=1.26.0 opentelemetry-exporter-otlp\>=1.26.0 opentelemetry-semantic-conventions-ai\>=0.4.1 &&
+      cd tests &&
+      pytest -v -s v1/tracing'
+
+- label: Async Engine, Inputs, Utils, Worker
+  key: async-engine-inputs-utils-worker
+  timeout_in_minutes: 30
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:  # NOTE(review): presumably path prefixes whose changes trigger this step; confirm
+    - vllm/assets/
+    - vllm/config/
+    - vllm/distributed/
+    - vllm/engine/
+    - vllm/inputs/
+    - vllm/model_executor/
+    - vllm/multimodal/
+    - vllm/platforms/
+    - vllm/sampling_params.py
+    - vllm/tokenizers/
+    - vllm/transformers_utils/
+    - vllm/utils/
+    - vllm/v1/
+    - tests/detokenizer
+    - tests/multimodal
+    - tests/utils_
+  commands:  # one folded scalar: the single-quoted string is the one argument passed to run-intel-test.sh
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      pip install av &&
+      pytest -v -s detokenizer &&
+      pytest -v -s -m "not cpu_test" ./multimodal &&
+      pytest -v -s utils_ --ignore=utils_/test_mem_utils.py'