# Merge upstream→gfx11 #634

name: Build RDNA3/3.5 prototype wheels

# Triggers: pushes and pull requests targeting the gfx11 branch, plus manual
# dispatch with optional overrides for the PyTorch wheel index and the GPU
# architecture list.
on:
  push:
    branches:
      - gfx11
  pull_request:
    branches:
      - gfx11
  workflow_dispatch:
    inputs:
      pytorch_index:
        description: 'PyTorch index URL for ROCm nightly wheels'
        default: 'https://rocm.nightlies.amd.com/whl-multi-arch'
        required: false
      rocm_arch:
        description: 'ROCm architecture (e.g., gfx1103;gfx1150;gfx1151)'
        default: 'gfx1103;gfx1150;gfx1151'
        required: false
# One concurrency group per ref. A newer run cancels an in-flight one only
# for pull_request events; runs for other events are not cancelled.
concurrency:
  group: build-wheels-${{ github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
# Workflow-wide environment. The two PYTORCH_* values take the
# workflow_dispatch input when one was supplied and otherwise fall back to
# the literal default (the inputs are empty for push/pull_request events).
# NOTE(review): the image and index-URL defaults are repeated as literals in
# the jobs (container image, `ci_image`, `pytorch_index`); keep them in sync.
env:
  PYTORCH_INDEX_URL: ${{ github.event.inputs.pytorch_index || 'https://rocm.nightlies.amd.com/whl-multi-arch' }}
  PYTORCH_ROCM_ARCH: ${{ github.event.inputs.rocm_arch || 'gfx1103;gfx1150;gfx1151' }}
  CI_IMAGE: ghcr.io/rocm/vllm/gfx11-ci:latest
jobs:
  # Builds the vLLM ROCm wheel inside the gfx11 CI container and uploads it,
  # together with the resolved constraints.txt, as the `vllm-rocm-wheel`
  # artifact.
  build-wheel:
    runs-on: ubuntu-latest
    container:
      image: ghcr.io/rocm/vllm/gfx11-ci:latest
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    permissions:
      actions: write
      contents: read
      packages: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history and tags; the next step runs `git describe`.
          fetch-depth: 0
          fetch-tags: true

      # Fetch and describe failures are downgraded to warnings so a missing
      # upstream remote does not fail the build.
      - name: Fetch upstream tags for versioning
        run: |
          git config --global --add safe.directory "$GITHUB_WORKSPACE"
          git remote add upstream https://github.com/vllm-project/vllm.git || true
          git fetch upstream --tags || echo "Warning: could not fetch upstream tags"
          git describe --tags --always || echo "Warning: git describe failed"

      - name: Install build dependencies
        run: |
          # Resolve torch on the ROCm nightly index, then derive matching
          # torchvision/torchaudio pins. The nightly publishes multiple
          # base versions side-by-side (2.9, 2.10, 2.11, 2.12); pin to
          # 2.11 to match requirements/build/rocm.txt. The three packages
          # must share the same +rocm build-date suffix or they
          # ABI-mismatch at import ("operator torchvision::nms does not
          # exist").
          echo 'torch==2.11.*' > constraints.in
          # --no-deps: torch pins exact rocm[libraries]/triton versions
          # in its metadata (e.g. rocm[libraries]==7.14.0a..., triton==
          # 3.7.0+rocm...); we don't want those locked in constraints.
          # The index URL is read from the PYTORCH_INDEX_URL environment
          # variable (set in the workflow-level env) instead of being
          # interpolated into the script text, so a workflow_dispatch
          # input can never be parsed as shell syntax.
          uv pip compile constraints.in \
            --index-url "$PYTORCH_INDEX_URL" \
            --prerelease allow \
            --no-header --no-annotate --no-deps \
            -o constraints.txt
          # Synthesize torchvision/torchaudio entries with the same
          # +rocm local version uv just picked for torch.
          TORCH_LOCAL=$(grep '^torch==' constraints.txt | sed 's/.*+//')
          TORCH_PATCH_SUFFIX=$(grep '^torch==' constraints.txt | sed 's/^torch==[0-9]\+\.[0-9]\+\.\([0-9]\+[^+]*\)+.*/\1/')
          echo "torchvision==0.26.${TORCH_PATCH_SUFFIX}+${TORCH_LOCAL}" >> constraints.txt
          echo "torchaudio==2.11.${TORCH_PATCH_SUFFIX}+${TORCH_LOCAL}" >> constraints.txt
          cat constraints.txt
          # unsafe-best-match: uv defaults to stopping at the first
          # index that has a given package name, so PyPI (extra-index)
          # would otherwise win for torch. We need it to consider both
          # indexes and let the +rocm constraint pick the nightly wheel.
          # --prerelease=allow: rocm-sdk transitive deps are alpha-tagged
          # nightlies (e.g. rocm==7.14.0a20260518).
          # Device extras (torch[device-gfx*]) pull GPU runtime libraries
          # that are only needed to run on a GPU, not to compile vllm.
          # Skip them to save ~2 GB of disk on the GitHub-hosted runner.
          uv pip install --system --break-system-packages --link-mode=copy \
            -c constraints.txt \
            "rocm[devel,libraries]" \
            torch \
            torchvision \
            -r requirements/build/rocm.txt \
            --index-url "$PYTORCH_INDEX_URL" \
            --extra-index-url https://pypi.org/simple/ \
            --index-strategy unsafe-best-match \
            --prerelease allow
          # rocm-sdk-devel ships a tarball that must be unpacked after install
          rocm-sdk init
          # Stash constraints.txt for the test workflow so it pins the same
          # torch/torchvision/torchaudio +rocm build that the wheel was built
          # against. Lives next to the wheel in the uploaded artifact.
          mkdir -p dist
          cp constraints.txt dist/

      # Re-exports the runner's cache service URL and token as job-level
      # environment variables so later `run` steps can see them (the Build
      # wheel step enables sccache's GitHub Actions backend).
      - name: Expose GitHub Actions cache env vars
        uses: actions/github-script@v7
        with:
          script: |
            core.exportVariable('ACTIONS_CACHE_SERVICE_V2', 'true')
            core.exportVariable('ACTIONS_RESULTS_URL', process.env['ACTIONS_RESULTS_URL'])
            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env['ACTIONS_RUNTIME_TOKEN'])

      # TODO: Remove this step once the gfx11-ci image is rebuilt with sccache v0.14.0
      - name: Upgrade sccache
        run: |
          curl -L "https://github.com/mozilla/sccache/releases/download/v0.14.0/sccache-v0.14.0-x86_64-unknown-linux-musl.tar.gz" \
            | tar xz --strip-components=1 -C /usr/local/bin --wildcards '*/sccache'
          sccache --version

      - name: Build wheel
        env:
          PYTORCH_ROCM_ARCH: ${{ env.PYTORCH_ROCM_ARCH }}
          VLLM_TARGET_DEVICE: rocm
          # Environment values are always strings; quote the numeric ones so
          # they match the quoted "true" below.
          MAX_JOBS: "1"
          SCCACHE_GHA_ENABLED: "true"
          SCCACHE_IDLE_TIMEOUT: "0"
        run: |
          echo "=== Environment ==="
          echo "ROCM_PATH=$ROCM_PATH"
          echo "PYTORCH_ROCM_ARCH=$PYTORCH_ROCM_ARCH"
          hipcc --version
          sccache --show-stats || true
          echo ""
          echo "=== Building wheel ==="
          python setup.py bdist_wheel --dist-dir=dist
          echo ""
          sccache --show-stats || true
          ls -la dist/

      - name: Rename wheel for manylinux compliance
        # `[[ ... ]]` and `${var/pattern/replacement}` are bash features, but
        # `run` steps inside a container job default to `sh`. Under a POSIX
        # sh the `[[` test fails inside `if`, so the rename would be skipped
        # without failing the step. Request bash explicitly.
        shell: bash
        run: |
          cd dist
          for wheel in *.whl; do
            if [[ "$wheel" == *"linux"* ]]; then
              new_name="${wheel/linux/manylinux_2_31}"
              mv "$wheel" "$new_name"
              echo "Renamed $wheel to $new_name"
            fi
          done
          ls -la

      - name: Upload wheel artifact
        uses: actions/upload-artifact@v4
        with:
          name: vllm-rocm-wheel
          path: |
            dist/*.whl
            dist/constraints.txt
          retention-days: 90

  # Runs the kernel correctness suite through the reusable test workflow.
  # This job gates upload-wheel.
  test-kernels-correctness:
    name: test-kernels (correctness)
    needs: build-wheel
    uses: ./.github/workflows/test-rocm-kernels.yml
    with:
      test_name: correctness
      runs_on: linux-strix-halo-gpu-rocm-oem
      ci_image: ghcr.io/rocm/vllm/gfx11-ci:latest
      pytorch_index: ${{ github.event.inputs.pytorch_index || 'https://rocm.nightlies.amd.com/whl-multi-arch' }}
      timeout: 300
      pytest_args: >-
        tests/kernels/moe/test_exllama_moe.py
        tests/kernels/moe/test_hybrid_w4a16_moe.py
        tests/kernels/quantization/test_awq_gemv_moe.py
        tests/kernels/quantization/test_hip_w4a16.py
        tests/kernels/quantization/test_hybrid_w4a16_triton.py
        tests/kernels/quantization/test_rocm_compressed_tensors_w4a16.py
        tests/kernels/quantization/test_rocm_skinny_gemms.py
        tests/kernels/quantization/test_dynamic_int8_lm_head.py
        tests/quantization/test_hip_w4a16_kernel.py
        tests/rocm/aiter/test_fused_qk_norm_mrope_kvcache.py
        tests/kernels/test_wvsplitk_fused_silu.py
        tests/models/multimodal/processing/test_openvla.py

  test-kernels-performance:
    # TEMPORARILY DISABLED: noneng runners are offline (since 2026-06-23).
    # Remove `if: false` once the runner pool is restored.
    if: false
    name: test-kernels (performance)
    needs: build-wheel
    uses: ./.github/workflows/test-rocm-kernels.yml
    with:
      test_name: performance
      runs_on: linux-strix-halo-gpu-rocm-noneng
      ci_image: ghcr.io/rocm/vllm/gfx11-ci:latest
      pytorch_index: ${{ github.event.inputs.pytorch_index || 'https://rocm.nightlies.amd.com/whl-multi-arch' }}
      timeout: 300
      pytest_args: >-
        tests/kernels/quantization/test_hybrid_w4a16_perf.py

  # Publishes the built wheel to S3. Runs only for push events in
  # repositories owned by ROCm, and only after the build and the correctness
  # tests have succeeded.
  upload-wheel:
    runs-on: ubuntu-latest
    # Performance tests are excluded from `needs` because results vary
    # across runners (VRAM configuration, thermal state, etc.).
    # `always()` replaces the implicit `success()` gate, so whether this job
    # runs is decided solely by the explicit result checks below.
    needs: [build-wheel, test-kernels-correctness]
    if: >-
      always()
      && needs.build-wheel.result == 'success'
      && needs.test-kernels-correctness.result == 'success'
      && github.event_name == 'push'
      && github.repository_owner == 'ROCm'
    permissions:
      # id-token: write lets the job request the OIDC token used by the AWS
      # credentials step.
      id-token: write
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Only the upload script directory is needed here.
          sparse-checkout: .github/workflows/scripts
          fetch-depth: 1

      - name: Download wheel artifact
        uses: actions/download-artifact@v4
        with:
          name: vllm-rocm-wheel
          path: dist/

      - name: Configure AWS credentials via OIDC
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7  # v6.0.0
        with:
          role-to-assume: arn:aws:iam::317668459450:role/therock-aig-embd-gfx11-wheels-s3-oidc
          aws-region: us-east-1

      - name: Upload wheel to S3
        run: |
          pip install boto3
          python .github/workflows/scripts/upload_wheel_s3.py \
            --bucket aig-embd-gfx11-wheels \
            --package vllm \
            --wheel-dir dist/