Merge upstream→gfx11 #634
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds prototype vLLM wheels for RDNA3/3.5 (gfx11) targets.
name: Build RDNA3/3.5 prototype wheels

# Triggers: pushes and pull requests targeting the gfx11 branch, plus manual
# dispatch with optional overrides for the wheel index and GPU architectures.
on:
  push:
    branches: [gfx11]
  pull_request:
    branches: [gfx11]
  workflow_dispatch:
    inputs:
      # Index that torch/torchvision/torchaudio nightlies are resolved from.
      pytorch_index:
        description: 'PyTorch index URL for ROCm nightly wheels'
        default: 'https://rocm.nightlies.amd.com/whl-multi-arch'
        required: false
      # Semicolon-separated list of GPU architectures to compile for.
      rocm_arch:
        description: 'ROCm architecture (e.g., gfx1103;gfx1150;gfx1151)'
        default: 'gfx1103;gfx1150;gfx1151'
        required: false
# One concurrency group per ref. A newer run cancels an in-progress run of the
# same group only when the triggering event is a pull request; for every other
# event the expression evaluates to false.
concurrency:
  group: build-wheels-${{ github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
# Workflow-wide environment. The two PYTORCH_* values take the matching
# workflow_dispatch input when one is supplied and otherwise fall back to the
# literal default after `||`.
env:
  PYTORCH_INDEX_URL: ${{ github.event.inputs.pytorch_index || 'https://rocm.nightlies.amd.com/whl-multi-arch' }}
  PYTORCH_ROCM_ARCH: ${{ github.event.inputs.rocm_arch || 'gfx1103;gfx1150;gfx1151' }}
  # NOTE(review): the jobs in this workflow spell this image out literally
  # rather than referencing CI_IMAGE; keep the copies in sync, and confirm
  # whether anything reads this variable before removing it.
  CI_IMAGE: ghcr.io/rocm/vllm/gfx11-ci:latest
jobs:
  # Builds the vLLM ROCm wheel inside the gfx11 CI container and uploads it,
  # together with the resolved constraints.txt, as the `vllm-rocm-wheel`
  # artifact consumed by the jobs below.
  build-wheel:
    runs-on: ubuntu-latest
    container:
      image: ghcr.io/rocm/vllm/gfx11-ci:latest
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    permissions:
      actions: write
      contents: read
      packages: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history and tags are fetched; the next step runs
          # `git describe --tags`.
          fetch-depth: 0
          fetch-tags: true

      - name: Fetch upstream tags for versioning
        # Best effort: every command tolerates failure so a network problem
        # does not fail the build.
        run: |
          git config --global --add safe.directory "$GITHUB_WORKSPACE"
          git remote add upstream https://github.com/vllm-project/vllm.git || true
          git fetch upstream --tags || echo "Warning: could not fetch upstream tags"
          git describe --tags --always || echo "Warning: git describe failed"

      - name: Install build dependencies
        # The index URL is read from the PYTORCH_INDEX_URL environment
        # variable (set in the workflow-level `env`) instead of being
        # interpolated with `${{ }}` into the script text, so a
        # workflow_dispatch input can never be parsed as shell code.
        run: |
          # Resolve torch on the ROCm nightly index, then derive matching
          # torchvision/torchaudio pins. The nightly publishes multiple
          # base versions side-by-side (2.9, 2.10, 2.11, 2.12); pin to
          # 2.11 to match requirements/build/rocm.txt. The three packages
          # must share the same +rocm build-date suffix or they
          # ABI-mismatch at import ("operator torchvision::nms does not
          # exist").
          echo 'torch==2.11.*' > constraints.in
          # --no-deps: torch pins exact rocm[libraries]/triton versions
          # in its metadata (e.g. rocm[libraries]==7.14.0a..., triton==
          # 3.7.0+rocm...); we don't want those locked in constraints.
          uv pip compile constraints.in \
            --index-url "$PYTORCH_INDEX_URL" \
            --prerelease allow \
            --no-header --no-annotate --no-deps \
            -o constraints.txt
          # Fail early with a clear message if uv did not resolve a torch
          # pin carrying a +local version; the pins synthesized below would
          # otherwise be malformed.
          if ! grep -q '^torch==[^+]*+' constraints.txt; then
            echo "Error: no 'torch==<version>+<local>' pin found in constraints.txt" >&2
            cat constraints.txt >&2
            exit 1
          fi
          # Synthesize torchvision/torchaudio entries with the same
          # +rocm local version uv just picked for torch.
          TORCH_LOCAL=$(grep '^torch==' constraints.txt | sed 's/.*+//')
          TORCH_PATCH_SUFFIX=$(grep '^torch==' constraints.txt | sed 's/^torch==[0-9]\+\.[0-9]\+\.\([0-9]\+[^+]*\)+.*/\1/')
          echo "torchvision==0.26.${TORCH_PATCH_SUFFIX}+${TORCH_LOCAL}" >> constraints.txt
          echo "torchaudio==2.11.${TORCH_PATCH_SUFFIX}+${TORCH_LOCAL}" >> constraints.txt
          cat constraints.txt
          # unsafe-best-match: uv defaults to stopping at the first
          # index that has a given package name, so PyPI (extra-index)
          # would otherwise win for torch. We need it to consider both
          # indexes and let the +rocm constraint pick the nightly wheel.
          # --prerelease=allow: rocm-sdk transitive deps are alpha-tagged
          # nightlies (e.g. rocm==7.14.0a20260518).
          # Device extras (torch[device-gfx*]) pull GPU runtime libraries
          # that are only needed to run on a GPU, not to compile vllm.
          # Skip them to save ~2 GB of disk on the GitHub-hosted runner.
          uv pip install --system --break-system-packages --link-mode=copy \
            -c constraints.txt \
            "rocm[devel,libraries]" \
            torch \
            torchvision \
            -r requirements/build/rocm.txt \
            --index-url "$PYTORCH_INDEX_URL" \
            --extra-index-url https://pypi.org/simple/ \
            --index-strategy unsafe-best-match \
            --prerelease allow
          # rocm-sdk-devel ships a tarball that must be unpacked after install
          rocm-sdk init
          # Stash constraints.txt for the test workflow so it pins the same
          # torch/torchvision/torchaudio +rocm build that the wheel was built
          # against. Lives next to the wheel in the uploaded artifact.
          mkdir -p dist
          cp constraints.txt dist/

      - name: Expose GitHub Actions cache env vars
        # Re-exports the runner's cache service variables so that later
        # `run` steps can see them.
        uses: actions/github-script@v7
        with:
          script: |
            core.exportVariable('ACTIONS_CACHE_SERVICE_V2', 'true')
            core.exportVariable('ACTIONS_RESULTS_URL', process.env['ACTIONS_RESULTS_URL'])
            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env['ACTIONS_RUNTIME_TOKEN'])

      # TODO: Remove this step once the gfx11-ci image is rebuilt with sccache v0.14.0
      - name: Upgrade sccache
        # -f makes curl exit non-zero on an HTTP error instead of piping an
        # error page into tar.
        run: |
          curl -fL "https://github.com/mozilla/sccache/releases/download/v0.14.0/sccache-v0.14.0-x86_64-unknown-linux-musl.tar.gz" \
            | tar xz --strip-components=1 -C /usr/local/bin --wildcards '*/sccache'
          sccache --version

      - name: Build wheel
        env:
          PYTORCH_ROCM_ARCH: ${{ env.PYTORCH_ROCM_ARCH }}
          VLLM_TARGET_DEVICE: rocm
          # Environment values are strings; quote the numeric-looking ones so
          # no YAML tooling retypes them.
          MAX_JOBS: "1"
          SCCACHE_GHA_ENABLED: "true"
          SCCACHE_IDLE_TIMEOUT: "0"
        run: |
          echo "=== Environment ==="
          echo "ROCM_PATH=$ROCM_PATH"
          echo "PYTORCH_ROCM_ARCH=$PYTORCH_ROCM_ARCH"
          hipcc --version
          sccache --show-stats || true
          echo ""
          echo "=== Building wheel ==="
          python setup.py bdist_wheel --dist-dir=dist
          echo ""
          sccache --show-stats || true
          ls -la dist/

      - name: Rename wheel for manylinux compliance
        # The script uses bash-only syntax ([[ ... ]] and ${var/pat/repl}).
        # `run` steps in a container job default to `sh`, so bash is
        # requested explicitly; under a POSIX sh the `[[` test fails and the
        # rename is silently skipped.
        shell: bash
        run: |
          cd dist
          for wheel in *.whl; do
            if [[ "$wheel" == *"linux"* ]]; then
              new_name="${wheel/linux/manylinux_2_31}"
              mv "$wheel" "$new_name"
              echo "Renamed $wheel to $new_name"
            fi
          done
          ls -la

      - name: Upload wheel artifact
        uses: actions/upload-artifact@v4
        with:
          name: vllm-rocm-wheel
          path: |
            dist/*.whl
            dist/constraints.txt
          retention-days: 90

  # Runs the kernel correctness suite through the reusable test workflow
  # once the wheel has been built.
  test-kernels-correctness:
    name: test-kernels (correctness)
    needs: build-wheel
    uses: ./.github/workflows/test-rocm-kernels.yml
    with:
      test_name: correctness
      runs_on: linux-strix-halo-gpu-rocm-oem
      ci_image: ghcr.io/rocm/vllm/gfx11-ci:latest
      pytorch_index: ${{ github.event.inputs.pytorch_index || 'https://rocm.nightlies.amd.com/whl-multi-arch' }}
      timeout: 300
      pytest_args: >-
        tests/kernels/moe/test_exllama_moe.py
        tests/kernels/moe/test_hybrid_w4a16_moe.py
        tests/kernels/quantization/test_awq_gemv_moe.py
        tests/kernels/quantization/test_hip_w4a16.py
        tests/kernels/quantization/test_hybrid_w4a16_triton.py
        tests/kernels/quantization/test_rocm_compressed_tensors_w4a16.py
        tests/kernels/quantization/test_rocm_skinny_gemms.py
        tests/kernels/quantization/test_dynamic_int8_lm_head.py
        tests/quantization/test_hip_w4a16_kernel.py
        tests/rocm/aiter/test_fused_qk_norm_mrope_kvcache.py
        tests/kernels/test_wvsplitk_fused_silu.py
        tests/models/multimodal/processing/test_openvla.py

  test-kernels-performance:
    # TEMPORARILY DISABLED: noneng runners are offline (since 2026-06-23).
    # Remove `if: false` once the runner pool is restored.
    if: false
    name: test-kernels (performance)
    needs: build-wheel
    uses: ./.github/workflows/test-rocm-kernels.yml
    with:
      test_name: performance
      runs_on: linux-strix-halo-gpu-rocm-noneng
      ci_image: ghcr.io/rocm/vllm/gfx11-ci:latest
      pytorch_index: ${{ github.event.inputs.pytorch_index || 'https://rocm.nightlies.amd.com/whl-multi-arch' }}
      timeout: 300
      pytest_args: >-
        tests/kernels/quantization/test_hybrid_w4a16_perf.py

  # Publishes the built wheel to S3. Only runs for push events in
  # repositories owned by `ROCm`, and only after both the build and the
  # correctness tests report success.
  upload-wheel:
    runs-on: ubuntu-latest
    # Performance tests are excluded from `needs` because results vary
    # across runners (VRAM configuration, thermal state, etc.).
    # `always()` is required so that a performance-test failure doesn't
    # implicitly skip this job via GitHub Actions' default `success()`
    # gate, which checks ALL prior jobs — not just the `needs` list.
    needs: [build-wheel, test-kernels-correctness]
    if: >-
      always()
      && needs.build-wheel.result == 'success'
      && needs.test-kernels-correctness.result == 'success'
      && github.event_name == 'push'
      && github.repository_owner == 'ROCm'
    permissions:
      # id-token: write lets the job request the OIDC token used by the AWS
      # credentials step below.
      id-token: write
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Only the upload helper scripts are needed in this job.
          sparse-checkout: .github/workflows/scripts
          fetch-depth: 1

      - name: Download wheel artifact
        uses: actions/download-artifact@v4
        with:
          name: vllm-rocm-wheel
          path: dist/

      - name: Configure AWS credentials via OIDC
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # v6.0.0
        with:
          role-to-assume: arn:aws:iam::317668459450:role/therock-aig-embd-gfx11-wheels-s3-oidc
          aws-region: us-east-1

      - name: Upload wheel to S3
        run: |
          pip install boto3
          python .github/workflows/scripts/upload_wheel_s3.py \
            --bucket aig-embd-gfx11-wheels \
            --package vllm \
            --wheel-dir dist/