# Skip to content

# HRX CI: run on self-hosted ROCm GPU runners for gfx1151/gfx1201 #116

# HRX CI: run on self-hosted ROCm GPU runners for gfx1151/gfx1201

# HRX CI: run on self-hosted ROCm GPU runners for gfx1151/gfx1201 #116

# Workflow file for this run

name: HRX CI

# Triggers: pushes to the hrx-integration branch, and pull requests that
# target it (when opened, updated with new commits, or reopened).
on:
  push:
    branches:
      - hrx-integration
  pull_request:
    types: [opened, synchronize, reopened]
    branches:
      - hrx-integration
# Restrict the workflow token to read-only access on repository contents
# and Actions metadata.
permissions:
  contents: read
  actions: read
# github.head_ref is only set for pull_request events, so PR runs share a
# group keyed by github.ref and a newer run cancels the one in progress.
# For push events head_ref is empty, the group falls back to the unique
# github.run_id, and nothing is cancelled.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # Builds HRX and llama.cpp inside a container on a self-hosted ROCm GPU
  # runner, then runs a sample MUL_MAT correctness test and benchmark and
  # uploads the results. One job instance runs per matrix entry.
  hrx-build:
    name: ${{ matrix.name }} (${{ matrix.gpu_target }})
    strategy:
      # Per-arch failures are independent; keep both signals.
      fail-fast: false
      matrix:
        include:
          - name: gfx1151_strix-halo
            gpu_target: gfx1151
            runs_on: linux-gfx1151-gpu-rocm
            extra_rocm_artifacts: rocwmma_dev_gfx1151
          - name: gfx1201_9070
            gpu_target: gfx1201
            runs_on: linux-gfx120X-gpu-rocm
            extra_rocm_artifacts: rocwmma_dev_gfx120X-all
    runs-on: ${{ matrix.runs_on }}
    timeout-minutes: 15
    container:
      # Image is pinned by digest so every run uses the same base image.
      image: ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:fba5f55a122dbb15925e98c51fe65bffe88c36e11ebb25b73daf2bea04202dc3
      # --user 0:0: actions running inside the container need to write to
      #   some files set up outside the container by the runner agent. In
      #   June 2026 some runner agents set those files up with permissions
      #   for a "tester" user with UID/GID 1001, which matches the user in
      #   no_rocm_image_ubuntu24_04, and some are set up as root; accessing
      #   a file owned by root with user "tester" gives an EACCES. Running
      #   as root is the common denominator.
      # --device kfd/dri: GPU access.
      options: >-
        --user 0:0
        --device /dev/kfd
        --device /dev/dri
    env:
      # Write the out-of-tree build/install to a non-bind-mounted directory.
      # In June 2026 a debug test showed that some runners had assets dated
      # from 6 months ago in the bind-mounted GitHub workspace; it seems
      # that nothing cleans it up currently. Writing to a non-bind-mounted
      # directory ensures that rocm-artifacts/builds are cleaned up when the
      # docker container is removed.
      HRX_WORK_DIR: /work
      # Public location for bench tooling (rsuderman/llamacpp_ci and fork
      # AaronStGeorge/llamacpp_ci).
      # TODO: switch to ROCm/llamacpp-hrx-bench once it is open sourced.
      BENCH_REPOSITORY: AaronStGeorge/llamacpp_ci
      BENCH_REF: 'main'
      # actions/checkout@v6 does auto-cleanup, so an in-tree build would be
      # auto-cleaned as well.
      BENCH_DIR: ${{ github.workspace }}/bench
      LLAMA_SRC_DIR: ${{ github.workspace }}/llama-src
      LLAMA_BUILD_DIR: /work/llama-build
      HRX_EXTRA_ROCM_ARTIFACTS: ${{ matrix.extra_rocm_artifacts }}
      GGML_HRX_AMDGPU_TARGETS: ${{ matrix.gpu_target }}
      CCACHE_COMPILERCHECK: content
    steps:
      - name: Checkout llama.cpp (under test)
        uses: actions/checkout@v6
        with:
          path: llama-src

      - name: Checkout bench tooling
        uses: actions/checkout@v6
        with:
          repository: ${{ env.BENCH_REPOSITORY }}
          ref: ${{ env.BENCH_REF }}
          path: bench

      # Runs even when an earlier step failed; the matrix values are passed
      # to the script as environment variables.
      - name: Runner info
        if: always()
        env:
          MATRIX_NAME: ${{ matrix.name }}
          MATRIX_GPU_TARGET: ${{ matrix.gpu_target }}
          MATRIX_RUNS_ON: ${{ matrix.runs_on }}
        run: "${BENCH_DIR}/scripts/hrx/runner-info.sh"

      - name: Install ROCm build dependencies
        run: "${BENCH_DIR}/scripts/hrx/install-rocm-deps.sh"

      # The cache is keyed per GPU target. `save` evaluates to true only for
      # pull_request events.
      # NOTE(review): presumably push runs are meant to restore without
      # saving - confirm this is not inverted.
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
          key: ${{ matrix.gpu_target }}
          evict-old-files: 1d
          save: ${{ github.event_name == 'pull_request' }}

      - name: Checkout HRX
        run: "${BENCH_DIR}/scripts/hrx/checkout-hrx.sh"

      - name: Fetch ROCm assets
        run: "${BENCH_DIR}/scripts/hrx/fetch-rocm-assets.sh"

      - name: Build HRX
        run: "${BENCH_DIR}/scripts/hrx/build-hrx.sh"

      - name: Validate HRX
        # hrx-info has been observed hanging on some runners; fail fast.
        timeout-minutes: 5
        run: "${BENCH_DIR}/scripts/hrx/validate-hrx.sh"

      - name: Build llama.cpp with HRX
        run: "${BENCH_DIR}/scripts/hrx/build-llama-hrx.sh"

      # Sources the bench environment scripts, then runs the MUL_MAT test
      # file against backend HRX0 and writes results under
      # benchmark-results/.
      - name: Run sample MUL_MAT correctness config on HRX
        run: |
          . "${BENCH_DIR}/scripts/hrx/env.sh"
          . "${BENCH_DIR}/scripts/hrx/runtime-env.sh"
          "${BENCH_DIR}/tools/run-op-test.py" \
            --test-backend-ops "${LLAMA_BUILD_DIR}/bin/test-backend-ops" \
            --test-file "${BENCH_DIR}/benchmark-configs/test/mul_mat_f16.txt" \
            --op MUL_MAT \
            --backend HRX0 \
            --output benchmark-results/sample-mul-mat-f16-hrx-test.jsonl

      # Same test file and backend as the correctness step, run through the
      # perf tool with a separate output file.
      - name: Run sample MUL_MAT benchmark config on HRX
        run: |
          . "${BENCH_DIR}/scripts/hrx/env.sh"
          . "${BENCH_DIR}/scripts/hrx/runtime-env.sh"
          "${BENCH_DIR}/tools/run-op-perf.py" \
            --test-backend-ops "${LLAMA_BUILD_DIR}/bin/test-backend-ops" \
            --test-file "${BENCH_DIR}/benchmark-configs/test/mul_mat_f16.txt" \
            --op MUL_MAT \
            --backend HRX0 \
            --output benchmark-results/sample-mul-mat-f16-hrx-perf.jsonl

      # The artifact name includes the matrix entry name, so each arch
      # uploads a separate artifact.
      # NOTE(review): without an `if:` this step is skipped when an earlier
      # step fails, so results from a failing correctness run are not
      # uploaded - confirm that is intended.
      - name: Upload benchmark results
        uses: actions/upload-artifact@v5
        with:
          name: benchmark-results-${{ matrix.name }}
          path: benchmark-results/