gfx11: sync ROCm CI + RDNA3.5 MMQ device table onto upstream-synced master #44

Workflow file for this run

.github/workflows/build-gfx11-rocm.yml at 41013ff

	name: Build gfx11 + ROCm

	on:
	  push:
	    branches: [gfx11]
	  pull_request:
	    types: [opened, synchronize, reopened]
	  workflow_dispatch:
	    inputs:
	      rocm_version:
	        description: 'ROCm version to use (e.g., 7.14.0a20260608) or "latest" to auto-detect'
	        required: false
	        default: '7.14.0a20260608'
	      create_release:
	        description: 'Publish a dated GitHub Release (b<YYYYMMDD>) with the built binaries. Set true only for the nightly dispatch.'
	        required: false
	        default: 'false'

	# Pinned to ROCm 7.14.0a20260608: last known-good nightly (libhsa-runtime64 build
	# d34cbb6409). The 7.14.0a20260609 and newer nightlies regressed libhsa-runtime64
	# (build 1b2a555677), which segfaults in GpuAgent::InitDma on the gfx115x runners.
	# See https://github.com/ROCm/TheRock/issues/5763. Pass rocm_version=latest manually
	# to track the newest nightly.
	#
	# Nightly is driven externally (GitHub cron only fires from the default branch).
	# On a host you control, with `gh` authenticated (token scope: repo + workflow):
	#   0 13 * * * gh workflow run build-gfx11-rocm.yml --repo ROCm/llama.cpp --ref gfx11 -f rocm_version=7.14.0a20260608 -f create_release=true
	# (create_release=true is what makes the create-release job publish; without
	# it the dispatch only builds and tests.)

	env:
	  # Dispatch input when present; otherwise (push / pull_request) the pinned nightly.
	  ROCM_VERSION: ${{ github.event.inputs.rocm_version || '7.14.0a20260608' }}

	jobs:
	  build-ubuntu:
	    runs-on: ubuntu-24.04
	    # One multiarch build producing a single fat binary for every CI arch.
	    # ROCm comes from TheRock's multiarch tarball (arch-neutral host + per-arch
	    # Tensile DBs). Targets:
	    #   gfx1100-1103          RDNA3 desktop + Hawk Point/Phoenix 760M/780M
	    #   gfx1150/1151/1153     RDNA3.5 Strix APUs
	    env:
	      GPU_TARGETS: 'gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151;gfx1153'
	    # Consumed by the create-release job for the release notes.
	    outputs:
	      rocm_version: ${{ steps.set-outputs.outputs.rocm_version }}
	      llamacpp_commit_hash: ${{ steps.set-outputs.outputs.llamacpp_commit_hash }}

	    steps:
	- name: Free disk space
	# Remove unused runner files to free up disk space
	run: curl -fsSL https://raw.githubusercontent.com/kou/arrow/e49d8ae15583ceff03237571569099a6ad62be32/ci/scripts/util_free_space.sh \| bash

	      # Check out the commit that triggered this run; it is the source tree
	      # configured and built by the later cmake step.
	      - name: Checkout repository
	        uses: actions/checkout@v4

	- name: Clean up existing ROCm directory (safety precaution)
	run: \|
	if [ -d "/opt/rocm" ]; then
	echo "Removing existing /opt/rocm directory..."
	sudo rm -rf /opt/rocm
	fi
	if [ -f "rocm.tar.gz" ]; then
	rm -f rocm.tar.gz
	fi
	echo "Cleanup completed successfully"

	- name: Install build dependencies
	run: \|
	echo "Installing build dependencies..."
	sudo apt update
	sudo apt install -y cmake ninja-build unzip curl
	echo "Verifying installations..."
	cmake --version
	ninja --version
	echo "Build dependencies installation completed"

	- name: Download and extract multiarch ROCm directly to /opt/rocm
	run: \|
	rocm_version="${{ env.ROCM_VERSION }}"
	base_url="https://rocm.nightlies.amd.com/tarball-multi-arch"

	if [ "$rocm_version" = "latest" ]; then
	echo "Auto-detecting latest multiarch ROCm version"
	# The multiarch host serves an HTML index (not S3 XML); scrape the
	# multiarch tarball names from it.
	files=$(curl -s "$base_url/" \
	\| grep -oE 'therock-dist-linux-multiarch-[0-9]+\.[0-9]+\.[0-9]+(a\|rc)[0-9]+\.tar\.gz' \
	\| sort -u)

	latest_file=""
	latest_major=0
	latest_minor=0
	latest_patch=0
	latest_rc=0
	latest_is_alpha=false

	while IFS= read -r file; do
	if [[ "$file" =~ therock-dist-linux-multiarch-([0-9]+\.[0-9]+\.[0-9]+(a\|rc)[0-9]+)\.tar\.gz ]]; then
	version="${BASH_REMATCH[1]}"
	major=$(echo "$version" \| cut -d. -f1)
	minor=$(echo "$version" \| cut -d. -f2)
	patch=$(echo "$version" \| cut -d. -f3 \| sed 's/$a\\|rc$.*//')
	rc=$(echo "$version" \| sed 's/.*$a\\|rc$//')
	is_alpha=false
	if [[ "$version" =~ a ]]; then
	is_alpha=true
	fi

	is_newer=false
	if [ "$major" -gt "$latest_major" ]; then
	is_newer=true
	elif [ "$major" -eq "$latest_major" ] && [ "$minor" -gt "$latest_minor" ]; then
	is_newer=true
	elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -gt "$latest_patch" ]; then
	is_newer=true
	elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -eq "$latest_patch" ]; then
	if [ "$is_alpha" = true ] && [ "$latest_is_alpha" = false ]; then
	is_newer=true
	elif [ "$is_alpha" = "$latest_is_alpha" ] && [ "$rc" -gt "$latest_rc" ]; then
	is_newer=true
	fi
	fi

	if [ "$is_newer" = true ]; then
	latest_file="$file"
	latest_major="$major"
	latest_minor="$minor"
	latest_patch="$patch"
	latest_rc="$rc"
	latest_is_alpha="$is_alpha"
	fi
	fi
	done <<< "$files"

	echo "Found latest file: $latest_file"

	if [[ "$latest_file" =~ therock-dist-linux-multiarch-([0-9]+\.[0-9]+\.[0-9]+(a\|rc)[0-9]+)\.tar\.gz ]]; then
	rocm_version="${BASH_REMATCH[1]}"
	echo "Detected latest ROCm version: $rocm_version"
	else
	echo "Failed to extract ROCm version from latest file: $latest_file"
	echo "Expected pattern: therock-dist-linux-multiarch-<version>.tar.gz"
	exit 1
	fi
	fi

	rocm_url="$base_url/therock-dist-linux-multiarch-${rocm_version}.tar.gz"
	echo "DETECTED_ROCM_VERSION=$rocm_version" >> $GITHUB_ENV

	# The multiarch tarball (~11.5 GB) ships device code for ALL 26 GPU
	# arches: every arch's .kpack plus per-arch rocBLAS/hipBLASLt Tensile DBs.
	# This consumer build only needs the 7 RDNA3/3.5 arches in GPU_TARGETS and
	# uses the GEMM (Tensile) path, which works without .kpack files. So we
	# stream-extract and prune at the tar level: drop ALL .kpack, and drop the
	# Tensile DBs of every arch not in our target set. This keeps the runner
	# disk footprint small (the 11.5 GB is streamed, never stored) and yields
	# a lean multiarch package. tar matches --exclude on pre-strip member
	# names, hence the leading "./".
	drop_arches="gfx900 gfx906 gfx908 gfx90a gfx942 gfx950 \
	gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036 \
	gfx1152 gfx1200 gfx1201"
	excludes=(--exclude='./.kpack' --exclude='./.kpack/*')
	for a in $drop_arches; do
	excludes+=("--exclude=./lib/*/library/${a}")
	excludes+=("--exclude=./lib//library/${a}/")
	excludes+=("--exclude=./lib//library/${a}*")
	done

	echo "Streaming multiarch ROCm from: $rocm_url (pruning .kpack + non-target arches)"
	sudo mkdir -p /opt/rocm
	curl -sL "$rocm_url" \| sudo tar --use-compress-program=gzip -xf - \
	-C /opt/rocm --strip-components=1 "${excludes[@]}"

	echo "Retained rocBLAS Tensile arch dirs:"
	ls /opt/rocm/lib/rocblas/library/ 2>/dev/null \|\| echo "(none)"
	echo "Retained hipBLASLt Tensile arch dirs:"
	ls /opt/rocm/lib/hipblaslt/library/ 2>/dev/null \|\| echo "(none)"

	- name: Set ROCm environment variables
	run: \|
	echo "Setting ROCm environment variables..."
	echo "HIP_PATH=/opt/rocm" >> $GITHUB_ENV
	echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV
	echo "HIP_PLATFORM=amd" >> $GITHUB_ENV
	echo "HIP_CLANG_PATH=/opt/rocm/llvm/bin" >> $GITHUB_ENV
	echo "HIP_INCLUDE_PATH=/opt/rocm/include" >> $GITHUB_ENV
	echo "HIP_LIB_PATH=/opt/rocm/lib" >> $GITHUB_ENV
	echo "HIP_DEVICE_LIB_PATH=/opt/rocm/lib/llvm/amdgcn/bitcode" >> $GITHUB_ENV
	echo "/opt/rocm/bin:/opt/rocm/llvm/bin:$PATH" >> $GITHUB_PATH
	echo "LD_LIBRARY_PATH=/opt/rocm/lib:/opt/rocm/lib64:/opt/rocm/llvm/lib:${LD_LIBRARY_PATH:-}" >> $GITHUB_ENV
	echo "LIBRARY_PATH=/opt/rocm/lib:/opt/rocm/lib64:${LIBRARY_PATH:-}" >> $GITHUB_ENV
	echo "CPATH=/opt/rocm/include:${CPATH:-}" >> $GITHUB_ENV
	echo "PKG_CONFIG_PATH=/opt/rocm/lib/pkgconfig:${PKG_CONFIG_PATH:-}" >> $GITHUB_ENV
	echo "ROCm environment variables set successfully"

	- name: Record llama.cpp commit hash
	run: \|
	commit_hash=$(git rev-parse --short=5 HEAD)
	echo "LLAMACPP_COMMIT_HASH=$commit_hash" >> $GITHUB_ENV
	echo "llama.cpp commit hash (5 digits): $commit_hash"
	echo "Current llama.cpp commit:"
	git log --oneline -1

	- name: Build Llama.cpp + ROCm
	run: \|
	gpu_targets="${{ env.GPU_TARGETS }}"
	echo "Building multiarch binary (GPU_TARGETS=$gpu_targets)"

	mkdir build
	cd build

	cmake .. -G Ninja \
	-DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \
	-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
	-DCMAKE_CXX_FLAGS="-I/opt/rocm/include" \
	-DCMAKE_CROSSCOMPILING=ON \
	-DCMAKE_BUILD_TYPE=Release \
	-DGPU_TARGETS="$gpu_targets" \
	-DBUILD_SHARED_LIBS=ON \
	-DLLAMA_BUILD_TESTS=OFF \
	-DGGML_HIP=ON \
	-DGGML_OPENMP=OFF \
	-DGGML_CUDA_FORCE_CUBLAS=OFF \
	-DGGML_RPC=ON \
	-DGGML_HIP_ROCWMMA_FATTN=OFF \
	-DLLAMA_BUILD_BORINGSSL=ON \
	-DGGML_NATIVE=OFF \
	-DGGML_STATIC=OFF \
	-DCMAKE_SYSTEM_NAME=Linux

	cmake --build . -j $(nproc)

	- name: Copy ROCm core libs to build directory
	run: \|
	build_bin_path="build/bin"

	# Copy the rocblas/library folder and all its contents
	rocblas_lib_path="/opt/rocm/lib/rocblas/library"
	if [ -d "$rocblas_lib_path" ]; then
	echo "Copying rocblas/library folder and all contents..."
	dest_rocblas_path="$build_bin_path/rocblas/library"
	mkdir -p "$(dirname "$dest_rocblas_path")"
	cp -r "$rocblas_lib_path" "$(dirname "$dest_rocblas_path")/"
	echo "Copied: rocblas/library folder with all contents"
	else
	echo "Warning: rocblas/library folder not found at: $rocblas_lib_path"
	fi

	# Copy the hipblaslt/library folder and all its contents
	hipblaslt_lib_path="/opt/rocm/lib/hipblaslt/library"
	if [ -d "$hipblaslt_lib_path" ]; then
	echo "Copying hipblaslt/library folder and all contents..."
	dest_hipblaslt_path="$build_bin_path/hipblaslt/library"
	mkdir -p "$(dirname "$dest_hipblaslt_path")"
	cp -r "$hipblaslt_lib_path" "$(dirname "$dest_hipblaslt_path")/"
	echo "Copied: hipblaslt/library folder with all contents"
	else
	echo "Warning: hipblaslt/library folder not found at: $hipblaslt_lib_path"
	fi

	# Copy required ROCm libraries to build directory
	# If artifacts from ROCm or Llama.cpp change, you may need to update this list.
	# To regenerate the list, run:
	# utils/gather_required_libs.py --rocm-dir /opt/rocm --dest-dir build/bin
	echo "Copying required ROCm libraries to build directory..."
	cp -v /opt/rocm/lib/libhipblas.so* "$build_bin_path/" 2>/dev/null \|\| echo "libhipblas.so* not found"
	cp -v /opt/rocm/lib/librocblas.so* "$build_bin_path/" 2>/dev/null \|\| echo "librocblas.so* not found"
	cp -v /opt/rocm/lib/libamdhip64.so* "$build_bin_path/" 2>/dev/null \|\| echo "libamdhip64.so* not found"
	cp -v /opt/rocm/lib/librocsolver.so* "$build_bin_path/" 2>/dev/null \|\| echo "librocsolver.so* not found"
	cp -v /opt/rocm/lib/libroctx64.so* "$build_bin_path/" 2>/dev/null \|\| echo "libroctx64.so* not found"
	cp -v /opt/rocm/lib/libhipblaslt.so* "$build_bin_path/" 2>/dev/null \|\| echo "libhipblaslt.so* not found"
	cp -v /opt/rocm/lib/rocm_sysdeps/lib/librocm_sysdeps_liblzma.so* "$build_bin_path/" 2>/dev/null \|\| echo "librocm_sysdeps_liblzma.so* not found"
	cp -v /opt/rocm/lib/librocprofiler-register.so* "$build_bin_path/" 2>/dev/null \|\| echo "librocprofiler-register.so* not found"
	cp -v /opt/rocm/lib/libamd_comgr.so* "$build_bin_path/" 2>/dev/null \|\| echo "libamd_comgr.so* not found"
	cp -v /opt/rocm/lib/libamd_comgr_loader.so* "$build_bin_path/" 2>/dev/null \|\| echo "libamd_comgr_loader.so* not found"
	cp -v /opt/rocm/lib/libhsa-runtime64.so* "$build_bin_path/" 2>/dev/null \|\| echo "libhsa-runtime64.so* not found"
	cp -v /opt/rocm/lib/rocm_sysdeps/lib/librocm_sysdeps_numa.so* "$build_bin_path/" 2>/dev/null \|\| echo "librocm_sysdeps_numa.so* not found"
	cp -v /opt/rocm/lib/librocroller.so* "$build_bin_path/" 2>/dev/null \|\| echo "librocroller.so* not found"
	cp -v /opt/rocm/lib/librocm_kpack.so* "$build_bin_path/" 2>/dev/null \|\| echo "librocm_kpack.so* not found"
	cp -v /opt/rocm/lib/rocm_sysdeps/lib/librocm_sysdeps_z.so* "$build_bin_path/" 2>/dev/null \|\| echo "librocm_sysdeps_z.so* not found"
	cp -v /opt/rocm/lib/rocm_sysdeps/lib/librocm_sysdeps_zstd.so* "$build_bin_path/" 2>/dev/null \|\| echo "librocm_sysdeps_zstd.so* not found"
	cp -v /opt/rocm/lib/llvm/lib/libLLVM.so* "$build_bin_path/" 2>/dev/null \|\| echo "libLLVM.so* not found"
	cp -v /opt/rocm/lib/llvm/lib/libclang-cpp.so* "$build_bin_path/" 2>/dev/null \|\| echo "libclang-cpp.so* not found"
	cp -v /opt/rocm/lib/rocm_sysdeps/lib/librocm_sysdeps_elf.so* "$build_bin_path/" 2>/dev/null \|\| echo "librocm_sysdeps_elf.so* not found"
	cp -v /opt/rocm/lib/rocm_sysdeps/lib/librocm_sysdeps_drm.so* "$build_bin_path/" 2>/dev/null \|\| echo "librocm_sysdeps_drm.so* not found"
	cp -v /opt/rocm/lib/rocm_sysdeps/lib/librocm_sysdeps_drm_amdgpu.so* "$build_bin_path/" 2>/dev/null \|\| echo "librocm_sysdeps_drm_amdgpu.so* not found"
	cp -v /opt/rocm/lib/rocm_sysdeps/lib/librocm_sysdeps_bz2.so* "$build_bin_path/" 2>/dev/null \|\| echo "librocm_sysdeps_bz2.so* not found"

	# Bundle libatomic (gcc runtime dep of llama-cli). Some self-hosted GPU
	# runners don't have libatomic1 installed, so ship it with the artifact;
	# the $ORIGIN RPATH step below makes the binary load this bundled copy.
	cp -v /usr/lib/x86_64-linux-gnu/libatomic.so.1* "$build_bin_path/" 2>/dev/null \
	\|\| cp -v "$(gcc -print-file-name=libatomic.so.1)" "$build_bin_path/" 2>/dev/null \
	\|\| echo "libatomic.so.1 not found"

	echo "Finished copying required ROCm libraries"

	- name: Set RPATH for portable distribution
	run: \|
	sudo apt-get install -y patchelf
	cd build/bin
	# Set RPATH to $ORIGIN so all libraries (including the comgr stub loader) find deps locally
	for file in .so llama-*; do
	[ -f "$file" ] && [ ! -L "$file" ] && patchelf --set-rpath '$ORIGIN' "$file" 2>/dev/null \|\| true
	done

	- name: List build artifacts (including ROCm files)
	run: \|
	cd build/bin
	echo "Final build artifacts (including ROCm library files):"
	ls -la

	      # Publish build/bin as the artifact that the test-gfx and create-release
	      # jobs download by name.
	      - name: Upload build artifacts
	        uses: actions/upload-artifact@v4
	        with:
	          name: llama-ubuntu-rocm-multiarch-x64
	          path: build/bin/
	          retention-days: 30

	- name: Set job outputs
	id: set-outputs
	run: \|
	rocm_version="${DETECTED_ROCM_VERSION:-${{ env.ROCM_VERSION }}}"
	echo "rocm_version=$rocm_version" >> $GITHUB_OUTPUT
	echo "llamacpp_commit_hash=${LLAMACPP_COMMIT_HASH}" >> $GITHUB_OUTPUT
	echo "Final rocm_version: $rocm_version"
	echo "Final llamacpp_commit_hash: ${LLAMACPP_COMMIT_HASH}"

	test-gfx:
	needs: build-ubuntu
	if: needs.build-ubuntu.result == 'success'
	# Single hardware test of the multiarch artifact on gfx1151. This is the
	# end-to-end safety net for the Tensile-only multiarch package: a real
	# llama-cli inference exercising the rocBLAS/hipBLASLt GEMM path on-device.
	runs-on: linux-gfx1151-gpu-rocm

	steps:
	      - name: Checkout repository
	        uses: actions/checkout@v4

	      # Fetch the bundle uploaded by build-ubuntu into ./llama-binaries.
	      - name: Download build artifacts
	        uses: actions/download-artifact@v4
	        with:
	          name: llama-ubuntu-rocm-multiarch-x64
	          path: llama-binaries

	- name: Download test model
	run: \|
	# Pinned to a fixed GitHub release asset instead of huggingface.co: some
	# self-hosted runners cannot reach huggingface.co (curl 35, connection
	# reset), but they can reach GitHub. Source:
	# https://github.com/jimw567/llamacpp-test-assets/releases/tag/test-assets
	model_url="https://github.com/jimw567/llamacpp-test-assets/releases/download/test-assets/Qwen3-0.6B-Q4_0.gguf"
	model_path="Qwen3-0.6B-Q4_0.gguf"
	echo "Downloading test model from: $model_url"
	curl -fL --retry 5 --retry-all-errors --retry-delay 5 -o "$model_path" "$model_url"
	if [ -f "$model_path" ]; then
	file_size=$(stat -c%s "$model_path")
	echo "Model downloaded successfully. Size: $file_size bytes"
	else
	echo "Failed to download model"
	exit 1
	fi

	- name: Set up library path
	run: \|
	echo "LD_LIBRARY_PATH=$(pwd)/llama-binaries:$LD_LIBRARY_PATH" >> $GITHUB_ENV

	- name: Run llama-cli test
	run: \|
	llama_cli_path="./llama-binaries/llama-cli"
	model_path="Qwen3-0.6B-Q4_0.gguf"
	output_file="llama_output.txt"

	chmod +x "$llama_cli_path"

	if [ ! -f "$llama_cli_path" ]; then
	echo "llama-cli not found at: $llama_cli_path"
	echo "Available files in llama-binaries:"
	find llama-binaries -type f
	exit 1
	fi

	# Use a prompt with a single correct answer and greedy decoding
	# (--temp 0) so the result is deterministic and verifiable.
	prompt="What is 2 + 2? Reply with only the number."
	echo "Running llama-cli test for gfx1151 (multiarch artifact)..."
	echo "Command: $llama_cli_path -m \"$model_path\" -ngl 99 --temp 0 -p \"$prompt\" -st -v"

	# Bound the run: a healthy 0.6B inference finishes in seconds. If the
	# GPU kernel launch hangs (driver/runner issue) the step would otherwise
	# never end and GitHub would never publish the logs. timeout forces a
	# clean failure and lets the captured output flush for diagnosis.
	set +e
	timeout 180 "$llama_cli_path" -m "$model_path" -ngl 99 --temp 0 -p "$prompt" -st -v > "$output_file" 2>&1
	exit_code=$?
	set -e

	echo "=== LLAMA-CLI OUTPUT ==="
	if [ -f "$output_file" ] && [ -s "$output_file" ]; then
	cat "$output_file"
	else
	echo "(empty)"
	fi
	echo "=== END OUTPUT ==="
	echo "Process exit code: $exit_code"

	if [ $exit_code -eq 124 ]; then
	echo "❌ llama-cli timed out after 180s (GPU kernel likely hung)"
	echo "=== GPU / driver diagnostics ==="
	rocminfo 2>&1 \| grep -iE 'Name\|Marketing\|gfx\|Uuid' \| head -40 \|\| echo "rocminfo failed"
	(dmesg 2>/dev/null \| tail -40) \|\| echo "dmesg unavailable"
	rm -f "$output_file"
	exit 1
	fi

	if [ $exit_code -ne 0 ]; then
	echo "❌ llama-cli exited with error code: $exit_code"
	echo "Checking for missing library dependencies..."
	ldd "$llama_cli_path" \|\| echo "ldd command failed"
	rm -f "$output_file"
	exit 1
	fi

	# Functional checks against current llama.cpp output:
	# (1) the ROCm GPU was selected,
	# (2) the model layers were offloaded to it (incl. the output layer),
	# (3) the model computed the correct answer to "What is 2 + 2?".
	# With greedy decoding (--temp 0) the answer is deterministic, so this
	# verifies the GPU math path end to end, not just that text was emitted.
	found_device=false
	found_offload=false
	found_answer=false

	if grep -q "using device ROCm0" "$output_file"; then
	found_device=true
	fi

	# Qwen3-0.6B has 28 transformer layers + output = 29 GPU assignments per
	# pass (verbose mode logs them more than once; require at least one pass).
	layers_on_gpu=$(grep -c "assigned to device ROCm0" "$output_file" \|\| true)
	if [ "$layers_on_gpu" -ge 29 ] && grep -q "offloading output layer to GPU" "$output_file"; then
	found_offload=true
	fi

	# The parsed assistant answer ("content" field, printed with -v) must
	# contain 4 and no other digit, so "4" / "The answer is 4." pass while
	# "5", "14", "22" fail. This is the deterministic correctness check.
	answer_content=$(grep -oE '"content":"[^"]*"' "$output_file" \| tail -1)
	if echo "$answer_content" \| grep -qE '4' && ! echo "$answer_content" \| grep -qE '[0-35-9]'; then
	found_answer=true
	fi

	echo "=== TEST RESULTS ==="
	echo "ROCm GPU selected ('using device ROCm0'): $(if [ "$found_device" = true ]; then echo 'FOUND'; else echo 'NOT FOUND'; fi)"
	echo "Layers offloaded to GPU ('assigned to device ROCm0' x$layers_on_gpu + output layer): $(if [ "$found_offload" = true ]; then echo 'FOUND'; else echo 'NOT FOUND'; fi)"
	echo "Correct answer to '2 + 2' (parsed content: ${answer_content:-<none>}): $(if [ "$found_answer" = true ]; then echo 'FOUND'; else echo 'NOT FOUND'; fi)"

	rm -f "$output_file"

	if [ "$found_device" = true ] && [ "$found_offload" = true ] \
	&& [ "$found_answer" = true ]; then
	echo "✅ Test PASSED - GPU offload + correct deterministic answer verified"
	else
	echo "❌ Test FAILED - Missing expected outputs"
	exit 1
	fi

	create-release:
	needs: [build-ubuntu, test-gfx]
	runs-on: ubuntu-24.04
	permissions:
	contents: write
	# Publish only on the nightly dispatch (external cron passes
	# -f create_release=true). Push/PR and manual runs never release.
	# Require the build to succeed and the gfx1151 hardware test to pass.
	if: \|
	always() &&
	needs.build-ubuntu.result == 'success' &&
	needs.test-gfx.result == 'success' &&
	github.event_name == 'workflow_dispatch' &&
	github.event.inputs.create_release == 'true'
	steps:
	      - name: Checkout repository
	        uses: actions/checkout@v4

	      # No artifact name is given, so every artifact of this run is downloaded
	      # into its own sub-directory of ./all-artifacts.
	      - name: Download all build artifacts
	        uses: actions/download-artifact@v4
	        with:
	          path: ./all-artifacts

	- name: Generate dated release tag
	id: generate-tag
	env:
	GITHUB_TOKEN: ${{ github.token }}
	run: \|
	# Date tag mirrors the ROCm nightly scheme (e.g. b20260609 for the
	# 7.14.0a20260609-era build). One release per UTC day.
	TAG="b$(date -u '+%Y%m%d')"
	echo "tag=${TAG}" >> $GITHUB_OUTPUT

	if gh release view "$TAG" --repo "$GITHUB_REPOSITORY" >/dev/null 2>&1; then
	echo "Release $TAG already exists; skipping creation."
	echo "tag_exists=true" >> $GITHUB_OUTPUT
	else
	echo "Release $TAG does not exist; will create."
	echo "tag_exists=false" >> $GITHUB_OUTPUT
	fi
	echo "Release tag: $TAG"

	- name: Create multiarch archive
	if: steps.generate-tag.outputs.tag_exists == 'false'
	run: \|
	TAG="${{ steps.generate-tag.outputs.tag }}"
	root="$PWD"
	artifact_dir="./all-artifacts/llama-ubuntu-rocm-multiarch-x64"
	archive="llama-${TAG}-ubuntu-rocm-multiarch-x64"
	if [ -d "$artifact_dir" ]; then
	echo "Creating ${archive}.tar.gz"
	tar -czf "$root/${archive}.tar.gz" -C "$artifact_dir" .
	else
	echo "ERROR: artifact dir not found: $artifact_dir"
	exit 1
	fi
	ls -la *.tar.gz

	- name: Create GitHub Release
	if: steps.generate-tag.outputs.tag_exists == 'false'
	env:
	GITHUB_TOKEN: ${{ github.token }}
	run: \|
	TAG="${{ steps.generate-tag.outputs.tag }}"
	ROCM_VERSION="${{ needs.build-ubuntu.outputs.rocm_version }}"
	LLAMACPP_COMMIT_HASH="${{ needs.build-ubuntu.outputs.llamacpp_commit_hash }}"
	gh release create "$TAG" \
	--repo "$GITHUB_REPOSITORY" \
	--title "$TAG" \
	--notes "Build: $TAG
	OS: ubuntu
	GPU Target(s): gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1153 (single multiarch binary)
	ROCm Version: $ROCM_VERSION (multiarch)
	Llama.cpp Commit: $LLAMACPP_COMMIT_HASH
	Build Date: $(date -u '+%Y-%m-%d %H:%M:%S UTC')

	Prebuilt llama.cpp ROCm binaries — one multiarch package covering the RDNA3 gfx110X family incl. Hawk Point/Phoenix (Radeon 760M/780M) and the RDNA3.5 gfx115x APUs (gfx1150/gfx1151/gfx1153). Built from TheRock's multiarch ROCm runtime, pruned to the CI target arches (per-arch Tensile databases bundled)." \
	*.tar.gz

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

gfx11: sync ROCm CI + RDNA3.5 MMQ device table onto upstream-synced master #44

Workflow file

gfx11: sync ROCm CI + RDNA3.5 MMQ device table onto upstream-synced master #44

Uh oh!

Workflow file for this run