Skip to content

Commit e85b276

Browse files
committed
fix(ci): update build for ubuntu 24.04 and cuda 12.8
Signed-off-by: kyteinsky <kyteinsky@gmail.com>
1 parent 8c83398 commit e85b276

1 file changed

Lines changed: 7 additions & 15 deletions

File tree

Dockerfile

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ ARG CUDA_RUNTIME_IMAGE=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSIO
1313
# CPU / ARM builder
1414
# Builds llama_cpp_python for any x86_64 (AVX+, Sandy Bridge 2011+)
1515
# and for arm64 (NEON always available).
16-
# ubuntu:22.04 is a multi-arch image so this stage covers both.
16+
# The Ubuntu base image is multi-arch so this stage covers both.
1717
#
1818
# GGML_NATIVE=OFF: no -march=native; the host build machine's SIMD
1919
# capabilities are not baked in. AVX/AVX2/FMA/F16C default to ON in
@@ -49,8 +49,9 @@ RUN /opt/venv/bin/python -m pip wheel \
4949
# ============================================================
5050
# CUDA (NVIDIA) builder
5151
# Builds llama_cpp_python with CUDA support.
52-
# sm_90 is the maximum compute capability supported by CUDA 12.4
53-
# (Hopper / H100). Blackwell sm_100 requires CUDA 12.8+.
52+
# CUDA 12.8 adds Blackwell support: sm_100 (B100, B200) and sm_120 (RTX 50 series).
53+
# Ubuntu 24.04 ships gcc-13 which CUDA 12.6+ accepts natively,
54+
# so no compiler pin or --allow-unsupported-compiler is needed.
5455
# ============================================================
5556
FROM ${CUDA_DEVEL_IMAGE} AS llama-builder-cuda
5657
ARG LLAMA_CPP_PYTHON_VERSION
@@ -59,31 +60,22 @@ ENV DEBIAN_FRONTEND=noninteractive
5960
WORKDIR /build
6061
ADD dockerfile_scripts/install_py11.sh dockerfile_scripts/install_py11.sh
6162
RUN ./dockerfile_scripts/install_py11.sh
62-
# gcc-12 is required: Ubuntu 22.04 ships gcc-11 by default which CUDA 12.4
63-
# treats as "unsupported"; we pin gcc-12 to match the official CI workflow.
6463
RUN apt-get install -y --no-install-recommends \
6564
python3.11-dev \
6665
cmake build-essential ninja-build git \
67-
gcc-12 g++-12 \
6866
libgomp1 \
6967
&& rm -rf /var/lib/apt/lists/*
7068

71-
ENV CC=/usr/bin/gcc-12
72-
ENV CXX=/usr/bin/g++-12
73-
ENV CUDAHOSTCXX=/usr/bin/g++-12
74-
7569
RUN /usr/bin/python3.11 -m venv /opt/venv \
7670
&& /opt/venv/bin/python -m pip install --no-cache-dir --upgrade pip setuptools wheel
7771

7872
# Make the CUDA compat stub visible to the linker so cuMem* symbols resolve
7973
ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:${LD_LIBRARY_PATH}"
8074

81-
# Architecture list aligned with the official llama-cpp-python CUDA CI workflow:
82-
# https://github.com/abetlen/llama-cpp-python/blob/main/.github/workflows/build-wheels-cuda.yaml
75+
# Real cubins for Volta (sm_70) through data-center Blackwell (sm_100),
76+
# plus a single PTX target (compute_100) that newer GPUs can JIT-compile, to keep wheel size manageable.
8377
ENV CMAKE_ARGS="-DGGML_CUDA=ON -DGGML_CUDA_FORCE_MMQ=ON -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
84-
-DCMAKE_CUDA_ARCHITECTURES=70-real;75-real;80-real;86-real;89-real;90-real;90-virtual \
85-
-DCMAKE_CUDA_FLAGS=--allow-unsupported-compiler \
86-
-DCMAKE_CUDA_HOST_COMPILER=/usr/bin/g++-12"
78+
-DCMAKE_CUDA_ARCHITECTURES=70-real;75-real;80-real;86-real;89-real;90-real;100-real;100-virtual"
8779

8880
RUN /opt/venv/bin/python -m pip wheel \
8981
--no-cache-dir \

0 commit comments

Comments
 (0)