@@ -13,7 +13,7 @@ ARG CUDA_RUNTIME_IMAGE=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSIO
1313# CPU / ARM builder
1414# Builds llama_cpp_python for any x86_64 (AVX+, Sandy Bridge 2011+)
1515# and for arm64 (NEON always available).
16- # ubuntu:22.04 is a multi-arch image so this stage covers both.
16+ # The Ubuntu base image is multi-arch so this stage covers both.
1717#
1818# GGML_NATIVE=OFF: no -march=native; the host build machine's SIMD
1919# capabilities are not baked in. AVX/AVX2/FMA/F16C default to ON in
@@ -49,8 +49,9 @@ RUN /opt/venv/bin/python -m pip wheel \
4949# ============================================================
5050# CUDA (NVIDIA) builder
5151# Builds llama_cpp_python with CUDA support.
52- # sm_90 is the maximum compute capability supported by CUDA 12.4
53- # (Hopper / H100). Blackwell sm_100 requires CUDA 12.8+.
52+ # CUDA 12.8 is the first release that can target sm_100 (Blackwell / B100, B200).
53+ # Ubuntu 24.04 ships gcc-13, which CUDA 12.6+ accepts natively,
54+ # so neither a compiler pin nor --allow-unsupported-compiler is needed.
5455# ============================================================
5556FROM ${CUDA_DEVEL_IMAGE} AS llama-builder-cuda
5657ARG LLAMA_CPP_PYTHON_VERSION
@@ -59,31 +60,22 @@ ENV DEBIAN_FRONTEND=noninteractive
5960WORKDIR /build
6061ADD dockerfile_scripts/install_py11.sh dockerfile_scripts/install_py11.sh
6162RUN ./dockerfile_scripts/install_py11.sh
62- # gcc-12 is required: Ubuntu 22.04 ships gcc-11 by default which CUDA 12.4
63- # treats as "unsupported"; we pin gcc-12 to match the official CI workflow.
6463RUN apt-get install -y --no-install-recommends \
6564 python3.11-dev \
6665 cmake build-essential ninja-build git \
67- gcc-12 g++-12 \
6866 libgomp1 \
6967 && rm -rf /var/lib/apt/lists/*
7068
71- ENV CC=/usr/bin/gcc-12
72- ENV CXX=/usr/bin/g++-12
73- ENV CUDAHOSTCXX=/usr/bin/g++-12
74-
7569RUN /usr/bin/python3.11 -m venv /opt/venv \
7670 && /opt/venv/bin/python -m pip install --no-cache-dir --upgrade pip setuptools wheel
7771
7872# Make the CUDA compat stub visible to the linker so cuMem* symbols resolve
7973ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:${LD_LIBRARY_PATH}"
8074
81- # Architecture list aligned with the official llama-cpp-python CUDA CI workflow:
82- # https://github.com/abetlen/llama-cpp-python/blob/main/.github/workflows/build-wheels-cuda.yaml
75+ # Real cubins for sm_70 (Volta) through sm_100 (Blackwell), plus a single
76+ # forward-compatible PTX target (compute_100) to keep wheel size manageable.
8377ENV CMAKE_ARGS="-DGGML_CUDA=ON -DGGML_CUDA_FORCE_MMQ=ON -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
84- -DCMAKE_CUDA_ARCHITECTURES=70-real;75-real;80-real;86-real;89-real;90-real;90-virtual \
85- -DCMAKE_CUDA_FLAGS=--allow-unsupported-compiler \
86- -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/g++-12"
78+ -DCMAKE_CUDA_ARCHITECTURES=70-real;75-real;80-real;86-real;89-real;90-real;100-real;100-virtual"
8779
8880RUN /opt/venv/bin/python -m pip wheel \
8981 --no-cache-dir \
0 commit comments