Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions unsloth_zoo/llama_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,30 @@ def install_llama_cpp(
print("Unsloth: Building llama.cpp - please wait 1 to 3 minutes")
if gpu_support == "ON":
print("Unsloth: Building llama.cpp with GPU support")
# Detect GPU backend: ROCm uses HIP flags, CUDA uses CUDA flags
try:
    import torch
    # A HIP build of PyTorch implies a ROCm toolchain even when no GPU is
    # currently visible (headless CI, HIP_VISIBLE_DEVICES-restricted jobs),
    # so do NOT gate this on torch.cuda.is_available() — that would fall
    # through to -DGGML_CUDA=ON and make CMake try CUDA on a ROCm machine.
    if getattr(torch.version, "hip", None) is not None:
        # ROCm detected: use HIP flags
        try:
            # gcnArchName is e.g. "gfx90a:sramecc+:xnack-"; keep only the arch
            gpu_arch = torch.cuda.get_device_properties(0).gcnArchName.split(":")[0]
        except Exception:
            # No visible device — let CMake pick default HIP architectures
            gpu_arch = ""
        rocm_path = os.environ.get('ROCM_PATH', '/opt/rocm')
        arch_flag = f" -DCMAKE_HIP_ARCHITECTURES={gpu_arch}" if gpu_arch else ""
        gpu_cmake_flags = (
            f"-DGGML_HIP=ON"
            f" -DCMAKE_C_COMPILER={rocm_path}/llvm/bin/clang"
            f" -DCMAKE_CXX_COMPILER={rocm_path}/llvm/bin/clang++"
            f"{arch_flag}"
        )
        print(f"Unsloth: Detected ROCm GPU{' (' + gpu_arch + ')' if gpu_arch else ''} -- building with HIP support")
    else:
        gpu_cmake_flags = "-DGGML_CUDA=ON"
except Exception:
    # torch missing or probing failed — default to the CUDA backend
    gpu_cmake_flags = "-DGGML_CUDA=ON"
else:
gpu_cmake_flags = f"-DGGML_CUDA={gpu_support}" # "OFF"

build_success = False
build_errors = []
Expand Down Expand Up @@ -708,7 +732,7 @@ def install_llama_cpp(
"-G", cmake_generator,
"-Wno-dev",
"-DBUILD_SHARED_LIBS=OFF",
f"-DGGML_CUDA={gpu_support}",
gpu_cmake_flags,
]
if vs_install_path:
cmake_args.append(f"-DCMAKE_GENERATOR_INSTANCE={vs_install_path}")
Expand Down Expand Up @@ -773,7 +797,7 @@ def install_llama_cpp(
# Build cmake configure command with library detection
cmake_configure = (
f"cmake . -B build "
f"-DBUILD_SHARED_LIBS=OFF -DGGML_CUDA={gpu_support}"
f"-DBUILD_SHARED_LIBS=OFF {gpu_cmake_flags}"
)

# Detect OpenMP library path (fixes GOMP linker errors)
Expand Down