Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 126 additions & 2 deletions tests/sh/test_get_torch_index_url.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,32 @@ MOCK
echo "$_dir"
}

# Helper: create a mock amd-smi that prints a given ROCm version string.
# The generated script supports two invocations:
#   amd-smi list     -> prints one fake GPU entry, so that a GPU presence
#                       check based on "amd-smi list" also succeeds in tests
#   amd-smi <other>  -> prints an "amd-smi version"-style line that ends in
#                       "ROCm version: <version>"
# Arguments: $1 - ROCm version string to embed in the mock (e.g. "6.3")
# Outputs:   path of the temporary directory holding the mock, on stdout
# Returns:   non-zero if the directory or the mock script cannot be created
make_mock_amd_smi() {
  # Bail out early: with an empty _dir the redirect below would target /amd-smi.
  _dir=$(mktemp -d) || return 1
  # Unquoted heredoc on purpose: $1 is expanded now and baked into the mock,
  # while \$1 and \\n are escaped so the generated script receives a literal
  # $1 (its own first argument) and \n (printf newline escapes).
  cat > "$_dir/amd-smi" <<MOCK || return 1
#!/bin/sh
case "\$1" in
list)
printf 'GPU: 0\\n BDF: 0000:03:00.0\\n NAME: gfx1100\\n'
;;
*)
cat <<AMD_OUT
AMDSMI Tool: 25.0.1+2b74356 | AMDSMI Library version: 25.0.1.0 | ROCm version: $1
AMD_OUT
;;
esac
MOCK
  chmod +x "$_dir/amd-smi" || return 1
  printf '%s\n' "$_dir"
}

# Build a minimal tools directory with symlinks to essential commands
# (uname, grep, head, etc.) but WITHOUT nvidia-smi or amd-smi.
_TOOLS_DIR=$(mktemp -d) || exit 1
for _cmd in uname grep sed head sh bash cat awk printf; do
  _real=$(command -v "$_cmd" 2>/dev/null || true)
  # command -v prints a bare name (not a path) for shell builtins such as
  # printf; symlinking that name would create a link pointing at itself.
  # Only link results that are absolute paths; builtins stay available
  # to the shell without a link.
  case "$_real" in
    /*) ln -sf "$_real" "$_TOOLS_DIR/$_cmd" ;;
  esac
done
Expand Down Expand Up @@ -119,6 +141,108 @@ _result=$(run_func "$_dir")
assert_eq "unparseable -> cu126" "https://download.pytorch.org/whl/cu126" "$_result"
rm -rf "$_dir"

# 9) ROCm 6.3 (no nvidia-smi) -> rocm6.3
# The mock directory holds only an amd-smi script (no nvidia-smi).
# NOTE(review): run_func is defined outside this excerpt; presumably it makes
# that directory visible to the function under test -- confirm.
_dir=$(make_mock_amd_smi "6.3")
_result=$(run_func "$_dir")
assert_eq "ROCm 6.3 -> rocm6.3" "https://download.pytorch.org/whl/rocm6.3" "$_result"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Avoid ROCm URL assertions before install.sh supports ROCm

This new expectation (and the related ROCm cases below it) currently fails deterministically against the base install.sh logic, which returns the CPU index whenever nvidia-smi is missing; running bash tests/sh/test_get_torch_index_url.sh yields multiple ROCm failures and exits non-zero. As a result, merging this test-only change ahead of the matching installer implementation will break CI.

Useful? React with 👍 / 👎.

# Remove the mock directory created for test 9.
rm -rf "$_dir"

# 10) ROCm 7.1 (no nvidia-smi) -> rocm7.1
_dir=$(make_mock_amd_smi "7.1")
_result=$(run_func "$_dir")
assert_eq "ROCm 7.1 -> rocm7.1" "https://download.pytorch.org/whl/rocm7.1" "$_result"
rm -rf "$_dir"

# 11) ROCm 7.2 (no nvidia-smi) -> rocm7.1 (capped due to torch <2.11.0)
# The expected index is rocm7.1, not rocm7.2: a reported version above 7.1
# must map to the 7.1 wheel index.
_dir=$(make_mock_amd_smi "7.2")
_result=$(run_func "$_dir")
assert_eq "ROCm 7.2 -> rocm7.1 (capped)" "https://download.pytorch.org/whl/rocm7.1" "$_result"
rm -rf "$_dir"

# 12) Both nvidia-smi and amd-smi present -> CUDA takes precedence
# Each helper creates its mock in its own temp directory, so a third
# directory is populated with symlinks to both mocks and passed to run_func.
_cuda_dir=$(make_mock_smi "12.6")
_amd_dir=$(make_mock_amd_smi "6.3")
_combined_dir=$(mktemp -d)
ln -sf "$_cuda_dir/nvidia-smi" "$_combined_dir/nvidia-smi"
ln -sf "$_amd_dir/amd-smi" "$_combined_dir/amd-smi"
_result=$(run_func "$_combined_dir")
assert_eq "CUDA+ROCm -> CUDA precedence" "https://download.pytorch.org/whl/cu126" "$_result"
# The symlinks point into the two mock directories, so remove all three.
rm -rf "$_cuda_dir" "$_amd_dir" "$_combined_dir"

# 13) No nvidia-smi, no amd-smi -> cpu (duplicate of test 1, confirms ROCm didn't break it)
# No mock directory is created here; the literal "none" is passed instead.
_result=$(run_func "none")
assert_eq "no GPU -> cpu" "https://download.pytorch.org/whl/cpu" "$_result"

# Tests 14-17: further ROCm versions. 6.1, 6.4 and 7.0 are expected to map to
# the index of the same major.minor; 8.0 is expected to be capped at rocm7.1.

# 14) ROCm 6.1 (no nvidia-smi) -> rocm6.1
_dir=$(make_mock_amd_smi "6.1")
_result=$(run_func "$_dir")
assert_eq "ROCm 6.1 -> rocm6.1" "https://download.pytorch.org/whl/rocm6.1" "$_result"
rm -rf "$_dir"

# 15) ROCm 6.4 (no nvidia-smi) -> rocm6.4
_dir=$(make_mock_amd_smi "6.4")
_result=$(run_func "$_dir")
assert_eq "ROCm 6.4 -> rocm6.4" "https://download.pytorch.org/whl/rocm6.4" "$_result"
rm -rf "$_dir"

# 16) ROCm 7.0 (no nvidia-smi) -> rocm7.0
_dir=$(make_mock_amd_smi "7.0")
_result=$(run_func "$_dir")
assert_eq "ROCm 7.0 -> rocm7.0" "https://download.pytorch.org/whl/rocm7.0" "$_result"
rm -rf "$_dir"

# 17) ROCm 8.0 (future, no nvidia-smi) -> rocm7.1 (capped)
_dir=$(make_mock_amd_smi "8.0")
_result=$(run_func "$_dir")
assert_eq "ROCm 8.0 -> rocm7.1 (capped)" "https://download.pytorch.org/whl/rocm7.1" "$_result"
rm -rf "$_dir"

# Tests 18-19 write their mock amd-smi inline (quoted heredoc, so nothing is
# expanded) because the version field must be empty or non-numeric.
# NOTE(review): unlike make_mock_amd_smi, these mocks print the same version
# line for every subcommand, including "list". If the GPU presence check
# inspects the output of "amd-smi list", these cases could reach the cpu
# fallback without exercising version parsing -- confirm against install.sh.

# 18) Malformed amd-smi output (empty version field) -> cpu
_dir=$(mktemp -d)
cat > "$_dir/amd-smi" <<'MOCK'
#!/bin/sh
echo "AMDSMI Tool: 25.0.1 | AMDSMI Library version: 25.0.1.0 | ROCm version: "
MOCK
chmod +x "$_dir/amd-smi"
_result=$(run_func "$_dir")
assert_eq "empty amd-smi version -> cpu" "https://download.pytorch.org/whl/cpu" "$_result"
rm -rf "$_dir"

# 19) amd-smi with "N/A" version -> cpu
_dir=$(mktemp -d)
cat > "$_dir/amd-smi" <<'MOCK'
#!/bin/sh
echo "AMDSMI Tool: 25.0.1 | AMDSMI Library version: 25.0.1.0 | ROCm version: N/A"
MOCK
chmod +x "$_dir/amd-smi"
_result=$(run_func "$_dir")
assert_eq "N/A amd-smi version -> cpu" "https://download.pytorch.org/whl/cpu" "$_result"
rm -rf "$_dir"

# 20) ROCm version with trailing text (e.g. "6.3.1-beta") -> rocm6.3
# Only the leading major.minor is expected in the index name; the patch
# level and the "-beta" suffix must be dropped.
_dir=$(make_mock_amd_smi "6.3.1-beta")
_result=$(run_func "$_dir")
assert_eq "ROCm 6.3.1-beta -> rocm6.3" "https://download.pytorch.org/whl/rocm6.3" "$_result"
rm -rf "$_dir"

# 21) CUDA 12.6 still works after ROCm changes (regression check)
_dir=$(make_mock_smi "12.6")
_result=$(run_func "$_dir")
assert_eq "CUDA 12.6 regression -> cu126" "https://download.pytorch.org/whl/cu126" "$_result"
rm -rf "$_dir"

# 22) CUDA 13.0 still works after ROCm changes (regression check)
_dir=$(make_mock_smi "13.0")
_result=$(run_func "$_dir")
assert_eq "CUDA 13.0 regression -> cu130" "https://download.pytorch.org/whl/cu130" "$_result"
rm -rf "$_dir"

# 23) CUDA 12.8 still works after ROCm changes (regression check)
_dir=$(make_mock_smi "12.8")
_result=$(run_func "$_dir")
assert_eq "CUDA 12.8 regression -> cu128" "https://download.pytorch.org/whl/cu128" "$_result"
rm -rf "$_dir"

# Final cleanup of the shared fixtures: the function file, the fake-smi
# directory (both set up outside this excerpt) and the tools directory
# created with mktemp above.
rm -f "$_FUNC_FILE"
rm -rf "$_FAKE_SMI_DIR"
rm -rf "$_TOOLS_DIR"
Expand Down
Loading
Loading