[AMD] server_vllm.sh: default PREFILL/DECODE_TP_SIZE to a full node

Duyi-Wang · Duyi-Wang · commit be92334d8d82 · 2026-06-24T07:03:51.000Z
Default both to GPUS_PER_NODE, mirroring server_sglang.sh / server_atom.sh, so the
bench.sh GPU count never resolves to 0 when submit.sh does not export the per-worker TP size.
diff --git a/benchmarks/multi_node/amd_utils/server_vllm.sh b/benchmarks/multi_node/amd_utils/server_vllm.sh
@@ -39,6 +39,12 @@ BENCH_MAX_CONCURRENCY="${BENCH_MAX_CONCURRENCY:-512}"
 DRY_RUN="${DRY_RUN:-0}"
 GPUS_PER_NODE="${GPUS_PER_NODE:-8}"
 
+# Per-worker TP sizes (prefill: PREFILL_NODES*PREFILL_TP/PREFILL_WORKERS), normally
+# exported by submit.sh; when unset or empty, fall back to GPUS_PER_NODE (a full node)
+# so the bench.sh GPU count never resolves to 0. Mirrors server_sglang.sh / server_atom.sh.
+PREFILL_TP_SIZE="${PREFILL_TP_SIZE:-$GPUS_PER_NODE}"
+DECODE_TP_SIZE="${DECODE_TP_SIZE:-$GPUS_PER_NODE}"
+
 ROUTER_PORT="${ROUTER_PORT:-30000}"
 SERVER_PORT="${SERVER_PORT:-2584}"
 ENGINE_ID="${ENGINE_ID:-${MODEL_NAME}-pd-run}"