Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 77 additions & 37 deletions gdb/testsuite/gdb.rocm/multi-inferior-gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,30 @@
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */

/* Shared driver program for the gdb.rocm multi-inferior tests.

The parent process forks N children; each child re-execs itself so
that its GPU runtime is initialized in a clean address space, then
launches its own GPU kernel. Re-exec'ing makes each child a separate
process from the operating system's point of view, which is what the
multi-inferior tests debug concurrently.

N comes from argv[1] when given; otherwise it defaults to the number
of GPU devices found at runtime (one child per device). The
companion helpers in multi-inferior.exp.tcl plant breakpoints on the
pre-fork and post-waitpid source markers and on the kernel. */

#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <hip/hip_runtime.h>

#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>

#include "rocm-test-utils.h"

Expand All @@ -30,77 +48,99 @@ kern ()
asm ("s_sleep 1");
}

/* Spawn one child process per detected GPU. */
static int
child (int argc, char **argv)
{
if (argc < 4)
{
fprintf (stderr, "%s: expected: child <idx> <num_devices>\n",
argv[0]);
return -1;
}

int idx = atoi (argv[2]);
int num_devices = atoi (argv[3]);
if (num_devices <= 0)
{
fprintf (stderr, "child %d: invalid num_devices %d\n", idx,
num_devices);
return -1;
}

CHECK (hipSetDevice (idx % num_devices));
kern<<<1, 1>>> ();
CHECK (hipDeviceSynchronize ());
return 0;
}

/* Spawn NUM_CHILDREN child processes. When NUM_CHILDREN is not
positive, spawn one child per detected GPU device. */

static int
parent (int argc, char **argv)
parent (const char *argv0, int num_children)
{
/* Identify how many GPUs we have, and spawn one child for each. */
int num_devices;
CHECK (hipGetDeviceCount (&num_devices));
if (num_devices <= 0)
{
fprintf (stderr, "no GPU devices available\n");
return -1;
}

if (num_children <= 0)
num_children = num_devices;

/* Break here. */

for (int i = 0; i < num_devices; i++)
for (int i = 0; i < num_children; i++)
{
char n[32] = {};
snprintf (n, sizeof (n), "%d", i);
std::string idx_str = std::to_string (i);
std::string ndev_str = std::to_string (num_devices);

pid_t pid = fork ();
if (pid == -1)
{
perror ("Fork failed");
perror ("fork");
return -1;
}

if (pid == 0)
{
/* Exec to force the child to re-initialize the ROCm runtime. */
if (execl (argv[0], argv[0], n, nullptr) == -1)
if (execl (argv0, argv0, "child", idx_str.c_str (),
ndev_str.c_str (), (char *) nullptr) == -1)
{
perror ("Failed to exec");
return -1;
perror ("execl");
_exit (127);
}
}
}

/* Wait for all children. */
/* Reap every child. Any non-zero exit from a child is a failure
(e.g. a runtime initialization failure under contention). */
int failed = 0;
while (true)
{
int ws;
pid_t ret = waitpid (-1, &ws, 0);
if (ret == -1 && errno == ECHILD)
break;
if (ret > 0 && (!WIFEXITED (ws) || WEXITSTATUS (ws) != 0))
failed++;
}

/* Last break here. */
return 0;
}

static int
child (int argc, char **argv)
{
int dev_number;
if (sscanf (argv[1], "%d", &dev_number) != 1)
{
fprintf (stderr, "Invalid argument \"%s\"\n", argv[1]);
return -1;
}

CHECK (hipSetDevice (dev_number));
kern<<<1, 1>>> ();
CHECK (hipDeviceSynchronize ());
return 0;
return failed == 0 ? 0 : 1;
}

/* When called with no argument, identify how many AMDGPU devices are
available on the system and spawn one worker process per GPU. If a
command-line argument is provided, it is the index of the GPU to use. */

int
main (int argc, char **argv)
{
if (argc <= 1)
return parent (argc, argv);
else
if (argc >= 2 && strcmp (argv[1], "child") == 0)
return child (argc, argv);

int num_children = 0;
if (argc >= 2)
num_children = atoi (argv[1]);

return parent (argv[0], num_children);
}
61 changes: 6 additions & 55 deletions gdb/testsuite/gdb.rocm/multi-inferior-gpu.exp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ load_lib rocm.exp

standard_testfile .cpp

source $srcdir/$subdir/multi-inferior.exp.tcl

require allow_hip_tests
require hip_devices_support_debug_multi_process
require allow_fork_tests
Expand All @@ -29,62 +31,11 @@ if {[build_executable "failed to prepare" $testfile $srcfile {debug hip}]} {
}

proc do_test {} {
clean_restart
gdb_load $::binfile
gdb_test_no_output "set non-stop on"
gdb_test_no_output "set detach-on-fork off"
gdb_test_no_output "set follow-fork parent"

with_rocm_gpu_lock {
gdb_breakpoint [gdb_get_line_number "Break here"]
gdb_breakpoint kern allow-pending
gdb_breakpoint [gdb_get_line_number "Last break here"]

# Run until we reach the first breakpoint where we can figure
# out how many children will be spawned.
gdb_test "run" "hit Breakpoint.*"

set num_children [get_integer_valueof "num_devices" 0]
set bp_to_see $num_children
set stopped_gpu_threads [list]

gdb_test_multiple "continue -a &" "continue to gpu breakpoints" {
-re "Continuing\.\r\n$::gdb_prompt " {
pass $gdb_test_name
}
}

gdb_test_multiple "" "wait for gpu stops" {
-re "Thread ($::decimal\.$::decimal)\[^\r\n\]* hit Breakpoint\[^\r\n\]*, kern \(\)\[^\r\n\]*\r\n" {
lappend stopped_gpu_threads $expect_out(1,string)
incr bp_to_see -1
if {$bp_to_see != 0} {
exp_continue
} else {
pass $gdb_test_name
}
}
}

# Continue all the GPU kernels so all the children processes can reach exit.
foreach thread $stopped_gpu_threads {
set infnumber [lindex [split $thread .] 0]
gdb_test "thread $thread" "Switching to thread.*"
gdb_test_multiple "continue" "continue inferior $infnumber" {
-re "\\\[Inferior $infnumber \[^\n\r\]* exited normally\\]\r\n$::gdb_prompt " {
pass $gdb_test_name
}
}
}

gdb_test_multiple "" "reach breakpoint in main" {
-re "hit Breakpoint.*parent" {
pass $gdb_test_name
}
}
# Select main inferior
gdb_test "inferior 1" "Switching to inferior 1.*"
gdb_continue_to_end "" "continue -a" 1
# Spawn one child inferior per detected GPU. The child count
# is discovered from "num_devices" at the pre-fork stop.
set threads [rocm_multi_inferior_run_to_kernels]
rocm_multi_inferior_drain $threads
}
}

Expand Down
133 changes: 133 additions & 0 deletions gdb/testsuite/gdb.rocm/multi-inferior.exp.tcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Copyright (C) 2026 Free Software Foundation, Inc.
# Copyright (C) 2026 Advanced Micro Devices, Inc. All rights reserved.

# This file is part of GDB.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Shared driver helpers for the gdb.rocm multi-inferior tests. Tests
# source this file with:
#
# source $srcdir/$subdir/multi-inferior.exp.tcl
#
# and drive a session built around the multi-inferior.cpp program (the
# "Break here" / kern / "Last break here" source markers).

# Drive a HIP multi-inferior session. Load the program, turn on
# non-stop mode with detach-on-fork off and follow-fork parent, set
# breakpoints on the "Break here" / kern / "Last break here" markers,
# run the parent to the pre-fork breakpoint, resume everything in the
# background, and wait for one "kern" breakpoint stop per child.
#
# arg_list - arguments passed via "set args" ("" for none).
# expected - number of child stops to wait for. If "", it is read
# from the "num_devices" value at the pre-fork stop.
#
# Return the list of stopped GPU thread ids (one per child).

proc rocm_multi_inferior_run_to_kernels { {arg_list ""} {expected ""} } {
clean_restart
gdb_load $::binfile

gdb_test_no_output "set non-stop on"
gdb_test_no_output "set detach-on-fork off"
gdb_test_no_output "set follow-fork parent"
if { $arg_list ne "" } {
gdb_test_no_output "set args $arg_list"
}

gdb_breakpoint [gdb_get_line_number "Break here"]
gdb_breakpoint kern allow-pending
gdb_breakpoint [gdb_get_line_number "Last break here"]

# Run the parent up to the pre-fork sync point. GDB prints the
# source line of the stop after the breakpoint line, so the pattern
# must account for breakpoint-hit + source-line before the prompt.
gdb_test_multiple "run" "run to fork point" {
-re -wrap "hit Breakpoint\[^\r\n\]*parent\[^\r\n\]*\r\n\[^\r\n\]+" {
pass $gdb_test_name
}
}

if { $expected eq "" } {
set expected [get_integer_valueof "num_devices" 0]
}

# Resume everything. As children are forked and re-exec, each
# launches its kernel and we expect one "hit Breakpoint ..., kern ()"
# notification per child inferior.
gdb_test_multiple "continue -a &" "continue -a in non-stop" {
-re "Continuing\\.\r\n$::gdb_prompt " {
pass $gdb_test_name
}
}

set seen [list]
set threads [list]
gdb_test_multiple "" "wait for all GPU stops" {
-re "Thread ($::decimal)\\.($::decimal)\[^\r\n\]* hit Breakpoint\[^\r\n\]*, kern \\(\\)\[^\r\n\]*\r\n" {
set inf $expect_out(1,string)
if { [lsearch -exact $seen $inf] == -1 } {
lappend seen $inf
lappend threads "$inf.$expect_out(2,string)"
}
if { [llength $seen] < $expected } {
exp_continue
} else {
pass $gdb_test_name
}
}
timeout {
fail $gdb_test_name
verbose -log "only [llength $seen] of $expected inferiors stopped"
}
}

return $threads
}

# Continue each stopped GPU inferior in THREADS to a clean exit, wait
# for the parent to reach its post-waitpid breakpoint, then run the
# parent to completion. THREADS is the list returned by
# rocm_multi_inferior_run_to_kernels.

proc rocm_multi_inferior_drain { threads } {
foreach thread $threads {
set inf [lindex [split $thread .] 0]
gdb_test "thread $thread" "Switching to thread.*" \
"switch to GPU thread in inferior $inf"
gdb_test_multiple "continue" "continue inferior $inf to end" {
-re "\\\[Inferior $inf \[^\r\n\]* exited normally\\\]\r\n$::gdb_prompt " {
pass $gdb_test_name
}
-re "\\\[Inferior $inf \[^\r\n\]* exited with code\[^\r\n\]*\\\]" {
fail "$gdb_test_name (non-zero exit)"
}
}
}

# Once all children have exited, the parent's waitpid loop falls
# through to the post-fork breakpoint.
gdb_test_multiple "" "parent reached post-waitpid breakpoint" {
-re "hit Breakpoint\[^\r\n\]*parent\[^\r\n\]*" {
pass $gdb_test_name
}
timeout {
fail $gdb_test_name
}
}

gdb_test "inferior 1" "Switching to inferior 1.*"
gdb_continue_to_end "" "continue -a" 1
}
Loading