From c6d3683661a7623a306667915abba4d4695616d7 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Thu, 25 Jun 2020 17:48:14 +0200 Subject: [PATCH] Use fork instead of vfork on Solaris The gdb.mi/mi-exec-run.exp test never completed/timed out on Solaris for quite some time: FAIL: gdb.mi/mi-exec-run.exp: inferior-tty=main: mi=main: force-fail=1: run failure detected (timeout) This is for gdb trying to exec mi-exec-run.nox, a copy of mi-exec-run with execute permissions removed. The process tree at this point looks like this: 21254 /vol/gcc/bin/expect -- /vol/gcc/share/dejagnu/runtest.exp GDB_PARALLEL=yes --outdir=outputs/gdb.mi/mi-exec-run-vfork gdb.mi/mi-exec-run.exp 21300 21281 21294 $obj/gdb/testsuite/../../gdb/gdb -nw -nx -data-directory $obj/gdb/testsuite/../data-directory -i=mi 21297 $obj/gdb/testsuite/../../gdb/gdb -nw -nx -data-directory $obj/gdb/testsuite/../data-directory -i=mi The parent gdb hangs here: 21294: $obj/gdb/testsuite/../../gdb/gdb -nw ------------ lwp# 1 / thread# 1 --------------- 0000000000000000 SYS#0 () 0000000000daeccd procfs_target::create_inferior(char const*, std::__cxx11::basic_string, std::allocator > const&, char**, int) () + 97 (procfs.c:2853) 0000000000ca63a7 run_command_1(char const*, int, run_how) () + 349 (basic_string.h:187) 0000000000ca6516 start_command(char const*, int) () + 26 (infcmd.c:584) 0000000000b3ca8e do_const_cfunc(cmd_list_element*, char const*, int) () + f (cli-decode.c:96) 0000000000b3ed77 cmd_func(cmd_list_element*, char const*, int) () + 32 (cli-decode.c:2113) 0000000000f2d219 execute_command(char const*, int) () + 455 (top.c:657) 0000000000d4ad77 mi_execute_cli_command(char const*, int, char const*) () + 242 (basic_string.h:187) 0000000000d4ae80 mi_cmd_exec_run(char const*, char**, int) () + ba (mi-main.c:473) with these process flags 21294: $obj/gdb/testsuite/../../gdb/gdb -nw data model = _LP64 flags = VFORKP|ORPHAN|MSACCT|MSFORK sigpend = 0x00004103,0x00000000,0x00000000 /1: flags = 0 sigmask = 0xffbffeff,0xffffffff,0x000000ff cursig = SIGKILL /2: flags = DETACH|STOPPED|ASLEEP lwp_park(0x0,0x0,0x0) why = PR_SUSPENDED sigmask = 0x000a2002,0x00000000,0x00000000 [...] while the child sits at 21297: $obj/gdb/testsuite/../../gdb/gdb -nw 00007fffbf078a0b execve (7fffbffff756, 7fffbfffec58, 7fffbfffec90, 0) 00007fffbef84cf6 execvpex () + f6 00007fffbef84f45 execvp () + 15 0000000000d60a44 fork_inferior(char const*, std::__cxx11::basic_string, std::allocator > const&, char**, void (*)(), gdb::function_view, void (*)(), char const*, void (*)(char const*, char* const*, char* const*)) () + 47f (fork-inferior.c:423) 0000000000daeccd procfs_target::create_inferior(char const*, std::__cxx11::basic_string, std::allocator > const&, char**, int) () + 97 (procfs.c:2853) 0000000000ca63a7 run_command_1(char const*, int, run_how) () + 349 (basic_string.h:187) 0000000000ca6516 start_command(char const*, int) () + 26 (infcmd.c:584) 0000000000b3ca8e do_const_cfunc(cmd_list_element*, char const*, int) () + f (cli-decode.c:96) 0000000000b3ed77 cmd_func(cmd_list_element*, char const*, int) () + 32 (cli-decode.c:2113) 0000000000f2d219 execute_command(char const*, int) () + 455 (top.c:657) 0000000000d4ad77 mi_execute_cli_command(char const*, int, char const*) () + 242 (basic_string.h:187) 0000000000d4ae80 mi_cmd_exec_run(char const*, char**, int) () + ba (mi-main.c:473) with 21297: $obj/gdb/testsuite/../../gdb/gdb -nw data model = _LP64 flags = MSACCT|MSFORK exitset = 0x00000000 0x04000000 0x00000000 0x00000000 0x00000000 0x00000000 0x00000000 0x00000000 /1: flags = STOPPED|ISTOP execve(0x7fffbffff756,0x7fffbfffec58,0x7fffbfffec90,0x0) why = PR_SYSEXIT what = execve We have a deadlock here: the execve in the child cannot return until the parent has handled the PR_SYSEXIT while the parent cannot run with a vfork'ed child as documented in proc(4): The child of a vfork(2) borrows the parent's address space. When a vfork(2) is executed by a traced process, all watched areas established for the parent are suspended until the child terminates or performs an exec(2). Any watched areas established independently in the child are cancelled when the parent resumes after the child's termination or exec(2). PCWATCH fails with EBUSY if applied to the parent of a vfork(2) before the child has terminated or performed an exec(2). The PR_VFORKP flag is set in the pstatus structure for such a parent process. In that situation, the parent cannot be killed even with SIGKILL (as runtest will attempt once the timeout occurs; the pending signal can be seen in the pflags output above), so the whole test hangs until one manually kills the child process. Fortunately, there's an easy way out: when using fork instead of vfork, the problem doesn't occur, and this is what the current patch does: it calls fork_inferior with a dummy pre_trace_fun arg. Tested on amd64-pc-solaris2.11 and sparcv9-sun-solaris2.11. * procfs.c (procfs_pre_trace): New function. (procfs_target::create_inferior): Pass it to fork_inferior. --- gdb/ChangeLog | 5 +++++ gdb/procfs.c | 9 ++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/gdb/ChangeLog b/gdb/ChangeLog index 7298617b80e..982f23a3157 100644 --- a/gdb/ChangeLog +++ b/gdb/ChangeLog @@ -1,3 +1,8 @@ +2020-06-25 Rainer Orth + + * procfs.c (procfs_pre_trace): New function. + (procfs_target::create_inferior): Pass it to fork_inferior. + 2020-06-25 Rainer Orth * configure.tgt (gdb_target_obs): Remove diff --git a/gdb/procfs.c b/gdb/procfs.c index 65243b1e7c5..d3085a20fce 100644 --- a/gdb/procfs.c +++ b/gdb/procfs.c @@ -2759,6 +2759,13 @@ procfs_set_exec_trap (void) /*destroy_procinfo (pi);*/ } +/* Dummy function to be sure fork_inferior uses fork(2) and not vfork(2). + This avoids a possible deadlock gdb and its vfork'ed child. */ +static void +procfs_pre_trace (void) +{ +} + /* This function is called BEFORE gdb forks the inferior process. Its only real responsibility is to set things up for the fork, and tell GDB which two functions to call after the fork (one for the parent, @@ -2851,7 +2858,7 @@ procfs_target::create_inferior (const char *exec_file, push_target (this); pid = fork_inferior (exec_file, allargs, env, procfs_set_exec_trap, - NULL, NULL, shell_file, NULL); + NULL, procfs_pre_trace, shell_file, NULL); /* We have something that executes now. We'll be running through the shell at this point (if startup-with-shell is true), but the -- 2.30.2