From b7b1008c0b1af5379656bd8e3063d01daf2bd764 Mon Sep 17 00:00:00 2001 From: Pedro Alves Date: Wed, 23 Feb 2022 11:17:26 +0000 Subject: [PATCH] Fix gdb.threads/current-lwp-dead.exp race If we make GDB report the process EXIT event for the leader thread, as will be done in a later patch of this series, then gdb.threads/current-lwp-dead.exp starts failing: (gdb) break fn_return Breakpoint 2 at 0x5555555551b5: file /home/pedro/rocm/gdb/build/gdb/testsuite/../../../src/gdb/testsuite/gdb.threads/current-lwp-dead.c, line 45. (gdb) continue Continuing. [New LWP 2138466] [Inferior 1 (process 2138459) exited normally] (gdb) FAIL: gdb.threads/current-lwp-dead.exp: continue to breakpoint: fn_return (the program exited) The inferior exit reported is actually correct. The main thread has indeed exited, and that's the thread that has the right exit code to report to the user, as that's the exit code that is reported to the program's parent. In this case, GDB managed to collect the exit code for the leader thread before reaping the other thread, because in reality, the testcase isn't creating standard threads, it is using raw clone, and the new clones are put in their own thread group. Fix it by making the main "thread" not exit until the scenario we're exercising plays out. Also, run the program to completion for completeness. The original program really wanted the leader thread to exit before the fn_return function was reached -- it was important that the current thread as pointed to by inferior_ptid was gone when infrun got the breakpoint event. I've tweaked the testcase to ensure that that condition still holds, though it is no longer the main thread that exits. This required a bit of synchronization between the threads, which required using CLONE_VM unconditionally. The #ifdef guards were added as a fix for https://sourceware.org/bugzilla/show_bug.cgi?id=11214, though I don't think they were necessary because the program is not using TLS. 
If it turns out they were necessary, we can link the testcase with "-z now" instead, which was mentioned as an alternative workaround in that Bugzilla. Change-Id: I7be2f0da4c2fe8f80a60bdde5e6c623d8bd5a0aa --- gdb/testsuite/gdb.threads/current-lwp-dead.c | 101 ++++++++++++------ .../gdb.threads/current-lwp-dead.exp | 23 +++- 2 files changed, 87 insertions(+), 37 deletions(-) diff --git a/gdb/testsuite/gdb.threads/current-lwp-dead.c b/gdb/testsuite/gdb.threads/current-lwp-dead.c index 76babc42ce8..ceb3ae47a4d 100644 --- a/gdb/testsuite/gdb.threads/current-lwp-dead.c +++ b/gdb/testsuite/gdb.threads/current-lwp-dead.c @@ -15,6 +15,18 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . + + The original issue we're trying to test is described in this + thread: + + https://sourceware.org/legacy-ml/gdb-patches/2009-06/msg00802.html + + The NEW_THREAD_EVENT code the comments below refer to no longer + exists in GDB, so the following comments are kept for historical + reasons, and to guide future updates to the testcase. + + --- + Do not use threads as we need to exploit a bug in LWP code masked by the threads code otherwise. @@ -29,60 +41,81 @@ #include #include #include - -#include -#ifdef __UCLIBC__ -#if !(defined(__UCLIBC_HAS_MMU__) || defined(__ARCH_HAS_MMU__)) -#define HAS_NOMMU -#endif -#endif +#include +#include #define STACK_SIZE 0x1000 -static int -fn_return (void *unused) -{ - return 0; /* at-fn_return */ -} +/* True if the 'fn_return' thread has been reached at the point after + its parent is already gone. */ +volatile int fn_return_reached = 0; + +/* True if the 'fn' thread has exited. */ +volatile int fn_exited = 0; + +/* Wrapper around clone. 
*/ static int -fn (void *unused) +do_clone (int (*fn)(void *)) { - int i; unsigned char *stack; int new_pid; - i = sleep (1); - assert (i == 0); - stack = malloc (STACK_SIZE); assert (stack != NULL); - new_pid = clone (fn_return, stack + STACK_SIZE, CLONE_FILES -#if defined(__UCLIBC__) && defined(HAS_NOMMU) - | CLONE_VM -#endif /* defined(__UCLIBC__) && defined(HAS_NOMMU) */ - , NULL, NULL, NULL, NULL); + new_pid = clone (fn, stack + STACK_SIZE, CLONE_FILES | CLONE_VM, + NULL, NULL, NULL, NULL); assert (new_pid > 0); + return new_pid; +} + +static int +fn_return (void *unused) +{ + /* Wait until our direct parent exits. We want the breakpoint set a + couple lines below to hit with the previously-selected thread + gone. */ + while (!fn_exited) + usleep (1); + + fn_return_reached = 1; /* at-fn_return */ + return 0; +} + +static int +fn (void *unused) +{ + do_clone (fn_return); return 0; } int main (int argc, char **argv) { - unsigned char *stack; - int new_pid; - - stack = malloc (STACK_SIZE); - assert (stack != NULL); - - new_pid = clone (fn, stack + STACK_SIZE, CLONE_FILES -#if defined(__UCLIBC__) && defined(HAS_NOMMU) - | CLONE_VM -#endif /* defined(__UCLIBC__) && defined(HAS_NOMMU) */ - , NULL, NULL, NULL, NULL); - assert (new_pid > 0); + int new_pid, status, ret; + + new_pid = do_clone (fn); + + /* Note the clone call above didn't use CLONE_THREAD, so it actually + put the new child in a new thread group. However, the new clone + is still reported with PTRACE_EVENT_CLONE to GDB, since we didn't + use CLONE_VFORK (results in PTRACE_EVENT_VFORK) nor set the + termination signal to SIGCHLD (results in PTRACE_EVENT_FORK), so + GDB thinks of it as a new thread of the same inferior. It's a + bit of an odd setup, but it's not important for what we're + testing, and, it let's us conveniently use waitpid to wait for + the child, which you can't with CLONE_THREAD. 
*/ + ret = waitpid (new_pid, &status, __WALL); + assert (ret == new_pid); + assert (WIFEXITED (status) && WEXITSTATUS (status) == 0); + + fn_exited = 1; + + /* Don't exit before the breakpoint at fn_return triggers. */ + while (!fn_return_reached) + usleep (1); return 0; } diff --git a/gdb/testsuite/gdb.threads/current-lwp-dead.exp b/gdb/testsuite/gdb.threads/current-lwp-dead.exp index b69fdbb5988..6728dbe87ab 100644 --- a/gdb/testsuite/gdb.threads/current-lwp-dead.exp +++ b/gdb/testsuite/gdb.threads/current-lwp-dead.exp @@ -15,8 +15,14 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -# Please email any bugs, comments, and/or additions to this file to: -# bug-gdb@gnu.org +# Regression test for issue originally described here: +# +# https://sourceware.org/legacy-ml/gdb-patches/2009-06/msg00802.html +# +# The relevant code has since been removed from GDB, but it doesn't +# hurt to keep the testcase. + +standard_testfile # This only works with on Linux targets. if ![istarget *-*-linux*] then { @@ -31,5 +37,16 @@ if {[runto_main] <= 0} { return -1 } -gdb_breakpoint "fn_return" +# Run to "fn" so that thread 2 is made current. +gdb_breakpoint "fn" +gdb_continue_to_breakpoint "fn" ".*do_clone.*" + +# Run to thread 3, at a point where thread 2 is gone. +set line [gdb_get_line_number "at-fn_return"] +gdb_breakpoint $line gdb_continue_to_breakpoint "fn_return" ".*at-fn_return.*" + +# Confirm thread 2 is really gone. +gdb_test "info threads 2" "No threads match '2'\\." + +gdb_continue_to_end "" continue 1 -- 2.30.2