Re-add zombie leader on exit, gdbserver/linux

author Pedro Alves <pedro@palves.net>

Tue, 22 Feb 2022 10:15:07 +0000 (10:15 +0000)

committer Pedro Alves <pedro@palves.net>

Thu, 10 Mar 2022 11:35:54 +0000 (11:35 +0000)
author Pedro Alves <pedro@palves.net>
Tue, 22 Feb 2022 10:15:07 +0000 (10:15 +0000)
committer Pedro Alves <pedro@palves.net>
Thu, 10 Mar 2022 11:35:54 +0000 (11:35 +0000)
diff --git a/gdbserver/linux-low.cc b/gdbserver/linux-low.cc

index 442b7d9b81ba1778c74626873f968013dae30bc9..7726a4a0c36c6bafea32fc929b954ed83fd80289 100644 (file)
--- a/gdbserver/linux-low.cc
+++ b/gdbserver/linux-low.cc
@@ -135,6 +135,15 @@ typedef struct
  /* Does the current host support PTRACE_GETREGSET?  */
  int have_ptrace_getregset = -1;
  
+/* Return true if THREAD is the thread group leader of its process.  */
+
+static bool
+is_leader (thread_info *thread)
+{
+  const ptid_t id = ptid_of (thread);
+  return id.lwp () == id.pid ();
+}
+
  /* LWP accessors.  */
  
  /* See nat/linux-nat.h.  */
@@ -1733,42 +1742,63 @@ linux_process_target::check_zombie_leaders ()
  
        if (leader_lp != NULL && !leader_lp->stopped
           /* Check if there are other threads in the group, as we may
-            have raced with the inferior simply exiting.  */
+            have raced with the inferior simply exiting.  Note this
+            isn't a watertight check.  If the inferior is
+            multi-threaded and is exiting, it may be we see the
+            leader as zombie before we reap all the non-leader
+            threads.  See comments below.  */
           && !last_thread_of_process_p (leader_pid)
           && linux_proc_pid_is_zombie (leader_pid))
         {
-         /* A leader zombie can mean one of two things:
-
-            - It exited, and there's an exit status pending
-            available, or only the leader exited (not the whole
-            program).  In the latter case, we can't waitpid the
-            leader's exit status until all other threads are gone.
-
-            - There are 3 or more threads in the group, and a thread
-            other than the leader exec'd.  On an exec, the Linux
-            kernel destroys all other threads (except the execing
-            one) in the thread group, and resets the execing thread's
-            tid to the tgid.  No exit notification is sent for the
-            execing thread -- from the ptracer's perspective, it
-            appears as though the execing thread just vanishes.
-            Until we reap all other threads except the leader and the
-            execing thread, the leader will be zombie, and the
-            execing thread will be in `D (disc sleep)'.  As soon as
-            all other threads are reaped, the execing thread changes
-            it's tid to the tgid, and the previous (zombie) leader
-            vanishes, giving place to the "new" leader.  We could try
-            distinguishing the exit and exec cases, by waiting once
-            more, and seeing if something comes out, but it doesn't
-            sound useful.  The previous leader _does_ go away, and
-            we'll re-add the new one once we see the exec event
-            (which is just the same as what would happen if the
-            previous leader did exit voluntarily before some other
-            thread execs).  */
-
+         /* A zombie leader in a multi-threaded program can mean one
+            of three things:
+
+            #1 - Only the leader exited, not the whole program, e.g.,
+            with pthread_exit.  Since we can't reap the leader's exit
+            status until all other threads are gone and reaped too,
+            we want to delete the zombie leader right away, as it
+            can't be debugged, we can't read its registers, etc.
+            This is the main reason we check for zombie leaders
+            disappearing.
+
+            #2 - The whole thread-group/process exited (a group exit,
+            via e.g. exit(3)), and there is (or will be shortly) an
+            exit reported for each thread in the process, and then
+            finally an exit for the leader once the non-leaders are
+            reaped.
+
+            #3 - There are 3 or more threads in the group, and a
+            thread other than the leader exec'd.  See comments on
+            exec events at the top of the file.
+
+            Ideally we would never delete the leader for case #2.
+            Instead, we want to collect the exit status of each
+            non-leader thread, and then finally collect the exit
+            status of the leader as normal and use its exit code as
+            whole-process exit code.  Unfortunately, there's no
+            race-free way to distinguish cases #1 and #2.  We can't
+            assume the exit events for the non-leader threads are
+            already pending in the kernel, nor can we assume the
+            non-leader threads are in zombie state already.  Between
+            the leader becoming zombie and the non-leaders exiting
+            and becoming zombie themselves, there's a small time
+            window, so such a check would be racy.  Temporarily
+            pausing all threads and checking to see if all threads
+            exit or not before re-resuming them would work in the
+            case that all threads are running right now, but it
+            wouldn't work if some thread is currently already
+            ptrace-stopped, e.g., due to scheduler-locking.
+
+            So what we do is we delete the leader anyhow, and then
+            later on when we see its exit status, we add it back.
+            We also make sure that we only report a whole-process
+            exit when we see the leader exiting, as opposed to when
+            the last LWP in the LWP list exits, which can be a
+            non-leader if we deleted the leader here.  */
           threads_debug_printf ("Thread group leader %d zombie "
-                               "(it exited, or another thread execd).",
+                               "(it exited, or another thread execd), "
+                               "deleting it.",
                                 leader_pid);
-
           delete_lwp (leader_lp);
         }
      });
@@ -2185,7 +2215,22 @@ linux_process_target::filter_event (int lwpid, int wstat)
           /* Don't report an event for the exit of an LWP not in our
              list, i.e. not part of any inferior we're debugging.
              This can happen if we detach from a program we originally
-            forked and then it exits.  */
+            forked and then it exits.  However, note that we may have
+            earlier deleted a leader of an inferior we're debugging,
+            in check_zombie_leaders.  Re-add it here if so.  */
+         find_process ([&] (process_info *proc)
+           {
+             if (proc->pid == lwpid)
+               {
+                 threads_debug_printf
+                   ("Re-adding thread group leader LWP %d after exit.",
+                    lwpid);
+
+                 child = add_lwp (ptid_t (lwpid, lwpid));
+                 return true;
+               }
+             return false;
+           });
         }
  
        if (child == nullptr)
@@ -2209,11 +2254,10 @@ linux_process_target::filter_event (int lwpid, int wstat)
           unsuspend_all_lwps (child);
         }
  
-      /* If there is at least one more LWP, then the exit signal was
-        not the end of the debugged application and should be
-        ignored, unless GDB wants to hear about thread exits.  */
-      if (cs.report_thread_events
-         || last_thread_of_process_p (pid_of (thread)))
+      /* If this is not the leader LWP, then the exit signal was not
+        the end of the debugged application and should be ignored,
+        unless GDB wants to hear about thread exits.  */
+      if (cs.report_thread_events || is_leader (thread))
         {
           /* Since events are serialized to GDB core, and we can't
              report this one right now.  Leave the status pending for
@@ -2780,7 +2824,7 @@ linux_process_target::filter_exit_event (lwp_info *event_child,
    struct thread_info *thread = get_lwp_thread (event_child);
    ptid_t ptid = ptid_of (thread);
  
-  if (!last_thread_of_process_p (pid_of (thread)))
+  if (!is_leader (thread))
      {
        if (cs.report_thread_events)
         ourstatus->set_thread_exited (0);
author	Pedro Alves <pedro@palves.net>
	Tue, 22 Feb 2022 10:15:07 +0000 (10:15 +0000)
committer	Pedro Alves <pedro@palves.net>
	Thu, 10 Mar 2022 11:35:54 +0000 (11:35 +0000)