/* Does the current host support PTRACE_GETREGSET? */
int have_ptrace_getregset = -1;
+/* Return true if THREAD's LWP id equals its process id (leader).  */
+
+static bool
+is_leader (thread_info *thread)
+{
+ ptid_t id = ptid_of (thread);
+ return id.lwp () == id.pid ();
+}
+
/* LWP accessors. */
/* See nat/linux-nat.h. */
if (leader_lp != NULL && !leader_lp->stopped
/* Check if there are other threads in the group, as we may
- have raced with the inferior simply exiting. */
+ have raced with the inferior simply exiting. Note this
+ isn't a watertight check. If the inferior is
+ multi-threaded and is exiting, it may be we see the
+ leader as zombie before we reap all the non-leader
+ threads. See comments below. */
&& !last_thread_of_process_p (leader_pid)
&& linux_proc_pid_is_zombie (leader_pid))
{
- /* A leader zombie can mean one of two things:
-
- - It exited, and there's an exit status pending
- available, or only the leader exited (not the whole
- program). In the latter case, we can't waitpid the
- leader's exit status until all other threads are gone.
-
- - There are 3 or more threads in the group, and a thread
- other than the leader exec'd. On an exec, the Linux
- kernel destroys all other threads (except the execing
- one) in the thread group, and resets the execing thread's
- tid to the tgid. No exit notification is sent for the
- execing thread -- from the ptracer's perspective, it
- appears as though the execing thread just vanishes.
- Until we reap all other threads except the leader and the
- execing thread, the leader will be zombie, and the
- execing thread will be in `D (disc sleep)'. As soon as
- all other threads are reaped, the execing thread changes
- it's tid to the tgid, and the previous (zombie) leader
- vanishes, giving place to the "new" leader. We could try
- distinguishing the exit and exec cases, by waiting once
- more, and seeing if something comes out, but it doesn't
- sound useful. The previous leader _does_ go away, and
- we'll re-add the new one once we see the exec event
- (which is just the same as what would happen if the
- previous leader did exit voluntarily before some other
- thread execs). */
-
+ /* A zombie leader in a multi-threaded program can mean one
+ of three things:
+
+ #1 - Only the leader exited, not the whole program, e.g.,
+ with pthread_exit. Since we can't reap the leader's exit
+ status until all other threads are gone and reaped too,
+ we want to delete the zombie leader right away, as it
+ can't be debugged, we can't read its registers, etc.
+ This is the main reason we check for zombie leaders
+ disappearing.
+
+ #2 - The whole thread-group/process exited (a group exit,
+ via e.g. exit(3)), and there is (or will be shortly) an
+ exit reported for each thread in the process, and then
+ finally an exit for the leader once the non-leaders are
+ reaped.
+
+ #3 - There are 3 or more threads in the group, and a
+ thread other than the leader exec'd. See comments on
+ exec events at the top of the file.
+
+ Ideally we would never delete the leader for case #2.
+ Instead, we want to collect the exit status of each
+ non-leader thread, and then finally collect the exit
+ status of the leader as normal and use its exit code as
+ whole-process exit code. Unfortunately, there's no
+ race-free way to distinguish cases #1 and #2. We can't
+ assume the exit events for the non-leader threads are
+ already pending in the kernel, nor can we assume the
+ non-leader threads are in zombie state already. Between
+ the leader becoming zombie and the non-leaders exiting
+ and becoming zombie themselves, there's a small time
+ window, so such a check would be racy. Temporarily
+ pausing all threads and checking to see if all threads
+ exit or not before re-resuming them would work in the
+ case that all threads are running right now, but it
+ wouldn't work if some thread is currently already
+ ptrace-stopped, e.g., due to scheduler-locking.
+
+ So what we do is we delete the leader anyhow, and then
+ later on when we see its exit status, we re-add it.
+ We also make sure that we only report a whole-process
+ exit when we see the leader exiting, as opposed to when
+ the last LWP in the LWP list exits, which can be a
+ non-leader if we deleted the leader here. */
threads_debug_printf ("Thread group leader %d zombie "
- "(it exited, or another thread execd).",
+ "(it exited, or another thread execd), "
+ "deleting it.",
leader_pid);
-
delete_lwp (leader_lp);
}
});
/* Don't report an event for the exit of an LWP not in our
list, i.e. not part of any inferior we're debugging.
This can happen if we detach from a program we originally
- forked and then it exits. */
+ forked and then it exits. However, note that we may have
+ earlier deleted a leader of an inferior we're debugging,
+ in check_zombie_leaders. Re-add it back here if so. */
+ find_process ([&] (process_info *proc)
+ {
+ if (proc->pid == lwpid)
+ {
+ threads_debug_printf
+ ("Re-adding thread group leader LWP %d after exit.",
+ lwpid);
+
+ child = add_lwp (ptid_t (lwpid, lwpid));
+ return true;
+ }
+ return false;
+ });
}
if (child == nullptr)
unsuspend_all_lwps (child);
}
- /* If there is at least one more LWP, then the exit signal was
- not the end of the debugged application and should be
- ignored, unless GDB wants to hear about thread exits. */
- if (cs.report_thread_events
- || last_thread_of_process_p (pid_of (thread)))
+ /* If this is not the leader LWP, then the exit signal was not
+ the end of the debugged application and should be ignored,
+ unless GDB wants to hear about thread exits. */
+ if (cs.report_thread_events || is_leader (thread))
{
/* Since events are serialized to GDB core, and we can't
report this one right now. Leave the status pending for
struct thread_info *thread = get_lwp_thread (event_child);
ptid_t ptid = ptid_of (thread);
- if (!last_thread_of_process_p (pid_of (thread)))
+ if (!is_leader (thread))
{
if (cs.report_thread_events)
ourstatus->set_thread_exited (0);