static void close_proc_mem_file (pid_t pid);
static void open_proc_mem_file (ptid_t ptid);
+/* Return TRUE if LWP is the leader thread of the process. */
+
+static bool
+is_leader (lwp_info *lp)
+{
+ return lp->ptid.pid () == lp->ptid.lwp ();
+}
+
\f
/* LWP accessors. */
/* Don't report an event for the exit of an LWP not in our
list, i.e. not part of any inferior we're debugging.
This can happen if we detach from a program we originally
- forked and then it exits. */
+ forked and then it exits. However, note that we may have
+ earlier deleted a leader of an inferior we're debugging,
+ in check_zombie_leaders. Re-add it back here if so. */
+ for (inferior *inf : all_inferiors (linux_target))
+ {
+ if (inf->pid == lwpid)
+ {
+ linux_nat_debug_printf
+ ("Re-adding thread group leader LWP %d after exit.",
+ lwpid);
+
+ lp = add_lwp (ptid_t (lwpid, lwpid));
+ lp->resumed = 1;
+ add_thread (linux_target, lp->ptid);
+ break;
+ }
+ }
}
if (lp == nullptr)
/* Check if the thread has exited. */
if (WIFEXITED (status) || WIFSIGNALED (status))
{
- if (!report_thread_events
- && num_lwps (lp->ptid.pid ()) > 1)
+ if (!report_thread_events && !is_leader (lp))
{
linux_nat_debug_printf ("%s exited.",
lp->ptid.to_string ().c_str ());
- /* If there is at least one more LWP, then the exit signal
+ /* If this was not the leader exiting, then the exit signal
was not the end of the debugged application and should be
ignored. */
exit_lwp (lp);
leader_lp = find_lwp_pid (ptid_t (inf->pid));
if (leader_lp != NULL
/* Check if there are other threads in the group, as we may
- have raced with the inferior simply exiting. */
+ have raced with the inferior simply exiting. Note this
+ isn't a watertight check. If the inferior is
+ multi-threaded and is exiting, it may be we see the
+ leader as zombie before we reap all the non-leader
+ threads. See comments below. */
&& num_lwps (inf->pid) > 1
&& linux_proc_pid_is_zombie (inf->pid))
{
+ /* A zombie leader in a multi-threaded program can mean one
+ of three things:
+
+ #1 - Only the leader exited, not the whole program, e.g.,
+ with pthread_exit. Since we can't reap the leader's exit
+ status until all other threads are gone and reaped too,
+ we want to delete the zombie leader right away, as it
+ can't be debugged, we can't read its registers, etc.
+ This is the main reason we check for zombie leaders
+ disappearing.
+
+ #2 - The whole thread-group/process exited (a group exit,
+ via e.g. exit(3), and there is (or will be shortly) an
+ exit reported for each thread in the process, and then
+ finally an exit for the leader once the non-leaders are
+ reaped.
+
+ #3 - There are 3 or more threads in the group, and a
+ thread other than the leader exec'd. See comments on
+ exec events at the top of the file.
+
+ Ideally we would never delete the leader for case #2.
+ Instead, we want to collect the exit status of each
+ non-leader thread, and then finally collect the exit
+ status of the leader as normal and use its exit code as
+ whole-process exit code. Unfortunately, there's no
+ race-free way to distinguish cases #1 and #2. We can't
+ assume the exit events for the non-leaders threads are
+ already pending in the kernel, nor can we assume the
+ non-leader threads are in zombie state already. Between
+ the leader becoming zombie and the non-leaders exiting
+ and becoming zombie themselves, there's a small time
+ window, so such a check would be racy. Temporarily
+ pausing all threads and checking to see if all threads
+ exit or not before re-resuming them would work in the
+ case that all threads are running right now, but it
+ wouldn't work if some thread is currently already
+ ptrace-stopped, e.g., due to scheduler-locking.
+
+ So what we do is we delete the leader anyhow, and then
+ later on when we see its exit status, we re-add it back.
+ We also make sure that we only report a whole-process
+ exit when we see the leader exiting, as opposed to when
+ the last LWP in the LWP list exits, which can be a
+ non-leader if we deleted the leader here. */
linux_nat_debug_printf ("Thread group leader %d zombie "
- "(it exited, or another thread execd).",
+ "(it exited, or another thread execd), "
+ "deleting it.",
inf->pid);
-
- /* A leader zombie can mean one of two things:
-
- - It exited, and there's an exit status pending
- available, or only the leader exited (not the whole
- program). In the latter case, we can't waitpid the
- leader's exit status until all other threads are gone.
-
- - There are 3 or more threads in the group, and a thread
- other than the leader exec'd. See comments on exec
- events at the top of the file. We could try
- distinguishing the exit and exec cases, by waiting once
- more, and seeing if something comes out, but it doesn't
- sound useful. The previous leader _does_ go away, and
- we'll re-add the new one once we see the exec event
- (which is just the same as what would happen if the
- previous leader did exit voluntarily before some other
- thread execs). */
-
- linux_nat_debug_printf ("Thread group leader %d vanished.", inf->pid);
exit_lwp (leader_lp);
}
}
{
ptid_t ptid = event_child->ptid;
- if (num_lwps (ptid.pid ()) > 1)
+ if (!is_leader (event_child))
{
if (report_thread_events)
ourstatus->set_thread_exited (0);