From 225170409b47bc96b62b2ecfcc0d9d5ae1ef8949 Mon Sep 17 00:00:00 2001 From: Simon Marchi Date: Sat, 23 Apr 2022 23:20:48 -0400 Subject: [PATCH] gdb/infrun: make fetch_inferior_event restore thread if exited or signalled Commit 152a1749566 ("gdb: prune inferiors at end of fetch_inferior_event, fix intermittent failure of gdb.threads/fork-plus-threads.exp") introduced some follow-fork-related test failures, such as: info inferiors^M Num Description Connection Executable ^M * 1 process 634972 1 (native) /home/simark/build/binutils-gdb-one-target/gdb/testsuite/outputs/gdb.base/foll-fork/foll-fork ^M 2 process 634975 1 (native) /home/simark/build/binutils-gdb-one-target/gdb/testsuite/outputs/gdb.base/foll-fork/foll-fork ^M (gdb) PASS: gdb.base/foll-fork.exp: follow-fork-mode=parent: detach-on-fork=off: cmd=next 2: test_follow_fork: info inferiors inferior 2^M [Switching to inferior 2 [process 634975] (/home/simark/build/binutils-gdb-one-target/gdb/testsuite/outputs/gdb.base/foll-fork/foll-fork)]^M [Switching to thread 2.1 (Thread 0x7ffff7c9a740 (LWP 634975))]^M #0 0x00007ffff7d7abf7 in _Fork () from /usr/lib/libc.so.6^M (gdb) PASS: gdb.base/foll-fork.exp: follow-fork-mode=parent: detach-on-fork=off: cmd=next 2: test_follow_fork: inferior 2 continue^M Continuing.^M [Inferior 2 (process 634975) exited normally]^M [Switching to Thread 0x7ffff7c9a740 (LWP 634972)]^M (gdb) PASS: gdb.base/foll-fork.exp: follow-fork-mode=parent: detach-on-fork=off: cmd=next 2: test_follow_fork: continue until exit at continue unfollowed inferior to end break callee^M Breakpoint 2 at 0x555555555160: file /home/simark/src/binutils-gdb/gdb/testsuite/gdb.base/foll-fork.c, line 9.^M (gdb) FAIL: gdb.base/foll-fork.exp: follow-fork-mode=parent: detach-on-fork=off: cmd=next 2: test_follow_fork: break callee What happens here is: - inferior 2 is selected - we continue, leading to inferior 2's exit - we set breakpoint, expect 2 locations, but only one location is resolved Reading between the lines, we understand that inferior 2 got pruned, when it shouldn't have been. The issue can be reproduced by hand with: $ ./gdb -q --data-directory=data-directory testsuite/outputs/gdb.base/foll-fork/foll-fork -ex "set detach-on-fork off" -ex start -ex "next 2" -ex "inferior 2" -ex "set debug infrun" ... Temporary breakpoint 1, main () at /home/simark/src/binutils-gdb/gdb/testsuite/gdb.base/foll-fork.c:14 14 int v = 5; [New inferior 2 (process 637627)] [Thread debugging using libthread_db enabled] Using host libthread_db library "/usr/lib/../lib/libthread_db.so.1". 17 if (pid == 0) /* set breakpoint here */ [Switching to inferior 2 [process 637627] (/home/simark/build/binutils-gdb-one-target/gdb/testsuite/outputs/gdb.base/foll-fork/foll-fork)] [Switching to thread 2.1 (Thread 0x7ffff7c9a740 (LWP 637627))] #0 0x00007ffff7d7abf7 in _Fork () from /usr/lib/libc.so.6 (gdb) continue Continuing. [infrun] clear_proceed_status_thread: 637627.637627.0 [infrun] proceed: enter [infrun] proceed: addr=0xffffffffffffffff, signal=GDB_SIGNAL_DEFAULT [infrun] scoped_disable_commit_resumed: reason=proceeding [infrun] start_step_over: enter [infrun] start_step_over: stealing global queue of threads to step, length = 0 [infrun] operator(): step-over queue now empty [infrun] start_step_over: exit [infrun] proceed: start: resuming threads, all-stop-on-top-of-non-stop [infrun] proceed: resuming 637627.637627.0 [infrun] resume_1: step=0, signal=GDB_SIGNAL_0, trap_expected=0, current thread [637627.637627.0] at 0x7ffff7d7abf7 [infrun] do_target_resume: resume_ptid=637627.637627.0, step=0, sig=GDB_SIGNAL_0 [infrun] infrun_async: enable=1 [infrun] prepare_to_wait: prepare_to_wait [infrun] proceed: end: resuming threads, all-stop-on-top-of-non-stop [infrun] reset: reason=proceeding [infrun] maybe_set_commit_resumed_all_targets: enabling commit-resumed for target native [infrun] maybe_call_commit_resumed_all_targets: calling commit_resumed for target native [infrun] maybe_call_commit_resumed_all_targets: calling commit_resumed for target native [infrun] proceed: exit [infrun] fetch_inferior_event: enter [infrun] scoped_disable_commit_resumed: reason=handling event [infrun] do_target_wait: Found 2 inferiors, starting at #1 [infrun] random_pending_event_thread: None found. [infrun] print_target_wait_results: target_wait (-1.0.0 [process -1], status) = [infrun] print_target_wait_results: 637627.637627.0 [process 637627], [infrun] print_target_wait_results: status->kind = EXITED, exit_status = 0 [infrun] handle_inferior_event: status->kind = EXITED, exit_status = 0 [Inferior 2 (process 637627) exited normally] [infrun] stop_waiting: stop_waiting [infrun] stop_all_threads: start: reason=presenting stop to user in all-stop, inf=-1 [infrun] stop_all_threads: pass=0, iterations=0 [infrun] stop_all_threads: 637624.637624.0 not executing [infrun] stop_all_threads: pass=1, iterations=1 [infrun] stop_all_threads: 637624.637624.0 not executing [infrun] stop_all_threads: done [infrun] stop_all_threads: end: reason=presenting stop to user in all-stop, inf=-1 [Switching to Thread 0x7ffff7c9a740 (LWP 637624)] [infrun] infrun_async: enable=0 [infrun] reset: reason=handling event [infrun] maybe_set_commit_resumed_all_targets: not requesting commit-resumed for target native, no resumed threads (gdb) [infrun] fetch_inferior_event: exit (gdb) info inferiors Num Description Connection Executable * 1 process 637624 1 (native) /home/simark/build/binutils-gdb-one-target/gdb/testsuite/outputs/gdb.base/foll-fork/foll-fork (gdb) i th Id Target Id Frame * 1 Thread 0x7ffff7c9a740 (LWP 637624) "foll-fork" main () at /home/simark/src/binutils-gdb/gdb/testsuite/gdb.base/foll-fork.c:17 After handling the EXITED event for inferior 2, inferior 2 should have stayed the current inferior, which should have prevented it from getting pruned. When debugging, we find that when getting at the prune_inferiors call, the current inferior is inferior 1. Further debugging shows that prior to the call to clean_up_just_stopped_threads_fsms, the current inferior is inferior 2, and after, it's inferior 1. Then, back in fetch_inferior_event, the restore_thread object is disabled, due to: /* If we got a TARGET_WAITKIND_NO_RESUMED event, then the previously selected thread is gone. We have two choices - switch to no thread selected, or restore the previously selected thread (now exited). We chose the later, just because that's what GDB used to do. After this, "info threads" says "The current thread has terminated." instead of "No thread selected.". */ if (!non_stop && cmd_done && ecs->ws.kind () != TARGET_WAITKIND_NO_RESUMED) restore_thread.dont_restore (); So in the end, inferior 1 stays current, and inferior 2 gets wrongfully pruned. I'd say clean_up_just_stopped_threads_fsms is the culprit here. It actually attempts to restore the event_thread to be current at the end, after the loop (I presume the current thread on entry is always supposed to be the event thread). But in this case, the event is of kind EXITED, and ecs->event_thread is not set, so the current inferior isn't restored. Fix that by using scoped_restore_current_thread. If there is no current thread, scoped_restore_current_thread will still restore the current inferior, and that's what we want. Random note: the thread_info object for inferior 2's thread is never freed. It is held (by refcount) by the restore_thread object in fetch_inferior_event, while the inferior's thread list gets cleared, in the exit event processing. When the refcount reaches 0 (when the restore_thread object is destroyed), there's nothing that actually deletes the thread_info object. And I think that nothing in GDB points to it anymore, so it leaks. I don't want to fix that in this patch, but thought it would be good to mention it, in case somebody has an idea for how to fix that. Change-Id: Ibc7df543e2c46aad5f3b9250b28c3fb5912be4e8 --- gdb/infrun.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/gdb/infrun.c b/gdb/infrun.c index acefd4f0bec..531d398d7b8 100644 --- a/gdb/infrun.c +++ b/gdb/infrun.c @@ -4056,12 +4056,19 @@ reinstall_readline_callback_handler_cleanup () static void clean_up_just_stopped_threads_fsms (struct execution_control_state *ecs) { + /* The first clean_up call below assumes the event thread is the current + one. */ + if (ecs->event_thread != nullptr) + gdb_assert (ecs->event_thread == inferior_thread ()); + if (ecs->event_thread != nullptr && ecs->event_thread->thread_fsm () != nullptr) ecs->event_thread->thread_fsm ()->clean_up (ecs->event_thread); if (!non_stop) { + scoped_restore_current_thread restore_thread; + for (thread_info *thr : all_non_exited_threads ()) { if (thr->thread_fsm () == nullptr) @@ -4072,9 +4079,6 @@ clean_up_just_stopped_threads_fsms (struct execution_control_state *ecs) switch_to_thread (thr); thr->thread_fsm ()->clean_up (thr); } - - if (ecs->event_thread != nullptr) - switch_to_thread (ecs->event_thread); } } -- 2.30.2