From b3e3a4c11496dca710c62e32db80e27dd7301223 Mon Sep 17 00:00:00 2001 From: Simon Marchi Date: Sat, 4 Jul 2020 13:33:19 +0100 Subject: [PATCH] Fix GDB busy loop when interrupting non-stop program (PR 26199) When interrupting a program in non-stop mode, the program gets interrupted correctly, but GDB busy loops (the event loop is always woken up). Here is how to reproduce it: 1. Start GDB: ./gdb -nx --data-directory=data-directory -ex "set non-stop 1" --args /bin/sleep 60 2. Run the program with "run" 3. Interrupt with ^C. 4. Look into htop, see GDB taking 100% CPU. Debugging `handle_file_event`, we see that the event source that wakes up the event loop is the linux-nat one: (top-gdb) p file_ptr.proc $5 = (handler_func *) 0xb9cccd <handle_target_event(int, gdb_client_data)> ^^^^^^^^^^^^^^^^^^^ | \-- the linux-nat callback Debugging fetch_inferior_event and do_target_wait, we see that we don't actually call `wait` on the linux-nat target, because inferior_matches returns false: auto inferior_matches = [&wait_ptid] (inferior *inf) { return (inf->process_target () != NULL && (threads_are_executing (inf->process_target ()) || threads_are_resumed_pending_p (inf)) && ptid_t (inf->pid).matches (wait_ptid)); }; because `threads_are_executing` is false. What happens is: 1. The user types ctrl-c, which writes to the linux-nat pipe, waking up the event source. 2. linux-nat's wait gets called, the SIGINT event is returned, but before returning, it marks the pipe again, in order for wait to get called again: /* If we requested any event, and something came out, assume there may be more. If we requested a specific lwp or process, also assume there may be more. */ if (target_is_async_p () && ((ourstatus->kind != TARGET_WAITKIND_IGNORE && ourstatus->kind != TARGET_WAITKIND_NO_RESUMED) || ptid != minus_one_ptid)) async_file_mark (); 3. The SIGINT event is handled, the program is stopped, the stop notification is printed. 4. The event loop is woken up again because of the `async_file_mark` of step 2. 5. 
Because `inferior_matches` returns false, we never call linux-nat's wait, so the pipe stays readable. 6. Goto 4. Pedro says: This commit fixes it by letting do_target_wait call target_wait even if threads_are_executing is false. This will normally result in the target returning TARGET_WAITKIND_NO_RESUMED, and _not_ marking its event source again. This results in infrun calling into the target only once (i.e., breaking the busy loop). Note that the busy loop bug didn't trigger in all-stop mode because all-stop handles this by unregistering the target from the event loop as soon as it was all stopped -- see inf-loop.c:inferior_event_handler's INF_EXEC_COMPLETE handling. If we remove that non-stop check from inferior_event_handler, and replace the target_has_execution check with a threads_are_executing check, it also fixes the issue for non-stop. I considered that as the final solution, but decided that the solution proposed here is a simpler and more future-proof design. With the TARGET_WAITKIND_NO_RESUMED handling fixes done in the previous patches, I think it should be possible to always keep the target registered in the event loop, meaning we could eliminate the target_async(0) call from inferior_event_handler as well as most of the target_async(1) calls in the target backends. That would allow, in the future, e.g., the remote target to report asynchronous notifications even if all threads are stopped. I haven't attempted that, though. gdb/ChangeLog: yyyy-mm-dd Simon Marchi Pedro Alves PR gdb/26199 * infrun.c (threads_are_resumed_pending_p): Delete. (do_target_wait): Remove threads_are_executing and threads_are_resumed_pending_p checks from the inferior_matches lambda. Update comments. 
--- gdb/ChangeLog | 9 +++++++++ gdb/infrun.c | 38 +++++++++++--------------------------- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/gdb/ChangeLog b/gdb/ChangeLog index e35b276309d..59db3402186 100644 --- a/gdb/ChangeLog +++ b/gdb/ChangeLog @@ -1,3 +1,12 @@ +2020-07-10 Simon Marchi + Pedro Alves + + PR gdb/26199 + * infrun.c (threads_are_resumed_pending_p): Delete. + (do_target_wait): Remove threads_are_executing and + threads_are_resumed_pending_p checks from the inferior_matches + lambda. Update comments. + 2020-07-10 Pedro Alves PR gdb/26199 diff --git a/gdb/infrun.c b/gdb/infrun.c index 158b1990694..31266109a6d 100644 --- a/gdb/infrun.c +++ b/gdb/infrun.c @@ -3601,23 +3601,9 @@ do_target_wait_1 (inferior *inf, ptid_t ptid, return event_ptid; } -/* Returns true if INF has any resumed thread with a status - pending. */ - -static bool -threads_are_resumed_pending_p (inferior *inf) -{ - for (thread_info *tp : inf->non_exited_threads ()) - if (tp->resumed - && tp->suspend.waitstatus_pending_p) - return true; - - return false; -} - /* Wrapper for target_wait that first checks whether threads have pending statuses to report before actually asking the target for - more events. Polls for events from all inferiors/targets. */ + more events. Polls for events from all inferiors/targets. */ static bool do_target_wait (ptid_t wait_ptid, execution_control_state *ecs, int options) @@ -3625,20 +3611,18 @@ do_target_wait (ptid_t wait_ptid, execution_control_state *ecs, int options) int num_inferiors = 0; int random_selector; - /* For fairness, we pick the first inferior/target to poll at - random, and then continue polling the rest of the inferior list - starting from that one in a circular fashion until the whole list - is polled once. 
*/ + /* For fairness, we pick the first inferior/target to poll at random + out of all inferiors that may report events, and then continue + polling the rest of the inferior list starting from that one in a + circular fashion until the whole list is polled once. */ auto inferior_matches = [&wait_ptid] (inferior *inf) { return (inf->process_target () != NULL - && (threads_are_executing (inf->process_target ()) - || threads_are_resumed_pending_p (inf)) && ptid_t (inf->pid).matches (wait_ptid)); }; - /* First see how many resumed inferiors we have. */ + /* First see how many matching inferiors we have. */ for (inferior *inf : all_inferiors ()) if (inferior_matches (inf)) num_inferiors++; @@ -3649,7 +3633,7 @@ do_target_wait (ptid_t wait_ptid, execution_control_state *ecs, int options) return false; } - /* Now randomly pick an inferior out of those that were resumed. */ + /* Now randomly pick an inferior out of those that matched. */ random_selector = (int) ((num_inferiors * (double) rand ()) / (RAND_MAX + 1.0)); @@ -3658,7 +3642,7 @@ do_target_wait (ptid_t wait_ptid, execution_control_state *ecs, int options) "infrun: Found %d inferiors, starting at #%d\n", num_inferiors, random_selector); - /* Select the Nth inferior that was resumed. */ + /* Select the Nth inferior that matched. */ inferior *selected = nullptr; @@ -3670,7 +3654,7 @@ do_target_wait (ptid_t wait_ptid, execution_control_state *ecs, int options) break; } - /* Now poll for events out of each of the resumed inferior's + /* Now poll for events out of each of the matching inferior's targets, starting from the selected one. 
*/ auto do_wait = [&] (inferior *inf) @@ -3680,8 +3664,8 @@ do_target_wait (ptid_t wait_ptid, execution_control_state *ecs, int options) return (ecs->ws.kind != TARGET_WAITKIND_IGNORE); }; - /* Needed in all-stop+target-non-stop mode, because we end up here - spuriously after the target is all stopped and we've already + /* Needed in 'all-stop + target-non-stop' mode, because we end up + here spuriously after the target is all stopped and we've already reported the stop to the user, polling for events. */ scoped_restore_current_thread restore_thread; -- 2.30.2