From: Laurent Morichetti <Laurent.Morichetti@amd.com>
Date: Thu, 14 May 2020 23:59:16 +0000 (-0400)
Subject: gdb: infrun: consume multiple events at each pass in stop_all_threads
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=29d6859f0927bf92fcddb4d519379992399e2ac9;p=binutils-gdb.git

gdb: infrun: consume multiple events at each pass in stop_all_threads

[Simon: I send this patch on behalf of Laurent Morichetti, I added the
 commit message and performance measurement stuff.

 Also, this patch is better viewed with "git show -w".]

stop_all_threads, in infrun.c, is used to stop all running threads on
targets that are always non-stop.  It's used, for example, when the
program hits a breakpoint while GDB is set to "non-stop off".  It sends
a stop request for each running thread, then collects one wait event for
each.

Since new threads can spawn while we are stopping the threads, it's
written in a way where it makes multiple such "send stop requests to
running threads & collect wait events" passes.  The function completes
when it has made two passes where it hasn't seen any running threads.

With the way it's written right now is, it iterates on the thread list,
sending a stop request for each running thread.  It then waits for a
single event, after which it iterates through the thread list again.  It
sends stop requests for any running threads that's been created since
the last iteration.  It then consumes another single wait event.

This makes it so we iterate on O(n^2) threads in total, where n is the
number of threads.  This patch changes the function to reduce it to
O(n).  This starts to have an impact when dealing with multiple
thousands of threads (see numbers below).  At each pass, we know the
number of outstanding stop requests we have sent, for which we need to
collect a stop event.  We can therefore loop to collect this many stop
events before proceeding to the next pass and iterate on the thread list
again.

To check the performance improvements with this patch, I made an
x86/Linux program with a large number of idle threads (varying from 1000
to 10000).  The program's main thread hits a breakpoint once all these
threads have started, which causes stop_all_threads to be called to stop
all these threads.  I measured (by patching stop_all_threads):

- the execution time of stop_all_threads
- the total number of threads we iterate on during the complete
  execution of the function (the total number of times we execute the
  "for (thread_info *t : all_non_exited_threads ())" loop)

These are the execution times, in milliseconds:

    # threads  before  after
         1000     226    106
         2000     997    919
         3000    3461   2323
         4000    4330   3570
         5000    8642   6600
         6000    9918   8039
         7000   12662  10930
         8000   16652  11222
         9000   21561  15875
        10000   26613  20019

Note that I very unscientifically executed each case only once.

These are the number of loop executions:

    # threads     before  after
         1000    1003002   3003
         2000    4006002   6003
         3000    9009002   9003
         4000   16012002  12003
         5000   25015002  15003
         6000   36018002  18003
         7000   49021002  21003
         8000   64024002  24003
         9000   81027002  27003
        10000  100030002  30003

This last table shows pretty well the O(n^2) vs O(n) behaviors.

Reg-tested on x86 GNU/Linux (Ubuntu 16.04).

gdb/ChangeLog:

YYYY-MM-DD  Laurent Morichetti  <Laurent.Morichetti@amd.com>
YYYY-MM-DD  Simon Marchi  <simon.marchi@efficios.com>

	* infrun.c (stop_all_threads): Collect multiple wait events at
	each pass.
---

diff --git a/gdb/ChangeLog b/gdb/ChangeLog
index 5533db5d5c7..42118e731f5 100644
--- a/gdb/ChangeLog
+++ b/gdb/ChangeLog
@@ -1,3 +1,9 @@
+2020-05-14  Laurent Morichetti  <Laurent.Morichetti@amd.com>
+	    Simon Marchi  <simon.marchi@efficios.com>
+
+	* infrun.c (stop_all_threads): Collect multiple wait events at
+	each pass.
+
 2020-05-14  Simon Marchi  <simon.marchi@efficios.com>
 
 	* gdbtypes.h (TYPE_CODE): Remove.  Change all call sites to use
diff --git a/gdb/infrun.c b/gdb/infrun.c
index c3e23a28bdf..601a2acca42 100644
--- a/gdb/infrun.c
+++ b/gdb/infrun.c
@@ -4802,7 +4802,7 @@ stop_all_threads (void)
 			    "iterations=%d\n", pass, iterations);
       while (1)
 	{
-	  int need_wait = 0;
+	  int waits_needed = 0;
 
 	  for (auto *target : all_non_exited_process_targets ())
 	    {
@@ -4849,7 +4849,7 @@ stop_all_threads (void)
 		    }
 
 		  if (t->stop_requested)
-		    need_wait = 1;
+		    waits_needed++;
 		}
 	      else
 		{
@@ -4864,7 +4864,7 @@ stop_all_threads (void)
 		}
 	    }
 
-	  if (!need_wait)
+	  if (waits_needed == 0)
 	    break;
 
 	  /* If we find new threads on the second iteration, restart
@@ -4873,160 +4873,164 @@ stop_all_threads (void)
 	  if (pass > 0)
 	    pass = -1;
 
-	  wait_one_event event = wait_one ();
-
-	  if (debug_infrun)
-	    {
-	      fprintf_unfiltered (gdb_stdlog,
-				  "infrun: stop_all_threads %s %s\n",
-				  target_waitstatus_to_string (&event.ws).c_str (),
-				  target_pid_to_str (event.ptid).c_str ());
-	    }
-
-	  if (event.ws.kind == TARGET_WAITKIND_NO_RESUMED)
-	    {
-	      /* All resumed threads exited.  */
-	    }
-	  else if (event.ws.kind == TARGET_WAITKIND_THREAD_EXITED
-		   || event.ws.kind == TARGET_WAITKIND_EXITED
-		   || event.ws.kind == TARGET_WAITKIND_SIGNALLED)
+	  for (int i = 0; i < waits_needed; i++)
 	    {
-	      /* One thread/process exited/signalled.  */
+	      wait_one_event event = wait_one ();
 
-	      thread_info *t = nullptr;
-
-	      /* The target may have reported just a pid.  If so, try
-		 the first non-exited thread.  */
-	      if (event.ptid.is_pid ())
-		{
-		  int pid  = event.ptid.pid ();
-		  inferior *inf = find_inferior_pid (event.target, pid);
-		  for (thread_info *tp : inf->non_exited_threads ())
-		    {
-		      t = tp;
-		      break;
-		    }
-
-		  /* If there is no available thread, the event would
-		     have to be appended to a per-inferior event list,
-		     which does not exist (and if it did, we'd have
-		     to adjust run control command to be able to
-		     resume such an inferior).  We assert here instead
-		     of going into an infinite loop.  */
-		  gdb_assert (t != nullptr);
-
-		  if (debug_infrun)
-		    fprintf_unfiltered (gdb_stdlog,
-					"infrun: stop_all_threads, using %s\n",
-					target_pid_to_str (t->ptid).c_str ());
-		}
-	      else
+	      if (debug_infrun)
 		{
-		  t = find_thread_ptid (event.target, event.ptid);
-		  /* Check if this is the first time we see this thread.
-		     Don't bother adding if it individually exited.  */
-		  if (t == nullptr
-		      && event.ws.kind != TARGET_WAITKIND_THREAD_EXITED)
-		    t = add_thread (event.target, event.ptid);
+		  fprintf_unfiltered (gdb_stdlog,
+				      "infrun: stop_all_threads %s %s\n",
+				      target_waitstatus_to_string (&event.ws).c_str (),
+				      target_pid_to_str (event.ptid).c_str ());
 		}
 
-	      if (t != nullptr)
+	      if (event.ws.kind == TARGET_WAITKIND_NO_RESUMED)
 		{
-		  /* Set the threads as non-executing to avoid
-		     another stop attempt on them.  */
-		  switch_to_thread_no_regs (t);
-		  mark_non_executing_threads (event.target, event.ptid,
-					      event.ws);
-		  save_waitstatus (t, &event.ws);
-		  t->stop_requested = false;
+		  /* All resumed threads exited.  */
+		  break;
 		}
-	    }
-	  else
-	    {
-	      thread_info *t = find_thread_ptid (event.target, event.ptid);
-	      if (t == NULL)
-		t = add_thread (event.target, event.ptid);
-
-	      t->stop_requested = 0;
-	      t->executing = 0;
-	      t->resumed = false;
-	      t->control.may_range_step = 0;
-
-	      /* This may be the first time we see the inferior report
-		 a stop.  */
-	      inferior *inf = find_inferior_ptid (event.target, event.ptid);
-	      if (inf->needs_setup)
+	      else if (event.ws.kind == TARGET_WAITKIND_THREAD_EXITED
+		       || event.ws.kind == TARGET_WAITKIND_EXITED
+		       || event.ws.kind == TARGET_WAITKIND_SIGNALLED)
 		{
-		  switch_to_thread_no_regs (t);
-		  setup_inferior (0);
-		}
+		  /* One thread/process exited/signalled.  */
 
-	      if (event.ws.kind == TARGET_WAITKIND_STOPPED
-		  && event.ws.value.sig == GDB_SIGNAL_0)
-		{
-		  /* We caught the event that we intended to catch, so
-		     there's no event pending.  */
-		  t->suspend.waitstatus.kind = TARGET_WAITKIND_IGNORE;
-		  t->suspend.waitstatus_pending_p = 0;
+		  thread_info *t = nullptr;
 
-		  if (displaced_step_fixup (t, GDB_SIGNAL_0) < 0)
+		  /* The target may have reported just a pid.  If so, try
+		     the first non-exited thread.  */
+		  if (event.ptid.is_pid ())
 		    {
-		      /* Add it back to the step-over queue.  */
-		      if (debug_infrun)
+		      int pid  = event.ptid.pid ();
+		      inferior *inf = find_inferior_pid (event.target, pid);
+		      for (thread_info *tp : inf->non_exited_threads ())
 			{
-			  fprintf_unfiltered (gdb_stdlog,
-					      "infrun: displaced-step of %s "
-					      "canceled: adding back to the "
-					      "step-over queue\n",
-					      target_pid_to_str (t->ptid).c_str ());
+			  t = tp;
+			  break;
 			}
-		      t->control.trap_expected = 0;
-		      thread_step_over_chain_enqueue (t);
+
+		      /* If there is no available thread, the event would
+			 have to be appended to a per-inferior event list,
+			 which does not exist (and if it did, we'd have
+			 to adjust run control command to be able to
+			 resume such an inferior).  We assert here instead
+			 of going into an infinite loop.  */
+		      gdb_assert (t != nullptr);
+
+		      if (debug_infrun)
+			fprintf_unfiltered (gdb_stdlog,
+					    "infrun: stop_all_threads, using %s\n",
+					    target_pid_to_str (t->ptid).c_str ());
+		    }
+		  else
+		    {
+		      t = find_thread_ptid (event.target, event.ptid);
+		      /* Check if this is the first time we see this thread.
+			 Don't bother adding if it individually exited.  */
+		      if (t == nullptr
+			  && event.ws.kind != TARGET_WAITKIND_THREAD_EXITED)
+			t = add_thread (event.target, event.ptid);
+		    }
+
+		  if (t != nullptr)
+		    {
+		      /* Set the threads as non-executing to avoid
+			 another stop attempt on them.  */
+		      switch_to_thread_no_regs (t);
+		      mark_non_executing_threads (event.target, event.ptid,
+						  event.ws);
+		      save_waitstatus (t, &event.ws);
+		      t->stop_requested = false;
 		    }
 		}
 	      else
 		{
-		  enum gdb_signal sig;
-		  struct regcache *regcache;
+		  thread_info *t = find_thread_ptid (event.target, event.ptid);
+		  if (t == NULL)
+		    t = add_thread (event.target, event.ptid);
 
-		  if (debug_infrun)
+		  t->stop_requested = 0;
+		  t->executing = 0;
+		  t->resumed = false;
+		  t->control.may_range_step = 0;
+
+		  /* This may be the first time we see the inferior report
+		     a stop.  */
+		  inferior *inf = find_inferior_ptid (event.target, event.ptid);
+		  if (inf->needs_setup)
 		    {
-		      std::string statstr = target_waitstatus_to_string (&event.ws);
-
-		      fprintf_unfiltered (gdb_stdlog,
-					  "infrun: target_wait %s, saving "
-					  "status for %d.%ld.%ld\n",
-					  statstr.c_str (),
-					  t->ptid.pid (),
-					  t->ptid.lwp (),
-					  t->ptid.tid ());
+		      switch_to_thread_no_regs (t);
+		      setup_inferior (0);
 		    }
 
-		  /* Record for later.  */
-		  save_waitstatus (t, &event.ws);
-
-		  sig = (event.ws.kind == TARGET_WAITKIND_STOPPED
-			 ? event.ws.value.sig : GDB_SIGNAL_0);
-
-		  if (displaced_step_fixup (t, sig) < 0)
+		  if (event.ws.kind == TARGET_WAITKIND_STOPPED
+		      && event.ws.value.sig == GDB_SIGNAL_0)
 		    {
-		      /* Add it back to the step-over queue.  */
-		      t->control.trap_expected = 0;
-		      thread_step_over_chain_enqueue (t);
+		      /* We caught the event that we intended to catch, so
+			 there's no event pending.  */
+		      t->suspend.waitstatus.kind = TARGET_WAITKIND_IGNORE;
+		      t->suspend.waitstatus_pending_p = 0;
+
+		      if (displaced_step_fixup (t, GDB_SIGNAL_0) < 0)
+			{
+			  /* Add it back to the step-over queue.  */
+			  if (debug_infrun)
+			    {
+			      fprintf_unfiltered (gdb_stdlog,
+						  "infrun: displaced-step of %s "
+						  "canceled: adding back to the "
+						  "step-over queue\n",
+						  target_pid_to_str (t->ptid).c_str ());
+			    }
+			  t->control.trap_expected = 0;
+			  thread_step_over_chain_enqueue (t);
+			}
 		    }
+		  else
+		    {
+		      enum gdb_signal sig;
+		      struct regcache *regcache;
 
-		  regcache = get_thread_regcache (t);
-		  t->suspend.stop_pc = regcache_read_pc (regcache);
+		      if (debug_infrun)
+			{
+			  std::string statstr = target_waitstatus_to_string (&event.ws);
 
-		  if (debug_infrun)
-		    {
-		      fprintf_unfiltered (gdb_stdlog,
-					  "infrun: saved stop_pc=%s for %s "
-					  "(currently_stepping=%d)\n",
-					  paddress (target_gdbarch (),
-						    t->suspend.stop_pc),
-					  target_pid_to_str (t->ptid).c_str (),
-					  currently_stepping (t));
+			  fprintf_unfiltered (gdb_stdlog,
+					      "infrun: target_wait %s, saving "
+					      "status for %d.%ld.%ld\n",
+					      statstr.c_str (),
+					      t->ptid.pid (),
+					      t->ptid.lwp (),
+					      t->ptid.tid ());
+			}
+
+		      /* Record for later.  */
+		      save_waitstatus (t, &event.ws);
+
+		      sig = (event.ws.kind == TARGET_WAITKIND_STOPPED
+			     ? event.ws.value.sig : GDB_SIGNAL_0);
+
+		      if (displaced_step_fixup (t, sig) < 0)
+			{
+			  /* Add it back to the step-over queue.  */
+			  t->control.trap_expected = 0;
+			  thread_step_over_chain_enqueue (t);
+			}
+
+		      regcache = get_thread_regcache (t);
+		      t->suspend.stop_pc = regcache_read_pc (regcache);
+
+		      if (debug_infrun)
+			{
+			  fprintf_unfiltered (gdb_stdlog,
+					      "infrun: saved stop_pc=%s for %s "
+					      "(currently_stepping=%d)\n",
+					      paddress (target_gdbarch (),
+							t->suspend.stop_pc),
+					      target_pid_to_str (t->ptid).c_str (),
+					      currently_stepping (t));
+			}
 		    }
 		}
 	    }