--- /dev/null
+/* This testcase is part of GDB, the GNU debugger.
+
+ Copyright 2018 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
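+
+/* The number of worker threads that main will create. */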
+#define NUM_THREADS 4
+
+/* Crude spin lock. Threads all spin until this is set to 0. */
+int go = 1;
+
+/* Thread function, just spin until GO is set to 0. */
+void *
+perform_work (void *argument)
+{
+ /* Cast to volatile to ensure that ARGUMENT is loaded each time around
+ the loop. */
+ while (*((volatile int*) argument))
+ {
+ /* Nothing. */
+ }
+ return NULL;
+}
+
+/* The spin loop for the main thread. */
+void
+function (void)
+{
+ (void) perform_work (&go);
+ printf ("Finished from function\n");
+}
+
+/* Main program, create some threads which all spin waiting for GO to be
+ set to 0. */
+int
+main (void)
+{
+ pthread_t threads[NUM_THREADS];
+ int result_code;
+ unsigned index;
+
+ /* Create some threads. */
+ for (index = 0; index < NUM_THREADS; ++index)
+ {
+ printf ("In main: creating thread %d\n", index);
+ result_code = pthread_create (&threads[index], NULL, perform_work, &go);
+ assert (!result_code);
+ }
+
+ function ();
+
+ /* Wait for each thread to complete. */
+ for (index = 0; index < NUM_THREADS; ++index)
+ {
+ /* Block until thread INDEX completes. */
+ result_code = pthread_join (threads[index], NULL);
+ assert (!result_code);
+ printf ("In main: thread %d has completed\n", index);
+ }
+ printf ("In main: All threads completed successfully\n");
+ return 0;
+}
--- /dev/null
+# Copyright 2018 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# This test script tries to expose a bug in some of the uses of
+# waitpid in the Linux native support within GDB. The problem was
+# spotted on systems which were heavily loaded when attaching to
+# threaded test programs. What happened was that during the initial
+# attach, the loop of waitpid calls that normally received the stop
+# events from each of the threads in the inferior was not receiving a
+# stop event for some threads (the kernel just hadn't sent the stop
+# event yet).
+#
+# GDB would then trigger a call to stop_all_threads, which would
+# continue to wait for all of the outstanding threads to stop. When
+# the outstanding stop events finally arrived, GDB would
+# (incorrectly) discard each stop event, resume the thread, and
+# continue to wait for the thread to stop... which it now never
+# would.
+#
+# To expose this issue reliably, this test preloads a library that
+# intercepts waitpid calls. All waitpid calls targeting pid -1 with
+# the WNOHANG flag are rate limited so that only one per second can
+# complete. Any additional calls are forced to return 0, indicating
+# that no event is waiting. This is enough to trigger the bug during
+# the attach phase.
+
+# This test only works on Linux.
+if { ![isnative] || [is_remote host] || [use_gdb_stub]
+ || ![istarget *-linux*] } {
+ continue
+}
+
+standard_testfile
+
+set libfile slow-waitpid
+set libsrc "${srcdir}/${subdir}/${libfile}.c"
+set libobj [standard_output_file ${libfile}.so]
+
+with_test_prefix "compile preload library" {
+ # Compile the preload library. We only get away with this as we
+ # limit this test to running when ISNATIVE is true.
+ if { [gdb_compile_shlib_pthreads \
+ $libsrc $libobj {debug}] != "" } {
+ return -1
+ }
+}
+
+with_test_prefix "compile test executable" {
+ # Compile the test program.
+ if { [gdb_compile_pthreads \
+ "${srcdir}/${subdir}/${srcfile}" "${binfile}" \
+ executable {debug}] != "" } {
+ return -1
+ }
+}
+
+# Spawn GDB with the library LIB preloaded using LD_PRELOAD.
+
+proc gdb_spawn_with_ld_preload {lib} {
+ global env
+
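+ # Temporarily extend LD_PRELOAD in the environment; save_vars
+ # restores the original value once GDB has been started.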
+ save_vars { env(LD_PRELOAD) } {
+ if { ![info exists env(LD_PRELOAD)]
+ || $env(LD_PRELOAD) == "" } {
+ set env(LD_PRELOAD) "$lib"
+ } else {
+ append env(LD_PRELOAD) ":$lib"
+ }
+
+ gdb_start
+ }
+}
+
+# Run test program in the background.
+set test_spawn_id [spawn_wait_for_attach $binfile]
+set testpid [spawn_id_get_pid $test_spawn_id]
+
+# Start GDB with preload library in place.
+gdb_spawn_with_ld_preload $libobj
+
+# Load binary, and attach to running program.
+gdb_load ${binfile}
+gdb_test "attach $testpid" "Attaching to program.*" "attach to target"
+
+gdb_exit
+
+# Kill off the test program.
+kill_wait_spawned_process $test_spawn_id
--- /dev/null
+/* This testcase is part of GDB, the GNU debugger.
+
+ Copyright 2018 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* This file contains a library that can be preloaded into GDB on Linux
+ using the LD_PRELOAD technique.
+
+ The library intercepts calls to WAITPID and SIGSUSPEND in order to
+ simulate the behaviour of a heavily loaded kernel.
+
+ When GDB wants to stop all threads in an inferior, each thread is sent a
+ SIGSTOP. GDB then waits, with a waitpid call, for each signal to be
+ received by the corresponding thread.
+
+ If the kernel is slow either in delivering the signal, or in making the
+ result available to the waitpid call, then GDB enters a sigsuspend call
+ in order to wait for the inferior threads to change state; this is
+ signalled to GDB with a SIGCHLD.
+
+ A bug in GDB meant that in some cases we would deadlock during this
+ process. This was rarely seen as the kernel is usually quick at
+ delivering signals and making the results available to waitpid, so quick
+ that GDB would gather the statuses from all inferior threads in the
+ original pass.
+
+ The idea in this library is to rate limit calls to waitpid (where pid is
+ -1 and the WNOHANG option is set) so that only 1 per second can return
+ an answer. Any additional calls will report that no threads are
+ currently ready. This should match the behaviour we see on a slow
+ kernel.
+
+ However, given that when using this library the kernel usually does
+ have the waitpid result ready, the kernel will never send GDB a
+ SIGCHLD. This means that when GDB enters sigsuspend it will block
+ forever. Alternatively, if GDB enters its polling loop, the lack of a
+ SIGCHLD means that we will never see an event on the child threads.
+ To resolve these problems the library also intercepts calls to
+ sigsuspend and forces the call to exit if there is a pending waitpid
+ result. Also, when we know that there's a waitpid result that we've
+ held back, we create a new thread which, after a short delay, will
+ send GDB a SIGCHLD. */
+
+#define _GNU_SOURCE
+
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <stdarg.h>
+#include <signal.h>
+#include <errno.h>
+#include <pthread.h>
+#include <unistd.h>
+
+/* Logging. */
+
+static void
+log_msg (const char *fmt, ...)
+{
+#ifdef LOGGING
+ va_list ap;
+
+ va_start (ap, fmt);
+ vfprintf (stderr, fmt, ap);
+ va_end (ap);
+#endif /* LOGGING */
+}
+
+/* Error handling, message and exit. */
+
+static void
+error (const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start (ap, fmt);
+ vfprintf (stderr, fmt, ap);
+ va_end (ap);
+
+ exit (EXIT_FAILURE);
+}
+
+/* Cache the result of a waitpid call that has not been reported back to
+ GDB yet. We only ever cache a single result. Once we have a result
+ cached then later calls to waitpid with the WNOHANG option will return a
+ result of 0. */
+
+static struct
+{
+ /* Flag to indicate when we have a result cached. */
+ int cached_p;
+
+ /* The cached result fields from a waitpid call. */
+ pid_t pid;
+ int wstatus;
+} cached_wait_status;
+
+/* Lock to hold when modifying SIGNAL_THREAD_ACTIVE_P. */
+
+static pthread_mutex_t thread_creation_lock_obj = PTHREAD_MUTEX_INITIALIZER;
+#define thread_creation_lock (&thread_creation_lock_obj)
+
+/* This flag is only modified while holding the THREAD_CREATION_LOCK mutex.
+ When this flag is true then there is a signal thread alive that will be
+ sending a SIGCHLD at some point in the future. */
+
+static int signal_thread_active_p;
+
+/* When we last allowed a waitpid to complete. */
+
+static struct timeval last_waitpid_time = { 0, 0 };
+
+/* The number of seconds that must elapse between calls to waitpid where
+ the pid is -1 and the WNOHANG option is set. If calls occur faster than
+ this then we force a result of 0 to be returned from waitpid. */
+
+#define WAITPID_MIN_TIME (1)
+
+/* Return true (non-zero) if we should skip this call to waitpid, or false
+ (zero) if this waitpid call should be handled with a call to the "real"
+ waitpid function. Allows 1 waitpid call per second. */
+
+static int
+should_skip_waitpid (void)
+{
+ struct timeval *tv = &last_waitpid_time;
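+
+ /* First call; record the current time and let this waitpid call
+ through to the real waitpid. */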
+ if (tv->tv_sec == 0)
+ {
+ if (gettimeofday (tv, NULL) < 0)
+ error ("error: gettimeofday failed\n");
+ return 0; /* Don't skip. */
+ }
+ else
+ {
+ struct timeval new_tv;
+
+ if (gettimeofday (&new_tv, NULL) < 0)
+ error ("error: gettimeofday failed\n");
+
+ if ((new_tv.tv_sec - tv->tv_sec) < WAITPID_MIN_TIME)
+ return 1; /* Skip. */
+
+ *tv = new_tv;
+ }
+
+ /* Don't skip. */
+ return 0;
+}
+
+/* Perform a real waitpid call. */
+
+static pid_t
+real_waitpid (pid_t pid, int *wstatus, int options)
+{
+ typedef pid_t (*fptr_t) (pid_t, int *, int);
+ static fptr_t real_func = NULL;
+
+ if (real_func == NULL)
+ {
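+ /* Look up the next definition of "waitpid" in the link map,
+ i.e. the real waitpid that this library is wrapping. */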
+ real_func = (fptr_t) dlsym (RTLD_NEXT, "waitpid");
+ if (real_func == NULL)
+ error ("error: failed to find real waitpid\n");
+ }
+
+ return (*real_func) (pid, wstatus, options);
+}
+
+/* Thread worker created when we cache a waitpid result. Delays for a
+ short period of time and then sends SIGCHLD to the GDB process. This
+ should trigger GDB to call waitpid again, at which point we will make
+ the cached waitpid result available. */
+
+static void *
+send_sigchld_thread (void *arg)
+{
+ /* Delay one second longer than WAITPID_MIN_TIME so that there can be no
+ chance that a call to SHOULD_SKIP_WAITPID will return true once the
+ SIGCHLD is delivered and handled. */
+ sleep (WAITPID_MIN_TIME + 1);
+
+ pthread_mutex_lock (thread_creation_lock);
+ signal_thread_active_p = 0;
+
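+ /* Only signal GDB if the cached wait status has not already been
+ handed back by a later waitpid call. */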
+ if (cached_wait_status.cached_p)
+ {
+ log_msg ("signal-thread: sending SIGCHLD\n");
+ kill (getpid (), SIGCHLD);
+ }
+
+ pthread_mutex_unlock (thread_creation_lock);
+ return NULL;
+}
+
+/* The waitpid entry point function. */
+
+pid_t
+waitpid (pid_t pid, int *wstatus, int options)
+{
+ log_msg ("waitpid: waitpid (%d, %p, 0x%x)\n", pid, wstatus, options);
+
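+ /* Only the "wait for any child" polling calls (PID of -1 with the
+ WNOHANG option set) are ever rate limited; all other calls are
+ answered immediately, either from the cache below or by the real
+ waitpid. */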
+ if ((options & WNOHANG) != 0
+ && pid == -1
+ && should_skip_waitpid ())
+ {
+ if (!cached_wait_status.cached_p)
+ {
+ /* Do the waitpid call, but hold the result back. */
+ pid_t tmp_pid;
+ int tmp_wstatus;
+
+ tmp_pid = real_waitpid (-1, &tmp_wstatus, options);
+ if (tmp_pid > 0)
+ {
+ log_msg ("waitpid: delaying waitpid result (pid = %d)\n",
+ tmp_pid);
+
+ /* Cache the result. */
+ cached_wait_status.pid = tmp_pid;
+ cached_wait_status.wstatus = tmp_wstatus;
+ cached_wait_status.cached_p = 1;
+
+ /* Is there already a thread around that will be sending a signal
+ in the near future? This check prevents us from creating one
+ thread per call to waitpid when the calls occur in a sequence. */
+ pthread_mutex_lock (thread_creation_lock);
+ if (!signal_thread_active_p)
+ {
+ sigset_t old_ss, new_ss;
+ pthread_t thread_id;
+ pthread_attr_t attr;
+
+ /* Create the new signal sending thread in detached
+ state. This means that the thread doesn't need to be
+ pthread_join'ed, which is fine as there's no result
+ we care about. */
+ pthread_attr_init (&attr);
+ pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
+
+ /* Ensure the signal sending thread has all signals
+ blocked. We don't want any signals sent to GDB to be
+ handled in that thread. */
+ sigfillset (&new_ss);
+ sigprocmask (SIG_BLOCK, &new_ss, &old_ss);
+
+ log_msg ("waitpid: spawn thread to signal us\n");
+ if (pthread_create (&thread_id, &attr,
+ send_sigchld_thread, NULL) != 0)
+ error ("error: pthread_create failed\n");
+
+ signal_thread_active_p = 1;
+ sigprocmask (SIG_SETMASK, &old_ss, NULL);
+ pthread_attr_destroy (&attr);
+ }
+
+ pthread_mutex_unlock (thread_creation_lock);
+ }
+ }
+
+ log_msg ("waitpid: skipping\n");
+ return 0;
+ }
+
+ /* If we have a cached result that is a suitable reply for this call to
+ waitpid then send that cached result back now. */
+ if (cached_wait_status.cached_p
+ && (pid == -1 || pid == cached_wait_status.pid))
+ {
+ pid_t cached_pid = cached_wait_status.pid;
+
+ log_msg ("waitpid: return cached result (%d)\n", cached_pid);
+ *wstatus = cached_wait_status.wstatus;
+ cached_wait_status.cached_p = 0;
+ return cached_pid;
+ }
+
+ log_msg ("waitpid: real waitpid call\n");
+ return real_waitpid (pid, wstatus, options);
+}
+
+/* Perform a real sigsuspend call. */
+
+static int
+real_sigsuspend (const sigset_t *mask)
+{
+ typedef int (*fptr_t) (const sigset_t *);
+ static fptr_t real_func = NULL;
+
+ if (real_func == NULL)
+ {
+ real_func = (fptr_t) dlsym (RTLD_NEXT, "sigsuspend");
+ if (real_func == NULL)
+ error ("error: failed to find real sigsuspend\n");
+ }
+
+ return (*real_func) (mask);
+}
+
+/* The sigsuspend entry point function. */
+
+int
+sigsuspend (const sigset_t *mask)
+{
+ log_msg ("sigsuspend: sigsuspend (0x%p)\n", ((void *) mask));
+
+ /* If SIGCHLD is _not_ in MASK, and is therefore deliverable, and we
+ have a pending wait status, then pretend that a signal arrived. We
+ will have a thread alive that is going to deliver a signal, but doing
+ this boosts the speed as we don't have to wait for that signal. If
+ the signal does end up being delivered then it should be harmless;
+ we'll just perform an additional waitpid call. */
+ if (!sigismember (mask, SIGCHLD))
+ {
+ if (cached_wait_status.cached_p)
+ {
+ log_msg ("sigsuspend: interrupt for cached waitstatus\n");
+ last_waitpid_time.tv_sec = 0;
+ last_waitpid_time.tv_usec = 0;
+ errno = EINTR;
+ return -1;
+ }
+ }
+
+ log_msg ("sigsuspend: real sigsuspend call\n");
+ return real_sigsuspend (mask);
+}