libstdc++: Optimise std::future::wait_for and fix futex polling
author Jonathan Wakely <jwakely@redhat.com>
Thu, 12 Nov 2020 21:25:14 +0000 (21:25 +0000)
committer Jonathan Wakely <jwakely@redhat.com>
Thu, 12 Nov 2020 23:47:04 +0000 (23:47 +0000)
To poll a std::future to see if it's ready you have to call one of the
timed waiting functions. The most obvious way is wait_for(0s) but this
was previously very inefficient because it would turn the relative
timeout to an absolute one by calling system_clock::now(). When the
relative timeout is zero (or less) we're obviously going to get a time
that has already passed, but the overhead of obtaining the current time
can be dozens of microseconds. The alternative is to call wait_until
with an absolute timeout that is in the past. If you know the clock's
epoch is in the past you can use a default constructed time_point.
Alternatively, using some_clock::time_point::min() gives the earliest
time point supported by the clock, which should be safe to assume is in
the past. However, using a futex wait with an absolute timeout before
the UNIX epoch fails and sets errno=EINVAL. The new code using futex
waits with absolute timeouts was not checking for this case, which could
result in hangs (or killing the process if the library is built with
assertions enabled).

This patch checks for times before the epoch before attempting to wait
on a futex with an absolute timeout, which fixes the hangs or crashes.
It also makes it very fast to poll using an absolute timeout before the
epoch (because we skip the futex syscall).

It also makes future::wait_for avoid waiting at all when the relative
timeout is zero or less, to avoid the unnecessary overhead of getting
the current time. This makes polling with wait_for(0s) take only a few
cycles instead of dozens of microseconds.

libstdc++-v3/ChangeLog:

* include/std/future (future::wait_for): Do not wait for
durations less than or equal to zero.
* src/c++11/futex.cc (_M_futex_wait_until)
(_M_futex_wait_until_steady): Do not wait for timeouts before
the epoch.
* testsuite/30_threads/future/members/poll.cc: New test.

libstdc++-v3/include/std/future
libstdc++-v3/src/c++11/futex.cc
libstdc++-v3/testsuite/30_threads/future/members/poll.cc [new file with mode: 0644]

index 5d948018c75c3dd1bc995101fb446c1dd640e4b4..f7617cac8e932a2929bd24d3e2c4dffd8c88e5b1 100644 (file)
@@ -345,10 +345,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          // to synchronize with the thread that made it ready.
          if (_M_status._M_load(memory_order_acquire) == _Status::__ready)
            return future_status::ready;
+
          if (_M_is_deferred_future())
            return future_status::deferred;
-         if (_M_status._M_load_when_equal_for(_Status::__ready,
-             memory_order_acquire, __rel))
+
+         // Don't wait unless the relative time is greater than zero.
+         if (__rel > __rel.zero()
+             && _M_status._M_load_when_equal_for(_Status::__ready,
+                                                 memory_order_acquire,
+                                                 __rel))
            {
              // _GLIBCXX_RESOLVE_LIB_DEFECTS
              // 2100.  timed waiting functions must also join
@@ -377,10 +382,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          // to synchronize with the thread that made it ready.
          if (_M_status._M_load(memory_order_acquire) == _Status::__ready)
            return future_status::ready;
+
          if (_M_is_deferred_future())
            return future_status::deferred;
+
          if (_M_status._M_load_when_equal_until(_Status::__ready,
-             memory_order_acquire, __abs))
+                                                memory_order_acquire,
+                                                __abs))
            {
              // _GLIBCXX_RESOLVE_LIB_DEFECTS
              // 2100.  timed waiting functions must also join
index 0331bd6df64f43a9ff22cd42f2b980c50178ccae..57f7dfe87e9e27b3128db6ddb045c5fcd47ef07c 100644 (file)
@@ -78,6 +78,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
            struct timespec rt;
            rt.tv_sec = __s.count();
            rt.tv_nsec = __ns.count();
+
+           // futex sets errno=EINVAL for absolute timeouts before the epoch.
+           if (__builtin_expect(rt.tv_sec < 0, false))
+             return false;
+
            if (syscall (SYS_futex, __addr,
                         futex_wait_bitset_op | futex_clock_realtime_flag,
                         __val, &rt, nullptr, futex_bitset_match_any) == -1)
@@ -151,6 +156,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
            rt.tv_sec = __s.count();
            rt.tv_nsec = __ns.count();
 
+           // futex sets errno=EINVAL for absolute timeouts before the epoch.
+           if (__builtin_expect(rt.tv_sec < 0, false))
+             return false;
+
            if (syscall (SYS_futex, __addr,
                         futex_wait_bitset_op | futex_clock_monotonic_flag,
                         __val, &rt, nullptr, futex_bitset_match_any) == -1)
diff --git a/libstdc++-v3/testsuite/30_threads/future/members/poll.cc b/libstdc++-v3/testsuite/30_threads/future/members/poll.cc
new file mode 100644 (file)
index 0000000..5458057
--- /dev/null
@@ -0,0 +1,103 @@
+// Copyright (C) 2020 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-O3" }
+// { dg-do run { target c++11 } }
+
+#include <future>
+#include <chrono>
+#include <iostream>
+#include <testsuite_hooks.h>
+
+const int iterations = 200;
+
+using namespace std;
+
+template<typename Duration>
+double // Returns the average time per call in nanoseconds (total / iterations).
+print(const char* desc, Duration dur)
+{
+  auto ns = chrono::duration_cast<chrono::nanoseconds>(dur).count(); // total elapsed time as an integer nanosecond count
+  double d = double(ns) / iterations; // mean per call over the file-level iteration count
+  cout << desc << ": " << ns << "ns for " << iterations
+    << " calls, avg " << d << "ns per call\n"; // one report line per measurement, labelled with desc
+  return d;
+}
+
+int main()
+{
+  promise<int> p;
+  future<int> f = p.get_future();
+
+  auto start = chrono::high_resolution_clock::now();
+  for(int i = 0; i < iterations; i++)
+    f.wait_for(chrono::seconds(0));
+  auto stop = chrono::high_resolution_clock::now();
+  double wait_for_0 = print("wait_for(0s)", stop - start);
+
+  start = chrono::high_resolution_clock::now();
+  for(int i = 0; i < iterations; i++)
+    f.wait_until(chrono::system_clock::time_point());
+  stop = chrono::high_resolution_clock::now();
+  double wait_until_sys_epoch __attribute__((unused))
+    = print("wait_until(system_clock epoch)", stop - start);
+
+  start = chrono::high_resolution_clock::now();
+  for(int i = 0; i < iterations; i++)
+    f.wait_until(chrono::steady_clock::time_point());
+  stop = chrono::high_resolution_clock::now();
+  double wait_until_steady_epoch __attribute__((unused))
+    = print("wait_until(steady_clock epoch", stop - start);
+
+  start = chrono::high_resolution_clock::now();
+  for(int i = 0; i < iterations; i++)
+    f.wait_until(chrono::system_clock::time_point::min());
+  stop = chrono::high_resolution_clock::now();
+  double wait_until_sys_min __attribute__((unused))
+    = print("wait_until(system_clock minimum)", stop - start);
+
+  start = chrono::high_resolution_clock::now();
+  for(int i = 0; i < iterations; i++)
+    f.wait_until(chrono::steady_clock::time_point::min());
+  stop = chrono::high_resolution_clock::now();
+  double wait_until_steady_min __attribute__((unused))
+    = print("wait_until(steady_clock minimum)", stop - start);
+
+  p.set_value(1);
+
+  start = chrono::high_resolution_clock::now();
+  for(int i = 0; i < iterations; i++)
+    f.wait_for(chrono::seconds(0));
+  stop = chrono::high_resolution_clock::now();
+  double ready = print("wait_for when ready", stop - start);
+
+  // polling before ready with wait_for(0s) should be almost as fast as
+  // after the result is ready.
+  VERIFY( wait_for_0 < (ready * 10) );
+
+  // The following two tests fail with GCC 11, see
+  // https://gcc.gnu.org/pipermail/libstdc++/2020-November/051422.html
+#if 0
+  // polling before ready using wait_until(epoch) should not be terribly slow.
+  VERIFY( wait_until_sys_epoch < (ready * 100) );
+  VERIFY( wait_until_steady_epoch < (ready * 100) );
+#endif
+
+  // polling before ready using wait_until(min) should not be terribly slow.
+  VERIFY( wait_until_sys_min < (ready * 100) );
+  VERIFY( wait_until_steady_min < (ready * 100) );
+}