Fix __atomic to not implement atomic loads with CAS.

author Torvald Riegel <torvald@gcc.gnu.org>

Wed, 1 Feb 2017 17:21:59 +0000 (17:21 +0000)

committer Torvald Riegel <torvald@gcc.gnu.org>

Wed, 1 Feb 2017 17:21:59 +0000 (17:21 +0000)
author Torvald Riegel <torvald@gcc.gnu.org>
Wed, 1 Feb 2017 17:21:59 +0000 (17:21 +0000)
committer Torvald Riegel <torvald@gcc.gnu.org>
Wed, 1 Feb 2017 17:21:59 +0000 (17:21 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index bb31a8f4b16282579ad6b9c91171cf4c9ddd496d..594cc3bf1666cfd2f6886e5f03c14364c92838a0 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,21 @@
+2017-02-01  Torvald Riegel  <triegel@redhat.com>
+            Richard Henderson  <rth@redhat.com>
+
+       * builtins.c (fold_builtin_atomic_always_lock_free): Make "lock-free"
+       conditional on existence of a fast atomic load.
+       * optabs-query.c (can_atomic_load_p): New function.
+       * optabs-query.h (can_atomic_load_p): Declare it.
+       * optabs.c (expand_atomic_exchange): Always delegate to libatomic if
+       no fast atomic load is available for the particular size of access.
+       (expand_atomic_compare_and_swap): Likewise.
+       (expand_atomic_load): Likewise.
+       (expand_atomic_store): Likewise.
+       (expand_atomic_fetch_op): Likewise.
+       * testsuite/lib/target-supports.exp
+       (check_effective_target_sync_int_128): Remove x86 because it provides
+       no fast atomic load.
+       (check_effective_target_sync_int_128_runtime): Likewise.
+
  2017-02-01  Richard Biener  <rguenther@suse.de>
  
         * graphite.c: Include tree-vectorizer.h for find_loop_location.
diff --git a/gcc/builtins.c b/gcc/builtins.c

index bf68e317124f3ae3aa033142d9ae813e78e9ef98..0a0e8b9e2fa7bcfe2c4bc4e4d46c02228bf418d1 100644 (file)
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -6157,8 +6157,9 @@ fold_builtin_atomic_always_lock_free (tree arg0, tree arg1)
  
    /* Check if a compare_and_swap pattern exists for the mode which represents
       the required size.  The pattern is not allowed to fail, so the existence
-     of the pattern indicates support is present.  */
-  if (can_compare_and_swap_p (mode, true))
+     of the pattern indicates support is present.  Also require that an
+     atomic load exists for the required size.  */
+  if (can_compare_and_swap_p (mode, true) && can_atomic_load_p (mode))
      return boolean_true_node;
    else
      return boolean_false_node;
diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c

index 6c34a4e9a312ee4e1a9f5e676466e08ebdd72338..4899333096e5d327bcd98fa70ddfb7b2dd386801 100644 (file)
--- a/gcc/optabs-query.c
+++ b/gcc/optabs-query.c
@@ -584,6 +584,25 @@ can_atomic_exchange_p (machine_mode mode, bool allow_libcall)
    return can_compare_and_swap_p (mode, allow_libcall);
  }
  
+/* Return true if an atomic load can be performed without falling back to
+   a compare-and-swap.  */
+
+bool
+can_atomic_load_p (machine_mode mode)
+{
+  enum insn_code icode;
+
+  /* Does the target support the load directly?  */
+  icode = direct_optab_handler (atomic_load_optab, mode);
+  if (icode != CODE_FOR_nothing)
+    return true;
+
+  /* If the size of the object is greater than word size on this target,
+     then we assume that a load will not be atomic.  Also see
+     expand_atomic_load.  */
+  return GET_MODE_PRECISION (mode) <= BITS_PER_WORD;
+}
+
  /* Determine whether "1 << x" is relatively cheap in word_mode.  */
  
  bool
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h

index a80a0e7b0b692eaa665ee8d7135e800bf2c4f092..e85a7f11b95905295cc35107ad9538934e22de76 100644 (file)
--- a/gcc/optabs-query.h
+++ b/gcc/optabs-query.h
@@ -176,6 +176,7 @@ int can_mult_highpart_p (machine_mode, bool);
  bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool);
  bool can_compare_and_swap_p (machine_mode, bool);
  bool can_atomic_exchange_p (machine_mode, bool);
+bool can_atomic_load_p (machine_mode);
  bool lshift_cheap_p (bool);
  
  #endif
diff --git a/gcc/optabs.c b/gcc/optabs.c

index d8831a89f218c808f275803771593b99131be8f8..1afd593ae1520475b13c789ff6df2602b41be024 100644 (file)
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6086,8 +6086,15 @@ expand_atomic_test_and_set (rtx target, rtx mem, enum memmodel model)
  rtx
  expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
  {
+  machine_mode mode = GET_MODE (mem);
    rtx ret;
  
+  /* If loads are not atomic for the required size and we are not called to
+     provide a __sync builtin, do not do anything so that we stay consistent
+     with atomic loads of the same size.  */
+  if (!can_atomic_load_p (mode) && !is_mm_sync (model))
+    return NULL_RTX;
+
    ret = maybe_emit_atomic_exchange (target, mem, val, model);
  
    /* Next try a compare-and-swap loop for the exchange.  */
@@ -6121,6 +6128,12 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
    rtx target_oval, target_bool = NULL_RTX;
    rtx libfunc;
  
+  /* If loads are not atomic for the required size and we are not called to
+     provide a __sync builtin, do not do anything so that we stay consistent
+     with atomic loads of the same size.  */
+  if (!can_atomic_load_p (mode) && !is_mm_sync (succ_model))
+    return false;
+
    /* Load expected into a register for the compare and swap.  */
    if (MEM_P (expected))
      expected = copy_to_reg (expected);
@@ -6316,19 +6329,13 @@ expand_atomic_load (rtx target, rtx mem, enum memmodel model)
      }
  
    /* If the size of the object is greater than word size on this target,
-     then we assume that a load will not be atomic.  */
+     then we assume that a load will not be atomic.  We could try to
+     emulate a load with a compare-and-swap operation, but the store that
+     this could perform would be incorrect if this is a volatile atomic
+     load or if it targets read-only-mapped memory.  */
    if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
-    {
-      /* Issue val = compare_and_swap (mem, 0, 0).
-        This may cause the occasional harmless store of 0 when the value is
-        already 0, but it seems to be OK according to the standards guys.  */
-      if (expand_atomic_compare_and_swap (NULL, &target, mem, const0_rtx,
-                                         const0_rtx, false, model, model))
-       return target;
-      else
-      /* Otherwise there is no atomic load, leave the library call.  */
-        return NULL_RTX;
-    }
+    /* If there is no atomic load, leave the library call.  */
+    return NULL_RTX;
  
    /* Otherwise assume loads are atomic, and emit the proper barriers.  */
    if (!target || target == const0_rtx)
@@ -6370,7 +6377,9 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
         return const0_rtx;
      }
  
-  /* If using __sync_lock_release is a viable alternative, try it.  */
+  /* If using __sync_lock_release is a viable alternative, try it.
+     Note that this will not be set to true if we are expanding a generic
+     __atomic_store_n.  */
    if (use_release)
      {
        icode = direct_optab_handler (sync_lock_release_optab, mode);
@@ -6389,16 +6398,22 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
      }
  
    /* If the size of the object is greater than word size on this target,
-     a default store will not be atomic, Try a mem_exchange and throw away
-     the result.  If that doesn't work, don't do anything.  */
+     a default store will not be atomic.  */
    if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
      {
-      rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model);
-      if (!target)
-        target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val);
-      if (target)
-        return const0_rtx;
-      else
+      /* If loads are atomic or we are called to provide a __sync builtin,
+        we can try an atomic_exchange and throw away the result.  Otherwise,
+        don't do anything so that we do not create an inconsistency between
+        loads and stores.  */
+      if (can_atomic_load_p (mode) || is_mm_sync (model))
+       {
+         rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model);
+         if (!target)
+           target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem,
+                                                               val);
+         if (target)
+           return const0_rtx;
+       }
          return NULL_RTX;
      }
  
@@ -6713,6 +6728,12 @@ expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code,
    rtx result;
    bool unused_result = (target == const0_rtx);
  
+  /* If loads are not atomic for the required size and we are not called to
+     provide a __sync builtin, do not do anything so that we stay consistent
+     with atomic loads of the same size.  */
+  if (!can_atomic_load_p (mode) && !is_mm_sync (model))
+    return NULL_RTX;
+
    result = expand_atomic_fetch_op_no_fallback (target, mem, val, code, model,
                                                after);
    
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp

index 95a1c500c28a0612835436787a55c51bb1866fea..7a260085405c850ef4042faffc4ad71fdb25c830 100644 (file)
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6514,9 +6514,7 @@ proc check_effective_target_section_anchors { } {
  # Return 1 if the target supports atomic operations on "int_128" values.
  
  proc check_effective_target_sync_int_128 { } {
-    if { (([istarget i?86-*-*] || [istarget x86_64-*-*])
-         && ![is-effective-target ia32])
-        || [istarget spu-*-*] } {
+    if { [istarget spu-*-*] } {
         return 1
      } else {
         return 0
@@ -6525,23 +6523,10 @@ proc check_effective_target_sync_int_128 { } {
  
  # Return 1 if the target supports atomic operations on "int_128" values
  # and can execute them.
+# This requires support for both compare-and-swap and true atomic loads.
  
  proc check_effective_target_sync_int_128_runtime { } {
-    if { (([istarget i?86-*-*] || [istarget x86_64-*-*])
-         && ![is-effective-target ia32]
-         && [check_cached_effective_target sync_int_128_available {
-             check_runtime_nocache sync_int_128_available {
-                 #include "cpuid.h"
-                 int main ()
-                 {
-                     unsigned int eax, ebx, ecx, edx;
-                     if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
-                       return !(ecx & bit_CMPXCHG16B);
-                     return 1;
-                 }
-             } ""
-         }])
-        || [istarget spu-*-*] } {
+    if { [istarget spu-*-*] } {
         return 1
      } else {
         return 0
diff --git a/libatomic/ChangeLog b/libatomic/ChangeLog

index 1b47b8f2235d401cf1e987dfc971caef8d0e7bef..d2b83369b74c36459140f1cea5e44e4ed408a98d 100644 (file)
--- a/libatomic/ChangeLog
+++ b/libatomic/ChangeLog
@@ -1,3 +1,12 @@
+2017-02-01  Richard Henderson  <rth@redhat.com>
+            Torvald Riegel  <triegel@redhat.com>
+
+       * acinclude.m4: Add #define FAST_ATOMIC_LDST_*.
+       * auto-config.h.in: Regenerate.
+       * config/x86/host-config.h (FAST_ATOMIC_LDST_16): Define to 0.
+       (atomic_compare_exchange_n): New.
+       * glfree.c (EXACT, LARGER): Change condition and add comments.
+
  2017-01-30  Szabolcs Nagy  <szabolcs.nagy@arm.com>
  
         PR target/78945
diff --git a/libatomic/acinclude.m4 b/libatomic/acinclude.m4

index a86e52b4ff4b172b6df27cbf76db6885c7e76280..485d731df55177afbe5c05c21fc7ac6661b5e0cc 100644 (file)
--- a/libatomic/acinclude.m4
+++ b/libatomic/acinclude.m4
@@ -96,6 +96,7 @@ AC_DEFUN([LIBAT_HAVE_ATOMIC_LOADSTORE],[
    LIBAT_DEFINE_YESNO([HAVE_ATOMIC_LDST_$2], [$libat_cv_have_at_ldst_$2],
         [Have __atomic_load/store for $2 byte integers.])
    AH_BOTTOM([#define MAYBE_HAVE_ATOMIC_LDST_$2 HAVE_ATOMIC_LDST_$2])
+  AH_BOTTOM([#define FAST_ATOMIC_LDST_$2 HAVE_ATOMIC_LDST_$2])
  ])
  
  dnl
diff --git a/libatomic/auto-config.h.in b/libatomic/auto-config.h.in

index 83e54e2db3b06a9d8bc294b57457b931035b9881..d5b8a26e33e172cf18177bad5e2b8355ad0a764e 100644 (file)
--- a/libatomic/auto-config.h.in
+++ b/libatomic/auto-config.h.in
@@ -222,6 +222,16 @@
  
  #define MAYBE_HAVE_ATOMIC_LDST_1 HAVE_ATOMIC_LDST_1
  
+#define FAST_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
+
+#define MAYBE_HAVE_ATOMIC_TAS_1 HAVE_ATOMIC_TAS_1
+
+#define MAYBE_HAVE_ATOMIC_TAS_2 HAVE_ATOMIC_TAS_2
+
+#define MAYBE_HAVE_ATOMIC_TAS_4 HAVE_ATOMIC_TAS_4
+
+#define MAYBE_HAVE_ATOMIC_TAS_8 HAVE_ATOMIC_TAS_8
+
  #define MAYBE_HAVE_ATOMIC_TAS_16 HAVE_ATOMIC_TAS_16
  
  #define MAYBE_HAVE_ATOMIC_EXCHANGE_1 HAVE_ATOMIC_EXCHANGE_1
@@ -232,6 +242,8 @@
  
  #define MAYBE_HAVE_ATOMIC_EXCHANGE_8 HAVE_ATOMIC_EXCHANGE_8
  
+#define FAST_ATOMIC_LDST_1 HAVE_ATOMIC_LDST_1
+
  #define MAYBE_HAVE_ATOMIC_EXCHANGE_16 HAVE_ATOMIC_EXCHANGE_16
  
  #define MAYBE_HAVE_ATOMIC_CAS_1 HAVE_ATOMIC_CAS_1
@@ -242,8 +254,6 @@
  
  #define MAYBE_HAVE_ATOMIC_CAS_8 HAVE_ATOMIC_CAS_8
  
-#define MAYBE_HAVE_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
-
  #define MAYBE_HAVE_ATOMIC_CAS_16 HAVE_ATOMIC_CAS_16
  
  #define MAYBE_HAVE_ATOMIC_FETCH_ADD_1 HAVE_ATOMIC_FETCH_ADD_1
@@ -254,6 +264,8 @@
  
  #define MAYBE_HAVE_ATOMIC_FETCH_ADD_8 HAVE_ATOMIC_FETCH_ADD_8
  
+#define MAYBE_HAVE_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
+
  #define MAYBE_HAVE_ATOMIC_FETCH_ADD_16 HAVE_ATOMIC_FETCH_ADD_16
  
  #define MAYBE_HAVE_ATOMIC_FETCH_OP_1 HAVE_ATOMIC_FETCH_OP_1
@@ -264,22 +276,20 @@
  
  #define MAYBE_HAVE_ATOMIC_FETCH_OP_8 HAVE_ATOMIC_FETCH_OP_8
  
-#define MAYBE_HAVE_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
-
  #define MAYBE_HAVE_ATOMIC_FETCH_OP_16 HAVE_ATOMIC_FETCH_OP_16
  
  #ifndef WORDS_BIGENDIAN
  #define WORDS_BIGENDIAN 0
  #endif
  
-#define MAYBE_HAVE_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
+#define FAST_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
  
-#define MAYBE_HAVE_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
+#define MAYBE_HAVE_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
  
-#define MAYBE_HAVE_ATOMIC_TAS_1 HAVE_ATOMIC_TAS_1
+#define FAST_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
  
-#define MAYBE_HAVE_ATOMIC_TAS_2 HAVE_ATOMIC_TAS_2
+#define MAYBE_HAVE_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
  
-#define MAYBE_HAVE_ATOMIC_TAS_4 HAVE_ATOMIC_TAS_4
+#define FAST_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
  
-#define MAYBE_HAVE_ATOMIC_TAS_8 HAVE_ATOMIC_TAS_8
+#define MAYBE_HAVE_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
diff --git a/libatomic/config/x86/host-config.h b/libatomic/config/x86/host-config.h

index 5754db4fccf29a46a22689b9c5fa5bea5d3eba81..2e9f85aee5f9fe69a298a7efa17389b5120226a2 100644 (file)
--- a/libatomic/config/x86/host-config.h
+++ b/libatomic/config/x86/host-config.h
@@ -47,6 +47,9 @@ extern unsigned int libat_feat1_edx HIDDEN;
  # define MAYBE_HAVE_ATOMIC_EXCHANGE_16 IFUNC_COND_1
  # undef MAYBE_HAVE_ATOMIC_LDST_16
  # define MAYBE_HAVE_ATOMIC_LDST_16     IFUNC_COND_1
+/* Since load and store are implemented with CAS, they are not fast.  */
+# undef FAST_ATOMIC_LDST_16
+# define FAST_ATOMIC_LDST_16           0
  # if IFUNC_ALT == 1
  #  undef HAVE_ATOMIC_CAS_16
  #  define HAVE_ATOMIC_CAS_16 1
@@ -64,6 +67,21 @@ extern unsigned int libat_feat1_edx HIDDEN;
  # endif
  #endif
  
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+static inline bool
+atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
+                           bool weak_p UNUSED, int sm UNUSED, int fm UNUSED)
+{
+  UTYPE cmpval = *eptr;
+  UTYPE oldval = __sync_val_compare_and_swap_16 (mptr, cmpval, newval);
+  if (oldval == cmpval)
+    return true;
+  *eptr = oldval;
+  return false;
+}
+# define atomic_compare_exchange_n atomic_compare_exchange_n
+#endif /* Have CAS 16 */
+
  #endif /* HAVE_IFUNC */
  
  #include_next <host-config.h>
diff --git a/libatomic/glfree.c b/libatomic/glfree.c

index b68dec77db0b220102b4f8cb68792334ef7b2e74..59fe533bc308444bf02c7468d2eeaba5ccd94bd1 100644 (file)
--- a/libatomic/glfree.c
+++ b/libatomic/glfree.c
@@ -24,26 +24,41 @@
  
  #include "libatomic_i.h"
  
-
+/* Accesses with a power-of-two size are not lock-free if we don't have an
+   integer type of this size or if they are not naturally aligned.  They
+   are lock-free if such a naturally aligned access is always lock-free
+   according to the compiler, which requires that both atomic loads and CAS
+   are available.
+   In all other cases, we fall through to LARGER (see below).  */
  #define EXACT(N)                                               \
    do {                                                         \
      if (!C2(HAVE_INT,N)) break;                                        \
      if ((uintptr_t)ptr & (N - 1)) break;                       \
      if (__atomic_always_lock_free(N, 0)) return true;          \
-    if (C2(MAYBE_HAVE_ATOMIC_CAS_,N)) return true;             \
+    if (!C2(MAYBE_HAVE_ATOMIC_CAS_,N)) break;                  \
+    if (C2(FAST_ATOMIC_LDST_,N)) return true;                  \
    } while (0)
  
  
+/* We next check to see if an access of a larger size is lock-free.  We use
+   a similar check as in EXACT, except that we also check that the alignment
+   of the access is such that the data to be accessed is completely covered
+   by the larger access.  */
  #define LARGER(N)                                              \
    do {                                                         \
      uintptr_t r = (uintptr_t)ptr & (N - 1);                    \
      if (!C2(HAVE_INT,N)) break;                                        \
-    if (!C2(HAVE_ATOMIC_LDST_,N)) break;                       \
+    if (!C2(FAST_ATOMIC_LDST_,N)) break;                       \
      if (!C2(MAYBE_HAVE_ATOMIC_CAS_,N)) break;                  \
      if (r + n <= N) return true;                               \
    } while (0)
  
  
+/* Note that this can return that a size/alignment is not lock-free even if
+   all the operations that we use to implement the respective accesses provide
+   lock-free forward progress as specified in C++14:  Users likely expect
+   "lock-free" to also mean "fast", which is why we do not return true if, for
+   example, we implement loads with this size/alignment using a CAS.  */
  bool
  libat_is_lock_free (size_t n, void *ptr)
  {
author	Torvald Riegel <torvald@gcc.gnu.org>
	Wed, 1 Feb 2017 17:21:59 +0000 (17:21 +0000)
committer	Torvald Riegel <torvald@gcc.gnu.org>
	Wed, 1 Feb 2017 17:21:59 +0000 (17:21 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/builtins.c		patch \| blob \| history
gcc/optabs-query.c		patch \| blob \| history
gcc/optabs-query.h		patch \| blob \| history
gcc/optabs.c		patch \| blob \| history
gcc/testsuite/lib/target-supports.exp		patch \| blob \| history
libatomic/ChangeLog		patch \| blob \| history
libatomic/acinclude.m4		patch \| blob \| history
libatomic/auto-config.h.in		patch \| blob \| history
libatomic/config/x86/host-config.h		patch \| blob \| history
libatomic/glfree.c		patch \| blob \| history