+2017-02-01 Torvald Riegel <triegel@redhat.com>
+ Richard Henderson <rth@redhat.com>
+
+ * builtins.c (fold_builtin_atomic_always_lock_free): Make "lock-free"
+ conditional on existence of a fast atomic load.
+ * optabs-query.c (can_atomic_load_p): New function.
+ * optabs-query.h (can_atomic_load_p): Declare it.
+ * optabs.c (expand_atomic_exchange): Always delegate to libatomic if
+ no fast atomic load is available for the particular size of access.
+ (expand_atomic_compare_and_swap): Likewise.
+ (expand_atomic_load): Likewise.
+ (expand_atomic_store): Likewise.
+ (expand_atomic_fetch_op): Likewise.
+ * testsuite/lib/target-supports.exp
+ (check_effective_target_sync_int_128): Remove x86 because it provides
+ no fast 128-bit atomic load.
+ (check_effective_target_sync_int_128_runtime): Likewise.
+
2017-02-01 Richard Biener <rguenther@suse.de>
* graphite.c: Include tree-vectorizer.h for find_loop_location.
/* Check if a compare_and_swap pattern exists for the mode which represents
the required size. The pattern is not allowed to fail, so the existence
- of the pattern indicates support is present. */
- if (can_compare_and_swap_p (mode, true))
+ of the pattern indicates support is present. Also require that an
+ atomic load exists for the required size. */
+ if (can_compare_and_swap_p (mode, true) && can_atomic_load_p (mode))
return boolean_true_node;
else
return boolean_false_node;
return can_compare_and_swap_p (mode, allow_libcall);
}
+/* Return true if an atomic load can be performed without falling back to
+ a compare-and-swap. */
+
+bool
+can_atomic_load_p (machine_mode mode)
+{
+ enum insn_code icode;
+
+ /* Does the target support the load directly? */
+ icode = direct_optab_handler (atomic_load_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ return true;
+
+ /* If the size of the object is greater than word size on this target,
+ then we assume that a load will not be atomic. Also see
+ expand_atomic_load. */
+ return GET_MODE_PRECISION (mode) <= BITS_PER_WORD;
+}
+
/* Determine whether "1 << x" is relatively cheap in word_mode. */
bool
bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool);
bool can_compare_and_swap_p (machine_mode, bool);
bool can_atomic_exchange_p (machine_mode, bool);
+bool can_atomic_load_p (machine_mode);
bool lshift_cheap_p (bool);
#endif
rtx
expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
{
+ machine_mode mode = GET_MODE (mem);
rtx ret;
+ /* If loads are not atomic for the required size and we are not called to
+ provide a __sync builtin, do not do anything so that we stay consistent
+ with atomic loads of the same size. */
+ if (!can_atomic_load_p (mode) && !is_mm_sync (model))
+ return NULL_RTX;
+
ret = maybe_emit_atomic_exchange (target, mem, val, model);
/* Next try a compare-and-swap loop for the exchange. */
rtx target_oval, target_bool = NULL_RTX;
rtx libfunc;
+ /* If loads are not atomic for the required size and we are not called to
+ provide a __sync builtin, do not do anything so that we stay consistent
+ with atomic loads of the same size. */
+ if (!can_atomic_load_p (mode) && !is_mm_sync (succ_model))
+ return false;
+
/* Load expected into a register for the compare and swap. */
if (MEM_P (expected))
expected = copy_to_reg (expected);
}
/* If the size of the object is greater than word size on this target,
- then we assume that a load will not be atomic. */
+ then we assume that a load will not be atomic. We could try to
+ emulate a load with a compare-and-swap operation, but the store that
+ this could result in would be incorrect if this is a volatile atomic
+ load or the access targets read-only-mapped memory. */
if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
- {
- /* Issue val = compare_and_swap (mem, 0, 0).
- This may cause the occasional harmless store of 0 when the value is
- already 0, but it seems to be OK according to the standards guys. */
- if (expand_atomic_compare_and_swap (NULL, &target, mem, const0_rtx,
- const0_rtx, false, model, model))
- return target;
- else
- /* Otherwise there is no atomic load, leave the library call. */
- return NULL_RTX;
- }
+ /* If there is no atomic load, leave the library call. */
+ return NULL_RTX;
/* Otherwise assume loads are atomic, and emit the proper barriers. */
if (!target || target == const0_rtx)
return const0_rtx;
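
As a source-level illustration (not part of the patch itself), emulating a wide
atomic load with a compare-and-swap boils down to the sketch below; load_via_cas
is a hypothetical name, and a 16-byte access on a target that provides a 16-byte
CAS is assumed:

/* Illustrative sketch only: a 16-byte "load" emulated via CAS.  */
__int128
load_via_cas (__int128 *p)
{
  /* The CAS needs exclusive, writable access to *p and actually stores 0
     back whenever *p happens to be 0, so it misbehaves for volatile
     atomic loads and faults on read-only mappings.  */
  return __sync_val_compare_and_swap (p, (__int128) 0, (__int128) 0);
}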
}
- /* If using __sync_lock_release is a viable alternative, try it. */
+ /* If using __sync_lock_release is a viable alternative, try it.
+ Note that this will not be set to true if we are expanding a generic
+ __atomic_store_n. */
if (use_release)
{
icode = direct_optab_handler (sync_lock_release_optab, mode);
}
/* If the size of the object is greater than word size on this target,
- a default store will not be atomic, Try a mem_exchange and throw away
- the result. If that doesn't work, don't do anything. */
+ a default store will not be atomic. */
if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
{
- rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model);
- if (!target)
- target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val);
- if (target)
- return const0_rtx;
- else
+ /* If loads are atomic or we are called to provide a __sync builtin,
+ we can try an atomic_exchange and throw away the result. Otherwise,
+ don't do anything so that we do not create an inconsistency between
+ loads and stores. */
+ if (can_atomic_load_p (mode) || is_mm_sync (model))
+ {
+ rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model);
+ if (!target)
+ target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem,
+ val);
+ if (target)
+ return const0_rtx;
+ }
return NULL_RTX;
}
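
For illustration (not part of the patch itself), the exchange-and-discard
strategy used above for oversized stores corresponds roughly to the following
source-level sketch; store_via_exchange is a hypothetical name and a 16-byte
access is assumed:

/* Illustrative sketch only: an oversized atomic store emitted as an
   exchange whose previous value is thrown away.  */
void
store_via_exchange (__int128 *p, __int128 val)
{
  (void) __atomic_exchange_n (p, val, __ATOMIC_SEQ_CST);
}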
rtx result;
bool unused_result = (target == const0_rtx);
+ /* If loads are not atomic for the required size and we are not called to
+ provide a __sync builtin, do not do anything so that we stay consistent
+ with atomic loads of the same size. */
+ if (!can_atomic_load_p (mode) && !is_mm_sync (model))
+ return NULL_RTX;
+
result = expand_atomic_fetch_op_no_fallback (target, mem, val, code, model,
after);
# Return 1 if the target supports atomic operations on "int_128" values.
proc check_effective_target_sync_int_128 { } {
- if { (([istarget i?86-*-*] || [istarget x86_64-*-*])
- && ![is-effective-target ia32])
- || [istarget spu-*-*] } {
+ if { [istarget spu-*-*] } {
return 1
} else {
return 0
# Return 1 if the target supports atomic operations on "int_128" values
# and can execute them.
+# This requires support for both compare-and-swap and true atomic loads.
proc check_effective_target_sync_int_128_runtime { } {
- if { (([istarget i?86-*-*] || [istarget x86_64-*-*])
- && ![is-effective-target ia32]
- && [check_cached_effective_target sync_int_128_available {
- check_runtime_nocache sync_int_128_available {
- #include "cpuid.h"
- int main ()
- {
- unsigned int eax, ebx, ecx, edx;
- if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
- return !(ecx & bit_CMPXCHG16B);
- return 1;
- }
- } ""
- }])
- || [istarget spu-*-*] } {
+ if { [istarget spu-*-*] } {
return 1
} else {
return 0
+2017-02-01 Richard Henderson <rth@redhat.com>
+ Torvald Riegel <triegel@redhat.com>
+
+ * acinclude.m4: Add #define FAST_ATOMIC_LDST_*.
+ * auto-config.h.in: Regenerate.
+ * config/x86/host-config.h (FAST_ATOMIC_LDST_16): Define to 0.
+ (atomic_compare_exchange_n): New.
+ * glfree.c (EXACT, LARGER): Change condition and add comments.
+
2017-01-30 Szabolcs Nagy <szabolcs.nagy@arm.com>
PR target/78945
LIBAT_DEFINE_YESNO([HAVE_ATOMIC_LDST_$2], [$libat_cv_have_at_ldst_$2],
[Have __atomic_load/store for $2 byte integers.])
AH_BOTTOM([#define MAYBE_HAVE_ATOMIC_LDST_$2 HAVE_ATOMIC_LDST_$2])
+ AH_BOTTOM([#define FAST_ATOMIC_LDST_$2 HAVE_ATOMIC_LDST_$2])
])
dnl
#define MAYBE_HAVE_ATOMIC_LDST_1 HAVE_ATOMIC_LDST_1
+#define FAST_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
+
+#define MAYBE_HAVE_ATOMIC_TAS_1 HAVE_ATOMIC_TAS_1
+
+#define MAYBE_HAVE_ATOMIC_TAS_2 HAVE_ATOMIC_TAS_2
+
+#define MAYBE_HAVE_ATOMIC_TAS_4 HAVE_ATOMIC_TAS_4
+
+#define MAYBE_HAVE_ATOMIC_TAS_8 HAVE_ATOMIC_TAS_8
+
#define MAYBE_HAVE_ATOMIC_TAS_16 HAVE_ATOMIC_TAS_16
#define MAYBE_HAVE_ATOMIC_EXCHANGE_1 HAVE_ATOMIC_EXCHANGE_1
#define MAYBE_HAVE_ATOMIC_EXCHANGE_8 HAVE_ATOMIC_EXCHANGE_8
+#define FAST_ATOMIC_LDST_1 HAVE_ATOMIC_LDST_1
+
#define MAYBE_HAVE_ATOMIC_EXCHANGE_16 HAVE_ATOMIC_EXCHANGE_16
#define MAYBE_HAVE_ATOMIC_CAS_1 HAVE_ATOMIC_CAS_1
#define MAYBE_HAVE_ATOMIC_CAS_8 HAVE_ATOMIC_CAS_8
-#define MAYBE_HAVE_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
-
#define MAYBE_HAVE_ATOMIC_CAS_16 HAVE_ATOMIC_CAS_16
#define MAYBE_HAVE_ATOMIC_FETCH_ADD_1 HAVE_ATOMIC_FETCH_ADD_1
#define MAYBE_HAVE_ATOMIC_FETCH_ADD_8 HAVE_ATOMIC_FETCH_ADD_8
+#define MAYBE_HAVE_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
+
#define MAYBE_HAVE_ATOMIC_FETCH_ADD_16 HAVE_ATOMIC_FETCH_ADD_16
#define MAYBE_HAVE_ATOMIC_FETCH_OP_1 HAVE_ATOMIC_FETCH_OP_1
#define MAYBE_HAVE_ATOMIC_FETCH_OP_8 HAVE_ATOMIC_FETCH_OP_8
-#define MAYBE_HAVE_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
-
#define MAYBE_HAVE_ATOMIC_FETCH_OP_16 HAVE_ATOMIC_FETCH_OP_16
#ifndef WORDS_BIGENDIAN
#define WORDS_BIGENDIAN 0
#endif
-#define MAYBE_HAVE_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
+#define FAST_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
-#define MAYBE_HAVE_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
+#define MAYBE_HAVE_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
-#define MAYBE_HAVE_ATOMIC_TAS_1 HAVE_ATOMIC_TAS_1
+#define FAST_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
-#define MAYBE_HAVE_ATOMIC_TAS_2 HAVE_ATOMIC_TAS_2
+#define MAYBE_HAVE_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
-#define MAYBE_HAVE_ATOMIC_TAS_4 HAVE_ATOMIC_TAS_4
+#define FAST_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
-#define MAYBE_HAVE_ATOMIC_TAS_8 HAVE_ATOMIC_TAS_8
+#define MAYBE_HAVE_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
# define MAYBE_HAVE_ATOMIC_EXCHANGE_16 IFUNC_COND_1
# undef MAYBE_HAVE_ATOMIC_LDST_16
# define MAYBE_HAVE_ATOMIC_LDST_16 IFUNC_COND_1
+/* Since load and store are implemented with CAS, they are not fast. */
+# undef FAST_ATOMIC_LDST_16
+# define FAST_ATOMIC_LDST_16 0
# if IFUNC_ALT == 1
# undef HAVE_ATOMIC_CAS_16
# define HAVE_ATOMIC_CAS_16 1
# endif
#endif
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+static inline bool
+atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
+ bool weak_p UNUSED, int sm UNUSED, int fm UNUSED)
+{
+ UTYPE cmpval = *eptr;
+ UTYPE oldval = __sync_val_compare_and_swap_16 (mptr, cmpval, newval);
+ if (oldval == cmpval)
+ return true;
+ *eptr = oldval;
+ return false;
+}
+# define atomic_compare_exchange_n atomic_compare_exchange_n
+#endif /* Have CAS 16 */
+
#endif /* HAVE_IFUNC */
#include_next <host-config.h>
#include "libatomic_i.h"
-
+/* Accesses with a power-of-two size are not lock-free if we don't have an
+ integer type of this size or if they are not naturally aligned. They
+ are lock-free if such a naturally aligned access is always lock-free
+ according to the compiler, or if the library itself provides both a fast
+ atomic load and a CAS for this size.
+ In all other cases, we fall through to LARGER (see below). */
#define EXACT(N) \
do { \
if (!C2(HAVE_INT,N)) break; \
if ((uintptr_t)ptr & (N - 1)) break; \
if (__atomic_always_lock_free(N, 0)) return true; \
- if (C2(MAYBE_HAVE_ATOMIC_CAS_,N)) return true; \
+ if (!C2(MAYBE_HAVE_ATOMIC_CAS_,N)) break; \
+ if (C2(FAST_ATOMIC_LDST_,N)) return true; \
} while (0)
+/* We next check whether an access of a larger size is lock-free. We use
+ a check similar to the one in EXACT, except that we also require that the
+ alignment of the access is such that the data to be accessed is
+ completely covered by the larger access. */
#define LARGER(N) \
do { \
uintptr_t r = (uintptr_t)ptr & (N - 1); \
if (!C2(HAVE_INT,N)) break; \
- if (!C2(HAVE_ATOMIC_LDST_,N)) break; \
+ if (!C2(FAST_ATOMIC_LDST_,N)) break; \
if (!C2(MAYBE_HAVE_ATOMIC_CAS_,N)) break; \
if (r + n <= N) return true; \
} while (0)
+/* Note that this can report that a size/alignment is not lock-free even if
+ all the operations that we use to implement the respective accesses provide
+ lock-free forward progress as specified in C++14: users likely expect
+ "lock-free" to also mean "fast", which is why we do not return true if, for
+ example, we implement loads of this size/alignment using a CAS. */
bool
libat_is_lock_free (size_t n, void *ptr)
{
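
A rough user-level sketch of the intended effect of the EXACT/LARGER changes,
assuming x86-64 and a libatomic built with FAST_ATOMIC_LDST_16 defined to 0
(the test program is hypothetical, and the exact result depends on the target
and configuration):

/* Illustrative sketch only: querying lock-freedom of a 16-byte object.  */
#include <stdio.h>

int
main (void)
{
  __int128 x __attribute__ ((aligned (16))) = 0;
  /* Expected to print 0 here, because 16-byte loads would have to be
     emulated with a CAS and are therefore not considered "fast".  */
  printf ("%d\n", (int) __atomic_is_lock_free (sizeof x, &x));
  return 0;
}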