From: John David Anglin
Date: Sat, 11 Aug 2018 21:37:55 +0000 (+0000)
Subject: pa.md (UNSPEC_MEMORY_BARRIER): New unspec enum.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2b1969f6355abca6017fc817524a7d3cbd07f923;p=gcc.git

pa.md (UNSPEC_MEMORY_BARRIER): New unspec enum.

gcc
	* config/pa/pa.md (UNSPEC_MEMORY_BARRIER): New unspec enum.
	Update comment for atomic instructions.
	(atomic_storeqi, atomic_storehi, atomic_storesi, atomic_storesf,
	atomic_loaddf, atomic_loaddf_1, atomic_storedf, atomic_storedf_1):
	Remove.
	(atomic_loaddi): Revise fence expansion to only emit fence prior to
	load for __ATOMIC_SEQ_CST model.
	(atomic_loaddi_1): Remove float register target.
	(atomic_storedi): Handle CONST_INT values.
	(atomic_storedi_1): Remove float register source.  Add special case
	for zero value.
	(memory_barrier): New expander and insn.

libgcc
	* config/pa/linux-atomic.c: Update comment.
	(FETCH_AND_OP_2, OP_AND_FETCH_2, FETCH_AND_OP_WORD, OP_AND_FETCH_WORD,
	COMPARE_AND_SWAP_2, __sync_val_compare_and_swap_4,
	SYNC_LOCK_TEST_AND_SET_2, __sync_lock_test_and_set_4): Use
	__ATOMIC_RELAXED for atomic loads.
	(SYNC_LOCK_RELEASE_1): New define.  Use __sync_synchronize() and
	unordered store to release lock.
	(__sync_lock_release_8): Likewise.
	(SYNC_LOCK_RELEASE_2): Remove define.

From-SVN: r263488
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0da6f84cf93..c669cadfec5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2018-08-11  John David Anglin
+
+	* config/pa/pa.md (UNSPEC_MEMORY_BARRIER): New unspec enum.
+	Update comment for atomic instructions.
+	(atomic_storeqi, atomic_storehi, atomic_storesi, atomic_storesf,
+	atomic_loaddf, atomic_loaddf_1, atomic_storedf, atomic_storedf_1):
+	Remove.
+	(atomic_loaddi): Revise fence expansion to only emit fence prior to
+	load for __ATOMIC_SEQ_CST model.
+	(atomic_loaddi_1): Remove float register target.
+	(atomic_storedi): Handle CONST_INT values.
+	(atomic_storedi_1): Remove float register source.  Add special case
+	for zero value.
+	(memory_barrier): New expander and insn.
+
 2018-08-11  Jakub Jelinek
 
 	PR tree-optimization/86835
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index e748f053651..0094c58f5e4 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -84,6 +84,7 @@
    UNSPEC_TLSGD_PIC
    UNSPEC_TLSLDM_PIC
    UNSPEC_TLSIE_PIC
+   UNSPEC_MEMORY_BARRIER
 ])
 
 ;; UNSPEC_VOLATILE:
@@ -9955,84 +9956,8 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
 ;; doubleword loads and stores are not guaranteed to be atomic
 ;; when referencing the I/O address space.
 
-;; The kernel cmpxchg operation on linux is not atomic with respect to
-;; memory stores on SMP machines, so we must do stores using a cmpxchg
-;; operation.
-
 ;; These patterns are at the bottom so the non atomic versions are preferred.
 
-;; Implement atomic QImode store using exchange.
-
-(define_expand "atomic_storeqi"
-  [(match_operand:QI 0 "memory_operand")               ;; memory
-   (match_operand:QI 1 "register_operand")             ;; val out
-   (match_operand:SI 2 "const_int_operand")]           ;; model
-  ""
-{
-  if (TARGET_SYNC_LIBCALL)
-    {
-      rtx mem = operands[0];
-      rtx val = operands[1];
-      if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
-	DONE;
-    }
-  FAIL;
-})
-
-;; Implement atomic HImode stores using exchange.
- -(define_expand "atomic_storehi" - [(match_operand:HI 0 "memory_operand") ;; memory - (match_operand:HI 1 "register_operand") ;; val out - (match_operand:SI 2 "const_int_operand")] ;; model - "" -{ - if (TARGET_SYNC_LIBCALL) - { - rtx mem = operands[0]; - rtx val = operands[1]; - if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val)) - DONE; - } - FAIL; -}) - -;; Implement atomic SImode store using exchange. - -(define_expand "atomic_storesi" - [(match_operand:SI 0 "memory_operand") ;; memory - (match_operand:SI 1 "register_operand") ;; val out - (match_operand:SI 2 "const_int_operand")] ;; model - "" -{ - if (TARGET_SYNC_LIBCALL) - { - rtx mem = operands[0]; - rtx val = operands[1]; - if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val)) - DONE; - } - FAIL; -}) - -;; Implement atomic SFmode store using exchange. - -(define_expand "atomic_storesf" - [(match_operand:SF 0 "memory_operand") ;; memory - (match_operand:SF 1 "register_operand") ;; val out - (match_operand:SI 2 "const_int_operand")] ;; model - "" -{ - if (TARGET_SYNC_LIBCALL) - { - rtx mem = operands[0]; - rtx val = operands[1]; - if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val)) - DONE; - } - FAIL; -}) - ;; Implement atomic DImode load using 64-bit floating point load. (define_expand "atomic_loaddi" @@ -10048,47 +9973,39 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" model = memmodel_from_int (INTVAL (operands[2])); operands[1] = force_reg (SImode, XEXP (operands[1], 0)); - expand_mem_thread_fence (model); - emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1])); if (is_mm_seq_cst (model)) expand_mem_thread_fence (model); + emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1])); + expand_mem_thread_fence (model); DONE; }) (define_insn "atomic_loaddi_1" - [(set (match_operand:DI 0 "register_operand" "=f,r") - (mem:DI (match_operand:SI 1 "register_operand" "r,r"))) - (clobber (match_scratch:DI 2 "=X,f"))] + [(set (match_operand:DI 0 "register_operand" "=r") + (mem:DI (match_operand:SI 1 "register_operand" "r"))) + (clobber (match_scratch:DI 2 "=f"))] "!TARGET_64BIT && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT" - "@ - {fldds|fldd} 0(%1),%0 - {fldds|fldd} 0(%1),%2\n\t{fstds|fstd} %2,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0\n\t{ldws|ldw} -12(%%sp),%R0" - [(set_attr "type" "move,move") - (set_attr "length" "4,16")]) + "{fldds|fldd} 0(%1),%2\n\t{fstds|fstd} %2,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0\n\t{ldws|ldw} -12(%%sp),%R0" + [(set_attr "type" "move") + (set_attr "length" "16")]) ;; Implement atomic DImode store. 
(define_expand "atomic_storedi" [(match_operand:DI 0 "memory_operand") ;; memory - (match_operand:DI 1 "register_operand") ;; val out + (match_operand:DI 1 "reg_or_cint_move_operand") ;; val out (match_operand:SI 2 "const_int_operand")] ;; model "" { enum memmodel model; - if (TARGET_SYNC_LIBCALL) - { - rtx mem = operands[0]; - rtx val = operands[1]; - if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val)) - DONE; - } - if (TARGET_64BIT || TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT) FAIL; model = memmodel_from_int (INTVAL (operands[2])); operands[0] = force_reg (SImode, XEXP (operands[0], 0)); + if (operands[1] != CONST0_RTX (DImode)) + operands[1] = force_reg (DImode, operands[1]); expand_mem_thread_fence (model); emit_insn (gen_atomic_storedi_1 (operands[0], operands[1])); if (is_mm_seq_cst (model)) @@ -10098,87 +10015,33 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" (define_insn "atomic_storedi_1" [(set (mem:DI (match_operand:SI 0 "register_operand" "r,r")) - (match_operand:DI 1 "register_operand" "f,r")) + (match_operand:DI 1 "reg_or_0_operand" "M,r")) (clobber (match_scratch:DI 2 "=X,f"))] - "!TARGET_64BIT && !TARGET_DISABLE_FPREGS - && !TARGET_SOFT_FLOAT && !TARGET_SYNC_LIBCALL" + "!TARGET_64BIT && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT" "@ - {fstds|fstd} %1,0(%0) + {fstds|fstd} %%fr0,0(%0) {stws|stw} %1,-16(%%sp)\n\t{stws|stw} %R1,-12(%%sp)\n\t{fldds|fldd} -16(%%sp),%2\n\t{fstds|fstd} %2,0(%0)" [(set_attr "type" "move,move") (set_attr "length" "4,16")]) -;; Implement atomic DFmode load using 64-bit floating point load. +;; PA 2.0 hardware supports out-of-order execution of loads and stores, so +;; we need a memory barrier to enforce program order for memory references. +;; Since we want PA 1.x code to be PA 2.0 compatible, we also need the +;; barrier when generating PA 1.x code. -(define_expand "atomic_loaddf" - [(match_operand:DF 0 "register_operand") ;; val out - (match_operand:DF 1 "memory_operand") ;; memory - (match_operand:SI 2 "const_int_operand")] ;; model +(define_expand "memory_barrier" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))] "" { - enum memmodel model; - - if (TARGET_64BIT || TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT) - FAIL; - - model = memmodel_from_int (INTVAL (operands[2])); - operands[1] = force_reg (SImode, XEXP (operands[1], 0)); - expand_mem_thread_fence (model); - emit_insn (gen_atomic_loaddf_1 (operands[0], operands[1])); - if (is_mm_seq_cst (model)) - expand_mem_thread_fence (model); - DONE; + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; }) -(define_insn "atomic_loaddf_1" - [(set (match_operand:DF 0 "register_operand" "=f,r") - (mem:DF (match_operand:SI 1 "register_operand" "r,r"))) - (clobber (match_scratch:DF 2 "=X,f"))] - "!TARGET_64BIT && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT" - "@ - {fldds|fldd} 0(%1),%0 - {fldds|fldd} 0(%1),%2\n\t{fstds|fstd} %2,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0\n\t{ldws|ldw} -12(%%sp),%R0" - [(set_attr "type" "move,move") - (set_attr "length" "4,16")]) - -;; Implement atomic DFmode store using 64-bit floating point store. 
-
-(define_expand "atomic_storedf"
-  [(match_operand:DF 0 "memory_operand")               ;; memory
-   (match_operand:DF 1 "register_operand")             ;; val out
-   (match_operand:SI 2 "const_int_operand")]           ;; model
-  ""
-{
-  enum memmodel model;
-
-  if (TARGET_SYNC_LIBCALL)
-    {
-      rtx mem = operands[0];
-      rtx val = operands[1];
-      if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
-	DONE;
-    }
-
-  if (TARGET_64BIT || TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
-    FAIL;
-
-  model = memmodel_from_int (INTVAL (operands[2]));
-  operands[0] = force_reg (SImode, XEXP (operands[0], 0));
-  expand_mem_thread_fence (model);
-  emit_insn (gen_atomic_storedf_1 (operands[0], operands[1]));
-  if (is_mm_seq_cst (model))
-    expand_mem_thread_fence (model);
-  DONE;
-})
-
-(define_insn "atomic_storedf_1"
-  [(set (mem:DF (match_operand:SI 0 "register_operand" "r,r"))
-	(match_operand:DF 1 "register_operand" "f,r"))
-   (clobber (match_scratch:DF 2 "=X,f"))]
-  "!TARGET_64BIT && !TARGET_DISABLE_FPREGS
-   && !TARGET_SOFT_FLOAT && !TARGET_SYNC_LIBCALL"
-  "@
-   {fstds|fstd} %1,0(%0)
-   {stws|stw} %1,-16(%%sp)\n\t{stws|stw} %R1,-12(%%sp)\n\t{fldds|fldd} -16(%%sp),%2\n\t{fstds|fstd} %2,0(%0)"
-  [(set_attr "type" "move,move")
-   (set_attr "length" "4,16")])
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index bfdda6e2283..084131abd04 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,15 @@
+2018-08-11  John David Anglin
+
+	* config/pa/linux-atomic.c: Update comment.
+	(FETCH_AND_OP_2, OP_AND_FETCH_2, FETCH_AND_OP_WORD, OP_AND_FETCH_WORD,
+	COMPARE_AND_SWAP_2, __sync_val_compare_and_swap_4,
+	SYNC_LOCK_TEST_AND_SET_2, __sync_lock_test_and_set_4): Use
+	__ATOMIC_RELAXED for atomic loads.
+	(SYNC_LOCK_RELEASE_1): New define.  Use __sync_synchronize() and
+	unordered store to release lock.
+	(__sync_lock_release_8): Likewise.
+	(SYNC_LOCK_RELEASE_2): Remove define.
+
 2018-08-02  Nicolas Pitre
 
 	PR libgcc/86512
diff --git a/libgcc/config/pa/linux-atomic.c b/libgcc/config/pa/linux-atomic.c
index 79c89e19900..ddd0b1e9708 100644
--- a/libgcc/config/pa/linux-atomic.c
+++ b/libgcc/config/pa/linux-atomic.c
@@ -28,14 +28,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define EBUSY 16
 #define ENOSYS 251
 
-/* All PA-RISC implementations supported by linux have strongly
-   ordered loads and stores.  Only cache flushes and purges can be
-   delayed.  The data cache implementations are all globally
-   coherent.  Thus, there is no need to synchonize memory accesses.
-
-   GCC automatically issues a asm memory barrier when it encounters
-   a __sync_synchronize builtin.  Thus, we do not need to define this
-   builtin.
+/* PA-RISC 2.0 supports out-of-order execution for loads and stores.
+   Thus, we need to synchronize memory accesses.  For more info, see:
+   "Advanced Performance Features of the 64-bit PA-8000" by Doug Hunt.
 
    We implement byte, short and int versions of each atomic operation
    using the kernel helper defined below.
There is no support for @@ -119,7 +114,7 @@ __kernel_cmpxchg2 (void *mem, const void *oldval, const void *newval, long failure; \ \ do { \ - tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \ + tmp = __atomic_load_n (ptr, __ATOMIC_RELAXED); \ newval = PFX_OP (tmp INF_OP val); \ failure = __kernel_cmpxchg2 (ptr, &tmp, &newval, INDEX); \ } while (failure != 0); \ @@ -156,7 +151,7 @@ FETCH_AND_OP_2 (nand, ~, &, signed char, 1, 0) long failure; \ \ do { \ - tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \ + tmp = __atomic_load_n (ptr, __ATOMIC_RELAXED); \ newval = PFX_OP (tmp INF_OP val); \ failure = __kernel_cmpxchg2 (ptr, &tmp, &newval, INDEX); \ } while (failure != 0); \ @@ -193,7 +188,7 @@ OP_AND_FETCH_2 (nand, ~, &, signed char, 1, 0) long failure; \ \ do { \ - tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \ + tmp = __atomic_load_n (ptr, __ATOMIC_RELAXED); \ failure = __kernel_cmpxchg (ptr, tmp, PFX_OP (tmp INF_OP val)); \ } while (failure != 0); \ \ @@ -215,7 +210,7 @@ FETCH_AND_OP_WORD (nand, ~, &) long failure; \ \ do { \ - tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \ + tmp = __atomic_load_n (ptr, __ATOMIC_RELAXED); \ failure = __kernel_cmpxchg (ptr, tmp, PFX_OP (tmp INF_OP val)); \ } while (failure != 0); \ \ @@ -241,7 +236,7 @@ typedef unsigned char bool; \ while (1) \ { \ - actual_oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \ + actual_oldval = __atomic_load_n (ptr, __ATOMIC_RELAXED); \ \ if (__builtin_expect (oldval != actual_oldval, 0)) \ return actual_oldval; \ @@ -273,7 +268,7 @@ __sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval) while (1) { - actual_oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); + actual_oldval = __atomic_load_n (ptr, __ATOMIC_RELAXED); if (__builtin_expect (oldval != actual_oldval, 0)) return actual_oldval; @@ -300,7 +295,7 @@ TYPE HIDDEN \ long failure; \ \ do { \ - oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \ + oldval = __atomic_load_n (ptr, __ATOMIC_RELAXED); \ failure = __kernel_cmpxchg2 (ptr, &oldval, &val, INDEX); \ } while (failure != 0); \ \ @@ -318,38 +313,31 @@ __sync_lock_test_and_set_4 (int *ptr, int val) int oldval; do { - oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); + oldval = __atomic_load_n (ptr, __ATOMIC_RELAXED); failure = __kernel_cmpxchg (ptr, oldval, val); } while (failure != 0); return oldval; } -#define SYNC_LOCK_RELEASE_2(TYPE, WIDTH, INDEX) \ +void HIDDEN +__sync_lock_release_8 (long long *ptr) +{ + /* All accesses must be complete before we release the lock. */ + __sync_synchronize (); + *(double *)ptr = 0; +} + +#define SYNC_LOCK_RELEASE_1(TYPE, WIDTH) \ void HIDDEN \ __sync_lock_release_##WIDTH (TYPE *ptr) \ { \ - TYPE oldval, zero = 0; \ - long failure; \ - \ - do { \ - oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \ - failure = __kernel_cmpxchg2 (ptr, &oldval, &zero, INDEX); \ - } while (failure != 0); \ + /* All accesses must be complete before we release \ + the lock. */ \ + __sync_synchronize (); \ + *ptr = 0; \ } -SYNC_LOCK_RELEASE_2 (long long, 8, 3) -SYNC_LOCK_RELEASE_2 (short, 2, 1) -SYNC_LOCK_RELEASE_2 (signed char, 1, 0) - -void HIDDEN -__sync_lock_release_4 (int *ptr) -{ - long failure; - int oldval; - - do { - oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); - failure = __kernel_cmpxchg (ptr, oldval, 0); - } while (failure != 0); -} +SYNC_LOCK_RELEASE_1 (int, 4) +SYNC_LOCK_RELEASE_1 (short, 2) +SYNC_LOCK_RELEASE_1 (signed char, 1)
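
For context, here is a minimal sketch of the usage pattern the revised
lock primitives serve.  The names "lock", "counter" and "increment" are
hypothetical; __sync_lock_test_and_set and __sync_lock_release are the
documented GCC builtins, and the comments describe the hppa-linux
expansion after this change:

    /* Minimal spinlock sketch (illustrative, not part of the patch).  */
    static int lock;		/* 0 = free, 1 = held */
    static long counter;

    void
    increment (void)
    {
      /* Acquire: atomically swap 1 into the lock word until it
	 previously held 0.  On hppa-linux this loops on the
	 __kernel_cmpxchg helper; the initial load can now be
	 __ATOMIC_RELAXED because the helper itself acts as a full
	 barrier.  */
      while (__sync_lock_test_and_set (&lock, 1))
	;

      counter++;		/* critical section */

      /* Release: all prior accesses must complete before the lock word
	 is cleared, so a full barrier (__sync_synchronize, i.e. the new
	 "sync"-based memory_barrier insn) followed by an ordinary store
	 of 0 suffices; the old kernel cmpxchg loop is no longer
	 needed.  */
      __sync_lock_release (&lock);
    }

Similarly, for the revised atomic_loaddi fence placement, a sketch of a
sequentially consistent 64-bit load on 32-bit hppa (assuming default
FP-enabled code generation):

    long long x;

    long long
    load_x (void)
    {
      /* __ATOMIC_SEQ_CST: barrier, atomic fldd/fstd/ldw sequence
	 (atomic_loaddi_1), barrier.  For __ATOMIC_ACQUIRE only the
	 trailing barrier is emitted; for __ATOMIC_RELAXED, neither.  */
      return __atomic_load_n (&x, __ATOMIC_SEQ_CST);
    }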