UNSPEC_TLSGD_PIC
UNSPEC_TLSLDM_PIC
UNSPEC_TLSIE_PIC
+ UNSPEC_MEMORY_BARRIER
])
;; UNSPEC_VOLATILE:
;; doubleword loads and stores are not guaranteed to be atomic
;; when referencing the I/O address space.
-;; The kernel cmpxchg operation on linux is not atomic with respect to
-;; memory stores on SMP machines, so we must do stores using a cmpxchg
-;; operation.
-
;; These patterns are at the bottom so the non-atomic versions are preferred.
-;; Implement atomic QImode store using exchange.
-
-(define_expand "atomic_storeqi"
- [(match_operand:QI 0 "memory_operand") ;; memory
- (match_operand:QI 1 "register_operand") ;; val out
- (match_operand:SI 2 "const_int_operand")] ;; model
- ""
-{
- if (TARGET_SYNC_LIBCALL)
- {
- rtx mem = operands[0];
- rtx val = operands[1];
- if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
- DONE;
- }
- FAIL;
-})
-
-;; Implement atomic HImode stores using exchange.
-
-(define_expand "atomic_storehi"
- [(match_operand:HI 0 "memory_operand") ;; memory
- (match_operand:HI 1 "register_operand") ;; val out
- (match_operand:SI 2 "const_int_operand")] ;; model
- ""
-{
- if (TARGET_SYNC_LIBCALL)
- {
- rtx mem = operands[0];
- rtx val = operands[1];
- if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
- DONE;
- }
- FAIL;
-})
-
-;; Implement atomic SImode store using exchange.
-
-(define_expand "atomic_storesi"
- [(match_operand:SI 0 "memory_operand") ;; memory
- (match_operand:SI 1 "register_operand") ;; val out
- (match_operand:SI 2 "const_int_operand")] ;; model
- ""
-{
- if (TARGET_SYNC_LIBCALL)
- {
- rtx mem = operands[0];
- rtx val = operands[1];
- if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
- DONE;
- }
- FAIL;
-})
-
-;; Implement atomic SFmode store using exchange.
-
-(define_expand "atomic_storesf"
- [(match_operand:SF 0 "memory_operand") ;; memory
- (match_operand:SF 1 "register_operand") ;; val out
- (match_operand:SI 2 "const_int_operand")] ;; model
- ""
-{
- if (TARGET_SYNC_LIBCALL)
- {
- rtx mem = operands[0];
- rtx val = operands[1];
- if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
- DONE;
- }
- FAIL;
-})
-
;; Implement atomic DImode load using 64-bit floating point load.
(define_expand "atomic_loaddi"
model = memmodel_from_int (INTVAL (operands[2]));
operands[1] = force_reg (SImode, XEXP (operands[1], 0));
- expand_mem_thread_fence (model);
- emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1]));
if (is_mm_seq_cst (model))
expand_mem_thread_fence (model);
+ emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1]));
+ expand_mem_thread_fence (model);
DONE;
})
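
A note on the reordering above: the leading fence is now emitted only for
seq_cst and the trailing fence unconditionally (expand_mem_thread_fence is a
no-op for a relaxed model), which is the standard load-acquire mapping.  A
minimal C-level sketch of what the expander now generates, with an invented
name; the volatile access stands in for the single fldd of atomic_loaddi_1,
which plain C would otherwise split into two word loads on 32-bit hppa:

    /* Sketch only, not part of the patch.  */
    unsigned long long
    atomic_loaddi_sketch (unsigned long long *p, int model)
    {
      if (model == __ATOMIC_SEQ_CST)
        __atomic_thread_fence (model);	/* leading barrier, seq_cst only */
      unsigned long long val = *(volatile unsigned long long *) p;
      __atomic_thread_fence (model);	/* trailing barrier: acquire */
      return val;
    }
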
(define_insn "atomic_loaddi_1"
- [(set (match_operand:DI 0 "register_operand" "=f,r")
- (mem:DI (match_operand:SI 1 "register_operand" "r,r")))
- (clobber (match_scratch:DI 2 "=X,f"))]
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mem:DI (match_operand:SI 1 "register_operand" "r")))
+ (clobber (match_scratch:DI 2 "=f"))]
"!TARGET_64BIT && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT"
- "@
- {fldds|fldd} 0(%1),%0
- {fldds|fldd} 0(%1),%2\n\t{fstds|fstd} %2,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0\n\t{ldws|ldw} -12(%%sp),%R0"
- [(set_attr "type" "move,move")
- (set_attr "length" "4,16")])
+ "{fldds|fldd} 0(%1),%2\n\t{fstds|fstd} %2,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0\n\t{ldws|ldw} -12(%%sp),%R0"
+ [(set_attr "type" "move")
+ (set_attr "length" "16")])
;; Implement atomic DImode store.
(define_expand "atomic_storedi"
[(match_operand:DI 0 "memory_operand") ;; memory
- (match_operand:DI 1 "register_operand") ;; val out
+ (match_operand:DI 1 "reg_or_cint_move_operand") ;; val
(match_operand:SI 2 "const_int_operand")] ;; model
""
{
enum memmodel model;
- if (TARGET_SYNC_LIBCALL)
- {
- rtx mem = operands[0];
- rtx val = operands[1];
- if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
- DONE;
- }
-
if (TARGET_64BIT || TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
FAIL;
model = memmodel_from_int (INTVAL (operands[2]));
operands[0] = force_reg (SImode, XEXP (operands[0], 0));
+ if (operands[1] != CONST0_RTX (DImode))
+ operands[1] = force_reg (DImode, operands[1]);
expand_mem_thread_fence (model);
emit_insn (gen_atomic_storedi_1 (operands[0], operands[1]));
if (is_mm_seq_cst (model))
(define_insn "atomic_storedi_1"
[(set (mem:DI (match_operand:SI 0 "register_operand" "r,r"))
- (match_operand:DI 1 "register_operand" "f,r"))
+ (match_operand:DI 1 "reg_or_0_operand" "M,r"))
(clobber (match_scratch:DI 2 "=X,f"))]
- "!TARGET_64BIT && !TARGET_DISABLE_FPREGS
- && !TARGET_SOFT_FLOAT && !TARGET_SYNC_LIBCALL"
+ "!TARGET_64BIT && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT"
"@
- {fstds|fstd} %1,0(%0)
+ {fstds|fstd} %%fr0,0(%0)
{stws|stw} %1,-16(%%sp)\n\t{stws|stw} %R1,-12(%%sp)\n\t{fldds|fldd} -16(%%sp),%2\n\t{fstds|fstd} %2,0(%0)"
[(set_attr "type" "move,move")
(set_attr "length" "4,16")])
-;; Implement atomic DFmode load using 64-bit floating point load.
+;; PA 2.0 hardware supports out-of-order execution of loads and stores, so
+;; we need a memory barrier to enforce program order for memory references.
+;; Since we want PA 1.x code to be PA 2.0 compatible, we also need the
+;; barrier when generating PA 1.x code.
-(define_expand "atomic_loaddf"
- [(match_operand:DF 0 "register_operand") ;; val out
- (match_operand:DF 1 "memory_operand") ;; memory
- (match_operand:SI 2 "const_int_operand")] ;; model
+(define_expand "memory_barrier"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
""
{
- enum memmodel model;
-
- if (TARGET_64BIT || TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
- FAIL;
-
- model = memmodel_from_int (INTVAL (operands[2]));
- operands[1] = force_reg (SImode, XEXP (operands[1], 0));
- expand_mem_thread_fence (model);
- emit_insn (gen_atomic_loaddf_1 (operands[0], operands[1]));
- if (is_mm_seq_cst (model))
- expand_mem_thread_fence (model);
- DONE;
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
})
-(define_insn "atomic_loaddf_1"
- [(set (match_operand:DF 0 "register_operand" "=f,r")
- (mem:DF (match_operand:SI 1 "register_operand" "r,r")))
- (clobber (match_scratch:DF 2 "=X,f"))]
- "!TARGET_64BIT && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT"
- "@
- {fldds|fldd} 0(%1),%0
- {fldds|fldd} 0(%1),%2\n\t{fstds|fstd} %2,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0\n\t{ldws|ldw} -12(%%sp),%R0"
- [(set_attr "type" "move,move")
- (set_attr "length" "4,16")])
-
-;; Implement atomic DFmode store using 64-bit floating point store.
-
-(define_expand "atomic_storedf"
- [(match_operand:DF 0 "memory_operand") ;; memory
- (match_operand:DF 1 "register_operand") ;; val out
- (match_operand:SI 2 "const_int_operand")] ;; model
+(define_insn "*memory_barrier"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
""
-{
- enum memmodel model;
-
- if (TARGET_SYNC_LIBCALL)
- {
- rtx mem = operands[0];
- rtx val = operands[1];
- if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
- DONE;
- }
-
- if (TARGET_64BIT || TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
- FAIL;
-
- model = memmodel_from_int (INTVAL (operands[2]));
- operands[0] = force_reg (SImode, XEXP (operands[0], 0));
- expand_mem_thread_fence (model);
- emit_insn (gen_atomic_storedf_1 (operands[0], operands[1]));
- if (is_mm_seq_cst (model))
- expand_mem_thread_fence (model);
- DONE;
-})
-
-(define_insn "atomic_storedf_1"
- [(set (mem:DF (match_operand:SI 0 "register_operand" "r,r"))
- (match_operand:DF 1 "register_operand" "f,r"))
- (clobber (match_scratch:DF 2 "=X,f"))]
- "!TARGET_64BIT && !TARGET_DISABLE_FPREGS
- && !TARGET_SOFT_FLOAT && !TARGET_SYNC_LIBCALL"
- "@
- {fstds|fstd} %1,0(%0)
- {stws|stw} %1,-16(%%sp)\n\t{stws|stw} %R1,-12(%%sp)\n\t{fldds|fldd} -16(%%sp),%2\n\t{fstds|fstd} %2,0(%0)"
- [(set_attr "type" "move,move")
- (set_attr "length" "4,16")])
+ "sync"
+ [(set_attr "type" "binary")
+ (set_attr "length" "4")])
#define EBUSY 16
#define ENOSYS 251
-/* All PA-RISC implementations supported by linux have strongly
- ordered loads and stores. Only cache flushes and purges can be
- delayed. The data cache implementations are all globally
- coherent. Thus, there is no need to synchonize memory accesses.
-
- GCC automatically issues a asm memory barrier when it encounters
- a __sync_synchronize builtin. Thus, we do not need to define this
- builtin.
+/* PA-RISC 2.0 supports out-of-order execution for loads and stores.
+ Thus, we need to synchronize memory accesses. For more info, see:
+ "Advanced Performance Features of the 64-bit PA-8000" by Doug Hunt.
We implement byte, short and int versions of each atomic operation
using the kernel helper defined below. There is no support for
long failure; \
\
do { \
- tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \
+ tmp = __atomic_load_n (ptr, __ATOMIC_RELAXED); \
newval = PFX_OP (tmp INF_OP val); \
failure = __kernel_cmpxchg2 (ptr, &tmp, &newval, INDEX); \
} while (failure != 0); \
long failure; \
\
do { \
- tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \
+ tmp = __atomic_load_n (ptr, __ATOMIC_RELAXED); \
newval = PFX_OP (tmp INF_OP val); \
failure = __kernel_cmpxchg2 (ptr, &tmp, &newval, INDEX); \
} while (failure != 0); \
long failure; \
\
do { \
- tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \
+ tmp = __atomic_load_n (ptr, __ATOMIC_RELAXED); \
failure = __kernel_cmpxchg (ptr, tmp, PFX_OP (tmp INF_OP val)); \
} while (failure != 0); \
\
long failure; \
\
do { \
- tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \
+ tmp = __atomic_load_n (ptr, __ATOMIC_RELAXED); \
failure = __kernel_cmpxchg (ptr, tmp, PFX_OP (tmp INF_OP val)); \
} while (failure != 0); \
\
\
while (1) \
{ \
- actual_oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \
+ actual_oldval = __atomic_load_n (ptr, __ATOMIC_RELAXED); \
\
if (__builtin_expect (oldval != actual_oldval, 0)) \
return actual_oldval; \
while (1)
{
- actual_oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);
+ actual_oldval = __atomic_load_n (ptr, __ATOMIC_RELAXED);
if (__builtin_expect (oldval != actual_oldval, 0))
return actual_oldval;
long failure; \
\
do { \
- oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \
+ oldval = __atomic_load_n (ptr, __ATOMIC_RELAXED); \
failure = __kernel_cmpxchg2 (ptr, &oldval, &val, INDEX); \
} while (failure != 0); \
\
int oldval;
do {
- oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);
+ oldval = __atomic_load_n (ptr, __ATOMIC_RELAXED);
failure = __kernel_cmpxchg (ptr, oldval, val);
} while (failure != 0);
return oldval;
}
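
All the SEQ_CST-to-RELAXED changes in these loops follow one argument: the
preliminary load only seeds the compare-and-swap, and __kernel_cmpxchg is
itself a full barrier, so ordering the load adds nothing.  Spelled out for
the word-sized case (fetch_and_add_sketch is an invented name and assumes
this file's __kernel_cmpxchg helper):

    /* Sketch only: tmp is just a guess; if another thread changed
       *ptr in the meantime, __kernel_cmpxchg fails and we retry.
       The helper, not the load, provides the synchronization.  */
    static int
    fetch_and_add_sketch (int *ptr, int val)
    {
      int tmp;
      long failure;

      do {
          tmp = __atomic_load_n (ptr, __ATOMIC_RELAXED);
          failure = __kernel_cmpxchg (ptr, tmp, tmp + val);
      } while (failure != 0);

      return tmp;
    }
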
-#define SYNC_LOCK_RELEASE_2(TYPE, WIDTH, INDEX) \
+void HIDDEN
+__sync_lock_release_8 (long long *ptr)
+{
+ /* All accesses must be complete before we release the lock. */
+ __sync_synchronize ();
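+ /* A double store is emitted as a single fstd, so clearing through
+    a double pointer keeps the 64-bit access atomic
+    (cf. atomic_storedi_1 above).  */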
+ *(double *)ptr = 0;
+}
+
+#define SYNC_LOCK_RELEASE_1(TYPE, WIDTH) \
void HIDDEN \
__sync_lock_release_##WIDTH (TYPE *ptr) \
{ \
- TYPE oldval, zero = 0; \
- long failure; \
- \
- do { \
- oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \
- failure = __kernel_cmpxchg2 (ptr, &oldval, &zero, INDEX); \
- } while (failure != 0); \
+ /* All accesses must be complete before we release \
+ the lock. */ \
+ __sync_synchronize (); \
+ *ptr = 0; \
}
-SYNC_LOCK_RELEASE_2 (long long, 8, 3)
-SYNC_LOCK_RELEASE_2 (short, 2, 1)
-SYNC_LOCK_RELEASE_2 (signed char, 1, 0)
-
-void HIDDEN
-__sync_lock_release_4 (int *ptr)
-{
- long failure;
- int oldval;
-
- do {
- oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);
- failure = __kernel_cmpxchg (ptr, oldval, 0);
- } while (failure != 0);
-}
+SYNC_LOCK_RELEASE_1 (int, 4)
+SYNC_LOCK_RELEASE_1 (short, 2)
+SYNC_LOCK_RELEASE_1 (signed char, 1)
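
For reference, the acquire/release pairing these helpers provide, as a
minimal hypothetical spinlock built on the __sync builtins:

    static int lock;

    void
    with_lock (void (*fn) (void))
    {
      while (__sync_lock_test_and_set (&lock, 1))	/* acquire */
        ;						/* spin until free */
      fn ();					/* protected accesses */
      __sync_lock_release (&lock);		/* barrier, then plain store */
    }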