+2015-09-22 Matthew Wahab <matthew.wahab@arm.com>
+
+ * config/aarch64/aarch64-protos.h (aarch64_gen_atomic_ldop):
+ Adjust declaration.
+ * config/aarch64/aarch64.c (aarch64_emit_bic): New.
+ (aarch64_gen_atomic_ldop): Adjust comment. Add parameter
+ out_result. Update to support update-fetch operations.
+ * config/aarch64/atomics.md (aarch64_atomic_exchange<mode>_lse):
+ Adjust for change to aarch64_gen_atomic_ldop.
+ (aarch64_atomic_<atomic_optab><mode>_lse): Likewise.
+ (aarch64_atomic_fetch_<atomic_optab><mode>_lse): Likewise.
+ (atomic_<atomic_optab>_fetch<mode>): Change to an expander.
+ (aarch64_atomic_<atomic_optab>_fetch<mode>): New.
+ (aarch64_atomic_<atomic_optab>_fetch<mode>_lse): New.
+
2015-09-22 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/aarch64-protos.h
void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx);
bool aarch64_atomic_ldop_supported_p (enum rtx_code);
-void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx);
+void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
bool aarch64_gen_adjusted_ldpstp (rtx *, bool, enum machine_mode, RTX_CODE);
aarch64_emit_post_barrier (model);
}
+/* Emit a BIC instruction.  MODE selects the SImode or DImode generator; any
+   other mode reaches gcc_unreachable.  The operands are handed to the
+   and_one_cmpl_lshr<mode>3 generator in the order (DST, S2, SHIFT, S1).
+   NOTE(review): presumably this yields DST = S1 & ~(S2 >> SHIFT); confirm
+   against the and_one_cmpl_lshr patterns in the machine description.  */
+
+static void
+aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
+{
+  rtx (*emit_fn) (rtx, rtx, rtx, rtx);
+
+  if (mode == SImode)
+    emit_fn = gen_and_one_cmpl_lshrsi3;
+  else if (mode == DImode)
+    emit_fn = gen_and_one_cmpl_lshrdi3;
+  else
+    gcc_unreachable ();
+
+  emit_insn (emit_fn (dst, s2, GEN_INT (shift), s1));
+}
+
/* Emit an atomic swap. */
static void
}
/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
- location to store the data read from memory. MEM is the memory location to
- read and modify. MODEL_RTX is the memory ordering to use. VALUE is the
- second operand for the operation. Either OUT_DATA or OUT_RESULT, but not
- both, can be NULL. */
+ location to store the data read from memory. OUT_RESULT is the location to
+ store the result of the operation. MEM is the memory location to read and
+ modify. MODEL_RTX is the memory ordering to use. VALUE is the second
+ operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
+ be NULL. */
void
-aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data,
+aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
rtx mem, rtx value, rtx model_rtx)
{
machine_mode mode = GET_MODE (mem);
if (out_data)
out_data = gen_lowpart (mode, out_data);
+ if (out_result)
+ out_result = gen_lowpart (mode, out_result);
+
/* Make sure the value is in a register, putting it into a destination
register if it needs to be manipulated. */
if (!register_operand (value, mode)
|| code == AND || code == MINUS)
{
- src = out_data;
+ src = out_result ? out_result : out_data;
emit_move_insn (src, gen_lowpart (mode, value));
}
else
}
aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
+
+ /* If necessary, calculate the data in memory after the update by redoing the
+ operation from values in registers. */
+ if (!out_result)
+ return;
+
+ if (short_mode)
+ {
+ src = gen_lowpart (wmode, src);
+ out_data = gen_lowpart (wmode, out_data);
+ out_result = gen_lowpart (wmode, out_result);
+ }
+
+ x = NULL_RTX;
+
+ switch (code)
+ {
+ case MINUS:
+ case PLUS:
+ x = gen_rtx_PLUS (wmode, out_data, src);
+ break;
+ case IOR:
+ x = gen_rtx_IOR (wmode, out_data, src);
+ break;
+ case XOR:
+ x = gen_rtx_XOR (wmode, out_data, src);
+ break;
+ case AND:
+ aarch64_emit_bic (wmode, out_result, out_data, src, 0);
+ return;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_set_insn (out_result, x);
+
+ return;
}
/* Split an atomic operation. */
"&& reload_completed"
[(const_int 0)]
{
- aarch64_gen_atomic_ldop (SET, operands[0], operands[1],
+ aarch64_gen_atomic_ldop (SET, operands[0], NULL, operands[1],
operands[2], operands[3]);
DONE;
}
"&& reload_completed"
[(const_int 0)]
{
- aarch64_gen_atomic_ldop (<CODE>, operands[3], operands[0],
+ aarch64_gen_atomic_ldop (<CODE>, operands[3], NULL, operands[0],
operands[1], operands[2]);
DONE;
}
"&& reload_completed"
[(const_int 0)]
{
- aarch64_gen_atomic_ldop (<CODE>, operands[0], operands[1],
+ aarch64_gen_atomic_ldop (<CODE>, operands[0], NULL, operands[1],
operands[2], operands[3]);
DONE;
}
}
)
-(define_insn_and_split "atomic_<atomic_optab>_fetch<mode>"
+;; Load-operate-store, returning the updated memory data.
+
+(define_expand "atomic_<atomic_optab>_fetch<mode>"
+ [(match_operand:ALLI 0 "register_operand" "")
+  (atomic_op:ALLI
+   (match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
+   (match_operand:ALLI 2 "<atomic_op_operand>" ""))
+  (match_operand:SI 3 "const_int_operand")]
+ ""
+{
+  /* Pick the atomic load-operate (LSE) pattern when it is supported for
+     this operation, and the non-LSE pattern otherwise.  */
+  rtx (*gen) (rtx, rtx, rtx, rtx)
+    = (aarch64_atomic_ldop_supported_p (<CODE>)
+       ? gen_aarch64_atomic_<atomic_optab>_fetch<mode>_lse
+       : gen_aarch64_atomic_<atomic_optab>_fetch<mode>);
+
+  emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
+
+  DONE;
+})
+
+(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(atomic_op:ALLI
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
}
)
+;; Update-fetch using an atomic load-operate instruction.  Operand 0 receives
+;; the result of the operation; operand 4 is a scratch that receives the data
+;; loaded from memory.  The scratch is marked earlyclobber because, after the
+;; split, it is written by the load-operate before operand 2 is read again to
+;; recompute the result, so the two must not share a register.
+
+(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>_lse"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (atomic_op:ALLI
+     (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
+     (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>")))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(match_dup 1)
+       (match_dup 2)
+       (match_operand:SI 3 "const_int_operand")]
+      UNSPECV_ATOMIC_LDOP))
+   (clobber (match_scratch:ALLI 4 "=&r"))]
+  "TARGET_LSE"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    /* operands[4] is OUT_DATA (value loaded from memory), operands[0] is
+       OUT_RESULT (value after the operation).  */
+    aarch64_gen_atomic_ldop (<CODE>, operands[4], operands[0], operands[1],
+			     operands[2], operands[3]);
+    DONE;
+  }
+)
+
(define_insn_and_split "atomic_nand_fetch<mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(not:ALLI
+2015-09-22 Matthew Wahab <matthew.wahab@arm.com>
+
+ * gcc.target/aarch64/atomic-inst-ldadd.c: Add tests for
+ update-fetch operations.
+ * gcc.target/aarch64/atomic-inst-ldlogic.c: Likewise.
+
2015-09-22 Matthew Wahab <matthew.wahab@arm.com>
* gcc.target/aarch64/atomic-inst-ldadd.c: New.
__atomic_fetch_sub (val, foo, MODEL); \
}
+/* Update-fetch test generators.  Each macro defines a function named
+   FNNAME (FN, TY) that calls __atomic_add_fetch or __atomic_sub_fetch on
+   val with memory model MODEL.  The plain variants return the builtin's
+   result; the _NORETURN variants discard it.
+   NOTE(review): foo is declared TY* but is passed as the value operand of
+   the builtin -- confirm this is intended.  */
+#define ADD_LOAD(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_add_fetch (val, foo, MODEL); \
+ }
+
+#define ADD_LOAD_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_add_fetch (val, foo, MODEL); \
+ }
+
+#define SUB_LOAD(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_sub_fetch (val, foo, MODEL); \
+ }
+
+#define SUB_LOAD_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_sub_fetch (val, foo, MODEL); \
+ }
TEST (load_add, LOAD_ADD)
TEST (load_add_notreturn, LOAD_ADD_NORETURN)
TEST (load_sub, LOAD_SUB)
TEST (load_sub_notreturn, LOAD_SUB_NORETURN)
-/* { dg-final { scan-assembler-times "ldaddb\t" 8} } */
-/* { dg-final { scan-assembler-times "ldaddab\t" 16} } */
-/* { dg-final { scan-assembler-times "ldaddlb\t" 8} } */
-/* { dg-final { scan-assembler-times "ldaddalb\t" 16} } */
+TEST (add_load, ADD_LOAD)
+TEST (add_load_notreturn, ADD_LOAD_NORETURN)
+
+TEST (sub_load, SUB_LOAD)
+TEST (sub_load_notreturn, SUB_LOAD_NORETURN)
+
+/* { dg-final { scan-assembler-times "ldaddb\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddab\t" 32} } */
+/* { dg-final { scan-assembler-times "ldaddlb\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddalb\t" 32} } */
-/* { dg-final { scan-assembler-times "ldaddh\t" 8} } */
-/* { dg-final { scan-assembler-times "ldaddah\t" 16} } */
-/* { dg-final { scan-assembler-times "ldaddlh\t" 8} } */
-/* { dg-final { scan-assembler-times "ldaddalh\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddh\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddah\t" 32} } */
+/* { dg-final { scan-assembler-times "ldaddlh\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddalh\t" 32} } */
-/* { dg-final { scan-assembler-times "ldadd\t" 16} } */
-/* { dg-final { scan-assembler-times "ldadda\t" 32} } */
-/* { dg-final { scan-assembler-times "ldaddl\t" 16} } */
-/* { dg-final { scan-assembler-times "ldaddal\t" 32} } */
+/* { dg-final { scan-assembler-times "ldadd\t" 32} } */
+/* { dg-final { scan-assembler-times "ldadda\t" 64} } */
+/* { dg-final { scan-assembler-times "ldaddl\t" 32} } */
+/* { dg-final { scan-assembler-times "ldaddal\t" 64} } */
/* { dg-final { scan-assembler-not "ldaxr\t" } } */
/* { dg-final { scan-assembler-not "stlxr\t" } } */
__atomic_fetch_xor (val, foo, MODEL); \
}
+/* Update-fetch test generators for the logical operations.  Each macro
+   defines a function named FNNAME (FN, TY) that calls __atomic_or_fetch,
+   __atomic_and_fetch or __atomic_xor_fetch on val with memory model MODEL.
+   The plain variants return the builtin's result; the _NORETURN variants
+   discard it.
+   NOTE(review): foo is declared TY* but is passed as the value operand of
+   the builtin -- confirm this is intended.  */
+#define OR_LOAD(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_or_fetch (val, foo, MODEL); \
+ }
+
+#define OR_LOAD_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_or_fetch (val, foo, MODEL); \
+ }
+
+#define AND_LOAD(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_and_fetch (val, foo, MODEL); \
+ }
+
+#define AND_LOAD_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_and_fetch (val, foo, MODEL); \
+ }
+
+#define XOR_LOAD(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_xor_fetch (val, foo, MODEL); \
+ }
+
+#define XOR_LOAD_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_xor_fetch (val, foo, MODEL); \
+ }
+
TEST (load_or, LOAD_OR)
TEST (load_or_notreturn, LOAD_OR_NORETURN)
TEST (load_xor, LOAD_XOR)
TEST (load_xor_notreturn, LOAD_XOR_NORETURN)
+TEST (or_load, OR_LOAD)
+TEST (or_load_notreturn, OR_LOAD_NORETURN)
+
+TEST (and_load, AND_LOAD)
+TEST (and_load_notreturn, AND_LOAD_NORETURN)
+
+TEST (xor_load, XOR_LOAD)
+TEST (xor_load_notreturn, XOR_LOAD_NORETURN)
+
+
/* Load-OR. */
-/* { dg-final { scan-assembler-times "ldsetb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldsetab\t" 8} } */
-/* { dg-final { scan-assembler-times "ldsetlb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldsetalb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetab\t" 16} } */
+/* { dg-final { scan-assembler-times "ldsetlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetalb\t" 16} } */
-/* { dg-final { scan-assembler-times "ldseth\t" 4} } */
-/* { dg-final { scan-assembler-times "ldsetah\t" 8} } */
-/* { dg-final { scan-assembler-times "ldsetlh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldsetalh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldseth\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetah\t" 16} } */
+/* { dg-final { scan-assembler-times "ldsetlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetalh\t" 16} } */
-/* { dg-final { scan-assembler-times "ldset\t" 8} } */
-/* { dg-final { scan-assembler-times "ldseta\t" 16} } */
-/* { dg-final { scan-assembler-times "ldsetl\t" 8} } */
-/* { dg-final { scan-assembler-times "ldsetal\t" 16} } */
+/* { dg-final { scan-assembler-times "ldset\t" 16} } */
+/* { dg-final { scan-assembler-times "ldseta\t" 32} } */
+/* { dg-final { scan-assembler-times "ldsetl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldsetal\t" 32} } */
/* Load-AND. */
-/* { dg-final { scan-assembler-times "ldclrb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldclrab\t" 8} } */
-/* { dg-final { scan-assembler-times "ldclrlb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldclralb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrab\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclrlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclralb\t" 16} } */
-/* { dg-final { scan-assembler-times "ldclrh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldclrah\t" 8} } */
-/* { dg-final { scan-assembler-times "ldclrlh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldclralh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrah\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclrlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclralh\t" 16} } */
-/* { dg-final { scan-assembler-times "ldclr\t" 8} */
-/* { dg-final { scan-assembler-times "ldclra\t" 16} } */
-/* { dg-final { scan-assembler-times "ldclrl\t" 8} } */
-/* { dg-final { scan-assembler-times "ldclral\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclr\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclra\t" 32} } */
+/* { dg-final { scan-assembler-times "ldclrl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclral\t" 32} } */
/* Load-XOR. */
-/* { dg-final { scan-assembler-times "ldeorb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldeorab\t" 8} } */
-/* { dg-final { scan-assembler-times "ldeorlb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldeoralb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorab\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeorlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeoralb\t" 16} } */
-/* { dg-final { scan-assembler-times "ldeorh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldeorah\t" 8} } */
-/* { dg-final { scan-assembler-times "ldeorlh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldeoralh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorah\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeorlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeoralh\t" 16} } */
-/* { dg-final { scan-assembler-times "ldeor\t" 8} */
-/* { dg-final { scan-assembler-times "ldeora\t" 16} } */
-/* { dg-final { scan-assembler-times "ldeorl\t" 8} } */
-/* { dg-final { scan-assembler-times "ldeoral\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeor\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeora\t" 32} } */
+/* { dg-final { scan-assembler-times "ldeorl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeoral\t" 32} } */
/* { dg-final { scan-assembler-not "ldaxr\t" } } */
/* { dg-final { scan-assembler-not "stlxr\t" } } */