From 68729b062d576417d74f9b807e2d9e8f659d2d06 Mon Sep 17 00:00:00 2001 From: Matthew Wahab Date: Tue, 22 Sep 2015 09:41:15 +0000 Subject: [PATCH] [AArch64] Use atomic load-operate instructions for update-fetch patterns. 2015-09-22 Matthew Wahab * config/aarch64/aarch64-protos.h (aarch64_gen_atomic_ldop): Adjust declaration. * config/aarch64/aarch64.c (aarch64_emit_bic): New. (aarch64_gen_atomic_ldop): Adjust comment. Add parameter out_result. Update to support update-fetch operations. * config/aarch64/atomics.md (aarch64_atomic_exchange<mode>_lse): Adjust for change to aarch64_gen_atomic_ldop. (aarch64_atomic_<atomic_optab><mode>_lse): Likewise. (aarch64_atomic_fetch_<atomic_optab><mode>_lse): Likewise. (atomic_<atomic_optab>_fetch<mode>): Change to an expander. (aarch64_atomic_<atomic_optab>_fetch<mode>): New. (aarch64_atomic_<atomic_optab>_fetch<mode>_lse): New. gcc/testsuite 2015-09-22 Matthew Wahab * gcc.target/aarch64/atomic-inst-ldadd.c: Add tests for update-fetch operations. * gcc.target/aarch64/atomic-inst-ldlogic.c: Likewise. From-SVN: r228002 --- gcc/ChangeLog | 15 +++ gcc/config/aarch64/aarch64-protos.h | 2 +- gcc/config/aarch64/aarch64.c | 72 ++++++++++- gcc/config/aarch64/atomics.md | 55 +++++++- gcc/testsuite/ChangeLog | 6 + .../gcc.target/aarch64/atomic-inst-ldadd.c | 53 ++++++-- .../gcc.target/aarch64/atomic-inst-ldlogic.c | 118 ++++++++++++------ 7 files changed, 262 insertions(+), 59 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ad6ec7f0353..50a950ec3f0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2015-09-22 Matthew Wahab + + * config/aarch64/aarch64-protos.h (aarch64_gen_atomic_ldop): + Adjust declaration. + * config/aarch64/aarch64.c (aarch64_emit_bic): New. + (aarch64_gen_atomic_ldop): Adjust comment. Add parameter + out_result. Update to support update-fetch operations. + * config/aarch64/atomics.md (aarch64_atomic_exchange<mode>_lse): + Adjust for change to aarch64_gen_atomic_ldop. + (aarch64_atomic_<atomic_optab><mode>_lse): Likewise. + (aarch64_atomic_fetch_<atomic_optab><mode>_lse): Likewise. + (atomic_<atomic_optab>_fetch<mode>): Change to an expander.
+ (aarch64_atomic_<atomic_optab>_fetch<mode>): New. + (aarch64_atomic_<atomic_optab>_fetch<mode>_lse): New. + 2015-09-22 Matthew Wahab * config/aarch64/aarch64-protos.h diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 76ebd6f37b7..dd8ebcca167 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -380,7 +380,7 @@ void aarch64_split_compare_and_swap (rtx op[]); void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx); bool aarch64_atomic_ldop_supported_p (enum rtx_code); -void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx); +void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx); void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); bool aarch64_gen_adjusted_ldpstp (rtx *, bool, enum machine_mode, RTX_CODE); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 93e36b76a29..3c8c058edda 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -11018,6 +11018,25 @@ aarch64_split_compare_and_swap (rtx operands[]) aarch64_emit_post_barrier (model); } +/* Emit a BIC instruction. */ + +static void +aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift) +{ + rtx shift_rtx = GEN_INT (shift); + rtx (*gen) (rtx, rtx, rtx, rtx); + + switch (mode) + { + case SImode: gen = gen_and_one_cmpl_lshrsi3; break; + case DImode: gen = gen_and_one_cmpl_lshrdi3; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (dst, s2, shift_rtx, s1)); +} + /* Emit an atomic swap. */ static void @@ -11112,13 +11131,14 @@ aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code, } /* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the - location to store the data read from memory. MEM is the memory location to - read and modify. MODEL_RTX is the memory ordering to use. VALUE is the - second operand for the operation. Either OUT_DATA or OUT_RESULT, but not - both, can be NULL.
*/ + location to store the data read from memory. OUT_RESULT is the location to + store the result of the operation. MEM is the memory location to read and + modify. MODEL_RTX is the memory ordering to use. VALUE is the second + operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can + be NULL. */ void -aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, +aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result, rtx mem, rtx value, rtx model_rtx) { machine_mode mode = GET_MODE (mem); @@ -11131,12 +11151,15 @@ aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, if (out_data) out_data = gen_lowpart (mode, out_data); + if (out_result) + out_result = gen_lowpart (mode, out_result); + /* Make sure the value is in a register, putting it into a destination register if it needs to be manipulated. */ if (!register_operand (value, mode) || code == AND || code == MINUS) { - src = out_data; + src = out_result ? out_result : out_data; emit_move_insn (src, gen_lowpart (mode, value)); } else @@ -11202,6 +11225,43 @@ aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, } aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx); + + /* If necessary, calculate the data in memory after the update by redoing the + operation from values in registers. */ + if (!out_result) + return; + + if (short_mode) + { + src = gen_lowpart (wmode, src); + out_data = gen_lowpart (wmode, out_data); + out_result = gen_lowpart (wmode, out_result); + } + + x = NULL_RTX; + + switch (code) + { + case MINUS: + case PLUS: + x = gen_rtx_PLUS (wmode, out_data, src); + break; + case IOR: + x = gen_rtx_IOR (wmode, out_data, src); + break; + case XOR: + x = gen_rtx_XOR (wmode, out_data, src); + break; + case AND: + aarch64_emit_bic (wmode, out_result, out_data, src, 0); + return; + default: + gcc_unreachable (); + } + + emit_set_insn (out_result, x); + + return; } /* Split an atomic operation. 
*/ diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md index e0d885668f1..e7ac5f6fc1c 100644 --- a/gcc/config/aarch64/atomics.md +++ b/gcc/config/aarch64/atomics.md @@ -219,7 +219,7 @@ "&& reload_completed" [(const_int 0)] { - aarch64_gen_atomic_ldop (SET, operands[0], operands[1], + aarch64_gen_atomic_ldop (SET, operands[0], NULL, operands[1], operands[2], operands[3]); DONE; } @@ -280,7 +280,7 @@ "&& reload_completed" [(const_int 0)] { - aarch64_gen_atomic_ldop (<CODE>, operands[3], operands[0], + aarch64_gen_atomic_ldop (<CODE>, operands[3], NULL, operands[0], operands[1], operands[2]); DONE; } @@ -368,7 +368,7 @@ "&& reload_completed" [(const_int 0)] { - aarch64_gen_atomic_ldop (<CODE>, operands[0], operands[1], + aarch64_gen_atomic_ldop (<CODE>, operands[0], NULL, operands[1], operands[2], operands[3]); DONE; } @@ -398,7 +398,31 @@ } ) -(define_insn_and_split "atomic_<atomic_optab>_fetch<mode>" +;; Load-operate-store, returning the updated memory data. + +(define_expand "atomic_<atomic_optab>_fetch<mode>" + [(match_operand:ALLI 0 "register_operand" "") + (atomic_op:ALLI + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") + (match_operand:ALLI 2 "<atomic_op_operand>" "")) + (match_operand:SI 3 "const_int_operand")] + "" +{ + rtx (*gen) (rtx, rtx, rtx, rtx); + rtx value = operands[2]; + + /* Use an atomic load-operate instruction when possible.
*/ + if (aarch64_atomic_ldop_supported_p (<CODE>)) + gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>_lse; + else + gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>; + + emit_insn (gen (operands[0], operands[1], value, operands[3])); + + DONE; +}) + +(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>" [(set (match_operand:ALLI 0 "register_operand" "=&r") (atomic_op:ALLI (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q") @@ -421,6 +445,29 @@ } ) +(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>_lse" + [(set (match_operand:ALLI 0 "register_operand" "=&r") + (atomic_op:ALLI + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q") + (match_operand:ALLI 2 "<atomic_op_operand>" "r"))) + (set (match_dup 1) + (unspec_volatile:ALLI + [(match_dup 1) + (match_dup 2) + (match_operand:SI 3 "const_int_operand")] + UNSPECV_ATOMIC_LDOP)) + (clobber (match_scratch:ALLI 4 "=r"))] + "TARGET_LSE" + "#" + "&& reload_completed" + [(const_int 0)] + { + aarch64_gen_atomic_ldop (<CODE>, operands[4], operands[0], operands[1], + operands[2], operands[3]); + DONE; + } +) + (define_insn_and_split "atomic_nand_fetch<mode>" [(set (match_operand:ALLI 0 "register_operand" "=&r") (not:ALLI diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4c5364a3900..9f0686b96e2 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2015-09-22 Matthew Wahab + + * gcc.target/aarch64/atomic-inst-ldadd.c: Add tests for + update-fetch operations. + * gcc.target/aarch64/atomic-inst-ldlogic.c: Likewise. + 2015-09-22 Matthew Wahab * gcc.target/aarch64/atomic-inst-ldadd.c: New.
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c index c21d2eda6c0..4b2282c6861 100644 --- a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c +++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c @@ -31,6 +31,29 @@ __atomic_fetch_sub (val, foo, MODEL); \ } +#define ADD_LOAD(FN, TY, MODEL) \ + TY FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + return __atomic_add_fetch (val, foo, MODEL); \ + } + +#define ADD_LOAD_NORETURN(FN, TY, MODEL) \ + void FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + __atomic_add_fetch (val, foo, MODEL); \ + } + +#define SUB_LOAD(FN, TY, MODEL) \ + TY FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + return __atomic_sub_fetch (val, foo, MODEL); \ + } + +#define SUB_LOAD_NORETURN(FN, TY, MODEL) \ + void FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + __atomic_sub_fetch (val, foo, MODEL); \ + } TEST (load_add, LOAD_ADD) TEST (load_add_notreturn, LOAD_ADD_NORETURN) @@ -38,20 +61,26 @@ TEST (load_add_notreturn, LOAD_ADD_NORETURN) TEST (load_sub, LOAD_SUB) TEST (load_sub_notreturn, LOAD_SUB_NORETURN) -/* { dg-final { scan-assembler-times "ldaddb\t" 8} } */ -/* { dg-final { scan-assembler-times "ldaddab\t" 16} } */ -/* { dg-final { scan-assembler-times "ldaddlb\t" 8} } */ -/* { dg-final { scan-assembler-times "ldaddalb\t" 16} } */ +TEST (add_load, ADD_LOAD) +TEST (add_load_notreturn, ADD_LOAD_NORETURN) + +TEST (sub_load, SUB_LOAD) +TEST (sub_load_notreturn, SUB_LOAD_NORETURN) + +/* { dg-final { scan-assembler-times "ldaddb\t" 16} } */ +/* { dg-final { scan-assembler-times "ldaddab\t" 32} } */ +/* { dg-final { scan-assembler-times "ldaddlb\t" 16} } */ +/* { dg-final { scan-assembler-times "ldaddalb\t" 32} } */ -/* { dg-final { scan-assembler-times "ldaddh\t" 8} } */ -/* { dg-final { scan-assembler-times "ldaddah\t" 16} } */ -/* { dg-final { scan-assembler-times "ldaddlh\t" 8} } */ -/* { dg-final { scan-assembler-times "ldaddalh\t" 16} } */ +/* { dg-final { scan-assembler-times 
"ldaddh\t" 16} } */ +/* { dg-final { scan-assembler-times "ldaddah\t" 32} } */ +/* { dg-final { scan-assembler-times "ldaddlh\t" 16} } */ +/* { dg-final { scan-assembler-times "ldaddalh\t" 32} } */ -/* { dg-final { scan-assembler-times "ldadd\t" 16} } */ -/* { dg-final { scan-assembler-times "ldadda\t" 32} } */ -/* { dg-final { scan-assembler-times "ldaddl\t" 16} } */ -/* { dg-final { scan-assembler-times "ldaddal\t" 32} } */ +/* { dg-final { scan-assembler-times "ldadd\t" 32} } */ +/* { dg-final { scan-assembler-times "ldadda\t" 64} } */ +/* { dg-final { scan-assembler-times "ldaddl\t" 32} } */ +/* { dg-final { scan-assembler-times "ldaddal\t" 64} } */ /* { dg-final { scan-assembler-not "ldaxr\t" } } */ /* { dg-final { scan-assembler-not "stlxr\t" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c index fd0f484cdab..4879d52b9b4 100644 --- a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c +++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c @@ -43,6 +43,42 @@ __atomic_fetch_xor (val, foo, MODEL); \ } +#define OR_LOAD(FN, TY, MODEL) \ + TY FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + return __atomic_or_fetch (val, foo, MODEL); \ + } + +#define OR_LOAD_NORETURN(FN, TY, MODEL) \ + void FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + __atomic_or_fetch (val, foo, MODEL); \ + } + +#define AND_LOAD(FN, TY, MODEL) \ + TY FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + return __atomic_and_fetch (val, foo, MODEL); \ + } + +#define AND_LOAD_NORETURN(FN, TY, MODEL) \ + void FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + __atomic_and_fetch (val, foo, MODEL); \ + } + +#define XOR_LOAD(FN, TY, MODEL) \ + TY FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + return __atomic_xor_fetch (val, foo, MODEL); \ + } + +#define XOR_LOAD_NORETURN(FN, TY, MODEL) \ + void FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + __atomic_xor_fetch (val, foo, MODEL); \ + } + TEST (load_or, LOAD_OR) TEST (load_or_notreturn, 
LOAD_OR_NORETURN) @@ -53,56 +89,66 @@ TEST (load_and_notreturn, LOAD_AND_NORETURN) TEST (load_xor, LOAD_XOR) TEST (load_xor_notreturn, LOAD_XOR_NORETURN) +TEST (or_load, OR_LOAD) +TEST (or_load_notreturn, OR_LOAD_NORETURN) + +TEST (and_load, AND_LOAD) +TEST (and_load_notreturn, AND_LOAD_NORETURN) + +TEST (xor_load, XOR_LOAD) +TEST (xor_load_notreturn, XOR_LOAD_NORETURN) + + /* Load-OR. */ -/* { dg-final { scan-assembler-times "ldsetb\t" 4} } */ -/* { dg-final { scan-assembler-times "ldsetab\t" 8} } */ -/* { dg-final { scan-assembler-times "ldsetlb\t" 4} } */ -/* { dg-final { scan-assembler-times "ldsetalb\t" 8} } */ +/* { dg-final { scan-assembler-times "ldsetb\t" 8} } */ +/* { dg-final { scan-assembler-times "ldsetab\t" 16} } */ +/* { dg-final { scan-assembler-times "ldsetlb\t" 8} } */ +/* { dg-final { scan-assembler-times "ldsetalb\t" 16} } */ -/* { dg-final { scan-assembler-times "ldseth\t" 4} } */ -/* { dg-final { scan-assembler-times "ldsetah\t" 8} } */ -/* { dg-final { scan-assembler-times "ldsetlh\t" 4} } */ -/* { dg-final { scan-assembler-times "ldsetalh\t" 8} } */ +/* { dg-final { scan-assembler-times "ldseth\t" 8} } */ +/* { dg-final { scan-assembler-times "ldsetah\t" 16} } */ +/* { dg-final { scan-assembler-times "ldsetlh\t" 8} } */ +/* { dg-final { scan-assembler-times "ldsetalh\t" 16} } */ -/* { dg-final { scan-assembler-times "ldset\t" 8} } */ -/* { dg-final { scan-assembler-times "ldseta\t" 16} } */ -/* { dg-final { scan-assembler-times "ldsetl\t" 8} } */ -/* { dg-final { scan-assembler-times "ldsetal\t" 16} } */ +/* { dg-final { scan-assembler-times "ldset\t" 16} } */ +/* { dg-final { scan-assembler-times "ldseta\t" 32} } */ +/* { dg-final { scan-assembler-times "ldsetl\t" 16} } */ +/* { dg-final { scan-assembler-times "ldsetal\t" 32} } */ /* Load-AND. 
*/ -/* { dg-final { scan-assembler-times "ldclrb\t" 4} } */ -/* { dg-final { scan-assembler-times "ldclrab\t" 8} } */ -/* { dg-final { scan-assembler-times "ldclrlb\t" 4} } */ -/* { dg-final { scan-assembler-times "ldclralb\t" 8} } */ +/* { dg-final { scan-assembler-times "ldclrb\t" 8} } */ +/* { dg-final { scan-assembler-times "ldclrab\t" 16} } */ +/* { dg-final { scan-assembler-times "ldclrlb\t" 8} } */ +/* { dg-final { scan-assembler-times "ldclralb\t" 16} } */ -/* { dg-final { scan-assembler-times "ldclrh\t" 4} } */ -/* { dg-final { scan-assembler-times "ldclrah\t" 8} } */ -/* { dg-final { scan-assembler-times "ldclrlh\t" 4} } */ -/* { dg-final { scan-assembler-times "ldclralh\t" 8} } */ +/* { dg-final { scan-assembler-times "ldclrh\t" 8} } */ +/* { dg-final { scan-assembler-times "ldclrah\t" 16} } */ +/* { dg-final { scan-assembler-times "ldclrlh\t" 8} } */ +/* { dg-final { scan-assembler-times "ldclralh\t" 16} } */ -/* { dg-final { scan-assembler-times "ldclr\t" 8} */ -/* { dg-final { scan-assembler-times "ldclra\t" 16} } */ -/* { dg-final { scan-assembler-times "ldclrl\t" 8} } */ -/* { dg-final { scan-assembler-times "ldclral\t" 16} } */ +/* { dg-final { scan-assembler-times "ldclr\t" 16} } */ +/* { dg-final { scan-assembler-times "ldclra\t" 32} } */ +/* { dg-final { scan-assembler-times "ldclrl\t" 16} } */ +/* { dg-final { scan-assembler-times "ldclral\t" 32} } */ /* Load-XOR.
*/ -/* { dg-final { scan-assembler-times "ldeorb\t" 4} } */ -/* { dg-final { scan-assembler-times "ldeorab\t" 8} } */ -/* { dg-final { scan-assembler-times "ldeorlb\t" 4} } */ -/* { dg-final { scan-assembler-times "ldeoralb\t" 8} } */ +/* { dg-final { scan-assembler-times "ldeorb\t" 8} } */ +/* { dg-final { scan-assembler-times "ldeorab\t" 16} } */ +/* { dg-final { scan-assembler-times "ldeorlb\t" 8} } */ +/* { dg-final { scan-assembler-times "ldeoralb\t" 16} } */ -/* { dg-final { scan-assembler-times "ldeorh\t" 4} } */ -/* { dg-final { scan-assembler-times "ldeorah\t" 8} } */ -/* { dg-final { scan-assembler-times "ldeorlh\t" 4} } */ -/* { dg-final { scan-assembler-times "ldeoralh\t" 8} } */ +/* { dg-final { scan-assembler-times "ldeorh\t" 8} } */ +/* { dg-final { scan-assembler-times "ldeorah\t" 16} } */ +/* { dg-final { scan-assembler-times "ldeorlh\t" 8} } */ +/* { dg-final { scan-assembler-times "ldeoralh\t" 16} } */ -/* { dg-final { scan-assembler-times "ldeor\t" 8} */ -/* { dg-final { scan-assembler-times "ldeora\t" 16} } */ -/* { dg-final { scan-assembler-times "ldeorl\t" 8} } */ -/* { dg-final { scan-assembler-times "ldeoral\t" 16} } */ +/* { dg-final { scan-assembler-times "ldeor\t" 16} } */ +/* { dg-final { scan-assembler-times "ldeora\t" 32} } */ +/* { dg-final { scan-assembler-times "ldeorl\t" 16} } */ +/* { dg-final { scan-assembler-times "ldeoral\t" 32} } */ /* { dg-final { scan-assembler-not "ldaxr\t" } } */ /* { dg-final { scan-assembler-not "stlxr\t" } } */ -- 2.30.2