From 7803ec5ee2a547043fb6708a08ddb1361ba91202 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 31 Oct 2018 09:58:48 +0000 Subject: [PATCH] aarch64: Improve atomic-op lse generation Fix constraints; avoid unnecessary split. Drop the use of the atomic_op iterator in favor of the ATOMIC_LDOP iterator; this is simplier and more logical for ldclr aka bic. * config/aarch64/aarch64.c (aarch64_emit_bic): Remove. (aarch64_atomic_ldop_supported_p): Remove. (aarch64_gen_atomic_ldop): Remove. * config/aarch64/atomic.md (atomic_): Fully expand LSE operations here. (atomic_fetch_): Likewise. (atomic__fetch): Likewise. (aarch64_atomic__lse): Drop atomic_op iterator and use ATOMIC_LDOP instead; use register_operand for the input; drop the split and emit insns directly. (aarch64_atomic_fetch__lse): Likewise. (aarch64_atomic__fetch_lse): Remove. (@aarch64_atomic_load): Remove. From-SVN: r265660 --- gcc/ChangeLog | 14 ++ gcc/config/aarch64/aarch64-protos.h | 2 - gcc/config/aarch64/aarch64.c | 176 ------------------------- gcc/config/aarch64/atomics.md | 197 +++++++++++++++------------- gcc/config/aarch64/iterators.md | 5 +- 5 files changed, 122 insertions(+), 272 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a3f9048cb88..bec71246a8c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,19 @@ 2018-10-31 Richard Henderson + * config/aarch64/aarch64.c (aarch64_emit_bic): Remove. + (aarch64_atomic_ldop_supported_p): Remove. + (aarch64_gen_atomic_ldop): Remove. + * config/aarch64/atomic.md (atomic_): + Fully expand LSE operations here. + (atomic_fetch_): Likewise. + (atomic__fetch): Likewise. + (aarch64_atomic__lse): Drop atomic_op iterator + and use ATOMIC_LDOP instead; use register_operand for the input; + drop the split and emit insns directly. + (aarch64_atomic_fetch__lse): Likewise. + (aarch64_atomic__fetch_lse): Remove. + (@aarch64_atomic_load): Remove. + * config/aarch64/aarch64.c (aarch64_emit_atomic_swap): Remove. (aarch64_gen_atomic_ldop): Don't call it. * config/aarch64/atomics.md (atomic_exchange): diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index f662533c108..288efe9fc67 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -564,8 +564,6 @@ rtx aarch64_load_tp (rtx); void aarch64_expand_compare_and_swap (rtx op[]); void aarch64_split_compare_and_swap (rtx op[]); -bool aarch64_atomic_ldop_supported_p (enum rtx_code); -void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx); void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index e9829ab7539..e646cced7d2 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -14663,32 +14663,6 @@ aarch64_expand_compare_and_swap (rtx operands[]) emit_insn (gen_rtx_SET (bval, x)); } -/* Test whether the target supports using a atomic load-operate instruction. - CODE is the operation and AFTER is TRUE if the data in memory after the - operation should be returned and FALSE if the data before the operation - should be returned. Returns FALSE if the operation isn't supported by the - architecture. */ - -bool -aarch64_atomic_ldop_supported_p (enum rtx_code code) -{ - if (!TARGET_LSE) - return false; - - switch (code) - { - case SET: - case AND: - case IOR: - case XOR: - case MINUS: - case PLUS: - return true; - default: - return false; - } -} - /* Emit a barrier, that is appropriate for memory model MODEL, at the end of a sequence implementing an atomic operation. */ @@ -14821,156 +14795,6 @@ aarch64_split_compare_and_swap (rtx operands[]) aarch64_emit_post_barrier (model); } -/* Emit a BIC instruction. */ - -static void -aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift) -{ - rtx shift_rtx = GEN_INT (shift); - rtx (*gen) (rtx, rtx, rtx, rtx); - - switch (mode) - { - case E_SImode: gen = gen_and_one_cmpl_lshrsi3; break; - case E_DImode: gen = gen_and_one_cmpl_lshrdi3; break; - default: - gcc_unreachable (); - } - - emit_insn (gen (dst, s2, shift_rtx, s1)); -} - -/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the - location to store the data read from memory. OUT_RESULT is the location to - store the result of the operation. MEM is the memory location to read and - modify. MODEL_RTX is the memory ordering to use. VALUE is the second - operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can - be NULL. */ - -void -aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result, - rtx mem, rtx value, rtx model_rtx) -{ - machine_mode mode = GET_MODE (mem); - machine_mode wmode = (mode == DImode ? DImode : SImode); - const bool short_mode = (mode < SImode); - int ldop_code; - rtx src; - rtx x; - - if (out_data) - out_data = gen_lowpart (mode, out_data); - - if (out_result) - out_result = gen_lowpart (mode, out_result); - - /* Make sure the value is in a register, putting it into a destination - register if it needs to be manipulated. */ - if (!register_operand (value, mode) - || code == AND || code == MINUS) - { - src = out_result ? out_result : out_data; - emit_move_insn (src, gen_lowpart (mode, value)); - } - else - src = value; - gcc_assert (register_operand (src, mode)); - - /* Preprocess the data for the operation as necessary. If the operation is - a SET then emit a swap instruction and finish. */ - switch (code) - { - case MINUS: - /* Negate the value and treat it as a PLUS. */ - { - rtx neg_src; - - /* Resize the value if necessary. */ - if (short_mode) - src = gen_lowpart (wmode, src); - - neg_src = gen_rtx_NEG (wmode, src); - emit_insn (gen_rtx_SET (src, neg_src)); - - if (short_mode) - src = gen_lowpart (mode, src); - } - /* Fall-through. */ - case PLUS: - ldop_code = UNSPECV_ATOMIC_LDOP_PLUS; - break; - - case IOR: - ldop_code = UNSPECV_ATOMIC_LDOP_OR; - break; - - case XOR: - ldop_code = UNSPECV_ATOMIC_LDOP_XOR; - break; - - case AND: - { - rtx not_src; - - /* Resize the value if necessary. */ - if (short_mode) - src = gen_lowpart (wmode, src); - - not_src = gen_rtx_NOT (wmode, src); - emit_insn (gen_rtx_SET (src, not_src)); - - if (short_mode) - src = gen_lowpart (mode, src); - } - ldop_code = UNSPECV_ATOMIC_LDOP_BIC; - break; - - default: - /* The operation can't be done with atomic instructions. */ - gcc_unreachable (); - } - - emit_insn (gen_aarch64_atomic_load (ldop_code, mode, - out_data, mem, src, model_rtx)); - - /* If necessary, calculate the data in memory after the update by redoing the - operation from values in registers. */ - if (!out_result) - return; - - if (short_mode) - { - src = gen_lowpart (wmode, src); - out_data = gen_lowpart (wmode, out_data); - out_result = gen_lowpart (wmode, out_result); - } - - x = NULL_RTX; - - switch (code) - { - case MINUS: - case PLUS: - x = gen_rtx_PLUS (wmode, out_data, src); - break; - case IOR: - x = gen_rtx_IOR (wmode, out_data, src); - break; - case XOR: - x = gen_rtx_XOR (wmode, out_data, src); - break; - case AND: - aarch64_emit_bic (wmode, out_result, out_data, src, 0); - return; - default: - gcc_unreachable (); - } - - emit_set_insn (out_result, x); - - return; -} - /* Split an atomic operation. */ void diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md index bc9e396dc96..2198649b1be 100644 --- a/gcc/config/aarch64/atomics.md +++ b/gcc/config/aarch64/atomics.md @@ -207,13 +207,37 @@ rtx (*gen) (rtx, rtx, rtx); /* Use an atomic load-operate instruction when possible. */ - if (aarch64_atomic_ldop_supported_p ()) - gen = gen_aarch64_atomic__lse; + if (TARGET_LSE) + { + switch () + { + case MINUS: + operands[1] = expand_simple_unop (mode, NEG, operands[1], + NULL, 1); + /* fallthru */ + case PLUS: + gen = gen_aarch64_atomic_add_lse; + break; + case IOR: + gen = gen_aarch64_atomic_ior_lse; + break; + case XOR: + gen = gen_aarch64_atomic_xor_lse; + break; + case AND: + operands[1] = expand_simple_unop (mode, NOT, operands[1], + NULL, 1); + gen = gen_aarch64_atomic_bic_lse; + break; + default: + gcc_unreachable (); + } + operands[1] = force_reg (mode, operands[1]); + } else gen = gen_aarch64_atomic_; emit_insn (gen (operands[0], operands[1], operands[2])); - DONE; } ) @@ -239,22 +263,25 @@ } ) -(define_insn_and_split "aarch64_atomic__lse" +(define_insn "aarch64_atomic__lse" [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q") - (unspec_volatile:ALLI - [(atomic_op:ALLI (match_dup 0) - (match_operand:ALLI 1 "" "r")) - (match_operand:SI 2 "const_int_operand")] - UNSPECV_ATOMIC_OP)) + (unspec_volatile:ALLI + [(match_dup 0) + (match_operand:ALLI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand")] + ATOMIC_LDOP)) (clobber (match_scratch:ALLI 3 "=&r"))] "TARGET_LSE" - "#" - "&& reload_completed" - [(const_int 0)] { - aarch64_gen_atomic_ldop (, operands[3], NULL, operands[0], - operands[1], operands[2]); - DONE; + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); + if (is_mm_relaxed (model)) + return "ld\t%1, %3, %0"; + else if (is_mm_release (model)) + return "ldl\t%1, %3, %0"; + else if (is_mm_acquire (model) || is_mm_consume (model)) + return "lda\t%1, %3, %0"; + else + return "ldal\t%1, %3, %0"; } ) @@ -280,7 +307,7 @@ } ) -;; Load-operate-store, returning the updated memory data. +;; Load-operate-store, returning the original memory data. (define_expand "atomic_fetch_" [(match_operand:ALLI 0 "register_operand" "") @@ -293,13 +320,37 @@ rtx (*gen) (rtx, rtx, rtx, rtx); /* Use an atomic load-operate instruction when possible. */ - if (aarch64_atomic_ldop_supported_p ()) - gen = gen_aarch64_atomic_fetch__lse; + if (TARGET_LSE) + { + switch () + { + case MINUS: + operands[2] = expand_simple_unop (mode, NEG, operands[2], + NULL, 1); + /* fallthru */ + case PLUS: + gen = gen_aarch64_atomic_fetch_add_lse; + break; + case IOR: + gen = gen_aarch64_atomic_fetch_ior_lse; + break; + case XOR: + gen = gen_aarch64_atomic_fetch_xor_lse; + break; + case AND: + operands[2] = expand_simple_unop (mode, NOT, operands[2], + NULL, 1); + gen = gen_aarch64_atomic_fetch_bic_lse; + break; + default: + gcc_unreachable (); + } + operands[2] = force_reg (mode, operands[2]); + } else gen = gen_aarch64_atomic_fetch_; emit_insn (gen (operands[0], operands[1], operands[2], operands[3])); - DONE; }) @@ -326,23 +377,26 @@ } ) -(define_insn_and_split "aarch64_atomic_fetch__lse" - [(set (match_operand:ALLI 0 "register_operand" "=&r") - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) +(define_insn "aarch64_atomic_fetch__lse" + [(set (match_operand:ALLI 0 "register_operand" "=r") + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) (set (match_dup 1) - (unspec_volatile:ALLI - [(atomic_op:ALLI (match_dup 1) - (match_operand:ALLI 2 "" "r")) - (match_operand:SI 3 "const_int_operand")] - UNSPECV_ATOMIC_LDOP))] + (unspec_volatile:ALLI + [(match_dup 1) + (match_operand:ALLI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand")] + ATOMIC_LDOP))] "TARGET_LSE" - "#" - "&& reload_completed" - [(const_int 0)] { - aarch64_gen_atomic_ldop (, operands[0], NULL, operands[1], - operands[2], operands[3]); - DONE; + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model)) + return "ld\t%2, %0, %1"; + else if (is_mm_acquire (model) || is_mm_consume (model)) + return "lda\t%2, %0, %1"; + else if (is_mm_release (model)) + return "ldl\t%2, %0, %1"; + else + return "ldal\t%2, %0, %1"; } ) @@ -370,7 +424,7 @@ } ) -;; Load-operate-store, returning the original memory data. +;; Load-operate-store, returning the updated memory data. (define_expand "atomic__fetch" [(match_operand:ALLI 0 "register_operand" "") @@ -380,17 +434,23 @@ (match_operand:SI 3 "const_int_operand")] "" { - rtx (*gen) (rtx, rtx, rtx, rtx); - rtx value = operands[2]; - - /* Use an atomic load-operate instruction when possible. */ - if (aarch64_atomic_ldop_supported_p ()) - gen = gen_aarch64_atomic__fetch_lse; + /* Use an atomic load-operate instruction when possible. In this case + we will re-compute the result from the original mem value. */ + if (TARGET_LSE) + { + rtx tmp = gen_reg_rtx (mode); + operands[2] = force_reg (mode, operands[2]); + emit_insn (gen_atomic_fetch_ + (tmp, operands[1], operands[2], operands[3])); + tmp = expand_simple_binop (mode, , tmp, operands[2], + operands[0], 1, OPTAB_WIDEN); + emit_move_insn (operands[0], tmp); + } else - gen = gen_aarch64_atomic__fetch; - - emit_insn (gen (operands[0], operands[1], value, operands[3])); - + { + emit_insn (gen_aarch64_atomic__fetch + (operands[0], operands[1], operands[2], operands[3])); + } DONE; }) @@ -417,29 +477,6 @@ } ) -(define_insn_and_split "aarch64_atomic__fetch_lse" - [(set (match_operand:ALLI 0 "register_operand" "=&r") - (atomic_op:ALLI - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q") - (match_operand:ALLI 2 "" "r"))) - (set (match_dup 1) - (unspec_volatile:ALLI - [(match_dup 1) - (match_dup 2) - (match_operand:SI 3 "const_int_operand")] - UNSPECV_ATOMIC_LDOP)) - (clobber (match_scratch:ALLI 4 "=&r"))] - "TARGET_LSE" - "#" - "&& reload_completed" - [(const_int 0)] - { - aarch64_gen_atomic_ldop (, operands[4], operands[0], operands[1], - operands[2], operands[3]); - DONE; - } -) - (define_insn_and_split "atomic_nand_fetch" [(set (match_operand:ALLI 0 "register_operand" "=&r") (not:ALLI @@ -585,29 +622,3 @@ return "dmb\\tish"; } ) - -;; ARMv8.1-A LSE instructions. - -;; Atomic load-op: Load data, operate, store result, keep data. - -(define_insn "@aarch64_atomic_load" - [(set (match_operand:ALLI 0 "register_operand" "=r") - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) - (set (match_dup 1) - (unspec_volatile:ALLI - [(match_dup 1) - (match_operand:ALLI 2 "register_operand") - (match_operand:SI 3 "const_int_operand")] - ATOMIC_LDOP))] - "TARGET_LSE && reload_completed" - { - enum memmodel model = memmodel_from_int (INTVAL (operands[3])); - if (is_mm_relaxed (model)) - return "ld\t%2, %0, %1"; - else if (is_mm_acquire (model) || is_mm_consume (model)) - return "lda\t%2, %0, %1"; - else if (is_mm_release (model)) - return "ldl\t%2, %0, %1"; - else - return "ldal\t%2, %0, %1"; - }) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index a43956054e8..524e4e6929b 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -503,7 +503,6 @@ UNSPECV_ATOMIC_CAS ; Represent an atomic CAS. UNSPECV_ATOMIC_SWP ; Represent an atomic SWP. UNSPECV_ATOMIC_OP ; Represent an atomic operation. - UNSPECV_ATOMIC_LDOP ; Represent an atomic load-operation UNSPECV_ATOMIC_LDOP_OR ; Represent an atomic load-or UNSPECV_ATOMIC_LDOP_BIC ; Represent an atomic load-bic UNSPECV_ATOMIC_LDOP_XOR ; Represent an atomic load-xor @@ -1591,6 +1590,10 @@ [(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr") (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")]) +(define_int_attr atomic_ldoptab + [(UNSPECV_ATOMIC_LDOP_OR "ior") (UNSPECV_ATOMIC_LDOP_BIC "bic") + (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")]) + ;; ------------------------------------------------------------------- ;; Int Iterators Attributes. ;; ------------------------------------------------------------------- -- 2.30.2