From 17f47f8621099fdc4b0fc6681924f97230c8a114 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Fri, 2 Jun 2017 14:41:52 +0000 Subject: [PATCH] [AArch64] Emit tighter strong atomic compare-exchange loop when comparing against zero * config/aarch64/aarch64.c (aarch64_split_compare_and_swap): Emit CBNZ inside loop when doing a strong exchange and comparing against zero. Generate the CC flags after the loop. * gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c: New test. From-SVN: r248832 --- gcc/ChangeLog | 6 +++ gcc/config/aarch64/aarch64.c | 41 ++++++++++++++++--- gcc/testsuite/ChangeLog | 4 ++ .../atomic_cmp_exchange_zero_strong_1.c | 12 ++++++ 4 files changed, 57 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3d945d61bc1..a941ffdf75c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2017-06-02 Kyrylo Tkachov + + * config/aarch64/aarch64.c (aarch64_split_compare_and_swap): + Emit CBNZ inside loop when doing a strong exchange and comparing + against zero. Generate the CC flags after the loop. + 2017-06-02 David Edelsohn * dwarf2out.c (DWARF_INITIAL_LENGTH_SIZE_STR): New. diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 5e546bf9091..7064f050a10 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -12111,6 +12111,17 @@ aarch64_split_compare_and_swap (rtx operands[]) mode = GET_MODE (mem); model = memmodel_from_int (INTVAL (model_rtx)); + /* When OLDVAL is zero and we want the strong version we can emit a tighter + loop: + .label1: + LD[A]XR rval, [mem] + CBNZ rval, .label2 + ST[L]XR scratch, newval, [mem] + CBNZ scratch, .label1 + .label2: + CMP rval, 0. */ + bool strong_zero_p = !is_weak && oldval == const0_rtx; + label1 = NULL; if (!is_weak) { @@ -12127,11 +12138,21 @@ aarch64_split_compare_and_swap (rtx operands[]) else aarch64_emit_load_exclusive (mode, rval, mem, model_rtx); - cond = aarch64_gen_compare_reg (NE, rval, oldval); - x = gen_rtx_NE (VOIDmode, cond, const0_rtx); - x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, - gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); - aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); + if (strong_zero_p) + { + x = gen_rtx_NE (VOIDmode, rval, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); + } + else + { + cond = aarch64_gen_compare_reg (NE, rval, oldval); + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); + } aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx); @@ -12150,7 +12171,15 @@ aarch64_split_compare_and_swap (rtx operands[]) } emit_label (label2); - + /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL + to set the condition flags. If this is not used it will be removed by + later passes. */ + if (strong_zero_p) + { + cond = gen_rtx_REG (CCmode, CC_REGNUM); + x = gen_rtx_COMPARE (CCmode, rval, const0_rtx); + emit_insn (gen_rtx_SET (cond, x)); + } /* Emit any final barrier needed for a __sync operation. */ if (is_mm_sync (model)) aarch64_emit_post_barrier (model); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 611a851a99b..006b992733a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2017-06-02 Kyrylo Tkachov + + * gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c: New test. + 2017-06-02 Will Schmidt * gcc.target/powerpc/fold-vec-abs-char.c: New. diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c new file mode 100644 index 00000000000..b14a7c29437 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int +foo (int *a) +{ + int x = 0; + return __atomic_compare_exchange_n (a, &x, 4, 0, + __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); +} + +/* { dg-final { scan-assembler-times "cbnz\\tw\[0-9\]+" 2 } } */ -- 2.30.2