[AArch64] Use atomic load-operate instructions for update-fetch patterns.
authorMatthew Wahab <matthew.wahab@arm.com>
Tue, 22 Sep 2015 09:41:15 +0000 (09:41 +0000)
committerMatthew Wahab <mwahab@gcc.gnu.org>
Tue, 22 Sep 2015 09:41:15 +0000 (09:41 +0000)
2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

* config/aarch64/aarch64-protos.h (aarch64_gen_atomic_ldop):
Adjust declaration.
* config/aarch64/aarch64.c (aarch64_emit_bic): New.
(aarch64_gen_atomic_ldop): Adjust comment.  Add parameter
out_result.  Update to support update-fetch operations.
* config/aarch64/atomics.md (aarch64_atomic_exchange<mode>_lse):
Adjust for change to aarch64_gen_atomic_ldop.
(aarch64_atomic_<atomic_optab><mode>_lse): Likewise.
(aarch64_atomic_fetch_<atomic_optab><mode>_lse): Likewise.
(atomic_<atomic_optab>_fetch<mode>): Change to an expander.
(aarch64_atomic_<atomic_optab>_fetch<mode>): New.
(aarch64_atomic_<atomic_optab>_fetch<mode>_lse): New.

gcc/testsuite
2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

* gcc.target/aarch64/atomic-inst-ldadd.c: Add tests for
update-fetch operations.
* gcc.target/aarch64/atomic-inst-ldlogic.c: Likewise.

From-SVN: r228002

gcc/ChangeLog
gcc/config/aarch64/aarch64-protos.h
gcc/config/aarch64/aarch64.c
gcc/config/aarch64/atomics.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c
gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c

index ad6ec7f03535a53bfb98512ecd10bd3e731667e1..50a950ec3f02a2f79d7e172a91b9eb861d235639 100644 (file)
@@ -1,3 +1,18 @@
+2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>
+
+       * config/aarch64/aarch64-protos.h (aarch64_gen_atomic_ldop):
+       Adjust declaration.
+       * config/aarch64/aarch64.c (aarch64_emit_bic): New.
+       (aarch64_gen_atomic_ldop): Adjust comment.  Add parameter
+       out_result.  Update to support update-fetch operations.
+       * config/aarch64/atomics.md (aarch64_atomic_exchange<mode>_lse):
+       Adjust for change to aarch64_gen_atomic_ldop.
+       (aarch64_atomic_<atomic_optab><mode>_lse): Likewise.
+       (aarch64_atomic_fetch_<atomic_optab><mode>_lse): Likewise.
+       (atomic_<atomic_optab>_fetch<mode>): Change to an expander.
+       (aarch64_atomic_<atomic_optab>_fetch<mode>): New.
+       (aarch64_atomic_<atomic_optab>_fetch<mode>_lse): New.
+
 2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>
 
        * config/aarch64/aarch64-protos.h
index 76ebd6f37b75477f75356fd424c74ad42e126c19..dd8ebcca16718b54ca0f32208d074162f0f021a6 100644 (file)
@@ -380,7 +380,7 @@ void aarch64_split_compare_and_swap (rtx op[]);
 void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx);
 
 bool aarch64_atomic_ldop_supported_p (enum rtx_code);
-void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx);
+void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
 void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
 
 bool aarch64_gen_adjusted_ldpstp (rtx *, bool, enum machine_mode, RTX_CODE);
index 93e36b76a29807bf84e8985d494b54d28a01531d..3c8c058edda824799cc844350d973634566e8485 100644 (file)
@@ -11018,6 +11018,25 @@ aarch64_split_compare_and_swap (rtx operands[])
     aarch64_emit_post_barrier (model);
 }
 
+/* Emit a BIC instruction.  */
+
+static void
+aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
+{
+  rtx shift_rtx = GEN_INT (shift);
+  rtx (*gen) (rtx, rtx, rtx, rtx);
+
+  switch (mode)
+    {
+    case SImode: gen = gen_and_one_cmpl_lshrsi3; break;
+    case DImode: gen = gen_and_one_cmpl_lshrdi3; break;
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_insn (gen (dst, s2, shift_rtx, s1));
+}
+
 /* Emit an atomic swap.  */
 
 static void
@@ -11112,13 +11131,14 @@ aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
 }
 
 /* Emit an atomic load+operate.  CODE is the operation.  OUT_DATA is the
-   location to store the data read from memory.  MEM is the memory location to
-   read and modify.  MODEL_RTX is the memory ordering to use.  VALUE is the
-   second operand for the operation.  Either OUT_DATA or OUT_RESULT, but not
-   both, can be NULL.  */
+   location to store the data read from memory.  OUT_RESULT is the location to
+   store the result of the operation.  MEM is the memory location to read and
+   modify.  MODEL_RTX is the memory ordering to use.  VALUE is the second
+   operand for the operation.  Either OUT_DATA or OUT_RESULT, but not both, can
+   be NULL.  */
 
 void
-aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data,
+aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
                         rtx mem, rtx value, rtx model_rtx)
 {
   machine_mode mode = GET_MODE (mem);
@@ -11131,12 +11151,15 @@ aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data,
   if (out_data)
     out_data = gen_lowpart (mode, out_data);
 
+  if (out_result)
+    out_result = gen_lowpart (mode, out_result);
+
   /* Make sure the value is in a register, putting it into a destination
      register if it needs to be manipulated.  */
   if (!register_operand (value, mode)
       || code == AND || code == MINUS)
     {
-      src = out_data;
+      src = out_result ? out_result : out_data;
       emit_move_insn (src, gen_lowpart (mode, value));
     }
   else
@@ -11202,6 +11225,43 @@ aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data,
     }
 
   aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
+
+  /* If necessary, calculate the data in memory after the update by redoing the
+     operation from values in registers.  */
+  if (!out_result)
+    return;
+
+  if (short_mode)
+    {
+      src = gen_lowpart (wmode, src);
+      out_data = gen_lowpart (wmode, out_data);
+      out_result = gen_lowpart (wmode, out_result);
+    }
+
+  x = NULL_RTX;
+
+  switch (code)
+    {
+    case MINUS:
+    case PLUS:
+      x = gen_rtx_PLUS (wmode, out_data, src);
+      break;
+    case IOR:
+      x = gen_rtx_IOR (wmode, out_data, src);
+      break;
+    case XOR:
+      x = gen_rtx_XOR (wmode, out_data, src);
+      break;
+    case AND:
+      aarch64_emit_bic (wmode, out_result, out_data, src, 0);
+      return;
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_set_insn (out_result, x);
+
+  return;
 }
 
 /* Split an atomic operation.  */
index e0d885668f117e870763c8fa1583bbe5fb52e13d..e7ac5f6fc1c78f77375ec8ccce6d2409470d90b1 100644 (file)
   "&& reload_completed"
   [(const_int 0)]
   {
-    aarch64_gen_atomic_ldop (SET, operands[0], operands[1],
+    aarch64_gen_atomic_ldop (SET, operands[0], NULL, operands[1],
                             operands[2], operands[3]);
     DONE;
   }
   "&& reload_completed"
   [(const_int 0)]
   {
-    aarch64_gen_atomic_ldop (<CODE>, operands[3], operands[0],
+    aarch64_gen_atomic_ldop (<CODE>, operands[3], NULL, operands[0],
                             operands[1], operands[2]);
     DONE;
   }
   "&& reload_completed"
   [(const_int 0)]
   {
-    aarch64_gen_atomic_ldop (<CODE>, operands[0], operands[1],
+    aarch64_gen_atomic_ldop (<CODE>, operands[0], NULL, operands[1],
                             operands[2], operands[3]);
     DONE;
   }
   }
 )
 
-(define_insn_and_split "atomic_<atomic_optab>_fetch<mode>"
+;; Load-operate-store, returning the original memory data.
+
+(define_expand "atomic_<atomic_optab>_fetch<mode>"
+ [(match_operand:ALLI 0 "register_operand" "")
+  (atomic_op:ALLI
+   (match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
+   (match_operand:ALLI 2 "<atomic_op_operand>" ""))
+  (match_operand:SI 3 "const_int_operand")]
+ ""
+{
+  rtx (*gen) (rtx, rtx, rtx, rtx);
+  rtx value = operands[2];
+
+  /* Use an atomic load-operate instruction when possible.  */
+  if (aarch64_atomic_ldop_supported_p (<CODE>))
+    gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>_lse;
+  else
+    gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>;
+
+  emit_insn (gen (operands[0], operands[1], value, operands[3]));
+
+  DONE;
+})
+
+(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>"
   [(set (match_operand:ALLI 0 "register_operand" "=&r")
     (atomic_op:ALLI
       (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
   }
 )
 
+(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>_lse"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (atomic_op:ALLI
+     (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
+     (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>")))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(match_dup 1)
+       (match_dup 2)
+       (match_operand:SI 3 "const_int_operand")]
+      UNSPECV_ATOMIC_LDOP))
+     (clobber (match_scratch:ALLI 4 "=r"))]
+  "TARGET_LSE"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_gen_atomic_ldop (<CODE>, operands[4], operands[0], operands[1],
+                            operands[2], operands[3]);
+    DONE;
+  }
+)
+
 (define_insn_and_split "atomic_nand_fetch<mode>"
   [(set (match_operand:ALLI 0 "register_operand" "=&r")
     (not:ALLI
index 4c5364a3900e7d84b1b9365c0c37d84949fb47a7..9f0686b96e224f7c06216279a0690ebc6c07017f 100644 (file)
@@ -1,3 +1,9 @@
+2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>
+
+       * gcc.target/aarch64/atomic-inst-ldadd.c: Add tests for
+       update-fetch operations.
+       * gcc.target/aarch64/atomic-inst-ldlogic.c: Likewise.
+
 2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>
 
        * gcc.target/aarch64/atomic-inst-ldadd.c: New.
index c21d2eda6c0b5b236e7db27c1496fa98365c5b8a..4b2282c6861f01c96c2a0e02a40849bbef429a64 100644 (file)
     __atomic_fetch_sub (val, foo, MODEL);                              \
   }
 
+#define ADD_LOAD(FN, TY, MODEL)                                                \
+  TY FNNAME (FN, TY) (TY* val, TY* foo)                                        \
+  {                                                                    \
+    return __atomic_add_fetch (val, foo, MODEL);                       \
+  }
+
+#define ADD_LOAD_NORETURN(FN, TY, MODEL)                               \
+  void FNNAME (FN, TY) (TY* val, TY* foo)                              \
+  {                                                                    \
+    __atomic_add_fetch (val, foo, MODEL);                              \
+  }
+
+#define SUB_LOAD(FN, TY, MODEL)                                                \
+  TY FNNAME (FN, TY) (TY* val, TY* foo)                                        \
+  {                                                                    \
+    return __atomic_sub_fetch (val, foo, MODEL);                       \
+  }
+
+#define SUB_LOAD_NORETURN(FN, TY, MODEL)                               \
+  void FNNAME (FN, TY) (TY* val, TY* foo)                              \
+  {                                                                    \
+    __atomic_sub_fetch (val, foo, MODEL);                              \
+  }
 
 TEST (load_add, LOAD_ADD)
 TEST (load_add_notreturn, LOAD_ADD_NORETURN)
@@ -38,20 +61,26 @@ TEST (load_add_notreturn, LOAD_ADD_NORETURN)
 TEST (load_sub, LOAD_SUB)
 TEST (load_sub_notreturn, LOAD_SUB_NORETURN)
 
-/* { dg-final { scan-assembler-times "ldaddb\t" 8} } */
-/* { dg-final { scan-assembler-times "ldaddab\t" 16} } */
-/* { dg-final { scan-assembler-times "ldaddlb\t" 8} } */
-/* { dg-final { scan-assembler-times "ldaddalb\t" 16} } */
+TEST (add_load, ADD_LOAD)
+TEST (add_load_notreturn, ADD_LOAD_NORETURN)
+
+TEST (sub_load, SUB_LOAD)
+TEST (sub_load_notreturn, SUB_LOAD_NORETURN)
+
+/* { dg-final { scan-assembler-times "ldaddb\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddab\t" 32} } */
+/* { dg-final { scan-assembler-times "ldaddlb\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddalb\t" 32} } */
 
-/* { dg-final { scan-assembler-times "ldaddh\t" 8} } */
-/* { dg-final { scan-assembler-times "ldaddah\t" 16} } */
-/* { dg-final { scan-assembler-times "ldaddlh\t" 8} } */
-/* { dg-final { scan-assembler-times "ldaddalh\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddh\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddah\t" 32} } */
+/* { dg-final { scan-assembler-times "ldaddlh\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddalh\t" 32} } */
 
-/* { dg-final { scan-assembler-times "ldadd\t" 16} } */
-/* { dg-final { scan-assembler-times "ldadda\t" 32} } */
-/* { dg-final { scan-assembler-times "ldaddl\t" 16} } */
-/* { dg-final { scan-assembler-times "ldaddal\t" 32} } */
+/* { dg-final { scan-assembler-times "ldadd\t" 32} } */
+/* { dg-final { scan-assembler-times "ldadda\t" 64} } */
+/* { dg-final { scan-assembler-times "ldaddl\t" 32} } */
+/* { dg-final { scan-assembler-times "ldaddal\t" 64} } */
 
 /* { dg-final { scan-assembler-not "ldaxr\t" } } */
 /* { dg-final { scan-assembler-not "stlxr\t" } } */
index fd0f484cdabd4db0d9afa1b185a3c098e72fe4ca..4879d52b9b4f88fb32209cd995fff7f5b7e46ce7 100644 (file)
     __atomic_fetch_xor (val, foo, MODEL);                              \
   }
 
+#define OR_LOAD(FN, TY, MODEL)                                         \
+  TY FNNAME (FN, TY) (TY* val, TY* foo)                                        \
+  {                                                                    \
+    return __atomic_or_fetch (val, foo, MODEL);                                \
+  }
+
+#define OR_LOAD_NORETURN(FN, TY, MODEL)                                        \
+  void FNNAME (FN, TY) (TY* val, TY* foo)                              \
+  {                                                                    \
+    __atomic_or_fetch (val, foo, MODEL);                               \
+  }
+
+#define AND_LOAD(FN, TY, MODEL)                                                \
+  TY FNNAME (FN, TY) (TY* val, TY* foo)                                        \
+  {                                                                    \
+    return __atomic_and_fetch (val, foo, MODEL);                       \
+  }
+
+#define AND_LOAD_NORETURN(FN, TY, MODEL)                               \
+  void FNNAME (FN, TY) (TY* val, TY* foo)                              \
+  {                                                                    \
+    __atomic_and_fetch (val, foo, MODEL);                              \
+  }
+
+#define XOR_LOAD(FN, TY, MODEL)                                                \
+  TY FNNAME (FN, TY) (TY* val, TY* foo)                                        \
+  {                                                                    \
+    return __atomic_xor_fetch (val, foo, MODEL);                       \
+  }
+
+#define XOR_LOAD_NORETURN(FN, TY, MODEL)                               \
+  void FNNAME (FN, TY) (TY* val, TY* foo)                              \
+  {                                                                    \
+    __atomic_xor_fetch (val, foo, MODEL);                              \
+  }
+
 
 TEST (load_or, LOAD_OR)
 TEST (load_or_notreturn, LOAD_OR_NORETURN)
@@ -53,56 +89,66 @@ TEST (load_and_notreturn, LOAD_AND_NORETURN)
 TEST (load_xor, LOAD_XOR)
 TEST (load_xor_notreturn, LOAD_XOR_NORETURN)
 
+TEST (or_load, OR_LOAD)
+TEST (or_load_notreturn, OR_LOAD_NORETURN)
+
+TEST (and_load, AND_LOAD)
+TEST (and_load_notreturn, AND_LOAD_NORETURN)
+
+TEST (xor_load, XOR_LOAD)
+TEST (xor_load_notreturn, XOR_LOAD_NORETURN)
+
+
 /* Load-OR.  */
 
-/* { dg-final { scan-assembler-times "ldsetb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldsetab\t" 8} } */
-/* { dg-final { scan-assembler-times "ldsetlb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldsetalb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetab\t" 16} } */
+/* { dg-final { scan-assembler-times "ldsetlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetalb\t" 16} } */
 
-/* { dg-final { scan-assembler-times "ldseth\t" 4} } */
-/* { dg-final { scan-assembler-times "ldsetah\t" 8} } */
-/* { dg-final { scan-assembler-times "ldsetlh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldsetalh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldseth\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetah\t" 16} } */
+/* { dg-final { scan-assembler-times "ldsetlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetalh\t" 16} } */
 
-/* { dg-final { scan-assembler-times "ldset\t" 8} } */
-/* { dg-final { scan-assembler-times "ldseta\t" 16} } */
-/* { dg-final { scan-assembler-times "ldsetl\t" 8} } */
-/* { dg-final { scan-assembler-times "ldsetal\t" 16} } */
+/* { dg-final { scan-assembler-times "ldset\t" 16} } */
+/* { dg-final { scan-assembler-times "ldseta\t" 32} } */
+/* { dg-final { scan-assembler-times "ldsetl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldsetal\t" 32} } */
 
 /* Load-AND.  */
 
-/* { dg-final { scan-assembler-times "ldclrb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldclrab\t" 8} } */
-/* { dg-final { scan-assembler-times "ldclrlb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldclralb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrab\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclrlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclralb\t" 16} } */
 
-/* { dg-final { scan-assembler-times "ldclrh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldclrah\t" 8} } */
-/* { dg-final { scan-assembler-times "ldclrlh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldclralh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrah\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclrlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclralh\t" 16} } */
 
-/* { dg-final { scan-assembler-times "ldclr\t" 8} */
-/* { dg-final { scan-assembler-times "ldclra\t" 16} } */
-/* { dg-final { scan-assembler-times "ldclrl\t" 8} } */
-/* { dg-final { scan-assembler-times "ldclral\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclr\t" 16} */
+/* { dg-final { scan-assembler-times "ldclra\t" 32} } */
+/* { dg-final { scan-assembler-times "ldclrl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclral\t" 32} } */
 
 /* Load-XOR.  */
 
-/* { dg-final { scan-assembler-times "ldeorb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldeorab\t" 8} } */
-/* { dg-final { scan-assembler-times "ldeorlb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldeoralb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorab\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeorlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeoralb\t" 16} } */
 
-/* { dg-final { scan-assembler-times "ldeorh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldeorah\t" 8} } */
-/* { dg-final { scan-assembler-times "ldeorlh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldeoralh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorah\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeorlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeoralh\t" 16} } */
 
-/* { dg-final { scan-assembler-times "ldeor\t" 8} */
-/* { dg-final { scan-assembler-times "ldeora\t" 16} } */
-/* { dg-final { scan-assembler-times "ldeorl\t" 8} } */
-/* { dg-final { scan-assembler-times "ldeoral\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeor\t" 16} */
+/* { dg-final { scan-assembler-times "ldeora\t" 32} } */
+/* { dg-final { scan-assembler-times "ldeorl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeoral\t" 32} } */
 
 /* { dg-final { scan-assembler-not "ldaxr\t" } } */
 /* { dg-final { scan-assembler-not "stlxr\t" } } */