[AArch64] Add support for 64-bit vector-mode ldp/stp
author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Tue, 20 Oct 2015 17:18:24 +0000 (17:18 +0000)
committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>
Tue, 20 Oct 2015 17:18:24 +0000 (17:18 +0000)
* config/aarch64/aarch64.c (aarch64_mode_valid_for_sched_fusion_p):
New function.
(fusion_load_store): Use it.
* config/aarch64/aarch64-ldpstp.md: Add new peephole2s for
ldp and stp in VD modes.
* config/aarch64/aarch64-simd.md (load_pair<mode>, VD): New pattern.
(store_pair<mode>, VD): Likewise.

* gcc.target/aarch64/stp_vec_64_1.c: New test.
* gcc.target/aarch64/ldp_vec_64_1.c: Likewise.

From-SVN: r229094

gcc/ChangeLog
gcc/config/aarch64/aarch64-ldpstp.md
gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/aarch64.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/ldp_vec_64_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/stp_vec_64_1.c [new file with mode: 0644]

index 9ad52a8262e8b5365a13f654aec4add22a8da278..65ef0afce421d2439bf884c0aff53baf33c960b3 100644 (file)
@@ -1,3 +1,13 @@
+2015-10-20  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * config/aarch64/aarch64.c (aarch64_mode_valid_for_sched_fusion_p):
+       New function.
+       (fusion_load_store): Use it.
+       * config/aarch64/aarch64-ldpstp.md: Add new peephole2s for
+       ldp and stp in VD modes.
+       * config/aarch64/aarch64-simd.md (load_pair<mode>, VD): New pattern.
+       (store_pair<mode>, VD): Likewise.
+
 2015-10-20  Vladimir Makarov  <vmakarov@redhat.com>
 
        PR rtl-optimization/67609
index 8d6d88259d5931a38b152791bc1a7f5e2681b2f5..458829ce51d7fef912c839b229abbfc802d9fc7a 100644 (file)
     }
 })
 
+;; Merge two consecutive 64-bit vector (VD) loads into a single load pair.
+;; The TARGET_SIMD guard matches the VD store peephole and the condition
+;; of the load_pair<mode> insn that the replacement must be recognized as.
+(define_peephole2
+  [(set (match_operand:VD 0 "register_operand" "")
+       (match_operand:VD 1 "aarch64_mem_pair_operand" ""))
+   (set (match_operand:VD 2 "register_operand" "")
+       (match_operand:VD 3 "memory_operand" ""))]
+  "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+             (set (match_dup 2) (match_dup 3))])]
+{
+  rtx base, offset_1, offset_2;
+
+  extract_base_offset_in_addr (operands[1], &base, &offset_1);
+  extract_base_offset_in_addr (operands[3], &base, &offset_2);
+  /* Order the pair so that the access with the lower offset comes first.  */
+  if (INTVAL (offset_1) > INTVAL (offset_2))
+    {
+      std::swap (operands[0], operands[2]);
+      std::swap (operands[1], operands[3]);
+    }
+})
+
+;; Merge two consecutive 64-bit vector (VD) stores into a single store pair.
+(define_peephole2
+  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "")
+       (match_operand:VD 1 "register_operand" ""))
+   (set (match_operand:VD 2 "memory_operand" "")
+       (match_operand:VD 3 "register_operand" ""))]
+  "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+             (set (match_dup 2) (match_dup 3))])]
+{
+  rtx base, offset_1, offset_2;
+
+  extract_base_offset_in_addr (operands[0], &base, &offset_1);
+  extract_base_offset_in_addr (operands[2], &base, &offset_2);
+  /* Order the pair so that the access with the lower offset comes first.  */
+  if (INTVAL (offset_1) > INTVAL (offset_2))
+    {
+      std::swap (operands[0], operands[2]);
+      std::swap (operands[1], operands[3]);
+    }
+})
+
+
 ;; Handle sign/zero extended consecutive load/store.
 
 (define_peephole2
index cf1ff6d7da346e545f86c27b2fa1ecfd86cf757b..65a2b6fad64f9210e10c45da7089dfca24e69e71 100644 (file)
    (set_attr "length" "4,4,4,8,8,8,4")]
 )
 
+;; Load two 64-bit vector (VD) registers with a single LDP.  The insn
+;; condition requires the address of operand 3 to equal the address of
+;; operand 1 plus the size of the mode, i.e. the two memory operands
+;; must be adjacent, with operand 1 at the lower address.
+(define_insn "load_pair<mode>"
+  [(set (match_operand:VD 0 "register_operand" "=w")
+       (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
+   (set (match_operand:VD 2 "register_operand" "=w")
+       (match_operand:VD 3 "memory_operand" "m"))]
+  "TARGET_SIMD
+   && rtx_equal_p (XEXP (operands[3], 0),
+                  plus_constant (Pmode,
+                                 XEXP (operands[1], 0),
+                                 GET_MODE_SIZE (<MODE>mode)))"
+  "ldp\\t%d0, %d2, %1"
+  [(set_attr "type" "neon_ldp")]
+)
+
+;; Store two 64-bit vector (VD) registers with a single STP.  The insn
+;; condition requires the address of operand 2 to equal the address of
+;; operand 0 plus the size of the mode, i.e. the two memory operands
+;; must be adjacent, with operand 0 at the lower address.
+(define_insn "store_pair<mode>"
+  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
+       (match_operand:VD 1 "register_operand" "w"))
+   (set (match_operand:VD 2 "memory_operand" "=m")
+       (match_operand:VD 3 "register_operand" "w"))]
+  "TARGET_SIMD
+   && rtx_equal_p (XEXP (operands[2], 0),
+                  plus_constant (Pmode,
+                                 XEXP (operands[0], 0),
+                                 GET_MODE_SIZE (<MODE>mode)))"
+  "stp\\t%d1, %d3, %0"
+  [(set_attr "type" "neon_stp")]
+)
+
 (define_split
   [(set (match_operand:VQ 0 "register_operand" "")
       (match_operand:VQ 1 "register_operand" ""))]
index 6197a367a7ab7f6a09dfe31e5cc05077ed6efef0..47404e95ea7b1dc9a56e9ccc5a63393d544b0695 100644 (file)
@@ -3468,6 +3468,18 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
          && offset % GET_MODE_SIZE (mode) == 0);
 }
 
+/* Return true if MODE can be handled by LDP/STP: SImode, DImode, SFmode,
+   DFmode, or a supported vector mode that is 8 bytes wide.  */
+
+static bool
+aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
+{
+  if (mode == SImode || mode == DImode || mode == SFmode || mode == DFmode)
+    return true;
+
+  return aarch64_vector_mode_supported_p (mode) && GET_MODE_SIZE (mode) == 8;
+}
+
 /* Return true if X is a valid address for machine mode MODE.  If it is,
    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
    effect.  OUTER_CODE is PARALLEL for a load/store pair.  */
@@ -12813,8 +12825,9 @@ fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
   src = SET_SRC (x);
   dest = SET_DEST (x);
 
-  if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
-      && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
+  machine_mode dest_mode = GET_MODE (dest);
+
+  if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
     return SCHED_FUSION_NONE;
 
   if (GET_CODE (src) == SIGN_EXTEND)
index 62f71b1e1e57bb71cf1dc94fe5c15adbba483340..924d5dab9fbbd675731bddeb57de4885952bc764 100644 (file)
@@ -1,3 +1,8 @@
+2015-10-20  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * gcc.target/aarch64/stp_vec_64_1.c: New test.
+       * gcc.target/aarch64/ldp_vec_64_1.c: Likewise.
+
 2015-10-20  Alan Lawrence  <alan.lawrence@arm.com>
 
        * lib/target-supports.exp (check_effective_target_vect64): Add AArch64.
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_vec_64_1.c b/gcc/testsuite/gcc.target/aarch64/ldp_vec_64_1.c
new file mode 100644 (file)
index 0000000..62213f3
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast" } */
+/* Adjacent 64-bit vector loads should be combined into an LDP of D regs.  */
+
+typedef int int32x2_t __attribute__ ((__vector_size__ ((8))));
+
+void
+foo (int32x2_t *foo, int32x2_t *bar)
+{
+  int i = 0;
+
+  for (i = 0; i < 1024; i+=2)
+    foo[i] = bar[i] + bar[i + 1];
+}
+
+/* { dg-final { scan-assembler "ldp\td\[0-9\]+, d\[0-9\]" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/stp_vec_64_1.c b/gcc/testsuite/gcc.target/aarch64/stp_vec_64_1.c
new file mode 100644 (file)
index 0000000..11e757a
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast" } */
+
+/* Adjacent 64-bit vector stores should be combined into an STP of D regs.  */
+typedef int int32x2_t __attribute__ ((__vector_size__ ((8))));
+
+void
+bar (int32x2_t *foo)
+{
+  int i = 0;
+  int32x2_t val = { 3, 2 };
+
+  for (i = 0; i < 256; i+=2)
+    {
+      foo[i] = val;
+      foo[i+1] = val;
+    }
+}
+
+/* { dg-final { scan-assembler "stp\td\[0-9\]+, d\[0-9\]" } } */