[AArch64][SVE2] Support for EOR3 and variants of BSL
author Yuliang Wang <yuliang.wang@arm.com>
Thu, 17 Oct 2019 13:23:52 +0000 (13:23 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Thu, 17 Oct 2019 13:23:52 +0000 (13:23 +0000)
2019-10-17  Yuliang Wang  <yuliang.wang@arm.com>

gcc/
* config/aarch64/aarch64-sve2.md (aarch64_sve2_eor3<mode>)
(aarch64_sve2_nor<mode>, aarch64_sve2_nand<mode>)
(aarch64_sve2_bsl<mode>, aarch64_sve2_nbsl<mode>)
(aarch64_sve2_bsl1n<mode>, aarch64_sve2_bsl2n<mode>):
New combine patterns.
* config/aarch64/iterators.md (BSL_DUP): New int iterator for the
above.
(bsl_1st, bsl_2nd, bsl_dup, bsl_mov): Attributes for the above.

gcc/testsuite/
* gcc.target/aarch64/sve2/eor3_1.c: New test.
* gcc.target/aarch64/sve2/nlogic_1.c: As above.
* gcc.target/aarch64/sve2/nlogic_2.c: As above.
* gcc.target/aarch64/sve2/bitsel_1.c: As above.
* gcc.target/aarch64/sve2/bitsel_2.c: As above.
* gcc.target/aarch64/sve2/bitsel_3.c: As above.
* gcc.target/aarch64/sve2/bitsel_4.c: As above.

From-SVN: r277110

gcc/ChangeLog
gcc/config/aarch64/aarch64-sve2.md
gcc/config/aarch64/iterators.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve2/bitsel_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve2/bitsel_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve2/bitsel_3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve2/bitsel_4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve2/eor3_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve2/nlogic_2.c [new file with mode: 0644]

index efbc5c6a8a177779959b21704ed261da633f12f0..7eb331d60f5747fedbe8733a4eff43ecd115ed02 100644 (file)
@@ -1,3 +1,14 @@
+2019-10-17  Yuliang Wang  <yuliang.wang@arm.com>
+
+       * config/aarch64/aarch64-sve2.md (aarch64_sve2_eor3<mode>)
+       (aarch64_sve2_nor<mode>, aarch64_sve2_nand<mode>)
+       (aarch64_sve2_bsl<mode>, aarch64_sve2_nbsl<mode>)
+       (aarch64_sve2_bsl1n<mode>, aarch64_sve2_bsl2n<mode>):
+       New combine patterns.
+       * config/aarch64/iterators.md (BSL_DUP): New int iterator for the
+       above.
+       (bsl_1st, bsl_2nd, bsl_dup, bsl_mov): Attributes for the above.
+
 2019-10-17  Aldy Hernandez  <aldyh@redhat.com>
 
        * tree-vrp.c (value_range_base::dump): Display +INF for both
index b018f5b0bc9b51edf831e2571f0f5a9af2210829..ecbee9733f038b4b96d2da65086b414ff67f985a 100644 (file)
   }
 )
 
+;; Unpredicated 3-way exclusive OR.
+;; Matches (op1 ^ op2) ^ op3.  The first three alternatives tie the
+;; destination to operand 1, 2 or 3 respectively and pass the other two
+;; inputs as the remaining EOR3 sources.  The last alternative gives the
+;; destination its own earlyclobber register and copies operand 1 into
+;; it with MOVPRFX before the EOR3.
+(define_insn "*aarch64_sve2_eor3<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
+       (xor:SVE_I
+         (xor:SVE_I
+           (match_operand:SVE_I 1 "register_operand" "0, w, w, w")
+           (match_operand:SVE_I 2 "register_operand" "w, 0, w, w"))
+         (match_operand:SVE_I 3 "register_operand" "w, w, 0, w")))]
+  "TARGET_SVE2"
+  "@
+  eor3\t%0.d, %0.d, %2.d, %3.d
+  eor3\t%0.d, %0.d, %1.d, %3.d
+  eor3\t%0.d, %0.d, %1.d, %2.d
+  movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d"
+  [(set_attr "movprfx" "*,*,*,yes")]
+)
+
+;; Use NBSL for vector NOR.
+;; The pattern is (~op1 & ~op2) wrapped in an UNSPEC_PRED_X.  With op1 in
+;; the destination register, NBSL is given that register as both its
+;; first and its last (selector) source and op2 as the middle one,
+;; i.e. ~(op1 ? op1 : op2) == ~(op1 | op2).
+;; Operand 3 is not referenced by the output templates; the rewrite
+;; replaces it with CONSTM1_RTX (<VPRED>mode) whenever it is not already
+;; a constant.
+(define_insn_and_rewrite "*aarch64_sve2_nor<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+       (unspec:SVE_I
+         [(match_operand 3)
+          (and:SVE_I
+            (not:SVE_I
+              (match_operand:SVE_I 1 "register_operand" "%0, w"))
+            (not:SVE_I
+              (match_operand:SVE_I 2 "register_operand" "w, w")))]
+         UNSPEC_PRED_X))]
+  "TARGET_SVE2"
+  "@
+  nbsl\t%0.d, %0.d, %2.d, %0.d
+  movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d"
+  "&& !CONSTANT_P (operands[3])"
+  {
+    operands[3] = CONSTM1_RTX (<VPRED>mode);
+  }
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; Use NBSL for vector NAND.
+;; The pattern is (~op1 | ~op2) wrapped in an UNSPEC_PRED_X.  With op1 in
+;; the destination register, NBSL is given op2 as both its middle and
+;; its last (selector) source, i.e. ~(op2 ? op1 : op2) == ~(op1 & op2).
+;; Operand 3 is not referenced by the output templates; the rewrite
+;; replaces it with CONSTM1_RTX (<VPRED>mode) whenever it is not already
+;; a constant.
+(define_insn_and_rewrite "*aarch64_sve2_nand<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+       (unspec:SVE_I
+         [(match_operand 3)
+          (ior:SVE_I
+            (not:SVE_I
+              (match_operand:SVE_I 1 "register_operand" "%0, w"))
+            (not:SVE_I
+              (match_operand:SVE_I 2 "register_operand" "w, w")))]
+         UNSPEC_PRED_X))]
+  "TARGET_SVE2"
+  "@
+  nbsl\t%0.d, %0.d, %2.d, %2.d
+  movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d"
+  "&& !CONSTANT_P (operands[3])"
+  {
+    operands[3] = CONSTM1_RTX (<VPRED>mode);
+  }
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise select.
+;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
+;; BSL_DUP selects which of operands 1 and 2 is repeated by the match_dup
+;; (bsl_dup).  The other one (bsl_mov) is tied to the destination in the
+;; first alternative and copied into it with MOVPRFX in the second.
+(define_insn "*aarch64_sve2_bsl<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+       (xor:SVE_I
+         (and:SVE_I
+           (xor:SVE_I
+             (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+             (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
+           (match_operand:SVE_I 3 "register_operand" "w, w"))
+         (match_dup BSL_DUP)))]
+  "TARGET_SVE2"
+  "@
+  bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
+  movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise inverted select.
+;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
+;; BSL_DUP selects which of operands 1 and 2 is repeated by the match_dup
+;; (bsl_dup).  The other one (bsl_mov) is tied to the destination in the
+;; first alternative and copied into it with MOVPRFX in the second.
+;; The NOT is wrapped in an UNSPEC_PRED_X whose operand 4 is not
+;; referenced by the output templates; the rewrite replaces it with
+;; CONSTM1_RTX (<VPRED>mode) whenever it is not already a constant.
+(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+       (unspec:SVE_I
+         [(match_operand 4)
+          (not:SVE_I
+            (xor:SVE_I
+              (and:SVE_I
+                (xor:SVE_I
+                  (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+                  (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
+                (match_operand:SVE_I 3 "register_operand" "w, w"))
+              (match_dup BSL_DUP)))]
+         UNSPEC_PRED_X))]
+  "TARGET_SVE2"
+  "@
+  nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
+  movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
+  "&& !CONSTANT_P (operands[4])"
+  {
+    operands[4] = CONSTM1_RTX (<VPRED>mode);
+  }
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise select with inverted first operand.
+;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
+;; BSL_DUP selects which of operands 1 and 2 is repeated by the match_dup
+;; (bsl_dup).  The other one (bsl_mov) is tied to the destination in the
+;; first alternative and copied into it with MOVPRFX in the second.
+;; Only the inner NOT is wrapped in an UNSPEC_PRED_X.  Its operand 4 is
+;; not referenced by the output templates; the rewrite replaces it with
+;; CONSTM1_RTX (<VPRED>mode) whenever it is not already a constant.
+(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+       (xor:SVE_I
+         (and:SVE_I
+           (unspec:SVE_I
+             [(match_operand 4)
+              (not:SVE_I
+                (xor:SVE_I
+                  (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+                  (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w")))]
+             UNSPEC_PRED_X)
+           (match_operand:SVE_I 3 "register_operand" "w, w"))
+         (match_dup BSL_DUP)))]
+  "TARGET_SVE2"
+  "@
+  bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
+  movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
+  "&& !CONSTANT_P (operands[4])"
+  {
+    operands[4] = CONSTM1_RTX (<VPRED>mode);
+  }
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise select with inverted second operand.
+;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
+;; Here the duplicated operand (bsl_dup) acts as the selector and is
+;; passed as the last BSL2N source, with op3 as the middle one.  The
+;; other AND input (bsl_mov) is tied to the destination in the first
+;; alternative and copied into it with MOVPRFX in the second.
+;; Operand 4 of the UNSPEC_PRED_X is not referenced by the output
+;; templates; the rewrite replaces it with CONSTM1_RTX (<VPRED>mode)
+;; whenever it is not already a constant.
+(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+       (ior:SVE_I
+         (and:SVE_I
+           (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+           (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
+         (unspec:SVE_I
+           [(match_operand 4)
+            (and:SVE_I
+              (not:SVE_I
+                (match_operand:SVE_I 3 "register_operand" "w, w"))
+              (not:SVE_I
+                (match_dup BSL_DUP)))]
+           UNSPEC_PRED_X)))]
+  "TARGET_SVE2"
+  "@
+  bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
+  movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
+  "&& !CONSTANT_P (operands[4])"
+  {
+    operands[4] = CONSTM1_RTX (<VPRED>mode);
+  }
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise select with inverted second operand, alternative form.
+;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))
+;; This differs from the preceding bsl2n pattern only in the order of the
+;; two NOT terms inside the inner AND (match_dup first, operand 3
+;; second); the constraints, output templates and rewrite are the same.
+(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+       (ior:SVE_I
+         (and:SVE_I
+           (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+           (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
+         (unspec:SVE_I
+           [(match_operand 4)
+            (and:SVE_I
+              (not:SVE_I
+                (match_dup BSL_DUP))
+              (not:SVE_I
+                (match_operand:SVE_I 3 "register_operand" "w, w")))]
+           UNSPEC_PRED_X)))]
+  "TARGET_SVE2"
+  "@
+  bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
+  movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
+  "&& !CONSTANT_P (operands[4])"
+  {
+    operands[4] = CONSTM1_RTX (<VPRED>mode);
+  }
+  [(set_attr "movprfx" "*,yes")]
+)
index 1e321af710bfe80606eedee7e0d191f36c70355b..f879fadb007a23749a523edbe7fe247dee33fa94 100644 (file)
 
 (define_int_iterator SHRNT [UNSPEC_SHRNT UNSPEC_RSHRNT])
 
+;; Operand number (1 or 2) that the SVE2 BSL-family patterns repeat
+;; through (match_dup BSL_DUP).
+(define_int_iterator BSL_DUP [1 2])
+
 (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
 
 (define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN
                         (UNSPEC_RADDHN2 "add")
                         (UNSPEC_RSUBHN2 "sub")])
 
+;; BSL variants: first commutative operand.
+;; Constraint for operand 1 in the tied alternative: "w" when operand 1 is
+;; the duplicated input, "0" when it is the one tied to the destination.
+(define_int_attr bsl_1st [(1 "w") (2 "0")])
+
+;; BSL variants: second commutative operand.
+;; Constraint for operand 2 in the tied alternative: "0" when operand 1 is
+;; the duplicated input, "w" when operand 2 is.
+(define_int_attr bsl_2nd [(1 "0") (2 "w")])
+
+;; BSL variants: duplicated input operand.
+;; Expands to the operand number given by BSL_DUP, for use as
+;; %<bsl_dup> in output templates.
+(define_int_attr bsl_dup [(1 "1") (2 "2")])
+
+;; BSL variants: operand which requires preserving via movprfx.
+;; Expands to whichever of operands 1 and 2 is not the duplicated one.
+(define_int_attr bsl_mov [(1 "2") (2 "1")])
+
 (define_int_attr offsetlr [(UNSPEC_SSLI "") (UNSPEC_USLI "")
                           (UNSPEC_SSRI "offset_")
                           (UNSPEC_USRI "offset_")])
index abd9dd1c2dd1b5740180659f86acf8cec3aa99ae..5ea4a731243918e36a485a60eb647e4e68d54754 100644 (file)
@@ -1,3 +1,13 @@
+2019-10-17  Yuliang Wang  <yuliang.wang@arm.com>
+
+       * gcc.target/aarch64/sve2/eor3_1.c: New test.
+       * gcc.target/aarch64/sve2/nlogic_1.c: As above.
+       * gcc.target/aarch64/sve2/nlogic_2.c: As above.
+       * gcc.target/aarch64/sve2/bitsel_1.c: As above.
+       * gcc.target/aarch64/sve2/bitsel_2.c: As above.
+       * gcc.target/aarch64/sve2/bitsel_3.c: As above.
+       * gcc.target/aarch64/sve2/bitsel_4.c: As above.
+
 2019-10-17  Aldy Hernandez  <aldyh@redhat.com>
 
        * gcc.dg/tree-ssa/evrp4.c: Check for +INF instead of -1.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_1.c
new file mode 100644 (file)
index 0000000..629f741
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+
+#include <stdint.h>
+/* Default OP is a bitwise select (z ? x : y); includers may predefine OP.  */
+#ifndef OP
+#define OP(x,y,z) (((x) & (z)) | ((y) & ~(z)))
+#endif
+/* Signed integer type that is N bits wide.  */
+#define TYPE(N) int##N##_t
+/* Define f_<SIZE>_OP: a[i] = OP (b[i], c[i], d[i]) for each i < n.  */
+#define TEMPLATE(SIZE)                                         \
+void __attribute__ ((noinline, noclone))                       \
+f_##SIZE##_##OP                                                        \
+  (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b,             \
+   TYPE(SIZE) *restrict c, TYPE(SIZE) *restrict d, int n)      \
+{                                                              \
+  for (int i = 0; i < n; i++)                                  \
+    a[i] = OP (b[i], c[i], d[i]);                              \
+}
+
+TEMPLATE (8);
+TEMPLATE (16);
+TEMPLATE (32);
+TEMPLATE (64);
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_2.c
new file mode 100644 (file)
index 0000000..ee2d4a3
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+/* OP is the bitwise complement of the select (z ? x : y).  */
+#define OP(x,y,z) (~(((x) & (z)) | ((y) & ~(z))))
+
+#include "bitsel_1.c"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_3.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_3.c
new file mode 100644 (file)
index 0000000..d0dc713
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+/* OP is a bitwise select with its first input inverted: (z ? ~x : y).  */
+#define OP(x,y,z) ((~(x) & (z)) | ((y) & ~(z)))
+
+#include "bitsel_1.c"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tbic\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tbsl1n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_4.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_4.c
new file mode 100644 (file)
index 0000000..5eb71c9
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+/* OP is a bitwise select with its second input inverted: (z ? x : ~y).  */
+#define OP(x,y,z) (((x) & (z)) | (~(y) & ~(z)))
+
+#include "bitsel_1.c"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tbsl2n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eor3_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/eor3_1.c
new file mode 100644 (file)
index 0000000..13df93e
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+/* OP is the exclusive OR of all three inputs.  */
+#define OP(x,y,z) ((x) ^ (y) ^ (z))
+
+#include "bitsel_1.c"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\teor3\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c
new file mode 100644 (file)
index 0000000..de34b6d
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+
+#include <stdint.h>
+/* Default OP is a bitwise NOR; includers may predefine OP.  */
+#ifndef OP
+#define OP(x,y) (~((x) | (y)))
+#endif
+/* Signed integer type that is N bits wide.  */
+#define TYPE(N) int##N##_t
+/* Define f_<SIZE>_OP: a[i] = OP (b[i], c[i]) for each i < n.  */
+#define TEMPLATE(SIZE)                                 \
+void __attribute__ ((noinline, noclone))               \
+f_##SIZE##_##OP                                                \
+  (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b,     \
+   TYPE(SIZE) *restrict c, int n)                      \
+{                                                      \
+  for (int i = 0; i < n; i++)                          \
+    a[i] = OP (b[i], c[i]);                            \
+}
+
+TEMPLATE (8);
+TEMPLATE (16);
+TEMPLATE (32);
+TEMPLATE (64);
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_2.c
new file mode 100644 (file)
index 0000000..14400b5
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+/* OP is a bitwise NAND.  */
+#define OP(x,y) (~((x) & (y)))
+
+#include "nlogic_1.c"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */