From: Yuliang Wang Date: Thu, 17 Oct 2019 13:23:52 +0000 (+0000) Subject: [AArch64][SVE2] Support for EOR3 and variants of BSL X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2d57b12e2acd52b843adbcd6d5909cb0b9f7196b;p=gcc.git [AArch64][SVE2] Support for EOR3 and variants of BSL 2019-10-17 Yuliang Wang gcc/ * config/aarch64/aarch64-sve2.md (aarch64_sve2_eor3) (aarch64_sve2_nor, aarch64_sve2_nand) (aarch64_sve2_bsl, aarch64_sve2_nbsl) (aarch64_sve2_bsl1n, aarch64_sve2_bsl2n): New combine patterns. * config/aarch64/iterators.md (BSL_DUP): New int iterator for the above. (bsl_1st, bsl_2nd, bsl_dup, bsl_mov): Attributes for the above. gcc/testsuite/ * gcc.target/aarch64/sve2/eor3_1.c: New test. * gcc.target/aarch64/sve2/nlogic_1.c: As above. * gcc.target/aarch64/sve2/nlogic_2.c: As above. * gcc.target/aarch64/sve2/bitsel_1.c: As above. * gcc.target/aarch64/sve2/bitsel_2.c: As above. * gcc.target/aarch64/sve2/bitsel_3.c: As above. * gcc.target/aarch64/sve2/bitsel_4.c: As above. From-SVN: r277110 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index efbc5c6a8a1..7eb331d60f5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2019-10-17 Yuliang Wang + + * config/aarch64/aarch64-sve2.md (aarch64_sve2_eor3) + (aarch64_sve2_nor, aarch64_sve2_nand) + (aarch64_sve2_bsl, aarch64_sve2_nbsl) + (aarch64_sve2_bsl1n, aarch64_sve2_bsl2n): + New combine patterns. + * config/aarch64/iterators.md (BSL_DUP): New int iterator for the + above. + (bsl_1st, bsl_2nd, bsl_dup, bsl_mov): Attributes for the above. + 2019-10-17 Aldy Hernandez * tree-vrp.c (value_range_base::dump): Display +INF for both diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index b018f5b0bc9..ecbee9733f0 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -142,3 +142,187 @@ } ) +;; Unpredicated 3-way exclusive OR. 
+(define_insn "*aarch64_sve2_eor3" + [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w") + (xor:SVE_I + (xor:SVE_I + (match_operand:SVE_I 1 "register_operand" "0, w, w, w") + (match_operand:SVE_I 2 "register_operand" "w, 0, w, w")) + (match_operand:SVE_I 3 "register_operand" "w, w, 0, w")))] + "TARGET_SVE2" + "@ + eor3\t%0.d, %0.d, %2.d, %3.d + eor3\t%0.d, %0.d, %1.d, %3.d + eor3\t%0.d, %0.d, %1.d, %2.d + movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d" + [(set_attr "movprfx" "*,*,*,yes")] +) + +;; Use NBSL for vector NOR. +(define_insn_and_rewrite "*aarch64_sve2_nor" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand 3) + (and:SVE_I + (not:SVE_I + (match_operand:SVE_I 1 "register_operand" "%0, w")) + (not:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w")))] + UNSPEC_PRED_X))] + "TARGET_SVE2" + "@ + nbsl\t%0.d, %0.d, %2.d, %0.d + movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d" + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (<VPRED>mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Use NBSL for vector NAND. +(define_insn_and_rewrite "*aarch64_sve2_nand" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand 3) + (ior:SVE_I + (not:SVE_I + (match_operand:SVE_I 1 "register_operand" "%0, w")) + (not:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w")))] + UNSPEC_PRED_X))] + "TARGET_SVE2" + "@ + nbsl\t%0.d, %0.d, %2.d, %2.d + movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d" + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (<VPRED>mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Unpredicated bitwise select. +;; (op3 ? 
bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup) +(define_insn "*aarch64_sve2_bsl" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (xor:SVE_I + (and:SVE_I + (xor:SVE_I + (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w") + (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w")) + (match_operand:SVE_I 3 "register_operand" "w, w")) + (match_dup BSL_DUP)))] + "TARGET_SVE2" + "@ + bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d + movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d" + [(set_attr "movprfx" "*,yes")] +) + +;; Unpredicated bitwise inverted select. +;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)) +(define_insn_and_rewrite "*aarch64_sve2_nbsl" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand 4) + (not:SVE_I + (xor:SVE_I + (and:SVE_I + (xor:SVE_I + (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w") + (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w")) + (match_operand:SVE_I 3 "register_operand" "w, w")) + (match_dup BSL_DUP)))] + UNSPEC_PRED_X))] + "TARGET_SVE2" + "@ + nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d + movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (<VPRED>mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Unpredicated bitwise select with inverted first operand. +;; (op3 ? 
~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup) +(define_insn_and_rewrite "*aarch64_sve2_bsl1n" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (xor:SVE_I + (and:SVE_I + (unspec:SVE_I + [(match_operand 4) + (not:SVE_I + (xor:SVE_I + (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w") + (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w")))] + UNSPEC_PRED_X) + (match_operand:SVE_I 3 "register_operand" "w, w")) + (match_dup BSL_DUP)))] + "TARGET_SVE2" + "@ + bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d + movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (<VPRED>mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Unpredicated bitwise select with inverted second operand. +;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup)) +(define_insn_and_rewrite "*aarch64_sve2_bsl2n" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (ior:SVE_I + (and:SVE_I + (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w") + (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w")) + (unspec:SVE_I + [(match_operand 4) + (and:SVE_I + (not:SVE_I + (match_operand:SVE_I 3 "register_operand" "w, w")) + (not:SVE_I + (match_dup BSL_DUP)))] + UNSPEC_PRED_X)))] + "TARGET_SVE2" + "@ + bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d + movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (<VPRED>mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Unpredicated bitwise select with inverted second operand, alternative form. +;; (bsl_dup ? 
bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3)) +(define_insn_and_rewrite "*aarch64_sve2_bsl2n" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (ior:SVE_I + (and:SVE_I + (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w") + (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w")) + (unspec:SVE_I + [(match_operand 4) + (and:SVE_I + (not:SVE_I + (match_dup BSL_DUP)) + (not:SVE_I + (match_operand:SVE_I 3 "register_operand" "w, w")))] + UNSPEC_PRED_X)))] + "TARGET_SVE2" + "@ + bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d + movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (<VPRED>mode); + } + [(set_attr "movprfx" "*,yes")] +) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 1e321af710b..f879fadb007 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1611,6 +1611,8 @@ (define_int_iterator SHRNT [UNSPEC_SHRNT UNSPEC_RSHRNT]) +(define_int_iterator BSL_DUP [1 2]) + (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT]) (define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN @@ -1976,6 +1978,18 @@ (UNSPEC_RADDHN2 "add") (UNSPEC_RSUBHN2 "sub")]) +;; BSL variants: first commutative operand. +(define_int_attr bsl_1st [(1 "w") (2 "0")]) + +;; BSL variants: second commutative operand. +(define_int_attr bsl_2nd [(1 "0") (2 "w")]) + +;; BSL variants: duplicated input operand. +(define_int_attr bsl_dup [(1 "1") (2 "2")]) + +;; BSL variants: operand which requires preserving via movprfx. +(define_int_attr bsl_mov [(1 "2") (2 "1")]) + (define_int_attr offsetlr [(UNSPEC_SSLI "") (UNSPEC_USLI "") (UNSPEC_SSRI "offset_") (UNSPEC_USRI "offset_")]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index abd9dd1c2dd..5ea4a731243 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,13 @@ +2019-10-17 Yuliang Wang + + * gcc.target/aarch64/sve2/eor3_1.c: New test. + * gcc.target/aarch64/sve2/nlogic_1.c: As above. 
+ * gcc.target/aarch64/sve2/nlogic_2.c: As above. + * gcc.target/aarch64/sve2/bitsel_1.c: As above. + * gcc.target/aarch64/sve2/bitsel_2.c: As above. + * gcc.target/aarch64/sve2/bitsel_3.c: As above. + * gcc.target/aarch64/sve2/bitsel_4.c: As above. + 2019-10-17 Aldy Hernandez * gcc.dg/tree-ssa/evrp4.c: Check for +INF instead of -1. diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_1.c new file mode 100644 index 00000000000..629f74167a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_1.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ + +#include <stdint.h> + +#ifndef OP +#define OP(x,y,z) (((x) & (z)) | ((y) & ~(z))) +#endif + +#define TYPE(N) int##N##_t + +#define TEMPLATE(SIZE) \ +void __attribute__ ((noinline, noclone)) \ +f_##SIZE##_##OP \ + (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \ + TYPE(SIZE) *restrict c, TYPE(SIZE) *restrict d, int n) \ +{ \ + for (int i = 0; i < n; i++) \ + a[i] = OP (b[i], c[i], d[i]); \ +} + +TEMPLATE (8); +TEMPLATE (16); +TEMPLATE (32); +TEMPLATE (64); + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */ + +/* { dg-final { scan-assembler-times {\tbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_2.c new file mode 100644 index 00000000000..ee2d4a35a1d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_2.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ + +#define OP(x,y,z) (~(((x) & (z)) | ((y) & ~(z)))) + +#include "bitsel_1.c" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 
"vect" } } */ + +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */ + +/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_3.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_3.c new file mode 100644 index 00000000000..d0dc713d92c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_3.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ + +#define OP(x,y,z) ((~(x) & (z)) | ((y) & ~(z))) + +#include "bitsel_1.c" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\tbic\tz[0-9]+\.[bhsd]} } } */ + +/* { dg-final { scan-assembler-times {\tbsl1n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_4.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_4.c new file mode 100644 index 00000000000..5eb71c93ae7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_4.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ + +#define OP(x,y,z) (((x) & (z)) | (~(y) & ~(z))) + +#include "bitsel_1.c" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */ + +/* { dg-final { scan-assembler-times {\tbsl2n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eor3_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve2/eor3_1.c new file mode 100644 index 00000000000..13df93e56b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/eor3_1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ + +#define OP(x,y,z) ((x) ^ (y) ^ (z)) + +#include "bitsel_1.c" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */ + +/* { dg-final { scan-assembler-times {\teor3\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c new file mode 100644 index 00000000000..de34b6d817a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ + +#include <stdint.h> + +#ifndef OP +#define OP(x,y) (~((x) | (y))) +#endif + +#define TYPE(N) int##N##_t + +#define TEMPLATE(SIZE) \ +void __attribute__ ((noinline, noclone)) \ +f_##SIZE##_##OP \ + (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \ + TYPE(SIZE) *restrict c, int n) \ +{ \ + for (int i = 0; i < n; i++) \ + a[i] = OP (b[i], c[i]); \ +} + +TEMPLATE (8); +TEMPLATE (16); +TEMPLATE (32); +TEMPLATE (64); + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */ + +/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_2.c new file mode 100644 index 00000000000..14400b5713e --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_2.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ + +#define OP(x,y) (~((x) & (y))) + +#include "nlogic_1.c" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */ + +/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */