From 11e9443f49729ae4ddfc9dda63b5ad7f65f33170 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 13 Mar 2018 15:12:14 +0000 Subject: [PATCH] [AArch64] Add SVE mul_highpart patterns One advantage of the new permute handling compared to the old way is that we can now easily take advantage of the vectoriser's divmod patterns for SVE. 2018-03-13 Richard Sandiford gcc/ * config/aarch64/iterators.md (UNSPEC_SMUL_HIGHPART) (UNSPEC_UMUL_HIGHPART): New constants. (MUL_HIGHPART): New int iterator. (su): Handle UNSPEC_SMUL_HIGHPART and UNSPEC_UMUL_HIGHPART. * config/aarch64/aarch64-sve.md (mul3_highpart): New define_expand. (*mul3_highpart): New define_insn. gcc/testsuite/ * gcc.target/aarch64/sve/mul_highpart_1.c: New test. * gcc.target/aarch64/sve/mul_highpart_1_run.c: Likewise. From-SVN: r258487 --- gcc/ChangeLog | 10 +++++++ gcc/config/aarch64/aarch64-sve.md | 28 ++++++++++++++++++ gcc/config/aarch64/iterators.md | 8 ++++- gcc/testsuite/ChangeLog | 5 ++++ .../gcc.target/aarch64/sve/mul_highpart_1.c | 25 ++++++++++++++++ .../aarch64/sve/mul_highpart_1_run.c | 29 +++++++++++++++++++ 6 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0937241a058..2dbe441db56 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2018-03-13 Richard Sandiford + + * config/aarch64/iterators.md (UNSPEC_SMUL_HIGHPART) + (UNSPEC_UMUL_HIGHPART): New constants. + (MUL_HIGHPART): New int iterator. + (su): Handle UNSPEC_SMUL_HIGHPART and UNSPEC_UMUL_HIGHPART. + * config/aarch64/aarch64-sve.md (mul3_highpart): New + define_expand. + (*mul3_highpart): New define_insn. 
+ 2018-03-13 Eric Botcazou PR lto/84805 diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 9140862d747..2e7f0a45f79 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -980,6 +980,34 @@ mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" ) +;; Unpredicated highpart multiplication. +(define_expand "<su>mul<mode>3_highpart" + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I + [(match_dup 3) + (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand") + (match_operand:SVE_I 2 "register_operand")] + MUL_HIGHPART)] + UNSPEC_MERGE_PTRUE))] + "TARGET_SVE" + { + operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); + } +) + +;; Predicated highpart multiplication. +(define_insn "*<su>mul<mode>3_highpart" + [(set (match_operand:SVE_I 0 "register_operand" "=w") + (unspec:SVE_I + [(match_operand:<VPRED> 1 "register_operand" "Upl") + (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0") + (match_operand:SVE_I 3 "register_operand" "w")] + MUL_HIGHPART)] + UNSPEC_MERGE_PTRUE))] + "TARGET_SVE" + "<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" +) + ;; Unpredicated NEG, NOT and POPCOUNT. (define_expand "<optab><mode>2" [(set (match_operand:SVE_I 0 "register_operand") diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 9c1c9dabdd9..a2945a81848 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -438,6 +438,8 @@ UNSPEC_ANDF ; Used in aarch64-sve.md. UNSPEC_IORF ; Used in aarch64-sve.md. UNSPEC_XORF ; Used in aarch64-sve.md. + UNSPEC_SMUL_HIGHPART ; Used in aarch64-sve.md. + UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md. UNSPEC_COND_ADD ; Used in aarch64-sve.md. UNSPEC_COND_SUB ; Used in aarch64-sve.md. UNSPEC_COND_SMAX ; Used in aarch64-sve.md. 
@@ -1467,6 +1469,8 @@ (define_int_iterator UNPACK_UNSIGNED [UNSPEC_UNPACKULO UNSPEC_UNPACKUHI]) +(define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART]) + (define_int_iterator SVE_COND_INT_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB UNSPEC_COND_SMAX UNSPEC_COND_UMAX UNSPEC_COND_SMIN UNSPEC_COND_UMIN @@ -1558,7 +1562,9 @@ (define_int_attr su [(UNSPEC_UNPACKSHI "s") (UNSPEC_UNPACKUHI "u") (UNSPEC_UNPACKSLO "s") - (UNSPEC_UNPACKULO "u")]) + (UNSPEC_UNPACKULO "u") + (UNSPEC_SMUL_HIGHPART "s") + (UNSPEC_UMUL_HIGHPART "u")]) (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u") (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 1f26d4bfaa6..1257912b438 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2018-03-13 Richard Sandiford + + * gcc.target/aarch64/sve/mul_highpart_1.c: New test. + * gcc.target/aarch64/sve/mul_highpart_1_run.c: Likewise. + 2018-03-13 Martin Liska PR ipa/84658. 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1.c new file mode 100644 index 00000000000..4354c1c6634 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1.c @@ -0,0 +1,25 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */ + +#include <stdint.h> + +#define DEF_LOOP(TYPE) \ +void __attribute__ ((noipa)) \ +mod_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = src[i] % 17; \ +} + +#define TEST_ALL(T) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c new file mode 100644 index 00000000000..4eb173bd879 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */ + +#include "mul_highpart_1.c" + +#define N 79 + +#define TEST_LOOP(TYPE) \ + { \ + TYPE dst[N], src[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + src[i] = i * 7 + i % 3; \ + if (i % 11 > 7) \ + src[i] = -src[i]; \ + asm volatile ("" ::: "memory"); \ + } \ + mod_##TYPE (dst, src, N); \ + for (int i = 0; i < N; ++i) \ + if (dst[i] != src[i] % 17) \ + __builtin_abort (); \ + } + +int +main (void) +{ + TEST_ALL (TEST_LOOP); + return 0; +} -- 2.30.2