[AArch64] Add SVE support for integer division
author Richard Sandiford <richard.sandiford@linaro.org>
Fri, 25 May 2018 08:38:12 +0000 (08:38 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Fri, 25 May 2018 08:38:12 +0000 (08:38 +0000)
After the previous patch to prevent pessimisation of divisions
by constants, this patch adds support for the SVE integer division
instructions.

2018-05-25  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
* config/aarch64/iterators.md (SVE_INT_BINARY_SD): New code iterator.
(optab, sve_int_op): Handle div and udiv.
* config/aarch64/aarch64-sve.md (<optab><mode>3): New expander
for SVE_INT_BINARY_SD.
(*<optab><mode>3): New insn for the same.

gcc/testsuite/
* gcc.target/aarch64/sve/div_1.c: New test.
* gcc.target/aarch64/sve/div_1_run.c: Likewise.
* gcc.target/aarch64/sve/mul_highpart_2.c: Likewise.
* gcc.target/aarch64/sve/mul_highpart_2_run.c: Likewise.

From-SVN: r260712

gcc/ChangeLog
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/iterators.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/div_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/div_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_2_run.c [new file with mode: 0644]

index 67938386d67cb06b64c32c94cae61e526a292a86..0dcece5c219fba08e4cdb27528b6443c60c83547 100644 (file)
@@ -1,3 +1,11 @@
+2018-05-25  Richard Sandiford  <richard.sandiford@linaro.org>
+
+       * config/aarch64/iterators.md (SVE_INT_BINARY_SD): New code iterator.
+       (optab, sve_int_op): Handle div and udiv.
+       * config/aarch64/aarch64-sve.md (<optab><mode>3): New expander
+       for SVE_INT_BINARY_SD.
+       (*<optab><mode>3): New insn for the same.
+
 2018-05-25  Richard Sandiford  <richard.sandiford@linaro.org>
 
        * tree-vect-patterns.c: Include predict.h.
index 0bb37e72bd43bbdb1507f5da69f098a2f4a12f90..eac3ac97590060e46dba35061ca6d2747a81116e 100644 (file)
   "<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
 )
 
+;; Unpredicated division.
+;; Operands 1 and 2 are the two vector inputs of the division and operand 0
+;; is the result.  The expander wraps the operation in UNSPEC_MERGE_PTRUE
+;; and supplies operand 3 itself: the all-ones constant of the predicate
+;; mode <VPRED>, forced into a register.
+(define_expand "<optab><mode>3"
+  [(set (match_operand:SVE_SDI 0 "register_operand")
+       (unspec:SVE_SDI
+         [(match_dup 3)
+          (SVE_INT_BINARY_SD:SVE_SDI
+            (match_operand:SVE_SDI 1 "register_operand")
+            (match_operand:SVE_SDI 2 "register_operand"))]
+         UNSPEC_MERGE_PTRUE))]
+  "TARGET_SVE"
+  {
+    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
+  }
+)
+
+;; Division predicated with a PTRUE.
+;; Operand 1 is the governing predicate, operand 2 the dividend and
+;; operand 3 the divisor.  The instruction overwrites its first vector
+;; source, so one input must be tied to the destination:
+;;   - alternative 1 ties the dividend to operand 0 and emits the normal
+;;     form (sdiv/udiv) with the divisor as the second source;
+;;   - alternative 2 ties the divisor to operand 0 and emits the reversed
+;;     form (sdivr/udivr) with the dividend as the second source.
+;; Both alternatives require the divisor to be a register, so its predicate
+;; is register_operand, matching the expander.
+(define_insn "*<optab><mode>3"
+  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w")
+       (unspec:SVE_SDI
+         [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+          (SVE_INT_BINARY_SD:SVE_SDI
+            (match_operand:SVE_SDI 2 "register_operand" "0, w")
+            (match_operand:SVE_SDI 3 "register_operand" "w, 0"))]
+         UNSPEC_MERGE_PTRUE))]
+  "TARGET_SVE"
+  "@
+   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+)
+
 ;; Unpredicated NEG, NOT and POPCOUNT.
 (define_expand "<optab><mode>2"
   [(set (match_operand:SVE_I 0 "register_operand")
index 4db3a4c368f35ca5ddc95ff8917a9bb82fc64b17..dad07e437f060e0df80099124f66b5c7317c6e88 100644 (file)
 
 (define_code_iterator SVE_INT_BINARY_REV [minus])
 
+;; Code iterator over the integer division rtx codes div and udiv.
+(define_code_iterator SVE_INT_BINARY_SD [div udiv])
+
 ;; SVE integer comparisons.
 (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
 
                         (neg "neg")
                         (plus "add")
                         (minus "sub")
+                        (div "div")
+                        (udiv "udiv")
                         (ss_plus "qadd")
                         (us_plus "qadd")
                         (ss_minus "qsub")
 ;; The integer SVE instruction that implements an rtx code.
 (define_code_attr sve_int_op [(plus "add")
                              (minus "sub")
+                             (div "sdiv")
+                             (udiv "udiv")
                              (neg "neg")
                              (smin "smin")
                              (smax "smax")
index 66296db447e21f437521f28339812e232a8f065a..c1e289a9bbf632efaabe02ba67e9acd8278c4892 100644 (file)
@@ -1,3 +1,10 @@
+2018-05-25  Richard Sandiford  <richard.sandiford@linaro.org>
+
+       * gcc.target/aarch64/sve/div_1.c: New test.
+       * gcc.target/aarch64/sve/div_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/mul_highpart_2.c: Likewise.
+       * gcc.target/aarch64/sve/mul_highpart_2_run.c: Likewise.
+
 2018-05-25  Richard Sandiford  <richard.sandiford@linaro.org>
 
        * gcc.dg/vect/bb-slp-div-1.c: New XFAILed test.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/div_1.c b/gcc/testsuite/gcc.target/aarch64/sve/div_1.c
new file mode 100644 (file)
index 0000000..ec51ce1
--- /dev/null
@@ -0,0 +1,26 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
+
+#include <stdint.h>
+
+/* Define a noipa function mod_<TYPE> that stores the element-wise quotient
+   src1[i] / src2[i] into dst[i] for the first COUNT elements.  All three
+   pointers are restrict-qualified.  NOTE: despite the "mod_" prefix the
+   loop performs division, not modulus.  */
+#define DEF_LOOP(TYPE)                                 \
+void __attribute__ ((noipa))                           \
+mod_##TYPE (TYPE *restrict dst, TYPE *restrict src1,   \
+           TYPE *restrict src2, int count)             \
+{                                                      \
+  for (int i = 0; i < count; ++i)                      \
+    dst[i] = src1[i] / src2[i];                                \
+}
+
+/* Apply the macro T once to each tested element type: signed and unsigned
+   32-bit and 64-bit integers.  */
+#define TEST_ALL(T) \
+  T (int32_t) \
+  T (uint32_t) \
+  T (int64_t) \
+  T (uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/div_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/div_1_run.c
new file mode 100644 (file)
index 0000000..8134837
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
+
+#include "div_1.c"
+
+#define N 79
+
+/* Run mod_<TYPE> on N elements and abort if any result differs from the
+   division recomputed in the checking loop.  src1[i] is i * 7 + i % 3,
+   negated when i % 11 > 7 (for unsigned TYPE the negation wraps); src2[i]
+   is 5 + i % 5, i.e. in the range 5..9 and therefore never zero.  The
+   empty asm with a "memory" clobber is presumably there to stop the
+   compiler from optimising the set-up loop -- confirm.  */
+#define TEST_LOOP(TYPE)                                \
+  {                                            \
+    TYPE dst[N], src1[N], src2[N];             \
+    for (int i = 0; i < N; ++i)                        \
+      {                                                \
+       src1[i] = i * 7 + i % 3;                \
+       if (i % 11 > 7)                         \
+         src1[i] = -src1[i];                   \
+       src2[i] = 5 + (i % 5);                  \
+       asm volatile ("" ::: "memory");         \
+      }                                                \
+    mod_##TYPE (dst, src1, src2, N);           \
+    for (int i = 0; i < N; ++i)                        \
+      if (dst[i] != src1[i] / src2[i])         \
+       __builtin_abort ();                     \
+  }
+
+/* Run TEST_LOOP for every type listed by TEST_ALL; a failing check aborts,
+   otherwise return 0.  */
+int
+main (void)
+{
+  TEST_ALL (TEST_LOOP);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_2.c b/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_2.c
new file mode 100644 (file)
index 0000000..d2d8b93
--- /dev/null
@@ -0,0 +1,25 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
+
+#include <stdint.h>
+
+/* Define a noipa function mod_<TYPE> that stores src[i] / 17 into dst[i]
+   for the first COUNT elements.  The divisor is a compile-time constant;
+   the dg-final directives in this file expect SMULH/UMULH instructions in
+   the output.  NOTE: despite the "mod_" prefix the loop performs division,
+   not modulus.  */
+#define DEF_LOOP(TYPE)                         \
+void __attribute__ ((noipa))                   \
+mod_##TYPE (TYPE *dst, TYPE *src, int count)   \
+{                                              \
+  for (int i = 0; i < count; ++i)              \
+    dst[i] = src[i] / 17;                      \
+}
+
+/* Apply the macro T once to each tested element type: signed and unsigned
+   32-bit and 64-bit integers.  */
+#define TEST_ALL(T) \
+  T (int32_t) \
+  T (uint32_t) \
+  T (int64_t) \
+  T (uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_2_run.c
new file mode 100644 (file)
index 0000000..24f4a57
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
+
+#include "mul_highpart_2.c"
+
+#define N 79
+
+/* Run mod_<TYPE> on N elements and abort if any result differs from
+   src[i] / 17 recomputed in the checking loop.  src[i] is i * 7 + i % 3,
+   negated when i % 11 > 7 (for unsigned TYPE the negation wraps).  The
+   empty asm with a "memory" clobber is presumably there to stop the
+   compiler from optimising the set-up loop -- confirm.  */
+#define TEST_LOOP(TYPE)                                \
+  {                                            \
+    TYPE dst[N], src[N];                       \
+    for (int i = 0; i < N; ++i)                        \
+      {                                                \
+       src[i] = i * 7 + i % 3;                 \
+       if (i % 11 > 7)                         \
+         src[i] = -src[i];                     \
+       asm volatile ("" ::: "memory");         \
+      }                                                \
+    mod_##TYPE (dst, src, N);                  \
+    for (int i = 0; i < N; ++i)                        \
+      if (dst[i] != src[i] / 17)               \
+       __builtin_abort ();                     \
+  }
+
+/* Run TEST_LOOP for every type listed by TEST_ALL; a failing check aborts,
+   otherwise return 0.  */
+int
+main (void)
+{
+  TEST_ALL (TEST_LOOP);
+  return 0;
+}