aarch64: Add SVE support for -mlow-precision-div

author Richard Sandiford <richard.sandiford@arm.com>

Wed, 19 Feb 2020 18:28:48 +0000 (18:28 +0000)

committer Richard Sandiford <richard.sandiford@arm.com>

Fri, 21 Feb 2020 10:17:30 +0000 (10:17 +0000)
author Richard Sandiford <richard.sandiford@arm.com>
Wed, 19 Feb 2020 18:28:48 +0000 (18:28 +0000)
committer Richard Sandiford <richard.sandiford@arm.com>
Fri, 21 Feb 2020 10:17:30 +0000 (10:17 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index b5eb87a0fc2b1aa07a94aa2692a17aaf7113945d..4d161ca2cd1d58f1122517de73c4a829bc5f618f 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2020-02-21  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * config/aarch64/aarch64.c (aarch64_emit_mult): New function.
+       (aarch64_emit_approx_div): Add SVE support.  Use aarch64_emit_mult
+       instead of emitting multiplication instructions directly.
+       * config/aarch64/iterators.md (SVE_COND_FP_BINARY_OPTAB): New iterator.
+       * config/aarch64/aarch64-sve.md (div<mode>3, @aarch64_frecpe<mode>)
+       (@aarch64_frecps<mode>): New expanders.
+
  2020-02-21  Richard Sandiford  <richard.sandiford@arm.com>
  
         * config/aarch64/aarch64-protos.h (AARCH64_APPROX_MODE): Operate
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md

index fa3852992e1341a68b927db81f2712091dbf72ce..e3b1da89c1ae30bae34dfd1860c5cf504c7f6b7e 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -99,6 +99,7 @@
  ;; ---- [FP] Subtraction
  ;; ---- [FP] Absolute difference
  ;; ---- [FP] Multiplication
+;; ---- [FP] Division
  ;; ---- [FP] Binary logical operations
  ;; ---- [FP] Sign copying
  ;; ---- [FP] Maximum and minimum
@@ -4719,7 +4720,7 @@
            (const_int SVE_RELAXED_GP)
            (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>")
            (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")]
-         SVE_COND_FP_BINARY))]
+         SVE_COND_FP_BINARY_OPTAB))]
    "TARGET_SVE"
    {
      operands[3] = aarch64_ptrue_reg (<VPRED>mode);
@@ -5455,6 +5456,47 @@
    "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
  )
  
+;; -------------------------------------------------------------------------
+;; ---- [FP] Division
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+(define_expand "div<mode>3"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+       (unspec:SVE_FULL_F
+         [(match_dup 3)
+          (const_int SVE_RELAXED_GP)
+          (match_operand:SVE_FULL_F 1 "nonmemory_operand")
+          (match_operand:SVE_FULL_F 2 "register_operand")]
+         UNSPEC_COND_FDIV))]
+  "TARGET_SVE"
+  {
+    if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
+      DONE;
+
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+  }
+)
+
+(define_expand "@aarch64_frecpe<mode>"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+       (unspec:SVE_FULL_F
+         [(match_operand:SVE_FULL_F 1 "register_operand")]
+         UNSPEC_FRECPE))]
+  "TARGET_SVE"
+)
+
+(define_expand "@aarch64_frecps<mode>"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+       (unspec:SVE_FULL_F
+         [(match_operand:SVE_FULL_F 1 "register_operand")
+          (match_operand:SVE_FULL_F 2 "register_operand")]
+         UNSPEC_FRECPS))]
+  "TARGET_SVE"
+)
+
  ;; -------------------------------------------------------------------------
  ;; ---- [FP] Binary logical operations
  ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index 0acaa06b91c47095cb355ab6212eaacf27e780d8..c1bbc4917c74ea5d636d16b30d104277eb8d2943 100644 (file)
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -12739,6 +12739,25 @@ aarch64_builtin_reciprocal (tree fndecl)
    gcc_unreachable ();
  }
  
+/* Emit code to perform the floating-point operation:
+
+     DST = SRC1 * SRC2
+
+   where all three operands are already known to be registers.
+   If the operation is an SVE one, PTRUE is a suitable all-true
+   predicate; otherwise PTRUE is null and a plain MULT is emitted.  */
+
+static void
+aarch64_emit_mult (rtx dst, rtx ptrue, rtx src1, rtx src2)
+{
+  if (ptrue)
+    emit_insn (gen_aarch64_pred (UNSPEC_COND_FMUL, GET_MODE (dst),
+                                dst, ptrue, src1, src2,
+                                gen_int_mode (SVE_RELAXED_GP, SImode)));
+  else
+    emit_set_insn (dst, gen_rtx_MULT (GET_MODE (dst), src1, src2));
+}
+
  /* Emit instruction sequence to compute either the approximate square root
     or its approximate reciprocal, depending on the flag RECP, and return
     whether the sequence was emitted or not.  */
@@ -12857,6 +12876,10 @@ aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
    if (!TARGET_SIMD && VECTOR_MODE_P (mode))
      return false;
  
+  rtx pg = NULL_RTX;
+  if (aarch64_sve_mode_p (mode))
+    pg = aarch64_ptrue_reg (aarch64_sve_pred_mode (mode));
+
    /* Estimate the approximate reciprocal.  */
    rtx xrcp = gen_reg_rtx (mode);
    emit_insn (gen_aarch64_frecpe (mode, xrcp, den));
@@ -12876,7 +12899,7 @@ aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
        emit_insn (gen_aarch64_frecps (mode, xtmp, xrcp, den));
  
        if (iterations > 0)
-       emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
+       aarch64_emit_mult (xrcp, pg, xrcp, xtmp);
      }
  
    if (num != CONST1_RTX (mode))
@@ -12884,11 +12907,11 @@ aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
        /* As the approximate reciprocal of DEN is already calculated, only
          calculate the approximate division when NUM is not 1.0.  */
        rtx xnum = force_reg (mode, num);
-      emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
+      aarch64_emit_mult (xrcp, pg, xrcp, xnum);
      }
  
    /* Finalize the approximation.  */
-  emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
+  aarch64_emit_mult (quo, pg, xrcp, xtmp);
    return true;
  }
  
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md

index d17d79a30daee3161a2d92b0339c1f1efa50505d..548ee0f51e87621f7326708c87d6ce4f1a506d98 100644 (file)
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2291,6 +2291,17 @@
                                          UNSPEC_COND_FMULX
                                          UNSPEC_COND_FSUB])
  
+;; Same as SVE_COND_FP_BINARY, but without codes that have a dedicated
+;; <optab><mode>3 expander.
+(define_int_iterator SVE_COND_FP_BINARY_OPTAB [UNSPEC_COND_FADD
+                                              UNSPEC_COND_FMAX
+                                              UNSPEC_COND_FMAXNM
+                                              UNSPEC_COND_FMIN
+                                              UNSPEC_COND_FMINNM
+                                              UNSPEC_COND_FMUL
+                                              UNSPEC_COND_FMULX
+                                              UNSPEC_COND_FSUB])
+
  (define_int_iterator SVE_COND_FP_BINARY_INT [UNSPEC_COND_FSCALE])
  
  (define_int_iterator SVE_COND_FP_ADD [UNSPEC_COND_FADD])
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 216bac92df5933fcb2d4d885ce6c78c786e7f02f..936260e4ae796604eb337e298ba265d0c0f18815 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2020-02-21  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.target/aarch64/sve/recip_1.c: New test.
+       * gcc.target/aarch64/sve/recip_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/recip_2.c: Likewise.
+       * gcc.target/aarch64/sve/recip_2_run.c: Likewise.
+
  2020-02-20  Martin Sebor  <msebor@redhat.com>
  
         PR c++/93801
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/recip_1.c b/gcc/testsuite/gcc.target/aarch64/sve/recip_1.c

new file mode 100644 (file)

index 0000000..c9d470f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/recip_1.c
@@ -0,0 +1,27 @@
+/* { dg-options "-Ofast -mlow-precision-div" } */
+
+#define DEF_LOOP(TYPE)                 \
+  void                                 \
+  test_##TYPE (TYPE *x, int n)         \
+  {                                    \
+    for (int i = 0; i < n; ++i)                \
+      x[i] = (TYPE) 1 / x[i];          \
+  }
+
+#define TEST_ALL(T)    \
+  T (_Float16)         \
+  T (float)            \
+  T (double)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-not {\tfrecpe\tz[0-9]+\.h} } } */
+/* { dg-final { scan-assembler-not {\tfrecps\tz[0-9]+\.h} } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrecpe\tz[0-9]+\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrecps\tz[0-9]+\.s} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrecpe\tz[0-9]+\.d} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrecps\tz[0-9]+\.d} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/recip_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/recip_1_run.c

new file mode 100644 (file)

index 0000000..b232b88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/recip_1_run.c
@@ -0,0 +1,27 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast -mlow-precision-div" } */
+
+#include "recip_1.c"
+
+#define N 77
+
+#define TEST_LOOP(TYPE)                                \
+  {                                            \
+    TYPE a[N];                                 \
+    for (int i = 0; i < N; ++i)                        \
+      a[i] = i + 1;                            \
+    test_##TYPE (a, N);                                \
+    for (int i = 0; i < N; ++i)                        \
+      {                                                \
+       double diff = a[i] - 1.0 / (i + 1);     \
+       if (__builtin_fabs (diff) > 0x1.0p-8)   \
+         __builtin_abort ();                   \
+      }                                                \
+  }
+
+int
+main (void)
+{
+  TEST_ALL (TEST_LOOP);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/recip_2.c b/gcc/testsuite/gcc.target/aarch64/sve/recip_2.c

new file mode 100644 (file)

index 0000000..f308a6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/recip_2.c
@@ -0,0 +1,27 @@
+/* { dg-options "-Ofast -mlow-precision-div" } */
+
+#define DEF_LOOP(TYPE)                                         \
+  void                                                         \
+  test_##TYPE (TYPE *restrict x, TYPE *restrict y, int n)      \
+  {                                                            \
+    for (int i = 0; i < n; ++i)                                        \
+      x[i] /= y[i];                                            \
+  }
+
+#define TEST_ALL(T)    \
+  T (_Float16)         \
+  T (float)            \
+  T (double)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-not {\tfrecpe\tz[0-9]+\.h} } } */
+/* { dg-final { scan-assembler-not {\tfrecps\tz[0-9]+\.h} } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrecpe\tz[0-9]+\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrecps\tz[0-9]+\.s} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tfrecpe\tz[0-9]+\.d} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrecps\tz[0-9]+\.d} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/recip_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/recip_2_run.c

new file mode 100644 (file)

index 0000000..25a31e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/recip_2_run.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast -mlow-precision-div" } */
+
+#include "recip_2.c"
+
+#define N 77
+
+#define TEST_LOOP(TYPE)                                        \
+  {                                                    \
+    TYPE a[N], b[N];                                   \
+    for (int i = 0; i < N; ++i)                                \
+      {                                                        \
+       a[i] = i + 11;                                  \
+       b[i] = i + 1;                                   \
+      }                                                        \
+    test_##TYPE (a, b, N);                             \
+    for (int i = 0; i < N; ++i)                                \
+      {                                                        \
+       double diff = a[i] - (i + 11.0) / (i + 1);      \
+       if (__builtin_fabs (diff) > 0x1.0p-8)           \
+         __builtin_abort ();                           \
+      }                                                        \
+  }
+
+int
+main (void)
+{
+  TEST_ALL (TEST_LOOP);
+  return 0;
+}
author	Richard Sandiford <richard.sandiford@arm.com>
	Wed, 19 Feb 2020 18:28:48 +0000 (18:28 +0000)
committer	Richard Sandiford <richard.sandiford@arm.com>
	Fri, 21 Feb 2020 10:17:30 +0000 (10:17 +0000)
gcc/ChangeLog		patch | blob | history
gcc/config/aarch64/aarch64-sve.md		patch | blob | history
gcc/config/aarch64/aarch64.c		patch | blob | history
gcc/config/aarch64/iterators.md		patch | blob | history
gcc/testsuite/ChangeLog		patch | blob | history
gcc/testsuite/gcc.target/aarch64/sve/recip_1.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/aarch64/sve/recip_1_run.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/aarch64/sve/recip_2.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/aarch64/sve/recip_2_run.c	[new file with mode: 0644]	patch | blob