[AArch64][SVE] Remove unnecessary PTRUEs from FP arithmetic
author: Richard Sandiford <richard.sandiford@arm.com>
Fri, 7 Dec 2018 15:01:47 +0000 (15:01 +0000)
committer: Richard Sandiford <rsandifo@gcc.gnu.org>
Fri, 7 Dec 2018 15:01:47 +0000 (15:01 +0000)
When using the unpredicated all-register forms of FADD, FSUB and FMUL,
the rtl patterns would still have the predicate operand we created for
the other forms.  This patch splits the patterns after reload in order
to get rid of the predicate, like we already do for WHILE.

2018-12-07  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* config/aarch64/iterators.md (SVE_UNPRED_FP_BINARY): New code
iterator.
(sve_fp_op): Handle minus and mult.
* config/aarch64/aarch64-sve.md (*add<mode>3, *sub<mode>3)
(*mul<mode>3): Split the patterns after reload if we don't
need the predicate operand.
(*post_ra_<sve_fp_op><mode>3): New pattern.

gcc/testsuite/
* gcc.target/aarch64/sve/pred_elim_1.c: New test.

From-SVN: r266891

gcc/ChangeLog
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/iterators.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/pred_elim_1.c [new file with mode: 0644]

index dbe8b5f90d99cecc0dc8018cfdd31312dc8c6c01..d1a4025423263d00a8f9a6b757e304afd1bcd785 100644 (file)
@@ -1,3 +1,13 @@
+2018-12-07  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * config/aarch64/iterators.md (SVE_UNPRED_FP_BINARY): New code
+       iterator.
+       (sve_fp_op): Handle minus and mult.
+       * config/aarch64/aarch64-sve.md (*add<mode>3, *sub<mode>3)
+       (*mul<mode>3): Split the patterns after reload if we don't
+       need the predicate operand.
+       (*post_ra_<sve_fp_op><mode>3): New pattern.
+
 2018-12-07  Bin Cheng  <bin.cheng@linux.alibaba.com>
 
        * profile-count.h (profile_count::oeprator>=): Fix typo by inverting
index 5cd591b94335cde2230decf632f65c0faf33c4de..edc6cff8fbda29e143c10921b4fac72930ec1315 100644 (file)
 )
 
 ;; Floating-point addition predicated with a PTRUE.
-(define_insn "*add<mode>3"
+(define_insn_and_split "*add<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
   "@
    fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
    fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
-   fadd\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
+   #"
+  ; Split the unpredicated form after reload, so that we don't have
+  ; the unnecessary PTRUE.
+  "&& reload_completed
+   && register_operand (operands[3], <MODE>mode)"
+  [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
 )
 
 ;; Unpredicated floating-point subtraction.
 )
 
 ;; Floating-point subtraction predicated with a PTRUE.
-(define_insn "*sub<mode>3"
+(define_insn_and_split "*sub<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
    fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
    fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
    fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
-   fsub\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
+   #"
+  ; Split the unpredicated form after reload, so that we don't have
+  ; the unnecessary PTRUE.
+  "&& reload_completed
+   && register_operand (operands[2], <MODE>mode)
+   && register_operand (operands[3], <MODE>mode)"
+  [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
 )
 
 ;; Unpredicated floating-point multiplication.
 )
 
 ;; Floating-point multiplication predicated with a PTRUE.
-(define_insn "*mul<mode>3"
+(define_insn_and_split "*mul<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
   "TARGET_SVE"
   "@
    fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
-   fmul\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
+   #"
+  ; Split the unpredicated form after reload, so that we don't have
+  ; the unnecessary PTRUE.
+  "&& reload_completed
+   && register_operand (operands[3], <MODE>mode)"
+  [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
 )
 
+;; Unpredicated floating-point binary operations (post-RA only).
+;; These are generated by splitting a predicated instruction whose
+;; predicate is unused.
+(define_insn "*post_ra_<sve_fp_op><mode>3"
+  ; One pattern is generated per code in SVE_UNPRED_FP_BINARY (plus, minus
+  ; and mult).  Unlike the PTRUE-predicated patterns above, the rtl here
+  ; has no predicate operand: just the destination and two vector inputs.
+  [(set (match_operand:SVE_F 0 "register_operand" "=w")
+       (SVE_UNPRED_FP_BINARY:SVE_F
+         (match_operand:SVE_F 1 "register_operand" "w")
+         (match_operand:SVE_F 2 "register_operand" "w")))]
+  ; Requiring reload_completed stops this pattern from matching before
+  ; register allocation has finished, so it is only reached through the
+  ; post-reload splits of the predicated patterns.
+  "TARGET_SVE && reload_completed"
+  ; <sve_fp_op> supplies the mnemonic for the code: fadd, fsub or fmul.
+  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
+
 ;; Unpredicated fma (%0 = (%1 * %2) + %3).
 (define_expand "fma<mode>4"
   [(set (match_operand:SVE_F 0 "register_operand")
index 524e4e6929bc9a7136966987600de2513748c20b..a80755734d67e90d25b4570d36de2f82d38addc6 100644 (file)
 ;; SVE integer binary division operations.
 (define_code_iterator SVE_INT_BINARY_SD [div udiv])
 
+;; SVE floating-point operations with an unpredicated all-register form.
+(define_code_iterator SVE_UNPRED_FP_BINARY [plus minus mult])
+
 ;; SVE integer comparisons.
 (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
 
 
 ;; The floating-point SVE instruction that implements an rtx code.
 (define_code_attr sve_fp_op [(plus "fadd")
+                            (minus "fsub")
+                            (mult "fmul")
                             (neg "fneg")
                             (abs "fabs")
                             (sqrt "fsqrt")])
index 03fc100526f7a562a211760a69fbb2043fbea15e..996cacda1cbf891f25d7d94cdb273cd68d7a1c35 100644 (file)
@@ -1,3 +1,7 @@
+2018-12-07  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.target/aarch64/sve/pred_elim_1.c: New test.
+
 2018-12-07  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
 
        * gcc.target/i386/ipa-stack-alignment-2.c: Add
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_elim_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_elim_1.c
new file mode 100644 (file)
index 0000000..6b0faf1
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#define TEST_OP(NAME, TYPE, OP) /* a[i] = b[i] OP c[i] for i < n.  */ \
+  void                                                                 \
+  NAME##_##TYPE (TYPE *restrict a, TYPE *restrict b,                   \
+                 TYPE *restrict c, int n)                              \
+  {                                                                    \
+    for (int i = 0; i < n; ++i)                                        \
+      a[i] = b[i] OP c[i];                                             \
+  }
+
+#define TEST_TYPE(TYPE) /* The add, sub and mult loops for TYPE.  */ \
+  TEST_OP (add, TYPE, +) \
+  TEST_OP (sub, TYPE, -) \
+  TEST_OP (mult, TYPE, *)
+
+TEST_TYPE (float)
+TEST_TYPE (double)
+
+/* { dg-final { scan-assembler-times {\tfadd\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmul\t} 2 } } */
+/* { dg-final { scan-assembler-not {\tptrue\t} } } */