[AArch64][SVE] Remove unnecessary PTRUEs from integer arithmetic
author Richard Sandiford <richard.sandiford@arm.com>
Fri, 7 Dec 2018 15:03:15 +0000 (15:03 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Fri, 7 Dec 2018 15:03:15 +0000 (15:03 +0000)
When using the unpredicated immediate forms of MUL, LSL, LSR and ASR,
the rtl patterns would still have the predicate operand we created for
the other forms.  This patch splits the patterns after reload in order
to get rid of the predicate, like we already do for WHILE.

2018-12-07  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* config/aarch64/aarch64-sve.md (*mul<mode>3, *v<optab><mode>3):
Split the patterns after reload if we don't need the predicate
operand.
(*post_ra_mul<mode>3, *post_ra_v<optab><mode>3): New patterns.

gcc/testsuite/
* gcc.target/aarch64/sve/pred_elim_2.c: New test.

From-SVN: r266892

gcc/ChangeLog
gcc/config/aarch64/aarch64-sve.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/pred_elim_2.c [new file with mode: 0644]

index d1a4025423263d00a8f9a6b757e304afd1bcd785..95c1a4a8029dfc41e5791c40fac510909c0b05b5 100644 (file)
@@ -1,3 +1,10 @@
+2018-12-07  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * config/aarch64/aarch64-sve.md (*mul<mode>3, *v<optab><mode>3):
+       Split the patterns after reload if we don't need the predicate
+       operand.
+       (*post_ra_mul<mode>3, *post_ra_v<optab><mode>3): New patterns.
+
 2018-12-07  Richard Sandiford  <richard.sandiford@arm.com>
 
        * config/aarch64/iterators.md (SVE_UNPRED_FP_BINARY): New code
index edc6cff8fbda29e143c10921b4fac72930ec1315..8569a8e1ea7466c67091feb4e6f25603817436fa 100644 (file)
 ;; predicate for the first alternative, but using Upa or X isn't likely
 ;; to gain much and would make the instruction seem less uniform to the
 ;; register allocator.
-(define_insn "*mul<mode>3"
+(define_insn_and_split "*mul<mode>3"
   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
          UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
   "@
-   mul\t%0.<Vetype>, %0.<Vetype>, #%3
+   #
    mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  ; Split the unpredicated form after reload, so that we don't have
+  ; the unnecessary PTRUE.
+  "&& reload_completed
+   && !register_operand (operands[3], <MODE>mode)"
+  [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))]
+  ""
   [(set_attr "movprfx" "*,*,yes")]
 )
 
+;; Unpredicated multiplications by a constant (post-RA only).
+;; These are generated by splitting a predicated instruction whose
+;; predicate is unused.
+;;
+;; Operand 1 is tied to operand 0 by the "0" constraint, which is why
+;; the output template names %0 as both the destination and the first
+;; source.  Operand 2 must satisfy aarch64_sve_mul_immediate and is
+;; printed as the immediate.  The reload_completed test in the insn
+;; condition is what restricts the pattern to post-RA use.
+(define_insn "*post_ra_mul<mode>3"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+       (mult:SVE_I
+         (match_operand:SVE_I 1 "register_operand" "0")
+         (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))]
+  "TARGET_SVE && reload_completed"
+  "mul\t%0.<Vetype>, %0.<Vetype>, #%2"
+)
+
 (define_insn "*madd<mode>"
   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (plus:SVE_I
 ;; actually need the predicate for the first alternative, but using Upa
 ;; or X isn't likely to gain much and would make the instruction seem
 ;; less uniform to the register allocator.
-(define_insn "*v<optab><mode>3"
+(define_insn_and_split "*v<optab><mode>3"
   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
          UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
   "@
-   <shift>\t%0.<Vetype>, %2.<Vetype>, #%3
+   #
    <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  "&& reload_completed
+   && !register_operand (operands[3], <MODE>mode)"
+  [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
+  ""
   [(set_attr "movprfx" "*,*,yes")]
 )
 
+;; Unpredicated shift operations by a constant (post-RA only).
+;; These are generated by splitting a predicated instruction whose
+;; predicate is unused.
+;;
+;; Operand 1 only needs to be a "w" register and is printed as %1; it
+;; is not tied to the destination.  Operand 2 must satisfy the
+;; aarch64_simd_<lr>shift_imm predicate and is printed as the immediate.
+;; The reload_completed test in the insn condition is what restricts
+;; the pattern to post-RA use.
+(define_insn "*post_ra_v<optab><mode>3"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+       (ASHIFT:SVE_I
+         (match_operand:SVE_I 1 "register_operand" "w")
+         (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
+  "TARGET_SVE && reload_completed"
+  "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
+)
+
 ;; LSL, LSR and ASR by a scalar, which expands into one of the vector
 ;; shifts above.
 (define_expand "<ASHIFT:optab><mode>3"
index 996cacda1cbf891f25d7d94cdb273cd68d7a1c35..231275454bd24051280db93eebdf2dd5752dac64 100644 (file)
@@ -1,3 +1,7 @@
+2018-12-07  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.target/aarch64/sve/pred_elim_2.c: New test.
+
 2018-12-07  Richard Sandiford  <richard.sandiford@arm.com>
 
        * gcc.target/aarch64/sve/pred_elim_1.c: New test.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_elim_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_elim_2.c
new file mode 100644 (file)
index 0000000..ed9c700
--- /dev/null
@@ -0,0 +1,31 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_OP(NAME, TYPE, OP)                                        \
+  void                                                         \
+  NAME##_##TYPE (TYPE *restrict a, TYPE *restrict b, int n)    \
+  {                                                            \
+    for (int i = 0; i < n; ++i)                                        \
+      a[i] = b[i] OP;                                          \
+  }
+
+#define TEST_TYPE(TYPE) \
+  TEST_OP (shl, TYPE, << 6) \
+  TEST_OP (shr, TYPE, >> 6) \
+  TEST_OP (mult, TYPE, * 0x2b)
+
+TEST_TYPE (int8_t)
+TEST_TYPE (int16_t)
+TEST_TYPE (int32_t)
+TEST_TYPE (int64_t)
+TEST_TYPE (uint8_t)
+TEST_TYPE (uint16_t)
+TEST_TYPE (uint32_t)
+TEST_TYPE (uint64_t)
+
+/* { dg-final { scan-assembler-times {\tlsl\t} 8 } } */
+/* { dg-final { scan-assembler-times {\tlsr\t} 4 } } */
+/* { dg-final { scan-assembler-times {\tasr\t} 4 } } */
+/* { dg-final { scan-assembler-times {\tmul\t} 8 } } */
+/* { dg-final { scan-assembler-not {\tptrue\t} } } */