[AArch64] Use "x" predication for SVE integer arithmetic patterns
author: Richard Sandiford <richard.sandiford@arm.com>
Wed, 14 Aug 2019 08:45:49 +0000 (08:45 +0000)
committer: Richard Sandiford <rsandifo@gcc.gnu.org>
Wed, 14 Aug 2019 08:45:49 +0000 (08:45 +0000)
The SVE patterns used an UNSPEC_MERGE_PTRUE unspec to attach a predicate
to an otherwise unpredicated integer arithmetic operation.  As its name
suggests, this was designed to be a wrapper used for merging instructions
in which the predicate is known to be a PTRUE.

This unspec dates from the very early days of the port and nothing has
ever taken advantage of the PTRUE guarantee for arithmetic (as opposed
to comparisons).  This patch replaces it with the less stringent
guarantee that:

(a) the values of inactive lanes don't matter and
(b) it is valid to make extra lanes active if there's a specific benefit

Doing this makes the patterns suitable for the ACLE _x functions, which
have the above semantics.

See the block comment in the patch for more details.

2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* config/aarch64/aarch64.md (UNSPEC_PRED_X): New unspec.
* config/aarch64/aarch64-sve.md: Add a section describing it.
(@aarch64_pred_mov<mode>, @aarch64_pred_mov<mode>)
(<SVE_INT_UNARY:optab><mode>2, *<SVE_INT_UNARY:optab><mode>2)
(aarch64_<su>abd<mode>_3, mul<SVE_I:mode>3, *mul<SVE_I:mode>3)
(<su>mul<mode>3_highpart, *<su>mul<mode>3_highpart)
(<SVE_INT_BINARY:optab><mode>3, *<SVE_INT_BINARY:optab><mode>3)
(*bic<mode>3, v<ASHIFT:optab><mode>3, *v<ASHIFT:optab><mode>3)
(<su><maxmin><mode>3, *<su><maxmin><mode>3, *madd<SVE_I:mode>)
(*msub<SVE_I:mode>3, *aarch64_sve_rev64<mode>)
(*aarch64_sve_rev32<mode>, *aarch64_sve_rev16vnx16qi): Use
UNSPEC_PRED_X instead of UNSPEC_MERGE_PTRUE.
* config/aarch64/aarch64-sve2.md (<u>avg<mode>3_floor)
(<u>avg<mode>3_ceil, *<sur>h<addsub><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_split_sve_subreg_move)
(aarch64_evpc_rev_local): Update accordingly.

From-SVN: r274425

gcc/ChangeLog
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/aarch64-sve2.md
gcc/config/aarch64/aarch64.c
gcc/config/aarch64/aarch64.md

index ddf67637c78abfb4245996f28447a23f95f7c5b9..ce4c5f6986803246ebfda102b6726abffb142083 100644 (file)
@@ -1,3 +1,22 @@
+2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * config/aarch64/aarch64.md (UNSPEC_PRED_X): New unspec.
+       * config/aarch64/aarch64-sve.md: Add a section describing it.
+       (@aarch64_pred_mov<mode>, @aarch64_pred_mov<mode>)
+       (<SVE_INT_UNARY:optab><mode>2, *<SVE_INT_UNARY:optab><mode>2)
+       (aarch64_<su>abd<mode>_3, mul<SVE_I:mode>3, *mul<SVE_I:mode>3)
+       (<su>mul<mode>3_highpart, *<su>mul<mode>3_highpart)
+       (<SVE_INT_BINARY:optab><mode>3, *<SVE_INT_BINARY:optab><mode>3)
+       (*bic<mode>3, v<ASHIFT:optab><mode>3, *v<ASHIFT:optab><mode>3)
+       (<su><maxmin><mode>3, *<su><maxmin><mode>3, *madd<SVE_I:mode>)
+       (*msub<SVE_I:mode>3, *aarch64_sve_rev64<mode>)
+       (*aarch64_sve_rev32<mode>, *aarch64_sve_rev16vnx16qi): Use
+       UNSPEC_PRED_X instead of UNSPEC_MERGE_PTRUE.
+       * config/aarch64/aarch64-sve2.md (<u>avg<mode>3_floor)
+       (<u>avg<mode>3_ceil, *<sur>h<addsub><mode>): Likewise.
+       * config/aarch64/aarch64.c (aarch64_split_sve_subreg_move)
+       (aarch64_evpc_rev_local): Update accordingly.
+
 2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
 
        * config/aarch64/iterators.md (VNx4SI_ONLY, VNx2DF_ONLY): New mode
index 64992c4875b627bba7227f3f57529323a02a0ee3..4f416189663e18370c7c491d6e5f49b3056ad6d5 100644 (file)
@@ -24,6 +24,7 @@
 ;; == General notes
 ;; ---- Note on the handling of big-endian SVE
 ;; ---- Description of UNSPEC_PTEST
+;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
 ;;
 ;; == Moves
 ;; - OP is the predicate we want to test, of the same mode as CAST_GP.
 ;;
 ;; -------------------------------------------------------------------------
+;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
+;; -------------------------------------------------------------------------
+;;
+;; Many SVE integer operations are predicated.  We can generate them
+;; from four sources:
+;;
+;; (1) Using normal unpredicated optabs.  In this case we need to create
+;;     an all-true predicate register to act as the governing predicate
+;;     for the SVE instruction.  There are no inactive lanes, and thus
+;;     the values of inactive lanes don't matter.
+;;
+;; (2) Using _x ACLE functions.  In this case the function provides a
+;;     specific predicate and some lanes might be inactive.  However,
+;;     as for (1), the values of the inactive lanes don't matter.
+;;     We can make extra lanes active without changing the behavior
+;;     (although for code-quality reasons we should avoid doing so
+;;     needlessly).
+;;
+;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
+;;     These optabs have a predicate operand that specifies which lanes are
+;;     active and another operand that provides the values of inactive lanes.
+;;
+;; (4) Using _m and _z ACLE functions.  These functions map to the same
+;;     patterns as (3), with the _z functions setting inactive lanes to zero
+;;     and the _m functions setting the inactive lanes to one of the function
+;;     arguments.
+;;
+;; For (1) and (2) we need a way of attaching the predicate to a normal
+;; unpredicated integer operation.  We do this using:
+;;
+;;   (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
+;;
+;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
+;; is a predicate of mode <M:VPRED>.  PRED might or might not be a PTRUE;
+;; it always is for (1), but might not be for (2).
+;;
+;; The unspec as a whole has the same value as (code:M ...) when PRED is
+;; all-true.  It is always semantically valid to replace PRED with a PTRUE,
+;; but as noted above, we should only do so if there's a specific benefit.
+;;
+;; (The "_X" in the unspec is named after the ACLE functions in (2).)
+;;
+;; For (3) and (4) we can simply use the SVE port's normal representation
+;; of a predicate-based select:
+;;
+;;   (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
+;;
+;; where INACTIVE specifies the values of inactive lanes.
+;;
+;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
+;; than inserting the integer operation directly.  This is mostly useful
+;; if we want the combine pass to merge an integer operation with an explicit
+;; vcond_mask (in other words, with a following SEL instruction).  However,
+;; it's generally better to merge such operations at the gimple level
+;; using (3).
+;;
+;; -------------------------------------------------------------------------
 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
 ;; -------------------------------------------------------------------------
 ;;
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE
    && (register_operand (operands[0], <MODE>mode)
        || register_operand (operands[2], <MODE>mode))"
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE
    && (register_operand (operands[0], <MODE>mode)
        || register_operand (operands[2], <MODE>mode))"
        (unspec:SVE_I
          [(match_dup 2)
           (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (SVE_INT_UNARY:SVE_I
             (match_operand:SVE_I 2 "register_operand" "w"))]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
 )
             (<max_opp>:SVE_I
               (match_dup 2)
               (match_dup 3)))]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   "@
    <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
           (mult:SVE_I
             (match_operand:SVE_I 1 "register_operand")
             (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
           (mult:SVE_I
             (match_operand:SVE_I 2 "register_operand" "%0, 0, w")
             (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   "@
    #
           (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")
                          (match_operand:SVE_I 2 "register_operand")]
                         MUL_HIGHPART)]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
           (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w")
                          (match_operand:SVE_I 3 "register_operand" "w, w")]
                         MUL_HIGHPART)]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   "@
    <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
           (SVE_INT_BINARY_SD:SVE_SDI
             (match_operand:SVE_SDI 1 "register_operand")
             (match_operand:SVE_SDI 2 "register_operand"))]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
           (SVE_INT_BINARY_SD:SVE_SDI
             (match_operand:SVE_SDI 2 "register_operand" "0, w, w")
             (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0, w"))]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   "@
    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
          (unspec:SVE_I
            [(match_operand 3)
             (not:SVE_I (match_operand:SVE_I 2 "register_operand" "w"))]
-           UNSPEC_MERGE_PTRUE)
+           UNSPEC_PRED_X)
          (match_operand:SVE_I 1 "register_operand" "w")))]
   "TARGET_SVE"
   "bic\t%0.d, %1.d, %2.d"
           (ASHIFT:SVE_I
             (match_operand:SVE_I 1 "register_operand")
             (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
           (ASHIFT:SVE_I
             (match_operand:SVE_I 2 "register_operand" "w, 0, w")
             (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, w"))]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   "@
    #
          [(match_dup 3)
           (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
                         (match_operand:SVE_I 2 "register_operand"))]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
                         (match_operand:SVE_I 3 "register_operand" "w, w"))]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   "@
    <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
             (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
                         (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
-           UNSPEC_MERGE_PTRUE)
+           UNSPEC_PRED_X)
          (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
   "TARGET_SVE"
   "@
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
             (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
                         (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
-           UNSPEC_MERGE_PTRUE)))]
+           UNSPEC_PRED_X)))]
   "TARGET_SVE"
   "@
    msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
          [(match_operand:VNx2BI 1 "register_operand" "Upl")
           (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
                           UNSPEC_REV64)]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   "rev<Vesize>\t%0.d, %1/m, %2.d"
 )
          [(match_operand:VNx4BI 1 "register_operand" "Upl")
           (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
                          UNSPEC_REV32)]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   "rev<Vesize>\t%0.s, %1/m, %2.s"
 )
          [(match_operand:VNx8BI 1 "register_operand" "Upl")
           (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
                           UNSPEC_REV16)]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE"
   "revb\t%0.h, %1/m, %2.h"
 )
index d0c235b8c8a6e1621073d2a8a653004799462c4c..2334e5a7b7dc524bbd1f4d0a48ba5cd991970118 100644 (file)
@@ -26,7 +26,7 @@
           (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")
                          (match_operand:SVE_I 2 "register_operand")]
                         HADD)]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE2"
   {
     operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
@@ -41,7 +41,7 @@
           (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")
                          (match_operand:SVE_I 2 "register_operand")]
                         RHADD)]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE2"
   {
     operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
           (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w")
                          (match_operand:SVE_I 3 "register_operand" "w, w")]
                         HADDSUB)]
-         UNSPEC_MERGE_PTRUE))]
+         UNSPEC_PRED_X))]
   "TARGET_SVE2"
   "@
    <sur>h<addsub>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    movprfx\t%0, %2\;<sur>h<addsub>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
   [(set_attr "movprfx" "*,yes")]
-)
\ No newline at end of file
+)
index 99b84cdb7084a5130695ec303e66d49e4a5aefe8..a936608482e8cf707c3a4a46cd7e7e7f031c1d73 100644 (file)
@@ -4097,8 +4097,7 @@ aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
 
   /* Emit:
 
-       (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV<nn>)]
-                        UNSPEC_MERGE_PTRUE))
+       (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV<nn>)] UNSPEC_PRED_X))
 
      with the appropriate modes.  */
   ptrue = gen_lowpart (pred_mode, ptrue);
@@ -4106,7 +4105,7 @@ aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
   src = aarch64_replace_reg_mode (src, mode_with_narrower_elts);
   src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec);
   src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src),
-                       UNSPEC_MERGE_PTRUE);
+                       UNSPEC_PRED_X);
   emit_insn (gen_rtx_SET (dest, src));
 }
 
@@ -17434,7 +17433,7 @@ aarch64_evpc_rev_local (struct expand_vec_perm_d *d)
     {
       rtx pred = aarch64_ptrue_reg (pred_mode);
       src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, pred, src),
-                           UNSPEC_MERGE_PTRUE);
+                           UNSPEC_PRED_X);
     }
   emit_set_insn (d->target, src);
   return true;
index 850c6ada2a5ef7b58a927281cc39202534034b57..589b4d3ae90c3ee7f9ce11e3cb0c25da14677eca 100644 (file)
     UNSPEC_LD1_GATHER
     UNSPEC_ST1_SCATTER
     UNSPEC_MERGE_PTRUE
+    UNSPEC_PRED_X
     UNSPEC_PTEST
     UNSPEC_UNPACKSHI
     UNSPEC_UNPACKUHI