[AArch64] Add a "GP strictness" operand to SVE FP unspecs
author Richard Sandiford <richard.sandiford@arm.com>
Wed, 14 Aug 2019 08:16:04 +0000 (08:16 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Wed, 14 Aug 2019 08:16:04 +0000 (08:16 +0000)
This patch makes the SVE unary, binary and ternary FP unspecs
take a new "GP strictness" operand that indicates whether the
predicate has to be taken literally, or whether it is valid to
make extra lanes active (up to and including using a PTRUE).
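
Concretely, the rewritten patterns carry the strictness value as an extra
const_int inside the unspec.  A minimal sketch, taken from the predicated
unary pattern in the diff below (operand numbering varies between patterns):

  (unspec:SVE_F
    [(match_operand:<VPRED> 1 "register_operand" "Upl")
     (match_operand:SI 3 "aarch64_sve_gp_strictness")
     (match_operand:SVE_F 2 "register_operand" "w")]
    SVE_COND_FP_UNARY)

Operand 3 is SVE_STRICT_GP when the predicate must be honoured exactly and
SVE_RELAXED_GP when it is safe to make extra lanes active.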

This again is laying the groundwork for the ACLE patterns,
in which the value can depend on the FP command-line flags.

At the moment it's only needed for addition, subtraction and
multiplication, which have unpredicated forms that can only
be used when operating on all lanes is safe.  But in future
it might be useful for optimising predicate usage.
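
For example, the strictness operand is what gates the unpredicated
alternatives; a sketch based on the *mul<mode>3 pattern in the diff below
(the constraint layout may change in the main ACLE patch):

  (define_insn_and_split "*mul<mode>3"
    [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
          (unspec:SVE_F
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
             (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z")
             (match_operand:SVE_F 2 "register_operand" "%0, w")
             (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w")]
            UNSPEC_COND_FMUL))]
    ...)

The alternative that can fall back to the unpredicated form constrains the
strictness operand to integer zero ("Z"), i.e. to SVE_RELAXED_GP, so it is
only chosen when ignoring the predicate is known to be safe.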

The strict mode requires extra alternatives for addition,
subtraction and multiplication, but I've left those for the
main ACLE patch.

2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
    Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

gcc/
* config/aarch64/aarch64.md (SVE_RELAXED_GP, SVE_STRICT_GP): New
constants.
* config/aarch64/predicates.md (aarch64_sve_gp_strictness): New
predicate.
* config/aarch64/aarch64-protos.h (aarch64_sve_pred_dominates_p):
Declare.
* config/aarch64/aarch64.c (aarch64_sve_pred_dominates_p): New
function.
* config/aarch64/aarch64-sve.md: Add a block comment about the
handling of predicated FP operations.
(<SVE_COND_FP_UNARY:optab><SVE_F:mode>2, add<SVE_F:mode>3)
(sub<SVE_F:mode>3, mul<SVE_F:mode>3, div<SVE_F:mode>3)
(<SVE_COND_FP_MAXMIN_PUBLIC:optab><SVE_F:mode>3)
(<SVE_COND_FP_MAXMIN_PUBLIC:maxmin_uns><SVE_F:mode>3)
(<SVE_COND_FP_TERNARY:optab><SVE_F:mode>4): Add an SVE_RELAXED_GP
operand.
(cond_<SVE_COND_FP_BINARY:optab><SVE_F:mode>)
(cond_<SVE_COND_FP_TERNARY:optab><SVE_F:mode>): Add an SVE_STRICT_GP
operand.
(*<SVE_COND_FP_UNARY:optab><SVE_F:mode>2)
(*cond_<SVE_COND_FP_BINARY:optab><SVE_F:mode>_2)
(*cond_<SVE_COND_FP_BINARY:optab><SVE_F:mode>_3)
(*cond_<SVE_COND_FP_BINARY:optab><SVE_F:mode>_any)
(*fabd<SVE_F:mode>3, *div<SVE_F:mode>3)
(*<SVE_COND_FP_MAXMIN_PUBLIC:optab><SVE_F:mode>3)
(*<SVE_COND_FP_TERNARY:optab><SVE_F:mode>4)
(*cond_<SVE_COND_FP_TERNARY:optab><SVE_F:mode>_2)
(*cond_<SVE_COND_FP_TERNARY:optab><SVE_F:mode>_4)
(*cond_<SVE_COND_FP_TERNARY:optab><SVE_F:mode>_any): Match the
strictness operands.  Use aarch64_sve_pred_dominates_p to check
whether the predicate on the conditional operation is suitable
for merging.  Split patterns into the canonical equal-predicate form.
(*add<SVE_F:mode>3, *sub<SVE_F:mode>3, *mul<SVE_F:mode>3): Likewise.
Restrict the unpredicated alternatives to SVE_RELAXED_GP.

Co-Authored-By: Kugan Vivekanandarajah <kuganv@linaro.org>
From-SVN: r274418

gcc/ChangeLog
gcc/config/aarch64/aarch64-protos.h
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/aarch64.c
gcc/config/aarch64/aarch64.md
gcc/config/aarch64/predicates.md

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6e6f9ed7df712c758623504a74d1de774dab81f2..9a918d49896c4c27ff02ee4ed85979cec5c23ead 100644
@@ -1,3 +1,41 @@
+2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
+           Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>
+
+       * config/aarch64/aarch64.md (SVE_RELAXED_GP, SVE_STRICT_GP): New
+       constants.
+       * config/aarch64/predicates.md (aarch64_sve_gp_strictness): New
+       predicate.
+       * config/aarch64/aarch64-protos.h (aarch64_sve_pred_dominates_p):
+       Declare.
+       * config/aarch64/aarch64.c (aarch64_sve_pred_dominates_p): New
+       function.
+       * config/aarch64/aarch64-sve.md: Add a block comment about the
+       handling of predicated FP operations.
+       (<SVE_COND_FP_UNARY:optab><SVE_F:mode>2, add<SVE_F:mode>3)
+       (sub<SVE_F:mode>3, mul<SVE_F:mode>3, div<SVE_F:mode>3)
+       (<SVE_COND_FP_MAXMIN_PUBLIC:optab><SVE_F:mode>3)
+       (<SVE_COND_FP_MAXMIN_PUBLIC:maxmin_uns><SVE_F:mode>3)
+       (<SVE_COND_FP_TERNARY:optab><SVE_F:mode>4): Add an SVE_RELAXED_GP
+       operand.
+       (cond_<SVE_COND_FP_BINARY:optab><SVE_F:mode>)
+       (cond_<SVE_COND_FP_TERNARY:optab><SVE_F:mode>): Add an SVE_STRICT_GP
+       operand.
+       (*<SVE_COND_FP_UNARY:optab><SVE_F:mode>2)
+       (*cond_<SVE_COND_FP_BINARY:optab><SVE_F:mode>_2)
+       (*cond_<SVE_COND_FP_BINARY:optab><SVE_F:mode>_3)
+       (*cond_<SVE_COND_FP_BINARY:optab><SVE_F:mode>_any)
+       (*fabd<SVE_F:mode>3, *div<SVE_F:mode>3)
+       (*<SVE_COND_FP_MAXMIN_PUBLIC:optab><SVE_F:mode>3)
+       (*<SVE_COND_FP_TERNARY:optab><SVE_F:mode>4)
+       (*cond_<SVE_COND_FP_TERNARY:optab><SVE_F:mode>_2)
+       (*cond_<SVE_COND_FP_TERNARY:optab><SVE_F:mode>_4)
+       (*cond_<SVE_COND_FP_TERNARY:optab><SVE_F:mode>_any): Match the
+       strictness operands.  Use aarch64_sve_pred_dominates_p to check
+       whether the predicate on the conditional operation is suitable
+       for merging.  Split patterns into the canonical equal-predicate form.
+       (*add<SVE_F:mode>3, *sub<SVE_F:mode>3, *mul<SVE_F:mode>3): Likewise.
+       Restrict the unpredicated alternatives to SVE_RELAXED_GP.
+
 2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
            Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>
 
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 22a5f9433db8e37b380c255a26c1b4a9c06862a8..a4728e77c1d17add6ed81d18d9ef674e63fb5d94 100644
@@ -554,6 +554,7 @@ rtx aarch64_ptrue_all (unsigned int);
 void aarch64_expand_mov_immediate (rtx, rtx);
 rtx aarch64_ptrue_reg (machine_mode);
 rtx aarch64_pfalse_reg (machine_mode);
+bool aarch64_sve_pred_dominates_p (rtx *, rtx);
 void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
 void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
 bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index a7ca1b85b91e5f018be66eccde4830cbacfc9ac8..9fb59024b445cb5ecd2b620696eb8784b195529d 100644
@@ -24,6 +24,7 @@
 ;; == General notes
 ;; ---- Note on the handling of big-endian SVE
 ;; ---- Description of UNSPEC_PTEST
+;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
 ;;
 ;; == Moves
 ;; ---- Moves of single vectors
 ;;   SVE_MAYBE_NOT_PTRUE otherwise.
 ;;
 ;; - OP is the predicate we want to test, of the same mode as CAST_GP.
+;;
+;; -------------------------------------------------------------------------
+;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
+;; -------------------------------------------------------------------------
+;;
+;; Most SVE floating-point operations are predicated.  We can generate
+;; them from four sources:
+;;
+;; (1) Using normal unpredicated optabs.  In this case we need to create
+;;     an all-true predicate register to act as the governing predicate
+;;     for the SVE instruction.  There are no inactive lanes, and thus
+;;     the values of inactive lanes don't matter.
+;;
+;; (2) Using _x ACLE functions.  In this case the function provides a
+;;     specific predicate and some lanes might be inactive.  However,
+;;     as for (1), the values of the inactive lanes don't matter.
+;;
+;;     The instruction must have the same exception behavior as the
+;;     function call unless things like command-line flags specifically
+;;     allow otherwise.  For example, with -ffast-math, it is OK to
+;;     raise exceptions for inactive lanes, but normally it isn't.
+;;
+;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
+;;     These optabs have a predicate operand that specifies which lanes are
+;;     active and another operand that provides the values of inactive lanes.
+;;
+;; (4) Using _m and _z ACLE functions.  These functions map to the same
+;;     patterns as (3), with the _z functions setting inactive lanes to zero
+;;     and the _m functions setting the inactive lanes to one of the function
+;;     arguments.
+;;
+;; So:
+;;
+;; - In (1), the predicate is known to be all true and the pattern can use
+;;   unpredicated operations where available.
+;;
+;; - In (2), the predicate might or might not be all true.  The pattern can
+;;   use unpredicated instructions if the predicate is all-true or if things
+;;   like command-line flags allow exceptions for inactive lanes.
+;;
+;; - (3) and (4) represent a native SVE predicated operation.  Some lanes
+;;   might be inactive and inactive lanes of the result must have specific
+;;   values.  There is no scope for using unpredicated instructions (and no
+;;   reason to want to), so the question about command-line flags doesn't
+;;   arise.
+;;
+;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
+;; in combination with a separate predicate operand, e.g.
+;;
+;;   (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
+;;           (sqrt:SVE_F 2 "register_operand" "w")]
+;;          ....)
+;;
+;; because (sqrt ...) can raise an exception for any lane, including
+;; inactive ones.  We therefore need to use an unspec instead.
+;;
+;; Also, (2) requires some way of distinguishing the case in which the
+;; predicate might have inactive lanes and cannot be changed from the
+;; case in which the predicate has no inactive lanes or can be changed.
+;; This information is also useful when matching combined FP patterns
+;; in which the predicates might not be equal.
+;;
+;; We therefore model FP operations as an unspec of the form:
+;;
+;;   (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
+;;
+;; where:
+;;
+;; - PRED is the governing predicate.
+;;
+;; - STRICTNESS is a CONST_INT that conceptually has mode SI.  It has the
+;;   value SVE_STRICT_GP if PRED might have inactive lanes and if those
+;;   lanes must remain inactive.  It has the value SVE_RELAXED_GP otherwise.
+;;
+;; - OP0 OP1 ... are the normal input operands to the operation.
+;;
+;; - MNEMONIC is the mnemonic of the associated SVE instruction.
 
 ;; =========================================================================
 ;; == Moves
   [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 2)
+          (const_int SVE_RELAXED_GP)
           (match_operand:SVE_F 1 "register_operand")]
          SVE_COND_FP_UNARY))]
   "TARGET_SVE"
   [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
+          (match_operand:SI 3 "aarch64_sve_gp_strictness")
           (match_operand:SVE_F 2 "register_operand" "w")]
          SVE_COND_FP_UNARY))]
   "TARGET_SVE"
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_F
             [(match_dup 1)
+             (const_int SVE_STRICT_GP)
              (match_operand:SVE_F 2 "register_operand")
              (match_operand:SVE_F 3 "register_operand")]
             SVE_COND_FP_BINARY)
 )
 
 ;; Predicated floating-point operations, merging with the first input.
-(define_insn "*cond_<optab><mode>_2"
+(define_insn_and_rewrite "*cond_<optab><mode>_2"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_F
-            [(match_dup 1)
+            [(match_operand 4)
+             (match_operand:SI 5 "aarch64_sve_gp_strictness")
              (match_operand:SVE_F 2 "register_operand" "0, w")
              (match_operand:SVE_F 3 "register_operand" "w, w")]
             SVE_COND_FP_BINARY)
           (match_dup 2)]
          UNSPEC_SEL))]
-  "TARGET_SVE"
+  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
   "@
    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  "&& !rtx_equal_p (operands[1], operands[4])"
+  {
+    operands[4] = copy_rtx (operands[1]);
+  }
   [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated floating-point operations, merging with the second input.
-(define_insn "*cond_<optab><mode>_3"
+(define_insn_and_rewrite "*cond_<optab><mode>_3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_F
-            [(match_dup 1)
+            [(match_operand 4)
+             (match_operand:SI 5 "aarch64_sve_gp_strictness")
              (match_operand:SVE_F 2 "register_operand" "w, w")
              (match_operand:SVE_F 3 "register_operand" "0, w")]
             SVE_COND_FP_BINARY)
           (match_dup 3)]
          UNSPEC_SEL))]
-  "TARGET_SVE"
+  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
   "@
    <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
    movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+  "&& !rtx_equal_p (operands[1], operands[4])"
+  {
+    operands[4] = copy_rtx (operands[1]);
+  }
   [(set_attr "movprfx" "*,yes")]
 )
 
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_F
-            [(match_dup 1)
+            [(match_operand 5)
+             (match_operand:SI 6 "aarch64_sve_gp_strictness")
              (match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
              (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
             SVE_COND_FP_BINARY)
          UNSPEC_SEL))]
   "TARGET_SVE
    && !rtx_equal_p (operands[2], operands[4])
-   && !rtx_equal_p (operands[3], operands[4])"
+   && !rtx_equal_p (operands[3], operands[4])
+   && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
   "@
    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    #"
-  "&& reload_completed
-   && register_operand (operands[4], <MODE>mode)
-   && !rtx_equal_p (operands[0], operands[4])"
+  "&& 1"
   {
-    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
-                                            operands[4], operands[1]));
-    operands[4] = operands[2] = operands[0];
+    if (reload_completed
+        && register_operand (operands[4], <MODE>mode)
+        && !rtx_equal_p (operands[0], operands[4]))
+      {
+       emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+                                                operands[4], operands[1]));
+       operands[4] = operands[2] = operands[0];
+      }
+    else if (!rtx_equal_p (operands[1], operands[5]))
+      operands[5] = copy_rtx (operands[1]);
+    else
+      FAIL;
   }
   [(set_attr "movprfx" "yes")]
 )
   [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
+          (const_int SVE_RELAXED_GP)
           (match_operand:SVE_F 1 "register_operand")
           (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand")]
          UNSPEC_COND_FADD))]
   }
 )
 
-;; Floating-point addition predicated with a PTRUE.
+;; Predicated floating-point addition.
 (define_insn_and_split "*add<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+          (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z")
           (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
           (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w")]
          UNSPEC_COND_FADD))]
   [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
+          (const_int SVE_RELAXED_GP)
           (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
           (match_operand:SVE_F 2 "register_operand")]
          UNSPEC_COND_FSUB))]
   }
 )
 
-;; Floating-point subtraction predicated with a PTRUE.
+;; Predicated floating-point subtraction.
 (define_insn_and_split "*sub<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
+          (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, i, Z")
           (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
           (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w")]
          UNSPEC_COND_FSUB))]
 ;; -------------------------------------------------------------------------
 
 ;; Predicated floating-point absolute difference.
-(define_insn "*fabd<mode>3"
+(define_insn_and_rewrite "*fabd<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
+          (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (unspec:SVE_F
-            [(match_dup 1)
+            [(match_operand 5)
+             (match_operand:SI 6 "aarch64_sve_gp_strictness")
              (match_operand:SVE_F 2 "register_operand" "0")
              (match_operand:SVE_F 3 "register_operand" "w")]
             UNSPEC_COND_FSUB)]
          UNSPEC_COND_FABS))]
-  "TARGET_SVE"
+  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
   "fabd\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+  "&& !rtx_equal_p (operands[1], operands[5])"
+  {
+    operands[5] = copy_rtx (operands[1]);
+  }
 )
 
 ;; -------------------------------------------------------------------------
   [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
+          (const_int SVE_RELAXED_GP)
           (match_operand:SVE_F 1 "register_operand")
           (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand")]
          UNSPEC_COND_FMUL))]
   }
 )
 
-;; Floating-point multiplication predicated with a PTRUE.
+;; Predicated floating-point multiplication.
 (define_insn_and_split "*mul<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+          (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z")
           (match_operand:SVE_F 2 "register_operand" "%0, w")
           (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w")]
          UNSPEC_COND_FMUL))]
   [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
+          (const_int SVE_RELAXED_GP)
           (match_operand:SVE_F 1 "register_operand")
           (match_operand:SVE_F 2 "register_operand")]
          UNSPEC_COND_FDIV))]
   }
 )
 
-;; Floating-point division predicated with a PTRUE.
+;; Predicated floating-point division.
 (define_insn "*div<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+          (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (match_operand:SVE_F 2 "register_operand" "0, w, w")
           (match_operand:SVE_F 3 "register_operand" "w, 0, w")]
          UNSPEC_COND_FDIV))]
   [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
+          (const_int SVE_RELAXED_GP)
           (match_operand:SVE_F 1 "register_operand")
           (match_operand:SVE_F 2 "register_operand")]
          SVE_COND_FP_MAXMIN_PUBLIC))]
   [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
+          (const_int SVE_RELAXED_GP)
           (match_operand:SVE_F 1 "register_operand")
           (match_operand:SVE_F 2 "register_operand")]
          SVE_COND_FP_MAXMIN_PUBLIC))]
   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+          (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (match_operand:SVE_F 2 "register_operand" "%0, w")
           (match_operand:SVE_F 3 "register_operand" "w, w")]
          SVE_COND_FP_MAXMIN_PUBLIC))]
   [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 4)
+          (const_int SVE_RELAXED_GP)
           (match_operand:SVE_F 1 "register_operand")
           (match_operand:SVE_F 2 "register_operand")
           (match_operand:SVE_F 3 "register_operand")]
   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+          (match_operand:SI 5 "aarch64_sve_gp_strictness")
           (match_operand:SVE_F 2 "register_operand" "%w, 0, w")
           (match_operand:SVE_F 3 "register_operand" "w, w, w")
           (match_operand:SVE_F 4 "register_operand" "0, w, w")]
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_F
             [(match_dup 1)
+             (const_int SVE_STRICT_GP)
              (match_operand:SVE_F 2 "register_operand")
              (match_operand:SVE_F 3 "register_operand")
              (match_operand:SVE_F 4 "register_operand")]
 
 ;; Predicated floating-point ternary operations, merging with the
 ;; first input.
-(define_insn "*cond_<optab><mode>_2"
+(define_insn_and_rewrite "*cond_<optab><mode>_2"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_F
-            [(match_dup 1)
+            [(match_operand 5)
+             (match_operand:SI 6 "aarch64_sve_gp_strictness")
              (match_operand:SVE_F 2 "register_operand" "0, w")
              (match_operand:SVE_F 3 "register_operand" "w, w")
              (match_operand:SVE_F 4 "register_operand" "w, w")]
             SVE_COND_FP_TERNARY)
           (match_dup 2)]
          UNSPEC_SEL))]
-  "TARGET_SVE"
+  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
   "@
    <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
    movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+  "&& !rtx_equal_p (operands[1], operands[5])"
+  {
+    operands[5] = copy_rtx (operands[1]);
+  }
   [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated floating-point ternary operations, merging with the
 ;; third input.
-(define_insn "*cond_<optab><mode>_4"
+(define_insn_and_rewrite "*cond_<optab><mode>_4"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_F
-            [(match_dup 1)
+            [(match_operand 5)
+             (match_operand:SI 6 "aarch64_sve_gp_strictness")
              (match_operand:SVE_F 2 "register_operand" "w, w")
              (match_operand:SVE_F 3 "register_operand" "w, w")
              (match_operand:SVE_F 4 "register_operand" "0, w")]
             SVE_COND_FP_TERNARY)
           (match_dup 4)]
          UNSPEC_SEL))]
-  "TARGET_SVE"
+  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
   "@
    <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
    movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+  "&& !rtx_equal_p (operands[1], operands[5])"
+  {
+    operands[5] = copy_rtx (operands[1]);
+  }
   [(set_attr "movprfx" "*,yes")]
 )
 
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:SVE_F
-            [(match_dup 1)
+            [(match_operand 6)
+             (match_operand:SI 7 "aarch64_sve_gp_strictness")
              (match_operand:SVE_F 2 "register_operand" "w, w, w")
              (match_operand:SVE_F 3 "register_operand" "w, w, w")
              (match_operand:SVE_F 4 "register_operand" "w, w, w")]
   "TARGET_SVE
    && !rtx_equal_p (operands[2], operands[5])
    && !rtx_equal_p (operands[3], operands[5])
-   && !rtx_equal_p (operands[4], operands[5])"
+   && !rtx_equal_p (operands[4], operands[5])
+   && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
   "@
    movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
    movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
    #"
-  "&& reload_completed
-   && !CONSTANT_P (operands[5])
-   && !rtx_equal_p (operands[0], operands[5])"
+  "&& 1"
   {
-    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
-                                            operands[5], operands[1]));
-    operands[5] = operands[4] = operands[0];
+    if (reload_completed
+        && register_operand (operands[5], <MODE>mode)
+        && !rtx_equal_p (operands[0], operands[5]))
+      {
+       emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
+                                                operands[5], operands[1]));
+       operands[5] = operands[4] = operands[0];
+      }
+    else if (!rtx_equal_p (operands[1], operands[6]))
+      operands[6] = copy_rtx (operands[1]);
+    else
+      FAIL;
   }
   [(set_attr "movprfx" "yes")]
 )
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 813d4877dca0b93d946fe0fb7e3231b490942f72..6efcb03004a7d7ccde6ffae4cf6bdcf8b8e80aa5 100644
@@ -2765,6 +2765,24 @@ aarch64_pfalse_reg (machine_mode mode)
   return gen_lowpart (mode, reg);
 }
 
+/* Return true if predicate PRED1[0] is true whenever predicate PRED2 is
+   true, or alternatively if we know that the operation predicated by
+   PRED1[0] is safe to perform whenever PRED2 is true.  PRED1[1] is a
+   aarch64_sve_gp_strictness operand that describes the operation
+   predicated by PRED1[0].  */
+
+bool
+aarch64_sve_pred_dominates_p (rtx *pred1, rtx pred2)
+{
+  machine_mode mode = GET_MODE (pred2);
+  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+             && mode == GET_MODE (pred1[0])
+             && aarch64_sve_gp_strictness (pred1[1], SImode));
+  return (pred1[0] == CONSTM1_RTX (mode)
+         || INTVAL (pred1[1]) == SVE_RELAXED_GP
+         || rtx_equal_p (pred1[0], pred2));
+}
+
 /* Use a comparison to convert integer vector SRC into MODE, which is
    the corresponding SVE predicate mode.  Use TARGET for the result
    if it's nonnull and convenient.  */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 3f802c79ee8cc9917bf3c4aa2c0a2514ffcbd277..f910166bfba6f56e26a6fd55b69beba7b1ce7662 100644
    ; Indicates that the predicate is known to be a PTRUE.
    (SVE_KNOWN_PTRUE 1)])
 
+;; These constants are used as a const_int in predicated SVE FP arithmetic
+;; to indicate whether the operation is allowed to make additional lanes
+;; active without worrying about the effect on faulting behavior.
+(define_constants
+  [; Indicates either that all lanes are active or that the instruction may
+   ; operate on inactive inputs even if doing so could induce a fault.
+   (SVE_RELAXED_GP 0)
+
+   ; Indicates that some lanes might be inactive and that the instruction
+   ; must not operate on inactive inputs if doing so could induce a fault.
+   (SVE_STRICT_GP 1)])
+
 ;; If further include files are added the defintion of MD_INCLUDES
 ;; must be updated.
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 9038dfb71a19f345e8a842342657b9f1f94e4afb..bfbe97db952af899dd1741d5c45a904729c3d206 100644
        (ior (match_test "INTVAL (op) == SVE_MAYBE_NOT_PTRUE")
            (match_test "INTVAL (op) == SVE_KNOWN_PTRUE"))))
 
+(define_predicate "aarch64_sve_gp_strictness"
+  (and (match_code "const_int")
+       (ior (match_test "INTVAL (op) == SVE_RELAXED_GP")
+           (match_test "INTVAL (op) == SVE_STRICT_GP"))))
+
 (define_predicate "aarch64_gather_scale_operand_w"
   (and (match_code "const_int")
        (match_test "INTVAL (op) == 1 || INTVAL (op) == 4")))