From c9c5a8090c58b84c1eb45e39e77eee223f992009 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 14 Aug 2019 08:16:04 +0000 Subject: [PATCH] [AArch64] Add a "GP strictness" operand to SVE FP unspecs This patch makes the SVE unary, binary and ternary FP unspecs take a new "GP strictness" operand that indicates whether the predicate has to be taken literally, or whether it is valid to make extra lanes active (up to and including using a PTRUE). This again is laying the groundwork for the ACLE patterns, in which the value can depend on the FP command-line flags. At the moment it's only needed for addition, subtraction and multiplication, which have unpredicated forms that can only be used when operating on all lanes is safe. But in future it might be useful for optimising predicate usage. The strict mode requires extra alternatives for addition, subtraction and multiplication, but I've left those for the main ACLE patch. 2019-08-14 Richard Sandiford Kugan Vivekanandarajah gcc/ * config/aarch64/aarch64.md (SVE_RELAXED_GP, SVE_STRICT_GP): New constants. * config/aarch64/predicates.md (aarch64_sve_gp_strictness): New predicate. * config/aarch64/aarch64-protos.h (aarch64_sve_pred_dominates_p): Declare. * config/aarch64/aarch64.c (aarch64_sve_pred_dominates_p): New function. * config/aarch64/aarch64-sve.md: Add a block comment about the handling of predicated FP operations. (2, add3) (sub3, mul3, div3) (3) (3) (4): Add an SVE_RELAXED_GP operand. (cond_) (cond_): Add an SVE_STRICT_GP operand. (*2) (*cond__2) (*cond__3) (*cond__any) (*fabd3, *div3) (*3) (*4) (*cond__2) (*cond__4) (*cond__any): Match the strictness operands. Use aarch64_sve_pred_dominates_p to check whether the predicate on the conditional operation is suitable for merging. Split patterns into the canonical equal-predicate form. (*add3, *sub3, *mul3): Likewise. Restrict the unpredicated alternatives to SVE_RELAXED_GP. 
Co-Authored-By: Kugan Vivekanandarajah From-SVN: r274418 --- gcc/ChangeLog | 38 +++++ gcc/config/aarch64/aarch64-protos.h | 1 + gcc/config/aarch64/aarch64-sve.md | 209 +++++++++++++++++++++++----- gcc/config/aarch64/aarch64.c | 18 +++ gcc/config/aarch64/aarch64.md | 12 ++ gcc/config/aarch64/predicates.md | 5 + 6 files changed, 248 insertions(+), 35 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6e6f9ed7df7..9a918d49896 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,41 @@ +2019-08-14 Richard Sandiford + Kugan Vivekanandarajah + + * config/aarch64/aarch64.md (SVE_RELAXED_GP, SVE_STRICT_GP): New + constants. + * config/aarch64/predicates.md (aarch64_sve_gp_strictness): New + predicate. + * config/aarch64/aarch64-protos.h (aarch64_sve_pred_dominates_p): + Declare. + * config/aarch64/aarch64.c (aarch64_sve_pred_dominates_p): New + function. + * config/aarch64/aarch64-sve.md: Add a block comment about the + handling of predicated FP operations. + (2, add3) + (sub3, mul3, div3) + (3) + (3) + (4): Add an SVE_RELAXED_GP + operand. + (cond_) + (cond_): Add an SVE_STRICT_GP + operand. + (*2) + (*cond__2) + (*cond__3) + (*cond__any) + (*fabd3, *div3) + (*3) + (*4) + (*cond__2) + (*cond__4) + (*cond__any): Match the + strictness operands. Use aarch64_sve_pred_dominates_p to check + whether the predicate on the conditional operation is suitable + for merging. Split patterns into the canonical equal-predicate form. + (*add3, *sub3, *mul3): Likewise. + Restrict the unpredicated alternatives to SVE_RELAXED_GP. 
+ 2019-08-14 Richard Sandiford Kugan Vivekanandarajah diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 22a5f9433db..a4728e77c1d 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -554,6 +554,7 @@ rtx aarch64_ptrue_all (unsigned int); void aarch64_expand_mov_immediate (rtx, rtx); rtx aarch64_ptrue_reg (machine_mode); rtx aarch64_pfalse_reg (machine_mode); +bool aarch64_sve_pred_dominates_p (rtx *, rtx); void aarch64_emit_sve_pred_move (rtx, rtx, rtx); void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode); bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx); diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index a7ca1b85b91..9fb59024b44 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -24,6 +24,7 @@ ;; == General notes ;; ---- Note on the handling of big-endian SVE ;; ---- Description of UNSPEC_PTEST +;; ---- Note on predicated FP arithmetic patterns and GP "strictness" ;; ;; == Moves ;; ---- Moves of single vectors @@ -228,6 +229,83 @@ ;; SVE_MAYBE_NOT_PTRUE otherwise. ;; ;; - OP is the predicate we want to test, of the same mode as CAST_GP. +;; +;; ------------------------------------------------------------------------- +;; ---- Note on predicated FP arithmetic patterns and GP "strictness" +;; ------------------------------------------------------------------------- +;; +;; Most SVE floating-point operations are predicated. We can generate +;; them from four sources: +;; +;; (1) Using normal unpredicated optabs. In this case we need to create +;; an all-true predicate register to act as the governing predicate +;; for the SVE instruction. There are no inactive lanes, and thus +;; the values of inactive lanes don't matter. +;; +;; (2) Using _x ACLE functions. In this case the function provides a +;; specific predicate and some lanes might be inactive. 
However, +;; as for (1), the values of the inactive lanes don't matter. +;; +;; The instruction must have the same exception behavior as the +;; function call unless things like command-line flags specifically +;; allow otherwise. For example, with -ffast-math, it is OK to +;; raise exceptions for inactive lanes, but normally it isn't. +;; +;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions. +;; These optabs have a predicate operand that specifies which lanes are +;; active and another operand that provides the values of inactive lanes. +;; +;; (4) Using _m and _z ACLE functions. These functions map to the same +;; patterns as (3), with the _z functions setting inactive lanes to zero +;; and the _m functions setting the inactive lanes to one of the function +;; arguments. +;; +;; So: +;; +;; - In (1), the predicate is known to be all true and the pattern can use +;; unpredicated operations where available. +;; +;; - In (2), the predicate might or might not be all true. The pattern can +;; use unpredicated instructions if the predicate is all-true or if things +;; like command-line flags allow exceptions for inactive lanes. +;; +;; - (3) and (4) represent a native SVE predicated operation. Some lanes +;; might be inactive and inactive lanes of the result must have specific +;; values. There is no scope for using unpredicated instructions (and no +;; reason to want to), so the question about command-line flags doesn't +;; arise. +;; +;; It would be inaccurate to model (2) as an rtx code like (sqrt ...) +;; in combination with a separate predicate operand, e.g. +;; +;; (unspec [(match_operand: 1 "register_operand" "Upl") +;; (sqrt:SVE_F 2 "register_operand" "w")] +;; ....) +;; +;; because (sqrt ...) can raise an exception for any lane, including +;; inactive ones. We therefore need to use an unspec instead. 
+;; +;; Also, (2) requires some way of distinguishing the case in which the +;; predicate might have inactive lanes and cannot be changed from the +;; case in which the predicate has no inactive lanes or can be changed. +;; This information is also useful when matching combined FP patterns +;; in which the predicates might not be equal. +;; +;; We therefore model FP operations as an unspec of the form: +;; +;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>) +;; +;; where: +;; +;; - PRED is the governing predicate. +;; +;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the +;; value SVE_STRICT_GP if PRED might have inactive lanes and if those +;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise. +;; +;; - OP0 OP1 ... are the normal input operands to the operation. +;; +;; - MNEMONIC is the mnemonic of the associated SVE instruction. ;; ========================================================================= ;; == Moves @@ -1290,6 +1368,7 @@ [(set (match_operand:SVE_F 0 "register_operand") (unspec:SVE_F [(match_dup 2) + (const_int SVE_RELAXED_GP) (match_operand:SVE_F 1 "register_operand")] SVE_COND_FP_UNARY))] "TARGET_SVE" @@ -1303,6 +1382,7 @@ [(set (match_operand:SVE_F 0 "register_operand" "=w") (unspec:SVE_F [(match_operand: 1 "register_operand" "Upl") + (match_operand:SI 3 "aarch64_sve_gp_strictness") (match_operand:SVE_F 2 "register_operand" "w")] SVE_COND_FP_UNARY))] "TARGET_SVE" @@ -1964,6 +2044,7 @@ [(match_operand: 1 "register_operand") (unspec:SVE_F [(match_dup 1) + (const_int SVE_STRICT_GP) (match_operand:SVE_F 2 "register_operand") (match_operand:SVE_F 3 "register_operand")] SVE_COND_FP_BINARY) @@ -1973,40 +2054,50 @@ ) ;; Predicated floating-point operations, merging with the first input. 
-(define_insn "*cond__2" +(define_insn_and_rewrite "*cond__2" [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") (unspec:SVE_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_F - [(match_dup 1) + [(match_operand 4) + (match_operand:SI 5 "aarch64_sve_gp_strictness") (match_operand:SVE_F 2 "register_operand" "0, w") (match_operand:SVE_F 3 "register_operand" "w, w")] SVE_COND_FP_BINARY) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE" + "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" "@ \t%0., %1/m, %0., %3. movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } [(set_attr "movprfx" "*,yes")] ) ;; Predicated floating-point operations, merging with the second input. -(define_insn "*cond__3" +(define_insn_and_rewrite "*cond__3" [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") (unspec:SVE_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_F - [(match_dup 1) + [(match_operand 4) + (match_operand:SI 5 "aarch64_sve_gp_strictness") (match_operand:SVE_F 2 "register_operand" "w, w") (match_operand:SVE_F 3 "register_operand" "0, w")] SVE_COND_FP_BINARY) (match_dup 3)] UNSPEC_SEL))] - "TARGET_SVE" + "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" "@ \t%0., %1/m, %0., %2. movprfx\t%0, %3\;\t%0., %1/m, %0., %2." 
+ "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } [(set_attr "movprfx" "*,yes")] ) @@ -2016,7 +2107,8 @@ (unspec:SVE_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") (unspec:SVE_F - [(match_dup 1) + [(match_operand 5) + (match_operand:SI 6 "aarch64_sve_gp_strictness") (match_operand:SVE_F 2 "register_operand" "0, w, w, w, w") (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")] SVE_COND_FP_BINARY) @@ -2024,20 +2116,28 @@ UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4]) - && !rtx_equal_p (operands[3], operands[4])" + && !rtx_equal_p (operands[3], operands[4]) + && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" "@ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. #" - "&& reload_completed - && register_operand (operands[4], mode) - && !rtx_equal_p (operands[0], operands[4])" + "&& 1" { - emit_insn (gen_vcond_mask_ (operands[0], operands[2], - operands[4], operands[1])); - operands[4] = operands[2] = operands[0]; + if (reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])) + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + else if (!rtx_equal_p (operands[1], operands[5])) + operands[5] = copy_rtx (operands[1]); + else + FAIL; } [(set_attr "movprfx" "yes")] ) @@ -2055,6 +2155,7 @@ [(set (match_operand:SVE_F 0 "register_operand") (unspec:SVE_F [(match_dup 3) + (const_int SVE_RELAXED_GP) (match_operand:SVE_F 1 "register_operand") (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand")] UNSPEC_COND_FADD))] @@ -2064,11 +2165,12 @@ } ) -;; Floating-point addition predicated with a PTRUE. +;; Predicated floating-point addition. 
(define_insn_and_split "*add3" [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w") (unspec:SVE_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z") (match_operand:SVE_F 2 "register_operand" "%0, 0, w") (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w")] UNSPEC_COND_FADD))] @@ -2100,6 +2202,7 @@ [(set (match_operand:SVE_F 0 "register_operand") (unspec:SVE_F [(match_dup 3) + (const_int SVE_RELAXED_GP) (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand") (match_operand:SVE_F 2 "register_operand")] UNSPEC_COND_FSUB))] @@ -2109,11 +2212,12 @@ } ) -;; Floating-point subtraction predicated with a PTRUE. +;; Predicated floating-point subtraction. (define_insn_and_split "*sub3" [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w") (unspec:SVE_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, i, Z") (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w") (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w")] UNSPEC_COND_FSUB))] @@ -2143,18 +2247,24 @@ ;; ------------------------------------------------------------------------- ;; Predicated floating-point absolute difference. -(define_insn "*fabd3" +(define_insn_and_rewrite "*fabd3" [(set (match_operand:SVE_F 0 "register_operand" "=w") (unspec:SVE_F [(match_operand: 1 "register_operand" "Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness") (unspec:SVE_F - [(match_dup 1) + [(match_operand 5) + (match_operand:SI 6 "aarch64_sve_gp_strictness") (match_operand:SVE_F 2 "register_operand" "0") (match_operand:SVE_F 3 "register_operand" "w")] UNSPEC_COND_FSUB)] UNSPEC_COND_FABS))] - "TARGET_SVE" + "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" "fabd\t%0., %1/m, %2., %3." 
+ "&& !rtx_equal_p (operands[1], operands[5])" + { + operands[5] = copy_rtx (operands[1]); + } ) ;; ------------------------------------------------------------------------- @@ -2169,6 +2279,7 @@ [(set (match_operand:SVE_F 0 "register_operand") (unspec:SVE_F [(match_dup 3) + (const_int SVE_RELAXED_GP) (match_operand:SVE_F 1 "register_operand") (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand")] UNSPEC_COND_FMUL))] @@ -2178,11 +2289,12 @@ } ) -;; Floating-point multiplication predicated with a PTRUE. +;; Predicated floating-point multiplication. (define_insn_and_split "*mul3" [(set (match_operand:SVE_F 0 "register_operand" "=w, w") (unspec:SVE_F [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z") (match_operand:SVE_F 2 "register_operand" "%0, w") (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w")] UNSPEC_COND_FMUL))] @@ -2212,6 +2324,7 @@ [(set (match_operand:SVE_F 0 "register_operand") (unspec:SVE_F [(match_dup 3) + (const_int SVE_RELAXED_GP) (match_operand:SVE_F 1 "register_operand") (match_operand:SVE_F 2 "register_operand")] UNSPEC_COND_FDIV))] @@ -2221,11 +2334,12 @@ } ) -;; Floating-point division predicated with a PTRUE. +;; Predicated floating-point division. 
(define_insn "*div3" [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") (unspec:SVE_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness") (match_operand:SVE_F 2 "register_operand" "0, w, w") (match_operand:SVE_F 3 "register_operand" "w, 0, w")] UNSPEC_COND_FDIV))] @@ -2334,6 +2448,7 @@ [(set (match_operand:SVE_F 0 "register_operand") (unspec:SVE_F [(match_dup 3) + (const_int SVE_RELAXED_GP) (match_operand:SVE_F 1 "register_operand") (match_operand:SVE_F 2 "register_operand")] SVE_COND_FP_MAXMIN_PUBLIC))] @@ -2348,6 +2463,7 @@ [(set (match_operand:SVE_F 0 "register_operand") (unspec:SVE_F [(match_dup 3) + (const_int SVE_RELAXED_GP) (match_operand:SVE_F 1 "register_operand") (match_operand:SVE_F 2 "register_operand")] SVE_COND_FP_MAXMIN_PUBLIC))] @@ -2362,6 +2478,7 @@ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") (unspec:SVE_F [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness") (match_operand:SVE_F 2 "register_operand" "%0, w") (match_operand:SVE_F 3 "register_operand" "w, w")] SVE_COND_FP_MAXMIN_PUBLIC))] @@ -2612,6 +2729,7 @@ [(set (match_operand:SVE_F 0 "register_operand") (unspec:SVE_F [(match_dup 4) + (const_int SVE_RELAXED_GP) (match_operand:SVE_F 1 "register_operand") (match_operand:SVE_F 2 "register_operand") (match_operand:SVE_F 3 "register_operand")] @@ -2627,6 +2745,7 @@ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") (unspec:SVE_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (match_operand:SI 5 "aarch64_sve_gp_strictness") (match_operand:SVE_F 2 "register_operand" "%w, 0, w") (match_operand:SVE_F 3 "register_operand" "w, w, w") (match_operand:SVE_F 4 "register_operand" "0, w, w")] @@ -2646,6 +2765,7 @@ [(match_operand: 1 "register_operand") (unspec:SVE_F [(match_dup 1) + (const_int SVE_STRICT_GP) (match_operand:SVE_F 2 "register_operand") (match_operand:SVE_F 3 "register_operand") (match_operand:SVE_F 4 
"register_operand")] @@ -2662,43 +2782,53 @@ ;; Predicated floating-point ternary operations, merging with the ;; first input. -(define_insn "*cond__2" +(define_insn_and_rewrite "*cond__2" [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") (unspec:SVE_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_F - [(match_dup 1) + [(match_operand 5) + (match_operand:SI 6 "aarch64_sve_gp_strictness") (match_operand:SVE_F 2 "register_operand" "0, w") (match_operand:SVE_F 3 "register_operand" "w, w") (match_operand:SVE_F 4 "register_operand" "w, w")] SVE_COND_FP_TERNARY) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE" + "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" "@ \t%0., %1/m, %3., %4. movprfx\t%0, %2\;\t%0., %1/m, %3., %4." + "&& !rtx_equal_p (operands[1], operands[5])" + { + operands[5] = copy_rtx (operands[1]); + } [(set_attr "movprfx" "*,yes")] ) ;; Predicated floating-point ternary operations, merging with the ;; third input. -(define_insn "*cond__4" +(define_insn_and_rewrite "*cond__4" [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") (unspec:SVE_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_F - [(match_dup 1) + [(match_operand 5) + (match_operand:SI 6 "aarch64_sve_gp_strictness") (match_operand:SVE_F 2 "register_operand" "w, w") (match_operand:SVE_F 3 "register_operand" "w, w") (match_operand:SVE_F 4 "register_operand" "0, w")] SVE_COND_FP_TERNARY) (match_dup 4)] UNSPEC_SEL))] - "TARGET_SVE" + "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" "@ \t%0., %1/m, %2., %3. movprfx\t%0, %4\;\t%0., %1/m, %2., %3." 
+ "&& !rtx_equal_p (operands[1], operands[5])" + { + operands[5] = copy_rtx (operands[1]); + } [(set_attr "movprfx" "*,yes")] ) @@ -2709,7 +2839,8 @@ (unspec:SVE_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec:SVE_F - [(match_dup 1) + [(match_operand 6) + (match_operand:SI 7 "aarch64_sve_gp_strictness") (match_operand:SVE_F 2 "register_operand" "w, w, w") (match_operand:SVE_F 3 "register_operand" "w, w, w") (match_operand:SVE_F 4 "register_operand" "w, w, w")] @@ -2719,18 +2850,26 @@ "TARGET_SVE && !rtx_equal_p (operands[2], operands[5]) && !rtx_equal_p (operands[3], operands[5]) - && !rtx_equal_p (operands[4], operands[5])" + && !rtx_equal_p (operands[4], operands[5]) + && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" "@ movprfx\t%0., %1/z, %4.\;\t%0., %1/m, %2., %3. movprfx\t%0., %1/m, %4.\;\t%0., %1/m, %2., %3. #" - "&& reload_completed - && !CONSTANT_P (operands[5]) - && !rtx_equal_p (operands[0], operands[5])" + "&& 1" { - emit_insn (gen_vcond_mask_ (operands[0], operands[4], - operands[5], operands[1])); - operands[5] = operands[4] = operands[0]; + if (reload_completed + && register_operand (operands[5], mode) + && !rtx_equal_p (operands[0], operands[5])) + { + emit_insn (gen_vcond_mask_ (operands[0], operands[4], + operands[5], operands[1])); + operands[5] = operands[4] = operands[0]; + } + else if (!rtx_equal_p (operands[1], operands[6])) + operands[6] = copy_rtx (operands[1]); + else + FAIL; } [(set_attr "movprfx" "yes")] ) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 813d4877dca..6efcb03004a 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2765,6 +2765,24 @@ aarch64_pfalse_reg (machine_mode mode) return gen_lowpart (mode, reg); } +/* Return true if predicate PRED1[0] is true whenever predicate PRED2 is + true, or alternatively if we know that the operation predicated by + PRED1[0] is safe to perform whenever PRED2 is true. 
PRED1[1] is an + aarch64_sve_gp_strictness operand that describes the operation + predicated by PRED1[0]. */ + +bool +aarch64_sve_pred_dominates_p (rtx *pred1, rtx pred2) +{ + machine_mode mode = GET_MODE (pred2); + gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL + && mode == GET_MODE (pred1[0]) + && aarch64_sve_gp_strictness (pred1[1], SImode)); + return (pred1[0] == CONSTM1_RTX (mode) + || INTVAL (pred1[1]) == SVE_RELAXED_GP + || rtx_equal_p (pred1[0], pred2)); +} + /* Use a comparison to convert integer vector SRC into MODE, which is the corresponding SVE predicate mode. Use TARGET for the result if it's nonnull and convenient. */ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 3f802c79ee8..f910166bfba 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -268,6 +268,18 @@ ; Indicates that the predicate is known to be a PTRUE. (SVE_KNOWN_PTRUE 1)]) +;; These constants are used as a const_int in predicated SVE FP arithmetic +;; to indicate whether the operation is allowed to make additional lanes +;; active without worrying about the effect on faulting behavior. +(define_constants + [; Indicates either that all lanes are active or that the instruction may + ; operate on inactive inputs even if doing so could induce a fault. + (SVE_RELAXED_GP 0) + + ; Indicates that some lanes might be inactive and that the instruction + ; must not operate on inactive inputs if doing so could induce a fault. + (SVE_STRICT_GP 1)]) + ;; If further include files are added the defintion of MD_INCLUDES ;; must be updated. 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 9038dfb71a1..bfbe97db952 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -689,6 +689,11 @@ (ior (match_test "INTVAL (op) == SVE_MAYBE_NOT_PTRUE") (match_test "INTVAL (op) == SVE_KNOWN_PTRUE")))) +(define_predicate "aarch64_sve_gp_strictness" + (and (match_code "const_int") + (ior (match_test "INTVAL (op) == SVE_RELAXED_GP") + (match_test "INTVAL (op) == SVE_STRICT_GP")))) + (define_predicate "aarch64_gather_scale_operand_w" (and (match_code "const_int") (match_test "INTVAL (op) == 1 || INTVAL (op) == 4"))) -- 2.30.2