From 0eb5e901f6e25a7b8a9790a7a8c209147fb649ec Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Fri, 2 Oct 2020 11:53:06 +0100
Subject: [PATCH] aarch64: Remove aarch64_sve_pred_dominates_p
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

In r11-2922, Przemek fixed a post-RA instruction match failure
caused by the SVE FP subtraction patterns.  This patch applies
the same fix to the other patterns.

To recap, the issue is around the handling of predication.
We want to do two things:

- Optimise cases in which a predicate is known to be all-true.

- Differentiate cases in which the predicate on an _x ACLE function
  has to be kept as-is from cases in which we can make more lanes
  active.  Keeping the predicate as-is is the default; making more
  lanes active is allowed only for certain combinations of flags in
  the -ffast-math group.  (A short intrinsics example illustrating
  this distinction follows the patch.)

This is handled by a boolean flag in the unspecs to say whether
the predicate is "strict" or "relaxed".  When combining multiple
strict operations, the predicates used in the operations generally
need to match.  When combining multiple relaxed operations, we can
ignore the predicates on nested operations and just use the
predicate on the "outermost" operation.

Originally I'd tried to reduce the combinatorial explosion by using
aarch64_sve_pred_dominates_p.  This required matching predicates for
strict operations but allowed more combinations for relaxed
operations.

The problem (as I should have remembered) is that C conditions on
insn patterns can't reliably enforce matching operands.  If the same
register is used in two different input operands, the RA is allowed
to use different hard registers for those input operands (and
sometimes it has to).  So operands that match before RA might not
match afterwards.  The only sure way to force a match is via
match_dup.

This patch splits the cases into two: one in which the predicates
are relaxed and one in which they must match.  I cry bitter tears at
having to do this, but I think it's the only backportable fix.
There might be some way of using define_subst to generate the cond_*
patterns from the pred_* patterns, with some alternatives
strategically disabled in each case, but that's future work and
might not be an improvement.

Since so many patterns now do this, I moved the comments from the
subtraction pattern to a new banner comment at the head of the file.

gcc/
	* config/aarch64/aarch64-protos.h (aarch64_sve_pred_dominates_p):
	Delete.
	* config/aarch64/aarch64.c (aarch64_sve_pred_dominates_p): Likewise.
	* config/aarch64/aarch64-sve.md: Add banner comment describing how
	merging predicated FP operations are represented.
	(*cond__2): Split into...
	(*cond__2_relaxed): ...this and...
	(*cond__2_strict): ...this.
	(*cond__any): Split into...
	(*cond__any_relaxed): ...this and...
	(*cond__any_strict): ...this.
	(*cond__2): Split into...
	(*cond__2_relaxed): ...this and...
	(*cond__2_strict): ...this.
	(*cond__any): Split into...
	(*cond__any_relaxed): ...this and...
	(*cond__any_strict): ...this.
	(*cond__2): Split into...
	(*cond__2_relaxed): ...this and...
	(*cond__2_strict): ...this.
	(*cond__2_const): Split into...
	(*cond__2_const_relaxed): ...this and...
	(*cond__2_const_strict): ...this.
	(*cond__3): Split into...
	(*cond__3_relaxed): ...this and...
	(*cond__3_strict): ...this.
	(*cond__any): Split into...
	(*cond__any_relaxed): ...this and...
	(*cond__any_strict): ...this.
	(*cond__any_const): Split into...
	(*cond__any_const_relaxed): ...this and...
	(*cond__any_const_strict): ...this.
	(*cond_add_2_const): Split into...
	(*cond_add_2_const_relaxed): ...this and...
	(*cond_add_2_const_strict): ...this.
(*cond_add_any_const): Split into... (*cond_add_any_const_relaxed): ...this and... (*cond_add_any_const_strict): ...this. (*cond__2): Split into... (*cond__2_relaxed): ...this and... (*cond__2_strict): ...this. (*cond__any): Split into... (*cond__any_relaxed): ...this and... (*cond__any_strict): ...this. (*cond_sub_3_const): Split into... (*cond_sub_3_const_relaxed): ...this and... (*cond_sub_3_const_strict): ...this. (*aarch64_pred_abd): Split into... (*aarch64_pred_abd_relaxed): ...this and... (*aarch64_pred_abd_strict): ...this. (*aarch64_cond_abd_2): Split into... (*aarch64_cond_abd_2_relaxed): ...this and... (*aarch64_cond_abd_2_strict): ...this. (*aarch64_cond_abd_3): Split into... (*aarch64_cond_abd_3_relaxed): ...this and... (*aarch64_cond_abd_3_strict): ...this. (*aarch64_cond_abd_any): Split into... (*aarch64_cond_abd_any_relaxed): ...this and... (*aarch64_cond_abd_any_strict): ...this. (*cond__2): Split into... (*cond__2_relaxed): ...this and... (*cond__2_strict): ...this. (*cond__4): Split into... (*cond__4_relaxed): ...this and... (*cond__4_strict): ...this. (*cond__any): Split into... (*cond__any_relaxed): ...this and... (*cond__any_strict): ...this. (*cond__4): Split into... (*cond__4_relaxed): ...this and... (*cond__4_strict): ...this. (*cond__any): Split into... (*cond__any_relaxed): ...this and... (*cond__any_strict): ...this. (*aarch64_pred_fac): Split into... (*aarch64_pred_fac_relaxed): ...this and... (*aarch64_pred_fac_strict): ...this. (*cond__nontrunc): Split into... (*cond__nontrunc_relaxed): ...this and... (*cond__nontrunc_strict): ...this. (*cond__nonextend): Split into... (*cond__nonextend_relaxed): ...this and... (*cond__nonextend_strict): ...this. * config/aarch64/aarch64-sve2.md (*cond_): Split into... (*cond__relaxed): ...this and... (*cond__strict): ...this. (*cond__any): Split into... (*cond__any_relaxed): ...this and... (*cond__any_strict): ...this. (*cond_): Split into... (*cond__relaxed): ...this and... (*cond__strict): ...this. --- gcc/config/aarch64/aarch64-protos.h | 1 - gcc/config/aarch64/aarch64-sve.md | 923 ++++++++++++++++++++++++---- gcc/config/aarch64/aarch64-sve2.md | 73 ++- gcc/config/aarch64/aarch64.c | 18 - 4 files changed, 853 insertions(+), 162 deletions(-) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 302e09b202f..7a34c841355 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -630,7 +630,6 @@ void aarch64_expand_mov_immediate (rtx, rtx); rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type); rtx aarch64_ptrue_reg (machine_mode); rtx aarch64_pfalse_reg (machine_mode); -bool aarch64_sve_pred_dominates_p (rtx *, rtx); bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *); void aarch64_emit_sve_pred_move (rtx, rtx, rtx); void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode); diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index cd79aba90ec..31a8c5a5aef 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -464,6 +464,95 @@ ;; ;; - MNEMONIC is the mnemonic of the associated SVE instruction. ;; +;; For (3) and (4), we combine these operations with an UNSPEC_SEL +;; that selects between the result of the FP operation and the "else" +;; value. (This else value is a merge input for _m ACLE functions +;; and zero for _z ACLE functions.) 
The outer pattern then has the form: +;; +;; (unspec [pred fp_operation else_value] UNSPEC_SEL) +;; +;; This means that the patterns for (3) and (4) have two predicates: +;; one for the FP operation itself and one for the UNSPEC_SEL. +;; This pattern is equivalent to the result of combining an instance +;; of (1) or (2) with a separate vcond instruction, so these patterns +;; are useful as combine targets too. +;; +;; However, in the combine case, the instructions that we want to +;; combine might use different predicates. Then: +;; +;; - Some of the active lanes of the FP operation might be discarded +;; by the UNSPEC_SEL. It's OK to drop the FP operation on those lanes, +;; even for SVE_STRICT_GP, since the operations on those lanes are +;; effectively dead code. +;; +;; - Some of the inactive lanes of the FP operation might be selected +;; by the UNSPEC_SEL, giving unspecified values for those lanes. +;; SVE_RELAXED_GP lets us extend the FP operation to cover these +;; extra lanes, but SVE_STRICT_GP does not. +;; +;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation +;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate. +;; This typically leads to patterns like: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (unspec [(match_operand N) +;; (const_int SVE_RELAXED_GP) +;; ...] +;; UNSPEC_COND_) +;; ...]) +;; +;; where operand N is allowed to be anything. These instructions then +;; have rewrite rules to replace operand N with operand 1, which gives the +;; instructions a canonical form and means that the original operand N is +;; not kept live unnecessarily. +;; +;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be +;; a subset of the FP operation predicate. This case isn't interesting +;; for FP operations that have an all-true predicate, since such operations +;; use SVE_RELAXED_GP instead. And it is not possible for instruction +;; conditions to track the subset relationship for arbitrary registers. +;; So in practice, the only useful case for SVE_STRICT_GP is the one +;; in which the predicates match: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (unspec [(match_dup 1) +;; (const_int SVE_STRICT_GP) +;; ...] +;; UNSPEC_COND_) +;; ...]) +;; +;; This pattern would also be correct for SVE_RELAXED_GP, but it would +;; be redundant with the one above. However, if the combine pattern +;; has multiple FP operations, using a match_operand allows combinations +;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided +;; that the predicates are the same: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (... +;; (unspec [(match_dup 1) +;; (match_operand:SI N "aarch64_sve_gp_strictness") +;; ...] +;; UNSPEC_COND_) +;; (unspec [(match_dup 1) +;; (match_operand:SI M "aarch64_sve_gp_strictness") +;; ...] +;; UNSPEC_COND_) ...) +;; ...]) +;; +;; The fully-relaxed version of this pattern is: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (... +;; (unspec [(match_operand:SI N) +;; (const_int SVE_RELAXED_GP) +;; ...] +;; UNSPEC_COND_) +;; (unspec [(match_operand:SI M) +;; (const_int SVE_RELAXED_GP) +;; ...] +;; UNSPEC_COND_) ...) +;; ...]) +;; ;; ------------------------------------------------------------------------- ;; ---- Note on FFR handling ;; ------------------------------------------------------------------------- @@ -3304,18 +3393,18 @@ ) ;; Predicated floating-point unary arithmetic, merging with the first input. 
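;;
;; (Illustrative note, not part of the original patch: the reason the
;; _strict variants below tie the FP operation's predicate to operand 1
;; with (match_dup 1), rather than testing equality in the insn's C
;; condition, is that such a condition is only an assertion about the RTL
;; at the point the insn is matched.  Before register allocation the two
;; predicate operands may be the same pseudo register, so an
;; rtx_equal_p-style condition would hold; the RA may then assign the two
;; uses different hard registers, and the already-matched insn would no
;; longer satisfy its own condition, giving the post-RA match failure
;; described in the commit message.  A match_dup makes the two positions
;; literally the same operand, so they cannot diverge.)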
-(define_insn_and_rewrite "*cond__2" +(define_insn_and_rewrite "*cond__2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 3) - (match_operand:SI 4 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] SVE_COND_FP_UNARY) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])" + "TARGET_SVE" "@ \t%0., %1/m, %0. movprfx\t%0, %2\;\t%0., %1/m, %2." @@ -3326,6 +3415,24 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] + SVE_COND_FP_UNARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0. + movprfx\t%0, %2\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point unary arithmetic, merging with an independent ;; value. ;; @@ -3334,20 +3441,18 @@ ;; which is handled above rather than here. Marking all the alternatives ;; as earlyclobber helps to make the instruction more regular to the ;; register allocator. -(define_insn_and_rewrite "*cond__any" +(define_insn_and_rewrite "*cond__any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] SVE_COND_FP_UNARY) (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[3]) - && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" "@ \t%0., %1/m, %2. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. @@ -3359,6 +3464,25 @@ [(set_attr "movprfx" "*,yes,yes")] ) +(define_insn "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] + SVE_COND_FP_UNARY) + (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" + "@ + \t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + movprfx\t%0, %3\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Square root ;; ------------------------------------------------------------------------- @@ -4649,19 +4773,19 @@ ;; Predicated floating-point binary operations that take an integer as their ;; second operand, with inactive lanes coming from the first operand. 
-(define_insn_and_rewrite "*cond__2" +(define_insn_and_rewrite "*cond__2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand: 3 "register_operand" "w, w")] SVE_COND_FP_BINARY_INT) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ \t%0., %1/m, %0., %3. movprfx\t%0, %2\;\t%0., %1/m, %0., %3." @@ -4672,24 +4796,41 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand: 3 "register_operand" "w, w")] + SVE_COND_FP_BINARY_INT) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point binary operations that take an integer as ;; their second operand, with the values of inactive lanes being distinct ;; from the other inputs. -(define_insn_and_rewrite "*cond__any" +(define_insn_and_rewrite "*cond__any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w") (match_operand: 3 "register_operand" "w, w, w, w")] SVE_COND_FP_BINARY_INT) (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" "@ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. @@ -4713,6 +4854,35 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w") + (match_operand: 3 "register_operand" "w, w, w, w")] + SVE_COND_FP_BINARY_INT) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. 
+ #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] General binary arithmetic corresponding to rtx codes ;; ------------------------------------------------------------------------- @@ -4813,19 +4983,19 @@ ) ;; Predicated floating-point operations, merging with the first input. -(define_insn_and_rewrite "*cond__2" +(define_insn_and_rewrite "*cond__2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] SVE_COND_FP_BINARY) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ \t%0., %1/m, %0., %3. movprfx\t%0, %2\;\t%0., %1/m, %0., %3." @@ -4836,20 +5006,39 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + SVE_COND_FP_BINARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + ;; Same for operations that take a 1-bit constant. -(define_insn_and_rewrite "*cond__2_const" +(define_insn_and_rewrite "*cond__2_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "")] SVE_COND_FP_BINARY_I1) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ \t%0., %1/m, %0., #%3 movprfx\t%0, %2\;\t%0., %1/m, %0., #%3" @@ -4860,20 +5049,39 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__2_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "")] + SVE_COND_FP_BINARY_I1) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;\t%0., %1/m, %0., #%3" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point operations, merging with the second input. 
-(define_insn_and_rewrite "*cond__3" +(define_insn_and_rewrite "*cond__3_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w") (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] SVE_COND_FP_BINARY) (match_dup 3)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ \t%0., %1/m, %0., %2. movprfx\t%0, %3\;\t%0., %1/m, %0., %2." @@ -4884,14 +5092,33 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__3_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + SVE_COND_FP_BINARY) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %2. + movprfx\t%0, %3\;\t%0., %1/m, %0., %2." + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point operations, merging with an independent value. -(define_insn_and_rewrite "*cond__any" +(define_insn_and_rewrite "*cond__any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] SVE_COND_FP_BINARY) @@ -4899,8 +5126,7 @@ UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4]) - && !rtx_equal_p (operands[3], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + && !rtx_equal_p (operands[3], operands[4])" "@ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. @@ -4925,22 +5151,52 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] + SVE_COND_FP_BINARY) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[4]) + && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; Same for operations that take a 1-bit constant. 
-(define_insn_and_rewrite "*cond__any_const" +(define_insn_and_rewrite "*cond__any_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") (match_operand:SVE_FULL_F 3 "")] SVE_COND_FP_BINARY_I1) (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" "@ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 @@ -4963,6 +5219,34 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond__any_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") + (match_operand:SVE_FULL_F 3 "")] + SVE_COND_FP_BINARY_I1) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Addition ;; ------------------------------------------------------------------------- @@ -5001,19 +5285,19 @@ ;; Predicated floating-point addition of a constant, merging with the ;; first input. 
-(define_insn_and_rewrite "*cond_add_2_const" +(define_insn_and_rewrite "*cond_add_2_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w") (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")] UNSPEC_COND_FADD) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ fadd\t%0., %1/m, %0., #%3 fsub\t%0., %1/m, %0., #%N3 @@ -5026,23 +5310,42 @@ [(set_attr "movprfx" "*,*,yes,yes")] ) +(define_insn "*cond_add_2_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w") + (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")] + UNSPEC_COND_FADD) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fadd\t%0., %1/m, %0., #%3 + fsub\t%0., %1/m, %0., #%N3 + movprfx\t%0, %2\;fadd\t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;fsub\t%0., %1/m, %0., #%N3" + [(set_attr "movprfx" "*,*,yes,yes")] +) + ;; Predicated floating-point addition of a constant, merging with an ;; independent value. -(define_insn_and_rewrite "*cond_add_any_const" +(define_insn_and_rewrite "*cond_add_any_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w") (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")] UNSPEC_COND_FADD) (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" "@ movprfx\t%0., %1/z, %2.\;fadd\t%0., %1/m, %0., #%3 movprfx\t%0., %1/z, %2.\;fsub\t%0., %1/m, %0., #%N3 @@ -5068,6 +5371,37 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond_add_any_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w") + (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")] + UNSPEC_COND_FADD) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;fadd\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/z, %2.\;fsub\t%0., %1/m, %0., #%N3 + movprfx\t%0., %1/m, %2.\;fadd\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/m, %2.\;fsub\t%0., %1/m, %0., #%N3 + # + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + 
{ + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; Register merging forms are handled through SVE_COND_FP_BINARY. ;; ------------------------------------------------------------------------- @@ -5110,19 +5444,19 @@ ) ;; Predicated FCADD, merging with the first input. -(define_insn_and_rewrite "*cond__2" +(define_insn_and_rewrite "*cond__2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] SVE_COND_FCADD) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ fcadd\t%0., %1/m, %0., %3., # movprfx\t%0, %2\;fcadd\t%0., %1/m, %0., %3., #" @@ -5133,22 +5467,39 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + SVE_COND_FCADD) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fcadd\t%0., %1/m, %0., %3., # + movprfx\t%0, %2\;fcadd\t%0., %1/m, %0., %3., #" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated FCADD, merging with an independent value. -(define_insn_and_rewrite "*cond__any" +(define_insn_and_rewrite "*cond__any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")] SVE_COND_FCADD) (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" "@ movprfx\t%0., %1/z, %2.\;fcadd\t%0., %1/m, %0., %3., # movprfx\t%0., %1/z, %0.\;fcadd\t%0., %1/m, %0., %3., # @@ -5172,6 +5523,35 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")] + SVE_COND_FCADD) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;fcadd\t%0., %1/m, %0., %3., # + movprfx\t%0., %1/z, %0.\;fcadd\t%0., %1/m, %0., %3., # + movprfx\t%0., %1/m, %2.\;fcadd\t%0., %1/m, %0., %3., # + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + 
operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Subtraction ;; ------------------------------------------------------------------------- @@ -5209,19 +5589,19 @@ ;; Predicated floating-point subtraction from a constant, merging with the ;; second input. -(define_insn_and_rewrite "*cond_sub_3_const" +(define_insn_and_rewrite "*cond_sub_3_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] UNSPEC_COND_FSUB) (match_dup 3)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ fsubr\t%0., %1/m, %0., #%2 movprfx\t%0, %3\;fsubr\t%0., %1/m, %0., #%2" @@ -5232,12 +5612,28 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond_sub_3_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + UNSPEC_COND_FSUB) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fsubr\t%0., %1/m, %0., #%2 + movprfx\t%0, %3\;fsubr\t%0., %1/m, %0., #%2" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point subtraction from a constant, merging with an ;; independent value. -;; -;; The subtraction predicate and the merge predicate are allowed to be -;; different. -(define_insn_and_rewrite "*cond_sub_relaxed_const" +(define_insn_and_rewrite "*cond_sub_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl") @@ -5272,11 +5668,7 @@ [(set_attr "movprfx" "yes")] ) -;; Predicated floating-point subtraction from a constant, merging with an -;; independent value. -;; -;; The subtraction predicate and the merge predicate must be the same. -(define_insn_and_rewrite "*cond_sub_strict_const" +(define_insn_and_rewrite "*cond_sub_const_strict" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl") @@ -5329,19 +5721,19 @@ ) ;; Predicated floating-point absolute difference. -(define_insn_and_rewrite "*aarch64_pred_abd" +(define_insn_and_rewrite "*aarch64_pred_abd_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (match_operand:SI 4 "aarch64_sve_gp_strictness") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "%0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] UNSPEC_COND_FSUB)] UNSPEC_COND_FABS))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE" "@ fabd\t%0., %1/m, %0., %3. movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." 
@@ -5352,6 +5744,25 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*aarch64_pred_abd_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "%0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS))] + "TARGET_SVE" + "@ + fabd\t%0., %1/m, %0., %3. + movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + (define_expand "@aarch64_cond_abd" [(set (match_operand:SVE_FULL_F 0 "register_operand") (unspec:SVE_FULL_F @@ -5376,82 +5787,124 @@ ;; Predicated floating-point absolute difference, merging with the first ;; input. -(define_insn_and_rewrite "*aarch64_cond_abd_2" +(define_insn_and_rewrite "*aarch64_cond_abd_2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (unspec:SVE_FULL_F - [(match_operand 6) - (match_operand:SI 7 "aarch64_sve_gp_strictness") + [(match_operand 5) + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] UNSPEC_COND_FSUB)] UNSPEC_COND_FABS) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE - && aarch64_sve_pred_dominates_p (&operands[4], operands[1]) - && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" + "TARGET_SVE" "@ fabd\t%0., %1/m, %0., %3. movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." "&& (!rtx_equal_p (operands[1], operands[4]) - || !rtx_equal_p (operands[1], operands[6]))" + || !rtx_equal_p (operands[1], operands[5]))" { operands[4] = copy_rtx (operands[1]); - operands[6] = copy_rtx (operands[1]); + operands[5] = copy_rtx (operands[1]); } [(set_attr "movprfx" "*,yes")] ) +(define_insn "*aarch64_cond_abd_2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fabd\t%0., %1/m, %0., %3. + movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point absolute difference, merging with the second ;; input. 
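;;
;; (Illustrative note, not part of the original patch: the absolute
;; difference patterns wrap an UNSPEC_COND_FSUB inside an
;; UNSPEC_COND_FABS, so their _strict forms are a concrete instance of
;; the "mixed strictness" case sketched in the banner comment above:
;; each nested unspec keeps its own aarch64_sve_gp_strictness operand,
;; but both are tied to predicate operand 1 via match_dup, so strict and
;; relaxed operations can be combined as long as they share the same
;; governing predicate.)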
-(define_insn_and_rewrite "*aarch64_cond_abd_3" +(define_insn_and_rewrite "*aarch64_cond_abd_3_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (unspec:SVE_FULL_F - [(match_operand 6) - (match_operand:SI 7 "aarch64_sve_gp_strictness") + [(match_operand 5) + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w") (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] UNSPEC_COND_FSUB)] UNSPEC_COND_FABS) (match_dup 3)] UNSPEC_SEL))] - "TARGET_SVE - && aarch64_sve_pred_dominates_p (&operands[4], operands[1]) - && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" + "TARGET_SVE" "@ fabd\t%0., %1/m, %0., %2. movprfx\t%0, %3\;fabd\t%0., %1/m, %0., %2." "&& (!rtx_equal_p (operands[1], operands[4]) - || !rtx_equal_p (operands[1], operands[6]))" + || !rtx_equal_p (operands[1], operands[5]))" { operands[4] = copy_rtx (operands[1]); - operands[6] = copy_rtx (operands[1]); + operands[5] = copy_rtx (operands[1]); } [(set_attr "movprfx" "*,yes")] ) +(define_insn "*aarch64_cond_abd_3_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fabd\t%0., %1/m, %0., %2. + movprfx\t%0, %3\;fabd\t%0., %1/m, %0., %2." + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point absolute difference, merging with an ;; independent value. -(define_insn_and_rewrite "*aarch64_cond_abd_any" +(define_insn_and_rewrite "*aarch64_cond_abd_any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (unspec:SVE_FULL_F - [(match_operand 7) - (match_operand:SI 8 "aarch64_sve_gp_strictness") + [(match_operand 6) + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] UNSPEC_COND_FSUB)] @@ -5460,9 +5913,7 @@ UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4]) - && !rtx_equal_p (operands[3], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1]) - && aarch64_sve_pred_dominates_p (&operands[7], operands[1])" + && !rtx_equal_p (operands[3], operands[4])" "@ movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %3. movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %2. 
@@ -5472,18 +5923,18 @@ "&& 1" { if (reload_completed - && register_operand (operands[4], mode) - && !rtx_equal_p (operands[0], operands[4])) + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])) { emit_insn (gen_vcond_mask_ (operands[0], operands[3], operands[4], operands[1])); operands[4] = operands[3] = operands[0]; } else if (!rtx_equal_p (operands[1], operands[5]) - || !rtx_equal_p (operands[1], operands[7])) + || !rtx_equal_p (operands[1], operands[6])) { operands[5] = copy_rtx (operands[1]); - operands[7] = copy_rtx (operands[1]); + operands[6] = copy_rtx (operands[1]); } else FAIL; @@ -5491,6 +5942,42 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*aarch64_cond_abd_any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 6 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[4]) + && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %2. + movprfx\t%0., %1/z, %2.\;fabd\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;fabd\t%0., %1/m, %0., %3. + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[3], + operands[4], operands[1])); + operands[4] = operands[3] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Multiplication ;; ------------------------------------------------------------------------- @@ -6416,20 +6903,20 @@ ;; Predicated floating-point ternary operations, merging with the ;; first input. -(define_insn_and_rewrite "*cond__2" +(define_insn_and_rewrite "*cond__2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w") (match_operand:SVE_FULL_F 4 "register_operand" "w, w")] SVE_COND_FP_TERNARY) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE" "@ \t%0., %1/m, %3., %4. movprfx\t%0, %2\;\t%0., %1/m, %3., %4." @@ -6440,22 +6927,42 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, w")] + SVE_COND_FP_TERNARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %3., %4. 
+ movprfx\t%0, %2\;\t%0., %1/m, %3., %4." + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point ternary operations, merging with the ;; third input. -(define_insn_and_rewrite "*cond__4" +(define_insn_and_rewrite "*cond__4_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w") (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] SVE_COND_FP_TERNARY) (match_dup 4)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE" "@ \t%0., %1/m, %2., %3. movprfx\t%0, %4\;\t%0., %1/m, %2., %3." @@ -6466,15 +6973,35 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__4_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] + SVE_COND_FP_TERNARY) + (match_dup 4)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %2., %3. + movprfx\t%0, %4\;\t%0., %1/m, %2., %3." + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point ternary operations, merging with an ;; independent value. -(define_insn_and_rewrite "*cond__any" +(define_insn_and_rewrite "*cond__any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 6) - (match_operand:SI 7 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] @@ -6484,8 +7011,7 @@ "TARGET_SVE && !rtx_equal_p (operands[2], operands[5]) && !rtx_equal_p (operands[3], operands[5]) - && !rtx_equal_p (operands[4], operands[5]) - && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" + && !rtx_equal_p (operands[4], operands[5])" "@ movprfx\t%0., %1/z, %4.\;\t%0., %1/m, %2., %3. movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %2., %3. @@ -6511,6 +7037,41 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] + SVE_COND_FP_TERNARY) + (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[5]) + && !rtx_equal_p (operands[3], operands[5]) + && !rtx_equal_p (operands[4], operands[5])" + "@ + movprfx\t%0., %1/z, %4.\;\t%0., %1/m, %2., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %2., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %3., %4. 
+ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %2., %4. + movprfx\t%0., %1/m, %4.\;\t%0., %1/m, %2., %3. + #" + "&& reload_completed + && register_operand (operands[5], mode) + && !rtx_equal_p (operands[0], operands[5])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[4], + operands[5], operands[1])); + operands[5] = operands[4] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using ;; (fma ...) since target-independent code won't understand the indexing. (define_insn "@aarch64__lane_" @@ -6572,20 +7133,20 @@ ) ;; Predicated FCMLA, merging with the third input. -(define_insn_and_rewrite "*cond__4" +(define_insn_and_rewrite "*cond__4_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w") (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] SVE_COND_FCMLA) (match_dup 4)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE" "@ fcmla\t%0., %1/m, %2., %3., # movprfx\t%0, %4\;fcmla\t%0., %1/m, %2., %3., #" @@ -6596,23 +7157,41 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__4_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] + SVE_COND_FCMLA) + (match_dup 4)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fcmla\t%0., %1/m, %2., %3., # + movprfx\t%0, %4\;fcmla\t%0., %1/m, %2., %3., #" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated FCMLA, merging with an independent value. 
-(define_insn_and_rewrite "*cond__any" +(define_insn_and_rewrite "*cond__any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 6) - (match_operand:SI 7 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w") (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")] SVE_COND_FCMLA) (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[4], operands[5]) - && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])" "@ movprfx\t%0., %1/z, %4.\;fcmla\t%0., %1/m, %2., %3., # movprfx\t%0., %1/z, %0.\;fcmla\t%0., %1/m, %2., %3., # @@ -6636,6 +7215,36 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")] + SVE_COND_FCMLA) + (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])" + "@ + movprfx\t%0., %1/z, %4.\;fcmla\t%0., %1/m, %2., %3., # + movprfx\t%0., %1/z, %0.\;fcmla\t%0., %1/m, %2., %3., # + movprfx\t%0., %1/m, %4.\;fcmla\t%0., %1/m, %2., %3., # + #" + "&& reload_completed + && register_operand (operands[5], mode) + && !rtx_equal_p (operands[0], operands[5])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[4], + operands[5], operands[1])); + operands[5] = operands[4] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; Unpredicated FCMLA with indexing. (define_insn "@aarch64__lane_" [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w") @@ -7328,34 +7937,52 @@ "TARGET_SVE" ) -(define_insn_and_rewrite "*aarch64_pred_fac" +(define_insn_and_rewrite "*aarch64_pred_fac_relaxed" [(set (match_operand: 0 "register_operand" "=Upa") (unspec: [(match_operand: 1 "register_operand" "Upl") (match_operand:SI 4 "aarch64_sve_ptrue_flag") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w")] UNSPEC_COND_FABS) (unspec:SVE_FULL_F - [(match_operand 7) - (match_operand:SI 8 "aarch64_sve_gp_strictness") + [(match_operand 6) + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 3 "register_operand" "w")] UNSPEC_COND_FABS)] SVE_COND_FP_ABS_CMP))] - "TARGET_SVE - && aarch64_sve_pred_dominates_p (&operands[5], operands[1]) - && aarch64_sve_pred_dominates_p (&operands[7], operands[1])" + "TARGET_SVE" "fac\t%0., %1/z, %2., %3." 
"&& (!rtx_equal_p (operands[1], operands[5]) - || !rtx_equal_p (operands[1], operands[7]))" + || !rtx_equal_p (operands[1], operands[6]))" { operands[5] = copy_rtx (operands[1]); - operands[7] = copy_rtx (operands[1]); + operands[6] = copy_rtx (operands[1]); } ) +(define_insn "*aarch64_pred_fac_strict" + [(set (match_operand: 0 "register_operand" "=Upa") + (unspec: + [(match_operand: 1 "register_operand" "Upl") + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "w")] + UNSPEC_COND_FABS) + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 6 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 3 "register_operand" "w")] + UNSPEC_COND_FABS)] + SVE_COND_FP_ABS_CMP))] + "TARGET_SVE" + "fac\t%0., %1/z, %2., %3." +) + ;; ------------------------------------------------------------------------- ;; ---- [PRED] Select ;; ------------------------------------------------------------------------- @@ -7937,20 +8564,18 @@ ;; the same register (despite having different modes). Making all the ;; alternatives earlyclobber makes things more consistent for the ;; register allocator. -(define_insn_and_rewrite "*cond__nontrunc" +(define_insn_and_rewrite "*cond__nontrunc_relaxed" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") (unspec:SVE_FULL_HSDI [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec:SVE_FULL_HSDI [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] SVE_COND_FCVTI) (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] - "TARGET_SVE - && >= - && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE && >= " "@ fcvtz\t%0., %1/m, %2. movprfx\t%0., %1/z, %2.\;fcvtz\t%0., %1/m, %2. @@ -7962,6 +8587,25 @@ [(set_attr "movprfx" "*,yes,yes")] ) +(define_insn "*cond__nontrunc_strict" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") + (unspec:SVE_FULL_HSDI + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_HSDI + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] + SVE_COND_FCVTI) + (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && >= " + "@ + fcvtz\t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;fcvtz\t%0., %1/m, %2. + movprfx\t%0, %3\;fcvtz\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + ;; Predicated narrowing float-to-integer conversion with merging. (define_expand "@cond__trunc" [(set (match_operand:VNx4SI_ONLY 0 "register_operand") @@ -8101,20 +8745,18 @@ ;; the same register (despite having different modes). Making all the ;; alternatives earlyclobber makes things more consistent for the ;; register allocator. 
-(define_insn_and_rewrite "*cond__nonextend"
+(define_insn_and_rewrite "*cond__nonextend_relaxed"
   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
         (unspec:SVE_FULL_F
          [(match_operand: 1 "register_operand" "Upl, Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 4)
-             (match_operand:SI 5 "aarch64_sve_gp_strictness")
+             (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
             SVE_COND_ICVTF)
           (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
-  "TARGET_SVE
-   && >=
-   && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+  "TARGET_SVE && >= "
   "@
    cvtf\t%0., %1/m, %2.
    movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2.
@@ -8126,6 +8768,25 @@
   [(set_attr "movprfx" "*,yes,yes")]
 )
 
+(define_insn "*cond__nonextend_strict"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
+        (unspec:SVE_FULL_F
+          [(match_operand: 1 "register_operand" "Upl, Upl, Upl")
+           (unspec:SVE_FULL_F
+             [(match_dup 1)
+              (const_int SVE_STRICT_GP)
+              (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
+             SVE_COND_ICVTF)
+           (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+          UNSPEC_SEL))]
+  "TARGET_SVE && >= "
+  "@
+   cvtf\t%0., %1/m, %2.
+   movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2.
+   movprfx\t%0, %3\;cvtf\t%0., %1/m, %2."
+  [(set_attr "movprfx" "*,yes,yes")]
+)
+
 ;; Predicated widening integer-to-float conversion with merging.
 (define_expand "@cond__extend"
   [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index e18b9fef16e..0cafd0b690d 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1890,18 +1890,18 @@
 )
 
 ;; These instructions do not take MOVPRFX.
-(define_insn_and_rewrite "*cond_"
+(define_insn_and_rewrite "*cond__relaxed"
   [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
         (unspec:SVE_FULL_SDF
          [(match_operand: 1 "register_operand" "Upl")
           (unspec:SVE_FULL_SDF
             [(match_operand 4)
-             (match_operand:SI 5 "aarch64_sve_gp_strictness")
+             (const_int SVE_RELAXED_GP)
              (match_operand: 2 "register_operand" "w")]
             SVE2_COND_FP_UNARY_LONG)
           (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
          UNSPEC_SEL))]
-  "TARGET_SVE2 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+  "TARGET_SVE2"
   "\t%0., %1/m, %2."
   "&& !rtx_equal_p (operands[1], operands[4])"
   {
@@ -1909,6 +1909,21 @@
   }
 )
 
+(define_insn "*cond__strict"
+  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
+        (unspec:SVE_FULL_SDF
+          [(match_operand: 1 "register_operand" "Upl")
+           (unspec:SVE_FULL_SDF
+             [(match_dup 1)
+              (const_int SVE_STRICT_GP)
+              (match_operand: 2 "register_operand" "w")]
+             SVE2_COND_FP_UNARY_LONG)
+           (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
+          UNSPEC_SEL))]
+  "TARGET_SVE2"
+  "\t%0., %1/m, %2."
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP<-FP] Narrowing conversions
 ;; -------------------------------------------------------------------------
@@ -1963,20 +1978,18 @@
   "TARGET_SVE2"
 )
 
-(define_insn_and_rewrite "*cond__any"
+(define_insn_and_rewrite "*cond__any_relaxed"
   [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
         (unspec:VNx4SF_ONLY
          [(match_operand: 1 "register_operand" "Upl, Upl, Upl")
           (unspec:VNx4SF_ONLY
             [(match_operand 4)
-             (match_operand:SI 5 "aarch64_sve_gp_strictness")
+             (const_int SVE_RELAXED_GP)
              (match_operand: 2 "register_operand" "w, w, w")]
            SVE2_COND_FP_UNARY_NARROWB)
          (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
         UNSPEC_SEL))]
-  "TARGET_SVE2
-   && !rtx_equal_p (operands[2], operands[3])
-   && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
   "@
    \t%0., %1/m, %2.
    movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2.
@@ -1988,6 +2001,25 @@
   [(set_attr "movprfx" "*,yes,yes")]
 )
 
+(define_insn "*cond__any_strict"
+  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
+        (unspec:VNx4SF_ONLY
+          [(match_operand: 1 "register_operand" "Upl, Upl, Upl")
+           (unspec:VNx4SF_ONLY
+             [(match_dup 1)
+              (const_int SVE_STRICT_GP)
+              (match_operand: 2 "register_operand" "w, w, w")]
+             SVE2_COND_FP_UNARY_NARROWB)
+           (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+          UNSPEC_SEL))]
+  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
+  "@
+   \t%0., %1/m, %2.
+   movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2.
+   movprfx\t%0, %3\;\t%0., %1/m, %2."
+  [(set_attr "movprfx" "*,yes,yes")]
+)
+
 ;; Predicated FCVTXNT. This doesn't give a natural aarch64_pred_*/cond_*
 ;; pair because the even elements always have to be supplied for active
 ;; elements, even if the inactive elements don't matter.
@@ -2113,14 +2145,12 @@
          [(match_operand: 1 "register_operand" "Upl, Upl, Upl")
           (unspec:
             [(match_operand 4)
-             (match_operand:SI 5 "aarch64_sve_gp_strictness")
+             (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
            SVE2_COND_INT_UNARY_FP)
          (match_operand: 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
         UNSPEC_SEL))]
-  "TARGET_SVE2
-   && !rtx_equal_p (operands[2], operands[3])
-   && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
   "@
    \t%0., %1/m, %2.
    movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2.
@@ -2132,6 +2162,25 @@
   [(set_attr "movprfx" "*,yes,yes")]
 )
 
+(define_insn "*cond__strict"
+  [(set (match_operand: 0 "register_operand" "=&w, ?&w, ?&w")
+        (unspec:
+          [(match_operand: 1 "register_operand" "Upl, Upl, Upl")
+           (unspec:
+             [(match_dup 1)
+              (const_int SVE_STRICT_GP)
+              (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
+             SVE2_COND_INT_UNARY_FP)
+           (match_operand: 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+          UNSPEC_SEL))]
+  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
+  "@
+   \t%0., %1/m, %2.
+   movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2.
+   movprfx\t%0, %3\;\t%0., %1/m, %2."
+  [(set_attr "movprfx" "*,yes,yes")]
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Polynomial multiplication
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9e88438b3c3..a3408f48c82 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3717,24 +3717,6 @@ aarch64_pfalse_reg (machine_mode mode)
   return gen_lowpart (mode, reg);
 }
 
-/* Return true if predicate PRED1[0] is true whenever predicate PRED2 is
-   true, or alternatively if we know that the operation predicated by
-   PRED1[0] is safe to perform whenever PRED2 is true. PRED1[1] is a
-   aarch64_sve_gp_strictness operand that describes the operation
-   predicated by PRED1[0]. */
-
-bool
-aarch64_sve_pred_dominates_p (rtx *pred1, rtx pred2)
-{
-  machine_mode mode = GET_MODE (pred2);
-  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
-              && mode == GET_MODE (pred1[0])
-              && aarch64_sve_gp_strictness (pred1[1], SImode));
-  return (pred1[0] == CONSTM1_RTX (mode)
-          || INTVAL (pred1[1]) == SVE_RELAXED_GP
-          || rtx_equal_p (pred1[0], pred2));
-}
-
 /* PRED1[0] is a PTEST predicate and PRED1[1] is an aarch64_sve_ptrue_flag
    for it. PRED2[0] is the predicate for the instruction whose result
    is tested by the PTEST and PRED2[1] is again an aarch64_sve_ptrue_flag
-- 
2.30.2