;; == General notes
;; ---- Note on the handling of big-endian SVE
;; ---- Description of UNSPEC_PTEST
+;; ---- Description of UNSPEC_PRED_Z
;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
;;
;; - OP is the predicate we want to test, of the same mode as CAST_GP.
;;
;; -------------------------------------------------------------------------
+;; ---- Description of UNSPEC_PRED_Z
+;; -------------------------------------------------------------------------
+;;
+;; SVE integer comparisons are predicated and return zero for inactive
+;; lanes. Sometimes we use them with predicates that are all-true and
+;; sometimes we use them with general predicates.
+;;
+;; The integer comparisons also set the flags and so build in the effect
+;; of a PTEST. We therefore want to be able to combine integer comparison
+;; patterns with PTESTs of the result. One difficulty with doing this is
+;; that (as noted above) the PTEST is always a .B operation and so can place
+;; stronger requirements on the governing predicate than the comparison does.
+;;
+;; For example, when applying a separate PTEST to the result of a full-vector
+;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
+;; .B PTRUE. In contrast, the comparison might be predicated on either
+;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
+;; bits don't matter for .H operations.
+;;
+;; We therefore can't rely on a full-vector comparison using the same
+;; predicate register as a following PTEST. We instead need to remember
+;; whether a comparison is known to be a full-vector comparison and use
+;; this information in addition to a check for equal predicate registers.
+;; At the same time, it's useful to have a common representation for all
+;; integer comparisons, so that they can be handled by a single set of
+;; patterns.
+;;
+;; We therefore take a similar approach to UNSPEC_PTEST above and use:
+;;
+;; (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
+;;
+;; where:
+;;
+;; - GP is the governing predicate, of mode <M:VPRED>
+;;
+;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
+;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
+;; otherwise
+;;
+;; - CODE is the comparison code
+;;
+;; - OP0 and OP1 are the values being compared, of mode M
+;;
+;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
+;;
+;; -------------------------------------------------------------------------
;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
;; -------------------------------------------------------------------------
;;
}
)
-;; Integer comparisons predicated with a PTRUE.
-(define_insn "*cmp<cmp_op><mode>"
+;; Predicated integer comparisons.
+;;
+;; Operand 1 is the governing predicate and operand 2 is the
+;; aarch64_sve_ptrue_flag for it. Operands 3 and 4 are the values being
+;; compared: the first alternative prints operand 4 as an immediate and
+;; the second prints it as a vector register. The pattern also clobbers
+;; the condition-code register.
+(define_insn "@aarch64_pred_cmp<cmp_op><mode>"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
(unspec:<VPRED>
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
(SVE_INT_CMP:<VPRED>
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
- UNSPEC_MERGE_PTRUE))
+ (match_operand:SVE_I 3 "register_operand" "w, w")
+ (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+ UNSPEC_PRED_Z))
(clobber (reg:CC_NZC CC_REGNUM))]
"TARGET_SVE"
"@
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)
-;; Integer comparisons predicated with a PTRUE in which both the flag and
-;; predicate results are interesting.
-(define_insn "*cmp<cmp_op><mode>_cc"
+;; Predicated integer comparisons in which both the flag and predicate
+;; results are interesting.
+;;
+;; Operands 4 and 5 are the predicate and aarch64_sve_ptrue_flag passed
+;; to the PTEST; operands 6 and 7 are the corresponding pair for the
+;; comparison itself. The pattern only matches if
+;; aarch64_sve_same_pred_for_ptest_p accepts the two pairs. If the two
+;; predicates are not rtx_equal_p, the rewrite replaces operands 6 and 7
+;; with a copy of operand 4 and with operand 5 respectively.
+(define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
(match_operand 4)
(match_operand:SI 5 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
- [(match_dup 4)
+ [(match_operand 6)
+ (match_operand:SI 7 "aarch64_sve_ptrue_flag")
(SVE_INT_CMP:<VPRED>
(match_operand:SVE_I 2 "register_operand" "w, w")
(match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
- UNSPEC_MERGE_PTRUE)]
+ UNSPEC_PRED_Z)]
UNSPEC_PTEST))
(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
(unspec:<VPRED>
- [(match_dup 4)
+ [(match_dup 6)
+ (match_dup 7)
(SVE_INT_CMP:<VPRED>
(match_dup 2)
(match_dup 3))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
+ UNSPEC_PRED_Z))]
+ "TARGET_SVE
+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
"@
cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
)
-;; Integer comparisons predicated with a PTRUE in which only the flags result
-;; is interesting.
-(define_insn "*cmp<cmp_op><mode>_ptest"
+;; Predicated integer comparisons in which only the flags result is
+;; interesting.
+;;
+;; Operands 4 and 5 are the predicate and aarch64_sve_ptrue_flag passed
+;; to the PTEST; operands 6 and 7 are the corresponding pair for the
+;; comparison itself. The pattern only matches if
+;; aarch64_sve_same_pred_for_ptest_p accepts the two pairs. If the two
+;; predicates are not rtx_equal_p, the rewrite replaces operands 6 and 7
+;; with a copy of operand 4 and with operand 5 respectively. Operand 0
+;; is only a scratch predicate register here.
+(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
(match_operand 4)
(match_operand:SI 5 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
- [(match_dup 4)
+ [(match_operand 6)
+ (match_operand:SI 7 "aarch64_sve_ptrue_flag")
(SVE_INT_CMP:<VPRED>
(match_operand:SVE_I 2 "register_operand" "w, w")
(match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
- UNSPEC_MERGE_PTRUE)]
+ UNSPEC_PRED_Z)]
UNSPEC_PTEST))
(clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
- "TARGET_SVE"
+ "TARGET_SVE
+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
"@
cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
)
;; Predicated integer comparisons, formed by combining a PTRUE-predicated
;; comparison with an AND. Split the instruction into its preferred form
-;; (below) at the earliest opportunity, in order to get rid of the
-;; redundant operand 1.
-(define_insn_and_split "*pred_cmp<cmp_op><mode>_combine"
+;; at the earliest opportunity, in order to get rid of the redundant
+;; operand 4.
+;;
+;; The input comparison is governed by operand 4 and flagged
+;; SVE_KNOWN_PTRUE, and its result is ANDed with operand 1. The split
+;; emits a single comparison governed by operand 1 and flagged
+;; SVE_MAYBE_NOT_PTRUE, so operand 4 does not appear in the result.
+(define_insn_and_split "*cmp<cmp_op><mode>_and"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (and:<VPRED>
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1)
- (SVE_INT_CMP:<VPRED>
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
- UNSPEC_MERGE_PTRUE)
- (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
+ (and:<VPRED>
+ (unspec:<VPRED>
+ [(match_operand 4)
+ (const_int SVE_KNOWN_PTRUE)
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+ UNSPEC_PRED_Z)
+ (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
(clobber (reg:CC_NZC CC_REGNUM))]
"TARGET_SVE"
"#"
"&& 1"
[(parallel
[(set (match_dup 0)
- (and:<VPRED>
- (SVE_INT_CMP:<VPRED>
- (match_dup 2)
- (match_dup 3))
- (match_dup 4)))
+ (unspec:<VPRED>
+ [(match_dup 1)
+ (const_int SVE_MAYBE_NOT_PTRUE)
+ (SVE_INT_CMP:<VPRED>
+ (match_dup 2)
+ (match_dup 3))]
+ UNSPEC_PRED_Z))
(clobber (reg:CC_NZC CC_REGNUM))])]
)
-;; Predicated integer comparisons.
-(define_insn "@aarch64_pred_cmp<cmp_op><mode>"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (and:<VPRED>
- (SVE_INT_CMP:<VPRED>
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
- (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
- (clobber (reg:CC_NZC CC_REGNUM))]
- "TARGET_SVE"
- "@
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
-
;; -------------------------------------------------------------------------
;; ---- [INT] While tests
;; -------------------------------------------------------------------------
|| rtx_equal_p (pred1[0], pred2));
}
+/* PRED1[0] is a PTEST predicate and PRED1[1] is an aarch64_sve_ptrue_flag
+ for it. PRED2[0] is the predicate for the instruction whose result
+ is tested by the PTEST and PRED2[1] is again an aarch64_sve_ptrue_flag
+ for it. Return true if we can prove that the two predicates are
+ equivalent for PTEST purposes; that is, if we can replace PRED2[0]
+ with PRED1[0] without changing behavior.
+
+ PRED1[0] and PRED2[0] must have the same MODE_VECTOR_BOOL mode and
+ both flags must satisfy aarch64_sve_ptrue_flag; this is asserted.
+
+ The predicates are accepted as equivalent in two cases: when each of
+ them is known to be all-true (because it is the CONSTM1_RTX constant
+ for the mode or because its flag is SVE_KNOWN_PTRUE), or when the two
+ predicates are rtx_equal_p. */
+
+bool
+aarch64_sve_same_pred_for_ptest_p (rtx *pred1, rtx *pred2)
+{
+ machine_mode mode = GET_MODE (pred1[0]);
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+ && mode == GET_MODE (pred2[0])
+ && aarch64_sve_ptrue_flag (pred1[1], SImode)
+ && aarch64_sve_ptrue_flag (pred2[1], SImode));
+
+ bool ptrue1_p = (pred1[0] == CONSTM1_RTX (mode)
+ || INTVAL (pred1[1]) == SVE_KNOWN_PTRUE);
+ bool ptrue2_p = (pred2[0] == CONSTM1_RTX (mode)
+ || INTVAL (pred2[1]) == SVE_KNOWN_PTRUE);
+ return (ptrue1_p && ptrue2_p) || rtx_equal_p (pred1[0], pred2[0]);
+}
+
+/* Emit a comparison CMP between OP1 and OP2, both of which have mode
+ DATA_MODE, and return the result in a predicate of mode PRED_MODE.
+ Use TARGET as the target register if nonnull and convenient.
+
+ The comparison is expanded through the aarch64_pred_cmp pattern for
+ CMP and DATA_MODE, governed by the all-true constant CONSTM1_RTX
+ (PRED_MODE) and flagged SVE_KNOWN_PTRUE. The returned rtx is the
+ output operand chosen by expand_insn, which callers should not
+ assume to be TARGET. */
+
+static rtx
+aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
+ machine_mode data_mode, rtx op1, rtx op2)
+{
+ insn_code icode = code_for_aarch64_pred_cmp (cmp, data_mode);
+ expand_operand ops[5];
+ create_output_operand (&ops[0], target, pred_mode);
+ create_input_operand (&ops[1], CONSTM1_RTX (pred_mode), pred_mode);
+ create_integer_operand (&ops[2], SVE_KNOWN_PTRUE);
+ create_input_operand (&ops[3], op1, data_mode);
+ create_input_operand (&ops[4], op2, data_mode);
+ expand_insn (icode, 5, ops);
+ return ops[0].value;
+}
+
/* Use a comparison to convert integer vector SRC into MODE, which is
the corresponding SVE predicate mode. Use TARGET for the result
if it's nonnull and convenient. */
aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src)
{
machine_mode src_mode = GET_MODE (src);
- insn_code icode = code_for_aarch64_pred_cmp (NE, src_mode);
- expand_operand ops[4];
- create_output_operand (&ops[0], target, mode);
- create_input_operand (&ops[1], CONSTM1_RTX (mode), mode);
- create_input_operand (&ops[2], src, src_mode);
- create_input_operand (&ops[3], CONST0_RTX (src_mode), src_mode);
- expand_insn (icode, 4, ops);
- return ops[0].value;
+ return aarch64_sve_emit_int_cmp (target, mode, NE, src_mode,
+ src, CONST0_RTX (src_mode));
}
/* Return true if we can move VALUE into a register using a single
return force_reg (V16QImode, mask);
}
-/* Return true if X is a valid second operand for the SVE instruction
- that implements integer comparison OP_CODE. */
-
-static bool
-aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x)
-{
- if (register_operand (x, VOIDmode))
- return true;
-
- switch (op_code)
- {
- case LTU:
- case LEU:
- case GEU:
- case GTU:
- return aarch64_sve_cmp_immediate_p (x, false);
- case LT:
- case LE:
- case GE:
- case GT:
- case NE:
- case EQ:
- return aarch64_sve_cmp_immediate_p (x, true);
- default:
- gcc_unreachable ();
- }
-}
-
-/* Use predicated SVE instructions to implement the equivalent of:
-
- (set TARGET OP)
-
- given that PTRUE is an all-true predicate of the appropriate mode
- and that the instruction clobbers the condition codes. */
-
-static void
-aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
-{
- rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
- gen_rtvec (2, ptrue, op),
- UNSPEC_MERGE_PTRUE);
- rtx_insn *insn = emit_insn (gen_set_clobber_cc_nzc (target, unspec));
- set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
-}
-
/* Expand an SVE integer comparison using the SVE equivalent of:
(set TARGET (CODE OP0 OP1)). */
{
machine_mode pred_mode = GET_MODE (target);
machine_mode data_mode = GET_MODE (op0);
-
- if (!aarch64_sve_cmp_operand_p (code, op1))
- op1 = force_reg (data_mode, op1);
-
- rtx ptrue = aarch64_ptrue_reg (pred_mode);
- rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
- aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
+ rtx res = aarch64_sve_emit_int_cmp (target, pred_mode, code, data_mode,
+ op0, op1);
+ if (!rtx_equal_p (target, res))
+ emit_move_insn (target, res);
}
/* Return the UNSPEC_COND_* code for comparison CODE. */