From 00fa90d975bfacfd91a615fbee24e3e6a100100f Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 14 Aug 2019 08:50:10 +0000 Subject: [PATCH] [AArch64] Rework SVE integer comparisons The remaining uses of UNSPEC_MERGE_PTRUE were in integer comparison patterns. These aren't actually merging operations but zeroing ones, although there's no practical difference when the predicate is a PTRUE. All comparisons produced by expand are predicated on a PTRUE, although we try to pattern-match a compare-and-AND as a predicated comparison during combine. Like previous patches, this one rearranges things in a way that works better with the ACLE, where the initial predicate might or might not be a PTRUE. The new patterns use UNSPEC_PRED_Z to represent zeroing predication, with an aarch64_sve_ptrue_flag to record whether the predicate is all-true (as for UNSPEC_PTEST). See the block comment in the patch for more details. 2019-08-14 Richard Sandiford gcc/ * config/aarch64/aarch64-protos.h (aarch64_sve_same_pred_for_ptest_p): Declare. * config/aarch64/aarch64.c (aarch64_sve_same_pred_for_ptest_p) (aarch64_sve_emit_int_cmp): New functions. (aarch64_convert_sve_data_to_pred): Use aarch64_sve_emit_int_cmp. (aarch64_sve_cmp_operand_p, aarch64_emit_sve_ptrue_op_cc): Delete. (aarch64_expand_sve_vec_cmp_int): Use aarch64_sve_emit_int_cmp. * config/aarch64/aarch64.md (UNSPEC_MERGE_PTRUE): Delete. (UNSPEC_PRED_Z): New unspec. (set_clobber_cc_nzc): Delete. * config/aarch64/aarch64-sve.md: Add a block comment about UNSPEC_PRED_Z. (*cmp): Rename to... (@aarch64_pred_cmp): ...this, replacing the old pattern with that name. Use UNSPEC_PRED_Z instead of UNSPEC_MERGE_PTRUE. (*cmp_cc): Use UNSPEC_PRED_Z instead of UNSPEC_MERGE_PTRUE. Use aarch64_sve_same_pred_for_ptest_p to check for compatible predicates. (*cmp_ptest): Likewise. (*cmp_and): Match a known-ptrue UNSPEC_PRED_Z instead of UNSPEC_MERGE_PTRUE. Split into the new form of predicated comparisons above. 
From-SVN: r274429 --- gcc/ChangeLog | 26 +++++ gcc/config/aarch64/aarch64-protos.h | 1 + gcc/config/aarch64/aarch64-sve.md | 155 ++++++++++++++++++---------- gcc/config/aarch64/aarch64.c | 108 +++++++++---------- gcc/config/aarch64/aarch64.md | 8 +- 5 files changed, 179 insertions(+), 119 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ce4c5f69868..9de3d0c5575 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,29 @@ +2019-08-14 Richard Sandiford + + * config/aarch64/aarch64-protos.h (aarch64_sve_same_pred_for_ptest_p): + Declare. + * config/aarch64/aarch64.c (aarch64_sve_same_pred_for_ptest_p) + (aarch64_sve_emit_int_cmp): New functions. + (aarch64_convert_sve_data_to_pred): Use aarch64_sve_emit_int_cmp. + (aarch64_sve_cmp_operand_p, aarch64_emit_sve_ptrue_op_cc): Delete. + (aarch64_expand_sve_vec_cmp_int): Use aarch64_sve_emit_int_cmp. + * config/aarch64/aarch64.md (UNSPEC_MERGE_PTRUE): Delete. + (UNSPEC_PRED_Z): New unspec. + (set_clobber_cc_nzc): Delete. + * config/aarch64/aarch64-sve.md: Add a block comment about + UNSPEC_PRED_Z. + (*cmp): Rename to... + (@aarch64_pred_cmp): ...this, replacing + the old pattern with that name. Use UNSPEC_PRED_Z instead of + UNSPEC_MERGE_PTRUE. + (*cmp_cc): Use UNSPEC_PRED_Z instead of + UNSPEC_MERGE_PTRUE. Use aarch64_sve_same_pred_for_ptest_p to + check for compatible predicates. + (*cmp_ptest): Likewise. + (*cmp_and): Match a known-ptrue UNSPEC_PRED_Z instead + of UNSPEC_MERGE_PTRUE. Split into the new form of predicated + comparisons above. + 2019-08-14 Richard Sandiford * config/aarch64/aarch64.md (UNSPEC_PRED_X): New unspec. 
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index a4728e77c1d..858c4500406 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -555,6 +555,7 @@ void aarch64_expand_mov_immediate (rtx, rtx); rtx aarch64_ptrue_reg (machine_mode); rtx aarch64_pfalse_reg (machine_mode); bool aarch64_sve_pred_dominates_p (rtx *, rtx); +bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *); void aarch64_emit_sve_pred_move (rtx, rtx, rtx); void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode); bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx); diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 4f416189663..e95486749b9 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -24,6 +24,7 @@ ;; == General notes ;; ---- Note on the handling of big-endian SVE ;; ---- Description of UNSPEC_PTEST +;; ---- Description of UNSPEC_PRED_Z ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X ;; ---- Note on predicated FP arithmetic patterns and GP "strictness" ;; @@ -231,6 +232,52 @@ ;; - OP is the predicate we want to test, of the same mode as CAST_GP. ;; ;; ------------------------------------------------------------------------- +;; ---- Description of UNSPEC_PRED_Z +;; ------------------------------------------------------------------------- +;; +;; SVE integer comparisons are predicated and return zero for inactive +;; lanes. Sometimes we use them with predicates that are all-true and +;; sometimes we use them with general predicates. +;; +;; The integer comparisons also set the flags and so build in the effect +;; of a PTEST. We therefore want to be able to combine integer comparison +;; patterns with PTESTs of the result. One difficulty with doing this is +;; that (as noted above) the PTEST is always a .B operation and so can place +;; stronger requirements on the governing predicate than the comparison does. 
+;; +;; For example, when applying a separate PTEST to the result of a full-vector +;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a +;; .B PTRUE. In contrast, the comparison might be predicated on either +;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate +;; bits don't matter for .H operations. +;; +;; We therefore can't rely on a full-vector comparison using the same +;; predicate register as a following PTEST. We instead need to remember +;; whether a comparison is known to be a full-vector comparison and use +;; this information in addition to a check for equal predicate registers. +;; At the same time, it's useful to have a common representation for all +;; integer comparisons, so that they can be handled by a single set of +;; patterns. +;; +;; We therefore take a similar approach to UNSPEC_PTEST above and use: +;; +;; (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z) +;; +;; where: +;; +;; - GP is the governing predicate, of mode <M:VPRED> +;; +;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value +;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE +;; otherwise +;; +;; - CODE is the comparison code +;; +;; - OP0 and OP1 are the values being compared, of mode M +;; +;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero. +;; +;; ------------------------------------------------------------------------- ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X ;; ------------------------------------------------------------------------- ;; @@ -3008,115 +3055,119 @@ } ) -;; Integer comparisons predicated with a PTRUE. -(define_insn "*cmp" +;; Predicated integer comparisons. 
+(define_insn "@aarch64_pred_cmp" [(set (match_operand: 0 "register_operand" "=Upa, Upa") (unspec: [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") (SVE_INT_CMP: - (match_operand:SVE_I 2 "register_operand" "w, w") - (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] - UNSPEC_MERGE_PTRUE)) + (match_operand:SVE_I 3 "register_operand" "w, w") + (match_operand:SVE_I 4 "aarch64_sve_cmp__operand" ", w"))] + UNSPEC_PRED_Z)) (clobber (reg:CC_NZC CC_REGNUM))] "TARGET_SVE" "@ - cmp\t%0., %1/z, %2., #%3 - cmp\t%0., %1/z, %2., %3." + cmp\t%0., %1/z, %3., #%4 + cmp\t%0., %1/z, %3., %4." ) -;; Integer comparisons predicated with a PTRUE in which both the flag and -;; predicate results are interesting. -(define_insn "*cmp_cc" +;; Predicated integer comparisons in which both the flag and predicate +;; results are interesting. +(define_insn_and_rewrite "*cmp_cc" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") (match_operand 4) (match_operand:SI 5 "aarch64_sve_ptrue_flag") (unspec: - [(match_dup 4) + [(match_operand 6) + (match_operand:SI 7 "aarch64_sve_ptrue_flag") (SVE_INT_CMP: (match_operand:SVE_I 2 "register_operand" "w, w") (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] - UNSPEC_MERGE_PTRUE)] + UNSPEC_PRED_Z)] UNSPEC_PTEST)) (set (match_operand: 0 "register_operand" "=Upa, Upa") (unspec: - [(match_dup 4) + [(match_dup 6) + (match_dup 7) (SVE_INT_CMP: (match_dup 2) (match_dup 3))] - UNSPEC_MERGE_PTRUE))] - "TARGET_SVE" + UNSPEC_PRED_Z))] + "TARGET_SVE + && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" "@ cmp\t%0., %1/z, %2., #%3 cmp\t%0., %1/z, %2., %3." + "&& !rtx_equal_p (operands[4], operands[6])" + { + operands[6] = copy_rtx (operands[4]); + operands[7] = operands[5]; + } ) -;; Integer comparisons predicated with a PTRUE in which only the flags result -;; is interesting. 
-(define_insn "*cmp_ptest" +;; Predicated integer comparisons in which only the flags result is +;; interesting. +(define_insn_and_rewrite "*cmp_ptest" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") (match_operand 4) (match_operand:SI 5 "aarch64_sve_ptrue_flag") (unspec: - [(match_dup 4) + [(match_operand 6) + (match_operand:SI 7 "aarch64_sve_ptrue_flag") (SVE_INT_CMP: (match_operand:SVE_I 2 "register_operand" "w, w") (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] - UNSPEC_MERGE_PTRUE)] + UNSPEC_PRED_Z)] UNSPEC_PTEST)) (clobber (match_scratch: 0 "=Upa, Upa"))] - "TARGET_SVE" + "TARGET_SVE + && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" "@ cmp\t%0., %1/z, %2., #%3 cmp\t%0., %1/z, %2., %3." + "&& !rtx_equal_p (operands[4], operands[6])" + { + operands[6] = copy_rtx (operands[4]); + operands[7] = operands[5]; + } ) ;; Predicated integer comparisons, formed by combining a PTRUE-predicated ;; comparison with an AND. Split the instruction into its preferred form -;; (below) at the earliest opportunity, in order to get rid of the -;; redundant operand 1. -(define_insn_and_split "*pred_cmp_combine" +;; at the earliest opportunity, in order to get rid of the redundant +;; operand 4. 
+(define_insn_and_split "*cmp_and" [(set (match_operand: 0 "register_operand" "=Upa, Upa") - (and: - (unspec: - [(match_operand: 1) - (SVE_INT_CMP: - (match_operand:SVE_I 2 "register_operand" "w, w") - (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] - UNSPEC_MERGE_PTRUE) - (match_operand: 4 "register_operand" "Upl, Upl"))) + (and: + (unspec: + [(match_operand 4) + (const_int SVE_KNOWN_PTRUE) + (SVE_INT_CMP: + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] + UNSPEC_PRED_Z) + (match_operand: 1 "register_operand" "Upl, Upl"))) (clobber (reg:CC_NZC CC_REGNUM))] "TARGET_SVE" "#" "&& 1" [(parallel [(set (match_dup 0) - (and: - (SVE_INT_CMP: - (match_dup 2) - (match_dup 3)) - (match_dup 4))) + (unspec: + [(match_dup 1) + (const_int SVE_MAYBE_NOT_PTRUE) + (SVE_INT_CMP: + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_Z)) (clobber (reg:CC_NZC CC_REGNUM))])] ) -;; Predicated integer comparisons. -(define_insn "@aarch64_pred_cmp" - [(set (match_operand: 0 "register_operand" "=Upa, Upa") - (and: - (SVE_INT_CMP: - (match_operand:SVE_I 2 "register_operand" "w, w") - (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w")) - (match_operand: 1 "register_operand" "Upl, Upl"))) - (clobber (reg:CC_NZC CC_REGNUM))] - "TARGET_SVE" - "@ - cmp\t%0., %1/z, %2., #%3 - cmp\t%0., %1/z, %2., %3." -) - ;; ------------------------------------------------------------------------- ;; ---- [INT] While tests ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index a936608482e..40d102b3d19 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2783,6 +2783,48 @@ aarch64_sve_pred_dominates_p (rtx *pred1, rtx pred2) || rtx_equal_p (pred1[0], pred2)); } +/* PRED1[0] is a PTEST predicate and PRED1[1] is an aarch64_sve_ptrue_flag + for it. 
PRED2[0] is the predicate for the instruction whose result + is tested by the PTEST and PRED2[1] is again an aarch64_sve_ptrue_flag + for it. Return true if we can prove that the two predicates are + equivalent for PTEST purposes; that is, if we can replace PRED2[0] + with PRED1[0] without changing behavior. */ + +bool +aarch64_sve_same_pred_for_ptest_p (rtx *pred1, rtx *pred2) +{ + machine_mode mode = GET_MODE (pred1[0]); + gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL + && mode == GET_MODE (pred2[0]) + && aarch64_sve_ptrue_flag (pred1[1], SImode) + && aarch64_sve_ptrue_flag (pred2[1], SImode)); + + bool ptrue1_p = (pred1[0] == CONSTM1_RTX (mode) + || INTVAL (pred1[1]) == SVE_KNOWN_PTRUE); + bool ptrue2_p = (pred2[0] == CONSTM1_RTX (mode) + || INTVAL (pred2[1]) == SVE_KNOWN_PTRUE); + return (ptrue1_p && ptrue2_p) || rtx_equal_p (pred1[0], pred2[0]); +} + +/* Emit a comparison CMP between OP0 and OP1, both of which have mode + DATA_MODE, and return the result in a predicate of mode PRED_MODE. + Use TARGET as the target register if nonnull and convenient. */ + +static rtx +aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp, + machine_mode data_mode, rtx op1, rtx op2) +{ + insn_code icode = code_for_aarch64_pred_cmp (cmp, data_mode); + expand_operand ops[5]; + create_output_operand (&ops[0], target, pred_mode); + create_input_operand (&ops[1], CONSTM1_RTX (pred_mode), pred_mode); + create_integer_operand (&ops[2], SVE_KNOWN_PTRUE); + create_input_operand (&ops[3], op1, data_mode); + create_input_operand (&ops[4], op2, data_mode); + expand_insn (icode, 5, ops); + return ops[0].value; +} + /* Use a comparison to convert integer vector SRC into MODE, which is the corresponding SVE predicate mode. Use TARGET for the result if it's nonnull and convenient. 
*/ @@ -2791,14 +2833,8 @@ static rtx aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src) { machine_mode src_mode = GET_MODE (src); - insn_code icode = code_for_aarch64_pred_cmp (NE, src_mode); - expand_operand ops[4]; - create_output_operand (&ops[0], target, mode); - create_input_operand (&ops[1], CONSTM1_RTX (mode), mode); - create_input_operand (&ops[2], src, src_mode); - create_input_operand (&ops[3], CONST0_RTX (src_mode), src_mode); - expand_insn (icode, 4, ops); - return ops[0].value; + return aarch64_sve_emit_int_cmp (target, mode, NE, src_mode, + src, CONST0_RTX (src_mode)); } /* Return true if we can move VALUE into a register using a single @@ -17667,51 +17703,6 @@ aarch64_reverse_mask (machine_mode mode, unsigned int nunits) return force_reg (V16QImode, mask); } -/* Return true if X is a valid second operand for the SVE instruction - that implements integer comparison OP_CODE. */ - -static bool -aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x) -{ - if (register_operand (x, VOIDmode)) - return true; - - switch (op_code) - { - case LTU: - case LEU: - case GEU: - case GTU: - return aarch64_sve_cmp_immediate_p (x, false); - case LT: - case LE: - case GE: - case GT: - case NE: - case EQ: - return aarch64_sve_cmp_immediate_p (x, true); - default: - gcc_unreachable (); - } -} - -/* Use predicated SVE instructions to implement the equivalent of: - - (set TARGET OP) - - given that PTRUE is an all-true predicate of the appropriate mode - and that the instruction clobbers the condition codes. */ - -static void -aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op) -{ - rtx unspec = gen_rtx_UNSPEC (GET_MODE (target), - gen_rtvec (2, ptrue, op), - UNSPEC_MERGE_PTRUE); - rtx_insn *insn = emit_insn (gen_set_clobber_cc_nzc (target, unspec)); - set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op)); -} - /* Expand an SVE integer comparison using the SVE equivalent of: (set TARGET (CODE OP0 OP1)). 
*/ @@ -17721,13 +17712,10 @@ aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1) { machine_mode pred_mode = GET_MODE (target); machine_mode data_mode = GET_MODE (op0); - - if (!aarch64_sve_cmp_operand_p (code, op1)) - op1 = force_reg (data_mode, op1); - - rtx ptrue = aarch64_ptrue_reg (pred_mode); - rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1); - aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond); + rtx res = aarch64_sve_emit_int_cmp (target, pred_mode, code, data_mode, + op0, op1); + if (!rtx_equal_p (target, res)) + emit_move_insn (target, res); } /* Return the UNSPEC_COND_* code for comparison CODE. */ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 589b4d3ae90..6d9692f563f 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -219,8 +219,8 @@ UNSPEC_LD1RQ UNSPEC_LD1_GATHER UNSPEC_ST1_SCATTER - UNSPEC_MERGE_PTRUE UNSPEC_PRED_X + UNSPEC_PRED_Z UNSPEC_PTEST UNSPEC_UNPACKSHI UNSPEC_UNPACKUHI @@ -7155,12 +7155,6 @@ [(set_attr "type" "no_insn")] ) -;; Helper for aarch64.c code. -(define_expand "set_clobber_cc_nzc" - [(parallel [(set (match_operand 0) - (match_operand 1)) - (clobber (reg:CC_NZC CC_REGNUM))])]) - ;; Hard speculation barrier. (define_insn "speculation_barrier" [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)] -- 2.30.2