From 4a942af61c16f38f7fe51ed72a7ac23f73f62f2a Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Wed, 14 Aug 2019 08:29:56 +0000
Subject: [PATCH] [AArch64] Rework SVE FP comparisons

This patch rewrites the SVE FP comparisons so that they always use
unspecs and so that they have an additional operand to indicate whether
the predicate is known to be a PTRUE.  It's part of a series that
rewrites the SVE FP patterns so that they can cope with non-PTRUE
predicates.

2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/iterators.md (UNSPEC_COND_FCMUO): New unspec.
	(cmp_op): Handle it.
	(SVE_COND_FP_CMP): Rename to...
	(SVE_COND_FP_CMP_I0): ...this.
	(SVE_FP_CMP): Remove.
	* config/aarch64/aarch64-sve.md (*fcm<cmp_op><mode>): Replace with...
	(*fcm<cmp_op><mode>): ...this new pattern, using unspecs to
	represent the comparison.
	(*fcmuo<mode>): Use UNSPEC_COND_FCMUO.
	(*fcm<cmp_op><mode>_and_combine, *fcmuo<mode>_and_combine): Update
	accordingly.
	* config/aarch64/aarch64.c (aarch64_emit_sve_ptrue_op): Delete.
	(aarch64_unspec_cond_code): Move after integer code.  Handle
	UNORDERED.
	(aarch64_emit_sve_predicated_cond): Replace with...
	(aarch64_emit_sve_fp_cond): ...this new function.
	(aarch64_emit_sve_or_conds): Replace with...
	(aarch64_emit_sve_or_fp_conds): ...this new function.
	(aarch64_emit_sve_inverted_cond): Replace with...
	(aarch64_emit_sve_invert_fp_cond): ...this new function.
	(aarch64_expand_sve_vec_cmp_float): Update accordingly.

From-SVN: r274421
---
 gcc/ChangeLog                     |  25 ++++++
 gcc/config/aarch64/aarch64-sve.md |  96 +++++++----------------
 gcc/config/aarch64/aarch64.c      | 122 ++++++++++++++----------------
 gcc/config/aarch64/iterators.md   |  20 ++---
 4 files changed, 118 insertions(+), 145 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9f4efd78e6e..693d6f872a5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,28 @@
+2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* config/aarch64/iterators.md (UNSPEC_COND_FCMUO): New unspec.
+	(cmp_op): Handle it.
+	(SVE_COND_FP_CMP): Rename to...
+	(SVE_COND_FP_CMP_I0): ...this.
+	(SVE_FP_CMP): Remove.
+	* config/aarch64/aarch64-sve.md
+	(*fcm<cmp_op><mode>): Replace with...
+	(*fcm<cmp_op><mode>): ...this new pattern,
+	using unspecs to represent the comparison.
+	(*fcmuo<mode>): Use UNSPEC_COND_FCMUO.
+	(*fcm<cmp_op><mode>_and_combine, *fcmuo<mode>_and_combine): Update
+	accordingly.
+	* config/aarch64/aarch64.c (aarch64_emit_sve_ptrue_op): Delete.
+	(aarch64_unspec_cond_code): Move after integer code.  Handle
+	UNORDERED.
+	(aarch64_emit_sve_predicated_cond): Replace with...
+	(aarch64_emit_sve_fp_cond): ...this new function.
+	(aarch64_emit_sve_or_conds): Replace with...
+	(aarch64_emit_sve_or_fp_conds): ...this new function.
+	(aarch64_emit_sve_inverted_cond): Replace with...
+	(aarch64_emit_sve_invert_fp_cond): ...this new function.
+	(aarch64_expand_sve_vec_cmp_float): Update accordingly.
+
 2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
 
 	* config/aarch64/iterators.md (SVE_HSD): New mode iterator.
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index d2583914be9..7cbd690932e 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3136,15 +3136,15 @@
 }
 )
 
-;; Floating-point comparisons predicated with a PTRUE.
+;; Predicated floating-point comparisons.
(define_insn "*fcm" [(set (match_operand: 0 "register_operand" "=Upa, Upa") (unspec: [(match_operand: 1 "register_operand" "Upl, Upl") - (SVE_FP_CMP: - (match_operand:SVE_F 2 "register_operand" "w, w") - (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] - UNSPEC_MERGE_PTRUE))] + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (match_operand:SVE_F 2 "register_operand" "w, w") + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] + SVE_COND_FP_CMP_I0))] "TARGET_SVE" "@ fcm\t%0., %1/z, %2., #0.0 @@ -3156,10 +3156,10 @@ [(set (match_operand: 0 "register_operand" "=Upa") (unspec: [(match_operand: 1 "register_operand" "Upl") - (unordered: - (match_operand:SVE_F 2 "register_operand" "w") - (match_operand:SVE_F 3 "register_operand" "w"))] - UNSPEC_MERGE_PTRUE))] + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "register_operand" "w")] + UNSPEC_COND_FCMUO))] "TARGET_SVE" "fcmuo\t%0., %1/z, %2., %3." ) @@ -3177,20 +3177,21 @@ (and: (unspec: [(match_operand: 1) - (SVE_FP_CMP - (match_operand:SVE_F 2 "register_operand" "w, w") - (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] - UNSPEC_MERGE_PTRUE) + (const_int SVE_KNOWN_PTRUE) + (match_operand:SVE_F 2 "register_operand" "w, w") + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] + SVE_COND_FP_CMP_I0) (match_operand: 4 "register_operand" "Upl, Upl")))] "TARGET_SVE" "#" "&& 1" [(set (match_dup 0) - (and: - (SVE_FP_CMP: - (match_dup 2) - (match_dup 3)) - (match_dup 4)))] + (unspec: + [(match_dup 4) + (const_int SVE_MAYBE_NOT_PTRUE) + (match_dup 2) + (match_dup 3)] + SVE_COND_FP_CMP_I0))] ) ;; Same for unordered comparisons. @@ -3199,62 +3200,21 @@ (and: (unspec: [(match_operand: 1) - (unordered - (match_operand:SVE_F 2 "register_operand" "w") - (match_operand:SVE_F 3 "register_operand" "w"))] - UNSPEC_MERGE_PTRUE) + (const_int SVE_KNOWN_PTRUE) + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "register_operand" "w")] + UNSPEC_COND_FCMUO) (match_operand: 4 "register_operand" "Upl")))] "TARGET_SVE" "#" "&& 1" [(set (match_dup 0) - (and: - (unordered: - (match_dup 2) - (match_dup 3)) - (match_dup 4)))] -) - -;; Unpredicated floating-point comparisons, with the results ANDed with -;; another predicate. This is a valid fold for the same reasons as above. -(define_insn "*fcm_and" - [(set (match_operand: 0 "register_operand" "=Upa, Upa") - (and: - (SVE_FP_CMP: - (match_operand:SVE_F 2 "register_operand" "w, w") - (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")) - (match_operand: 1 "register_operand" "Upl, Upl")))] - "TARGET_SVE" - "@ - fcm\t%0., %1/z, %2., #0.0 - fcm\t%0., %1/z, %2., %3." -) - -;; Same for unordered comparisons. -(define_insn "*fcmuo_and" - [(set (match_operand: 0 "register_operand" "=Upa") - (and: - (unordered: - (match_operand:SVE_F 2 "register_operand" "w") - (match_operand:SVE_F 3 "register_operand" "w")) - (match_operand: 1 "register_operand" "Upl")))] - "TARGET_SVE" - "fcmuo\t%0., %1/z, %2., %3." -) - -;; Predicated floating-point comparisons. We don't need a version -;; of this for unordered comparisons. -(define_insn "*pred_fcm" - [(set (match_operand: 0 "register_operand" "=Upa, Upa") (unspec: - [(match_operand: 1 "register_operand" "Upl, Upl") - (match_operand:SVE_F 2 "register_operand" "w, w") - (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] - SVE_COND_FP_CMP))] - "TARGET_SVE" - "@ - fcm\t%0., %1/z, %2., #0.0 - fcm\t%0., %1/z, %2., %3." 
+	  [(match_dup 4)
+	   (const_int SVE_MAYBE_NOT_PTRUE)
+	   (match_dup 2)
+	   (match_dup 3)]
+	  UNSPEC_COND_FCMUO))]
 )
 
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 6efcb03004a..99b84cdb708 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -17700,28 +17700,35 @@ aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x)
 
    (set TARGET OP)
 
-   given that PTRUE is an all-true predicate of the appropriate mode.  */
+   given that PTRUE is an all-true predicate of the appropriate mode
+   and that the instruction clobbers the condition codes.  */
 
 static void
-aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op)
+aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
 {
   rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
			       gen_rtvec (2, ptrue, op),
			       UNSPEC_MERGE_PTRUE);
-  rtx_insn *insn = emit_set_insn (target, unspec);
+  rtx_insn *insn = emit_insn (gen_set_clobber_cc_nzc (target, unspec));
   set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
 }
 
-/* Likewise, but also clobber the condition codes.  */
+/* Expand an SVE integer comparison using the SVE equivalent of:
 
-static void
-aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
+     (set TARGET (CODE OP0 OP1)).  */
+
+void
+aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
 {
-  rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
-			       gen_rtvec (2, ptrue, op),
-			       UNSPEC_MERGE_PTRUE);
-  rtx_insn *insn = emit_insn (gen_set_clobber_cc_nzc (target, unspec));
-  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
+  machine_mode pred_mode = GET_MODE (target);
+  machine_mode data_mode = GET_MODE (op0);
+
+  if (!aarch64_sve_cmp_operand_p (code, op1))
+    op1 = force_reg (data_mode, op1);
+
+  rtx ptrue = aarch64_ptrue_reg (pred_mode);
+  rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
+  aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
 }
 
 /* Return the UNSPEC_COND_* code for comparison CODE.  */
@@ -17743,6 +17750,8 @@ aarch64_unspec_cond_code (rtx_code code)
       return UNSPEC_COND_FCMLE;
     case GE:
       return UNSPEC_COND_FCMGE;
+    case UNORDERED:
+      return UNSPEC_COND_FCMUO;
     default:
       gcc_unreachable ();
     }
@@ -17750,78 +17759,58 @@
 
 /* Emit:
 
-      (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_<X>))
+      (set TARGET (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
 
-   where <X> is the operation associated with comparison CODE.  This form
-   of instruction is used when (and (CODE OP0 OP1) PRED) would have different
-   semantics, such as when PRED might not be all-true and when comparing
-   inactive lanes could have side effects.  */
+   where <X> is the operation associated with comparison CODE.
+   KNOWN_PTRUE_P is true if PRED is known to be a PTRUE.  */
 
 static void
-aarch64_emit_sve_predicated_cond (rtx target, rtx_code code,
-				  rtx pred, rtx op0, rtx op1)
+aarch64_emit_sve_fp_cond (rtx target, rtx_code code, rtx pred,
+			  bool known_ptrue_p, rtx op0, rtx op1)
 {
+  rtx flag = gen_int_mode (known_ptrue_p, SImode);
   rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
-			       gen_rtvec (3, pred, op0, op1),
+			       gen_rtvec (4, pred, flag, op0, op1),
			       aarch64_unspec_cond_code (code));
   emit_set_insn (target, unspec);
 }
 
-/* Expand an SVE integer comparison using the SVE equivalent of:
-
-     (set TARGET (CODE OP0 OP1)).  */
-
-void
-aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
-{
-  machine_mode pred_mode = GET_MODE (target);
-  machine_mode data_mode = GET_MODE (op0);
-
-  if (!aarch64_sve_cmp_operand_p (code, op1))
-    op1 = force_reg (data_mode, op1);
-
-  rtx ptrue = aarch64_ptrue_reg (pred_mode);
-  rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
-  aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
-}
-
 /* Emit the SVE equivalent of:
 
-     (set TMP1 (CODE1 OP0 OP1))
-     (set TMP2 (CODE2 OP0 OP1))
+     (set TMP1 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X1>))
+     (set TMP2 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X2>))
      (set TARGET (ior:PRED_MODE TMP1 TMP2))
 
-   PTRUE is an all-true predicate with the same mode as TARGET.  */
+   where <Xi> is the operation associated with comparison CODEi.
+   KNOWN_PTRUE_P is true if PRED is known to be a PTRUE.  */
 
 static void
-aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2,
-			   rtx ptrue, rtx op0, rtx op1)
+aarch64_emit_sve_or_fp_conds (rtx target, rtx_code code1, rtx_code code2,
+			      rtx pred, bool known_ptrue_p, rtx op0, rtx op1)
 {
-  machine_mode pred_mode = GET_MODE (ptrue);
+  machine_mode pred_mode = GET_MODE (pred);
   rtx tmp1 = gen_reg_rtx (pred_mode);
-  aarch64_emit_sve_ptrue_op (tmp1, ptrue,
-			     gen_rtx_fmt_ee (code1, pred_mode, op0, op1));
+  aarch64_emit_sve_fp_cond (tmp1, code1, pred, known_ptrue_p, op0, op1);
   rtx tmp2 = gen_reg_rtx (pred_mode);
-  aarch64_emit_sve_ptrue_op (tmp2, ptrue,
-			     gen_rtx_fmt_ee (code2, pred_mode, op0, op1));
+  aarch64_emit_sve_fp_cond (tmp2, code2, pred, known_ptrue_p, op0, op1);
   aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
 }
 
 /* Emit the SVE equivalent of:
 
-     (set TMP (CODE OP0 OP1))
+     (set TMP (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
      (set TARGET (not TMP))
 
-   PTRUE is an all-true predicate with the same mode as TARGET.  */
+   where <X> is the operation associated with comparison CODE.
+   KNOWN_PTRUE_P is true if PRED is known to be a PTRUE.  */
 
 static void
-aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code,
-				rtx op0, rtx op1)
+aarch64_emit_sve_invert_fp_cond (rtx target, rtx_code code, rtx pred,
+				 bool known_ptrue_p, rtx op0, rtx op1)
 {
-  machine_mode pred_mode = GET_MODE (ptrue);
+  machine_mode pred_mode = GET_MODE (pred);
   rtx tmp = gen_reg_rtx (pred_mode);
-  aarch64_emit_sve_ptrue_op (tmp, ptrue,
-			     gen_rtx_fmt_ee (code, pred_mode, op0, op1));
+  aarch64_emit_sve_fp_cond (tmp, code, pred, known_ptrue_p, op0, op1);
   aarch64_emit_unop (target, one_cmpl_optab, tmp);
 }
 
@@ -17854,14 +17843,13 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
     case NE:
       {
	/* There is native support for the comparison.  */
-	rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
-	aarch64_emit_sve_ptrue_op (target, ptrue, cond);
+	aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
	return false;
       }
 
     case LTGT:
       /* This is a trapping operation (LT or GT).  */
-      aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1);
+      aarch64_emit_sve_or_fp_conds (target, LT, GT, ptrue, true, op0, op1);
       return false;
 
     case UNEQ:
@@ -17869,7 +17857,8 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
	{
	  /* This would trap for signaling NaNs.  */
	  op1 = force_reg (data_mode, op1);
-	  aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1);
+	  aarch64_emit_sve_or_fp_conds (target, UNORDERED, EQ,
+					ptrue, true, op0, op1);
	  return false;
	}
       /* fall through */
@@ -17882,7 +17871,8 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
	  /* Work out which elements are ordered.  */
	  rtx ordered = gen_reg_rtx (pred_mode);
	  op1 = force_reg (data_mode, op1);
-	  aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1);
+	  aarch64_emit_sve_invert_fp_cond (ordered, UNORDERED,
+					   ptrue, true, op0, op1);
 
	  /* Test the opposite condition for the ordered elements,
	     then invert the result.  */
@@ -17892,13 +17882,12 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
	  code = reverse_condition_maybe_unordered (code);
	  if (can_invert_p)
	    {
-	      aarch64_emit_sve_predicated_cond (target, code,
-						ordered, op0, op1);
+	      aarch64_emit_sve_fp_cond (target, code,
+					ordered, false, op0, op1);
	      return true;
	    }
-	  rtx tmp = gen_reg_rtx (pred_mode);
-	  aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1);
-	  aarch64_emit_unop (target, one_cmpl_optab, tmp);
+	  aarch64_emit_sve_invert_fp_cond (target, code,
+					   ordered, false, op0, op1);
	  return false;
	}
       break;
@@ -17916,11 +17905,10 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
 
   code = reverse_condition_maybe_unordered (code);
   if (can_invert_p)
     {
-      rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
-      aarch64_emit_sve_ptrue_op (target, ptrue, cond);
+      aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
       return true;
     }
-  aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1);
+  aarch64_emit_sve_invert_fp_cond (target, code, ptrue, true, op0, op1);
   return false;
 }
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 32c662f90b6..6bf3638ed1a 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -479,6 +479,7 @@
     UNSPEC_COND_FCMLE	; Used in aarch64-sve.md.
     UNSPEC_COND_FCMLT	; Used in aarch64-sve.md.
     UNSPEC_COND_FCMNE	; Used in aarch64-sve.md.
+    UNSPEC_COND_FCMUO	; Used in aarch64-sve.md.
     UNSPEC_COND_FDIV	; Used in aarch64-sve.md.
     UNSPEC_COND_FMAXNM	; Used in aarch64-sve.md.
     UNSPEC_COND_FMINNM	; Used in aarch64-sve.md.
@@ -1273,9 +1274,6 @@
 ;; SVE integer comparisons.
 (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
 
-;; SVE floating-point comparisons.
-(define_code_iterator SVE_FP_CMP [lt le eq ne ge gt])
-
 ;; -------------------------------------------------------------------
 ;; Code Attributes
 ;; -------------------------------------------------------------------
@@ -1663,12 +1661,13 @@
				UNSPEC_COND_FNMLA
				UNSPEC_COND_FNMLS])
 
-(define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_FCMEQ
-				      UNSPEC_COND_FCMGE
-				      UNSPEC_COND_FCMGT
-				      UNSPEC_COND_FCMLE
-				      UNSPEC_COND_FCMLT
-				      UNSPEC_COND_FCMNE])
+;; SVE FP comparisons that accept #0.0.
+(define_int_iterator SVE_COND_FP_CMP_I0 [UNSPEC_COND_FCMEQ
+					 UNSPEC_COND_FCMGE
+					 UNSPEC_COND_FCMGT
+					 UNSPEC_COND_FCMLE
+					 UNSPEC_COND_FCMLT
+					 UNSPEC_COND_FCMNE])
 
 (define_int_iterator FCADD [UNSPEC_FCADD90
			    UNSPEC_FCADD270])
@@ -1955,7 +1954,8 @@
			 (UNSPEC_COND_FCMGT "gt")
			 (UNSPEC_COND_FCMLE "le")
			 (UNSPEC_COND_FCMLT "lt")
-			 (UNSPEC_COND_FCMNE "ne")])
+			 (UNSPEC_COND_FCMNE "ne")
+			 (UNSPEC_COND_FCMUO "uo")])
 
 (define_int_attr sve_int_op [(UNSPEC_ANDV "andv")
			     (UNSPEC_IORV "orv")
-- 
2.30.2
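
[Editor's note: the following sketch is not part of the patch.  It shows, in
RTL form, what the representation change above amounts to for one predicated
FP comparison; the VNx4BI/VNx4SF modes and pseudo-register numbers are
invented for the example.]

Before this patch, a GT comparison of two SVE_F vectors is a plain
comparison rtx wrapped in UNSPEC_MERGE_PTRUE, so the predicate is
implicitly required to be an all-true PTRUE:

    (set (reg:VNx4BI 96)
         (unspec:VNx4BI
           [(reg:VNx4BI 97)              ;; must be a PTRUE
            (gt:VNx4BI (reg:VNx4SF 98)
                       (reg:VNx4SF 99))]
           UNSPEC_MERGE_PTRUE))

After the patch, the comparison itself is the unspec, and a new operand
records whether the predicate is known to be a PTRUE:

    (set (reg:VNx4BI 96)
         (unspec:VNx4BI
           [(reg:VNx4BI 97)
            (const_int SVE_KNOWN_PTRUE)  ;; or SVE_MAYBE_NOT_PTRUE
            (reg:VNx4SF 98)
            (reg:VNx4SF 99)]
           UNSPEC_COND_FCMGT))

Keeping the flag explicit is what lets the *fcm<cmp_op><mode>_and_combine
split above fold (and (cmp ...) pred) into a single comparison predicated
on PRED, simply by substituting the new predicate and downgrading the flag
to SVE_MAYBE_NOT_PTRUE.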