From 34467289631e29545e14148515ab5f5d0d9e4fa7 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 14 Aug 2019 07:58:30 +0000 Subject: [PATCH] [AArch64] Rework SVE PTEST patterns This patch reworks the rtl representation of the SVE PTEST operation so that: - the governing predicate is always VNx16BI (and so all bits are defined) - it is still possible to pattern-match the governing predicate in the mode that it had previously - a new hint operand says whether the governing predicate is known to be all true for the element size of interest, rather than this being part of the unspec name. These changes make it easier to handle more flag-setting instructions as part of the ACLE work. See the comment in aarch64-sve.md for more details. 2019-08-14 Richard Sandiford gcc/ * config/aarch64/aarch64-protos.h (aarch64_ptrue_all): Declare. * config/aarch64/aarch64.c (aarch64_ptrue_all): New function. * config/aarch64/aarch64.md (UNSPEC_PTEST_PTRUE): Delete. (UNSPEC_PTEST): New unspec. (SVE_MAYBE_NOT_PTRUE, SVE_KNOWN_PTRUE): New constants. * config/aarch64/iterators.md (data_bytes): New mode attribute. * config/aarch64/predicates.md (aarch64_sve_ptrue_flag): New predicate. * config/aarch64/aarch64-sve.md: Add a new section describing the handling of UNSPEC_PTEST. (pred_<optab><mode>3): Rename to... (@aarch64_pred_<optab><mode>_z): ...this. (ptest_ptrue<mode>): Replace with... (aarch64_ptest<mode>): ...this new pattern. (cbranch<mode>4): Update after above changes. (*<optab><mode>3_cc): Use UNSPEC_PTEST instead of UNSPEC_PTEST_PTRUE. (*cmp<cmp_op><mode>_cc): Likewise. (*cmp<cmp_op><mode>_ptest): Likewise. (*while_ult<GPI:mode><PRED_ALL:mode>_cc): Likewise. 
From-SVN: r274414 --- gcc/ChangeLog | 22 ++++ gcc/config/aarch64/aarch64-protos.h | 1 + gcc/config/aarch64/aarch64-sve.md | 154 ++++++++++++++++++++-------- gcc/config/aarch64/aarch64.c | 16 +++ gcc/config/aarch64/aarch64.md | 11 +- gcc/config/aarch64/iterators.md | 4 + gcc/config/aarch64/predicates.md | 5 + 7 files changed, 168 insertions(+), 45 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b8ee4ed4bdb..9e2b2c15dcf 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2019-08-14 Richard Sandiford + + * config/aarch64/aarch64-protos.h (aarch64_ptrue_all): Declare. + * config/aarch64/aarch64.c (aarch64_ptrue_all): New function. + * config/aarch64/aarch64.md (UNSPEC_PTEST_PTRUE): Delete. + (UNSPEC_PTEST): New unspec. + (SVE_MAYBE_NOT_PTRUE, SVE_KNOWN_PTRUE): New constants. + * config/aarch64/iterators.md (data_bytes): New mode attribute. + * config/aarch64/predicates.md (aarch64_sve_ptrue_flag): New predicate. + * config/aarch64/aarch64-sve.md: Add a new section describing the + handling of UNSPEC_PTEST. + (pred_<optab><mode>3): Rename to... + (@aarch64_pred_<optab><mode>_z): ...this. + (ptest_ptrue<mode>): Replace with... + (aarch64_ptest<mode>): ...this new pattern. + (cbranch<mode>4): Update after above changes. + (*<optab><mode>3_cc): Use UNSPEC_PTEST instead of + UNSPEC_PTEST_PTRUE. + (*cmp<cmp_op><mode>_cc): Likewise. + (*cmp<cmp_op><mode>_ptest): Likewise. + (*while_ult<GPI:mode><PRED_ALL:mode>_cc): Likewise. 
+ 2019-08-14 Xiong Hu Luo PR lto/91287 diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 86d53c5ce1e..22a5f9433db 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -550,6 +550,7 @@ const char * aarch64_output_probe_stack_range (rtx, rtx); const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx); void aarch64_err_no_fpadvsimd (machine_mode); void aarch64_expand_epilogue (bool); +rtx aarch64_ptrue_all (unsigned int); void aarch64_expand_mov_immediate (rtx, rtx); rtx aarch64_ptrue_reg (machine_mode); rtx aarch64_pfalse_reg (machine_mode); diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 53d93a367db..f94ad5cdbf4 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -23,6 +23,7 @@ ;; ;; == General notes ;; ---- Note on the handling of big-endian SVE +;; ---- Description of UNSPEC_PTEST ;; ;; == Moves ;; ---- Moves of single vectors @@ -166,7 +167,67 @@ ;; the order of the bytes within the elements is different. We instead ;; access spill slots via LD1 and ST1, using secondary reloads to ;; reserve a predicate register. - +;; +;; ------------------------------------------------------------------------- +;; ---- Description of UNSPEC_PTEST +;; ------------------------------------------------------------------------- +;; +;; SVE provides a PTEST instruction for testing the active lanes of a +;; predicate and setting the flags based on the result. The associated +;; condition code tests are: +;; +;; - any (= ne): at least one active bit is set +;; - none (= eq): all active bits are clear (*) +;; - first (= mi): the first active bit is set +;; - nfrst (= pl): the first active bit is clear (*) +;; - last (= cc): the last active bit is set +;; - nlast (= cs): the last active bit is clear (*) +;; +;; where the conditions marked (*) are also true when there are no active +;; lanes (i.e. 
when the governing predicate is a PFALSE). The flags results +;; of a PTEST use the condition code mode CC_NZC. +;; +;; PTEST is always a .B operation (i.e. it always operates on VNx16BI). +;; This means that for other predicate modes, we need a governing predicate +;; in which all bits are defined. +;; +;; For example, most predicated .H operations ignore the odd bits of the +;; governing predicate, so that an active lane is represented by the +;; bits "1x" and an inactive lane by the bits "0x", where "x" can be +;; any value. To test a .H predicate, we instead need "10" and "00" +;; respectively, so that the condition only tests the even bits of the +;; predicate. +;; +;; Several instructions set the flags as a side-effect, in the same way +;; that a separate PTEST would. It's important for code quality that we +;; use these flags results as often as possible, particularly in the case +;; of WHILE* and RDFFR. +;; +;; Also, some of the instructions that set the flags are unpredicated +;; and instead implicitly test all .B, .H, .S or .D elements, as though +;; they were predicated on a PTRUE of that size. For example, a .S +;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE +;; would. +;; +;; We therefore need to represent PTEST operations in a way that +;; makes it easy to combine them with both predicated and unpredicated +;; operations, while using a VNx16BI governing predicate for all +;; predicate modes. We do this using: +;; +;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST) +;; +;; where: +;; +;; - GP is the real VNx16BI governing predicate +;; +;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting +;; GP to CAST_GP are guaranteed to be clear in GP. +;; +;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value +;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and +;; SVE_MAYBE_NOT_PTRUE otherwise. +;; +;; - OP is the predicate we want to test, of the same mode as CAST_GP. 
;; ========================================================================= ;; == Moves @@ -2343,7 +2404,7 @@ ) ;; Predicated predicate AND, EOR and ORR. -(define_insn "pred_3" +(define_insn "@aarch64_pred__z" [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") (and:PRED_ALL (LOGICAL:PRED_ALL @@ -2355,23 +2416,23 @@ ) ;; Perform a logical operation on operands 2 and 3, using operand 1 as -;; the GP (which is known to be a PTRUE). Store the result in operand 0 -;; and set the flags in the same way as for PTEST. The (and ...) in the -;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested -;; value is structurally equivalent to rhs of the second set. +;; the GP. Store the result in operand 0 and set the flags in the same +;; way as for PTEST. (define_insn "*3_cc" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC - [(match_operand:PRED_ALL 1 "register_operand" "Upa") + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") (and:PRED_ALL (LOGICAL:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa") (match_operand:PRED_ALL 3 "register_operand" "Upa")) - (match_dup 1))] - UNSPEC_PTEST_PTRUE)) + (match_dup 4))] + UNSPEC_PTEST)) (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) - (match_dup 1)))] + (match_dup 4)))] "TARGET_SVE" "s\t%0.b, %1/z, %2.b, %3.b" ) @@ -2836,17 +2897,19 @@ (define_insn "*cmp_cc" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC - [(match_operand: 1 "register_operand" "Upl, Upl") + [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") (unspec: - [(match_dup 1) + [(match_dup 4) (SVE_INT_CMP: (match_operand:SVE_I 2 "register_operand" "w, w") (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] UNSPEC_MERGE_PTRUE)] - UNSPEC_PTEST_PTRUE)) + UNSPEC_PTEST)) (set (match_operand: 0 "register_operand" "=Upa, Upa") (unspec: - 
[(match_dup 1) + [(match_dup 4) (SVE_INT_CMP: (match_dup 2) (match_dup 3))] @@ -2862,14 +2925,16 @@ (define_insn "*cmp_ptest" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC - [(match_operand: 1 "register_operand" "Upl, Upl") + [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") (unspec: - [(match_dup 1) + [(match_dup 4) (SVE_INT_CMP: (match_operand:SVE_I 2 "register_operand" "w, w") (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] UNSPEC_MERGE_PTRUE)] - UNSPEC_PTEST_PTRUE)) + UNSPEC_PTEST)) (clobber (match_scratch: 0 "=Upa, Upa"))] "TARGET_SVE" "@ @@ -2940,28 +3005,31 @@ ) ;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP. -;; Handle the case in which both results are useful. The GP operand -;; to the PTEST isn't needed, so we allow it to be anything. +;; Handle the case in which both results are useful. The GP operands +;; to the PTEST aren't needed, so we allow them to be anything. (define_insn_and_rewrite "*while_ult_cc" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC - [(match_operand:PRED_ALL 1) + [(match_operand 3) + (match_operand 4) + (const_int SVE_KNOWN_PTRUE) (unspec:PRED_ALL - [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") - (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")] + [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") + (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] UNSPEC_WHILE_LO)] - UNSPEC_PTEST_PTRUE)) + UNSPEC_PTEST)) (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") - (unspec:PRED_ALL [(match_dup 2) - (match_dup 3)] + (unspec:PRED_ALL [(match_dup 1) + (match_dup 2)] UNSPEC_WHILE_LO))] "TARGET_SVE" - "whilelo\t%0., %2, %3" + "whilelo\t%0., %1, %2" ;; Force the compiler to drop the unused predicate operand, so that we ;; don't have an unnecessary PTRUE. 
- "&& !CONSTANT_P (operands[1])" + "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" { - operands[1] = CONSTM1_RTX (mode); + operands[3] = CONSTM1_RTX (VNx16BImode); + operands[4] = CONSTM1_RTX (mode); } ) @@ -3133,36 +3201,34 @@ (pc)))] "" { - rtx ptrue = aarch64_ptrue_reg (mode); + rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all ()); + rtx cast_ptrue = gen_lowpart (mode, ptrue); + rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode); rtx pred; if (operands[2] == CONST0_RTX (mode)) pred = operands[1]; else { pred = gen_reg_rtx (mode); - emit_insn (gen_pred_xor3 (pred, ptrue, operands[1], - operands[2])); + emit_insn (gen_aarch64_pred_xor_z (pred, cast_ptrue, operands[1], + operands[2])); } - emit_insn (gen_ptest_ptrue (ptrue, pred)); + emit_insn (gen_aarch64_ptest (ptrue, cast_ptrue, ptrue_flag, pred)); operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM); operands[2] = const0_rtx; } ) -;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE. -;; -;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP -;; is a PTRUE even if the optimizers haven't yet been able to propagate -;; the constant. We would use a separate unspec code for PTESTs involving -;; GPs that might not be PTRUEs. -(define_insn "ptest_ptrue" +;; See "Description of UNSPEC_PTEST" above for details. 
+(define_insn "aarch64_ptest" [(set (reg:CC_NZC CC_REGNUM) - (unspec:CC_NZC - [(match_operand:PRED_ALL 0 "register_operand" "Upa") - (match_operand:PRED_ALL 1 "register_operand" "Upa")] - UNSPEC_PTEST_PTRUE))] + (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa") + (match_operand 1) + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:PRED_ALL 3 "register_operand" "Upa")] + UNSPEC_PTEST))] "TARGET_SVE" - "ptest\t%0, %1.b" + "ptest\t%0, %3.b" ) ;; ========================================================================= diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 46c058c9857..b2d49da843b 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2699,6 +2699,22 @@ aarch64_svpattern_for_vl (machine_mode pred_mode, int vl) return AARCH64_NUM_SVPATTERNS; } +/* Return a VNx16BImode constant in which every sequence of ELT_SIZE + bits has the lowest bit set and the upper bits clear. This is the + VNx16BImode equivalent of a PTRUE for controlling elements of + ELT_SIZE bytes. However, because the constant is VNx16BImode, + all bits are significant, even the upper zeros. */ + +rtx +aarch64_ptrue_all (unsigned int elt_size) +{ + rtx_vector_builder builder (VNx16BImode, elt_size, 1); + builder.quick_push (const1_rtx); + for (unsigned int i = 1; i < elt_size; ++i) + builder.quick_push (const0_rtx); + return builder.build (); +} + /* Return an all-true predicate register of mode MODE. */ rtx diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index a85bdd13fae..3f802c79ee8 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -220,7 +220,7 @@ UNSPEC_LD1_GATHER UNSPEC_ST1_SCATTER UNSPEC_MERGE_PTRUE - UNSPEC_PTEST_PTRUE + UNSPEC_PTEST UNSPEC_UNPACKSHI UNSPEC_UNPACKUHI UNSPEC_UNPACKSLO @@ -259,6 +259,15 @@ ] ) +;; These constants are used as a const_int in various SVE unspecs +;; to indicate whether the governing predicate is known to be a PTRUE. 
+(define_constants + [; Indicates that the predicate might not be a PTRUE. + (SVE_MAYBE_NOT_PTRUE 0) + + ; Indicates that the predicate is known to be a PTRUE. + (SVE_KNOWN_PTRUE 1)]) + ;; If further include files are added the defintion of MD_INCLUDES ;; must be updated. diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index f59052baf21..f6ca6380b05 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1169,6 +1169,10 @@ (V4HF "[%4]") (V8HF "[%4]") ]) +;; The number of bytes controlled by a predicate +(define_mode_attr data_bytes [(VNx16BI "1") (VNx8BI "2") + (VNx4BI "4") (VNx2BI "8")]) + ;; ------------------------------------------------------------------- ;; Code Iterators ;; ------------------------------------------------------------------- diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 5d229f8cc0e..9038dfb71a1 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -684,6 +684,11 @@ (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_constant_vector_operand"))) +(define_predicate "aarch64_sve_ptrue_flag" + (and (match_code "const_int") + (ior (match_test "INTVAL (op) == SVE_MAYBE_NOT_PTRUE") + (match_test "INTVAL (op) == SVE_KNOWN_PTRUE")))) + (define_predicate "aarch64_gather_scale_operand_w" (and (match_code "const_int") (match_test "INTVAL (op) == 1 || INTVAL (op) == 4"))) -- 2.30.2