From: Richard Sandiford
Date: Thu, 7 Jan 2021 15:00:38 +0000 (+0000)
Subject: gimple-isel: Fall back to using vcond_mask [PR98560]
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=78595e918ee168f595d16268073a3754c64d67fe;p=gcc.git

gimple-isel: Fall back to using vcond_mask [PR98560]

PR98560 is about a case in which the vectoriser initially generates:

  mask_1 = a < 0;
  mask_2 = mask_1 & ...;
  res = VEC_COND_EXPR <mask_2, ...>;

The vectoriser thus expects res to be calculated using vcond_mask.
However, we later manage to fold mask_2 to mask_1, leaving:

  mask_1 = a < 0;
  res = VEC_COND_EXPR <mask_1, ...>;

gimple-isel then required a combined vcond optab to exist.

On most targets, it's not too onerous to provide all possible
(compare x select) combinations.  For each data mode, you just need to
provide unsigned comparisons, signed comparisons, and floating-point
comparisons, with the data mode and type of comparison uniquely
determining the mode of the compared values.  But for targets like SVE
that support “unpacked” vectors, it's not that simple: the level of
unpacking adds another degree of freedom.

Rather than insist that the combined versions exist, I think we should
be prepared to fall back to using separate comparisons and vcond_masks.
I think that makes more sense on targets like AArch64 and AArch32,
on which compares and selects are fundamentally separate operations
anyway.

gcc/
	PR tree-optimization/98560
	* gimple-isel.cc (gimple_expand_vec_cond_expr): If we fail to use
	IFN_VCOND{,U,EQ}, fall back on IFN_VCOND_MASK.

gcc/testsuite/
	PR tree-optimization/98560
	* gcc.dg/vect/pr98560-1.c: New test.
---
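Roughly speaking (syntax schematic rather than an actual dump, with the
select operands abbreviated as op1/op2 and the comparison code shown
symbolically), the folded form above no longer has to be lowered to a
single combined operation such as:

  res = .VCOND (a, 0, op1, op2, LT_EXPR);

Instead, gimple-isel can keep the separate comparison and emit:

  mask_1 = a < 0;
  res = .VCOND_MASK (mask_1, op1, op2);

provided the target implements vcond_mask for the mask and data modes
involved.
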
diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
index d40338ce4a2..0f3d6bba229 100644
--- a/gcc/gimple-isel.cc
+++ b/gcc/gimple-isel.cc
@@ -154,6 +154,7 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
       return gimple_build_assign (lhs, tem3);
     }
 
+  bool can_compute_op0 = true;
   gcc_assert (!COMPARISON_CLASS_P (op0));
   if (TREE_CODE (op0) == SSA_NAME)
     {
@@ -184,13 +185,16 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
 	  tree op0_type = TREE_TYPE (op0);
 	  tree op0a_type = TREE_TYPE (op0a);
+	  if (TREE_CODE_CLASS (tcode) == tcc_comparison)
+	    can_compute_op0 = expand_vec_cmp_expr_p (op0a_type, op0_type,
+						     tcode);
 
 	  /* Try to fold x CMP y ? -1 : 0 to x CMP y.  */
-	  if (integer_minus_onep (op1)
+	  if (can_compute_op0
+	      && integer_minus_onep (op1)
 	      && integer_zerop (op2)
-	      && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0))
-	      && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
+	      && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0)))
 	    {
 	      tree conv_op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), op0);
 	      gassign *new_stmt = gimple_build_assign (lhs, conv_op);
@@ -198,10 +202,10 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
 	      return new_stmt;
 	    }
 
-	  if (used_vec_cond_exprs >= 2
+	  if (can_compute_op0
+	      && used_vec_cond_exprs >= 2
 	      && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
-		  != CODE_FOR_nothing)
-	      && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
+		  != CODE_FOR_nothing))
 	    {
 	      /* Keep the SSA name and use vcond_mask.  */
 	      tcode = TREE_CODE (op0);
@@ -254,7 +258,15 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
 	}
     }
 
-  gcc_assert (icode != CODE_FOR_nothing);
+  if (icode == CODE_FOR_nothing)
+    {
+      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0))
+		  && can_compute_op0
+		  && (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
+		      != CODE_FOR_nothing));
+      return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2);
+    }
+
   tree tcode_tree = build_int_cst (integer_type_node, tcode);
   return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND,
 				     5, op0a, op0b, op1, op2, tcode_tree);
diff --git a/gcc/testsuite/gcc.dg/vect/pr98560-1.c b/gcc/testsuite/gcc.dg/vect/pr98560-1.c
new file mode 100644
index 00000000000..2583fc48f8a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr98560-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fno-tree-vrp -fno-tree-fre -fno-tree-pre -fno-code-hoisting -fvect-cost-model=dynamic" } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve } } */
+
+#include <stdint.h>
+
+void
+f (uint16_t *restrict dst, uint32_t *restrict src1, float *restrict src2)
+{
+  int i = 0;
+  for (int j = 0; j < 4; ++j)
+    {
+      uint16_t tmp = src1[i] >> 1;
+      dst[i] = (uint16_t) (src2[i] < 0 && i < 4 ? tmp : 1);
+      i += 1;
+    }
+}
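
For reference, the new test can be run on its own from the gcc/
subdirectory of a configured build tree with something like:

  make check-gcc RUNTESTFLAGS="vect.exp=pr98560-1.c"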