From 7b3adfa7bb47e4ebde91634caa5a7e13175558f1 Mon Sep 17 00:00:00 2001 From: Richard Biener <rguenther@suse.de> Date: Fri, 26 Jun 2020 11:18:19 +0200 Subject: [PATCH] tree-optimization/95839 - teach SLP vectorization about vector inputs This teaches SLP analysis about vector typed externals that are fed into the SLP operations via lane extracting BIT_FIELD_REFs. It shows that there's currently no good representation for vector code on the SLP side so I went halfway and represent such vector external uses always using a SLP permutation node with a single external SLP child which has a non-standard representation of no scalar defs but only a vector def. That works best for shielding the rest of the vectorizer from it. 2020-06-26 Richard Biener <rguenther@suse.de> PR tree-optimization/95839 * tree-vect-slp.c (vect_slp_tree_uniform_p): Pre-existing vectors are not uniform. (vect_build_slp_tree_1): Handle BIT_FIELD_REFs of vector registers. (vect_build_slp_tree_2): For groups of lane extracts from a vector register generate a permute node with a special child representing the pre-existing vector. (vect_prologue_cost_for_slp): Pre-existing vectors cost nothing. (vect_slp_analyze_node_operations): Use SLP_TREE_LANES. (vectorizable_slp_permutation): Do not generate or cost identity permutes. (vect_schedule_slp_instance): Handle pre-existing vectors that are function arguments. * gcc.dg/vect/bb-slp-pr95839-2.c: New testcase. 
--- gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-2.c | 20 ++++ gcc/tree-vect-slp.c | 119 ++++++++++++++++--- 2 files changed, 124 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-2.c diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-2.c new file mode 100644 index 00000000000..49e75d8c95c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-2.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_double } */ +/* { dg-additional-options "-w -Wno-psabi" } */ + +typedef double __attribute__((vector_size(16))) v2df; + +v2df f(v2df a, v2df b) +{ + return (v2df){a[0] + b[0], a[1] + b[1]}; +} + +v2df g(v2df a, v2df b) +{ + return (v2df){a[0] + b[1], a[1] + b[0]}; +} + +/* Verify we manage to vectorize this using the original vectors + and do not end up with any vector CTORs. */ +/* { dg-final { scan-tree-dump-times "basic block vectorized" 2 "slp2" } } */ +/* { dg-final { scan-tree-dump-not "vect_cst" "slp2" } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 1ffbf6f6af9..532809d2667 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -247,6 +247,10 @@ vect_slp_tree_uniform_p (slp_tree node) gcc_assert (SLP_TREE_DEF_TYPE (node) == vect_constant_def || SLP_TREE_DEF_TYPE (node) == vect_external_def); + /* Pre-existing vectors. */ + if (SLP_TREE_SCALAR_OPS (node).is_empty ()) + return false; + unsigned i; tree op, first = NULL_TREE; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) @@ -838,7 +842,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, else { rhs_code = gimple_assign_rhs_code (stmt); - load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference; + load_p = gimple_vuse (stmt); } /* Check the operation. 
*/ @@ -899,6 +903,22 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, need_same_oprnds = true; first_op1 = gimple_assign_rhs2 (stmt); } + else if (!load_p + && rhs_code == BIT_FIELD_REF) + { + tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); + if (TREE_CODE (vec) != SSA_NAME + || !types_compatible_p (vectype, TREE_TYPE (vec))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Build SLP failed: " + "BIT_FIELD_REF not supported\n"); + /* Fatal mismatch. */ + matches[0] = false; + return false; + } + } else if (call_stmt && gimple_call_internal_p (call_stmt, IFN_DIV_POW2)) { @@ -957,6 +977,18 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, continue; } } + if (!load_p + && first_stmt_code == BIT_FIELD_REF + && (TREE_OPERAND (gimple_assign_rhs1 (first_stmt_info->stmt), 0) + != TREE_OPERAND (gimple_assign_rhs1 (stmt_info->stmt), 0))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Build SLP failed: different BIT_FIELD_REF " + "arguments in %G", stmt); + /* Mismatch. */ + continue; + } if (!load_p && rhs_code == CALL_EXPR) { @@ -1026,7 +1058,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, && TREE_CODE_CLASS (rhs_code) != tcc_expression && TREE_CODE_CLASS (rhs_code) != tcc_comparison && rhs_code != VIEW_CONVERT_EXPR - && rhs_code != CALL_EXPR) + && rhs_code != CALL_EXPR + && rhs_code != BIT_FIELD_REF) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -1287,6 +1320,45 @@ vect_build_slp_tree_2 (vec_info *vinfo, return node; } } + else if (gimple_assign_single_p (stmt_info->stmt) + && !gimple_vuse (stmt_info->stmt) + && gimple_assign_rhs_code (stmt_info->stmt) == BIT_FIELD_REF) + { + /* vect_build_slp_tree_2 determined all BIT_FIELD_REFs reference + the same SSA name vector of a compatible type to vectype. 
*/ + vec<std::pair<unsigned, unsigned> > lperm = vNULL; + tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt_info->stmt), 0); + stmt_vec_info estmt_info; + FOR_EACH_VEC_ELT (stmts, i, estmt_info) + { + gassign *estmt = as_a <gassign *> (estmt_info->stmt); + tree bfref = gimple_assign_rhs1 (estmt); + HOST_WIDE_INT lane; + if (!known_eq (bit_field_size (bfref), + tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (vectype)))) + || !constant_multiple_p (bit_field_offset (bfref), + bit_field_size (bfref), &lane)) + { + lperm.release (); + return NULL; + } + lperm.safe_push (std::make_pair (0, (unsigned)lane)); + } + slp_tree vnode = vect_create_new_slp_node (vNULL); + SLP_TREE_VECTYPE (vnode) = TREE_TYPE (vec); + SLP_TREE_VEC_DEFS (vnode).safe_push (vec); + /* We are always building a permutation node even if it is an identity + permute to shield the rest of the vectorizer from the odd node + representing an actual vector without any scalar ops. + ??? We could hide it completely with making the permute node + external? */ + node = vect_create_new_slp_node (stmts, 1); + SLP_TREE_CODE (node) = VEC_PERM_EXPR; + SLP_TREE_LANE_PERMUTATION (node) = lperm; + SLP_TREE_VECTYPE (node) = vectype; + SLP_TREE_CHILDREN (node).quick_push (vnode); + return node; + } /* Get at the operands, verifying they are compatible. */ vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size); @@ -2744,6 +2816,10 @@ static void vect_prologue_cost_for_slp (slp_tree node, stmt_vector_for_cost *cost_vec) { + /* There's a special case of an existing vector, that costs nothing. */ + if (SLP_TREE_SCALAR_OPS (node).length () == 0 + && !SLP_TREE_VEC_DEFS (node).is_empty ()) + return; /* Without looking at the actual initializer a vector of constants can be implemented as load from the constant pool. When all elements are the same we can use a splat. 
*/ @@ -2857,7 +2933,7 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, && j == 1); continue; } - unsigned group_size = SLP_TREE_SCALAR_OPS (child).length (); + unsigned group_size = SLP_TREE_LANES (child); poly_uint64 vf = 1; if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) vf = loop_vinfo->vectorization_factor; @@ -4139,7 +4215,9 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, { indices.new_vector (mask, second_vec.first == -1U ? 1 : 2, const_nunits); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + bool identity_p = indices.series_p (0, 1, 0, 1); + if (!identity_p + && !can_vec_perm_const_p (TYPE_MODE (vectype), indices)) { if (dump_enabled_p ()) { @@ -4157,11 +4235,10 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, return false; } - nperms++; + if (!identity_p) + nperms++; if (gsi) { - tree mask_vec = vect_gen_perm_mask_checked (vectype, indices); - if (second_vec.first == -1U) second_vec = first_vec; @@ -4169,14 +4246,22 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, slp_tree first_node = SLP_TREE_CHILDREN (node)[first_vec.first]; tree first_def = vect_get_slp_vect_def (first_node, first_vec.second); - slp_tree second_node = SLP_TREE_CHILDREN (node)[second_vec.first]; - tree second_def - = vect_get_slp_vect_def (second_node, second_vec.second); + gassign *perm_stmt; tree perm_dest = make_ssa_name (vectype); - gassign *perm_stmt - = gimple_build_assign (perm_dest, VEC_PERM_EXPR, - first_def, second_def, - mask_vec); + if (!identity_p) + { + slp_tree second_node + = SLP_TREE_CHILDREN (node)[second_vec.first]; + tree second_def + = vect_get_slp_vect_def (second_node, second_vec.second); + tree mask_vec = vect_gen_perm_mask_checked (vectype, indices); + perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR, + first_def, second_def, + mask_vec); + } + else + /* We need a copy here in case the def was external. 
*/ + perm_stmt = gimple_build_assign (perm_dest, first_def); vect_finish_stmt_generation (vinfo, NULL, perm_stmt, gsi); /* Store the vector statement in NODE. */ SLP_TREE_VEC_STMTS (node).quick_push (perm_stmt); @@ -4315,13 +4400,17 @@ vect_schedule_slp_instance (vec_info *vinfo, unsigned j; tree vdef; FOR_EACH_VEC_ELT (SLP_TREE_VEC_DEFS (child), j, vdef) - if (TREE_CODE (vdef) == SSA_NAME + if (TREE_CODE (vdef) == SSA_NAME + && !SSA_NAME_IS_DEFAULT_DEF (vdef)) { gimple *vstmt = SSA_NAME_DEF_STMT (vdef); if (!last_stmt || vect_stmt_dominates_stmt_p (last_stmt, vstmt)) last_stmt = vstmt; } + /* This can happen when all children are pre-existing vectors. */ + if (!last_stmt) + last_stmt = vect_find_first_scalar_stmt_in_slp (node)->stmt; } if (is_a <gphi *> (last_stmt)) si = gsi_after_labels (gimple_bb (last_stmt)); -- 2.30.2