From a4b48fc47c3406b6f41be093c4615879b7006710 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 18 May 2020 16:05:00 +0200 Subject: [PATCH] cost invariant nodes from vect_slp_analyze_node_operations SLP walk 2020-05-19 Richard Biener * tree-vectorizer.h (_slp_tree::vectype): Add field. (SLP_TREE_VECTYPE): New. * tree-vect-slp.c (vect_create_new_slp_node): Initialize SLP_TREE_VECTYPE. (vect_create_new_slp_node): Likewise. (vect_prologue_cost_for_slp): Move here from tree-vect-stmts.c and simplify. (vect_slp_analyze_node_operations): Walk nodes children for invariant costing. (vect_get_constant_vectors): Use local scope op variable. * tree-vect-stmts.c (vect_prologue_cost_for_slp_op): Remove here. (vect_model_simple_cost): Adjust. (vect_model_store_cost): Likewise. (vectorizable_store): Likewise. --- gcc/ChangeLog | 17 ++++++++ gcc/tree-vect-slp.c | 84 ++++++++++++++++++++++++++++++++++++++ gcc/tree-vect-stmts.c | 93 ++----------------------------------------- gcc/tree-vectorizer.h | 2 + 4 files changed, 107 insertions(+), 89 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c100737c431..56c056f9226 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2020-05-19 Richard Biener + + * tree-vectorizer.h (_slp_tree::vectype): Add field. + (SLP_TREE_VECTYPE): New. + * tree-vect-slp.c (vect_create_new_slp_node): Initialize + SLP_TREE_VECTYPE. + (vect_create_new_slp_node): Likewise. + (vect_prologue_cost_for_slp): Move here from tree-vect-stmts.c + and simplify. + (vect_slp_analyze_node_operations): Walk nodes children for + invariant costing. + (vect_get_constant_vectors): Use local scope op variable. + * tree-vect-stmts.c (vect_prologue_cost_for_slp_op): Remove here. + (vect_model_simple_cost): Adjust. + (vect_model_store_cost): Likewise. + (vectorizable_store): Likewise. + 2020-05-18 Martin Sebor PR middle-end/92815 diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 276d9604855..69a2002717f 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -129,6 +129,7 @@ vect_create_new_slp_node (vec scalar_stmts) SLP_TREE_LOAD_PERMUTATION (node) = vNULL; SLP_TREE_TWO_OPERATORS (node) = false; SLP_TREE_DEF_TYPE (node) = vect_internal_def; + SLP_TREE_VECTYPE (node) = NULL_TREE; node->refcnt = 1; node->max_nunits = 1; @@ -155,6 +156,7 @@ vect_create_new_slp_node (vec ops) SLP_TREE_LOAD_PERMUTATION (node) = vNULL; SLP_TREE_TWO_OPERATORS (node) = false; SLP_TREE_DEF_TYPE (node) = vect_external_def; + SLP_TREE_VECTYPE (node) = NULL_TREE; node->refcnt = 1; node->max_nunits = 1; @@ -2720,6 +2722,66 @@ vect_slp_convert_to_external (vec_info *vinfo, slp_tree node, return true; } +/* Compute the prologue cost for invariant or constant operands represented + by NODE. */ + +static void +vect_prologue_cost_for_slp (vec_info *vinfo, + slp_tree node, + stmt_vector_for_cost *cost_vec) +{ + /* Without looking at the actual initializer a vector of + constants can be implemented as load from the constant pool. + When all elements are the same we can use a splat. */ + tree vectype = SLP_TREE_VECTYPE (node); + /* ??? Ideally we'd want all invariant nodes to have a vectype. */ + if (!vectype) + vectype = get_vectype_for_scalar_type (vinfo, + TREE_TYPE (SLP_TREE_SCALAR_OPS + (node)[0]), node); + unsigned group_size = SLP_TREE_SCALAR_OPS (node).length (); + unsigned num_vects_to_check; + unsigned HOST_WIDE_INT const_nunits; + unsigned nelt_limit; + if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits) + && ! multiple_p (const_nunits, group_size)) + { + num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node); + nelt_limit = const_nunits; + } + else + { + /* If either the vector has variable length or the vectors + are composed of repeated whole groups we only need to + cost construction once. All vectors will be the same. */ + num_vects_to_check = 1; + nelt_limit = group_size; + } + tree elt = NULL_TREE; + unsigned nelt = 0; + for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j) + { + unsigned si = j % group_size; + if (nelt == 0) + elt = SLP_TREE_SCALAR_OPS (node)[si]; + /* ??? We're just tracking whether all operands of a single + vector initializer are the same, ideally we'd check if + we emitted the same one already. */ + else if (elt != SLP_TREE_SCALAR_OPS (node)[si]) + elt = NULL_TREE; + nelt++; + if (nelt == nelt_limit) + { + record_stmt_cost (cost_vec, 1, + SLP_TREE_DEF_TYPE (node) == vect_external_def + ? (elt ? scalar_to_vec : vec_construct) + : vector_load, + NULL, vectype, 0, vect_prologue); + nelt = 0; + } + } +} + /* Analyze statements contained in SLP tree NODE after recursively analyzing the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE. @@ -2735,6 +2797,7 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, int i, j; slp_tree child; + /* Assume we can code-generate all invariants. */ if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) return true; @@ -2798,6 +2861,26 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, if (SLP_TREE_SCALAR_STMTS (child).length () != 0) STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j]; + /* When the node can be vectorized cost invariant nodes it references. + This is not done in DFS order to allow the refering node + vectorizable_* calls to nail down the invariant nodes vector type + and possibly unshare it if it needs a different vector type than + other referrers. */ + if (res) + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) + { + /* ??? After auditing more code paths make a "default" + and push the vector type from NODE to all children + if it is not already set. */ + /* Perform usual caching, note code-generation still + code-gens these nodes multiple times but we expect + to CSE them later. */ + if (!visited.contains (child) + && !lvisited.add (child)) + vect_prologue_cost_for_slp (vinfo, child, cost_vec); + } + /* If this node can't be vectorized, try pruning the tree here rather than felling the whole thing. */ if (!res && vect_slp_convert_to_external (vinfo, node, node_instance)) @@ -3600,6 +3683,7 @@ vect_get_constant_vectors (vec_info *vinfo, stmt_vec_info insert_after = NULL; for (j = 0; j < number_of_copies; j++) { + tree op; for (i = group_size - 1; op_node->ops.iterate (i, &op); i--) { /* Create 'vect_ = {op0,op1,...,opn}'. */ diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 7c4afea32f8..82750a975aa 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -786,68 +786,6 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal) return opt_result::success (); } -/* Compute the prologue cost for invariant or constant operands. */ - -static unsigned -vect_prologue_cost_for_slp_op (vec_info *vinfo, - slp_tree node, - unsigned opno, enum vect_def_type dt, - stmt_vector_for_cost *cost_vec) -{ - gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt; - tree op = gimple_op (stmt, opno); - unsigned prologue_cost = 0; - - /* Without looking at the actual initializer a vector of - constants can be implemented as load from the constant pool. - When all elements are the same we can use a splat. */ - tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node); - unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length (); - unsigned num_vects_to_check; - unsigned HOST_WIDE_INT const_nunits; - unsigned nelt_limit; - if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits) - && ! multiple_p (const_nunits, group_size)) - { - num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node); - nelt_limit = const_nunits; - } - else - { - /* If either the vector has variable length or the vectors - are composed of repeated whole groups we only need to - cost construction once. All vectors will be the same. */ - num_vects_to_check = 1; - nelt_limit = group_size; - } - tree elt = NULL_TREE; - unsigned nelt = 0; - for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j) - { - unsigned si = j % group_size; - if (nelt == 0) - elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno); - /* ??? We're just tracking whether all operands of a single - vector initializer are the same, ideally we'd check if - we emitted the same one already. */ - else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, - opno)) - elt = NULL_TREE; - nelt++; - if (nelt == nelt_limit) - { - prologue_cost += record_stmt_cost - (cost_vec, 1, - dt == vect_external_def - ? (elt ? scalar_to_vec : vec_construct) : vector_load, - NULL, vectype, 0, vect_prologue); - nelt = 0; - } - } - - return prologue_cost; -} - /* Function vect_model_simple_cost. Models cost for simple operations, i.e. those that only emit ncopies of a @@ -855,7 +793,7 @@ vect_prologue_cost_for_slp_op (vec_info *vinfo, be generated for the single vector op. We will handle that shortly. */ static void -vect_model_simple_cost (vec_info *vinfo, +vect_model_simple_cost (vec_info *, stmt_vec_info stmt_info, int ncopies, enum vect_def_type *dt, int ndts, @@ -871,26 +809,7 @@ vect_model_simple_cost (vec_info *vinfo, if (node) ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node); - if (node) - { - /* Scan operands and account for prologue cost of constants/externals. - ??? This over-estimates cost for multiple uses and should be - re-engineered. */ - gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt; - tree lhs = gimple_get_lhs (stmt); - for (unsigned i = 0; i < gimple_num_ops (stmt); ++i) - { - tree op = gimple_op (stmt, i); - enum vect_def_type dt; - if (!op || op == lhs) - continue; - if (vect_is_simple_use (op, vinfo, &dt) - && (dt == vect_constant_def || dt == vect_external_def)) - prologue_cost += vect_prologue_cost_for_slp_op (vinfo, node, - i, dt, cost_vec); - } - } - else + if (!node) /* Cost the "broadcast" of a scalar operand in to a vector operand. Use scalar_to_vec to cost the broadcast, as elsewhere in the vector cost model. */ @@ -991,7 +910,6 @@ cfun_returns (tree decl) static void vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies, - enum vect_def_type dt, vect_memory_access_type memory_access_type, vec_load_store_type vls_type, slp_tree slp_node, stmt_vector_for_cost *cost_vec) @@ -1006,10 +924,7 @@ vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies, if (vls_type == VLS_STORE_INVARIANT) { - if (slp_node) - prologue_cost += vect_prologue_cost_for_slp_op (vinfo, slp_node, - 1, dt, cost_vec); - else + if (!slp_node) prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0, vect_prologue); } @@ -7565,7 +7480,7 @@ vectorizable_store (vec_info *vinfo, memory_access_type, &gs_info, mask); STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; - vect_model_store_cost (vinfo, stmt_info, ncopies, rhs_dt, + vect_model_store_cost (vinfo, stmt_info, ncopies, memory_access_type, vls_type, slp_node, cost_vec); return true; } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 9b2cbe6d715..38a0a1d278b 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -130,6 +130,7 @@ struct _slp_tree { permutation. */ vec load_permutation; + tree vectype; /* Vectorized stmt/s. */ vec vec_stmts; /* Number of vector stmts that are created to replace the group of scalar @@ -186,6 +187,7 @@ public: #define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation #define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators #define SLP_TREE_DEF_TYPE(S) (S)->def_type +#define SLP_TREE_VECTYPE(S) (S)->vectype /* Key for map that records association between scalar conditions and corresponding loop mask, and -- 2.30.2