+2020-05-19 Richard Biener <rguenther@suse.de>
+
+ * tree-vectorizer.h (_slp_tree::vectype): Add field.
+ (SLP_TREE_VECTYPE): New.
+ * tree-vect-slp.c (vect_create_new_slp_node): Initialize
+ SLP_TREE_VECTYPE.
+ (vect_create_new_slp_node): Likewise.
+ (vect_prologue_cost_for_slp): Move here from tree-vect-stmts.c
+ and simplify.
+ (vect_slp_analyze_node_operations): Walk node's children for
+ invariant costing.
+ (vect_get_constant_vectors): Use local scope op variable.
+ * tree-vect-stmts.c (vect_prologue_cost_for_slp_op): Remove here.
+ (vect_model_simple_cost): Adjust.
+ (vect_model_store_cost): Likewise.
+ (vectorizable_store): Likewise.
+
2020-05-18 Martin Sebor <msebor@redhat.com>
PR middle-end/92815
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
SLP_TREE_TWO_OPERATORS (node) = false;
SLP_TREE_DEF_TYPE (node) = vect_internal_def;
+ SLP_TREE_VECTYPE (node) = NULL_TREE;
node->refcnt = 1;
node->max_nunits = 1;
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
SLP_TREE_TWO_OPERATORS (node) = false;
SLP_TREE_DEF_TYPE (node) = vect_external_def;
+ SLP_TREE_VECTYPE (node) = NULL_TREE;
node->refcnt = 1;
node->max_nunits = 1;
return true;
}
+/* Compute the prologue cost for invariant or constant operands represented
+ by NODE. VINFO is only used to look up a vector type when NODE does not
+ carry one. One prologue cost entry is recorded in COST_VEC for each
+ vector initializer that is costed; nothing is returned. */
+
+static void
+vect_prologue_cost_for_slp (vec_info *vinfo,
+ slp_tree node,
+ stmt_vector_for_cost *cost_vec)
+{
+ /* Without looking at the actual initializer a vector of
+ constants can be implemented as load from the constant pool.
+ When all elements are the same we can use a splat. */
+ tree vectype = SLP_TREE_VECTYPE (node);
+ /* ??? Ideally we'd want all invariant nodes to have a vectype.
+ Until then fall back to a vector type derived from the type of the
+ first scalar operand of NODE. */
+ if (!vectype)
+ vectype = get_vectype_for_scalar_type (vinfo,
+ TREE_TYPE (SLP_TREE_SCALAR_OPS
+ (node)[0]), node);
+ unsigned group_size = SLP_TREE_SCALAR_OPS (node).length (); /* Number of scalar operands in NODE. */
+ unsigned num_vects_to_check;
+ unsigned HOST_WIDE_INT const_nunits;
+ unsigned nelt_limit;
+ if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
+ && ! multiple_p (const_nunits, group_size))
+ {
+ num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node); /* Cost every vector of NODE separately. */
+ nelt_limit = const_nunits; /* One cost entry per CONST_NUNITS elements. */
+ }
+ else
+ {
+ /* If either the vector has variable length or the vectors
+ are composed of repeated whole groups we only need to
+ cost construction once. All vectors will be the same. */
+ num_vects_to_check = 1;
+ nelt_limit = group_size;
+ }
+ tree elt = NULL_TREE; /* First operand of the current vector while all its operands match, else NULL_TREE. */
+ unsigned nelt = 0; /* Operands seen so far for the current vector. */
+ for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
+ {
+ unsigned si = j % group_size; /* Cycle through the scalar operands of NODE. */
+ if (nelt == 0)
+ elt = SLP_TREE_SCALAR_OPS (node)[si];
+ /* ??? We're just tracking whether all operands of a single
+ vector initializer are the same, ideally we'd check if
+ we emitted the same one already. */
+ else if (elt != SLP_TREE_SCALAR_OPS (node)[si])
+ elt = NULL_TREE;
+ nelt++;
+ if (nelt == nelt_limit) /* A whole vector initializer has been scanned. */
+ {
+ record_stmt_cost (cost_vec, 1, /* External defs: splat if uniform, else construct; other def types: a vector load. */
+ SLP_TREE_DEF_TYPE (node) == vect_external_def
+ ? (elt ? scalar_to_vec : vec_construct)
+ : vector_load,
+ NULL, vectype, 0, vect_prologue);
+ nelt = 0; /* Start over for the next vector. */
+ }
+ }
+}
+
/* Analyze statements contained in SLP tree NODE after recursively analyzing
the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
int i, j;
slp_tree child;
+ /* Assume we can code-generate all invariants. */
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return true;
if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
+ /* When the node can be vectorized cost invariant nodes it references.
+ This is not done in DFS order to allow the referring node's
+ vectorizable_* calls to nail down the invariant node's vector type
+ and possibly unshare it if it needs a different vector type than
+ other referrers. */
+ if (res)
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
+ if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
+ {
+ /* ??? After auditing more code paths make a "default"
+ and push the vector type from NODE to all children
+ if it is not already set. */
+ /* Perform usual caching, note code-generation still
+ code-gens these nodes multiple times but we expect
+ to CSE them later. */
+ if (!visited.contains (child)
+ && !lvisited.add (child))
+ vect_prologue_cost_for_slp (vinfo, child, cost_vec);
+ }
+
/* If this node can't be vectorized, try pruning the tree here rather
than felling the whole thing. */
if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
stmt_vec_info insert_after = NULL;
for (j = 0; j < number_of_copies; j++)
{
+ tree op;
for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
{
/* Create 'vect_ = {op0,op1,...,opn}'. */
return opt_result::success ();
}
-/* Compute the prologue cost for invariant or constant operands. */
-
-static unsigned
-vect_prologue_cost_for_slp_op (vec_info *vinfo,
- slp_tree node,
- unsigned opno, enum vect_def_type dt,
- stmt_vector_for_cost *cost_vec)
-{
- gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
- tree op = gimple_op (stmt, opno);
- unsigned prologue_cost = 0;
-
- /* Without looking at the actual initializer a vector of
- constants can be implemented as load from the constant pool.
- When all elements are the same we can use a splat. */
- tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
- unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
- unsigned num_vects_to_check;
- unsigned HOST_WIDE_INT const_nunits;
- unsigned nelt_limit;
- if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
- && ! multiple_p (const_nunits, group_size))
- {
- num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
- nelt_limit = const_nunits;
- }
- else
- {
- /* If either the vector has variable length or the vectors
- are composed of repeated whole groups we only need to
- cost construction once. All vectors will be the same. */
- num_vects_to_check = 1;
- nelt_limit = group_size;
- }
- tree elt = NULL_TREE;
- unsigned nelt = 0;
- for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
- {
- unsigned si = j % group_size;
- if (nelt == 0)
- elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
- /* ??? We're just tracking whether all operands of a single
- vector initializer are the same, ideally we'd check if
- we emitted the same one already. */
- else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
- opno))
- elt = NULL_TREE;
- nelt++;
- if (nelt == nelt_limit)
- {
- prologue_cost += record_stmt_cost
- (cost_vec, 1,
- dt == vect_external_def
- ? (elt ? scalar_to_vec : vec_construct) : vector_load,
- NULL, vectype, 0, vect_prologue);
- nelt = 0;
- }
- }
-
- return prologue_cost;
-}
-
/* Function vect_model_simple_cost.
Models cost for simple operations, i.e. those that only emit ncopies of a
be generated for the single vector op. We will handle that shortly. */
static void
-vect_model_simple_cost (vec_info *vinfo,
+vect_model_simple_cost (vec_info *,
stmt_vec_info stmt_info, int ncopies,
enum vect_def_type *dt,
int ndts,
if (node)
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
- if (node)
- {
- /* Scan operands and account for prologue cost of constants/externals.
- ??? This over-estimates cost for multiple uses and should be
- re-engineered. */
- gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
- tree lhs = gimple_get_lhs (stmt);
- for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
- {
- tree op = gimple_op (stmt, i);
- enum vect_def_type dt;
- if (!op || op == lhs)
- continue;
- if (vect_is_simple_use (op, vinfo, &dt)
- && (dt == vect_constant_def || dt == vect_external_def))
- prologue_cost += vect_prologue_cost_for_slp_op (vinfo, node,
- i, dt, cost_vec);
- }
- }
- else
+ if (!node)
/* Cost the "broadcast" of a scalar operand in to a vector operand.
Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
cost model. */
static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
- enum vect_def_type dt,
vect_memory_access_type memory_access_type,
vec_load_store_type vls_type, slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
if (vls_type == VLS_STORE_INVARIANT)
{
- if (slp_node)
- prologue_cost += vect_prologue_cost_for_slp_op (vinfo, slp_node,
- 1, dt, cost_vec);
- else
+ if (!slp_node)
prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
stmt_info, 0, vect_prologue);
}
memory_access_type, &gs_info, mask);
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
- vect_model_store_cost (vinfo, stmt_info, ncopies, rhs_dt,
+ vect_model_store_cost (vinfo, stmt_info, ncopies,
memory_access_type, vls_type, slp_node, cost_vec);
return true;
}
permutation. */
vec<unsigned> load_permutation;
+ tree vectype;
/* Vectorized stmt/s. */
vec<stmt_vec_info> vec_stmts;
/* Number of vector stmts that are created to replace the group of scalar
#define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
#define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators
#define SLP_TREE_DEF_TYPE(S) (S)->def_type
+#define SLP_TREE_VECTYPE(S) (S)->vectype
/* Key for map that records association between
scalar conditions and corresponding loop mask, and