+2018-05-16 Richard Biener <rguenther@suse.de>
+
+ * tree-vectorizer.h (struct stmt_info_for_cost): Add where member.
+ (dump_stmt_cost): Declare.
+ (add_stmt_cost): Dump cost we add.
+ (add_stmt_costs): New function.
+ (vect_model_simple_cost, vect_model_store_cost, vect_model_load_cost):
+ No longer exported.
+ (vect_analyze_stmt): Adjust prototype.
+ (vectorizable_condition): Likewise.
+ (vectorizable_live_operation): Likewise.
+ (vectorizable_reduction): Likewise.
+ (vectorizable_induction): Likewise.
+ * tree-vect-loop.c (vect_analyze_loop_operations): Create local
+ cost vector to pass to vectorizable_ and record afterwards.
+ (vect_model_reduction_cost): Take cost vector argument and adjust.
+ (vect_model_induction_cost): Likewise.
+ (vectorizable_reduction): Likewise.
+ (vectorizable_induction): Likewise.
+ (vectorizable_live_operation): Likewise.
+ * tree-vect-slp.c (vect_create_new_slp_node): Initialize
+ SLP_TREE_NUMBER_OF_VEC_STMTS.
+ (vect_analyze_slp_cost_1): Remove.
+ (vect_analyze_slp_cost): Likewise.
+ (vect_slp_analyze_node_operations): Take visited args and
+ a target cost vector. Avoid processing already visited stmt sets.
+ (vect_slp_analyze_operations): Use a local cost vector to gather
+ costs and register those of non-discarded instances.
+ (vect_bb_vectorization_profitable_p): Use add_stmt_costs.
+ (vect_schedule_slp_instance): Remove copying of
+ SLP_TREE_NUMBER_OF_VEC_STMTS. Instead assert that it is not
+ zero.
+ * tree-vect-stmts.c (record_stmt_cost): Remove path directly
+ adding cost. Record cost entry location.
+ (vect_prologue_cost_for_slp_op): Function to compute cost of
+ a constant or invariant generated for SLP vect in the prologue,
+ split out from vect_analyze_slp_cost_1.
+ (vect_model_simple_cost): Make static. Adjust for SLP costing.
+ (vect_model_promotion_demotion_cost): Likewise.
+ (vect_model_store_cost): Likewise, make static.
+ (vect_model_load_cost): Likewise.
+ (vectorizable_bswap): Add cost vector arg and adjust.
+ (vectorizable_call): Likewise.
+ (vectorizable_simd_clone_call): Likewise.
+ (vectorizable_conversion): Likewise.
+ (vectorizable_assignment): Likewise.
+ (vectorizable_shift): Likewise.
+ (vectorizable_operation): Likewise.
+ (vectorizable_store): Likewise.
+ (vectorizable_load): Likewise.
+ (vectorizable_condition): Likewise.
+ (vectorizable_comparison): Likewise.
+ (can_vectorize_live_stmts): Likewise.
+ (vect_analyze_stmt): Likewise.
+ (vect_transform_stmt): Adjust calls to vectorizable_*.
+ * tree-vectorizer.c: Include gimple-pretty-print.h.
+ (dump_stmt_cost): New function.
+
2018-05-16 Richard Biener <rguenther@suse.de>

* params.def (PARAM_DSE_MAX_ALIAS_QUERIES_PER_STORE): New param.
dump_printf_loc (MSG_NOTE, vect_location,
"=== vect_analyze_loop_operations ===\n");
+ stmt_vector_for_cost cost_vec;
+ cost_vec.create (2);
+
for (i = 0; i < nbbs; i++)
{
basic_block bb = bbs[i];
need_to_vectorize = true;
if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
&& ! PURE_SLP_STMT (stmt_info))
- ok = vectorizable_induction (phi, NULL, NULL, NULL);
+ ok = vectorizable_induction (phi, NULL, NULL, NULL, &cost_vec);
else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
&& ! PURE_SLP_STMT (stmt_info))
- ok = vectorizable_reduction (phi, NULL, NULL, NULL, NULL);
+ ok = vectorizable_reduction (phi, NULL, NULL, NULL, NULL,
+ &cost_vec);
}
/* SLP PHIs are tested by vect_slp_analyze_node_operations. */
if (ok
&& STMT_VINFO_LIVE_P (stmt_info)
&& !PURE_SLP_STMT (stmt_info))
- ok = vectorizable_live_operation (phi, NULL, NULL, -1, NULL);
+ ok = vectorizable_live_operation (phi, NULL, NULL, -1, NULL,
+ &cost_vec);
if (!ok)
{
{
gimple *stmt = gsi_stmt (si);
if (!gimple_clobber_p (stmt)
- && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL, NULL))
+ && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL, NULL,
+ &cost_vec))
return false;
}
} /* bbs */
+ add_stmt_costs (loop_vinfo->target_cost_data, &cost_vec);
+ cost_vec.release ();
+
/* All operations in the loop are either irrelevant (deal with loop
control, or dead), or only used outside the loop and can be moved
out of the loop (e.g. invariants, inductions). The loop can be
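The hunk above is the shape of the whole change: during analysis, costs are recorded into a function-local cost vector instead of being fed straight into the target's cost data, and are only replayed via add_stmt_costs once analysis has committed to the result. Below is a minimal standalone C++ analogue of this deferred-costing flow; cost_entry, flush_costs and friends are invented stand-ins for illustration, not GCC types.

  #include <vector>
  #include <cstdio>

  /* Simplified stand-ins for stmt_info_for_cost and the opaque target
     cost data (invented for this sketch).  */
  enum where_t { prologue, body, epilogue };
  struct cost_entry { int count; where_t where; };
  struct target_costs { unsigned cost[3]; };

  /* Analysis records entries locally; nothing reaches the target yet.  */
  static void
  analyze_stmt (std::vector<cost_entry> &cost_vec)
  {
    cost_vec.push_back ({1, body});      /* like record_stmt_cost  */
    cost_vec.push_back ({2, prologue});
  }

  /* Replay the recorded entries, mirroring the new add_stmt_costs.  */
  static void
  flush_costs (target_costs &data, const std::vector<cost_entry> &cost_vec)
  {
    for (unsigned i = 0; i < cost_vec.size (); ++i)
      data.cost[cost_vec[i].where] += cost_vec[i].count;
  }

  int
  main ()
  {
    std::vector<cost_entry> cost_vec;  /* plays stmt_vector_for_cost  */
    analyze_stmt (cost_vec);
    target_costs data = {};
    bool keep = true;                  /* analysis succeeded  */
    if (keep)                          /* otherwise just drop cost_vec  */
      flush_costs (data, cost_vec);
    std::printf ("prologue %u body %u\n",
                 data.cost[prologue], data.cost[body]);
    return 0;
  }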
static void
vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
- int ncopies)
+ int ncopies, stmt_vector_for_cost *cost_vec)
{
int prologue_cost = 0, epilogue_cost = 0, inside_cost;
enum tree_code code;
machine_mode mode;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = NULL;
- void *target_cost_data;
if (loop_vinfo)
- {
- loop = LOOP_VINFO_LOOP (loop_vinfo);
- target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
- }
- else
- target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Condition reductions generate two reductions in the loop. */
vect_reduction_type reduction_type
if (reduction_type == EXTRACT_LAST_REDUCTION || reduc_fn != IFN_LAST)
/* Count one reduction-like operation per vector. */
- inside_cost = add_stmt_cost (target_cost_data, ncopies, vec_to_scalar,
- stmt_info, 0, vect_body);
+ inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar,
+ stmt_info, 0, vect_body);
else
{
/* Use NELEMENTS extracts and NELEMENTS scalar ops. */
unsigned int nelements = ncopies * vect_nunits_for_cost (vectype);
- inside_cost = add_stmt_cost (target_cost_data, nelements,
- vec_to_scalar, stmt_info, 0,
- vect_body);
- inside_cost += add_stmt_cost (target_cost_data, nelements,
- scalar_stmt, stmt_info, 0,
- vect_body);
+ inside_cost = record_stmt_cost (cost_vec, nelements,
+ vec_to_scalar, stmt_info, 0,
+ vect_body);
+ inside_cost += record_stmt_cost (cost_vec, nelements,
+ scalar_stmt, stmt_info, 0,
+ vect_body);
}
}
else
initial result of the data reduction, initial value of the index
reduction. */
int prologue_stmts = reduction_type == COND_REDUCTION ? 4 : 1;
- prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
- scalar_to_vec, stmt_info, 0,
- vect_prologue);
+ prologue_cost += record_stmt_cost (cost_vec, prologue_stmts,
+ scalar_to_vec, stmt_info, 0,
+ vect_prologue);
/* Cost of reduction op inside loop. */
- inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
- stmt_info, 0, vect_body);
+ inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
+ stmt_info, 0, vect_body);
}
/* Determine cost of epilogue code.
if (reduction_type == COND_REDUCTION)
{
/* An EQ stmt and an COND_EXPR stmt. */
- epilogue_cost += add_stmt_cost (target_cost_data, 2,
- vector_stmt, stmt_info, 0,
- vect_epilogue);
+ epilogue_cost += record_stmt_cost (cost_vec, 2,
+ vector_stmt, stmt_info, 0,
+ vect_epilogue);
/* Reduction of the max index and a reduction of the found
values. */
- epilogue_cost += add_stmt_cost (target_cost_data, 2,
- vec_to_scalar, stmt_info, 0,
- vect_epilogue);
+ epilogue_cost += record_stmt_cost (cost_vec, 2,
+ vec_to_scalar, stmt_info, 0,
+ vect_epilogue);
/* A broadcast of the max value. */
- epilogue_cost += add_stmt_cost (target_cost_data, 1,
- scalar_to_vec, stmt_info, 0,
- vect_epilogue);
+ epilogue_cost += record_stmt_cost (cost_vec, 1,
+ scalar_to_vec, stmt_info, 0,
+ vect_epilogue);
}
else
{
- epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
- stmt_info, 0, vect_epilogue);
- epilogue_cost += add_stmt_cost (target_cost_data, 1,
- vec_to_scalar, stmt_info, 0,
- vect_epilogue);
+ epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
+ stmt_info, 0, vect_epilogue);
+ epilogue_cost += record_stmt_cost (cost_vec, 1,
+ vec_to_scalar, stmt_info, 0,
+ vect_epilogue);
}
}
else if (reduction_type == COND_REDUCTION)
{
unsigned estimated_nunits = vect_nunits_for_cost (vectype);
/* Extraction of scalar elements. */
- epilogue_cost += add_stmt_cost (target_cost_data,
- 2 * estimated_nunits,
- vec_to_scalar, stmt_info, 0,
- vect_epilogue);
+ epilogue_cost += record_stmt_cost (cost_vec,
+ 2 * estimated_nunits,
+ vec_to_scalar, stmt_info, 0,
+ vect_epilogue);
/* Scalar max reductions via COND_EXPR / MAX_EXPR. */
- epilogue_cost += add_stmt_cost (target_cost_data,
- 2 * estimated_nunits - 3,
- scalar_stmt, stmt_info, 0,
- vect_epilogue);
+ epilogue_cost += record_stmt_cost (cost_vec,
+ 2 * estimated_nunits - 3,
+ scalar_stmt, stmt_info, 0,
+ vect_epilogue);
}
else if (reduction_type == EXTRACT_LAST_REDUCTION
|| reduction_type == FOLD_LEFT_REDUCTION)
{
/* Final reduction via vector shifts and the reduction operator.
Also requires scalar extract. */
- epilogue_cost += add_stmt_cost (target_cost_data,
- exact_log2 (nelements) * 2,
- vector_stmt, stmt_info, 0,
- vect_epilogue);
- epilogue_cost += add_stmt_cost (target_cost_data, 1,
- vec_to_scalar, stmt_info, 0,
- vect_epilogue);
+ epilogue_cost += record_stmt_cost (cost_vec,
+ exact_log2 (nelements) * 2,
+ vector_stmt, stmt_info, 0,
+ vect_epilogue);
+ epilogue_cost += record_stmt_cost (cost_vec, 1,
+ vec_to_scalar, stmt_info, 0,
+ vect_epilogue);
}
else
/* Use extracts and reduction op for final reduction. For N
elements, we have N extracts and N-1 reduction ops. */
- epilogue_cost += add_stmt_cost (target_cost_data,
- nelements + nelements - 1,
- vector_stmt, stmt_info, 0,
- vect_epilogue);
+ epilogue_cost += record_stmt_cost (cost_vec,
+ nelements + nelements - 1,
+ vector_stmt, stmt_info, 0,
+ vect_epilogue);
}
}
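Tallying the branches above for one concrete case: a COND_REDUCTION whose reduc_fn is supported, with ncopies == 1, records 4 scalar_to_vec entries in vect_prologue, 1 vector_stmt in vect_body, and in vect_epilogue 2 vector_stmt (the EQ and COND_EXPR), 2 vec_to_scalar (reductions of the max index and the found values) and 1 scalar_to_vec (broadcast of the max value). The conditions elided between the hunks select among the remaining epilogue variants.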
Models cost for induction operations. */
static void
-vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
+vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies,
+ stmt_vector_for_cost *cost_vec)
{
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
unsigned inside_cost, prologue_cost;
if (PURE_SLP_STMT (stmt_info))
return;
/* loop cost for vec_loop. */
- inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
- stmt_info, 0, vect_body);
+ inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
+ stmt_info, 0, vect_body);
/* prologue cost for vec_init and vec_step. */
- prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
- stmt_info, 0, vect_prologue);
+ prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
+ stmt_info, 0, vect_prologue);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
bool
vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
gimple **vec_stmt, slp_tree slp_node,
- slp_instance slp_node_instance)
+ slp_instance slp_node_instance,
+ stmt_vector_for_cost *cost_vec)
{
tree vec_dest;
tree scalar_dest;
/* Only call during the analysis stage, otherwise we'll lose
STMT_VINFO_TYPE. */
if (!vec_stmt && !vectorizable_condition (stmt, gsi, NULL,
- ops[reduc_index], 0, NULL))
+ ops[reduc_index], 0, NULL,
+ cost_vec))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
if (!vec_stmt) /* transformation not required. */
{
if (first_p)
- vect_model_reduction_cost (stmt_info, reduc_fn, ncopies);
+ vect_model_reduction_cost (stmt_info, reduc_fn, ncopies, cost_vec);
if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
{
if (reduction_type != FOLD_LEFT_REDUCTION
{
gcc_assert (!slp_node);
return vectorizable_condition (stmt, gsi, vec_stmt,
- NULL, reduc_index, NULL);
+ NULL, reduc_index, NULL, NULL);
}
/* Create the destination vector */
gcc_assert (!slp_node);
vectorizable_condition (stmt, gsi, vec_stmt,
PHI_RESULT (phis[0]),
- reduc_index, NULL);
+ reduc_index, NULL, NULL);
/* Multiple types are not supported for condition. */
break;
}
bool
vectorizable_induction (gimple *phi,
gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
- gimple **vec_stmt, slp_tree slp_node)
+ gimple **vec_stmt, slp_tree slp_node,
+ stmt_vector_for_cost *cost_vec)
{
stmt_vec_info stmt_info = vinfo_for_stmt (phi);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"=== vectorizable_induction ===\n");
- vect_model_induction_cost (stmt_info, ncopies);
+ vect_model_induction_cost (stmt_info, ncopies, cost_vec);
return true;
}
vectorizable_live_operation (gimple *stmt,
gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
slp_tree slp_node, int slp_index,
- gimple **vec_stmt)
+ gimple **vec_stmt,
+ stmt_vector_for_cost *)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
node = XNEW (struct _slp_tree);
SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
SLP_TREE_VEC_STMTS (node).create (0);
+ SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
SLP_TREE_CHILDREN (node).create (nops);
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
SLP_TREE_TWO_OPERATORS (node) = false;
typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;
+typedef hash_map <vec <gimple *>, slp_tree,
+ simple_hashmap_traits <bst_traits, slp_tree> >
+ scalar_stmts_to_slp_tree_map_t;
+
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
vec<gimple *> stmts, unsigned int group_size,
return last;
}
-/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
-
-static void
-vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
- stmt_vector_for_cost *prologue_cost_vec,
- stmt_vector_for_cost *body_cost_vec,
- unsigned ncopies_for_cost,
- scalar_stmts_set_t* visited)
-{
- unsigned i, j;
- slp_tree child;
- gimple *stmt;
- stmt_vec_info stmt_info;
- tree lhs;
-
- /* If we already costed the exact same set of scalar stmts we're done.
- We share the generated vector stmts for those. */
- if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
- return;
-
- visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());
-
- /* Recurse down the SLP tree. */
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
- vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
- body_cost_vec, ncopies_for_cost, visited);
-
- /* Look at the first scalar stmt to determine the cost. */
- stmt = SLP_TREE_SCALAR_STMTS (node)[0];
- stmt_info = vinfo_for_stmt (stmt);
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
- {
- vect_memory_access_type memory_access_type
- = (STMT_VINFO_STRIDED_P (stmt_info)
- ? VMAT_STRIDED_SLP
- : VMAT_CONTIGUOUS);
- if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
- vect_model_store_cost (stmt_info, ncopies_for_cost,
- memory_access_type, VLS_STORE,
- node, prologue_cost_vec, body_cost_vec);
- else
- {
- gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
- if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
- {
- /* If the load is permuted then the alignment is determined by
- the first group element not by the first scalar stmt DR. */
- stmt = GROUP_FIRST_ELEMENT (stmt_info);
- stmt_info = vinfo_for_stmt (stmt);
- /* Record the cost for the permutation. */
- unsigned n_perms;
- vect_transform_slp_perm_load (node, vNULL, NULL,
- ncopies_for_cost, instance, true,
- &n_perms);
- record_stmt_cost (body_cost_vec, n_perms, vec_perm,
- stmt_info, 0, vect_body);
- unsigned assumed_nunits
- = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
- /* And adjust the number of loads performed. This handles
- redundancies as well as loads that are later dead. */
- auto_sbitmap perm (GROUP_SIZE (stmt_info));
- bitmap_clear (perm);
- for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
- bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
- ncopies_for_cost = 0;
- bool load_seen = false;
- for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
- {
- if (i % assumed_nunits == 0)
- {
- if (load_seen)
- ncopies_for_cost++;
- load_seen = false;
- }
- if (bitmap_bit_p (perm, i))
- load_seen = true;
- }
- if (load_seen)
- ncopies_for_cost++;
- gcc_assert (ncopies_for_cost
- <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
- + assumed_nunits - 1) / assumed_nunits);
- poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance);
- ncopies_for_cost *= estimated_poly_value (uf);
- }
- /* Record the cost for the vector loads. */
- vect_model_load_cost (stmt_info, ncopies_for_cost,
- memory_access_type, node, prologue_cost_vec,
- body_cost_vec);
- return;
- }
- }
- else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
- {
- /* ncopies_for_cost is the number of IVs we generate. */
- record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
- stmt_info, 0, vect_body);
-
- /* Prologue cost for the initial values and step vector. */
- record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
- CONSTANT_CLASS_P
- (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
- (stmt_info))
- ? vector_load : vec_construct,
- stmt_info, 0, vect_prologue);
- record_stmt_cost (prologue_cost_vec, 1,
- CONSTANT_CLASS_P
- (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
- ? vector_load : vec_construct,
- stmt_info, 0, vect_prologue);
-
- /* ??? No easy way to get at the actual number of vector stmts
- to be geneated and thus the derived IVs. */
- }
- else
- {
- record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
- stmt_info, 0, vect_body);
- if (SLP_TREE_TWO_OPERATORS (node))
- {
- record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
- stmt_info, 0, vect_body);
- record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
- stmt_info, 0, vect_body);
- }
- }
-
- /* Push SLP node def-type to stmts. */
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
- STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
-
- /* Scan operands and account for prologue cost of constants/externals.
- ??? This over-estimates cost for multiple uses and should be
- re-engineered. */
- stmt = SLP_TREE_SCALAR_STMTS (node)[0];
- lhs = gimple_get_lhs (stmt);
- for (i = 0; i < gimple_num_ops (stmt); ++i)
- {
- tree op = gimple_op (stmt, i);
- gimple *def_stmt;
- enum vect_def_type dt;
- if (!op || op == lhs)
- continue;
- if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt)
- && (dt == vect_constant_def || dt == vect_external_def))
- {
- /* Without looking at the actual initializer a vector of
- constants can be implemented as load from the constant pool.
- When all elements are the same we can use a splat. */
- tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
- unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
- unsigned num_vects_to_check;
- unsigned HOST_WIDE_INT const_nunits;
- unsigned nelt_limit;
- if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
- && ! multiple_p (const_nunits, group_size))
- {
- num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
- nelt_limit = const_nunits;
- }
- else
- {
- /* If either the vector has variable length or the vectors
- are composed of repeated whole groups we only need to
- cost construction once. All vectors will be the same. */
- num_vects_to_check = 1;
- nelt_limit = group_size;
- }
- tree elt = NULL_TREE;
- unsigned nelt = 0;
- for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
- {
- unsigned si = j % group_size;
- if (nelt == 0)
- elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], i);
- /* ??? We're just tracking whether all operands of a single
- vector initializer are the same, ideally we'd check if
- we emitted the same one already. */
- else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], i))
- elt = NULL_TREE;
- nelt++;
- if (nelt == nelt_limit)
- {
- /* ??? We need to pass down stmt_info for a vector type
- even if it points to the wrong stmt. */
- record_stmt_cost (prologue_cost_vec, 1,
- dt == vect_external_def
- ? (elt ? scalar_to_vec : vec_construct)
- : vector_load,
- stmt_info, 0, vect_prologue);
- nelt = 0;
- }
- }
- }
- }
-
- /* Restore stmt def-types. */
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
- STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
-}
-
-/* Compute the cost for the SLP instance INSTANCE. */
-
-static void
-vect_analyze_slp_cost (slp_instance instance, void *data, scalar_stmts_set_t *visited)
-{
- stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
- unsigned ncopies_for_cost;
- stmt_info_for_cost *si;
- unsigned i;
-
- /* Calculate the number of vector stmts to create based on the unrolling
- factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
- GROUP_SIZE / NUNITS otherwise. */
- unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
- slp_tree node = SLP_INSTANCE_TREE (instance);
- stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
- /* Get the estimated vectorization factor, which is always one for
- basic-block vectorization. */
- unsigned int assumed_vf;
- if (STMT_VINFO_LOOP_VINFO (stmt_info))
- assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info));
- else
- assumed_vf = 1;
- /* For reductions look at a reduction operand in case the reduction
- operation is widening like DOT_PROD or SAD. */
- tree vectype_for_cost = STMT_VINFO_VECTYPE (stmt_info);
- if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
- {
- gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
- switch (gimple_assign_rhs_code (stmt))
- {
- case DOT_PROD_EXPR:
- case SAD_EXPR:
- vectype_for_cost = get_vectype_for_scalar_type
- (TREE_TYPE (gimple_assign_rhs1 (stmt)));
- break;
- default:;
- }
- }
- unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost);
- ncopies_for_cost = (least_common_multiple (assumed_nunits,
- group_size * assumed_vf)
- / assumed_nunits);
-
- prologue_cost_vec.create (10);
- body_cost_vec.create (10);
- vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
- &prologue_cost_vec, &body_cost_vec,
- ncopies_for_cost, visited);
-
- /* Record the prologue costs, which were delayed until we were
- sure that SLP was successful. */
- FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
- {
- struct _stmt_vec_info *stmt_info
- = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
- (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
- si->misalign, vect_prologue);
- }
-
- /* Record the instance's instructions in the target cost model. */
- FOR_EACH_VEC_ELT (body_cost_vec, i, si)
- {
- struct _stmt_vec_info *stmt_info
- = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
- (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
- si->misalign, vect_body);
- }
-
- prologue_cost_vec.release ();
- body_cost_vec.release ();
-}
-
/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
the first GROUP1_SIZE stmts, since stores are consecutive), the second
static bool
vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
- slp_instance node_instance)
+ slp_instance node_instance,
+ scalar_stmts_to_slp_tree_map_t *visited,
+ scalar_stmts_to_slp_tree_map_t *lvisited,
+ stmt_vector_for_cost *cost_vec)
{
bool dummy;
int i, j;
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return true;
+ /* If we already analyzed the exact same set of scalar stmts we're done.
+ We share the generated vector stmts for those. */
+ slp_tree *leader;
+ if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
+ || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
+ {
+ SLP_TREE_NUMBER_OF_VEC_STMTS (node)
+ = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
+ return true;
+ }
+
+ /* The SLP graph is acyclic so not caching whether we failed or succeeded
+ doesn't result in any issue since we throw away the lvisited set
+ when we fail. */
+ lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
+
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
+ if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
+ visited, lvisited, cost_vec))
return false;
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
= SLP_TREE_DEF_TYPE (child);
- bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
+ bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);
/* Restore def-types. */
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
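The visited/lvisited pair above is a two-level memo: lvisited collects the nodes of the instance currently being analyzed and is merged into the global visited map only when the whole instance survives (see the vect_slp_analyze_operations hunk below), so a failed instance leaves no stale entries behind. A compact standalone sketch of that pattern, with std::map standing in for GCC's hash_map and an int standing in for the slp_tree:

  #include <map>
  #include <vector>
  #include <cassert>

  typedef std::vector<int> stmts_key;       /* stands in for the scalar stmts  */
  typedef std::map<stmts_key, int> memo_t;  /* value stands in for the slp_tree  */

  /* Analyze one node; new successful nodes are recorded in LVISITED only.  */
  static bool
  analyze_node (const stmts_key &stmts, const memo_t &visited, memo_t &lvisited)
  {
    if (visited.count (stmts) || lvisited.count (stmts))
      return true;                          /* share the already-analyzed node  */
    lvisited[stmts] = (int) stmts.size ();
    return !stmts.empty ();                 /* pretend empty sets fail  */
  }

  int
  main ()
  {
    memo_t visited;
    stmts_key stmts = {1, 2};
    {
      memo_t lvisited;                      /* per-instance scratch map  */
      if (analyze_node (stmts, visited, lvisited))
        for (memo_t::iterator x = lvisited.begin (); x != lvisited.end (); ++x)
          visited.insert (*x);              /* success: publish local entries  */
      /* On failure lvisited is simply dropped; VISITED stays clean.  */
    }
    assert (visited.count (stmts));
    return 0;
  }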
dump_printf_loc (MSG_NOTE, vect_location,
"=== vect_slp_analyze_operations ===\n");
+ scalar_stmts_to_slp_tree_map_t *visited
+ = new scalar_stmts_to_slp_tree_map_t ();
for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
{
+ scalar_stmts_to_slp_tree_map_t lvisited;
+ stmt_vector_for_cost cost_vec;
+ cost_vec.create (2);
if (!vect_slp_analyze_node_operations (vinfo,
SLP_INSTANCE_TREE (instance),
- instance))
+ instance, visited, &lvisited,
+ &cost_vec))
{
dump_printf_loc (MSG_NOTE, vect_location,
"removing SLP instance operations starting from: ");
(SLP_INSTANCE_TREE (instance))[0], 0);
vect_free_slp_instance (instance);
vinfo->slp_instances.ordered_remove (i);
+ cost_vec.release ();
}
else
- i++;
- }
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_analyze_slp_cost ===\n");
+ {
+ for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
+ x != lvisited.end(); ++x)
+ visited->put ((*x).first.copy (), (*x).second);
+ i++;
- /* Compute the costs of the SLP instances. */
- scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
- for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
- vect_analyze_slp_cost (instance, vinfo->target_cost_data, visited);
+ add_stmt_costs (vinfo->target_cost_data, &cost_vec);
+ cost_vec.release ();
+ }
+ }
delete visited;
return !vinfo->slp_instances.is_empty ();
&life, &scalar_costs);
}
void *target_cost_data = init_cost (NULL);
- stmt_info_for_cost *si;
- FOR_EACH_VEC_ELT (scalar_costs, i, si)
- {
- struct _stmt_vec_info *stmt_info
- = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
- (void) add_stmt_cost (target_cost_data, si->count,
- si->kind, stmt_info, si->misalign,
- vect_body);
- }
+ add_stmt_costs (target_cost_data, &scalar_costs);
scalar_costs.release ();
unsigned dummy;
finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
return true;
}
-typedef hash_map <vec <gimple *>, slp_tree,
- simple_hashmap_traits <bst_traits, slp_tree> >
- scalar_stmts_to_slp_tree_map_t;
-
/* Vectorize SLP instance tree in postorder. */
static bool
if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
{
SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
- SLP_TREE_NUMBER_OF_VEC_STMTS (node)
- = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
return false;
}
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
group_size = SLP_INSTANCE_GROUP_SIZE (instance);
+ gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
if (!SLP_TREE_VEC_STMTS (node).exists ())
SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
if ((kind == vector_store || kind == unaligned_store)
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info))
kind = vector_scatter_store;
- if (body_cost_vec)
- {
- tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
- stmt_info_for_cost si = { count, kind,
- stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
- misalign };
- body_cost_vec->safe_push (si);
- return (unsigned)
- (builtin_vectorization_cost (kind, vectype, misalign) * count);
- }
- else
- return add_stmt_cost (stmt_info->vinfo->target_cost_data,
- count, kind, stmt_info, misalign, where);
+
+ stmt_info_for_cost si = { count, kind, where,
+ stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
+ misalign };
+ body_cost_vec->safe_push (si);
+
+ tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+ return (unsigned)
+ (builtin_vectorization_cost (kind, vectype, misalign) * count);
}
/* Return a variable of type ELEM_TYPE[NELEMS]. */
return true;
}
+/* Compute the prologue cost for invariant or constant operands. */
+
+static unsigned
+vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
+ unsigned opno, enum vect_def_type dt,
+ stmt_vector_for_cost *cost_vec)
+{
+ gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
+ tree op = gimple_op (stmt, opno);
+ unsigned prologue_cost = 0;
+
+ /* Without looking at the actual initializer a vector of
+ constants can be implemented as load from the constant pool.
+ When all elements are the same we can use a splat. */
+ tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
+ unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
+ unsigned num_vects_to_check;
+ unsigned HOST_WIDE_INT const_nunits;
+ unsigned nelt_limit;
+ if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
+ && ! multiple_p (const_nunits, group_size))
+ {
+ num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+ nelt_limit = const_nunits;
+ }
+ else
+ {
+ /* If either the vector has variable length or the vectors
+ are composed of repeated whole groups we only need to
+ cost construction once. All vectors will be the same. */
+ num_vects_to_check = 1;
+ nelt_limit = group_size;
+ }
+ tree elt = NULL_TREE;
+ unsigned nelt = 0;
+ for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
+ {
+ unsigned si = j % group_size;
+ if (nelt == 0)
+ elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], opno);
+ /* ??? We're just tracking whether all operands of a single
+ vector initializer are the same, ideally we'd check if
+ we emitted the same one already. */
+ else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si],
+ opno))
+ elt = NULL_TREE;
+ nelt++;
+ if (nelt == nelt_limit)
+ {
+ /* ??? We need to pass down stmt_info for a vector type
+ even if it points to the wrong stmt. */
+ prologue_cost += record_stmt_cost
+ (cost_vec, 1,
+ dt == vect_external_def
+ ? (elt ? scalar_to_vec : vec_construct)
+ : vector_load,
+ stmt_info, 0, vect_prologue);
+ nelt = 0;
+ }
+ }
+
+ return prologue_cost;
+}
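The loop above emits one cost entry per NELT_LIMIT operands and downgrades the entry from vec_construct to scalar_to_vec when all lanes of that initializer are equal (a splat). A standalone sketch of just that classification, using ints for operands and invented names:

  #include <vector>
  #include <string>
  #include <cstdio>

  /* Classify each vector initializer built from OPS, NELT_LIMIT lanes at
     a time: a splat (all lanes equal) is cheaper than a construction.  */
  static std::vector<std::string>
  splat_or_construct (const std::vector<int> &ops, unsigned nelt_limit)
  {
    std::vector<std::string> kinds;
    int elt = 0;
    unsigned nelt = 0;
    bool same = true;
    for (unsigned j = 0; j < ops.size (); ++j)
      {
        if (nelt == 0)
          {
            elt = ops[j];
            same = true;
          }
        else if (ops[j] != elt)
          same = false;
        if (++nelt == nelt_limit)
          {
            kinds.push_back (same ? "scalar_to_vec" : "vec_construct");
            nelt = 0;
          }
      }
    return kinds;
  }

  int
  main ()
  {
    /* Two 4-lane initializers: {5,5,5,5} is a splat, {1,2,3,4} is not.  */
    std::vector<int> ops = {5, 5, 5, 5, 1, 2, 3, 4};
    std::vector<std::string> kinds = splat_or_construct (ops, 4);
    for (unsigned i = 0; i < kinds.size (); ++i)
      std::printf ("%s\n", kinds[i].c_str ());
    return 0;
  }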
/* Function vect_model_simple_cost.
single op. Right now, this does not account for multiple insns that could
be generated for the single vector op. We will handle that shortly. */
-void
+static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
enum vect_def_type *dt,
int ndts,
- stmt_vector_for_cost *prologue_cost_vec,
- stmt_vector_for_cost *body_cost_vec)
+ slp_tree node,
+ stmt_vector_for_cost *cost_vec)
{
- int i;
int inside_cost = 0, prologue_cost = 0;
- /* The SLP costs were already calculated during SLP tree build. */
- gcc_assert (!PURE_SLP_STMT (stmt_info));
+ gcc_assert (cost_vec != NULL);
- /* Cost the "broadcast" of a scalar operand in to a vector operand.
- Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
- cost model. */
- for (i = 0; i < ndts; i++)
- if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
- prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
- stmt_info, 0, vect_prologue);
+ /* ??? Somehow we need to fix this at the callers. */
+ if (node)
+ ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+
+ if (node)
+ {
+ /* Scan operands and account for prologue cost of constants/externals.
+ ??? This over-estimates cost for multiple uses and should be
+ re-engineered. */
+ gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
+ tree lhs = gimple_get_lhs (stmt);
+ for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
+ {
+ tree op = gimple_op (stmt, i);
+ gimple *def_stmt;
+ enum vect_def_type dt;
+ if (!op || op == lhs)
+ continue;
+ if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt)
+ && (dt == vect_constant_def || dt == vect_external_def))
+ prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
+ i, dt, cost_vec);
+ }
+ }
+ else
+ /* Cost the "broadcast" of a scalar operand in to a vector operand.
+ Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
+ cost model. */
+ for (int i = 0; i < ndts; i++)
+ if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
+ prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
+ stmt_info, 0, vect_prologue);
+
+ /* Adjust for two-operator SLP nodes. */
+ if (node && SLP_TREE_TWO_OPERATORS (node))
+ {
+ ncopies *= 2;
+ inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
+ stmt_info, 0, vect_body);
+ }
/* Pass the inside-of-loop statements to the target-specific cost model. */
- inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
- stmt_info, 0, vect_body);
+ inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
+ stmt_info, 0, vect_body);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
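Read against the SLP path above: a two-operator node with SLP_TREE_NUMBER_OF_VEC_STMTS == 2 ends up recording 4 vec_perm plus 4 vector_stmt entries in vect_body, since ncopies is doubled before both record_stmt_cost calls; the non-SLP path is unchanged except that it now records into cost_vec.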
static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
- enum vect_def_type *dt, int pwr)
+ enum vect_def_type *dt, int pwr,
+ stmt_vector_for_cost *cost_vec)
{
int i, tmp;
int inside_cost = 0, prologue_cost = 0;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- void *target_cost_data;
-
- /* The SLP costs were already calculated during SLP tree build. */
- gcc_assert (!PURE_SLP_STMT (stmt_info));
-
- if (loop_vinfo)
- target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
- else
- target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
for (i = 0; i < pwr + 1; i++)
{
tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
(i + 1) : i;
- inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
- vec_promote_demote, stmt_info, 0,
- vect_body);
+ inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
+ vec_promote_demote, stmt_info, 0,
+ vect_body);
}
/* FORNOW: Assuming maximum 2 args per stmts. */
for (i = 0; i < 2; i++)
if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
- prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
- stmt_info, 0, vect_prologue);
+ prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
+ stmt_info, 0, vect_prologue);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
Models cost for stores. In the case of grouped accesses, one access
has the overhead of the grouped access attributed to it. */
-void
+static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
+ enum vect_def_type dt,
vect_memory_access_type memory_access_type,
vec_load_store_type vls_type, slp_tree slp_node,
- stmt_vector_for_cost *prologue_cost_vec,
- stmt_vector_for_cost *body_cost_vec)
+ stmt_vector_for_cost *cost_vec)
{
unsigned int inside_cost = 0, prologue_cost = 0;
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
+ /* ??? Somehow we need to fix this at the callers. */
+ if (slp_node)
+ ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+
if (vls_type == VLS_STORE_INVARIANT)
- prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
- stmt_info, 0, vect_prologue);
+ {
+ if (slp_node)
+ prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
+ 1, dt, cost_vec);
+ else
+ prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
+ stmt_info, 0, vect_prologue);
+ }
/* Grouped stores update all elements in the group at once,
so we want the DR for the first statement. */
needed permute. */
int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
int nstmts = ncopies * ceil_log2 (group_size) * group_size;
- inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
+ inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
stmt_info, 0, vect_body);
if (dump_enabled_p ())
{
/* N scalar stores plus extracting the elements. */
unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
- inside_cost += record_stmt_cost (body_cost_vec,
+ inside_cost += record_stmt_cost (cost_vec,
ncopies * assumed_nunits,
scalar_store, stmt_info, 0, vect_body);
}
else
- vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
+ vect_get_store_cost (dr, ncopies, &inside_cost, cost_vec);
if (memory_access_type == VMAT_ELEMENTWISE
|| memory_access_type == VMAT_STRIDED_SLP)
{
/* N scalar stores plus extracting the elements. */
unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
- inside_cost += record_stmt_cost (body_cost_vec,
+ inside_cost += record_stmt_cost (cost_vec,
ncopies * assumed_nunits,
vec_to_scalar, stmt_info, 0, vect_body);
}
accesses are supported for loads, we also account for the costs of the
access scheme chosen. */
-void
-vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
+static void
+vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
vect_memory_access_type memory_access_type,
+ slp_instance instance,
slp_tree slp_node,
- stmt_vector_for_cost *prologue_cost_vec,
- stmt_vector_for_cost *body_cost_vec)
+ stmt_vector_for_cost *cost_vec)
{
gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
unsigned int inside_cost = 0, prologue_cost = 0;
bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
+ gcc_assert (cost_vec);
+
+ /* ??? Somehow we need to fix this at the callers. */
+ if (slp_node)
+ ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+
+ if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
+ {
+ /* If the load is permuted then the alignment is determined by
+ the first group element not by the first scalar stmt DR. */
+ gimple *stmt = GROUP_FIRST_ELEMENT (stmt_info);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ /* Record the cost for the permutation. */
+ unsigned n_perms;
+ unsigned assumed_nunits
+ = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
+ unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
+ vect_transform_slp_perm_load (slp_node, vNULL, NULL,
+ slp_vf, instance, true,
+ &n_perms);
+ inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
+ stmt_info, 0, vect_body);
+ /* And adjust the number of loads performed. This handles
+ redundancies as well as loads that are later dead. */
+ auto_sbitmap perm (GROUP_SIZE (stmt_info));
+ bitmap_clear (perm);
+ for (unsigned i = 0;
+ i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
+ bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
+ ncopies = 0;
+ bool load_seen = false;
+ for (unsigned i = 0; i < GROUP_SIZE (stmt_info); ++i)
+ {
+ if (i % assumed_nunits == 0)
+ {
+ if (load_seen)
+ ncopies++;
+ load_seen = false;
+ }
+ if (bitmap_bit_p (perm, i))
+ load_seen = true;
+ }
+ if (load_seen)
+ ncopies++;
+ gcc_assert (ncopies
+ <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
+ + assumed_nunits - 1) / assumed_nunits);
+ }
+
/* Grouped loads read all elements in the group at once,
so we want the DR for the first statement. */
if (!slp_node && grouped_access_p)
for each needed permute. */
int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
int nstmts = ncopies * ceil_log2 (group_size) * group_size;
- inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
- stmt_info, 0, vect_body);
+ inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
+ stmt_info, 0, vect_body);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
/* N scalar loads plus gathering them into a vector. */
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
- inside_cost += record_stmt_cost (body_cost_vec,
+ inside_cost += record_stmt_cost (cost_vec,
ncopies * assumed_nunits,
scalar_load, stmt_info, 0, vect_body);
}
else
vect_get_load_cost (dr, ncopies, first_stmt_p,
&inside_cost, &prologue_cost,
- prologue_cost_vec, body_cost_vec, true);
+ cost_vec, cost_vec, true);
if (memory_access_type == VMAT_ELEMENTWISE
|| memory_access_type == VMAT_STRIDED_SLP)
- inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
+ inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
stmt_info, 0, vect_body);
if (dump_enabled_p ())
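The permutation scan above only counts vector loads that contain at least one live lane: the group is walked in chunks of ASSUMED_NUNITS, and a chunk contributes one load iff some permuted index falls inside it, which drops both redundant and dead loads from the cost. A runnable sketch of that count (count_needed_loads is an invented name):

  #include <vector>
  #include <cassert>

  /* Count vector loads needed for GROUP_SIZE elements loaded NUNITS at a
     time when only the indices in PERM are used, mirroring the bitmap
     walk in vect_model_load_cost.  */
  static unsigned
  count_needed_loads (unsigned group_size, unsigned nunits,
                      const std::vector<unsigned> &perm)
  {
    std::vector<bool> used (group_size, false);
    for (unsigned i = 0; i < perm.size (); ++i)
      used[perm[i]] = true;
    unsigned loads = 0;
    bool load_seen = false;
    for (unsigned i = 0; i < group_size; ++i)
      {
        if (i % nunits == 0)
          {
            if (load_seen)
              loads++;
            load_seen = false;
          }
        if (used[i])
          load_seen = true;
      }
    if (load_seen)
      loads++;
    return loads;
  }

  int
  main ()
  {
    /* Group of 8 loaded 4 at a time, but only lanes 0..2 are used:
       the second vector load is dead and is not costed.  */
    std::vector<unsigned> perm = {0, 1, 2, 1};
    assert (count_needed_loads (8, 4, perm) == 1);
    return 0;
  }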
static bool
vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
gimple **vec_stmt, slp_tree slp_node,
- tree vectype_in, enum vect_def_type *dt)
+ tree vectype_in, enum vect_def_type *dt,
+ stmt_vector_for_cost *cost_vec)
{
tree op, vectype;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
"\n");
if (! slp_node)
{
- add_stmt_cost (stmt_info->vinfo->target_cost_data,
- 1, vector_stmt, stmt_info, 0, vect_prologue);
- add_stmt_cost (stmt_info->vinfo->target_cost_data,
- ncopies, vec_perm, stmt_info, 0, vect_body);
+ record_stmt_cost (cost_vec,
+ 1, vector_stmt, stmt_info, 0, vect_prologue);
+ record_stmt_cost (cost_vec,
+ ncopies, vec_perm, stmt_info, 0, vect_body);
}
return true;
}
static bool
vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
- slp_tree slp_node)
+ slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
gcall *stmt;
tree vec_dest;
|| gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
|| gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
- vectype_in, dt);
+ vectype_in, dt, cost_vec);
else
{
if (dump_enabled_p ())
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
"\n");
- if (!slp_node)
- {
- vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
- if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
- add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
- vec_promote_demote, stmt_info, 0, vect_body);
- }
+ vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
+ if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
+ record_stmt_cost (cost_vec, ncopies / 2,
+ vec_promote_demote, stmt_info, 0, vect_body);
return true;
}
static bool
vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
- gimple **vec_stmt, slp_tree slp_node)
+ gimple **vec_stmt, slp_tree slp_node,
+ stmt_vector_for_cost *)
{
tree vec_dest;
tree scalar_dest;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"=== vectorizable_simd_clone_call ===\n");
-/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
+/* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
return true;
}
static bool
vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
- gimple **vec_stmt, slp_tree slp_node)
+ gimple **vec_stmt, slp_tree slp_node,
+ stmt_vector_for_cost *cost_vec)
{
tree vec_dest;
tree scalar_dest;
if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
{
STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
- if (!slp_node)
- vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
+ vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
+ cost_vec);
}
else if (modifier == NARROW)
{
STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
- if (!slp_node)
- vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
+ vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
+ cost_vec);
}
else
{
STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
- if (!slp_node)
- vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
+ vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
+ cost_vec);
}
interm_types.release ();
return true;
static bool
vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
- gimple **vec_stmt, slp_tree slp_node)
+ gimple **vec_stmt, slp_tree slp_node,
+ stmt_vector_for_cost *cost_vec)
{
tree vec_dest;
tree scalar_dest;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"=== vectorizable_assignment ===\n");
- if (!slp_node)
- vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
+ vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
return true;
}
static bool
vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
- gimple **vec_stmt, slp_tree slp_node)
+ gimple **vec_stmt, slp_tree slp_node,
+ stmt_vector_for_cost *cost_vec)
{
tree vec_dest;
tree scalar_dest;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"=== vectorizable_shift ===\n");
- if (!slp_node)
- vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
+ vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
return true;
}
static bool
vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
- gimple **vec_stmt, slp_tree slp_node)
+ gimple **vec_stmt, slp_tree slp_node,
+ stmt_vector_for_cost *cost_vec)
{
tree vec_dest;
tree scalar_dest;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"=== vectorizable_operation ===\n");
- if (!slp_node)
- vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
+ vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
return true;
}
static bool
vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
- slp_tree slp_node)
+ slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
tree data_ref;
tree op;
memory_access_type, &gs_info);
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
- /* The SLP costs are calculated during SLP analysis. */
- if (!slp_node)
- vect_model_store_cost (stmt_info, ncopies, memory_access_type,
- vls_type, NULL, NULL, NULL);
+ vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
+ vls_type, slp_node, cost_vec);
return true;
}
gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
static bool
vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
- slp_tree slp_node, slp_instance slp_node_instance)
+ slp_tree slp_node, slp_instance slp_node_instance,
+ stmt_vector_for_cost *cost_vec)
{
tree scalar_dest;
tree vec_dest = NULL;
memory_access_type, &gs_info);
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
- /* The SLP costs are calculated during SLP analysis. */
- if (! slp_node)
- vect_model_load_cost (stmt_info, ncopies, memory_access_type,
- NULL, NULL, NULL);
+ vect_model_load_cost (stmt_info, ncopies, memory_access_type,
+ slp_node_instance, slp_node, cost_vec);
return true;
}
bool
vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
gimple **vec_stmt, tree reduc_def, int reduc_index,
- slp_tree slp_node)
+ slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
tree scalar_dest = NULL_TREE;
tree vec_dest = NULL_TREE;
if (!vec_stmt)
{
- STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
if (bitop1 != NOP_EXPR)
{
machine_mode mode = TYPE_MODE (comp_vectype);
if (expand_vec_cond_expr_p (vectype, comp_vectype,
cond_code))
{
- if (!slp_node)
- vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
+ STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
+ vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
+ cost_vec);
return true;
}
return false;
static bool
vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
gimple **vec_stmt, tree reduc_def,
- slp_tree slp_node)
+ slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
tree lhs, rhs1, rhs2;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
if (!vec_stmt)
{
- STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
- if (!slp_node)
- vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
- dts, ndts, NULL, NULL);
if (bitop1 == NOP_EXPR)
- return expand_vec_cmp_expr_p (vectype, mask_type, code);
+ {
+ if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
+ return false;
+ }
else
{
machine_mode mode = TYPE_MODE (vectype);
if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
return false;
}
- return true;
}
+
+ STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
+ vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
+ dts, ndts, slp_node, cost_vec);
+ return true;
}
/* Transform. */
static bool
can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
- slp_tree slp_node, gimple **vec_stmt)
+ slp_tree slp_node, gimple **vec_stmt,
+ stmt_vector_for_cost *cost_vec)
{
if (slp_node)
{
stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
if (STMT_VINFO_LIVE_P (slp_stmt_info)
&& !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
- vec_stmt))
+ vec_stmt, cost_vec))
return false;
}
}
else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
- && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
+ && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt,
+ cost_vec))
return false;
return true;
bool
vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
- slp_instance node_instance)
+ slp_instance node_instance, stmt_vector_for_cost *cost_vec)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
}
if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
- node_instance))
+ node_instance, cost_vec))
return false;
}
}
if (!vect_analyze_stmt (pattern_def_stmt,
- need_to_vectorize, node, node_instance))
+ need_to_vectorize, node, node_instance,
+ cost_vec))
return false;
}
}
if (!bb_vinfo
&& (STMT_VINFO_RELEVANT_P (stmt_info)
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
- ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
- || vectorizable_conversion (stmt, NULL, NULL, node)
- || vectorizable_shift (stmt, NULL, NULL, node)
- || vectorizable_operation (stmt, NULL, NULL, node)
- || vectorizable_assignment (stmt, NULL, NULL, node)
- || vectorizable_load (stmt, NULL, NULL, node, NULL)
- || vectorizable_call (stmt, NULL, NULL, node)
- || vectorizable_store (stmt, NULL, NULL, node)
- || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
- || vectorizable_induction (stmt, NULL, NULL, node)
- || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
- || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
+ ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_load (stmt, NULL, NULL, node, node_instance, cost_vec)
+ || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_reduction (stmt, NULL, NULL, node, node_instance,
+ cost_vec)
+ || vectorizable_induction (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node, cost_vec)
+ || vectorizable_comparison (stmt, NULL, NULL, NULL, node, cost_vec));
else
{
if (bb_vinfo)
- ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
- || vectorizable_conversion (stmt, NULL, NULL, node)
- || vectorizable_shift (stmt, NULL, NULL, node)
- || vectorizable_operation (stmt, NULL, NULL, node)
- || vectorizable_assignment (stmt, NULL, NULL, node)
- || vectorizable_load (stmt, NULL, NULL, node, NULL)
- || vectorizable_call (stmt, NULL, NULL, node)
- || vectorizable_store (stmt, NULL, NULL, node)
- || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
- || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
+ ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_load (stmt, NULL, NULL, node, node_instance,
+ cost_vec)
+ || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
+ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
+ cost_vec)
+ || vectorizable_comparison (stmt, NULL, NULL, NULL, node,
+ cost_vec));
}
if (!ok)
return false;
}
- if (bb_vinfo)
- return true;
-
/* Stmts that are (also) "live" (i.e. - that are used out of the loop)
need extra handling, except for vectorizable reductions. */
- if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
- && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
+ if (!bb_vinfo
+ && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
+ && !can_vectorize_live_stmts (stmt, NULL, node, NULL, cost_vec))
{
if (dump_enabled_p ())
{
case type_demotion_vec_info_type:
case type_promotion_vec_info_type:
case type_conversion_vec_info_type:
- done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
+ done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node, NULL);
gcc_assert (done);
break;
case induc_vec_info_type:
- done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
+ done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node, NULL);
gcc_assert (done);
break;
case shift_vec_info_type:
- done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
+ done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node, NULL);
gcc_assert (done);
break;
case op_vec_info_type:
- done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
+ done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node, NULL);
gcc_assert (done);
break;
case assignment_vec_info_type:
- done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
+ done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node, NULL);
gcc_assert (done);
break;
case load_vec_info_type:
done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
- slp_node_instance);
+ slp_node_instance, NULL);
gcc_assert (done);
break;
case store_vec_info_type:
- done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
+ done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node, NULL);
gcc_assert (done);
if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
{
break;
case condition_vec_info_type:
- done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
+ done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node, NULL);
gcc_assert (done);
break;
case comparison_vec_info_type:
- done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
+ done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node, NULL);
gcc_assert (done);
break;
case call_vec_info_type:
- done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
+ done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node, NULL);
stmt = gsi_stmt (*gsi);
break;
case call_simd_clone_vec_info_type:
- done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
+ done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node, NULL);
stmt = gsi_stmt (*gsi);
break;
case reduc_vec_info_type:
done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
- slp_node_instance);
+ slp_node_instance, NULL);
gcc_assert (done);
break;
being vectorized. */
if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
{
- done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
+ done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt, NULL);
gcc_assert (done);
}
#include "tree-scalar-evolution.h"
#include "stringpool.h"
#include "attribs.h"
+#include "gimple-pretty-print.h"
/* Loop or bb location. */
/* Vector mapping GIMPLE stmt to stmt_vec_info. */
vec<stmt_vec_info> stmt_vec_info_vec;
+
+/* Dump a cost entry according to args to F. */
+
+void
+dump_stmt_cost (FILE *f, void *data, int count, enum vect_cost_for_stmt kind,
+ stmt_vec_info stmt_info, int misalign,
+ enum vect_cost_model_location where)
+{
+ fprintf (f, "%p ", data);
+ if (stmt_info)
+ {
+ print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
+ fprintf (f, " ");
+ }
+ else
+ fprintf (f, "<unknown> ");
+ fprintf (f, "%d times ", count);
+ const char *ks = "unknown";
+ switch (kind)
+ {
+ case scalar_stmt:
+ ks = "scalar_stmt";
+ break;
+ case scalar_load:
+ ks = "scalar_load";
+ break;
+ case scalar_store:
+ ks = "scalar_store";
+ break;
+ case vector_stmt:
+ ks = "vector_stmt";
+ break;
+ case vector_load:
+ ks = "vector_load";
+ break;
+ case vector_gather_load:
+ ks = "vector_gather_load";
+ break;
+ case unaligned_load:
+ ks = "unaligned_load";
+ break;
+ case unaligned_store:
+ ks = "unaligned_store";
+ break;
+ case vector_store:
+ ks = "vector_store";
+ break;
+ case vector_scatter_store:
+ ks = "vector_scatter_store";
+ break;
+ case vec_to_scalar:
+ ks = "vec_to_scalar";
+ break;
+ case scalar_to_vec:
+ ks = "scalar_to_vec";
+ break;
+ case cond_branch_not_taken:
+ ks = "cond_branch_not_taken";
+ break;
+ case cond_branch_taken:
+ ks = "cond_branch_taken";
+ break;
+ case vec_perm:
+ ks = "vec_perm";
+ break;
+ case vec_promote_demote:
+ ks = "vec_promote_demote";
+ break;
+ case vec_construct:
+ ks = "vec_construct";
+ break;
+ }
+ fprintf (f, "%s ", ks);
+ if (kind == unaligned_load || kind == unaligned_store)
+ fprintf (f, "(misalign %d) ", misalign);
+ const char *ws = "unknown";
+ switch (where)
+ {
+ case vect_prologue:
+ ws = "prologue";
+ break;
+ case vect_body:
+ ws = "body";
+ break;
+ case vect_epilogue:
+ ws = "epilogue";
+ break;
+ }
+ fprintf (f, "in %s\n", ws);
+}
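For reference, each entry prints as one line in the dump file; e.g. (pointer value and statements purely illustrative):

  0x38e9ac0 _4 + _5 1 times vector_stmt in body
  0x38e9ac0 *dst_9 1 times unaligned_store (misalign 8) in body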
\f
/* For mapping simduid to vectorization factor. */
struct stmt_info_for_cost {
int count;
enum vect_cost_for_stmt kind;
+ enum vect_cost_model_location where;
gimple *stmt;
int misalign;
};
return targetm.vectorize.init_cost (loop_info);
}
+extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
+ stmt_vec_info, int, enum vect_cost_model_location);
+
/* Alias targetm.vectorize.add_stmt_cost. */
static inline unsigned
stmt_vec_info stmt_info, int misalign,
enum vect_cost_model_location where)
{
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ dump_stmt_cost (dump_file, data, count, kind, stmt_info, misalign, where);
return targetm.vectorize.add_stmt_cost (data, count, kind,
stmt_info, misalign, where);
}
targetm.vectorize.destroy_cost_data (data);
}
+inline void
+add_stmt_costs (void *data, stmt_vector_for_cost *cost_vec)
+{
+ stmt_info_for_cost *cost;
+ unsigned i;
+ FOR_EACH_VEC_ELT (*cost_vec, i, cost)
+ add_stmt_cost (data, cost->count, cost->kind,
+ cost->stmt ? vinfo_for_stmt (cost->stmt) : NULL,
+ cost->misalign, cost->where);
+}
+
/*-----------------------------------------------------------------*/
/* Info on data references alignment. */
/*-----------------------------------------------------------------*/
int *, vec<tree> *);
extern stmt_vec_info new_stmt_vec_info (gimple *stmt, vec_info *);
extern void free_stmt_vec_info (gimple *stmt);
-extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
- int, stmt_vector_for_cost *,
- stmt_vector_for_cost *);
-extern void vect_model_store_cost (stmt_vec_info, int, vect_memory_access_type,
- vec_load_store_type, slp_tree,
- stmt_vector_for_cost *,
- stmt_vector_for_cost *);
-extern void vect_model_load_cost (stmt_vec_info, int, vect_memory_access_type,
- slp_tree, stmt_vector_for_cost *,
- stmt_vector_for_cost *);
extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
enum vect_cost_for_stmt, stmt_vec_info,
int, enum vect_cost_model_location);
extern bool vect_transform_stmt (gimple *, gimple_stmt_iterator *,
bool *, slp_tree, slp_instance);
extern void vect_remove_stores (gimple *);
-extern bool vect_analyze_stmt (gimple *, bool *, slp_tree, slp_instance);
+extern bool vect_analyze_stmt (gimple *, bool *, slp_tree, slp_instance,
+ stmt_vector_for_cost *);
extern bool vectorizable_condition (gimple *, gimple_stmt_iterator *,
- gimple **, tree, int, slp_tree);
+ gimple **, tree, int, slp_tree,
+ stmt_vector_for_cost *);
extern void vect_get_load_cost (struct data_reference *, int, bool,
unsigned int *, unsigned int *,
stmt_vector_for_cost *,
extern struct loop *vect_transform_loop (loop_vec_info);
extern loop_vec_info vect_analyze_loop_form (struct loop *);
extern bool vectorizable_live_operation (gimple *, gimple_stmt_iterator *,
- slp_tree, int, gimple **);
+ slp_tree, int, gimple **,
+ stmt_vector_for_cost *);
extern bool vectorizable_reduction (gimple *, gimple_stmt_iterator *,
- gimple **, slp_tree, slp_instance);
+ gimple **, slp_tree, slp_instance,
+ stmt_vector_for_cost *);
extern bool vectorizable_induction (gimple *, gimple_stmt_iterator *,
- gimple **, slp_tree);
+ gimple **, slp_tree,
+ stmt_vector_for_cost *);
extern tree get_initial_def_for_reduction (gimple *, tree, tree *);
extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code);
extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,