{
vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
SLP_INSTANCE_LOADS (instance).release ();
- SLP_INSTANCE_BODY_COST_VEC (instance).release ();
free (instance);
}
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
static void
-vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
- slp_instance instance, slp_tree node,
+vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
stmt_vector_for_cost *prologue_cost_vec,
+ stmt_vector_for_cost *body_cost_vec,
unsigned ncopies_for_cost)
{
- stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
-
unsigned i;
slp_tree child;
gimple stmt, s;
/* Recurse down the SLP tree. */
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
if (child)
- vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
- instance, child, prologue_cost_vec,
- ncopies_for_cost);
+ vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
+ body_cost_vec, ncopies_for_cost);
/* Look at the first scalar stmt to determine the cost. */
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
enum vect_def_type dt;
if (!op || op == lhs)
continue;
- if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
+ if (vect_is_simple_use (op, NULL, STMT_VINFO_LOOP_VINFO (stmt_info),
+ STMT_VINFO_BB_VINFO (stmt_info),
&def_stmt, &def, &dt))
{
/* Without looking at the actual initializer a vector of
/* Compute the cost for the SLP instance INSTANCE. */
static void
-vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
- slp_instance instance, unsigned nunits)
+vect_analyze_slp_cost (slp_instance instance, void *data)
{
stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
unsigned ncopies_for_cost;
factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
GROUP_SIZE / NUNITS otherwise. */
unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
+ slp_tree node = SLP_INSTANCE_TREE (instance);
+ stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
+ /* Adjust the group_size by the vectorization factor, which is always one
+ for basic-block vectorization. */
+ if (STMT_VINFO_LOOP_VINFO (stmt_info))
+ group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
+ unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+ /* For reductions look at a reduction operand in case the reduction
+ operation is widening, like DOT_PROD_EXPR or SAD_EXPR. */
+ if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ {
+ gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
+ switch (gimple_assign_rhs_code (stmt))
+ {
+ case DOT_PROD_EXPR:
+ case SAD_EXPR:
+ nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
+ (TREE_TYPE (gimple_assign_rhs1 (stmt))));
+ break;
+ default:;
+ }
+ }
ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
prologue_cost_vec.create (10);
body_cost_vec.create (10);
- SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
- vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
- instance, SLP_INSTANCE_TREE (instance),
- &prologue_cost_vec, ncopies_for_cost);
+ vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
+ &prologue_cost_vec, &body_cost_vec,
+ ncopies_for_cost);
/* Record the prologue costs, which were delayed until we were
- sure that SLP was successful. Unlike the body costs, we know
- the final values now regardless of the loop vectorization factor. */
- void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
- : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
+ sure that SLP was successful. */
FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
{
struct _stmt_vec_info *stmt_info
si->misalign, vect_prologue);
}
+ /* Record the instance's instructions in the target cost model. */
+ FOR_EACH_VEC_ELT (body_cost_vec, i, si)
+ {
+ struct _stmt_vec_info *stmt_info
+ = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
+ (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
+ si->misalign, vect_body);
+ }
+
prologue_cost_vec.release ();
+ body_cost_vec.release ();
}
/* Analyze an SLP instance starting from a group of grouped stores. Call
SLP_INSTANCE_TREE (new_instance) = node;
SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
- SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
SLP_INSTANCE_LOADS (new_instance) = loads;
/* Compute the load permutation. */
if (loop_vinfo)
- {
- /* Compute the costs of this SLP instance. Delay this for BB
- vectorization as we don't have vector types computed yet. */
- vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
- new_instance, TYPE_VECTOR_SUBPARTS (vectype));
- LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
- }
+ LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
else
BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
operations are supported. */
bool
-vect_slp_analyze_operations (vec<slp_instance> slp_instances)
+vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
{
slp_instance instance;
int i;
slp_instances.ordered_remove (i);
}
else
- i++;
+ {
+ /* Compute the costs of the SLP instance. */
+ vect_analyze_slp_cost (instance, data);
+ i++;
+ }
}
if (!slp_instances.length ())
{
vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
slp_instance instance;
- int i, j;
+ int i;
unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
- void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
- stmt_vec_info stmt_info = NULL;
- stmt_vector_for_cost body_cost_vec;
- stmt_info_for_cost *ci;
-
- /* Calculate vector costs. */
- FOR_EACH_VEC_ELT (slp_instances, i, instance)
- {
- body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
-
- FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
- {
- stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
- (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
- stmt_info, ci->misalign, vect_body);
- }
- }
/* Calculate scalar cost. */
FOR_EACH_VEC_ELT (slp_instances, i, instance)
return NULL;
}
- if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo)))
+ if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
+ BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
return NULL;
}
- /* Compute the costs of the SLP instances. */
- FOR_EACH_VEC_ELT (slp_instances, i, instance)
- {
- gimple stmt = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0];
- tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
- vect_analyze_slp_cost (NULL, bb_vinfo,
- instance, TYPE_VECTOR_SUBPARTS (vectype));
- }
-
/* Cost model: check if the vectorization is worthwhile. */
if (!unlimited_cost_model (NULL)
&& !vect_bb_vectorization_profitable_p (bb_vinfo))
}
-/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
- the number of created vector stmts depends on the unrolling factor).
- However, the actual number of vector stmts for every SLP node depends on
- VF which is set later in vect_analyze_operations (). Hence, SLP costs
- should be updated. In this function we assume that the inside costs
- calculated in vect_model_xxx_cost are linear in ncopies. */
-
-void
-vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
-{
- unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
- slp_instance instance;
- stmt_vector_for_cost body_cost_vec;
- stmt_info_for_cost *si;
- void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_update_slp_costs_according_to_vf ===\n");
-
- FOR_EACH_VEC_ELT (slp_instances, i, instance)
- {
- /* We assume that costs are linear in ncopies. */
- int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
-
- /* Record the instance's instructions in the target cost model.
- This was delayed until here because the count of instructions
- isn't known beforehand. */
- body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
-
- FOR_EACH_VEC_ELT (body_cost_vec, j, si)
- (void) add_stmt_cost (data, si->count * ncopies, si->kind,
- vinfo_for_stmt (si->stmt), si->misalign,
- vect_body);
- }
-}
-
-
/* For constant and loop invariant defs of SLP_NODE this function returns
(vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of