return loop_vinfo;
}
+/* Scan the stmts of the loop described by LOOP_VINFO and, depending on
+ whether every relevant stmt is pure SLP, update the vectorization factor
+ recorded in LOOP_VINFO:
+ - if no relevant stmt (or vectorizable cycle def) is found that is not
+ pure SLP, the factor becomes LOOP_VINFO_SLP_UNROLLING_FACTOR;
+ - otherwise it becomes the least common multiple of the current factor
+ and LOOP_VINFO_SLP_UNROLLING_FACTOR.
+ The incoming vectorization factor must be nonzero (asserted). The new
+ factor is stored back into LOOP_VINFO and, when dumping is enabled,
+ reported in the dump.
+ NOTE(review): the factor is an unsigned int but the dump below formats
+ it with %d - confirm this is intended. */
+
+static void
+vect_update_vf_for_slp (loop_vec_info loop_vinfo)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+ int nbbs = loop->num_nodes;
+ unsigned int vectorization_factor;
+ int i;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "=== vect_update_vf_for_slp ===\n");
+
+ vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ gcc_assert (vectorization_factor != 0);
+
+ /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+ the vectorization factor of the loop is the unrolling factor required by
+ the SLP instances. If that unrolling factor is 1, we say that we
+ perform pure SLP on the loop - cross iteration parallelism is not
+ exploited.
+ The scan below visits every stmt of every block in LOOP_VINFO_BBS.
+ A stmt flagged STMT_VINFO_IN_PATTERN_P that has a non-null
+ STMT_VINFO_RELATED_STMT is replaced by that related stmt before it
+ is checked. */
+ bool only_slp_in_loop = true;
+ for (i = 0; i < nbbs; i++)
+ {
+ basic_block bb = bbs[i];
+ for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
+ gsi_next (&si))
+ {
+ gimple stmt = gsi_stmt (si);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+ && STMT_VINFO_RELATED_STMT (stmt_info))
+ {
+ stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+ stmt_info = vinfo_for_stmt (stmt);
+ }
+ if ((STMT_VINFO_RELEVANT_P (stmt_info)
+ || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+ && !PURE_SLP_STMT (stmt_info))
+ /* STMT is relevant (or a vectorizable cycle def) but not pure SLP,
+ so the loop cannot be handled by SLP alone. The flag is never
+ set back to true; the scan simply continues. */
+ only_slp_in_loop = false;
+ }
+ }
+
+ if (only_slp_in_loop)
+ vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+ else
+ vectorization_factor
+ = least_common_multiple (vectorization_factor,
+ LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Updating vectorization factor to %d\n",
+ vectorization_factor);
+}
/* Function vect_analyze_loop_operations.
Scan the loop stmts and make sure they are all vectorizable. */
static bool
-vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
+vect_analyze_loop_operations (loop_vec_info loop_vinfo)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes;
- unsigned int vectorization_factor = 0;
+ unsigned int vectorization_factor;
int i;
stmt_vec_info stmt_info;
bool need_to_vectorize = false;
int min_profitable_iters;
int min_scalar_loop_bound;
unsigned int th;
- bool only_slp_in_loop = true, ok;
+ bool ok;
HOST_WIDE_INT max_niter;
HOST_WIDE_INT estimated_niter;
int min_profitable_estimate;
dump_printf_loc (MSG_NOTE, vect_location,
"=== vect_analyze_loop_operations ===\n");
- gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
- vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- if (slp)
- {
- /* If all the stmts in the loop can be SLPed, we perform only SLP, and
- vectorization factor of the loop is the unrolling factor required by
- the SLP instances. If that unrolling factor is 1, we say, that we
- perform pure SLP on loop - cross iteration parallelism is not
- exploited. */
- for (i = 0; i < nbbs; i++)
- {
- basic_block bb = bbs[i];
- for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- gimple stmt = gsi_stmt (si);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- if (STMT_VINFO_IN_PATTERN_P (stmt_info)
- && STMT_VINFO_RELATED_STMT (stmt_info))
- {
- stmt = STMT_VINFO_RELATED_STMT (stmt_info);
- stmt_info = vinfo_for_stmt (stmt);
- }
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- /* STMT needs both SLP and loop-based vectorization. */
- only_slp_in_loop = false;
- }
- }
-
- if (only_slp_in_loop)
- vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
- else
- vectorization_factor = least_common_multiple (vectorization_factor,
- LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Updating vectorization factor to %d\n",
- vectorization_factor);
- }
-
for (i = 0; i < nbbs; i++)
{
basic_block bb = bbs[i];
gsi_next (&si))
{
gimple stmt = gsi_stmt (si);
+ if (STMT_SLP_TYPE (vinfo_for_stmt (stmt)))
+ {
+ need_to_vectorize = true;
+ continue;
+ }
if (!gimple_clobber_p (stmt)
&& !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
return false;
return false;
}
+ vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ gcc_assert (vectorization_factor != 0);
+
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vectorization_factor = %d, niters = "
/* Analyze cost. Decide if worth while to vectorize. */
- /* Once VF is set, SLP costs should be updated since the number of created
- vector stmts depends on VF. */
- vect_update_slp_costs_according_to_vf (loop_vinfo);
-
vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
&min_profitable_estimate);
LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
static bool
vect_analyze_loop_2 (loop_vec_info loop_vinfo)
{
- bool ok, slp = false;
+ bool ok;
int max_vf = MAX_VECTORIZATION_FACTOR;
int min_vf = 2;
unsigned int th;
ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
if (ok)
{
- /* Decide which possible SLP instances to SLP. */
- slp = vect_make_slp_decision (loop_vinfo);
-
- /* Find stmts that need to be both vectorized and SLPed. */
- vect_detect_hybrid_slp (loop_vinfo);
+ /* If there are any SLP instances mark them as pure_slp. */
+ if (vect_make_slp_decision (loop_vinfo))
+ {
+ /* Find stmts that need to be both vectorized and SLPed. */
+ vect_detect_hybrid_slp (loop_vinfo);
+
+ /* Update the vectorization factor based on the SLP decision. */
+ vect_update_vf_for_slp (loop_vinfo);
+
+ /* Once VF is set, SLP costs should be updated since the number of
+ created vector stmts depends on VF. */
+ vect_update_slp_costs_according_to_vf (loop_vinfo);
+
+ /* Analyze operations in the SLP instances. Note this may
+ remove unsupported SLP instances which makes the above
+ SLP kind detection invalid. */
+ unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
+ vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
+ if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
+ return false;
+ }
}
else
return false;
- /* Scan all the operations in the loop and make sure they are
- vectorizable. */
-
- ok = vect_analyze_loop_operations (loop_vinfo, slp);
+ /* Scan all the remaining operations in the loop that are not subject
+ to SLP and make sure they are vectorizable. */
+ ok = vect_analyze_loop_operations (loop_vinfo);
if (!ok)
{
if (dump_enabled_p ())
the subtree. Return TRUE if the operations are supported. */
static bool
-vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
+vect_slp_analyze_node_operations (slp_tree node)
{
bool dummy;
int i;
return true;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- if (!vect_slp_analyze_node_operations (bb_vinfo, child))
+ if (!vect_slp_analyze_node_operations (child))
return false;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
gcc_assert (stmt_info);
- gcc_assert (PURE_SLP_STMT (stmt_info));
+ gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
if (!vect_analyze_stmt (stmt, &dummy, node))
- return false;
+ return false;
}
return true;
/* Analyze statements in SLP instances of the basic block. Return TRUE if the
operations are supported. */
-static bool
-vect_slp_analyze_operations (bb_vec_info bb_vinfo)
+bool
+vect_slp_analyze_operations (vec<slp_instance> slp_instances)
{
- vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
slp_instance instance;
int i;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "=== vect_slp_analyze_operations ===\n");
+
for (i = 0; slp_instances.iterate (i, &instance); )
{
- if (!vect_slp_analyze_node_operations (bb_vinfo,
- SLP_INSTANCE_TREE (instance)))
+ if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
{
- vect_free_slp_instance (instance);
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "removing SLP instance operations starting from: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
+ SLP_TREE_SCALAR_STMTS
+ (SLP_INSTANCE_TREE (instance))[0], 0);
+ vect_free_slp_instance (instance);
slp_instances.ordered_remove (i);
}
else
return NULL;
}
- if (!vect_slp_analyze_operations (bb_vinfo))
+ if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,