From a12e42fc3dc64fb4183ee30e32906d3111552a17 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 26 May 2015 09:03:53 +0000 Subject: [PATCH] tree-vect-loop.c (vect_update_vf_for_slp): Split out from ... 2015-05-26 Richard Biener * tree-vect-loop.c (vect_update_vf_for_slp): Split out from ... (vect_analyze_loop_operations): ... here. Remove slp parameter, detect whether we apply SLP. Remove call to vect_update_slp_costs_according_to_vf. (vect_analyze_loop_2): Call vect_update_vf_for_slp and vect_update_slp_costs_according_to_vf from here. Dispatch to vect_slp_analyze_operations to analyze SLP stmts. * tree-vect-slp.c (vect_slp_analyze_node_operations): Drop unused bb_vec_info parameter, adjust assert. (vect_slp_analyze_operations): Pass in the slp instance tree instead of bb_vec_info. (vect_slp_analyze_bb_1): Adjust call to vect_slp_analyze_operations. * tree-vectorizer.h (vect_slp_analyze_operations): Declare. From-SVN: r223670 --- gcc/ChangeLog | 16 +++++ gcc/tree-vect-loop.c | 157 ++++++++++++++++++++++++++---------------- gcc/tree-vect-slp.c | 29 +++++--- gcc/tree-vectorizer.h | 1 + 4 files changed, 131 insertions(+), 72 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a387c80ca85..dab3e07add8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2015-05-26 Richard Biener + + * tree-vect-loop.c (vect_update_vf_for_slp): Split out from ... + (vect_analyze_loop_operations): ... here. Remove slp parameter, + detect whether we apply SLP. Remove call to + vect_update_slp_costs_according_to_vf. + (vect_analyze_loop_2): Call vect_update_vf_for_slp and + vect_update_slp_costs_according_to_vf from here. Dispatch + to vect_slp_analyze_operations to analyze SLP stmts. + * tree-vect-slp.c (vect_slp_analyze_node_operations): Drop + unused bb_vec_info parameter, adjust assert. + (vect_slp_analyze_operations): Pass in the slp instance tree + instead of bb_vec_info. + (vect_slp_analyze_bb_1): Adjust call to vect_slp_analyze_operations. 
+ * tree-vectorizer.h (vect_slp_analyze_operations): Declare. + 2015-05-25 Alexander Monakov * config/i386/i386.h (enum reg_class): Move CLOBBERED_REGS prior to diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 2c983b889d1..89202c425ed 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1355,25 +1355,85 @@ vect_analyze_loop_form (struct loop *loop) return loop_vinfo; } +/* Scan the loop stmts and dependent on whether there are any (non-)SLP + statements update the vectorization factor. */ + +static void +vect_update_vf_for_slp (loop_vec_info loop_vinfo) +{ + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); + int nbbs = loop->num_nodes; + unsigned int vectorization_factor; + int i; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "=== vect_update_vf_for_slp ===\n"); + + vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + gcc_assert (vectorization_factor != 0); + + /* If all the stmts in the loop can be SLPed, we perform only SLP, and + vectorization factor of the loop is the unrolling factor required by + the SLP instances. If that unrolling factor is 1, we say, that we + perform pure SLP on loop - cross iteration parallelism is not + exploited. */ + bool only_slp_in_loop = true; + for (i = 0; i < nbbs; i++) + { + basic_block bb = bbs[i]; + for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); + gsi_next (&si)) + { + gimple stmt = gsi_stmt (si); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + if (STMT_VINFO_IN_PATTERN_P (stmt_info) + && STMT_VINFO_RELATED_STMT (stmt_info)) + { + stmt = STMT_VINFO_RELATED_STMT (stmt_info); + stmt_info = vinfo_for_stmt (stmt); + } + if ((STMT_VINFO_RELEVANT_P (stmt_info) + || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) + && !PURE_SLP_STMT (stmt_info)) + /* STMT needs both SLP and loop-based vectorization. 
*/ + only_slp_in_loop = false; + } + } + + if (only_slp_in_loop) + vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo); + else + vectorization_factor + = least_common_multiple (vectorization_factor, + LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); + + LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Updating vectorization factor to %d\n", + vectorization_factor); +} /* Function vect_analyze_loop_operations. Scan the loop stmts and make sure they are all vectorizable. */ static bool -vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp) +vect_analyze_loop_operations (loop_vec_info loop_vinfo) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); int nbbs = loop->num_nodes; - unsigned int vectorization_factor = 0; + unsigned int vectorization_factor; int i; stmt_vec_info stmt_info; bool need_to_vectorize = false; int min_profitable_iters; int min_scalar_loop_bound; unsigned int th; - bool only_slp_in_loop = true, ok; + bool ok; HOST_WIDE_INT max_niter; HOST_WIDE_INT estimated_niter; int min_profitable_estimate; @@ -1382,50 +1442,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp) dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_loop_operations ===\n"); - gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); - vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - if (slp) - { - /* If all the stmts in the loop can be SLPed, we perform only SLP, and - vectorization factor of the loop is the unrolling factor required by - the SLP instances. If that unrolling factor is 1, we say, that we - perform pure SLP on loop - cross iteration parallelism is not - exploited. 
*/ - for (i = 0; i < nbbs; i++) - { - basic_block bb = bbs[i]; - for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); - gsi_next (&si)) - { - gimple stmt = gsi_stmt (si); - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - if (STMT_VINFO_IN_PATTERN_P (stmt_info) - && STMT_VINFO_RELATED_STMT (stmt_info)) - { - stmt = STMT_VINFO_RELATED_STMT (stmt_info); - stmt_info = vinfo_for_stmt (stmt); - } - if ((STMT_VINFO_RELEVANT_P (stmt_info) - || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) - && !PURE_SLP_STMT (stmt_info)) - /* STMT needs both SLP and loop-based vectorization. */ - only_slp_in_loop = false; - } - } - - if (only_slp_in_loop) - vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo); - else - vectorization_factor = least_common_multiple (vectorization_factor, - LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); - - LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "Updating vectorization factor to %d\n", - vectorization_factor); - } - for (i = 0; i < nbbs; i++) { basic_block bb = bbs[i]; @@ -1540,6 +1556,11 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp) gsi_next (&si)) { gimple stmt = gsi_stmt (si); + if (STMT_SLP_TYPE (vinfo_for_stmt (stmt))) + { + need_to_vectorize = true; + continue; + } if (!gimple_clobber_p (stmt) && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL)) return false; @@ -1563,6 +1584,9 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp) return false; } + vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + gcc_assert (vectorization_factor != 0); + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vectorization_factor = %d, niters = " @@ -1586,10 +1610,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp) /* Analyze cost. Decide if worth while to vectorize. 
*/ - /* Once VF is set, SLP costs should be updated since the number of created - vector stmts depends on VF. */ - vect_update_slp_costs_according_to_vf (loop_vinfo); - vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters, &min_profitable_estimate); LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters; @@ -1664,7 +1684,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp) static bool vect_analyze_loop_2 (loop_vec_info loop_vinfo) { - bool ok, slp = false; + bool ok; int max_vf = MAX_VECTORIZATION_FACTOR; int min_vf = 2; unsigned int th; @@ -1790,19 +1810,34 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo) ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts); if (ok) { - /* Decide which possible SLP instances to SLP. */ - slp = vect_make_slp_decision (loop_vinfo); - - /* Find stmts that need to be both vectorized and SLPed. */ - vect_detect_hybrid_slp (loop_vinfo); + /* If there are any SLP instances mark them as pure_slp. */ + if (vect_make_slp_decision (loop_vinfo)) + { + /* Find stmts that need to be both vectorized and SLPed. */ + vect_detect_hybrid_slp (loop_vinfo); + + /* Update the vectorization factor based on the SLP decision. */ + vect_update_vf_for_slp (loop_vinfo); + + /* Once VF is set, SLP costs should be updated since the number of + created vector stmts depends on VF. */ + vect_update_slp_costs_according_to_vf (loop_vinfo); + + /* Analyze operations in the SLP instances. Note this may + remove unsupported SLP instances which makes the above + SLP kind detection invalid. */ + unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length (); + vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo)); + if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size) + return false; + } } else return false; - /* Scan all the operations in the loop and make sure they are - vectorizable. 
*/ - - ok = vect_analyze_loop_operations (loop_vinfo, slp); + /* Scan all the remaining operations in the loop that are not subject + to SLP and make sure they are vectorizable. */ + ok = vect_analyze_loop_operations (loop_vinfo); if (!ok) { if (dump_enabled_p ()) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 1c519903153..1e680207aed 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2191,7 +2191,7 @@ destroy_bb_vec_info (bb_vec_info bb_vinfo) the subtree. Return TRUE if the operations are supported. */ static bool -vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node) +vect_slp_analyze_node_operations (slp_tree node) { bool dummy; int i; @@ -2202,17 +2202,17 @@ vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node) return true; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - if (!vect_slp_analyze_node_operations (bb_vinfo, child)) + if (!vect_slp_analyze_node_operations (child)) return false; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) { stmt_vec_info stmt_info = vinfo_for_stmt (stmt); gcc_assert (stmt_info); - gcc_assert (PURE_SLP_STMT (stmt_info)); + gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect); if (!vect_analyze_stmt (stmt, &dummy, node)) - return false; + return false; } return true; @@ -2222,19 +2222,26 @@ vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node) /* Analyze statements in SLP instances of the basic block. Return TRUE if the operations are supported. 
*/ -static bool -vect_slp_analyze_operations (bb_vec_info bb_vinfo) +bool +vect_slp_analyze_operations (vec<slp_instance> slp_instances) { - vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo); slp_instance instance; int i; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "=== vect_slp_analyze_operations ===\n"); + for (i = 0; slp_instances.iterate (i, &instance); ) { - if (!vect_slp_analyze_node_operations (bb_vinfo, - SLP_INSTANCE_TREE (instance))) + if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance))) { - vect_free_slp_instance (instance); + dump_printf_loc (MSG_NOTE, vect_location, + "removing SLP instance operations starting from: "); + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, + SLP_TREE_SCALAR_STMTS + (SLP_INSTANCE_TREE (instance))[0], 0); + vect_free_slp_instance (instance); slp_instances.ordered_remove (i); } else @@ -2498,7 +2505,7 @@ vect_slp_analyze_bb_1 (basic_block bb) return NULL; } - if (!vect_slp_analyze_operations (bb_vinfo)) + if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index adde2fb0d1d..ae795a9f7d5 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1114,6 +1114,7 @@ extern void vect_free_slp_instance (slp_instance); extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> , gimple_stmt_iterator *, int, slp_instance, bool); +extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances); extern bool vect_schedule_slp (loop_vec_info, bb_vec_info); extern void vect_update_slp_costs_according_to_vf (loop_vec_info); extern bool vect_analyze_slp (loop_vec_info, bb_vec_info, unsigned); -- 2.30.2