From 2e8ab70c2eee7a912ac74b57c47e7bc32f9f1198 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 23 Apr 2015 08:43:10 +0000 Subject: [PATCH] tree-vect-slp.c (vect_find_first_load_in_slp_instance): Remove. 2015-04-23 Richard Biener * tree-vect-slp.c (vect_find_first_load_in_slp_instance): Remove. (vect_find_last_store_in_slp_instance): Rename to ... (vect_find_last_scalar_stmt_in_slp): ... this and generalize. (vect_analyze_slp_cost_1): Use vector_load for constant defs and vec_construct for external defs when estimating prologue cost. (vect_analyze_slp_instance): Do not init SLP_INSTANCE_FIRST_LOAD_STMT. Compute costs here only when vectorizing loops. (vect_slp_analyze_bb_1): Compute SLP cost here, after vector types have been determined. (vect_schedule_slp_instance): Simplify vectorized code placement and prepare for in-BB external defs. * tree-vectorizer.h (struct _slp_instance): Remove first_load member. (SLP_INSTANCE_FIRST_LOAD_STMT): Remove. * tree-vect-stmts.c (vect_model_store_cost): Remove PURE_SLP_STMT guard. (vect_model_load_cost): Likewise. (vectorizable_store): Instead add it here. (vectorizable_load): Likewise. (vect_is_simple_use): Dump def type textually. From-SVN: r222354 --- gcc/ChangeLog | 22 +++++++++ gcc/tree-vect-slp.c | 105 +++++++++++++++++------------------------- gcc/tree-vect-stmts.c | 57 +++++++++++++++++------ gcc/tree-vectorizer.h | 5 -- 4 files changed, 108 insertions(+), 81 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d76faf853e9..077a49eae9a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2015-04-23 Richard Biener + + * tree-vect-slp.c (vect_find_first_load_in_slp_instance): Remove. + (vect_find_last_store_in_slp_instance): Rename to ... + (vect_find_last_scalar_stmt_in_slp): ... this and generalize. + (vect_analyze_slp_cost_1): Use vector_load for constant defs + and vec_construct for external defs when estimating prologue cost. 
+ (vect_analyze_slp_instance): Do not init SLP_INSTANCE_FIRST_LOAD_STMT. + Compute costs here only when vectorizing loops. + (vect_slp_analyze_bb_1): Compute SLP cost here, after vector types + have been determined. + (vect_schedule_slp_instance): Simplify vectorized code placement + and prepare for in-BB external defs. + * tree-vectorizer.h (struct _slp_instance): Remove first_load member. + (SLP_INSTANCE_FIRST_LOAD_STMT): Remove. + * tree-vect-stmts.c (vect_model_store_cost): Remove PURE_SLP_STMT + guard. + (vect_model_load_cost): Likewise. + (vectorizable_store): Instead add it here. + (vectorizable_load): Likewise. + (vect_is_simple_use): Dump def type textually. + 2015-04-23 Richard Biener * cfgexpand.c (expand_gimple_stmt_1): Use ops.code. diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 73ab24e8f3b..d82df3e5daa 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1379,42 +1379,23 @@ vect_supported_load_permutation_p (slp_instance slp_instn) } -/* Find the first load in the loop that belongs to INSTANCE. - When loads are in several SLP nodes, there can be a case in which the first - load does not appear in the first SLP node to be transformed, causing - incorrect order of statements. Since we generate all the loads together, - they must be inserted before the first load of the SLP instance and not - before the first load of the first node of the instance. */ - -static gimple -vect_find_first_load_in_slp_instance (slp_instance instance) -{ - int i, j; - slp_tree load_node; - gimple first_load = NULL, load; - - FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node) - FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load) - first_load = get_earlier_stmt (load, first_load); - - return first_load; -} - - /* Find the last store in SLP INSTANCE. 
*/ static gimple -vect_find_last_store_in_slp_instance (slp_instance instance) +vect_find_last_scalar_stmt_in_slp (slp_tree node) { - int i; - slp_tree node; - gimple last_store = NULL, store; + gimple last = NULL, stmt; - node = SLP_INSTANCE_TREE (instance); - for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++) - last_store = get_later_stmt (store, last_store); + for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++) + { + stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); + if (is_pattern_stmt_p (stmt_vinfo)) + last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last); + else + last = get_later_stmt (stmt, last); + } - return last_store; + return last; } /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */ @@ -1487,10 +1468,19 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, if (!op || op == lhs) continue; if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, - &def_stmt, &def, &dt) - && (dt == vect_constant_def || dt == vect_external_def)) - record_stmt_cost (prologue_cost_vec, 1, vector_stmt, - stmt_info, 0, vect_prologue); + &def_stmt, &def, &dt)) + { + /* Without looking at the actual initializer a vector of + constants can be implemented as load from the constant pool. + ??? We need to pass down stmt_info for a vector type + even if it points to the wrong stmt. */ + if (dt == vect_constant_def) + record_stmt_cost (prologue_cost_vec, 1, vector_load, + stmt_info, 0, vect_prologue); + else if (dt == vect_external_def) + record_stmt_cost (prologue_cost_vec, 1, vec_construct, + stmt_info, 0, vect_prologue); + } } } @@ -1668,7 +1658,6 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL; SLP_INSTANCE_LOADS (new_instance) = loads; - SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL; /* Compute the load permutation. 
*/ slp_tree load_node; @@ -1715,17 +1704,17 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, vect_free_slp_instance (new_instance); return false; } - - SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) - = vect_find_first_load_in_slp_instance (new_instance); } - /* Compute the costs of this SLP instance. */ - vect_analyze_slp_cost (loop_vinfo, bb_vinfo, - new_instance, TYPE_VECTOR_SUBPARTS (vectype)); if (loop_vinfo) - LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance); + { + /* Compute the costs of this SLP instance. Delay this for BB + vectorization as we don't have vector types computed yet. */ + vect_analyze_slp_cost (loop_vinfo, bb_vinfo, + new_instance, TYPE_VECTOR_SUBPARTS (vectype)); + LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance); + } else BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance); @@ -2368,6 +2357,15 @@ vect_slp_analyze_bb_1 (basic_block bb) return NULL; } + /* Compute the costs of the SLP instances. */ + FOR_EACH_VEC_ELT (slp_instances, i, instance) + { + gimple stmt = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]; + tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); + vect_analyze_slp_cost (NULL, bb_vinfo, + instance, TYPE_VECTOR_SUBPARTS (vectype)); + } + /* Cost model: check if the vectorization is worthwhile. */ if (!unlimited_cost_model (NULL) && !vect_bb_vectorization_profitable_p (bb_vinfo)) @@ -3236,26 +3234,9 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance, dump_printf (MSG_NOTE, "\n"); } - /* Loads should be inserted before the first load. 
*/ - if (SLP_INSTANCE_FIRST_LOAD_STMT (instance) - && STMT_VINFO_GROUPED_ACCESS (stmt_info) - && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)) - && SLP_TREE_LOAD_PERMUTATION (node).exists ()) - si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance)); - else if (is_pattern_stmt_p (stmt_info)) - si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); - else - si = gsi_for_stmt (stmt); - - /* Stores should be inserted just before the last store. */ - if (STMT_VINFO_GROUPED_ACCESS (stmt_info) - && REFERENCE_CLASS_P (gimple_get_lhs (stmt))) - { - gimple last_store = vect_find_last_store_in_slp_instance (instance); - if (is_pattern_stmt_p (vinfo_for_stmt (last_store))) - last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store)); - si = gsi_for_stmt (last_store); - } + /* Vectorized stmts go before the last scalar stmt which is where + all uses are ready. */ + si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node)); /* Mark the first element of the reduction chain as reduction to properly transform the node. In the analysis phase only the last element of the diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index de35508d560..4496293fb46 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -968,10 +968,6 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, struct data_reference *first_dr; gimple first_stmt; - /* The SLP costs were already calculated during SLP tree build. */ - if (PURE_SLP_STMT (stmt_info)) - return; - if (dt == vect_constant_def || dt == vect_external_def) prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec, stmt_info, 0, vect_prologue); @@ -1098,10 +1094,6 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr; unsigned int inside_cost = 0, prologue_cost = 0; - /* The SLP costs were already calculated during SLP tree build. */ - if (PURE_SLP_STMT (stmt_info)) - return; - /* Grouped accesses? 
*/ first_stmt = GROUP_FIRST_ELEMENT (stmt_info); if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node) @@ -5181,8 +5173,10 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; - vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, - NULL, NULL, NULL); + /* The SLP costs are calculated during SLP analysis. */ + if (!PURE_SLP_STMT (stmt_info)) + vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, + NULL, NULL, NULL); return true; } @@ -5901,7 +5895,10 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; - vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL); + /* The SLP costs are calculated during SLP analysis. */ + if (!PURE_SLP_STMT (stmt_info)) + vect_model_load_cost (stmt_info, ncopies, load_lanes_p, + NULL, NULL, NULL); return true; } @@ -7758,6 +7755,41 @@ vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo, *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo); } + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "type of def: "); + switch (*dt) + { + case vect_uninitialized_def: + dump_printf (MSG_NOTE, "uninitialized\n"); + break; + case vect_constant_def: + dump_printf (MSG_NOTE, "constant\n"); + break; + case vect_external_def: + dump_printf (MSG_NOTE, "external\n"); + break; + case vect_internal_def: + dump_printf (MSG_NOTE, "internal\n"); + break; + case vect_induction_def: + dump_printf (MSG_NOTE, "induction\n"); + break; + case vect_reduction_def: + dump_printf (MSG_NOTE, "reduction\n"); + break; + case vect_double_reduction_def: + dump_printf (MSG_NOTE, "double reduction\n"); + break; + case vect_nested_cycle: + dump_printf (MSG_NOTE, "nested cycle\n"); + break; + case vect_unknown_def_type: + dump_printf 
(MSG_NOTE, "unknown\n"); + break; + } + } + if (*dt == vect_unknown_def_type || (stmt && *dt == vect_double_reduction_def @@ -7769,9 +7801,6 @@ vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo, return false; } - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt); - switch (gimple_code (*def_stmt)) { case GIMPLE_PHI: diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index f22d6cff5cb..0796cc19fcd 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -131,10 +131,6 @@ typedef struct _slp_instance { /* The group of nodes that contain loads of this SLP instance. */ vec<slp_tree> loads; - - /* The first scalar load of the instance. The created vector loads will be - inserted before this statement. */ - gimple first_load; } *slp_instance; @@ -144,7 +140,6 @@ typedef struct _slp_instance { #define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor #define SLP_INSTANCE_BODY_COST_VEC(S) (S)->body_cost_vec #define SLP_INSTANCE_LOADS(S) (S)->loads -#define SLP_INSTANCE_FIRST_LOAD_STMT(S) (S)->first_load #define SLP_TREE_CHILDREN(S) (S)->children #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts -- 2.30.2