From 78604de064490c8c12d2d4efadbd453f7c8c7685 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 12 Feb 2018 13:55:04 +0000 Subject: [PATCH] re PR tree-optimization/84037 (Speed regression of polyhedron benchmark since r256644) 2018-02-12 Richard Biener PR tree-optimization/84037 * tree-vect-slp.c (vect_analyze_slp_cost): Add visited parameter, move visited init to caller. (vect_slp_analyze_operations): Separate cost from validity check, initialize visited once for all instances. (vect_schedule_slp): Analyze map to CSE vectorized nodes once for all instances. * tree-vect-stmts.c (vect_model_simple_cost): Make early out an assert. (vect_model_promotion_demotion_cost): Likewise. (vectorizable_bswap): Guard cost modeling with !slp_node instead of !PURE_SLP_STMT to avoid double-counting on hybrid SLP stmts. (vectorizable_call): Likewise. (vectorizable_conversion): Likewise. (vectorizable_assignment): Likewise. (vectorizable_shift): Likewise. (vectorizable_operation): Likewise. (vectorizable_store): Likewise. (vectorizable_load): Likewise. (vectorizable_condition): Likewise. (vectorizable_comparison): Likewise. From-SVN: r257588 --- gcc/ChangeLog | 25 ++++++++++++++++++++++ gcc/tree-vect-slp.c | 31 ++++++++++++++------------- gcc/tree-vect-stmts.c | 49 +++++++++++++++++++++++++------------------ 3 files changed, 70 insertions(+), 35 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 37ed597d4d7..5a264391268 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2018-02-12 Richard Biener + + PR tree-optimization/84037 + * tree-vect-slp.c (vect_analyze_slp_cost): Add visited + parameter, move visited init to caller. + (vect_slp_analyze_operations): Separate cost from validity + check, initialize visited once for all instances. + (vect_schedule_slp): Analyze map to CSE vectorized nodes once + for all instances. + * tree-vect-stmts.c (vect_model_simple_cost): Make early + out an assert. + (vect_model_promotion_demotion_cost): Likewise. + (vectorizable_bswap): Guard cost modeling with !slp_node + instead of !PURE_SLP_STMT to avoid double-counting on hybrid + SLP stmts. + (vectorizable_call): Likewise. + (vectorizable_conversion): Likewise. + (vectorizable_assignment): Likewise. + (vectorizable_shift): Likewise. + (vectorizable_operation): Likewise. + (vectorizable_store): Likewise. + (vectorizable_load): Likewise. + (vectorizable_condition): Likewise. + (vectorizable_comparison): Likewise. + 2018-02-12 Paolo Bonzini PR sanitizer/84307 diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 453f0199a4c..73aa2271b53 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2003,17 +2003,13 @@ vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, /* Compute the cost for the SLP instance INSTANCE. */ static void -vect_analyze_slp_cost (slp_instance instance, void *data) +vect_analyze_slp_cost (slp_instance instance, void *data, scalar_stmts_set_t *visited) { stmt_vector_for_cost body_cost_vec, prologue_cost_vec; unsigned ncopies_for_cost; stmt_info_for_cost *si; unsigned i; - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "=== vect_analyze_slp_cost ===\n"); - /* Calculate the number of vector stmts to create based on the unrolling factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is GROUP_SIZE / NUNITS otherwise. */ @@ -2050,11 +2046,9 @@ vect_analyze_slp_cost (slp_instance instance, void *data) prologue_cost_vec.create (10); body_cost_vec.create (10); - scalar_stmts_set_t *visited = new scalar_stmts_set_t (); vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance), &prologue_cost_vec, &body_cost_vec, ncopies_for_cost, visited); - delete visited; /* Record the prologue costs, which were delayed until we were sure that SLP was successful. */ @@ -2871,13 +2865,19 @@ vect_slp_analyze_operations (vec_info *vinfo) vinfo->slp_instances.ordered_remove (i); } else - { - /* Compute the costs of the SLP instance. */ - vect_analyze_slp_cost (instance, vinfo->target_cost_data); - i++; - } + i++; } + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "=== vect_analyze_slp_cost ===\n"); + + /* Compute the costs of the SLP instances. */ + scalar_stmts_set_t *visited = new scalar_stmts_set_t (); + for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i) + vect_analyze_slp_cost (instance, vinfo->target_cost_data, visited); + delete visited; + return !vinfo->slp_instances.is_empty (); } @@ -4246,19 +4246,20 @@ vect_schedule_slp (vec_info *vinfo) unsigned int i; bool is_store = false; + + scalar_stmts_to_slp_tree_map_t *bst_map + = new scalar_stmts_to_slp_tree_map_t (); slp_instances = vinfo->slp_instances; FOR_EACH_VEC_ELT (slp_instances, i, instance) { /* Schedule the tree of INSTANCE. */ - scalar_stmts_to_slp_tree_map_t *bst_map - = new scalar_stmts_to_slp_tree_map_t (); is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance), instance, bst_map); - delete bst_map; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vectorizing stmts using SLP.\n"); } + delete bst_map; FOR_EACH_VEC_ELT (slp_instances, i, instance) { diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 6066a52c23e..a98e0e5e259 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -826,8 +826,7 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, int inside_cost = 0, prologue_cost = 0; /* The SLP costs were already calculated during SLP tree build. */ - if (PURE_SLP_STMT (stmt_info)) - return; + gcc_assert (!PURE_SLP_STMT (stmt_info)); /* Cost the "broadcast" of a scalar operand in to a vector operand. Use scalar_to_vec to cost the broadcast, as elsewhere in the vector @@ -864,8 +863,7 @@ vect_model_promotion_demotion_cost (stmt_vec_info stmt_info, void *target_cost_data; /* The SLP costs were already calculated during SLP tree build. */ - if (PURE_SLP_STMT (stmt_info)) - return; + gcc_assert (!PURE_SLP_STMT (stmt_info)); if (loop_vinfo) target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); @@ -2891,7 +2889,7 @@ vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi, if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ===" "\n"); - if (! PURE_SLP_STMT (stmt_info)) + if (! slp_node) { add_stmt_cost (stmt_info->vinfo->target_cost_data, 1, vector_stmt, stmt_info, 0, vect_prologue); @@ -3210,10 +3208,13 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===" "\n"); - vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); - if (ifn != IFN_LAST && modifier == NARROW && !slp_node) - add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2, - vec_promote_demote, stmt_info, 0, vect_body); + if (!slp_node) + { + vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); + if (ifn != IFN_LAST && modifier == NARROW && !slp_node) + add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2, + vec_promote_demote, stmt_info, 0, vect_body); + } return true; } @@ -4742,17 +4743,20 @@ vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi, if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR) { STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; - vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); + if (!slp_node) + vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); } else if (modifier == NARROW) { STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; - vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt); + if (!slp_node) + vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt); } else { STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; - vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt); + if (!slp_node) + vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt); } interm_types.release (); return true; @@ -5149,7 +5153,8 @@ vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi, if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_assignment ===\n"); - vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); + if (!slp_node) + vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); return true; } @@ -5513,7 +5518,8 @@ vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi, if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_shift ===\n"); - vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); + if (!slp_node) + vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); return true; } @@ -5836,7 +5842,8 @@ vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi, if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_operation ===\n"); - vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); + if (!slp_node) + vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); return true; } @@ -6240,7 +6247,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; /* The SLP costs are calculated during SLP analysis. */ - if (!PURE_SLP_STMT (stmt_info)) + if (!slp_node) vect_model_store_cost (stmt_info, ncopies, memory_access_type, vls_type, NULL, NULL, NULL); return true; @@ -7451,7 +7458,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; /* The SLP costs are calculated during SLP analysis. */ - if (!PURE_SLP_STMT (stmt_info)) + if (! slp_node) vect_model_load_cost (stmt_info, ncopies, memory_access_type, NULL, NULL, NULL); return true; @@ -8673,7 +8680,8 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi, if (expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)) { - vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL); + if (!slp_node) + vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL); return true; } return false; @@ -9037,8 +9045,9 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi, if (!vec_stmt) { STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type; - vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)), - dts, ndts, NULL, NULL); + if (!slp_node) + vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)), + dts, ndts, NULL, NULL); if (bitop1 == NOP_EXPR) return expand_vec_cmp_expr_p (vectype, mask_type, code); else -- 2.30.2