From 30c5a937e19ac28dbb6d023516af9c1b902614aa Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Sat, 4 Apr 2015 10:47:08 +0000 Subject: [PATCH] re PR tree-optimization/64909 (Missed vectorization with bdver1) 2015-04-04 Richard Biener PR tree-optimization/64909 PR tree-optimization/65660 * tree-vectorizer.h (vect_get_known_peeling_cost): Adjust to take a cost vector for scalar iteration cost. (vect_get_single_scalar_iteration_cost): Likewise. * tree-vect-loop.c (vect_get_single_scalar_iteration_cost): Compute the scalar iteration cost into a cost vector. (vect_get_known_peeling_cost): Use the scalar cost vector to account for the cost of the peeled iterations. (vect_estimate_min_profitable_iters): Likewise. * tree-vect-data-refs.c (vect_peeling_hash_get_lowest_cost): Likewise. From-SVN: r221866 --- gcc/ChangeLog | 15 +++++++ gcc/tree-vect-data-refs.c | 10 ++--- gcc/tree-vect-loop.c | 83 ++++++++++++++++++++++++--------------- gcc/tree-vectorizer.h | 6 ++- 4 files changed, 74 insertions(+), 40 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d29f29a82b3..d5d3aaa70a9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2015-04-04 Richard Biener + + PR tree-optimization/64909 + PR tree-optimization/65660 + * tree-vectorizer.h (vect_get_known_peeling_cost): Adjust + to take a cost vector for scalar iteration cost. + (vect_get_single_scalar_iteration_cost): Likewise. + * tree-vect-loop.c (vect_get_single_scalar_iteration_cost): + Compute the scalar iteration cost into a cost vector. + (vect_get_known_peeling_cost): Use the scalar cost vector to + account for the cost of the peeled iterations. + (vect_estimate_min_profitable_iters): Likewise. + * tree-vect-data-refs.c (vect_peeling_hash_get_lowest_cost): + Likewise. + 2015-04-04 Alan Modra PR target/65576 diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 094275e8439..3913862eb6c 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -1152,7 +1152,6 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot, vec datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); struct data_reference *dr; stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec; - int single_iter_cost; prologue_cost_vec.create (2); body_cost_vec.create (2); @@ -1175,14 +1174,11 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot, SET_DR_MISALIGNMENT (dr, save_misalignment); } - single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo); + auto_vec scalar_cost_vec; + vect_get_single_scalar_iteration_cost (loop_vinfo, &scalar_cost_vec); outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy, - /* ??? We use this cost as number of stmts with scalar_stmt cost, - thus divide by that. This introduces rounding errors, thus better - introduce a new cost kind (raw_cost? scalar_iter_cost?). */ - single_iter_cost / vect_get_stmt_cost (scalar_stmt), - &prologue_cost_vec, &epilogue_cost_vec); + &scalar_cost_vec, &prologue_cost_vec, &epilogue_cost_vec); /* Prologue and epilogue costs are added to the target model later. These costs depend only on the scalar iteration cost, the diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index dd4ada2d09d..88ef251e91a 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2653,12 +2653,13 @@ vect_force_simple_reduction (loop_vec_info loop_info, gimple phi, /* Calculate the cost of one scalar iteration of the loop. */ int -vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo) +vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo, + stmt_vector_for_cost *scalar_cost_vec) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0; - int innerloop_iters, i, stmt_cost; + int innerloop_iters, i; /* Count statements in scalar loop. Using this as scalar cost for a single iteration for now. @@ -2699,17 +2700,20 @@ vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo) && !STMT_VINFO_IN_PATTERN_P (stmt_info)) continue; + vect_cost_for_stmt kind; if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))) { if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))) - stmt_cost = vect_get_stmt_cost (scalar_load); + kind = scalar_load; else - stmt_cost = vect_get_stmt_cost (scalar_store); + kind = scalar_store; } else - stmt_cost = vect_get_stmt_cost (scalar_stmt); + kind = scalar_stmt; - scalar_single_iter_cost += stmt_cost * factor; + scalar_single_iter_cost + += record_stmt_cost (scalar_cost_vec, factor, kind, + NULL, 0, vect_prologue); } } return scalar_single_iter_cost; @@ -2719,7 +2723,7 @@ vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo) int vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, int *peel_iters_epilogue, - int scalar_single_iter_cost, + stmt_vector_for_cost *scalar_cost_vec, stmt_vector_for_cost *prologue_cost_vec, stmt_vector_for_cost *epilogue_cost_vec) { @@ -2736,8 +2740,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, /* If peeled iterations are known but number of scalar loop iterations are unknown, count a taken branch per peeled loop. */ - retval = record_stmt_cost (prologue_cost_vec, 2, cond_branch_taken, + retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken, NULL, 0, vect_prologue); + retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken, + NULL, 0, vect_epilogue); } else { @@ -2751,14 +2757,21 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, *peel_iters_epilogue = vf; } + stmt_info_for_cost *si; + int j; if (peel_iters_prologue) - retval += record_stmt_cost (prologue_cost_vec, - peel_iters_prologue * scalar_single_iter_cost, - scalar_stmt, NULL, 0, vect_prologue); + FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) + retval += record_stmt_cost (prologue_cost_vec, + si->count * peel_iters_prologue, + si->kind, NULL, si->misalign, + vect_prologue); if (*peel_iters_epilogue) - retval += record_stmt_cost (epilogue_cost_vec, - *peel_iters_epilogue * scalar_single_iter_cost, - scalar_stmt, NULL, 0, vect_epilogue); + FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) + retval += record_stmt_cost (epilogue_cost_vec, + si->count * *peel_iters_epilogue, + si->kind, NULL, si->misalign, + vect_epilogue); + return retval; } @@ -2833,12 +2846,9 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, TODO: Consider assigning different costs to different scalar statements. */ - scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo); - /* ??? Below we use this cost as number of stmts with scalar_stmt cost, - thus divide by that. This introduces rounding errors, thus better - introduce a new cost kind (raw_cost? scalar_iter_cost?). */ - int scalar_single_iter_stmts - = scalar_single_iter_cost / vect_get_stmt_cost (scalar_stmt); + auto_vec scalar_cost_vec; + scalar_single_iter_cost + = vect_get_single_scalar_iteration_cost (loop_vinfo, &scalar_cost_vec); /* Add additional cost for the peeled instructions in prologue and epilogue loop. @@ -2866,18 +2876,29 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, branch per peeled loop. Even if scalar loop iterations are known, vector iterations are not known since peeled prologue iterations are not known. Hence guards remain the same. */ - (void) add_stmt_cost (target_cost_data, 2, cond_branch_taken, + (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0, vect_prologue); - (void) add_stmt_cost (target_cost_data, 2, cond_branch_not_taken, + (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken, NULL, 0, vect_prologue); - /* FORNOW: Don't attempt to pass individual scalar instructions to - the model; just assume linear cost for scalar iterations. */ - (void) add_stmt_cost (target_cost_data, - peel_iters_prologue * scalar_single_iter_stmts, - scalar_stmt, NULL, 0, vect_prologue); - (void) add_stmt_cost (target_cost_data, - peel_iters_epilogue * scalar_single_iter_stmts, - scalar_stmt, NULL, 0, vect_epilogue); + (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, + NULL, 0, vect_epilogue); + (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken, + NULL, 0, vect_epilogue); + stmt_info_for_cost *si; + int j; + FOR_EACH_VEC_ELT (scalar_cost_vec, j, si) + { + struct _stmt_vec_info *stmt_info + = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; + (void) add_stmt_cost (target_cost_data, + si->count * peel_iters_prologue, + si->kind, stmt_info, si->misalign, + vect_prologue); + (void) add_stmt_cost (target_cost_data, + si->count * peel_iters_epilogue, + si->kind, stmt_info, si->misalign, + vect_epilogue); + } } else { @@ -2892,7 +2913,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue, &peel_iters_epilogue, - scalar_single_iter_stmts, + &scalar_cost_vec, &prologue_cost_vec, &epilogue_cost_vec); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 0ede62390f6..66d592d523a 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1101,10 +1101,12 @@ extern bool vectorizable_reduction (gimple, gimple_stmt_iterator *, gimple *, extern bool vectorizable_induction (gimple, gimple_stmt_iterator *, gimple *); extern tree get_initial_def_for_reduction (gimple, tree, tree *); extern int vect_min_worthwhile_factor (enum tree_code); -extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, int, +extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, + stmt_vector_for_cost *, stmt_vector_for_cost *, stmt_vector_for_cost *); -extern int vect_get_single_scalar_iteration_cost (loop_vec_info); +extern int vect_get_single_scalar_iteration_cost (loop_vec_info, + stmt_vector_for_cost *); /* In tree-vect-slp.c. */ extern void vect_free_slp_instance (slp_instance); -- 2.30.2