From 0ff959e0a2214fa553ac2d06ed66ede62602fc9a Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Mon, 27 Jul 2020 21:30:26 -0500 Subject: [PATCH] vect: Refactor peel_iters_{pro,epi}logue cost modeling This patch is to refactor the existing peel_iters_prologue and peel_iters_epilogue cost model handlings, by following the structure below suggested by Richard Sandiford: - calculate peel_iters_prologue - calculate peel_iters_epilogue - add costs associated with peel_iters_prologue - add costs associated with peel_iters_epilogue - add costs related to branch taken/not_taken. Bootstrapped/regtested on aarch64-linux-gnu. gcc/ChangeLog: * tree-vect-loop.c (vect_get_known_peeling_cost): Factor out some code to determine peel_iters_epilogue to... (vect_get_peel_iters_epilogue): ...this new function. (vect_estimate_min_profitable_iters): Refactor cost calculation on peel_iters_prologue and peel_iters_epilogue. --- gcc/tree-vect-loop.c | 267 +++++++++++++++++++++++-------------------- 1 file changed, 142 insertions(+), 125 deletions(-) diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index e933441b922..185019c3305 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -3474,42 +3474,56 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, return NULL; } -/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */ -int -vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, - int *peel_iters_epilogue, - stmt_vector_for_cost *scalar_cost_vec, - stmt_vector_for_cost *prologue_cost_vec, - stmt_vector_for_cost *epilogue_cost_vec) +/* Estimate the number of peeled epilogue iterations for LOOP_VINFO. + PEEL_ITERS_PROLOGUE is the number of peeled prologue iterations, + or -1 if not known. */ + +static int +vect_get_peel_iters_epilogue (loop_vec_info loop_vinfo, int peel_iters_prologue) { - int retval = 0; int assumed_vf = vect_vf_for_cost (loop_vinfo); - - if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) || peel_iters_prologue == -1) { - *peel_iters_epilogue = assumed_vf / 2; if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, + dump_printf_loc (MSG_NOTE, vect_location, "cost model: epilogue peel iters set to vf/2 " "because loop iterations are unknown .\n"); - - /* If peeled iterations are known but number of scalar loop - iterations are unknown, count a taken branch per peeled loop. */ - retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken, - NULL, NULL_TREE, 0, vect_prologue); - retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken, - NULL, NULL_TREE, 0, vect_epilogue); + return assumed_vf / 2; } else { int niters = LOOP_VINFO_INT_NITERS (loop_vinfo); - peel_iters_prologue = niters < peel_iters_prologue ? - niters : peel_iters_prologue; - *peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf; + peel_iters_prologue = MIN (niters, peel_iters_prologue); + int peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf; /* If we need to peel for gaps, but no peeling is required, we have to peel VF iterations. */ - if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue) - *peel_iters_epilogue = assumed_vf; + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !peel_iters_epilogue) + peel_iters_epilogue = assumed_vf; + return peel_iters_epilogue; + } +} + +/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */ +int +vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, + int *peel_iters_epilogue, + stmt_vector_for_cost *scalar_cost_vec, + stmt_vector_for_cost *prologue_cost_vec, + stmt_vector_for_cost *epilogue_cost_vec) +{ + int retval = 0; + + *peel_iters_epilogue + = vect_get_peel_iters_epilogue (loop_vinfo, peel_iters_prologue); + + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + { + /* If peeled iterations are known but number of scalar loop + iterations are unknown, count a taken branch per peeled loop. */ + retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken, NULL, + NULL_TREE, 0, vect_prologue); + retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken, NULL, + NULL_TREE, 0, vect_epilogue); } stmt_info_for_cost *si; @@ -3652,24 +3666,110 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, TODO: Build an expression that represents peel_iters for prologue and epilogue to be used in a run-time test. */ + bool prologue_need_br_taken_cost = false; + bool prologue_need_br_not_taken_cost = false; + + /* Calculate peel_iters_prologue. */ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + peel_iters_prologue = 0; + else if (npeel < 0) { - peel_iters_prologue = 0; - peel_iters_epilogue = 0; + peel_iters_prologue = assumed_vf / 2; + if (dump_enabled_p ()) + dump_printf (MSG_NOTE, "cost model: " + "prologue peel iters set to vf/2.\n"); - if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) - { - /* We need to peel exactly one iteration. */ - peel_iters_epilogue += 1; - stmt_info_for_cost *si; - int j; - FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), - j, si) - (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count, - si->kind, si->stmt_info, si->vectype, - si->misalign, vect_epilogue); - } + /* If peeled iterations are unknown, count a taken branch and a not taken + branch per peeled loop. Even if scalar loop iterations are known, + vector iterations are not known since peeled prologue iterations are + not known. Hence guards remain the same. */ + prologue_need_br_taken_cost = true; + prologue_need_br_not_taken_cost = true; + } + else + { + peel_iters_prologue = npeel; + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + /* If peeled iterations are known but number of scalar loop + iterations are unknown, count a taken branch per peeled loop. */ + prologue_need_br_taken_cost = true; + } + bool epilogue_need_br_taken_cost = false; + bool epilogue_need_br_not_taken_cost = false; + + /* Calculate peel_iters_epilogue. */ + if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) + /* We need to peel exactly one iteration for gaps. */ + peel_iters_epilogue = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0; + else if (npeel < 0) + { + /* If peeling for alignment is unknown, loop bound of main loop + becomes unknown. */ + peel_iters_epilogue = assumed_vf / 2; + if (dump_enabled_p ()) + dump_printf (MSG_NOTE, "cost model: " + "epilogue peel iters set to vf/2 because " + "peeling for alignment is unknown.\n"); + + /* See the same reason above in peel_iters_prologue calculation. */ + epilogue_need_br_taken_cost = true; + epilogue_need_br_not_taken_cost = true; + } + else + { + peel_iters_epilogue = vect_get_peel_iters_epilogue (loop_vinfo, npeel); + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + /* If peeled iterations are known but number of scalar loop + iterations are unknown, count a taken branch per peeled loop. */ + epilogue_need_br_taken_cost = true; + } + + stmt_info_for_cost *si; + int j; + /* Add costs associated with peel_iters_prologue. */ + if (peel_iters_prologue) + FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si) + { + (void) add_stmt_cost (loop_vinfo, target_cost_data, + si->count * peel_iters_prologue, si->kind, + si->stmt_info, si->vectype, si->misalign, + vect_prologue); + } + + /* Add costs associated with peel_iters_epilogue. */ + if (peel_iters_epilogue) + FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si) + { + (void) add_stmt_cost (loop_vinfo, target_cost_data, + si->count * peel_iters_epilogue, si->kind, + si->stmt_info, si->vectype, si->misalign, + vect_epilogue); + } + + /* Add possible cond_branch_taken/cond_branch_not_taken cost. */ + + if (prologue_need_br_taken_cost) + (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken, + NULL, NULL_TREE, 0, vect_prologue); + + if (prologue_need_br_not_taken_cost) + (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, + cond_branch_not_taken, NULL, NULL_TREE, 0, + vect_prologue); + + if (epilogue_need_br_taken_cost) + (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken, + NULL, NULL_TREE, 0, vect_epilogue); + + if (epilogue_need_br_not_taken_cost) + (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, + cond_branch_not_taken, NULL, NULL_TREE, 0, + vect_epilogue); + + /* Take care of special costs for rgroup controls of partial vectors. */ + if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + { /* Calculate how many masks we need to generate. */ unsigned int num_masks = 0; rgroup_controls *rgm; @@ -3691,93 +3791,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, simpler and safer to use the worst-case cost; if this ends up being the tie-breaker between vectorizing or not, then it's probably better not to vectorize. */ - (void) add_stmt_cost (loop_vinfo, - target_cost_data, num_masks, vector_stmt, - NULL, NULL_TREE, 0, vect_prologue); - (void) add_stmt_cost (loop_vinfo, - target_cost_data, num_masks - 1, vector_stmt, - NULL, NULL_TREE, 0, vect_body); - } - else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) - { - peel_iters_prologue = 0; - peel_iters_epilogue = 0; - } - else if (npeel < 0) - { - peel_iters_prologue = assumed_vf / 2; - if (dump_enabled_p ()) - dump_printf (MSG_NOTE, "cost model: " - "prologue peel iters set to vf/2.\n"); - - /* If peeling for alignment is unknown, loop bound of main loop becomes - unknown. */ - peel_iters_epilogue = assumed_vf / 2; - if (dump_enabled_p ()) - dump_printf (MSG_NOTE, "cost model: " - "epilogue peel iters set to vf/2 because " - "peeling for alignment is unknown.\n"); - - /* If peeled iterations are unknown, count a taken branch and a not taken - branch per peeled loop. Even if scalar loop iterations are known, - vector iterations are not known since peeled prologue iterations are - not known. Hence guards remain the same. */ - (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken, - NULL, NULL_TREE, 0, vect_prologue); - (void) add_stmt_cost (loop_vinfo, - target_cost_data, 1, cond_branch_not_taken, - NULL, NULL_TREE, 0, vect_prologue); - (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken, - NULL, NULL_TREE, 0, vect_epilogue); - (void) add_stmt_cost (loop_vinfo, - target_cost_data, 1, cond_branch_not_taken, - NULL, NULL_TREE, 0, vect_epilogue); - stmt_info_for_cost *si; - int j; - FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si) - { - (void) add_stmt_cost (loop_vinfo, target_cost_data, - si->count * peel_iters_prologue, - si->kind, si->stmt_info, si->vectype, - si->misalign, - vect_prologue); - (void) add_stmt_cost (loop_vinfo, target_cost_data, - si->count * peel_iters_epilogue, - si->kind, si->stmt_info, si->vectype, - si->misalign, - vect_epilogue); - } - } - else - { - stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec; - stmt_info_for_cost *si; - int j; - void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); - - prologue_cost_vec.create (2); - epilogue_cost_vec.create (2); - peel_iters_prologue = npeel; - - (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue, - &peel_iters_epilogue, - &LOOP_VINFO_SCALAR_ITERATION_COST - (loop_vinfo), - &prologue_cost_vec, - &epilogue_cost_vec); - - FOR_EACH_VEC_ELT (prologue_cost_vec, j, si) - (void) add_stmt_cost (loop_vinfo, - data, si->count, si->kind, si->stmt_info, - si->vectype, si->misalign, vect_prologue); - - FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si) - (void) add_stmt_cost (loop_vinfo, - data, si->count, si->kind, si->stmt_info, - si->vectype, si->misalign, vect_epilogue); - - prologue_cost_vec.release (); - epilogue_cost_vec.release (); + (void) add_stmt_cost (loop_vinfo, target_cost_data, num_masks, + vector_stmt, NULL, NULL_TREE, 0, vect_prologue); + (void) add_stmt_cost (loop_vinfo, target_cost_data, num_masks - 1, + vector_stmt, NULL, NULL_TREE, 0, vect_body); } /* FORNOW: The scalar outside cost is incremented in one of the -- 2.30.2