From a5e3d6146d07611eb967befec4dc480c8a19db0d Mon Sep 17 00:00:00 2001 From: Bin Cheng Date: Thu, 13 Oct 2016 11:03:31 +0000 Subject: [PATCH] tree-vect-loop-manip.c (adjust_vec_debug_stmts): Don't release adjust_vec automatically. * tree-vect-loop-manip.c (adjust_vec_debug_stmts): Don't release adjust_vec automatically. (slpeel_add_loop_guard): Remove param cond_expr_stmt_list. Rename param exit_bb to guard_to. (slpeel_checking_verify_cfg_after_peeling): (set_prologue_iterations): (create_lcssa_for_virtual_phi): New func which is factored out from slpeel_tree_peel_loop_to_edge. (slpeel_tree_peel_loop_to_edge): (iv_phi_p): New func. (vect_can_advance_ivs_p): Call iv_phi_p. (vect_update_ivs_after_vectorizer): Call iv_phi_p. Directly insert new gimple stmts in basic block. (vect_do_peeling_for_loop_bound): (vect_do_peeling_for_alignment): (vect_gen_niters_for_prolog_loop): Rename to... (vect_gen_prolog_loop_niters): ...Rename from. Change parameters and adjust implementation. (vect_update_inits_of_drs): Fix code style issue. Convert niters to sizetype if necessary. (vect_build_loop_niters): Move to here from tree-vect-loop.c. Change it to external function. (vect_gen_scalar_loop_niters, vect_gen_vector_loop_niters): New. (vect_gen_vector_loop_niters_mult_vf): New. (slpeel_update_phi_nodes_for_loops): New. (slpeel_update_phi_nodes_for_guard1): Reimplement. (find_guard_arg, slpeel_update_phi_nodes_for_guard2): Reimplement. (slpeel_update_phi_nodes_for_lcssa, vect_do_peeling): New. * tree-vect-loop.c (vect_build_loop_niters): Move to file tree-vect-loop-manip.c (vect_generate_tmps_on_preheader): Delete. (vect_transform_loop): Rename vectorization_factor to vf. Call vect_do_peeling instead of vect_do_peeling-* functions. * tree-vectorizer.h (vect_do_peeling): New decl. (vect_build_loop_niters, vect_gen_vector_loop_niters): New decls. (vect_do_peeling_for_loop_bound): Delete. (vect_do_peeling_for_alignment): Delete. From-SVN: r241099 --- gcc/ChangeLog | 40 + gcc/tree-vect-loop-manip.c | 1974 +++++++++++++++--------------------- gcc/tree-vect-loop.c | 191 +--- gcc/tree-vectorizer.h | 8 +- 4 files changed, 910 insertions(+), 1303 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f134678aeba..74749aee8a5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,43 @@ +2016-10-13 Bin Cheng + + * tree-vect-loop-manip.c (adjust_vec_debug_stmts): Don't release + adjust_vec automatically. + (slpeel_add_loop_guard): Remove param cond_expr_stmt_list. Rename + param exit_bb to guard_to. + (slpeel_checking_verify_cfg_after_peeling): + (set_prologue_iterations): + (create_lcssa_for_virtual_phi): New func which is factored out from + slpeel_tree_peel_loop_to_edge. + (slpeel_tree_peel_loop_to_edge): + (iv_phi_p): New func. + (vect_can_advance_ivs_p): Call iv_phi_p. + (vect_update_ivs_after_vectorizer): Call iv_phi_p. Directly insert + new gimple stmts in basic block. + (vect_do_peeling_for_loop_bound): + (vect_do_peeling_for_alignment): + (vect_gen_niters_for_prolog_loop): Rename to... + (vect_gen_prolog_loop_niters): ...Rename from. Change parameters and + adjust implementation. + (vect_update_inits_of_drs): Fix code style issue. Convert niters to + sizetype if necessary. + (vect_build_loop_niters): Move to here from tree-vect-loop.c. Change + it to external function. + (vect_gen_scalar_loop_niters, vect_gen_vector_loop_niters): New. + (vect_gen_vector_loop_niters_mult_vf): New. + (slpeel_update_phi_nodes_for_loops): New. + (slpeel_update_phi_nodes_for_guard1): Reimplement. + (find_guard_arg, slpeel_update_phi_nodes_for_guard2): Reimplement. + (slpeel_update_phi_nodes_for_lcssa, vect_do_peeling): New. + * tree-vect-loop.c (vect_build_loop_niters): Move to file + tree-vect-loop-manip.c + (vect_generate_tmps_on_preheader): Delete. + (vect_transform_loop): Rename vectorization_factor to vf. Call + vect_do_peeling instead of vect_do_peeling-* functions. + * tree-vectorizer.h (vect_do_peeling): New decl. + (vect_build_loop_niters, vect_gen_vector_loop_niters): New decls. + (vect_do_peeling_for_loop_bound): Delete. + (vect_do_peeling_for_alignment): Delete. + 2016-10-13 Bin Cheng * tree-vect-loop-manip.c (slpeel_tree_duplicate_loop_to_edge_cfg): Put diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 9eb82185577..291ecd940db 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -188,8 +188,6 @@ adjust_vec_debug_stmts (void) adjust_debug_stmts_now (&adjust_vec.last ()); adjust_vec.pop (); } - - adjust_vec.release (); } /* Adjust any debug stmts that referenced FROM values to use the @@ -235,434 +233,6 @@ adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def) gimple_bb (update_phi)); } - -/* Update PHI nodes for a guard of the LOOP. - - Input: - - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that - controls whether LOOP is to be executed. GUARD_EDGE is the edge that - originates from the guard-bb, skips LOOP and reaches the (unique) exit - bb of LOOP. This loop-exit-bb is an empty bb with one successor. - We denote this bb NEW_MERGE_BB because before the guard code was added - it had a single predecessor (the LOOP header), and now it became a merge - point of two paths - the path that ends with the LOOP exit-edge, and - the path that ends with GUARD_EDGE. - - NEW_EXIT_BB: New basic block that is added by this function between LOOP - and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis. - - ===> The CFG before the guard-code was added: - LOOP_header_bb: - loop_body - if (exit_loop) goto update_bb - else goto LOOP_header_bb - update_bb: - - ==> The CFG after the guard-code was added: - guard_bb: - if (LOOP_guard_condition) goto new_merge_bb - else goto LOOP_header_bb - LOOP_header_bb: - loop_body - if (exit_loop_condition) goto new_merge_bb - else goto LOOP_header_bb - new_merge_bb: - goto update_bb - update_bb: - - ==> The CFG after this function: - guard_bb: - if (LOOP_guard_condition) goto new_merge_bb - else goto LOOP_header_bb - LOOP_header_bb: - loop_body - if (exit_loop_condition) goto new_exit_bb - else goto LOOP_header_bb - new_exit_bb: - new_merge_bb: - goto update_bb - update_bb: - - This function: - 1. creates and updates the relevant phi nodes to account for the new - incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves: - 1.1. Create phi nodes at NEW_MERGE_BB. - 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted - UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB - 2. preserves loop-closed-ssa-form by creating the required phi nodes - at the exit of LOOP (i.e, in NEW_EXIT_BB). - - There are two flavors to this function: - - slpeel_update_phi_nodes_for_guard1: - Here the guard controls whether we enter or skip LOOP, where LOOP is a - prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are - for variables that have phis in the loop header. - - slpeel_update_phi_nodes_for_guard2: - Here the guard controls whether we enter or skip LOOP, where LOOP is an - epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are - for variables that have phis in the loop exit. - - I.E., the overall structure is: - - loop1_preheader_bb: - guard1 (goto loop1/merge1_bb) - loop1 - loop1_exit_bb: - guard2 (goto merge1_bb/merge2_bb) - merge1_bb - loop2 - loop2_exit_bb - merge2_bb - next_bb - - slpeel_update_phi_nodes_for_guard1 takes care of creating phis in - loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars - that have phis in loop1->header). - - slpeel_update_phi_nodes_for_guard2 takes care of creating phis in - loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars - that have phis in next_bb). It also adds some of these phis to - loop1_exit_bb. - - slpeel_update_phi_nodes_for_guard1 is always called before - slpeel_update_phi_nodes_for_guard2. They are both needed in order - to create correct data-flow and loop-closed-ssa-form. - - Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables - that change between iterations of a loop (and therefore have a phi-node - at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates - phis for variables that are used out of the loop (and therefore have - loop-closed exit phis). Some variables may be both updated between - iterations and used after the loop. This is why in loop1_exit_bb we - may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1) - and exit phis (created by slpeel_update_phi_nodes_for_guard2). - - - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of - an original loop. i.e., we have: - - orig_loop - guard_bb (goto LOOP/new_merge) - new_loop <-- LOOP - new_exit - new_merge - next_bb - - If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we - have: - - new_loop - guard_bb (goto LOOP/new_merge) - orig_loop <-- LOOP - new_exit - new_merge - next_bb - - The SSA names defined in the original loop have a current - reaching definition that records the corresponding new ssa-name - used in the new duplicated loop copy. - */ - -/* Function slpeel_update_phi_nodes_for_guard1 - - Input: - - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above. - - DEFS - a bitmap of ssa names to mark new names for which we recorded - information. - - In the context of the overall structure, we have: - - loop1_preheader_bb: - guard1 (goto loop1/merge1_bb) -LOOP-> loop1 - loop1_exit_bb: - guard2 (goto merge1_bb/merge2_bb) - merge1_bb - loop2 - loop2_exit_bb - merge2_bb - next_bb - - For each name updated between loop iterations (i.e - for each name that has - an entry (loop-header) phi in LOOP) we create a new phi in: - 1. merge1_bb (to account for the edge from guard1) - 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form) -*/ - -static void -slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop, - bool is_new_loop, basic_block *new_exit_bb) -{ - gphi *orig_phi, *new_phi; - gphi *update_phi, *update_phi2; - tree guard_arg, loop_arg; - basic_block new_merge_bb = guard_edge->dest; - edge e = EDGE_SUCC (new_merge_bb, 0); - basic_block update_bb = e->dest; - basic_block orig_bb = loop->header; - edge new_exit_e; - tree current_new_name; - gphi_iterator gsi_orig, gsi_update; - - /* Create new bb between loop and new_merge_bb. */ - *new_exit_bb = split_edge (single_exit (loop)); - - new_exit_e = EDGE_SUCC (*new_exit_bb, 0); - - for (gsi_orig = gsi_start_phis (orig_bb), - gsi_update = gsi_start_phis (update_bb); - !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update); - gsi_next (&gsi_orig), gsi_next (&gsi_update)) - { - source_location loop_locus, guard_locus; - tree new_res; - orig_phi = gsi_orig.phi (); - update_phi = gsi_update.phi (); - - /** 1. Handle new-merge-point phis **/ - - /* 1.1. Generate new phi node in NEW_MERGE_BB: */ - new_res = copy_ssa_name (PHI_RESULT (orig_phi)); - new_phi = create_phi_node (new_res, new_merge_bb); - - /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge - of LOOP. Set the two phi args in NEW_PHI for these edges: */ - loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0)); - loop_locus = gimple_phi_arg_location_from_edge (orig_phi, - EDGE_SUCC (loop->latch, - 0)); - guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop)); - guard_locus - = gimple_phi_arg_location_from_edge (orig_phi, - loop_preheader_edge (loop)); - - add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus); - add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus); - - /* 1.3. Update phi in successor block. */ - gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg - || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg); - adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi)); - update_phi2 = new_phi; - - - /** 2. Handle loop-closed-ssa-form phis **/ - - if (virtual_operand_p (PHI_RESULT (orig_phi))) - continue; - - /* 2.1. Generate new phi node in NEW_EXIT_BB: */ - new_res = copy_ssa_name (PHI_RESULT (orig_phi)); - new_phi = create_phi_node (new_res, *new_exit_bb); - - /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */ - add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus); - - /* 2.3. Update phi in successor of NEW_EXIT_BB: */ - gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg); - adjust_phi_and_debug_stmts (update_phi2, new_exit_e, - PHI_RESULT (new_phi)); - - /* 2.4. Record the newly created name with set_current_def. - We want to find a name such that - name = get_current_def (orig_loop_name) - and to set its current definition as follows: - set_current_def (name, new_phi_name) - - If LOOP is a new loop then loop_arg is already the name we're - looking for. If LOOP is the original loop, then loop_arg is - the orig_loop_name and the relevant name is recorded in its - current reaching definition. */ - if (is_new_loop) - current_new_name = loop_arg; - else - { - current_new_name = get_current_def (loop_arg); - /* current_def is not available only if the variable does not - change inside the loop, in which case we also don't care - about recording a current_def for it because we won't be - trying to create loop-exit-phis for it. */ - if (!current_new_name) - continue; - } - tree new_name = get_current_def (current_new_name); - /* Because of peeled_chrec optimization it is possible that we have - set this earlier. Verify the PHI has the same value. */ - if (new_name) - { - gimple *phi = SSA_NAME_DEF_STMT (new_name); - gcc_assert (gimple_code (phi) == GIMPLE_PHI - && gimple_bb (phi) == *new_exit_bb - && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop)) - == loop_arg)); - continue; - } - - set_current_def (current_new_name, PHI_RESULT (new_phi)); - } -} - - -/* Function slpeel_update_phi_nodes_for_guard2 - - Input: - - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above. - - In the context of the overall structure, we have: - - loop1_preheader_bb: - guard1 (goto loop1/merge1_bb) - loop1 - loop1_exit_bb: - guard2 (goto merge1_bb/merge2_bb) - merge1_bb -LOOP-> loop2 - loop2_exit_bb - merge2_bb - next_bb - - For each name used out side the loop (i.e - for each name that has an exit - phi in next_bb) we create a new phi in: - 1. merge2_bb (to account for the edge from guard_bb) - 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form) - 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form), - if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1). -*/ - -static void -slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop, - bool is_new_loop, basic_block *new_exit_bb) -{ - gphi *orig_phi, *new_phi; - gphi *update_phi, *update_phi2; - tree guard_arg, loop_arg; - basic_block new_merge_bb = guard_edge->dest; - edge e = EDGE_SUCC (new_merge_bb, 0); - basic_block update_bb = e->dest; - edge new_exit_e; - tree orig_def, orig_def_new_name; - tree new_name, new_name2; - tree arg; - gphi_iterator gsi; - - /* Create new bb between loop and new_merge_bb. */ - *new_exit_bb = split_edge (single_exit (loop)); - - new_exit_e = EDGE_SUCC (*new_exit_bb, 0); - - for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - tree new_res; - update_phi = gsi.phi (); - orig_phi = update_phi; - orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); - /* This loop-closed-phi actually doesn't represent a use - out of the loop - the phi arg is a constant. */ - if (TREE_CODE (orig_def) != SSA_NAME) - continue; - orig_def_new_name = get_current_def (orig_def); - arg = NULL_TREE; - - /** 1. Handle new-merge-point phis **/ - - /* 1.1. Generate new phi node in NEW_MERGE_BB: */ - new_res = copy_ssa_name (PHI_RESULT (orig_phi)); - new_phi = create_phi_node (new_res, new_merge_bb); - - /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge - of LOOP. Set the two PHI args in NEW_PHI for these edges: */ - new_name = orig_def; - new_name2 = NULL_TREE; - if (orig_def_new_name) - { - new_name = orig_def_new_name; - /* Some variables have both loop-entry-phis and loop-exit-phis. - Such variables were given yet newer names by phis placed in - guard_bb by slpeel_update_phi_nodes_for_guard1. I.e: - new_name2 = get_current_def (get_current_def (orig_name)). */ - new_name2 = get_current_def (new_name); - } - - if (is_new_loop) - { - guard_arg = orig_def; - loop_arg = new_name; - } - else - { - guard_arg = new_name; - loop_arg = orig_def; - } - if (new_name2) - guard_arg = new_name2; - - add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION); - add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION); - - /* 1.3. Update phi in successor block. */ - gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def); - adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi)); - update_phi2 = new_phi; - - - /** 2. Handle loop-closed-ssa-form phis **/ - - if (virtual_operand_p (PHI_RESULT (orig_phi))) - continue; - - /* 2.1. Generate new phi node in NEW_EXIT_BB: */ - new_res = copy_ssa_name (PHI_RESULT (orig_phi)); - new_phi = create_phi_node (new_res, *new_exit_bb); - - /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */ - add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION); - - /* 2.3. Update phi in successor of NEW_EXIT_BB: */ - gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg); - adjust_phi_and_debug_stmts (update_phi2, new_exit_e, - PHI_RESULT (new_phi)); - - - /** 3. Handle loop-closed-ssa-form phis for first loop **/ - - /* 3.1. Find the relevant names that need an exit-phi in - GUARD_BB, i.e. names for which - slpeel_update_phi_nodes_for_guard1 had not already created a - phi node. This is the case for names that are used outside - the loop (and therefore need an exit phi) but are not updated - across loop iterations (and therefore don't have a - loop-header-phi). - - slpeel_update_phi_nodes_for_guard1 is responsible for - creating loop-exit phis in GUARD_BB for names that have a - loop-header-phi. When such a phi is created we also record - the new name in its current definition. If this new name - exists, then guard_arg was set to this new name (see 1.2 - above). Therefore, if guard_arg is not this new name, this - is an indication that an exit-phi in GUARD_BB was not yet - created, so we take care of it here. */ - if (guard_arg == new_name2) - continue; - arg = guard_arg; - - /* 3.2. Generate new phi node in GUARD_BB: */ - new_res = copy_ssa_name (PHI_RESULT (orig_phi)); - new_phi = create_phi_node (new_res, guard_edge->src); - - /* 3.3. GUARD_BB has one incoming edge: */ - gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1); - add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0), - UNKNOWN_LOCATION); - - /* 3.4. Update phi in successor of GUARD_BB: */ - gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge) - == guard_arg); - adjust_phi_and_debug_stmts (update_phi2, guard_edge, - PHI_RESULT (new_phi)); - } -} - - /* Make the LOOP iterate NITERS times. This is done by adding a new IV that starts at zero, increases by one and its limit is NITERS. @@ -942,15 +512,14 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, } -/* Given the condition statement COND, put it as the last statement - of GUARD_BB; EXIT_BB is the basic block to skip the loop; - Assumes that this is the single exit of the guarded loop. - Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */ +/* Given the condition expression COND, put it as the last statement of + GUARD_BB; set both edges' probability; set dominator of GUARD_TO to + DOM_BB; return the skip edge. GUARD_TO is the target basic block to + skip the loop. PROBABILITY is the skip edge's probability. */ static edge slpeel_add_loop_guard (basic_block guard_bb, tree cond, - gimple_seq cond_expr_stmt_list, - basic_block exit_bb, basic_block dom_bb, + basic_block guard_to, basic_block dom_bb, int probability) { gimple_stmt_iterator gsi; @@ -966,23 +535,21 @@ slpeel_add_loop_guard (basic_block guard_bb, tree cond, cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr, NULL_TREE); if (gimplify_stmt_list) - gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list); - cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE); - if (cond_expr_stmt_list) - gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT); + gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT); + cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE); gsi = gsi_last_bb (guard_bb); gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); /* Add new edge to connect guard block to the merge/loop-exit block. */ - new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE); + new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE); new_e->count = guard_bb->count; new_e->probability = probability; new_e->count = apply_probability (enter_e->count, probability); enter_e->count -= new_e->count; enter_e->probability = inverse_probability (probability); - set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb); + set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb); return new_e; } @@ -1018,217 +585,19 @@ slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e) return true; } -static void -slpeel_checking_verify_cfg_after_peeling (struct loop *first_loop, - struct loop *second_loop) -{ - if (!flag_checking) - return; - - basic_block loop1_exit_bb = single_exit (first_loop)->dest; - basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src; - basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src; - - /* A guard that controls whether the second_loop is to be executed or skipped - is placed in first_loop->exit. first_loop->exit therefore has two - successors - one is the preheader of second_loop, and the other is a bb - after second_loop. - */ - gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2); - - /* 1. Verify that one of the successors of first_loop->exit is the preheader - of second_loop. */ - - /* The preheader of new_loop is expected to have two predecessors: - first_loop->exit and the block that precedes first_loop. */ - - gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2 - && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb - && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb) - || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb - && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb))); - - /* Verify that the other successor of first_loop->exit is after the - second_loop. */ - /* TODO */ -} - -/* If the run time cost model check determines that vectorization is - not profitable and hence scalar loop should be generated then set - FIRST_NITERS to prologue peeled iterations. This will allow all the - iterations to be executed in the prologue peeled scalar loop. */ +/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI + in the exit bb and rename all the uses after the loop. This simplifies + the *guard[12] routines, which assume loop closed SSA form for all PHIs + (but normally loop closed SSA form doesn't require virtual PHIs to be + in the same form). Doing this early simplifies the checking what + uses should be renamed. */ static void -set_prologue_iterations (basic_block bb_before_first_loop, - tree *first_niters, - struct loop *loop, - unsigned int th, - int probability) +create_lcssa_for_virtual_phi (struct loop *loop) { - edge e; - basic_block cond_bb, then_bb; - tree var, prologue_after_cost_adjust_name; - gimple_stmt_iterator gsi; - gphi *newphi; - edge e_true, e_false, e_fallthru; - gcond *cond_stmt; - gimple_seq stmts = NULL; - tree cost_pre_condition = NULL_TREE; - tree scalar_loop_iters = - unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop))); - - e = single_pred_edge (bb_before_first_loop); - cond_bb = split_edge (e); - - e = single_pred_edge (bb_before_first_loop); - then_bb = split_edge (e); - set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); - - e_false = make_single_succ_edge (cond_bb, bb_before_first_loop, - EDGE_FALSE_VALUE); - set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb); - - e_true = EDGE_PRED (then_bb, 0); - e_true->flags &= ~EDGE_FALLTHRU; - e_true->flags |= EDGE_TRUE_VALUE; - - e_true->probability = probability; - e_false->probability = inverse_probability (probability); - e_true->count = apply_probability (cond_bb->count, probability); - e_false->count = cond_bb->count - e_true->count; - then_bb->frequency = EDGE_FREQUENCY (e_true); - then_bb->count = e_true->count; - - e_fallthru = EDGE_SUCC (then_bb, 0); - e_fallthru->count = then_bb->count; - - gsi = gsi_last_bb (cond_bb); - cost_pre_condition = - fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters, - build_int_cst (TREE_TYPE (scalar_loop_iters), th)); - cost_pre_condition = - force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr, - NULL_TREE, false, GSI_CONTINUE_LINKING); - cond_stmt = gimple_build_cond_from_tree (cost_pre_condition, - NULL_TREE, NULL_TREE); - gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); - - var = create_tmp_var (TREE_TYPE (scalar_loop_iters), - "prologue_after_cost_adjust"); - prologue_after_cost_adjust_name = - force_gimple_operand (scalar_loop_iters, &stmts, false, var); - - gsi = gsi_last_bb (then_bb); - if (stmts) - gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); - - newphi = create_phi_node (var, bb_before_first_loop); - add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru, - UNKNOWN_LOCATION); - add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION); - - *first_niters = PHI_RESULT (newphi); -} - -/* Function slpeel_tree_peel_loop_to_edge. - - Peel the first (last) iterations of LOOP into a new prolog (epilog) loop - that is placed on the entry (exit) edge E of LOOP. After this transformation - we have two loops one after the other - first-loop iterates FIRST_NITERS - times, and second-loop iterates the remainder NITERS - FIRST_NITERS times. - If the cost model indicates that it is profitable to emit a scalar - loop instead of the vector one, then the prolog (epilog) loop will iterate - for the entire unchanged scalar iterations of the loop. - - Input: - - LOOP: the loop to be peeled. - - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks - should be copied. - - E: the exit or entry edge of LOOP. - If it is the entry edge, we peel the first iterations of LOOP. In this - case first-loop is LOOP, and second-loop is the newly created loop. - If it is the exit edge, we peel the last iterations of LOOP. In this - case, first-loop is the newly created loop, and second-loop is LOOP. - - NITERS: the number of iterations that LOOP iterates. - - FIRST_NITERS: the number of iterations that the first-loop should iterate. - - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible - for updating the loop bound of the first-loop to FIRST_NITERS. If it - is false, the caller of this function may want to take care of this - (this can be useful if we don't want new stmts added to first-loop). - - TH: cost model profitability threshold of iterations for vectorization. - - CHECK_PROFITABILITY: specify whether cost model check has not occurred - during versioning and hence needs to occur during - prologue generation or whether cost model check - has not occurred during prologue generation and hence - needs to occur during epilogue generation. - - BOUND1 is the upper bound on number of iterations of the first loop (if known) - - BOUND2 is the upper bound on number of iterations of the second loop (if known) - - - Output: - The function returns a pointer to the new loop-copy, or NULL if it failed - to perform the transformation. - - The function generates two if-then-else guards: one before the first loop, - and the other before the second loop: - The first guard is: - if (FIRST_NITERS == 0) then skip the first loop, - and go directly to the second loop. - The second guard is: - if (FIRST_NITERS == NITERS) then skip the second loop. - - If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given - then the generated condition is combined with COND_EXPR and the - statements in COND_EXPR_STMT_LIST are emitted together with it. - - FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p). - FORNOW the resulting code will not be in loop-closed-ssa form. -*/ - -static struct loop * -slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop, - edge e, tree *first_niters, - tree niters, bool update_first_loop_count, - unsigned int th, bool check_profitability, - tree cond_expr, gimple_seq cond_expr_stmt_list, - int bound1, int bound2) -{ - struct loop *new_loop = NULL, *first_loop, *second_loop; - edge skip_e; - tree pre_condition = NULL_TREE; - basic_block bb_before_second_loop, bb_after_second_loop; - basic_block bb_before_first_loop; - basic_block bb_between_loops; - basic_block new_exit_bb; gphi_iterator gsi; edge exit_e = single_exit (loop); - source_location loop_loc; - /* There are many aspects to how likely the first loop is going to be executed. - Without histogram we can't really do good job. Simply set it to - 2/3, so the first loop is not reordered to the end of function and - the hot path through stays short. */ - int first_guard_probability = 2 * REG_BR_PROB_BASE / 3; - int second_guard_probability = 2 * REG_BR_PROB_BASE / 3; - int probability_of_second_loop; - - if (!slpeel_can_duplicate_loop_p (loop, e)) - return NULL; - /* We might have a queued need to update virtual SSA form. As we - delete the update SSA machinery below after doing a regular - incremental SSA update during loop copying make sure we don't - lose that fact. - ??? Needing to update virtual SSA form by renaming is unfortunate - but not all of the vectorizer code inserting new loads / stores - properly assigns virtual operands to those statements. */ - update_ssa (TODO_update_ssa_only_virtuals); - - /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI - in the exit bb and rename all the uses after the loop. This simplifies - the *guard[12] routines, which assume loop closed SSA form for all PHIs - (but normally loop closed SSA form doesn't require virtual PHIs to be - in the same form). Doing this early simplifies the checking what - uses should be renamed. */ for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi)) if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi)))) { @@ -1257,247 +626,6 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop, break; } - /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP). - Resulting CFG would be: - - first_loop: - do { - } while ... - - second_loop: - do { - } while ... - - orig_exit_bb: - */ - - if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, - e))) - { - loop_loc = find_loop_location (loop); - dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc, - "tree_duplicate_loop_to_edge_cfg failed.\n"); - return NULL; - } - - if (MAY_HAVE_DEBUG_STMTS) - { - gcc_assert (!adjust_vec.exists ()); - adjust_vec.create (32); - } - - if (e == exit_e) - { - /* NEW_LOOP was placed after LOOP. */ - first_loop = loop; - second_loop = new_loop; - } - else - { - /* NEW_LOOP was placed before LOOP. */ - first_loop = new_loop; - second_loop = loop; - } - - /* 2. Add the guard code in one of the following ways: - - 2.a Add the guard that controls whether the first loop is executed. - This occurs when this function is invoked for prologue or epilogue - generation and when the cost model check can be done at compile time. - - Resulting CFG would be: - - bb_before_first_loop: - if (FIRST_NITERS == 0) GOTO bb_before_second_loop - GOTO first-loop - - first_loop: - do { - } while ... - - bb_before_second_loop: - - second_loop: - do { - } while ... - - orig_exit_bb: - - 2.b Add the cost model check that allows the prologue - to iterate for the entire unchanged scalar - iterations of the loop in the event that the cost - model indicates that the scalar loop is more - profitable than the vector one. This occurs when - this function is invoked for prologue generation - and the cost model check needs to be done at run - time. - - Resulting CFG after prologue peeling would be: - - if (scalar_loop_iterations <= th) - FIRST_NITERS = scalar_loop_iterations - - bb_before_first_loop: - if (FIRST_NITERS == 0) GOTO bb_before_second_loop - GOTO first-loop - - first_loop: - do { - } while ... - - bb_before_second_loop: - - second_loop: - do { - } while ... - - orig_exit_bb: - - 2.c Add the cost model check that allows the epilogue - to iterate for the entire unchanged scalar - iterations of the loop in the event that the cost - model indicates that the scalar loop is more - profitable than the vector one. This occurs when - this function is invoked for epilogue generation - and the cost model check needs to be done at run - time. This check is combined with any pre-existing - check in COND_EXPR to avoid versioning. - - Resulting CFG after prologue peeling would be: - - bb_before_first_loop: - if ((scalar_loop_iterations <= th) - || - FIRST_NITERS == 0) GOTO bb_before_second_loop - GOTO first-loop - - first_loop: - do { - } while ... - - bb_before_second_loop: - - second_loop: - do { - } while ... - - orig_exit_bb: - */ - - bb_before_first_loop = split_edge (loop_preheader_edge (first_loop)); - /* Loop copying insterted a forwarder block for us here. */ - bb_before_second_loop = single_exit (first_loop)->dest; - - probability_of_second_loop = (inverse_probability (first_guard_probability) - + combine_probabilities (second_guard_probability, - first_guard_probability)); - /* Theoretically preheader edge of first loop and exit edge should have - same frequencies. Loop exit probablities are however easy to get wrong. - It is safer to copy value from original loop entry. */ - bb_before_second_loop->frequency - = combine_probabilities (bb_before_first_loop->frequency, - probability_of_second_loop); - bb_before_second_loop->count - = apply_probability (bb_before_first_loop->count, - probability_of_second_loop); - single_succ_edge (bb_before_second_loop)->count - = bb_before_second_loop->count; - - /* Epilogue peeling. */ - if (!update_first_loop_count) - { - loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop); - tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo); - unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1; - if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) - limit = limit + 1; - if (check_profitability - && th > limit) - limit = th; - pre_condition = - fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters, - build_int_cst (TREE_TYPE (scalar_loop_iters), limit)); - if (cond_expr) - { - pre_condition = - fold_build2 (TRUTH_OR_EXPR, boolean_type_node, - pre_condition, - fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, - cond_expr)); - } - } - - /* Prologue peeling. */ - else - { - if (check_profitability) - set_prologue_iterations (bb_before_first_loop, first_niters, - loop, th, first_guard_probability); - - pre_condition = - fold_build2 (LE_EXPR, boolean_type_node, *first_niters, - build_int_cst (TREE_TYPE (*first_niters), 0)); - } - - skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition, - cond_expr_stmt_list, - bb_before_second_loop, bb_before_first_loop, - inverse_probability (first_guard_probability)); - scale_loop_profile (first_loop, first_guard_probability, - check_profitability && (int)th > bound1 ? th : bound1); - slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop, - first_loop == new_loop, - &new_exit_bb); - - - /* 3. Add the guard that controls whether the second loop is executed. - Resulting CFG would be: - - bb_before_first_loop: - if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop) - GOTO first-loop - - first_loop: - do { - } while ... - - bb_between_loops: - if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop) - GOTO bb_before_second_loop - - bb_before_second_loop: - - second_loop: - do { - } while ... - - bb_after_second_loop: - - orig_exit_bb: - */ - - bb_between_loops = new_exit_bb; - bb_after_second_loop = split_edge (single_exit (second_loop)); - - pre_condition = - fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters); - skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL, - bb_after_second_loop, bb_before_first_loop, - inverse_probability (second_guard_probability)); - scale_loop_profile (second_loop, probability_of_second_loop, bound2); - slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop, - second_loop == new_loop, &new_exit_bb); - - /* 4. Make first-loop iterate FIRST_NITERS times, if requested. - */ - if (update_first_loop_count) - slpeel_make_loop_iterate_ntimes (first_loop, *first_niters); - - delete_update_ssa (); - - adjust_vec_debug_stmts (); - - return new_loop; } /* Function vect_get_loop_location. @@ -1541,6 +669,22 @@ find_loop_location (struct loop *loop) return UNKNOWN_LOCATION; } +/* Return true if PHI defines an IV of the loop to be vectorized. */ + +static bool +iv_phi_p (gphi *phi) +{ + if (virtual_operand_p (PHI_RESULT (phi))) + return false; + + stmt_vec_info stmt_info = vinfo_for_stmt (phi); + gcc_assert (stmt_info != NULL); + if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) + return false; + + return true; +} /* Function vect_can_advance_ivs_p @@ -1556,7 +700,6 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block bb = loop->header; - gimple *phi; gphi_iterator gsi; /* Analyze phi functions of the loop header. */ @@ -1567,7 +710,7 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo) { tree evolution_part; - phi = gsi.phi (); + gphi *phi = gsi.phi (); if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: "); @@ -1575,26 +718,17 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo) } /* Skip virtual phi's. The data dependences that are associated with - virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ + virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. - if (virtual_operand_p (PHI_RESULT (phi))) + Skip reduction phis. */ + if (!iv_phi_p (phi)) { if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "virtual phi. skip.\n"); + dump_printf_loc (MSG_NOTE, vect_location, + "reduc or virtual phi. skip.\n"); continue; } - /* Skip reduction phis. */ - - if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "reduc phi. skip.\n"); - continue; - } - /* Analyze the evolution function. */ evolution_part @@ -1676,19 +810,17 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo) */ static void -vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters, - edge update_e) +vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, + tree niters, edge update_e) { - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block exit_bb = single_exit (loop)->dest; - gphi *phi, *phi1; gphi_iterator gsi, gsi1; + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block update_bb = update_e->dest; - - gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo)); + basic_block exit_bb = single_exit (loop)->dest; /* Make sure there exists a single-predecessor exit bb: */ gcc_assert (single_pred_p (exit_bb)); + gcc_assert (single_succ_edge (exit_bb) == update_e); for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb); !gsi_end_p (gsi) && !gsi_end_p (gsi1); @@ -1699,39 +831,27 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters, tree type; tree var, ni, ni_name; gimple_stmt_iterator last_gsi; - stmt_vec_info stmt_info; - phi = gsi.phi (); - phi1 = gsi1.phi (); + gphi *phi = gsi.phi (); + gphi *phi1 = gsi1.phi (); if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, - "vect_update_ivs_after_vectorizer: phi: "); + { + dump_printf_loc (MSG_NOTE, vect_location, + "vect_update_ivs_after_vectorizer: phi: "); dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0); - } + } - /* Skip virtual phi's. */ - if (virtual_operand_p (PHI_RESULT (phi))) + /* Skip reduction and virtual phis. */ + if (!iv_phi_p (phi)) { if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "virtual phi. skip.\n"); + dump_printf_loc (MSG_NOTE, vect_location, + "reduc or virtual phi. skip.\n"); continue; } - /* Skip reduction phis. */ - stmt_info = vinfo_for_stmt (phi); - if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def - || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "reduc phi. skip.\n"); - continue; - } - type = TREE_TYPE (gimple_phi_result (phi)); - step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info); + step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi)); step_expr = unshare_expr (step_expr); /* FORNOW: We do not support IVs whose evolution function is a polynomial @@ -1752,118 +872,40 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters, var = create_tmp_var (type, "tmp"); last_gsi = gsi_last_bb (exit_bb); - ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var, - true, GSI_SAME_STMT); + gimple_seq new_stmts = NULL; + ni_name = force_gimple_operand (ni, &new_stmts, false, var); + /* Exit_bb shouldn't be empty. */ + if (!gsi_end_p (last_gsi)) + gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT); + else + gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT); /* Fix phi expressions in the successor bb. */ adjust_phi_and_debug_stmts (phi1, update_e, ni_name); } } -/* Function vect_do_peeling_for_loop_bound - - Peel the last iterations of the loop represented by LOOP_VINFO. - The peeled iterations form a new epilog loop. Given that the loop now - iterates NITERS times, the new epilog loop iterates - NITERS % VECTORIZATION_FACTOR times. - - If CHECK_PROFITABILITY is 1 then profitability check is generated - using TH as a cost model profitability threshold of iterations for - vectorization. - - The original loop will later be made to iterate - NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). - - COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated - test. */ +/* Function vect_gen_prolog_loop_niters -void -vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, - tree ni_name, tree ratio_mult_vf_name, - unsigned int th, bool check_profitability) -{ - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); - struct loop *new_loop; - edge update_e; - basic_block preheader; - int max_iter; - tree cond_expr = NULL_TREE; - gimple_seq cond_expr_stmt_list = NULL; - - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "=== vect_do_peeling_for_loop_bound ===\n"); - - initialize_original_copy_tables (); - - new_loop - = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop), - &ratio_mult_vf_name, ni_name, false, - th, check_profitability, - cond_expr, cond_expr_stmt_list, - 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo)); - gcc_assert (new_loop); - slpeel_checking_verify_cfg_after_peeling (loop, new_loop); - - /* A guard that controls whether the new_loop is to be executed or skipped - is placed in LOOP->exit. LOOP->exit therefore has two successors - one - is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other - is a bb after NEW_LOOP, where these IVs are not used. Find the edge that - is on the path where the LOOP IVs are used and need to be updated. */ - - preheader = loop_preheader_edge (new_loop)->src; - if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest) - update_e = EDGE_PRED (preheader, 0); - else - update_e = EDGE_PRED (preheader, 1); - - /* Update IVs of original loop as if they were advanced - by ratio_mult_vf_name steps. */ - vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e); - - /* For vectorization factor N, we need to copy last N-1 values in epilogue - and this means N-2 loopback edge executions. - - PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue - will execute at least LOOP_VINFO_VECT_FACTOR times. */ - max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) - ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2 - : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2; - if (check_profitability) - max_iter = MAX (max_iter, (int) th - 1); - record_niter_bound (new_loop, max_iter, false, true); - dump_printf (MSG_NOTE, - "Setting upper bound of nb iterations for epilogue " - "loop to %d\n", max_iter); - - /* After peeling we have to reset scalar evolution analyzer. */ - scev_reset (); - - free_original_copy_tables (); -} - - -/* Function vect_gen_niters_for_prolog_loop - - Set the number of iterations for the loop represented by LOOP_VINFO - to the minimum between LOOP_NITERS (the original iteration count of the loop) - and the misalignment of DR - the data reference recorded in - LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of - this loop, the data reference DR will refer to an aligned location. - - The following computation is generated: + Generate the number of iterations which should be peeled as prolog for the + loop represented by LOOP_VINFO. It is calculated as the misalignment of + DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). + As a result, after the execution of this loop, the data reference DR will + refer to an aligned location. The following computation is generated: If the misalignment of DR is known at compile time: addr_mis = int mis = DR_MISALIGNMENT (dr); Else, compute address misalignment in bytes: addr_mis = addr & (vectype_align - 1) - prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step) + prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step (elem_size = element type size; an element is the scalar element whose type is the inner type of the vectype) + The computations will be emitted at the end of BB. We also compute and + store upper bound of the result in BOUND. + When the step of the data-ref in the loop is not 1 (as in interleaved data and SLP), the number of iterations of the prolog must be divided by the step (which is equal to the size of interleaved group). @@ -1875,24 +917,21 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, use TYPE_VECTOR_SUBPARTS. */ static tree -vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound) +vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo, + basic_block bb, int *bound) { struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree var; - gimple_seq stmts; + tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)); + gimple_seq stmts = NULL, new_stmts = NULL; tree iters, iters_name; - edge pe; - basic_block new_bb; gimple *dr_stmt = DR_STMT (dr); stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt); tree vectype = STMT_VINFO_VECTYPE (stmt_info); int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT; - tree niters_type = TREE_TYPE (loop_niters); int nelements = TYPE_VECTOR_SUBPARTS (vectype); - pe = loop_preheader_edge (loop); - if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) { int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); @@ -1902,16 +941,15 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int "known peeling = %d.\n", npeel); iters = build_int_cst (niters_type, npeel); - *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); + *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) + 1; } else { - gimple_seq new_stmts = NULL; bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0; tree offset = negative ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node; tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt, - &new_stmts, offset, loop); + &stmts, offset, loop); tree type = unsigned_type_for (TREE_TYPE (start_addr)); tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1); HOST_WIDE_INT elem_size = @@ -1922,17 +960,14 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int tree byte_misalign; tree elem_misalign; - new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts); - gcc_assert (!new_bb); - /* Create: byte_misalign = addr & (vectype_align - 1) */ byte_misalign = - fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), - vectype_align_minus_1); + fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), + vectype_align_minus_1); /* Create: elem_misalign = byte_misalign / element_size */ elem_misalign = - fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log); + fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log); /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */ if (negative) @@ -1944,13 +979,6 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound = nelements; } - /* Create: prolog_loop_niters = min (iters, loop_niters) */ - /* If the loop bound is known at compile time we already verified that it is - greater than vf; since the misalignment ('iters') is at most vf, there's - no need to generate the MIN_EXPR in this case. */ - if (TREE_CODE (loop_niters) != INTEGER_CST) - iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters); - if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, @@ -1960,16 +988,19 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int } var = create_tmp_var (niters_type, "prolog_loop_niters"); - stmts = NULL; - iters_name = force_gimple_operand (iters, &stmts, false, var); + iters_name = force_gimple_operand (iters, &new_stmts, false, var); - /* Insert stmt on loop preheader edge. */ + if (new_stmts) + gimple_seq_add_seq (&stmts, new_stmts); if (stmts) { - basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); + gcc_assert (single_succ_p (bb)); + gimple_stmt_iterator gsi = gsi_last_bb (bb); + if (gsi_end_p (gsi)) + gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); + else + gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT); } - return iters_name; } @@ -2009,102 +1040,783 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) unsigned int i; vec datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); struct data_reference *dr; - - if (dump_enabled_p ()) + + if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, - "=== vect_update_inits_of_dr ===\n"); + "=== vect_update_inits_of_dr ===\n"); + + /* Adjust niters to sizetype and insert stmts on loop preheader edge. */ + if (!types_compatible_p (sizetype, TREE_TYPE (niters))) + { + gimple_seq seq; + edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); + tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters"); + + niters = fold_convert (sizetype, niters); + niters = force_gimple_operand (niters, &seq, false, var); + if (seq) + { + basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); + gcc_assert (!new_bb); + } + } FOR_EACH_VEC_ELT (datarefs, i, dr) vect_update_init_of_dr (dr, niters); } -/* Function vect_do_peeling_for_alignment +/* This function builds ni_name = number of iterations. Statements + are emitted on the loop preheader edge. */ + +tree +vect_build_loop_niters (loop_vec_info loop_vinfo) +{ + tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); + if (TREE_CODE (ni) == INTEGER_CST) + return ni; + else + { + tree ni_name, var; + gimple_seq stmts = NULL; + edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); + + var = create_tmp_var (TREE_TYPE (ni), "niters"); + ni_name = force_gimple_operand (ni, &stmts, false, var); + if (stmts) + gsi_insert_seq_on_edge_immediate (pe, stmts); + + return ni_name; + } +} + +/* Calculate the number of iterations under which scalar loop will be + preferred than vectorized loop. NITERS_PROLOG is the number of + iterations of prolog loop. If it's integer const, the integer + number is also passed by INT_NITERS_PROLOG. VF is vector factor; + TH is the threshold for vectorized loop if CHECK_PROFITABILITY is + true. This function also store upper bound of the result in BOUND. */ + +static tree +vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog, + int bound_prolog, int vf, int th, int *bound, + bool check_profitability) +{ + tree type = TREE_TYPE (niters_prolog); + tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog, + build_int_cst (type, vf)); - Peel the first 'niters' iterations of the loop represented by LOOP_VINFO. - 'niters' is set to the misalignment of one of the data references in the - loop, thereby forcing it to refer to an aligned location at the beginning - of the execution of this loop. The data reference for which we are - peeling is recorded in LOOP_VINFO_UNALIGNED_DR. + *bound = vf + bound_prolog; + if (check_profitability) + { + th++; + /* Peeling for constant times. */ + if (int_niters_prolog >= 0) + { + *bound = (int_niters_prolog + vf < th + ? th + : vf + int_niters_prolog); + return build_int_cst (type, *bound); + } + /* Peeling for unknown times, in this case, prolog loop must + execute less than bound_prolog times. */ + if (th >= vf + bound_prolog - 1) + { + *bound = th; + return build_int_cst (type, th); + } + /* Need to do runtime comparison, but bound remains the same. */ + else if (th > vf) + return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters); + } + return niters; +} + +/* This function generates the following statements: - If CHECK_PROFITABILITY is 1 then profitability check is generated - using TH as a cost model profitability threshold of iterations for - vectorization. */ + niters = number of iterations loop executes (after peeling) + niters_vector = niters / vf + + and places them on the loop preheader edge. NITERS_NO_OVERFLOW is + true if NITERS doesn't overflow. */ void -vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name, - unsigned int th, bool check_profitability) +vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters, + tree *niters_vector_ptr, bool niters_no_overflow) { + tree ni_minus_gap, var; + tree niters_vector; + int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); + tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf)); + + /* If epilogue loop is required because of data accesses with gaps, we + subtract one iteration from the total number of iterations here for + correct calculation of RATIO. */ + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) + { + ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), + niters, + build_one_cst (TREE_TYPE (niters))); + if (!is_gimple_val (ni_minus_gap)) + { + var = create_tmp_var (TREE_TYPE (niters), "ni_gap"); + gimple *stmts = NULL; + ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts, + true, var); + gsi_insert_seq_on_edge_immediate (pe, stmts); + } + } + else + ni_minus_gap = niters; + + /* Create: niters >> log2(vf) */ + /* If it's known that niters == number of latch executions + 1 doesn't + overflow, we can generate niters >> log2(vf); otherwise we generate + (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio + will be at least one. */ + if (niters_no_overflow) + niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters), + ni_minus_gap, log_vf); + else + niters_vector + = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), + fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters), + fold_build2 (MINUS_EXPR, TREE_TYPE (niters), + ni_minus_gap, + build_int_cst + (TREE_TYPE (niters), vf)), + log_vf), + build_int_cst (TREE_TYPE (niters), 1)); + + if (!is_gimple_val (niters_vector)) + { + var = create_tmp_var (TREE_TYPE (niters), "bnd"); + gimple *stmts = NULL; + niters_vector = force_gimple_operand (niters_vector, &stmts, true, var); + gsi_insert_seq_on_edge_immediate (pe, stmts); + } + *niters_vector_ptr = niters_vector; + + return; +} + +/* Given NITERS_VECTOR which is the number of iterations for vectorized + loop specified by LOOP_VINFO after vectorization, compute the number + of iterations before vectorization (niters_vector * vf) and store it + to NITERS_VECTOR_MULT_VF_PTR. */ + +static void +vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo, + tree niters_vector, + tree *niters_vector_mult_vf_ptr) +{ + int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); - tree niters_of_prolog_loop; - tree wide_prolog_niters; - struct loop *new_loop; - int max_iter; - int bound = 0; + tree type = TREE_TYPE (niters_vector); + tree log_vf = build_int_cst (type, exact_log2 (vf)); + basic_block exit_bb = single_exit (loop)->dest; - if (dump_enabled_p ()) - dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, - "loop peeled for vectorization to enhance" - " alignment\n"); + gcc_assert (niters_vector_mult_vf_ptr != NULL); + tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type, + niters_vector, log_vf); + if (!is_gimple_val (niters_vector_mult_vf)) + { + tree var = create_tmp_var (type, "niters_vector_mult_vf"); + gimple_seq stmts = NULL; + niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf, + &stmts, true, var); + gimple_stmt_iterator gsi = gsi_start_bb (exit_bb); + gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); + } + *niters_vector_mult_vf_ptr = niters_vector_mult_vf; +} +/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplciates FIRST/SECOND + from SECOND/FIRST and puts it at the original loop's preheader/exit + edge, the two loops are arranged as below: + + preheader_a: + first_loop: + header_a: + i_1 = PHI; + ... + i_2 = i_1 + 1; + if (cond_a) + goto latch_a; + else + goto between_bb; + latch_a: + goto header_a; + + between_bb: + ;; i_x = PHI; ;; LCSSA phi node to be created for FIRST, + + second_loop: + header_b: + i_3 = PHI; ;; Use of i_0 to be replaced with i_x, + or with i_2 if no LCSSA phi is created + under condition of CREATE_LCSSA_FOR_IV_PHIS. + ... + i_4 = i_3 + 1; + if (cond_b) + goto latch_b; + else + goto exit_bb; + latch_b: + goto header_b; + + exit_bb: + + This function creates loop closed SSA for the first loop; update the + second loop's PHI nodes by replacing argument on incoming edge with the + result of newly created lcssa PHI nodes. IF CREATE_LCSSA_FOR_IV_PHIS + is false, Loop closed ssa phis will only be created for non-iv phis for + the first loop. + + This function assumes exit bb of the first loop is preheader bb of the + second loop, i.e, between_bb in the example code. With PHIs updated, + the second loop will execute rest iterations of the first. */ + +static void +slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo, + struct loop *first, struct loop *second, + bool create_lcssa_for_iv_phis) +{ + gphi_iterator gsi_update, gsi_orig; + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + + edge first_latch_e = EDGE_SUCC (first->latch, 0); + edge second_preheader_e = loop_preheader_edge (second); + basic_block between_bb = single_exit (first)->dest; + + gcc_assert (between_bb == second_preheader_e->src); + gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb)); + /* Either the first loop or the second is the loop to be vectorized. */ + gcc_assert (loop == first || loop == second); + + for (gsi_orig = gsi_start_phis (first->header), + gsi_update = gsi_start_phis (second->header); + !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update); + gsi_next (&gsi_orig), gsi_next (&gsi_update)) + { + gphi *orig_phi = gsi_orig.phi (); + gphi *update_phi = gsi_update.phi (); + + tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e); + /* Generate lcssa PHI node for the first loop. */ + gphi *vect_phi = (loop == first) ? orig_phi : update_phi; + if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi)) + { + tree new_res = copy_ssa_name (PHI_RESULT (orig_phi)); + gphi *lcssa_phi = create_phi_node (new_res, between_bb); + add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION); + arg = new_res; + } + + /* Update PHI node in the second loop by replacing arg on the loop's + incoming edge. */ + adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg); + } +} + +/* Function slpeel_add_loop_guard adds guard skipping from the beginning + of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE + are two pred edges of the merge point before UPDATE_LOOP. The two loops + appear like below: + + guard_bb: + if (cond) + goto merge_bb; + else + goto skip_loop; + + skip_loop: + header_a: + i_1 = PHI; + ... + i_2 = i_1 + 1; + if (cond_a) + goto latch_a; + else + goto exit_a; + latch_a: + goto header_a; + + exit_a: + i_5 = PHI; + + merge_bb: + ;; PHI (i_x = PHI) to be created at merge point. + + update_loop: + header_b: + i_3 = PHI; ;; Use of i_5 to be replaced with i_x. + ... + i_4 = i_3 + 1; + if (cond_b) + goto latch_b; + else + goto exit_bb; + latch_b: + goto header_b; + + exit_bb: + + This function creates PHI nodes at merge_bb and replaces the use of i_5 + in the update_loop's PHI node with the result of new PHI result. */ + +static void +slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop, + struct loop *update_loop, + edge guard_edge, edge merge_edge) +{ + source_location merge_loc, guard_loc; + edge orig_e = loop_preheader_edge (skip_loop); + edge update_e = loop_preheader_edge (update_loop); + gphi_iterator gsi_orig, gsi_update; + + for ((gsi_orig = gsi_start_phis (skip_loop->header), + gsi_update = gsi_start_phis (update_loop->header)); + !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update); + gsi_next (&gsi_orig), gsi_next (&gsi_update)) + { + gphi *orig_phi = gsi_orig.phi (); + gphi *update_phi = gsi_update.phi (); + + /* Generate new phi node at merge bb of the guard. */ + tree new_res = copy_ssa_name (PHI_RESULT (orig_phi)); + gphi *new_phi = create_phi_node (new_res, guard_edge->dest); + + /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the + args in NEW_PHI for these edges. */ + tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e); + tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e); + merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e); + guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e); + add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc); + add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc); + + /* Update phi in UPDATE_PHI. */ + adjust_phi_and_debug_stmts (update_phi, update_e, new_res); + } +} + +/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP, + this function searches for the corresponding lcssa phi node in exit + bb of LOOP. If it is found, return the phi result; otherwise return + NULL. */ + +static tree +find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED, + gphi *lcssa_phi) +{ + gphi_iterator gsi; + edge e = single_exit (loop); + + gcc_assert (single_pred_p (e->dest)); + for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gphi *phi = gsi.phi (); + if (operand_equal_p (PHI_ARG_DEF (phi, 0), + PHI_ARG_DEF (lcssa_phi, 0), 0)) + return PHI_RESULT (phi); + } + return NULL_TREE; +} + +/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied + from LOOP. Function slpeel_add_loop_guard adds guard skipping from a + point between the two loops to the end of EPILOG. Edges GUARD_EDGE + and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG. + The CFG looks like: + + loop: + header_a: + i_1 = PHI; + ... + i_2 = i_1 + 1; + if (cond_a) + goto latch_a; + else + goto exit_a; + latch_a: + goto header_a; + + exit_a: + + guard_bb: + if (cond) + goto merge_bb; + else + goto epilog_loop; + + ;; fall_through_bb + + epilog_loop: + header_b: + i_3 = PHI; + ... + i_4 = i_3 + 1; + if (cond_b) + goto latch_b; + else + goto merge_bb; + latch_b: + goto header_b; + + merge_bb: + ; PHI node (i_y = PHI) to be created at merge point. + + exit_bb: + i_x = PHI; ;Use of i_4 to be replaced with i_y in merge_bb. + + For each name used out side EPILOG (i.e - for each name that has a lcssa + phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two + args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is + the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined + by LOOP and is found in the exit bb of LOOP. Arg of the original PHI + in exit_bb will also be updated. */ + +static void +slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog, + edge guard_edge, edge merge_edge) +{ + gphi_iterator gsi; + basic_block merge_bb = guard_edge->dest; + + gcc_assert (single_succ_p (merge_bb)); + edge e = single_succ_edge (merge_bb); + basic_block exit_bb = e->dest; + gcc_assert (single_pred_p (exit_bb)); + gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest); + + for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gphi *update_phi = gsi.phi (); + tree old_arg = PHI_ARG_DEF (update_phi, 0); + /* This loop-closed-phi actually doesn't represent a use out of the + loop - the phi arg is a constant. */ + if (TREE_CODE (old_arg) != SSA_NAME) + continue; + + tree merge_arg = get_current_def (old_arg); + if (!merge_arg) + merge_arg = old_arg; + + tree guard_arg = find_guard_arg (loop, epilog, update_phi); + /* If the var is live after loop but not a reduction, we simply + use the old arg. */ + if (!guard_arg) + guard_arg = old_arg; + + /* Create new phi node in MERGE_BB: */ + tree new_res = copy_ssa_name (PHI_RESULT (update_phi)); + gphi *merge_phi = create_phi_node (new_res, merge_bb); + + /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set + the two PHI args in merge_phi for these edges. */ + add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION); + add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION); + + /* Update the original phi in exit_bb. */ + adjust_phi_and_debug_stmts (update_phi, e, new_res); + } +} + +/* EPILOG loop is duplicated from the original loop for vectorizing, + the arg of its loop closed ssa PHI needs to be updated. */ + +static void +slpeel_update_phi_nodes_for_lcssa (struct loop *epilog) +{ + gphi_iterator gsi; + basic_block exit_bb = single_exit (epilog)->dest; + + gcc_assert (single_pred_p (exit_bb)); + edge e = EDGE_PRED (exit_bb, 0); + for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi)) + rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e)); +} + +/* Function vect_do_peeling. + + Input: + - LOOP_VINFO: Represent a loop to be vectorized, which looks like: + + preheader: + LOOP: + header_bb: + loop_body + if (exit_loop_cond) goto exit_bb + else goto header_bb + exit_bb: + + - NITERS: The number of iterations of the loop. + - NITERSM1: The number of iterations of the loop's latch. + - NITERS_NO_OVERFLOW: No overflow in computing NITERS. + - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if + CHECK_PROFITABILITY is true. + Output: + - NITERS_VECTOR: The number of iterations of loop after vectorization. + + This function peels prolog and epilog from the loop, adds guards skipping + PROLOG and EPILOG for various conditions. As a result, the changed CFG + would look like: + + guard_bb_1: + if (prefer_scalar_loop) goto merge_bb_1 + else goto guard_bb_2 + + guard_bb_2: + if (skip_prolog) goto merge_bb_2 + else goto prolog_preheader + + prolog_preheader: + PROLOG: + prolog_header_bb: + prolog_body + if (exit_prolog_cond) goto prolog_exit_bb + else goto prolog_header_bb + prolog_exit_bb: + + merge_bb_2: + + vector_preheader: + VECTOR LOOP: + vector_header_bb: + vector_body + if (exit_vector_cond) goto vector_exit_bb + else goto vector_header_bb + vector_exit_bb: + + guard_bb_3: + if (skip_epilog) goto merge_bb_3 + else goto epilog_preheader + + merge_bb_1: + + epilog_preheader: + EPILOG: + epilog_header_bb: + epilog_body + if (exit_epilog_cond) goto merge_bb_3 + else goto epilog_header_bb + + merge_bb_3: + + Note this function peels prolog and epilog only if it's necessary, + as well as guards. + + TODO: Guard for prefer_scalar_loop should be emitted along with + versioning conditions if loop versioning is needed. */ + +void +vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, + tree *niters_vector, int th, bool check_profitability, + bool niters_no_overflow) +{ + edge e, guard_e; + tree type = TREE_TYPE (niters), guard_cond; + basic_block guard_bb, guard_to; + int prob_prolog, prob_vector, prob_epilog; + int bound_prolog = 0, bound_epilog = 0, bound = 0; + int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); + bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) + || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)); + + if (!prolog_peeling && !epilog_peeling) + return; + + prob_vector = 9 * REG_BR_PROB_BASE / 10; + if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2) + vf = 3; + prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf; + vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + + struct loop *prolog, *epilog, *loop = LOOP_VINFO_LOOP (loop_vinfo); + struct loop *first_loop = loop; + create_lcssa_for_virtual_phi (loop); + update_ssa (TODO_update_ssa_only_virtuals); + + if (MAY_HAVE_DEBUG_STMTS) + { + gcc_assert (!adjust_vec.exists ()); + adjust_vec.create (32); + } initialize_original_copy_tables (); - niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, - ni_name, - &bound); - - /* Peel the prolog loop and iterate it niters_of_prolog_loop. */ - new_loop = - slpeel_tree_peel_loop_to_edge (loop, scalar_loop, - loop_preheader_edge (loop), - &niters_of_prolog_loop, ni_name, true, - th, check_profitability, NULL_TREE, NULL, - bound, 0); - - gcc_assert (new_loop); - slpeel_checking_verify_cfg_after_peeling (new_loop, loop); - /* For vectorization factor N, we need to copy at most N-1 values - for alignment and this means N-2 loopback edge executions. */ - max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2; - if (check_profitability) - max_iter = MAX (max_iter, (int) th - 1); - record_niter_bound (new_loop, max_iter, false, true); - dump_printf (MSG_NOTE, - "Setting upper bound of nb iterations for prologue " - "loop to %d\n", max_iter); - - /* Update number of times loop executes. */ - LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR, - TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop); - LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR, - TREE_TYPE (ni_name), - LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop); - - if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop))) - wide_prolog_niters = niters_of_prolog_loop; - else + /* Prolog loop may be skipped. */ + bool skip_prolog = (prolog_peeling != 0); + /* Skip to epilog if scalar loop may be preferred. It's only used when + we peel for epilog loop. */ + bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)); + /* Epilog loop must be executed if the number of iterations for epilog + loop is known at compile time, otherwise we need to add a check at + the end of vector loop and skip to the end of epilog loop. */ + bool skip_epilog = (prolog_peeling < 0 + || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)); + /* PEELING_FOR_GAPS is special because epilog loop must be executed. */ + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) + skip_epilog = false; + + /* Record the anchor bb at which guard should be placed if scalar loop + may be preferred. */ + basic_block anchor = loop_preheader_edge (loop)->src; + if (skip_vector) + split_edge (loop_preheader_edge (loop)); + + tree niters_prolog = build_int_cst (type, 0); + source_location loop_loc = find_loop_location (loop); + struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); + if (prolog_peeling) { - gimple_seq seq = NULL; - edge pe = loop_preheader_edge (loop); - tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop); - tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters"); - wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false, - var); - if (seq) + e = loop_preheader_edge (loop); + if (!slpeel_can_duplicate_loop_p (loop, e)) { - /* Insert stmt on loop preheader edge. */ - basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); - gcc_assert (!new_bb); - } + dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc, + "loop can't be duplicated to preheader edge.\n"); + gcc_unreachable (); + } + /* Peel prolog and put it on preheader edge of loop. */ + prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e); + if (!prolog) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc, + "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n"); + gcc_unreachable (); + } + slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true); + first_loop = prolog; + reset_original_copy_tables (); + + /* Generate and update the number of iterations for prolog loop. */ + niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor, + &bound_prolog); + slpeel_make_loop_iterate_ntimes (prolog, niters_prolog); + + /* Skip the prolog loop. */ + if (skip_prolog) + { + guard_cond = fold_build2 (EQ_EXPR, boolean_type_node, + niters_prolog, build_int_cst (type, 0)); + guard_bb = loop_preheader_edge (prolog)->src; + guard_to = split_edge (loop_preheader_edge (loop)); + guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, + guard_to, guard_bb, + inverse_probability (prob_prolog)); + e = EDGE_PRED (guard_to, 0); + e = (e != guard_e ? e : EDGE_PRED (guard_to, 1)); + slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e); + scale_loop_profile (prolog, prob_prolog, bound_prolog); + } + /* Update init address of DRs. */ + vect_update_inits_of_drs (loop_vinfo, niters_prolog); + /* Update niters for vector loop. */ + LOOP_VINFO_NITERS (loop_vinfo) + = fold_build2 (MINUS_EXPR, type, niters, niters_prolog); + LOOP_VINFO_NITERSM1 (loop_vinfo) + = fold_build2 (MINUS_EXPR, type, + LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog); + niters = vect_build_loop_niters (loop_vinfo); + + /* Prolog iterates at most bound_prolog - 1 times, latch iterates + at most bound_prolog - 2 times. */ + record_niter_bound (prolog, bound_prolog - 2, false, true); + delete_update_ssa (); + adjust_vec_debug_stmts (); + scev_reset (); } - /* Update the init conditions of the access functions of all data refs. */ - vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters); + if (epilog_peeling) + { + e = single_exit (loop); + if (!slpeel_can_duplicate_loop_p (loop, e)) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc, + "loop can't be duplicated to exit edge.\n"); + gcc_unreachable (); + } + /* Peel epilog and put it on exit edge of loop. */ + epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e); + if (!epilog) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc, + "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n"); + gcc_unreachable (); + } + slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false); + + /* Scalar version loop may be preferred. In this case, add guard + and skip to epilog. Note this only happens when the number of + iterations of loop is unknown at compile time, otherwise this + won't be vectorized. */ + if (skip_vector) + { + /* Guard_cond needs is based on NITERSM1 because NITERS might + overflow, so here it is niters_scalar - 1 generated. In + other words, both niters_scalar and bound_epilog are for + scalar loop's latch. */ + tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling, + bound_prolog, vf - 1, th - 1, + &bound_epilog, + check_profitability); + guard_cond = fold_build2 (LT_EXPR, boolean_type_node, + nitersm1, t); + guard_bb = anchor; + guard_to = split_edge (loop_preheader_edge (epilog)); + guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, + guard_to, guard_bb, + inverse_probability (prob_vector)); + e = EDGE_PRED (guard_to, 0); + e = (e != guard_e ? e : EDGE_PRED (guard_to, 1)); + slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e); + scale_loop_profile (epilog, prob_vector, bound_epilog); + } - /* After peeling we have to reset scalar evolution analyzer. */ - scev_reset (); + tree niters_vector_mult_vf; + /* If loop is peeled for non-zero constant times, now niters refers to + orig_niters - prolog_peeling, it won't overflow even the orig_niters + overflows. */ + niters_no_overflow |= (prolog_peeling > 0); + vect_gen_vector_loop_niters (loop_vinfo, niters, + niters_vector, niters_no_overflow); + vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector, + &niters_vector_mult_vf); + /* Update IVs of original loop as if they were advanced by + niters_vector_mult_vf steps. */ + gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo)); + edge update_e = skip_vector ? e : loop_preheader_edge (epilog); + vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf, + update_e); + + if (skip_epilog) + { + guard_cond = fold_build2 (EQ_EXPR, boolean_type_node, + niters, niters_vector_mult_vf); + guard_bb = single_exit (loop)->dest; + guard_to = split_edge (single_exit (epilog)); + guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to, + skip_vector ? anchor : guard_bb, + inverse_probability (prob_epilog)); + slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e, + single_exit (epilog)); + scale_loop_profile (epilog, prob_epilog, bound); + } + else + slpeel_update_phi_nodes_for_lcssa (epilog); + bound = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf * 2 : vf) - 2; + /* We share epilog loop with scalar version loop. */ + bound_epilog = MAX (bound, bound_epilog - 1); + record_niter_bound (epilog, bound_epilog, false, true); + + delete_update_ssa (); + adjust_vec_debug_stmts (); + scev_reset (); + } + adjust_vec.release (); free_original_copy_tables (); } diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index be6f3fb6c65..0470445de81 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -6620,120 +6620,6 @@ vect_loop_kill_debug_uses (struct loop *loop, gimple *stmt) } } - -/* This function builds ni_name = number of iterations. Statements - are emitted on the loop preheader edge. */ - -static tree -vect_build_loop_niters (loop_vec_info loop_vinfo) -{ - tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); - if (TREE_CODE (ni) == INTEGER_CST) - return ni; - else - { - tree ni_name, var; - gimple_seq stmts = NULL; - edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); - - var = create_tmp_var (TREE_TYPE (ni), "niters"); - ni_name = force_gimple_operand (ni, &stmts, false, var); - if (stmts) - gsi_insert_seq_on_edge_immediate (pe, stmts); - - return ni_name; - } -} - - -/* This function generates the following statements: - - ni_name = number of iterations loop executes - ratio = ni_name / vf - ratio_mult_vf_name = ratio * vf - - and places them on the loop preheader edge. */ - -static void -vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, - tree ni_name, - tree *ratio_mult_vf_name_ptr, - tree *ratio_name_ptr) -{ - tree ni_minus_gap_name; - tree var; - tree ratio_name; - tree ratio_mult_vf_name; - int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); - tree log_vf; - - log_vf = build_int_cst (TREE_TYPE (ni_name), exact_log2 (vf)); - - /* If epilogue loop is required because of data accesses with gaps, we - subtract one iteration from the total number of iterations here for - correct calculation of RATIO. */ - if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) - { - ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name), - ni_name, - build_one_cst (TREE_TYPE (ni_name))); - if (!is_gimple_val (ni_minus_gap_name)) - { - var = create_tmp_var (TREE_TYPE (ni_name), "ni_gap"); - gimple *stmts = NULL; - ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts, - true, var); - gsi_insert_seq_on_edge_immediate (pe, stmts); - } - } - else - ni_minus_gap_name = ni_name; - - /* Create: ratio = ni >> log2(vf) */ - /* ??? As we have ni == number of latch executions + 1, ni could - have overflown to zero. So avoid computing ratio based on ni - but compute it using the fact that we know ratio will be at least - one, thus via (ni - vf) >> log2(vf) + 1. */ - ratio_name - = fold_build2 (PLUS_EXPR, TREE_TYPE (ni_name), - fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), - fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name), - ni_minus_gap_name, - build_int_cst - (TREE_TYPE (ni_name), vf)), - log_vf), - build_int_cst (TREE_TYPE (ni_name), 1)); - if (!is_gimple_val (ratio_name)) - { - var = create_tmp_var (TREE_TYPE (ni_name), "bnd"); - gimple *stmts = NULL; - ratio_name = force_gimple_operand (ratio_name, &stmts, true, var); - gsi_insert_seq_on_edge_immediate (pe, stmts); - } - *ratio_name_ptr = ratio_name; - - /* Create: ratio_mult_vf = ratio << log2 (vf). */ - - if (ratio_mult_vf_name_ptr) - { - ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), - ratio_name, log_vf); - if (!is_gimple_val (ratio_mult_vf_name)) - { - var = create_tmp_var (TREE_TYPE (ni_name), "ratio_mult_vf"); - gimple *stmts = NULL; - ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts, - true, var); - gsi_insert_seq_on_edge_immediate (pe, stmts); - } - *ratio_mult_vf_name_ptr = ratio_mult_vf_name; - } - - return; -} - - /* Function vect_transform_loop. The analysis phase has determined that the loop is vectorizable. @@ -6747,8 +6633,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); int nbbs = loop->num_nodes; int i; - tree ratio = NULL; - int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + tree niters_vector = NULL; + int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); bool grouped_store; bool slp_scheduled = false; gimple *stmt, *pattern_stmt; @@ -6818,49 +6704,20 @@ vect_transform_loop (loop_vec_info loop_vinfo) } } - tree ni_name = vect_build_loop_niters (loop_vinfo); - LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name; - - /* Peel the loop if there are data refs with unknown alignment. - Only one data ref with unknown store is allowed. */ - - if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)) + tree niters = vect_build_loop_niters (loop_vinfo); + LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters; + tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo)); + vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th, + check_profitability, false); + if (niters_vector == NULL_TREE) { - vect_do_peeling_for_alignment (loop_vinfo, ni_name, - th, check_profitability); - check_profitability = false; - /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to - be re-computed. */ - ni_name = NULL_TREE; - } - - /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a - compile time constant), or it is a constant that doesn't divide by the - vectorization factor, then an epilog loop needs to be created. - We therefore duplicate the loop: the original loop will be vectorized, - and will compute the first (n/VF) iterations. The second copy of the loop - will remain scalar and will compute the remaining (n%VF) iterations. - (VF is the vectorization factor). */ - - if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) - || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) - { - tree ratio_mult_vf; - if (!ni_name) - ni_name = vect_build_loop_niters (loop_vinfo); - vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf, - &ratio); - vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf, - th, check_profitability); - } - else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) - ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), - LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor); - else - { - if (!ni_name) - ni_name = vect_build_loop_niters (loop_vinfo); - vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio); + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + niters_vector + = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), + LOOP_VINFO_INT_NITERS (loop_vinfo) / vf); + else + vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector, + false); } /* 1) Make sure the loop header has exactly two entries @@ -6903,7 +6760,7 @@ vect_transform_loop (loop_vec_info loop_vinfo) if (STMT_VINFO_VECTYPE (stmt_info) && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)) - != (unsigned HOST_WIDE_INT) vectorization_factor) + != (unsigned HOST_WIDE_INT) vf) && dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n"); @@ -7036,7 +6893,7 @@ vect_transform_loop (loop_vec_info loop_vinfo) = (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); if (!STMT_SLP_TYPE (stmt_info) - && nunits != (unsigned int) vectorization_factor + && nunits != (unsigned int) vf && dump_enabled_p ()) /* For SLP VF is set according to unrolling factor, and not to vector size, hence for SLP this print is not valid. */ @@ -7108,11 +6965,11 @@ vect_transform_loop (loop_vec_info loop_vinfo) } /* stmts in BB */ } /* BBs in loop */ - slpeel_make_loop_iterate_ntimes (loop, ratio); + slpeel_make_loop_iterate_ntimes (loop, niters_vector); /* Reduce loop iterations by the vectorization factor. */ - scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor), - expected_iterations / vectorization_factor); + scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vf), + expected_iterations / vf); if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) { if (loop->nb_iterations_upper_bound != 0) @@ -7122,16 +6979,14 @@ vect_transform_loop (loop_vec_info loop_vinfo) = loop->nb_iterations_likely_upper_bound - 1; } loop->nb_iterations_upper_bound - = wi::udiv_floor (loop->nb_iterations_upper_bound + 1, - vectorization_factor) - 1; + = wi::udiv_floor (loop->nb_iterations_upper_bound + 1, vf) - 1; loop->nb_iterations_likely_upper_bound - = wi::udiv_floor (loop->nb_iterations_likely_upper_bound + 1, - vectorization_factor) - 1; + = wi::udiv_floor (loop->nb_iterations_likely_upper_bound + 1, vf) - 1; if (loop->any_estimate) { loop->nb_iterations_estimate - = wi::udiv_floor (loop->nb_iterations_estimate, vectorization_factor); + = wi::udiv_floor (loop->nb_iterations_estimate, vf); if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && loop->nb_iterations_estimate != 0) loop->nb_iterations_estimate = loop->nb_iterations_estimate - 1; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 240af069a51..386654862b4 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1032,10 +1032,8 @@ extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, struct loop *, edge); extern void vect_loop_versioning (loop_vec_info, unsigned int, bool); -extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree, tree, - unsigned int, bool); -extern void vect_do_peeling_for_alignment (loop_vec_info, tree, - unsigned int, bool); +extern void vect_do_peeling (loop_vec_info, tree, tree, + tree *, int, bool, bool); extern source_location find_loop_location (struct loop *); extern bool vect_can_advance_ivs_p (loop_vec_info); @@ -1147,6 +1145,8 @@ extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, bool, bool *, bool); /* Drive for loop analysis stage. */ extern loop_vec_info vect_analyze_loop (struct loop *); +extern tree vect_build_loop_niters (loop_vec_info); +extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, bool); /* Drive for loop transformation stage. */ extern void vect_transform_loop (loop_vec_info); extern loop_vec_info vect_analyze_loop_form (struct loop *); -- 2.30.2