X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=gcc%2Ftree-vect-loop-manip.c;h=11c3ae28cceaadd5767b27638e2240739a1e9352;hb=1e44e857e05c165f6f01aeb56a7a43ee765bfc99;hp=32f57fe993af74f0b1f694f3e9d4db247d1a8803;hpb=f7a06a988e73a9a0208af4f714ae40fb19d7e1ae;p=gcc.git diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 32f57fe993a..11c3ae28cce 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -1,6 +1,5 @@ /* Vectorizer Specific Loop Manipulations - Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 - Free Software Foundation, Inc. + Copyright (C) 2003-2015 Free Software Foundation, Inc. Contributed by Dorit Naishlos and Ira Rosen @@ -23,16 +22,27 @@ along with GCC; see the file COPYING3. If not see #include "config.h" #include "system.h" #include "coretypes.h" -#include "tm.h" -#include "ggc.h" +#include "dumpfile.h" +#include "backend.h" +#include "cfghooks.h" #include "tree.h" -#include "basic-block.h" -#include "tree-pretty-print.h" +#include "gimple.h" +#include "hard-reg-set.h" +#include "ssa.h" +#include "alias.h" +#include "fold-const.h" +#include "cfganal.h" #include "gimple-pretty-print.h" -#include "tree-flow.h" -#include "tree-dump.h" +#include "internal-fn.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "gimplify-me.h" +#include "tree-cfg.h" +#include "tree-ssa-loop-manip.h" +#include "tree-into-ssa.h" +#include "tree-ssa.h" +#include "tree-pass.h" #include "cfgloop.h" -#include "cfglayout.h" #include "diagnostic-core.h" #include "tree-scalar-evolution.h" #include "tree-vectorizer.h" @@ -67,61 +77,52 @@ rename_use_op (use_operand_p op_p) } -/* Renames the variables in basic block BB. */ +/* Renames the variables in basic block BB. Allow renaming of PHI argumnets + on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is + true. */ -void -rename_variables_in_bb (basic_block bb) +static void +rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop) { - gimple_stmt_iterator gsi; - gimple stmt; + gimple *stmt; use_operand_p use_p; ssa_op_iter iter; edge e; edge_iterator ei; struct loop *loop = bb->loop_father; + struct loop *outer_loop = NULL; + + if (rename_from_outer_loop) + { + gcc_assert (loop); + outer_loop = loop_outer (loop); + } - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) { stmt = gsi_stmt (gsi); FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES) rename_use_op (use_p); } - FOR_EACH_EDGE (e, ei, bb->succs) + FOR_EACH_EDGE (e, ei, bb->preds) { - if (!flow_bb_inside_loop_p (loop, e->dest)) + if (!flow_bb_inside_loop_p (loop, e->src) + && (!rename_from_outer_loop || e->src != outer_loop->header)) continue; - for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) - rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e)); + for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e)); } } -/* Renames variables in new generated LOOP. */ - -void -rename_variables_in_loop (struct loop *loop) -{ - unsigned i; - basic_block *bbs; - - bbs = get_loop_body (loop); - - for (i = 0; i < loop->num_nodes; i++) - rename_variables_in_bb (bbs[i]); - - free (bbs); -} - -typedef struct +struct adjust_info { tree from, to; basic_block bb; -} adjust_info; - -DEF_VEC_O(adjust_info); -DEF_VEC_ALLOC_O_STACK(adjust_info); -#define VEC_adjust_info_stack_alloc(alloc) VEC_stack_alloc (adjust_info, alloc) +}; /* A stack of values to be adjusted in debug stmts. We have to process them LIFO, so that the closest substitution applies. If we @@ -129,7 +130,7 @@ DEF_VEC_ALLOC_O_STACK(adjust_info); with a PHI DEF that would soon become non-dominant, and when we got to the suitable one, it wouldn't have anything to substitute any more. */ -static VEC(adjust_info, stack) *adjust_vec; +static vec adjust_vec; /* Adjust any debug stmts that referenced AI->from values to use the loop-closed AI->to, if the references are dominated by AI->bb and @@ -142,7 +143,7 @@ adjust_debug_stmts_now (adjust_info *ai) tree orig_def = ai->from; tree new_def = ai->to; imm_use_iterator imm_iter; - gimple stmt; + gimple *stmt; basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def)); gcc_assert (dom_info_available_p (CDI_DOMINATORS)); @@ -186,15 +187,15 @@ adjust_vec_debug_stmts (void) if (!MAY_HAVE_DEBUG_STMTS) return; - gcc_assert (adjust_vec); + gcc_assert (adjust_vec.exists ()); - while (!VEC_empty (adjust_info, adjust_vec)) + while (!adjust_vec.is_empty ()) { - adjust_debug_stmts_now (VEC_last (adjust_info, adjust_vec)); - VEC_pop (adjust_info, adjust_vec); + adjust_debug_stmts_now (&adjust_vec.last ()); + adjust_vec.pop (); } - VEC_free (adjust_info, stack, adjust_vec); + adjust_vec.release (); } /* Adjust any debug stmts that referenced FROM values to use the @@ -207,15 +208,17 @@ adjust_debug_stmts (tree from, tree to, basic_block bb) { adjust_info ai; - if (MAY_HAVE_DEBUG_STMTS && TREE_CODE (from) == SSA_NAME - && SSA_NAME_VAR (from) != gimple_vop (cfun)) + if (MAY_HAVE_DEBUG_STMTS + && TREE_CODE (from) == SSA_NAME + && ! SSA_NAME_IS_DEFAULT_DEF (from) + && ! virtual_operand_p (from)) { ai.from = from; ai.to = to; ai.bb = bb; - if (adjust_vec) - VEC_safe_push (adjust_info, stack, adjust_vec, &ai); + if (adjust_vec.exists ()) + adjust_vec.safe_push (ai); else adjust_debug_stmts_now (&ai); } @@ -227,7 +230,7 @@ adjust_debug_stmts (tree from, tree to, basic_block bb) transformations. */ static void -adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def) +adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def) { tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e); @@ -239,101 +242,6 @@ adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def) } -/* Update the PHI nodes of NEW_LOOP. - - NEW_LOOP is a duplicate of ORIG_LOOP. - AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP: - AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it - executes before it. */ - -static void -slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop, - struct loop *new_loop, bool after) -{ - tree new_ssa_name; - gimple phi_new, phi_orig; - tree def; - edge orig_loop_latch = loop_latch_edge (orig_loop); - edge orig_entry_e = loop_preheader_edge (orig_loop); - edge new_loop_exit_e = single_exit (new_loop); - edge new_loop_entry_e = loop_preheader_edge (new_loop); - edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e); - gimple_stmt_iterator gsi_new, gsi_orig; - - /* - step 1. For each loop-header-phi: - Add the first phi argument for the phi in NEW_LOOP - (the one associated with the entry of NEW_LOOP) - - step 2. For each loop-header-phi: - Add the second phi argument for the phi in NEW_LOOP - (the one associated with the latch of NEW_LOOP) - - step 3. Update the phis in the successor block of NEW_LOOP. - - case 1: NEW_LOOP was placed before ORIG_LOOP: - The successor block of NEW_LOOP is the header of ORIG_LOOP. - Updating the phis in the successor block can therefore be done - along with the scanning of the loop header phis, because the - header blocks of ORIG_LOOP and NEW_LOOP have exactly the same - phi nodes, organized in the same order. - - case 2: NEW_LOOP was placed after ORIG_LOOP: - The successor block of NEW_LOOP is the original exit block of - ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis. - We postpone updating these phis to a later stage (when - loop guards are added). - */ - - - /* Scan the phis in the headers of the old and new loops - (they are organized in exactly the same order). */ - - for (gsi_new = gsi_start_phis (new_loop->header), - gsi_orig = gsi_start_phis (orig_loop->header); - !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig); - gsi_next (&gsi_new), gsi_next (&gsi_orig)) - { - source_location locus; - phi_new = gsi_stmt (gsi_new); - phi_orig = gsi_stmt (gsi_orig); - - /* step 1. */ - def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e); - locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e); - add_phi_arg (phi_new, def, new_loop_entry_e, locus); - - /* step 2. */ - def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch); - locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch); - if (TREE_CODE (def) != SSA_NAME) - continue; - - new_ssa_name = get_current_def (def); - if (!new_ssa_name) - { - /* This only happens if there are no definitions - inside the loop. use the phi_result in this case. */ - new_ssa_name = PHI_RESULT (phi_new); - } - - /* An ordinary ssa name defined in the loop. */ - add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus); - - /* Drop any debug references outside the loop, if they would - become ill-formed SSA. */ - adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest); - - /* step 3 (case 1). */ - if (!after) - { - gcc_assert (new_loop_exit_e == orig_entry_e); - adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name); - } - } -} - - /* Update PHI nodes for a guard of the LOOP. Input: @@ -457,8 +365,8 @@ slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop, next_bb The SSA names defined in the original loop have a current - reaching definition that that records the corresponding new - ssa-name used in the new duplicated loop copy. + reaching definition that records the corresponding new ssa-name + used in the new duplicated loop copy. */ /* Function slpeel_update_phi_nodes_for_guard1 @@ -489,11 +397,10 @@ LOOP-> loop1 static void slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop, - bool is_new_loop, basic_block *new_exit_bb, - bitmap *defs) + bool is_new_loop, basic_block *new_exit_bb) { - gimple orig_phi, new_phi; - gimple update_phi, update_phi2; + gphi *orig_phi, *new_phi; + gphi *update_phi, *update_phi2; tree guard_arg, loop_arg; basic_block new_merge_bb = guard_edge->dest; edge e = EDGE_SUCC (new_merge_bb, 0); @@ -501,7 +408,7 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop, basic_block orig_bb = loop->header; edge new_exit_e; tree current_new_name; - gimple_stmt_iterator gsi_orig, gsi_update; + gphi_iterator gsi_orig, gsi_update; /* Create new bb between loop and new_merge_bb. */ *new_exit_bb = split_edge (single_exit (loop)); @@ -514,14 +421,15 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop, gsi_next (&gsi_orig), gsi_next (&gsi_update)) { source_location loop_locus, guard_locus; - orig_phi = gsi_stmt (gsi_orig); - update_phi = gsi_stmt (gsi_update); + tree new_res; + orig_phi = gsi_orig.phi (); + update_phi = gsi_update.phi (); /** 1. Handle new-merge-point phis **/ /* 1.1. Generate new phi node in NEW_MERGE_BB: */ - new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), - new_merge_bb); + new_res = copy_ssa_name (PHI_RESULT (orig_phi)); + new_phi = create_phi_node (new_res, new_merge_bb); /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge of LOOP. Set the two phi args in NEW_PHI for these edges: */ @@ -546,12 +454,12 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop, /** 2. Handle loop-closed-ssa-form phis **/ - if (!is_gimple_reg (PHI_RESULT (orig_phi))) + if (virtual_operand_p (PHI_RESULT (orig_phi))) continue; /* 2.1. Generate new phi node in NEW_EXIT_BB: */ - new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), - *new_exit_bb); + new_res = copy_ssa_name (PHI_RESULT (orig_phi)); + new_phi = create_phi_node (new_res, *new_exit_bb); /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */ add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus); @@ -583,10 +491,20 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop, if (!current_new_name) continue; } - gcc_assert (get_current_def (current_new_name) == NULL_TREE); + tree new_name = get_current_def (current_new_name); + /* Because of peeled_chrec optimization it is possible that we have + set this earlier. Verify the PHI has the same value. */ + if (new_name) + { + gimple *phi = SSA_NAME_DEF_STMT (new_name); + gcc_assert (gimple_code (phi) == GIMPLE_PHI + && gimple_bb (phi) == *new_exit_bb + && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop)) + == loop_arg)); + continue; + } set_current_def (current_new_name, PHI_RESULT (new_phi)); - bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name)); } } @@ -621,8 +539,8 @@ static void slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop, bool is_new_loop, basic_block *new_exit_bb) { - gimple orig_phi, new_phi; - gimple update_phi, update_phi2; + gphi *orig_phi, *new_phi; + gphi *update_phi, *update_phi2; tree guard_arg, loop_arg; basic_block new_merge_bb = guard_edge->dest; edge e = EDGE_SUCC (new_merge_bb, 0); @@ -631,7 +549,7 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop, tree orig_def, orig_def_new_name; tree new_name, new_name2; tree arg; - gimple_stmt_iterator gsi; + gphi_iterator gsi; /* Create new bb between loop and new_merge_bb. */ *new_exit_bb = split_edge (single_exit (loop)); @@ -640,7 +558,8 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop, for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi)) { - update_phi = gsi_stmt (gsi); + tree new_res; + update_phi = gsi.phi (); orig_phi = update_phi; orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); /* This loop-closed-phi actually doesn't represent a use @@ -653,8 +572,8 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop, /** 1. Handle new-merge-point phis **/ /* 1.1. Generate new phi node in NEW_MERGE_BB: */ - new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), - new_merge_bb); + new_res = copy_ssa_name (PHI_RESULT (orig_phi)); + new_phi = create_phi_node (new_res, new_merge_bb); /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge of LOOP. Set the two PHI args in NEW_PHI for these edges: */ @@ -695,8 +614,8 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop, /** 2. Handle loop-closed-ssa-form phis **/ /* 2.1. Generate new phi node in NEW_EXIT_BB: */ - new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), - *new_exit_bb); + new_res = copy_ssa_name (PHI_RESULT (orig_phi)); + new_phi = create_phi_node (new_res, *new_exit_bb); /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */ add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION); @@ -730,8 +649,8 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop, arg = guard_arg; /* 3.2. Generate new phi node in GUARD_BB: */ - new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), - guard_edge->src); + new_res = copy_ssa_name (PHI_RESULT (orig_phi)); + new_phi = create_phi_node (new_res, guard_edge->src); /* 3.3. GUARD_BB has one incoming edge: */ gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1); @@ -756,15 +675,15 @@ void slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters) { tree indx_before_incr, indx_after_incr; - gimple cond_stmt; - gimple orig_cond; + gcond *cond_stmt; + gcond *orig_cond; edge exit_edge = single_exit (loop); gimple_stmt_iterator loop_cond_gsi; gimple_stmt_iterator incr_gsi; bool insert_after; tree init = build_int_cst (TREE_TYPE (niters), 0); tree step = build_int_cst (TREE_TYPE (niters), 1); - LOC loop_loc; + source_location loop_loc; enum tree_code code; orig_cond = get_loop_exit_condition (loop); @@ -789,136 +708,221 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters) /* Remove old loop exit test: */ gsi_remove (&loop_cond_gsi, true); + free_stmt_vec_info (orig_cond); loop_loc = find_loop_location (loop); - if (dump_file && (dump_flags & TDF_DETAILS)) + if (dump_enabled_p ()) { - if (loop_loc != UNKNOWN_LOC) - fprintf (dump_file, "\nloop at %s:%d: ", - LOC_FILE (loop_loc), LOC_LINE (loop_loc)); - print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM); + if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION) + dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc), + LOCATION_LINE (loop_loc)); + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0); + dump_printf (MSG_NOTE, "\n"); } - loop->nb_iterations = niters; } +/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg. + For all PHI arguments in FROM->dest and TO->dest from those + edges ensure that TO->dest PHI arguments have current_def + to that in from. */ + +static void +slpeel_duplicate_current_defs_from_edges (edge from, edge to) +{ + gimple_stmt_iterator gsi_from, gsi_to; + + for (gsi_from = gsi_start_phis (from->dest), + gsi_to = gsi_start_phis (to->dest); + !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to); + gsi_next (&gsi_from), gsi_next (&gsi_to)) + { + gimple *from_phi = gsi_stmt (gsi_from); + gimple *to_phi = gsi_stmt (gsi_to); + tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from); + tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to); + if (TREE_CODE (from_arg) == SSA_NAME + && TREE_CODE (to_arg) == SSA_NAME + && get_current_def (to_arg) == NULL_TREE) + set_current_def (to_arg, get_current_def (from_arg)); + } +} + /* Given LOOP this function generates a new copy of it and puts it - on E which is either the entry or exit of LOOP. */ + on E which is either the entry or exit of LOOP. If SCALAR_LOOP is + non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the + basic blocks from SCALAR_LOOP instead of LOOP, but to either the + entry or exit of LOOP. */ struct loop * -slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e) +slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, + struct loop *scalar_loop, edge e) { struct loop *new_loop; basic_block *new_bbs, *bbs; bool at_exit; bool was_imm_dom; basic_block exit_dest; - gimple phi; - tree phi_arg; edge exit, new_exit; - gimple_stmt_iterator gsi; + bool duplicate_outer_loop = false; - at_exit = (e == single_exit (loop)); + exit = single_exit (loop); + at_exit = (e == exit); if (!at_exit && e != loop_preheader_edge (loop)) return NULL; - bbs = get_loop_body (loop); + if (scalar_loop == NULL) + scalar_loop = loop; + bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1); + get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes); + /* Allow duplication of outer loops. */ + if (scalar_loop->inner) + duplicate_outer_loop = true; /* Check whether duplication is possible. */ - if (!can_copy_bbs_p (bbs, loop->num_nodes)) + if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes)) { free (bbs); return NULL; } /* Generate new loop structure. */ - new_loop = duplicate_loop (loop, loop_outer (loop)); - if (!new_loop) - { - free (bbs); - return NULL; - } + new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop)); + duplicate_subloops (scalar_loop, new_loop); - exit_dest = single_exit (loop)->dest; + exit_dest = exit->dest; was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS, exit_dest) == loop->header ? true : false); - new_bbs = XNEWVEC (basic_block, loop->num_nodes); + /* Also copy the pre-header, this avoids jumping through hoops to + duplicate the loop entry PHI arguments. Create an empty + pre-header unconditionally for this. */ + basic_block preheader = split_edge (loop_preheader_edge (scalar_loop)); + edge entry_e = single_pred_edge (preheader); + bbs[scalar_loop->num_nodes] = preheader; + new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1); - exit = single_exit (loop); - copy_bbs (bbs, loop->num_nodes, new_bbs, + exit = single_exit (scalar_loop); + copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs, &exit, 1, &new_exit, NULL, - e->src); - - /* Duplicating phi args at exit bbs as coming - also from exit of duplicated loop. */ - for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi)) - { - phi = gsi_stmt (gsi); - phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop)); - if (phi_arg) - { - edge new_loop_exit_edge; - source_location locus; + e->src, true); + exit = single_exit (loop); + basic_block new_preheader = new_bbs[scalar_loop->num_nodes]; - locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop)); - if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch) - new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1); - else - new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0); + add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL); - add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus); - } + if (scalar_loop != loop) + { + /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from + SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop, + but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects + the LOOP SSA_NAMEs (on the exit edge and edge from latch to + header) to have current_def set, so copy them over. */ + slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop), + exit); + slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch, + 0), + EDGE_SUCC (loop->latch, 0)); } if (at_exit) /* Add the loop copy at exit. */ { - redirect_edge_and_branch_force (e, new_loop->header); - PENDING_STMT (e) = NULL; - set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src); - if (was_imm_dom) - set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header); + if (scalar_loop != loop) + { + gphi_iterator gsi; + new_exit = redirect_edge_and_branch (new_exit, exit_dest); + + for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gphi *phi = gsi.phi (); + tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e); + location_t orig_locus + = gimple_phi_arg_location_from_edge (phi, e); + + add_phi_arg (phi, orig_arg, new_exit, orig_locus); + } + } + redirect_edge_and_branch_force (e, new_preheader); + flush_pending_stmts (e); + set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src); + if (was_imm_dom || duplicate_outer_loop) + set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src); + + /* And remove the non-necessary forwarder again. Keep the other + one so we have a proper pre-header for the loop at the exit edge. */ + redirect_edge_pred (single_succ_edge (preheader), + single_pred (preheader)); + delete_basic_block (preheader); + set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header, + loop_preheader_edge (scalar_loop)->src); } else /* Add the copy at entry. */ { - edge new_exit_e; - edge entry_e = loop_preheader_edge (loop); - basic_block preheader = entry_e->src; - - if (!flow_bb_inside_loop_p (new_loop, - EDGE_SUCC (new_loop->header, 0)->dest)) - new_exit_e = EDGE_SUCC (new_loop->header, 0); - else - new_exit_e = EDGE_SUCC (new_loop->header, 1); - - redirect_edge_and_branch_force (new_exit_e, loop->header); - PENDING_STMT (new_exit_e) = NULL; - set_immediate_dominator (CDI_DOMINATORS, loop->header, - new_exit_e->src); - - /* We have to add phi args to the loop->header here as coming - from new_exit_e edge. */ - for (gsi = gsi_start_phis (loop->header); - !gsi_end_p (gsi); - gsi_next (&gsi)) + if (scalar_loop != loop) { - phi = gsi_stmt (gsi); - phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e); - if (phi_arg) - add_phi_arg (phi, phi_arg, new_exit_e, - gimple_phi_arg_location_from_edge (phi, entry_e)); + /* Remove the non-necessary forwarder of scalar_loop again. */ + redirect_edge_pred (single_succ_edge (preheader), + single_pred (preheader)); + delete_basic_block (preheader); + set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header, + loop_preheader_edge (scalar_loop)->src); + preheader = split_edge (loop_preheader_edge (loop)); + entry_e = single_pred_edge (preheader); } - redirect_edge_and_branch_force (entry_e, new_loop->header); - PENDING_STMT (entry_e) = NULL; - set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader); + redirect_edge_and_branch_force (entry_e, new_preheader); + flush_pending_stmts (entry_e); + set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src); + + redirect_edge_and_branch_force (new_exit, preheader); + flush_pending_stmts (new_exit); + set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src); + + /* And remove the non-necessary forwarder again. Keep the other + one so we have a proper pre-header for the loop at the exit edge. */ + redirect_edge_pred (single_succ_edge (new_preheader), + single_pred (new_preheader)); + delete_basic_block (new_preheader); + set_immediate_dominator (CDI_DOMINATORS, new_loop->header, + loop_preheader_edge (new_loop)->src); + } + + for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++) + rename_variables_in_bb (new_bbs[i], duplicate_outer_loop); + + if (scalar_loop != loop) + { + /* Update new_loop->header PHIs, so that on the preheader + edge they are the ones from loop rather than scalar_loop. */ + gphi_iterator gsi_orig, gsi_new; + edge orig_e = loop_preheader_edge (loop); + edge new_e = loop_preheader_edge (new_loop); + + for (gsi_orig = gsi_start_phis (loop->header), + gsi_new = gsi_start_phis (new_loop->header); + !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new); + gsi_next (&gsi_orig), gsi_next (&gsi_new)) + { + gphi *orig_phi = gsi_orig.phi (); + gphi *new_phi = gsi_new.phi (); + tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e); + location_t orig_locus + = gimple_phi_arg_location_from_edge (orig_phi, orig_e); + + add_phi_arg (new_phi, orig_arg, new_e, orig_locus); + } } free (new_bbs); free (bbs); +#ifdef ENABLE_CHECKING + verify_dominators (CDI_DOMINATORS); +#endif + return new_loop; } @@ -931,11 +935,12 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e) static edge slpeel_add_loop_guard (basic_block guard_bb, tree cond, gimple_seq cond_expr_stmt_list, - basic_block exit_bb, basic_block dom_bb) + basic_block exit_bb, basic_block dom_bb, + int probability) { gimple_stmt_iterator gsi; edge new_e, enter_e; - gimple cond_stmt; + gcond *cond_stmt; gimple_seq gimplify_stmt_list = NULL; enter_e = EDGE_SUCC (guard_bb, 0); @@ -956,17 +961,23 @@ slpeel_add_loop_guard (basic_block guard_bb, tree cond, /* Add new edge to connect guard block to the merge/loop-exit block. */ new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE); + + new_e->count = guard_bb->count; + new_e->probability = probability; + new_e->count = apply_probability (enter_e->count, probability); + enter_e->count -= new_e->count; + enter_e->probability = inverse_probability (probability); set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb); return new_e; } /* This function verifies that the following restrictions apply to LOOP: - (1) it is innermost - (2) it consists of exactly 2 basic blocks - header, and an empty latch. - (3) it is single entry, single exit - (4) its exit condition is the last stmt in the header - (5) E is the entry/exit edge of LOOP. + (1) it consists of exactly 2 basic blocks - header, and an empty latch + for innermost loop and 5 basic blocks for outer-loop. + (2) it is single entry, single exit + (3) its exit condition is the last stmt in the header + (4) E is the entry/exit edge of LOOP. */ bool @@ -974,17 +985,14 @@ slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e) { edge exit_e = single_exit (loop); edge entry_e = loop_preheader_edge (loop); - gimple orig_cond = get_loop_exit_condition (loop); + gcond *orig_cond = get_loop_exit_condition (loop); gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src); + unsigned int num_bb = loop->inner? 5 : 2; - if (need_ssa_update_p (cfun)) - return false; - - if (loop->inner /* All loops have an outer scope; the only case loop->outer is NULL is for the function itself. */ - || !loop_outer (loop) - || loop->num_nodes != 2 + if (!loop_outer (loop) + || loop->num_nodes != num_bb || !empty_block_p (loop->latch) || !single_exit (loop) /* Verify that new loop exit condition can be trivially modified. */ @@ -1038,25 +1046,26 @@ static void set_prologue_iterations (basic_block bb_before_first_loop, tree *first_niters, struct loop *loop, - unsigned int th) + unsigned int th, + int probability) { edge e; basic_block cond_bb, then_bb; tree var, prologue_after_cost_adjust_name; gimple_stmt_iterator gsi; - gimple newphi; + gphi *newphi; edge e_true, e_false, e_fallthru; - gimple cond_stmt; + gcond *cond_stmt; gimple_seq stmts = NULL; tree cost_pre_condition = NULL_TREE; tree scalar_loop_iters = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop))); e = single_pred_edge (bb_before_first_loop); - cond_bb = split_edge(e); + cond_bb = split_edge (e); e = single_pred_edge (bb_before_first_loop); - then_bb = split_edge(e); + then_bb = split_edge (e); set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); e_false = make_single_succ_edge (cond_bb, bb_before_first_loop, @@ -1067,7 +1076,15 @@ set_prologue_iterations (basic_block bb_before_first_loop, e_true->flags &= ~EDGE_FALLTHRU; e_true->flags |= EDGE_TRUE_VALUE; + e_true->probability = probability; + e_false->probability = inverse_probability (probability); + e_true->count = apply_probability (cond_bb->count, probability); + e_false->count = cond_bb->count - e_true->count; + then_bb->frequency = EDGE_FREQUENCY (e_true); + then_bb->count = e_true->count; + e_fallthru = EDGE_SUCC (then_bb, 0); + e_fallthru->count = then_bb->count; gsi = gsi_last_bb (cond_bb); cost_pre_condition = @@ -1082,7 +1099,6 @@ set_prologue_iterations (basic_block bb_before_first_loop, var = create_tmp_var (TREE_TYPE (scalar_loop_iters), "prologue_after_cost_adjust"); - add_referenced_var (var); prologue_after_cost_adjust_name = force_gimple_operand (scalar_loop_iters, &stmts, false, var); @@ -1110,6 +1126,8 @@ set_prologue_iterations (basic_block bb_before_first_loop, Input: - LOOP: the loop to be peeled. + - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks + should be copied. - E: the exit or entry edge of LOOP. If it is the entry edge, we peel the first iterations of LOOP. In this case first-loop is LOOP, and second-loop is the newly created loop. @@ -1127,6 +1145,8 @@ set_prologue_iterations (basic_block bb_before_first_loop, prologue generation or whether cost model check has not occurred during prologue generation and hence needs to occur during epilogue generation. + - BOUND1 is the upper bound on number of iterations of the first loop (if known) + - BOUND2 is the upper bound on number of iterations of the second loop (if known) Output: @@ -1149,35 +1169,44 @@ set_prologue_iterations (basic_block bb_before_first_loop, FORNOW the resulting code will not be in loop-closed-ssa form. */ -static struct loop* -slpeel_tree_peel_loop_to_edge (struct loop *loop, +static struct loop * +slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop, edge e, tree *first_niters, tree niters, bool update_first_loop_count, unsigned int th, bool check_profitability, - tree cond_expr, gimple_seq cond_expr_stmt_list) + tree cond_expr, gimple_seq cond_expr_stmt_list, + int bound1, int bound2) { struct loop *new_loop = NULL, *first_loop, *second_loop; edge skip_e; tree pre_condition = NULL_TREE; - bitmap definitions; basic_block bb_before_second_loop, bb_after_second_loop; basic_block bb_before_first_loop; basic_block bb_between_loops; basic_block new_exit_bb; - gimple_stmt_iterator gsi; + gphi_iterator gsi; edge exit_e = single_exit (loop); - LOC loop_loc; - tree cost_pre_condition = NULL_TREE; + source_location loop_loc; + /* There are many aspects to how likely the first loop is going to be executed. + Without histogram we can't really do good job. Simply set it to + 2/3, so the first loop is not reordered to the end of function and + the hot path through stays short. */ + int first_guard_probability = 2 * REG_BR_PROB_BASE / 3; + int second_guard_probability = 2 * REG_BR_PROB_BASE / 3; + int probability_of_second_loop; if (!slpeel_can_duplicate_loop_p (loop, e)) return NULL; - /* We have to initialize cfg_hooks. Then, when calling - cfg_hooks->split_edge, the function tree_split_edge - is actually called and, when calling cfg_hooks->duplicate_block, - the function tree_duplicate_bb is called. */ - gimple_register_cfg_hooks (); - + /* We might have a queued need to update virtual SSA form. As we + delete the update SSA machinery below after doing a regular + incremental SSA update during loop copying make sure we don't + lose that fact. + ??? Needing to update virtual SSA form by renaming is unfortunate + but not all of the vectorizer code inserting new loads / stores + properly assigns virtual operands to those statements. */ + update_ssa (TODO_update_ssa_only_virtuals); + /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI in the exit bb and rename all the uses after the loop. This simplifies the *guard[12] routines, which assume loop closed SSA form for all PHIs @@ -1185,28 +1214,27 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, in the same form). Doing this early simplifies the checking what uses should be renamed. */ for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi)) - if (!is_gimple_reg (gimple_phi_result (gsi_stmt (gsi)))) + if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi)))) { - gimple phi = gsi_stmt (gsi); + gphi *phi = gsi.phi (); for (gsi = gsi_start_phis (exit_e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) - if (!is_gimple_reg (gimple_phi_result (gsi_stmt (gsi)))) + if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi)))) break; if (gsi_end_p (gsi)) { - gimple new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), - exit_e->dest); + tree new_vop = copy_ssa_name (PHI_RESULT (phi)); + gphi *new_phi = create_phi_node (new_vop, exit_e->dest); tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)); imm_use_iterator imm_iter; - gimple stmt; - tree new_vop = make_ssa_name (SSA_NAME_VAR (PHI_RESULT (phi)), - new_phi); + gimple *stmt; use_operand_p use_p; add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION); gimple_phi_set_result (new_phi, new_vop); FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop) - if (stmt != new_phi && gimple_bb (stmt) != loop->header) + if (stmt != new_phi + && !flow_bb_inside_loop_p (loop, gimple_bb (stmt))) FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) SET_USE (use_p, new_vop); } @@ -1227,23 +1255,19 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, orig_exit_bb: */ - if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e))) + if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, + e))) { loop_loc = find_loop_location (loop); - if (dump_file && (dump_flags & TDF_DETAILS)) - { - if (loop_loc != UNKNOWN_LOC) - fprintf (dump_file, "\n%s:%d: note: ", - LOC_FILE (loop_loc), LOC_LINE (loop_loc)); - fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n"); - } + dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc, + "tree_duplicate_loop_to_edge_cfg failed.\n"); return NULL; } if (MAY_HAVE_DEBUG_STMTS) { - gcc_assert (!adjust_vec); - adjust_vec = VEC_alloc (adjust_info, stack, 32); + gcc_assert (!adjust_vec.exists ()); + adjust_vec.create (32); } if (e == exit_e) @@ -1259,11 +1283,6 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, second_loop = loop; } - definitions = ssa_names_to_replace (); - slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e); - rename_variables_in_loop (new_loop); - - /* 2. Add the guard code in one of the following ways: 2.a Add the guard that controls whether the first loop is executed. @@ -1350,26 +1369,38 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, */ bb_before_first_loop = split_edge (loop_preheader_edge (first_loop)); - bb_before_second_loop = split_edge (single_exit (first_loop)); + /* Loop copying insterted a forwarder block for us here. */ + bb_before_second_loop = single_exit (first_loop)->dest; + + probability_of_second_loop = (inverse_probability (first_guard_probability) + + combine_probabilities (second_guard_probability, + first_guard_probability)); + /* Theoretically preheader edge of first loop and exit edge should have + same frequencies. Loop exit probablities are however easy to get wrong. + It is safer to copy value from original loop entry. */ + bb_before_second_loop->frequency + = combine_probabilities (bb_before_first_loop->frequency, + probability_of_second_loop); + bb_before_second_loop->count + = apply_probability (bb_before_first_loop->count, + probability_of_second_loop); + single_succ_edge (bb_before_second_loop)->count + = bb_before_second_loop->count; /* Epilogue peeling. */ if (!update_first_loop_count) { + loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop); + tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo); + unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1; + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) + limit = limit + 1; + if (check_profitability + && th > limit) + limit = th; pre_condition = - fold_build2 (LE_EXPR, boolean_type_node, *first_niters, - build_int_cst (TREE_TYPE (*first_niters), 0)); - if (check_profitability) - { - tree scalar_loop_iters - = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED - (loop_vec_info_for_loop (loop))); - cost_pre_condition = - fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters, - build_int_cst (TREE_TYPE (scalar_loop_iters), th)); - - pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, - cost_pre_condition, pre_condition); - } + fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters, + build_int_cst (TREE_TYPE (scalar_loop_iters), limit)); if (cond_expr) { pre_condition = @@ -1385,7 +1416,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, { if (check_profitability) set_prologue_iterations (bb_before_first_loop, first_niters, - loop, th); + loop, th, first_guard_probability); pre_condition = fold_build2 (LE_EXPR, boolean_type_node, *first_niters, @@ -1394,10 +1425,13 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition, cond_expr_stmt_list, - bb_before_second_loop, bb_before_first_loop); + bb_before_second_loop, bb_before_first_loop, + inverse_probability (first_guard_probability)); + scale_loop_profile (first_loop, first_guard_probability, + check_profitability && (int)th > bound1 ? th : bound1); slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop, first_loop == new_loop, - &new_exit_bb, &definitions); + &new_exit_bb); /* 3. Add the guard that controls whether the second loop is executed. @@ -1432,7 +1466,9 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, pre_condition = fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters); skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL, - bb_after_second_loop, bb_before_first_loop); + bb_after_second_loop, bb_before_first_loop, + inverse_probability (second_guard_probability)); + scale_loop_profile (second_loop, probability_of_second_loop, bound2); slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop, second_loop == new_loop, &new_exit_bb); @@ -1441,7 +1477,6 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, if (update_first_loop_count) slpeel_make_loop_iterate_ntimes (first_loop, *first_niters); - BITMAP_FREE (definitions); delete_update_ssa (); adjust_vec_debug_stmts (); @@ -1456,189 +1491,41 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, location is calculated. Return the loop location if succeed and NULL if not. */ -LOC +source_location find_loop_location (struct loop *loop) { - gimple stmt = NULL; + gimple *stmt = NULL; basic_block bb; gimple_stmt_iterator si; if (!loop) - return UNKNOWN_LOC; + return UNKNOWN_LOCATION; stmt = get_loop_exit_condition (loop); - if (stmt && gimple_location (stmt) != UNKNOWN_LOC) + if (stmt + && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION) return gimple_location (stmt); /* If we got here the loop is probably not "well formed", try to estimate the loop location */ if (!loop->header) - return UNKNOWN_LOC; + return UNKNOWN_LOCATION; bb = loop->header; for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) { stmt = gsi_stmt (si); - if (gimple_location (stmt) != UNKNOWN_LOC) + if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION) return gimple_location (stmt); } - return UNKNOWN_LOC; + return UNKNOWN_LOCATION; } -/* This function builds ni_name = number of iterations loop executes - on the loop preheader. If SEQ is given the stmt is instead emitted - there. */ - -static tree -vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq) -{ - tree ni_name, var; - gimple_seq stmts = NULL; - edge pe; - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); - - var = create_tmp_var (TREE_TYPE (ni), "niters"); - add_referenced_var (var); - ni_name = force_gimple_operand (ni, &stmts, false, var); - - pe = loop_preheader_edge (loop); - if (stmts) - { - if (seq) - gimple_seq_add_seq (&seq, stmts); - else - { - basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } - } - - return ni_name; -} - - -/* This function generates the following statements: - - ni_name = number of iterations loop executes - ratio = ni_name / vf - ratio_mult_vf_name = ratio * vf - - and places them at the loop preheader edge or in COND_EXPR_STMT_LIST - if that is non-NULL. */ - -static void -vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, - tree *ni_name_ptr, - tree *ratio_mult_vf_name_ptr, - tree *ratio_name_ptr, - gimple_seq cond_expr_stmt_list) -{ - - edge pe; - basic_block new_bb; - gimple_seq stmts; - tree ni_name, ni_minus_gap_name; - tree var; - tree ratio_name; - tree ratio_mult_vf_name; - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - tree ni = LOOP_VINFO_NITERS (loop_vinfo); - int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - tree log_vf; - - pe = loop_preheader_edge (loop); - - /* Generate temporary variable that contains - number of iterations loop executes. */ - - ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list); - log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); - - /* If epilogue loop is required because of data accesses with gaps, we - subtract one iteration from the total number of iterations here for - correct calculation of RATIO. */ - if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) - { - ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name), - ni_name, - build_one_cst (TREE_TYPE (ni_name))); - if (!is_gimple_val (ni_minus_gap_name)) - { - var = create_tmp_var (TREE_TYPE (ni), "ni_gap"); - add_referenced_var (var); - - stmts = NULL; - ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts, - true, var); - if (cond_expr_stmt_list) - gimple_seq_add_seq (&cond_expr_stmt_list, stmts); - else - { - pe = loop_preheader_edge (loop); - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } - } - } - else - ni_minus_gap_name = ni_name; - - /* Create: ratio = ni >> log2(vf) */ - - ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name), - ni_minus_gap_name, log_vf); - if (!is_gimple_val (ratio_name)) - { - var = create_tmp_var (TREE_TYPE (ni), "bnd"); - add_referenced_var (var); - - stmts = NULL; - ratio_name = force_gimple_operand (ratio_name, &stmts, true, var); - if (cond_expr_stmt_list) - gimple_seq_add_seq (&cond_expr_stmt_list, stmts); - else - { - pe = loop_preheader_edge (loop); - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } - } - - /* Create: ratio_mult_vf = ratio << log2 (vf). */ - - ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), - ratio_name, log_vf); - if (!is_gimple_val (ratio_mult_vf_name)) - { - var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); - add_referenced_var (var); - - stmts = NULL; - ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts, - true, var); - if (cond_expr_stmt_list) - gimple_seq_add_seq (&cond_expr_stmt_list, stmts); - else - { - pe = loop_preheader_edge (loop); - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } - } - - *ni_name_ptr = ni_name; - *ratio_mult_vf_name_ptr = ratio_mult_vf_name; - *ratio_name_ptr = ratio_name; - - return; -} - /* Function vect_can_advance_ivs_p In case the number of iterations that LOOP iterates is unknown at compile @@ -1653,33 +1540,33 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block bb = loop->header; - gimple phi; - gimple_stmt_iterator gsi; + gimple *phi; + gphi_iterator gsi; /* Analyze phi functions of the loop header. */ - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "vect_can_advance_ivs_p:"); - + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n"); for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { - tree access_fn = NULL; tree evolution_part; - phi = gsi_stmt (gsi); - if (vect_print_dump_info (REPORT_DETAILS)) + phi = gsi.phi (); + if (dump_enabled_p ()) { - fprintf (vect_dump, "Analyze phi: "); - print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM); + dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: "); + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0); + dump_printf (MSG_NOTE, "\n"); } /* Skip virtual phi's. The data dependences that are associated with virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ - if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) + if (virtual_operand_p (PHI_RESULT (phi))) { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "virtual phi. skip."); + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "virtual phi. skip.\n"); continue; } @@ -1687,35 +1574,21 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo) if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def) { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "reduc phi. skip."); + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "reduc phi. skip.\n"); continue; } /* Analyze the evolution function. */ - access_fn = instantiate_parameters - (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); - - if (!access_fn) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "No Access function."); - return false; - } - - if (vect_print_dump_info (REPORT_DETAILS)) - { - fprintf (vect_dump, "Access function of PHI: "); - print_generic_expr (vect_dump, access_fn, TDF_SLIM); - } - - evolution_part = evolution_part_in_loop_num (access_fn, loop->num); - + evolution_part + = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi)); if (evolution_part == NULL_TREE) { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "No evolution."); + if (dump_enabled_p ()) + dump_printf (MSG_MISSED_OPTIMIZATION, + "No access function or evolution.\n"); return false; } @@ -1777,11 +1650,11 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters, { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block exit_bb = single_exit (loop)->dest; - gimple phi, phi1; - gimple_stmt_iterator gsi, gsi1; + gphi *phi, *phi1; + gphi_iterator gsi, gsi1; basic_block update_bb = update_e->dest; - /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */ + gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo)); /* Make sure there exists a single-predecessor exit bb: */ gcc_assert (single_pred_p (exit_bb)); @@ -1797,19 +1670,22 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters, gimple_stmt_iterator last_gsi; stmt_vec_info stmt_info; - phi = gsi_stmt (gsi); - phi1 = gsi_stmt (gsi1); - if (vect_print_dump_info (REPORT_DETAILS)) + phi = gsi.phi (); + phi1 = gsi1.phi (); + if (dump_enabled_p ()) { - fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: "); - print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM); + dump_printf_loc (MSG_NOTE, vect_location, + "vect_update_ivs_after_vectorizer: phi: "); + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0); + dump_printf (MSG_NOTE, "\n"); } /* Skip virtual phi's. */ - if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) + if (virtual_operand_p (PHI_RESULT (phi))) { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "virtual phi. skip."); + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "virtual phi. skip.\n"); continue; } @@ -1817,8 +1693,9 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters, stmt_info = vinfo_for_stmt (phi); if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def) { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "reduc phi. skip."); + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "reduc phi. skip.\n"); continue; } @@ -1842,7 +1719,6 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters, init_expr, fold_convert (type, off)); var = create_tmp_var (type, "tmp"); - add_referenced_var (var); last_gsi = gsi_last_bb (exit_bb); ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var, @@ -1853,34 +1729,6 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters, } } -/* Return the more conservative threshold between the - min_profitable_iters returned by the cost model and the user - specified threshold, if provided. */ - -static unsigned int -conservative_cost_threshold (loop_vec_info loop_vinfo, - int min_profitable_iters) -{ - unsigned int th; - int min_scalar_loop_bound; - - min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND) - * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1); - - /* Use the cost model only if it is more conservative than user specified - threshold. */ - th = (unsigned) min_scalar_loop_bound; - if (min_profitable_iters - && (!min_scalar_loop_bound - || min_profitable_iters > min_scalar_loop_bound)) - th = (unsigned) min_profitable_iters; - - if (th && vect_print_dump_info (REPORT_COST)) - fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th); - - return th; -} - /* Function vect_do_peeling_for_loop_bound Peel the last iterations of the loop represented by LOOP_VINFO. @@ -1895,55 +1743,34 @@ conservative_cost_threshold (loop_vec_info loop_vinfo, test. */ void -vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, - tree cond_expr, gimple_seq cond_expr_stmt_list) +vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, + tree ni_name, tree ratio_mult_vf_name, + unsigned int th, bool check_profitability) { - tree ni_name, ratio_mult_vf_name; struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); struct loop *new_loop; edge update_e; basic_block preheader; int loop_num; - bool check_profitability = false; - unsigned int th = 0; - int min_profitable_iters; + int max_iter; + tree cond_expr = NULL_TREE; + gimple_seq cond_expr_stmt_list = NULL; - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ==="); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "=== vect_do_peeling_for_loop_bound ===\n"); initialize_original_copy_tables (); - /* Generate the following variables on the preheader of original loop: - - ni_name = number of iteration the original loop executes - ratio = ni_name / vf - ratio_mult_vf_name = ratio * vf */ - vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, - &ratio_mult_vf_name, ratio, - cond_expr_stmt_list); - loop_num = loop->num; - /* If cost model check not done during versioning and - peeling for alignment. */ - if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) - && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo) - && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) - && !cond_expr) - { - check_profitability = true; - - /* Get profitability threshold for vectorized loop. */ - min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo); - - th = conservative_cost_threshold (loop_vinfo, - min_profitable_iters); - } - - new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop), - &ratio_mult_vf_name, ni_name, false, - th, check_profitability, - cond_expr, cond_expr_stmt_list); + new_loop + = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop), + &ratio_mult_vf_name, ni_name, false, + th, check_profitability, + cond_expr, cond_expr_stmt_list, + 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo)); gcc_assert (new_loop); gcc_assert (loop_num == loop->num); #ifdef ENABLE_CHECKING @@ -1966,6 +1793,21 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, by ratio_mult_vf_name steps. */ vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e); + /* For vectorization factor N, we need to copy last N-1 values in epilogue + and this means N-2 loopback edge executions. + + PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue + will execute at least LOOP_VINFO_VECT_FACTOR times. */ + max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) + ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2 + : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2; + if (check_profitability) + max_iter = MAX (max_iter, (int) th - 1); + record_niter_bound (new_loop, max_iter, false, true); + dump_printf (MSG_NOTE, + "Setting upper bound of nb iterations for epilogue " + "loop to %d\n", max_iter); + /* After peeling we have to reset scalar evolution analyzer. */ scev_reset (); @@ -1986,7 +1828,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, If the misalignment of DR is known at compile time: addr_mis = int mis = DR_MISALIGNMENT (dr); Else, compute address misalignment in bytes: - addr_mis = addr & (vectype_size - 1) + addr_mis = addr & (vectype_align - 1) prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step) @@ -2004,7 +1846,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, use TYPE_VECTOR_SUBPARTS. */ static tree -vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) +vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound) { struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); @@ -2013,7 +1855,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) tree iters, iters_name; edge pe; basic_block new_bb; - gimple dr_stmt = DR_STMT (dr); + gimple *dr_stmt = DR_STMT (dr); stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt); tree vectype = STMT_VINFO_VECTYPE (stmt_info); int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT; @@ -2022,27 +1864,30 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) pe = loop_preheader_edge (loop); - if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) + if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) { - int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); + int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "known peeling = %d.", npeel); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "known peeling = %d.\n", npeel); iters = build_int_cst (niters_type, npeel); + *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); } else { gimple_seq new_stmts = NULL; bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0; tree offset = negative - ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE; + ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node; tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, offset, loop); tree type = unsigned_type_for (TREE_TYPE (start_addr)); - tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1); - tree elem_size_log = - build_int_cst (type, exact_log2 (vectype_align/nelements)); + tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1); + HOST_WIDE_INT elem_size = + int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); + tree elem_size_log = build_int_cst (type, exact_log2 (elem_size)); tree nelements_minus_1 = build_int_cst (type, nelements - 1); tree nelements_tree = build_int_cst (type, nelements); tree byte_misalign; @@ -2051,10 +1896,10 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts); gcc_assert (!new_bb); - /* Create: byte_misalign = addr & (vectype_size - 1) */ + /* Create: byte_misalign = addr & (vectype_align - 1) */ byte_misalign = fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), - vectype_size_minus_1); + vectype_align_minus_1); /* Create: elem_misalign = byte_misalign / element_size */ elem_misalign = @@ -2067,6 +1912,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign); iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1); iters = fold_convert (niters_type, iters); + *bound = nelements; } /* Create: prolog_loop_niters = min (iters, loop_niters) */ @@ -2076,14 +1922,15 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) if (TREE_CODE (loop_niters) != INTEGER_CST) iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters); - if (vect_print_dump_info (REPORT_DETAILS)) + if (dump_enabled_p ()) { - fprintf (vect_dump, "niters for prolog loop: "); - print_generic_expr (vect_dump, iters, TDF_SLIM); + dump_printf_loc (MSG_NOTE, vect_location, + "niters for prolog loop: "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, iters); + dump_printf (MSG_NOTE, "\n"); } var = create_tmp_var (niters_type, "prolog_loop_niters"); - add_referenced_var (var); stmts = NULL; iters_name = force_gimple_operand (iters, &stmts, false, var); @@ -2131,13 +1978,14 @@ static void vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) { unsigned int i; - VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); + vec datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); struct data_reference *dr; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "=== vect_update_inits_of_dr ===\n"); - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "=== vect_update_inits_of_dr ==="); - - FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) + FOR_EACH_VEC_ELT (datarefs, i, dr) vect_update_init_of_dr (dr, niters); } @@ -2151,51 +1999,58 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */ void -vect_do_peeling_for_alignment (loop_vec_info loop_vinfo) +vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name, + unsigned int th, bool check_profitability) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - tree niters_of_prolog_loop, ni_name; - tree n_iters; + struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); + tree niters_of_prolog_loop; tree wide_prolog_niters; struct loop *new_loop; - unsigned int th = 0; - int min_profitable_iters; int max_iter; + int bound = 0; - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "=== vect_do_peeling_for_alignment ==="); + if (dump_enabled_p ()) + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, + "loop peeled for vectorization to enhance" + " alignment\n"); initialize_original_copy_tables (); - ni_name = vect_build_loop_niters (loop_vinfo, NULL); + gimple_seq stmts = NULL; + gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, - ni_name); - - /* Get profitability threshold for vectorized loop. */ - min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo); - th = conservative_cost_threshold (loop_vinfo, - min_profitable_iters); + ni_name, + &bound); /* Peel the prolog loop and iterate it niters_of_prolog_loop. */ new_loop = - slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop), + slpeel_tree_peel_loop_to_edge (loop, scalar_loop, + loop_preheader_edge (loop), &niters_of_prolog_loop, ni_name, true, - th, true, NULL_TREE, NULL); + th, check_profitability, NULL_TREE, NULL, + bound, 0); gcc_assert (new_loop); #ifdef ENABLE_CHECKING slpeel_verify_cfg_after_peeling (new_loop, loop); #endif - max_iter = MAX (LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1, (int) th); - record_niter_bound (new_loop, shwi_to_double_int (max_iter), false, true); - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Setting upper bound of nb iterations for prologue " - "loop to %d\n", max_iter); + /* For vectorization factor N, we need to copy at most N-1 values + for alignment and this means N-2 loopback edge executions. */ + max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2; + if (check_profitability) + max_iter = MAX (max_iter, (int) th - 1); + record_niter_bound (new_loop, max_iter, false, true); + dump_printf (MSG_NOTE, + "Setting upper bound of nb iterations for prologue " + "loop to %d\n", max_iter); /* Update number of times loop executes. */ - n_iters = LOOP_VINFO_NITERS (loop_vinfo); LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR, - TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop); + TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop); + LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR, + TREE_TYPE (ni_name), + LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop); if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop))) wide_prolog_niters = niters_of_prolog_loop; @@ -2205,7 +2060,6 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo) edge pe = loop_preheader_edge (loop); tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop); tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters"); - add_referenced_var (var); wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false, var); if (seq) @@ -2257,17 +2111,17 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo, gimple_seq *cond_expr_stmt_list) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - VEC(gimple,heap) *may_misalign_stmts + vec may_misalign_stmts = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo); - gimple ref_stmt; + gimple *ref_stmt; int mask = LOOP_VINFO_PTR_MASK (loop_vinfo); tree mask_cst; unsigned int i; tree int_ptrsize_type; char tmp_name[20]; tree or_tmp_name = NULL_TREE; - tree and_tmp, and_tmp_name; - gimple and_stmt; + tree and_tmp_name; + gimple *and_stmt; tree ptrsize_zero; tree part_cond_expr; @@ -2280,19 +2134,19 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo, /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address of the first vector of the i'th data reference. */ - FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, ref_stmt) + FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt) { gimple_seq new_stmt_list = NULL; tree addr_base; - tree addr_tmp, addr_tmp_name; - tree or_tmp, new_or_tmp_name; - gimple addr_stmt, or_stmt; + tree addr_tmp_name; + tree new_or_tmp_name; + gimple *addr_stmt, *or_stmt; stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt); tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); bool negative = tree_int_cst_compare (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0; tree offset = negative - ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE; + ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node; /* create: addr_tmp = (int)(address_of_first_vector) */ addr_base = @@ -2301,13 +2155,9 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo, if (new_stmt_list != NULL) gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list); - sprintf (tmp_name, "%s%d", "addr2int", i); - addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name); - add_referenced_var (addr_tmp); - addr_tmp_name = make_ssa_name (addr_tmp, NULL); - addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name, - addr_base, NULL_TREE); - SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt; + sprintf (tmp_name, "addr2int%d", i); + addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name); + addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base); gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt); /* The addresses are OR together. */ @@ -2315,14 +2165,10 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo, if (or_tmp_name != NULL_TREE) { /* create: or_tmp = or_tmp | addr_tmp */ - sprintf (tmp_name, "%s%d", "orptrs", i); - or_tmp = create_tmp_var (int_ptrsize_type, tmp_name); - add_referenced_var (or_tmp); - new_or_tmp_name = make_ssa_name (or_tmp, NULL); - or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, - new_or_tmp_name, - or_tmp_name, addr_tmp_name); - SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt; + sprintf (tmp_name, "orptrs%d", i); + new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name); + or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR, + or_tmp_name, addr_tmp_name); gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt); or_tmp_name = new_or_tmp_name; } @@ -2334,13 +2180,10 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo, mask_cst = build_int_cst (int_ptrsize_type, mask); /* create: and_tmp = or_tmp & mask */ - and_tmp = create_tmp_var (int_ptrsize_type, "andmask" ); - add_referenced_var (and_tmp); - and_tmp_name = make_ssa_name (and_tmp, NULL); + and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask"); - and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name, - or_tmp_name, mask_cst); - SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt; + and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR, + or_tmp_name, mask_cst); gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt); /* Make and_tmp the left operand of the conditional test against zero. @@ -2355,44 +2198,6 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo, *cond_expr = part_cond_expr; } - -/* Function vect_vfa_segment_size. - - Create an expression that computes the size of segment - that will be accessed for a data reference. The functions takes into - account that realignment loads may access one more vector. - - Input: - DR: The data reference. - LENGTH_FACTOR: segment length to consider. - - Return an expression whose value is the size of segment which will be - accessed by DR. */ - -static tree -vect_vfa_segment_size (struct data_reference *dr, tree length_factor) -{ - tree segment_length; - - if (!compare_tree_int (DR_STEP (dr), 0)) - segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))); - else - segment_length = size_binop (MULT_EXPR, - fold_convert (sizetype, DR_STEP (dr)), - fold_convert (sizetype, length_factor)); - - if (vect_supportable_dr_alignment (dr, false) - == dr_explicit_realign_optimized) - { - tree vector_size = TYPE_SIZE_UNIT - (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)))); - - segment_length = size_binop (PLUS_EXPR, segment_length, vector_size); - } - return segment_length; -} - - /* Function vect_create_cond_for_alias_checks. Create a conditional expression that represents the run-time checks for @@ -2401,34 +2206,24 @@ vect_vfa_segment_size (struct data_reference *dr, tree length_factor) Input: COND_EXPR - input conditional expression. New conditions will be chained - with logical AND operation. + with logical AND operation. If it is NULL, then the function + is used to return the number of alias checks. LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs to be checked. Output: COND_EXPR - conditional expression. - COND_EXPR_STMT_LIST - statements needed to construct the conditional - expression. - - The returned value is the conditional expression to be used in the if + The returned COND_EXPR is the conditional expression to be used in the if statement that controls which version of the loop gets executed at runtime. */ -static void -vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, - tree * cond_expr, - gimple_seq * cond_expr_stmt_list) +void +vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr) { - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - VEC (ddr_p, heap) * may_alias_ddrs = - LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo); - int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo); - - ddr_p ddr; - unsigned int i; - tree part_cond_expr, length_factor; + vec comp_alias_ddrs = + LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo); + tree part_cond_expr; /* Create expression ((store_ptr_0 + store_segment_length_0) <= load_ptr_0) @@ -2439,68 +2234,51 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, ((store_ptr_n + store_segment_length_n) <= load_ptr_n) || (load_ptr_n + load_segment_length_n) <= store_ptr_n)) */ - if (VEC_empty (ddr_p, may_alias_ddrs)) + if (comp_alias_ddrs.is_empty ()) return; - FOR_EACH_VEC_ELT (ddr_p, may_alias_ddrs, i, ddr) + for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i) { - struct data_reference *dr_a, *dr_b; - gimple dr_group_first_a, dr_group_first_b; - tree addr_base_a, addr_base_b; - tree segment_length_a, segment_length_b; - gimple stmt_a, stmt_b; - tree seg_a_min, seg_a_max, seg_b_min, seg_b_max; - - dr_a = DDR_A (ddr); - stmt_a = DR_STMT (DDR_A (ddr)); - dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a)); - if (dr_group_first_a) - { - stmt_a = dr_group_first_a; - dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a)); - } - - dr_b = DDR_B (ddr); - stmt_b = DR_STMT (DDR_B (ddr)); - dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b)); - if (dr_group_first_b) - { - stmt_b = dr_group_first_b; - dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b)); - } + const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first; + const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second; + tree segment_length_a = dr_a.seg_len; + tree segment_length_b = dr_b.seg_len; - addr_base_a = - vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list, - NULL_TREE, loop); - addr_base_b = - vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list, - NULL_TREE, loop); + tree addr_base_a + = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset); + tree addr_base_b + = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset); - if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)) - length_factor = scalar_loop_iters; - else - length_factor = size_int (vect_factor); - segment_length_a = vect_vfa_segment_size (dr_a, length_factor); - segment_length_b = vect_vfa_segment_size (dr_b, length_factor); - - if (vect_print_dump_info (REPORT_DR_DETAILS)) + if (dump_enabled_p ()) { - fprintf (vect_dump, - "create runtime check for data references "); - print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM); - fprintf (vect_dump, " and "); - print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM); + dump_printf_loc (MSG_NOTE, vect_location, + "create runtime check for data references "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr)); + dump_printf (MSG_NOTE, " and "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr)); + dump_printf (MSG_NOTE, "\n"); } - seg_a_min = addr_base_a; - seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a); - if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0) - seg_a_min = seg_a_max, seg_a_max = addr_base_a; + tree seg_a_min = addr_base_a; + tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a); + /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT + bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of + [a, a+12) */ + if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0) + { + tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr))); + seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size); + seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size); + } - seg_b_min = addr_base_b; - seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b); - if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0) - seg_b_min = seg_b_max, seg_b_max = addr_base_b; + tree seg_b_min = addr_base_b; + tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b); + if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0) + { + tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr))); + seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size); + seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size); + } part_cond_expr = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, @@ -2514,9 +2292,12 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, *cond_expr = part_cond_expr; } - if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)) - fprintf (vect_dump, "created %u versioning for alias checks.\n", - VEC_length (ddr_p, may_alias_ddrs)); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "created %u versioning for alias checks.\n", + comp_alias_ddrs.length ()); + + comp_alias_ddrs.release (); } @@ -2537,59 +2318,102 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, cost model initially. The versioning precondition(s) are placed in *COND_EXPR and - *COND_EXPR_STMT_LIST. If DO_VERSIONING is true versioning is - also performed, otherwise only the conditions are generated. */ + *COND_EXPR_STMT_LIST. */ void -vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning, - tree *cond_expr, gimple_seq *cond_expr_stmt_list) +vect_loop_versioning (loop_vec_info loop_vinfo, + unsigned int th, bool check_profitability) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); basic_block condition_bb; - gimple_stmt_iterator gsi, cond_exp_gsi; + gphi_iterator gsi; + gimple_stmt_iterator cond_exp_gsi; basic_block merge_bb; basic_block new_exit_bb; edge new_exit_e, e; - gimple orig_phi, new_phi; + gphi *orig_phi, *new_phi; + tree cond_expr = NULL_TREE; + gimple_seq cond_expr_stmt_list = NULL; tree arg; unsigned prob = 4 * REG_BR_PROB_BASE / 5; gimple_seq gimplify_stmt_list = NULL; tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo); - int min_profitable_iters = 0; - unsigned int th; - - /* Get profitability threshold for vectorized loop. */ - min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo); + bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo); + bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo); - th = conservative_cost_threshold (loop_vinfo, - min_profitable_iters); + if (check_profitability) + { + cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters, + build_int_cst (TREE_TYPE (scalar_loop_iters), th)); + cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list, + is_gimple_condexpr, NULL_TREE); + } - *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters, - build_int_cst (TREE_TYPE (scalar_loop_iters), th)); - *cond_expr = force_gimple_operand_1 (*cond_expr, cond_expr_stmt_list, - is_gimple_condexpr, NULL_TREE); + if (version_align) + vect_create_cond_for_align_checks (loop_vinfo, &cond_expr, + &cond_expr_stmt_list); - if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)) - vect_create_cond_for_align_checks (loop_vinfo, cond_expr, - cond_expr_stmt_list); + if (version_alias) + vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr); - if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) - vect_create_cond_for_alias_checks (loop_vinfo, cond_expr, - cond_expr_stmt_list); + cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list, + is_gimple_condexpr, NULL_TREE); + gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list); - *cond_expr = force_gimple_operand_1 (*cond_expr, &gimplify_stmt_list, - is_gimple_condexpr, NULL_TREE); - gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list); + initialize_original_copy_tables (); + if (scalar_loop) + { + edge scalar_e; + basic_block preheader, scalar_preheader; + + /* We don't want to scale SCALAR_LOOP's frequencies, we need to + scale LOOP's frequencies instead. */ + loop_version (scalar_loop, cond_expr, &condition_bb, + prob, REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true); + scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE); + /* CONDITION_BB was created above SCALAR_LOOP's preheader, + while we need to move it above LOOP's preheader. */ + e = loop_preheader_edge (loop); + scalar_e = loop_preheader_edge (scalar_loop); + gcc_assert (empty_block_p (e->src) + && single_pred_p (e->src)); + gcc_assert (empty_block_p (scalar_e->src) + && single_pred_p (scalar_e->src)); + gcc_assert (single_pred_p (condition_bb)); + preheader = e->src; + scalar_preheader = scalar_e->src; + scalar_e = find_edge (condition_bb, scalar_preheader); + e = single_pred_edge (preheader); + redirect_edge_and_branch_force (single_pred_edge (condition_bb), + scalar_preheader); + redirect_edge_and_branch_force (scalar_e, preheader); + redirect_edge_and_branch_force (e, condition_bb); + set_immediate_dominator (CDI_DOMINATORS, condition_bb, + single_pred (condition_bb)); + set_immediate_dominator (CDI_DOMINATORS, scalar_preheader, + single_pred (scalar_preheader)); + set_immediate_dominator (CDI_DOMINATORS, preheader, + condition_bb); + } + else + loop_version (loop, cond_expr, &condition_bb, + prob, prob, REG_BR_PROB_BASE - prob, true); - /* If we only needed the extra conditions and a new loop copy - bail out here. */ - if (!do_versioning) - return; + if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION + && dump_enabled_p ()) + { + if (version_alias) + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, + "loop versioned for vectorization because of " + "possible aliasing\n"); + if (version_align) + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, + "loop versioned for vectorization to enhance " + "alignment\n"); - initialize_original_copy_tables (); - loop_version (loop, *cond_expr, &condition_bb, - prob, prob, REG_BR_PROB_BASE - prob, true); - free_original_copy_tables(); + } + free_original_copy_tables (); /* Loop versioning violates an assumption we try to maintain during vectorization - that the loop exit block has a single predecessor. @@ -2597,35 +2421,38 @@ vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning, basic block (i.e. it has two predecessors). Just in order to simplify following transformations in the vectorizer, we fix this situation here by adding a new (empty) block on the exit-edge of the loop, - with the proper loop-exit phis to maintain loop-closed-form. */ + with the proper loop-exit phis to maintain loop-closed-form. + If loop versioning wasn't done from loop, but scalar_loop instead, + merge_bb will have already just a single successor. */ merge_bb = single_exit (loop)->dest; - gcc_assert (EDGE_COUNT (merge_bb->preds) == 2); - new_exit_bb = split_edge (single_exit (loop)); - new_exit_e = single_exit (loop); - e = EDGE_SUCC (new_exit_bb, 0); - - for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi)) + if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2) { - orig_phi = gsi_stmt (gsi); - new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), - new_exit_bb); - arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); - add_phi_arg (new_phi, arg, new_exit_e, - gimple_phi_arg_location_from_edge (orig_phi, e)); - adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi)); + gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2); + new_exit_bb = split_edge (single_exit (loop)); + new_exit_e = single_exit (loop); + e = EDGE_SUCC (new_exit_bb, 0); + + for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + tree new_res; + orig_phi = gsi.phi (); + new_res = copy_ssa_name (PHI_RESULT (orig_phi)); + new_phi = create_phi_node (new_res, new_exit_bb); + arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); + add_phi_arg (new_phi, arg, new_exit_e, + gimple_phi_arg_location_from_edge (orig_phi, e)); + adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi)); + } } /* End loop-exit-fixes after versioning. */ - update_ssa (TODO_update_ssa); - if (*cond_expr_stmt_list) + if (cond_expr_stmt_list) { cond_exp_gsi = gsi_last_bb (condition_bb); - gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list, + gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, GSI_SAME_STMT); - *cond_expr_stmt_list = NULL; } - *cond_expr = NULL_TREE; + update_ssa (TODO_update_ssa); } -