re PR target/66274 (gcc6: x86 -m32 emits 64bit register in inline asm)
[gcc.git] / gcc / tree-vect-loop-manip.c
index cea8a1b8c524a9455e984a8cf6053fce2a625ee7..a344a49948b7f7e54b7d9d4c5c45492ebd3f130b 100644 (file)
@@ -1,6 +1,5 @@
 /* Vectorizer Specific Loop Manipulations
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012
-   Free Software Foundation, Inc.
+   Copyright (C) 2003-2015 Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com>
    and Ira Rosen <irar@il.ibm.com>
 
@@ -23,12 +22,45 @@ along with GCC; see the file COPYING3.  If not see
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
+#include "dumpfile.h"
 #include "tm.h"
-#include "ggc.h"
+#include "hash-set.h"
+#include "machmode.h"
+#include "vec.h"
+#include "double-int.h"
+#include "input.h"
+#include "alias.h"
+#include "symtab.h"
+#include "wide-int.h"
+#include "inchash.h"
 #include "tree.h"
+#include "fold-const.h"
+#include "predict.h"
+#include "hard-reg-set.h"
+#include "input.h"
+#include "function.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "cfganal.h"
 #include "basic-block.h"
 #include "gimple-pretty-print.h"
-#include "tree-flow.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "gimple-ssa.h"
+#include "tree-cfg.h"
+#include "tree-phinodes.h"
+#include "ssa-iterators.h"
+#include "stringpool.h"
+#include "tree-ssanames.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-into-ssa.h"
+#include "tree-ssa.h"
 #include "tree-pass.h"
 #include "cfgloop.h"
 #include "diagnostic-core.h"
@@ -67,10 +99,9 @@ rename_use_op (use_operand_p op_p)
 
 /* Renames the variables in basic block BB.  */
 
-void
+static void
 rename_variables_in_bb (basic_block bb)
 {
-  gimple_stmt_iterator gsi;
   gimple stmt;
   use_operand_p use_p;
   ssa_op_iter iter;
@@ -78,56 +109,38 @@ rename_variables_in_bb (basic_block bb)
   edge_iterator ei;
   struct loop *loop = bb->loop_father;
 
-  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+       gsi_next (&gsi))
     {
       stmt = gsi_stmt (gsi);
       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
     }
 
-  FOR_EACH_EDGE (e, ei, bb->succs)
+  FOR_EACH_EDGE (e, ei, bb->preds)
     {
-      if (!flow_bb_inside_loop_p (loop, e->dest))
+      if (!flow_bb_inside_loop_p (loop, e->src))
        continue;
-      for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
-        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
+      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
+          gsi_next (&gsi))
+        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
     }
 }
 
 
-/* Renames variables in new generated LOOP.  */
-
-void
-rename_variables_in_loop (struct loop *loop)
-{
-  unsigned i;
-  basic_block *bbs;
-
-  bbs = get_loop_body (loop);
-
-  for (i = 0; i < loop->num_nodes; i++)
-    rename_variables_in_bb (bbs[i]);
-
-  free (bbs);
-}
-
 typedef struct
 {
   tree from, to;
   basic_block bb;
 } adjust_info;
 
-DEF_VEC_O(adjust_info);
-DEF_VEC_ALLOC_O_STACK(adjust_info);
-#define VEC_adjust_info_stack_alloc(alloc) VEC_stack_alloc (adjust_info, alloc)
-
 /* A stack of values to be adjusted in debug stmts.  We have to
    process them LIFO, so that the closest substitution applies.  If we
    processed them FIFO, without the stack, we might substitute uses
    with a PHI DEF that would soon become non-dominant, and when we got
    to the suitable one, it wouldn't have anything to substitute any
    more.  */
-static VEC(adjust_info, stack) *adjust_vec;
+static vec<adjust_info, va_heap> adjust_vec;
 
 /* Adjust any debug stmts that referenced AI->from values to use the
    loop-closed AI->to, if the references are dominated by AI->bb and
@@ -184,15 +197,15 @@ adjust_vec_debug_stmts (void)
   if (!MAY_HAVE_DEBUG_STMTS)
     return;
 
-  gcc_assert (adjust_vec);
+  gcc_assert (adjust_vec.exists ());
 
-  while (!VEC_empty (adjust_info, adjust_vec))
+  while (!adjust_vec.is_empty ())
     {
-      adjust_debug_stmts_now (VEC_last (adjust_info, adjust_vec));
-      VEC_pop (adjust_info, adjust_vec);
+      adjust_debug_stmts_now (&adjust_vec.last ());
+      adjust_vec.pop ();
     }
 
-  VEC_free (adjust_info, stack, adjust_vec);
+  adjust_vec.release ();
 }
 
 /* Adjust any debug stmts that referenced FROM values to use the
@@ -205,15 +218,17 @@ adjust_debug_stmts (tree from, tree to, basic_block bb)
 {
   adjust_info ai;
 
-  if (MAY_HAVE_DEBUG_STMTS && TREE_CODE (from) == SSA_NAME
-      && SSA_NAME_VAR (from) != gimple_vop (cfun))
+  if (MAY_HAVE_DEBUG_STMTS
+      && TREE_CODE (from) == SSA_NAME
+      && ! SSA_NAME_IS_DEFAULT_DEF (from)
+      && ! virtual_operand_p (from))
     {
       ai.from = from;
       ai.to = to;
       ai.bb = bb;
 
-      if (adjust_vec)
-       VEC_safe_push (adjust_info, stack, adjust_vec, &ai);
+      if (adjust_vec.exists ())
+       adjust_vec.safe_push (ai);
       else
        adjust_debug_stmts_now (&ai);
     }
@@ -237,101 +252,6 @@ adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
 }
 
 
-/* Update the PHI nodes of NEW_LOOP.
-
-   NEW_LOOP is a duplicate of ORIG_LOOP.
-   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
-   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
-   executes before it.  */
-
-static void
-slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
-                                      struct loop *new_loop, bool after)
-{
-  tree new_ssa_name;
-  gimple phi_new, phi_orig;
-  tree def;
-  edge orig_loop_latch = loop_latch_edge (orig_loop);
-  edge orig_entry_e = loop_preheader_edge (orig_loop);
-  edge new_loop_exit_e = single_exit (new_loop);
-  edge new_loop_entry_e = loop_preheader_edge (new_loop);
-  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
-  gimple_stmt_iterator gsi_new, gsi_orig;
-
-  /*
-     step 1. For each loop-header-phi:
-             Add the first phi argument for the phi in NEW_LOOP
-            (the one associated with the entry of NEW_LOOP)
-
-     step 2. For each loop-header-phi:
-             Add the second phi argument for the phi in NEW_LOOP
-            (the one associated with the latch of NEW_LOOP)
-
-     step 3. Update the phis in the successor block of NEW_LOOP.
-
-        case 1: NEW_LOOP was placed before ORIG_LOOP:
-                The successor block of NEW_LOOP is the header of ORIG_LOOP.
-                Updating the phis in the successor block can therefore be done
-                along with the scanning of the loop header phis, because the
-                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
-                phi nodes, organized in the same order.
-
-        case 2: NEW_LOOP was placed after ORIG_LOOP:
-                The successor block of NEW_LOOP is the original exit block of
-                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
-                We postpone updating these phis to a later stage (when
-                loop guards are added).
-   */
-
-
-  /* Scan the phis in the headers of the old and new loops
-     (they are organized in exactly the same order).  */
-
-  for (gsi_new = gsi_start_phis (new_loop->header),
-       gsi_orig = gsi_start_phis (orig_loop->header);
-       !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
-       gsi_next (&gsi_new), gsi_next (&gsi_orig))
-    {
-      source_location locus;
-      phi_new = gsi_stmt (gsi_new);
-      phi_orig = gsi_stmt (gsi_orig);
-
-      /* step 1.  */
-      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
-      locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e);
-      add_phi_arg (phi_new, def, new_loop_entry_e, locus);
-
-      /* step 2.  */
-      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
-      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
-      if (TREE_CODE (def) != SSA_NAME)
-        continue;
-
-      new_ssa_name = get_current_def (def);
-      if (!new_ssa_name)
-       {
-         /* This only happens if there are no definitions
-            inside the loop. use the phi_result in this case.  */
-         new_ssa_name = PHI_RESULT (phi_new);
-       }
-
-      /* An ordinary ssa name defined in the loop.  */
-      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
-
-      /* Drop any debug references outside the loop, if they would
-        become ill-formed SSA.  */
-      adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest);
-
-      /* step 3 (case 1).  */
-      if (!after)
-        {
-          gcc_assert (new_loop_exit_e == orig_entry_e);
-         adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name);
-        }
-    }
-}
-
-
 /* Update PHI nodes for a guard of the LOOP.
 
    Input:
@@ -489,8 +409,8 @@ static void
 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
                                     bool is_new_loop, basic_block *new_exit_bb)
 {
-  gimple orig_phi, new_phi;
-  gimple update_phi, update_phi2;
+  gphi *orig_phi, *new_phi;
+  gphi *update_phi, *update_phi2;
   tree guard_arg, loop_arg;
   basic_block new_merge_bb = guard_edge->dest;
   edge e = EDGE_SUCC (new_merge_bb, 0);
@@ -498,7 +418,7 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
   basic_block orig_bb = loop->header;
   edge new_exit_e;
   tree current_new_name;
-  gimple_stmt_iterator gsi_orig, gsi_update;
+  gphi_iterator gsi_orig, gsi_update;
 
   /* Create new bb between loop and new_merge_bb.  */
   *new_exit_bb = split_edge (single_exit (loop));
@@ -512,13 +432,13 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
     {
       source_location loop_locus, guard_locus;
       tree new_res;
-      orig_phi = gsi_stmt (gsi_orig);
-      update_phi = gsi_stmt (gsi_update);
+      orig_phi = gsi_orig.phi ();
+      update_phi = gsi_update.phi ();
 
       /** 1. Handle new-merge-point phis  **/
 
       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
-      new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
+      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
       new_phi = create_phi_node (new_res, new_merge_bb);
 
       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
@@ -544,11 +464,11 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 
       /** 2. Handle loop-closed-ssa-form phis  **/
 
-      if (!is_gimple_reg (PHI_RESULT (orig_phi)))
+      if (virtual_operand_p (PHI_RESULT (orig_phi)))
        continue;
 
       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
-      new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
+      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
       new_phi = create_phi_node (new_res, *new_exit_bb);
 
       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
@@ -581,7 +501,18 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
          if (!current_new_name)
            continue;
         }
-      gcc_assert (get_current_def (current_new_name) == NULL_TREE);
+      tree new_name = get_current_def (current_new_name);
+      /* Because of peeled_chrec optimization it is possible that we have
+        set this earlier.  Verify the PHI has the same value.  */
+      if (new_name)
+       {
+         gimple phi = SSA_NAME_DEF_STMT (new_name);
+         gcc_assert (gimple_code (phi) == GIMPLE_PHI
+                     && gimple_bb (phi) == *new_exit_bb
+                     && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
+                         == loop_arg));
+         continue;
+       }
 
       set_current_def (current_new_name, PHI_RESULT (new_phi));
     }
@@ -618,8 +549,8 @@ static void
 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
                                     bool is_new_loop, basic_block *new_exit_bb)
 {
-  gimple orig_phi, new_phi;
-  gimple update_phi, update_phi2;
+  gphi *orig_phi, *new_phi;
+  gphi *update_phi, *update_phi2;
   tree guard_arg, loop_arg;
   basic_block new_merge_bb = guard_edge->dest;
   edge e = EDGE_SUCC (new_merge_bb, 0);
@@ -628,7 +559,7 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
   tree orig_def, orig_def_new_name;
   tree new_name, new_name2;
   tree arg;
-  gimple_stmt_iterator gsi;
+  gphi_iterator gsi;
 
   /* Create new bb between loop and new_merge_bb.  */
   *new_exit_bb = split_edge (single_exit (loop));
@@ -638,7 +569,7 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
     {
       tree new_res;
-      update_phi = gsi_stmt (gsi);
+      update_phi = gsi.phi ();
       orig_phi = update_phi;
       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
       /* This loop-closed-phi actually doesn't represent a use
@@ -651,7 +582,7 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
       /** 1. Handle new-merge-point phis  **/
 
       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
-      new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
+      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
       new_phi = create_phi_node (new_res, new_merge_bb);
 
       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
@@ -693,7 +624,7 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
       /** 2. Handle loop-closed-ssa-form phis  **/
 
       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
-      new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
+      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
       new_phi = create_phi_node (new_res, *new_exit_bb);
 
       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
@@ -728,7 +659,7 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
       arg = guard_arg;
 
       /* 3.2. Generate new phi node in GUARD_BB:  */
-      new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
+      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
       new_phi = create_phi_node (new_res, guard_edge->src);
 
       /* 3.3. GUARD_BB has one incoming edge:  */
@@ -754,15 +685,15 @@ void
 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 {
   tree indx_before_incr, indx_after_incr;
-  gimple cond_stmt;
-  gimple orig_cond;
+  gcond *cond_stmt;
+  gcond *orig_cond;
   edge exit_edge = single_exit (loop);
   gimple_stmt_iterator loop_cond_gsi;
   gimple_stmt_iterator incr_gsi;
   bool insert_after;
   tree init = build_int_cst (TREE_TYPE (niters), 0);
   tree step = build_int_cst (TREE_TYPE (niters), 1);
-  LOC loop_loc;
+  source_location loop_loc;
   enum tree_code code;
 
   orig_cond = get_loop_exit_condition (loop);
@@ -787,136 +718,218 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 
   /* Remove old loop exit test:  */
   gsi_remove (&loop_cond_gsi, true);
+  free_stmt_vec_info (orig_cond);
 
   loop_loc = find_loop_location (loop);
-  if (dump_file && (dump_flags & TDF_DETAILS))
+  if (dump_enabled_p ())
     {
-      if (loop_loc != UNKNOWN_LOC)
-        fprintf (dump_file, "\nloop at %s:%d: ",
-                 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
-      print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
+      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
+       dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
+                    LOCATION_LINE (loop_loc));
+      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
+      dump_printf (MSG_NOTE, "\n");
     }
-
   loop->nb_iterations = niters;
 }
 
+/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
+   For all PHI arguments in FROM->dest and TO->dest from those
+   edges ensure that TO->dest PHI arguments have current_def
+   to that in from.  */
+
+static void
+slpeel_duplicate_current_defs_from_edges (edge from, edge to)
+{
+  gimple_stmt_iterator gsi_from, gsi_to;
+
+  for (gsi_from = gsi_start_phis (from->dest),
+       gsi_to = gsi_start_phis (to->dest);
+       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
+       gsi_next (&gsi_from), gsi_next (&gsi_to))
+    {
+      gimple from_phi = gsi_stmt (gsi_from);
+      gimple to_phi = gsi_stmt (gsi_to);
+      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
+      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
+      if (TREE_CODE (from_arg) == SSA_NAME
+         && TREE_CODE (to_arg) == SSA_NAME
+         && get_current_def (to_arg) == NULL_TREE)
+       set_current_def (to_arg, get_current_def (from_arg));
+    }
+}
+
 
 /* Given LOOP this function generates a new copy of it and puts it
-   on E which is either the entry or exit of LOOP.  */
+   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
+   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
+   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
+   entry or exit of LOOP.  */
 
 struct loop *
-slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
+slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
+                                       struct loop *scalar_loop, edge e)
 {
   struct loop *new_loop;
   basic_block *new_bbs, *bbs;
   bool at_exit;
   bool was_imm_dom;
   basic_block exit_dest;
-  gimple phi;
-  tree phi_arg;
   edge exit, new_exit;
-  gimple_stmt_iterator gsi;
 
-  at_exit = (e == single_exit (loop));
+  exit = single_exit (loop);
+  at_exit = (e == exit);
   if (!at_exit && e != loop_preheader_edge (loop))
     return NULL;
 
-  bbs = get_loop_body (loop);
+  if (scalar_loop == NULL)
+    scalar_loop = loop;
+
+  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
+  get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
 
   /* Check whether duplication is possible.  */
-  if (!can_copy_bbs_p (bbs, loop->num_nodes))
+  if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
     {
       free (bbs);
       return NULL;
     }
 
   /* Generate new loop structure.  */
-  new_loop = duplicate_loop (loop, loop_outer (loop));
-  if (!new_loop)
-    {
-      free (bbs);
-      return NULL;
-    }
+  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
+  duplicate_subloops (scalar_loop, new_loop);
 
-  exit_dest = single_exit (loop)->dest;
+  exit_dest = exit->dest;
   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);
 
-  new_bbs = XNEWVEC (basic_block, loop->num_nodes);
+  /* Also copy the pre-header, this avoids jumping through hoops to
+     duplicate the loop entry PHI arguments.  Create an empty
+     pre-header unconditionally for this.  */
+  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
+  edge entry_e = single_pred_edge (preheader);
+  bbs[scalar_loop->num_nodes] = preheader;
+  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 
-  exit = single_exit (loop);
-  copy_bbs (bbs, loop->num_nodes, new_bbs,
+  exit = single_exit (scalar_loop);
+  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
            &exit, 1, &new_exit, NULL,
-           e->src);
-
-  /* Duplicating phi args at exit bbs as coming
-     also from exit of duplicated loop.  */
-  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
-    {
-      phi = gsi_stmt (gsi);
-      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
-      if (phi_arg)
-       {
-         edge new_loop_exit_edge;
-         source_location locus;
+           e->src, true);
+  exit = single_exit (loop);
+  basic_block new_preheader = new_bbs[scalar_loop->num_nodes];
 
-         locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop));
-         if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
-           new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
-         else
-           new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
+  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
 
-         add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus);
-       }
+  if (scalar_loop != loop)
+    {
+      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
+        SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
+        but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
+        the LOOP SSA_NAMEs (on the exit edge and edge from latch to
+        header) to have current_def set, so copy them over.  */
+      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
+                                               exit);
+      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
+                                                          0),
+                                               EDGE_SUCC (loop->latch, 0));
     }
 
   if (at_exit) /* Add the loop copy at exit.  */
     {
-      redirect_edge_and_branch_force (e, new_loop->header);
-      PENDING_STMT (e) = NULL;
-      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
+      if (scalar_loop != loop)
+       {
+         gphi_iterator gsi;
+         new_exit = redirect_edge_and_branch (new_exit, exit_dest);
+
+         for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
+              gsi_next (&gsi))
+           {
+             gphi *phi = gsi.phi ();
+             tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
+             location_t orig_locus
+               = gimple_phi_arg_location_from_edge (phi, e);
+
+             add_phi_arg (phi, orig_arg, new_exit, orig_locus);
+           }
+       }
+      redirect_edge_and_branch_force (e, new_preheader);
+      flush_pending_stmts (e);
+      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
       if (was_imm_dom)
-       set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
+       set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
+
+      /* And remove the non-necessary forwarder again.  Keep the other
+         one so we have a proper pre-header for the loop at the exit edge.  */
+      redirect_edge_pred (single_succ_edge (preheader),
+                         single_pred (preheader));
+      delete_basic_block (preheader);
+      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
+                              loop_preheader_edge (scalar_loop)->src);
     }
   else /* Add the copy at entry.  */
     {
-      edge new_exit_e;
-      edge entry_e = loop_preheader_edge (loop);
-      basic_block preheader = entry_e->src;
-
-      if (!flow_bb_inside_loop_p (new_loop,
-                                 EDGE_SUCC (new_loop->header, 0)->dest))
-        new_exit_e = EDGE_SUCC (new_loop->header, 0);
-      else
-       new_exit_e = EDGE_SUCC (new_loop->header, 1);
-
-      redirect_edge_and_branch_force (new_exit_e, loop->header);
-      PENDING_STMT (new_exit_e) = NULL;
-      set_immediate_dominator (CDI_DOMINATORS, loop->header,
-                              new_exit_e->src);
-
-      /* We have to add phi args to the loop->header here as coming
-        from new_exit_e edge.  */
-      for (gsi = gsi_start_phis (loop->header);
-           !gsi_end_p (gsi);
-           gsi_next (&gsi))
+      if (scalar_loop != loop)
        {
-         phi = gsi_stmt (gsi);
-         phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
-         if (phi_arg)
-           add_phi_arg (phi, phi_arg, new_exit_e,
-                        gimple_phi_arg_location_from_edge (phi, entry_e));
+         /* Remove the non-necessary forwarder of scalar_loop again.  */
+         redirect_edge_pred (single_succ_edge (preheader),
+                             single_pred (preheader));
+         delete_basic_block (preheader);
+         set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
+                                  loop_preheader_edge (scalar_loop)->src);
+         preheader = split_edge (loop_preheader_edge (loop));
+         entry_e = single_pred_edge (preheader);
        }
 
-      redirect_edge_and_branch_force (entry_e, new_loop->header);
-      PENDING_STMT (entry_e) = NULL;
-      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
+      redirect_edge_and_branch_force (entry_e, new_preheader);
+      flush_pending_stmts (entry_e);
+      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
+
+      redirect_edge_and_branch_force (new_exit, preheader);
+      flush_pending_stmts (new_exit);
+      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
+
+      /* And remove the non-necessary forwarder again.  Keep the other
+         one so we have a proper pre-header for the loop at the exit edge.  */
+      redirect_edge_pred (single_succ_edge (new_preheader),
+                         single_pred (new_preheader));
+      delete_basic_block (new_preheader);
+      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
+                              loop_preheader_edge (new_loop)->src);
+    }
+
+  for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
+    rename_variables_in_bb (new_bbs[i]);
+
+  if (scalar_loop != loop)
+    {
+      /* Update new_loop->header PHIs, so that on the preheader
+        edge they are the ones from loop rather than scalar_loop.  */
+      gphi_iterator gsi_orig, gsi_new;
+      edge orig_e = loop_preheader_edge (loop);
+      edge new_e = loop_preheader_edge (new_loop);
+
+      for (gsi_orig = gsi_start_phis (loop->header),
+          gsi_new = gsi_start_phis (new_loop->header);
+          !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
+          gsi_next (&gsi_orig), gsi_next (&gsi_new))
+       {
+         gphi *orig_phi = gsi_orig.phi ();
+         gphi *new_phi = gsi_new.phi ();
+         tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
+         location_t orig_locus
+           = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
+
+         add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
+       }
     }
 
   free (new_bbs);
   free (bbs);
 
+#ifdef ENABLE_CHECKING
+  verify_dominators (CDI_DOMINATORS);
+#endif
+
   return new_loop;
 }
 
@@ -929,11 +942,12 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
 static edge
 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
                       gimple_seq cond_expr_stmt_list,
-                      basic_block exit_bb, basic_block dom_bb)
+                      basic_block exit_bb, basic_block dom_bb,
+                      int probability)
 {
   gimple_stmt_iterator gsi;
   edge new_e, enter_e;
-  gimple cond_stmt;
+  gcond *cond_stmt;
   gimple_seq gimplify_stmt_list = NULL;
 
   enter_e = EDGE_SUCC (guard_bb, 0);
@@ -954,6 +968,12 @@ slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 
   /* Add new edge to connect guard block to the merge/loop-exit block.  */
   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
+
+  new_e->count = guard_bb->count;
+  new_e->probability = probability;
+  new_e->count = apply_probability (enter_e->count, probability);
+  enter_e->count -= new_e->count;
+  enter_e->probability = inverse_probability (probability);
   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
   return new_e;
 }
@@ -972,12 +992,9 @@ slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 {
   edge exit_e = single_exit (loop);
   edge entry_e = loop_preheader_edge (loop);
-  gimple orig_cond = get_loop_exit_condition (loop);
+  gcond *orig_cond = get_loop_exit_condition (loop);
   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 
-  if (need_ssa_update_p (cfun))
-    return false;
-
   if (loop->inner
       /* All loops have an outer scope; the only case loop->outer is NULL is for
          the function itself.  */
@@ -1036,25 +1053,26 @@ static void
 set_prologue_iterations (basic_block bb_before_first_loop,
                         tree *first_niters,
                         struct loop *loop,
-                        unsigned int th)
+                        unsigned int th,
+                        int probability)
 {
   edge e;
   basic_block cond_bb, then_bb;
   tree var, prologue_after_cost_adjust_name;
   gimple_stmt_iterator gsi;
-  gimple newphi;
+  gphi *newphi;
   edge e_true, e_false, e_fallthru;
-  gimple cond_stmt;
+  gcond *cond_stmt;
   gimple_seq stmts = NULL;
   tree cost_pre_condition = NULL_TREE;
   tree scalar_loop_iters =
     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
 
   e = single_pred_edge (bb_before_first_loop);
-  cond_bb = split_edge(e);
+  cond_bb = split_edge (e);
 
   e = single_pred_edge (bb_before_first_loop);
-  then_bb = split_edge(e);
+  then_bb = split_edge (e);
   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
 
   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
@@ -1065,7 +1083,15 @@ set_prologue_iterations (basic_block bb_before_first_loop,
   e_true->flags &= ~EDGE_FALLTHRU;
   e_true->flags |= EDGE_TRUE_VALUE;
 
+  e_true->probability = probability;
+  e_false->probability = inverse_probability (probability);
+  e_true->count = apply_probability (cond_bb->count, probability);
+  e_false->count = cond_bb->count - e_true->count;
+  then_bb->frequency = EDGE_FREQUENCY (e_true);
+  then_bb->count = e_true->count;
+
   e_fallthru = EDGE_SUCC (then_bb, 0);
+  e_fallthru->count = then_bb->count;
 
   gsi = gsi_last_bb (cond_bb);
   cost_pre_condition =
@@ -1107,6 +1133,8 @@ set_prologue_iterations (basic_block bb_before_first_loop,
 
    Input:
    - LOOP: the loop to be peeled.
+   - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
+       should be copied.
    - E: the exit or entry edge of LOOP.
         If it is the entry edge, we peel the first iterations of LOOP. In this
         case first-loop is LOOP, and second-loop is the newly created loop.
@@ -1124,6 +1152,8 @@ set_prologue_iterations (basic_block bb_before_first_loop,
                          prologue generation or whether cost model check
                          has not occurred during prologue generation and hence
                          needs to occur during epilogue generation.
+   - BOUND1 is the upper bound on number of iterations of the first loop (if known)
+   - BOUND2 is the upper bound on number of iterations of the second loop (if known)
 
 
    Output:
@@ -1146,12 +1176,13 @@ set_prologue_iterations (basic_block bb_before_first_loop,
    FORNOW the resulting code will not be in loop-closed-ssa form.
 */
 
-static struct loop*
-slpeel_tree_peel_loop_to_edge (struct loop *loop,
+static struct loop *
+slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
                               edge e, tree *first_niters,
                               tree niters, bool update_first_loop_count,
                               unsigned int th, bool check_profitability,
-                              tree cond_expr, gimple_seq cond_expr_stmt_list)
+                              tree cond_expr, gimple_seq cond_expr_stmt_list,
+                              int bound1, int bound2)
 {
   struct loop *new_loop = NULL, *first_loop, *second_loop;
   edge skip_e;
@@ -1160,14 +1191,29 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   basic_block bb_before_first_loop;
   basic_block bb_between_loops;
   basic_block new_exit_bb;
-  gimple_stmt_iterator gsi;
+  gphi_iterator gsi;
   edge exit_e = single_exit (loop);
-  LOC loop_loc;
-  tree cost_pre_condition = NULL_TREE;
+  source_location loop_loc;
+  /* There are many aspects to how likely the first loop is going to be executed.
+     Without histogram we can't really do good job.  Simply set it to
+     2/3, so the first loop is not reordered to the end of function and
+     the hot path through stays short.  */
+  int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
+  int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
+  int probability_of_second_loop;
 
   if (!slpeel_can_duplicate_loop_p (loop, e))
     return NULL;
 
+  /* We might have a queued need to update virtual SSA form.  As we
+     delete the update SSA machinery below after doing a regular
+     incremental SSA update during loop copying make sure we don't
+     lose that fact.
+     ???  Needing to update virtual SSA form by renaming is unfortunate
+     but not all of the vectorizer code inserting new loads / stores
+     properly assigns virtual operands to those statements.  */
+  update_ssa (TODO_update_ssa_only_virtuals);
   /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
      in the exit bb and rename all the uses after the loop.  This simplifies
      the *guard[12] routines, which assume loop closed SSA form for all PHIs
@@ -1175,17 +1221,17 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
      in the same form).  Doing this early simplifies the checking what
      uses should be renamed.  */
   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
-    if (!is_gimple_reg (gimple_phi_result (gsi_stmt (gsi))))
+    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
       {
-       gimple phi = gsi_stmt (gsi);
+       gphi *phi = gsi.phi ();
        for (gsi = gsi_start_phis (exit_e->dest);
             !gsi_end_p (gsi); gsi_next (&gsi))
-         if (!is_gimple_reg (gimple_phi_result (gsi_stmt (gsi))))
+         if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
            break;
        if (gsi_end_p (gsi))
          {
-           tree new_vop = copy_ssa_name (PHI_RESULT (phi), NULL);
-           gimple new_phi = create_phi_node (new_vop, exit_e->dest);
+           tree new_vop = copy_ssa_name (PHI_RESULT (phi));
+           gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
            tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
            imm_use_iterator imm_iter;
            gimple stmt;
@@ -1215,23 +1261,19 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
         orig_exit_bb:
    */
 
-  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
+  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
+                                                          e)))
     {
       loop_loc = find_loop_location (loop);
-      if (dump_file && (dump_flags & TDF_DETAILS))
-        {
-          if (loop_loc != UNKNOWN_LOC)
-            fprintf (dump_file, "\n%s:%d: note: ",
-                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
-          fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
-        }
+      dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
+                       "tree_duplicate_loop_to_edge_cfg failed.\n");
       return NULL;
     }
 
   if (MAY_HAVE_DEBUG_STMTS)
     {
-      gcc_assert (!adjust_vec);
-      adjust_vec = VEC_alloc (adjust_info, stack, 32);
+      gcc_assert (!adjust_vec.exists ());
+      adjust_vec.create (32);
     }
 
   if (e == exit_e)
@@ -1247,10 +1289,6 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
       second_loop = loop;
     }
 
-  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
-  rename_variables_in_loop (new_loop);
-
-
   /* 2.  Add the guard code in one of the following ways:
 
      2.a Add the guard that controls whether the first loop is executed.
@@ -1337,26 +1375,38 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   */
 
   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
-  bb_before_second_loop = split_edge (single_exit (first_loop));
+  /* Loop copying insterted a forwarder block for us here.  */
+  bb_before_second_loop = single_exit (first_loop)->dest;
+
+  probability_of_second_loop = (inverse_probability (first_guard_probability)
+                               + combine_probabilities (second_guard_probability,
+                                                         first_guard_probability));
+  /* Theoretically preheader edge of first loop and exit edge should have
+     same frequencies.  Loop exit probablities are however easy to get wrong.
+     It is safer to copy value from original loop entry.  */
+  bb_before_second_loop->frequency
+     = combine_probabilities (bb_before_first_loop->frequency,
+                              probability_of_second_loop);
+  bb_before_second_loop->count
+     = apply_probability (bb_before_first_loop->count,
+                         probability_of_second_loop);
+  single_succ_edge (bb_before_second_loop)->count
+     = bb_before_second_loop->count;
 
   /* Epilogue peeling.  */
   if (!update_first_loop_count)
     {
+      loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
+      tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
+      unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
+      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+       limit = limit + 1;
+      if (check_profitability
+         && th > limit)
+       limit = th;
       pre_condition =
-       fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
-                    build_int_cst (TREE_TYPE (*first_niters), 0));
-      if (check_profitability)
-       {
-         tree scalar_loop_iters
-           = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
-                                       (loop_vec_info_for_loop (loop)));
-         cost_pre_condition =
-           fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
-                        build_int_cst (TREE_TYPE (scalar_loop_iters), th));
-
-         pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
-                                      cost_pre_condition, pre_condition);
-       }
+       fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
+                    build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
       if (cond_expr)
        {
          pre_condition =
@@ -1372,7 +1422,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
     {
       if (check_profitability)
        set_prologue_iterations (bb_before_first_loop, first_niters,
-                                loop, th);
+                                loop, th, first_guard_probability);
 
       pre_condition =
        fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
@@ -1381,7 +1431,10 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
 
   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                  cond_expr_stmt_list,
-                                  bb_before_second_loop, bb_before_first_loop);
+                                  bb_before_second_loop, bb_before_first_loop,
+                                 inverse_probability (first_guard_probability));
+  scale_loop_profile (first_loop, first_guard_probability,
+                     check_profitability && (int)th > bound1 ? th : bound1);
   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
                                      first_loop == new_loop,
                                      &new_exit_bb);
@@ -1419,7 +1472,9 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   pre_condition =
        fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
-                                  bb_after_second_loop, bb_before_first_loop);
+                                  bb_after_second_loop, bb_before_first_loop,
+                                 inverse_probability (second_guard_probability));
+  scale_loop_profile (second_loop, probability_of_second_loop, bound2);
   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
                                      second_loop == new_loop, &new_exit_bb);
 
@@ -1442,7 +1497,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
    location is calculated.
    Return the loop location if succeed and NULL if not.  */
 
-LOC
+source_location
 find_loop_location (struct loop *loop)
 {
   gimple stmt = NULL;
@@ -1450,177 +1505,33 @@ find_loop_location (struct loop *loop)
   gimple_stmt_iterator si;
 
   if (!loop)
-    return UNKNOWN_LOC;
+    return UNKNOWN_LOCATION;
 
   stmt = get_loop_exit_condition (loop);
 
-  if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
+  if (stmt
+      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
     return gimple_location (stmt);
 
   /* If we got here the loop is probably not "well formed",
      try to estimate the loop location */
 
   if (!loop->header)
-    return UNKNOWN_LOC;
+    return UNKNOWN_LOCATION;
 
   bb = loop->header;
 
   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
     {
       stmt = gsi_stmt (si);
-      if (gimple_location (stmt) != UNKNOWN_LOC)
+      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
         return gimple_location (stmt);
     }
 
-  return UNKNOWN_LOC;
-}
-
-
-/* This function builds ni_name = number of iterations loop executes
-   on the loop preheader.  If SEQ is given the stmt is instead emitted
-   there.  */
-
-static tree
-vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
-{
-  tree ni_name, var;
-  gimple_seq stmts = NULL;
-  edge pe;
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
-
-  var = create_tmp_var (TREE_TYPE (ni), "niters");
-  ni_name = force_gimple_operand (ni, &stmts, false, var);
-
-  pe = loop_preheader_edge (loop);
-  if (stmts)
-    {
-      if (seq)
-       gimple_seq_add_seq (&seq, stmts);
-      else
-       {
-         basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
-         gcc_assert (!new_bb);
-       }
-    }
-
-  return ni_name;
+  return UNKNOWN_LOCATION;
 }
 
 
-/* This function generates the following statements:
-
- ni_name = number of iterations loop executes
- ratio = ni_name / vf
- ratio_mult_vf_name = ratio * vf
-
- and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
- if that is non-NULL.  */
-
-static void
-vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
-                                tree *ni_name_ptr,
-                                tree *ratio_mult_vf_name_ptr,
-                                tree *ratio_name_ptr,
-                                gimple_seq cond_expr_stmt_list)
-{
-
-  edge pe;
-  basic_block new_bb;
-  gimple_seq stmts;
-  tree ni_name, ni_minus_gap_name;
-  tree var;
-  tree ratio_name;
-  tree ratio_mult_vf_name;
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
-  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  tree log_vf;
-
-  pe = loop_preheader_edge (loop);
-
-  /* Generate temporary variable that contains
-     number of iterations loop executes.  */
-
-  ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
-  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
-
-  /* If epilogue loop is required because of data accesses with gaps, we
-     subtract one iteration from the total number of iterations here for
-     correct calculation of RATIO.  */
-  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
-    {
-      ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
-                                      ni_name,
-                                      build_one_cst (TREE_TYPE (ni_name)));
-      if (!is_gimple_val (ni_minus_gap_name))
-       {
-         var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
-
-          stmts = NULL;
-          ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
-                                                   true, var);
-          if (cond_expr_stmt_list)
-            gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
-          else
-            {
-              pe = loop_preheader_edge (loop);
-              new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
-              gcc_assert (!new_bb);
-            }
-        }
-    }
-  else
-    ni_minus_gap_name = ni_name;
-
-  /* Create: ratio = ni >> log2(vf) */
-
-  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
-                           ni_minus_gap_name, log_vf);
-  if (!is_gimple_val (ratio_name))
-    {
-      var = create_tmp_var (TREE_TYPE (ni), "bnd");
-
-      stmts = NULL;
-      ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
-      if (cond_expr_stmt_list)
-       gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
-      else
-       {
-         pe = loop_preheader_edge (loop);
-         new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
-         gcc_assert (!new_bb);
-       }
-    }
-
-  /* Create: ratio_mult_vf = ratio << log2 (vf).  */
-
-  ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
-                                   ratio_name, log_vf);
-  if (!is_gimple_val (ratio_mult_vf_name))
-    {
-      var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
-
-      stmts = NULL;
-      ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
-                                                true, var);
-      if (cond_expr_stmt_list)
-       gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
-      else
-       {
-         pe = loop_preheader_edge (loop);
-         new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
-         gcc_assert (!new_bb);
-       }
-    }
-
-  *ni_name_ptr = ni_name;
-  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
-  *ratio_name_ptr = ratio_name;
-
-  return;
-}
-
 /* Function vect_can_advance_ivs_p
 
    In case the number of iterations that LOOP iterates is unknown at compile
@@ -1636,32 +1547,32 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block bb = loop->header;
   gimple phi;
-  gimple_stmt_iterator gsi;
+  gphi_iterator gsi;
 
   /* Analyze phi functions of the loop header.  */
 
-  if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "vect_can_advance_ivs_p:");
-
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     {
-      tree access_fn = NULL;
       tree evolution_part;
 
-      phi = gsi_stmt (gsi);
-      if (vect_print_dump_info (REPORT_DETAILS))
+      phi = gsi.phi ();
+      if (dump_enabled_p ())
        {
-          fprintf (vect_dump, "Analyze phi: ");
-          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
+          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
+          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
+          dump_printf (MSG_NOTE, "\n");
        }
 
       /* Skip virtual phi's. The data dependences that are associated with
          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
 
-      if (!is_gimple_reg (PHI_RESULT (phi)))
+      if (virtual_operand_p (PHI_RESULT (phi)))
        {
-         if (vect_print_dump_info (REPORT_DETAILS))
-           fprintf (vect_dump, "virtual phi. skip.");
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                             "virtual phi. skip.\n");
          continue;
        }
 
@@ -1669,35 +1580,21 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
 
       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
         {
-          if (vect_print_dump_info (REPORT_DETAILS))
-            fprintf (vect_dump, "reduc phi. skip.");
+          if (dump_enabled_p ())
+            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                             "reduc phi. skip.\n");
           continue;
         }
 
       /* Analyze the evolution function.  */
 
-      access_fn = instantiate_parameters
-       (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
-
-      if (!access_fn)
-       {
-         if (vect_print_dump_info (REPORT_DETAILS))
-           fprintf (vect_dump, "No Access function.");
-         return false;
-       }
-
-      if (vect_print_dump_info (REPORT_DETAILS))
-        {
-         fprintf (vect_dump, "Access function of PHI: ");
-         print_generic_expr (vect_dump, access_fn, TDF_SLIM);
-        }
-
-      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
-
+      evolution_part
+       = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
       if (evolution_part == NULL_TREE)
         {
-         if (vect_print_dump_info (REPORT_DETAILS))
-           fprintf (vect_dump, "No evolution.");
+         if (dump_enabled_p ())
+           dump_printf (MSG_MISSED_OPTIMIZATION,
+                        "No access function or evolution.\n");
          return false;
         }
 
@@ -1759,11 +1656,11 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block exit_bb = single_exit (loop)->dest;
-  gimple phi, phi1;
-  gimple_stmt_iterator gsi, gsi1;
+  gphi *phi, *phi1;
+  gphi_iterator gsi, gsi1;
   basic_block update_bb = update_e->dest;
 
-  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
+  gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
 
   /* Make sure there exists a single-predecessor exit bb:  */
   gcc_assert (single_pred_p (exit_bb));
@@ -1779,19 +1676,22 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
       gimple_stmt_iterator last_gsi;
       stmt_vec_info stmt_info;
 
-      phi = gsi_stmt (gsi);
-      phi1 = gsi_stmt (gsi1);
-      if (vect_print_dump_info (REPORT_DETAILS))
+      phi = gsi.phi ();
+      phi1 = gsi1.phi ();
+      if (dump_enabled_p ())
         {
-          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
-         print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
+          dump_printf_loc (MSG_NOTE, vect_location,
+                           "vect_update_ivs_after_vectorizer: phi: ");
+         dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
+          dump_printf (MSG_NOTE, "\n");
         }
 
       /* Skip virtual phi's.  */
-      if (!is_gimple_reg (PHI_RESULT (phi)))
+      if (virtual_operand_p (PHI_RESULT (phi)))
        {
-         if (vect_print_dump_info (REPORT_DETAILS))
-           fprintf (vect_dump, "virtual phi. skip.");
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                             "virtual phi. skip.\n");
          continue;
        }
 
@@ -1799,8 +1699,9 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
       stmt_info = vinfo_for_stmt (phi);
       if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
         {
-          if (vect_print_dump_info (REPORT_DETAILS))
-            fprintf (vect_dump, "reduc phi. skip.");
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                             "reduc phi. skip.\n");
           continue;
         }
 
@@ -1848,11 +1749,12 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
    test.  */
 
 void
-vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
+vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
+                               tree ni_name, tree ratio_mult_vf_name,
                                unsigned int th, bool check_profitability)
 {
-  tree ni_name, ratio_mult_vf_name;
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
   struct loop *new_loop;
   edge update_e;
   basic_block preheader;
@@ -1861,26 +1763,20 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
   tree cond_expr = NULL_TREE;
   gimple_seq cond_expr_stmt_list = NULL;
 
-  if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                     "=== vect_do_peeling_for_loop_bound ===\n");
 
   initialize_original_copy_tables ();
 
-  /* Generate the following variables on the preheader of original loop:
-
-     ni_name = number of iteration the original loop executes
-     ratio = ni_name / vf
-     ratio_mult_vf_name = ratio * vf  */
-  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
-                                  &ratio_mult_vf_name, ratio,
-                                  cond_expr_stmt_list);
-
   loop_num  = loop->num;
 
-  new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
-                                            &ratio_mult_vf_name, ni_name, false,
-                                            th, check_profitability,
-                                           cond_expr, cond_expr_stmt_list);
+  new_loop
+    = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
+                                    &ratio_mult_vf_name, ni_name, false,
+                                    th, check_profitability,
+                                    cond_expr, cond_expr_stmt_list,
+                                    0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
   gcc_assert (new_loop);
   gcc_assert (loop_num == loop->num);
 #ifdef ENABLE_CHECKING
@@ -1903,13 +1799,20 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
      by ratio_mult_vf_name steps.  */
   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
 
-  max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
+  /* For vectorization factor N, we need to copy last N-1 values in epilogue
+     and this means N-2 loopback edge executions.
+
+     PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
+     will execute at least LOOP_VINFO_VECT_FACTOR times.  */
+  max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+             ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
+             : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
   if (check_profitability)
-    max_iter = MAX (max_iter, (int) th);
-  record_niter_bound (new_loop, shwi_to_double_int (max_iter), false, true);
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "Setting upper bound of nb iterations for epilogue "
-            "loop to %d\n", max_iter);
+    max_iter = MAX (max_iter, (int) th - 1);
+  record_niter_bound (new_loop, max_iter, false, true);
+  dump_printf (MSG_NOTE,
+               "Setting upper bound of nb iterations for epilogue "
+               "loop to %d\n", max_iter);
 
   /* After peeling we have to reset scalar evolution analyzer.  */
   scev_reset ();
@@ -1949,7 +1852,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
    use TYPE_VECTOR_SUBPARTS.  */
 
 static tree
-vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
+vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
 {
   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -1967,14 +1870,16 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
 
   pe = loop_preheader_edge (loop);
 
-  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
+  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
     {
-      int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
 
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "known peeling = %d.", npeel);
+      if (dump_enabled_p ())
+        dump_printf_loc (MSG_NOTE, vect_location,
+                         "known peeling = %d.\n", npeel);
 
       iters = build_int_cst (niters_type, npeel);
+      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
     }
   else
     {
@@ -2013,6 +1918,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
        iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
       iters = fold_convert (niters_type, iters);
+      *bound = nelements;
     }
 
   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
@@ -2022,10 +1928,12 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
   if (TREE_CODE (loop_niters) != INTEGER_CST)
     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
 
-  if (vect_print_dump_info (REPORT_DETAILS))
+  if (dump_enabled_p ())
     {
-      fprintf (vect_dump, "niters for prolog loop: ");
-      print_generic_expr (vect_dump, iters, TDF_SLIM);
+      dump_printf_loc (MSG_NOTE, vect_location,
+                       "niters for prolog loop: ");
+      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
+      dump_printf (MSG_NOTE, "\n");
     }
 
   var = create_tmp_var (niters_type, "prolog_loop_niters");
@@ -2076,13 +1984,14 @@ static void
 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
 {
   unsigned int i;
-  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
   struct data_reference *dr;
+ if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                     "=== vect_update_inits_of_dr ===\n");
 
-  if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
-
-  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
+  FOR_EACH_VEC_ELT (datarefs, i, dr)
     vect_update_init_of_dr (dr, niters);
 }
 
@@ -2096,47 +2005,58 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
 
 void
-vect_do_peeling_for_alignment (loop_vec_info loop_vinfo,
+vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
                               unsigned int th, bool check_profitability)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  tree niters_of_prolog_loop, ni_name;
-  tree n_iters;
+  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
+  tree niters_of_prolog_loop;
   tree wide_prolog_niters;
   struct loop *new_loop;
   int max_iter;
+  int bound = 0;
 
-  if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
+                     "loop peeled for vectorization to enhance"
+                     " alignment\n");
 
   initialize_original_copy_tables ();
 
-  ni_name = vect_build_loop_niters (loop_vinfo, NULL);
+  gimple_seq stmts = NULL;
+  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
-                                                          ni_name);
+                                                          ni_name,
+                                                          &bound);
 
   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
   new_loop =
-    slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
+    slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
+                                  loop_preheader_edge (loop),
                                   &niters_of_prolog_loop, ni_name, true,
-                                  th, check_profitability, NULL_TREE, NULL);
+                                  th, check_profitability, NULL_TREE, NULL,
+                                  bound, 0);
 
   gcc_assert (new_loop);
 #ifdef ENABLE_CHECKING
   slpeel_verify_cfg_after_peeling (new_loop, loop);
 #endif
-  max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
+  /* For vectorization factor N, we need to copy at most N-1 values 
+     for alignment and this means N-2 loopback edge executions.  */
+  max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
   if (check_profitability)
-    max_iter = MAX (max_iter, (int) th);
-  record_niter_bound (new_loop, shwi_to_double_int (max_iter), false, true);
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "Setting upper bound of nb iterations for prologue "
-            "loop to %d\n", max_iter);
+    max_iter = MAX (max_iter, (int) th - 1);
+  record_niter_bound (new_loop, max_iter, false, true);
+  dump_printf (MSG_NOTE,
+               "Setting upper bound of nb iterations for prologue "
+               "loop to %d\n", max_iter);
 
   /* Update number of times loop executes.  */
-  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
-               TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
+               TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
+  LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
+               TREE_TYPE (ni_name),
+               LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
 
   if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
     wide_prolog_niters = niters_of_prolog_loop;
@@ -2197,7 +2117,7 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
                                   gimple_seq *cond_expr_stmt_list)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  VEC(gimple,heap) *may_misalign_stmts
+  vec<gimple> may_misalign_stmts
     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
   gimple ref_stmt;
   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
@@ -2206,7 +2126,7 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
   tree int_ptrsize_type;
   char tmp_name[20];
   tree or_tmp_name = NULL_TREE;
-  tree and_tmp, and_tmp_name;
+  tree and_tmp_name;
   gimple and_stmt;
   tree ptrsize_zero;
   tree part_cond_expr;
@@ -2220,12 +2140,12 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
      of the first vector of the i'th data reference. */
 
-  FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, ref_stmt)
+  FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
     {
       gimple_seq new_stmt_list = NULL;
       tree addr_base;
-      tree addr_tmp, addr_tmp_name;
-      tree or_tmp, new_or_tmp_name;
+      tree addr_tmp_name;
+      tree new_or_tmp_name;
       gimple addr_stmt, or_stmt;
       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
@@ -2241,12 +2161,9 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
       if (new_stmt_list != NULL)
        gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
 
-      sprintf (tmp_name, "%s%d", "addr2int", i);
-      addr_tmp = create_tmp_reg (int_ptrsize_type, tmp_name);
-      addr_tmp_name = make_ssa_name (addr_tmp, NULL);
-      addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
-                                               addr_base, NULL_TREE);
-      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
+      sprintf (tmp_name, "addr2int%d", i);
+      addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
+      addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
 
       /* The addresses are OR together.  */
@@ -2254,13 +2171,10 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
       if (or_tmp_name != NULL_TREE)
         {
           /* create: or_tmp = or_tmp | addr_tmp */
-          sprintf (tmp_name, "%s%d", "orptrs", i);
-          or_tmp = create_tmp_reg (int_ptrsize_type, tmp_name);
-         new_or_tmp_name = make_ssa_name (or_tmp, NULL);
-         or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
-                                                 new_or_tmp_name,
-                                                 or_tmp_name, addr_tmp_name);
-          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
+          sprintf (tmp_name, "orptrs%d", i);
+         new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
+         or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
+                                        or_tmp_name, addr_tmp_name);
          gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
           or_tmp_name = new_or_tmp_name;
         }
@@ -2272,12 +2186,10 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
   mask_cst = build_int_cst (int_ptrsize_type, mask);
 
   /* create: and_tmp = or_tmp & mask  */
-  and_tmp = create_tmp_reg (int_ptrsize_type, "andmask" );
-  and_tmp_name = make_ssa_name (and_tmp, NULL);
+  and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
 
-  and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
-                                          or_tmp_name, mask_cst);
-  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
+  and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
+                                 or_tmp_name, mask_cst);
   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
 
   /* Make and_tmp the left operand of the conditional test against zero.
@@ -2292,44 +2204,6 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
     *cond_expr = part_cond_expr;
 }
 
-
-/* Function vect_vfa_segment_size.
-
-   Create an expression that computes the size of segment
-   that will be accessed for a data reference.  The functions takes into
-   account that realignment loads may access one more vector.
-
-   Input:
-     DR: The data reference.
-     LENGTH_FACTOR: segment length to consider.
-
-   Return an expression whose value is the size of segment which will be
-   accessed by DR.  */
-
-static tree
-vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
-{
-  tree segment_length;
-
-  if (integer_zerop (DR_STEP (dr)))
-    segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
-  else
-    segment_length = size_binop (MULT_EXPR,
-                                 fold_convert (sizetype, DR_STEP (dr)),
-                                 fold_convert (sizetype, length_factor));
-
-  if (vect_supportable_dr_alignment (dr, false)
-        == dr_explicit_realign_optimized)
-    {
-      tree vector_size = TYPE_SIZE_UNIT
-                         (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
-
-      segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
-    }
-  return segment_length;
-}
-
-
 /* Function vect_create_cond_for_alias_checks.
 
    Create a conditional expression that represents the run-time checks for
@@ -2338,34 +2212,24 @@ vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
 
    Input:
    COND_EXPR  - input conditional expression.  New conditions will be chained
-                with logical AND operation.
+                with logical AND operation.  If it is NULL, then the function
+                is used to return the number of alias checks.
    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
                to be checked.
 
    Output:
    COND_EXPR - conditional expression.
-   COND_EXPR_STMT_LIST - statements needed to construct the conditional
-                         expression.
-
 
-   The returned value is the conditional expression to be used in the if
+   The returned COND_EXPR is the conditional expression to be used in the if
    statement that controls which version of the loop gets executed at runtime.
 */
 
-static void
-vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
-                                  tree * cond_expr,
-                                  gimple_seq * cond_expr_stmt_list)
+void
+vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
 {
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  VEC (ddr_p, heap) * may_alias_ddrs =
-    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
-  int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
-
-  ddr_p ddr;
-  unsigned int i;
-  tree part_cond_expr, length_factor;
+  vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
+    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
+  tree part_cond_expr;
 
   /* Create expression
      ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
@@ -2376,68 +2240,51 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
      ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
      || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */
 
-  if (VEC_empty (ddr_p, may_alias_ddrs))
+  if (comp_alias_ddrs.is_empty ())
     return;
 
-  FOR_EACH_VEC_ELT (ddr_p, may_alias_ddrs, i, ddr)
+  for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
     {
-      struct data_reference *dr_a, *dr_b;
-      gimple dr_group_first_a, dr_group_first_b;
-      tree addr_base_a, addr_base_b;
-      tree segment_length_a, segment_length_b;
-      gimple stmt_a, stmt_b;
-      tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
-
-      dr_a = DDR_A (ddr);
-      stmt_a = DR_STMT (DDR_A (ddr));
-      dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
-      if (dr_group_first_a)
-        {
-         stmt_a = dr_group_first_a;
-         dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
-       }
-
-      dr_b = DDR_B (ddr);
-      stmt_b = DR_STMT (DDR_B (ddr));
-      dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
-      if (dr_group_first_b)
-        {
-         stmt_b = dr_group_first_b;
-         dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
-       }
-
-      addr_base_a =
-        vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
-                                             NULL_TREE, loop);
-      addr_base_b =
-        vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
-                                             NULL_TREE, loop);
+      const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
+      const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
+      tree segment_length_a = dr_a.seg_len;
+      tree segment_length_b = dr_b.seg_len;
 
-      if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
-       length_factor = scalar_loop_iters;
-      else
-       length_factor = size_int (vect_factor);
-      segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
-      segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
+      tree addr_base_a
+       = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
+      tree addr_base_b
+       = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);
 
-      if (vect_print_dump_info (REPORT_DR_DETAILS))
+      if (dump_enabled_p ())
        {
-         fprintf (vect_dump,
-                  "create runtime check for data references ");
-         print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
-         fprintf (vect_dump, " and ");
-         print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
+         dump_printf_loc (MSG_NOTE, vect_location,
+                          "create runtime check for data references ");
+         dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
+         dump_printf (MSG_NOTE, " and ");
+         dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
+         dump_printf (MSG_NOTE, "\n");
        }
 
-      seg_a_min = addr_base_a;
-      seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
-      if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
-       seg_a_min = seg_a_max, seg_a_max = addr_base_a;
+      tree seg_a_min = addr_base_a;
+      tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
+      /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
+        bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
+        [a, a+12) */
+      if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
+       {
+         tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
+         seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
+         seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
+       }
 
-      seg_b_min = addr_base_b;
-      seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
-      if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
-       seg_b_min = seg_b_max, seg_b_max = addr_base_b;
+      tree seg_b_min = addr_base_b;
+      tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
+      if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
+       {
+         tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
+         seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
+         seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
+       }
 
       part_cond_expr =
        fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
@@ -2451,9 +2298,12 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
        *cond_expr = part_cond_expr;
     }
 
-  if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
-    fprintf (vect_dump, "created %u versioning for alias checks.\n",
-             VEC_length (ddr_p, may_alias_ddrs));
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "created %u versioning for alias checks.\n",
+                    comp_alias_ddrs.length ());
+
+  comp_alias_ddrs.release ();
 }
 
 
@@ -2481,18 +2331,22 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
                      unsigned int th, bool check_profitability)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
   basic_block condition_bb;
-  gimple_stmt_iterator gsi, cond_exp_gsi;
+  gphi_iterator gsi;
+  gimple_stmt_iterator cond_exp_gsi;
   basic_block merge_bb;
   basic_block new_exit_bb;
   edge new_exit_e, e;
-  gimple orig_phi, new_phi;
+  gphi *orig_phi, *new_phi;
   tree cond_expr = NULL_TREE;
   gimple_seq cond_expr_stmt_list = NULL;
   tree arg;
   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
   gimple_seq gimplify_stmt_list = NULL;
   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
+  bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
+  bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
 
   if (check_profitability)
     {
@@ -2502,22 +2356,70 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
                                          is_gimple_condexpr, NULL_TREE);
     }
 
-  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
+  if (version_align)
     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
                                       &cond_expr_stmt_list);
 
-  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
-    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
-                                      &cond_expr_stmt_list);
+  if (version_alias)
+    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
 
   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
                                      is_gimple_condexpr, NULL_TREE);
   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 
   initialize_original_copy_tables ();
-  loop_version (loop, cond_expr, &condition_bb,
-               prob, prob, REG_BR_PROB_BASE - prob, true);
-  free_original_copy_tables();
+  if (scalar_loop)
+    {
+      edge scalar_e;
+      basic_block preheader, scalar_preheader;
+
+      /* We don't want to scale SCALAR_LOOP's frequencies, we need to
+        scale LOOP's frequencies instead.  */
+      loop_version (scalar_loop, cond_expr, &condition_bb,
+                   prob, REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
+      scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
+      /* CONDITION_BB was created above SCALAR_LOOP's preheader,
+        while we need to move it above LOOP's preheader.  */
+      e = loop_preheader_edge (loop);
+      scalar_e = loop_preheader_edge (scalar_loop);
+      gcc_assert (empty_block_p (e->src)
+                 && single_pred_p (e->src));
+      gcc_assert (empty_block_p (scalar_e->src)
+                 && single_pred_p (scalar_e->src));
+      gcc_assert (single_pred_p (condition_bb));
+      preheader = e->src;
+      scalar_preheader = scalar_e->src;
+      scalar_e = find_edge (condition_bb, scalar_preheader);
+      e = single_pred_edge (preheader);
+      redirect_edge_and_branch_force (single_pred_edge (condition_bb),
+                                     scalar_preheader);
+      redirect_edge_and_branch_force (scalar_e, preheader);
+      redirect_edge_and_branch_force (e, condition_bb);
+      set_immediate_dominator (CDI_DOMINATORS, condition_bb,
+                              single_pred (condition_bb));
+      set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
+                              single_pred (scalar_preheader));
+      set_immediate_dominator (CDI_DOMINATORS, preheader,
+                              condition_bb);
+    }
+  else
+    loop_version (loop, cond_expr, &condition_bb,
+                 prob, prob, REG_BR_PROB_BASE - prob, true);
+
+  if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
+      && dump_enabled_p ())
+    {
+      if (version_alias)
+        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
+                         "loop versioned for vectorization because of "
+                        "possible aliasing\n");
+      if (version_align)
+        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
+                         "loop versioned for vectorization to enhance "
+                        "alignment\n");
+
+    }
+  free_original_copy_tables ();
 
   /* Loop versioning violates an assumption we try to maintain during
      vectorization - that the loop exit block has a single predecessor.
@@ -2525,34 +2427,38 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
      basic block (i.e. it has two predecessors). Just in order to simplify
      following transformations in the vectorizer, we fix this situation
      here by adding a new (empty) block on the exit-edge of the loop,
-     with the proper loop-exit phis to maintain loop-closed-form.  */
+     with the proper loop-exit phis to maintain loop-closed-form.
+     If loop versioning wasn't done from loop, but scalar_loop instead,
+     merge_bb will have already just a single successor.  */
 
   merge_bb = single_exit (loop)->dest;
-  gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
-  new_exit_bb = split_edge (single_exit (loop));
-  new_exit_e = single_exit (loop);
-  e = EDGE_SUCC (new_exit_bb, 0);
-
-  for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
+  if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
     {
-      tree new_res;
-      orig_phi = gsi_stmt (gsi);
-      new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
-      new_phi = create_phi_node (new_res, new_exit_bb);
-      arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
-      add_phi_arg (new_phi, arg, new_exit_e,
-                  gimple_phi_arg_location_from_edge (orig_phi, e));
-      adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
+      gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
+      new_exit_bb = split_edge (single_exit (loop));
+      new_exit_e = single_exit (loop);
+      e = EDGE_SUCC (new_exit_bb, 0);
+
+      for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
+       {
+         tree new_res;
+         orig_phi = gsi.phi ();
+         new_res = copy_ssa_name (PHI_RESULT (orig_phi));
+         new_phi = create_phi_node (new_res, new_exit_bb);
+         arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
+         add_phi_arg (new_phi, arg, new_exit_e,
+                      gimple_phi_arg_location_from_edge (orig_phi, e));
+         adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
+       }
     }
 
   /* End loop-exit-fixes after versioning.  */
 
-  update_ssa (TODO_update_ssa);
   if (cond_expr_stmt_list)
     {
       cond_exp_gsi = gsi_last_bb (condition_bb);
       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
                             GSI_SAME_STMT);
     }
+  update_ssa (TODO_update_ssa);
 }
-