diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 2f8e0c2c9d8246bf071bf2b561d28c96ca311b6d..d0e20da04a62cafda9d9c2dda2e96479ef6a2ffc 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1,5 +1,5 @@
 /* Data References Analysis and Manipulation Utilities for Vectorization.
-   Copyright (C) 2003-2014 Free Software Foundation, Inc.
+   Copyright (C) 2003-2016 Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com>
    and Ira Rosen <irar@il.ibm.com>
 
@@ -22,57 +22,33 @@ along with GCC; see the file COPYING3.  If not see
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
-#include "dumpfile.h"
-#include "tm.h"
-#include "tree.h"
-#include "stor-layout.h"
-#include "tm_p.h"
+#include "backend.h"
 #include "target.h"
+#include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
 #include "predict.h"
-#include "vec.h"
-#include "hashtab.h"
-#include "hash-set.h"
-#include "machmode.h"
-#include "hard-reg-set.h"
-#include "input.h"
-#include "function.h"
-#include "dominance.h"
-#include "cfg.h"
-#include "basic-block.h"
-#include "gimple-pretty-print.h"
-#include "tree-ssa-alias.h"
-#include "internal-fn.h"
+#include "tm_p.h"
+#include "ssa.h"
+#include "optabs-tree.h"
+#include "cgraph.h"
+#include "dumpfile.h"
+#include "alias.h"
+#include "fold-const.h"
+#include "stor-layout.h"
 #include "tree-eh.h"
-#include "gimple-expr.h"
-#include "is-a.h"
-#include "gimple.h"
 #include "gimplify.h"
 #include "gimple-iterator.h"
 #include "gimplify-me.h"
-#include "gimple-ssa.h"
-#include "tree-phinodes.h"
-#include "ssa-iterators.h"
-#include "stringpool.h"
-#include "tree-ssanames.h"
 #include "tree-ssa-loop-ivopts.h"
 #include "tree-ssa-loop-manip.h"
 #include "tree-ssa-loop.h"
-#include "dumpfile.h"
 #include "cfgloop.h"
-#include "tree-chrec.h"
 #include "tree-scalar-evolution.h"
 #include "tree-vectorizer.h"
-#include "diagnostic-core.h"
-#include "hash-map.h"
-#include "plugin-api.h"
-#include "ipa-ref.h"
-#include "cgraph.h"
-/* Need to include rtl.h, expr.h, etc. for optabs.  */
 #include "expr.h"
-#include "insn-codes.h"
-#include "optabs.h"
 #include "builtins.h"
-#include "varasm.h"
+#include "params.h"
 
 /* Return true if load- or store-lanes optab OPTAB is implemented for
    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
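
Illustration (not part of the patch): the load/store-lanes query above asks whether a whole interleaved group can be loaded or stored in one operation, as with vld2/vst2-style instructions on some targets.  A minimal C loop with such a two-lane access pattern might look like the following; the function and array names are made up.

    void
    split_lanes (float *re, float *im, const float *in, int n)
    {
      for (int i = 0; i < n; i++)
        {
          re[i] = in[2 * i];      /* lane 0 of a two-lane load group */
          im[i] = in[2 * i + 1];  /* lane 1 of the same group */
        }
    }
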
@@ -134,7 +110,7 @@ vect_lanes_optab_supported_p (const char *name, convert_optab optab,
    types.  */
 
 tree
-vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
+vect_get_smallest_scalar_type (gimple *stmt, HOST_WIDE_INT *lhs_size_unit,
                                HOST_WIDE_INT *rhs_size_unit)
 {
   tree scalar_type = gimple_expr_type (stmt);
@@ -276,8 +252,8 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
          return false;
        }
 
-      if (STMT_VINFO_GATHER_P (stmtinfo_a)
-         || STMT_VINFO_GATHER_P (stmtinfo_b))
+      if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
+         || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
        {
          if (dump_enabled_p ())
            {
@@ -324,8 +300,8 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
          return false;
        }
 
-      if (STMT_VINFO_GATHER_P (stmtinfo_a)
-         || STMT_VINFO_GATHER_P (stmtinfo_b))
+      if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
+         || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
        {
          if (dump_enabled_p ())
            {
@@ -398,7 +374,7 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
          if (STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
              || STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
            {
-             gimple earlier_stmt;
+             gimple *earlier_stmt;
              earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
              if (DR_IS_WRITE
                    (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
@@ -487,6 +463,9 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
     dump_printf_loc (MSG_NOTE, vect_location,
                      "=== vect_analyze_data_ref_dependences ===\n");
 
+  LOOP_VINFO_DDRS (loop_vinfo)
+    .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
+            * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
   LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
   if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
                                &LOOP_VINFO_DDRS (loop_vinfo),
@@ -558,24 +537,78 @@ vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
       dump_printf (MSG_NOTE,  "\n");
     }
 
-  /* We do not vectorize basic blocks with write-write dependencies.  */
-  if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
-    return true;
+  return true;
+}
 
-  /* If we have a read-write dependence check that the load is before the store.
-     When we vectorize basic blocks, vector load can be only before
-     corresponding scalar load, and vector store can be only after its
-     corresponding scalar store.  So the order of the acceses is preserved in
-     case the load is before the store.  */
-  gimple earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
-  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
+
+/* Analyze dependences involved in the transform of SLP NODE.  STORES
+   contain the vector of scalar stores of this instance if we are
+   disambiguating the loads.  */
+
+static bool
+vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node,
+                                  vec<gimple *> stores, gimple *last_store)
+{
+  /* This walks over all stmts involved in the SLP load/store done
+     in NODE verifying we can sink them up to the last stmt in the
+     group.  */
+  gimple *last_access = vect_find_last_scalar_stmt_in_slp (node);
+  for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
     {
-      /* That only holds for load-store pairs taking part in vectorization.  */
-      if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dra)))
-         && STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (drb))))
-       return false;
-    }
+      gimple *access = SLP_TREE_SCALAR_STMTS (node)[k];
+      if (access == last_access)
+       continue;
+      data_reference *dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (access));
+      for (gimple_stmt_iterator gsi = gsi_for_stmt (access);
+          gsi_stmt (gsi) != last_access; gsi_next (&gsi))
+       {
+         gimple *stmt = gsi_stmt (gsi);
+         if (! gimple_vuse (stmt)
+             || (DR_IS_READ (dr_a) && ! gimple_vdef (stmt)))
+           continue;
+
+         /* If we couldn't record a (single) data reference for this
+            stmt we have to give up.  */
+         /* ???  Here and below if dependence analysis fails we can resort
+            to the alias oracle which can handle more kinds of stmts.  */
+         data_reference *dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
+         if (!dr_b)
+           return false;
+
+         /* If we run into a store of this same instance (we've just
+            marked those) then delay dependence checking until we run
+            into the last store because this is where it will have
+            been sunk to (and we verify if we can do that as well).  */
+         if (gimple_visited_p (stmt))
+           {
+             if (stmt != last_store)
+               continue;
+             unsigned i;
+             gimple *store;
+             FOR_EACH_VEC_ELT (stores, i, store)
+               {
+                 data_reference *store_dr
+                   = STMT_VINFO_DATA_REF (vinfo_for_stmt (store));
+                 ddr_p ddr = initialize_data_dependence_relation
+                               (dr_a, store_dr, vNULL);
+                 if (vect_slp_analyze_data_ref_dependence (ddr))
+                   {
+                     free_dependence_relation (ddr);
+                     return false;
+                   }
+                 free_dependence_relation (ddr);
+               }
+           }
 
+         ddr_p ddr = initialize_data_dependence_relation (dr_a, dr_b, vNULL);
+         if (vect_slp_analyze_data_ref_dependence (ddr))
+           {
+             free_dependence_relation (ddr);
+             return false;
+           }
+         free_dependence_relation (ddr);
+       }
+    }
   return true;
 }
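
For reference, an assumed straight-line example of the situation the new vect_slp_analyze_node_dependences walk checks: every statement between a group member and the last member of the group must be shown not to conflict before the member can conceptually be sunk to the group's last statement.

    /* p[0] and p[1] form a basic-block SLP store group.  Before the store
       to p[0] can be sunk to the position of the last group member (the
       store to p[1]), the intervening load from q[1] must be proven not
       to conflict with it.  */
    void
    pair_store (int *p, const int *q)
    {
      int t0 = q[0];
      p[0] = t0 + 1;
      int t1 = q[1];   /* memory access between the two grouped stores */
      p[1] = t1 + 1;
    }
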
 
@@ -587,27 +620,53 @@ vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
    the maximum vectorization factor the data dependences allow.  */
 
 bool
-vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
+vect_slp_analyze_instance_dependence (slp_instance instance)
 {
-  struct data_dependence_relation *ddr;
-  unsigned int i;
-
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
-                     "=== vect_slp_analyze_data_ref_dependences ===\n");
+                     "=== vect_slp_analyze_instance_dependence ===\n");
 
-  if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
-                               &BB_VINFO_DDRS (bb_vinfo),
-                               vNULL, true))
-    return false;
+  /* The stores of this instance are at the root of the SLP tree.  */
+  slp_tree store = SLP_INSTANCE_TREE (instance);
+  if (! STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (store)[0])))
+    store = NULL;
 
-  FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo), i, ddr)
-    if (vect_slp_analyze_data_ref_dependence (ddr))
-      return false;
+  /* Verify we can sink stores to the vectorized stmt insert location.  */
+  gimple *last_store = NULL;
+  if (store)
+    {
+      if (! vect_slp_analyze_node_dependences (instance, store, vNULL, NULL))
+       return false;
 
-  return true;
-}
+      /* Mark stores in this instance and remember the last one.  */
+      last_store = vect_find_last_scalar_stmt_in_slp (store);
+      for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
+       gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], true);
+    }
+
+  bool res = true;
 
+  /* Verify we can sink loads to the vectorized stmt insert location,
+     special-casing stores of this instance.  */
+  slp_tree load;
+  unsigned int i;
+  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load)
+    if (! vect_slp_analyze_node_dependences (instance, load,
+                                            store
+                                            ? SLP_TREE_SCALAR_STMTS (store)
+                                            : vNULL, last_store))
+      {
+       res = false;
+       break;
+      }
+
+  /* Unset the visited flag.  */
+  if (store)
+    for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
+      gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], false);
+
+  return res;
+}
 
 /* Function vect_compute_data_ref_alignment
 
@@ -621,19 +680,19 @@ vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
    FOR NOW: No analysis is actually performed. Misalignment is calculated
    only for trivial cases. TODO.  */
 
-static bool
+bool
 vect_compute_data_ref_alignment (struct data_reference *dr)
 {
-  gimple stmt = DR_STMT (dr);
+  gimple *stmt = DR_STMT (dr);
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   struct loop *loop = NULL;
   tree ref = DR_REF (dr);
   tree vectype;
   tree base, base_addr;
-  bool base_aligned;
-  tree misalign;
-  tree aligned_to, alignment;
+  tree misalign = NULL_TREE;
+  tree aligned_to;
+  unsigned HOST_WIDE_INT alignment;
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -645,12 +704,8 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
   /* Initialize misalignment to unknown.  */
   SET_DR_MISALIGNMENT (dr, -1);
 
-  /* Strided loads perform only component accesses, misalignment information
-     is irrelevant for them.  */
-  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
-    return true;
-
-  misalign = DR_INIT (dr);
+  if (tree_fits_shwi_p (DR_STEP (dr)))
+    misalign = DR_INIT (dr);
   aligned_to = DR_ALIGNED_TO (dr);
   base_addr = DR_BASE_ADDRESS (dr);
   vectype = STMT_VINFO_VECTYPE (stmt_info);
@@ -664,9 +719,9 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
   if (loop && nested_in_vect_loop_p (loop, stmt))
     {
       tree step = DR_STEP (dr);
-      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
 
-      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
+      if (tree_fits_shwi_p (step)
+         && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
         {
           if (dump_enabled_p ())
             dump_printf_loc (MSG_NOTE, vect_location,
@@ -684,61 +739,63 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
        }
     }
 
-  /* Similarly, if we're doing basic-block vectorization, we can only use
-     base and misalignment information relative to an innermost loop if the
-     misalignment stays the same throughout the execution of the loop.
-     As above, this is the case if the stride of the dataref evenly divides
-     by the vector size.  */
-  if (!loop)
+  /* Similarly we can only use base and misalignment information relative to
+     an innermost loop if the misalignment stays the same throughout the
+     execution of the loop.  As above, this is the case if the stride of
+     the dataref evenly divides by the vector size.  */
+  else
     {
       tree step = DR_STEP (dr);
-      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
+      unsigned vf = loop ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
 
-      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
+      if (tree_fits_shwi_p (step)
+         && ((tree_to_shwi (step) * vf)
+             % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "SLP: step doesn't divide the vector-size.\n");
+                            "step doesn't divide the vector-size.\n");
          misalign = NULL_TREE;
        }
     }
 
-  base = build_fold_indirect_ref (base_addr);
-  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
+  /* To look at alignment of the base we have to preserve an inner MEM_REF
+     as that carries alignment information of the actual access.  */
+  base = ref;
+  while (handled_component_p (base))
+    base = TREE_OPERAND (base, 0);
+  if (TREE_CODE (base) == MEM_REF)
+    base = build2 (MEM_REF, TREE_TYPE (base), base_addr,
+                  build_int_cst (TREE_TYPE (TREE_OPERAND (base, 1)), 0));
+  unsigned int base_alignment = get_object_alignment (base);
+
+  if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))
+    DR_VECT_AUX (dr)->base_element_aligned = true;
+
+  alignment = TYPE_ALIGN_UNIT (vectype);
 
-  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
+  if ((compare_tree_int (aligned_to, alignment) < 0)
       || !misalign)
     {
       if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Unknown alignment for access: ");
-         dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
+         dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
       return true;
     }
 
-  if ((DECL_P (base)
-       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
-                               alignment) >= 0)
-      || (TREE_CODE (base_addr) == SSA_NAME
-         && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
-                                                     TREE_TYPE (base_addr)))),
-                                  alignment) >= 0)
-      || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
-    base_aligned = true;
-  else
-    base_aligned = false;
-
-  if (!base_aligned)
+  if (base_alignment < TYPE_ALIGN (vectype))
     {
-      /* Do not change the alignment of global variables here if
-        flag_section_anchors is enabled as we already generated
-        RTL for other functions.  Most global variables should
-        have been aligned during the IPA increase_alignment pass.  */
-      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
-         || (TREE_STATIC (base) && flag_section_anchors))
+      /* Strip an inner MEM_REF to a bare decl if possible.  */
+      if (TREE_CODE (base) == MEM_REF
+         && integer_zerop (TREE_OPERAND (base, 1))
+         && TREE_CODE (TREE_OPERAND (base, 0)) == ADDR_EXPR)
+       base = TREE_OPERAND (TREE_OPERAND (base, 0), 0);
+
+      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
        {
          if (dump_enabled_p ())
            {
@@ -760,14 +817,15 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
           dump_printf (MSG_NOTE, "\n");
         }
 
-      ((dataref_aux *)dr->aux)->base_decl = base;
-      ((dataref_aux *)dr->aux)->base_misaligned = true;
+      DR_VECT_AUX (dr)->base_decl = base;
+      DR_VECT_AUX (dr)->base_misaligned = true;
+      DR_VECT_AUX (dr)->base_element_aligned = true;
     }
 
   /* If this is a backward running DR then first access in the larger
      vectype actually is N-1 elements before the address in the DR.
      Adjust misalign accordingly.  */
-  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+  if (tree_int_cst_sgn (DR_STEP (dr)) < 0)
     {
       tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
       /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
@@ -777,19 +835,8 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
       misalign = size_binop (PLUS_EXPR, misalign, offset);
     }
 
-  /* Modulo alignment.  */
-  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
-
-  if (!tree_fits_uhwi_p (misalign))
-    {
-      /* Negative or overflowed misalignment value.  */
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "unexpected misalign value\n");
-      return false;
-    }
-
-  SET_DR_MISALIGNMENT (dr, tree_to_uhwi (misalign));
+  SET_DR_MISALIGNMENT (dr,
+                      wi::mod_floor (misalign, alignment, SIGNED).to_uhwi ());
 
   if (dump_enabled_p ())
     {
@@ -803,42 +850,6 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
 }
 
 
-/* Function vect_compute_data_refs_alignment
-
-   Compute the misalignment of data references in the loop.
-   Return FALSE if a data reference is found that cannot be vectorized.  */
-
-static bool
-vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
-                                  bb_vec_info bb_vinfo)
-{
-  vec<data_reference_p> datarefs;
-  struct data_reference *dr;
-  unsigned int i;
-
-  if (loop_vinfo)
-    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
-  else
-    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
-
-  FOR_EACH_VEC_ELT (datarefs, i, dr)
-    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
-        && !vect_compute_data_ref_alignment (dr))
-      {
-        if (bb_vinfo)
-          {
-            /* Mark unsupported statement as unvectorizable.  */
-            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
-            continue;
-          }
-        else
-          return false;
-      }
-
-  return true;
-}
-
-
 /* Function vect_update_misalignment_for_peel
 
    DR - the data reference whose misalignment is to be adjusted.
@@ -898,67 +909,76 @@ vect_update_misalignment_for_peel (struct data_reference *dr,
 }
 
 
+/* Function verify_data_ref_alignment
+
+   Return TRUE if DR can be handled with respect to alignment.  */
+
+static bool
+verify_data_ref_alignment (data_reference_p dr)
+{
+  enum dr_alignment_support supportable_dr_alignment
+    = vect_supportable_dr_alignment (dr, false);
+  if (!supportable_dr_alignment)
+    {
+      if (dump_enabled_p ())
+       {
+         if (DR_IS_READ (dr))
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "not vectorized: unsupported unaligned load.");
+         else
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "not vectorized: unsupported unaligned "
+                            "store.");
+
+         dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
+                            DR_REF (dr));
+         dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+       }
+      return false;
+    }
+
+  if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "Vectorizing an unaligned access.\n");
+
+  return true;
+}
+
 /* Function vect_verify_datarefs_alignment
 
    Return TRUE if all data references in the loop can be
    handled with respect to alignment.  */
 
 bool
-vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
+vect_verify_datarefs_alignment (loop_vec_info vinfo)
 {
-  vec<data_reference_p> datarefs;
+  vec<data_reference_p> datarefs = vinfo->datarefs;
   struct data_reference *dr;
-  enum dr_alignment_support supportable_dr_alignment;
   unsigned int i;
 
-  if (loop_vinfo)
-    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
-  else
-    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
-
   FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
-      gimple stmt = DR_STMT (dr);
+      gimple *stmt = DR_STMT (dr);
       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 
       if (!STMT_VINFO_RELEVANT_P (stmt_info))
        continue;
 
-      /* For interleaving, only the alignment of the first access matters. 
-         Skip statements marked as not vectorizable.  */
-      if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
-           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
-          || !STMT_VINFO_VECTORIZABLE (stmt_info))
-        continue;
+      /* For interleaving, only the alignment of the first access matters.   */
+      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+         && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
+       continue;
 
-      /* Strided loads perform only component accesses, alignment is
+      /* Strided accesses perform only component accesses, alignment is
         irrelevant for them.  */
-      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+      if (STMT_VINFO_STRIDED_P (stmt_info)
+         && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
        continue;
 
-      supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
-      if (!supportable_dr_alignment)
-        {
-          if (dump_enabled_p ())
-            {
-              if (DR_IS_READ (dr))
-                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "not vectorized: unsupported unaligned load.");
-              else
-                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "not vectorized: unsupported unaligned "
-                                 "store.");
-
-              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
-                                 DR_REF (dr));
-              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
-            }
-          return false;
-        }
-      if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
-        dump_printf_loc (MSG_NOTE, vect_location,
-                         "Vectorizing an unaligned access.\n");
+      if (! verify_data_ref_alignment (dr))
+       return false;
     }
+
   return true;
 }
 
@@ -983,7 +1003,7 @@ not_size_aligned (tree exp)
 static bool
 vector_alignment_reachable_p (struct data_reference *dr)
 {
-  gimple stmt = DR_STMT (dr);
+  gimple *stmt = DR_STMT (dr);
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 
@@ -1054,7 +1074,7 @@ vect_get_data_access_cost (struct data_reference *dr,
                            unsigned int *outside_cost,
                           stmt_vector_for_cost *body_cost_vec)
 {
-  gimple stmt = DR_STMT (dr);
+  gimple *stmt = DR_STMT (dr);
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
@@ -1074,10 +1094,48 @@ vect_get_data_access_cost (struct data_reference *dr,
 }
 
 
+typedef struct _vect_peel_info
+{
+  int npeel;
+  struct data_reference *dr;
+  unsigned int count;
+} *vect_peel_info;
+
+typedef struct _vect_peel_extended_info
+{
+  struct _vect_peel_info peel_info;
+  unsigned int inside_cost;
+  unsigned int outside_cost;
+  stmt_vector_for_cost body_cost_vec;
+} *vect_peel_extended_info;
+
+
+/* Peeling hashtable helpers.  */
+
+struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
+{
+  static inline hashval_t hash (const _vect_peel_info *);
+  static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
+};
+
+inline hashval_t
+peel_info_hasher::hash (const _vect_peel_info *peel_info)
+{
+  return (hashval_t) peel_info->npeel;
+}
+
+inline bool
+peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
+{
+  return (a->npeel == b->npeel);
+}
+
+
 /* Insert DR into peeling hash table with NPEEL as key.  */
 
 static void
-vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
+vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
+                         loop_vec_info loop_vinfo, struct data_reference *dr,
                           int npeel)
 {
   struct _vect_peel_info elem, *slot;
@@ -1085,7 +1143,7 @@ vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
   bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
 
   elem.npeel = npeel;
-  slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo)->find (&elem);
+  slot = peeling_htab->find (&elem);
   if (slot)
     slot->count++;
   else
@@ -1094,8 +1152,7 @@ vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
       slot->npeel = npeel;
       slot->dr = dr;
       slot->count = 1;
-      new_slot
-               = LOOP_VINFO_PEELING_HTAB (loop_vinfo)->find_slot (slot, INSERT);
+      new_slot = peeling_htab->find_slot (slot, INSERT);
       *new_slot = slot;
     }
 
@@ -1137,13 +1194,12 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
   vect_peel_info elem = *slot;
   int save_misalignment, dummy;
   unsigned int inside_cost = 0, outside_cost = 0, i;
-  gimple stmt = DR_STMT (elem->dr);
+  gimple *stmt = DR_STMT (elem->dr);
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
   struct data_reference *dr;
   stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
-  int single_iter_cost;
 
   prologue_cost_vec.create (2);
   body_cost_vec.create (2);
@@ -1159,6 +1215,12 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
         continue;
 
+      /* Strided accesses perform only component accesses, alignment is
+         irrelevant for them.  */
+      if (STMT_VINFO_STRIDED_P (stmt_info)
+         && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
+       continue;
+
       save_misalignment = DR_MISALIGNMENT (dr);
       vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
       vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
@@ -1166,11 +1228,10 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
       SET_DR_MISALIGNMENT (dr, save_misalignment);
     }
 
-  single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
-  outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
-                                              &dummy, single_iter_cost,
-                                              &prologue_cost_vec,
-                                              &epilogue_cost_vec);
+  outside_cost += vect_get_known_peeling_cost
+    (loop_vinfo, elem->npeel, &dummy,
+     &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
+     &prologue_cost_vec, &epilogue_cost_vec);
 
   /* Prologue and epilogue costs are added to the target model later.
      These costs depend only on the scalar iteration cost, the
@@ -1201,7 +1262,8 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
    option that aligns as many accesses as possible.  */
 
 static struct data_reference *
-vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
+vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
+                                      loop_vec_info loop_vinfo,
                                        unsigned int *npeel,
                                       stmt_vector_for_cost *body_cost_vec)
 {
@@ -1214,16 +1276,14 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
      {
        res.inside_cost = INT_MAX;
        res.outside_cost = INT_MAX;
-       LOOP_VINFO_PEELING_HTAB (loop_vinfo)
-           ->traverse <_vect_peel_extended_info *,
-                       vect_peeling_hash_get_lowest_cost> (&res);
+       peeling_htab->traverse <_vect_peel_extended_info *,
+                              vect_peeling_hash_get_lowest_cost> (&res);
      }
    else
      {
        res.peel_info.count = 0;
-       LOOP_VINFO_PEELING_HTAB (loop_vinfo)
-           ->traverse <_vect_peel_extended_info *,
-                       vect_peeling_hash_get_most_frequent> (&res);
+       peeling_htab->traverse <_vect_peel_extended_info *,
+                              vect_peeling_hash_get_most_frequent> (&res);
      }
 
    *npeel = res.peel_info.npeel;
@@ -1335,7 +1395,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   bool do_peeling = false;
   bool do_versioning = false;
   bool stat;
-  gimple stmt;
+  gimple *stmt;
   stmt_vec_info stmt_info;
   unsigned int npeel = 0;
   bool all_misalignments_unknown = true;
@@ -1344,11 +1404,16 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   tree vectype;
   unsigned int nelements, mis, same_align_drs_max = 0;
   stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost ();
+  hash_table<peel_info_hasher> peeling_htab (1);
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
                      "=== vect_enhance_data_refs_alignment ===\n");
 
+  /* Reset data so we can safely be called multiple times.  */
+  LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
+  LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
+
   /* While cost model enhancements are expected in the future, the high level
      view of the code at this time is as follows:
 
@@ -1399,9 +1464,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
       if (integer_zerop (DR_STEP (dr)))
        continue;
 
-      /* Strided loads perform only component accesses, alignment is
+      /* Strided accesses perform only component accesses, alignment is
         irrelevant for them.  */
-      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+      if (STMT_VINFO_STRIDED_P (stmt_info)
+         && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
        continue;
 
       supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
@@ -1415,10 +1481,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
                                                    size_zero_node) < 0;
 
               /* Save info about DR in the hash table.  */
-              if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
-                LOOP_VINFO_PEELING_HTAB (loop_vinfo)
-                 = new hash_table<peel_info_hasher> (1);
-
               vectype = STMT_VINFO_VECTYPE (stmt_info);
               nelements = TYPE_VECTOR_SUBPARTS (vectype);
               mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
@@ -1441,7 +1503,13 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
                 We do this automatically for cost model, since we calculate cost
                  for every peeling option.  */
               if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
-                possible_npeel_number = vf /nelements;
+               {
+                 if (STMT_SLP_TYPE (stmt_info))
+                   possible_npeel_number
+                     = (vf * GROUP_SIZE (stmt_info)) / nelements;
+                 else
+                   possible_npeel_number = vf / nelements;
+               }
 
               /* Handle the aligned case. We may decide to align some other
                  access, making DR unaligned.  */
@@ -1454,8 +1522,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
               for (j = 0; j < possible_npeel_number; j++)
                 {
-                  gcc_assert (npeel_tmp <= vf);
-                  vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
+                  vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
+                                           dr, npeel_tmp);
                   npeel_tmp += nelements;
                 }
 
@@ -1526,17 +1594,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
   /* Check if we can possibly peel the loop.  */
   if (!vect_can_advance_ivs_p (loop_vinfo)
-      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
-    do_peeling = false;
-
-  /* If we don't know how many times the peeling loop will run
-     assume it will run VF-1 times and disable peeling if the remaining
-     iters are less than the vectorization factor.  */
-  if (do_peeling
-      && all_misalignments_unknown
-      && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-      && (LOOP_VINFO_INT_NITERS (loop_vinfo)
-         < 2 * (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1))
+      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
+      || loop->inner)
     do_peeling = false;
 
   if (do_peeling
@@ -1607,12 +1666,17 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
         }
 
       /* In case there are only loads with different unknown misalignments, use
-         peeling only if it may help to align other accesses in the loop.  */
+         peeling only if it may help to align other accesses in the loop or
+        if it may help improve load bandwidth when we'd end up using
+        unaligned loads.  */
+      tree dr0_vt = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr0)));
       if (!first_store
          && !STMT_VINFO_SAME_ALIGN_REFS (
                  vinfo_for_stmt (DR_STMT (dr0))).length ()
-          && vect_supportable_dr_alignment (dr0, false)
-              != dr_unaligned_supported)
+         && (vect_supportable_dr_alignment (dr0, false)
+             != dr_unaligned_supported
+             || (builtin_vectorization_cost (vector_load, dr0_vt, 0)
+                 == builtin_vectorization_cost (unaligned_load, dr0_vt, -1))))
         do_peeling = false;
     }
 
@@ -1625,18 +1689,11 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
       gcc_assert (!all_misalignments_unknown);
 
       /* Choose the best peeling from the hash table.  */
-      dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
+      dr0 = vect_peeling_hash_choose_best_peeling (&peeling_htab,
+                                                  loop_vinfo, &npeel,
                                                   &body_cost_vec);
       if (!dr0 || !npeel)
         do_peeling = false;
-
-      /* If peeling by npeel will result in a remaining loop not iterating
-         enough to be vectorized then do not peel.  */
-      if (do_peeling
-         && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-         && (LOOP_VINFO_INT_NITERS (loop_vinfo)
-             < LOOP_VINFO_VECT_FACTOR (loop_vinfo) + npeel))
-       do_peeling = false;
     }
 
   if (do_peeling)
@@ -1691,9 +1748,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
              && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
            continue;
 
-         /* Strided loads perform only component accesses, alignment is
+         /* Strided accesses perform only component accesses, alignment is
             irrelevant for them.  */
-         if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+         if (STMT_VINFO_STRIDED_P (stmt_info)
+             && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
            continue;
 
          save_misalignment = DR_MISALIGNMENT (dr);
@@ -1710,7 +1768,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
       if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
         {
-          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
+          stat = vect_verify_datarefs_alignment (loop_vinfo);
           if (!stat)
             do_peeling = false;
           else
@@ -1720,6 +1778,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
            }
         }
 
+      /* Cost model #1 - honor --param vect-max-peeling-for-alignment.  */
       if (do_peeling)
         {
           unsigned max_allowed_peel
@@ -1729,7 +1788,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
               unsigned max_peel = npeel;
               if (max_peel == 0)
                 {
-                  gimple dr_stmt = DR_STMT (dr0);
+                 gimple *dr_stmt = DR_STMT (dr0);
                   stmt_vec_info vinfo = vinfo_for_stmt (dr_stmt);
                   tree vtype = STMT_VINFO_VECTYPE (vinfo);
                   max_peel = TYPE_VECTOR_SUBPARTS (vtype) - 1;
@@ -1744,11 +1803,20 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
             }
         }
 
+      /* Cost model #2 - if peeling may result in a remaining loop not
+        iterating enough to be vectorized then do not peel.  */
+      if (do_peeling
+         && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+       {
+         unsigned max_peel
+           = npeel == 0 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1 : npeel;
+         if (LOOP_VINFO_INT_NITERS (loop_vinfo)
+             < LOOP_VINFO_VECT_FACTOR (loop_vinfo) + max_peel)
+           do_peeling = false;
+       }
+
       if (do_peeling)
         {
-         stmt_info_for_cost *si;
-         void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
-
           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
              If the misalignment of DR_i is identical to that of dr0 then set
              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
@@ -1774,22 +1842,12 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
               dump_printf_loc (MSG_NOTE, vect_location,
                                "Peeling for alignment will be applied.\n");
             }
-         /* We've delayed passing the inside-loop peeling costs to the
-            target cost model until we were sure peeling would happen.
-            Do so now.  */
-         if (body_cost_vec.exists ())
-           {
-             FOR_EACH_VEC_ELT (body_cost_vec, i, si)
-               {
-                 struct _stmt_vec_info *stmt_info
-                   = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
-                 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
-                                       si->misalign, vect_body);
-               }
-             body_cost_vec.release ();
-           }
+         /* The inside-loop cost will be accounted for in vectorizable_load
+            and vectorizable_store correctly with adjusted alignments.
+            Drop the body_cost_vec on the floor here.  */
+         body_cost_vec.release ();
 
-         stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
+         stat = vect_verify_datarefs_alignment (loop_vinfo);
          gcc_assert (stat);
           return stat;
         }
@@ -1824,16 +1882,21 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
                  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
            continue;
 
-         /* Strided loads perform only component accesses, alignment is
-            irrelevant for them.  */
-         if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
-           continue;
+         if (STMT_VINFO_STRIDED_P (stmt_info))
+           {
+             /* Strided loads perform only component accesses, alignment is
+                irrelevant for them.  */
+             if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
+               continue;
+             do_versioning = false;
+             break;
+           }
 
          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
 
           if (!supportable_dr_alignment)
             {
-              gimple stmt;
+             gimple *stmt;
               int mask;
               tree vectype;
 
@@ -1877,9 +1940,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
   if (do_versioning)
     {
-      vec<gimple> may_misalign_stmts
+      vec<gimple *> may_misalign_stmts
         = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
-      gimple stmt;
+      gimple *stmt;
 
       /* It can now be assumed that the data references in the statements
          in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
@@ -1901,7 +1964,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
       /* Peeling and versioning can't be done together at this time.  */
       gcc_assert (! (do_peeling && do_versioning));
 
-      stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
+      stat = vect_verify_datarefs_alignment (loop_vinfo);
       gcc_assert (stat);
       return stat;
     }
@@ -1909,7 +1972,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   /* This point is reached if neither peeling nor versioning is being done.  */
   gcc_assert (! (do_peeling || do_versioning));
 
-  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
+  stat = vect_verify_datarefs_alignment (loop_vinfo);
   return stat;
 }
 
@@ -1993,8 +2056,7 @@ vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
    Return FALSE if a data reference is found that cannot be vectorized.  */
 
 bool
-vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
-                                  bb_vec_info bb_vinfo)
+vect_analyze_data_refs_alignment (loop_vec_info vinfo)
 {
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -2002,55 +2064,150 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
 
   /* Mark groups of data references with same alignment using
      data dependence information.  */
-  if (loop_vinfo)
+  vec<ddr_p> ddrs = vinfo->ddrs;
+  struct data_dependence_relation *ddr;
+  unsigned int i;
+
+  FOR_EACH_VEC_ELT (ddrs, i, ddr)
+    vect_find_same_alignment_drs (ddr, vinfo);
+
+  vec<data_reference_p> datarefs = vinfo->datarefs;
+  struct data_reference *dr;
+
+  FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
-      vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
-      struct data_dependence_relation *ddr;
-      unsigned int i;
+      stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
+      if (STMT_VINFO_VECTORIZABLE (stmt_info)
+         && !vect_compute_data_ref_alignment (dr))
+       {
+         /* Strided accesses perform only component accesses, misalignment
+            information is irrelevant for them.  */
+         if (STMT_VINFO_STRIDED_P (stmt_info)
+             && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
+           continue;
 
-      FOR_EACH_VEC_ELT (ddrs, i, ddr)
-       vect_find_same_alignment_drs (ddr, loop_vinfo);
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "not vectorized: can't calculate alignment "
+                            "for data ref.\n");
+
+         return false;
+       }
     }
 
-  if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
+  return true;
+}
+
+
+/* Analyze alignment of DRs of stmts in NODE.  */
+
+static bool
+vect_slp_analyze_and_verify_node_alignment (slp_tree node)
+{
+  /* We vectorize from the first scalar stmt in the node unless
+     the node is permuted in which case we start from the first
+     element in the group.  */
+  gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
+  data_reference_p first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
+    first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
+
+  data_reference_p dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+  if (! vect_compute_data_ref_alignment (dr)
+      /* For creating the data-ref pointer we need alignment of the
+        first element anyway.  */
+      || (dr != first_dr
+         && ! vect_compute_data_ref_alignment (first_dr))
+      || ! verify_data_ref_alignment (dr))
     {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "not vectorized: can't calculate alignment "
-                        "for data ref.\n");
+                        "not vectorized: bad data alignment in basic "
+                        "block.\n");
       return false;
     }
 
   return true;
 }
 
+/* Function vect_slp_analyze_instance_alignment
+
+   Analyze the alignment of the data-references in the SLP instance.
+   Return FALSE if a data reference is found that cannot be vectorized.  */
+
+bool
+vect_slp_analyze_and_verify_instance_alignment (slp_instance instance)
+{
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                     "=== vect_slp_analyze_and_verify_instance_alignment ===\n");
+
+  slp_tree node;
+  unsigned i;
+  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
+    if (! vect_slp_analyze_and_verify_node_alignment (node))
+      return false;
+
+  node = SLP_INSTANCE_TREE (instance);
+  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]))
+      && ! vect_slp_analyze_and_verify_node_alignment
+            (SLP_INSTANCE_TREE (instance)))
+    return false;
+
+  return true;
+}
+
 
 /* Analyze groups of accesses: check that DR belongs to a group of
    accesses of legal size, step, etc.  Detect gaps, single element
    interleaving, and other special cases. Set grouped access info.
-   Collect groups of strided stores for further use in SLP analysis.  */
+   Collect groups of strided stores for further use in SLP analysis.
+   Worker for vect_analyze_group_access.  */
 
 static bool
-vect_analyze_group_access (struct data_reference *dr)
+vect_analyze_group_access_1 (struct data_reference *dr)
 {
   tree step = DR_STEP (dr);
   tree scalar_type = TREE_TYPE (DR_REF (dr));
   HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
-  gimple stmt = DR_STMT (dr);
+  gimple *stmt = DR_STMT (dr);
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
+  HOST_WIDE_INT dr_step = -1;
   HOST_WIDE_INT groupsize, last_accessed_element = 1;
   bool slp_impossible = false;
-  struct loop *loop = NULL;
-
-  if (loop_vinfo)
-    loop = LOOP_VINFO_LOOP (loop_vinfo);
 
   /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
      size of the interleaving group (including gaps).  */
-  groupsize = absu_hwi (dr_step) / type_size;
+  if (tree_fits_shwi_p (step))
+    {
+      dr_step = tree_to_shwi (step);
+      /* Check that STEP is a multiple of type size.  Otherwise there is
+         a non-element-sized gap at the end of the group which we
+        cannot represent in GROUP_GAP or GROUP_SIZE.
+        ???  As we can handle non-constant step fine here we should
+        simply remove uses of GROUP_GAP between the last and first
+        element and instead rely on DR_STEP.  GROUP_SIZE then would
+        simply not include that gap.  */
+      if ((dr_step % type_size) != 0)
+       {
+         if (dump_enabled_p ())
+           {
+             dump_printf_loc (MSG_NOTE, vect_location,
+                              "Step ");
+             dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
+             dump_printf (MSG_NOTE,
+                          " is not a multiple of the element size for ");
+             dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
+             dump_printf (MSG_NOTE, "\n");
+           }
+         return false;
+       }
+      groupsize = absu_hwi (dr_step) / type_size;
+    }
+  else
+    groupsize = 0;
 
   /* Not consecutive access is possible only if it is a part of interleaving.  */
   if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
@@ -2077,24 +2234,6 @@ vect_analyze_group_access (struct data_reference *dr)
              dump_printf (MSG_NOTE, "\n");
            }
 
-         if (loop_vinfo)
-           {
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_NOTE, vect_location,
-                                "Data access with gaps requires scalar "
-                                "epilogue loop\n");
-              if (loop->inner)
-                {
-                  if (dump_enabled_p ())
-                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                     "Peeling for outer loop is not"
-                                     " supported\n");
-                  return false;
-                }
-
-              LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
-           }
-
          return true;
        }
 
@@ -2103,7 +2242,6 @@ vect_analyze_group_access (struct data_reference *dr)
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not consecutive access ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
-         dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
         }
 
       if (bb_vinfo)
@@ -2113,19 +2251,20 @@ vect_analyze_group_access (struct data_reference *dr)
           return true;
         }
 
-      return false;
+      dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
+      STMT_VINFO_STRIDED_P (stmt_info) = true;
+      return true;
     }
 
   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
     {
       /* First stmt in the interleaving chain. Check the chain.  */
-      gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
+      gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
       struct data_reference *data_ref = dr;
       unsigned int count = 1;
       tree prev_init = DR_INIT (data_ref);
-      gimple prev = stmt;
+      gimple *prev = stmt;
       HOST_WIDE_INT diff, gaps = 0;
-      unsigned HOST_WIDE_INT count_in_bytes;
 
       while (next)
         {
@@ -2145,6 +2284,10 @@ vect_analyze_group_access (struct data_reference *dr)
                   return false;
                 }
 
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "Two or more load stmts share the same dr.\n");
+
               /* For load use the same data-ref load.  */
               GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
 
@@ -2190,72 +2333,51 @@ vect_analyze_group_access (struct data_reference *dr)
           count++;
         }
 
-      /* COUNT is the number of accesses found, we multiply it by the size of
-         the type to get COUNT_IN_BYTES.  */
-      count_in_bytes = type_size * count;
+      if (groupsize == 0)
+        groupsize = count + gaps;
 
-      /* Check that the size of the interleaving (including gaps) is not
-         greater than STEP.  */
-      if (dr_step != 0
-         && absu_hwi (dr_step) < count_in_bytes + gaps * type_size)
-        {
-          if (dump_enabled_p ())
-            {
-              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                               "interleaving size is greater than step for ");
-              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
-                                 DR_REF (dr));
-              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
-            }
-          return false;
-        }
+      if (groupsize > UINT_MAX)
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "group is too large\n");
+         return false;
+       }
 
-      /* Check that the size of the interleaving is equal to STEP for stores,
+      /* Check that the size of the interleaving is equal to count for stores,
          i.e., that there are no gaps.  */
-      if (dr_step != 0
-         && absu_hwi (dr_step) != count_in_bytes)
+      if (groupsize != count
+         && !DR_IS_READ (dr))
         {
-          if (DR_IS_READ (dr))
-            {
-              slp_impossible = true;
-              /* There is a gap after the last load in the group. This gap is a
-                 difference between the groupsize and the number of elements.
-                When there is no gap, this difference should be 0.  */
-              GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
-            }
-          else
-            {
-              if (dump_enabled_p ())
-                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "interleaved store with gaps\n");
-              return false;
-            }
-        }
-
-      /* Check that STEP is a multiple of type size.  */
-      if (dr_step != 0
-         && (dr_step % type_size) != 0)
-        {
-          if (dump_enabled_p ())
-            {
-              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                               "step is not a multiple of type size: step ");
-              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
-              dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
-              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
-                                 TYPE_SIZE_UNIT (scalar_type));
-              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
-            }
-          return false;
-        }
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "interleaved store with gaps\n");
+         return false;
+       }
 
-      if (groupsize == 0)
-        groupsize = count;
+      /* If there is a gap after the last load in the group it is the
+        difference between the groupsize and the last accessed
+        element.
+        When there is no gap, this difference should be 0.  */
+      GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - last_accessed_element;
 
       GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
       if (dump_enabled_p ())
-        dump_printf_loc (MSG_NOTE, vect_location,
-                         "Detected interleaving of size %d\n", (int)groupsize);
+       {
+         dump_printf_loc (MSG_NOTE, vect_location,
+                          "Detected interleaving ");
+         if (DR_IS_READ (dr))
+           dump_printf (MSG_NOTE, "load ");
+         else
+           dump_printf (MSG_NOTE, "store ");
+         dump_printf (MSG_NOTE, "of size %u starting with ",
+                      (unsigned)groupsize);
+         dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+         if (GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "There is a gap of %u elements after the group\n",
+                            GROUP_GAP (vinfo_for_stmt (stmt)));
+       }
 
       /* SLP: create an SLP data structure for every interleaving group of
         stores for further analysis in vect_analyse_slp.  */
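
An assumed example of a load group with a gap, matching the GROUP_GAP bookkeeping above: the step covers three elements per iteration but only the first two are read, so the group size is 3 and the gap after the last accessed element is 1.

    int
    sum_two_of_three (const int *a, int n)
    {
      int s = 0;
      for (int i = 0; i < n; i++)
        s += a[3 * i] + a[3 * i + 1];   /* a[3 * i + 2] is never read */
      return s;
    }
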
@@ -2266,30 +2388,37 @@ vect_analyze_group_access (struct data_reference *dr)
           if (bb_vinfo)
             BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
         }
+    }
 
-      /* There is a gap in the end of the group.  */
-      if (groupsize - last_accessed_element > 0 && loop_vinfo)
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "Data access with gaps requires scalar "
-                            "epilogue loop\n");
-          if (loop->inner)
-            {
-              if (dump_enabled_p ())
-                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "Peeling for outer loop is not supported\n");
-              return false;
-            }
+  return true;
+}
+
+/* Analyze groups of accesses: check that DR belongs to a group of
+   accesses of legal size, step, etc.  Detect gaps, single element
+   interleaving, and other special cases. Set grouped access info.
+   Collect groups of strided stores for further use in SLP analysis.  */
 
-          LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
+static bool
+vect_analyze_group_access (struct data_reference *dr)
+{
+  if (!vect_analyze_group_access_1 (dr))
+    {
+      /* Dissolve the group if present.  */
+      gimple *next;
+      gimple *stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dr)));
+      while (stmt)
+       {
+         stmt_vec_info vinfo = vinfo_for_stmt (stmt);
+         next = GROUP_NEXT_ELEMENT (vinfo);
+         GROUP_FIRST_ELEMENT (vinfo) = NULL;
+         GROUP_NEXT_ELEMENT (vinfo) = NULL;
+         stmt = next;
        }
+      return false;
     }
-
   return true;
 }
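
As an illustration only (not part of the patch, and with invented names), this is the kind of source loop whose loads form an interleaved group with a trailing gap, the situation the GROUP_GAP bookkeeping above records:

/* The two loads form a group of size 3 (the step is three ints per
   iteration) but only the first two elements are read, so a gap of
   one element is recorded after the group.  */
void
sum_pairs (int *restrict out, const int *restrict in, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = in[3 * i] + in[3 * i + 1];
}

A store group shaped like this would instead hit the "interleaved store with gaps" rejection above.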
 
-
 /* Analyze the access pattern of the data-reference DR.
    In case of non-consecutive accesses call vect_analyze_group_access() to
    analyze groups of accesses.  */
@@ -2299,7 +2428,7 @@ vect_analyze_data_ref_access (struct data_reference *dr)
 {
   tree step = DR_STEP (dr);
   tree scalar_type = TREE_TYPE (DR_REF (dr));
-  gimple stmt = DR_STMT (dr);
+  gimple *stmt = DR_STMT (dr);
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   struct loop *loop = NULL;
@@ -2315,18 +2444,22 @@ vect_analyze_data_ref_access (struct data_reference *dr)
       return false;
     }
 
-  /* Allow invariant loads in not nested loops.  */
+  /* Allow loads with zero step in inner-loop vectorization.  */
   if (loop_vinfo && integer_zerop (step))
     {
       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
-      if (nested_in_vect_loop_p (loop, stmt))
+      if (!nested_in_vect_loop_p (loop, stmt))
+       return DR_IS_READ (dr);
+      /* Allow references with zero step for outer loops marked
+        with pragma omp simd only - it guarantees absence of
+        loop-carried dependencies between inner loop iterations.  */
+      if (!loop->force_vectorize)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "zero step in inner loop of nest\n");
          return false;
        }
-      return DR_IS_READ (dr);
     }
 
   if (loop && nested_in_vect_loop_p (loop, stmt))
@@ -2342,10 +2475,7 @@ vect_analyze_data_ref_access (struct data_reference *dr)
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "zero step in outer loop.\n");
-         if (DR_IS_READ (dr))
-           return true;
-         else
-           return false;
+         return DR_IS_READ (dr);
        }
     }
 
@@ -2371,9 +2501,12 @@ vect_analyze_data_ref_access (struct data_reference *dr)
       return false;
     }
 
+
   /* Assume this is a DR handled by non-constant strided load case.  */
   if (TREE_CODE (step) != INTEGER_CST)
-    return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
+    return (STMT_VINFO_STRIDED_P (stmt_info)
+           && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
+               || vect_analyze_group_access (dr)));
 
   /* Not consecutive access - check if it's a part of interleaving group.  */
   return vect_analyze_group_access (dr);
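
A minimal sketch (not from the patch; the names are invented) of the zero-step case accepted above: the reference *scale has step 0 with respect to the loop and is allowed because it is a read in a non-nested loop, while a zero-step store would be rejected:

/* *scale is a loop-invariant data reference, i.e. one with zero step.  */
void
scale_all (float *restrict a, const float *restrict scale, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = a[i] * *scale;
}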
@@ -2399,6 +2532,8 @@ compare_tree (tree t1, tree t2)
   if (t2 == NULL)
     return 1;
 
+  STRIP_NOPS (t1);
+  STRIP_NOPS (t2);
 
   if (TREE_CODE (t1) != TREE_CODE (t2))
     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
@@ -2468,6 +2603,12 @@ dr_group_sort_cmp (const void *dra_, const void *drb_)
   if (dra == drb)
     return 0;
 
+  /* DRs in different loops never belong to the same group.  */
+  loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;
+  loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;
+  if (loopa != loopb)
+    return loopa->num < loopb->num ? -1 : 1;
+
   /* Ordering of DRs according to base.  */
   if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
     {
@@ -2523,21 +2664,16 @@ dr_group_sort_cmp (const void *dra_, const void *drb_)
    FORNOW: handle only arrays and pointer accesses.  */
 
 bool
-vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
+vect_analyze_data_ref_accesses (vec_info *vinfo)
 {
   unsigned int i;
-  vec<data_reference_p> datarefs;
+  vec<data_reference_p> datarefs = vinfo->datarefs;
   struct data_reference *dr;
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
                      "=== vect_analyze_data_ref_accesses ===\n");
 
-  if (loop_vinfo)
-    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
-  else
-    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
-
   if (datarefs.is_empty ())
     return true;
 
@@ -2564,6 +2700,12 @@ vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
             matters we can push those to a worklist and re-iterate
             over them.  Then we can just skip ahead to the next DR here.  */
 
+         /* DRs in a different loop should not be put into the same
+            interleaving group.  */
+         if (gimple_bb (DR_STMT (dra))->loop_father
+             != gimple_bb (DR_STMT (drb))->loop_father)
+           break;
+
          /* Check that the data-refs have same first location (except init)
             and they are both either store or load (not load and store,
             not masked loads or stores).  */
@@ -2575,15 +2717,16 @@ vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
              || !gimple_assign_single_p (DR_STMT (drb)))
            break;
 
-         /* Check that the data-refs have the same constant size and step.  */
+         /* Check that the data-refs have the same constant size.  */
          tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
          tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
          if (!tree_fits_uhwi_p (sza)
              || !tree_fits_uhwi_p (szb)
-             || !tree_int_cst_equal (sza, szb)
-             || !tree_fits_shwi_p (DR_STEP (dra))
-             || !tree_fits_shwi_p (DR_STEP (drb))
-             || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
+             || !tree_int_cst_equal (sza, szb))
+           break;
+
+         /* Check that the data-refs have the same step.  */
+         if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
            break;
 
          /* Do not place the same access in the interleaving chain twice.  */
@@ -2604,19 +2747,37 @@ vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
          /* If init_b == init_a + the size of the type * k, we have an
             interleaving, and DRA is accessed before DRB.  */
          HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
-         if ((init_b - init_a) % type_size_a != 0)
+         if (type_size_a == 0
+             || (init_b - init_a) % type_size_a != 0)
            break;
 
-         /* The step (if not zero) is greater than the difference between
-            data-refs' inits.  This splits groups into suitable sizes.  */
-         HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
-         if (step != 0 && step <= (init_b - init_a))
+         /* If we have a store, the accesses are adjacent.  This splits
+            groups into chunks we support (we don't support vectorization
+            of stores with gaps).  */
+         if (!DR_IS_READ (dra)
+             && (init_b - (HOST_WIDE_INT) TREE_INT_CST_LOW
+                                            (DR_INIT (datarefs_copy[i-1]))
+                 != type_size_a))
            break;
 
+         /* The step (if constant and nonzero) must be greater than the
+            difference between data-refs' inits; this splits groups into
+            suitable sizes.  */
+         if (tree_fits_shwi_p (DR_STEP (dra)))
+           {
+             HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
+             if (step != 0 && step <= (init_b - init_a))
+               break;
+           }
+
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Detected interleaving ");
+             if (DR_IS_READ (dra))
+               dump_printf (MSG_NOTE, "load ");
+             else
+               dump_printf (MSG_NOTE, "store ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
              dump_printf (MSG_NOTE,  " and ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
@@ -2643,7 +2804,7 @@ vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: complicated access pattern.\n");
 
-        if (bb_vinfo)
+        if (is_a <bb_vec_info> (vinfo))
           {
             /* Mark the statement as not vectorizable.  */
             STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
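
For illustration only (not part of the patch; names invented), the store-adjacency rule added above distinguishes cases like these:

/* The writes to p[2*i] and p[2*i+1] have inits that differ by exactly
   one element, so they may stay in one interleaving group; writes to,
   say, p[4*i] and p[4*i+2] would be split apart here, because stores
   with gaps are not vectorized.  */
void
interleave_store (int *restrict p, const int *restrict x,
                  const int *restrict y, int n)
{
  for (int i = 0; i < n; i++)
    {
      p[2 * i] = x[i];
      p[2 * i + 1] = y[i];
    }
}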
@@ -2816,9 +2977,9 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
   FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
     {
       struct data_reference *dr_a, *dr_b;
-      gimple dr_group_first_a, dr_group_first_b;
+      gimple *dr_group_first_a, *dr_group_first_b;
       tree segment_length_a, segment_length_b;
-      gimple stmt_a, stmt_b;
+      gimple *stmt_a, *stmt_b;
 
       dr_a = DDR_A (ddr);
       stmt_a = DR_STMT (DDR_A (ddr));
@@ -2974,12 +3135,12 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
   return true;
 }
 
-/* Check whether a non-affine read in stmt is suitable for gather load
-   and if so, return a builtin decl for that operation.  */
+/* Check whether a non-affine read or write in stmt is suitable for gather load
+   or scatter store and if so, return a builtin decl for that operation.  */
 
 tree
-vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
-                  tree *offp, int *scalep)
+vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo, tree *basep,
+                          tree *offp, int *scalep)
 {
   HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -2988,7 +3149,7 @@ vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
   tree offtype = NULL_TREE;
   tree decl, base, off;
   machine_mode pmode;
-  int punsignedp, pvolatilep;
+  int punsignedp, reversep, pvolatilep = 0;
 
   base = DR_REF (dr);
   /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
@@ -3002,13 +3163,13 @@ vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
       && integer_zerop (TREE_OPERAND (base, 1))
       && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
     {
-      gimple def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
+      gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
       if (is_gimple_assign (def_stmt)
          && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
        base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
     }
 
-  /* The gather builtins need address of the form
+  /* The gather and scatter builtins need address of the form
      loop_invariant + vector * {1, 2, 4, 8}
      or
      loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
@@ -3020,9 +3181,9 @@ vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
      vectorized.  The following code attempts to find such a preexisting
      SSA_NAME OFF and put the loop invariants into a tree BASE
      that can be gimplified before the loop.  */
-  base = get_inner_reference (base, &pbitsize, &pbitpos, &off,
-                             &pmode, &punsignedp, &pvolatilep, false);
-  gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
+  base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
+                             &punsignedp, &reversep, &pvolatilep, false);
+  gcc_assert (base && (pbitpos % BITS_PER_UNIT) == 0 && !reversep);
 
   if (TREE_CODE (base) == MEM_REF)
     {
@@ -3076,7 +3237,7 @@ vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
 
       if (TREE_CODE (off) == SSA_NAME)
        {
-         gimple def_stmt = SSA_NAME_DEF_STMT (off);
+         gimple *def_stmt = SSA_NAME_DEF_STMT (off);
 
          if (expr_invariant_in_loop_p (loop, off))
            return NULL_TREE;
@@ -3171,8 +3332,13 @@ vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
   if (offtype == NULL_TREE)
     offtype = TREE_TYPE (off);
 
-  decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
-                                          offtype, scale);
+  if (DR_IS_READ (dr))
+    decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
+                                            offtype, scale);
+  else
+    decl = targetm.vectorize.builtin_scatter (STMT_VINFO_VECTYPE (stmt_info),
+                                             offtype, scale);
+
   if (decl == NULL_TREE)
     return NULL_TREE;
 
@@ -3201,127 +3367,30 @@ vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
 */
 
 bool
-vect_analyze_data_refs (loop_vec_info loop_vinfo,
-                       bb_vec_info bb_vinfo,
-                       int *min_vf, unsigned *n_stmts)
+vect_analyze_data_refs (vec_info *vinfo, int *min_vf)
 {
   struct loop *loop = NULL;
-  basic_block bb = NULL;
   unsigned int i;
-  vec<data_reference_p> datarefs;
   struct data_reference *dr;
   tree scalar_type;
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
-                     "=== vect_analyze_data_refs ===\n");
-
-  if (loop_vinfo)
-    {
-      basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
-
-      loop = LOOP_VINFO_LOOP (loop_vinfo);
-      datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
-      if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "not vectorized: loop contains function calls"
-                            " or data references that cannot be analyzed\n");
-         return false;
-       }
-
-      for (i = 0; i < loop->num_nodes; i++)
-       {
-         gimple_stmt_iterator gsi;
-
-         for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
-           {
-             gimple stmt = gsi_stmt (gsi);
-             if (is_gimple_debug (stmt))
-               continue;
-             ++*n_stmts;
-             if (!find_data_references_in_stmt (loop, stmt, &datarefs))
-               {
-                 if (is_gimple_call (stmt) && loop->safelen)
-                   {
-                     tree fndecl = gimple_call_fndecl (stmt), op;
-                     if (fndecl != NULL_TREE)
-                       {
-                         struct cgraph_node *node = cgraph_node::get (fndecl);
-                         if (node != NULL && node->simd_clones != NULL)
-                           {
-                             unsigned int j, n = gimple_call_num_args (stmt);
-                             for (j = 0; j < n; j++)
-                               {
-                                 op = gimple_call_arg (stmt, j);
-                                 if (DECL_P (op)
-                                     || (REFERENCE_CLASS_P (op)
-                                         && get_base_address (op)))
-                                   break;
-                               }
-                             op = gimple_call_lhs (stmt);
-                             /* Ignore #pragma omp declare simd functions
-                                if they don't have data references in the
-                                call stmt itself.  */
-                             if (j == n
-                                 && !(op
-                                      && (DECL_P (op)
-                                          || (REFERENCE_CLASS_P (op)
-                                              && get_base_address (op)))))
-                               continue;
-                           }
-                       }
-                   }
-                 LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
-                 if (dump_enabled_p ())
-                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                    "not vectorized: loop contains function "
-                                    "calls or data references that cannot "
-                                    "be analyzed\n");
-                 return false;
-               }
-           }
-       }
-
-      LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
-    }
-  else
-    {
-      gimple_stmt_iterator gsi;
+                    "=== vect_analyze_data_refs ===\n");
 
-      bb = BB_VINFO_BB (bb_vinfo);
-      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-       {
-         gimple stmt = gsi_stmt (gsi);
-         if (is_gimple_debug (stmt))
-           continue;
-         ++*n_stmts;
-         if (!find_data_references_in_stmt (NULL, stmt,
-                                            &BB_VINFO_DATAREFS (bb_vinfo)))
-           {
-             /* Mark the rest of the basic-block as unvectorizable.  */
-             for (; !gsi_end_p (gsi); gsi_next (&gsi))
-               {
-                 stmt = gsi_stmt (gsi);
-                 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
-               }
-             break;
-           }
-       }
-
-      datarefs = BB_VINFO_DATAREFS (bb_vinfo);
-    }
+  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
+    loop = LOOP_VINFO_LOOP (loop_vinfo);
 
   /* Go through the data-refs, check that the analysis succeeded.  Update
      pointer from stmt_vec_info struct to DR and vectype.  */
 
+  vec<data_reference_p> datarefs = vinfo->datarefs;
   FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
-      gimple stmt;
+      gimple *stmt;
       stmt_vec_info stmt_info;
       tree base, offset, init;
-      bool gather = false;
+      enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
       bool simd_lane_access = false;
       int vf;
 
@@ -3360,18 +3429,22 @@ again:
            = DR_IS_READ (dr)
              && !TREE_THIS_VOLATILE (DR_REF (dr))
              && targetm.vectorize.builtin_gather != NULL;
+         bool maybe_scatter
+           = DR_IS_WRITE (dr)
+             && !TREE_THIS_VOLATILE (DR_REF (dr))
+             && targetm.vectorize.builtin_scatter != NULL;
          bool maybe_simd_lane_access
-           = loop_vinfo && loop->simduid;
+           = is_a <loop_vec_info> (vinfo) && loop->simduid;
 
-         /* If target supports vector gather loads, or if this might be
-            a SIMD lane access, see if they can't be used.  */
-         if (loop_vinfo
-             && (maybe_gather || maybe_simd_lane_access)
+         /* If the target supports vector gather loads or scatter stores, or
+            if this might be a SIMD lane access, see if they can be used.  */
+         if (is_a <loop_vec_info> (vinfo)
+             && (maybe_gather || maybe_scatter || maybe_simd_lane_access)
              && !nested_in_vect_loop_p (loop, stmt))
            {
              struct data_reference *newdr
                = create_data_ref (NULL, loop_containing_stmt (stmt),
-                                  DR_REF (dr), stmt, true);
+                                  DR_REF (dr), stmt, maybe_scatter ? false : true);
              gcc_assert (newdr != NULL && DR_REF (newdr));
              if (DR_BASE_ADDRESS (newdr)
                  && DR_OFFSET (newdr)
@@ -3397,7 +3470,7 @@ again:
                            off = TREE_OPERAND (off, 0);
                          if (TREE_CODE (off) == SSA_NAME)
                            {
-                             gimple def = SSA_NAME_DEF_STMT (off);
+                             gimple *def = SSA_NAME_DEF_STMT (off);
                              tree reft = TREE_TYPE (DR_REF (newdr));
                              if (is_gimple_call (def)
                                  && gimple_call_internal_p (def)
@@ -3424,17 +3497,20 @@ again:
                            }
                        }
                    }
-                 if (!simd_lane_access && maybe_gather)
+                 if (!simd_lane_access && (maybe_gather || maybe_scatter))
                    {
                      dr = newdr;
-                     gather = true;
+                     if (maybe_gather)
+                       gatherscatter = GATHER;
+                     else
+                       gatherscatter = SCATTER;
                    }
                }
-             if (!gather && !simd_lane_access)
+             if (gatherscatter == SG_NONE && !simd_lane_access)
                free_data_ref (newdr);
            }
 
-         if (!gather && !simd_lane_access)
+         if (gatherscatter == SG_NONE && !simd_lane_access)
            {
              if (dump_enabled_p ())
                {
@@ -3445,7 +3521,7 @@ again:
                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
 
-             if (bb_vinfo)
+             if (is_a <bb_vec_info> (vinfo))
                break;
 
              return false;
@@ -3459,10 +3535,10 @@ again:
                              "not vectorized: base addr of dr is a "
                              "constant\n");
 
-          if (bb_vinfo)
+          if (is_a <bb_vec_info> (vinfo))
            break;
 
-         if (gather || simd_lane_access)
+         if (gatherscatter != SG_NONE || simd_lane_access)
            free_data_ref (dr);
          return false;
         }
@@ -3477,7 +3553,7 @@ again:
               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
             }
 
-          if (bb_vinfo)
+          if (is_a <bb_vec_info> (vinfo))
            break;
 
           return false;
@@ -3494,10 +3570,10 @@ again:
               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
             }
 
-          if (bb_vinfo)
+          if (is_a <bb_vec_info> (vinfo))
            break;
 
-         if (gather || simd_lane_access)
+         if (gatherscatter != SG_NONE || simd_lane_access)
            free_data_ref (dr);
           return false;
         }
@@ -3514,10 +3590,10 @@ again:
               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
             }
 
-          if (bb_vinfo)
+          if (is_a <bb_vec_info> (vinfo))
            break;
 
-         if (gather || simd_lane_access)
+         if (gatherscatter != SG_NONE || simd_lane_access)
            free_data_ref (dr);
           return false;
        }
@@ -3539,10 +3615,10 @@ again:
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
 
-         if (bb_vinfo)
+         if (is_a <bb_vec_info> (vinfo))
            break;
 
-         if (gather || simd_lane_access)
+         if (gatherscatter != SG_NONE || simd_lane_access)
            free_data_ref (dr);
          return false;
        }
@@ -3561,7 +3637,7 @@ again:
          HOST_WIDE_INT pbitsize, pbitpos;
          tree poffset;
          machine_mode pmode;
-         int punsignedp, pvolatilep;
+         int punsignedp, preversep, pvolatilep;
          affine_iv base_iv, offset_iv;
          tree dinit;
 
@@ -3580,7 +3656,8 @@ again:
            }
 
          outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
-                         &poffset, &pmode, &punsignedp, &pvolatilep, false);
+                                           &poffset, &pmode, &punsignedp,
+                                           &preversep, &pvolatilep, false);
          gcc_assert (outer_base != NULL_TREE);
 
          if (pbitpos % BITS_PER_UNIT != 0)
@@ -3591,6 +3668,14 @@ again:
              return false;
            }
 
+         if (preversep)
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "failed: reverse storage order.\n");
+             return false;
+           }
+
          outer_base = build_fold_addr_expr (outer_base);
          if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
                           &base_iv, false))
@@ -3677,10 +3762,10 @@ again:
               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
             }
 
-          if (bb_vinfo)
+          if (is_a <bb_vec_info> (vinfo))
            break;
 
-         if (gather || simd_lane_access)
+         if (gatherscatter != SG_NONE || simd_lane_access)
            free_data_ref (dr);
           return false;
         }
@@ -3710,13 +3795,18 @@ again:
               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
             }
 
-          if (bb_vinfo)
-           break;
+          if (is_a <bb_vec_info> (vinfo))
+           {
+             /* No vector type is fine; the ref can still participate
+                in dependence analysis, we just can't vectorize it.  */
+             STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+             continue;
+           }
 
-         if (gather || simd_lane_access)
+         if (gatherscatter != SG_NONE || simd_lane_access)
            {
              STMT_VINFO_DATA_REF (stmt_info) = NULL;
-             if (gather)
+             if (gatherscatter != SG_NONE)
                free_data_ref (dr);
            }
          return false;
@@ -3740,37 +3830,38 @@ again:
       if (vf > *min_vf)
        *min_vf = vf;
 
-      if (gather)
+      if (gatherscatter != SG_NONE)
        {
          tree off;
-
-         gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
-         if (gather
-             && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
-           gather = false;
-         if (!gather)
+         if (!vect_check_gather_scatter (stmt, as_a <loop_vec_info> (vinfo),
+                                         NULL, &off, NULL)
+             || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
            {
              STMT_VINFO_DATA_REF (stmt_info) = NULL;
              free_data_ref (dr);
              if (dump_enabled_p ())
                {
-                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
-                                   "not vectorized: not suitable for gather "
-                                   "load ");
+                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                  (gatherscatter == GATHER) ?
+                                  "not vectorized: not suitable for gather "
+                                  "load " :
+                                  "not vectorized: not suitable for scatter "
+                                  "store ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }
 
+         free_data_ref (datarefs[i]);
          datarefs[i] = dr;
-         STMT_VINFO_GATHER_P (stmt_info) = true;
+         STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
        }
-      else if (loop_vinfo
+
+      else if (is_a <loop_vec_info> (vinfo)
               && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
        {
-         if (nested_in_vect_loop_p (loop, stmt)
-             || !DR_IS_READ (dr))
+         if (nested_in_vect_loop_p (loop, stmt))
            {
              if (dump_enabled_p ())
                {
@@ -3782,7 +3873,7 @@ again:
                }
              return false;
            }
-         STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
+         STMT_VINFO_STRIDED_P (stmt_info) = true;
        }
     }
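
A short sketch (not from the patch; invented names) of the non-constant-step case now marked STMT_VINFO_STRIDED_P; with the !DR_IS_READ check removed above, the strided store is no longer rejected at this point:

/* The step stride * sizeof (double) is not an INTEGER_CST, so both
   the load and the store are treated as strided accesses.  */
void
copy_strided (double *restrict dst, const double *restrict src,
              int stride, int n)
{
  for (int i = 0; i < n; i++)
    dst[i * stride] = src[i * stride];
}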
 
@@ -3792,7 +3883,7 @@ again:
      avoids spending useless time in analyzing their dependence.  */
   if (i != datarefs.length ())
     {
-      gcc_assert (bb_vinfo != NULL);
+      gcc_assert (is_a <bb_vec_info> (vinfo));
       for (unsigned j = i; j < datarefs.length (); ++j)
        {
          data_reference_p dr = datarefs[j];
@@ -3827,6 +3918,9 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
   case vect_scalar_var:
     prefix = "stmp";
     break;
+  case vect_mask_var:
+    prefix = "mask";
+    break;
   case vect_pointer_var:
     prefix = "vectp";
     break;
@@ -3846,6 +3940,55 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
   return new_vect_var;
 }
 
+/* Like vect_get_new_vect_var but return an SSA name.  */
+
+tree
+vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
+{
+  const char *prefix;
+  tree new_vect_var;
+
+  switch (var_kind)
+  {
+  case vect_simple_var:
+    prefix = "vect";
+    break;
+  case vect_scalar_var:
+    prefix = "stmp";
+    break;
+  case vect_pointer_var:
+    prefix = "vectp";
+    break;
+  default:
+    gcc_unreachable ();
+  }
+
+  if (name)
+    {
+      char* tmp = concat (prefix, "_", name, NULL);
+      new_vect_var = make_temp_ssa_name (type, NULL, tmp);
+      free (tmp);
+    }
+  else
+    new_vect_var = make_temp_ssa_name (type, NULL, prefix);
+
+  return new_vect_var;
+}
+
+/* Duplicate ptr info and set alignment/misalignment on NAME from DR.  */
+
+static void
+vect_duplicate_ssa_name_ptr_info (tree name, data_reference *dr,
+                                 stmt_vec_info stmt_info)
+{
+  duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr));
+  unsigned int align = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE (stmt_info));
+  int misalign = DR_MISALIGNMENT (dr);
+  if (misalign == -1)
+    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
+  else
+    set_ptr_info_alignment (SSA_NAME_PTR_INFO (name), align, misalign);
+}
 
 /* Function vect_create_addr_base_for_vector_ref.
 
@@ -3882,7 +4025,7 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
    FORNOW: We are only handling array accesses with step 1.  */
 
 tree
-vect_create_addr_base_for_vector_ref (gimple stmt,
+vect_create_addr_base_for_vector_ref (gimple *stmt,
                                      gimple_seq *new_stmt_list,
                                      tree offset,
                                      struct loop *loop,
@@ -3957,21 +4100,17 @@ vect_create_addr_base_for_vector_ref (gimple stmt,
     }
 
   vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
-  addr_base = fold_convert (vect_ptr_type, addr_base);
   dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
-  addr_base = force_gimple_operand (addr_base, &seq, false, dest);
+  addr_base = force_gimple_operand (addr_base, &seq, true, dest);
   gimple_seq_add_seq (new_stmt_list, seq);
 
   if (DR_PTR_INFO (dr)
-      && TREE_CODE (addr_base) == SSA_NAME)
+      && TREE_CODE (addr_base) == SSA_NAME
+      && !SSA_NAME_PTR_INFO (addr_base))
     {
-      duplicate_ssa_name_ptr_info (addr_base, DR_PTR_INFO (dr));
-      unsigned int align = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE (stmt_info));
-      int misalign = DR_MISALIGNMENT (dr);
-      if (offset || byte_offset || (misalign == -1))
+      vect_duplicate_ssa_name_ptr_info (addr_base, dr, stmt_info);
+      if (offset || byte_offset)
        mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
-      else
-       set_ptr_info_alignment (SSA_NAME_PTR_INFO (addr_base), align, misalign);
     }
 
   if (dump_enabled_p ())
@@ -4039,9 +4178,9 @@ vect_create_addr_base_for_vector_ref (gimple stmt,
    4. Return the pointer.  */
 
 tree
-vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
+vect_create_data_ref_ptr (gimple *stmt, tree aggr_type, struct loop *at_loop,
                          tree offset, tree *initial_address,
-                         gimple_stmt_iterator *gsi, gimple *ptr_incr,
+                         gimple_stmt_iterator *gsi, gimple **ptr_incr,
                          bool only_init, bool *inv_p, tree byte_offset)
 {
   const char *base_name;
@@ -4053,7 +4192,6 @@ vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
   tree aggr_ptr_type;
   tree aggr_ptr;
   tree new_temp;
-  gimple vec_stmt;
   gimple_seq new_stmt_list = NULL;
   edge pe = NULL;
   basic_block new_bb;
@@ -4063,7 +4201,7 @@ vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
   gimple_stmt_iterator incr_gsi;
   bool insert_after;
   tree indx_before_incr, indx_after_incr;
-  gimple incr;
+  gimple *incr;
   tree step;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
 
@@ -4131,7 +4269,7 @@ vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
   /* Likewise for any of the data references in the stmt group.  */
   else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
     {
-      gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
+      gimple *orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
       do
        {
          stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
@@ -4201,28 +4339,7 @@ vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
     }
 
   *initial_address = new_temp;
-
-  /* Create: p = (aggr_type *) initial_base  */
-  if (TREE_CODE (new_temp) != SSA_NAME
-      || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
-    {
-      vec_stmt = gimple_build_assign (aggr_ptr,
-                                     fold_convert (aggr_ptr_type, new_temp));
-      aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
-      /* Copy the points-to information if it exists. */
-      if (DR_PTR_INFO (dr))
-       duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
-      gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
-      if (pe)
-       {
-         new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
-         gcc_assert (!new_bb);
-       }
-      else
-       gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
-    }
-  else
-    aggr_ptr_init = new_temp;
+  aggr_ptr_init = new_temp;
 
   /* (3) Handle the updating of the aggregate-pointer inside the loop.
      This is needed when ONLY_INIT is false, and also when AT_LOOP is the
@@ -4249,13 +4366,13 @@ vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
                 aggr_ptr, loop, &incr_gsi, insert_after,
                 &indx_before_incr, &indx_after_incr);
       incr = gsi_stmt (incr_gsi);
-      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
+      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
 
       /* Copy the points-to information if it exists. */
       if (DR_PTR_INFO (dr))
        {
-         duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
-         duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
+         vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr, stmt_info);
+         vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr, stmt_info);
        }
       if (ptr_incr)
        *ptr_incr = incr;
@@ -4279,13 +4396,13 @@ vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
                 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
                 &indx_after_incr);
       incr = gsi_stmt (incr_gsi);
-      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
+      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
 
       /* Copy the points-to information if it exists. */
       if (DR_PTR_INFO (dr))
        {
-         duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
-         duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
+         vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr, stmt_info);
+         vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr, stmt_info);
        }
       if (ptr_incr)
        *ptr_incr = incr;
@@ -4332,14 +4449,14 @@ vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
 */
 
 tree
-bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
-                gimple stmt, tree bump)
+bump_vector_ptr (tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
+                gimple *stmt, tree bump)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   tree update = TYPE_SIZE_UNIT (vectype);
-  gimple incr_stmt;
+  gassign *incr_stmt;
   ssa_op_iter iter;
   use_operand_p use_p;
   tree new_dataref_ptr;
@@ -4347,9 +4464,12 @@ bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
   if (bump)
     update = bump;
 
-  new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
-  incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
-                                           dataref_ptr, update);
+  if (TREE_CODE (dataref_ptr) == SSA_NAME)
+    new_dataref_ptr = copy_ssa_name (dataref_ptr);
+  else
+    new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
+  incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
+                                  dataref_ptr, update);
   vect_finish_stmt_generation (stmt, incr_stmt, gsi);
 
   /* Copy the points-to information if it exists. */
@@ -4390,16 +4510,20 @@ vect_create_destination_var (tree scalar_dest, tree vectype)
   tree type;
   enum vect_var_kind kind;
 
-  kind = vectype ? vect_simple_var : vect_scalar_var;
+  kind = vectype
+    ? VECTOR_BOOLEAN_TYPE_P (vectype)
+    ? vect_mask_var
+    : vect_simple_var
+    : vect_scalar_var;
   type = vectype ? vectype : TREE_TYPE (scalar_dest);
 
   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
 
   name = get_name (scalar_dest);
   if (name)
-    asprintf (&new_name, "%s_%u", name, SSA_NAME_VERSION (scalar_dest));
+    new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
   else
-    asprintf (&new_name, "_%u", SSA_NAME_VERSION (scalar_dest));
+    new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
   vec_dest = vect_get_new_vect_var (type, kind, new_name);
   free (new_name);
 
@@ -4582,12 +4706,12 @@ vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
 void
 vect_permute_store_chain (vec<tree> dr_chain,
                          unsigned int length,
-                         gimple stmt,
+                         gimple *stmt,
                          gimple_stmt_iterator *gsi,
                          vec<tree> *result_chain)
 {
   tree vect1, vect2, high, low;
-  gimple perm_stmt;
+  gimple *perm_stmt;
   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
   tree perm_mask_low, perm_mask_high;
   tree data_ref;
@@ -4640,9 +4764,8 @@ vect_permute_store_chain (vec<tree> dr_chain,
                                  {j, nelt, *, j + 1, nelt + j + 1, *,
                                   j + 2, nelt + j + 2, *, ...}>  */
          data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
-         perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                                   vect1, vect2,
-                                                   perm3_mask_low);
+         perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
+                                          vect2, perm3_mask_low);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
 
          vect1 = data_ref;
@@ -4652,9 +4775,8 @@ vect_permute_store_chain (vec<tree> dr_chain,
                                  {0, 1, nelt + j, 3, 4, nelt + j + 1,
                                   6, 7, nelt + j + 2, ...}>  */
          data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
-         perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                                   vect1, vect2,
-                                                   perm3_mask_high);
+         perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
+                                          vect2, perm3_mask_high);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          (*result_chain)[j] = data_ref;
        }
@@ -4686,9 +4808,8 @@ vect_permute_store_chain (vec<tree> dr_chain,
                   high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
                                                        ...}>  */
                high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
-               perm_stmt
-                 = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
-                                                 vect1, vect2, perm_mask_high);
+               perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
+                                                vect2, perm_mask_high);
                vect_finish_stmt_generation (stmt, perm_stmt, gsi);
                (*result_chain)[2*j] = high;
 
@@ -4697,9 +4818,8 @@ vect_permute_store_chain (vec<tree> dr_chain,
                                        {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
                                         ...}>  */
                low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
-               perm_stmt
-                 = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
-                                                 vect1, vect2, perm_mask_low);
+               perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
+                                                vect2, perm_mask_low);
                vect_finish_stmt_generation (stmt, perm_stmt, gsi);
                (*result_chain)[2*j+1] = low;
              }
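
As a scalar model only (not GCC code), the effect of the two interleaving masks quoted in the comments above, {0, nelt, 1, nelt+1, ...} and {nelt/2, nelt*3/2, nelt/2+1, ...}:

/* Weave the first halves of V1 and V2 into HIGH and their second
   halves into LOW, mirroring the two VEC_PERM_EXPRs built above.  */
void
interleave_high_low (const int *v1, const int *v2, int nelt,
                     int *high, int *low)
{
  for (int i = 0; i < nelt / 2; i++)
    {
      high[2 * i] = v1[i];
      high[2 * i + 1] = v2[i];
      low[2 * i] = v1[nelt / 2 + i];
      low[2 * i + 1] = v2[nelt / 2 + i];
    }
}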
@@ -4761,7 +4881,7 @@ vect_permute_store_chain (vec<tree> dr_chain,
    Return value - the result of the loop-header phi node.  */
 
 tree
-vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
+vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi,
                         tree *realignment_token,
                        enum dr_alignment_support alignment_support_scheme,
                        tree init_addr,
@@ -4775,14 +4895,13 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
   edge pe = NULL;
   tree scalar_dest = gimple_assign_lhs (stmt);
   tree vec_dest;
-  gimple inc;
+  gimple *inc;
   tree ptr;
   tree data_ref;
-  gimple new_stmt;
   basic_block new_bb;
   tree msq_init = NULL_TREE;
   tree new_temp;
-  gimple phi_stmt;
+  gphi *phi_stmt;
   tree msq = NULL_TREE;
   gimple_seq stmts = NULL;
   bool inv_p;
@@ -4873,15 +4992,19 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
   if (alignment_support_scheme == dr_explicit_realign_optimized)
     {
       /* Create msq_init = *(floor(p1)) in the loop preheader  */
+      gassign *new_stmt;
 
       gcc_assert (!compute_in_loop);
       vec_dest = vect_create_destination_var (scalar_dest, vectype);
       ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
                                      NULL_TREE, &init_addr, NULL, &inc,
                                      true, &inv_p);
-      new_temp = copy_ssa_name (ptr, NULL);
-      new_stmt = gimple_build_assign_with_ops
-                  (BIT_AND_EXPR, new_temp, ptr,
+      if (TREE_CODE (ptr) == SSA_NAME)
+       new_temp = copy_ssa_name (ptr);
+      else
+       new_temp = make_ssa_name (TREE_TYPE (ptr));
+      new_stmt = gimple_build_assign
+                  (new_temp, BIT_AND_EXPR, ptr,
                    build_int_cst (TREE_TYPE (ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
       new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
@@ -4909,6 +5032,7 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
 
   if (targetm.vectorize.builtin_mask_for_load)
     {
+      gcall *new_stmt;
       tree builtin_decl;
 
       /* Compute INIT_ADDR - the initial addressed accessed by this memref.  */
@@ -4966,7 +5090,7 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
 
   pe = loop_preheader_edge (containing_loop);
   vec_dest = vect_create_destination_var (scalar_dest, vectype);
-  msq = make_ssa_name (vec_dest, NULL);
+  msq = make_ssa_name (vec_dest);
   phi_stmt = create_phi_node (msq, containing_loop->header);
   add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
 
@@ -5017,7 +5141,7 @@ vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "shuffle of 3 loads is not supported by"
                                     " target\n");
-                   return false;
+                 return false;
                }
              for (i = 0, j = 0; i < nelt; i++)
                if (3 * i + k < 2 * nelt)
@@ -5148,14 +5272,14 @@ vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
 static void
 vect_permute_load_chain (vec<tree> dr_chain,
                         unsigned int length,
-                        gimple stmt,
+                        gimple *stmt,
                         gimple_stmt_iterator *gsi,
                         vec<tree> *result_chain)
 {
   tree data_ref, first_vect, second_vect;
   tree perm_mask_even, perm_mask_odd;
   tree perm3_mask_low, perm3_mask_high;
-  gimple perm_stmt;
+  gimple *perm_stmt;
   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
   unsigned int i, j, log_length = exact_log2 (length);
   unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
@@ -5193,9 +5317,8 @@ vect_permute_load_chain (vec<tree> dr_chain,
             low = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
                                                             ...}>  */
          data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
-         perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                                   first_vect, second_vect,
-                                                   perm3_mask_low);
+         perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
+                                          second_vect, perm3_mask_low);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
 
          /* Create interleaving stmt (high part of):
@@ -5204,9 +5327,8 @@ vect_permute_load_chain (vec<tree> dr_chain,
          first_vect = data_ref;
          second_vect = dr_chain[2];
          data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
-         perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                                   first_vect, second_vect,
-                                                   perm3_mask_high);
+         perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
+                                          second_vect, perm3_mask_high);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          (*result_chain)[k] = data_ref;
        }
@@ -5233,17 +5355,17 @@ vect_permute_load_chain (vec<tree> dr_chain,
 
              /* data_ref = permute_even (first_data_ref, second_data_ref);  */
              data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
-             perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                                       first_vect, second_vect,
-                                                       perm_mask_even);
+             perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+                                              first_vect, second_vect,
+                                              perm_mask_even);
              vect_finish_stmt_generation (stmt, perm_stmt, gsi);
              (*result_chain)[j/2] = data_ref;
 
              /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
              data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
-             perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                                       first_vect, second_vect,
-                                                       perm_mask_odd);
+             perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+                                              first_vect, second_vect,
+                                              perm_mask_odd);
              vect_finish_stmt_generation (stmt, perm_stmt, gsi);
              (*result_chain)[j/2+length/2] = data_ref;
            }
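
In the same spirit, a scalar model (not GCC code) of the even/odd extraction performed by the permute_even and permute_odd steps above:

/* Collect the even-indexed and odd-indexed elements of the
   concatenation of V1 and V2.  */
void
extract_even_odd (const int *v1, const int *v2, int nelt,
                  int *even, int *odd)
{
  for (int i = 0; i < nelt / 2; i++)
    {
      even[i] = v1[2 * i];
      even[nelt / 2 + i] = v2[2 * i];
      odd[i] = v1[2 * i + 1];
      odd[nelt / 2 + i] = v2[2 * i + 1];
    }
}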
@@ -5343,14 +5465,14 @@ vect_permute_load_chain (vec<tree> dr_chain,
 static bool
 vect_shift_permute_load_chain (vec<tree> dr_chain,
                               unsigned int length,
-                              gimple stmt,
+                              gimple *stmt,
                               gimple_stmt_iterator *gsi,
                               vec<tree> *result_chain)
 {
   tree vect[3], vect_shift[3], data_ref, first_vect, second_vect;
   tree perm2_mask1, perm2_mask2, perm3_mask;
   tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask;
-  gimple perm_stmt;
+  gimple *perm_stmt;
 
   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
   unsigned int i;
@@ -5430,30 +5552,28 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
              second_vect = dr_chain[j + 1];
 
              data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
-             perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                                       first_vect, first_vect,
-                                                       perm2_mask1);
+             perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+                                              first_vect, first_vect,
+                                              perm2_mask1);
              vect_finish_stmt_generation (stmt, perm_stmt, gsi);
              vect[0] = data_ref;
 
              data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
-             perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                                       second_vect, second_vect,
-                                                       perm2_mask2);
+             perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+                                              second_vect, second_vect,
+                                              perm2_mask2);
              vect_finish_stmt_generation (stmt, perm_stmt, gsi);
              vect[1] = data_ref;
 
              data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
-             perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                                       vect[0], vect[1],
-                                                       shift1_mask);
+             perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+                                              vect[0], vect[1], shift1_mask);
              vect_finish_stmt_generation (stmt, perm_stmt, gsi);
              (*result_chain)[j/2 + length/2] = data_ref;
 
              data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
-             perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                                       vect[0], vect[1],
-                                                       select_mask);
+             perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+                                              vect[0], vect[1], select_mask);
              vect_finish_stmt_generation (stmt, perm_stmt, gsi);
              (*result_chain)[j/2] = data_ref;
            }
@@ -5543,9 +5663,9 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
       for (k = 0; k < 3; k++)
        {
          data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3");
-         perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                                   dr_chain[k], dr_chain[k],
-                                                   perm3_mask);
+         perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+                                          dr_chain[k], dr_chain[k],
+                                          perm3_mask);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          vect[k] = data_ref;
        }
@@ -5553,10 +5673,9 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
       for (k = 0; k < 3; k++)
        {
          data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift1");
-         perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                                   vect[k % 3],
-                                                   vect[(k + 1) % 3],
-                                                   shift1_mask);
+         perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+                                          vect[k % 3], vect[(k + 1) % 3],
+                                          shift1_mask);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          vect_shift[k] = data_ref;
        }
@@ -5564,10 +5683,10 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
       for (k = 0; k < 3; k++)
        {
          data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift2");
-         perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                                   vect_shift[(4 - k) % 3],
-                                                   vect_shift[(3 - k) % 3],
-                                                   shift2_mask);
+         perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+                                          vect_shift[(4 - k) % 3],
+                                          vect_shift[(3 - k) % 3],
+                                          shift2_mask);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          vect[k] = data_ref;
        }
@@ -5575,16 +5694,14 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
       (*result_chain)[3 - (nelt % 3)] = vect[2];
 
       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift3");
-      perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                               vect[0], vect[0],
-                                               shift3_mask);
+      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0],
+                                      vect[0], shift3_mask);
       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
       (*result_chain)[nelt % 3] = data_ref;
 
       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift4");
-      perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
-                                               vect[1], vect[1],
-                                               shift4_mask);
+      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1],
+                                      vect[1], shift4_mask);
       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
       (*result_chain)[0] = data_ref;
       return true;
@@ -5600,7 +5717,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
 */
 
 void
-vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
+vect_transform_grouped_load (gimple *stmt, vec<tree> dr_chain, int size,
                             gimple_stmt_iterator *gsi)
 {
   machine_mode mode;
@@ -5616,6 +5733,7 @@ vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
      get chain for loads group using vect_shift_permute_load_chain.  */
   mode = TYPE_MODE (STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)));
   if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
+      || exact_log2 (size) != -1
       || !vect_shift_permute_load_chain (dr_chain, size, stmt,
                                         gsi, &result_chain))
     vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
@@ -5628,10 +5746,10 @@ vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
    for each vector to the associated scalar statement.  */
 
 void
-vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
+vect_record_grouped_load_vectors (gimple *stmt, vec<tree> result_chain)
 {
-  gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
-  gimple next_stmt, new_stmt;
+  gimple *first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
+  gimple *next_stmt, *new_stmt;
   unsigned int i, gap_count;
   tree tmp_data_ref;
 
@@ -5670,9 +5788,9 @@ vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
             {
               if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
                 {
-                 gimple prev_stmt =
+                 gimple *prev_stmt =
                    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
-                 gimple rel_stmt =
+                 gimple *rel_stmt =
                    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
                  while (rel_stmt)
                    {
@@ -5708,58 +5826,10 @@ vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
   if (TREE_CODE (decl) != VAR_DECL)
     return false;
 
-  /* With -fno-toplevel-reorder we may have already output the constant.  */
-  if (TREE_ASM_WRITTEN (decl))
-    return false;
-
-  /* Constant pool entries may be shared and not properly merged by LTO.  */
-  if (DECL_IN_CONSTANT_POOL (decl))
-    return false;
-
-  if (TREE_PUBLIC (decl) || DECL_EXTERNAL (decl))
-    {
-      symtab_node *snode;
-
-      /* We cannot change alignment of symbols that may bind to symbols
-        in other translation unit that may contain a definition with lower
-        alignment.  */
-      if (!decl_binds_to_current_def_p (decl))
-       return false;
-
-      /* When compiling partition, be sure the symbol is not output by other
-        partition.  */
-      snode = symtab_node::get (decl);
-      if (flag_ltrans
-         && (snode->in_other_partition
-             || snode->get_partitioning_class () == SYMBOL_DUPLICATE))
-       return false;
-    }
-
-  /* Do not override the alignment as specified by the ABI when the used
-     attribute is set.  */
-  if (DECL_PRESERVE_P (decl))
-    return false;
-
-  /* Do not override explicit alignment set by the user when an explicit
-     section name is also used.  This is a common idiom used by many
-     software projects.  */
-  if (TREE_STATIC (decl) 
-      && DECL_SECTION_NAME (decl) != NULL
-      && !symtab_node::get (decl)->implicit_section)
+  if (decl_in_symtab_p (decl)
+      && !symtab_node::get (decl)->can_increase_alignment_p ())
     return false;
 
-  /* If symbol is an alias, we need to check that target is OK.  */
-  if (TREE_STATIC (decl))
-    {
-      tree target = symtab_node::get (decl)->ultimate_alias_target ()->decl;
-      if (target != decl)
-       {
-         if (DECL_PRESERVE_P (target))
-           return false;
-         decl = target;
-       }
-    }
-
   if (TREE_STATIC (decl))
     return (alignment <= MAX_OFILE_ALIGNMENT);
   else
@@ -5777,7 +5847,7 @@ enum dr_alignment_support
 vect_supportable_dr_alignment (struct data_reference *dr,
                                bool check_aligned_accesses)
 {
-  gimple stmt = DR_STMT (dr);
+  gimple *stmt = DR_STMT (dr);
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   machine_mode mode = TYPE_MODE (vectype);