diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 750af2e3dd0175a1f2b4d4c9cb9e8afd9749fcc8..cf2d979fea11e1a4cb47f31e5b591a5f0a91a852 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1,5 +1,5 @@
 /* Statement Analysis and Transformation for Vectorization
-   Copyright (C) 2003-2019 Free Software Foundation, Inc.
+   Copyright (C) 2003-2020 Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com>
    and Ira Rosen <irar@il.ibm.com>
 
@@ -70,11 +70,11 @@ stmt_vectype (class _stmt_vec_info *stmt_info)
 /* Return TRUE iff the given statement is in an inner loop relative to
    the loop being vectorized.  */
 bool
-stmt_in_inner_loop_p (class _stmt_vec_info *stmt_info)
+stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
 {
   gimple *stmt = STMT_VINFO_STMT (stmt_info);
   basic_block bb = gimple_bb (stmt);
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   class loop* loop;
 
   if (!loop_vinfo)
@@ -92,19 +92,19 @@ stmt_in_inner_loop_p (class _stmt_vec_info *stmt_info)
 unsigned
 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
-                 int misalign, enum vect_cost_model_location where)
+                 tree vectype, int misalign,
+                 enum vect_cost_model_location where)
 {
   if ((kind == vector_load || kind == unaligned_load)
-      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
     kind = vector_gather_load;
   if ((kind == vector_store || kind == unaligned_store)
-      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
     kind = vector_scatter_store;
 
-  stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
+  stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
   body_cost_vec->safe_push (si);
 
-  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
   return (unsigned)
       (builtin_vectorization_cost (kind, vectype, misalign) * count);
 }
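
As a usage sketch (not part of the patch): callers now pass the vector type explicitly, which also permits a null STMT_INFO, e.g. when costing invariants.  cost_vec, stmt_info and vectype below stand for a caller's locals.

    /* Sketch: record one vector statement in the loop body with the new
       explicit-VECTYPE interface; STMT_INFO may be NULL here.  */
    unsigned inside_cost
      = record_stmt_cost (cost_vec, 1, vector_stmt,
                          stmt_info, vectype, 0 /* misalign */, vect_body);
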
@@ -124,7 +124,8 @@ create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
    with scalar destination SCALAR_DEST.  */
 
 static tree
-read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+read_vector_array (vec_info *vinfo,
+                  stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
 {
   tree vect_type, vect, vect_name, array_ref;
@@ -140,7 +141,7 @@ read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   new_stmt = gimple_build_assign (vect, array_ref);
   vect_name = make_ssa_name (vect, new_stmt);
   gimple_assign_set_lhs (new_stmt, vect_name);
-  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 
   return vect_name;
 }
@@ -150,7 +151,8 @@ read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    The store is part of the vectorization of STMT_INFO.  */
 
 static void
-write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+write_vector_array (vec_info *vinfo,
+                   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    tree vect, tree array, unsigned HOST_WIDE_INT n)
 {
   tree array_ref;
@@ -161,7 +163,7 @@ write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                      NULL_TREE, NULL_TREE);
 
   new_stmt = gimple_build_assign (array_ref, vect);
-  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 }
 
 /* PTR is a pointer to an array of type TYPE.  Return a representation
@@ -183,12 +185,12 @@ create_array_ref (tree type, tree ptr, tree alias_ptr_type)
    Emit the clobber before *GSI.  */
 
 static void
-vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                      tree var)
+vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
+                      gimple_stmt_iterator *gsi, tree var)
 {
   tree clobber = build_clobber (TREE_TYPE (var));
   gimple *new_stmt = gimple_build_assign (var, clobber);
-  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 }
 
 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
@@ -634,6 +636,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
        }
       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
+         if (is_gimple_debug (gsi_stmt (si)))
+           continue;
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
              dump_printf_loc (MSG_NOTE, vect_location,
@@ -784,71 +788,6 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
   return opt_result::success ();
 }
 
-/* Compute the prologue cost for invariant or constant operands.  */
-
-static unsigned
-vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
-                              unsigned opno, enum vect_def_type dt,
-                              stmt_vector_for_cost *cost_vec)
-{
-  vec_info *vinfo = stmt_info->vinfo;
-  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
-  tree op = gimple_op (stmt, opno);
-  unsigned prologue_cost = 0;
-
-  /* Without looking at the actual initializer a vector of
-     constants can be implemented as load from the constant pool.
-     When all elements are the same we can use a splat.  */
-  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
-  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
-  unsigned num_vects_to_check;
-  unsigned HOST_WIDE_INT const_nunits;
-  unsigned nelt_limit;
-  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
-      && ! multiple_p (const_nunits, group_size))
-    {
-      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
-      nelt_limit = const_nunits;
-    }
-  else
-    {
-      /* If either the vector has variable length or the vectors
-        are composed of repeated whole groups we only need to
-        cost construction once.  All vectors will be the same.  */
-      num_vects_to_check = 1;
-      nelt_limit = group_size;
-    }
-  tree elt = NULL_TREE;
-  unsigned nelt = 0;
-  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
-    {
-      unsigned si = j % group_size;
-      if (nelt == 0)
-       elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
-      /* ???  We're just tracking whether all operands of a single
-        vector initializer are the same, ideally we'd check if
-        we emitted the same one already.  */
-      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
-                                opno))
-       elt = NULL_TREE;
-      nelt++;
-      if (nelt == nelt_limit)
-       {
-         /* ???  We need to pass down stmt_info for a vector type
-            even if it points to the wrong stmt.  */
-         prologue_cost += record_stmt_cost
-             (cost_vec, 1,
-              dt == vect_external_def
-              ? (elt ? scalar_to_vec : vec_construct)
-              : vector_load,
-              stmt_info, 0, vect_prologue);
-         nelt = 0;
-       }
-    }
-
-  return prologue_cost;
-}
-
 /* Function vect_model_simple_cost.
 
    Models cost for simple operations, i.e. those that only emit ncopies of a
@@ -856,11 +795,13 @@ vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
    be generated for the single vector op.  We will handle that shortly.  */
 
 static void
-vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
+vect_model_simple_cost (vec_info *,
+                       stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        slp_tree node,
-                       stmt_vector_for_cost *cost_vec)
+                       stmt_vector_for_cost *cost_vec,
+                       vect_cost_for_stmt kind = vector_stmt)
 {
   int inside_cost = 0, prologue_cost = 0;
 
@@ -870,26 +811,7 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
   if (node)
     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
 
-  if (node)
-    {
-      /* Scan operands and account for prologue cost of constants/externals.
-        ???  This over-estimates cost for multiple uses and should be
-        re-engineered.  */
-      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
-      tree lhs = gimple_get_lhs (stmt);
-      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
-       {
-         tree op = gimple_op (stmt, i);
-         enum vect_def_type dt;
-         if (!op || op == lhs)
-           continue;
-         if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
-             && (dt == vect_constant_def || dt == vect_external_def))
-           prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
-                                                           i, dt, cost_vec);
-       }
-    }
-  else
+  if (!node)
     /* Cost the "broadcast" of a scalar operand into a vector operand.
        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
        cost model.  */
@@ -907,7 +829,7 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
     }
 
   /* Pass the inside-of-loop statements to the target-specific cost model.  */
-  inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
+  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
                                   stmt_info, 0, vect_body);
 
   if (dump_enabled_p ())
@@ -917,26 +839,27 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
 }
 
 
-/* Model cost for type demotion and promotion operations.  PWR is normally
-   zero for single-step promotions and demotions.  It will be one if 
-   two-step promotion/demotion is required, and so on.  Each additional
+/* Model cost for type demotion and promotion operations.  PWR is
+   normally zero for single-step promotions and demotions.  It will be
+   one if two-step promotion/demotion is required, and so on.  NCOPIES
+   is the number of vector results (and thus number of instructions)
+   for the narrowest end of the operation chain.  Each additional
    step doubles the number of instructions required.  */
 
 static void
 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
-                                   enum vect_def_type *dt, int pwr,
+                                   enum vect_def_type *dt,
+                                   unsigned int ncopies, int pwr,
                                    stmt_vector_for_cost *cost_vec)
 {
-  int i, tmp;
+  int i;
   int inside_cost = 0, prologue_cost = 0;
 
   for (i = 0; i < pwr + 1; i++)
     {
-      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
-       (i + 1) : i;
-      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
-                                      vec_promote_demote, stmt_info, 0,
-                                      vect_body);
+      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
+                                      stmt_info, 0, vect_body);
+      ncopies *= 2;
     }
 
   /* FORNOW: Assuming maximum 2 args per stmts.  */
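
The recurrence the new loop implements: starting from NCOPIES statements at the narrow end, each step doubles the count, so PWR+1 steps record ncopies * (2^(pwr+1) - 1) vec_promote_demote statements in total.  A self-contained illustration of that arithmetic (not GCC code):

    /* Mirrors the loop above: total promote/demote instructions for
       NCOPIES narrow-end results and PWR+1 conversion steps.  */
    static unsigned
    promote_demote_insn_count (unsigned ncopies, int pwr)
    {
      unsigned total = 0;
      for (int i = 0; i < pwr + 1; i++)
        {
          total += ncopies;   /* one vec_promote_demote per copy */
          ncopies *= 2;       /* the next step needs twice as many */
        }
      return total;           /* e.g. ncopies=2, pwr=1 -> 2 + 4 = 6 */
    }
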
@@ -988,8 +911,7 @@ cfun_returns (tree decl)
    has the overhead of the grouped access attributed to it.  */
 
 static void
-vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
-                      enum vect_def_type dt,
+vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
                       vect_memory_access_type memory_access_type,
                       vec_load_store_type vls_type, slp_tree slp_node,
                       stmt_vector_for_cost *cost_vec)
@@ -1004,10 +926,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
 
   if (vls_type == VLS_STORE_INVARIANT)
     {
-      if (slp_node)
-       prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
-                                                       1, dt, cost_vec);
-      else
+      if (!slp_node)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);
     }
@@ -1054,7 +973,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                                       scalar_store, stmt_info, 0, vect_body);
     }
   else
-    vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
+    vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
 
   if (memory_access_type == VMAT_ELEMENTWISE
       || memory_access_type == VMAT_STRIDED_SLP)
@@ -1106,13 +1025,13 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
 
 /* Calculate cost of DR's memory access.  */
 void
-vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
+vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
 {
   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
   int alignment_support_scheme
-    = vect_supportable_dr_alignment (dr_info, false);
+    = vect_supportable_dr_alignment (vinfo, dr_info, false);
 
   switch (alignment_support_scheme)
     {
@@ -1166,9 +1085,9 @@ vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
    access scheme chosen.  */
 
 static void
-vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
+vect_model_load_cost (vec_info *vinfo,
+                     stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
                      vect_memory_access_type memory_access_type,
-                     slp_instance instance,
                      slp_tree slp_node,
                      stmt_vector_for_cost *cost_vec)
 {
@@ -1190,10 +1109,8 @@ vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
       unsigned n_perms;
       unsigned assumed_nunits
        = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
-      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size; 
-      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
-                                   slp_vf, instance, true,
-                                   &n_perms);
+      vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
+                                   vf, true, &n_perms);
       inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
                                       first_stmt_info, 0, vect_body);
       /* And adjust the number of loads performed.  This handles
@@ -1267,7 +1184,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
                                       scalar_load, stmt_info, 0, vect_body);
     }
   else
-    vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
+    vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
                        &inside_cost, &prologue_cost, 
                        cost_vec, cost_vec, true);
   if (memory_access_type == VMAT_ELEMENTWISE
@@ -1284,7 +1201,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
 
 /* Calculate cost of DR's memory access.  */
 void
-vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
+vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
@@ -1293,7 +1210,7 @@ vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
 {
   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
   int alignment_support_scheme
-    = vect_supportable_dr_alignment (dr_info, false);
+    = vect_supportable_dr_alignment (vinfo, dr_info, false);
 
   switch (alignment_support_scheme)
     {
@@ -1400,14 +1317,14 @@ vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
    the loop preheader for the vectorized stmt STMT_VINFO.  */
 
 static void
-vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
+vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
                    gimple_stmt_iterator *gsi)
 {
   if (gsi)
-    vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
+    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
   else
     {
-      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+      loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
 
       if (loop_vinfo)
         {
@@ -1415,7 +1332,7 @@ vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
          basic_block new_bb;
          edge pe;
 
-         if (nested_in_vect_loop_p (loop, stmt_vinfo))
+         if (stmt_vinfo && nested_in_vect_loop_p (loop, stmt_vinfo))
            loop = loop->inner;
 
          pe = loop_preheader_edge (loop);
@@ -1424,14 +1341,9 @@ vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
        }
       else
        {
-          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
-          basic_block bb;
-          gimple_stmt_iterator gsi_bb_start;
-
-          gcc_assert (bb_vinfo);
-          bb = BB_VINFO_BB (bb_vinfo);
-          gsi_bb_start = gsi_after_labels (bb);
-          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
+          bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
+         gimple_stmt_iterator gsi_region_begin = bb_vinfo->region_begin;
+         gsi_insert_before (&gsi_region_begin, new_stmt, GSI_SAME_STMT);
        }
     }
 
@@ -1451,7 +1363,7 @@ vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
    It will be used in the vectorization of STMT_INFO.  */
 
 tree
-vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
+vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
                  gimple_stmt_iterator *gsi)
 {
   gimple *init_stmt;
@@ -1477,7 +1389,7 @@ vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
-                 vect_init_vector_1 (stmt_info, init_stmt, gsi);
+                 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                  val = new_temp;
                }
            }
@@ -1496,7 +1408,7 @@ vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
                {
                  init_stmt = gsi_stmt (gsi2);
                  gsi_remove (&gsi2, false);
-                 vect_init_vector_1 (stmt_info, init_stmt, gsi);
+                 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                }
            }
        }
@@ -1505,88 +1417,36 @@ vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
 
   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
   init_stmt = gimple_build_assign (new_temp, val);
-  vect_init_vector_1 (stmt_info, init_stmt, gsi);
+  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
   return new_temp;
 }
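
A minimal usage sketch, assuming VAL and VECTYPE come from the caller: a null GSI asks vect_init_vector_1 above to emit the initialization on the loop preheader edge (or at the start of the basic-block region).

    /* Sketch: splat invariant VAL into a vector for STMT_INFO's
       vectorized code; NULL GSI means "emit in the preheader".  */
    tree vec_val = vect_init_vector (vinfo, stmt_info, val, vectype, NULL);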
 
-/* Function vect_get_vec_def_for_operand_1.
-
-   For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
-   with type DT that will be used in the vectorized stmt.  */
-
-tree
-vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
-                               enum vect_def_type dt)
-{
-  tree vec_oprnd;
-  stmt_vec_info vec_stmt_info;
-
-  switch (dt)
-    {
-    /* operand is a constant or a loop invariant.  */
-    case vect_constant_def:
-    case vect_external_def:
-      /* Code should use vect_get_vec_def_for_operand.  */
-      gcc_unreachable ();
-
-    /* Operand is defined by a loop header phi.  In case of nested
-       cycles we also may have uses of the backedge def.  */
-    case vect_reduction_def:
-    case vect_double_reduction_def:
-    case vect_nested_cycle:
-    case vect_induction_def:
-      gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
-                 || dt == vect_nested_cycle);
-      /* Fallthru.  */
-
-    /* operand is defined inside the loop.  */
-    case vect_internal_def:
-      {
-        /* Get the def from the vectorized stmt.  */
-       vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
-       /* Get vectorized pattern statement.  */
-       if (!vec_stmt_info
-           && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
-           && !STMT_VINFO_RELEVANT (def_stmt_info))
-         vec_stmt_info = (STMT_VINFO_VEC_STMT
-                          (STMT_VINFO_RELATED_STMT (def_stmt_info)));
-       gcc_assert (vec_stmt_info);
-       if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
-         vec_oprnd = PHI_RESULT (phi);
-       else
-         vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
-       return vec_oprnd;
-      }
-
-    default:
-      gcc_unreachable ();
-    }
-}
-
 
-/* Function vect_get_vec_def_for_operand.
+/* Function vect_get_vec_defs_for_operand.
 
-   OP is an operand in STMT_VINFO.  This function returns a (vector) def
-   that will be used in the vectorized stmt for STMT_VINFO.
+   OP is an operand in STMT_VINFO.  This function returns a vector of
+   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
 
    In the case that OP is an SSA_NAME which is defined in the loop, then
-   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
+   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
 
    In case OP is an invariant or constant, a new stmt that creates a vector def
    needs to be introduced.  VECTYPE may be used to specify a required type for
    vector invariant.  */
 
-tree
-vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
+void
+vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+                              unsigned ncopies,
+                              tree op, vec<tree> *vec_oprnds, tree vectype)
 {
   gimple *def_stmt;
   enum vect_def_type dt;
   bool is_simple_use;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
-                    "vect_get_vec_def_for_operand: %T\n", op);
+                    "vect_get_vec_defs_for_operand: %T\n", op);
 
   stmt_vec_info def_stmt_info;
   is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
@@ -1595,6 +1455,7 @@ vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
   if (def_stmt && dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);
 
+  vec_oprnds->create (ncopies);
   if (dt == vect_constant_def || dt == vect_external_def)
     {
       tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
@@ -1604,109 +1465,22 @@ vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
        vector_type = vectype;
       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
               && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
-       vector_type = build_same_sized_truth_vector_type (stmt_vectype);
+       vector_type = truth_type_for (stmt_vectype);
       else
        vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
 
       gcc_assert (vector_type);
-      return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
+      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
+      while (ncopies--)
+       vec_oprnds->quick_push (vop);
     }
   else
-    return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
-}
-
-
-/* Function vect_get_vec_def_for_stmt_copy
-
-   Return a vector-def for an operand.  This function is used when the
-   vectorized stmt to be created (by the caller to this function) is a "copy"
-   created in case the vectorized result cannot fit in one vector, and several
-   copies of the vector-stmt are required.  In this case the vector-def is
-   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
-   of the stmt that defines VEC_OPRND.  VINFO describes the vectorization.
-
-   Context:
-        In case the vectorization factor (VF) is bigger than the number
-   of elements that can fit in a vectype (nunits), we have to generate
-   more than one vector stmt to vectorize the scalar stmt.  This situation
-   arises when there are multiple data-types operated upon in the loop; the
-   smallest data-type determines the VF, and as a result, when vectorizing
-   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
-   vector stmt (each computing a vector of 'nunits' results, and together
-   computing 'VF' results in each iteration).  This function is called when
-   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
-   which VF=16 and nunits=4, so the number of copies required is 4):
-
-   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
-
-   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
-                        VS1.1:  vx.1 = memref1      VS1.2
-                        VS1.2:  vx.2 = memref2      VS1.3
-                        VS1.3:  vx.3 = memref3
-
-   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
-                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
-                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
-                        VSnew.3:  vz3 = vx.3 + ...
-
-   The vectorization of S1 is explained in vectorizable_load.
-   The vectorization of S2:
-        To create the first vector-stmt out of the 4 copies - VSnew.0 -
-   the function 'vect_get_vec_def_for_operand' is called to
-   get the relevant vector-def for each operand of S2.  For operand x it
-   returns  the vector-def 'vx.0'.
-
-        To create the remaining copies of the vector-stmt (VSnew.j), this
-   function is called to get the relevant vector-def for each operand.  It is
-   obtained from the respective VS1.j stmt, which is recorded in the
-   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
-
-        For example, to obtain the vector-def 'vx.1' in order to create the
-   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
-   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
-   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
-   and return its def ('vx.1').
-   Overall, to create the above sequence this function will be called 3 times:
-       vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
-       vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
-       vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2);  */
-
-tree
-vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
-{
-  stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
-  if (!def_stmt_info)
-    /* Do nothing; can reuse same def.  */
-    return vec_oprnd;
-
-  def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
-  gcc_assert (def_stmt_info);
-  if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
-    vec_oprnd = PHI_RESULT (phi);
-  else
-    vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
-  return vec_oprnd;
-}
-
-
-/* Get vectorized definitions for the operands to create a copy of an original
-   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
-
-void
-vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
-                                vec<tree> *vec_oprnds0,
-                                vec<tree> *vec_oprnds1)
-{
-  tree vec_oprnd = vec_oprnds0->pop ();
-
-  vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
-  vec_oprnds0->quick_push (vec_oprnd);
-
-  if (vec_oprnds1 && vec_oprnds1->length ())
     {
-      vec_oprnd = vec_oprnds1->pop ();
-      vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
-      vec_oprnds1->quick_push (vec_oprnd);
+      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
+      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
+      for (unsigned i = 0; i < ncopies; ++i)
+       vec_oprnds->quick_push (gimple_get_lhs
+                                 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
     }
 }
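
Callers that previously chained vect_get_vec_def_for_stmt_copy per copy now fetch all NCOPIES defs up front.  A minimal sketch, assuming OP is an operand of STMT_INFO and NCOPIES is already known:

    /* Sketch: one vector def per copy, collected in a single call.  */
    auto_vec<tree> vec_oprnds;
    vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                   op, &vec_oprnds);
    for (unsigned j = 0; j < ncopies; ++j)
      {
        tree vop = vec_oprnds[j];
        /* ... build the j'th copy of the vector statement from VOP ...  */
      }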
 
@@ -1714,85 +1488,108 @@ vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
 /* Get vectorized definitions for OP0, OP1, OP2 and OP3, any of which
    may be NULL_TREE.  */
 
 void
-vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
-                  vec<tree> *vec_oprnds0,
-                  vec<tree> *vec_oprnds1,
-                  slp_tree slp_node)
+vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
+                  unsigned ncopies,
+                  tree op0, vec<tree> *vec_oprnds0, tree vectype0,
+                  tree op1, vec<tree> *vec_oprnds1, tree vectype1,
+                  tree op2, vec<tree> *vec_oprnds2, tree vectype2,
+                  tree op3, vec<tree> *vec_oprnds3, tree vectype3)
 {
   if (slp_node)
     {
-      auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
-      vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1);
-      *vec_oprnds0 = vec_defs[0];
+      if (op0)
+       vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
       if (op1)
-       *vec_oprnds1 = vec_defs[1];
+       vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
+      if (op2)
+       vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
+      if (op3)
+       vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
     }
   else
     {
-      tree vec_oprnd;
-
-      vec_oprnds0->create (1);
-      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
-      vec_oprnds0->quick_push (vec_oprnd);
-
+      if (op0)
+       vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+                                      op0, vec_oprnds0, vectype0);
       if (op1)
-       {
-         vec_oprnds1->create (1);
-         vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
-         vec_oprnds1->quick_push (vec_oprnd);
-       }
+       vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+                                      op1, vec_oprnds1, vectype1);
+      if (op2)
+       vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+                                      op2, vec_oprnds2, vectype2);
+      if (op3)
+       vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+                                      op3, vec_oprnds3, vectype3);
     }
 }
 
+void
+vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
+                  unsigned ncopies,
+                  tree op0, vec<tree> *vec_oprnds0,
+                  tree op1, vec<tree> *vec_oprnds1,
+                  tree op2, vec<tree> *vec_oprnds2,
+                  tree op3, vec<tree> *vec_oprnds3)
+{
+  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+                    op0, vec_oprnds0, NULL_TREE,
+                    op1, vec_oprnds1, NULL_TREE,
+                    op2, vec_oprnds2, NULL_TREE,
+                    op3, vec_oprnds3, NULL_TREE);
+}
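
A sketch of the common binary case, with op0/op1 assumed caller locals; unused trailing operand slots take NULL_TREE and NULL, as in the wrapper above:

    /* Sketch: defs for a two-operand statement; the helper handles the
       SLP and non-SLP paths uniformly.  */
    auto_vec<tree> vec_oprnds0, vec_oprnds1;
    vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                       op0, &vec_oprnds0,
                       op1, &vec_oprnds1,
                       NULL_TREE, NULL, NULL_TREE, NULL);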
+
 /* Helper function called by vect_finish_replace_stmt and
    vect_finish_stmt_generation.  Set the location of the new
    statement and, if the scalar statement belonged to an EH region,
    add the new statement to the same region.  */
 
-static stmt_vec_info
-vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
+static void
+vect_finish_stmt_generation_1 (vec_info *,
+                              stmt_vec_info stmt_info, gimple *vec_stmt)
 {
-  vec_info *vinfo = stmt_info->vinfo;
-
-  stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
-
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
 
-  gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
-
-  /* While EH edges will generally prevent vectorization, stmt might
-     e.g. be in a must-not-throw region.  Ensure newly created stmts
-     that could throw are part of the same region.  */
-  int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
-  if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
-    add_stmt_to_eh_lp (vec_stmt, lp_nr);
+  if (stmt_info)
+    {
+      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
 
-  return vec_stmt_info;
+      /* While EH edges will generally prevent vectorization, stmt might
+        e.g. be in a must-not-throw region.  Ensure newly created stmts
+        that could throw are part of the same region.  */
+      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
+      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
+       add_stmt_to_eh_lp (vec_stmt, lp_nr);
+    }
+  else
+    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
 }
 
 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
    which sets the same scalar result as STMT_INFO did.  */
 
-stmt_vec_info
-vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
+void
+vect_finish_replace_stmt (vec_info *vinfo,
+                         stmt_vec_info stmt_info, gimple *vec_stmt)
 {
-  gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
+  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
+  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
 
-  gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
+  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
   gsi_replace (&gsi, vec_stmt, true);
 
-  return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
+  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
 }
 
 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
    before *GSI.  */
 
-stmt_vec_info
-vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
+void
+vect_finish_stmt_generation (vec_info *vinfo,
+                            stmt_vec_info stmt_info, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
 {
-  gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
+  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
 
   if (!gsi_end_p (*gsi)
       && gimple_has_mem_ops (vec_stmt))
@@ -1821,7 +1618,7 @@ vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
        }
     }
   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
-  return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
+  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
 }
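
With the helpers now returning void, a typical emission sequence under the new interface looks like this (VAR and VAL are assumed caller locals):

    /* Sketch: build an assignment and register it as part of STMT_INFO's
       vectorized code, inserted before *GSI.  */
    gassign *new_stmt = gimple_build_assign (var, val);
    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);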
 
 /* We want to vectorize a call to combined function CFN with function
@@ -1854,13 +1651,13 @@ vectorizable_internal_function (combined_fn cfn, tree fndecl,
 }
 
 
-static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
+static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
                                  gimple_stmt_iterator *);
 
 /* Check whether a load or store statement in the loop described by
-   LOOP_VINFO is possible in a fully-masked loop.  This is testing
-   whether the vectorizer pass has the appropriate support, as well as
-   whether the target does.
+   LOOP_VINFO is possible in a loop using partial vectors.  This is
+   testing whether the vectorizer pass has the appropriate support,
+   as well as whether the target does.
 
    VLS_TYPE says whether the statement is a load or store and VECTYPE
    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
@@ -1870,14 +1667,18 @@ static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
    its arguments.  If the load or store is conditional, SCALAR_MASK is the
    condition under which it occurs.
 
-   Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
-   supported, otherwise record the required mask types.  */
+   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
+   vectors is not supported, otherwise record the required rgroup control
+   types.  */
 
 static void
-check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
-                         vec_load_store_type vls_type, int group_size,
-                         vect_memory_access_type memory_access_type,
-                         gather_scatter_info *gs_info, tree scalar_mask)
+check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
+                                     vec_load_store_type vls_type,
+                                     int group_size,
+                                     vect_memory_access_type
+                                     memory_access_type,
+                                     gather_scatter_info *gs_info,
+                                     tree scalar_mask)
 {
   /* Invariant loads need no special support.  */
   if (memory_access_type == VMAT_INVARIANT)
@@ -1894,10 +1695,10 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop because the"
-                            " target doesn't have an appropriate masked"
+                            "can't operate on partial vectors because"
+                            " the target doesn't have an appropriate"
                             " load/store-lanes instruction.\n");
-         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+         LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
          return;
        }
       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
@@ -1917,10 +1718,10 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop because the"
-                            " target doesn't have an appropriate masked"
+                            "can't operate on partial vectors because"
+                            " the target doesn't have an appropriate"
                             " gather load or scatter store instruction.\n");
-         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+         LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
          return;
        }
       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
@@ -1935,16 +1736,15 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
         scalar loop.  We need more work to support other mappings.  */
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "can't use a fully-masked loop because an access"
-                        " isn't contiguous.\n");
-      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+                        "can't operate on partial vectors because an"
+                        " access isn't contiguous.\n");
+      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
       return;
     }
 
   machine_mode mask_mode;
-  if (!(targetm.vectorize.get_mask_mode
-       (GET_MODE_NUNITS (vecmode),
-        GET_MODE_SIZE (vecmode)).exists (&mask_mode))
+  if (!VECTOR_MODE_P (vecmode)
+      || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
       || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
     {
       if (dump_enabled_p ())
@@ -1952,7 +1752,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
                         "can't use a fully-masked loop because the target"
                         " doesn't have the appropriate masked load or"
                         " store.\n");
-      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
       return;
     }
   /* We might load more scalars than we need for permuting SLP loads.
@@ -2121,10 +1921,10 @@ vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
 
 static int
-compare_step_with_zero (stmt_vec_info stmt_info)
+compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
 {
   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
-  return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
+  return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
                               size_zero_node);
 }
 
@@ -2152,7 +1952,8 @@ perm_mask_for_reverse (tree vectype)
    accesses consecutive elements with a negative step.  */
 
 static vect_memory_access_type
-get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
+get_negative_load_store_type (vec_info *vinfo,
+                             stmt_vec_info stmt_info, tree vectype,
                              vec_load_store_type vls_type,
                              unsigned int ncopies)
 {
@@ -2167,7 +1968,8 @@ get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
       return VMAT_ELEMENTWISE;
     }
 
-  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
+  alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
+                                                           dr_info, false);
   if (alignment_support_scheme != dr_aligned
       && alignment_support_scheme != dr_unaligned_supported)
     {
@@ -2218,6 +2020,62 @@ vect_get_store_rhs (stmt_vec_info stmt_info)
   gcc_unreachable ();
 }
 
+/* Function VECTOR_VECTOR_COMPOSITION_TYPE
+
+   This function returns a vector type which can be composed of NELTS pieces,
+   whose type is recorded in PTYPE.  VTYPE should be a vector type and have
+   the same vector size as the returned vector.  It first checks whether the
+   target supports a piece-sized vector mode for the construction; if not,
+   it then checks for a piece-sized scalar mode.  It returns NULL_TREE if no
+   usable composition can be found.
+
+   For example, for (vtype=V16QI, nelts=4), we can probably get:
+     - V16QI with PTYPE V4QI.
+     - V4SI with PTYPE SI.
+     - NULL_TREE.  */
+
+static tree
+vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
+{
+  gcc_assert (VECTOR_TYPE_P (vtype));
+  gcc_assert (known_gt (nelts, 0U));
+
+  machine_mode vmode = TYPE_MODE (vtype);
+  if (!VECTOR_MODE_P (vmode))
+    return NULL_TREE;
+
+  poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
+  unsigned int pbsize;
+  if (constant_multiple_p (vbsize, nelts, &pbsize))
+    {
+      /* First check if vec_init optab supports construction from
+        vector pieces directly.  */
+      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
+      poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
+      machine_mode rmode;
+      if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
+         && (convert_optab_handler (vec_init_optab, vmode, rmode)
+             != CODE_FOR_nothing))
+       {
+         *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
+         return vtype;
+       }
+
+      /* Otherwise check if exists an integer type of the same piece size and
+        if vec_init optab supports construction from it directly.  */
+      if (int_mode_for_size (pbsize, 0).exists (&elmode)
+         && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
+         && (convert_optab_handler (vec_init_optab, rmode, elmode)
+             != CODE_FOR_nothing))
+       {
+         *ptype = build_nonstandard_integer_type (pbsize, 1);
+         return build_vector_type (*ptype, nelts);
+       }
+    }
+
+  return NULL_TREE;
+}
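
A sketch of how the helper is queried, mirroring the grouped-access caller below (VECTYPE assumed from the caller):

    /* Sketch: can VECTYPE be constructed from two half-size pieces?  */
    tree half_vtype;
    if (vector_vector_composition_type (vectype, 2, &half_vtype) != NULL_TREE)
      {
        /* Yes: load the group as one HALF_VTYPE piece and build the
           full vector via vec_init, eliding the overrun.  */
      }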
+
 /* A subroutine of get_load_store_type, with a subset of the same
    arguments.  Handle the case where STMT_INFO is part of a grouped load
    or store.
@@ -2228,13 +2086,13 @@ vect_get_store_rhs (stmt_vec_info stmt_info)
    as well as at the end.  */
 
 static bool
-get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
+get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
+                          tree vectype, bool slp,
                           bool masked_p, vec_load_store_type vls_type,
                           vect_memory_access_type *memory_access_type,
                           gather_scatter_info *gs_info)
 {
-  vec_info *vinfo = stmt_info->vinfo;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
   dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
@@ -2298,22 +2156,18 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
             by simply loading half of the vector only.  Usually
             the construction with an upper zero half will be elided.  */
          dr_alignment_support alignment_support_scheme;
-         scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
-         machine_mode vmode;
+         tree half_vtype;
          if (overrun_p
              && !masked_p
              && (((alignment_support_scheme
-                     = vect_supportable_dr_alignment (first_dr_info, false)))
+                     = vect_supportable_dr_alignment (vinfo,
+                                                      first_dr_info, false)))
                   == dr_aligned
                  || alignment_support_scheme == dr_unaligned_supported)
              && known_eq (nunits, (group_size - gap) * 2)
              && known_eq (nunits, group_size)
-             && mode_for_vector (elmode, (group_size - gap)).exists (&vmode)
-             && VECTOR_MODE_P (vmode)
-             && targetm.vector_mode_supported_p (vmode)
-             && (convert_optab_handler (vec_init_optab,
-                                        TYPE_MODE (vectype), vmode)
-                 != CODE_FOR_nothing))
+             && (vector_vector_composition_type (vectype, 2, &half_vtype)
+                 != NULL_TREE))
            overrun_p = false;
 
          if (overrun_p && !can_overrun_p)
@@ -2323,10 +2177,10 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
                                 "Peeling for outer loop is not supported\n");
              return false;
            }
-         int cmp = compare_step_with_zero (stmt_info);
+         int cmp = compare_step_with_zero (vinfo, stmt_info);
          if (cmp < 0)
            *memory_access_type = get_negative_load_store_type
-             (stmt_info, vectype, vls_type, 1);
+             (vinfo, stmt_info, vectype, vls_type, 1);
          else
            {
              gcc_assert (!loop_vinfo || cmp > 0);
@@ -2355,7 +2209,7 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
 
       if (!STMT_VINFO_STRIDED_P (first_stmt_info)
          && (can_overrun_p || !would_overrun_p)
-         && compare_step_with_zero (stmt_info) > 0)
+         && compare_step_with_zero (vinfo, stmt_info) > 0)
        {
          /* First cope with the degenerate case of a single-element
             vector.  */
@@ -2444,14 +2298,14 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
    NCOPIES is the number of vector statements that will be needed.  */
 
 static bool
-get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
+get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
+                    tree vectype, bool slp,
                     bool masked_p, vec_load_store_type vls_type,
                     unsigned int ncopies,
                     vect_memory_access_type *memory_access_type,
                     gather_scatter_info *gs_info)
 {
-  vec_info *vinfo = stmt_info->vinfo;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     {
@@ -2471,7 +2325,7 @@ get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
     }
   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
     {
-      if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
+      if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp, masked_p,
                                      vls_type, memory_access_type, gs_info))
        return false;
     }
@@ -2487,10 +2341,10 @@ get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
     }
   else
     {
-      int cmp = compare_step_with_zero (stmt_info);
+      int cmp = compare_step_with_zero (vinfo, stmt_info);
       if (cmp < 0)
        *memory_access_type = get_negative_load_store_type
-         (stmt_info, vectype, vls_type, ncopies);
+         (vinfo, stmt_info, vectype, vls_type, ncopies);
       else if (cmp == 0)
        {
          gcc_assert (vls_type == VLS_LOAD);
@@ -2532,16 +2386,15 @@ get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
 }
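
A sketch of a vectorizable_load/store-style caller under the new signature (all locals assumed):

    /* Sketch: classify the memory access before transformation.  */
    vect_memory_access_type memory_access_type;
    gather_scatter_info gs_info;
    if (!get_load_store_type (vinfo, stmt_info, vectype, slp, masked_p,
                              vls_type, ncopies, &memory_access_type,
                              &gs_info))
      return false;  /* no supported memory access scheme */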
 
 /* Return true if boolean argument MASK is suitable for vectorizing
-   conditional load or store STMT_INFO.  When returning true, store the type
+   conditional operation STMT_INFO.  When returning true, store the type
    of the definition in *MASK_DT_OUT and the type of the vectorized mask
    in *MASK_VECTYPE_OUT.  */
 
 static bool
-vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
-                           vect_def_type *mask_dt_out,
-                           tree *mask_vectype_out)
+vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
+                       vect_def_type *mask_dt_out,
+                       tree *mask_vectype_out)
 {
-  vec_info *vinfo = stmt_info->vinfo;
   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
     {
       if (dump_enabled_p ())
@@ -2560,7 +2413,7 @@ vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
 
   enum vect_def_type mask_dt;
   tree mask_vectype;
-  if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
+  if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
     {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -2603,7 +2456,8 @@ vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
 
 static bool
-vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
+vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
+                     slp_tree slp_node, tree rhs,
                      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
                      vec_load_store_type *vls_type_out)
 {
@@ -2619,7 +2473,9 @@ vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
 
   enum vect_def_type rhs_dt;
   tree rhs_vectype;
-  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
+  slp_tree slp_op;
+  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
+                          &rhs, &slp_op, &rhs_dt, &rhs_vectype))
     {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -2650,7 +2506,8 @@ vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
    floats are interpreted as a bitmask.  */
 
 static tree
-vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
+vect_build_all_ones_mask (vec_info *vinfo,
+                         stmt_vec_info stmt_info, tree masktype)
 {
   if (TREE_CODE (masktype) == INTEGER_TYPE)
     return build_int_cst (masktype, -1);
@@ -2658,7 +2515,7 @@ vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
     {
       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
       mask = build_vector_from_val (masktype, mask);
-      return vect_init_vector (stmt_info, mask, masktype, NULL);
+      return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
     }
   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
     {
@@ -2669,7 +2526,7 @@ vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
       tree mask = build_real (TREE_TYPE (masktype), r);
       mask = build_vector_from_val (masktype, mask);
-      return vect_init_vector (stmt_info, mask, masktype, NULL);
+      return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
     }
   gcc_unreachable ();
 }
@@ -2678,7 +2535,8 @@ vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
    STMT_INFO as a gather load.  */
 
 static tree
-vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
+vect_build_zero_merge_argument (vec_info *vinfo,
+                               stmt_vec_info stmt_info, tree vectype)
 {
   tree merge;
   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
@@ -2695,7 +2553,7 @@ vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
   else
     gcc_unreachable ();
   merge = build_vector_from_val (vectype, merge);
-  return vect_init_vector (stmt_info, merge, vectype, NULL);
+  return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
 }
 
 /* Build a gather load call while vectorizing STMT_INFO.  Insert new
@@ -2705,13 +2563,13 @@ vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
    MASK is null.  */
 
 static void
-vect_build_gather_load_calls (stmt_vec_info stmt_info,
+vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
                              gimple_stmt_iterator *gsi,
-                             stmt_vec_info *vec_stmt,
+                             gimple **vec_stmt,
                              gather_scatter_info *gs_info,
                              tree mask)
 {
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
@@ -2734,7 +2592,7 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
                           || TREE_CODE (masktype) == INTEGER_TYPE
                           || types_compatible_p (srctype, masktype)));
   if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
-    masktype = build_same_sized_truth_vector_type (srctype);
+    masktype = truth_type_for (srctype);
 
   tree mask_halftype = masktype;
   tree perm_mask = NULL_TREE;
@@ -2780,8 +2638,7 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
          mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
        }
       else if (mask)
-       mask_halftype
-         = build_same_sized_truth_vector_type (gs_info->offset_vectype);
+       mask_halftype = truth_type_for (gs_info->offset_vectype);
     }
   else
     gcc_unreachable ();
@@ -2805,26 +2662,30 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
   tree src_op = NULL_TREE;
   tree mask_op = NULL_TREE;
   tree prev_res = NULL_TREE;
-  stmt_vec_info prev_stmt_info = NULL;
 
   if (!mask)
     {
-      src_op = vect_build_zero_merge_argument (stmt_info, rettype);
-      mask_op = vect_build_all_ones_mask (stmt_info, masktype);
+      src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
+      mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
     }
 
+  auto_vec<tree> vec_oprnds0;
+  auto_vec<tree> vec_masks;
+  vect_get_vec_defs_for_operand (vinfo, stmt_info,
+                                modifier == WIDEN ? ncopies / 2 : ncopies,
+                                gs_info->offset, &vec_oprnds0);
+  if (mask)
+    vect_get_vec_defs_for_operand (vinfo, stmt_info,
+                                  modifier == NARROW ? ncopies / 2 : ncopies,
+                                  mask, &vec_masks);
   for (int j = 0; j < ncopies; ++j)
     {
       tree op, var;
       if (modifier == WIDEN && (j & 1))
-       op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
+       op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
                                   perm_mask, stmt_info, gsi);
-      else if (j == 0)
-       op = vec_oprnd0
-         = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
       else
-       op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
-                                                         vec_oprnd0);
+       op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
 
       if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
        {
@@ -2833,22 +2694,24 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
          var = vect_get_new_ssa_name (idxtype, vect_simple_var);
          op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
          gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
-         vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
          op = var;
        }
 
       if (mask)
        {
          if (mask_perm_mask && (j & 1))
-           mask_op = permute_vec_elements (mask_op, mask_op,
+           mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
                                            mask_perm_mask, stmt_info, gsi);
          else
            {
-             if (j == 0)
-               vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
-             else if (modifier != NARROW || (j & 1) == 0)
-               vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
-                                                          vec_mask);
+             if (modifier == NARROW)
+               {
+                 if ((j & 1) == 0)
+                   vec_mask = vec_masks[j / 2];
+               }
+             else
+               vec_mask = vec_masks[j];
 
              mask_op = vec_mask;
              if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
@@ -2860,7 +2723,7 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
                  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
                  gassign *new_stmt
                    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
-                 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
                  mask_op = var;
                }
            }
@@ -2871,7 +2734,7 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
                = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
                                                    : VEC_UNPACK_LO_EXPR,
                                       mask_op);
-             vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
              mask_op = var;
            }
          src_op = mask_op;
@@ -2889,7 +2752,7 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
          mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
          gassign *new_stmt
            = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
-         vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
          mask_arg = var;
          if (!useless_type_conversion_p (real_masktype, utype))
            {
@@ -2897,34 +2760,31 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
                          <= TYPE_PRECISION (real_masktype));
              var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
              new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
-             vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
              mask_arg = var;
            }
          src_op = build_zero_cst (srctype);
        }
-      gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
-                                          mask_arg, scale);
+      gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
+                                           mask_arg, scale);
 
-      stmt_vec_info new_stmt_info;
       if (!useless_type_conversion_p (vectype, rettype))
        {
          gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
                                TYPE_VECTOR_SUBPARTS (rettype)));
          op = vect_get_new_ssa_name (rettype, vect_simple_var);
-         gimple_call_set_lhs (new_call, op);
-         vect_finish_stmt_generation (stmt_info, new_call, gsi);
+         gimple_call_set_lhs (new_stmt, op);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
          var = make_ssa_name (vec_dest);
          op = build1 (VIEW_CONVERT_EXPR, vectype, op);
-         gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
-         new_stmt_info
-           = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+         new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
        }
       else
        {
-         var = make_ssa_name (vec_dest, new_call);
-         gimple_call_set_lhs (new_call, var);
-         new_stmt_info
-           = vect_finish_stmt_generation (stmt_info, new_call, gsi);
+         var = make_ssa_name (vec_dest, new_stmt);
+         gimple_call_set_lhs (new_stmt, var);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
        }
 
       if (modifier == NARROW)
@@ -2934,17 +2794,14 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
              prev_res = var;
              continue;
            }
-         var = permute_vec_elements (prev_res, var, perm_mask,
+         var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
                                      stmt_info, gsi);
-         new_stmt_info = loop_vinfo->lookup_def (var);
+         new_stmt = SSA_NAME_DEF_STMT (var);
        }
 
-      if (prev_stmt_info == NULL)
-       STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
-      else
-       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-      prev_stmt_info = new_stmt_info;
+      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
     }
+  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
 }
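
[Editorial sketch, not part of the patch.]  For context, the scalar shapes this gather-load transform targets look like the following; function names are made up, and whether a real gather instruction is emitted depends on the target (e.g. AVX2/AVX-512 or SVE):

/* Illustration only: indexed loads that the gather-load transform
   above turns into gather calls on capable targets.  The second loop
   exercises the MASK argument via an if-converted conditional load.  */
void
gather_plain (float *restrict out, const float *restrict base,
              const int *restrict idx, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = base[idx[i]];
}

void
gather_masked (float *restrict out, const float *restrict base,
               const int *restrict idx, const int *restrict cond, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = cond[i] ? base[idx[i]] : 0.0f;
}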
 
 /* Prepare the base and offset in GS_INFO for vectorization.
@@ -2954,9 +2811,11 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
    containing loop.  */
 
 static void
-vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info,
+vect_get_gather_scatter_ops (vec_info *vinfo,
+                            class loop *loop, stmt_vec_info stmt_info,
                             gather_scatter_info *gs_info,
-                            tree *dataref_ptr, tree *vec_offset)
+                            tree *dataref_ptr, vec<tree> *vec_offset,
+                            unsigned ncopies)
 {
   gimple_seq stmts = NULL;
   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
@@ -2967,8 +2826,8 @@ vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info,
       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
       gcc_assert (!new_bb);
     }
-  *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
-                                             gs_info->offset_vectype);
+  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
+                                vec_offset, gs_info->offset_vectype);
 }
 
 /* Prepare to implement a grouped or strided load or store using
@@ -2992,7 +2851,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
   gimple_seq stmts;
 
   tree bump = size_binop (MULT_EXPR,
-                         fold_convert (sizetype, DR_STEP (dr)),
+                         fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
                          size_int (TYPE_VECTOR_SUBPARTS (vectype)));
   *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
   if (stmts)
@@ -3004,7 +2863,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
   offset_type = TREE_TYPE (gs_info->offset_vectype);
 
   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
-  tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
+  tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
                          ssize_int (gs_info->scale));
   step = fold_convert (offset_type, step);
   step = force_gimple_operand (step, &stmts, true, NULL_TREE);
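
[Editorial sketch, not part of the patch.]  The operands prepared here correspond to source-level strided accesses such as the hypothetical function below; a variable stride also shows why DR_STEP is a tree expression that needs unsharing before being folded into two different expressions:

/* Illustration only: a strided load.  DR_STEP here is 4 * stride
   bytes; the helper above computes the IV bump (step times the number
   of vector lanes) and DR_STEP / SCALE for the offset.  */
void
strided_load (float *restrict out, const float *restrict in,
              int stride, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = in[i * stride];
}
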
@@ -3022,30 +2881,32 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
    vectorization.  */
 
 static tree
-vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
+vect_get_data_ptr_increment (vec_info *vinfo,
+                            dr_vec_info *dr_info, tree aggr_type,
                             vect_memory_access_type memory_access_type)
 {
   if (memory_access_type == VMAT_INVARIANT)
     return size_zero_node;
 
   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
-  tree step = vect_dr_behavior (dr_info)->step;
+  tree step = vect_dr_behavior (vinfo, dr_info)->step;
   if (tree_int_cst_sgn (step) == -1)
     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
   return iv_step;
 }
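
[Editorial sketch, not part of the patch.]  A data reference walked backwards is the case the sign check above handles; a minimal sketch with a hypothetical function name:

/* Illustration only: a backward walk gives the data references a
   negative DR_STEP, so the helper above negates TYPE_SIZE_UNIT of the
   aggregate type to step the data pointer downwards.  */
void
reverse_copy (int *restrict out, const int *restrict in, int n)
{
  for (int i = n - 1; i >= 0; i--)
    out[i] = in[i];
}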
 
-/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */
+/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  */
 
 static bool
-vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                   stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_bswap (vec_info *vinfo,
+                   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+                   gimple **vec_stmt, slp_tree slp_node,
+                   slp_tree *slp_op,
                    tree vectype_in, stmt_vector_for_cost *cost_vec)
 {
   tree op, vectype;
   gcall *stmt = as_a <gcall *> (stmt_info->stmt);
-  vec_info *vinfo = stmt_info->vinfo;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   unsigned ncopies;
 
   op = gimple_call_arg (stmt, 0);
@@ -3083,6 +2944,15 @@ vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
   if (! vec_stmt)
     {
+      if (slp_node
+         && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "incompatible vector types for invariants\n");
+         return false;
+       }
+
       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_bswap");
       if (! slp_node)
@@ -3099,50 +2969,35 @@ vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
   /* Transform.  */
   vec<tree> vec_oprnds = vNULL;
-  stmt_vec_info new_stmt_info = NULL;
-  stmt_vec_info prev_stmt_info = NULL;
-  for (unsigned j = 0; j < ncopies; j++)
-    {
-      /* Handle uses.  */
-      if (j == 0)
-       vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
-      else
-       vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
-
-      /* Arguments are ready. create the new vector stmt.  */
-      unsigned i;
-      tree vop;
-      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
-       {
-        gimple *new_stmt;
-        tree tem = make_ssa_name (char_vectype);
-        new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
-                                                     char_vectype, vop));
-        vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-        tree tem2 = make_ssa_name (char_vectype);
-        new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
-                                        tem, tem, bswap_vconst);
-        vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-        tem = make_ssa_name (vectype);
-        new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
-                                                     vectype, tem2));
-        new_stmt_info
-          = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-         if (slp_node)
-          SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
-       }
-
+  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+                    op, &vec_oprnds);
+  /* Arguments are ready.  Create the new vector stmt.  */
+  unsigned i;
+  tree vop;
+  FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
+    {
+      gimple *new_stmt;
+      tree tem = make_ssa_name (char_vectype);
+      new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
+                                                  char_vectype, vop));
+      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+      tree tem2 = make_ssa_name (char_vectype);
+      new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
+                                     tem, tem, bswap_vconst);
+      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+      tem = make_ssa_name (vectype);
+      new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
+                                                  vectype, tem2));
+      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
       if (slp_node)
-        continue;
-
-      if (j == 0)
-       STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+       SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
       else
-       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
-      prev_stmt_info = new_stmt_info;
+       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
     }
 
+  if (!slp_node)
+    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+
   vec_oprnds.release ();
   return true;
 }
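
[Editorial sketch, not part of the patch.]  Concretely, the transform above handles loops like the one below by view-converting each vector to bytes, applying a single VEC_PERM_EXPR with the constant byte-reversal selector built earlier (bswap_vconst), and view-converting back; whether this vectorizes depends on the target's permute support:

/* Illustration only: vectorizable_bswap turns this into a byte
   permute, e.g. V4SI -> V16QI view-convert, VEC_PERM with the
   constant {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}, then a
   view-convert back to V4SI.  */
void
bswap_loop (unsigned int *restrict a, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = __builtin_bswap32 (a[i]);
}
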
@@ -3153,7 +3008,7 @@ vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    *CONVERT_CODE.  */
 
 static bool
-simple_integer_narrowing (vec_info *vinfo, tree vectype_out, tree vectype_in,
+simple_integer_narrowing (tree vectype_out, tree vectype_in,
                          tree_code *convert_code)
 {
   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
@@ -3163,9 +3018,8 @@ simple_integer_narrowing (vec_info *vinfo, tree vectype_out, tree vectype_in,
   tree_code code;
   int multi_step_cvt = 0;
   auto_vec <tree, 8> interm_types;
-  if (!supportable_narrowing_operation (vinfo, NOP_EXPR, vectype_out,
-                                       vectype_in, &code, &multi_step_cvt,
-                                       &interm_types)
+  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
+                                       &code, &multi_step_cvt, &interm_types)
       || multi_step_cvt)
     return false;
 
@@ -3181,8 +3035,9 @@ simple_integer_narrowing (vec_info *vinfo, tree vectype_out, tree vectype_in,
    Return true if STMT_INFO is vectorizable in this way.  */
 
 static bool
-vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                  stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_call (vec_info *vinfo,
+                  stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+                  gimple **vec_stmt, slp_tree slp_node,
                   stmt_vector_for_cost *cost_vec)
 {
   gcall *stmt;
@@ -3190,18 +3045,17 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   tree scalar_dest;
   tree op;
   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
-  stmt_vec_info prev_stmt_info;
   tree vectype_out, vectype_in;
   poly_uint64 nunits_in;
   poly_uint64 nunits_out;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-  vec_info *vinfo = stmt_info->vinfo;
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
+  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   tree fndecl, new_temp, rhs_type;
   enum vect_def_type dt[4]
     = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
        vect_unknown_def_type };
   tree vectypes[ARRAY_SIZE (dt)] = {};
+  slp_tree slp_op[ARRAY_SIZE (dt)] = {};
   int ndts = ARRAY_SIZE (dt);
   int ncopies, j;
   auto_vec<tree, 8> vargs;
@@ -3241,7 +3095,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   vectype_in = NULL_TREE;
   nargs = gimple_call_num_args (stmt);
 
-  /* Bail out if the function has more than three arguments, we do not have
+  /* Bail out if the function has more than four arguments; we do not have
      interesting builtin functions to vectorize with more than two arguments
      except for fma.  No arguments is also not good.  */
   if (nargs == 0 || nargs > 4)
@@ -3261,8 +3115,17 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
   for (i = 0; i < nargs; i++)
     {
-      op = gimple_call_arg (stmt, i);
-      if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
+      if ((int) i == mask_opno)
+       {
+         op = gimple_call_arg (stmt, i);
+         if (!vect_check_scalar_mask (vinfo,
+                                      stmt_info, op, &dt[i], &vectypes[i]))
+           return false;
+         continue;
+       }
+
+      if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+                              i, &op, &slp_op[i], &dt[i], &vectypes[i]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3270,11 +3133,6 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          return false;
        }
 
-      /* Skip the mask argument to an internal function.  This operand
-        has been converted via a pattern if necessary.  */
-      if ((int) i == mask_opno)
-       continue;
-
       /* We can only handle calls with arguments of the same type.  */
       if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
@@ -3298,10 +3156,10 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          return false;
        }
     }
-  /* If all arguments are external or constant defs use a vector type with
-     the same size as the output vector type.  */
+  /* If all arguments are external or constant defs, infer the vector type
+     from the scalar type.  */
   if (!vectype_in)
-    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
+    vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
   if (vec_stmt)
     gcc_assert (vectype_in);
   if (!vectype_in)
@@ -3312,6 +3170,28 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
       return false;
     }
+  /* FORNOW: we don't yet support mixtures of vector sizes for calls,
+     just mixtures of nunits.  E.g. DI->SI versions of __builtin_ctz*
+     are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
+     by a pack of the two vectors into an SI vector.  We would need
+     separate code to handle direct VnDI->VnSI IFN_CTZs.  */
+  if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "mismatched vector sizes %T and %T\n",
+                        vectype_in, vectype_out);
+      return false;
+    }
+
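
[Editorial sketch, not part of the patch.]  A source-level instance of the rejected DI->SI case described in the comment above, with a hypothetical function name:

/* Illustration only: rejected by the size check above, since the
   input vector type (long long elements) and the output vector type
   (int elements) have different sizes for the same lane count.  */
void
ctz_narrow (int *restrict out, const unsigned long long *restrict in,
            int n)
{
  for (int i = 0; i < n; i++)
    out[i] = __builtin_ctzll (in[i]);
}
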
+  if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
+      != VECTOR_BOOLEAN_TYPE_P (vectype_in))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "mixed mask and nonmask vector types\n");
+      return false;
+    }
 
   /* FORNOW */
   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
@@ -3347,7 +3227,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   if (cfn != CFN_LAST
       && (modifier == NONE
          || (modifier == NARROW
-             && simple_integer_narrowing (vinfo, vectype_out, vectype_in,
+             && simple_integer_narrowing (vectype_out, vectype_in,
                                           &convert_code))))
     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
                                          vectype_in);
@@ -3380,9 +3260,10 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       else if (modifier == NONE
               && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
                   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
-                  || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
-       return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
-                                  vectype_in, cost_vec);
+                  || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
+                  || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
+       return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
+                                  slp_op, vectype_in, cost_vec);
       else
        {
          if (dump_enabled_p ())
@@ -3406,9 +3287,19 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
   if (!vec_stmt) /* transformation not required.  */
     {
+      if (slp_node)
+       for (i = 0; i < nargs; ++i)
+         if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "incompatible vector types for invariants\n");
+             return false;
+           }
       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_call");
-      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
+      vect_model_simple_cost (vinfo, stmt_info,
+                             ncopies, dt, ndts, slp_node, cost_vec);
       if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
        record_stmt_cost (cost_vec, ncopies / 2,
                          vec_promote_demote, stmt_info, 0, vect_body);
@@ -3436,22 +3327,20 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
 
-  stmt_vec_info new_stmt_info = NULL;
-  prev_stmt_info = NULL;
   if (modifier == NONE || ifn != IFN_LAST)
     {
       tree prev_res = NULL_TREE;
       vargs.safe_grow (nargs);
       orig_vargs.safe_grow (nargs);
+      auto_vec<vec<tree> > vec_defs (nargs);
       for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (slp_node)
            {
-             auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;
 
-             vect_get_slp_defs (slp_node, &vec_defs);
+             vect_get_slp_defs (vinfo, slp_node, &vec_defs);
              vec_oprnds0 = vec_defs[0];
 
              /* Arguments are ready.  Create the new vector stmt.  */
@@ -3463,6 +3352,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
+                 gimple *new_stmt;
                  if (modifier == NARROW)
                    {
                      /* We don't define any narrowing conditional functions
@@ -3473,19 +3363,17 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                        = gimple_build_call_internal_vec (ifn, vargs);
                      gimple_call_set_lhs (call, half_res);
                      gimple_call_set_nothrow (call, true);
-                     vect_finish_stmt_generation (stmt_info, call, gsi);
+                     vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
                      if ((i & 1) == 0)
                        {
                          prev_res = half_res;
                          continue;
                        }
                      new_temp = make_ssa_name (vec_dest);
-                     gimple *new_stmt
-                       = gimple_build_assign (new_temp, convert_code,
-                                              prev_res, half_res);
-                     new_stmt_info
-                       = vect_finish_stmt_generation (stmt_info, new_stmt,
-                                                      gsi);
+                     new_stmt = gimple_build_assign (new_temp, convert_code,
+                                                     prev_res, half_res);
+                     vect_finish_stmt_generation (vinfo, stmt_info,
+                                                  new_stmt, gsi);
                    }
                  else
                    {
@@ -3508,38 +3396,24 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                      new_temp = make_ssa_name (vec_dest, call);
                      gimple_call_set_lhs (call, new_temp);
                      gimple_call_set_nothrow (call, true);
-                     new_stmt_info
-                       = vect_finish_stmt_generation (stmt_info, call, gsi);
+                     vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+                     new_stmt = call;
                    }
-                 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
-               }
-
-             for (i = 0; i < nargs; i++)
-               {
-                 vec<tree> vec_oprndsi = vec_defs[i];
-                 vec_oprndsi.release ();
+                 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }
              continue;
            }
 
-         if (mask_opno >= 0 && !vectypes[mask_opno])
-           {
-             gcc_assert (modifier != WIDEN);
-             vectypes[mask_opno]
-               = build_same_sized_truth_vector_type (vectype_in);
-           }
-
          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
-               vec_oprnd0
-                 = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
-             else
-               vec_oprnd0
-                 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
-
-             orig_vargs[i] = vargs[i] = vec_oprnd0;
+               {
+                 vec_defs.quick_push (vNULL);
+                 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+                                                op, &vec_defs[i]);
+               }
+             orig_vargs[i] = vargs[i] = vec_defs[i][j];
            }
 
          if (mask_opno >= 0 && masked_loop_p)
@@ -3551,17 +3425,17 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                           vargs[mask_opno], gsi);
            }
 
+         gimple *new_stmt;
          if (cfn == CFN_GOMP_SIMD_LANE)
            {
              tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
              tree new_var
                = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
              gimple *init_stmt = gimple_build_assign (new_var, cst);
-             vect_init_vector_1 (stmt_info, init_stmt, NULL);
+             vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest);
-             gimple *new_stmt = gimple_build_assign (new_temp, new_var);
-             new_stmt_info
-               = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+             new_stmt = gimple_build_assign (new_temp, new_var);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
            }
          else if (modifier == NARROW)
            {
@@ -3572,17 +3446,16 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
              gcall *call = gimple_build_call_internal_vec (ifn, vargs);
              gimple_call_set_lhs (call, half_res);
              gimple_call_set_nothrow (call, true);
-             vect_finish_stmt_generation (stmt_info, call, gsi);
+             vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
              if ((j & 1) == 0)
                {
                  prev_res = half_res;
                  continue;
                }
              new_temp = make_ssa_name (vec_dest);
-             gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
-                                                      prev_res, half_res);
-             new_stmt_info
-               = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+             new_stmt = gimple_build_assign (new_temp, convert_code,
+                                             prev_res, half_res);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
            }
          else
            {
@@ -3594,20 +3467,23 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
              new_temp = make_ssa_name (vec_dest, call);
              gimple_call_set_lhs (call, new_temp);
              gimple_call_set_nothrow (call, true);
-             new_stmt_info
-               = vect_finish_stmt_generation (stmt_info, call, gsi);
+             vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+             new_stmt = call;
            }
 
          if (j == (modifier == NARROW ? 1 : 0))
-           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
-         else
-           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
-         prev_stmt_info = new_stmt_info;
+           *vec_stmt = new_stmt;
+         STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+       }
+      for (i = 0; i < nargs; i++)
+       {
+         vec<tree> vec_oprndsi = vec_defs[i];
+         vec_oprndsi.release ();
        }
     }
   else if (modifier == NARROW)
     {
+      auto_vec<vec<tree> > vec_defs (nargs);
       /* We don't define any narrowing conditional functions at present.  */
       gcc_assert (mask_opno < 0);
       for (j = 0; j < ncopies; ++j)
@@ -3620,10 +3496,9 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
          if (slp_node)
            {
-             auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;
 
-             vect_get_slp_defs (slp_node, &vec_defs);
+             vect_get_slp_defs (vinfo, slp_node, &vec_defs);
              vec_oprnds0 = vec_defs[0];
 
              /* Arguments are ready.  Create the new vector stmt.  */
@@ -3645,15 +3520,8 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                  new_temp = make_ssa_name (vec_dest, call);
                  gimple_call_set_lhs (call, new_temp);
                  gimple_call_set_nothrow (call, true);
-                 new_stmt_info
-                   = vect_finish_stmt_generation (stmt_info, call, gsi);
-                 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
-               }
-
-             for (i = 0; i < nargs; i++)
-               {
-                 vec<tree> vec_oprndsi = vec_defs[i];
-                 vec_oprndsi.release ();
+                 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+                 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
                }
              continue;
            }
@@ -3663,21 +3531,12 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
-                 vec_oprnd0
-                   = vect_get_vec_def_for_operand (op, stmt_info,
-                                                   vectypes[i]);
-                 vec_oprnd1
-                   = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
-               }
-             else
-               {
-                 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
-                                               2 * i + 1);
-                 vec_oprnd0
-                   = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
-                 vec_oprnd1
-                   = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
+                 vec_defs.quick_push (vNULL);
+                 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
+                                                op, &vec_defs[i], vectypes[i]);
                }
+             vec_oprnd0 = vec_defs[i][2 * j];
+             vec_oprnd1 = vec_defs[i][2 * j + 1];
 
              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
@@ -3686,18 +3545,19 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
-         new_stmt_info
-           = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-
-         if (j == 0)
-           STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
-         else
-           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 
-         prev_stmt_info = new_stmt_info;
+         STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
        }
 
-      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+      if (!slp_node)
+       *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+
+      for (i = 0; i < nargs; i++)
+       {
+         vec<tree> vec_oprndsi = vec_defs[i];
+         vec_oprndsi.release ();
+       }
     }
   else
     /* No current target implements this case.  */
@@ -3821,21 +3681,19 @@ simd_clone_subparts (tree vectype)
    Return true if STMT_INFO is vectorizable in this way.  */
 
 static bool
-vectorizable_simd_clone_call (stmt_vec_info stmt_info,
+vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
                              gimple_stmt_iterator *gsi,
-                             stmt_vec_info *vec_stmt, slp_tree slp_node,
+                             gimple **vec_stmt, slp_tree slp_node,
                              stmt_vector_for_cost *)
 {
   tree vec_dest;
   tree scalar_dest;
   tree op, type;
   tree vec_oprnd0 = NULL_TREE;
-  stmt_vec_info prev_stmt_info;
   tree vectype;
   unsigned int nunits;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-  vec_info *vinfo = stmt_info->vinfo;
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
+  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
   tree fndecl, new_temp;
   int ncopies, j;
@@ -3914,7 +3772,16 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
          || thisarginfo.dt == vect_external_def)
        gcc_assert (thisarginfo.vectype == NULL_TREE);
       else
-       gcc_assert (thisarginfo.vectype != NULL_TREE);
+       {
+         gcc_assert (thisarginfo.vectype != NULL_TREE);
+         if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "vector mask arguments are not supported\n");
+             return false;
+           }
+       }
 
       /* For linear arguments, the analyze phase should have saved
         the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
@@ -4078,7 +3945,8 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
        && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
       {
        tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
-       arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type);
+       arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
+                                                         slp_node);
        if (arginfo[i].vectype == NULL
            || (simd_clone_subparts (arginfo[i].vectype)
                > bestn->simdclone->simdlen))
@@ -4122,7 +3990,8 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
          }
       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
-/*      vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
+/*      vect_model_simple_cost (vinfo, stmt_info, ncopies,
+                               dt, slp_node, cost_vec); */
       return true;
     }
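
[Editorial sketch, not part of the patch.]  Calls that reach this routine typically come from OpenMP declare-simd functions; a minimal sketch with hypothetical names, compiled with -fopenmp or -fopenmp-simd:

/* Illustration only: FCLONE gets vector clones from the pragma, and
   vectorizable_simd_clone_call above replaces the scalar call in the
   loop with a call to the best-matching clone.  */
#pragma omp declare simd notinbranch
extern float fclone (float x, int y);

void
use_clone (float *restrict out, const float *restrict a, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = fclone (a[i], i);
}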
 
@@ -4147,7 +4016,10 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
        }
     }
 
-  prev_stmt_info = NULL;
+  auto_vec<vec<tree> > vec_oprnds;
+  auto_vec<unsigned> vec_oprnds_i;
+  vec_oprnds.safe_grow_cleared (nargs);
+  vec_oprnds_i.safe_grow_cleared (nargs);
   for (j = 0; j < ncopies; ++j)
     {
       /* Build argument list for the vectorized call.  */
@@ -4176,15 +4048,18 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
                           / simd_clone_subparts (atype));
                      gcc_assert ((k & (k - 1)) == 0);
                      if (m == 0)
-                       vec_oprnd0
-                         = vect_get_vec_def_for_operand (op, stmt_info);
+                       {
+                         vect_get_vec_defs_for_operand (vinfo, stmt_info,
+                                                        ncopies * o / k, op,
+                                                        &vec_oprnds[i]);
+                         vec_oprnds_i[i] = 0;
+                         vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
+                       }
                      else
                        {
                          vec_oprnd0 = arginfo[i].op;
                          if ((m & (k - 1)) == 0)
-                           vec_oprnd0
-                             = vect_get_vec_def_for_stmt_copy (vinfo,
-                                                               vec_oprnd0);
+                           vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
                        }
                      arginfo[i].op = vec_oprnd0;
                      vec_oprnd0
@@ -4194,7 +4069,8 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
                      gassign *new_stmt
                        = gimple_build_assign (make_ssa_name (atype),
                                               vec_oprnd0);
-                     vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                     vect_finish_stmt_generation (vinfo, stmt_info,
+                                                  new_stmt, gsi);
                      vargs.safe_push (gimple_assign_lhs (new_stmt));
                    }
                  else
@@ -4210,12 +4086,16 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
                      for (l = 0; l < k; l++)
                        {
                          if (m == 0 && l == 0)
-                           vec_oprnd0
-                             = vect_get_vec_def_for_operand (op, stmt_info);
+                           {
+                             vect_get_vec_defs_for_operand (vinfo, stmt_info,
+                                                            k * o * ncopies,
+                                                            op,
+                                                            &vec_oprnds[i]);
+                             vec_oprnds_i[i] = 0;
+                             vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
+                           }
                          else
-                           vec_oprnd0
-                             = vect_get_vec_def_for_stmt_copy (vinfo,
-                                                               arginfo[i].op);
+                           vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
                          arginfo[i].op = vec_oprnd0;
                          if (k == 1)
                            break;
@@ -4230,8 +4110,8 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
                          gassign *new_stmt
                            = gimple_build_assign (make_ssa_name (atype),
                                                   vec_oprnd0);
-                         vect_finish_stmt_generation (stmt_info, new_stmt,
-                                                      gsi);
+                         vect_finish_stmt_generation (vinfo, stmt_info,
+                                                      new_stmt, gsi);
                          vargs.safe_push (gimple_assign_lhs (new_stmt));
                        }
                    }
@@ -4262,7 +4142,6 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
                    }
                  tree phi_res = copy_ssa_name (op);
                  gphi *new_phi = create_phi_node (phi_res, loop->header);
-                 loop_vinfo->add_stmt (new_phi);
                  add_phi_arg (new_phi, arginfo[i].op,
                               loop_preheader_edge (loop), UNKNOWN_LOCATION);
                  enum tree_code code
@@ -4279,7 +4158,6 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
                    = gimple_build_assign (phi_arg, code, phi_res, tcst);
                  gimple_stmt_iterator si = gsi_after_labels (loop->header);
                  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
-                 loop_vinfo->add_stmt (new_stmt);
                  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
                               UNKNOWN_LOCATION);
                  arginfo[i].op = phi_res;
@@ -4300,7 +4178,7 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
                  gassign *new_stmt
                    = gimple_build_assign (new_temp, code,
                                           arginfo[i].op, tcst);
-                 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
                  vargs.safe_push (new_temp);
                }
              break;
@@ -4328,8 +4206,8 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
            new_temp = make_ssa_name (rtype, new_call);
          gimple_call_set_lhs (new_call, new_temp);
        }
-      stmt_vec_info new_stmt_info
-       = vect_finish_stmt_generation (stmt_info, new_call, gsi);
+      vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
+      gimple *new_stmt = new_call;
 
       if (vec_dest)
        {
@@ -4352,22 +4230,16 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
                  else
                    t = build3 (BIT_FIELD_REF, vectype, new_temp,
                                bitsize_int (prec), bitsize_int (l * prec));
-                 gimple *new_stmt
-                   = gimple_build_assign (make_ssa_name (vectype), t);
-                 new_stmt_info
-                   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
+                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 
                  if (j == 0 && l == 0)
-                   STMT_VINFO_VEC_STMT (stmt_info)
-                     = *vec_stmt = new_stmt_info;
-                 else
-                   STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
-                 prev_stmt_info = new_stmt_info;
+                   *vec_stmt = new_stmt;
+                 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
                }
 
              if (ratype)
-               vect_clobber_variable (stmt_info, gsi, new_temp);
+               vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
              continue;
            }
          else if (simd_clone_subparts (vectype) > nunits)
@@ -4384,32 +4256,27 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
                    {
                      tree tem = build4 (ARRAY_REF, rtype, new_temp,
                                         size_int (m), NULL_TREE, NULL_TREE);
-                     gimple *new_stmt
-                       = gimple_build_assign (make_ssa_name (rtype), tem);
-                     new_stmt_info
-                       = vect_finish_stmt_generation (stmt_info, new_stmt,
-                                                      gsi);
+                     new_stmt = gimple_build_assign (make_ssa_name (rtype),
+                                                     tem);
+                     vect_finish_stmt_generation (vinfo, stmt_info,
+                                                  new_stmt, gsi);
                      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
                                              gimple_assign_lhs (new_stmt));
                    }
-                 vect_clobber_variable (stmt_info, gsi, new_temp);
+                 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
                }
              else
                CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
              if ((j & (k - 1)) != k - 1)
                continue;
              vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
-             gimple *new_stmt
+             new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
-             new_stmt_info
-               = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 
              if ((unsigned) j == k - 1)
-               STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
-             else
-               STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
-             prev_stmt_info = new_stmt_info;
+               *vec_stmt = new_stmt;
+             STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
              continue;
            }
          else if (ratype)
@@ -4417,22 +4284,22 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
              tree t = build_fold_addr_expr (new_temp);
              t = build2 (MEM_REF, vectype, t,
                          build_int_cst (TREE_TYPE (t), 0));
-             gimple *new_stmt
-               = gimple_build_assign (make_ssa_name (vec_dest), t);
-             new_stmt_info
-               = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-             vect_clobber_variable (stmt_info, gsi, new_temp);
+             new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
            }
        }
 
       if (j == 0)
-       STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
-      else
-       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
-      prev_stmt_info = new_stmt_info;
+       *vec_stmt = new_stmt;
+      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
     }
 
+  for (i = 0; i < nargs; ++i)
+    {
+      vec<tree> oprndsi = vec_oprnds[i];
+      oprndsi.release ();
+    }
   vargs.release ();
 
   /* The call in STMT might prevent it from being removed in dce.
@@ -4469,8 +4336,7 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
    STMT_INFO is the original scalar stmt that we are vectorizing.  */
 
 static gimple *
-vect_gen_widened_results_half (enum tree_code code,
-                              tree decl,
+vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               stmt_vec_info stmt_info)
@@ -4479,82 +4345,29 @@ vect_gen_widened_results_half (enum tree_code code,
   tree new_temp;
 
   /* Generate half of the widened result:  */
-  if (code == CALL_EXPR)
-    {
-      /* Target specific support  */
-      if (op_type == binary_op)
-       new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
-      else
-       new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
-      new_temp = make_ssa_name (vec_dest, new_stmt);
-      gimple_call_set_lhs (new_stmt, new_temp);
-    }
-  else
-    {
-      /* Generic support */
-      gcc_assert (op_type == TREE_CODE_LENGTH (code));
-      if (op_type != binary_op)
-       vec_oprnd1 = NULL;
-      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
-      new_temp = make_ssa_name (vec_dest, new_stmt);
-      gimple_assign_set_lhs (new_stmt, new_temp);
-    }
-  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+  gcc_assert (op_type == TREE_CODE_LENGTH (code));
+  if (op_type != binary_op)
+    vec_oprnd1 = NULL;
+  new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
+  new_temp = make_ssa_name (vec_dest, new_stmt);
+  gimple_assign_set_lhs (new_stmt, new_temp);
+  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 
   return new_stmt;
 }
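
[Editorial sketch, not part of the patch.]  The canonical consumer of this helper is a widening operation such as WIDEN_MULT_EXPR, where every input vector produces a lo half and a hi half (CODE1 and CODE2 of the caller below); a source-level sketch, assuming the target implements the widening-multiply optabs:

/* Illustration only: short * short -> int is recognized as
   WIDEN_MULT_EXPR; the vectorizer emits the two result halves with
   the helper above, one call per half.  */
void
widen_mult (int *restrict out, const short *restrict a,
            const short *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = a[i] * b[i];
}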
 
 
-/* Get vectorized definitions for loop-based vectorization of STMT_INFO.
-   For the first operand we call vect_get_vec_def_for_operand (with OPRND
-   containing scalar operand), and for the rest we get a copy with
-   vect_get_vec_def_for_stmt_copy() using the previous vector definition
-   (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
-   The vectors are collected into VEC_OPRNDS.  */
-
-static void
-vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
-                         vec<tree> *vec_oprnds, int multi_step_cvt)
-{
-  vec_info *vinfo = stmt_info->vinfo;
-  tree vec_oprnd;
-
-  /* Get first vector operand.  */
-  /* All the vector operands except the very first one (that is scalar oprnd)
-     are stmt copies.  */
-  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
-    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
-  else
-    vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
-
-  vec_oprnds->quick_push (vec_oprnd);
-
-  /* Get second vector operand.  */
-  vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
-  vec_oprnds->quick_push (vec_oprnd);
-
-  *oprnd = vec_oprnd;
-
-  /* For conversion in multiple steps, continue to get operands
-     recursively.  */
-  if (multi_step_cvt)
-    vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
-                             multi_step_cvt - 1);
-}
-
-
 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
    For multi-step conversions store the resulting vectors and call the function
    recursively.  */
 
 static void
-vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
+vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
                                       int multi_step_cvt,
                                       stmt_vec_info stmt_info,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
-                                      slp_tree slp_node, enum tree_code code,
-                                      stmt_vec_info *prev_stmt_info)
+                                      slp_tree slp_node, enum tree_code code)
 {
   unsigned int i;
   tree vop0, vop1, new_tmp, vec_dest;
@@ -4569,8 +4382,7 @@ vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
       new_tmp = make_ssa_name (vec_dest, new_stmt);
       gimple_assign_set_lhs (new_stmt, new_tmp);
-      stmt_vec_info new_stmt_info
-       = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 
       if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
@@ -4581,16 +4393,9 @@ vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
-           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
+           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
-           {
-             if (!*prev_stmt_info)
-               STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
-             else
-               STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
-
-             *prev_stmt_info = new_stmt_info;
-           }
+           STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
        }
     }
 
@@ -4603,10 +4408,10 @@ vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
       /* At each level of recursion we have half of the operands we had at the
         previous level.  */
       vec_oprnds->truncate ((i+1)/2);
-      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
+      vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
+                                            multi_step_cvt - 1,
                                             stmt_info, vec_dsts, gsi,
-                                            slp_node, VEC_PACK_TRUNC_EXPR,
-                                            prev_stmt_info);
+                                            slp_node, VEC_PACK_TRUNC_EXPR);
     }
 
   vec_dsts.quick_push (vec_dest);
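
[Editorial sketch, not part of the patch.]  For a multi-step narrowing, the recursion above packs pairs of vectors with VEC_PACK_TRUNC_EXPR at each level; e.g. an int -> char conversion typically narrows via short when no single-step pack exists.  A sketch with a hypothetical function name:

/* Illustration only: int -> signed char narrows in two
   VEC_PACK_TRUNC steps (int -> short -> char) on targets without a
   direct pack.  */
void
demote (signed char *restrict out, const int *restrict in, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (signed char) in[i];
}
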
@@ -4619,13 +4424,13 @@ vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
    call the function recursively.  */
 
 static void
-vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
+vect_create_vectorized_promotion_stmts (vec_info *vinfo,
+                                       vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        stmt_vec_info stmt_info, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
-                                       enum tree_code code2, tree decl1,
-                                       tree decl2, int op_type)
+                                       enum tree_code code2, int op_type)
 {
   int i;
   tree vop0, vop1, new_tmp1, new_tmp2;
@@ -4641,10 +4446,10 @@ vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
        vop1 = NULL_TREE;
 
       /* Generate the two halves of promotion operation.  */
-      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
+      new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
                                                 op_type, vec_dest, gsi,
                                                 stmt_info);
-      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
+      new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
                                                 op_type, vec_dest, gsi,
                                                 stmt_info);
       if (is_gimple_call (new_stmt1))
@@ -4674,36 +4479,33 @@ vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
    Return true if STMT_INFO is vectorizable in this way.  */
 
 static bool
-vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                        stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_conversion (vec_info *vinfo,
+                        stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+                        gimple **vec_stmt, slp_tree slp_node,
                         stmt_vector_for_cost *cost_vec)
 {
   tree vec_dest;
   tree scalar_dest;
   tree op0, op1 = NULL_TREE;
-  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
-  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
   tree new_temp;
   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
   int ndts = 2;
-  stmt_vec_info prev_stmt_info;
   poly_uint64 nunits_in;
   poly_uint64 nunits_out;
   tree vectype_out, vectype_in;
-  int ncopies, i, j;
+  int ncopies, i;
   tree lhs_type, rhs_type;
   enum { NARROW, NONE, WIDEN } modifier;
   vec<tree> vec_oprnds0 = vNULL;
   vec<tree> vec_oprnds1 = vNULL;
   tree vop0;
-  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-  vec_info *vinfo = stmt_info->vinfo;
+  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   int multi_step_cvt = 0;
   vec<tree> interm_types = vNULL;
-  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
+  tree intermediate_type, cvt_type = NULL_TREE;
   int op_type;
   unsigned short fltsz;
 
@@ -4738,9 +4540,18 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   lhs_type = TREE_TYPE (scalar_dest);
   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
 
-  op0 = gimple_assign_rhs1 (stmt);
-  rhs_type = TREE_TYPE (op0);
+  /* Check the operands of the operation.  */
+  slp_tree slp_op0, slp_op1 = NULL;
+  if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+                          0, &op0, &slp_op0, &dt[0], &vectype_in))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "use not simple.\n");
+      return false;
+    }
 
+  rhs_type = TREE_TYPE (op0);
   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
       && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
@@ -4761,40 +4572,30 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       return false;
     }
 
-  /* Check the operands of the operation.  */
-  if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
-    {
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                         "use not simple.\n");
-      return false;
-    }
   if (op_type == binary_op)
     {
-      bool ok;
-
-      op1 = gimple_assign_rhs2 (stmt);
       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
-      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
-        OP1.  */
-      if (CONSTANT_CLASS_P (op0))
-       ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
-      else
-       ok = vect_is_simple_use (op1, vinfo, &dt[1]);
 
-      if (!ok)
+      op1 = gimple_assign_rhs2 (stmt);
+      tree vectype1_in;
+      if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
+                              &op1, &slp_op1, &dt[1], &vectype1_in))
        {
           if (dump_enabled_p ())
             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                              "use not simple.\n");
          return false;
        }
+      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
+        OP1.  */
+      if (!vectype_in)
+       vectype_in = vectype1_in;
     }
 
-  /* If op0 is an external or constant defs use a vector type of
-     the same size as the output vector type.  */
+  /* If op0 is an external or constant def, infer the vector type
+     from the scalar type.  */
   if (!vectype_in)
-    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
+    vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
   if (vec_stmt)
     gcc_assert (vectype_in);
   if (!vectype_in)
@@ -4852,10 +4653,11 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   switch (modifier)
     {
     case NONE:
-      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
+      if (code != FIX_TRUNC_EXPR
+         && code != FLOAT_EXPR
+         && !CONVERT_EXPR_CODE_P (code))
        return false;
-      if (supportable_convert_operation (code, vectype_out, vectype_in,
-                                        &decl1, &code1))
+      if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
        break;
       /* FALLTHRU */
     unsupported:
@@ -4865,7 +4667,7 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       return false;
 
     case WIDEN:
-      if (supportable_widening_operation (code, stmt_info, vectype_out,
+      if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
                                          vectype_in, &code1, &code2,
                                          &multi_step_cvt, &interm_types))
        {
@@ -4895,10 +4697,10 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
-                                                 cvt_type, &decl1, &codecvt1))
+                                                 cvt_type, &codecvt1))
                goto unsupported;
            }
-         else if (!supportable_widening_operation (code, stmt_info,
+         else if (!supportable_widening_operation (vinfo, code, stmt_info,
                                                    vectype_out, cvt_type,
                                                    &codecvt1, &codecvt2,
                                                    &multi_step_cvt,
@@ -4907,7 +4709,8 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          else
            gcc_assert (multi_step_cvt == 0);
 
-         if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
+         if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
+                                             cvt_type,
                                              vectype_in, &code1, &code2,
                                              &multi_step_cvt, &interm_types))
            {
@@ -4931,8 +4734,8 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
     case NARROW:
       gcc_assert (op_type == unary_op);
-      if (supportable_narrowing_operation (vinfo, code, vectype_out,
-                                          vectype_in, &code1, &multi_step_cvt,
+      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
+                                          &code1, &multi_step_cvt,
                                           &interm_types))
        break;
 
@@ -4946,10 +4749,10 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       if (cvt_type == NULL_TREE)
        goto unsupported;
       if (!supportable_convert_operation (code, cvt_type, vectype_in,
-                                         &decl1, &codecvt1))
+                                         &codecvt1))
        goto unsupported;
-      if (supportable_narrowing_operation (vinfo, NOP_EXPR, vectype_out,
-                                          cvt_type, &code1, &multi_step_cvt,
+      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
+                                          &code1, &multi_step_cvt,
                                           &interm_types))
        break;
       goto unsupported;
@@ -4960,24 +4763,43 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
   if (!vec_stmt)               /* transformation not required.  */
     {
+      if (slp_node
+         && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
+             || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "incompatible vector types for invariants\n");
+         return false;
+       }
       DUMP_VECT_SCOPE ("vectorizable_conversion");
-      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
+      if (modifier == NONE)
         {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
-         vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
+         vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
                                  cost_vec);
        }
       else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
-         vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
-                                             cost_vec);
+         /* The final packing step produces one vector result per copy.  */
+         unsigned int nvectors
+           = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
+         vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
+                                             multi_step_cvt, cost_vec);
        }
       else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
-         vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
-                                             cost_vec);
+         /* The initial unpacking step produces two vector results
+            per copy.  MULTI_STEP_CVT is 0 for a single conversion,
+            so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
+         unsigned int nvectors
+           = (slp_node
+              ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
+              : ncopies * 2);
+         vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
+                                             multi_step_cvt, cost_vec);
        }
       interm_types.release ();
       return true;
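A worked check of the vector counts used in the two costing branches above (plain arithmetic, not GCC code; the function name is made up):

/* Illustrative only: with MULTI_STEP_CVT intermediate types there are
   MULTI_STEP_CVT + 1 steps, each doubling the vector count for a
   promotion.  Walking back from the final count gives the number of
   vectors the initial unpacking step produces.  */
unsigned
initial_unpack_vectors (unsigned final_vectors, unsigned multi_step_cvt)
{
  return final_vectors >> multi_step_cvt;
}

/* e.g. 8 SLP output vectors over three steps (multi_step_cvt == 2):
   2 -> 4 -> 8 going forward, so the first step produces
   8 >> 2 == 2 vectors.  The non-SLP branch instead counts two
   results per copy directly: ncopies * 2.  */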
@@ -5023,69 +4845,37 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);
 
+  int ninputs = 1;
   if (!slp_node)
     {
       if (modifier == WIDEN)
+       ;
+      else if (modifier == NARROW)
        {
-         vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
-         if (op_type == binary_op)
-           vec_oprnds1.create (1);
+         if (multi_step_cvt)
+           ninputs = vect_pow2 (multi_step_cvt);
+         ninputs *= 2;
        }
-      else if (modifier == NARROW)
-       vec_oprnds0.create (
-                  2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
     }
-  else if (code == WIDEN_LSHIFT_EXPR)
-    vec_oprnds1.create (slp_node->vec_stmts_size);
 
-  last_oprnd = op0;
-  prev_stmt_info = NULL;
   switch (modifier)
     {
     case NONE:
-      for (j = 0; j < ncopies; j++)
+      vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+                        op0, &vec_oprnds0);
+      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
-         if (j == 0)
-           vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
-                              NULL, slp_node);
-         else
-           vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
-
-         FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
-           {
-             stmt_vec_info new_stmt_info;
-             /* Arguments are ready, create the new vector stmt.  */
-             if (code1 == CALL_EXPR)
-               {
-                 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
-                 new_temp = make_ssa_name (vec_dest, new_stmt);
-                 gimple_call_set_lhs (new_stmt, new_temp);
-                 new_stmt_info
-                   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-               }
-             else
-               {
-                 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
-                 gassign *new_stmt
-                   = gimple_build_assign (vec_dest, code1, vop0);
-                 new_temp = make_ssa_name (vec_dest, new_stmt);
-                 gimple_assign_set_lhs (new_stmt, new_temp);
-                 new_stmt_info
-                   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-               }
+         /* Arguments are ready.  Create the new vector stmt.  */
+         gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
+         gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
+         new_temp = make_ssa_name (vec_dest, new_stmt);
+         gimple_assign_set_lhs (new_stmt, new_temp);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 
-             if (slp_node)
-               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
-             else
-               {
-                 if (!prev_stmt_info)
-                   STMT_VINFO_VEC_STMT (stmt_info)
-                     = *vec_stmt = new_stmt_info;
-                 else
-                   STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-                 prev_stmt_info = new_stmt_info;
-               }
-           }
+         if (slp_node)
+           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+         else
+           STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
        }
       break;
 
@@ -5094,122 +4884,50 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
         of elements that we can fit in a vectype (nunits), we have to
          generate more than one vector stmt, i.e., we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
-      for (j = 0; j < ncopies; j++)
-       {
-         /* Handle uses.  */
-         if (j == 0)
-           {
-             if (slp_node)
-               {
-                 if (code == WIDEN_LSHIFT_EXPR)
-                   {
-                     unsigned int k;
-
-                     vec_oprnd1 = op1;
-                     /* Store vec_oprnd1 for every vector stmt to be created
-                        for SLP_NODE.  We check during the analysis that all
-                        the shift arguments are the same.  */
-                     for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
-                       vec_oprnds1.quick_push (vec_oprnd1);
-
-                     vect_get_vec_defs (op0, NULL_TREE, stmt_info,
-                                        &vec_oprnds0, NULL, slp_node);
-                   }
-                 else
-                   vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
-                                      &vec_oprnds1, slp_node);
-               }
-             else
-               {
-                 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
-                 vec_oprnds0.quick_push (vec_oprnd0);
-                 if (op_type == binary_op)
-                   {
-                     if (code == WIDEN_LSHIFT_EXPR)
-                       vec_oprnd1 = op1;
-                     else
-                       vec_oprnd1
-                         = vect_get_vec_def_for_operand (op1, stmt_info);
-                     vec_oprnds1.quick_push (vec_oprnd1);
-                   }
-               }
-           }
-         else
+      vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
+                        op0, &vec_oprnds0,
+                        code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
+                        &vec_oprnds1);
+      if (code == WIDEN_LSHIFT_EXPR)
+       {
+         vec_oprnds1.create (ncopies * ninputs);
+         for (i = 0; i < ncopies * ninputs; ++i)
+           vec_oprnds1.quick_push (op1);
+       }
+      /* Arguments are ready.  Create the new vector stmts.  */
+      for (i = multi_step_cvt; i >= 0; i--)
+       {
+         tree this_dest = vec_dsts[i];
+         enum tree_code c1 = code1, c2 = code2;
+         if (i == 0 && codecvt2 != ERROR_MARK)
            {
-             vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
-             vec_oprnds0.truncate (0);
-             vec_oprnds0.quick_push (vec_oprnd0);
-             if (op_type == binary_op)
-               {
-                 if (code == WIDEN_LSHIFT_EXPR)
-                   vec_oprnd1 = op1;
-                 else
-                   vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
-                                                                vec_oprnd1);
-                 vec_oprnds1.truncate (0);
-                 vec_oprnds1.quick_push (vec_oprnd1);
-               }
+             c1 = codecvt1;
+             c2 = codecvt2;
            }
+         vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
+                                                 &vec_oprnds1, stmt_info,
+                                                 this_dest, gsi,
+                                                 c1, c2, op_type);
+       }
 
-         /* Arguments are ready.  Create the new vector stmts.  */
-         for (i = multi_step_cvt; i >= 0; i--)
+      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
+       {
+         gimple *new_stmt;
+         if (cvt_type)
            {
-             tree this_dest = vec_dsts[i];
-             enum tree_code c1 = code1, c2 = code2;
-             if (i == 0 && codecvt2 != ERROR_MARK)
-               {
-                 c1 = codecvt1;
-                 c2 = codecvt2;
-               }
-             vect_create_vectorized_promotion_stmts (&vec_oprnds0,
-                                                     &vec_oprnds1, stmt_info,
-                                                     this_dest, gsi,
-                                                     c1, c2, decl1, decl2,
-                                                     op_type);
+             gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
+             new_temp = make_ssa_name (vec_dest);
+             new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
            }
+         else
+           new_stmt = SSA_NAME_DEF_STMT (vop0);
 
-         FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
-           {
-             stmt_vec_info new_stmt_info;
-             if (cvt_type)
-               {
-                 if (codecvt1 == CALL_EXPR)
-                   {
-                     gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
-                     new_temp = make_ssa_name (vec_dest, new_stmt);
-                     gimple_call_set_lhs (new_stmt, new_temp);
-                     new_stmt_info
-                       = vect_finish_stmt_generation (stmt_info, new_stmt,
-                                                      gsi);
-                   }
-                 else
-                   {
-                     gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
-                     new_temp = make_ssa_name (vec_dest);
-                     gassign *new_stmt
-                       = gimple_build_assign (new_temp, codecvt1, vop0);
-                     new_stmt_info
-                       = vect_finish_stmt_generation (stmt_info, new_stmt,
-                                                      gsi);
-                   }
-               }
-             else
-               new_stmt_info = vinfo->lookup_def (vop0);
-
-             if (slp_node)
-               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
-             else
-               {
-                 if (!prev_stmt_info)
-                   STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
-                 else
-                   STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-                 prev_stmt_info = new_stmt_info;
-               }
-           }
+         if (slp_node)
+           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+         else
+           STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
        }
-
-      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
       break;
 
     case NARROW:
@@ -5217,51 +4935,28 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
         of elements that we can fit in a vectype (nunits), we have to
          generate more than one vector stmt, i.e., we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
-      for (j = 0; j < ncopies; j++)
-       {
-         /* Handle uses.  */
-         if (slp_node)
-           vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
-                              slp_node);
-         else
-           {
-             vec_oprnds0.truncate (0);
-             vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
-                                       vect_pow2 (multi_step_cvt) - 1);
-           }
-
-         /* Arguments are ready.  Create the new vector stmts.  */
-         if (cvt_type)
-           FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
-             {
-               if (codecvt1 == CALL_EXPR)
-                 {
-                   gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
-                   new_temp = make_ssa_name (vec_dest, new_stmt);
-                   gimple_call_set_lhs (new_stmt, new_temp);
-                   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-                 }
-               else
-                 {
-                   gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
-                   new_temp = make_ssa_name (vec_dest);
-                   gassign *new_stmt
-                     = gimple_build_assign (new_temp, codecvt1, vop0);
-                   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-                 }
-
-               vec_oprnds0[i] = new_temp;
-             }
-
-         vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
-                                                stmt_info, vec_dsts, gsi,
-                                                slp_node, code1,
-                                                &prev_stmt_info);
-       }
+      vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
+                        op0, &vec_oprnds0);
+      /* Arguments are ready.  Create the new vector stmts.  */
+      if (cvt_type)
+       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
+         {
+           gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
+           new_temp = make_ssa_name (vec_dest);
+           gassign *new_stmt
+             = gimple_build_assign (new_temp, codecvt1, vop0);
+           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+           vec_oprnds0[i] = new_temp;
+         }
 
-      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+      vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
+                                            multi_step_cvt,
+                                            stmt_info, vec_dsts, gsi,
+                                            slp_node, code1);
       break;
     }
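The demotion statements created just above are the mirror image of the widening split shown earlier; a standalone sketch (illustrative types and names) of one final packing step:

#include <stdint.h>

/* Illustrative only: pack two 4 x int32_t inputs into one 8 x int16_t
   result, as the last step of a narrowing conversion does.  */
static void
pack_2x4si_to_8hi (const int32_t a[4], const int32_t b[4], int16_t out[8])
{
  for (int i = 0; i < 4; i++)
    {
      out[i] = (int16_t) a[i];      /* truncate the first input */
      out[i + 4] = (int16_t) b[i];  /* truncate the second input */
    }
}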
+  if (!slp_node)
+    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
 
   vec_oprnds0.release ();
   vec_oprnds1.release ();
@@ -5270,6 +4965,29 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   return true;
 }
 
+/* Return true if we can assume from the scalar form of STMT_INFO that
+   neither the scalar nor the vector forms will generate code.  STMT_INFO
+   is known not to involve a data reference.  */
+
+bool
+vect_nop_conversion_p (stmt_vec_info stmt_info)
+{
+  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
+  if (!stmt)
+    return false;
+
+  tree lhs = gimple_assign_lhs (stmt);
+  tree_code code = gimple_assign_rhs_code (stmt);
+  tree rhs = gimple_assign_rhs1 (stmt);
+
+  if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
+    return true;
+
+  if (CONVERT_EXPR_CODE_P (code))
+    return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
+
+  return false;
+}
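As a rough model of what the predicate accepts (a sketch under the assumption that only integer precision matters; tree_nop_conversion_p in the real compiler also handles pointer and offset types):

#include <stdbool.h>

/* Illustrative only: an integer conversion needs no instruction when
   the bit precision is unchanged, e.g. int -> unsigned int.  A pure
   sign change is free; int -> long on an LP64 target is not.  */
static bool
int_conversion_is_nop (unsigned lhs_precision, unsigned rhs_precision)
{
  return lhs_precision == rhs_precision;
}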
 
 /* Function vectorizable_assignment.
 
@@ -5279,24 +4997,23 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    Return true if STMT_INFO is vectorizable in this way.  */
 
 static bool
-vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                        stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_assignment (vec_info *vinfo,
+                        stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+                        gimple **vec_stmt, slp_tree slp_node,
                         stmt_vector_for_cost *cost_vec)
 {
   tree vec_dest;
   tree scalar_dest;
   tree op;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   tree new_temp;
   enum vect_def_type dt[1] = {vect_unknown_def_type};
   int ndts = 1;
   int ncopies;
-  int i, j;
+  int i;
   vec<tree> vec_oprnds = vNULL;
   tree vop;
-  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-  vec_info *vinfo = stmt_info->vinfo;
-  stmt_vec_info prev_stmt_info = NULL;
+  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   enum tree_code code;
   tree vectype_in;
 
@@ -5316,16 +5033,14 @@ vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   if (TREE_CODE (scalar_dest) != SSA_NAME)
     return false;
 
-  code = gimple_assign_rhs_code (stmt);
-  if (gimple_assign_single_p (stmt)
-      || code == PAREN_EXPR
-      || CONVERT_EXPR_CODE_P (code))
-    op = gimple_assign_rhs1 (stmt);
-  else
+  if (STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
-  if (code == VIEW_CONVERT_EXPR)
-    op = TREE_OPERAND (op, 0);
+  code = gimple_assign_rhs_code (stmt);
+  if (!(gimple_assign_single_p (stmt)
+       || code == PAREN_EXPR
+       || CONVERT_EXPR_CODE_P (code)))
+    return false;
 
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
@@ -5340,13 +5055,17 @@ vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
   gcc_assert (ncopies >= 1);
 
-  if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
+  slp_tree slp_op;
+  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
+                          &dt[0], &vectype_in))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                          "use not simple.\n");
       return false;
     }
+  if (!vectype_in)
+    vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
 
   /* We can handle NOP_EXPR conversions that do not change the number
      of elements or the vector size.  */
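A scalar example of a conversion this path accepts (illustrative; same lane count, same lane size):

#include <stdint.h>

/* Illustrative only: int32_t -> uint32_t keeps both the element size
   and the number of elements, so the vector form is a plain copy.  */
void
copy_convert (const int32_t *a, uint32_t *b, int n)
{
  for (int i = 0; i < n; i++)
    b[i] = (uint32_t) a[i];
}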
@@ -5383,9 +5102,19 @@ vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
   if (!vec_stmt) /* transformation not required.  */
     {
+      if (slp_node
+         && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "incompatible vector types for invariants\n");
+         return false;
+       }
       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_assignment");
-      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
+      if (!vect_nop_conversion_p (stmt_info))
+       vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
+                               cost_vec);
       return true;
     }
 
@@ -5397,40 +5126,25 @@ vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   vec_dest = vect_create_destination_var (scalar_dest, vectype);
 
   /* Handle use.  */
-  for (j = 0; j < ncopies; j++)
-    {
-      /* Handle uses.  */
-      if (j == 0)
-       vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
-      else
-       vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
-
-      /* Arguments are ready. create the new vector stmt.  */
-      stmt_vec_info new_stmt_info = NULL;
-      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
-       {
-        if (CONVERT_EXPR_CODE_P (code)
-            || code == VIEW_CONVERT_EXPR)
-          vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
-        gassign *new_stmt = gimple_build_assign (vec_dest, vop);
-         new_temp = make_ssa_name (vec_dest, new_stmt);
-         gimple_assign_set_lhs (new_stmt, new_temp);
-        new_stmt_info
-          = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-         if (slp_node)
-          SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
-       }
+  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
 
+  /* Arguments are ready.  Create the new vector stmt.  */
+  FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
+    {
+      if (CONVERT_EXPR_CODE_P (code)
+         || code == VIEW_CONVERT_EXPR)
+       vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
+      gassign *new_stmt = gimple_build_assign (vec_dest, vop);
+      new_temp = make_ssa_name (vec_dest, new_stmt);
+      gimple_assign_set_lhs (new_stmt, new_temp);
+      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
       if (slp_node)
-        continue;
-
-      if (j == 0)
-       STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+       SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
       else
-       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
-      prev_stmt_info = new_stmt_info;
+       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
     }
+  if (!slp_node)
+    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
 
   vec_oprnds.release ();
   return true;
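The VIEW_CONVERT_EXPR built in the loop above only relabels the bits of a same-size vector; in portable C the equivalent reinterpretation is a memcpy (a sketch, not how GCC implements it internally):

#include <stdint.h>
#include <string.h>

/* Illustrative only: view 4 x int32_t as 8 x int16_t without changing
   any bits, as VIEW_CONVERT_EXPR does for equal-size vector types.  */
static void
view_convert_4si_to_8hi (const int32_t in[4], int16_t out[8])
{
  memcpy (out, in, 4 * sizeof (int32_t));  /* same size in bytes */
}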
@@ -5481,8 +5195,9 @@ vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
    Return true if STMT_INFO is vectorizable in this way.  */
 
 static bool
-vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                   stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_shift (vec_info *vinfo,
+                   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+                   gimple **vec_stmt, slp_tree slp_node,
                    stmt_vector_for_cost *cost_vec)
 {
   tree vec_dest;
@@ -5490,7 +5205,7 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   tree op0, op1 = NULL;
   tree vec_oprnd1 = NULL_TREE;
   tree vectype;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   enum tree_code code;
   machine_mode vec_mode;
   tree new_temp;
@@ -5499,20 +5214,19 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   machine_mode optab_op2_mode;
   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
   int ndts = 2;
-  stmt_vec_info prev_stmt_info;
   poly_uint64 nunits_in;
   poly_uint64 nunits_out;
   tree vectype_out;
   tree op1_vectype;
   int ncopies;
-  int j, i;
+  int i;
   vec<tree> vec_oprnds0 = vNULL;
   vec<tree> vec_oprnds1 = vNULL;
   tree vop0, vop1;
   unsigned int k;
   bool scalar_shift_arg = true;
-  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-  vec_info *vinfo = stmt_info->vinfo;
+  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
+  bool incompatible_op1_vectype_p = false;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
@@ -5546,18 +5260,19 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       return false;
     }
 
-  op0 = gimple_assign_rhs1 (stmt);
-  if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
+  slp_tree slp_op0;
+  if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+                          0, &op0, &slp_op0, &dt[0], &vectype))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                          "use not simple.\n");
       return false;
     }
-  /* If op0 is an external or constant def use a vector type with
-     the same size as the output vector type.  */
+  /* If op0 is an external or constant def, infer the vector type
+     from the scalar type.  */
   if (!vectype)
-    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
+    vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
   if (vec_stmt)
     gcc_assert (vectype);
   if (!vectype)
@@ -5573,10 +5288,10 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   if (maybe_ne (nunits_out, nunits_in))
     return false;
 
-  op1 = gimple_assign_rhs2 (stmt);
   stmt_vec_info op1_def_stmt_info;
-  if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
-                          &op1_def_stmt_info))
+  slp_tree slp_op1;
+  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
+                          &dt[1], &op1_vectype, &op1_def_stmt_info))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5656,12 +5371,17 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                          "vector/vector shift/rotate found.\n");
 
       if (!op1_vectype)
-       op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
-      if ((op1_vectype == NULL_TREE
-          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
+       op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
+                                                  slp_op1);
+      incompatible_op1_vectype_p
+       = (op1_vectype == NULL_TREE
+          || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
+                       TYPE_VECTOR_SUBPARTS (vectype))
+          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
+      if (incompatible_op1_vectype_p
          && (!slp_node
-             || SLP_TREE_DEF_TYPE
-                  (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
+             || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
+             || slp_op1->refcnt != 1))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5695,25 +5415,19 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                 dump_printf_loc (MSG_NOTE, vect_location,
                                  "vector/vector shift/rotate found.\n");
 
+             if (!op1_vectype)
+               op1_vectype = get_vectype_for_scalar_type (vinfo,
+                                                          TREE_TYPE (op1),
+                                                          slp_node);
+
               /* Unlike the other binary operators, shifts/rotates have
                  the rhs being int, instead of the same type as the lhs,
                  so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
-              if (dt[1] == vect_constant_def)
-               {
-                 if (!slp_node)
-                   op1 = fold_convert (TREE_TYPE (vectype), op1);
-               }
-             else if (!useless_type_conversion_p (TREE_TYPE (vectype),
-                                                  TREE_TYPE (op1)))
-               {
-                 if (vec_stmt && !slp_node)
-                   {
-                     op1 = fold_convert (TREE_TYPE (vectype), op1);
-                     op1 = vect_init_vector (stmt_info, op1,
-                                             TREE_TYPE (vectype), NULL);
-                   }
-               }
+             incompatible_op1_vectype_p
+               = (!op1_vectype
+                  || !tree_nop_conversion_p (TREE_TYPE (vectype),
+                                             TREE_TYPE (op1)));
             }
         }
     }
@@ -5756,9 +5470,39 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
   if (!vec_stmt) /* transformation not required.  */
     {
+      if (slp_node
+         && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
+             || (!scalar_shift_arg
+                 && (!incompatible_op1_vectype_p
+                     || dt[1] == vect_constant_def)
+                 && !vect_maybe_update_slp_op_vectype
+                       (slp_op1,
+                        incompatible_op1_vectype_p ? vectype : op1_vectype))))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "incompatible vector types for invariants\n");
+         return false;
+       }
+      /* Now adjust the constant shift amount in place.  */
+      if (slp_node
+         && incompatible_op1_vectype_p
+         && dt[1] == vect_constant_def)
+       {
+         for (unsigned i = 0;
+              i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
+           {
+             SLP_TREE_SCALAR_OPS (slp_op1)[i]
+               = fold_convert (TREE_TYPE (vectype),
+                               SLP_TREE_SCALAR_OPS (slp_op1)[i]);
+             gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
+                          == INTEGER_CST));
+           }
+       }
       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_shift");
-      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
+      vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
+                             scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
       return true;
     }
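The in-place adjustment above exists because gimple shift amounts are typically int while the vector patterns want the amount in the element type; a scalar illustration (names are made up):

#include <stdint.h>

/* Illustrative only: before a 32-bit constant amount can be broadcast
   into a vector shift of 64-bit lanes, it is converted to the element
   type; that is the fold_convert step applied to the SLP scalar ops.  */
static int64_t
shift_lane (int64_t lane, int32_t amount)
{
  int64_t elem_amount = (int64_t) amount;
  return lane << elem_amount;
}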
 
@@ -5768,115 +5512,88 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
     dump_printf_loc (MSG_NOTE, vect_location,
                      "transform binary/unary operation.\n");
 
+  if (incompatible_op1_vectype_p && !slp_node)
+    {
+      op1 = fold_convert (TREE_TYPE (vectype), op1);
+      if (dt[1] != vect_constant_def)
+       op1 = vect_init_vector (vinfo, stmt_info, op1,
+                               TREE_TYPE (vectype), NULL);
+    }
+
   /* Handle def.  */
   vec_dest = vect_create_destination_var (scalar_dest, vectype);
 
-  prev_stmt_info = NULL;
-  for (j = 0; j < ncopies; j++)
+  if (scalar_shift_arg)
     {
-      /* Handle uses.  */
-      if (j == 0)
-        {
-          if (scalar_shift_arg)
-            {
-              /* Vector shl and shr insn patterns can be defined with scalar
-                 operand 2 (shift operand).  In this case, use constant or loop
-                 invariant op1 directly, without extending it to vector mode
-                 first.  */
-              optab_op2_mode = insn_data[icode].operand[2].mode;
-              if (!VECTOR_MODE_P (optab_op2_mode))
-                {
-                  if (dump_enabled_p ())
-                    dump_printf_loc (MSG_NOTE, vect_location,
-                                     "operand 1 using scalar mode.\n");
-                  vec_oprnd1 = op1;
-                  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
-                  vec_oprnds1.quick_push (vec_oprnd1);
-                  if (slp_node)
-                    {
-                      /* Store vec_oprnd1 for every vector stmt to be created
-                         for SLP_NODE.  We check during the analysis that all
-                         the shift arguments are the same.
-                         TODO: Allow different constants for different vector
-                         stmts generated for an SLP instance.  */
-                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
-                        vec_oprnds1.quick_push (vec_oprnd1);
-                    }
-                }
-            }
-         else if (slp_node
-                  && !useless_type_conversion_p (TREE_TYPE (vectype),
-                                                 TREE_TYPE (op1)))
-           {
-             if (was_scalar_shift_arg)
-               {
-                 /* If the argument was the same in all lanes create
-                    the correctly typed vector shift amount directly.  */
-                 op1 = fold_convert (TREE_TYPE (vectype), op1);
-                 op1 = vect_init_vector (stmt_info, op1, TREE_TYPE (vectype),
-                                         !loop_vinfo ? gsi : NULL);
-                 vec_oprnd1 = vect_init_vector (stmt_info, op1, vectype,
-                                                !loop_vinfo ? gsi : NULL);
-                  vec_oprnds1.create (slp_node->vec_stmts_size);
-                 for (k = 0; k < slp_node->vec_stmts_size; k++)
-                   vec_oprnds1.quick_push (vec_oprnd1);
-               }
-             else if (dt[1] == vect_constant_def)
-               {
-                 /* Convert the scalar constant shift amounts in-place.  */
-                 slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
-                 gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
-                 for (unsigned i = 0;
-                      i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
-                   {
-                     SLP_TREE_SCALAR_OPS (shift)[i]
-                         = fold_convert (TREE_TYPE (vectype),
-                                         SLP_TREE_SCALAR_OPS (shift)[i]);
-                     gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
-                                  == INTEGER_CST));
-                   }
-               }
-             else
-               gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
-           }
-
-          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
-             (a special case for certain kind of vector shifts); otherwise,
-             operand 1 should be of a vector type (the usual case).  */
-          if (vec_oprnd1)
-           vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
-                              slp_node);
-          else
-           vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
-                              slp_node);
-        }
+      /* Vector shl and shr insn patterns can be defined with scalar
+        operand 2 (shift operand).  In this case, use constant or loop
+        invariant op1 directly, without extending it to vector mode
+        first.  */
+      optab_op2_mode = insn_data[icode].operand[2].mode;
+      if (!VECTOR_MODE_P (optab_op2_mode))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "operand 1 using scalar mode.\n");
+         vec_oprnd1 = op1;
+         vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
+         vec_oprnds1.quick_push (vec_oprnd1);
+         /* Store vec_oprnd1 for every vector stmt to be created.
+            We check during the analysis that all the shift arguments
+            are the same.
+            TODO: Allow different constants for different vector
+            stmts generated for an SLP instance.  */
+         for (k = 0;
+              k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
+           vec_oprnds1.quick_push (vec_oprnd1);
+       }
+    }
+  else if (slp_node && incompatible_op1_vectype_p)
+    {
+      if (was_scalar_shift_arg)
+       {
+         /* If the argument was the same in all lanes create
+            the correctly typed vector shift amount directly.  */
+         op1 = fold_convert (TREE_TYPE (vectype), op1);
+         op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
+                                 !loop_vinfo ? gsi : NULL);
+         vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
+                                        !loop_vinfo ? gsi : NULL);
+         vec_oprnds1.create (slp_node->vec_stmts_size);
+         for (k = 0; k < slp_node->vec_stmts_size; k++)
+           vec_oprnds1.quick_push (vec_oprnd1);
+       }
+      else if (dt[1] == vect_constant_def)
+       /* The constant shift amount has been adjusted in place.  */
+       ;
       else
-       vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
+       gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
+    }
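To make the scalar-versus-vector operand-2 distinction concrete, a standalone sketch (illustrative lane count and types):

#include <stdint.h>

/* Illustrative only: some shift patterns take one scalar amount that
   applies to every lane; others need a per-lane vector of amounts.  */
static void
shift_scalar_amount (int32_t v[4], int32_t amount)
{
  for (int i = 0; i < 4; i++)
    v[i] <<= amount;          /* one amount for all lanes */
}

static void
shift_vector_amount (int32_t v[4], const int32_t amount[4])
{
  for (int i = 0; i < 4; i++)
    v[i] <<= amount[i];       /* independent per-lane amounts */
}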
 
-      /* Arguments are ready.  Create the new vector stmt.  */
-      stmt_vec_info new_stmt_info = NULL;
-      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
-        {
-          vop1 = vec_oprnds1[i];
-         gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
-          new_temp = make_ssa_name (vec_dest, new_stmt);
-          gimple_assign_set_lhs (new_stmt, new_temp);
-         new_stmt_info
-           = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-          if (slp_node)
-           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
-        }
+  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
+     (a special case for certain kinds of vector shifts); otherwise,
+     operand 1 should be of a vector type (the usual case).  */
+  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+                    op0, &vec_oprnds0,
+                    vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
 
+  /* Arguments are ready.  Create the new vector stmt.  */
+  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
+    {
+      vop1 = vec_oprnds1[i];
+      gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
+      new_temp = make_ssa_name (vec_dest, new_stmt);
+      gimple_assign_set_lhs (new_stmt, new_temp);
+      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
       if (slp_node)
-        continue;
-
-      if (j == 0)
-       STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+       SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
       else
-       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-      prev_stmt_info = new_stmt_info;
+       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
     }
 
+  if (!slp_node)
+    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+
   vec_oprnds0.release ();
   vec_oprnds1.release ();
 
@@ -5893,15 +5610,16 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    Return true if STMT_INFO is vectorizable in this way.  */
 
 static bool
-vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                       stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_operation (vec_info *vinfo,
+                       stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+                       gimple **vec_stmt, slp_tree slp_node,
                        stmt_vector_for_cost *cost_vec)
 {
   tree vec_dest;
   tree scalar_dest;
   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
   tree vectype;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   enum tree_code code, orig_code;
   machine_mode vec_mode;
   tree new_temp;
@@ -5911,18 +5629,16 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   enum vect_def_type dt[3]
     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
   int ndts = 3;
-  stmt_vec_info prev_stmt_info;
   poly_uint64 nunits_in;
   poly_uint64 nunits_out;
   tree vectype_out;
   int ncopies, vec_num;
-  int j, i;
+  int i;
   vec<tree> vec_oprnds0 = vNULL;
   vec<tree> vec_oprnds1 = vNULL;
   vec<tree> vec_oprnds2 = vNULL;
   tree vop0, vop1, vop2;
-  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-  vec_info *vinfo = stmt_info->vinfo;
+  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
@@ -5936,11 +5652,27 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   if (!stmt)
     return false;
 
-  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
+  /* Loads and stores are handled in vectorizable_{load,store}.  */
+  if (STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
   orig_code = code = gimple_assign_rhs_code (stmt);
 
+  /* Shifts are handled in vectorizable_shift.  */
+  if (code == LSHIFT_EXPR
+      || code == RSHIFT_EXPR
+      || code == LROTATE_EXPR
+      || code == RROTATE_EXPR)
+    return false;
+
+  /* Comparisons are handled in vectorizable_comparison.  */
+  if (TREE_CODE_CLASS (code) == tcc_comparison)
+    return false;
+
+  /* Conditions are handled in vectorizable_condition.  */
+  if (code == COND_EXPR)
+    return false;
+
   /* For pointer addition and subtraction, we should use the normal
      plus and minus for the vector operation.  */
   if (code == POINTER_PLUS_EXPR)
@@ -5964,7 +5696,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
   /* Most operations cannot handle bit-precision types without extra
      truncations.  */
-  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
+  bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
+  if (!mask_op_p
       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
       /* Exception are bitwise binary operations.  */
       && code != BIT_IOR_EXPR
@@ -5977,16 +5710,17 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       return false;
     }
 
-  op0 = gimple_assign_rhs1 (stmt);
-  if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
+  slp_tree slp_op0;
+  if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+                          0, &op0, &slp_op0, &dt[0], &vectype))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                          "use not simple.\n");
       return false;
     }
-  /* If op0 is an external or constant def use a vector type with
-     the same size as the output vector type.  */
+  /* If op0 is an external or constant def, infer the vector type
+     from the scalar type.  */
   if (!vectype)
     {
       /* For boolean type we cannot determine vectype by
@@ -6006,7 +5740,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          vectype = vectype_out;
        }
       else
-       vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
+       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
+                                              slp_node);
     }
   if (vec_stmt)
     gcc_assert (vectype);
@@ -6025,10 +5760,12 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   if (maybe_ne (nunits_out, nunits_in))
     return false;
 
+  tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
+  slp_tree slp_op1 = NULL, slp_op2 = NULL;
   if (op_type == binary_op || op_type == ternary_op)
     {
-      op1 = gimple_assign_rhs2 (stmt);
-      if (!vect_is_simple_use (op1, vinfo, &dt[1]))
+      if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+                              1, &op1, &slp_op1, &dt[1], &vectype2))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6038,8 +5775,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
     }
   if (op_type == ternary_op)
     {
-      op2 = gimple_assign_rhs3 (stmt);
-      if (!vect_is_simple_use (op2, vinfo, &dt[2]))
+      if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+                              2, &op2, &slp_op2, &dt[2], &vectype3))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6064,10 +5801,20 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
   gcc_assert (ncopies >= 1);
 
-  /* Shifts are handled in vectorizable_shift ().  */
-  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
-      || code == RROTATE_EXPR)
-   return false;
+  /* Reject attempts to combine mask types with nonmask types, e.g. if
+     we have an AND between a (nonmask) boolean loaded from memory and
+     a (mask) boolean result of a comparison.
+
+     TODO: We could easily fix these cases up using pattern statements.  */
+  if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
+      || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
+      || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "mixed mask and nonmask vector types\n");
+      return false;
+    }
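The rejected mixture, written as scalar source (a sketch; the essential point is that one operand is a stored boolean and the other is a comparison result):

#include <stdbool.h>

/* Illustrative only: b[i] becomes a data vector loaded from memory,
   while (a[i] > 0) becomes a mask vector; ANDing the two mixes mask
   and nonmask types, which is what the check above refuses.  */
void
mixed_bool_and (const bool *b, const int *a, bool *out, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = b[i] & (a[i] > 0);
}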
 
   /* Supportable by target?  */
 
@@ -6123,7 +5870,7 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
         should only change the active lanes of the reduction chain,
         keeping the inactive lanes as-is.  */
       if (loop_vinfo
-         && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+         && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
          && reduc_idx >= 0)
        {
          if (cond_fn == IFN_LAST
@@ -6134,16 +5881,29 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "can't use a fully-masked loop because no"
                                 " conditional operation is available.\n");
-             LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+             LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
            }
          else
            vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
                                   vectype, NULL);
        }
 
+      /* Put types on constant and invariant SLP children.  */
+      if (slp_node
+         && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
+             || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
+             || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "incompatible vector types for invariants\n");
+         return false;
+       }
+
       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_operation");
-      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
+      vect_model_simple_cost (vinfo, stmt_info,
+                             ncopies, dt, ndts, slp_node, cost_vec);
       return true;
     }
 
@@ -6222,106 +5982,57 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
         VS2_3:  vz3 = vx3 + v1  -               -
         S2:     z = x + 1       -               VS2_0  */
 
-  prev_stmt_info = NULL;
-  for (j = 0; j < ncopies; j++)
-    {
-      /* Handle uses.  */
-      if (j == 0)
-       {
-         if (op_type == binary_op)
-           vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
-                              slp_node);
-         else if (op_type == ternary_op)
-           {
-             if (slp_node)
-               {
-                 auto_vec<vec<tree> > vec_defs(3);
-                 vect_get_slp_defs (slp_node, &vec_defs);
-                 vec_oprnds0 = vec_defs[0];
-                 vec_oprnds1 = vec_defs[1];
-                 vec_oprnds2 = vec_defs[2];
-               }
-             else
-               {
-                 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
-                                    &vec_oprnds1, NULL);
-                 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
-                                    NULL, NULL);
-               }
-           }
-         else
-           vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
-                              slp_node);
+  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+                    op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
+  /* Arguments are ready.  Create the new vector stmt.  */
+  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
+    {
+      gimple *new_stmt = NULL;
+      vop1 = ((op_type == binary_op || op_type == ternary_op)
+             ? vec_oprnds1[i] : NULL_TREE);
+      vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
+      if (masked_loop_p && reduc_idx >= 0)
+       {
+         /* Perform the operation on active elements only and take
+            inactive elements from the reduction chain input.  */
+         gcc_assert (!vop2);
+         vop2 = reduc_idx == 1 ? vop1 : vop0;
+         tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+                                         vectype, i);
+         gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
+                                                   vop0, vop1, vop2);
+         new_temp = make_ssa_name (vec_dest, call);
+         gimple_call_set_lhs (call, new_temp);
+         gimple_call_set_nothrow (call, true);
+         vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+         new_stmt = call;
        }
       else
        {
-         vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
-         if (op_type == ternary_op)
-           {
-             tree vec_oprnd = vec_oprnds2.pop ();
-             vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
-                                                                  vec_oprnd));
-           }
-       }
-
-      /* Arguments are ready.  Create the new vector stmt.  */
-      stmt_vec_info new_stmt_info = NULL;
-      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
-        {
-         vop1 = ((op_type == binary_op || op_type == ternary_op)
-                 ? vec_oprnds1[i] : NULL_TREE);
-         vop2 = ((op_type == ternary_op)
-                 ? vec_oprnds2[i] : NULL_TREE);
-         if (masked_loop_p && reduc_idx >= 0)
-           {
-             /* Perform the operation on active elements only and take
-                inactive elements from the reduction chain input.  */
-             gcc_assert (!vop2);
-             vop2 = reduc_idx == 1 ? vop1 : vop0;
-             tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
-                                             vectype, i * ncopies + j);
-             gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
-                                                       vop0, vop1, vop2);
-             new_temp = make_ssa_name (vec_dest, call);
-             gimple_call_set_lhs (call, new_temp);
-             gimple_call_set_nothrow (call, true);
-             new_stmt_info
-               = vect_finish_stmt_generation (stmt_info, call, gsi);
-           }
-         else
+         new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
+         new_temp = make_ssa_name (vec_dest, new_stmt);
+         gimple_assign_set_lhs (new_stmt, new_temp);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+         if (vec_cvt_dest)
            {
-             gassign *new_stmt = gimple_build_assign (vec_dest, code,
-                                                      vop0, vop1, vop2);
-             new_temp = make_ssa_name (vec_dest, new_stmt);
+             new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
+             new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
+                                             new_temp);
+             new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
-             new_stmt_info
-               = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-             if (vec_cvt_dest)
-               {
-                 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
-                 gassign *new_stmt
-                   = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
-                                          new_temp);
-                 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
-                 gimple_assign_set_lhs (new_stmt, new_temp);
-                 new_stmt_info
-                   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-               }
+             vect_finish_stmt_generation (vinfo, stmt_info,
+                                          new_stmt, gsi);
            }
-          if (slp_node)
-           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
-        }
-
+       }
       if (slp_node)
-        continue;
-
-      if (j == 0)
-       STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+       SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
       else
-       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-      prev_stmt_info = new_stmt_info;
+       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
     }
 
+  if (!slp_node)
+    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+
   vec_oprnds0.release ();
   vec_oprnds1.release ();
   vec_oprnds2.release ();
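
A note on the masked-reduction path in the hunk above: when the loop is fully
masked and the statement feeds a reduction, the operation is emitted as a
conditional internal-function call (cond_fn) whose inactive lanes pass the
reduction-chain input through unchanged.  A minimal scalar model of those
semantics, assuming addition for illustration (the patch itself emits a gimple
call such as IFN_COND_ADD):

/* Scalar model of COND_FN (mask, a, b, fallback): operate on active
   lanes only; inactive lanes take FALLBACK, which for a reduction is
   the chain input (vop0 or vop1, selected by reduc_idx).  Names and
   the '+' operation are assumptions, not code from this patch.  */
void
cond_add (int n, const _Bool *mask, const float *a, const float *b,
          const float *fallback, float *res)
{
  for (int i = 0; i < n; i++)
    res[i] = mask[i] ? a[i] + b[i] : fallback[i];
}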
@@ -6547,7 +6258,7 @@ scan_store_can_perm_p (tree vectype, tree init,
                   && TREE_CODE (init) != REAL_CST)
                  || !initializer_zerop (init))
                {
-                 tree masktype = build_same_sized_truth_vector_type (vectype);
+                 tree masktype = truth_type_for (vectype);
                  if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
                    return -1;
                  whole_vector_shift_kind = scan_store_kind_lshift_cond;
@@ -6573,11 +6284,11 @@ scan_store_can_perm_p (tree vectype, tree init,
    Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */
 
 static bool
-check_scan_store (stmt_vec_info stmt_info, tree vectype,
+check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
                  enum vect_def_type rhs_dt, bool slp, tree mask,
                  vect_memory_access_type memory_access_type)
 {
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
   tree ref_type;
 
@@ -6590,7 +6301,7 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
       || loop_vinfo == NULL
       || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
       || STMT_VINFO_GROUPED_ACCESS (stmt_info)
-      || !integer_zerop (DR_OFFSET (dr_info->dr))
+      || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
       || !integer_zerop (DR_INIT (dr_info->dr))
       || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
       || !alias_sets_conflict_p (get_alias_set (vectype),
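
For context, the checks above (and the transform in vectorizable_scan_store
below) deal with the stores produced by lowering an inscan reduction.  A
minimal user-level example of the construct, not taken from this patch:

/* Inclusive prefix sum with #pragma omp scan; the store to r[i] is
   lowered into the "magic store" that check_scan_store validates.  */
void
inclusive_scan (float *restrict r, const float *restrict a, int n)
{
  float sum = 0.0f;
#pragma omp simd reduction (inscan, +:sum)
  for (int i = 0; i < n; i++)
    {
      sum += a[i];
#pragma omp scan inclusive (sum)
      r[i] = sum;
    }
}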
@@ -6977,13 +6688,13 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
    Handle only the transformation, checking is done in check_scan_store.  */
 
 static bool
-vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                        stmt_vec_info *vec_stmt, int ncopies)
+vectorizable_scan_store (vec_info *vinfo,
+                        stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+                        gimple **vec_stmt, int ncopies)
 {
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
   tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
-  vec_info *vinfo = stmt_info->vinfo;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 
   if (dump_enabled_p ())
@@ -7072,44 +6783,41 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
            zero_vec = build_zero_cst (vectype);
          if (masktype == NULL_TREE
              && use_whole_vector[i] == scan_store_kind_lshift_cond)
-           masktype = build_same_sized_truth_vector_type (vectype);
+           masktype = truth_type_for (vectype);
          perms[i] = vect_gen_perm_mask_any (vectype, indices);
        }
       else
        perms[i] = vect_gen_perm_mask_checked (vectype, indices);
     }
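
The permute masks built in the loop above drive the scan in log2(nunits)
whole-vector steps.  A plain-C sketch of the underlying algorithm (a
Hillis-Steele inclusive scan; the 8-lane width and the '+' operation are
assumptions for illustration):

/* Model of the VEC_PERM_EXPR/VEC_COND_EXPR sequence: at each step,
   shift the vector by 2^k lanes (shifting in zeros, or selecting via
   a VEC_COND when only scan_store_kind_lshift_cond is available) and
   combine with the unshifted vector.  */
#define N 8
void
whole_vector_scan (float v[N])
{
  for (int shift = 1; shift < N; shift <<= 1)
    {
      float t[N];
      for (int i = 0; i < N; i++)
        t[i] = i >= shift ? v[i - shift] : 0.0f;
      for (int i = 0; i < N; i++)
        v[i] += t[i];   /* '+' stands for the scan operation CODE.  */
    }
}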
 
-  stmt_vec_info prev_stmt_info = NULL;
   tree vec_oprnd1 = NULL_TREE;
   tree vec_oprnd2 = NULL_TREE;
   tree vec_oprnd3 = NULL_TREE;
   tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
   tree dataref_offset = build_int_cst (ref_type, 0);
-  tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
+  tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
+                                          vectype, VMAT_CONTIGUOUS);
   tree ldataref_ptr = NULL_TREE;
   tree orig = NULL_TREE;
   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
     ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
+  auto_vec<tree> vec_oprnds1;
+  auto_vec<tree> vec_oprnds2;
+  auto_vec<tree> vec_oprnds3;
+  vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
+                    *init, &vec_oprnds1,
+                    ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
+                    rhs2, &vec_oprnds3);
   for (int j = 0; j < ncopies; j++)
     {
-      stmt_vec_info new_stmt_info;
+      vec_oprnd1 = vec_oprnds1[j];
+      if (ldataref_ptr == NULL)
+       vec_oprnd2 = vec_oprnds2[j];
+      vec_oprnd3 = vec_oprnds3[j];
       if (j == 0)
-       {
-         vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
-         if (ldataref_ptr == NULL)
-           vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
-         vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
-         orig = vec_oprnd3;
-       }
-      else
-       {
-         vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
-         if (ldataref_ptr == NULL)
-           vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
-         vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
-         if (!inscan_var_store)
-           dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
-       }
+       orig = vec_oprnd3;
+      else if (!inscan_var_store)
+       dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
 
       if (ldataref_ptr)
        {
@@ -7119,12 +6827,9 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                       dataref_offset);
          vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
          gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
-         new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
-         if (prev_stmt_info == NULL)
-           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
-         else
-           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-         prev_stmt_info = new_stmt_info;
+         vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+         STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
+         *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
        }
 
       tree v = vec_oprnd2;
@@ -7137,12 +6842,9 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                                != scan_store_kind_perm))
                                           ? zero_vec : vec_oprnd1, v,
                                           perms[i]);
-         new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
-         if (prev_stmt_info == NULL)
-           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
-         else
-           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-         prev_stmt_info = new_stmt_info;
+         vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+         STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
+         *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
 
          if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
            {
@@ -7157,9 +6859,9 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
              tree new_temp2 = make_ssa_name (vectype);
              g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
                                       new_temp, vec_oprnd1);
-             new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
-             STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-             prev_stmt_info = new_stmt_info;
+             vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+             STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
              new_temp = new_temp2;
            }
 
@@ -7176,18 +6878,16 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
          tree new_temp2 = make_ssa_name (vectype);
          g = gimple_build_assign (new_temp2, code, v, new_temp);
-         new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
-         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-         prev_stmt_info = new_stmt_info;
+         vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+         STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
 
          v = new_temp2;
        }
 
       tree new_temp = make_ssa_name (vectype);
       gimple *g = gimple_build_assign (new_temp, code, orig, v);
-      new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
-      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-      prev_stmt_info = new_stmt_info;
+      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
 
       tree last_perm_arg = new_temp;
       /* For exclusive scan, new_temp computed above is the exclusive scan
@@ -7197,17 +6897,15 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
        {
          last_perm_arg = make_ssa_name (vectype);
          g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
-         new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
-         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-         prev_stmt_info = new_stmt_info;
+         vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+         STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
        }
 
       orig = make_ssa_name (vectype);
       g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
                               last_perm_arg, perms[units_log2]);
-      new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
-      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-      prev_stmt_info = new_stmt_info;
+      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
 
       if (!inscan_var_store)
        {
@@ -7216,9 +6914,8 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                       dataref_offset);
          vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
          g = gimple_build_assign (data_ref, new_temp);
-         new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
-         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-         prev_stmt_info = new_stmt_info;
+         vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+         STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
        }
     }
 
@@ -7233,10 +6930,8 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                     dataref_offset);
        vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
        gimple *g = gimple_build_assign (data_ref, orig);
-       stmt_vec_info new_stmt_info
-         = vect_finish_stmt_generation (stmt_info, g, gsi);
-       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-       prev_stmt_info = new_stmt_info;
+       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
       }
   return true;
 }
@@ -7251,22 +6946,22 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    Return true if STMT_INFO is vectorizable in this way.  */
 
 static bool
-vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                   stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_store (vec_info *vinfo,
+                   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+                   gimple **vec_stmt, slp_tree slp_node,
                    stmt_vector_for_cost *cost_vec)
 {
   tree data_ref;
   tree op;
   tree vec_oprnd = NULL_TREE;
   tree elem_type;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   class loop *loop = NULL;
   machine_mode vec_mode;
   tree dummy;
   enum dr_alignment_support alignment_support_scheme;
   enum vect_def_type rhs_dt = vect_unknown_def_type;
   enum vect_def_type mask_dt = vect_unknown_def_type;
-  stmt_vec_info prev_stmt_info = NULL;
   tree dataref_ptr = NULL_TREE;
   tree dataref_offset = NULL_TREE;
   gimple *ptr_incr = NULL;
@@ -7281,8 +6976,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   vec<tree> vec_oprnds = vNULL;
   bool slp = (slp_node != NULL);
   unsigned int vec_num;
-  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-  vec_info *vinfo = stmt_info->vinfo;
+  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   tree aggr_type;
   gather_scatter_info gs_info;
   poly_uint64 vf;
@@ -7336,8 +7030,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       if (mask_index >= 0)
        {
          mask = gimple_call_arg (call, mask_index);
-         if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
-                                          &mask_vectype))
+         if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
+                                      &mask_vectype))
            return false;
        }
     }
@@ -7378,7 +7072,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       return false;
     }
 
-  if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
+  if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
+                            op, &rhs_dt, &rhs_vectype, &vls_type))
     return false;
 
   elem_type = TREE_TYPE (vectype);
@@ -7388,8 +7083,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
     return false;
 
   vect_memory_access_type memory_access_type;
-  if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
-                           &memory_access_type, &gs_info))
+  if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, vls_type,
+                           ncopies, &memory_access_type, &gs_info))
     return false;
 
   if (mask)
@@ -7438,7 +7133,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
     {
-      if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
+      if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
                             memory_access_type))
        return false;
     }
@@ -7448,13 +7143,24 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
 
       if (loop_vinfo
-         && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
-       check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
-                                 memory_access_type, &gs_info, mask);
+         && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
+       check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
+                                             group_size, memory_access_type,
+                                             &gs_info, mask);
+
+      if (slp_node
+         && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
+                                               vectype))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "incompatible vector types for invariants\n");
+         return false;
+       }
 
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
-      vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
-                            vls_type, slp_node, cost_vec);
+      vect_model_store_cost (vinfo, stmt_info, ncopies,
+                            memory_access_type, vls_type, slp_node, cost_vec);
       return true;
     }
   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
@@ -7513,8 +7219,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          ncopies *= 2;
 
          if (mask)
-           mask_halfvectype
-             = build_same_sized_truth_vector_type (gs_info.offset_vectype);
+           mask_halfvectype = truth_type_for (gs_info.offset_vectype);
        }
       else
        gcc_unreachable ();
@@ -7540,56 +7245,61 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       if (mask == NULL_TREE)
        {
          mask_arg = build_int_cst (masktype, -1);
-         mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
+         mask_arg = vect_init_vector (vinfo, stmt_info,
+                                      mask_arg, masktype, NULL);
        }
 
       scale = build_int_cst (scaletype, gs_info.scale);
 
-      prev_stmt_info = NULL;
+      auto_vec<tree> vec_oprnds0;
+      auto_vec<tree> vec_oprnds1;
+      auto_vec<tree> vec_masks;
+      if (mask)
+       {
+         tree mask_vectype = truth_type_for (vectype);
+         vect_get_vec_defs_for_operand (vinfo, stmt_info,
+                                        modifier == NARROW
+                                        ? ncopies / 2 : ncopies,
+                                        mask, &vec_masks, mask_vectype);
+       }
+      vect_get_vec_defs_for_operand (vinfo, stmt_info,
+                                    modifier == WIDEN
+                                    ? ncopies / 2 : ncopies,
+                                    gs_info.offset, &vec_oprnds0);
+      vect_get_vec_defs_for_operand (vinfo, stmt_info,
+                                    modifier == NARROW
+                                    ? ncopies / 2 : ncopies,
+                                    op, &vec_oprnds1);
       for (j = 0; j < ncopies; ++j)
        {
-         if (j == 0)
+         if (modifier == WIDEN)
            {
-             src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
-             op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
-                                                             stmt_info);
+             if (j & 1)
+               op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
+                                          perm_mask, stmt_info, gsi);
+             else
+               op = vec_oprnd0 = vec_oprnds0[j / 2];
+             src = vec_oprnd1 = vec_oprnds1[j];
              if (mask)
-               mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
-                                                                  stmt_info);
+               mask_op = vec_mask = vec_masks[j];
            }
-         else if (modifier != NONE && (j & 1))
+         else if (modifier == NARROW)
            {
-             if (modifier == WIDEN)
-               {
-                 src
-                   = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
-                                                                  vec_oprnd1);
-                 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
-                                            stmt_info, gsi);
-                 if (mask)
-                   mask_op
-                     = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
-                                                                  vec_mask);
-               }
-             else if (modifier == NARROW)
-               {
-                 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
-                                             stmt_info, gsi);
-                 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
-                                                                   vec_oprnd0);
-               }
+             if (j & 1)
+               src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
+                                           perm_mask, stmt_info, gsi);
              else
-               gcc_unreachable ();
+               src = vec_oprnd1 = vec_oprnds1[j / 2];
+             op = vec_oprnd0 = vec_oprnds0[j];
+             if (mask)
+               mask_op = vec_mask = vec_masks[j / 2];
            }
          else
            {
-             src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
-                                                                vec_oprnd1);
-             op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
-                                                               vec_oprnd0);
+             op = vec_oprnd0 = vec_oprnds0[j];
+             src = vec_oprnd1 = vec_oprnds1[j];
              if (mask)
-               mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
-                                                                    vec_mask);
+               mask_op = vec_mask = vec_masks[j];
            }
 
          if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
@@ -7600,7 +7310,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
              src = build1 (VIEW_CONVERT_EXPR, srctype, src);
              gassign *new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
-             vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
              src = var;
            }
 
@@ -7612,7 +7322,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              gassign *new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
-             vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
              op = var;
            }
 
@@ -7628,7 +7338,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
                                                        : VEC_UNPACK_LO_EXPR,
                                           mask_op);
-                 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
                  mask_arg = var;
                }
              tree optype = TREE_TYPE (mask_arg);
@@ -7640,7 +7350,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
              mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
              gassign *new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
-             vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
              mask_arg = var;
              if (!useless_type_conversion_p (masktype, utype))
                {
@@ -7648,26 +7358,22 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                              <= TYPE_PRECISION (masktype));
                  var = vect_get_new_ssa_name (masktype, vect_scalar_var);
                  new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
-                 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
                  mask_arg = var;
                }
            }
 
          gcall *new_stmt
            = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
-         stmt_vec_info new_stmt_info
-           = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 
-         if (prev_stmt_info == NULL)
-           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
-         else
-           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-         prev_stmt_info = new_stmt_info;
+         STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
        }
+      *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
       return true;
     }
   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
-    return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
+    return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
 
   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
     DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
@@ -7724,6 +7430,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       tree running_off;
       tree stride_base, stride_step, alias_off;
       tree vec_oprnd;
+      tree dr_offset;
       unsigned int g;
       /* Checked by get_load_store_type.  */
       unsigned int const_nunits = nunits.to_constant ();
@@ -7731,11 +7438,12 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
       gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
 
+      dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
       stride_base
        = fold_build_pointer_plus
            (DR_BASE_ADDRESS (first_dr_info->dr),
             size_binop (PLUS_EXPR,
-                        convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
+                        convert_to_ptrofftype (dr_offset),
                         convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
 
@@ -7775,9 +7483,9 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                 of vector elts directly.  */
              scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
              machine_mode vmode;
-             if (!mode_for_vector (elmode, group_size).exists (&vmode)
-                 || !VECTOR_MODE_P (vmode)
-                 || !targetm.vector_mode_supported_p (vmode)
+             if (!VECTOR_MODE_P (TYPE_MODE (vectype))
+                 || !related_vector_mode (TYPE_MODE (vectype), elmode,
+                                          group_size).exists (&vmode)
                  || (convert_optab_handler (vec_extract_optab,
                                             TYPE_MODE (vectype), vmode)
                      == CODE_FOR_nothing))
@@ -7794,9 +7502,9 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                     element extracts from the original vector type and
                     element size stores.  */
                  if (int_mode_for_size (lsize, 0).exists (&elmode)
-                     && mode_for_vector (elmode, lnunits).exists (&vmode)
-                     && VECTOR_MODE_P (vmode)
-                     && targetm.vector_mode_supported_p (vmode)
+                     && VECTOR_MODE_P (TYPE_MODE (vectype))
+                     && related_vector_mode (TYPE_MODE (vectype), elmode,
+                                             lnunits).exists (&vmode)
                      && (convert_optab_handler (vec_extract_optab,
                                                 vmode, elmode)
                          != CODE_FOR_nothing))
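
Both optab checks above probe the same fallback from the store side: extract
wider integer elements rather than many small scalars.  A hedged GNU-C sketch,
with all types assumed for illustration:

/* Store a V4SF two floats at a time: view it as V2DI and emit 64-bit
   element extracts and stores.  Hypothetical stand-in for the
   vec_extract_optab path, not code from the patch.  */
typedef float v4sf __attribute__ ((vector_size (16)));
typedef long long v2di __attribute__ ((vector_size (16)));

void
store_pairs (float *p, float *q, v4sf v)
{
  v2di t = (v2di) v;               /* VIEW_CONVERT_EXPR equivalent.  */
  long long lo = t[0], hi = t[1];  /* vec_extract.  */
  __builtin_memcpy (p, &lo, sizeof lo);
  __builtin_memcpy (q, &hi, sizeof hi);
}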
@@ -7837,11 +7545,9 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
       incr = gsi_stmt (incr_gsi);
-      loop_vinfo->add_stmt (incr);
 
       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
 
-      prev_stmt_info = NULL;
       alias_off = build_int_cst (ref_type, 0);
       stmt_vec_info next_stmt_info = first_stmt_info;
       for (g = 0; g < group_size; g++)
@@ -7855,39 +7561,19 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
              tree newoff = copy_ssa_name (running_off, NULL);
              incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                          running_off, pos);
-             vect_finish_stmt_generation (stmt_info, incr, gsi);
+             vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
              running_off = newoff;
            }
+         if (!slp)
+           op = vect_get_store_rhs (next_stmt_info);
+         vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
+                            op, &vec_oprnds);
          unsigned int group_el = 0;
          unsigned HOST_WIDE_INT
            elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
          for (j = 0; j < ncopies; j++)
            {
-             /* We've set op and dt above, from vect_get_store_rhs,
-                and first_stmt_info == stmt_info.  */
-             if (j == 0)
-               {
-                 if (slp)
-                   {
-                     vect_get_vec_defs (op, NULL_TREE, stmt_info,
-                                        &vec_oprnds, NULL, slp_node);
-                     vec_oprnd = vec_oprnds[0];
-                   }
-                 else
-                   {
-                     op = vect_get_store_rhs (next_stmt_info);
-                     vec_oprnd = vect_get_vec_def_for_operand
-                       (op, next_stmt_info);
-                   }
-               }
-             else
-               {
-                 if (slp)
-                   vec_oprnd = vec_oprnds[j];
-                 else
-                   vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
-                                                               vec_oprnd);
-               }
+             vec_oprnd = vec_oprnds[j];
              /* Pun the vector to extract from if necessary.  */
              if (lvectype != vectype)
                {
@@ -7895,7 +7581,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                  gimple *pun
                    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
                                                        lvectype, vec_oprnd));
-                 vect_finish_stmt_generation (stmt_info, pun, gsi);
+                 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
                  vec_oprnd = tem;
                }
              for (i = 0; i < nstores; i++)
@@ -7921,8 +7607,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
                  /* And store it to *running_off.  */
                  assign = gimple_build_assign (newref, elem);
-                 stmt_vec_info assign_info
-                   = vect_finish_stmt_generation (stmt_info, assign, gsi);
+                 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
 
                  group_el += lnel;
                  if (! slp
@@ -7931,7 +7616,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                      newoff = copy_ssa_name (running_off, NULL);
                      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                                  running_off, stride_step);
-                     vect_finish_stmt_generation (stmt_info, incr, gsi);
+                     vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
 
                      running_off = newoff;
                      group_el = 0;
@@ -7940,11 +7625,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                      && !slp)
                    {
                      if (j == 0 && i == 0)
-                       STMT_VINFO_VEC_STMT (stmt_info)
-                           = *vec_stmt = assign_info;
-                     else
-                       STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
-                     prev_stmt_info = assign_info;
+                       *vec_stmt = assign;
+                     STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
                    }
                }
            }
@@ -7960,8 +7642,14 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   auto_vec<tree> dr_chain (group_size);
   oprnds.create (group_size);
 
-  alignment_support_scheme
-    = vect_supportable_dr_alignment (first_dr_info, false);
+  /* Gather-scatter accesses perform only component accesses, so alignment
+     is irrelevant for them.  */
+  if (memory_access_type == VMAT_GATHER_SCATTER)
+    alignment_support_scheme = dr_unaligned_supported;
+  else
+    alignment_support_scheme
+      = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
+
   gcc_assert (alignment_support_scheme);
   vec_loop_masks *loop_masks
     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
@@ -7999,7 +7687,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
        aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
       else
        aggr_type = vectype;
-      bump = vect_get_data_ptr_increment (dr_info, aggr_type,
+      bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
                                          memory_access_type);
     }
 
@@ -8009,8 +7697,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
      more than one vector stmt - i.e - we need to "unroll" the
-     vector stmt by a factor VF/nunits.  For more details see documentation in
-     vect_get_vec_def_for_copy_stmt.  */
+     vector stmt by a factor VF/nunits.  */
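
A worked instance of the factor described above, with assumed numbers:

/* With VF = 8 scalar iterations per vector iteration and a 4-element
   vectype, each scalar statement expands to NCOPIES = 8 / 4 = 2
   vector statements.  Numbers are illustrative only.  */
enum { VF = 8, NUNITS = 4, NCOPIES = VF / NUNITS };  /* NCOPIES == 2 */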
 
   /* In case of interleaving (non-unit grouped access):
 
@@ -8045,30 +7732,31 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
      STMT_VINFO_RELATED_STMT for the next copies.
   */
 
-  prev_stmt_info = NULL;
-  tree vec_mask = NULL_TREE;
+  auto_vec<tree> vec_masks;
+  tree vec_mask = NULL;
+  auto_vec<tree> vec_offsets;
+  auto_vec<vec<tree> > gvec_oprnds;
+  gvec_oprnds.safe_grow_cleared (group_size);
   for (j = 0; j < ncopies; j++)
     {
-      stmt_vec_info new_stmt_info;
+      gimple *new_stmt;
       if (j == 0)
        {
           if (slp)
             {
              /* Get vectorized arguments for SLP_NODE.  */
-             vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
-                                NULL, slp_node);
-
+             vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
+                                op, &vec_oprnds);
               vec_oprnd = vec_oprnds[0];
             }
           else
             {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
-                used as an input to vect_permute_store_chain(), and OPRNDS as
-                an input to vect_get_vec_def_for_stmt_copy() for the next copy.
+                used as an input to vect_permute_store_chain().
 
-                If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
-                OPRNDS are of size 1.  */
+                If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
+                and OPRNDS are of size 1.  */
              stmt_vec_info next_stmt_info = first_stmt_info;
              for (i = 0; i < group_size; i++)
                {
@@ -8078,15 +7766,19 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                     that there is no interleaving, DR_GROUP_SIZE is 1,
                     and only one iteration of the loop will be executed.  */
                  op = vect_get_store_rhs (next_stmt_info);
-                 vec_oprnd = vect_get_vec_def_for_operand
-                   (op, next_stmt_info);
-                 dr_chain.quick_push (vec_oprnd);
-                 oprnds.quick_push (vec_oprnd);
+                 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
+                                                ncopies, op, &gvec_oprnds[i]);
+                 vec_oprnd = gvec_oprnds[i][0];
+                 dr_chain.quick_push (gvec_oprnds[i][0]);
+                 oprnds.quick_push (gvec_oprnds[i][0]);
                  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
                }
              if (mask)
-               vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
-                                                        mask_vectype);
+               {
+                 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+                                                mask, &vec_masks, mask_vectype);
+                 vec_mask = vec_masks[0];
+               }
            }
 
          /* We should have caught mismatched types earlier.  */
@@ -8098,7 +7790,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
              && !loop_masks
              && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
-             && integer_zerop (DR_OFFSET (first_dr_info->dr))
+             && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
              && integer_zerop (DR_INIT (first_dr_info->dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (TREE_TYPE (ref_type))))
@@ -8107,11 +7799,14 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
              dataref_offset = build_int_cst (ref_type, 0);
            }
          else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-           vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
-                                        &dataref_ptr, &vec_offset);
+           {
+             vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
+                                          &dataref_ptr, &vec_offsets, ncopies);
+             vec_offset = vec_offsets[0];
+           }
          else
            dataref_ptr
-             = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
+             = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
                                          simd_lane_access_p ? loop : NULL,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, NULL_TREE, bump);
@@ -8120,27 +7815,24 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
-            DR_CHAIN is then used as an input to vect_permute_store_chain(),
-            and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
-            next copy.
+            DR_CHAIN is then used as an input to vect_permute_store_chain().
             If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
-             op = oprnds[i];
-             vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
-             dr_chain[i] = vec_oprnd;
-             oprnds[i] = vec_oprnd;
+             vec_oprnd = gvec_oprnds[i][j];
+             dr_chain[i] = gvec_oprnds[i][j];
+             oprnds[i] = gvec_oprnds[i][j];
            }
          if (mask)
-           vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
+           vec_mask = vec_masks[j];
          if (dataref_offset)
            dataref_offset
              = int_const_binop (PLUS_EXPR, dataref_offset, bump);
          else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-           vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
+           vec_offset = vec_offsets[j];
          else
-           dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+           dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
                                           stmt_info, bump);
        }
 
@@ -8154,13 +7846,14 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          /* Invalidate the current contents of VEC_ARRAY.  This should
             become an RTL clobber too, which prevents the vector registers
             from being upward-exposed.  */
-         vect_clobber_variable (stmt_info, gsi, vec_array);
+         vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
 
          /* Store the individual vectors into the array.  */
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
-             write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
+             write_vector_array (vinfo, stmt_info,
+                                 gsi, vec_oprnd, vec_array, i);
            }
 
          tree final_mask = NULL;
@@ -8193,21 +7886,22 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
              gimple_call_set_lhs (call, data_ref);
            }
          gimple_call_set_nothrow (call, true);
-         new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
+         vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+         new_stmt = call;
 
          /* Record that VEC_ARRAY is now dead.  */
-         vect_clobber_variable (stmt_info, gsi, vec_array);
+         vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
        }
       else
        {
-         new_stmt_info = NULL;
+         new_stmt = NULL;
          if (grouped_store)
            {
              if (j == 0)
                result_chain.create (group_size);
              /* Permute.  */
-             vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
-                                       &result_chain);
+             vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
+                                       gsi, &result_chain);
            }
 
          stmt_vec_info next_stmt_info = first_stmt_info;
@@ -8238,15 +7932,15 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
                       scale, vec_oprnd);
                  gimple_call_set_nothrow (call, true);
-                 new_stmt_info
-                   = vect_finish_stmt_generation (stmt_info, call, gsi);
+                 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+                 new_stmt = call;
                  break;
                }
 
              if (i > 0)
                /* Bump the vector pointer.  */
-               dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
-                                              stmt_info, bump);
+               dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+                                              gsi, stmt_info, bump);
 
              if (slp)
                vec_oprnd = vec_oprnds[i];
@@ -8260,7 +7954,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr_info) == -1)
                {
-                 align = dr_alignment (vect_dr_behavior (first_dr_info));
+                 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
                  misalign = 0;
                }
              else
@@ -8281,7 +7975,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                  gimple *perm_stmt 
                    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
                                           vec_oprnd, perm_mask);
-                 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
+                 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
 
                  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
                  vec_oprnd = new_temp;
@@ -8297,8 +7991,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                                  dataref_ptr, ptr,
                                                  final_mask, vec_oprnd);
                  gimple_call_set_nothrow (call, true);
-                 new_stmt_info
-                   = vect_finish_stmt_generation (stmt_info, call, gsi);
+                 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+                 new_stmt = call;
                }
              else
                {
@@ -8318,10 +8012,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                      = build_aligned_type (TREE_TYPE (data_ref),
                                            TYPE_ALIGN (elem_type));
                  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
-                 gassign *new_stmt
-                   = gimple_build_assign (data_ref, vec_oprnd);
-                 new_stmt_info
-                   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
+                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
                }
 
              if (slp)
@@ -8335,13 +8027,16 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       if (!slp)
        {
          if (j == 0)
-           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
-         else
-           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-         prev_stmt_info = new_stmt_info;
+           *vec_stmt = new_stmt;
+         STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
        }
     }
 
+  for (i = 0; i < group_size; ++i)
+    {
+      vec<tree> oprndsi = gvec_oprnds[i];
+      oprndsi.release ();
+    }
   oprnds.release ();
   result_chain.release ();
   vec_oprnds.release ();
@@ -8382,7 +8077,8 @@ vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
    permuted vector variable.  */
 
 static tree
-permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
+permute_vec_elements (vec_info *vinfo,
+                     tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
                      gimple_stmt_iterator *gsi)
 {
   tree vectype = TREE_TYPE (x);
@@ -8398,7 +8094,7 @@ permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
 
   /* Generate the permute statement.  */
   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
-  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
+  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
 
   return data_ref;
 }
@@ -8467,16 +8163,15 @@ hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
    Return true if STMT_INFO is vectorizable in this way.  */
 
 static bool
-vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                  stmt_vec_info *vec_stmt, slp_tree slp_node,
-                  slp_instance slp_node_instance,
+vectorizable_load (vec_info *vinfo,
+                  stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+                  gimple **vec_stmt, slp_tree slp_node,
                   stmt_vector_for_cost *cost_vec)
 {
   tree scalar_dest;
   tree vec_dest = NULL;
   tree data_ref = NULL;
-  stmt_vec_info prev_stmt_info;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   class loop *loop = NULL;
   class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
   bool nested_in_vect_loop = false;
@@ -8506,11 +8201,10 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   int vec_num;
   bool slp = (slp_node != NULL);
   bool slp_perm = false;
-  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   poly_uint64 vf;
   tree aggr_type;
   gather_scatter_info gs_info;
-  vec_info *vinfo = stmt_info->vinfo;
   tree ref_type;
   enum vect_def_type mask_dt = vect_unknown_def_type;
 
@@ -8521,6 +8215,20 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       && ! vec_stmt)
     return false;
 
+  if (!STMT_VINFO_DATA_REF (stmt_info))
+    return false;
+
+  /* ???  Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
+     for unpermuted loads but we get passed SLP_TREE_REPRESENTATIVE
+     which can be different when reduction chains were re-ordered.
+     Now that we know this is a dataref, reset stmt_info back to
+     SLP_TREE_SCALAR_STMTS[0].  Once everything is SLP, this should be
+     refactored to maintain the dr_vec_info pointer for the relevant
+     access explicitly.  */
+  stmt_vec_info orig_stmt_info = stmt_info;
+  if (slp_node)
+    stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
+
   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
     {
@@ -8557,15 +8265,12 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       if (mask_index >= 0)
        {
          mask = gimple_call_arg (call, mask_index);
-         if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
-                                          &mask_vectype))
+         if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
+                                      &mask_vectype))
            return false;
        }
     }
 
-  if (!STMT_VINFO_DATA_REF (stmt_info))
-    return false;
-
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
 
@@ -8645,7 +8350,44 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
        }
 
       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
-       slp_perm = true;
+       {
+         slp_perm = true;
+
+         if (!loop_vinfo)
+           {
+             /* In BB vectorization we must not use a loaded vector that
+                accesses elements in excess of DR_GROUP_SIZE.  */
+             stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
+             group_info = DR_GROUP_FIRST_ELEMENT (group_info);
+             unsigned HOST_WIDE_INT nunits;
+             unsigned j, k, maxk = 0;
+             FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
+               if (k > maxk)
+                 maxk = k;
+             tree vectype = STMT_VINFO_VECTYPE (group_info);
+             if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
+                 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
+               {
+                 if (dump_enabled_p ())
+                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                    "BB vectorization with gaps at the end of "
+                                    "a load is not supported\n");
+                 return false;
+               }
+           }
+
+         auto_vec<tree> tem;
+         unsigned n_perms;
+         if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
+                                            true, &n_perms))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+                                vect_location,
+                                "unsupported load permutation\n");
+             return false;
+           }
+       }
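
A worked instance of the gap check above, with assumed numbers:

/* With DR_GROUP_SIZE = 6 and nunits = 4, the loads cover lanes 0-3
   (fully used) and 4-7 (lanes 6-7 are the gap).  The limit is
   6 & ~(4 - 1) = 4, so any permutation index >= 4 would require the
   gap-containing vector and BB vectorization is rejected.  */
#include <stdio.h>

int
main (void)
{
  unsigned group_size = 6, nunits = 4, maxk = 5;
  unsigned limit = group_size & ~(nunits - 1);   /* = 4 */
  printf ("reject: %d\n", maxk >= limit);        /* prints "reject: 1" */
  return 0;
}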
 
       /* Invalidate assumptions made by dependence analysis when vectorization
         on the unrolled body effectively re-orders stmts.  */
@@ -8665,8 +8407,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
     group_size = 1;
 
   vect_memory_access_type memory_access_type;
-  if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
-                           &memory_access_type, &gs_info))
+  if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, VLS_LOAD,
+                           ncopies, &memory_access_type, &gs_info))
     return false;
 
   if (mask)
@@ -8695,13 +8437,14 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
        STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
 
       if (loop_vinfo
-         && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
-       check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
-                                 memory_access_type, &gs_info, mask);
-
-      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
-      vect_model_load_cost (stmt_info, ncopies, memory_access_type,
-                           slp_node_instance, slp_node, cost_vec);
+         && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
+       check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
+                                             group_size, memory_access_type,
+                                             &gs_info, mask);
+
+      STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
+      vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
+                           slp_node, cost_vec);
       return true;
     }
 
@@ -8720,7 +8463,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
     {
-      vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
+      vect_build_gather_load_calls (vinfo,
+                                   stmt_info, gsi, vec_stmt, &gs_info, mask);
       return true;
     }
 
@@ -8748,32 +8492,25 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
        }
       /* These copies are all equivalent, but currently the representation
         requires a separate STMT_VINFO_VEC_STMT for each one.  */
-      prev_stmt_info = NULL;
       gimple_stmt_iterator gsi2 = *gsi;
       gsi_next (&gsi2);
       for (j = 0; j < ncopies; j++)
        {
-         stmt_vec_info new_stmt_info;
          if (hoist_p)
-           {
-             new_temp = vect_init_vector (stmt_info, scalar_dest,
-                                          vectype, NULL);
-             gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
-             new_stmt_info = vinfo->add_stmt (new_stmt);
-           }
+           new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
+                                        vectype, NULL);
          else
-           {
-             new_temp = vect_init_vector (stmt_info, scalar_dest,
-                                          vectype, &gsi2);
-             new_stmt_info = vinfo->lookup_def (new_temp);
-           }
+           new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
+                                        vectype, &gsi2);
+         gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
          if (slp)
-           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
-         else if (j == 0)
-           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
-           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-         prev_stmt_info = new_stmt_info;
+           {
+             if (j == 0)
+               *vec_stmt = new_stmt;
+             STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+           }
        }
       return true;
     }
@@ -8783,7 +8520,6 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
     {
       gimple_stmt_iterator incr_gsi;
       bool insert_after;
-      gimple *incr;
       tree offvar;
       tree ivstep;
       tree running_off;
@@ -8792,6 +8528,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       /* Checked by get_load_store_type.  */
       unsigned int const_nunits = nunits.to_constant ();
       unsigned HOST_WIDE_INT cst_offset = 0;
+      tree dr_offset;
 
       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
       gcc_assert (!nested_in_vect_loop);
@@ -8822,11 +8559,12 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
        }
 
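+      /* Use the offset as seen by this vinfo; it can differ from the
+        raw DR_OFFSET (see get_dr_vinfo_offset).  */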
+      dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
       stride_base
        = fold_build_pointer_plus
            (DR_BASE_ADDRESS (first_dr_info->dr),
             size_binop (PLUS_EXPR,
-                        convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
+                        convert_to_ptrofftype (dr_offset),
                         convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
 
@@ -8856,12 +8594,9 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
-      incr = gsi_stmt (incr_gsi);
-      loop_vinfo->add_stmt (incr);
 
       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
 
-      prev_stmt_info = NULL;
       running_off = offvar;
       alias_off = build_int_cst (ref_type, 0);
       int nloads = const_nunits;
@@ -8873,47 +8608,24 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
        {
          if (group_size < const_nunits)
            {
-             /* First check if vec_init optab supports construction from
-                vector elts directly.  */
-             scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
-             machine_mode vmode;
-             if (mode_for_vector (elmode, group_size).exists (&vmode)
-                 && VECTOR_MODE_P (vmode)
-                 && targetm.vector_mode_supported_p (vmode)
-                 && (convert_optab_handler (vec_init_optab,
-                                            TYPE_MODE (vectype), vmode)
-                     != CODE_FOR_nothing))
+             /* First check if vec_init optab supports construction from vector
+                elts directly.  Otherwise avoid emitting a constructor of
+                vector elements by performing the loads using an integer type
+                of the same size, constructing a vector of those and then
+                re-interpreting it as the original vector type.  This avoids a
+                huge runtime penalty due to the general inability to perform
+                store forwarding from smaller stores to a larger load.  */
+             tree ptype;
+             tree vtype
+               = vector_vector_composition_type (vectype,
+                                                 const_nunits / group_size,
+                                                 &ptype);
+             if (vtype != NULL_TREE)
                {
                  nloads = const_nunits / group_size;
                  lnel = group_size;
-                 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
-               }
-             else
-               {
-                 /* Otherwise avoid emitting a constructor of vector elements
-                    by performing the loads using an integer type of the same
-                    size, constructing a vector of those and then
-                    re-interpreting it as the original vector type.
-                    This avoids a huge runtime penalty due to the general
-                    inability to perform store forwarding from smaller stores
-                    to a larger load.  */
-                 unsigned lsize
-                   = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
-                 unsigned int lnunits = const_nunits / group_size;
-                 /* If we can't construct such a vector fall back to
-                    element loads of the original vector type.  */
-                 if (int_mode_for_size (lsize, 0).exists (&elmode)
-                     && mode_for_vector (elmode, lnunits).exists (&vmode)
-                     && VECTOR_MODE_P (vmode)
-                     && targetm.vector_mode_supported_p (vmode)
-                     && (convert_optab_handler (vec_init_optab, vmode, elmode)
-                         != CODE_FOR_nothing))
-                   {
-                     nloads = lnunits;
-                     lnel = group_size;
-                     ltype = build_nonstandard_integer_type (lsize, 1);
-                     lvectype = build_vector_type (ltype, nloads);
-                   }
+                 lvectype = vtype;
+                 ltype = ptype;
                }
            }
          else
@@ -8951,17 +8663,15 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
        {
          if (nloads > 1)
            vec_alloc (v, nloads);
-         stmt_vec_info new_stmt_info = NULL;
+         gimple *new_stmt = NULL;
          for (i = 0; i < nloads; i++)
            {
              tree this_off = build_int_cst (TREE_TYPE (alias_off),
                                             group_el * elsz + cst_offset);
              tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
              vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
-             gassign *new_stmt
-               = gimple_build_assign (make_ssa_name (ltype), data_ref);
-             new_stmt_info
-               = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+             new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
              if (nloads > 1)
                CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
                                        gimple_assign_lhs (new_stmt));
@@ -8973,7 +8683,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                  tree newoff = copy_ssa_name (running_off);
                  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                                      running_off, stride_step);
-                 vect_finish_stmt_generation (stmt_info, incr, gsi);
+                 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
 
                  running_off = newoff;
                  group_el = 0;
@@ -8982,41 +8692,38 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          if (nloads > 1)
            {
              tree vec_inv = build_constructor (lvectype, v);
-             new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
-             new_stmt_info = vinfo->lookup_def (new_temp);
+             new_temp = vect_init_vector (vinfo, stmt_info,
+                                          vec_inv, lvectype, gsi);
+             new_stmt = SSA_NAME_DEF_STMT (new_temp);
              if (lvectype != vectype)
                {
-                 gassign *new_stmt
-                   = gimple_build_assign (make_ssa_name (vectype),
-                                          VIEW_CONVERT_EXPR,
-                                          build1 (VIEW_CONVERT_EXPR,
-                                                  vectype, new_temp));
-                 new_stmt_info
-                   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                 new_stmt = gimple_build_assign (make_ssa_name (vectype),
+                                                 VIEW_CONVERT_EXPR,
+                                                 build1 (VIEW_CONVERT_EXPR,
+                                                         vectype, new_temp));
+                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
                }
            }
 
          if (slp)
            {
              if (slp_perm)
-               dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
+               dr_chain.quick_push (gimple_assign_lhs (new_stmt));
              else
-               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
+               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          else
            {
              if (j == 0)
-               STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
-             else
-               STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-             prev_stmt_info = new_stmt_info;
+               *vec_stmt = new_stmt;
+             STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
            }
        }
       if (slp_perm)
        {
          unsigned n_perms;
-         vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
-                                       slp_node_instance, false, &n_perms);
+         vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
+                                       false, &n_perms);
        }
       return true;
     }
@@ -9039,7 +8746,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
        first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
 
       /* Check if the chain of loads is already vectorized.  */
-      if (STMT_VINFO_VEC_STMT (first_stmt_info)
+      if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
          /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
             ???  But we can only do so if there is exactly one
             as we have no way to get at the rest.  Leave the CSE
@@ -9050,7 +8757,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
             is even wrong code.  See PR56270.  */
          && !slp)
        {
-         *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+         *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
          return true;
        }
       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
@@ -9066,8 +8773,9 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
             unpermuted sequence.  In other cases we need to load the
             whole group, not only the number of vector stmts the
             permutation result fits in.  */
+         unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
          if (slp_perm
-             && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
+             && (group_size != scalar_lanes
                  || !multiple_p (nunits, group_size)))
            {
              /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
@@ -9081,7 +8789,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
            {
              vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              group_gap_adj
-               = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
+               = group_size - scalar_lanes;
            }
        }
       else
@@ -9098,8 +8806,14 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
     }
 
-  alignment_support_scheme
-    = vect_supportable_dr_alignment (first_dr_info, false);
+  /* Gather-scatter accesses perform only component accesses; alignment
+     is irrelevant for them.  */
+  if (memory_access_type == VMAT_GATHER_SCATTER)
+    alignment_support_scheme = dr_unaligned_supported;
+  else
+    alignment_support_scheme
+      = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
+
   gcc_assert (alignment_support_scheme);
   vec_loop_masks *loop_masks
     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
@@ -9144,10 +8858,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
         VS1_3:  vx3 = memref3   -               -
         S1:     x = load        -               VS1_0
         S2:     z = x + 1       -               -
-
-     See in documentation in vect_get_vec_def_for_stmt_copy for how the
-     information we recorded in RELATED_STMT field is used to vectorize
-     stmt S2.  */
+  */
 
   /* In case of interleaving (non-unit grouped access):
 
@@ -9225,20 +8936,28 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       compute_in_loop = true;
     }
 
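+  /* Whether the data-ref pointer has to be built from a different
+     first statement than the one the access group starts at.  */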
+  bool diff_first_stmt_info
+    = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
+
   if ((alignment_support_scheme == dr_explicit_realign_optimized
        || alignment_support_scheme == dr_explicit_realign)
       && !compute_in_loop)
     {
-      msq = vect_setup_realignment (first_stmt_info_for_drptr
-                                   ? first_stmt_info_for_drptr
-                                   : first_stmt_info, gsi, &realignment_token,
-                                   alignment_support_scheme, NULL_TREE,
-                                   &at_loop);
+      /* If we have a different first_stmt_info, we can't set up realignment
+        here, since we can't guarantee that first_stmt_info's DR has been
+        initialized yet; instead use first_stmt_info_for_drptr's DR, bumped
+        by the distance from first_stmt_info's DR, as below.  */
+      if (!diff_first_stmt_info)
+       msq = vect_setup_realignment (vinfo,
+                                     first_stmt_info, gsi, &realignment_token,
+                                     alignment_support_scheme, NULL_TREE,
+                                     &at_loop);
       if (alignment_support_scheme == dr_explicit_realign_optimized)
        {
          phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
          byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
                                    size_one_node);
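+         /* MSQ is only set up above when the first stmts do not
+            differ.  */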
+         gcc_assert (!first_stmt_info_for_drptr);
        }
     }
   else
@@ -9266,16 +8985,19 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
        aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
       else
        aggr_type = vectype;
-      bump = vect_get_data_ptr_increment (dr_info, aggr_type,
+      bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
                                          memory_access_type);
     }
 
+  vec<tree> vec_offsets = vNULL;
+  auto_vec<tree> vec_masks;
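+  /* Create the mask vector defs for all NCOPIES up front; they are
+     indexed by the copy number below.  */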
+  if (mask)
+    vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+                      mask, &vec_masks, mask_vectype, NULL_TREE);
   tree vec_mask = NULL_TREE;
-  prev_stmt_info = NULL;
   poly_uint64 group_elt = 0;
   for (j = 0; j < ncopies; j++)
     {
-      stmt_vec_info new_stmt_info = NULL;
       /* 1. Create the vector or array pointer update chain.  */
       if (j == 0)
        {
@@ -9284,7 +9006,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
-             && integer_zerop (DR_OFFSET (first_dr_info->dr))
+             && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
              && integer_zerop (DR_INIT (first_dr_info->dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (TREE_TYPE (ref_type)))
@@ -9294,11 +9016,10 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
              dataref_offset = build_int_cst (ref_type, 0);
            }
-         else if (first_stmt_info_for_drptr
-                  && first_stmt_info != first_stmt_info_for_drptr)
+         else if (diff_first_stmt_info)
            {
              dataref_ptr
-               = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
+               = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
                                            aggr_type, at_loop, offset, &dummy,
                                            gsi, &ptr_incr, simd_lane_access_p,
                                            byte_offset, bump);
@@ -9310,30 +9031,33 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                size_binop (MINUS_EXPR,
                                            DR_INIT (first_dr_info->dr),
                                            DR_INIT (ptrdr)));
-             dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+             dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
                                             stmt_info, diff);
+             if (alignment_support_scheme == dr_explicit_realign)
+               {
+                 msq = vect_setup_realignment (vinfo,
+                                               first_stmt_info_for_drptr, gsi,
+                                               &realignment_token,
+                                               alignment_support_scheme,
+                                               dataref_ptr, &at_loop);
+                 gcc_assert (!compute_in_loop);
+               }
            }
          else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-           vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
-                                        &dataref_ptr, &vec_offset);
+           {
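+             /* All NCOPIES offset defs are created at once; the J'th
+                one is used for later copies.  */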
+             vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
+                                          &dataref_ptr, &vec_offsets, ncopies);
+             vec_offset = vec_offsets[0];
+           }
          else
            dataref_ptr
-             = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
+             = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+                                         at_loop,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p,
                                          byte_offset, bump);
          if (mask)
-           {
-             if (slp_node)
-               {
-                 auto_vec<vec<tree> > vec_defs (1);
-                 vect_get_slp_defs (slp_node, &vec_defs);
-                 vec_mask = vec_defs[0][0];
-               }
-             else
-               vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
-                                                        mask_vectype);
-           }
+           vec_mask = vec_masks[0];
        }
       else
        {
@@ -9341,17 +9065,18 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
            dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
                                              bump);
          else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-           vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
+           vec_offset = vec_offsets[j];
          else
-           dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+           dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
                                           stmt_info, bump);
          if (mask)
-           vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
+           vec_mask = vec_masks[j];
        }
 
       if (grouped_load || slp_perm)
        dr_chain.create (vec_num);
 
+      gimple *new_stmt = NULL;
       if (memory_access_type == VMAT_LOAD_STORE_LANES)
        {
          tree vec_array;
@@ -9387,21 +9112,22 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
            }
          gimple_call_set_lhs (call, vec_array);
          gimple_call_set_nothrow (call, true);
-         new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
+         vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+         new_stmt = call;
 
          /* Extract each vector into an SSA_NAME.  */
          for (i = 0; i < vec_num; i++)
            {
-             new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
+             new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
                                            vec_array, i);
              dr_chain.quick_push (new_temp);
            }
 
          /* Record the mapping between SSA_NAMEs and statements.  */
-         vect_record_grouped_load_vectors (stmt_info, dr_chain);
+         vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
 
          /* Record that VEC_ARRAY is now dead.  */
-         vect_clobber_variable (stmt_info, gsi, vec_array);
+         vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
        }
       else
        {
@@ -9418,11 +9144,10 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                                      vec_mask, gsi);
 
              if (i > 0)
-               dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
-                                              stmt_info, bump);
+               dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+                                              gsi, stmt_info, bump);
 
              /* 2. Create the vector-load in the loop.  */
-             gimple *new_stmt = NULL;
              switch (alignment_support_scheme)
                {
                case dr_aligned:
@@ -9460,7 +9185,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    else if (DR_MISALIGNMENT (first_dr_info) == -1)
                      {
                        align = dr_alignment
-                         (vect_dr_behavior (first_dr_info));
+                         (vect_dr_behavior (vinfo, first_dr_info));
                        misalign = 0;
                      }
                    else
@@ -9485,6 +9210,13 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    else
                      {
                        tree ltype = vectype;
+                       tree new_vtype = NULL_TREE;
+                       unsigned HOST_WIDE_INT gap
+                         = DR_GROUP_GAP (first_stmt_info);
+                       unsigned int vect_align
+                         = vect_known_alignment_in_bytes (first_dr_info);
+                       unsigned int scalar_dr_size
+                         = vect_get_scalar_dr_size (first_dr_info);
                        /* If there's no peeling for gaps but we have a gap
                           with slp loads then load the lower half of the
                           vector only.  See get_group_load_store_type for
@@ -9492,20 +9224,31 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                        if (slp
                            && loop_vinfo
                            && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
-                           && DR_GROUP_GAP (first_stmt_info) != 0
-                           && known_eq (nunits,
-                                        (group_size
-                                         - DR_GROUP_GAP (first_stmt_info)) * 2)
-                           && known_eq (nunits, group_size))
-                         ltype = build_vector_type (TREE_TYPE (vectype),
-                                                    (group_size
-                                                     - DR_GROUP_GAP
-                                                         (first_stmt_info)));
+                           && gap != 0
+                           && known_eq (nunits, (group_size - gap) * 2)
+                           && known_eq (nunits, group_size)
+                           && gap >= (vect_align / scalar_dr_size))
+                         {
+                           tree half_vtype;
+                           new_vtype
+                             = vector_vector_composition_type (vectype, 2,
+                                                               &half_vtype);
+                           if (new_vtype != NULL_TREE)
+                             ltype = half_vtype;
+                         }
+                       tree offset
+                         = (dataref_offset ? dataref_offset
+                                           : build_int_cst (ref_type, 0));
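+                       /* For reverse accesses bias the offset by the
+                          gap so the load covers the valid elements.  */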
+                       if (ltype != vectype
+                           && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+                         {
+                           unsigned HOST_WIDE_INT gap_offset
+                             = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
+                           tree gapcst = build_int_cst (ref_type, gap_offset);
+                           offset = size_binop (PLUS_EXPR, offset, gapcst);
+                         }
                        data_ref
-                         = fold_build2 (MEM_REF, ltype, dataref_ptr,
-                                        dataref_offset
-                                        ? dataref_offset
-                                        : build_int_cst (ref_type, 0));
+                         = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
                        if (alignment_support_scheme == dr_aligned)
                          ;
                        else if (DR_MISALIGNMENT (first_dr_info) == -1)
@@ -9518,20 +9261,42 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                                  TYPE_ALIGN (elem_type));
                        if (ltype != vectype)
                          {
-                           vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
+                           vect_copy_ref_info (data_ref,
+                                               DR_REF (first_dr_info->dr));
                            tree tem = make_ssa_name (ltype);
                            new_stmt = gimple_build_assign (tem, data_ref);
-                           vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                           vect_finish_stmt_generation (vinfo, stmt_info,
+                                                        new_stmt, gsi);
                            data_ref = NULL;
                            vec<constructor_elt, va_gc> *v;
                            vec_alloc (v, 2);
-                           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
-                           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
-                                                   build_zero_cst (ltype));
-                           new_stmt
-                             = gimple_build_assign (vec_dest,
-                                                    build_constructor
-                                                      (vectype, v));
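+                           /* Order the loaded half and the zero filler
+                              to match the memory layout; for reverse
+                              accesses the filler comes first.  */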
+                           if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+                             {
+                               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+                                                       build_zero_cst (ltype));
+                               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+                             }
+                           else
+                             {
+                               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+                               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+                                                       build_zero_cst (ltype));
+                             }
+                           gcc_assert (new_vtype != NULL_TREE);
+                           if (new_vtype == vectype)
+                             new_stmt = gimple_build_assign (
+                               vec_dest, build_constructor (vectype, v));
+                           else
+                             {
+                               tree new_vname = make_ssa_name (new_vtype);
+                               new_stmt = gimple_build_assign (
+                                 new_vname, build_constructor (new_vtype, v));
+                               vect_finish_stmt_generation (vinfo, stmt_info,
+                                                            new_stmt, gsi);
+                               new_stmt = gimple_build_assign (
+                                 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
+                                                   new_vname));
+                             }
                          }
                      }
                    break;
@@ -9543,7 +9308,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
 
                    if (compute_in_loop)
-                     msq = vect_setup_realignment (first_stmt_info, gsi,
+                     msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
                                                    &realignment_token,
                                                    dr_explicit_realign,
                                                    dataref_ptr, NULL);
@@ -9561,7 +9326,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                  build_int_cst
                                  (TREE_TYPE (dataref_ptr),
                                   -(HOST_WIDE_INT) align));
-                   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                   vect_finish_stmt_generation (vinfo, stmt_info,
+                                                new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (ref_type, 0));
@@ -9572,13 +9338,14 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_assign_set_lhs (new_stmt, new_temp);
                    gimple_move_vops (new_stmt, stmt_info->stmt);
-                   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                   vect_finish_stmt_generation (vinfo, stmt_info,
+                                                new_stmt, gsi);
                    msq = new_temp;
 
                    bump = size_binop (MULT_EXPR, vs,
                                       TYPE_SIZE_UNIT (elem_type));
                    bump = size_binop (MINUS_EXPR, bump, size_one_node);
-                   ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
+                   ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
                                           stmt_info, bump);
                    new_stmt = gimple_build_assign
                                 (NULL_TREE, BIT_AND_EXPR, ptr,
@@ -9586,7 +9353,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                  (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
                    ptr = copy_ssa_name (ptr, new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
-                   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                   vect_finish_stmt_generation (vinfo, stmt_info,
+                                                new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (ref_type, 0));
@@ -9606,7 +9374,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                      (new_temp, BIT_AND_EXPR, dataref_ptr,
                       build_int_cst (TREE_TYPE (dataref_ptr),
                                     -(HOST_WIDE_INT) align));
-                   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                   vect_finish_stmt_generation (vinfo, stmt_info,
+                                                new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, new_temp,
                                build_int_cst (ref_type, 0));
@@ -9624,8 +9393,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                }
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_set_lhs (new_stmt, new_temp);
-             new_stmt_info
-               = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 
              /* 3. Handle explicit realignment if necessary/supported.
                 Create in loop:
@@ -9641,8 +9409,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                                  msq, lsq, realignment_token);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
-                 new_stmt_info
-                   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 
                  if (alignment_support_scheme == dr_explicit_realign_optimized)
                    {
@@ -9658,9 +9425,9 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
              if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
-                 new_temp = permute_vec_elements (new_temp, new_temp,
+                 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
                                                   perm_mask, stmt_info, gsi);
-                 new_stmt_info = vinfo->lookup_def (new_temp);
+                 new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }
 
              /* Collect vector loads and later create their permutation in
@@ -9670,7 +9437,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
              /* Store vector loads in the corresponding SLP_NODE.  */
              if (slp && !slp_perm)
-               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
+               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
 
              /* With SLP permutation we load the gaps as well, without
                 we need to skip the gaps after we manage to fully load
@@ -9684,8 +9451,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
                       * group_gap_adj);
                  tree bump = wide_int_to_tree (sizetype, bump_val);
-                 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
-                                                stmt_info, bump);
+                 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+                                                gsi, stmt_info, bump);
                  group_elt = 0;
                }
            }
@@ -9697,7 +9464,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
                   * group_gap_adj);
              tree bump = wide_int_to_tree (sizetype, bump_val);
-             dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+             dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
                                             stmt_info, bump);
            }
        }
@@ -9708,34 +9475,28 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       if (slp_perm)
         {
          unsigned n_perms;
-          if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
-                                             slp_node_instance, false,
-                                            &n_perms))
-            {
-              dr_chain.release ();
-              return false;
-            }
+         bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
+                                                 gsi, vf, false, &n_perms);
+         gcc_assert (ok);
         }
       else
         {
           if (grouped_load)
            {
              if (memory_access_type != VMAT_LOAD_STORE_LANES)
-               vect_transform_grouped_load (stmt_info, dr_chain,
+               vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
                                             group_size, gsi);
-             *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+             *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
            }
           else
            {
-             if (j == 0)
-               STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
-             else
-               STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-             prev_stmt_info = new_stmt_info;
+             STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
            }
         }
       dr_chain.release ();
     }
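+  /* Hand the first vectorized stmt back to the caller.  */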
+  if (!slp)
+    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
 
   return true;
 }
@@ -9754,18 +9515,20 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    condition operands are supportable using vec_is_simple_use.  */
 
 static bool
-vect_is_simple_cond (tree cond, vec_info *vinfo,
-                    tree *comp_vectype, enum vect_def_type *dts,
-                    tree vectype)
+vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
+                    slp_tree slp_node, tree *comp_vectype,
+                    enum vect_def_type *dts, tree vectype)
 {
   tree lhs, rhs;
   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
+  slp_tree slp_op;
 
   /* Mask case.  */
   if (TREE_CODE (cond) == SSA_NAME
       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
     {
-      if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
+      if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
+                              &slp_op, &dts[0], comp_vectype)
          || !*comp_vectype
          || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
        return false;
@@ -9780,7 +9543,8 @@ vect_is_simple_cond (tree cond, vec_info *vinfo,
 
   if (TREE_CODE (lhs) == SSA_NAME)
     {
-      if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
+      if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
+                              &lhs, &slp_op, &dts[0], &vectype1))
        return false;
     }
   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
@@ -9791,7 +9555,8 @@ vect_is_simple_cond (tree cond, vec_info *vinfo,
 
   if (TREE_CODE (rhs) == SSA_NAME)
     {
-      if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
+      if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
+                              &rhs, &slp_op, &dts[1], &vectype2))
        return false;
     }
   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
@@ -9810,15 +9575,20 @@ vect_is_simple_cond (tree cond, vec_info *vinfo,
   if (! *comp_vectype)
     {
       tree scalar_type = TREE_TYPE (lhs);
-      /* If we can widen the comparison to match vectype do so.  */
-      if (INTEGRAL_TYPE_P (scalar_type)
-         && vectype
-         && tree_int_cst_lt (TYPE_SIZE (scalar_type),
-                             TYPE_SIZE (TREE_TYPE (vectype))))
-       scalar_type = build_nonstandard_integer_type
-         (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
-          TYPE_UNSIGNED (scalar_type));
-      *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+      if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
+       *comp_vectype = truth_type_for (vectype);
+      else
+       {
+         /* If we can widen the comparison to match vectype, do so.  */
+         if (INTEGRAL_TYPE_P (scalar_type)
+             && !slp_node
+             && tree_int_cst_lt (TYPE_SIZE (scalar_type),
+                                 TYPE_SIZE (TREE_TYPE (vectype))))
+           scalar_type = build_nonstandard_integer_type
+             (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
+         *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
+                                                      slp_node);
+       }
     }
 
   return true;
@@ -9836,11 +9606,11 @@ vect_is_simple_cond (tree cond, vec_info *vinfo,
    Return true if STMT_INFO is vectorizable in this way.  */
 
 static bool
-vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                       stmt_vec_info *vec_stmt,
+vectorizable_condition (vec_info *vinfo,
+                       stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+                       gimple **vec_stmt,
                        slp_tree slp_node, stmt_vector_for_cost *cost_vec)
 {
-  vec_info *vinfo = stmt_info->vinfo;
   tree scalar_dest = NULL_TREE;
   tree vec_dest = NULL_TREE;
   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
@@ -9850,16 +9620,16 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
   tree vec_compare;
   tree new_temp;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   enum vect_def_type dts[4]
     = {vect_unknown_def_type, vect_unknown_def_type,
        vect_unknown_def_type, vect_unknown_def_type};
   int ndts = 4;
   int ncopies;
+  int vec_num;
   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
-  stmt_vec_info prev_stmt_info = NULL;
-  int i, j;
-  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  int i;
+  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   vec<tree> vec_oprnds0 = vNULL;
   vec<tree> vec_oprnds1 = vNULL;
   vec<tree> vec_oprnds2 = vNULL;
@@ -9888,7 +9658,7 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
     {
       if (STMT_SLP_TYPE (stmt_info))
        return false;
-      reduc_info = info_for_reduction (stmt_info);
+      reduc_info = info_for_reduction (vinfo, stmt_info);
       reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
       reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
       gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
@@ -9913,26 +9683,34 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
 
   if (slp_node)
-    ncopies = 1;
+    {
+      ncopies = 1;
+      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+    }
   else
-    ncopies = vect_get_num_copies (loop_vinfo, vectype);
+    {
+      ncopies = vect_get_num_copies (loop_vinfo, vectype);
+      vec_num = 1;
+    }
 
   gcc_assert (ncopies >= 1);
   if (for_reduction && ncopies > 1)
     return false; /* FORNOW */
 
   cond_expr = gimple_assign_rhs1 (stmt);
-  then_clause = gimple_assign_rhs2 (stmt);
-  else_clause = gimple_assign_rhs3 (stmt);
 
-  if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
-                           &comp_vectype, &dts[0], slp_node ? NULL : vectype)
+  if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
+                           &comp_vectype, &dts[0], vectype)
       || !comp_vectype)
     return false;
 
-  if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
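+  /* An embedded comparison occupies operands 0 and 1 of the COND_EXPR,
+     shifting the then/else clauses to operands 2 and 3.  */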
+  unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
+  slp_tree then_slp_node, else_slp_node;
+  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
+                          &then_clause, &then_slp_node, &dts[2], &vectype1))
     return false;
-  if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
+  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
+                          &else_clause, &else_slp_node, &dts[3], &vectype2))
     return false;
 
   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
@@ -9942,7 +9720,7 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
     return false;
 
   masked = !COMPARISON_CLASS_P (cond_expr);
-  vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
+  vec_cmp_type = truth_type_for (comp_vectype);
 
   if (vec_cmp_type == NULL_TREE)
     return false;
@@ -9970,10 +9748,12 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          if (new_code == ERROR_MARK)
            must_invert_cmp_result = true;
          else
-           cond_code = new_code;
+           {
+             cond_code = new_code;
+             /* Make sure we don't accidentally use the old condition.  */
+             cond_expr = NULL_TREE;
+           }
        }
-      /* Make sure we don't accidentally use the old condition.  */
-      cond_expr = NULL_TREE;
       std::swap (then_clause, else_clause);
     }
 
@@ -10018,6 +9798,16 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       cond_code = SSA_NAME;
     }
 
+  if (TREE_CODE_CLASS (cond_code) == tcc_comparison
+      && reduction_type == EXTRACT_LAST_REDUCTION
+      && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "reduction comparison operation not supported\n");
+      return false;
+    }
+
   if (!vec_stmt)
     {
       if (bitop1 != NOP_EXPR)
@@ -10037,15 +9827,39 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                return false;
            }
        }
-      if (expand_vec_cond_expr_p (vectype, comp_vectype,
-                                    cond_code))
+
+      vect_cost_for_stmt kind = vector_stmt;
+      if (reduction_type == EXTRACT_LAST_REDUCTION)
+       /* Count one reduction-like operation per vector.  */
+       kind = vec_to_scalar;
+      else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
+       return false;
+
+      if (slp_node
+         && (!vect_maybe_update_slp_op_vectype
+                (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
+             || (op_adjust == 1
+                 && !vect_maybe_update_slp_op_vectype
+                       (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
+             || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
+             || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
        {
-         STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
-         vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
-                                 cost_vec);
-         return true;
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "incompatible vector types for invariants\n");
+         return false;
        }
-      return false;
+
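+      /* An EXTRACT_LAST reduction consumes one loop mask per vector,
+        so record that requirement now.  */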
+      if (loop_vinfo
+         && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+         && reduction_type == EXTRACT_LAST_REDUCTION)
+       vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
+                              ncopies * vec_num, vectype, NULL);
+
+      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
+      vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
+                             cost_vec, kind);
+      return true;
     }
 
   /* Transform.  */
@@ -10063,252 +9877,199 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   if (reduction_type != EXTRACT_LAST_REDUCTION)
     vec_dest = vect_create_destination_var (scalar_dest, vectype);
 
-  /* Handle cond expr.  */
-  for (j = 0; j < ncopies; j++)
-    {
-      bool swap_cond_operands = false;
+  bool swap_cond_operands = false;
 
-      /* See whether another part of the vectorized code applies a loop
-        mask to the condition, or to its inverse.  */
+  /* See whether another part of the vectorized code applies a loop
+     mask to the condition, or to its inverse.  */
 
-      vec_loop_masks *masks = NULL;
-      if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+  vec_loop_masks *masks = NULL;
+  if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+    {
+      if (reduction_type == EXTRACT_LAST_REDUCTION)
+       masks = &LOOP_VINFO_MASKS (loop_vinfo);
+      else
        {
-         if (reduction_type == EXTRACT_LAST_REDUCTION)
+         scalar_cond_masked_key cond (cond_expr, ncopies);
+         if (loop_vinfo->scalar_cond_masked_set.contains (cond))
            masks = &LOOP_VINFO_MASKS (loop_vinfo);
          else
            {
-             scalar_cond_masked_key cond (cond_expr, ncopies);
+             bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
+             cond.code = invert_tree_comparison (cond.code, honor_nans);
              if (loop_vinfo->scalar_cond_masked_set.contains (cond))
-               masks = &LOOP_VINFO_MASKS (loop_vinfo);
-             else
                {
-                 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
-                 cond.code = invert_tree_comparison (cond.code, honor_nans);
-                 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
-                   {
-                     masks = &LOOP_VINFO_MASKS (loop_vinfo);
-                     cond_code = cond.code;
-                     swap_cond_operands = true;
-                   }
+                 masks = &LOOP_VINFO_MASKS (loop_vinfo);
+                 cond_code = cond.code;
+                 swap_cond_operands = true;
                }
            }
        }
+    }
 
-      stmt_vec_info new_stmt_info = NULL;
-      if (j == 0)
-       {
-          if (slp_node)
-            {
-             auto_vec<vec<tree>, 4> vec_defs;
-              vect_get_slp_defs (slp_node, &vec_defs);
-             vec_oprnds3 = vec_defs.pop ();
-             vec_oprnds2 = vec_defs.pop ();
-             if (!masked)
-               vec_oprnds1 = vec_defs.pop ();
-             vec_oprnds0 = vec_defs.pop ();
-            }
-          else
-            {
-             if (masked)
-               {
-                 vec_cond_lhs
-                   = vect_get_vec_def_for_operand (cond_expr, stmt_info,
-                                                   comp_vectype);
-               }
-             else
-               {
-                 vec_cond_lhs
-                   = vect_get_vec_def_for_operand (cond_expr0,
-                                                   stmt_info, comp_vectype);
-                 vec_cond_rhs
-                   = vect_get_vec_def_for_operand (cond_expr1,
-                                                   stmt_info, comp_vectype);
-               }
-             vec_then_clause = vect_get_vec_def_for_operand (then_clause,
-                                                             stmt_info);
-             if (reduction_type != EXTRACT_LAST_REDUCTION)
-               vec_else_clause = vect_get_vec_def_for_operand (else_clause,
-                                                               stmt_info);
-           }
-       }
-      else
-       {
-         vec_cond_lhs
-           = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
-         if (!masked)
-           vec_cond_rhs
-             = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
-
-         vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
-                                                           vec_oprnds2.pop ());
-         vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
-                                                           vec_oprnds3.pop ());
-       }
-
-      if (!slp_node)
-        {
-         vec_oprnds0.quick_push (vec_cond_lhs);
-         if (!masked)
-           vec_oprnds1.quick_push (vec_cond_rhs);
-         vec_oprnds2.quick_push (vec_then_clause);
-         vec_oprnds3.quick_push (vec_else_clause);
-       }
+  /* Handle cond expr.  */
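+  /* Create the vector defs for all operands up front; the else clause
+     is skipped for EXTRACT_LAST reductions, which do not use it here.  */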
+  if (masked)
+    vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+                      cond_expr, &vec_oprnds0, comp_vectype,
+                      then_clause, &vec_oprnds2, vectype,
+                      reduction_type != EXTRACT_LAST_REDUCTION
+                      ? else_clause : NULL, &vec_oprnds3, vectype);
+  else
+    vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+                      cond_expr0, &vec_oprnds0, comp_vectype,
+                      cond_expr1, &vec_oprnds1, comp_vectype,
+                      then_clause, &vec_oprnds2, vectype,
+                      reduction_type != EXTRACT_LAST_REDUCTION
+                      ? else_clause : NULL, &vec_oprnds3, vectype);
 
-      /* Arguments are ready.  Create the new vector stmt.  */
-      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
-        {
-          vec_then_clause = vec_oprnds2[i];
-          vec_else_clause = vec_oprnds3[i];
+  /* Arguments are ready.  Create the new vector stmt.  */
+  FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
+    {
+      vec_then_clause = vec_oprnds2[i];
+      if (reduction_type != EXTRACT_LAST_REDUCTION)
+       vec_else_clause = vec_oprnds3[i];
 
-         if (swap_cond_operands)
-           std::swap (vec_then_clause, vec_else_clause);
+      if (swap_cond_operands)
+       std::swap (vec_then_clause, vec_else_clause);
 
-         if (masked)
-           vec_compare = vec_cond_lhs;
+      if (masked)
+       vec_compare = vec_cond_lhs;
+      else
+       {
+         vec_cond_rhs = vec_oprnds1[i];
+         if (bitop1 == NOP_EXPR)
+           vec_compare = build2 (cond_code, vec_cmp_type,
+                                 vec_cond_lhs, vec_cond_rhs);
          else
            {
-             vec_cond_rhs = vec_oprnds1[i];
-             if (bitop1 == NOP_EXPR)
-               vec_compare = build2 (cond_code, vec_cmp_type,
-                                     vec_cond_lhs, vec_cond_rhs);
+             new_temp = make_ssa_name (vec_cmp_type);
+             gassign *new_stmt;
+             if (bitop1 == BIT_NOT_EXPR)
+               new_stmt = gimple_build_assign (new_temp, bitop1,
+                                               vec_cond_rhs);
              else
+               new_stmt
+                 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
+                                        vec_cond_rhs);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             if (bitop2 == NOP_EXPR)
+               vec_compare = new_temp;
+             else if (bitop2 == BIT_NOT_EXPR)
                {
-                 new_temp = make_ssa_name (vec_cmp_type);
-                 gassign *new_stmt;
-                 if (bitop1 == BIT_NOT_EXPR)
-                   new_stmt = gimple_build_assign (new_temp, bitop1,
-                                                   vec_cond_rhs);
-                 else
-                   new_stmt
-                     = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
-                                            vec_cond_rhs);
-                 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-                 if (bitop2 == NOP_EXPR)
-                   vec_compare = new_temp;
-                 else if (bitop2 == BIT_NOT_EXPR)
-                   {
-                     /* Instead of doing ~x ? y : z do x ? z : y.  */
-                     vec_compare = new_temp;
-                     std::swap (vec_then_clause, vec_else_clause);
-                   }
-                 else
-                   {
-                     vec_compare = make_ssa_name (vec_cmp_type);
-                     new_stmt
-                       = gimple_build_assign (vec_compare, bitop2,
-                                              vec_cond_lhs, new_temp);
-                     vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-                   }
+                 /* Instead of doing ~x ? y : z do x ? z : y.  */
+                 vec_compare = new_temp;
+                 std::swap (vec_then_clause, vec_else_clause);
+               }
+             else
+               {
+                 vec_compare = make_ssa_name (vec_cmp_type);
+                 new_stmt
+                   = gimple_build_assign (vec_compare, bitop2,
+                                          vec_cond_lhs, new_temp);
+                 vect_finish_stmt_generation (vinfo, stmt_info,
+                                              new_stmt, gsi);
                }
            }
+       }
 
-         /* If we decided to apply a loop mask to the result of the vector
-             comparison, AND the comparison with the mask now.  Later passes
-             should then be able to reuse the AND results between mulitple
-             vector statements.
+      /* If we decided to apply a loop mask to the result of the vector
+        comparison, AND the comparison with the mask now.  Later passes
+        should then be able to reuse the AND results between multiple
+        vector statements.
 
-            For example:
-            for (int i = 0; i < 100; ++i)
-              x[i] = y[i] ? z[i] : 10;
+        For example:
+        for (int i = 0; i < 100; ++i)
+          x[i] = y[i] ? z[i] : 10;
 
-            results in following optimized GIMPLE:
+        results in the following optimized GIMPLE:
 
-            mask__35.8_43 = vect__4.7_41 != { 0, ... };
-            vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
-            _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
-            vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
-            vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
-                                              vect_iftmp.11_47, { 10, ... }>;
+        mask__35.8_43 = vect__4.7_41 != { 0, ... };
+        vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
+        _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
+        vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
+        vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
+                                          vect_iftmp.11_47, { 10, ... }>;
 
-            instead of using a masked and unmasked forms of
-            vec != { 0, ... } (masked in the MASK_LOAD,
-            unmasked in the VEC_COND_EXPR).  */
+        instead of using masked and unmasked forms of
+        vec != { 0, ... } (masked in the MASK_LOAD,
+        unmasked in the VEC_COND_EXPR).  */
 
-         /* Force vec_compare to be an SSA_NAME rather than a comparison,
-            in cases where that's necessary.  */
+      /* Force vec_compare to be an SSA_NAME rather than a comparison,
+        in cases where that's necessary.  */
 
-         if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
+      if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
+       {
+         if (!is_gimple_val (vec_compare))
            {
-             if (!is_gimple_val (vec_compare))
-               {
-                 tree vec_compare_name = make_ssa_name (vec_cmp_type);
-                 gassign *new_stmt = gimple_build_assign (vec_compare_name,
-                                                          vec_compare);
-                 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-                 vec_compare = vec_compare_name;
-               }
-
-             if (must_invert_cmp_result)
-               {
-                 tree vec_compare_name = make_ssa_name (vec_cmp_type);
-                 gassign *new_stmt = gimple_build_assign (vec_compare_name,
-                                                          BIT_NOT_EXPR,
-                                                          vec_compare);
-                 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-                 vec_compare = vec_compare_name;
-               }
+             tree vec_compare_name = make_ssa_name (vec_cmp_type);
+             gassign *new_stmt = gimple_build_assign (vec_compare_name,
+                                                      vec_compare);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             vec_compare = vec_compare_name;
+           }
 
-             if (masks)
-               {
-                 unsigned vec_num = vec_oprnds0.length ();
-                 tree loop_mask
-                   = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
-                                         vectype, vec_num * j + i);
-                 tree tmp2 = make_ssa_name (vec_cmp_type);
-                 gassign *g
-                   = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
-                                          loop_mask);
-                 vect_finish_stmt_generation (stmt_info, g, gsi);
-                 vec_compare = tmp2;
-               }
+         if (must_invert_cmp_result)
+           {
+             tree vec_compare_name = make_ssa_name (vec_cmp_type);
+             gassign *new_stmt = gimple_build_assign (vec_compare_name,
+                                                      BIT_NOT_EXPR,
+                                                      vec_compare);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             vec_compare = vec_compare_name;
            }
 
-         if (reduction_type == EXTRACT_LAST_REDUCTION)
+         if (masks)
            {
-             gcall *new_stmt = gimple_build_call_internal
-               (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
-                vec_then_clause);
-             gimple_call_set_lhs (new_stmt, scalar_dest);
-             SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
-             if (stmt_info->stmt == gsi_stmt (*gsi))
-               new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
-             else
-               {
-                 /* In this case we're moving the definition to later in the
-                    block.  That doesn't matter because the only uses of the
-                    lhs are in phi statements.  */
-                 gimple_stmt_iterator old_gsi
-                   = gsi_for_stmt (stmt_info->stmt);
-                 gsi_remove (&old_gsi, true);
-                 new_stmt_info
-                   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-               }
+             unsigned vec_num = vec_oprnds0.length ();
+             tree loop_mask
+               = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+                                     vectype, i);
+             tree tmp2 = make_ssa_name (vec_cmp_type);
+             gassign *g
+               = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
+                                      loop_mask);
+             vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+             vec_compare = tmp2;
            }
+       }
+
+      gimple *new_stmt;
+      if (reduction_type == EXTRACT_LAST_REDUCTION)
+       {
+         gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
+         tree lhs = gimple_get_lhs (old_stmt);
+         new_stmt = gimple_build_call_internal
+             (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
+              vec_then_clause);
+         gimple_call_set_lhs (new_stmt, lhs);
+         SSA_NAME_DEF_STMT (lhs) = new_stmt;
+         if (old_stmt == gsi_stmt (*gsi))
+           vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
          else
            {
-             new_temp = make_ssa_name (vec_dest);
-             gassign *new_stmt
-               = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
-                                      vec_then_clause, vec_else_clause);
-             new_stmt_info
-               = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+             /* In this case we're moving the definition to later in the
+                block.  That doesn't matter because the only uses of the
+                lhs are in phi statements.  */
+             gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
+             gsi_remove (&old_gsi, true);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
            }
-          if (slp_node)
-           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
-        }
-
-        if (slp_node)
-          continue;
-
-       if (j == 0)
-         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
-       else
-         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
-       prev_stmt_info = new_stmt_info;
+       }
+      else
+       {
+         new_temp = make_ssa_name (vec_dest);
+         new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
+                                         vec_then_clause, vec_else_clause);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+       }
+      if (slp_node)
+       SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+      else
+       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
     }
 
+  if (!slp_node)
+    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+
   vec_oprnds0.release ();
   vec_oprnds1.release ();
   vec_oprnds2.release ();
@@ -10326,25 +10087,24 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    Return true if STMT_INFO is vectorizable in this way.  */
 
 static bool
-vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                        stmt_vec_info *vec_stmt,
+vectorizable_comparison (vec_info *vinfo,
+                        stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+                        gimple **vec_stmt,
                         slp_tree slp_node, stmt_vector_for_cost *cost_vec)
 {
-  vec_info *vinfo = stmt_info->vinfo;
   tree lhs, rhs1, rhs2;
   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
   tree new_temp;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
   int ndts = 2;
   poly_uint64 nunits;
   int ncopies;
   enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
-  stmt_vec_info prev_stmt_info = NULL;
-  int i, j;
-  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  int i;
+  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   vec<tree> vec_oprnds0 = vNULL;
   vec<tree> vec_oprnds1 = vNULL;
   tree mask_type;
@@ -10385,13 +10145,13 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   if (TREE_CODE_CLASS (code) != tcc_comparison)
     return false;
 
-  rhs1 = gimple_assign_rhs1 (stmt);
-  rhs2 = gimple_assign_rhs2 (stmt);
-
-  if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
+  slp_tree slp_rhs1, slp_rhs2;
+  if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+                          0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
     return false;
 
-  if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
+  if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+                          1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
     return false;
 
   if (vectype1 && vectype2
@@ -10404,8 +10164,12 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   /* Invariant comparison.  */
   if (!vectype)
     {
-      vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
-      if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
+      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
+       vectype = mask_type;
+      else
+       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
+                                              slp_node);
+      if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
        return false;
     }
   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
@@ -10480,8 +10244,20 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
            }
        }
 
+      /* Put types on constant and invariant SLP children.  */
+      if (slp_node
+         && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
+             || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "incompatible vector types for invariants\n");
+         return false;
+       }
+
       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
-      vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
+      vect_model_simple_cost (vinfo, stmt_info,
+                             ncopies * (1 + (bitop2 != NOP_EXPR)),
                              dts, ndts, slp_node, cost_vec);
       return true;
     }
@@ -10497,95 +10273,53 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   lhs = gimple_assign_lhs (stmt);
   mask = vect_create_destination_var (lhs, mask_type);
 
-  /* Handle cmp expr.  */
-  for (j = 0; j < ncopies; j++)
+  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+                    rhs1, &vec_oprnds0, vectype,
+                    rhs2, &vec_oprnds1, vectype);
+  if (swap_p)
+    std::swap (vec_oprnds0, vec_oprnds1);
+
+  /* Arguments are ready.  Create the new vector stmt.  */
+  FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
     {
-      stmt_vec_info new_stmt_info = NULL;
-      if (j == 0)
-       {
-         if (slp_node)
-           {
-             auto_vec<vec<tree>, 2> vec_defs;
-             vect_get_slp_defs (slp_node, &vec_defs);
-             vec_oprnds1 = vec_defs.pop ();
-             vec_oprnds0 = vec_defs.pop ();
-             if (swap_p)
-               std::swap (vec_oprnds0, vec_oprnds1);
-           }
-         else
-           {
-             vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
-                                                      vectype);
-             vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
-                                                      vectype);
-           }
-       }
-      else
-       {
-         vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
-                                                    vec_oprnds0.pop ());
-         vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
-                                                    vec_oprnds1.pop ());
-       }
+      gimple *new_stmt;
+      vec_rhs2 = vec_oprnds1[i];
 
-      if (!slp_node)
+      new_temp = make_ssa_name (mask);
+      if (bitop1 == NOP_EXPR)
        {
-         if (swap_p && j == 0)
-           std::swap (vec_rhs1, vec_rhs2);
-         vec_oprnds0.quick_push (vec_rhs1);
-         vec_oprnds1.quick_push (vec_rhs2);
+         new_stmt = gimple_build_assign (new_temp, code,
+                                         vec_rhs1, vec_rhs2);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
        }
-
-      /* Arguments are ready.  Create the new vector stmt.  */
-      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
+      else
        {
-         vec_rhs2 = vec_oprnds1[i];
-
-         new_temp = make_ssa_name (mask);
-         if (bitop1 == NOP_EXPR)
-           {
-             gassign *new_stmt = gimple_build_assign (new_temp, code,
-                                                      vec_rhs1, vec_rhs2);
-             new_stmt_info
-               = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-           }
+         if (bitop1 == BIT_NOT_EXPR)
+           new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
          else
+           new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
+                                           vec_rhs2);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+         if (bitop2 != NOP_EXPR)
            {
-             gassign *new_stmt;
-             if (bitop1 == BIT_NOT_EXPR)
-               new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
+             tree res = make_ssa_name (mask);
+             if (bitop2 == BIT_NOT_EXPR)
+               new_stmt = gimple_build_assign (res, bitop2, new_temp);
              else
-               new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
-                                               vec_rhs2);
-             new_stmt_info
-               = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-             if (bitop2 != NOP_EXPR)
-               {
-                 tree res = make_ssa_name (mask);
-                 if (bitop2 == BIT_NOT_EXPR)
-                   new_stmt = gimple_build_assign (res, bitop2, new_temp);
-                 else
-                   new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
-                                                   new_temp);
-                 new_stmt_info
-                   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-               }
+               new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
+                                               new_temp);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
            }
-         if (slp_node)
-           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
        }
-
       if (slp_node)
-       continue;
-
-      if (j == 0)
-       STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+       SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
       else
-       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
-      prev_stmt_info = new_stmt_info;
+       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
     }
 
+  if (!slp_node)
+    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+
   vec_oprnds0.release ();
   vec_oprnds1.release ();
 
@@ -10598,7 +10332,8 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
 
 static bool
-can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+can_vectorize_live_stmts (loop_vec_info loop_vinfo,
+                         stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                          slp_tree slp_node, slp_instance slp_node_instance,
                          bool vec_stmt_p,
                          stmt_vector_for_cost *cost_vec)
@@ -10610,15 +10345,16 @@ can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
        {
          if (STMT_VINFO_LIVE_P (slp_stmt_info)
-             && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node,
+             && !vectorizable_live_operation (loop_vinfo,
+                                              slp_stmt_info, gsi, slp_node,
                                               slp_node_instance, i,
                                               vec_stmt_p, cost_vec))
            return false;
        }
     }
   else if (STMT_VINFO_LIVE_P (stmt_info)
-          && !vectorizable_live_operation (stmt_info, gsi, slp_node,
-                                           slp_node_instance, -1,
+          && !vectorizable_live_operation (loop_vinfo, stmt_info, gsi,
+                                           slp_node, slp_node_instance, -1,
                                            vec_stmt_p, cost_vec))
     return false;
 
@@ -10628,12 +10364,12 @@ can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 /* Make sure the statement is vectorizable.  */
 
 opt_result
-vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
+vect_analyze_stmt (vec_info *vinfo,
+                  stmt_vec_info stmt_info, bool *need_to_vectorize,
                   slp_tree node, slp_instance node_instance,
                   stmt_vector_for_cost *cost_vec)
 {
-  vec_info *vinfo = stmt_info->vinfo;
-  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
   bool ok;
   gimple_seq pattern_def_seq;
@@ -10668,7 +10404,7 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
                                 pattern_def_stmt_info->stmt);
 
              opt_result res
-               = vect_analyze_stmt (pattern_def_stmt_info,
+               = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
                                     need_to_vectorize, node, node_instance,
                                     cost_vec);
              if (!res)
@@ -10728,7 +10464,7 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
                         pattern_stmt_info->stmt);
 
       opt_result res
-       = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
+       = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
                             node_instance, cost_vec);
       if (!res)
        return res;
@@ -10785,39 +10521,49 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
     /* Prefer vectorizable_call over vectorizable_simd_clone_call so
        -mveclibabi= takes preference over library functions with
        the simd attribute.  */
-    ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
-         || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
+    ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
+         || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
                                           cost_vec)
-         || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
-         || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
-         || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
-         || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
-                               cost_vec)
-         || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
-         || vectorizable_reduction (stmt_info, node, node_instance, cost_vec)
-         || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
-         || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
-         || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
-         || vectorizable_comparison (stmt_info, NULL, NULL, node,
+         || vectorizable_conversion (vinfo, stmt_info,
+                                     NULL, NULL, node, cost_vec)
+         || vectorizable_operation (vinfo, stmt_info,
+                                    NULL, NULL, node, cost_vec)
+         || vectorizable_assignment (vinfo, stmt_info,
+                                     NULL, NULL, node, cost_vec)
+         || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
+         || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
+         || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
+                                    node, node_instance, cost_vec)
+         || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
+                                    NULL, NULL, node, cost_vec)
+         || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
+         || vectorizable_condition (vinfo, stmt_info,
+                                    NULL, NULL, node, cost_vec)
+         || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
                                      cost_vec)
-         || vectorizable_lc_phi (stmt_info, NULL, node));
+         || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
+                                 stmt_info, NULL, node));
   else
     {
       if (bb_vinfo)
-       ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
-             || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
-                                              cost_vec)
-             || vectorizable_conversion (stmt_info, NULL, NULL, node,
+       ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
+             || vectorizable_simd_clone_call (vinfo, stmt_info,
+                                              NULL, NULL, node, cost_vec)
+             || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
                                          cost_vec)
-             || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
-             || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
-             || vectorizable_assignment (stmt_info, NULL, NULL, node,
+             || vectorizable_shift (vinfo, stmt_info,
+                                    NULL, NULL, node, cost_vec)
+             || vectorizable_operation (vinfo, stmt_info,
+                                        NULL, NULL, node, cost_vec)
+             || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
                                          cost_vec)
-             || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
-                                   cost_vec)
-             || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
-             || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
-             || vectorizable_comparison (stmt_info, NULL, NULL, node,
+             || vectorizable_load (vinfo, stmt_info,
+                                   NULL, NULL, node, cost_vec)
+             || vectorizable_store (vinfo, stmt_info,
+                                    NULL, NULL, node, cost_vec)
+             || vectorizable_condition (vinfo, stmt_info,
+                                        NULL, NULL, node, cost_vec)
+             || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
                                          cost_vec));
     }
 
@@ -10832,7 +10578,8 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
   if (!bb_vinfo
       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
       && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
-      && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance,
+      && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
+                                   stmt_info, NULL, node, node_instance,
                                    false, cost_vec))
     return opt_result::failure_at (stmt_info->stmt,
                                   "not vectorized:"
@@ -10848,64 +10595,60 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
    Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
 
 bool
-vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+vect_transform_stmt (vec_info *vinfo,
+                    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                     slp_tree slp_node, slp_instance slp_node_instance)
 {
-  vec_info *vinfo = stmt_info->vinfo;
   bool is_store = false;
-  stmt_vec_info vec_stmt = NULL;
+  gimple *vec_stmt = NULL;
   bool done;
 
   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
-  stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
-
-  bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
-                  && nested_in_vect_loop_p
-                       (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
-                        stmt_info));
 
-  gimple *stmt = stmt_info->stmt;
   switch (STMT_VINFO_TYPE (stmt_info))
     {
     case type_demotion_vec_info_type:
     case type_promotion_vec_info_type:
     case type_conversion_vec_info_type:
-      done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
-                                     NULL);
+      done = vectorizable_conversion (vinfo, stmt_info,
+                                     gsi, &vec_stmt, slp_node, NULL);
       gcc_assert (done);
       break;
 
     case induc_vec_info_type:
-      done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
+      done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
+                                    stmt_info, gsi, &vec_stmt, slp_node,
                                     NULL);
       gcc_assert (done);
       break;
 
     case shift_vec_info_type:
-      done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
+      done = vectorizable_shift (vinfo, stmt_info,
+                                gsi, &vec_stmt, slp_node, NULL);
       gcc_assert (done);
       break;
 
     case op_vec_info_type:
-      done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
+      done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
                                     NULL);
       gcc_assert (done);
       break;
 
     case assignment_vec_info_type:
-      done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
-                                     NULL);
+      done = vectorizable_assignment (vinfo, stmt_info,
+                                     gsi, &vec_stmt, slp_node, NULL);
       gcc_assert (done);
       break;
 
     case load_vec_info_type:
-      done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
-                                slp_node_instance, NULL);
+      done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
+                               NULL);
       gcc_assert (done);
       break;
 
     case store_vec_info_type:
-      done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
+      done = vectorizable_store (vinfo, stmt_info,
+                                gsi, &vec_stmt, slp_node, NULL);
       gcc_assert (done);
       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
        {
@@ -10922,40 +10665,42 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       break;
 
     case condition_vec_info_type:
-      done = vectorizable_condition (stmt_info, gsi, &vec_stmt, slp_node, NULL);
+      done = vectorizable_condition (vinfo, stmt_info,
+                                    gsi, &vec_stmt, slp_node, NULL);
       gcc_assert (done);
       break;
 
     case comparison_vec_info_type:
-      done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
+      done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
                                      slp_node, NULL);
       gcc_assert (done);
       break;
 
     case call_vec_info_type:
-      done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
-      stmt = gsi_stmt (*gsi);
+      done = vectorizable_call (vinfo, stmt_info,
+                               gsi, &vec_stmt, slp_node, NULL);
       break;
 
     case call_simd_clone_vec_info_type:
-      done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
+      done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
                                           slp_node, NULL);
-      stmt = gsi_stmt (*gsi);
       break;
 
     case reduc_vec_info_type:
-      done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node);
+      done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
+                                      gsi, &vec_stmt, slp_node);
       gcc_assert (done);
       break;
 
     case cycle_phi_info_type:
-      done = vect_transform_cycle_phi (stmt_info, &vec_stmt, slp_node,
-                                      slp_node_instance);
+      done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
+                                      &vec_stmt, slp_node, slp_node_instance);
       gcc_assert (done);
       break;
 
     case lc_phi_info_type:
-      done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node);
+      done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
+                                 stmt_info, &vec_stmt, slp_node);
       gcc_assert (done);
       break;
 
@@ -10967,52 +10712,11 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                              "stmt not supported.\n");
          gcc_unreachable ();
        }
+      done = true;
     }
 
-  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
-     This would break hybrid SLP vectorization.  */
-  if (slp_node)
-    gcc_assert (!vec_stmt
-               && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
-
-  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
-     is being vectorized, but outside the immediately enclosing loop.  */
-  if (vec_stmt
-      && nested_p
-      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
-      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
-          || STMT_VINFO_RELEVANT (stmt_info) ==
-                                           vect_used_in_outer_by_reduction))
-    {
-      class loop *innerloop = LOOP_VINFO_LOOP (
-                                STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
-      imm_use_iterator imm_iter;
-      use_operand_p use_p;
-      tree scalar_dest;
-
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_NOTE, vect_location,
-                         "Record the vdef for outer-loop vectorization.\n");
-
-      /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
-        (to be used when vectorizing outer-loop stmts that use the DEF of
-        STMT).  */
-      if (gimple_code (stmt) == GIMPLE_PHI)
-        scalar_dest = PHI_RESULT (stmt);
-      else
-        scalar_dest = gimple_get_lhs (stmt);
-
-      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
-       if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
-         {
-           stmt_vec_info exit_phi_info
-             = vinfo->lookup_stmt (USE_STMT (use_p));
-           STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
-         }
-    }
-
-  if (vec_stmt)
-    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
+  if (!slp_node && vec_stmt)
+    gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
 
   if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
     return is_store;
@@ -11023,7 +10727,7 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   stmt_vec_info reduc_info;
   if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
       && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
-      && (reduc_info = info_for_reduction (orig_stmt_info))
+      && (reduc_info = info_for_reduction (vinfo, orig_stmt_info))
       && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
       && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
     {
@@ -11038,19 +10742,15 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          && (PHI_ARG_DEF_FROM_EDGE (phi, e)
              == gimple_get_lhs (orig_stmt_info->stmt)))
        {
-         stmt_vec_info phi_info
-           = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
-         stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
-         do
-           {
-             add_phi_arg (as_a <gphi *> (phi_info->stmt),
-                          gimple_get_lhs (vec_stmt->stmt), e,
-                          gimple_phi_arg_location (phi, e->dest_idx));
-             phi_info = STMT_VINFO_RELATED_STMT (phi_info);
-             vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
-           }
-         while (phi_info);
-         gcc_assert (!vec_stmt);
+         vec<gimple *> &phi_info
+           = STMT_VINFO_VEC_STMTS (STMT_VINFO_REDUC_DEF (orig_stmt_info));
+         vec<gimple *> &vec_stmt
+           = STMT_VINFO_VEC_STMTS (stmt_info);
+         gcc_assert (phi_info.length () == vec_stmt.length ());
+         for (unsigned i = 0; i < phi_info.length (); ++i)
+           add_phi_arg (as_a <gphi *> (phi_info[i]),
+                        gimple_get_lhs (vec_stmt[i]), e,
+                        gimple_phi_arg_location (phi, e->dest_idx));
        }
       else if (slp_node
               && slp_node != slp_node_instance->reduc_phis)
@@ -11061,16 +10761,18 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
                      == SLP_TREE_VEC_STMTS (slp_node).length ());
          for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
-           add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
-                        gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
+           add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]),
+                        vect_get_slp_vect_def (slp_node, i),
                         e, gimple_phi_arg_location (phi, e->dest_idx));
        }
     }
 
   /* Handle stmts whose DEF is used outside the loop-nest that is
      being vectorized.  */
-  done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
-                                  slp_node_instance, true, NULL);
+  if (is_a <loop_vec_info> (vinfo))
+    done = can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
+                                    stmt_info, gsi, slp_node,
+                                    slp_node_instance, true, NULL);
   gcc_assert (done);
 
   return false;
@@ -11081,9 +10783,8 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    stmt_vec_info.  */
 
 void
-vect_remove_stores (stmt_vec_info first_stmt_info)
+vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
 {
-  vec_info *vinfo = first_stmt_info->vinfo;
   stmt_vec_info next_stmt_info = first_stmt_info;
 
   while (next_stmt_info)
@@ -11096,18 +10797,28 @@ vect_remove_stores (stmt_vec_info first_stmt_info)
     }
 }
 
-/* Function get_vectype_for_scalar_type_and_size.
+/* If NUNITS is nonzero, return a vector type that contains NUNITS
+   elements of type SCALAR_TYPE, or null if the target doesn't support
+   such a type.
+
+   If NUNITS is zero, return a vector type that contains elements of
+   type SCALAR_TYPE, choosing whichever vector size the target prefers.
 
-   Returns the vector type corresponding to SCALAR_TYPE  and SIZE as supported
-   by the target.  */
+   If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
+   for this vectorization region and want to "autodetect" the best choice.
+   Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
+   and we want the new type to be interoperable with it.  PREVAILING_MODE
+   in this case can be a scalar integer mode or a vector mode; when it
+   is a vector mode, the function acts like a tree-level version of
+   related_vector_mode.  */
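+
+/* For example (an illustrative sketch, not part of this change; the
+   exact modes depend on the target):
+
+     tree v = get_related_vectype_for_scalar_type (VOIDmode,
+                                                   integer_type_node, 0);
+     tree w = get_related_vectype_for_scalar_type (V4SImode,
+                                                   short_integer_type_node, 8);
+
+   The first call autodetects the target's preferred vector type for
+   int; the second asks for an 8-element short vector interoperable
+   with a previously chosen V4SImode (V8HImode on most such targets).
+   Either call returns NULL_TREE if no suitable type exists.  */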
 
 tree
-get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
+get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
+                                    tree scalar_type, poly_uint64 nunits)
 {
   tree orig_scalar_type = scalar_type;
   scalar_mode inner_mode;
   machine_mode simd_mode;
-  poly_uint64 nunits;
   tree vectype;
 
   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
@@ -11147,19 +10858,45 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
   if (scalar_type == NULL_TREE)
     return NULL_TREE;
 
-  /* If no size was supplied use the mode the target prefers.   Otherwise
-     lookup a vector mode of the specified size.  */
-  if (known_eq (size, 0U))
-    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
-  else if (!multiple_p (size, nbytes, &nunits)
-          || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
-    return NULL_TREE;
-  /* NOTE: nunits == 1 is allowed to support single element vector types.  */
-  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
-    return NULL_TREE;
+  /* If no prevailing mode was supplied, use the mode the target prefers.
+     Otherwise lookup a vector mode based on the prevailing mode.  */
+  if (prevailing_mode == VOIDmode)
+    {
+      gcc_assert (known_eq (nunits, 0U));
+      simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
+      if (SCALAR_INT_MODE_P (simd_mode))
+       {
+         /* Traditional behavior is not to take the integer mode
+            literally, but simply to use it as a way of determining
+            the vector size.  It is up to mode_for_vector to decide
+            what the TYPE_MODE should be.
 
-  vectype = build_vector_type (scalar_type, nunits);
+            Note that nunits == 1 is allowed in order to support single
+            element vector types.  */
+         if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
+             || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
+           return NULL_TREE;
+       }
+    }
+  else if (SCALAR_INT_MODE_P (prevailing_mode)
+          || !related_vector_mode (prevailing_mode,
+                                   inner_mode, nunits).exists (&simd_mode))
+    {
+      /* Fall back to using mode_for_vector, mostly in the hope of being
+        able to use an integer mode.  */
+      if (known_eq (nunits, 0U)
+         && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
+       return NULL_TREE;
+
+      if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
+       return NULL_TREE;
+    }
 
+  vectype = build_vector_type_for_mode (scalar_type, simd_mode);
+
+  /* In cases where the mode was chosen by mode_for_vector, check that
+     the target actually supports the chosen mode, or that it at least
+     allows the vector mode to be replaced by a like-sized integer.  */
   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
     return NULL_TREE;
@@ -11176,35 +10913,95 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
 /* Function get_vectype_for_scalar_type.
 
    Returns the vector type corresponding to SCALAR_TYPE as supported
-   by the target.  */
+   by the target.  If GROUP_SIZE is nonzero and we're performing BB
+   vectorization, make sure that the number of elements in the vector
+   is no bigger than GROUP_SIZE.  */
 
 tree
-get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type)
+get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
+                            unsigned int group_size)
 {
-  tree vectype;
-  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
-                                                 vinfo->vector_size);
+  /* For BB vectorization, we should always have a group size once we've
+     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
+     are tentative requests during things like early data reference
+     analysis and pattern recognition.  */
+  if (is_a <bb_vec_info> (vinfo))
+    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
+  else
+    group_size = 0;
+
+  tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
+                                                     scalar_type);
+  if (vectype && vinfo->vector_mode == VOIDmode)
+    vinfo->vector_mode = TYPE_MODE (vectype);
+
+  /* Register the natural choice of vector type, before the group size
+     has been applied.  */
+  if (vectype)
+    vinfo->used_vector_modes.add (TYPE_MODE (vectype));
+
+  /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
+     try again with an explicit number of elements.  */
   if (vectype
-      && known_eq (vinfo->vector_size, 0U))
-    vinfo->vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
+      && group_size
+      && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
+    {
+      /* Start with the biggest number of units that fits within
+        GROUP_SIZE and halve it until we find a valid vector type.
+        Usually either the first attempt will succeed or all will
+        fail (in the latter case because GROUP_SIZE is too small
+        for the target), but it's possible that a target could have
+        a hole between supported vector types.
+
+        If GROUP_SIZE is not a power of 2, this has the effect of
+        trying the largest power of 2 that fits within the group,
+        even though the group is not a multiple of that vector size.
+        The BB vectorizer will then try to carve up the group into
+        smaller pieces.  */
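+      /* A worked example (illustrative): GROUP_SIZE == 6 gives
+        1 << floor_log2 (6) == 4, so the loop below tries nunits == 4
+        and then nunits == 2 before giving up.  */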
+      unsigned int nunits = 1 << floor_log2 (group_size);
+      do
+       {
+         vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
+                                                        scalar_type, nunits);
+         nunits /= 2;
+       }
+      while (nunits > 1 && !vectype);
+    }
+
   return vectype;
 }
 
+/* Return the vector type corresponding to SCALAR_TYPE as supported
+   by the target.  NODE, if nonnull, is the SLP tree node that will
+   use the returned vector type.  */
+
+tree
+get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
+{
+  unsigned int group_size = 0;
+  if (node)
+    group_size = SLP_TREE_LANES (node);
+  return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
+}
+
 /* Function get_mask_type_for_scalar_type.
 
    Returns the mask type corresponding to a result of comparison
-   of vectors of specified SCALAR_TYPE as supported by target.  */
+   of vectors of specified SCALAR_TYPE as supported by target.
+   If GROUP_SIZE is nonzero and we're performing BB vectorization,
+   make sure that the number of elements in the vector is no bigger
+   than GROUP_SIZE.  */
 
 tree
-get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type)
+get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
+                              unsigned int group_size)
 {
-  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
 
   if (!vectype)
     return NULL;
 
-  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
-                                 vinfo->vector_size);
+  return truth_type_for (vectype);
 }
 
 /* Function get_same_sized_vectype
@@ -11216,10 +11013,29 @@ tree
 get_same_sized_vectype (tree scalar_type, tree vector_type)
 {
   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
-    return build_same_sized_truth_vector_type (vector_type);
+    return truth_type_for (vector_type);
+
+  poly_uint64 nunits;
+  if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
+                  GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
+    return NULL_TREE;
 
-  return get_vectype_for_scalar_type_and_size
-          (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
+  return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
+                                             scalar_type, nunits);
+}
+
+/* Return true if replacing VINFO->vector_mode with VECTOR_MODE
+   would not change the chosen vector modes.  */
+
+bool
+vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
+{
+  for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
+       i != vinfo->used_vector_modes.end (); ++i)
+    if (!VECTOR_MODE_P (*i)
+       || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
+      return false;
+  return true;
 }
 
 /* Function vect_is_simple_use.
@@ -11394,6 +11210,82 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
   return true;
 }
 
+/* Function vect_is_simple_use.
+
+   Same as vect_is_simple_use but determines the operand by operand
+   position OPERAND from either STMT or SLP_NODE, filling in *OP
+   and *SLP_DEF (when SLP_NODE is not NULL).  */
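+
+/* A typical call, mirroring the uses earlier in this patch (a sketch
+   assuming the usual locals of a vectorizable_* routine):
+
+     slp_tree slp_op;
+     tree op, vectype;
+     enum vect_def_type dt;
+     if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
+                             &op, &slp_op, &dt, &vectype))
+       return false;
+
+   On success, OP holds the scalar operand 0 of STMT_INFO and VECTYPE
+   its vector type, with SLP_OP set when operating on an SLP node.  */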
+
+bool
+vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
+                   unsigned operand, tree *op, slp_tree *slp_def,
+                   enum vect_def_type *dt,
+                   tree *vectype, stmt_vec_info *def_stmt_info_out)
+{
+  if (slp_node)
+    {
+      slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
+      *slp_def = child;
+      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+       *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
+      else
+       {
+         if (def_stmt_info_out)
+           *def_stmt_info_out = NULL;
+         *op = SLP_TREE_SCALAR_OPS (child)[0];
+         *dt = SLP_TREE_DEF_TYPE (child);
+         *vectype = SLP_TREE_VECTYPE (child);
+         return true;
+       }
+    }
+  else
+    {
+      if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
+       {
+         if (gimple_assign_rhs_code (ass) == COND_EXPR
+             && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
+           {
+             if (operand < 2)
+               *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
+             else
+               *op = gimple_op (ass, operand);
+           }
+         else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
+           *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
+         else
+           *op = gimple_op (ass, operand + 1);
+       }
+      else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
+       {
+         if (gimple_call_internal_p (call)
+             && internal_store_fn_p (gimple_call_internal_fn (call)))
+           operand = internal_fn_stored_value_index (gimple_call_internal_fn
+                                                                       (call));
+         *op = gimple_call_arg (call, operand);
+       }
+      else
+       gcc_unreachable ();
+    }
+
+/* ???  We might want to update *vectype from *slp_def here, though
+     when sharing nodes this would prevent unsharing in the caller.  */
+  return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
+}
+
+/* If OP is not NULL and is external or constant update its vector
+   type with VECTYPE.  Returns true if successful or false if not,
+   for example when conflicting vector types are present.  */
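+
+/* Typical use, as in vectorizable_comparison above (a sketch): once
+   analysis has settled on VECTYPE, give constant and invariant SLP
+   children their vector type and fail gracefully on a conflict:
+
+     if (slp_node
+        && !vect_maybe_update_slp_op_vectype (slp_rhs1, vectype))
+       return false;  */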
+
+bool
+vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
+{
+  if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
+    return true;
+  if (SLP_TREE_VECTYPE (op))
+    return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
+  SLP_TREE_VECTYPE (op) = vectype;
+  return true;
+}
 
 /* Function supportable_widening_operation
 
@@ -11417,14 +11309,14 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
    widening operation (short in the above example).  */
 
 bool
-supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
+supportable_widening_operation (vec_info *vinfo,
+                               enum tree_code code, stmt_vec_info stmt_info,
                                tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, enum tree_code *code2,
                                 int *multi_step_cvt,
                                 vec<tree> *interm_types)
 {
-  vec_info *vinfo = stmt_info->vinfo;
-  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
   class loop *vect_loop = NULL;
   machine_mode vec_mode;
   enum insn_code icode1, icode2;
@@ -11480,7 +11372,7 @@ supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
       if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt_info)
-         && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
+         && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt_info, vectype_out,
                                             vectype_in, code1, code2,
                                             multi_step_cvt, interm_types))
@@ -11608,11 +11500,8 @@ supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
     {
       intermediate_mode = insn_data[icode1].operand[0].mode;
       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
-       {
-         intermediate_type = vect_halve_mask_nunits (vinfo, prev_type);
-         if (intermediate_mode != TYPE_MODE (intermediate_type))
-           return false;
-       }
+       intermediate_type
+         = vect_halve_mask_nunits (prev_type, intermediate_mode);
       else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode,
@@ -11688,7 +11577,7 @@ supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
    narrowing operation (short in the above example).   */
 
 bool
-supportable_narrowing_operation (vec_info *vinfo, enum tree_code code,
+supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                  vec<tree> *interm_types)
@@ -11796,11 +11685,8 @@ supportable_narrowing_operation (vec_info *vinfo, enum tree_code code,
     {
       intermediate_mode = insn_data[icode1].operand[0].mode;
       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
-       {
-         intermediate_type = vect_double_mask_nunits (vinfo, prev_type);
-         if (intermediate_mode != TYPE_MODE (intermediate_type))
-           return false;
-       }
+       intermediate_type
+         = vect_double_mask_nunits (prev_type, intermediate_mode);
       else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode, uns);
@@ -11874,14 +11760,14 @@ vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
 
 /* Try to compute the vector types required to vectorize STMT_INFO,
    returning true on success and false if vectorization isn't possible.
+   If GROUP_SIZE is nonzero and we're performing BB vectorization,
+   make sure that the number of elements in the vectors is no bigger
+   than GROUP_SIZE.
 
    On success:
 
    - Set *STMT_VECTYPE_OUT to:
      - NULL_TREE if the statement doesn't need to be vectorized;
-     - boolean_type_node if the statement is a boolean operation whose
-       vector type can only be determined once all the other vector types
-       are known; and
      - the equivalent of STMT_VINFO_VECTYPE otherwise.
 
    - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
@@ -11889,13 +11775,22 @@ vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
      statement does not help to determine the overall number of units.  */
 
 opt_result
-vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
+vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
                                tree *stmt_vectype_out,
-                               tree *nunits_vectype_out)
+                               tree *nunits_vectype_out,
+                               unsigned int group_size)
 {
-  vec_info *vinfo = stmt_info->vinfo;
   gimple *stmt = stmt_info->stmt;
 
+  /* For BB vectorization, we should always have a group size once we've
+     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
+     are tentative requests during things like early data reference
+     analysis and pattern recognition.  */
+  if (is_a <bb_vec_info> (vinfo))
+    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
+  else
+    group_size = 0;
+
   *stmt_vectype_out = NULL_TREE;
   *nunits_vectype_out = NULL_TREE;
 
@@ -11926,92 +11821,88 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
 
   tree vectype;
   tree scalar_type = NULL_TREE;
-  if (STMT_VINFO_VECTYPE (stmt_info))
-    *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
+  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
+    {
+      vectype = STMT_VINFO_VECTYPE (stmt_info);
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_NOTE, vect_location,
+                        "precomputed vectype: %T\n", vectype);
+    }
+  else if (vect_use_mask_type_p (stmt_info))
+    {
+      unsigned int precision = stmt_info->mask_precision;
+      scalar_type = build_nonstandard_integer_type (precision, 1);
+      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
+      if (!vectype)
+       return opt_result::failure_at (stmt, "not vectorized: unsupported"
+                                      " data-type %T\n", scalar_type);
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
+    }
   else
     {
-      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
-      if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
+      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
+       scalar_type = TREE_TYPE (DR_REF (dr));
+      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
        scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
       else
        scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
 
-      /* Pure bool ops don't participate in number-of-units computation.
-        For comparisons use the types being compared.  */
-      if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
-         && is_gimple_assign (stmt)
-         && gimple_assign_rhs_code (stmt) != COND_EXPR)
+      if (dump_enabled_p ())
        {
-         *stmt_vectype_out = boolean_type_node;
-
-         tree rhs1 = gimple_assign_rhs1 (stmt);
-         if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
-             && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
-           scalar_type = TREE_TYPE (rhs1);
+         if (group_size)
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "get vectype for scalar type (group size %d):"
+                            " %T\n", group_size, scalar_type);
          else
-           {
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_NOTE, vect_location,
-                                "pure bool operation.\n");
-             return opt_result::success ();
-           }
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "get vectype for scalar type: %T\n", scalar_type);
        }
-
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_NOTE, vect_location,
-                        "get vectype for scalar type:  %T\n", scalar_type);
-      vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
       if (!vectype)
        return opt_result::failure_at (stmt,
                                       "not vectorized:"
                                       " unsupported data-type %T\n",
                                       scalar_type);
 
-      if (!*stmt_vectype_out)
-       *stmt_vectype_out = vectype;
-
       if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
     }
+  *stmt_vectype_out = vectype;
 
   /* Don't try to compute scalar types if the stmt produces a boolean
      vector; use the existing vector type instead.  */
-  tree nunits_vectype;
-  if (VECTOR_BOOLEAN_TYPE_P (vectype))
-    nunits_vectype = vectype;
-  else
+  tree nunits_vectype = vectype;
+  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
     {
       /* The number of units is set according to the smallest scalar
         type (or the largest vector size, but we only support one
         vector size per vectorization).  */
-      if (*stmt_vectype_out != boolean_type_node)
+      HOST_WIDE_INT dummy;
+      scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
+      if (scalar_type != TREE_TYPE (vectype))
        {
-         HOST_WIDE_INT dummy;
-         scalar_type = vect_get_smallest_scalar_type (stmt_info,
-                                                      &dummy, &dummy);
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "get vectype for smallest scalar type: %T\n",
+                            scalar_type);
+         nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
+                                                       group_size);
+         if (!nunits_vectype)
+           return opt_result::failure_at
+             (stmt, "not vectorized: unsupported data-type %T\n",
+              scalar_type);
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
+                            nunits_vectype);
        }
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_NOTE, vect_location,
-                        "get vectype for scalar type:  %T\n", scalar_type);
-      nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
     }
-  if (!nunits_vectype)
-    return opt_result::failure_at (stmt,
-                                  "not vectorized: unsupported data-type %T\n",
-                                  scalar_type);
 
-  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
-               GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
-    return opt_result::failure_at (stmt,
-                                  "not vectorized: different sized vector "
-                                  "types in statement, %T and %T\n",
-                                  vectype, nunits_vectype);
+  gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
+                         TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
 
   if (dump_enabled_p ())
     {
-      dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
-                      nunits_vectype);
-
       dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
       dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
       dump_printf (MSG_NOTE, "\n");
@@ -12020,81 +11911,3 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
   *nunits_vectype_out = nunits_vectype;
   return opt_result::success ();
 }
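A hedged walk-through of the relaxed consistency check near the end of the function above (an illustration, not code from the patch): for a conversion statement the two vector types can legitimately differ, and the new multiple_p assertion accepts lane-count ratios that the old equal-mode-size test would have rejected once mixed vector sizes are in play.

  /* Sketch only: for "long_dest = (long) int_src" with 128-bit
     vectors, the statement vectype is V2DI (2 lanes) while the
     smallest scalar type "int" gives a nunits vectype of V4SI
     (4 lanes); both checks accept that.  Pairing V2DI with a 256-bit
     V8SI passes the new multiple_p test but would have failed the
     old GET_MODE_SIZE equality.  */
  tree v2di = build_vector_type (long_long_integer_type_node, 2);
  tree v8si = build_vector_type (integer_type_node, 8);
  gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (v8si),    /* 8 lanes  */
                          TYPE_VECTOR_SUBPARTS (v2di)));  /* 2 lanes  */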
-
-/* Try to determine the correct vector type for STMT_INFO, which is a
-   statement that produces a scalar boolean result.  Return the vector
-   type on success, otherwise return NULL_TREE.  */
-
-opt_tree
-vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
-{
-  vec_info *vinfo = stmt_info->vinfo;
-  gimple *stmt = stmt_info->stmt;
-  tree mask_type = NULL;
-  tree vectype, scalar_type;
-
-  if (is_gimple_assign (stmt)
-      && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
-      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
-    {
-      scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
-      mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type);
-
-      if (!mask_type)
-       return opt_tree::failure_at (stmt,
-                                    "not vectorized: unsupported mask\n");
-    }
-  else
-    {
-      tree rhs;
-      ssa_op_iter iter;
-      enum vect_def_type dt;
-
-      FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
-       {
-         if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
-           return opt_tree::failure_at (stmt,
-                                        "not vectorized:can't compute mask"
-                                        " type for statement, %G", stmt);
-
-         /* No vectype probably means external definition.
-            Allow it in case there is another operand which
-            allows to determine mask type.  */
-         if (!vectype)
-           continue;
-
-         if (!mask_type)
-           mask_type = vectype;
-         else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
-                            TYPE_VECTOR_SUBPARTS (vectype)))
-           return opt_tree::failure_at (stmt,
-                                        "not vectorized: different sized mask"
-                                        " types in statement, %T and %T\n",
-                                        mask_type, vectype);
-         else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
-                  != VECTOR_BOOLEAN_TYPE_P (vectype))
-           return opt_tree::failure_at (stmt,
-                                        "not vectorized: mixed mask and "
-                                        "nonmask vector types in statement, "
-                                        "%T and %T\n",
-                                        mask_type, vectype);
-       }
-
-      /* We may compare boolean value loaded as vector of integers.
-        Fix mask_type in such case.  */
-      if (mask_type
-         && !VECTOR_BOOLEAN_TYPE_P (mask_type)
-         && gimple_code (stmt) == GIMPLE_ASSIGN
-         && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
-       mask_type = build_same_sized_truth_vector_type (mask_type);
-    }
-
-  /* No mask_type should mean loop invariant predicate.
-     This is probably a subject for optimization in if-conversion.  */
-  if (!mask_type)
-    return opt_tree::failure_at (stmt,
-                                "not vectorized: can't compute mask type "
-                                "for statement: %G", stmt);
-
-  return opt_tree::success (mask_type);
-}
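For context on this deletion: the per-operand search that vect_get_mask_type_for_stmt performed is superseded by the mask-precision mechanism visible earlier in the patch (vect_use_mask_type_p and stmt_info->mask_precision). Below is a hedged sketch of the replacement flow, assuming pattern analysis has already recorded the precision and that STMT refers to stmt_info->stmt.

  /* Sketch only, mirroring the new path in
     vect_get_vector_types_for_stmt: a comparison of two 32-bit
     operands records mask_precision == 32, and the mask type is
     derived directly from that precision instead of scanning the
     statement's SSA uses.  */
  if (vect_use_mask_type_p (stmt_info))
    {
      tree scalar_type
        = build_nonstandard_integer_type (stmt_info->mask_precision, 1);
      tree mask_type
        = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!mask_type)
        return opt_result::failure_at (stmt, "not vectorized: unsupported"
                                       " data-type %T\n", scalar_type);
      /* MASK_TYPE then serves as the statement vectype.  */
    }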