PR c++/68795: fix uninitialized close_paren_loc in cp_parser_postfix_expression
[gcc.git] / gcc / tree-vect-stmts.c
index 1f68ff52698e035378c25224237643251371ed84..465826e61945a3f136407bcddd4903780ff518c8 100644 (file)
@@ -1,5 +1,5 @@
 /* Statement Analysis and Transformation for Vectorization
-   Copyright (C) 2003-2015 Free Software Foundation, Inc.
+   Copyright (C) 2003-2016 Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com>
    and Ira Rosen <irar@il.ibm.com>
 
@@ -22,72 +22,32 @@ along with GCC; see the file COPYING3.  If not see
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "ssa.h"
+#include "optabs-tree.h"
+#include "insn-config.h"
+#include "recog.h"             /* FIXME: for insn_data */
+#include "cgraph.h"
 #include "dumpfile.h"
-#include "tm.h"
-#include "hash-set.h"
-#include "machmode.h"
-#include "vec.h"
-#include "double-int.h"
-#include "input.h"
 #include "alias.h"
-#include "symtab.h"
-#include "wide-int.h"
-#include "inchash.h"
-#include "tree.h"
 #include "fold-const.h"
 #include "stor-layout.h"
-#include "target.h"
-#include "predict.h"
-#include "hard-reg-set.h"
-#include "function.h"
-#include "dominance.h"
-#include "cfg.h"
-#include "basic-block.h"
-#include "gimple-pretty-print.h"
-#include "tree-ssa-alias.h"
-#include "internal-fn.h"
 #include "tree-eh.h"
-#include "gimple-expr.h"
-#include "is-a.h"
-#include "gimple.h"
 #include "gimplify.h"
 #include "gimple-iterator.h"
 #include "gimplify-me.h"
-#include "gimple-ssa.h"
 #include "tree-cfg.h"
-#include "tree-phinodes.h"
-#include "ssa-iterators.h"
-#include "stringpool.h"
-#include "tree-ssanames.h"
 #include "tree-ssa-loop-manip.h"
 #include "cfgloop.h"
 #include "tree-ssa-loop.h"
 #include "tree-scalar-evolution.h"
-#include "hashtab.h"
-#include "rtl.h"
-#include "flags.h"
-#include "statistics.h"
-#include "real.h"
-#include "fixed-value.h"
-#include "insn-config.h"
-#include "expmed.h"
-#include "dojump.h"
-#include "explow.h"
-#include "calls.h"
-#include "emit-rtl.h"
-#include "varasm.h"
-#include "stmt.h"
-#include "expr.h"
-#include "recog.h"             /* FIXME: for insn_data */
-#include "insn-codes.h"
-#include "optabs.h"
-#include "diagnostic-core.h"
 #include "tree-vectorizer.h"
-#include "hash-map.h"
-#include "plugin-api.h"
-#include "ipa-ref.h"
-#include "cgraph.h"
 #include "builtins.h"
+#include "internal-fn.h"
 
 /* For lang_hooks.types.type_for_mode.  */
 #include "langhooks.h"
@@ -105,7 +65,7 @@ stmt_vectype (struct _stmt_vec_info *stmt_info)
 bool
 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
 {
-  gimple stmt = STMT_VINFO_STMT (stmt_info);
+  gimple *stmt = STMT_VINFO_STMT (stmt_info);
   basic_block bb = gimple_bb (stmt);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   struct loop* loop;
@@ -130,27 +90,16 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
   if (body_cost_vec)
     {
       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
-      add_stmt_info_to_vec (body_cost_vec, count, kind,
-                           stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
-                           misalign);
+      stmt_info_for_cost si = { count, kind,
+                               stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
+                               misalign };
+      body_cost_vec->safe_push (si);
       return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
-        
     }
   else
-    {
-      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-      void *target_cost_data;
-
-      if (loop_vinfo)
-       target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
-      else
-       target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
-
-      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
-                           misalign, where);
-    }
+    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
+                         count, kind, stmt_info, misalign, where);
 }
 
 /* Return a variable of type ELEM_TYPE[NELEMS].  */
@@ -168,11 +117,11 @@ create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
    with scalar destination SCALAR_DEST.  */
 
 static tree
-read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
+read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
 {
   tree vect_type, vect, vect_name, array_ref;
-  gimple new_stmt;
+  gimple *new_stmt;
 
   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
   vect_type = TREE_TYPE (TREE_TYPE (array));
@@ -194,11 +143,11 @@ read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
    The store is part of the vectorization of STMT.  */
 
 static void
-write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
+write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
 {
   tree array_ref;
-  gimple new_stmt;
+  gimple *new_stmt;
 
   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
@@ -231,14 +180,14 @@ create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
 
 static void
-vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
+vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
-  gimple pattern_stmt;
+  gimple *pattern_stmt;
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -255,7 +204,7 @@ vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
         {
           imm_use_iterator imm_iter;
           use_operand_p use_p;
-          gimple use_stmt;
+          gimple *use_stmt;
           tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -338,7 +287,7 @@ vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
    CHECKME: what other side effects would the vectorizer allow?  */
 
 static bool
-vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
+vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -402,7 +351,7 @@ vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
    used in STMT for anything other than indexing an array.  */
 
 static bool
-exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
+exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
 {
   tree operand;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
@@ -489,16 +438,15 @@ exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
    Return true if everything is as expected. Return false otherwise.  */
 
 static bool
-process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
-            enum vect_relevant relevant, vec<gimple> *worklist,
+process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
+            enum vect_relevant relevant, vec<gimple *> *worklist,
             bool force)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
   stmt_vec_info dstmt_vinfo;
   basic_block bb, def_bb;
-  tree def;
-  gimple def_stmt;
+  gimple *def_stmt;
   enum vect_def_type dt;
 
   /* case 1: we are only interested in uses that need to be vectorized.  Uses
@@ -506,7 +454,7 @@ process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
      return true;
 
-  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
+  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -650,11 +598,11 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
   unsigned int nbbs = loop->num_nodes;
   gimple_stmt_iterator si;
-  gimple stmt;
+  gimple *stmt;
   unsigned int i;
   stmt_vec_info stmt_vinfo;
   basic_block bb;
-  gimple phi;
+  gimple *phi;
   bool live_p;
   enum vect_relevant relevant, tmp_relevant;
   enum vect_def_type def_type;
@@ -663,7 +611,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
     dump_printf_loc (MSG_NOTE, vect_location,
                      "=== vect_mark_stmts_to_be_vectorized ===\n");
 
-  auto_vec<gimple, 64> worklist;
+  auto_vec<gimple *, 64> worklist;
 
   /* 1. Init worklist.  */
   for (i = 0; i < nbbs; i++)
@@ -838,10 +786,10 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
               return false;
           }
 
-      if (STMT_VINFO_GATHER_P (stmt_vinfo))
+      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          tree off;
-         tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
+         tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
@@ -943,7 +891,7 @@ vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
 static int
 vect_cost_group_size (stmt_vec_info stmt_info)
 {
-  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
+  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
 
   if (first_stmt == STMT_VINFO_STMT (stmt_info))
     return GROUP_SIZE (stmt_info);
@@ -967,7 +915,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
   int group_size;
   unsigned int inside_cost = 0, prologue_cost = 0;
   struct data_reference *first_dr;
-  gimple first_stmt;
+  gimple *first_stmt;
 
   if (dt == vect_constant_def || dt == vect_external_def)
     prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
@@ -1000,7 +948,8 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
      equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
      access is instead being provided by a permute-and-store operation,
      include the cost of the permutes.  */
-  if (!store_lanes_p && group_size > 1)
+  if (!store_lanes_p && group_size > 1
+      && !STMT_VINFO_STRIDED_P (stmt_info))
     {
       /* Uses a high and low interleave or shuffle operations for each
         needed permute.  */
@@ -1014,21 +963,24 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                          group_size);
     }
 
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   /* Costs of the stores.  */
-  if (STMT_VINFO_STRIDED_P (stmt_info))
+  if (STMT_VINFO_STRIDED_P (stmt_info)
+      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
     {
       /* N scalar stores plus extracting the elements.  */
-      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
       inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_store, stmt_info, 0, vect_body);
-      inside_cost += record_stmt_cost (body_cost_vec,
-                                      ncopies * TYPE_VECTOR_SUBPARTS (vectype),
-                                      vec_to_scalar, stmt_info, 0, vect_body);
     }
   else
     vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
 
+  if (STMT_VINFO_STRIDED_P (stmt_info))
+    inside_cost += record_stmt_cost (body_cost_vec,
+                                    ncopies * TYPE_VECTOR_SUBPARTS (vectype),
+                                    vec_to_scalar, stmt_info, 0, vect_body);
+
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
                      "vect_model_store_cost: inside_cost = %d, "
@@ -1043,7 +995,7 @@ vect_get_store_cost (struct data_reference *dr, int ncopies,
                     stmt_vector_for_cost *body_cost_vec)
 {
   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
-  gimple stmt = DR_STMT (dr);
+  gimple *stmt = DR_STMT (dr);
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 
   switch (alignment_support_scheme)
@@ -1103,7 +1055,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      stmt_vector_for_cost *body_cost_vec)
 {
   int group_size;
-  gimple first_stmt;
+  gimple *first_stmt;
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
   unsigned int inside_cost = 0, prologue_cost = 0;
 
@@ -1177,7 +1129,7 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool record_prologue_costs)
 {
   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
-  gimple stmt = DR_STMT (dr);
+  gimple *stmt = DR_STMT (dr);
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 
   switch (alignment_support_scheme)
@@ -1284,7 +1236,7 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
    the loop preheader for the vectorized stmt STMT.  */
 
 static void
-vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
+vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
 {
   if (gsi)
     vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -1338,11 +1290,9 @@ vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
    It will be used in the vectorization of STMT.  */
 
 tree
-vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
+vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
 {
-  tree new_var;
-  gimple init_stmt;
-  tree vec_oprnd;
+  gimple *init_stmt;
   tree new_temp;
 
   if (TREE_CODE (type) == VECTOR_TYPE
@@ -1350,8 +1300,26 @@ vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
     {
       if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
-         if (CONSTANT_CLASS_P (val))
-           val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
+         /* Scalar boolean value should be transformed into
+            all zeros or all ones value before building a vector.  */
+         if (VECTOR_BOOLEAN_TYPE_P (type))
+           {
+             tree true_val = build_all_ones_cst (TREE_TYPE (type));
+             tree false_val = build_zero_cst (TREE_TYPE (type));
+
+             if (CONSTANT_CLASS_P (val))
+               val = integer_zerop (val) ? false_val : true_val;
+             else
+               {
+                 new_temp = make_ssa_name (TREE_TYPE (type));
+                 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
+                                                  val, true_val, false_val);
+                 vect_init_vector_1 (stmt, init_stmt, gsi);
+                 val = new_temp;
+               }
+           }
+         else if (CONSTANT_CLASS_P (val))
+           val = fold_convert (TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
@@ -1363,13 +1331,10 @@ vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
       val = build_vector_from_val (type, val);
     }
 
-  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
-  init_stmt = gimple_build_assign  (new_var, val);
-  new_temp = make_ssa_name (new_var, init_stmt);
-  gimple_assign_set_lhs (init_stmt, new_temp);
+  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
+  init_stmt = gimple_build_assign  (new_temp, val);
   vect_init_vector_1 (stmt, init_stmt, gsi);
-  vec_oprnd = gimple_assign_lhs (init_stmt);
-  return vec_oprnd;
+  return new_temp;
 }
 
 
@@ -1382,19 +1347,19 @@ vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
 
    In case OP is an invariant or constant, a new stmt that creates a vector def
-   needs to be introduced.  */
+   needs to be introduced.  VECTYPE may be used to specify a required type for
+   a vector invariant.  */
 
 tree
-vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
+vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
 {
   tree vec_oprnd;
-  gimple vec_stmt;
-  gimple def_stmt;
+  gimple *vec_stmt;
+  gimple *def_stmt;
   stmt_vec_info def_stmt_info = NULL;
   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
-  unsigned int nunits;
+  tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
-  tree def;
   enum vect_def_type dt;
   bool is_simple_use;
   tree vector_type;
@@ -1407,19 +1372,11 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
       dump_printf (MSG_NOTE, "\n");
     }
 
-  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
-                                     &def_stmt, &def, &dt);
+  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
   gcc_assert (is_simple_use);
   if (dump_enabled_p ())
     {
       int loc_printed = 0;
-      if (def)
-        {
-          dump_printf_loc (MSG_NOTE, vect_location, "def =  ");
-          loc_printed = 1;
-          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
-          dump_printf (MSG_NOTE, "\n");
-        }
       if (def_stmt)
         {
           if (loc_printed)
@@ -1432,46 +1389,25 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
 
   switch (dt)
     {
-    /* Case 1: operand is a constant.  */
+    /* operand is a constant or a loop invariant.  */
     case vect_constant_def:
-      {
-       vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
-       gcc_assert (vector_type);
-       nunits = TYPE_VECTOR_SUBPARTS (vector_type);
-
-       if (scalar_def)
-         *scalar_def = op;
-
-        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
-        if (dump_enabled_p ())
-          dump_printf_loc (MSG_NOTE, vect_location,
-                           "Create vector_cst. nunits = %d\n", nunits);
-
-        return vect_init_vector (stmt, op, vector_type, NULL);
-      }
-
-    /* Case 2: operand is defined outside the loop - loop invariant.  */
     case vect_external_def:
       {
-       vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
-       gcc_assert (vector_type);
-
-       if (scalar_def)
-         *scalar_def = def;
-
-        /* Create 'vec_inv = {inv,inv,..,inv}'  */
-        if (dump_enabled_p ())
-          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
+       if (vectype)
+         vector_type = vectype;
+       else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
+                && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
+         vector_type = build_same_sized_truth_vector_type (stmt_vectype);
+       else
+         vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
 
-        return vect_init_vector (stmt, def, vector_type, NULL);
+       gcc_assert (vector_type);
+        return vect_init_vector (stmt, op, vector_type, NULL);
       }
 
-    /* Case 3: operand is defined inside the loop.  */
+    /* operand is defined inside the loop.  */
     case vect_internal_def:
       {
-       if (scalar_def)
-         *scalar_def = NULL/* FIXME tuples: def_stmt*/;
-
         /* Get the def from the vectorized stmt.  */
         def_stmt_info = vinfo_for_stmt (def_stmt);
 
@@ -1492,22 +1428,14 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
         return vec_oprnd;
       }
 
-    /* Case 4: operand is defined by a loop header phi - reduction  */
+    /* operand is defined by a loop header phi - reduction  */
     case vect_reduction_def:
     case vect_double_reduction_def:
     case vect_nested_cycle:
-      {
-       struct loop *loop;
-
-       gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
-       loop = (gimple_bb (def_stmt))->loop_father;
-
-        /* Get the def before the loop  */
-        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
-        return get_initial_def_for_reduction (stmt, op, scalar_def);
-     }
+      /* Code should use get_initial_def_for_reduction.  */
+      gcc_unreachable ();
 
-    /* Case 5: operand is defined by loop-header phi - induction.  */
+    /* operand is defined by loop-header phi - induction.  */
     case vect_induction_def:
       {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
@@ -1587,7 +1515,7 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
 tree
 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
 {
-  gimple vec_stmt_for_operand;
+  gimple *vec_stmt_for_operand;
   stmt_vec_info def_stmt_info;
 
   /* Do nothing; can reuse same def.  */
@@ -1599,7 +1527,6 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
   gcc_assert (def_stmt_info);
   vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
   gcc_assert (vec_stmt_for_operand);
-  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
   if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
     vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
   else
@@ -1635,7 +1562,7 @@ vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
    and -1 otherwise.  */
 
 void
-vect_get_vec_defs (tree op0, tree op1, gimple stmt,
+vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
@@ -1661,13 +1588,13 @@ vect_get_vec_defs (tree op0, tree op1, gimple stmt,
       tree vec_oprnd;
 
       vec_oprnds0->create (1);
-      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
+      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
       vec_oprnds0->quick_push (vec_oprnd);
 
       if (op1)
        {
          vec_oprnds1->create (1);
-         vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
+         vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
          vec_oprnds1->quick_push (vec_oprnd);
        }
     }
@@ -1679,19 +1606,18 @@ vect_get_vec_defs (tree op0, tree op1, gimple stmt,
    Insert a new stmt.  */
 
 void
-vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
+vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  vec_info *vinfo = stmt_info->vinfo;
 
   gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
 
   if (!gsi_end_p (*gsi)
       && gimple_has_mem_ops (vec_stmt))
     {
-      gimple at_stmt = gsi_stmt (*gsi);
+      gimple *at_stmt = gsi_stmt (*gsi);
       tree vuse = gimple_vuse (at_stmt);
       if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
@@ -1716,8 +1642,7 @@ vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
     }
   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
 
-  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
-                                                   bb_vinfo));
+  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
 
   if (dump_enabled_p ())
     {
@@ -1735,31 +1660,37 @@ vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
     add_stmt_to_eh_lp (vec_stmt, lp_nr);
 }
 
-/* Checks if CALL can be vectorized in type VECTYPE.  Returns
-   a function declaration if the target has a vectorized version
-   of the function, or NULL_TREE if the function cannot be vectorized.  */
+/* We want to vectorize a call to combined function CFN with function
+   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
+   as the types of all inputs.  Check whether this is possible using
+   an internal function, returning its code if so or IFN_LAST if not.  */
 
-tree
-vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
+static internal_fn
+vectorizable_internal_function (combined_fn cfn, tree fndecl,
+                               tree vectype_out, tree vectype_in)
 {
-  tree fndecl = gimple_call_fndecl (call);
-
-  /* We only handle functions that do not read or clobber memory -- i.e.
-     const or novops ones.  */
-  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
-    return NULL_TREE;
-
-  if (!fndecl
-      || TREE_CODE (fndecl) != FUNCTION_DECL
-      || !DECL_BUILT_IN (fndecl))
-    return NULL_TREE;
-
-  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
-                                                       vectype_in);
+  internal_fn ifn;
+  if (internal_fn_p (cfn))
+    ifn = as_internal_fn (cfn);
+  else
+    ifn = associated_internal_fn (fndecl);
+  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
+    {
+      const direct_internal_fn_info &info = direct_internal_fn (ifn);
+      if (info.vectorizable)
+       {
+         tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
+         tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
+         if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
+                                             OPTIMIZE_FOR_SPEED))
+           return ifn;
+       }
+    }
+  return IFN_LAST;
 }
 
 
-static tree permute_vec_elements (tree, tree, tree, gimple,
+static tree permute_vec_elements (tree, tree, tree, gimple *,
                                  gimple_stmt_iterator *);
 
 
@@ -1771,8 +1702,8 @@ static tree permute_vec_elements (tree, tree, tree, gimple,
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 static bool
-vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
-                             gimple *vec_stmt, slp_tree slp_node)
+vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
+                             gimple **vec_stmt, slp_tree slp_node)
 {
   tree vec_dest = NULL;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
@@ -1782,11 +1713,13 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
   bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree rhs_vectype = NULL_TREE;
+  tree mask_vectype;
   tree elem_type;
-  gimple new_stmt;
+  gimple *new_stmt;
   tree dummy;
   tree dataref_ptr = NULL_TREE;
-  gimple ptr_incr;
+  gimple *ptr_incr;
   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
   int ncopies;
   int i, j;
@@ -1797,8 +1730,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
   enum vect_def_type gather_dt = vect_unknown_def_type;
   bool is_store;
   tree mask;
-  gimple def_stmt;
-  tree def;
+  gimple *def_stmt;
   enum vect_def_type dt;
 
   if (slp_node != NULL)
@@ -1809,8 +1741,8 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
 
   is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
   mask = gimple_call_arg (stmt, 2);
-  if (TYPE_PRECISION (TREE_TYPE (mask))
-      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
+
+  if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
     return false;
 
   /* FORNOW. This restriction should be relaxed.  */
@@ -1839,16 +1771,33 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
   if (STMT_VINFO_STRIDED_P (stmt_info))
     return false;
 
-  if (STMT_VINFO_GATHER_P (stmt_info))
+  if (TREE_CODE (mask) != SSA_NAME)
+    return false;
+
+  if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
+    return false;
+
+  if (!mask_vectype)
+    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
+
+  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
+    return false;
+
+  if (is_store)
+    {
+      tree rhs = gimple_call_arg (stmt, 3);
+      if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
+       return false;
+    }
+
+  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     {
-      gimple def_stmt;
-      tree def;
-      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
+      gimple *def_stmt;
+      gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
                                       &gather_off, &gather_scale);
       gcc_assert (gather_decl);
-      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
-                                &def_stmt, &def, &gather_dt,
-                                &gather_off_vectype))
+      if (!vect_is_simple_use (gather_off, loop_vinfo, &def_stmt, &gather_dt,
+                              &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1872,24 +1821,13 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
                                 : DR_STEP (dr), size_zero_node) <= 0)
     return false;
   else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
-          || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
-    return false;
-
-  if (TREE_CODE (mask) != SSA_NAME)
-    return false;
-
-  if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
-                          &def_stmt, &def, &dt))
+          || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
+                                         TYPE_MODE (mask_vectype),
+                                         !is_store)
+          || (rhs_vectype
+              && !useless_type_conversion_p (vectype, rhs_vectype)))
     return false;
 
-  if (is_store)
-    {
-      tree rhs = gimple_call_arg (stmt, 3);
-      if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
-                              &def_stmt, &def, &dt))
-       return false;
-    }
-
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
@@ -1903,7 +1841,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
 
   /** Transform.  **/
 
-  if (STMT_VINFO_GATHER_P (stmt_info))
+  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     {
       tree vec_oprnd0 = NULL_TREE, op;
       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
@@ -1976,7 +1914,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
-             = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
+             = vect_get_vec_def_for_operand (gather_off, stmt);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
@@ -1985,8 +1923,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
-             var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
-             var = make_ssa_name (var);
+             var = vect_get_new_ssa_name (idxtype, vect_simple_var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
@@ -2000,11 +1937,10 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
          else
            {
              if (j == 0)
-               vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
+               vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              else
                {
-                 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
-                                     &def_stmt, &def, &dt);
+                 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
                  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
                }
 
@@ -2013,9 +1949,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
                {
                  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
                              == TYPE_VECTOR_SUBPARTS (masktype));
-                 var = vect_get_new_vect_var (masktype, vect_simple_var,
-                                              NULL);
-                 var = make_ssa_name (var);
+                 var = vect_get_new_ssa_name (masktype, vect_simple_var);
                  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
                  new_stmt
                    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
@@ -2032,8 +1966,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
-             var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
-             op = make_ssa_name (var, new_stmt);
+             op = vect_get_new_ssa_name (rettype, vect_simple_var);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest);
@@ -2069,6 +2002,11 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
 
       /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
+      if (STMT_VINFO_RELATED_STMT (stmt_info))
+       {
+         stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+         stmt_info = vinfo_for_stmt (stmt);
+       }
       tree lhs = gimple_call_lhs (stmt);
       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
       set_vinfo_for_stmt (new_stmt, stmt_info);
@@ -2088,8 +2026,8 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
          if (i == 0)
            {
              tree rhs = gimple_call_arg (stmt, 3);
-             vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
-             vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
+             vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
+             vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              /* We should have catched mismatched types earlier.  */
              gcc_assert (useless_type_conversion_p (vectype,
                                                     TREE_TYPE (vec_rhs)));
@@ -2100,11 +2038,9 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
            }
          else
            {
-             vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
-                                 &def, &dt);
+             vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
              vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
-             vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
-                                 &def, &dt);
+             vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                             TYPE_SIZE_UNIT (vectype));
@@ -2122,10 +2058,11 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
+         tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
+                                   misalign ? misalign & -misalign : align);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
-                                         gimple_call_arg (stmt, 1),
-                                         vec_mask, vec_rhs);
+                                         ptr, vec_mask, vec_rhs);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
@@ -2145,7 +2082,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
 
          if (i == 0)
            {
-             vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
+             vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                      NULL_TREE, &dummy, gsi,
                                                      &ptr_incr, false, &inv_p);
@@ -2153,8 +2090,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
            }
          else
            {
-             vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
-                                 &def, &dt);
+             vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                             TYPE_SIZE_UNIT (vectype));
@@ -2172,10 +2108,11 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
+         tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
+                                   misalign ? misalign & -misalign : align);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
-                                         gimple_call_arg (stmt, 1),
-                                         vec_mask);
+                                         ptr, vec_mask);
          gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
@@ -2190,6 +2127,11 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
     {
       /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
+      if (STMT_VINFO_RELATED_STMT (stmt_info))
+       {
+         stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+         stmt_info = vinfo_for_stmt (stmt);
+       }
       tree lhs = gimple_call_lhs (stmt);
       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
       set_vinfo_for_stmt (new_stmt, stmt_info);
@@ -2201,6 +2143,31 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
   return true;
 }
 
+/* Return true if vector types VECTYPE_IN and VECTYPE_OUT both have
+   integer elements and if supportable_narrowing_operation can narrow
+   VECTYPE_IN to VECTYPE_OUT in a single step.  On success, store the binary
+   pack code in *CONVERT_CODE; on failure *CONVERT_CODE is left untouched.  */
+
+static bool
+simple_integer_narrowing (tree vectype_out, tree vectype_in,
+                         tree_code *convert_code)
+{
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
+      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
+    return false;
+
+  tree_code code;
+  int multi_step_cvt = 0;  /* Nonzero after the query means "not single-step".  */
+  auto_vec <tree, 8> interm_types;  /* Passed to the query; never read here.  */
+  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
+                                       &code, &multi_step_cvt,
+                                       &interm_types)
+      || multi_step_cvt)
+    return false;
+
+  *convert_code = code;
+  return true;
+}
 
 /* Function vectorizable_call.
 
@@ -2210,7 +2177,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 static bool
-vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
+vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                   slp_tree slp_node)
 {
   gcall *stmt;
@@ -2224,11 +2191,12 @@ vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   int nunits_out;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-  tree fndecl, new_temp, def, rhs_type;
-  gimple def_stmt;
+  vec_info *vinfo = stmt_info->vinfo;
+  tree fndecl, new_temp, rhs_type;
+  gimple *def_stmt;
   enum vect_def_type dt[3]
     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
-  gimple new_stmt = NULL;
+  gimple *new_stmt = NULL;
   int ncopies, j;
   vec<tree> vargs = vNULL;
   enum { NARROW, NONE, WIDEN } modifier;
@@ -2297,8 +2265,7 @@ vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       if (!rhs_type)
        rhs_type = TREE_TYPE (op);
 
-      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
-                                &def_stmt, &def, &dt[i], &opvectype))
+      if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -2348,15 +2315,48 @@ vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   else
     return false;
 
+  /* We only handle functions that do not read or clobber memory.  */
+  if (gimple_vuse (stmt))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "function reads from or writes to memory.\n");
+      return false;
+    }
+
   /* For now, we only vectorize functions if a target specific builtin
      is available.  TODO -- in some cases, it might be profitable to
      insert the calls for pieces of the vector, in order to be able
      to vectorize other operations in the loop.  */
-  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
-  if (fndecl == NULL_TREE)
+  fndecl = NULL_TREE;
+  internal_fn ifn = IFN_LAST;
+  combined_fn cfn = gimple_call_combined_fn (stmt);
+  tree callee = gimple_call_fndecl (stmt);
+
+  /* First try using an internal function.  */
+  tree_code convert_code = ERROR_MARK;
+  if (cfn != CFN_LAST
+      && (modifier == NONE
+         || (modifier == NARROW
+             && simple_integer_narrowing (vectype_out, vectype_in,
+                                          &convert_code))))
+    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
+                                         vectype_in);
+
+  /* If that fails, try asking for a target-specific built-in function.  */
+  if (ifn == IFN_LAST)
+    {
+      if (cfn != CFN_LAST)
+       fndecl = targetm.vectorize.builtin_vectorized_function
+         (cfn, vectype_out, vectype_in);
+      else
+       fndecl = targetm.vectorize.builtin_md_vectorized_function
+         (callee, vectype_out, vectype_in);
+    }
+
+  if (ifn == IFN_LAST && !fndecl)
     {
-      if (gimple_call_internal_p (stmt)
-         && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
+      if (cfn == CFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
@@ -2377,11 +2377,9 @@ vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
        }
     }
 
-  gcc_assert (!gimple_vuse (stmt));
-
   if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
-  else if (modifier == NARROW)
+  else if (modifier == NARROW && ifn == IFN_LAST)
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2397,6 +2395,10 @@ vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
                          "\n");
       vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
+      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
+       add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
+                      vec_promote_demote, stmt_info, 0, vect_body);
+
       return true;
     }
 
@@ -2410,9 +2412,9 @@ vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
 
   prev_stmt_info = NULL;
-  switch (modifier)
+  if (modifier == NONE || ifn != IFN_LAST)
     {
-    case NONE:
+      tree prev_res = NULL_TREE;
       for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
@@ -2440,9 +2442,30 @@ vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
-                 new_stmt = gimple_build_call_vec (fndecl, vargs);
-                 new_temp = make_ssa_name (vec_dest, new_stmt);
-                 gimple_call_set_lhs (new_stmt, new_temp);
+                 if (modifier == NARROW)
+                   {
+                     tree half_res = make_ssa_name (vectype_in);
+                     new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+                     gimple_call_set_lhs (new_stmt, half_res);
+                     vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                     if ((i & 1) == 0)
+                       {
+                         prev_res = half_res;
+                         continue;
+                       }
+                     new_temp = make_ssa_name (vec_dest);
+                     new_stmt = gimple_build_assign (new_temp, convert_code,
+                                                     prev_res, half_res);
+                   }
+                 else
+                   {
+                     if (ifn != IFN_LAST)
+                       new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+                     else
+                       new_stmt = gimple_build_call_vec (fndecl, vargs);
+                     new_temp = make_ssa_name (vec_dest, new_stmt);
+                     gimple_call_set_lhs (new_stmt, new_temp);
+                   }
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }
@@ -2460,7 +2483,7 @@ vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
-                 = vect_get_vec_def_for_operand (op, stmt, NULL);
+                 = vect_get_vec_def_for_operand (op, stmt);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
@@ -2480,34 +2503,48 @@ vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
              tree cst = build_vector (vectype_out, v);
              tree new_var
-               = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
-             gimple init_stmt = gimple_build_assign (new_var, cst);
-             new_temp = make_ssa_name (new_var, init_stmt);
-             gimple_assign_set_lhs (init_stmt, new_temp);
+               = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
+             gimple *init_stmt = gimple_build_assign (new_var, cst);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest);
-             new_stmt = gimple_build_assign (new_temp,
-                                             gimple_assign_lhs (init_stmt));
+             new_stmt = gimple_build_assign (new_temp, new_var);
+           }
+         else if (modifier == NARROW)
+           {
+             tree half_res = make_ssa_name (vectype_in);
+             new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+             gimple_call_set_lhs (new_stmt, half_res);
+             vect_finish_stmt_generation (stmt, new_stmt, gsi);
+             if ((j & 1) == 0)
+               {
+                 prev_res = half_res;
+                 continue;
+               }
+             new_temp = make_ssa_name (vec_dest);
+             new_stmt = gimple_build_assign (new_temp, convert_code,
+                                             prev_res, half_res);
            }
          else
            {
-             new_stmt = gimple_build_call_vec (fndecl, vargs);
+             if (ifn != IFN_LAST)
+               new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+             else
+               new_stmt = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
 
-         if (j == 0)
+         if (j == (modifier == NARROW ? 1 : 0))
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
 
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
-
-      break;
-
-    case NARROW:
+    }
+  else if (modifier == NARROW)
+    {
       for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
@@ -2537,7 +2574,10 @@ vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
-                 new_stmt = gimple_build_call_vec (fndecl, vargs);
+                 if (ifn != IFN_LAST)
+                   new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+                 else
+                   new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -2558,7 +2598,7 @@ vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
              if (j == 0)
                {
                  vec_oprnd0
-                   = vect_get_vec_def_for_operand (op, stmt, NULL);
+                   = vect_get_vec_def_for_operand (op, stmt);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
@@ -2589,13 +2629,10 @@ vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
        }
 
       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
-
-      break;
-
-    case WIDEN:
-      /* No current target implements this case.  */
-      return false;
     }
+  else
+    /* No current target implements this case.  */
+    return false;
 
   vargs.release ();
 
@@ -2612,6 +2649,30 @@ vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
     lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
   else
     lhs = gimple_call_lhs (stmt);
+
+  if (gimple_call_internal_p (stmt)
+      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
+    {
+      /* Replace uses of the lhs of GOMP_SIMD_LANE call outside the loop
+        with vf - 1 rather than 0, that is the last iteration of the
+        vectorized loop.  */
+      imm_use_iterator iter;
+      use_operand_p use_p;
+      gimple *use_stmt;
+      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
+       {
+         basic_block use_bb = gimple_bb (use_stmt);
+         if (use_bb
+             && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), use_bb))
+           {
+             FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+               SET_USE (use_p, build_int_cst (TREE_TYPE (lhs),
+                                              ncopies * nunits_out - 1));
+             update_stmt (use_stmt);
+           }
+       }
+    }
+
   new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
   set_vinfo_for_stmt (new_stmt, stmt_info);
   set_vinfo_for_stmt (stmt, NULL);
@@ -2629,8 +2690,79 @@ struct simd_call_arg_info
   enum vect_def_type dt;
   HOST_WIDE_INT linear_step;
   unsigned int align;
+  bool simd_lane_linear;
 };
 
+/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
+   is linear within simd lane (but not within whole loop), note it in
+   *ARGINFO.  */
+
+static void
+vect_simd_lane_linear (tree op, struct loop *loop,
+                      struct simd_call_arg_info *arginfo)
+{
+  gimple *def_stmt = SSA_NAME_DEF_STMT (op);
+
+  if (!is_gimple_assign (def_stmt)
+      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
+      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
+    return;
+
+  tree base = gimple_assign_rhs1 (def_stmt);
+  HOST_WIDE_INT linear_step = 0;
+  tree v = gimple_assign_rhs2 (def_stmt);
+  while (TREE_CODE (v) == SSA_NAME)
+    {
+      tree t;
+      def_stmt = SSA_NAME_DEF_STMT (v);
+      if (is_gimple_assign (def_stmt))
+       switch (gimple_assign_rhs_code (def_stmt))
+         {
+         case PLUS_EXPR:
+           t = gimple_assign_rhs2 (def_stmt);
+           if (linear_step || TREE_CODE (t) != INTEGER_CST)
+             return;
+           base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
+           v = gimple_assign_rhs1 (def_stmt);
+           continue;
+         case MULT_EXPR:
+           t = gimple_assign_rhs2 (def_stmt);
+           if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
+             return;
+           linear_step = tree_to_shwi (t);
+           v = gimple_assign_rhs1 (def_stmt);
+           continue;
+         CASE_CONVERT:
+           t = gimple_assign_rhs1 (def_stmt);
+           if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
+               || (TYPE_PRECISION (TREE_TYPE (v))
+                   < TYPE_PRECISION (TREE_TYPE (t))))
+             return;
+           if (!linear_step)
+             linear_step = 1;
+           v = t;
+           continue;
+         default:
+           return;
+         }
+      else if (is_gimple_call (def_stmt)
+              && gimple_call_internal_p (def_stmt)
+              && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
+              && loop->simduid
+              && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
+              && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
+                  == loop->simduid))
+       {
+         if (!linear_step)
+           linear_step = 1;
+         arginfo->linear_step = linear_step;
+         arginfo->op = base;
+         arginfo->simd_lane_linear = true;
+         return;
+       }
+    }
+}
+
 /* Function vectorizable_simd_clone_call.
 
    Check if STMT performs a function call that can be vectorized
@@ -2640,8 +2772,8 @@ struct simd_call_arg_info
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 static bool
-vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
-                             gimple *vec_stmt, slp_tree slp_node)
+vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
+                             gimple **vec_stmt, slp_tree slp_node)
 {
   tree vec_dest;
   tree scalar_dest;
@@ -2652,10 +2784,11 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
   unsigned int nunits;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  vec_info *vinfo = stmt_info->vinfo;
   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
-  tree fndecl, new_temp, def;
-  gimple def_stmt;
-  gimple new_stmt = NULL;
+  tree fndecl, new_temp;
+  gimple *def_stmt;
+  gimple *new_stmt = NULL;
   int ncopies, j;
   vec<simd_call_arg_info> arginfo = vNULL;
   vec<tree> vargs = vNULL;
@@ -2713,11 +2846,11 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
       thisarginfo.linear_step = 0;
       thisarginfo.align = 0;
       thisarginfo.op = NULL_TREE;
+      thisarginfo.simd_lane_linear = false;
 
       op = gimple_call_arg (stmt, i);
-      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
-                                &def_stmt, &def, &thisarginfo.dt,
-                                &thisarginfo.vectype)
+      if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
+                              &thisarginfo.vectype)
          || thisarginfo.dt == vect_uninitialized_def)
        {
          if (dump_enabled_p ())
@@ -2735,21 +2868,24 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
 
       /* For linear arguments, the analyze phase should have saved
         the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
-      if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
-         && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
+      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
+         && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
        {
          gcc_assert (vec_stmt);
          thisarginfo.linear_step
-           = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
+           = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
          thisarginfo.op
-           = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
+           = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
+         thisarginfo.simd_lane_linear
+           = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
+              == boolean_true_node);
          /* If loop has been peeled for alignment, we need to adjust it.  */
          tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
          tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
-         if (n1 != n2)
+         if (n1 != n2 && !thisarginfo.simd_lane_linear)
            {
              tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
-             tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
+             tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
              tree opt = TREE_TYPE (thisarginfo.op);
              bias = fold_convert (TREE_TYPE (step), bias);
              bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
@@ -2775,6 +2911,17 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
                || thisarginfo.dt == vect_external_def)
               && POINTER_TYPE_P (TREE_TYPE (op)))
        thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
+      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
+        linear too.  */
+      if (POINTER_TYPE_P (TREE_TYPE (op))
+         && !thisarginfo.linear_step
+         && !vec_stmt
+         && thisarginfo.dt != vect_constant_def
+         && thisarginfo.dt != vect_external_def
+         && loop_vinfo
+         && !slp_node
+         && TREE_CODE (op) == SSA_NAME)
+       vect_simd_lane_linear (op, loop, &thisarginfo);
 
       arginfo.quick_push (thisarginfo);
     }
@@ -2825,6 +2972,7 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
+             case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
                if (arginfo[i].dt == vect_constant_def
                    || arginfo[i].dt == vect_external_def
                    || (arginfo[i].linear_step
@@ -2832,6 +2980,11 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
+             case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
+             case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
+             case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
+             case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
+             case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
                /* FORNOW */
                i = -1;
                break;
@@ -2906,13 +3059,16 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
        if (bestn->simdclone->args[i].arg_type
            == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
          {
-           STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
+           STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
                                                                        + 1);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
            tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
                       ? size_type_node : TREE_TYPE (arginfo[i].op);
            tree ls = build_int_cst (lst, arginfo[i].linear_step);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
+           tree sll = arginfo[i].simd_lane_linear
+                      ? boolean_true_node : boolean_false_node;
+           STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
          }
       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
       if (dump_enabled_p ())
@@ -2974,7 +3130,7 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
                      gcc_assert ((k & (k - 1)) == 0);
                      if (m == 0)
                        vec_oprnd0
-                         = vect_get_vec_def_for_operand (op, stmt, NULL);
+                         = vect_get_vec_def_for_operand (op, stmt);
                      else
                        {
                          vec_oprnd0 = arginfo[i].op;
@@ -3008,7 +3164,7 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
                        {
                          if (m == 0 && l == 0)
                            vec_oprnd0
-                             = vect_get_vec_def_for_operand (op, stmt, NULL);
+                             = vect_get_vec_def_for_operand (op, stmt);
                          else
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
@@ -3050,11 +3206,15 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
                      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
                      gcc_assert (!new_bb);
                    }
+                 if (arginfo[i].simd_lane_linear)
+                   {
+                     vargs.safe_push (arginfo[i].op);
+                     break;
+                   }
                  tree phi_res = copy_ssa_name (op);
                  gphi *new_phi = create_phi_node (phi_res, loop->header);
                  set_vinfo_for_stmt (new_phi,
-                                     new_stmt_vec_info (new_phi, loop_vinfo,
-                                                        NULL));
+                                     new_stmt_vec_info (new_phi, loop_vinfo));
                  add_phi_arg (new_phi, arginfo[i].op,
                               loop_preheader_edge (loop), UNKNOWN_LOCATION);
                  enum tree_code code
@@ -3072,8 +3232,7 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
                  gimple_stmt_iterator si = gsi_after_labels (loop->header);
                  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
                  set_vinfo_for_stmt (new_stmt,
-                                     new_stmt_vec_info (new_stmt, loop_vinfo,
-                                                        NULL));
+                                     new_stmt_vec_info (new_stmt, loop_vinfo));
                  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
                               UNKNOWN_LOCATION);
                  arginfo[i].op = phi_res;
@@ -3098,6 +3257,9 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
                }
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
+           case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
+           case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
+           case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
            default:
              gcc_unreachable ();
            }
@@ -3265,14 +3427,14 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
    needs to be created (DECL is a function-decl of a target-builtin).
    STMT is the original scalar stmt that we are vectorizing.  */
 
-static gimple
+static gimple *
 vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
-                              gimple stmt)
+                              gimple *stmt)
 {
-  gimple new_stmt;
+  gimple *new_stmt;
   tree new_temp;
 
   /* Generate half of the widened result:  */
@@ -3310,7 +3472,7 @@ vect_gen_widened_results_half (enum tree_code code,
    The vectors are collected into VEC_OPRNDS.  */
 
 static void
-vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
+vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
 {
   tree vec_oprnd;
@@ -3319,7 +3481,7 @@ vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
   /* All the vector operands except the very first one (that is scalar oprnd)
      are stmt copies.  */
   if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
-    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
+    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
   else
     vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
 
@@ -3344,7 +3506,7 @@ vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
 
 static void
 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
-                                      int multi_step_cvt, gimple stmt,
+                                      int multi_step_cvt, gimple *stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
@@ -3352,7 +3514,7 @@ vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
 {
   unsigned int i;
   tree vop0, vop1, new_tmp, vec_dest;
-  gimple new_stmt;
+  gimple *new_stmt;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 
   vec_dest = vec_dsts.pop ();
@@ -3415,7 +3577,7 @@ vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
 static void
 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
-                                       gimple stmt, tree vec_dest,
+                                       gimple *stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
@@ -3423,7 +3585,7 @@ vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
 {
   int i;
   tree vop0, vop1, new_tmp1, new_tmp2;
-  gimple new_stmt1, new_stmt2;
+  gimple *new_stmt1, *new_stmt2;
   vec<tree> vec_tmp = vNULL;
 
   vec_tmp.create (vec_oprnds0->length () * 2);
@@ -3466,8 +3628,8 @@ vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 static bool
-vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
-                        gimple *vec_stmt, slp_tree slp_node)
+vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
+                        gimple **vec_stmt, slp_tree slp_node)
 {
   tree vec_dest;
   tree scalar_dest;
@@ -3479,10 +3641,9 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
   tree new_temp;
-  tree def;
-  gimple def_stmt;
+  gimple *def_stmt;
   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
-  gimple new_stmt = NULL;
+  gimple *new_stmt = NULL;
   stmt_vec_info prev_stmt_info;
   int nunits_in;
   int nunits_out;
@@ -3494,6 +3655,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
   vec<tree> vec_oprnds1 = vNULL;
   tree vop0;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  vec_info *vinfo = stmt_info->vinfo;
   int multi_step_cvt = 0;
   vec<tree> vec_dsts = vNULL;
   vec<tree> interm_types = vNULL;
@@ -3541,12 +3703,13 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
     return false;
 
-  if ((INTEGRAL_TYPE_P (lhs_type)
-       && (TYPE_PRECISION (lhs_type)
-          != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
-      || (INTEGRAL_TYPE_P (rhs_type)
-         && (TYPE_PRECISION (rhs_type)
-             != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
+  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
+      && ((INTEGRAL_TYPE_P (lhs_type)
+          && (TYPE_PRECISION (lhs_type)
+              != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
+         || (INTEGRAL_TYPE_P (rhs_type)
+             && (TYPE_PRECISION (rhs_type)
+                 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
     {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3556,8 +3719,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
     }
 
   /* Check the operands of the operation.  */
-  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
-                            &def_stmt, &def, &dt[0], &vectype_in))
+  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
     {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3573,11 +3735,9 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
       if (CONSTANT_CLASS_P (op0))
-       ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
-                                  &def_stmt, &def, &dt[1], &vectype_in);
+       ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
       else
-       ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
-                                &def, &dt[1]);
+       ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
 
       if (!ok)
        {
@@ -3607,6 +3767,21 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
       return false;
     }
 
+  if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
+      && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
+    {
+      if (dump_enabled_p ())
+       {
+         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                           "can't convert between boolean and non "
+                          "boolean vectors");
+         dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
+          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+       }
+
+      return false;
+    }
+
   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
   if (nunits_in < nunits_out)
@@ -3850,13 +4025,15 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+             else
+               {
+                 if (!prev_stmt_info)
+                   STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+                 else
+                   STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+                 prev_stmt_info = vinfo_for_stmt (new_stmt);
+               }
            }
-
-         if (j == 0)
-           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
-         else
-           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
-         prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
       break;
 
@@ -3892,15 +4069,14 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                }
              else
                {
-                 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
+                 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
                  vec_oprnds0.quick_push (vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
-                       vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
-                                                                  NULL);
+                       vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
                      vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
@@ -4045,40 +4221,28 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 static bool
-vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
-                        gimple *vec_stmt, slp_tree slp_node)
+vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
+                        gimple **vec_stmt, slp_tree slp_node)
 {
   tree vec_dest;
   tree scalar_dest;
   tree op;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   tree new_temp;
-  tree def;
-  gimple def_stmt;
+  gimple *def_stmt;
   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
-  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
   int ncopies;
   int i, j;
   vec<tree> vec_oprnds = vNULL;
   tree vop;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-  gimple new_stmt = NULL;
+  vec_info *vinfo = stmt_info->vinfo;
+  gimple *new_stmt = NULL;
   stmt_vec_info prev_stmt_info = NULL;
   enum tree_code code;
   tree vectype_in;
 
-  /* Multiple types in SLP are handled by creating the appropriate number of
-     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
-     case of SLP.  */
-  if (slp_node || PURE_SLP_STMT (stmt_info))
-    ncopies = 1;
-  else
-    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
-
-  gcc_assert (ncopies >= 1);
-
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
 
@@ -4104,8 +4268,20 @@ vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
   if (code == VIEW_CONVERT_EXPR)
     op = TREE_OPERAND (op, 0);
 
-  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
-                            &def_stmt, &def, &dt[0], &vectype_in))
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+
+  /* Multiple types in SLP are handled by creating the appropriate number of
+     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
+     case of SLP.  */
+  if (slp_node || PURE_SLP_STMT (stmt_info))
+    ncopies = 1;
+  else
+    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+
+  gcc_assert (ncopies >= 1);
+
+  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -4134,7 +4310,12 @@ vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
       /* But a conversion that does not change the bit-pattern is ok.  */
       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
-          && TYPE_UNSIGNED (TREE_TYPE (op))))
+          && TYPE_UNSIGNED (TREE_TYPE (op)))
+      /* Conversion between boolean types of different sizes is
+        a simple assignment in case their vectypes are the same
+        boolean vectors.  */
+      && (!VECTOR_BOOLEAN_TYPE_P (vectype)
+         || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -4243,8 +4424,8 @@ vect_supportable_shift (enum tree_code code, tree scalar_type)
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 static bool
-vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
-                    gimple *vec_stmt, slp_tree slp_node)
+vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
+                    gimple **vec_stmt, slp_tree slp_node)
 {
   tree vec_dest;
   tree scalar_dest;
@@ -4259,10 +4440,9 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
   optab optab;
   int icode;
   machine_mode optab_op2_mode;
-  tree def;
-  gimple def_stmt;
+  gimple *def_stmt;
   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
-  gimple new_stmt = NULL;
+  gimple *new_stmt = NULL;
   stmt_vec_info prev_stmt_info;
   int nunits_in;
   int nunits_out;
@@ -4276,6 +4456,7 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
   unsigned int k;
   bool scalar_shift_arg = true;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  vec_info *vinfo = stmt_info->vinfo;
   int vf;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
@@ -4309,8 +4490,7 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
     }
 
   op0 = gimple_assign_rhs1 (stmt);
-  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
-                             &def_stmt, &def, &dt[0], &vectype))
+  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -4337,8 +4517,7 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
     return false;
 
   op1 = gimple_assign_rhs2 (stmt);
-  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
-                            &def, &dt[1], &op1_vectype))
+  if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -4364,7 +4543,9 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
   /* Determine whether the shift amount is a vector, or scalar.  If the
      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
 
-  if (dt[1] == vect_internal_def && !slp_node)
+  if ((dt[1] == vect_internal_def
+       || dt[1] == vect_induction_def)
+      && !slp_node)
     scalar_shift_arg = false;
   else if (dt[1] == vect_constant_def
           || dt[1] == vect_external_def
@@ -4375,8 +4556,8 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
         a scalar shift.  */
       if (slp_node)
        {
-         vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
-         gimple slpstmt;
+         vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
+         gimple *slpstmt;
 
          FOR_EACH_VEC_ELT (stmts, k, slpstmt)
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
@@ -4606,8 +4787,8 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 static bool
-vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
-                       gimple *vec_stmt, slp_tree slp_node)
+vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
+                       gimple **vec_stmt, slp_tree slp_node)
 {
   tree vec_dest;
   tree scalar_dest;
@@ -4620,12 +4801,11 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   tree new_temp;
   int op_type;
   optab optab;
-  int icode;
-  tree def;
-  gimple def_stmt;
+  bool target_support_p;
+  gimple *def_stmt;
   enum vect_def_type dt[3]
     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
-  gimple new_stmt = NULL;
+  gimple *new_stmt = NULL;
   stmt_vec_info prev_stmt_info;
   int nunits_in;
   int nunits_out;
@@ -4637,6 +4817,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   vec<tree> vec_oprnds2 = vNULL;
   tree vop0, vop1, vop2;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  vec_info *vinfo = stmt_info->vinfo;
   int vf;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
@@ -4675,8 +4856,9 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
 
   /* Most operations cannot handle bit-precision types without extra
      truncations.  */
-  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
-       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
+  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
+      && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
+         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
       /* Exception are bitwise binary operations.  */
       && code != BIT_IOR_EXPR
       && code != BIT_XOR_EXPR
@@ -4689,8 +4871,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
     }
 
   op0 = gimple_assign_rhs1 (stmt);
-  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
-                            &def_stmt, &def, &dt[0], &vectype))
+  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -4700,7 +4881,26 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   /* If op0 is an external or constant def use a vector type with
      the same size as the output vector type.  */
   if (!vectype)
-    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
+    {
+      /* For a boolean type we cannot determine the vectype from
+        an invariant value (we don't know whether it is a vector
+        of booleans or a vector of integers).  We use the output
+        vectype because operations on booleans don't change the
+        type.  */
+      if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
+       {
+         if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "not supported operation on bool value.\n");
+             return false;
+           }
+         vectype = vectype_out;
+       }
+      else
+       vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
+    }
   if (vec_stmt)
     gcc_assert (vectype);
   if (!vectype)
@@ -4725,8 +4925,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   if (op_type == binary_op || op_type == ternary_op)
     {
       op1 = gimple_assign_rhs2 (stmt);
-      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
-                              &def, &dt[1]))
+      if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -4737,8 +4936,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   if (op_type == ternary_op)
     {
       op2 = gimple_assign_rhs3 (stmt);
-      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
-                              &def, &dt[2]))
+      if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -4771,12 +4969,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
 
   vec_mode = TYPE_MODE (vectype);
   if (code == MULT_HIGHPART_EXPR)
-    {
-      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
-       icode = LAST_INSN_CODE;
-      else
-       icode = CODE_FOR_nothing;
-    }
+    target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
   else
     {
       optab = optab_for_tree_code (code, vectype, optab_default);
@@ -4787,10 +4980,11 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                              "no optab.\n");
          return false;
        }
-      icode = (int) optab_handler (optab, vec_mode);
+      target_support_p = (optab_handler (optab, vec_mode)
+                         != CODE_FOR_nothing);
     }
 
-  if (icode == CODE_FOR_nothing)
+  if (!target_support_p)
     {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -4903,8 +5097,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
            {
              vec_oprnds2.create (1);
              vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
-                                                                   stmt,
-                                                                   NULL));
+                                                                   stmt));
            }
        }
       else
@@ -4959,10 +5152,10 @@ ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
   if (!dr->aux)
     return;
 
-  if (((dataref_aux *)dr->aux)->base_misaligned)
+  if (DR_VECT_AUX (dr)->base_misaligned)
     {
       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-      tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
+      tree base_decl = DR_VECT_AUX (dr)->base_decl;
 
       if (decl_in_symtab_p (base_decl))
        symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
@@ -4971,7 +5164,7 @@ ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
           DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
           DECL_USER_ALIGN (base_decl) = 1;
        }
-      ((dataref_aux *)dr->aux)->base_misaligned = false;
+      DR_VECT_AUX (dr)->base_misaligned = false;
     }
 }
 
@@ -5006,7 +5199,7 @@ perm_mask_for_reverse (tree vectype)
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 static bool
-vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
+vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                     slp_tree slp_node)
 {
   tree scalar_dest;
@@ -5015,24 +5208,21 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   tree vec_oprnd = NULL_TREE;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
-  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   tree elem_type;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   struct loop *loop = NULL;
   machine_mode vec_mode;
   tree dummy;
   enum dr_alignment_support alignment_support_scheme;
-  tree def;
-  gimple def_stmt;
+  gimple *def_stmt;
   enum vect_def_type dt;
   stmt_vec_info prev_stmt_info = NULL;
   tree dataref_ptr = NULL_TREE;
   tree dataref_offset = NULL_TREE;
-  gimple ptr_incr = NULL;
-  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  gimple *ptr_incr = NULL;
   int ncopies;
   int j;
-  gimple next_stmt, first_stmt = NULL;
+  gimple *next_stmt, *first_stmt = NULL;
   bool grouped_store = false;
   bool store_lanes_p = false;
   unsigned int group_size, i;
@@ -5046,29 +5236,14 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   bool slp = (slp_node != NULL);
   unsigned int vec_num;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  vec_info *vinfo = stmt_info->vinfo;
   tree aggr_type;
-
-  if (loop_vinfo)
-    loop = LOOP_VINFO_LOOP (loop_vinfo);
-
-  /* Multiple types in SLP are handled by creating the appropriate number of
-     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
-     case of SLP.  */
-  if (slp || PURE_SLP_STMT (stmt_info))
-    ncopies = 1;
-  else
-    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
-
-  gcc_assert (ncopies >= 1);
-
-  /* FORNOW. This restriction should be relaxed.  */
-  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
-    {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                         "multiple types in nested loop.\n");
-      return false;
-    }
+  tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
+  tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
+  int scatter_scale = 1;
+  enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
+  enum vect_def_type scatter_src_dt = vect_unknown_def_type;
+  gimple *new_stmt;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
@@ -5095,9 +5270,34 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
     return false;
 
   gcc_assert (gimple_assign_single_p (stmt));
+
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+
+  if (loop_vinfo)
+    loop = LOOP_VINFO_LOOP (loop_vinfo);
+
+  /* Multiple types in SLP are handled by creating the appropriate number of
+     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
+     case of SLP.  */
+  if (slp || PURE_SLP_STMT (stmt_info))
+    ncopies = 1;
+  else
+    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+
+  gcc_assert (ncopies >= 1);
+
+  /* FORNOW.  This restriction should be relaxed.  */
+  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "multiple types in nested loop.\n");
+      return false;
+    }
+
   op = gimple_assign_rhs1 (stmt);
-  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
-                          &def, &dt))
+  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5157,15 +5357,27 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
     {
       grouped_store = true;
       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
-      if (!slp && !PURE_SLP_STMT (stmt_info))
+      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
+      if (!slp
+         && !PURE_SLP_STMT (stmt_info)
+         && !STMT_VINFO_STRIDED_P (stmt_info))
        {
-         group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_store_lanes_supported (vectype, group_size))
            store_lanes_p = true;
          else if (!vect_grouped_store_supported (vectype, group_size))
            return false;
        }
 
+      if (STMT_VINFO_STRIDED_P (stmt_info)
+         && (slp || PURE_SLP_STMT (stmt_info))
+         && (group_size > nunits
+             || nunits % group_size != 0))
+       {
+         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                          "unhandled strided group store\n");
+         return false;
+       }
+
       if (first_stmt == stmt)
        {
           /* STMT is the leader of the group. Check the operands of all the
@@ -5175,8 +5387,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
             {
              gcc_assert (gimple_assign_single_p (next_stmt));
              op = gimple_assign_rhs1 (next_stmt);
-              if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
-                                      &def_stmt, &def, &dt))
+              if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
                 {
                   if (dump_enabled_p ())
                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5188,6 +5399,22 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
         }
     }
 
+  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+    {
+      gimple *def_stmt;
+      scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
+                                               &scatter_off, &scatter_scale);
+      gcc_assert (scatter_decl);
+      if (!vect_is_simple_use (scatter_off, vinfo, &def_stmt, &scatter_idx_dt,
+                              &scatter_off_vectype))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                             "scatter index use not simple.");
+         return false;
+       }
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
@@ -5202,6 +5429,144 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   ensure_base_align (stmt_info, dr);
 
+  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+    {
+      tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
+      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
+      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
+      tree ptr, mask, var, scale, perm_mask = NULL_TREE;
+      edge pe = loop_preheader_edge (loop);
+      gimple_seq seq;
+      basic_block new_bb;
+      enum { NARROW, NONE, WIDEN } modifier;
+      int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
+
+      if (nunits == (unsigned int) scatter_off_nunits)
+       modifier = NONE;
+      else if (nunits == (unsigned int) scatter_off_nunits / 2)
+       {
+         unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
+         modifier = WIDEN;
+
+         for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
+           sel[i] = i | nunits;
+
+         perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
+         gcc_assert (perm_mask != NULL_TREE);
+       }
+      else if (nunits == (unsigned int) scatter_off_nunits * 2)
+       {
+         unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
+         modifier = NARROW;
+
+         for (i = 0; i < (unsigned int) nunits; ++i)
+           sel[i] = i | scatter_off_nunits;
+
+         perm_mask = vect_gen_perm_mask_checked (vectype, sel);
+         gcc_assert (perm_mask != NULL_TREE);
+         ncopies *= 2;
+       }
+      else
+       gcc_unreachable ();
+
+      rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
+      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      scaletype = TREE_VALUE (arglist);
+
+      gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
+                          && TREE_CODE (rettype) == VOID_TYPE);
+
+      ptr = fold_convert (ptrtype, scatter_base);
+      if (!is_gimple_min_invariant (ptr))
+       {
+         ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
+         new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
+         gcc_assert (!new_bb);
+       }
+
+      /* Currently we support only unconditional scatter stores,
+        so mask should be all ones.  */
+      mask = build_int_cst (masktype, -1);
+      mask = vect_init_vector (stmt, mask, masktype, NULL);
+
+      scale = build_int_cst (scaletype, scatter_scale);
+
+      prev_stmt_info = NULL;
+      for (j = 0; j < ncopies; ++j)
+       {
+         if (j == 0)
+           {
+             src = vec_oprnd1
+               = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
+             op = vec_oprnd0
+               = vect_get_vec_def_for_operand (scatter_off, stmt);
+           }
+         else if (modifier != NONE && (j & 1))
+           {
+             if (modifier == WIDEN)
+               {
+                 src = vec_oprnd1
+                   = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
+                 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
+                                            stmt, gsi);
+               }
+             else if (modifier == NARROW)
+               {
+                 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
+                                             stmt, gsi);
+                 op = vec_oprnd0
+                   = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
+               }
+             else
+               gcc_unreachable ();
+           }
+         else
+           {
+             src = vec_oprnd1
+               = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
+             op = vec_oprnd0
+               = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
+           }
+
+         if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
+           {
+             gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
+                         == TYPE_VECTOR_SUBPARTS (srctype));
+             var = vect_get_new_ssa_name (srctype, vect_simple_var);
+             src = build1 (VIEW_CONVERT_EXPR, srctype, src);
+             new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
+             vect_finish_stmt_generation (stmt, new_stmt, gsi);
+             src = var;
+           }
+
+         if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
+           {
+             gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
+                         == TYPE_VECTOR_SUBPARTS (idxtype));
+             var = vect_get_new_ssa_name (idxtype, vect_simple_var);
+             op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
+             new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
+             vect_finish_stmt_generation (stmt, new_stmt, gsi);
+             op = var;
+           }
+
+         new_stmt
+           = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
+
+         vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+         if (prev_stmt_info == NULL)
+           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+         else
+           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+         prev_stmt_info = vinfo_for_stmt (new_stmt);
+       }
+      return true;
+    }
+
   if (grouped_store)
     {
       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
@@ -5229,6 +5594,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
              group.  */
           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
           first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 
+         gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
           first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
         } 
@@ -5252,23 +5618,24 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
     {
       gimple_stmt_iterator incr_gsi;
       bool insert_after;
-      gimple incr;
+      gimple *incr;
       tree offvar;
       tree ivstep;
       tree running_off;
       gimple_seq stmts = NULL;
       tree stride_base, stride_step, alias_off;
       tree vec_oprnd;
+      unsigned int g;
 
       gcc_assert (!nested_in_vect_loop_p (loop, stmt));
 
       stride_base
        = fold_build_pointer_plus
-           (unshare_expr (DR_BASE_ADDRESS (dr)),
+           (unshare_expr (DR_BASE_ADDRESS (first_dr)),
             size_binop (PLUS_EXPR,
-                        convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
-                        convert_to_ptrofftype (DR_INIT(dr))));
-      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
+                        convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
+                        convert_to_ptrofftype (DR_INIT(first_dr))));
+      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
 
       /* For a store with loop-invariant (but other than power-of-2)
          stride (i.e. not a grouped access) like so:
@@ -5288,10 +5655,24 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
             ...
          */
 
+      unsigned nstores = nunits;
+      tree ltype = elem_type;
+      if (slp)
+       {
+         nstores = nunits / group_size;
+         if (group_size < nunits)
+           ltype = build_vector_type (elem_type, group_size);
+         else
+           ltype = vectype;
+         ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
+         ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+         group_size = 1;
+       }
+
       ivstep = stride_step;
       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
                            build_int_cst (TREE_TYPE (ivstep),
-                                          ncopies * nunits));
+                                          ncopies * nstores));
 
       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
@@ -5299,58 +5680,100 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
       incr = gsi_stmt (incr_gsi);
-      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
+      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
 
       stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
       if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
 
       prev_stmt_info = NULL;
-      running_off = offvar;
-      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
-      for (j = 0; j < ncopies; j++)
+      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
+      next_stmt = first_stmt;
+      for (g = 0; g < group_size; g++)
        {
-         /* We've set op and dt above, from gimple_assign_rhs1(stmt),
-            and first_stmt == stmt.  */
-         if (j == 0)
-           vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
-         else
-           vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
-
-         for (i = 0; i < nunits; i++)
+         running_off = offvar;
+         if (g)
            {
-             tree newref, newoff;
-             gimple incr, assign;
-             tree size = TYPE_SIZE (elem_type);
-             /* Extract the i'th component.  */
-             tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
+             tree size = TYPE_SIZE_UNIT (ltype);
+             tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
                                      size);
-             tree elem = fold_build3 (BIT_FIELD_REF, elem_type, vec_oprnd,
-                                      size, pos);
-
-             elem = force_gimple_operand_gsi (gsi, elem, true,
-                                              NULL_TREE, true,
-                                              GSI_SAME_STMT);
-
-             newref = build2 (MEM_REF, TREE_TYPE (vectype),
-                              running_off, alias_off);
-
-             /* And store it to *running_off.  */
-             assign = gimple_build_assign (newref, elem);
-             vect_finish_stmt_generation (stmt, assign, gsi);
-
-             newoff = copy_ssa_name (running_off, NULL);
+             tree newoff = copy_ssa_name (running_off, NULL);
              incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
-                                         running_off, stride_step);
+                                         running_off, pos);
              vect_finish_stmt_generation (stmt, incr, gsi);
-
              running_off = newoff;
-             if (j == 0 && i == i)
-               STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
+           }
+         for (j = 0; j < ncopies; j++)
+           {
+             /* We've set op and dt above, from gimple_assign_rhs1(stmt),
+                and first_stmt == stmt.  */
+             if (j == 0)
+               {
+                 if (slp)
+                   {
+                     vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
+                                        slp_node, -1);
+                     vec_oprnd = vec_oprnds[0];
+                   }
+                 else
+                   {
+                     gcc_assert (gimple_assign_single_p (next_stmt));
+                     op = gimple_assign_rhs1 (next_stmt);
+                     vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
+                   }
+               }
              else
-               STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
-             prev_stmt_info = vinfo_for_stmt (assign);
+               {
+                 if (slp)
+                   vec_oprnd = vec_oprnds[j];
+                 else
+                   {
+                     vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
+                     vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
+                   }
+               }
+
+             for (i = 0; i < nstores; i++)
+               {
+                 tree newref, newoff;
+                 gimple *incr, *assign;
+                 tree size = TYPE_SIZE (ltype);
+                 /* Extract the i'th component.  */
+                 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
+                                         bitsize_int (i), size);
+                 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
+                                          size, pos);
+
+                 elem = force_gimple_operand_gsi (gsi, elem, true,
+                                                  NULL_TREE, true,
+                                                  GSI_SAME_STMT);
+
+                 newref = build2 (MEM_REF, ltype,
+                                  running_off, alias_off);
+
+                 /* And store it to *running_off.  */
+                 assign = gimple_build_assign (newref, elem);
+                 vect_finish_stmt_generation (stmt, assign, gsi);
+
+                 newoff = copy_ssa_name (running_off, NULL);
+                 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+                                             running_off, stride_step);
+                 vect_finish_stmt_generation (stmt, incr, gsi);
+
+                 running_off = newoff;
+                 if (g == group_size - 1
+                     && !slp)
+                   {
+                     if (j == 0 && i == 0)
+                       STMT_VINFO_VEC_STMT (stmt_info)
+                           = *vec_stmt = assign;
+                     else
+                       STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
+                     prev_stmt_info = vinfo_for_stmt (assign);
+                   }
+               }
            }
+         next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
        }
       return true;
     }
@@ -5416,7 +5839,6 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   prev_stmt_info = NULL;
   for (j = 0; j < ncopies; j++)
     {
-      gimple new_stmt;
 
       if (j == 0)
        {
@@ -5449,8 +5871,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);
 
-                 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
-                                                           NULL);
+                 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
                  dr_chain.quick_push (vec_oprnd);
                  oprnds.quick_push (vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
@@ -5495,8 +5916,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
          for (i = 0; i < group_size; i++)
            {
              op = oprnds[i];
-             vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
-                                 &def, &dt);
+             vect_is_simple_use (op, vinfo, &def_stmt, &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              dr_chain[i] = vec_oprnd;
              oprnds[i] = vec_oprnd;
@@ -5558,21 +5978,26 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   vect_permute_store_chain().  */
                vec_oprnd = result_chain[i];
 
-             data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
-                                dataref_offset
-                                ? dataref_offset
-                                : build_int_cst (reference_alias_ptr_type
-                                                 (DR_REF (first_dr)), 0));
+             data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
+                                     dataref_ptr,
+                                     dataref_offset
+                                     ? dataref_offset
+                                     : build_int_cst (reference_alias_ptr_type
+                                                      (DR_REF (first_dr)), 0));
              align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
+                 if (DR_VECT_AUX (first_dr)->base_element_aligned)
+                   align = TYPE_ALIGN_UNIT (elem_type);
+                 else
+                   align = get_object_alignment (DR_REF (first_dr))
+                       / BITS_PER_UNIT;
+                 misalign = 0;
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
-                                         TYPE_ALIGN (elem_type));
-                 align = TYPE_ALIGN_UNIT (elem_type);
-                 misalign = 0;
+                                         align * BITS_PER_UNIT);
                }
              else
                {
@@ -5581,7 +6006,8 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                                          TYPE_ALIGN (elem_type));
                  misalign = DR_MISALIGNMENT (first_dr);
                }
-             if (dataref_offset == NULL_TREE)
+             if (dataref_offset == NULL_TREE
+                 && TREE_CODE (dataref_ptr) == SSA_NAME)
                set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                        misalign);
 
@@ -5596,7 +6022,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                  tree new_temp = make_ssa_name (perm_dest);
 
                  /* Generate the permute statement.  */
-                 gimple perm_stmt 
+                 gimple *perm_stmt 
                    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
                                           vec_oprnd, perm_mask);
                  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
@@ -5676,12 +6102,12 @@ vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
    permuted vector variable.  */
 
 static tree
-permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
+permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
                      gimple_stmt_iterator *gsi)
 {
   tree vectype = TREE_TYPE (x);
   tree perm_dest, data_ref;
-  gimple perm_stmt;
+  gimple *perm_stmt;
 
   perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
   data_ref = make_ssa_name (perm_dest);
@@ -5699,7 +6125,7 @@ permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
    otherwise returns false.  */
 
 static bool
-hoist_defs_of_uses (gimple stmt, struct loop *loop)
+hoist_defs_of_uses (gimple *stmt, struct loop *loop)
 {
   ssa_op_iter i;
   tree op;
@@ -5707,7 +6133,7 @@ hoist_defs_of_uses (gimple stmt, struct loop *loop)
 
   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
     {
-      gimple def_stmt = SSA_NAME_DEF_STMT (op);
+      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
       if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
@@ -5721,7 +6147,7 @@ hoist_defs_of_uses (gimple stmt, struct loop *loop)
            return false;
          FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
            {
-             gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
+             gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
              if (!gimple_nop_p (def_stmt2)
                  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
                return false;
@@ -5735,7 +6161,7 @@ hoist_defs_of_uses (gimple stmt, struct loop *loop)
 
   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
     {
-      gimple def_stmt = SSA_NAME_DEF_STMT (op);
+      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
       if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
@@ -5757,7 +6183,7 @@ hoist_defs_of_uses (gimple stmt, struct loop *loop)
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 static bool
-vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
+vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                    slp_tree slp_node, slp_instance slp_node_instance)
 {
   tree scalar_dest;
@@ -5770,19 +6196,17 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
   bool nested_in_vect_loop = false;
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
-  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   tree elem_type;
   tree new_temp;
   machine_mode mode;
-  gimple new_stmt = NULL;
+  gimple *new_stmt = NULL;
   tree dummy;
   enum dr_alignment_support alignment_support_scheme;
   tree dataref_ptr = NULL_TREE;
   tree dataref_offset = NULL_TREE;
-  gimple ptr_incr = NULL;
-  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  gimple *ptr_incr = NULL;
   int ncopies;
-  int i, j, group_size = -1, group_gap;
+  int i, j, group_size = -1, group_gap_adj;
   tree msq = NULL_TREE, lsq;
   tree offset = NULL_TREE;
   tree byte_offset = NULL_TREE;
@@ -5791,7 +6215,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   vec<tree> dr_chain = vNULL;
   bool grouped_load = false;
   bool load_lanes_p = false;
-  gimple first_stmt;
+  gimple *first_stmt;
+  gimple *first_stmt_for_drptr = NULL;
   bool inv_p;
   bool negative = false;
   bool compute_in_loop = false;
@@ -5807,6 +6232,38 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
   int gather_scale = 1;
   enum vect_def_type gather_dt = vect_unknown_def_type;
+  vec_info *vinfo = stmt_info->vinfo;
+
+  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
+    return false;
+
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
+    return false;
+
+  /* Is vectorizable load? */
+  if (!is_gimple_assign (stmt))
+    return false;
+
+  scalar_dest = gimple_assign_lhs (stmt);
+  if (TREE_CODE (scalar_dest) != SSA_NAME)
+    return false;
+
+  code = gimple_assign_rhs_code (stmt);
+  if (code != ARRAY_REF
+      && code != BIT_FIELD_REF
+      && code != INDIRECT_REF
+      && code != COMPONENT_REF
+      && code != IMAGPART_EXPR
+      && code != REALPART_EXPR
+      && code != MEM_REF
+      && TREE_CODE_CLASS (code) != tcc_declaration)
+    return false;
+
+  if (!STMT_VINFO_DATA_REF (stmt_info))
+    return false;
+
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
 
   if (loop_vinfo)
     {
@@ -5850,36 +6307,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       return false;
     }
 
-  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
-    return false;
-
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
-    return false;
-
-  /* Is vectorizable load? */
-  if (!is_gimple_assign (stmt))
-    return false;
-
-  scalar_dest = gimple_assign_lhs (stmt);
-  if (TREE_CODE (scalar_dest) != SSA_NAME)
-    return false;
-
-  code = gimple_assign_rhs_code (stmt);
-  if (code != ARRAY_REF
-      && code != BIT_FIELD_REF
-      && code != INDIRECT_REF
-      && code != COMPONENT_REF
-      && code != IMAGPART_EXPR
-      && code != REALPART_EXPR
-      && code != MEM_REF
-      && TREE_CODE_CLASS (code) != tcc_declaration)
-    return false;
-
-  if (!STMT_VINFO_DATA_REF (stmt_info))
-    return false;
-
-  elem_type = TREE_TYPE (vectype);
-  mode = TYPE_MODE (vectype);
+  elem_type = TREE_TYPE (vectype);
+  mode = TYPE_MODE (vectype);
 
   /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
@@ -5896,7 +6325,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
     {
       grouped_load = true;
       /* FORNOW */
-      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
+      gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
 
       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
 
@@ -5904,17 +6333,50 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
          that leaves unused vector loads around punt - we at least create
         very sub-optimal code in that case (and blow up memory,
         see PR65518).  */
+      bool force_peeling = false;
       if (first_stmt == stmt
-         && !GROUP_NEXT_ELEMENT (stmt_info)
-         && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
+         && !GROUP_NEXT_ELEMENT (stmt_info))
+       {
+         if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "single-element interleaving not supported "
+                                "for not adjacent vector loads\n");
+             return false;
+           }
+
+         /* Single-element interleaving requires peeling for gaps.  */
+         force_peeling = true;
+       }
+
+      /* If there is a gap at the end of the group or the group size cannot
+         be made a multiple of the vector element count then we access excess
+        elements in the last iteration and thus need to peel that off.  */
+      if (loop_vinfo
+         && ! STMT_VINFO_STRIDED_P (stmt_info)
+         && (force_peeling
+             || GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
+             || (!slp && vf % GROUP_SIZE (vinfo_for_stmt (first_stmt)) != 0)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "single-element interleaving not supported "
-                            "for not adjacent vector loads\n");
-         return false;
+                            "Data access with gaps requires scalar "
+                            "epilogue loop\n");
+         if (loop->inner)
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "Peeling for outer loop is not supported\n");
+             return false;
+           }
+
+         LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
        }
 
+      if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
+       slp_perm = true;
+
       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
       if (!slp
          && !PURE_SLP_STMT (stmt_info)
@@ -5956,16 +6418,14 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
     }
 
 
-  if (STMT_VINFO_GATHER_P (stmt_info))
+  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     {
-      gimple def_stmt;
-      tree def;
-      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
-                                      &gather_off, &gather_scale);
+      gimple *def_stmt;
+      gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
+                                              &gather_off, &gather_scale);
       gcc_assert (gather_decl);
-      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
-                                &def_stmt, &def, &gather_dt,
-                                &gather_off_vectype))
+      if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
+                              &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5978,12 +6438,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       if ((grouped_load
           && (slp || PURE_SLP_STMT (stmt_info)))
          && (group_size > nunits
-             || nunits % group_size != 0
-             /* ???  During analysis phase we are not called with the
-                slp node/instance we are in so whether we'll end up
-                with a permutation we don't know.  Still we don't
-                support load permutations.  */
-             || slp_perm))
+             || nunits % group_size != 0))
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "unhandled strided group load\n");
@@ -6052,7 +6507,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   ensure_base_align (stmt_info, dr);
 
-  if (STMT_VINFO_GATHER_P (stmt_info))
+  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     {
       tree vec_oprnd0 = NULL_TREE, op;
       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
@@ -6159,7 +6614,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
-             = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
+             = vect_get_vec_def_for_operand (gather_off, stmt);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
@@ -6168,8 +6623,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
-             var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
-             var = make_ssa_name (var);
+             var = vect_get_new_ssa_name (idxtype, vect_simple_var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
@@ -6184,8 +6638,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
-             var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
-             op = make_ssa_name (var, new_stmt);
+             op = vect_get_new_ssa_name (rettype, vect_simple_var);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest);
@@ -6225,7 +6678,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
     {
       gimple_stmt_iterator incr_gsi;
       bool insert_after;
-      gimple incr;
+      gimple *incr;
       tree offvar;
       tree ivstep;
       tree running_off;
@@ -6235,13 +6688,19 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
       gcc_assert (!nested_in_vect_loop);
 
+      if (slp && grouped_load)
+       first_dr = STMT_VINFO_DATA_REF
+           (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
+      else
+       first_dr = dr;
+
       stride_base
        = fold_build_pointer_plus
-           (unshare_expr (DR_BASE_ADDRESS (dr)),
+           (DR_BASE_ADDRESS (first_dr),
             size_binop (PLUS_EXPR,
-                        convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
-                        convert_to_ptrofftype (DR_INIT (dr))));
-      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
+                        convert_to_ptrofftype (DR_OFFSET (first_dr)),
+                        convert_to_ptrofftype (DR_INIT (first_dr))));
+      stride_step = fold_convert (sizetype, DR_STEP (first_dr));
 
       /* For a load with loop-invariant (but other than power-of-2)
          stride (i.e. not a grouped access) like so:
@@ -6259,27 +6718,28 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
             vectemp = {tmp1, tmp2, ...}
          */
 
-      ivstep = stride_step;
-      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
-                           build_int_cst (TREE_TYPE (ivstep), vf));
+      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
+                           build_int_cst (TREE_TYPE (stride_step), vf));
 
       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
-      create_iv (stride_base, ivstep, NULL,
+      create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
       incr = gsi_stmt (incr_gsi);
-      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
+      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
 
-      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
+      stride_step = force_gimple_operand (unshare_expr (stride_step),
+                                         &stmts, true, NULL_TREE);
       if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
 
       prev_stmt_info = NULL;
       running_off = offvar;
-      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
+      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
       int nloads = nunits;
       tree ltype = TREE_TYPE (vectype);
+      auto_vec<tree> dr_chain;
       if (slp)
        {
          nloads = nunits / group_size;
@@ -6289,7 +6749,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
            ltype = vectype;
          ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
          ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-         gcc_assert (!slp_perm);
+         if (slp_perm)
+           dr_chain.create (ncopies);
        }
       for (j = 0; j < ncopies; j++)
        {
@@ -6301,7 +6762,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
              for (i = 0; i < nloads; i++)
                {
                  tree newref, newoff;
-                 gimple incr;
+                 gimple *incr;
                  newref = build2 (MEM_REF, ltype, running_off, alias_off);
 
                  newref = force_gimple_operand_gsi (gsi, newref, true,
@@ -6328,7 +6789,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
 
              tree newoff = copy_ssa_name (running_off);
-             gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+             gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                          running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);
 
@@ -6336,23 +6797,37 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
            }
 
          if (slp)
-           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
-         if (j == 0)
-           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+           {
+             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+             if (slp_perm)
+               dr_chain.quick_push (gimple_assign_lhs (new_stmt));
+           }
          else
-           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
-         prev_stmt_info = vinfo_for_stmt (new_stmt);
+           {
+             if (j == 0)
+               STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+             else
+               STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+             prev_stmt_info = vinfo_for_stmt (new_stmt);
+           }
        }
+      if (slp_perm)
+       vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
+                                     slp_node_instance, false);
       return true;
     }
 
   if (grouped_load)
     {
       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
-      if (slp
-          && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
-         && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
-        first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
+      /* For SLP vectorization we directly vectorize a subchain
+         without permutation.  */
+      if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
+       first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
+      /* For BB vectorization always use the first stmt to base
+        the data ref pointer on.  */
+      if (bb_vinfo)
+       first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
 
       /* Check if the chain of loads is already vectorized.  */
       if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
@@ -6371,28 +6846,30 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
        }
       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
+      group_gap_adj = 0;
 
       /* VEC_NUM is the number of vect stmts to be created for this group.  */
       if (slp)
        {
          grouped_load = false;
-         vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-          if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
-            slp_perm = true;
-         group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
+         /* For SLP permutation support we need to load the whole group,
+            not only the number of vector stmts the permutation result
+            fits in.  */
+         if (slp_perm)
+           vec_num = (group_size * vf + nunits - 1) / nunits;
+         else
+           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+         group_gap_adj = vf * group_size - nunits * vec_num;
        }
       else
-       {
-         vec_num = group_size;
-         group_gap = 0;
-       }
+       vec_num = group_size;
     }
   else
     {
       first_stmt = stmt;
       first_dr = dr;
       group_size = vec_num = 1;
-      group_gap = 0;
+      group_gap_adj = 0;
     }
 
   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
@@ -6562,6 +7039,24 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
+         else if (first_stmt_for_drptr
+                  && first_stmt != first_stmt_for_drptr)
+           {
+             dataref_ptr
+               = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
+                                           at_loop, offset, &dummy, gsi,
+                                           &ptr_incr, simd_lane_access_p,
+                                           &inv_p, byte_offset);
+             /* Adjust the pointer by the difference to first_stmt.  */
+             data_reference_p ptrdr
+               = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
+             tree diff = fold_convert (sizetype,
+                                       size_binop (MINUS_EXPR,
+                                                   DR_INIT (first_dr),
+                                                   DR_INIT (ptrdr)));
+             dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+                                            stmt, diff);
+           }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
@@ -6620,11 +7115,11 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    unsigned int align, misalign;
 
                    data_ref
-                     = build2 (MEM_REF, vectype, dataref_ptr,
-                               dataref_offset
-                               ? dataref_offset
-                               : build_int_cst (reference_alias_ptr_type
-                                                (DR_REF (first_dr)), 0));
+                     = fold_build2 (MEM_REF, vectype, dataref_ptr,
+                                    dataref_offset
+                                    ? dataref_offset
+                                    : build_int_cst (reference_alias_ptr_type
+                                                     (DR_REF (first_dr)), 0));
                    align = TYPE_ALIGN_UNIT (vectype);
                    if (alignment_support_scheme == dr_aligned)
                      {
@@ -6633,11 +7128,15 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                      }
                    else if (DR_MISALIGNMENT (first_dr) == -1)
                      {
+                       if (DR_VECT_AUX (first_dr)->base_element_aligned)
+                         align = TYPE_ALIGN_UNIT (elem_type);
+                       else
+                         align = (get_object_alignment (DR_REF (first_dr))
+                                  / BITS_PER_UNIT);
+                       misalign = 0;
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
-                                               TYPE_ALIGN (elem_type));
-                       align = TYPE_ALIGN_UNIT (elem_type);
-                       misalign = 0;
+                                               align * BITS_PER_UNIT);
                      }
                    else
                      {
@@ -6646,7 +7145,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                                                TYPE_ALIGN (elem_type));
                        misalign = DR_MISALIGNMENT (first_dr);
                      }
-                   if (dataref_offset == NULL_TREE)
+                   if (dataref_offset == NULL_TREE
+                       && TREE_CODE (dataref_ptr) == SSA_NAME)
                      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
                                              align, misalign);
                    break;
@@ -6663,7 +7163,10 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                                                    dr_explicit_realign,
                                                    dataref_ptr, NULL);
 
-                   ptr = copy_ssa_name (dataref_ptr);
+                   if (TREE_CODE (dataref_ptr) == SSA_NAME)
+                     ptr = copy_ssa_name (dataref_ptr);
+                   else
+                     ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
                    new_stmt = gimple_build_assign
                                 (ptr, BIT_AND_EXPR, dataref_ptr,
                                  build_int_cst
@@ -6693,7 +7196,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                                  build_int_cst
                                  (TREE_TYPE (ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
-                   ptr = copy_ssa_name (dataref_ptr, new_stmt);
+                   ptr = copy_ssa_name (ptr, new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
@@ -6703,7 +7206,10 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    break;
                  }
                case dr_explicit_realign_optimized:
-                 new_temp = copy_ssa_name (dataref_ptr);
+                 if (TREE_CODE (dataref_ptr) == SSA_NAME)
+                   new_temp = copy_ssa_name (dataref_ptr);
+                 else
+                   new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
                  new_stmt = gimple_build_assign
                               (new_temp, BIT_AND_EXPR, dataref_ptr,
                                build_int_cst
@@ -6777,6 +7283,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                                              unshare_expr
                                                (gimple_assign_rhs1 (stmt))));
                      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
+                     new_stmt = SSA_NAME_DEF_STMT (new_temp);
+                     set_vinfo_for_stmt (new_stmt,
+                                         new_stmt_vec_info (new_stmt, vinfo));
                    }
                  else
                    {
@@ -6784,11 +7293,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                      gsi_next (&gsi2);
                      new_temp = vect_init_vector (stmt, scalar_dest,
                                                   vectype, &gsi2);
+                     new_stmt = SSA_NAME_DEF_STMT (new_temp);
                    }
-                 new_stmt = SSA_NAME_DEF_STMT (new_temp);
-                 set_vinfo_for_stmt (new_stmt,
-                                     new_stmt_vec_info (new_stmt, loop_vinfo,
-                                                        bb_vinfo));
                }
 
              if (negative)
@@ -6808,12 +7314,15 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
              if (slp && !slp_perm)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
-         /* Bump the vector pointer to account for a gap.  */
-         if (slp && group_gap != 0)
+         /* Bump the vector pointer to account for a gap or for excess
+            elements loaded for a permuted SLP load.  */
+         if (group_gap_adj != 0)
            {
-             tree bump = size_binop (MULT_EXPR,
-                                     TYPE_SIZE_UNIT (elem_type),
-                                     size_int (group_gap));
+             bool ovf;
+             tree bump
+               = wide_int_to_tree (sizetype,
+                                   wi::smul (TYPE_SIZE_UNIT (elem_type),
+                                             group_gap_adj, &ovf));
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                             stmt, bump);
            }
@@ -6867,14 +7376,25 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
    condition operands are supportable using vec_is_simple_use.  */
 
 static bool
-vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
-                    bb_vec_info bb_vinfo, tree *comp_vectype)
+vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
 {
   tree lhs, rhs;
-  tree def;
   enum vect_def_type dt;
   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
 
+  /* Mask case.  */
+  if (TREE_CODE (cond) == SSA_NAME
+      && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
+    {
+      gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
+      if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
+                              &dt, comp_vectype)
+         || !*comp_vectype
+         || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
+       return false;
+      return true;
+    }
+
   if (!COMPARISON_CLASS_P (cond))
     return false;
 
@@ -6883,9 +7403,8 @@ vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
 
   if (TREE_CODE (lhs) == SSA_NAME)
     {
-      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
-      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
-                                &lhs_def_stmt, &def, &dt, &vectype1))
+      gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
+      if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
        return false;
     }
   else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
@@ -6894,9 +7413,8 @@ vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
 
   if (TREE_CODE (rhs) == SSA_NAME)
     {
-      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
-      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
-                                &rhs_def_stmt, &def, &dt, &vectype2))
+      gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
+      if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
        return false;
     }
   else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
@@ -6916,29 +7434,26 @@ vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
 
    When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
    to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
-   else caluse if it is 2).
+   else clause if it is 2).
 
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 bool
-vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
-                       gimple *vec_stmt, tree reduc_def, int reduc_index,
+vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
+                       gimple **vec_stmt, tree reduc_def, int reduc_index,
                        slp_tree slp_node)
 {
   tree scalar_dest = NULL_TREE;
   tree vec_dest = NULL_TREE;
   tree cond_expr, then_clause, else_clause;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   tree comp_vectype = NULL_TREE;
   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
   tree vec_compare, vec_cond_expr;
   tree new_temp;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-  tree def;
   enum vect_def_type dt, dts[4];
-  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
   int ncopies;
   enum tree_code code;
   stmt_vec_info prev_stmt_info = NULL;
@@ -6949,34 +7464,29 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
   vec<tree> vec_oprnds2 = vNULL;
   vec<tree> vec_oprnds3 = vNULL;
   tree vec_cmp_type;
-
-  if (slp_node || PURE_SLP_STMT (stmt_info))
-    ncopies = 1;
-  else
-    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
-
-  gcc_assert (ncopies >= 1);
-  if (reduc_index && ncopies > 1)
-    return false; /* FORNOW */
+  bool masked = false;
 
   if (reduc_index && STMT_SLP_TYPE (stmt_info))
     return false;
 
-  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
-    return false;
+  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
+    {
+      if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
+       return false;
 
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
-      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
-           && reduc_def))
-    return false;
+      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
+         && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
+              && reduc_def))
+       return false;
 
-  /* FORNOW: not yet supported.  */
-  if (STMT_VINFO_LIVE_P (stmt_info))
-    {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                         "value used after loop.\n");
-      return false;
+      /* FORNOW: not yet supported.  */
+      if (STMT_VINFO_LIVE_P (stmt_info))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "value used after loop.\n");
+         return false;
+       }
     }
 
   /* Is vectorizable conditional operation?  */
@@ -6988,43 +7498,39 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
   if (code != COND_EXPR)
     return false;
 
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+
+  if (slp_node || PURE_SLP_STMT (stmt_info))
+    ncopies = 1;
+  else
+    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+
+  gcc_assert (ncopies >= 1);
+  if (reduc_index && ncopies > 1)
+    return false; /* FORNOW */
+
   cond_expr = gimple_assign_rhs1 (stmt);
   then_clause = gimple_assign_rhs2 (stmt);
   else_clause = gimple_assign_rhs3 (stmt);
 
-  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
-                           &comp_vectype)
+  if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
       || !comp_vectype)
     return false;
 
-  if (TREE_CODE (then_clause) == SSA_NAME)
-    {
-      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
-      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
-                              &then_def_stmt, &def, &dt))
-       return false;
-    }
-  else if (TREE_CODE (then_clause) != INTEGER_CST
-          && TREE_CODE (then_clause) != REAL_CST
-          && TREE_CODE (then_clause) != FIXED_CST)
+  gimple *def_stmt;
+  if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt))
+    return false;
+  if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt))
     return false;
 
-  if (TREE_CODE (else_clause) == SSA_NAME)
+  if (VECTOR_BOOLEAN_TYPE_P (comp_vectype))
     {
-      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
-      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
-                              &else_def_stmt, &def, &dt))
-       return false;
+      vec_cmp_type = comp_vectype;
+      masked = true;
     }
-  else if (TREE_CODE (else_clause) != INTEGER_CST
-          && TREE_CODE (else_clause) != REAL_CST
-          && TREE_CODE (else_clause) != FIXED_CST)
-    return false;
-
-  unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
-  /* The result of a vector comparison should be signed type.  */
-  tree cmp_type = build_nonstandard_integer_type (prec, 0);
-  vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
+  else
+    vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
   if (vec_cmp_type == NULL_TREE)
     return false;
 
@@ -7059,14 +7565,20 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
               auto_vec<tree, 4> ops;
              auto_vec<vec<tree>, 4> vec_defs;
 
-              ops.safe_push (TREE_OPERAND (cond_expr, 0));
-              ops.safe_push (TREE_OPERAND (cond_expr, 1));
+             if (masked)
+                 ops.safe_push (cond_expr);
+             else
+               {
+                 ops.safe_push (TREE_OPERAND (cond_expr, 0));
+                 ops.safe_push (TREE_OPERAND (cond_expr, 1));
+               }
               ops.safe_push (then_clause);
               ops.safe_push (else_clause);
               vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds3 = vec_defs.pop ();
              vec_oprnds2 = vec_defs.pop ();
-             vec_oprnds1 = vec_defs.pop ();
+             if (!masked)
+               vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();
 
               ops.release ();
@@ -7074,44 +7586,58 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
             }
           else
             {
-             gimple gtemp;
-             vec_cond_lhs =
-             vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
-                                           stmt, NULL);
-             vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
-                                 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
-
-             vec_cond_rhs =
-               vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
-                                               stmt, NULL);
-             vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
-                                 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
+             gimple *gtemp;
+             if (masked)
+               {
+                 vec_cond_lhs
+                   = vect_get_vec_def_for_operand (cond_expr, stmt,
+                                                   comp_vectype);
+                 vect_is_simple_use (cond_expr, stmt_info->vinfo,
+                                     &gtemp, &dts[0]);
+               }
+             else
+               {
+                 vec_cond_lhs =
+                   vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
+                                                 stmt, comp_vectype);
+                 vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
+                                     loop_vinfo, &gtemp, &dts[0]);
+
+                 vec_cond_rhs =
+                   vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
+                                                 stmt, comp_vectype);
+                 vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
+                                     loop_vinfo, &gtemp, &dts[1]);
+               }
              if (reduc_index == 1)
                vec_then_clause = reduc_def;
              else
                {
                  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
-                                                             stmt, NULL);
-                 vect_is_simple_use (then_clause, stmt, loop_vinfo,
-                                         NULL, &gtemp, &def, &dts[2]);
+                                                                 stmt);
+                 vect_is_simple_use (then_clause, loop_vinfo,
+                                     &gtemp, &dts[2]);
                }
              if (reduc_index == 2)
                vec_else_clause = reduc_def;
              else
                {
                  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
-                                                             stmt, NULL);
-                 vect_is_simple_use (else_clause, stmt, loop_vinfo,
-                                 NULL, &gtemp, &def, &dts[3]);
+                                                                 stmt);
+                 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
                }
            }
        }
       else
        {
-         vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
-                                                        vec_oprnds0.pop ());
-         vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
-                                                        vec_oprnds1.pop ());
+         vec_cond_lhs
+           = vect_get_vec_def_for_stmt_copy (dts[0],
+                                             vec_oprnds0.pop ());
+         if (!masked)
+           vec_cond_rhs
+             = vect_get_vec_def_for_stmt_copy (dts[1],
+                                               vec_oprnds1.pop ());
+
          vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
                                                            vec_oprnds2.pop ());
          vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
@@ -7121,7 +7647,8 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
       if (!slp_node)
         {
          vec_oprnds0.quick_push (vec_cond_lhs);
-         vec_oprnds1.quick_push (vec_cond_rhs);
+         if (!masked)
+           vec_oprnds1.quick_push (vec_cond_rhs);
          vec_oprnds2.quick_push (vec_then_clause);
          vec_oprnds3.quick_push (vec_else_clause);
        }
@@ -7129,12 +7656,17 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
       /* Arguments are ready.  Create the new vector stmt.  */
       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
         {
-          vec_cond_rhs = vec_oprnds1[i];
           vec_then_clause = vec_oprnds2[i];
           vec_else_clause = vec_oprnds3[i];
 
-         vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
-                               vec_cond_lhs, vec_cond_rhs);
+         if (masked)
+           vec_compare = vec_cond_lhs;
+         else
+           {
+             vec_cond_rhs = vec_oprnds1[i];
+             vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
+                                   vec_cond_lhs, vec_cond_rhs);
+           }
           vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
                         vec_compare, vec_then_clause, vec_else_clause);
 
@@ -7165,18 +7697,197 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
   return true;
 }
 
+/* vectorizable_comparison.
+
+   Check if STMT is a comparison expression that can be vectorized.
+   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
+   comparison, put it in VEC_STMT (or, for SLP, push it onto SLP_NODE's
+   vector stmts), and insert it at GSI.
+   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
+
+bool
+vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
+                        gimple **vec_stmt, tree reduc_def,
+                        slp_tree slp_node)
+{
+  tree lhs, rhs1, rhs2;
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
+  tree new_temp;
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
+  unsigned nunits;
+  int ncopies;
+  enum tree_code code;
+  stmt_vec_info prev_stmt_info = NULL;
+  int i, j;
+  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  vec<tree> vec_oprnds0 = vNULL;
+  vec<tree> vec_oprnds1 = vNULL;
+  gimple *def_stmt;
+  tree mask_type;
+  tree mask;
+
+  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
+    return false;
+  /* The vector type recorded for STMT must be a boolean vector.  */
+  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
+    return false;
+
+  mask_type = vectype;
+  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+
+  if (slp_node || PURE_SLP_STMT (stmt_info))
+    ncopies = 1;
+  else
+    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+
+  gcc_assert (ncopies >= 1);
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
+      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
+          && reduc_def))
+    return false;
+
+  if (STMT_VINFO_LIVE_P (stmt_info))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "value used after loop.\n");
+      return false;
+    }
+
+  if (!is_gimple_assign (stmt))
+    return false;
+
+  code = gimple_assign_rhs_code (stmt);
+
+  if (TREE_CODE_CLASS (code) != tcc_comparison)
+    return false;
+
+  rhs1 = gimple_assign_rhs1 (stmt);
+  rhs2 = gimple_assign_rhs2 (stmt);
+
+  if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
+                          &dts[0], &vectype1))
+    return false;
+
+  if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
+                          &dts[1], &vectype2))
+    return false;
+
+  if (vectype1 && vectype2
+      && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
+    return false;
+
+  vectype = vectype1 ? vectype1 : vectype2;
+
+  /* Invariant comparison: neither operand provided a vector type.  */
+  if (!vectype)
+    {
+      vectype = build_vector_type (TREE_TYPE (rhs1), nunits);
+      if (tree_to_shwi (TYPE_SIZE_UNIT (vectype)) != current_vector_size)
+       return false;
+    }
+  else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
+    return false;
+  /* Analysis only (no VEC_STMT): record the stmt type and its cost.  */
+  if (!vec_stmt)
+    {
+      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
+      vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
+      return expand_vec_cmp_expr_p (vectype, mask_type);
+    }
+
+  /* Transform: VEC_STMT was passed, so generate the vector stmts.  */
+  if (!slp_node)
+    {
+      vec_oprnds0.create (1);
+      vec_oprnds1.create (1);
+    }
+
+  /* Handle def: create the MASK_TYPE destination variable for LHS.  */
+  lhs = gimple_assign_lhs (stmt);
+  mask = vect_create_destination_var (lhs, mask_type);
+
+  /* Handle cmp expr, once per copy (NCOPIES iterations).  */
+  for (j = 0; j < ncopies; j++)
+    {
+      gassign *new_stmt = NULL;
+      if (j == 0)
+       {
+         if (slp_node)
+           {
+             auto_vec<tree, 2> ops;
+             auto_vec<vec<tree>, 2> vec_defs;
+
+             ops.safe_push (rhs1);
+             ops.safe_push (rhs2);
+             vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
+             vec_oprnds1 = vec_defs.pop ();
+             vec_oprnds0 = vec_defs.pop ();
+           }
+         else
+           {
+             vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
+             vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
+           }
+       }
+      else
+       {
+         vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
+                                                    vec_oprnds0.pop ());
+         vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
+                                                    vec_oprnds1.pop ());
+       }
+
+      if (!slp_node)
+       {
+         vec_oprnds0.quick_push (vec_rhs1);
+         vec_oprnds1.quick_push (vec_rhs2);
+       }
+
+      /* Arguments are ready.  Create the new vector stmt.  */
+      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
+       {
+         vec_rhs2 = vec_oprnds1[i];
+
+         new_temp = make_ssa_name (mask);
+         new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
+         vect_finish_stmt_generation (stmt, new_stmt, gsi);
+         if (slp_node)
+           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+       }
+      /* For SLP the stmts were already pushed onto SLP_NODE above.  */
+      if (slp_node)
+       continue;
+
+      if (j == 0)
+       STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+      else
+       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+
+      prev_stmt_info = vinfo_for_stmt (new_stmt);
+    }
+
+  vec_oprnds0.release ();
+  vec_oprnds1.release ();
+
+  return true;
+}
 
 /* Make sure the statement is vectorizable.  */
 
 bool
-vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
+vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
   bool ok;
   tree scalar_type, vectype;
-  gimple pattern_stmt;
+  gimple *pattern_stmt;
   gimple_seq pattern_def_seq;
 
   if (dump_enabled_p ())
@@ -7261,7 +7972,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
 
       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
        {
-         gimple pattern_def_stmt = gsi_stmt (si);
+         gimple *pattern_def_stmt = gsi_stmt (si);
          if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
            {
@@ -7287,9 +7998,11 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
 
       case vect_reduction_def:
       case vect_nested_cycle:
-         gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
-                     || relevance == vect_used_in_outer_by_reduction
-                     || relevance == vect_unused_in_scope));
+         gcc_assert (!bb_vinfo
+                    && (relevance == vect_used_in_outer
+                        || relevance == vect_used_in_outer_by_reduction
+                        || relevance == vect_used_by_reduction
+                        || relevance == vect_unused_in_scope));
          break;
 
       case vect_induction_def:
@@ -7346,33 +8059,42 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
       *need_to_vectorize = true;
     }
 
-   ok = true;
-   if (!bb_vinfo
-       && (STMT_VINFO_RELEVANT_P (stmt_info)
-           || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
-      ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
-           || vectorizable_conversion (stmt, NULL, NULL, NULL)
-            || vectorizable_shift (stmt, NULL, NULL, NULL)
-            || vectorizable_operation (stmt, NULL, NULL, NULL)
-            || vectorizable_assignment (stmt, NULL, NULL, NULL)
-            || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
-           || vectorizable_call (stmt, NULL, NULL, NULL)
-            || vectorizable_store (stmt, NULL, NULL, NULL)
-            || vectorizable_reduction (stmt, NULL, NULL, NULL)
-            || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
-    else
-      {
-        if (bb_vinfo)
-         ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
-               || vectorizable_conversion (stmt, NULL, NULL, node)
-               || vectorizable_shift (stmt, NULL, NULL, node)
-                || vectorizable_operation (stmt, NULL, NULL, node)
-                || vectorizable_assignment (stmt, NULL, NULL, node)
-                || vectorizable_load (stmt, NULL, NULL, node, NULL)
-               || vectorizable_call (stmt, NULL, NULL, node)
-                || vectorizable_store (stmt, NULL, NULL, node)
-                || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
-      }
+  if (PURE_SLP_STMT (stmt_info) && !node)
+    {
+      dump_printf_loc (MSG_NOTE, vect_location,
+                      "handled only by SLP analysis\n");
+      return true;
+    }
+
+  ok = true;
+  if (!bb_vinfo
+      && (STMT_VINFO_RELEVANT_P (stmt_info)
+         || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
+    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
+         || vectorizable_conversion (stmt, NULL, NULL, node)
+         || vectorizable_shift (stmt, NULL, NULL, node)
+         || vectorizable_operation (stmt, NULL, NULL, node)
+         || vectorizable_assignment (stmt, NULL, NULL, node)
+         || vectorizable_load (stmt, NULL, NULL, node, NULL)
+         || vectorizable_call (stmt, NULL, NULL, node)
+         || vectorizable_store (stmt, NULL, NULL, node)
+         || vectorizable_reduction (stmt, NULL, NULL, node)
+         || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
+         || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
+  else
+    {
+      if (bb_vinfo)
+       ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
+             || vectorizable_conversion (stmt, NULL, NULL, node)
+             || vectorizable_shift (stmt, NULL, NULL, node)
+             || vectorizable_operation (stmt, NULL, NULL, node)
+             || vectorizable_assignment (stmt, NULL, NULL, node)
+             || vectorizable_load (stmt, NULL, NULL, node, NULL)
+             || vectorizable_call (stmt, NULL, NULL, node)
+             || vectorizable_store (stmt, NULL, NULL, node)
+             || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
+             || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
+    }
 
   if (!ok)
     {
@@ -7418,15 +8140,17 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
    Create a vectorized stmt to replace STMT, and insert it at GSI.  */
 
 bool
-vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
+vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
                     bool *grouped_store, slp_tree slp_node,
                      slp_instance slp_node_instance)
 {
   bool is_store = false;
-  gimple vec_stmt = NULL;
+  gimple *vec_stmt = NULL;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   bool done;
 
+  gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+
   switch (STMT_VINFO_TYPE (stmt_info))
     {
     case type_demotion_vec_info_type:
@@ -7485,6 +8209,11 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
       gcc_assert (done);
       break;
 
+    case comparison_vec_info_type:
+      done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
+      gcc_assert (done);
+      break;
+
     case call_vec_info_type:
       done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
       stmt = gsi_stmt (*gsi);
@@ -7514,6 +8243,12 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
        }
     }
 
+  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
+     This would break hybrid SLP vectorization.  */
+  if (slp_node)
+    gcc_assert (!vec_stmt
+               && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
+
   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
      is being vectorized, but outside the immediately enclosing loop.  */
   if (vec_stmt
@@ -7530,7 +8265,7 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
       imm_use_iterator imm_iter;
       use_operand_p use_p;
       tree scalar_dest;
-      gimple exit_phi;
+      gimple *exit_phi;
 
       if (dump_enabled_p ())
         dump_printf_loc (MSG_NOTE, vect_location,
@@ -7574,10 +8309,10 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
    stmt_vec_info.  */
 
 void
-vect_remove_stores (gimple first_stmt)
+vect_remove_stores (gimple *first_stmt)
 {
-  gimple next = first_stmt;
-  gimple tmp;
+  gimple *next = first_stmt;
+  gimple *tmp;
   gimple_stmt_iterator next_si;
 
   while (next)
@@ -7603,16 +8338,14 @@ vect_remove_stores (gimple first_stmt)
    Create and initialize a new stmt_vec_info struct for STMT.  */
 
 stmt_vec_info
-new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
-                   bb_vec_info bb_vinfo)
+new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
 {
   stmt_vec_info res;
   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
 
   STMT_VINFO_TYPE (res) = undef_vec_info_type;
   STMT_VINFO_STMT (res) = stmt;
-  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
-  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
+  res->vinfo = vinfo;
   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
   STMT_VINFO_LIVE_P (res) = false;
   STMT_VINFO_VECTYPE (res) = NULL;
@@ -7622,6 +8355,7 @@ new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
   STMT_VINFO_RELATED_STMT (res) = NULL;
   STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
   STMT_VINFO_DATA_REF (res) = NULL;
+  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
 
   STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
   STMT_VINFO_DR_OFFSET (res) = NULL;
@@ -7664,10 +8398,10 @@ void
 free_stmt_vec_info_vec (void)
 {
   unsigned int i;
-  vec_void_p info;
+  stmt_vec_info info;
   FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
     if (info != NULL)
-      free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
+      free_stmt_vec_info (STMT_VINFO_STMT (info));
   gcc_assert (stmt_vec_info_vec.exists ());
   stmt_vec_info_vec.release ();
 }
@@ -7676,7 +8410,7 @@ free_stmt_vec_info_vec (void)
 /* Free stmt vectorization related info.  */
 
 void
-free_stmt_vec_info (gimple stmt)
+free_stmt_vec_info (gimple *stmt)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 
@@ -7694,20 +8428,20 @@ free_stmt_vec_info (gimple stmt)
       if (patt_info)
        {
          gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
-         gimple patt_stmt = STMT_VINFO_STMT (patt_info);
+         gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
          gimple_set_bb (patt_stmt, NULL);
          tree lhs = gimple_get_lhs (patt_stmt);
-         if (TREE_CODE (lhs) == SSA_NAME)
+         if (lhs && TREE_CODE (lhs) == SSA_NAME)
            release_ssa_name (lhs);
          if (seq)
            {
              gimple_stmt_iterator si;
              for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
                {
-                 gimple seq_stmt = gsi_stmt (si);
+                 gimple *seq_stmt = gsi_stmt (si);
                  gimple_set_bb (seq_stmt, NULL);
-                 lhs = gimple_get_lhs (patt_stmt);
-                 if (TREE_CODE (lhs) == SSA_NAME)
+                 lhs = gimple_get_lhs (seq_stmt);
+                 if (lhs && TREE_CODE (lhs) == SSA_NAME)
                    release_ssa_name (lhs);
                  free_stmt_vec_info (seq_stmt);
                }
@@ -7813,6 +8547,23 @@ get_vectype_for_scalar_type (tree scalar_type)
   return vectype;
 }
 
+/* Function get_mask_type_for_scalar_type.
+
+   Returns the mask type for the result of comparing vectors of SCALAR_TYPE
+   as supported by target, or NULL if SCALAR_TYPE has no vector type.  */
+
+tree
+get_mask_type_for_scalar_type (tree scalar_type)
+{
+  tree vectype = get_vectype_for_scalar_type (scalar_type);
+
+  if (!vectype)
+    return NULL;
+
+  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
+                                 current_vector_size);
+}
+
 /* Function get_same_sized_vectype
 
    Returns a vector type corresponding to SCALAR_TYPE of size
@@ -7821,6 +8572,9 @@ get_vectype_for_scalar_type (tree scalar_type)
 tree
 get_same_sized_vectype (tree scalar_type, tree vector_type)
 {
+  if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
+    return build_same_sized_truth_vector_type (vector_type);
+
   return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
 }
@@ -7828,10 +8582,11 @@ get_same_sized_vectype (tree scalar_type, tree vector_type)
 /* Function vect_is_simple_use.
 
    Input:
-   LOOP_VINFO - the vect info of the loop that is being vectorized.
-   BB_VINFO - the vect info of the basic block that is being vectorized.
-   OPERAND - operand of STMT in the loop or bb.
-   DEF - the defining stmt in case OPERAND is an SSA_NAME.
+   VINFO - the vect info of the loop or basic block that is being vectorized.
+   OPERAND - operand in the loop or bb.
+   Output:
+   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
+   DT - the definition type of OPERAND.
 
    Returns whether a stmt with OPERAND can be vectorized.
    For loops, supportable operands are constants, loop invariants, and operands
@@ -7842,19 +8597,11 @@ get_same_sized_vectype (tree scalar_type, tree vector_type)
    For now, operands defined outside the basic block are not supported.  */
 
 bool
-vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
-                    bb_vec_info bb_vinfo, gimple *def_stmt,
-                   tree *def, enum vect_def_type *dt)
+vect_is_simple_use (tree operand, vec_info *vinfo,
+                    gimple **def_stmt, enum vect_def_type *dt)
 {
-  basic_block bb;
-  stmt_vec_info stmt_vinfo;
-  struct loop *loop = NULL;
-
-  if (loop_vinfo)
-    loop = LOOP_VINFO_LOOP (loop_vinfo);
-
   *def_stmt = NULL;
-  *def = NULL_TREE;
+  *dt = vect_unknown_def_type;
 
   if (dump_enabled_p ())
     {
@@ -7872,63 +8619,37 @@ vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
 
   if (is_gimple_min_invariant (operand))
     {
-      *def = operand;
       *dt = vect_external_def;
       return true;
     }
 
-  if (TREE_CODE (operand) == PAREN_EXPR)
-    {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
-      operand = TREE_OPERAND (operand, 0);
-    }
-
   if (TREE_CODE (operand) != SSA_NAME)
     {
       if (dump_enabled_p ())
-        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                         "not ssa-name.\n");
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "not ssa-name.\n");
       return false;
     }
 
-  *def_stmt = SSA_NAME_DEF_STMT (operand);
-  if (*def_stmt == NULL)
+  if (SSA_NAME_IS_DEFAULT_DEF (operand))
     {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                         "no def_stmt.\n");
-      return false;
+      *dt = vect_external_def;
+      return true;
     }
 
+  *def_stmt = SSA_NAME_DEF_STMT (operand);
   if (dump_enabled_p ())
     {
       dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
     }
 
-  /* Empty stmt is expected only in case of a function argument.
-     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
-  if (gimple_nop_p (*def_stmt))
-    {
-      *def = operand;
-      *dt = vect_external_def;
-      return true;
-    }
-
-  bb = gimple_bb (*def_stmt);
-
-  if ((loop && !flow_bb_inside_loop_p (loop, bb))
-      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
-      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
+  if (! vect_stmt_in_region_p (vinfo, *def_stmt))
     *dt = vect_external_def;
   else
     {
-      stmt_vinfo = vinfo_for_stmt (*def_stmt);
-      if (!loop && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
-       *dt = vect_external_def;
-      else
-       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
+      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
+      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
     }
 
   if (dump_enabled_p ())
@@ -7966,10 +8687,7 @@ vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
        }
     }
 
-  if (*dt == vect_unknown_def_type
-      || (stmt
-         && *dt == vect_double_reduction_def
-         && gimple_code (stmt) != GIMPLE_PHI))
+  if (*dt == vect_unknown_def_type)
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7980,18 +8698,9 @@ vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
   switch (gimple_code (*def_stmt))
     {
     case GIMPLE_PHI:
-      *def = gimple_phi_result (*def_stmt);
-      break;
-
     case GIMPLE_ASSIGN:
-      *def = gimple_assign_lhs (*def_stmt);
-      break;
-
     case GIMPLE_CALL:
-      *def = gimple_call_lhs (*def_stmt);
-      if (*def != NULL)
-       break;
-      /* FALLTHRU */
+      break;
     default:
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -8002,9 +8711,9 @@ vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
   return true;
 }
 
-/* Function vect_is_simple_use_1.
+/* Function vect_is_simple_use.
 
-   Same as vect_is_simple_use_1 but also determines the vector operand
+   Same as vect_is_simple_use above but also determines the vector operand
    type of OPERAND and stores it to *VECTYPE.  If the definition of
    OPERAND is vect_uninitialized_def, vect_constant_def or
    vect_external_def *VECTYPE will be set to NULL_TREE and the caller
@@ -8012,12 +8721,10 @@ vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
    scalar operand.  */
 
 bool
-vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
-                     bb_vec_info bb_vinfo, gimple *def_stmt,
-                     tree *def, enum vect_def_type *dt, tree *vectype)
+vect_is_simple_use (tree operand, vec_info *vinfo,
+                   gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
 {
-  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
-                          def, dt))
+  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
     return false;
 
   /* Now get a vector type if the def is internal, otherwise supply
@@ -8072,7 +8779,7 @@ vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
    widening operation (short in the above example).  */
 
 bool
-supportable_widening_operation (enum tree_code code, gimple stmt,
+supportable_widening_operation (enum tree_code code, gimple *stmt,
                                tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, enum tree_code *code2,
                                 int *multi_step_cvt,
@@ -8147,7 +8854,7 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
              by STMT is only directly used in the reduction statement.  */
           tree lhs = gimple_assign_lhs (stmt);
           use_operand_p dummy;
-          gimple use_stmt;
+          gimple *use_stmt;
           stmt_vec_info use_stmt_info = NULL;
           if (single_imm_use (lhs, &dummy, &use_stmt)
               && (use_stmt_info = vinfo_for_stmt (use_stmt))
@@ -8158,6 +8865,16 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
       c2 = VEC_WIDEN_MULT_HI_EXPR;
       break;
 
+    case DOT_PROD_EXPR:
+      c1 = DOT_PROD_EXPR;
+      c2 = DOT_PROD_EXPR;
+      break;
+
+    case SAD_EXPR:
+      c1 = SAD_EXPR;
+      c2 = SAD_EXPR;
+      break;
+
     case VEC_WIDEN_MULT_EVEN_EXPR:
       /* Support the recursion induced just above.  */
       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
@@ -8190,11 +8907,7 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
     }
 
   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
-    {
-      enum tree_code ctmp = c1;
-      c1 = c2;
-      c2 = ctmp;
-    }
+    std::swap (c1, c2);
 
   if (code == FIX_TRUNC_EXPR)
     {