PR c++/68795: fix uninitialized close_paren_loc in cp_parser_postfix_expression
[gcc.git] / gcc / tree-vect-stmts.c
index af203ab438ceb7abfa6d6a6ed5a9ff5f9683faec..465826e61945a3f136407bcddd4903780ff518c8 100644 (file)
@@ -1,5 +1,5 @@
 /* Statement Analysis and Transformation for Vectorization
-   Copyright (C) 2003-2015 Free Software Foundation, Inc.
+   Copyright (C) 2003-2016 Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com>
    and Ira Rosen <irar@il.ibm.com>
 
@@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-scalar-evolution.h"
 #include "tree-vectorizer.h"
 #include "builtins.h"
+#include "internal-fn.h"
 
 /* For lang_hooks.types.type_for_mode.  */
 #include "langhooks.h"
@@ -1299,7 +1300,25 @@ vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
     {
       if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
-         if (CONSTANT_CLASS_P (val))
+         /* Scalar boolean value should be transformed into
+            all zeros or all ones value before building a vector.  */
+         if (VECTOR_BOOLEAN_TYPE_P (type))
+           {
+             tree true_val = build_all_ones_cst (TREE_TYPE (type));
+             tree false_val = build_zero_cst (TREE_TYPE (type));
+
+             if (CONSTANT_CLASS_P (val))
+               val = integer_zerop (val) ? false_val : true_val;
+             else
+               {
+                 new_temp = make_ssa_name (TREE_TYPE (type));
+                 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
+                                                  val, true_val, false_val);
+                 vect_init_vector_1 (stmt, init_stmt, gsi);
+                 val = new_temp;
+               }
+           }
+         else if (CONSTANT_CLASS_P (val))
            val = fold_convert (TREE_TYPE (type), val);
          else
            {
@@ -1641,27 +1660,33 @@ vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
     add_stmt_to_eh_lp (vec_stmt, lp_nr);
 }
 
-/* Checks if CALL can be vectorized in type VECTYPE.  Returns
-   a function declaration if the target has a vectorized version
-   of the function, or NULL_TREE if the function cannot be vectorized.  */
+/* We want to vectorize a call to combined function CFN with function
+   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
+   as the types of all inputs.  Check whether this is possible using
+   an internal function, returning its code if so or IFN_LAST if not.
+
+   FNDECL is only consulted when CFN is not itself an internal function;
+   in that case the candidate is whatever associated_internal_fn returns
+   for FNDECL.  IFN_LAST is also returned when the candidate is not a
+   direct internal function, is not flagged as vectorizable, or is not
+   reported as supported for the selected vector types.  */
 
-tree
-vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
+static internal_fn
+vectorizable_internal_function (combined_fn cfn, tree fndecl,
+                               tree vectype_out, tree vectype_in)
 {
-  tree fndecl = gimple_call_fndecl (call);
-
-  /* We only handle functions that do not read or clobber memory -- i.e.
-     const or novops ones.  */
-  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
-    return NULL_TREE;
-
-  if (!fndecl
-      || TREE_CODE (fndecl) != FUNCTION_DECL
-      || !DECL_BUILT_IN (fndecl))
-    return NULL_TREE;
-
-  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
-                                                       vectype_in);
+  /* Pick the candidate: CFN itself when it already names an internal
+     function, otherwise the internal function associated with FNDECL.  */
+  internal_fn ifn;
+  if (internal_fn_p (cfn))
+    ifn = as_internal_fn (cfn);
+  else
+    ifn = associated_internal_fn (fndecl);
+  /* Only direct internal functions carry the info record queried below.  */
+  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
+    {
+      const direct_internal_fn_info &info = direct_internal_fn (ifn);
+      if (info.vectorizable)
+       {
+         /* A negative type index selects the output vector type; any
+            other value selects the (single) input vector type.
+            NOTE(review): presumably non-negative indices name an argument
+            position -- confirm against direct_internal_fn_info.  */
+         tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
+         tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
+         /* The support query is made for OPTIMIZE_FOR_SPEED.  */
+         if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
+                                             OPTIMIZE_FOR_SPEED))
+           return ifn;
+       }
+    }
+  /* No usable internal function was found.  */
+  return IFN_LAST;
 }
 
 
@@ -1688,6 +1713,8 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
   bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree rhs_vectype = NULL_TREE;
+  tree mask_vectype;
   tree elem_type;
   gimple *new_stmt;
   tree dummy;
@@ -1714,8 +1741,8 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
 
   is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
   mask = gimple_call_arg (stmt, 2);
-  if (TYPE_PRECISION (TREE_TYPE (mask))
-      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
+
+  if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
     return false;
 
   /* FORNOW. This restriction should be relaxed.  */
@@ -1744,6 +1771,25 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
   if (STMT_VINFO_STRIDED_P (stmt_info))
     return false;
 
+  if (TREE_CODE (mask) != SSA_NAME)
+    return false;
+
+  if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
+    return false;
+
+  if (!mask_vectype)
+    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
+
+  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
+    return false;
+
+  if (is_store)
+    {
+      tree rhs = gimple_call_arg (stmt, 3);
+      if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
+       return false;
+    }
+
   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     {
       gimple *def_stmt;
@@ -1775,22 +1821,13 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
                                 : DR_STEP (dr), size_zero_node) <= 0)
     return false;
   else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
-          || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
-    return false;
-
-  if (TREE_CODE (mask) != SSA_NAME)
-    return false;
-
-  if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt))
+          || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
+                                         TYPE_MODE (mask_vectype),
+                                         !is_store)
+          || (rhs_vectype
+              && !useless_type_conversion_p (vectype, rhs_vectype)))
     return false;
 
-  if (is_store)
-    {
-      tree rhs = gimple_call_arg (stmt, 3);
-      if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt))
-       return false;
-    }
-
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
@@ -1965,6 +2002,11 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
 
       /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
+      if (STMT_VINFO_RELATED_STMT (stmt_info))
+       {
+         stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+         stmt_info = vinfo_for_stmt (stmt);
+       }
       tree lhs = gimple_call_lhs (stmt);
       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
       set_vinfo_for_stmt (new_stmt, stmt_info);
@@ -2016,10 +2058,11 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
+         tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
+                                   misalign ? misalign & -misalign : align);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
-                                         gimple_call_arg (stmt, 1),
-                                         vec_mask, vec_rhs);
+                                         ptr, vec_mask, vec_rhs);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
@@ -2065,10 +2108,11 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
+         tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
+                                   misalign ? misalign & -misalign : align);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
-                                         gimple_call_arg (stmt, 1),
-                                         vec_mask);
+                                         ptr, vec_mask);
          gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
@@ -2083,6 +2127,11 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
     {
       /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
+      if (STMT_VINFO_RELATED_STMT (stmt_info))
+       {
+         stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+         stmt_info = vinfo_for_stmt (stmt);
+       }
       tree lhs = gimple_call_lhs (stmt);
       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
       set_vinfo_for_stmt (new_stmt, stmt_info);
@@ -2094,6 +2143,31 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
   return true;
 }
 
+/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
+   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
+   in a single step.  On success, store the binary pack code in
+   *CONVERT_CODE.  On failure *CONVERT_CODE is left untouched.  */
+
+static bool
+simple_integer_narrowing (tree vectype_out, tree vectype_in,
+                         tree_code *convert_code)
+{
+  /* Both element types must be integral.  */
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
+      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
+    return false;
+
+  /* Ask whether a NOP_EXPR narrowing between the two vector types is
+     supportable.  CODE, MULTI_STEP_CVT and INTERM_TYPES receive the
+     query's results; INTERM_TYPES is not otherwise used here.  */
+  tree_code code;
+  int multi_step_cvt = 0;
+  auto_vec <tree, 8> interm_types;
+  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
+                                       &code, &multi_step_cvt,
+                                       &interm_types)
+      /* A nonzero MULTI_STEP_CVT is rejected: only a single-step
+         narrowing qualifies.  */
+      || multi_step_cvt)
+    return false;
+
+  *convert_code = code;
+  return true;
+}
 
 /* Function vectorizable_call.
 
@@ -2241,15 +2315,48 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   else
     return false;
 
+  /* We only handle functions that do not read or clobber memory.  */
+  if (gimple_vuse (stmt))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "function reads from or writes to memory.\n");
+      return false;
+    }
+
   /* For now, we only vectorize functions if a target specific builtin
      is available.  TODO -- in some cases, it might be profitable to
      insert the calls for pieces of the vector, in order to be able
      to vectorize other operations in the loop.  */
-  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
-  if (fndecl == NULL_TREE)
+  fndecl = NULL_TREE;
+  internal_fn ifn = IFN_LAST;
+  combined_fn cfn = gimple_call_combined_fn (stmt);
+  tree callee = gimple_call_fndecl (stmt);
+
+  /* First try using an internal function.  */
+  tree_code convert_code = ERROR_MARK;
+  if (cfn != CFN_LAST
+      && (modifier == NONE
+         || (modifier == NARROW
+             && simple_integer_narrowing (vectype_out, vectype_in,
+                                          &convert_code))))
+    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
+                                         vectype_in);
+
+  /* If that fails, try asking for a target-specific built-in function.  */
+  if (ifn == IFN_LAST)
+    {
+      if (cfn != CFN_LAST)
+       fndecl = targetm.vectorize.builtin_vectorized_function
+         (cfn, vectype_out, vectype_in);
+      else
+       fndecl = targetm.vectorize.builtin_md_vectorized_function
+         (callee, vectype_out, vectype_in);
+    }
+
+  if (ifn == IFN_LAST && !fndecl)
     {
-      if (gimple_call_internal_p (stmt)
-         && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
+      if (cfn == CFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
@@ -2270,11 +2377,9 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
        }
     }
 
-  gcc_assert (!gimple_vuse (stmt));
-
   if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
-  else if (modifier == NARROW)
+  else if (modifier == NARROW && ifn == IFN_LAST)
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2290,6 +2395,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
                          "\n");
       vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
+      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
+       add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
+                      vec_promote_demote, stmt_info, 0, vect_body);
+
       return true;
     }
 
@@ -2303,9 +2412,9 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
 
   prev_stmt_info = NULL;
-  switch (modifier)
+  if (modifier == NONE || ifn != IFN_LAST)
     {
-    case NONE:
+      tree prev_res = NULL_TREE;
       for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
@@ -2333,9 +2442,30 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
-                 new_stmt = gimple_build_call_vec (fndecl, vargs);
-                 new_temp = make_ssa_name (vec_dest, new_stmt);
-                 gimple_call_set_lhs (new_stmt, new_temp);
+                 if (modifier == NARROW)
+                   {
+                     tree half_res = make_ssa_name (vectype_in);
+                     new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+                     gimple_call_set_lhs (new_stmt, half_res);
+                     vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                     if ((i & 1) == 0)
+                       {
+                         prev_res = half_res;
+                         continue;
+                       }
+                     new_temp = make_ssa_name (vec_dest);
+                     new_stmt = gimple_build_assign (new_temp, convert_code,
+                                                     prev_res, half_res);
+                   }
+                 else
+                   {
+                     if (ifn != IFN_LAST)
+                       new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+                     else
+                       new_stmt = gimple_build_call_vec (fndecl, vargs);
+                     new_temp = make_ssa_name (vec_dest, new_stmt);
+                     gimple_call_set_lhs (new_stmt, new_temp);
+                   }
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }
@@ -2379,25 +2509,42 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp, new_var);
            }
+         else if (modifier == NARROW)
+           {
+             tree half_res = make_ssa_name (vectype_in);
+             new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+             gimple_call_set_lhs (new_stmt, half_res);
+             vect_finish_stmt_generation (stmt, new_stmt, gsi);
+             if ((j & 1) == 0)
+               {
+                 prev_res = half_res;
+                 continue;
+               }
+             new_temp = make_ssa_name (vec_dest);
+             new_stmt = gimple_build_assign (new_temp, convert_code,
+                                             prev_res, half_res);
+           }
          else
            {
-             new_stmt = gimple_build_call_vec (fndecl, vargs);
+             if (ifn != IFN_LAST)
+               new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+             else
+               new_stmt = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
 
-         if (j == 0)
+         if (j == (modifier == NARROW ? 1 : 0))
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
 
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
-
-      break;
-
-    case NARROW:
+    }
+  else if (modifier == NARROW)
+    {
       for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
@@ -2427,7 +2574,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
-                 new_stmt = gimple_build_call_vec (fndecl, vargs);
+                 if (ifn != IFN_LAST)
+                   new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+                 else
+                   new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -2479,13 +2629,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
        }
 
       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
-
-      break;
-
-    case WIDEN:
-      /* No current target implements this case.  */
-      return false;
     }
+  else
+    /* No current target implements this case.  */
+    return false;
 
   vargs.release ();
 
@@ -3556,12 +3703,13 @@ vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
     return false;
 
-  if ((INTEGRAL_TYPE_P (lhs_type)
-       && (TYPE_PRECISION (lhs_type)
-          != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
-      || (INTEGRAL_TYPE_P (rhs_type)
-         && (TYPE_PRECISION (rhs_type)
-             != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
+  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
+      && ((INTEGRAL_TYPE_P (lhs_type)
+          && (TYPE_PRECISION (lhs_type)
+              != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
+         || (INTEGRAL_TYPE_P (rhs_type)
+             && (TYPE_PRECISION (rhs_type)
+                 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
     {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3619,6 +3767,21 @@ vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
       return false;
     }
 
+  if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
+      && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
+    {
+      if (dump_enabled_p ())
+       {
+         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                           "can't convert between boolean and non "
+                          "boolean vectors");
+         dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
+          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+       }
+
+      return false;
+    }
+
   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
   if (nunits_in < nunits_out)
@@ -4147,7 +4310,12 @@ vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
       /* But a conversion that does not change the bit-pattern is ok.  */
       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
-          && TYPE_UNSIGNED (TREE_TYPE (op))))
+          && TYPE_UNSIGNED (TREE_TYPE (op)))
+      /* Conversion between boolean types of different sizes is
+        a simple assignment when their vectypes are the same
+        boolean vector type.  */
+      && (!VECTOR_BOOLEAN_TYPE_P (vectype)
+         || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -4688,8 +4856,9 @@ vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
 
   /* Most operations cannot handle bit-precision types without extra
      truncations.  */
-  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
-       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
+  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
+      && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
+         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
       /* Exception are bitwise binary operations.  */
       && code != BIT_IOR_EXPR
       && code != BIT_XOR_EXPR
@@ -5425,6 +5594,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
              group.  */
           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
           first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 
+         gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
           first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
         } 
@@ -6046,6 +6216,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   bool grouped_load = false;
   bool load_lanes_p = false;
   gimple *first_stmt;
+  gimple *first_stmt_for_drptr = NULL;
   bool inv_p;
   bool negative = false;
   bool compute_in_loop = false;
@@ -6162,15 +6333,45 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
          that leaves unused vector loads around punt - we at least create
         very sub-optimal code in that case (and blow up memory,
         see PR65518).  */
+      bool force_peeling = false;
       if (first_stmt == stmt
-         && !GROUP_NEXT_ELEMENT (stmt_info)
-         && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
+         && !GROUP_NEXT_ELEMENT (stmt_info))
+       {
+         if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "single-element interleaving not supported "
+                                "for not adjacent vector loads\n");
+             return false;
+           }
+
+         /* Single-element interleaving requires peeling for gaps.  */
+         force_peeling = true;
+       }
+
+      /* If there is a gap at the end of the group or the group size cannot
+         be made a multiple of the vector element count then we access excess
+        elements in the last iteration and thus need to peel that off.  */
+      if (loop_vinfo
+         && ! STMT_VINFO_STRIDED_P (stmt_info)
+         && (force_peeling
+             || GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
+             || (!slp && vf % GROUP_SIZE (vinfo_for_stmt (first_stmt)) != 0)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "single-element interleaving not supported "
-                            "for not adjacent vector loads\n");
-         return false;
+                            "Data access with gaps requires scalar "
+                            "epilogue loop\n");
+         if (loop->inner)
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "Peeling for outer loop is not supported\n");
+             return false;
+           }
+
+         LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
        }
 
       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
@@ -6619,10 +6820,14 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   if (grouped_load)
     {
       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
-      if (slp
-          && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
-         && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
-        first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
+      /* For SLP vectorization we directly vectorize a subchain
+         without permutation.  */
+      if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
+       first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
+      /* For BB vectorization always use the first stmt to base
+        the data ref pointer on.  */
+      if (bb_vinfo)
+       first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
 
       /* Check if the chain of loads is already vectorized.  */
       if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
@@ -6834,6 +7039,24 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
+         else if (first_stmt_for_drptr
+                  && first_stmt != first_stmt_for_drptr)
+           {
+             dataref_ptr
+               = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
+                                           at_loop, offset, &dummy, gsi,
+                                           &ptr_incr, simd_lane_access_p,
+                                           &inv_p, byte_offset);
+             /* Adjust the pointer by the difference to first_stmt.  */
+             data_reference_p ptrdr
+               = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
+             tree diff = fold_convert (sizetype,
+                                       size_binop (MINUS_EXPR,
+                                                   DR_INIT (first_dr),
+                                                   DR_INIT (ptrdr)));
+             dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+                                            stmt, diff);
+           }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
@@ -7060,6 +7283,9 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                                              unshare_expr
                                                (gimple_assign_rhs1 (stmt))));
                      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
+                     new_stmt = SSA_NAME_DEF_STMT (new_temp);
+                     set_vinfo_for_stmt (new_stmt,
+                                         new_stmt_vec_info (new_stmt, vinfo));
                    }
                  else
                    {
@@ -7067,10 +7293,8 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                      gsi_next (&gsi2);
                      new_temp = vect_init_vector (stmt, scalar_dest,
                                                   vectype, &gsi2);
+                     new_stmt = SSA_NAME_DEF_STMT (new_temp);
                    }
-                 new_stmt = SSA_NAME_DEF_STMT (new_temp);
-                 set_vinfo_for_stmt (new_stmt,
-                                     new_stmt_vec_info (new_stmt, vinfo));
                }
 
              if (negative)
@@ -7158,6 +7382,19 @@ vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
   enum vect_def_type dt;
   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
 
+  /* Mask case.  */
+  if (TREE_CODE (cond) == SSA_NAME
+      && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
+    {
+      gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
+      if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
+                              &dt, comp_vectype)
+         || !*comp_vectype
+         || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
+       return false;
+      return true;
+    }
+
   if (!COMPARISON_CLASS_P (cond))
     return false;
 
@@ -7227,6 +7464,7 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
   vec<tree> vec_oprnds2 = vNULL;
   vec<tree> vec_oprnds3 = vNULL;
   tree vec_cmp_type;
+  bool masked = false;
 
   if (reduc_index && STMT_SLP_TYPE (stmt_info))
     return false;
@@ -7286,7 +7524,13 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
   if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt))
     return false;
 
-  vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
+  if (VECTOR_BOOLEAN_TYPE_P (comp_vectype))
+    {
+      vec_cmp_type = comp_vectype;
+      masked = true;
+    }
+  else
+    vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
   if (vec_cmp_type == NULL_TREE)
     return false;
 
@@ -7321,14 +7565,20 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
               auto_vec<tree, 4> ops;
              auto_vec<vec<tree>, 4> vec_defs;
 
-              ops.safe_push (TREE_OPERAND (cond_expr, 0));
-              ops.safe_push (TREE_OPERAND (cond_expr, 1));
+             if (masked)
+                 ops.safe_push (cond_expr);
+             else
+               {
+                 ops.safe_push (TREE_OPERAND (cond_expr, 0));
+                 ops.safe_push (TREE_OPERAND (cond_expr, 1));
+               }
               ops.safe_push (then_clause);
               ops.safe_push (else_clause);
               vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds3 = vec_defs.pop ();
              vec_oprnds2 = vec_defs.pop ();
-             vec_oprnds1 = vec_defs.pop ();
+             if (!masked)
+               vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();
 
               ops.release ();
@@ -7337,17 +7587,28 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
           else
             {
              gimple *gtemp;
-             vec_cond_lhs =
-               vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
-                                             stmt, comp_vectype);
-             vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
-                                 loop_vinfo, &gtemp, &dts[0]);
-
-             vec_cond_rhs =
-               vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
-                                             stmt, comp_vectype);
-             vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
-                                 loop_vinfo, &gtemp, &dts[1]);
+             if (masked)
+               {
+                 vec_cond_lhs
+                   = vect_get_vec_def_for_operand (cond_expr, stmt,
+                                                   comp_vectype);
+                 vect_is_simple_use (cond_expr, stmt_info->vinfo,
+                                     &gtemp, &dts[0]);
+               }
+             else
+               {
+                 vec_cond_lhs =
+                   vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
+                                                 stmt, comp_vectype);
+                 vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
+                                     loop_vinfo, &gtemp, &dts[0]);
+
+                 vec_cond_rhs =
+                   vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
+                                                 stmt, comp_vectype);
+                 vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
+                                     loop_vinfo, &gtemp, &dts[1]);
+               }
              if (reduc_index == 1)
                vec_then_clause = reduc_def;
              else
@@ -7369,10 +7630,14 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
        }
       else
        {
-         vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
-                                                        vec_oprnds0.pop ());
-         vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
-                                                        vec_oprnds1.pop ());
+         vec_cond_lhs
+           = vect_get_vec_def_for_stmt_copy (dts[0],
+                                             vec_oprnds0.pop ());
+         if (!masked)
+           vec_cond_rhs
+             = vect_get_vec_def_for_stmt_copy (dts[1],
+                                               vec_oprnds1.pop ());
+
          vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
                                                            vec_oprnds2.pop ());
          vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
@@ -7382,7 +7647,8 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
       if (!slp_node)
         {
          vec_oprnds0.quick_push (vec_cond_lhs);
-         vec_oprnds1.quick_push (vec_cond_rhs);
+         if (!masked)
+           vec_oprnds1.quick_push (vec_cond_rhs);
          vec_oprnds2.quick_push (vec_then_clause);
          vec_oprnds3.quick_push (vec_else_clause);
        }
@@ -7390,12 +7656,17 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
       /* Arguments are ready.  Create the new vector stmt.  */
       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
         {
-          vec_cond_rhs = vec_oprnds1[i];
           vec_then_clause = vec_oprnds2[i];
           vec_else_clause = vec_oprnds3[i];
 
-         vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
-                               vec_cond_lhs, vec_cond_rhs);
+         if (masked)
+           vec_compare = vec_cond_lhs;
+         else
+           {
+             vec_cond_rhs = vec_oprnds1[i];
+             vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
+                                   vec_cond_lhs, vec_cond_rhs);
+           }
           vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
                         vec_compare, vec_then_clause, vec_else_clause);
 
@@ -7459,6 +7730,9 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
   tree mask_type;
   tree mask;
 
+  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
+    return false;
+
   if (!VECTOR_BOOLEAN_TYPE_P (vectype))
     return false;
 
@@ -7471,9 +7745,6 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
 
   gcc_assert (ncopies >= 1);
-  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
-    return false;
-
   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
       && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
           && reduc_def))
@@ -7559,8 +7830,8 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
            }
          else
            {
-             vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, NULL);
-             vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, NULL);
+             vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
+             vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
            }
        }
       else
@@ -8160,7 +8431,7 @@ free_stmt_vec_info (gimple *stmt)
          gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
          gimple_set_bb (patt_stmt, NULL);
          tree lhs = gimple_get_lhs (patt_stmt);
-         if (TREE_CODE (lhs) == SSA_NAME)
+         if (lhs && TREE_CODE (lhs) == SSA_NAME)
            release_ssa_name (lhs);
          if (seq)
            {
@@ -8170,7 +8441,7 @@ free_stmt_vec_info (gimple *stmt)
                  gimple *seq_stmt = gsi_stmt (si);
                  gimple_set_bb (seq_stmt, NULL);
                  lhs = gimple_get_lhs (seq_stmt);
-                 if (TREE_CODE (lhs) == SSA_NAME)
+                 if (lhs && TREE_CODE (lhs) == SSA_NAME)
                    release_ssa_name (lhs);
                  free_stmt_vec_info (seq_stmt);
                }
@@ -8378,10 +8649,7 @@ vect_is_simple_use (tree operand, vec_info *vinfo,
   else
     {
       stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
-      if (is_a <bb_vec_info> (vinfo) && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
-       *dt = vect_external_def;
-      else
-       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
+      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
     }
 
   if (dump_enabled_p ())