md.texi (vec_load_lanes, [...]): Document.
authorRichard Sandiford <richard.sandiford@linaro.org>
Wed, 20 Apr 2011 09:10:36 +0000 (09:10 +0000)
committerRichard Sandiford <rsandifo@gcc.gnu.org>
Wed, 20 Apr 2011 09:10:36 +0000 (09:10 +0000)
gcc/
* doc/md.texi (vec_load_lanes, vec_store_lanes): Document.
* optabs.h (COI_vec_load_lanes, COI_vec_store_lanes): New
convert_optab_index values.
(vec_load_lanes_optab, vec_store_lanes_optab): New convert optabs.
* genopinit.c (optabs): Initialize the new optabs.
* internal-fn.def (LOAD_LANES, STORE_LANES): New internal functions.
* internal-fn.c (get_multi_vector_move, expand_LOAD_LANES)
(expand_STORE_LANES): New functions.
* tree.h (build_array_type_nelts): Declare.
* tree.c (build_array_type_nelts): New function.
* tree-vectorizer.h (vect_model_store_cost): Add a bool argument.
(vect_model_load_cost): Likewise.
(vect_store_lanes_supported, vect_load_lanes_supported)
(vect_record_strided_load_vectors): Declare.
* tree-vect-data-refs.c (vect_lanes_optab_supported_p)
(vect_store_lanes_supported, vect_load_lanes_supported): New functions.
(vect_transform_strided_load): Split out statement recording into...
(vect_record_strided_load_vectors): ...this new function.
* tree-vect-stmts.c (create_vector_array, read_vector_array)
(write_vector_array, create_array_ref): New functions.
(vect_model_store_cost): Add store_lanes_p argument.
(vect_model_load_cost): Add load_lanes_p argument.
(vectorizable_store): Try to use store-lanes functions for
interleaved stores.
(vectorizable_load): Likewise load-lanes and loads.
* tree-vect-slp.c (vect_get_and_check_slp_defs)
(vect_build_slp_tree): Update calls to vect_model_store_cost and
vect_model_load_cost.

From-SVN: r172760

12 files changed:
gcc/ChangeLog
gcc/doc/md.texi
gcc/genopinit.c
gcc/internal-fn.c
gcc/internal-fn.def
gcc/optabs.h
gcc/tree-vect-data-refs.c
gcc/tree-vect-slp.c
gcc/tree-vect-stmts.c
gcc/tree-vectorizer.h
gcc/tree.c
gcc/tree.h

index 3474847c9d1ddab7cb8abdec5cab8ae166d228dd..7e751adf07a1fd3b90a56ffb0f8d0d9fa3664e92 100644 (file)
@@ -1,3 +1,33 @@
+2011-04-20  Richard Sandiford  <richard.sandiford@linaro.org>
+
+       * doc/md.texi (vec_load_lanes, vec_store_lanes): Document.
+       * optabs.h (COI_vec_load_lanes, COI_vec_store_lanes): New
+       convert_optab_index values.
+       (vec_load_lanes_optab, vec_store_lanes_optab): New convert optabs.
+       * genopinit.c (optabs): Initialize the new optabs.
+       * internal-fn.def (LOAD_LANES, STORE_LANES): New internal functions.
+       * internal-fn.c (get_multi_vector_move, expand_LOAD_LANES)
+       (expand_STORE_LANES): New functions.
+       * tree.h (build_array_type_nelts): Declare.
+       * tree.c (build_array_type_nelts): New function.
+       * tree-vectorizer.h (vect_model_store_cost): Add a bool argument.
+       (vect_model_load_cost): Likewise.
+       (vect_store_lanes_supported, vect_load_lanes_supported)
+       (vect_record_strided_load_vectors): Declare.
+       * tree-vect-data-refs.c (vect_lanes_optab_supported_p)
+       (vect_store_lanes_supported, vect_load_lanes_supported): New functions.
+       (vect_transform_strided_load): Split out statement recording into...
+       (vect_record_strided_load_vectors): ...this new function.
+       * tree-vect-stmts.c (create_vector_array, read_vector_array)
+       (write_vector_array, create_array_ref): New functions.
+       (vect_model_store_cost): Add store_lanes_p argument.
+       (vect_model_load_cost): Add load_lanes_p argument.
+       (vectorizable_store): Try to use store-lanes functions for
+       interleaved stores.
+       (vectorizable_load): Likewise load-lanes and loads.
+       * tree-vect-slp.c (vect_get_and_check_slp_defs)
+       (vect_build_slp_tree): Update calls to vect_model_store_cost and
+       vect_model_load_cost.
+
 2011-04-20  Richard Sandiford  <richard.sandiford@linaro.org>
 
        * tree-vect-stmts.c (vectorizable_store): Only chain one related
index df99ad4d6261d079a90b4da5879c369ab2acc898..0e68db707486a9a22cf346400d75b56b6e7b44d4 100644 (file)
@@ -3846,6 +3846,48 @@ into consecutive memory locations.  Operand 0 is the first of the
 consecutive memory locations, operand 1 is the first register, and
 operand 2 is a constant: the number of consecutive registers.
 
+@cindex @code{vec_load_lanes@var{m}@var{n}} instruction pattern
+@item @samp{vec_load_lanes@var{m}@var{n}}
+Perform an interleaved load of several vectors from memory operand 1
+into register operand 0.  Both operands have mode @var{m}.  The register
+operand is viewed as holding consecutive vectors of mode @var{n},
+while the memory operand is a flat array that contains the same number
+of elements.  The operation is equivalent to:
+
+@smallexample
+int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n});
+for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++)
+  for (i = 0; i < c; i++)
+    operand0[i][j] = operand1[j * c + i];
+@end smallexample
+
+For example, @samp{vec_load_lanestiv4hi} loads 8 16-bit values
+from memory into a register of mode @samp{TI}@.  The register
+contains two consecutive vectors of mode @samp{V4HI}@.
+
+This pattern can only be used if:
+@smallexample
+TARGET_ARRAY_MODE_SUPPORTED_P (@var{n}, @var{c})
+@end smallexample
+is true.  GCC assumes that, if a target supports this kind of
+instruction for some mode @var{n}, it also supports unaligned
+loads for vectors of mode @var{n}.
+
+@cindex @code{vec_store_lanes@var{m}@var{n}} instruction pattern
+@item @samp{vec_store_lanes@var{m}@var{n}}
+Equivalent to @samp{vec_load_lanes@var{m}@var{n}}, with the memory
+and register operands reversed.  That is, the instruction is
+equivalent to:
+
+@smallexample
+int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n});
+for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++)
+  for (i = 0; i < c; i++)
+    operand0[j * c + i] = operand1[i][j];
+@end smallexample
+
+for a memory operand 0 and register operand 1.
+
 @cindex @code{vec_set@var{m}} instruction pattern
 @item @samp{vec_set@var{m}}
 Set given field in the vector value.  Operand 0 is the vector to modify,
index ab31968281c0a657031e683cf396a379be4ab705..35e994b024c5181e18d9e00cdacc41f999c09b11 100644 (file)
@@ -74,6 +74,8 @@ static const char * const optabs[] =
   "set_convert_optab_handler (fractuns_optab, $B, $A, CODE_FOR_$(fractuns$Q$a$I$b2$))",
   "set_convert_optab_handler (satfract_optab, $B, $A, CODE_FOR_$(satfract$a$Q$b2$))",
   "set_convert_optab_handler (satfractuns_optab, $B, $A, CODE_FOR_$(satfractuns$I$a$Q$b2$))",
+  "set_convert_optab_handler (vec_load_lanes_optab, $A, $B, CODE_FOR_$(vec_load_lanes$a$b$))",
+  "set_convert_optab_handler (vec_store_lanes_optab, $A, $B, CODE_FOR_$(vec_store_lanes$a$b$))",
   "set_optab_handler (add_optab, $A, CODE_FOR_$(add$P$a3$))",
   "set_optab_handler (addv_optab, $A, CODE_FOR_$(add$F$a3$)),\n\
     set_optab_handler (add_optab, $A, CODE_FOR_$(add$F$a3$))",
index 9f087b83105e2764ffeaddfd6394ad2ec9bcbf36..ddf4157d25a3df3e9087a88cc6d6b83de0be4645 100644 (file)
@@ -42,6 +42,73 @@ const int internal_fn_flags_array[] = {
   0
 };
 
+/* ARRAY_TYPE is an array type.  Look up the handler that load-lanes-style
+   optab OPTAB provides for the mode of ARRAY_TYPE and the mode of its
+   element type, and return its insn code.  The handler must exist.  */
+
+static enum insn_code
+get_multi_vector_move (tree array_type, convert_optab optab)
+{
+  enum machine_mode array_mode, elt_mode;
+  enum insn_code handler;
+
+  gcc_assert (TREE_CODE (array_type) == ARRAY_TYPE);
+  elt_mode = TYPE_MODE (TREE_TYPE (array_type));
+  array_mode = TYPE_MODE (array_type);
+
+  handler = convert_optab_handler (optab, array_mode, elt_mode);
+  gcc_assert (handler != CODE_FOR_nothing);
+  return handler;
+}
+
+/* Expand LOAD_LANES call STMT.  Argument 0 of the call must expand to
+   a MEM; it is loaded into the call's lhs using the vec_load_lanes
+   handler selected by the type of the lhs.  */
+
+static void
+expand_LOAD_LANES (gimple stmt)
+{
+  struct expand_operand ops[2];
+  tree dest = gimple_call_lhs (stmt);
+  tree src = gimple_call_arg (stmt, 0);
+  tree array_type = TREE_TYPE (dest);
+  enum machine_mode array_mode;
+  rtx dest_rtx, src_mem;
+
+  dest_rtx = expand_expr (dest, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  src_mem = expand_normal (src);
+
+  /* Give the memory operand the mode of the lhs type.  */
+  gcc_assert (MEM_P (src_mem));
+  array_mode = TYPE_MODE (array_type);
+  PUT_MODE (src_mem, array_mode);
+
+  create_output_operand (&ops[0], dest_rtx, array_mode);
+  create_fixed_operand (&ops[1], src_mem);
+  expand_insn (get_multi_vector_move (array_type, vec_load_lanes_optab),
+              2, ops);
+}
+
+/* Expand STORE_LANES call STMT.  The call's lhs must expand to a MEM;
+   argument 0 is stored into it using the vec_store_lanes handler
+   selected by the type of the argument.  */
+
+static void
+expand_STORE_LANES (gimple stmt)
+{
+  struct expand_operand ops[2];
+  tree dest = gimple_call_lhs (stmt);
+  tree src = gimple_call_arg (stmt, 0);
+  tree array_type = TREE_TYPE (src);
+  enum machine_mode array_mode;
+  rtx dest_mem, src_rtx;
+
+  dest_mem = expand_expr (dest, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  src_rtx = expand_normal (src);
+
+  /* Give the memory operand the mode of the argument's type.  */
+  gcc_assert (MEM_P (dest_mem));
+  array_mode = TYPE_MODE (array_type);
+  PUT_MODE (dest_mem, array_mode);
+
+  create_fixed_operand (&ops[0], dest_mem);
+  create_input_operand (&ops[1], src_rtx, array_mode);
+  expand_insn (get_multi_vector_move (array_type, vec_store_lanes_optab),
+              2, ops);
+}
+
 /* Routines to expand each internal function, indexed by function number.
    Each routine has the prototype:
 
index b9b622db71c6e7cce4f2044ce4e997a9c302ae9e..17ce304bff176e32146f24abc4308a074d5c91ef 100644 (file)
@@ -37,3 +37,6 @@ along with GCC; see the file COPYING3.  If not see
      void expand_NAME (gimple stmt)
 
    where STMT is the statement that performs the call.  */
+
+DEF_INTERNAL_FN (LOAD_LANES, ECF_CONST | ECF_LEAF)
+DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF)
index be61f549fd4ae6461d0b2b6b0d34c29b93369066..db6e65e89758c75d9fedf7fd9af22d3b0447cf8c 100644 (file)
@@ -578,6 +578,9 @@ enum convert_optab_index
   COI_satfract,
   COI_satfractuns,
 
+  COI_vec_load_lanes,
+  COI_vec_store_lanes,
+
   COI_MAX
 };
 
@@ -598,6 +601,8 @@ enum convert_optab_index
 #define fractuns_optab (&convert_optab_table[COI_fractuns])
 #define satfract_optab (&convert_optab_table[COI_satfract])
 #define satfractuns_optab (&convert_optab_table[COI_satfractuns])
+#define vec_load_lanes_optab (&convert_optab_table[COI_vec_load_lanes])
+#define vec_store_lanes_optab (&convert_optab_table[COI_vec_store_lanes])
 
 /* Contains the optab used for each rtx code.  */
 extern optab code_to_optab[NUM_RTX_CODE + 1];
index d7d174fbba2fb84be1d3e2c03274e242931dfd86..8f198c67a5a802d06b79ff1efe96e729722af8b2 100644 (file)
@@ -43,6 +43,45 @@ along with GCC; see the file COPYING3.  If not see
 #include "expr.h"
 #include "optabs.h"
 
+/* Return true if load- or store-lanes optab OPTAB is implemented for
+   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB; it is only
+   used in the dump messages.  */
+
+static bool
+vect_lanes_optab_supported_p (const char *name, convert_optab optab,
+                             tree vectype, unsigned HOST_WIDE_INT count)
+{
+  enum machine_mode mode, array_mode;
+  bool limit_p;
+
+  /* Ask for an integer mode with room for COUNT vectors.  The LIMIT_P
+     argument of mode_for_size is cleared when the target reports that
+     it supports arrays of COUNT vectors of this mode.  */
+  mode = TYPE_MODE (vectype);
+  limit_p = !targetm.array_mode_supported_p (mode, count);
+  array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
+                             MODE_INT, limit_p);
+
+  if (array_mode == BLKmode)
+    {
+      /* COUNT is unsigned, so print it with the unsigned format.  */
+      if (vect_print_dump_info (REPORT_DETAILS))
+       fprintf (vect_dump,
+                "no array mode for %s[" HOST_WIDE_INT_PRINT_UNSIGNED "]",
+                GET_MODE_NAME (mode), count);
+      return false;
+    }
+
+  /* An array mode exists; the optab must also provide a handler for
+     the (array mode, vector mode) pair.  */
+  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+       fprintf (vect_dump, "cannot use %s<%s><%s>",
+                name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
+      return false;
+    }
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "can use %s<%s><%s>",
+            name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
+
+  return true;
+}
+
+
 /* Return the smallest scalar part of STMT.
    This is used to determine the vectype of the stmt.  We generally set the
    vectype according to the type of the result (lhs).  For stmts whose
@@ -3376,6 +3415,18 @@ vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
 }
 
 
+/* Return TRUE if vect_lanes_optab_supported_p reports that
+   vec_store_lanes can be used for COUNT vectors of type VECTYPE.  */
+
+bool
+vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
+{
+  const char *optab_name = "vec_store_lanes";
+
+  return vect_lanes_optab_supported_p (optab_name, vec_store_lanes_optab,
+                                      vectype, count);
+}
+
+
 /* Function vect_permute_store_chain.
 
    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
@@ -3830,6 +3881,16 @@ vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
   return true;
 }
 
+/* Return TRUE if vect_lanes_optab_supported_p reports that
+   vec_load_lanes can be used for COUNT vectors of type VECTYPE.  */
+
+bool
+vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
+{
+  const char *optab_name = "vec_load_lanes";
+
+  return vect_lanes_optab_supported_p (optab_name, vec_load_lanes_optab,
+                                      vectype, count);
+}
 
 /* Function vect_permute_load_chain.
 
@@ -3977,19 +4038,28 @@ void
 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
                             gimple_stmt_iterator *gsi)
 {
-  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
-  gimple next_stmt, new_stmt;
   VEC(tree,heap) *result_chain = NULL;
-  unsigned int i, gap_count;
-  tree tmp_data_ref;
 
   /* DR_CHAIN contains input data-refs that are a part of the interleaving.
      RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
      vectors, that are ready for vector computation.  */
   result_chain = VEC_alloc (tree, heap, size);
-  /* Permute.  */
   vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
+  vect_record_strided_load_vectors (stmt, result_chain);
+  VEC_free (tree, heap, result_chain);
+}
+
+/* RESULT_CHAIN contains the output of a group of strided loads that were
+   generated as part of the vectorization of STMT.  Assign the statement
+   for each vector to the associated scalar statement.  */
+
+void
+vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
+{
+  gimple first_stmt = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
+  gimple next_stmt, new_stmt;
+  unsigned int i, gap_count;
+  tree tmp_data_ref;
 
   /* Put a permuted data-ref in the VECTORIZED_STMT field.
      Since we scan the chain starting from it's first node, their order
@@ -4051,8 +4121,6 @@ vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
            break;
         }
     }
-
-  VEC_free (tree, heap, result_chain);
 }
 
 /* Function vect_force_dr_alignment_p.
index 6eb67ae5a75e40fccdd9da6660efd92a0496e509..a9540c229a8154500156d14449c79a20e84ba033 100644 (file)
@@ -215,7 +215,8 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
-           vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
+           vect_model_store_cost (stmt_info, ncopies_for_cost, false,
+                                  dt[0], slp_node);
        }
 
       else
@@ -579,7 +580,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 
                   /* Analyze costs (for the first stmt in the group).  */
                   vect_model_load_cost (vinfo_for_stmt (stmt),
-                                        ncopies_for_cost, *node);
+                                        ncopies_for_cost, false, *node);
                 }
 
               /* Store the place of this load in the interleaving chain.  In
index 879153d8031bbf3e73aed9cdbe9a96e64f38b7d4..256f7f2e187dac26a544c9230e98a61d836b0dcc 100644 (file)
@@ -42,6 +42,82 @@ along with GCC; see the file COPYING3.  If not see
 #include "langhooks.h"
 
 
+/* Create a temporary variable whose type is an array of NELEMS
+   elements of type ELEM_TYPE, and return it.  */
+
+static tree
+create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
+{
+  tree array_type = build_array_type_nelts (elem_type, nelems);
+
+  return create_tmp_var (array_type, "vect_array");
+}
+
+/* ARRAY is an array of vectors created by create_vector_array.
+   Emit a load of element N of ARRAY as part of the vectorization of
+   STMT, and return the SSA_NAME that receives the vector.  The
+   destination variable is derived from scalar destination
+   SCALAR_DEST.  */
+
+static tree
+read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
+                  tree array, unsigned HOST_WIDE_INT n)
+{
+  tree array_type = TREE_TYPE (array);
+  tree elt_type, dest_var, index, elt_ref, result;
+  gimple load;
+
+  gcc_assert (TREE_CODE (array_type) == ARRAY_TYPE);
+  elt_type = TREE_TYPE (array_type);
+  dest_var = vect_create_destination_var (scalar_dest, elt_type);
+  index = build_int_cst (size_type_node, n);
+  elt_ref = build4 (ARRAY_REF, elt_type, array, index, NULL_TREE, NULL_TREE);
+
+  /* Build DEST_VAR = ARRAY[N], then give the assignment an SSA lhs.  */
+  load = gimple_build_assign (dest_var, elt_ref);
+  result = make_ssa_name (dest_var, load);
+  gimple_assign_set_lhs (load, result);
+  vect_finish_stmt_generation (stmt, load, gsi);
+  mark_symbols_for_renaming (load);
+
+  return result;
+}
+
+/* ARRAY is an array of vectors created by create_vector_array.
+   Emit an assignment of SSA_NAME VECT to element N of ARRAY.  The
+   assignment is part of the vectorization of STMT.  */
+
+static void
+write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
+                   tree array, unsigned HOST_WIDE_INT n)
+{
+  tree index = build_int_cst (size_type_node, n);
+  tree slot = build4 (ARRAY_REF, TREE_TYPE (vect), array, index,
+                     NULL_TREE, NULL_TREE);
+  gimple store = gimple_build_assign (slot, vect);
+
+  vect_finish_stmt_generation (stmt, store, gsi);
+  mark_symbols_for_renaming (store);
+}
+
+/* PTR is a pointer to an array of type TYPE.  Build and return a
+   MEM_REF of type TYPE for *PTR.  The alias pointer type is taken from
+   the reference in FIRST_DR, whose group of references the new
+   reference replaces.  */
+
+static tree
+create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
+{
+  tree alias_type = reference_alias_ptr_type (DR_REF (first_dr));
+  tree zero_offset = build_int_cst (alias_type, 0);
+  tree ref = build2 (MEM_REF, type, ptr, zero_offset);
+  struct ptr_info_def *ptr_info = get_ptr_info (ptr);
+
+  /* Arrays have the same alignment as their type.  */
+  ptr_info->align = TYPE_ALIGN_UNIT (type);
+  ptr_info->misalign = 0;
+  return ref;
+}
+
 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
 
 /* Function vect_mark_relevant.
@@ -648,7 +724,8 @@ vect_cost_strided_group_size (stmt_vec_info stmt_info)
 
 void
 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
-                      enum vect_def_type dt, slp_tree slp_node)
+                      bool store_lanes_p, enum vect_def_type dt,
+                      slp_tree slp_node)
 {
   int group_size;
   unsigned int inside_cost = 0, outside_cost = 0;
@@ -685,9 +762,11 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
       first_dr = STMT_VINFO_DATA_REF (stmt_info);
     }
 
-  /* Is this an access in a group of stores, which provide strided access?
-     If so, add in the cost of the permutes.  */
-  if (group_size > 1)
+  /* We assume that the cost of a single store-lanes instruction is
+     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
+     access is instead being provided by a permute-and-store operation,
+     include the cost of the permutes.  */
+  if (!store_lanes_p && group_size > 1)
     {
       /* Uses a high and low interleave operation for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
@@ -763,8 +842,8 @@ vect_get_store_cost (struct data_reference *dr, int ncopies,
    access scheme chosen.  */
 
 void
-vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
-
+vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
+                     slp_tree slp_node)
 {
   int group_size;
   gimple first_stmt;
@@ -789,9 +868,11 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
       first_dr = dr;
     }
 
-  /* Is this an access in a group of loads providing strided access?
-     If so, add in the cost of the permutes.  */
-  if (group_size > 1)
+  /* We assume that the cost of a single load-lanes instruction is
+     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
+     access is instead being provided by a load-and-permute operation,
+     include the cost of the permutes.  */
+  if (!load_lanes_p && group_size > 1)
     {
       /* Uses an even and odd extract operations for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
@@ -3329,6 +3410,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree elem_type;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   struct loop *loop = NULL;
   enum machine_mode vec_mode;
@@ -3344,6 +3426,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   int j;
   gimple next_stmt, first_stmt = NULL;
   bool strided_store = false;
+  bool store_lanes_p = false;
   unsigned int group_size, i;
   VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
   bool inv_p;
@@ -3351,6 +3434,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   bool slp = (slp_node != NULL);
   unsigned int vec_num;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  tree aggr_type;
 
   if (loop_vinfo)
     loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -3404,7 +3488,8 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   /* The scalar rhs type needs to be trivially convertible to the vector
      component type.  This should always be the case.  */
-  if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
+  elem_type = TREE_TYPE (vectype);
+  if (!useless_type_conversion_p (elem_type, TREE_TYPE (op)))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "???  operands of different types");
@@ -3434,7 +3519,9 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
-         if (!vect_strided_store_supported (vectype, group_size))
+         if (vect_store_lanes_supported (vectype, group_size))
+           store_lanes_p = true;
+         else if (!vect_strided_store_supported (vectype, group_size))
            return false;
        }
 
@@ -3462,7 +3549,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
-      vect_model_store_cost (stmt_info, ncopies, dt, NULL);
+      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
       return true;
     }
 
@@ -3517,6 +3604,16 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
   gcc_assert (alignment_support_scheme);
+  /* Targets with store-lane instructions must not require explicit
+     realignment.  */
+  gcc_assert (!store_lanes_p
+             || alignment_support_scheme == dr_aligned
+             || alignment_support_scheme == dr_unaligned_supported);
+
+  if (store_lanes_p)
+    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
+  else
+    aggr_type = vectype;
 
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
@@ -3605,7 +3702,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
          /* We should have catched mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
-         dataref_ptr = vect_create_data_ref_ptr (first_stmt, vectype, NULL,
+         dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
                                                  NULL_TREE, &dummy, gsi,
                                                  &ptr_incr, false, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
@@ -3628,70 +3725,93 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
              VEC_replace(tree, dr_chain, i, vec_oprnd);
              VEC_replace(tree, oprnds, i, vec_oprnd);
            }
-         dataref_ptr =
-               bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
+         dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
+                                        TYPE_SIZE_UNIT (aggr_type));
        }
 
-      new_stmt = NULL;
-      if (strided_store)
+      if (store_lanes_p)
        {
-         result_chain = VEC_alloc (tree, heap, group_size);
-         /* Permute.  */
-         vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
-                                   &result_chain);
-       }
+         tree vec_array;
 
-      next_stmt = first_stmt;
-      for (i = 0; i < vec_num; i++)
-       {
-         struct ptr_info_def *pi;
-
-         if (i > 0)
-           /* Bump the vector pointer.  */
-           dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
-                                          NULL_TREE);
-
-         if (slp)
-           vec_oprnd = VEC_index (tree, vec_oprnds, i);
-         else if (strided_store)
-           /* For strided stores vectorized defs are interleaved in
-              vect_permute_store_chain().  */
-           vec_oprnd = VEC_index (tree, result_chain, i);
-
-         data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
-                            build_int_cst (reference_alias_ptr_type
-                                           (DR_REF (first_dr)), 0));
-         pi = get_ptr_info (dataref_ptr);
-         pi->align = TYPE_ALIGN_UNIT (vectype);
-          if (aligned_access_p (first_dr))
-           pi->misalign = 0;
-          else if (DR_MISALIGNMENT (first_dr) == -1)
+         /* Combine all the vectors into an array.  */
+         vec_array = create_vector_array (vectype, vec_num);
+         for (i = 0; i < vec_num; i++)
            {
-             TREE_TYPE (data_ref)
-               = build_aligned_type (TREE_TYPE (data_ref),
-                                     TYPE_ALIGN (TREE_TYPE (vectype)));
-             pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
-             pi->misalign = 0;
-           }
-         else
-           {
-             TREE_TYPE (data_ref)
-               = build_aligned_type (TREE_TYPE (data_ref),
-                                     TYPE_ALIGN (TREE_TYPE (vectype)));
-             pi->misalign = DR_MISALIGNMENT (first_dr);
+             vec_oprnd = VEC_index (tree, dr_chain, i);
+             write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }
 
-         /* Arguments are ready.  Create the new vector stmt.  */
-         new_stmt = gimple_build_assign (data_ref, vec_oprnd);
+         /* Emit:
+              MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
+         data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
+         new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
+         gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);
+       }
+      else
+       {
+         new_stmt = NULL;
+         if (strided_store)
+           {
+             result_chain = VEC_alloc (tree, heap, group_size);
+             /* Permute.  */
+             vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
+                                       &result_chain);
+           }
 
-          if (slp)
-            continue;
+         next_stmt = first_stmt;
+         for (i = 0; i < vec_num; i++)
+           {
+             struct ptr_info_def *pi;
+
+             if (i > 0)
+               /* Bump the vector pointer.  */
+               dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+                                              stmt, NULL_TREE);
+
+             if (slp)
+               vec_oprnd = VEC_index (tree, vec_oprnds, i);
+             else if (strided_store)
+               /* For strided stores vectorized defs are interleaved in
+                  vect_permute_store_chain().  */
+               vec_oprnd = VEC_index (tree, result_chain, i);
+
+             data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
+                                build_int_cst (reference_alias_ptr_type
+                                               (DR_REF (first_dr)), 0));
+             pi = get_ptr_info (dataref_ptr);
+             pi->align = TYPE_ALIGN_UNIT (vectype);
+             if (aligned_access_p (first_dr))
+               pi->misalign = 0;
+             else if (DR_MISALIGNMENT (first_dr) == -1)
+               {
+                 TREE_TYPE (data_ref)
+                   = build_aligned_type (TREE_TYPE (data_ref),
+                                         TYPE_ALIGN (elem_type));
+                 pi->align = TYPE_ALIGN_UNIT (elem_type);
+                 pi->misalign = 0;
+               }
+             else
+               {
+                 TREE_TYPE (data_ref)
+                   = build_aligned_type (TREE_TYPE (data_ref),
+                                         TYPE_ALIGN (elem_type));
+                 pi->misalign = DR_MISALIGNMENT (first_dr);
+               }
 
-         next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
-         if (!next_stmt)
-           break;
+             /* Arguments are ready.  Create the new vector stmt.  */
+             new_stmt = gimple_build_assign (data_ref, vec_oprnd);
+             vect_finish_stmt_generation (stmt, new_stmt, gsi);
+             mark_symbols_for_renaming (new_stmt);
+
+             if (slp)
+               continue;
+
+             next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
+             if (!next_stmt)
+               break;
+           }
        }
       if (!slp)
        {
@@ -3810,6 +3930,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   bool nested_in_vect_loop = false;
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree elem_type;
   tree new_temp;
   enum machine_mode mode;
   gimple new_stmt = NULL;
@@ -3826,6 +3947,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   gimple phi = NULL;
   VEC(tree,heap) *dr_chain = NULL;
   bool strided_load = false;
+  bool load_lanes_p = false;
   gimple first_stmt;
   tree scalar_type;
   bool inv_p;
@@ -3838,6 +3960,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   enum tree_code code;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
   int vf;
+  tree aggr_type;
 
   if (loop_vinfo)
     {
@@ -3914,7 +4037,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   /* The vector component type needs to be trivially convertible to the
      scalar lhs.  This should always be the case.  */
-  if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
+  elem_type = TREE_TYPE (vectype);
+  if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "???  operands of different types");
@@ -3932,7 +4056,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
-         if (!vect_strided_load_supported (vectype, group_size))
+         if (vect_load_lanes_supported (vectype, group_size))
+           load_lanes_p = true;
+         else if (!vect_strided_load_supported (vectype, group_size))
            return false;
        }
     }
@@ -3959,7 +4085,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
-      vect_model_load_cost (stmt_info, ncopies, NULL);
+      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
       return true;
     }
 
@@ -4000,6 +4126,11 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
   gcc_assert (alignment_support_scheme);
+  /* Targets with load-lane instructions must not require explicit
+     realignment.  */
+  gcc_assert (!load_lanes_p
+             || alignment_support_scheme == dr_aligned
+             || alignment_support_scheme == dr_unaligned_supported);
 
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
@@ -4131,208 +4262,250 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (negative)
     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
 
+  if (load_lanes_p)
+    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
+  else
+    aggr_type = vectype;
+
   prev_stmt_info = NULL;
   for (j = 0; j < ncopies; j++)
     {
-      /* 1. Create the vector pointer update chain.  */
+      /* 1. Create the vector or array pointer update chain.  */
       if (j == 0)
-        dataref_ptr = vect_create_data_ref_ptr (first_stmt, vectype, at_loop,
+        dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
                                                offset, &dummy, gsi,
                                                &ptr_incr, false, &inv_p);
       else
-        dataref_ptr =
-               bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
+        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
+                                      TYPE_SIZE_UNIT (aggr_type));
 
       if (strided_load || slp_perm)
        dr_chain = VEC_alloc (tree, heap, vec_num);
 
-      for (i = 0; i < vec_num; i++)
+      if (load_lanes_p)
        {
-         if (i > 0)
-           dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
-                                          NULL_TREE);
+         tree vec_array;
+
+         vec_array = create_vector_array (vectype, vec_num);
+
+         /* Emit:
+              VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
+         data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
+         new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
+         gimple_call_set_lhs (new_stmt, vec_array);
+         vect_finish_stmt_generation (stmt, new_stmt, gsi);
+         mark_symbols_for_renaming (new_stmt);
 
-         /* 2. Create the vector-load in the loop.  */
-         switch (alignment_support_scheme)
+         /* Extract each vector into an SSA_NAME.  */
+         for (i = 0; i < vec_num; i++)
            {
-           case dr_aligned:
-           case dr_unaligned_supported:
-             {
-               struct ptr_info_def *pi;
-               data_ref
-                 = build2 (MEM_REF, vectype, dataref_ptr,
-                           build_int_cst (reference_alias_ptr_type
-                                          (DR_REF (first_dr)), 0));
-               pi = get_ptr_info (dataref_ptr);
-               pi->align = TYPE_ALIGN_UNIT (vectype);
-               if (alignment_support_scheme == dr_aligned)
-                 {
-                   gcc_assert (aligned_access_p (first_dr));
-                   pi->misalign = 0;
-                 }
-               else if (DR_MISALIGNMENT (first_dr) == -1)
+             new_temp = read_vector_array (stmt, gsi, scalar_dest,
+                                           vec_array, i);
+             VEC_quick_push (tree, dr_chain, new_temp);
+           }
+
+         /* Record the mapping between SSA_NAMEs and statements.  */
+         vect_record_strided_load_vectors (stmt, dr_chain);
+       }
+      else
+       {
+         for (i = 0; i < vec_num; i++)
+           {
+             if (i > 0)
+               dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+                                              stmt, NULL_TREE);
+
+             /* 2. Create the vector-load in the loop.  */
+             switch (alignment_support_scheme)
+               {
+               case dr_aligned:
+               case dr_unaligned_supported:
                  {
-                   TREE_TYPE (data_ref)
-                     = build_aligned_type (TREE_TYPE (data_ref),
-                                           TYPE_ALIGN (TREE_TYPE (vectype)));
-                   pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
-                   pi->misalign = 0;
+                   struct ptr_info_def *pi;
+                   data_ref
+                     = build2 (MEM_REF, vectype, dataref_ptr,
+                               build_int_cst (reference_alias_ptr_type
+                                              (DR_REF (first_dr)), 0));
+                   pi = get_ptr_info (dataref_ptr);
+                   pi->align = TYPE_ALIGN_UNIT (vectype);
+                   if (alignment_support_scheme == dr_aligned)
+                     {
+                       gcc_assert (aligned_access_p (first_dr));
+                       pi->misalign = 0;
+                     }
+                   else if (DR_MISALIGNMENT (first_dr) == -1)
+                     {
+                       TREE_TYPE (data_ref)
+                         = build_aligned_type (TREE_TYPE (data_ref),
+                                               TYPE_ALIGN (elem_type));
+                       pi->align = TYPE_ALIGN_UNIT (elem_type);
+                       pi->misalign = 0;
+                     }
+                   else
+                     {
+                       TREE_TYPE (data_ref)
+                         = build_aligned_type (TREE_TYPE (data_ref),
+                                               TYPE_ALIGN (elem_type));
+                       pi->misalign = DR_MISALIGNMENT (first_dr);
+                     }
+                   break;
                  }
-               else
+               case dr_explicit_realign:
                  {
-                   TREE_TYPE (data_ref)
-                     = build_aligned_type (TREE_TYPE (data_ref),
-                                           TYPE_ALIGN (TREE_TYPE (vectype)));
-                   pi->misalign = DR_MISALIGNMENT (first_dr);
+                   tree ptr, bump;
+                   tree vs_minus_1;
+
+                   vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
+
+                   if (compute_in_loop)
+                     msq = vect_setup_realignment (first_stmt, gsi,
+                                                   &realignment_token,
+                                                   dr_explicit_realign,
+                                                   dataref_ptr, NULL);
+
+                   new_stmt = gimple_build_assign_with_ops
+                                (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
+                                 build_int_cst
+                                 (TREE_TYPE (dataref_ptr),
+                                  -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
+                   ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
+                   gimple_assign_set_lhs (new_stmt, ptr);
+                   vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                   data_ref
+                     = build2 (MEM_REF, vectype, ptr,
+                               build_int_cst (reference_alias_ptr_type
+                                                (DR_REF (first_dr)), 0));
+                   vec_dest = vect_create_destination_var (scalar_dest,
+                                                           vectype);
+                   new_stmt = gimple_build_assign (vec_dest, data_ref);
+                   new_temp = make_ssa_name (vec_dest, new_stmt);
+                   gimple_assign_set_lhs (new_stmt, new_temp);
+                   gimple_set_vdef (new_stmt, gimple_vdef (stmt));
+                   gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+                   vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                   msq = new_temp;
+
+                   bump = size_binop (MULT_EXPR, vs_minus_1,
+                                      TYPE_SIZE_UNIT (scalar_type));
+                   ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
+                   new_stmt = gimple_build_assign_with_ops
+                                (BIT_AND_EXPR, NULL_TREE, ptr,
+                                 build_int_cst
+                                 (TREE_TYPE (ptr),
+                                  -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
+                   ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
+                   gimple_assign_set_lhs (new_stmt, ptr);
+                   vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                   data_ref
+                     = build2 (MEM_REF, vectype, ptr,
+                               build_int_cst (reference_alias_ptr_type
+                                                (DR_REF (first_dr)), 0));
+                   break;
                  }
-               break;
-             }
-           case dr_explicit_realign:
-             {
-               tree ptr, bump;
-               tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
-
-               if (compute_in_loop)
-                 msq = vect_setup_realignment (first_stmt, gsi,
-                                               &realignment_token,
-                                               dr_explicit_realign,
-                                               dataref_ptr, NULL);
-
-               new_stmt = gimple_build_assign_with_ops
-                            (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
-                             build_int_cst
-                               (TREE_TYPE (dataref_ptr),
-                                -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
-               ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
-               gimple_assign_set_lhs (new_stmt, ptr);
-               vect_finish_stmt_generation (stmt, new_stmt, gsi);
-               data_ref
-                 = build2 (MEM_REF, vectype, ptr,
-                           build_int_cst (reference_alias_ptr_type
-                                            (DR_REF (first_dr)), 0));
-               vec_dest = vect_create_destination_var (scalar_dest, vectype);
-               new_stmt = gimple_build_assign (vec_dest, data_ref);
-               new_temp = make_ssa_name (vec_dest, new_stmt);
-               gimple_assign_set_lhs (new_stmt, new_temp);
-               gimple_set_vdef (new_stmt, gimple_vdef (stmt));
-               gimple_set_vuse (new_stmt, gimple_vuse (stmt));
-               vect_finish_stmt_generation (stmt, new_stmt, gsi);
-               msq = new_temp;
-
-               bump = size_binop (MULT_EXPR, vs_minus_1,
-                                  TYPE_SIZE_UNIT (scalar_type));
-               ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
-               new_stmt = gimple_build_assign_with_ops
-                            (BIT_AND_EXPR, NULL_TREE, ptr,
-                             build_int_cst
-                               (TREE_TYPE (ptr),
-                                -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
-               ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
-               gimple_assign_set_lhs (new_stmt, ptr);
-               vect_finish_stmt_generation (stmt, new_stmt, gsi);
-               data_ref
-                 = build2 (MEM_REF, vectype, ptr,
-                           build_int_cst (reference_alias_ptr_type
-                                            (DR_REF (first_dr)), 0));
-               break;
-             }
-           case dr_explicit_realign_optimized:
-             new_stmt = gimple_build_assign_with_ops
-                          (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
-                           build_int_cst
-                             (TREE_TYPE (dataref_ptr),
-                              -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
-             new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
-             gimple_assign_set_lhs (new_stmt, new_temp);
-             vect_finish_stmt_generation (stmt, new_stmt, gsi);
-             data_ref
-               = build2 (MEM_REF, vectype, new_temp,
-                         build_int_cst (reference_alias_ptr_type
-                                          (DR_REF (first_dr)), 0));
-             break;
-           default:
-             gcc_unreachable ();
-           }
-         vec_dest = vect_create_destination_var (scalar_dest, vectype);
-         new_stmt = gimple_build_assign (vec_dest, data_ref);
-         new_temp = make_ssa_name (vec_dest, new_stmt);
-         gimple_assign_set_lhs (new_stmt, new_temp);
-         vect_finish_stmt_generation (stmt, new_stmt, gsi);
-         mark_symbols_for_renaming (new_stmt);
-
-         /* 3. Handle explicit realignment if necessary/supported.  Create in
-               loop: vec_dest = realign_load (msq, lsq, realignment_token)  */
-         if (alignment_support_scheme == dr_explicit_realign_optimized
-             || alignment_support_scheme == dr_explicit_realign)
-           {
-             lsq = gimple_assign_lhs (new_stmt);
-             if (!realignment_token)
-               realignment_token = dataref_ptr;
+               case dr_explicit_realign_optimized:
+                 new_stmt = gimple_build_assign_with_ops
+                              (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
+                               build_int_cst
+                                 (TREE_TYPE (dataref_ptr),
+                                  -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
+                 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
+                                           new_stmt);
+                 gimple_assign_set_lhs (new_stmt, new_temp);
+                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                 data_ref
+                   = build2 (MEM_REF, vectype, new_temp,
+                             build_int_cst (reference_alias_ptr_type
+                                              (DR_REF (first_dr)), 0));
+                 break;
+               default:
+                 gcc_unreachable ();
+               }
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
-             new_stmt
-               = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR, vec_dest,
-                                                msq, lsq, realignment_token);
+             new_stmt = gimple_build_assign (vec_dest, data_ref);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
+             mark_symbols_for_renaming (new_stmt);
 
-             if (alignment_support_scheme == dr_explicit_realign_optimized)
+             /* 3. Handle explicit realignment if necessary/supported.
+                Create in loop:
+                  vec_dest = realign_load (msq, lsq, realignment_token)  */
+             if (alignment_support_scheme == dr_explicit_realign_optimized
+                 || alignment_support_scheme == dr_explicit_realign)
                {
-                 gcc_assert (phi);
-                 if (i == vec_num - 1 && j == ncopies - 1)
-                   add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
-                                UNKNOWN_LOCATION);
-                 msq = lsq;
+                 lsq = gimple_assign_lhs (new_stmt);
+                 if (!realignment_token)
+                   realignment_token = dataref_ptr;
+                 vec_dest = vect_create_destination_var (scalar_dest, vectype);
+                 new_stmt
+                   = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
+                                                    vec_dest, msq, lsq,
+                                                    realignment_token);
+                 new_temp = make_ssa_name (vec_dest, new_stmt);
+                 gimple_assign_set_lhs (new_stmt, new_temp);
+                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+                 if (alignment_support_scheme == dr_explicit_realign_optimized)
+                   {
+                     gcc_assert (phi);
+                     if (i == vec_num - 1 && j == ncopies - 1)
+                       add_phi_arg (phi, lsq,
+                                    loop_latch_edge (containing_loop),
+                                    UNKNOWN_LOCATION);
+                     msq = lsq;
+                   }
                }
-           }
 
-         /* 4. Handle invariant-load.  */
-         if (inv_p && !bb_vinfo)
-           {
-             gcc_assert (!strided_load);
-             gcc_assert (nested_in_vect_loop_p (loop, stmt));
-             if (j == 0)
+             /* 4. Handle invariant-load.  */
+             if (inv_p && !bb_vinfo)
                {
-                 int k;
-                 tree t = NULL_TREE;
-                 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
-
-                 /* CHECKME: bitpos depends on endianess?  */
-                 bitpos = bitsize_zero_node;
-                 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
-                                   bitsize, bitpos);
-                 vec_dest =
-                       vect_create_destination_var (scalar_dest, NULL_TREE);
-                 new_stmt = gimple_build_assign (vec_dest, vec_inv);
-                  new_temp = make_ssa_name (vec_dest, new_stmt);
-                 gimple_assign_set_lhs (new_stmt, new_temp);
-                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                 gcc_assert (!strided_load);
+                 gcc_assert (nested_in_vect_loop_p (loop, stmt));
+                 if (j == 0)
+                   {
+                     int k;
+                     tree t = NULL_TREE;
+                     tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
+
+                     /* CHECKME: bitpos depends on endianess?  */
+                     bitpos = bitsize_zero_node;
+                     vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
+                                       bitsize, bitpos);
+                     vec_dest = vect_create_destination_var (scalar_dest,
+                                                             NULL_TREE);
+                     new_stmt = gimple_build_assign (vec_dest, vec_inv);
+                     new_temp = make_ssa_name (vec_dest, new_stmt);
+                     gimple_assign_set_lhs (new_stmt, new_temp);
+                     vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+                     for (k = nunits - 1; k >= 0; --k)
+                       t = tree_cons (NULL_TREE, new_temp, t);
+                     /* FIXME: use build_constructor directly.  */
+                     vec_inv = build_constructor_from_list (vectype, t);
+                     new_temp = vect_init_vector (stmt, vec_inv,
+                                                  vectype, gsi);
+                     new_stmt = SSA_NAME_DEF_STMT (new_temp);
+                   }
+                 else
+                   gcc_unreachable (); /* FORNOW. */
+               }
 
-                 for (k = nunits - 1; k >= 0; --k)
-                   t = tree_cons (NULL_TREE, new_temp, t);
-                 /* FIXME: use build_constructor directly.  */
-                 vec_inv = build_constructor_from_list (vectype, t);
-                 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
+             if (negative)
+               {
+                 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }
-             else
-               gcc_unreachable (); /* FORNOW. */
-           }
-
-         if (negative)
-           {
-             new_temp = reverse_vec_elements (new_temp, stmt, gsi);
-             new_stmt = SSA_NAME_DEF_STMT (new_temp);
-           }
 
-         /* Collect vector loads and later create their permutation in
-            vect_transform_strided_load ().  */
-          if (strided_load || slp_perm)
-            VEC_quick_push (tree, dr_chain, new_temp);
+             /* Collect vector loads and later create their permutation in
+                vect_transform_strided_load ().  */
+             if (strided_load || slp_perm)
+               VEC_quick_push (tree, dr_chain, new_temp);
 
-         /* Store vector loads in the corresponding SLP_NODE.  */
-         if (slp && !slp_perm)
-           VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
+             /* Store vector loads in the corresponding SLP_NODE.  */
+             if (slp && !slp_perm)
+               VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
+                               new_stmt);
+           }
        }
 
       if (slp && !slp_perm)
@@ -4351,7 +4524,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
         {
           if (strided_load)
            {
-             vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
+             if (!load_lanes_p)
+               vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
              *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
            }
           else
index 6e63afcc716790eb1bd0dd58c4610f1d49576c3f..0793c99424a32bfde621ddc03c9514ad7f936f77 100644 (file)
@@ -788,9 +788,9 @@ extern void free_stmt_vec_info (gimple stmt);
 extern tree vectorizable_function (gimple, tree, tree);
 extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
                                     slp_tree);
-extern void vect_model_store_cost (stmt_vec_info, int, enum vect_def_type,
-                                   slp_tree);
-extern void vect_model_load_cost (stmt_vec_info, int, slp_tree);
+extern void vect_model_store_cost (stmt_vec_info, int, bool,
+                                  enum vect_def_type, slp_tree);
+extern void vect_model_load_cost (stmt_vec_info, int, bool, slp_tree);
 extern void vect_finish_stmt_generation (gimple, gimple,
                                          gimple_stmt_iterator *);
 extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
@@ -829,7 +829,9 @@ extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
 extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree);
 extern tree vect_create_destination_var (tree, tree);
 extern bool vect_strided_store_supported (tree, unsigned HOST_WIDE_INT);
+extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT);
 extern bool vect_strided_load_supported (tree, unsigned HOST_WIDE_INT);
+extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT);
 extern void vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple,
                                     gimple_stmt_iterator *, VEC(tree,heap) **);
 extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *,
@@ -837,6 +839,7 @@ extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *,
                                     struct loop **);
 extern void vect_transform_strided_load (gimple, VEC(tree,heap) *, int,
                                          gimple_stmt_iterator *);
+extern void vect_record_strided_load_vectors (gimple, VEC(tree,heap) *);
 extern int vect_get_place_in_interleaving_chain (gimple, gimple);
 extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
 extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
index d0c18b17ae2ff9671158865a5ceda18697967f45..b16c0aa9f975db5c2307366742763bf86f0d7e57 100644 (file)
@@ -7340,6 +7340,15 @@ build_nonshared_array_type (tree elt_type, tree index_type)
   return build_array_type_1 (elt_type, index_type, false);
 }
 
+/* Return a representation of ELT_TYPE[NELTS], using indices of type
+   sizetype.  The upper bound is NELTS - 1, so NELTS == 0 wraps around.  */
+
+tree
+build_array_type_nelts (tree elt_type, unsigned HOST_WIDE_INT nelts)
+{
+  return build_array_type (elt_type, build_index_type (size_int (nelts - 1)));
+}
+
 /* Recursively examines the array elements of TYPE, until a non-array
    element type is found.  */
 
index 0bc98cd01b46691ae9fdab162326e3c65adb74e6..d28054a8af35fe9c286dc551f21d060532f35652 100644 (file)
@@ -4247,6 +4247,7 @@ extern tree build_type_no_quals (tree);
 extern tree build_index_type (tree);
 extern tree build_array_type (tree, tree);
 extern tree build_nonshared_array_type (tree, tree);
+extern tree build_array_type_nelts (tree, unsigned HOST_WIDE_INT);
 extern tree build_function_type (tree, tree);
 extern tree build_function_type_list (tree, ...);
 extern tree build_function_type_skip_args (tree, bitmap);