Use gather loads for strided accesses
author		Richard Sandiford <richard.sandiford@linaro.org>
		Sat, 13 Jan 2018 18:01:42 +0000 (18:01 +0000)
committer	Richard Sandiford <rsandifo@gcc.gnu.org>
		Sat, 13 Jan 2018 18:01:42 +0000 (18:01 +0000)
This patch tries to use gather loads for strided accesses,
rather than falling back to VMAT_ELEMENTWISE.
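
For example, a loop that writes one array contiguously while reading
another with a run-time stride (the same pattern as the new tests):

	#include <stdint.h>

	void
	f (int32_t *restrict dest, int32_t *restrict src,
	   int stride, int n)
	{
	  for (int i = 0; i < n; ++i)
	    dest[i] += src[i * stride];	/* strided read */
	}

previously had the strided read vectorized one element at a time
(VMAT_ELEMENTWISE).  With this patch the read can instead use an SVE
gather such as "ld1w z0.s, p0/z, [x1, z1.s, sxtw 2]", with the offset
vector holding {0, stride, stride*2, ...} (element I being
I * DR_STEP / SCALE).  This is only a sketch: the function name and
register numbers are illustrative rather than taken from the generated
code; see the scan-assembler patterns in the new tests for the exact
forms matched.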

2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* tree-vectorizer.h (vect_create_data_ref_ptr): Take an extra
	optional tree argument.
	* tree-vect-data-refs.c (vect_check_gather_scatter): Check for
	null target hooks.
	(vect_create_data_ref_ptr): Take the iv_step as an optional argument,
	but continue to use the current value as a fallback.
	(bump_vector_ptr): Use operand_equal_p rather than tree_int_cst_compare
	to compare the updates.
	* tree-vect-stmts.c (vect_use_strided_gather_scatters_p): New function.
	(get_load_store_type): Use it when handling a strided access.
	(vect_get_strided_load_store_ops): New function.
	(vect_get_data_ptr_increment): Likewise.
	(vectorizable_load): Handle strided gather loads.  Always pass
	a step to vect_create_data_ref_ptr and bump_vector_ptr.

gcc/testsuite/
	* gcc.target/aarch64/sve/strided_load_1.c: New test.
	* gcc.target/aarch64/sve/strided_load_2.c: Likewise.
	* gcc.target/aarch64/sve/strided_load_3.c: Likewise.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256641

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/strided_load_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/strided_load_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/strided_load_3.c [new file with mode: 0644]
gcc/tree-vect-data-refs.c
gcc/tree-vect-stmts.c
gcc/tree-vectorizer.h

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8c808374bbc8184ec3263f8d0bce663d82857214..fb7a205be00936a699c37da18bb353c76517a3c8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,22 @@
+2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
+           Alan Hayward  <alan.hayward@arm.com>
+           David Sherwood  <david.sherwood@arm.com>
+
+       * tree-vectorizer.h (vect_create_data_ref_ptr): Take an extra
+       optional tree argument.
+       * tree-vect-data-refs.c (vect_check_gather_scatter): Check for
+       null target hooks.
+       (vect_create_data_ref_ptr): Take the iv_step as an optional argument,
+       but continue to use the current value as a fallback.
+       (bump_vector_ptr): Use operand_equal_p rather than tree_int_cst_compare
+       to compare the updates.
+       * tree-vect-stmts.c (vect_use_strided_gather_scatters_p): New function.
+       (get_load_store_type): Use it when handling a strided access.
+       (vect_get_strided_load_store_ops): New function.
+       (vect_get_data_ptr_increment): Likewise.
+       (vectorizable_load): Handle strided gather loads.  Always pass
+       a step to vect_create_data_ref_ptr and bump_vector_ptr.
+
 2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 78e5f4c5ababa4049c6c0d76e4bc89f23e83fad1..30bcb7a057c2e12895b18426acbf3545a15cebdf 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
+           Alan Hayward  <alan.hayward@arm.com>
+           David Sherwood  <david.sherwood@arm.com>
+
+       * gcc.target/aarch64/sve/strided_load_1.c: New test.
+       * gcc.target/aarch64/sve/strided_load_2.c: Likewise.
+       * gcc.target/aarch64/sve/strided_load_3.c: Likewise.
+
 2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/strided_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve/strided_load_1.c
new file mode 100644
index 0000000..cab5021
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/strided_load_1.c
@@ -0,0 +1,40 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX8
+#define INDEX8 int8_t
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, BITS)                             \
+  void __attribute__ ((noinline, noclone))                     \
+  f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest,            \
+                         DATA_TYPE *restrict src,              \
+                         INDEX##BITS stride, INDEX##BITS n)    \
+  {                                                            \
+    for (INDEX##BITS i = 0; i < n; ++i)                                \
+      dest[i] += src[i * stride];                              \
+  }
+
+#define TEST_TYPE(T, DATA_TYPE)                        \
+  T (DATA_TYPE, 8)                             \
+  T (DATA_TYPE, 16)                            \
+  T (DATA_TYPE, 32)                            \
+  T (DATA_TYPE, 64)
+
+#define TEST_ALL(T)                            \
+  TEST_TYPE (T, int32_t)                       \
+  TEST_TYPE (T, uint32_t)                      \
+  TEST_TYPE (T, float)                         \
+  TEST_TYPE (T, int64_t)                       \
+  TEST_TYPE (T, uint64_t)                      \
+  TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 12 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/strided_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve/strided_load_2.c
new file mode 100644
index 0000000..762805f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/strided_load_2.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#define INDEX8 uint8_t
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "strided_load_1.c"
+
+/* 8 and 16 bits are signed because the multiplication promotes to int.
+   Using uxtw for all 9 would be OK.  */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+/* The 32-bit loop needs to honor the defined overflow in uint32_t,
+   so we vectorize the offset calculation.  This means that the
+   64-bit version needs two copies.  */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/strided_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve/strided_load_3.c
new file mode 100644
index 0000000..8f720dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/strided_load_3.c
@@ -0,0 +1,32 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, OTHER_TYPE)                               \
+  void __attribute__ ((noinline, noclone))                             \
+  f_##DATA_TYPE##_##OTHER_TYPE (DATA_TYPE *restrict dest,              \
+                         DATA_TYPE *restrict src,                      \
+                         OTHER_TYPE *restrict other,                   \
+                         OTHER_TYPE mask,                              \
+                         int stride, int n)                            \
+  {                                                                    \
+    for (int i = 0; i < n; ++i)                                                \
+      dest[i] = src[i * stride] + (OTHER_TYPE) (other[i] | mask);      \
+  }
+
+#define TEST_ALL(T)                            \
+  T (int32_t, int16_t)                         \
+  T (uint32_t, int16_t)                                \
+  T (float, int16_t)                           \
+  T (int64_t, int32_t)                         \
+  T (uint64_t, int32_t)                                \
+  T (double, int32_t)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 1\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index f79be863194cb02038691fba5f6f707d1494d366..69721a9a1f967991b50236bdc46b635137b2a96b 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -3616,9 +3616,15 @@ vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo,
   else
     {
       if (DR_IS_READ (dr))
-       decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
+       {
+         if (targetm.vectorize.builtin_gather)
+           decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
+       }
       else
-       decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
+       {
+         if (targetm.vectorize.builtin_scatter)
+           decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
+       }
 
       if (!decl)
        return false;
@@ -4367,6 +4373,10 @@ vect_create_addr_base_for_vector_ref (gimple *stmt,
        to the initial address accessed by the data-ref in STMT.  This is
        similar to OFFSET, but OFFSET is counted in elements, while BYTE_OFFSET
        in bytes.
+   8. IV_STEP (optional, defaults to NULL): the amount that should be added
+       to the IV during each iteration of the loop.  NULL says to move
+       by one copy of AGGR_TYPE up or down, depending on the step of the
+       data reference.
 
    Output:
    1. Declare a new ptr to vector_type, and have it point to the base of the
@@ -4399,7 +4409,8 @@ tree
 vect_create_data_ref_ptr (gimple *stmt, tree aggr_type, struct loop *at_loop,
                          tree offset, tree *initial_address,
                          gimple_stmt_iterator *gsi, gimple **ptr_incr,
-                         bool only_init, bool *inv_p, tree byte_offset)
+                         bool only_init, bool *inv_p, tree byte_offset,
+                         tree iv_step)
 {
   const char *base_name;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
@@ -4423,7 +4434,8 @@ vect_create_data_ref_ptr (gimple *stmt, tree aggr_type, struct loop *at_loop,
   tree step;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
 
-  gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
+  gcc_assert (iv_step != NULL_TREE
+             || TREE_CODE (aggr_type) == ARRAY_TYPE
              || TREE_CODE (aggr_type) == VECTOR_TYPE);
 
   if (loop_vinfo)
@@ -4564,14 +4576,17 @@ vect_create_data_ref_ptr (gimple *stmt, tree aggr_type, struct loop *at_loop,
     aptr = aggr_ptr_init;
   else
     {
-      /* The step of the aggregate pointer is the type size.  */
-      tree iv_step = TYPE_SIZE_UNIT (aggr_type);
-      /* One exception to the above is when the scalar step of the load in
-        LOOP is zero. In this case the step here is also zero.  */
-      if (*inv_p)
-       iv_step = size_zero_node;
-      else if (tree_int_cst_sgn (step) == -1)
-       iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
+      if (iv_step == NULL_TREE)
+       {
+         /* The step of the aggregate pointer is the type size.  */
+         iv_step = TYPE_SIZE_UNIT (aggr_type);
+         /* One exception to the above is when the scalar step of the load in
+            LOOP is zero. In this case the step here is also zero.  */
+         if (*inv_p)
+           iv_step = size_zero_node;
+         else if (tree_int_cst_sgn (step) == -1)
+           iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
+       }
 
       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
@@ -4704,7 +4719,7 @@ bump_vector_ptr (tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
       if (use == dataref_ptr)
         SET_USE (use_p, new_dataref_ptr);
       else
-        gcc_assert (tree_int_cst_compare (use, update) == 0);
+        gcc_assert (operand_equal_p (use, update, 0));
     }
 
   return new_dataref_ptr;
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index a308d801082c1ece2d4cf645326eec78297f73f8..079cbddfd5ebb77876b2e1c7e0a8bb5d363ac00b 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1849,6 +1849,44 @@ prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
   return and_res;
 }
 
+/* Return true if we can use gather/scatter internal functions to
+   vectorize STMT, which is a grouped or strided load or store.
+   When returning true, fill in GS_INFO with the information required
+   to perform the operation.  */
+
+static bool
+vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
+                                   gather_scatter_info *gs_info)
+{
+  if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
+      || gs_info->decl)
+    return false;
+
+  scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
+  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
+  tree offset_type = TREE_TYPE (gs_info->offset);
+  unsigned int offset_bits = TYPE_PRECISION (offset_type);
+
+  /* Enforced by vect_check_gather_scatter.  */
+  gcc_assert (element_bits >= offset_bits);
+
+  /* If the elements are wider than the offset, convert the offset to the
+     same width, without changing its sign.  */
+  if (element_bits > offset_bits)
+    {
+      bool unsigned_p = TYPE_UNSIGNED (offset_type);
+      offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
+      gs_info->offset = fold_convert (offset_type, gs_info->offset);
+    }
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "using gather/scatter for strided/grouped access,"
+                    " scale = %d\n", gs_info->scale);
+
+  return true;
+}
+
 /* STMT is a non-strided load or store, meaning that it accesses
    elements with a known constant step.  Return -1 if that step
    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
@@ -2168,7 +2206,11 @@ get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
   else if (STMT_VINFO_STRIDED_P (stmt_info))
     {
       gcc_assert (!slp);
-      *memory_access_type = VMAT_ELEMENTWISE;
+      if (loop_vinfo
+         && vect_use_strided_gather_scatters_p (stmt, loop_vinfo, gs_info))
+       *memory_access_type = VMAT_GATHER_SCATTER;
+      else
+       *memory_access_type = VMAT_ELEMENTWISE;
     }
   else
     {
@@ -2612,6 +2654,71 @@ vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
                                              offset_vectype);
 }
 
+/* Prepare to implement a grouped or strided load or store using
+   the gather load or scatter store operation described by GS_INFO.
+   STMT is the load or store statement.
+
+   Set *DATAREF_BUMP to the amount that should be added to the base
+   address after each copy of the vectorized statement.  Set *VEC_OFFSET
+   to an invariant offset vector in which element I has the value
+   I * DR_STEP / SCALE.  */
+
+static void
+vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
+                                gather_scatter_info *gs_info,
+                                tree *dataref_bump, tree *vec_offset)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  gimple_seq stmts;
+
+  tree bump = size_binop (MULT_EXPR,
+                         fold_convert (sizetype, DR_STEP (dr)),
+                         size_int (TYPE_VECTOR_SUBPARTS (vectype)));
+  *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
+  if (stmts)
+    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+
+  /* The offset given in GS_INFO can have pointer type, so use the element
+     type of the vector instead.  */
+  tree offset_type = TREE_TYPE (gs_info->offset);
+  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
+  offset_type = TREE_TYPE (offset_vectype);
+
+  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
+  tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
+                         ssize_int (gs_info->scale));
+  step = fold_convert (offset_type, step);
+  step = force_gimple_operand (step, &stmts, true, NULL_TREE);
+
+  /* Create {0, X, X*2, X*3, ...}.  */
+  *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
+                             build_zero_cst (offset_type), step);
+  if (stmts)
+    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+}
+
+/* Return the amount that should be added to a vector pointer to move
+   to the next or previous copy of AGGR_TYPE.  DR is the data reference
+   being vectorized and MEMORY_ACCESS_TYPE describes the type of
+   vectorization.  */
+
+static tree
+vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
+                            vect_memory_access_type memory_access_type)
+{
+  if (memory_access_type == VMAT_INVARIANT)
+    return size_zero_node;
+
+  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
+  tree step = vect_dr_behavior (dr)->step;
+  if (tree_int_cst_sgn (step) == -1)
+    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
+  return iv_step;
+}
+
 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */
 
 static bool
@@ -7412,6 +7519,9 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       return true;
     }
 
+  if (memory_access_type == VMAT_GATHER_SCATTER)
+    grouped_load = false;
+
   if (grouped_load)
     {
       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
@@ -7623,13 +7733,29 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
 
-  if (memory_access_type == VMAT_LOAD_STORE_LANES)
-    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
+  tree bump;
+  tree vec_offset = NULL_TREE;
+  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+    {
+      aggr_type = NULL_TREE;
+      bump = NULL_TREE;
+    }
+  else if (memory_access_type == VMAT_GATHER_SCATTER)
+    {
+      aggr_type = elem_type;
+      vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
+                                      &bump, &vec_offset);
+    }
   else
-    aggr_type = vectype;
+    {
+      if (memory_access_type == VMAT_LOAD_STORE_LANES)
+       aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
+      else
+       aggr_type = vectype;
+      bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
+    }
 
   tree vec_mask = NULL_TREE;
-  tree vec_offset = NULL_TREE;
   prev_stmt_info = NULL;
   poly_uint64 group_elt = 0;
   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
@@ -7661,7 +7787,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
                                            at_loop, offset, &dummy, gsi,
                                            &ptr_incr, simd_lane_access_p,
-                                           &inv_p, byte_offset);
+                                           &inv_p, byte_offset, bump);
              /* Adjust the pointer by the difference to first_stmt.  */
              data_reference_p ptrdr
                = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
@@ -7683,7 +7809,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
              = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p,
-                                         byte_offset);
+                                         byte_offset, bump);
          if (mask)
            vec_mask = vect_get_vec_def_for_operand (mask, stmt,
                                                     mask_vectype);
@@ -7692,7 +7818,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
        {
          if (dataref_offset)
            dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
-                                             TYPE_SIZE_UNIT (aggr_type));
+                                             bump);
          else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
            {
              gimple *def_stmt;
@@ -7701,8 +7827,8 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
              vec_offset = vect_get_vec_def_for_stmt_copy (dt, vec_offset);
            }
          else
-           dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
-                                          TYPE_SIZE_UNIT (aggr_type));
+           dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+                                          stmt, bump);
          if (mask)
            {
              gimple *def_stmt;
@@ -7778,7 +7904,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 
              if (i > 0)
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
-                                              stmt, NULL_TREE);
+                                              stmt, bump);
 
              /* 2. Create the vector-load in the loop.  */
              switch (alignment_support_scheme)
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index a9ccdfd11367ba6fe2f6725ff82aadd288604c63..c661578403a6fab60d2dfe0cb93242b4043c3196 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1462,7 +1462,7 @@ extern void vect_record_base_alignments (vec_info *);
 extern tree vect_create_data_ref_ptr (gimple *, tree, struct loop *, tree,
                                      tree *, gimple_stmt_iterator *,
                                      gimple **, bool, bool *,
-                                     tree = NULL_TREE);
+                                     tree = NULL_TREE, tree = NULL_TREE);
 extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, gimple *,
                             tree);
 extern tree vect_create_destination_var (tree, tree);