tree-vectorizer.h (struct _stmt_vec_info): Rename stride_load_p to strided_p.
author    Michael Matz <matz@gcc.gnu.org>
          Thu, 21 May 2015 14:36:04 +0000 (14:36 +0000)
committer Michael Matz <matz@gcc.gnu.org>
          Thu, 21 May 2015 14:36:04 +0000 (14:36 +0000)
* tree-vectorizer.h (struct _stmt_vec_info): Rename stride_load_p
to strided_p.
(STMT_VINFO_STRIDE_LOAD_P): Rename to ...
(STMT_VINFO_STRIDED_P): ... this.
* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Adjust.
(vect_verify_datarefs_alignment): Likewise.
(vect_enhance_data_refs_alignment): Likewise.
(vect_analyze_data_ref_access): Likewise.
(vect_analyze_data_refs): Accept strided stores.
* tree-vect-stmts.c (vect_model_store_cost): Count strided stores.
(vect_model_load_cost): Adjust for macro rename.
(vectorizable_mask_load_store): Likewise.
(vectorizable_load): Likewise.
(vectorizable_store): Open code strided stores.

testsuite/
* gcc.dg/vect/vect-strided-store.c: New test.
* gfortran.dg/vect/fast-math-pr37021.f90: Adjust.
* gfortran.dg/vect/fast-math-rnflow-trs2a2.f90: Adjust.

From-SVN: r223486
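
For context, a minimal sketch of the kind of loop this change lets the
vectorizer handle: a store whose stride is loop-invariant but not a
compile-time constant.  It is modeled on the new test added below
(gcc.dg/vect/vect-strided-store.c); the function and variable names here
are illustrative only.

    void
    sum_into_strided (float * __restrict dest,
                      float * __restrict src, float * __restrict src2,
                      int stride, int n)
    {
      int i;
      /* The store to dest[i*stride] has a DR_STEP of stride * sizeof (float),
         which is not an INTEGER_CST.  With this change such stores are marked
         STMT_VINFO_STRIDED_P and open-coded as scalar element stores instead
         of being rejected by vect_analyze_data_refs.  */
      for (i = 0; i < n; i++)
        dest[i*stride] = src[i] + src2[i];
    }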

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/vect-strided-store.c [new file with mode: 0644]
gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90
gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90
gcc/tree-vect-data-refs.c
gcc/tree-vect-stmts.c
gcc/tree-vectorizer.h

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 46784af723504805ea2615e4a5004d6fd41a4832..2474aa10ab000ca133d251269022b4d800917cc3 100644
@@ -1,3 +1,20 @@
+2015-05-08  Michael Matz  <matz@suse.de>
+
+       * tree-vectorizer.h (struct _stmt_vec_info): Rename stride_load_p
+       to strided_p.
+       (STMT_VINFO_STRIDE_LOAD_P): Rename to ...
+       (STMT_VINFO_STRIDED_P): ... this.
+       * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Adjust.
+       (vect_verify_datarefs_alignment): Likewise.
+       (vect_enhance_data_refs_alignment): Likewise.
+       (vect_analyze_data_ref_access): Likewise.
+       (vect_analyze_data_refs): Accept strided stores.
+       * tree-vect-stmts.c (vect_model_store_cost): Count strided stores.
+       (vect_model_load_cost): Adjust for macro rename.
+       (vectorizable_mask_load_store): Likewise.
+       (vectorizable_load): Likewise.
+       (vectorizable_store): Open code strided stores.
+
 2015-05-21  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
        * doc/sourcebuild.texi (7.2.3.9 Other hardware attributes):
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index a5b78aa738e0cec3a8f3431ca7103ee21dbb1c4e..46a6bb7ad675f2173217180564bdf9f5a430b49f 100644
@@ -1,3 +1,9 @@
+2015-05-08  Michael Matz  <matz@suse.de>
+
+       * gcc.dg/vect/vect-strided-store.c: New test.
+       * gfortran.dg/vect/fast-math-pr37021.f90: Adjust.
+       * gfortran.dg/vect/fast-math-rnflow-trs2a2.f90: Adjust.
+
 2015-05-21  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
        * lib/target-supports.exp (check_effective_target_sqrt_insn): New check.
@@ -51,7 +57,7 @@
 
 2015-05-20  Alex Velenko  <Alex.Velenko@arm.com>
 
-        * gcc.target/arm/thumb1-far-jump-2.c (r4): Added int in definition.
+       * gcc.target/arm/thumb1-far-jump-2.c (r4): Added int in definition.
 
 2015-05-20  David Malcolm  <dmalcolm@redhat.com>
 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-store.c b/gcc/testsuite/gcc.dg/vect/vect-strided-store.c
new file mode 100644
index 0000000..5014bc7
--- /dev/null
@@ -0,0 +1,36 @@
+/* { dg-require-effective-target vect_float } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+void __attribute__((noinline))
+sumit (float * __restrict dest,
+       float * __restrict src, float * __restrict src2,
+       int stride, int n)
+{
+  int i;
+  for (i = 0; i < n; i++)
+    dest[i*stride] = src[i] + src2[i];
+}
+
+int main()
+{
+  int i, stride;
+  float src[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  float dest[64];
+  check_vect ();
+  for (stride = 0; stride < 8; stride++)
+    {
+      sumit (dest, src, src, stride, 8);
+      if (!stride && dest[0] != 16)
+       abort();
+      else if (stride)
+       for (i = 0; i < 8; i++)
+         if (2*src[i] != dest[i*stride])
+           abort ();
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90 b/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90
index b17ac9c32776327d249bf9fdb23f9409b7ac6e90..d5f5d40765fc532821609fe0fa752e3d5b7fde83 100644
@@ -14,5 +14,5 @@ subroutine to_product_of(self,a,b,a1,a2)
   end do
 end subroutine
 
-! { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } }
+! { dg-final { scan-tree-dump "vectorized 2 loops" "vect" } }
 ! { dg-final { cleanup-tree-dump "vect" } }
diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90 b/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90
index 1d13cea80e0f1bb085862273c4f14ea54edd5d42..625be8390dccdd3efc3811d7eca636bcd186fc33 100644
@@ -29,5 +29,5 @@
       return
       end function trs2a2
 
-! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } }
+! { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  } }
 ! { dg-final { cleanup-tree-dump "vect" } }
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 7e938996866a11868309ccac91ca5fc7dce032be..3b8405f597cb8c534b754991e1b6ead0a23dd41d 100644
@@ -663,9 +663,9 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
   /* Initialize misalignment to unknown.  */
   SET_DR_MISALIGNMENT (dr, -1);
 
-  /* Strided loads perform only component accesses, misalignment information
+  /* Strided accesses perform only component accesses, misalignment information
      is irrelevant for them.  */
-  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+  if (STMT_VINFO_STRIDED_P (stmt_info)
       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
     return true;
 
@@ -942,9 +942,9 @@ vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
           || !STMT_VINFO_VECTORIZABLE (stmt_info))
         continue;
 
-      /* Strided loads perform only component accesses, alignment is
+      /* Strided accesses perform only component accesses, alignment is
         irrelevant for them.  */
-      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+      if (STMT_VINFO_STRIDED_P (stmt_info)
          && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
        continue;
 
@@ -1410,9 +1410,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
       if (integer_zerop (DR_STEP (dr)))
        continue;
 
-      /* Strided loads perform only component accesses, alignment is
+      /* Strided accesses perform only component accesses, alignment is
         irrelevant for them.  */
-      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+      if (STMT_VINFO_STRIDED_P (stmt_info)
          && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
        continue;
 
@@ -1703,9 +1703,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
              && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
            continue;
 
-         /* Strided loads perform only component accesses, alignment is
+         /* Strided accesses perform only component accesses, alignment is
             irrelevant for them.  */
-         if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+         if (STMT_VINFO_STRIDED_P (stmt_info)
              && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
            continue;
 
@@ -1824,7 +1824,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
                  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
            continue;
 
-         if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+         if (STMT_VINFO_STRIDED_P (stmt_info))
            {
              /* Strided loads perform only component accesses, alignment is
                 irrelevant for them.  */
@@ -2346,7 +2346,7 @@ vect_analyze_data_ref_access (struct data_reference *dr)
 
   /* Assume this is a DR handled by non-constant strided load case.  */
   if (TREE_CODE (step) != INTEGER_CST)
-    return (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+    return (STMT_VINFO_STRIDED_P (stmt_info)
            && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
                || vect_analyze_group_access (dr)));
 
@@ -3758,8 +3758,7 @@ again:
       else if (loop_vinfo
               && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
        {
-         if (nested_in_vect_loop_p (loop, stmt)
-             || !DR_IS_READ (dr))
+         if (nested_in_vect_loop_p (loop, stmt))
            {
              if (dump_enabled_p ())
                {
@@ -3771,7 +3770,7 @@ again:
                }
              return false;
            }
-         STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
+         STMT_VINFO_STRIDED_P (stmt_info) = true;
        }
     }
 
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index f82decb798e100f5e985da645aa61bfd0e0ad4f9..37a706fa849456b7a5689c0bd366d7e5831749e9 100644
@@ -1014,7 +1014,19 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
     }
 
   /* Costs of the stores.  */
-  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
+  if (STMT_VINFO_STRIDED_P (stmt_info))
+    {
+      /* N scalar stores plus extracting the elements.  */
+      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+      inside_cost += record_stmt_cost (body_cost_vec,
+                                      ncopies * TYPE_VECTOR_SUBPARTS (vectype),
+                                      scalar_store, stmt_info, 0, vect_body);
+      inside_cost += record_stmt_cost (body_cost_vec,
+                                      ncopies * TYPE_VECTOR_SUBPARTS (vectype),
+                                      vec_to_scalar, stmt_info, 0, vect_body);
+    }
+  else
+    vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -1113,7 +1125,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
      access is instead being provided by a load-and-permute operation,
      include the cost of the permutes.  */
   if (!load_lanes_p && group_size > 1
-      && !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+      && !STMT_VINFO_STRIDED_P (stmt_info))
     {
       /* Uses an even and odd extract operations or shuffle operations
         for each needed permute.  */
@@ -1128,7 +1140,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
     }
 
   /* The loads themselves.  */
-  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+  if (STMT_VINFO_STRIDED_P (stmt_info)
       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
     {
       /* N scalar loads plus gathering them into a vector.  */
@@ -1143,7 +1155,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost, 
                        prologue_cost_vec, body_cost_vec, true);
-  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+  if (STMT_VINFO_STRIDED_P (stmt_info))
       inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                       stmt_info, 0, vect_body);
 
@@ -1823,7 +1835,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
     return false;
 
-  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+  if (STMT_VINFO_STRIDED_P (stmt_info))
     return false;
 
   if (STMT_VINFO_GATHER_P (stmt_info))
@@ -5016,7 +5028,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   tree dataref_ptr = NULL_TREE;
   tree dataref_offset = NULL_TREE;
   gimple ptr_incr = NULL;
-  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
   int ncopies;
   int j;
   gimple next_stmt, first_stmt = NULL;
@@ -5103,38 +5115,40 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
-  negative = 
-    tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
-                         ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
-                         size_zero_node) < 0;
-  if (negative && ncopies > 1)
+  if (!STMT_VINFO_STRIDED_P (stmt_info))
     {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "multiple types with negative step.\n");
-      return false;
-    }
-
-  if (negative)
-    {
-      gcc_assert (!grouped_store);
-      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
-      if (alignment_support_scheme != dr_aligned
-         && alignment_support_scheme != dr_unaligned_supported)
+      negative = 
+         tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
+                               ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
+                               size_zero_node) < 0;
+      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "negative step but alignment required.\n");
+                            "multiple types with negative step.\n");
          return false;
        }
-      if (dt != vect_constant_def 
-         && dt != vect_external_def
-         && !perm_mask_for_reverse (vectype))
+      if (negative)
        {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "negative step and reversing not supported.\n");
-         return false;
+         gcc_assert (!grouped_store);
+         alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+         if (alignment_support_scheme != dr_aligned
+             && alignment_support_scheme != dr_unaligned_supported)
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "negative step but alignment required.\n");
+             return false;
+           }
+         if (dt != vect_constant_def 
+             && dt != vect_external_def
+             && !perm_mask_for_reverse (vectype))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "negative step and reversing not supported.\n");
+             return false;
+           }
        }
     }
 
@@ -5233,6 +5247,113 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
     dump_printf_loc (MSG_NOTE, vect_location,
                      "transform store. ncopies = %d\n", ncopies);
 
+  if (STMT_VINFO_STRIDED_P (stmt_info))
+    {
+      gimple_stmt_iterator incr_gsi;
+      bool insert_after;
+      gimple incr;
+      tree offvar;
+      tree ivstep;
+      tree running_off;
+      gimple_seq stmts = NULL;
+      tree stride_base, stride_step, alias_off;
+      tree vec_oprnd;
+
+      gcc_assert (!nested_in_vect_loop_p (loop, stmt));
+
+      stride_base
+       = fold_build_pointer_plus
+           (unshare_expr (DR_BASE_ADDRESS (dr)),
+            size_binop (PLUS_EXPR,
+                        convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
+                        convert_to_ptrofftype (DR_INIT(dr))));
+      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
+
+      /* For a store with loop-invariant (but other than power-of-2)
+         stride (i.e. not a grouped access) like so:
+
+          for (i = 0; i < n; i += stride)
+            array[i] = ...;
+
+        we generate a new induction variable and new stores from
+        the components of the (vectorized) rhs:
+
+          for (j = 0; ; j += VF*stride)
+            vectemp = ...;
+            tmp1 = vectemp[0];
+            array[j] = tmp1;
+            tmp2 = vectemp[1];
+            array[j + stride] = tmp2;
+            ...
+         */
+
+      ivstep = stride_step;
+      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
+                           build_int_cst (TREE_TYPE (ivstep),
+                                          ncopies * nunits));
+
+      standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+
+      create_iv (stride_base, ivstep, NULL,
+                loop, &incr_gsi, insert_after,
+                &offvar, NULL);
+      incr = gsi_stmt (incr_gsi);
+      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
+
+      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
+      if (stmts)
+       gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+
+      prev_stmt_info = NULL;
+      running_off = offvar;
+      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
+      for (j = 0; j < ncopies; j++)
+       {
+         /* We've set op and dt above, from gimple_assign_rhs1(stmt),
+            and first_stmt == stmt.  */
+         if (j == 0)
+           vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
+         else
+           vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
+
+         for (i = 0; i < nunits; i++)
+           {
+             tree newref, newoff;
+             gimple incr, assign;
+             tree size = TYPE_SIZE (elem_type);
+             /* Extract the i'th component.  */
+             tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
+                                     size);
+             tree elem = fold_build3 (BIT_FIELD_REF, elem_type, vec_oprnd,
+                                      size, pos);
+
+             elem = force_gimple_operand_gsi (gsi, elem, true,
+                                              NULL_TREE, true,
+                                              GSI_SAME_STMT);
+
+             newref = build2 (MEM_REF, TREE_TYPE (vectype),
+                              running_off, alias_off);
+
+             /* And store it to *running_off.  */
+             assign = gimple_build_assign (newref, elem);
+             vect_finish_stmt_generation (stmt, assign, gsi);
+
+             newoff = copy_ssa_name (running_off, NULL);
+             incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+                                         running_off, stride_step);
+             vect_finish_stmt_generation (stmt, incr, gsi);
+
+             running_off = newoff;
+             if (j == 0 && i == 0)
+               STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
+             else
+               STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
+             prev_stmt_info = vinfo_for_stmt (assign);
+           }
+       }
+      return true;
+    }
+
   dr_chain.create (group_size);
   oprnds.create (group_size);
 
@@ -5796,7 +5917,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
       if (!slp
          && !PURE_SLP_STMT (stmt_info)
-         && !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+         && !STMT_VINFO_STRIDED_P (stmt_info))
        {
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
@@ -5851,7 +5972,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
          return false;
        }
     }
-  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+  else if (STMT_VINFO_STRIDED_P (stmt_info))
     {
       if ((grouped_load
           && (slp || PURE_SLP_STMT (stmt_info)))
@@ -6099,7 +6220,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
        }
       return true;
     }
-  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+  else if (STMT_VINFO_STRIDED_P (stmt_info))
     {
       gimple_stmt_iterator incr_gsi;
       bool insert_after;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 5a4fdbb63accf8fee9dd09a362cfbcb52c12362b..17e590e67defcd6ea9a04e3035678033d80655c0 100644
@@ -646,7 +646,9 @@ typedef struct _stmt_vec_info {
 
   /* For loads only, true if this is a gather load.  */
   bool gather_p;
-  bool stride_load_p;
+
+  /* True if this is an access with loop-invariant stride.  */
+  bool strided_p;
 
   /* For both loads and stores.  */
   bool simd_lane_access_p;
@@ -664,7 +666,7 @@ typedef struct _stmt_vec_info {
 #define STMT_VINFO_VECTORIZABLE(S)         (S)->vectorizable
 #define STMT_VINFO_DATA_REF(S)             (S)->data_ref_info
 #define STMT_VINFO_GATHER_P(S)            (S)->gather_p
-#define STMT_VINFO_STRIDE_LOAD_P(S)       (S)->stride_load_p
+#define STMT_VINFO_STRIDED_P(S)                   (S)->strided_p
 #define STMT_VINFO_SIMD_LANE_ACCESS_P(S)   (S)->simd_lane_access_p
 
 #define STMT_VINFO_DR_BASE_ADDRESS(S)      (S)->dr_base_address