tree-vect-stmts.c (vectorizable_store): Remove strided grouped store restrictions.
author Richard Biener <rguenther@suse.de>
Wed, 15 Jun 2016 09:54:17 +0000 (09:54 +0000)
committer Richard Biener <rguenth@gcc.gnu.org>
Wed, 15 Jun 2016 09:54:17 +0000 (09:54 +0000)
2016-06-15  Richard Biener  <rguenther@suse.de>

* tree-vect-stmts.c (vectorizable_store): Remove strided grouped
store restrictions.

* gcc.dg/vect/slp-45.c: New testcase.

From-SVN: r237474

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/slp-45.c [new file with mode: 0644]
gcc/tree-vect-stmts.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f593bd52b706647de82d1dfb16dcb2fe4f74c8e4..ebc4d6eeca1589572692f5fce7edf831a370bf14 100644 (file)
@@ -1,3 +1,8 @@
+2016-06-15  Richard Biener  <rguenther@suse.de>
+
+       * tree-vect-stmts.c (vectorizable_store): Remove strided grouped
+       store restrictions.
+
 2016-06-15  Richard Biener  <rguenther@suse.de>
 
        * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Do
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3eaaa91b03354b2224ebd9f7b402d1dc51b6e840..679f0225109bd3a7f77f8011e09a1eaf224117f1 100644 (file)
@@ -1,3 +1,7 @@
+2016-06-15  Richard Biener  <rguenther@suse.de>
+
+       * gcc.dg/vect/slp-45.c: New testcase.
+
 2016-06-15  Richard Biener  <rguenther@suse.de>
 
        * gcc.dg/vect/bb-slp-pattern-2.c: Disable loop vectorization.
diff --git a/gcc/testsuite/gcc.dg/vect/slp-45.c b/gcc/testsuite/gcc.dg/vect/slp-45.c
new file mode 100644 (file)
index 0000000..be721cb
--- /dev/null
@@ -0,0 +1,78 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+#include <string.h>
+#include "tree-vect.h"
+
+#define FOO(T,N) \
+void __attribute__((noinline,noclone)) \
+foo_ ## T ## _ ## N (T * __restrict__ in_, T * __restrict__ out_, int s) \
+{ \
+  T *in = __builtin_assume_aligned (in_, __BIGGEST_ALIGNMENT__); \
+  T *out = __builtin_assume_aligned (out_, __BIGGEST_ALIGNMENT__); \
+  for (int i = 0; i < 16; i++) \
+    { \
+      for (int j = 0; j < N; ++j) \
+        out[j] = in[j]; \
+      in += N; \
+      out += s*N; \
+    } \
+}
+
+#define TEST(T,N) \
+ do { \
+  memset (out, 0, 4096); \
+  foo_ ## T ## _ ## N ((T *)in, (T *)out, 1); \
+  if (memcmp (in, out, sizeof (T) * 16 * N) != 0) \
+    __builtin_abort (); \
+  for (int i = sizeof (T) * 16 * N; i < 4096; ++i) \
+    if (out[i] != 0) \
+      __builtin_abort (); \
+ } while (0)
+
+FOO(char, 1)
+FOO(char, 2)
+FOO(char, 3)
+FOO(char, 4)
+FOO(char, 6)
+FOO(char, 8)
+FOO(int, 1)
+FOO(int, 2)
+FOO(int, 3)
+FOO(int, 4)
+FOO(int, 6)
+FOO(int, 8)
+FOO(int, 16)
+
+char in[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
+char out[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
+
+int main()
+{
+  check_vect ();
+
+  for (int i = 0; i < 4096; ++i)
+    {
+      in[i] = i;
+      __asm__ volatile ("" : : : "memory");
+    }
+
+  TEST(char, 1);
+  TEST(char, 2);
+  TEST(char, 3);
+  TEST(char, 4);
+  TEST(char, 6);
+  TEST(char, 8);
+  TEST(int, 1);
+  TEST(int, 2);
+  TEST(int, 3);
+  TEST(int, 4);
+  TEST(int, 6);
+  TEST(int, 8);
+  TEST(int, 16);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 13 "vect" } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index bee064ef38e583ebb06fce3bef89e80623205dc2..c74f14f0205b65d225224c99871f7e76f281e885 100644 (file)
@@ -5234,6 +5234,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
   enum vect_def_type scatter_src_dt = vect_unknown_def_type;
   gimple *new_stmt;
+  int vf;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
@@ -5270,7 +5271,12 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
 
   if (loop_vinfo)
-    loop = LOOP_VINFO_LOOP (loop_vinfo);
+    {
+      loop = LOOP_VINFO_LOOP (loop_vinfo);
+      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+    }
+  else
+    vf = 1;
 
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
@@ -5365,16 +5371,6 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
            return false;
        }
 
-      if (STMT_VINFO_STRIDED_P (stmt_info)
-         && slp
-         && (group_size > nunits
-             || nunits % group_size != 0))
-       {
-         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                          "unhandled strided group store\n");
-         return false;
-       }
-
       if (first_stmt == stmt)
        {
           /* STMT is the leader of the group. Check the operands of all the
@@ -5653,23 +5649,31 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
          */
 
       unsigned nstores = nunits;
+      unsigned lnel = 1;
       tree ltype = elem_type;
       if (slp)
        {
-         nstores = nunits / group_size;
-         if (group_size < nunits)
-           ltype = build_vector_type (elem_type, group_size);
-         else
-           ltype = vectype;
+         if (group_size < nunits
+             && nunits % group_size == 0)
+           {
+             nstores = nunits / group_size;
+             lnel = group_size;
+             ltype = build_vector_type (elem_type, group_size);
+           }
+         else if (group_size >= nunits
+                  && group_size % nunits == 0)
+           {
+             nstores = 1;
+             lnel = nunits;
+             ltype = vectype;
+           }
          ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
          ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-         group_size = 1;
        }
 
       ivstep = stride_step;
       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
-                           build_int_cst (TREE_TYPE (ivstep),
-                                          ncopies * nstores));
+                           build_int_cst (TREE_TYPE (ivstep), vf));
 
       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
@@ -5700,6 +5704,9 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
              vect_finish_stmt_generation (stmt, incr, gsi);
              running_off = newoff;
            }
+         unsigned int group_el = 0;
+         unsigned HOST_WIDE_INT
+           elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
          for (j = 0; j < ncopies; j++)
            {
              /* We've set op and dt above, from gimple_assign_rhs1(stmt),
@@ -5745,19 +5752,27 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                                                   NULL_TREE, true,
                                                   GSI_SAME_STMT);
 
+                 tree this_off = build_int_cst (TREE_TYPE (alias_off),
+                                                group_el * elsz);
                  newref = build2 (MEM_REF, ltype,
-                                  running_off, alias_off);
+                                  running_off, this_off);
 
                  /* And store it to *running_off.  */
                  assign = gimple_build_assign (newref, elem);
                  vect_finish_stmt_generation (stmt, assign, gsi);
 
-                 newoff = copy_ssa_name (running_off, NULL);
-                 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
-                                             running_off, stride_step);
-                 vect_finish_stmt_generation (stmt, incr, gsi);
+                 group_el += lnel;
+                 if (! slp
+                     || group_el == group_size)
+                   {
+                     newoff = copy_ssa_name (running_off, NULL);
+                     incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+                                                 running_off, stride_step);
+                     vect_finish_stmt_generation (stmt, incr, gsi);
 
-                 running_off = newoff;
+                     running_off = newoff;
+                     group_el = 0;
+                   }
                  if (g == group_size - 1
                      && !slp)
                    {
@@ -5771,6 +5786,8 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                }
            }
          next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
+         if (slp)
+           break;
        }
       return true;
     }