tree-vect-stmts.c (vectorizable_load): Remove restrictions on strided SLP loads and...
author: Richard Biener <rguenther@suse.de>
Wed, 8 Jun 2016 13:17:41 +0000 (13:17 +0000)
committer: Richard Biener <rguenth@gcc.gnu.org>
Wed, 8 Jun 2016 13:17:41 +0000 (13:17 +0000)
2016-06-08  Richard Biener  <rguenther@suse.de>

* tree-vect-stmts.c (vectorizable_load): Remove restrictions
on strided SLP loads and fall back to scalar loads in case
we can't chunk them.

* gcc.dg/vect/slp-43.c: New testcase.

From-SVN: r237215

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/slp-43.c [new file with mode: 0644]
gcc/tree-vect-stmts.c

index 4f5d3177f1e2d24f64eb96a06ccbda44f909ba71..474b064ff0480bba7d8fb8540135b48729672b8f 100644 (file)
@@ -1,3 +1,9 @@
+2016-06-08  Richard Biener  <rguenther@suse.de>
+
+       * tree-vect-stmts.c (vectorizable_load): Remove restrictions
+       on strided SLP loads and fall back to scalar loads in case
+       we can't chunk them.
+
 2016-06-08  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/71452
index 28b17cf4d02751e1cbe4d3ad1ff6c8dcb653cd8a..1d80915d65e773d0612d0e98cbbad8496a7bf3e9 100644 (file)
@@ -1,3 +1,7 @@
+2016-06-08  Richard Biener  <rguenther@suse.de>
+
+       * gcc.dg/vect/slp-43.c: New testcase.
+
 2016-06-08  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/71452
diff --git a/gcc/testsuite/gcc.dg/vect/slp-43.c b/gcc/testsuite/gcc.dg/vect/slp-43.c
new file mode 100644 (file)
index 0000000..4e8df46
--- /dev/null
@@ -0,0 +1,78 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+#include <string.h>
+#include "tree-vect.h"
+
+#define FOO(T,N) \
+void __attribute__((noinline,noclone)) \
+foo_ ## T ## _ ## N (T * __restrict__ in_, T * __restrict__ out_, int s) \
+{ \
+  T *in = __builtin_assume_aligned (in_, __BIGGEST_ALIGNMENT__); \
+  T *out = __builtin_assume_aligned (out_, __BIGGEST_ALIGNMENT__); \
+  for (int i = 0; i < 16; i++) \
+    { \
+      for (int j = 0; j < N; ++j) \
+        out[j] = in[j]; \
+      in += s*N; \
+      out += N; \
+    } \
+}
+
+#define TEST(T,N) \
+ do { \
+  memset (out, 0, 4096); \
+  foo_ ## T ## _ ## N ((T *)in, (T *)out, 1); \
+  if (memcmp (in, out, sizeof (T) * 16 * N) != 0) \
+    __builtin_abort (); \
+  for (int i = sizeof (T) * 16 * N; i < 4096; ++i) \
+    if (out[i] != 0) \
+      __builtin_abort (); \
+ } while (0)
+
+FOO(char, 1)
+FOO(char, 2)
+FOO(char, 3)
+FOO(char, 4)
+FOO(char, 6)
+FOO(char, 8)
+FOO(int, 1)
+FOO(int, 2)
+FOO(int, 3)
+FOO(int, 4)
+FOO(int, 6)
+FOO(int, 8)
+FOO(int, 16)
+
+char in[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
+char out[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
+
+int main()
+{
+  check_vect ();
+
+  for (int i = 0; i < 4096; ++i)
+    {
+      in[i] = i;
+      __asm__ volatile ("" : : : "memory");
+    }
+
+  TEST(char, 1);
+  TEST(char, 2);
+  TEST(char, 3);
+  TEST(char, 4);
+  TEST(char, 6);
+  TEST(char, 8);
+  TEST(int, 1);
+  TEST(int, 2);
+  TEST(int, 3);
+  TEST(int, 4);
+  TEST(int, 6);
+  TEST(int, 8);
+  TEST(int, 16);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 13 "vect" } } */
index 373ecd7278d2c4edbd1191349bee389b97f0c321..bee064ef38e583ebb06fce3bef89e80623205dc2 100644 (file)
@@ -6440,17 +6440,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
        }
     }
   else if (STMT_VINFO_STRIDED_P (stmt_info))
-    {
-      if (grouped_load
-         && slp
-         && (group_size > nunits
-             || nunits % group_size != 0))
-       {
-         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                          "unhandled strided group load\n");
-         return false;
-       }
-    }
+    ;
   else
     {
       negative = tree_int_cst_compare (nested_in_vect_loop
@@ -6744,16 +6734,29 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       running_off = offvar;
       alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
       int nloads = nunits;
+      int lnel = 1;
       tree ltype = TREE_TYPE (vectype);
       auto_vec<tree> dr_chain;
       if (slp)
        {
-         nloads = nunits / group_size;
-         if (group_size < nunits)
-           ltype = build_vector_type (TREE_TYPE (vectype), group_size);
-         else
-           ltype = vectype;
-         ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
+         if (group_size < nunits
+             && nunits % group_size == 0)
+           {
+             nloads = nunits / group_size;
+             lnel = group_size;
+             ltype = build_vector_type (TREE_TYPE (vectype), group_size);
+             ltype = build_aligned_type (ltype,
+                                         TYPE_ALIGN (TREE_TYPE (vectype)));
+           }
+         else if (group_size >= nunits
+                  && group_size % nunits == 0)
+           {
+             nloads = 1;
+             lnel = nunits;
+             ltype = vectype;
+             ltype = build_aligned_type (ltype,
+                                         TYPE_ALIGN (TREE_TYPE (vectype)));
+           }
          /* For SLP permutation support we need to load the whole group,
             not only the number of vector stmts the permutation result
             fits in.  */
@@ -6765,48 +6768,43 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
          else
            ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
        }
+      int group_el = 0;
+      unsigned HOST_WIDE_INT
+       elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
       for (j = 0; j < ncopies; j++)
        {
-         tree vec_inv;
-
          if (nloads > 1)
+           vec_alloc (v, nloads);
+         for (i = 0; i < nloads; i++)
            {
-             vec_alloc (v, nloads);
-             for (i = 0; i < nloads; i++)
+             tree this_off = build_int_cst (TREE_TYPE (alias_off),
+                                            group_el * elsz);
+             new_stmt = gimple_build_assign (make_ssa_name (ltype),
+                                             build2 (MEM_REF, ltype,
+                                                     running_off, this_off));
+             vect_finish_stmt_generation (stmt, new_stmt, gsi);
+             if (nloads > 1)
+               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+                                       gimple_assign_lhs (new_stmt));
+
+             group_el += lnel;
+             if (! slp
+                 || group_el == group_size)
                {
-                 tree newref, newoff;
-                 gimple *incr;
-                 newref = build2 (MEM_REF, ltype, running_off, alias_off);
-
-                 newref = force_gimple_operand_gsi (gsi, newref, true,
-                                                    NULL_TREE, true,
-                                                    GSI_SAME_STMT);
-                 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
-                 newoff = copy_ssa_name (running_off);
-                 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
-                                             running_off, stride_step);
+                 tree newoff = copy_ssa_name (running_off);
+                 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+                                                     running_off, stride_step);
                  vect_finish_stmt_generation (stmt, incr, gsi);
 
                  running_off = newoff;
+                 group_el = 0;
                }
-
-             vec_inv = build_constructor (vectype, v);
-             new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
-             new_stmt = SSA_NAME_DEF_STMT (new_temp);
            }
-         else
+         if (nloads > 1)
            {
-             new_stmt = gimple_build_assign (make_ssa_name (ltype),
-                                             build2 (MEM_REF, ltype,
-                                                     running_off, alias_off));
-             vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
-             tree newoff = copy_ssa_name (running_off);
-             gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
-                                         running_off, stride_step);
-             vect_finish_stmt_generation (stmt, incr, gsi);
-
-             running_off = newoff;
+             tree vec_inv = build_constructor (vectype, v);
+             new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
+             new_stmt = SSA_NAME_DEF_STMT (new_temp);
            }
 
          if (slp)