PR/67682, break SLP groups up if only some elements match

author Alan Lawrence <alan.lawrence@arm.com>

Fri, 13 Nov 2015 16:12:52 +0000 (16:12 +0000)

committer Alan Lawrence <alalaw01@gcc.gnu.org>

Fri, 13 Nov 2015 16:12:52 +0000 (16:12 +0000)
author Alan Lawrence <alan.lawrence@arm.com>
Fri, 13 Nov 2015 16:12:52 +0000 (16:12 +0000)
committer Alan Lawrence <alalaw01@gcc.gnu.org>
Fri, 13 Nov 2015 16:12:52 +0000 (16:12 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 60d7a4d77199426f209eed0f5bcd367c587f03c1..9b2bd332845fca861ca93a9b56f68c56944d18a4 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2015-11-13  Alan Lawrence  <alan.lawrence@arm.com>
+
+       PR tree-optimization/67682
+       * tree-vect-slp.c (vect_split_slp_store_group): New.
+       (vect_analyze_slp_instance): During basic block SLP, recurse on
+       subgroups if vect_build_slp_tree fails after 1st vector.
+
  2015-11-13  Christian Bruel  <christian.bruel@st.com>
  
         PR target/65837
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 10a2c808d1988a40c6e47fa32922d3e5d5696f4c..ed4d7cb8aee7f143d2f0ebc44f5905f5ccbfaafa 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2015-11-13  Alan Lawrence  <alan.lawrence@arm.com>
+
+       PR tree-optimization/67682
+       * gcc.dg/vect/bb-slp-7.c (main1): Make subgroups non-isomorphic.
+       * gcc.dg/vect/bb-slp-subgroups-1.c: New.
+       * gcc.dg/vect/bb-slp-subgroups-2.c: New.
+       * gcc.dg/vect/bb-slp-subgroups-3.c: New.
+
  2015-11-13  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
  
         * gcc.target/aarch64/umaddl_combine_1.c: New test.
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-7.c b/gcc/testsuite/gcc.dg/vect/bb-slp-7.c

index ab54a48332f68cea9d74b90a7879c74421b73399..b8bef8cffb4173eea6264901f1adcb57295ec61b 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-7.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-7.c
@@ -16,12 +16,12 @@ main1 (unsigned int x, unsigned int y)
    unsigned int *pout = &out[0];
    unsigned int a0, a1, a2, a3;
  
-  /* Non isomorphic.  */
+  /* Non-isomorphic, even within 64-bit subgroups.  */
    a0 = *pin++ + 23;
-  a1 = *pin++ + 142;
+  a1 = *pin++ * 142;
    a2 = *pin++ + 2;
    a3 = *pin++ * 31;
-  
+
    *pout++ = a0 * x;
    *pout++ = a1 * y;
    *pout++ = a2 * x;
@@ -29,7 +29,7 @@ main1 (unsigned int x, unsigned int y)
  
    /* Check results.  */
    if (out[0] != (in[0] + 23) * x
-      || out[1] != (in[1] + 142) * y
+      || out[1] != (in[1] * 142) * y
        || out[2] != (in[2] + 2) * x
        || out[3] != (in[3] * 31) * y)
      abort();
@@ -47,4 +47,4 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2" } } */
-  
+
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-1.c

new file mode 100644 (file)

index 0000000..39c23c3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-1.c
@@ -0,0 +1,44 @@
+/* { dg-require-effective-target vect_int } */
+/* PR tree-optimization/67682.  */
+
+#include "tree-vect.h"
+
+int __attribute__((__aligned__(8))) a[8];
+int __attribute__((__aligned__(8))) b[4];
+
+__attribute__ ((noinline)) void
+test ()  /* Eight consecutive stores to a[], two halves of four.  */
+{
+    a[0] = b[0];  /* a[0..3]: straight copies of b[0..3].  */
+    a[1] = b[1];
+    a[2] = b[2];
+    a[3] = b[3];
+    a[4] = 0;     /* a[4..7]: constant zero, no loads.  */
+    a[5] = 0;
+    a[6] = 0;
+    a[7] = 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  check_vect ();
+  /* a[] starts at 1, which no expected result equals; b[i] = i + 4.  */
+  for (int i = 0; i < 8; i++)
+    a[i] = 1;
+  for (int i = 0; i < 4; i++)
+    b[i] = i + 4;
+  __asm__ volatile ("" : : : "memory");  /* Compiler barrier around the call.  */
+  test ();  /* Takes no arguments; works on the globals a[] and b[].  */
+  __asm__ volatile ("" : : : "memory");
+  for (int i = 0; i < 4; i++)  /* First half: copies of b[0..3].  */
+    if (a[i] != i+4)
+      abort ();
+  for (int i = 4; i < 8; i++)  /* Second half: zeroed.  */
+    if (a[i] != 0)
+      abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 1 "slp2" } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-2.c

new file mode 100644 (file)

index 0000000..13c51f3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-2.c
@@ -0,0 +1,41 @@
+/* { dg-require-effective-target vect_int } */
+/* PR tree-optimization/67682.  */
+
+#include "tree-vect.h"
+
+int __attribute__((__aligned__(8))) a[8];
+int __attribute__((__aligned__(8))) b[4];
+
+__attribute__ ((noinline)) void
+test ()  /* Eight consecutive stores to a[], two halves of four.  */
+{
+    a[0] = b[2] + 1;  /* a[0..3]: additions; loads b[2], b[0], b[1], b[1].  */
+    a[1] = b[0] + 2;
+    a[2] = b[1] + 3;
+    a[3] = b[1] + 4;
+    a[4] = b[3] * 3;  /* a[4..7]: multiplies of b[3], b[0], b[2], b[1].  */
+    a[5] = b[0] * 4;
+    a[6] = b[2] * 5;
+    a[7] = b[1] * 7;
+}
+
+int
+main (int argc, char **argv)
+{
+  check_vect ();
+  /* a[] starts at 1, which no expected result equals; b[i] = i + 4.  */
+  for (int i = 0; i < 8; i++)
+    a[i] = 1;
+  for (int i = 0; i < 4; i++)
+    b[i] = i + 4;
+  __asm__ volatile ("" : : : "memory");  /* Compiler barrier around the call.  */
+  test ();  /* Takes no arguments; works on the globals a[] and b[].  */
+  __asm__ volatile ("" : : : "memory");
+  if ((a[0] != 7) || (a[1] != 6) || (a[2] != 8) || (a[3] != 9)
+      || (a[4] != 21) || (a[5] != 16) || (a[6] != 30) || (a[7] != 35))
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 1 "slp2" } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-3.c b/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-3.c

new file mode 100644 (file)

index 0000000..6ae9a89
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-3.c
@@ -0,0 +1,41 @@
+/* { dg-require-effective-target vect_int } */
+/* PR tree-optimization/67682.  */
+
+#include "tree-vect.h"
+
+int __attribute__((__aligned__(8))) a[8];
+int __attribute__((__aligned__(8))) b[8];
+
+__attribute__ ((noinline)) void
+test ()  /* Eight consecutive stores to a[], two halves of four.  */
+{
+    a[0] = b[0] + 1;  /* a[0..3]: additions on b[0..3].  */
+    a[1] = b[1] + 2;
+    a[2] = b[2] + 3;
+    a[3] = b[3] + 4;
+    a[4] = b[0] * 3;  /* a[4..7]: multiplies of b[0], b[2], b[4], b[6].  */
+    a[5] = b[2] * 4;
+    a[6] = b[4] * 5;
+    a[7] = b[6] * 7;
+}
+
+int
+main (int argc, char **argv)
+{
+  check_vect ();
+  /* a[] starts at 1, which no expected result equals; b[i] = i + 4.  */
+  for (int i = 0; i < 8; i++)
+    a[i] = 1;
+  for (int i = 0; i < 8; i++)
+    b[i] = i + 4;
+  __asm__ volatile ("" : : : "memory");  /* Compiler barrier around the call.  */
+  test ();  /* Takes no arguments; works on the globals a[] and b[].  */
+  __asm__ volatile ("" : : : "memory");
+  if ((a[0] != 5) || (a[1] != 7) || (a[2] != 9) || (a[3] != 11)
+      || (a[4] != 12) || (a[5] != 24) || (a[6] != 40) || (a[7] != 70))
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 1 "slp2" } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c

index f65837df4fcba2e8edfe2a60ff4d028cbad376fa..704f42fd886c7db636e20ecf2c71e4cf4f93e357 100644 (file)
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1565,6 +1565,54 @@ vect_analyze_slp_cost (slp_instance instance, void *data)
    body_cost_vec.release ();
  }
  
+/* Split the group of stores headed by FIRST_STMT in two.  The first group
+   keeps FIRST_STMT as its head and the first GROUP1_SIZE stmts of the chain
+   (0 < GROUP1_SIZE < the original GROUP_SIZE, both asserted); the second
+   group takes the remaining stmts.  GROUP_SIZE, GROUP_FIRST_ELEMENT and
+   GROUP_GAP are updated to match.  Return the head of the second group.  */
+
+static gimple *
+vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
+{
+  stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
+  gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
+  gcc_assert (group1_size > 0);
+  int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
+  gcc_assert (group2_size > 0);
+  GROUP_SIZE (first_vinfo) = group1_size;
+
+  gimple *stmt = first_stmt;
+  for (unsigned i = group1_size; i > 1; i--)  /* GROUP1_SIZE - 1 hops.  */
+    {
+      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
+      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
+    }
+  /* STMT is now the last element of the first group; cut the chain there.  */
+  gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
+  GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
+
+  GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
+  for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
+    {
+      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;  /* New head.  */
+      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
+    }
+
+  /* The second group's GROUP_GAP is the original head's gap (read before it
+     is adjusted below) plus the GROUP1_SIZE elements of the first group.  */
+  GROUP_GAP (vinfo_for_stmt (group2)) =
+    GROUP_GAP (first_vinfo) + group1_size;
+
+  /* GROUP_GAP of the first group now has to skip over the second group too.  */
+  GROUP_GAP (first_vinfo) += group2_size;
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
+                    group1_size, group2_size);
+
+  return group2;
+}
+
  /* Analyze an SLP instance starting from a group of grouped stores.  Call
     vect_build_slp_tree to build a tree of packed stmts if possible.
     Return FALSE if it's impossible to SLP any stmt in the loop.  */
@@ -1580,7 +1628,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
    tree vectype, scalar_type = NULL_TREE;
    gimple *next;
    unsigned int vectorization_factor = 0;
-  int i;
+  unsigned int i;
    unsigned int max_nunits = 0;
    vec<slp_tree> loads;
    struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
@@ -1774,6 +1822,41 @@ vect_analyze_slp_instance (vec_info *vinfo,
    vect_free_slp_tree (node);
    loads.release ();
  
+  /* For basic block SLP, try to break the group up into multiples of the
+     vectorization factor.  */
+  if (is_a <bb_vec_info> (vinfo)
+      && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
+      && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
+    {
+      /* We consider breaking the group only on VF boundaries from the existing
+        start.  */
+      for (i = 0; i < group_size; i++)
+       if (!matches[i]) break;
+
+      if (i >= vectorization_factor && i < group_size)
+       {
+         /* Split into two groups at the first vector boundary before i.  */
+         gcc_assert ((vectorization_factor & (vectorization_factor - 1)) == 0);
+         unsigned group1_size = i & ~(vectorization_factor - 1);
+
+         gimple *rest = vect_split_slp_store_group (stmt, group1_size);
+         bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
+         /* If the first non-match was in the middle of a vector,
+            skip the rest of that vector.  */
+         if (group1_size < i)
+           {
+             i = group1_size + vectorization_factor;
+             if (i < group_size)
+               rest = vect_split_slp_store_group (rest, vectorization_factor);
+           }
+         if (i < group_size)
+           res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
+         return res;
+       }
+      /* Even though not all of the first vector matched, we might be able to
+        SLP (some of) the remainder.  FORNOW ignore this possibility.  */
+    }
+
    return false;
  }
author	Alan Lawrence <alan.lawrence@arm.com>
	Fri, 13 Nov 2015 16:12:52 +0000 (16:12 +0000)
committer	Alan Lawrence <alalaw01@gcc.gnu.org>
	Fri, 13 Nov 2015 16:12:52 +0000 (16:12 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/bb-slp-7.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-3.c	[new file with mode: 0644]	patch \| blob
gcc/tree-vect-slp.c		patch \| blob \| history