From a52206ae28ed3e55d601118bedd52739456401ab Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Thu, 30 Nov 2017 07:53:31 +0000
Subject: [PATCH] re PR tree-optimization/83202 (Try joining operations on
 consecutive array elements during tree vectorization)

2017-11-30  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/83202
	* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Add
	allow_peel argument and guard peeling.
	(canonicalize_loop_induction_variables): Add and forward allow_peel.
	(canonicalize_induction_variables): Pass false.
	(tree_unroll_loops_completely_1): Pass unroll_outer to disallow
	peeling from cunrolli.

	* gcc.dg/vect/pr83202-1.c: New testcase.
	* gcc.dg/tree-ssa/pr61743-1.c: Adjust.

From-SVN: r255267
---
 gcc/ChangeLog                             | 10 ++++++++++
 gcc/testsuite/ChangeLog                   |  6 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c |  5 +++--
 gcc/testsuite/gcc.dg/vect/pr83202-1.c     | 19 +++++++++++++++++++
 gcc/tree-ssa-loop-ivcanon.c               | 14 ++++++++------
 5 files changed, 46 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr83202-1.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d1838f19d6e..6630c47869e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2017-11-30  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/83202
+	* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Add
+	allow_peel argument and guard peeling.
+	(canonicalize_loop_induction_variables): Add and forward allow_peel.
+	(canonicalize_induction_variables): Pass false.
+	(tree_unroll_loops_completely_1): Pass unroll_outer to disallow
+	peeling from cunrolli.
+
 2017-11-29  Segher Boessenkool  <segher@kernel.crashing.org>
 
 	* combine.c (try_combine): Print a message to dump file whenever
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 5f237ae78c2..1b8cb2cddd9 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2017-11-30  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/83202
+	* gcc.dg/vect/pr83202-1.c: New testcase.
+	* gcc.dg/tree-ssa/pr61743-1.c: Adjust.
+
 2017-11-29  Jim Wilson  <jimw@sifive.com>
 
 	* gcc.target/riscv/riscv.exp: New.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c
index f7cbda68324..669d357045c 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c
@@ -48,5 +48,6 @@ int foo1 (e_u8 a[4][N], int b1, int b2, e_u8 b[M+1][4][N])
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 8 "cunroll" } } */
-/* { dg-final { scan-tree-dump-times "loop with 8 iterations completely unrolled" 2 "cunrolli" } } */
+/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 2 "cunroll" } } */
+/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 2 "cunroll" } } */
+/* { dg-final { scan-tree-dump-not "completely unrolled" "cunrolli" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr83202-1.c b/gcc/testsuite/gcc.dg/vect/pr83202-1.c
new file mode 100644
index 00000000000..33c83de29b8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr83202-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+void test(double data[8][8])  /* Squares elements of DATA in place, four per inner iteration.  */
+{
+  for (int i = 0; i < 8; i++)
+    {
+      for (int j = 0; j < i; j+=4)  /* Inner bound is the outer index i, so the trip count varies per row.  */
+	{
+	  data[i][j] *= data[i][j];  /* Columns j..j+3 of row i are adjacent in memory.  */
+	  data[i][j+1] *= data[i][j+1];
+	  data[i][j+2] *= data[i][j+2];
+	  data[i][j+3] *= data[i][j+3];  /* j is 0 or 4 here (j < i <= 7), so j+3 <= 7 stays inside the row.  */
+	}
+    }
+}
+
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
+/* { dg-final { scan-tree-dump "ectorized 1 loops" "vect" } } */
diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
index a32e12bee07..25193b48151 100644
--- a/gcc/tree-ssa-loop-ivcanon.c
+++ b/gcc/tree-ssa-loop-ivcanon.c
@@ -679,7 +679,7 @@ try_unroll_loop_completely (struct loop *loop,
 			    edge exit, tree niter,
 			    enum unroll_level ul,
 			    HOST_WIDE_INT maxiter,
-			    location_t locus)
+			    location_t locus, bool allow_peel)
 {
   unsigned HOST_WIDE_INT n_unroll = 0;
   bool n_unroll_found = false;
@@ -711,7 +711,8 @@ try_unroll_loop_completely (struct loop *loop,
     exit = NULL;
 
   /* See if we can improve our estimate by using recorded loop bounds.  */
-  if (maxiter >= 0
+  if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
+      && maxiter >= 0
       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
     {
       n_unroll = maxiter;
@@ -1139,7 +1140,7 @@ try_peel_loop (struct loop *loop,
 static bool
 canonicalize_loop_induction_variables (struct loop *loop,
 				       bool create_iv, enum unroll_level ul,
-				       bool try_eval)
+				       bool try_eval, bool allow_peel)
 {
   edge exit = NULL;
   tree niter;
@@ -1207,7 +1208,8 @@ canonicalize_loop_induction_variables (struct loop *loop,
      populates the loop bounds.  */
   modified |= remove_redundant_iv_tests (loop);
 
-  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
+  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus,
+				  allow_peel))
     return true;
 
   if (create_iv
@@ -1238,7 +1240,7 @@ canonicalize_induction_variables (void)
     {
       changed |= canonicalize_loop_induction_variables (loop,
 							true, UL_SINGLE_ITER,
-							true);
+							true, false);
     }
   gcc_assert (!need_ssa_update_p (cfun));
 
@@ -1353,7 +1355,7 @@ tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
     ul = UL_NO_GROWTH;
 
   if (canonicalize_loop_induction_variables
-        (loop, false, ul, !flag_tree_loop_ivcanon))
+        (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
     {
       /* If we'll continue unrolling, we need to propagate constants
 	 within the new basic blocks to fold away induction variable
-- 
2.30.2