From 2fd579ab530ca9ca5682eeba305c4946bf710c25 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Thu, 11 Apr 2019 07:30:59 +0000
Subject: [PATCH] re PR tree-optimization/90018 (r265453 miscompiled 527.cam4_r
 in SPEC CPU 2017)

2019-04-11  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/90018
	* tree-vect-data-refs.c (vect_preserves_scalar_order_p):
	Test both SLP and interleaving variants.

	* gcc.dg/vect/pr90018.c: New testcase.

From-SVN: r270273
---
 gcc/ChangeLog                       |  6 +++
 gcc/testsuite/ChangeLog             |  5 +++
 gcc/testsuite/gcc.dg/vect/pr90018.c | 52 +++++++++++++++++++++++
 gcc/tree-vect-data-refs.c           | 64 ++++++++++++++++++++++-------
 4 files changed, 112 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr90018.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a111272da86..f1ed98e6f66 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2019-04-11  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/90018
+	* tree-vect-data-refs.c (vect_preserves_scalar_order_p):
+	Test both SLP and interleaving variants.
+
 2019-04-11  Robin Dapp  <rdapp@linux.ibm.com>
 
 	* config/s390/8561.md: New file.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 71ae61702e3..2443e443215 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-04-11  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/90018
+	* gcc.dg/vect/pr90018.c: New testcase.
+
 2018-04-10  Steve Ellcey  <sellcey@marvell.com>
 
 	PR rtl-optimization/87763
diff --git a/gcc/testsuite/gcc.dg/vect/pr90018.c b/gcc/testsuite/gcc.dg/vect/pr90018.c
new file mode 100644
index 00000000000..d98b4c86c06
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr90018.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_double } */
+
+#include "tree-vect.h"
+
+void __attribute__((noinline,noclone))
+foo (double *a4, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      /* (a) and (b) may not form one interleaving group: store (c) to the
+         location read by (b) lies between them.  Instead group (d), (b).  */
+      double tem1 = a4[i*4] + a4[i*4+n*4] /* (a) */;
+      double tem2 = a4[i*4+2*n*4+1];
+      a4[i*4+n*4+1] = tem1; /* (c) */
+      a4[i*4+1] = tem2;
+      double tem3 = a4[i*4] - tem2;
+      double tem4 = tem3 + a4[i*4+n*4] /* (d) */;
+      a4[i*4+n*4+1] = tem4 + a4[i*4+n*4+1] /* (b) */;
+    }
+}
+int main(int argc, char **argv)
+{
+  int n = 11;
+  double a4[4 * n * 3];
+  double a42[4 * n * 3];  /* Copy of a4 for the reference computation.  */
+  check_vect ();
+  for (int i = 0; i < 4 * n * 3; ++i)
+    {
+      a4[i] = a42[i] = i;
+      __asm__ volatile ("": : : "memory");  /* Compiler memory barrier.  */
+    }
+  foo (a4, n);
+  for (int i = 0; i < n; ++i)  /* Same statements as foo, applied to a42.  */
+    {
+      double tem1 = a42[i*4] + a42[i*4+n*4];
+      double tem2 = a42[i*4+2*n*4+1];
+      a42[i*4+n*4+1] = tem1;
+      a42[i*4+1] = tem2;
+      double tem3 = a42[i*4] - tem2;
+      double tem4 = tem3 + a42[i*4+n*4];
+      a42[i*4+n*4+1] = tem4 + a42[i*4+n*4+1];
+      __asm__ volatile ("": : : "memory");  /* Compiler memory barrier.  */
+    }
+  for (int i = 0; i < 4 * n * 3; ++i)  /* Abort on any mismatch.  */
+    if (a4[i] != a42[i])
+      __builtin_abort ();
+  return 0;
+}
+
+/* For v2df we try to use SLP and fail miserably.  */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_sizes_32B_16B } } } */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 85997cf9617..d71a39ffd78 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -234,26 +234,60 @@ vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
     return true;
 
   /* STMT_A and STMT_B belong to overlapping groups.  All loads in a
-     group are emitted at the position of the last scalar load and all
-     stores in a group are emitted at the position of the last scalar store.
+     SLP group are emitted at the position of the last scalar load and
+     all loads in an interleaving group are emitted at the position
+     of the first scalar load.
+     Stores in a group are emitted at the position of the last scalar store.
      Compute that position and check whether the resulting order matches
-     the current one.  */
-  stmt_vec_info last_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
+     the current one.
+     We have not yet decided between SLP and interleaving so we have
+     to conservatively assume both.  */
+  stmt_vec_info il_a;
+  stmt_vec_info last_a = il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
   if (last_a)
-    for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s;
-	 s = DR_GROUP_NEXT_ELEMENT (s))
-      last_a = get_later_stmt (last_a, s);
+    {
+      for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s;
+	   s = DR_GROUP_NEXT_ELEMENT (s))
+	last_a = get_later_stmt (last_a, s);
+      if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
+	{
+	  for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
+	       s = DR_GROUP_NEXT_ELEMENT (s))
+	    if (get_later_stmt (il_a, s) == il_a)
+	      il_a = s;
+	}
+      else
+	il_a = last_a;
+    }
   else
-    last_a = stmtinfo_a;
-  stmt_vec_info last_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
+    last_a = il_a = stmtinfo_a;
+  stmt_vec_info il_b;
+  stmt_vec_info last_b = il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
   if (last_b)
-    for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s;
-	 s = DR_GROUP_NEXT_ELEMENT (s))
-      last_b = get_later_stmt (last_b, s);
+    {
+      for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s;
+	   s = DR_GROUP_NEXT_ELEMENT (s))
+	last_b = get_later_stmt (last_b, s);
+      if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
+	{
+	  for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
+	       s = DR_GROUP_NEXT_ELEMENT (s))
+	    if (get_later_stmt (il_b, s) == il_b)
+	      il_b = s;
+	}
+      else
+	il_b = last_b;
+    }
   else
-    last_b = stmtinfo_b;
-  return ((get_later_stmt (last_a, last_b) == last_a)
-	  == (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a));
+    last_b = il_b = stmtinfo_b;
+  bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);
+  return (/* SLP */
+	  (get_later_stmt (last_a, last_b) == last_a) == a_after_b
+	  /* Interleaving */
+	  && (get_later_stmt (il_a, il_b) == il_a) == a_after_b
+	  /* Mixed */
+	  && (get_later_stmt (il_a, last_b) == il_a) == a_after_b
+	  && (get_later_stmt (last_a, il_b) == last_a) == a_after_b);
 }
 
 /* A subroutine of vect_analyze_data_ref_dependence.  Handle
-- 
2.30.2