Use range info in split_constant_offset (PR 81635)

author Richard Sandiford <richard.sandiford@linaro.org>

Thu, 1 Feb 2018 14:17:07 +0000 (14:17 +0000)

committer Richard Sandiford <rsandifo@gcc.gnu.org>

Thu, 1 Feb 2018 14:17:07 +0000 (14:17 +0000)
author Richard Sandiford <richard.sandiford@linaro.org>
Thu, 1 Feb 2018 14:17:07 +0000 (14:17 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Thu, 1 Feb 2018 14:17:07 +0000 (14:17 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8cec738653b15fe1044ea5751cb65012846568ad..e62b93d9a38a5db38bbc5f0689f5ed81da08258a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2018-02-01  Richard Sandiford  <richard.sandiford@linaro.org>
+
+       PR tree-optimization/81635
+       * tree-data-ref.c (split_constant_offset_1): For types that
+       wrap on overflow, try to use range info to prove that wrapping
+       cannot occur.
+
  2018-02-01  Renlin Li  <renlin.li@arm.com>
  
         PR target/83370
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8fb5c321031badc55099d5ea7dac9538bf2c0157..3f2f44736115bc2e2cfc9adedc0b67331a77bae2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2018-02-01  Richard Sandiford  <richard.sandiford@linaro.org>
+
+       PR tree-optimization/81635
+       * gcc.dg/vect/bb-slp-pr81635-1.c: New test.
+       * gcc.dg/vect/bb-slp-pr81635-2.c: Likewise.
+
  2018-02-01  Richard Sandiford  <richard.sandiford@linaro.org>
  
         PR target/83370
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c
new file mode 100644
index 0000000..f024dc7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c
@@ -0,0 +1,92 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target lp64 } */
+
+void
+f1 (double *p, double *q)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = 0; i < 1000; i += 4)
+    {
+      double a = q[i] + p[i];
+      double b = q[i + 1] + p[i + 1];
+      q[i] = a;
+      q[i + 1] = b;
+    }
+}
+
+void
+f2 (double *p, double *q)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = 2; i < ~0U - 4; i += 4)
+    {
+      double a = q[i] + p[i];
+      double b = q[i + 1] + p[i + 1];
+      q[i] = a;
+      q[i + 1] = b;
+    }
+}
+
+void
+f3 (double *p, double *q)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = 0; i < ~0U - 3; i += 4)
+    {
+      double a = q[i + 2] + p[i + 2];
+      double b = q[i + 3] + p[i + 3];
+      q[i + 2] = a;
+      q[i + 3] = b;
+    }
+}
+
+void
+f4 (double *p, double *q)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = 0; i < 500; i += 6)
+    for (unsigned int j = 0; j < 500; j += 4)
+      {
+       double a = q[j] + p[i];
+       double b = q[j + 1] + p[i + 1];
+       q[i] = a;
+       q[i + 1] = b;
+      }
+}
+
+void
+f5 (double *p, double *q)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = 2; i < 1000; i += 4)
+    {
+      double a = q[i - 2] + p[i - 2];
+      double b = q[i - 1] + p[i - 1];
+      q[i - 2] = a;
+      q[i - 1] = b;
+    }
+}
+
+double p[1000];
+double q[1000];
+
+void
+f6 (int n)
+{
+  for (unsigned int i = 0; i < n; i += 4)
+    {
+      double a = q[i] + p[i];
+      double b = q[i + 1] + p[i + 1];
+      q[i] = a;
+      q[i + 1] = b;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 6 "slp1" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c
new file mode 100644
index 0000000..11e8f0f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+/* { dg-require-effective-target lp64 } */
+
+double p[1000];
+double q[1000];
+
+void
+f1 (double *p, double *q)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = 2; i < ~0U - 4; i += 4)
+    {
+      double a = q[i + 2] + p[i + 2];
+      double b = q[i + 3] + p[i + 3];
+      q[i + 2] = a;
+      q[i + 3] = b;
+    }
+}
+
+void
+f2 (double *p, double *q)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = 0; i < ~0U - 3; i += 4)
+    {
+      double a = q[i + 4] + p[i + 4];
+      double b = q[i + 5] + p[i + 5];
+      q[i + 4] = a;
+      q[i + 5] = b;
+    }
+}
+
+void
+f3 (double *p, double *q)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = 0; i < 1000; i += 4)
+    {
+      double a = q[i - 2] + p[i - 2];
+      double b = q[i - 1] + p[i - 1];
+      q[i - 2] = a;
+      q[i - 1] = b;
+    }
+}
+
+void
+f4 (double *p, double *q)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = 2; i < 1000; i += 4)
+    {
+      double a = q[i - 4] + p[i - 4];
+      double b = q[i - 3] + p[i - 3];
+      q[i - 4] = a;
+      q[i - 3] = b;
+    }
+}
+
+/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp1" } } */
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index b5c0b7f4281a566d292edda405485e222304e20d..f3070d3a1188c9bc2821580ab5905cf1a55ab0c7 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -705,11 +705,46 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
            and the outer precision is at least as large as the inner.  */
         tree itype = TREE_TYPE (op0);
         if ((POINTER_TYPE_P (itype)
-            || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
+            || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
             && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
             && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
           {
-           split_constant_offset (op0, &var0, off);
+           if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
+             {
+               /* Split the unconverted operand and try to prove that
+                  wrapping isn't a problem.  */
+               tree tmp_var, tmp_off;
+               split_constant_offset (op0, &tmp_var, &tmp_off);
+
+               /* See whether we have an SSA_NAME whose range is known
+                  to be [A, B].  */
+               if (TREE_CODE (tmp_var) != SSA_NAME)
+                 return false;
+               wide_int var_min, var_max;
+               if (get_range_info (tmp_var, &var_min, &var_max) != VR_RANGE)
+                 return false;
+
+               /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
+                  is known to be [A + TMP_OFF, B + TMP_OFF], with all
+                  operations done in ITYPE.  The addition must overflow
+                  at both ends of the range or at neither.  */
+               bool overflow[2];
+               signop sgn = TYPE_SIGN (itype);
+               unsigned int prec = TYPE_PRECISION (itype);
+               wide_int woff = wi::to_wide (tmp_off, prec);
+               wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
+               wi::add (var_max, woff, sgn, &overflow[1]);
+               if (overflow[0] != overflow[1])
+                 return false;
+
+               /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
+               widest_int diff = (widest_int::from (op0_min, sgn)
+                                  - widest_int::from (var_min, sgn));
+               var0 = tmp_var;
+               *off = wide_int_to_tree (ssizetype, diff);
+             }
+           else
+             split_constant_offset (op0, &var0, off);
             *var = fold_convert (type, var0);
             return true;
           }
author	Richard Sandiford <richard.sandiford@linaro.org>
	Thu, 1 Feb 2018 14:17:07 +0000 (14:17 +0000)
committer	Richard Sandiford <rsandifo@gcc.gnu.org>
	Thu, 1 Feb 2018 14:17:07 +0000 (14:17 +0000)
gcc/ChangeLog		patch | blob | history
gcc/testsuite/ChangeLog		patch | blob | history
gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c	[new file with mode: 0644]	patch | blob
gcc/tree-data-ref.c		patch | blob | history