+2018-02-01 Richard Sandiford <richard.sandiford@linaro.org>
+
+ PR tree-optimization/81635
+ * tree-data-ref.c (split_constant_offset_1): For types that
+ wrap on overflow, try to use range info to prove that wrapping
+ cannot occur.
+
2018-02-01 Renlin Li <renlin.li@arm.com>
PR target/83370
+2018-02-01 Richard Sandiford <richard.sandiford@linaro.org>
+
+ PR tree-optimization/81635
+ * gcc.dg/vect/bb-slp-pr81635-1.c: New test.
+ * gcc.dg/vect/bb-slp-pr81635-2.c: Likewise.
+
2018-02-01 Richard Sandiford <richard.sandiford@linaro.org>
PR target/83370
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target lp64 } */
+
+/* "i" runs over [0, 996] in steps of 4, so "i" and "i + 1" stay well
+   below 1000 and the unsigned arithmetic can never wrap.  With the
+   split_constant_offset change above, the +1 offset can be split off
+   and the paired q[i]/q[i+1] accesses should be SLP-vectorized.  */
+void
+f1 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < 1000; i += 4)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+/* The bound is almost the full unsigned range: i <= ~0U - 5, so even
+   "i + 1" tops out at ~0U - 4 and cannot wrap.  The range info should
+   be enough to prove this, so the block should still be vectorized.  */
+void
+f2 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 2; i < ~0U - 4; i += 4)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+/* As f2, but with constant offsets +2/+3 on the accesses.  The guard
+   i < ~0U - 3 keeps "i + 3" below the type maximum, so wrapping is
+   still provably impossible and the block should be vectorized.
+   (Contrast with bb-slp-pr81635-2.c, where the offsets exceed what
+   the range info can rule out.)  */
+void
+f3 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < ~0U - 3; i += 4)
+ {
+ double a = q[i + 2] + p[i + 2];
+ double b = q[i + 3] + p[i + 3];
+ q[i + 2] = a;
+ q[i + 3] = b;
+ }
+}
+
+/* Nested loops: both "i" and "j" have small known ranges (< 500), so
+   neither "i + 1" nor "j + 1" can wrap and the paired accesses should
+   be vectorized.
+   NOTE(review): the loads index q with "j" while the stores index q
+   with "i" -- confirm this asymmetry matches the intended testcase.  */
+void
+f4 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < 500; i += 6)
+ for (unsigned int j = 0; j < 500; j += 4)
+ {
+ double a = q[j] + p[i];
+ double b = q[j + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+/* Negative constant offsets: "i" starts at 2, so "i - 2" and "i - 1"
+   are always >= 0 and cannot wrap below zero.  The range info proves
+   this, so the block should be vectorized.  */
+void
+f5 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 2; i < 1000; i += 4)
+ {
+ double a = q[i - 2] + p[i - 2];
+ double b = q[i - 1] + p[i - 1];
+ q[i - 2] = a;
+ q[i - 1] = b;
+ }
+}
+
+/* Global arrays used by f6 below (the earlier functions take pointer
+   parameters instead).  */
+double p[1000];
+double q[1000];
+
+/* Runtime bound: "n" is signed and is converted to unsigned by the
+   comparison "i < n".  "i" is always a multiple of 4, so "i + 1"
+   cannot wrap even for the largest possible bound -- presumably this
+   is what lets the range check succeed here; the dg-final below
+   counts this block among the six vectorized ones.  */
+void
+f6 (int n)
+{
+ for (unsigned int i = 0; i < n; i += 4)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 6 "slp1" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+/* { dg-require-effective-target lp64 } */
+
+/* Global arrays; note the functions below shadow them with their
+   pointer parameters.  */
+double p[1000];
+double q[1000];
+
+/* Negative test: same near-full-range loop as f2 in
+   bb-slp-pr81635-1.c, but with +2/+3 offsets.  The recorded range
+   info for "i" is evidently not tight enough to prove that these
+   offsets never wrap, so per the dg-final below nothing may be
+   vectorized.  */
+void
+f1 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 2; i < ~0U - 4; i += 4)
+ {
+ double a = q[i + 2] + p[i + 2];
+ double b = q[i + 3] + p[i + 3];
+ q[i + 2] = a;
+ q[i + 3] = b;
+ }
+}
+
+/* Negative test: with guard i < ~0U - 3, the range info allows values
+   of "i" for which "i + 4"/"i + 5" would wrap (the stride-4 property
+   is not part of the recorded range), so wrapping cannot be ruled out
+   and the block must not be vectorized.  */
+void
+f2 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < ~0U - 3; i += 4)
+ {
+ double a = q[i + 4] + p[i + 4];
+ double b = q[i + 5] + p[i + 5];
+ q[i + 4] = a;
+ q[i + 5] = b;
+ }
+}
+
+/* Negative test: "i" starts at 0, so on the first iteration "i - 2"
+   and "i - 1" really do wrap around to the top of the unsigned range.
+   The offset must not be split and nothing may be vectorized.  */
+void
+f3 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < 1000; i += 4)
+ {
+ double a = q[i - 2] + p[i - 2];
+ double b = q[i - 1] + p[i - 1];
+ q[i - 2] = a;
+ q[i - 1] = b;
+ }
+}
+
+/* Negative test: "i" starts at 2, so "i - 4" and "i - 3" wrap on the
+   first iteration.  As in f3, the block must not be vectorized.  */
+void
+f4 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 2; i < 1000; i += 4)
+ {
+ double a = q[i - 4] + p[i - 4];
+ double b = q[i - 3] + p[i - 3];
+ q[i - 4] = a;
+ q[i - 3] = b;
+ }
+}
+
+/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp1" } } */
and the outer precision is at least as large as the inner. */
tree itype = TREE_TYPE (op0);
if ((POINTER_TYPE_P (itype)
- || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
+ || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
&& TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
&& (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
{
- split_constant_offset (op0, &var0, off);
+ if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
+ {
+ /* Split the unconverted operand and try to prove that
+ wrapping isn't a problem. */
+ tree tmp_var, tmp_off;
+ split_constant_offset (op0, &tmp_var, &tmp_off);
+
+ /* See whether we have an SSA_NAME whose range is known
+ to be [A, B]. */
+ if (TREE_CODE (tmp_var) != SSA_NAME)
+ return false;
+ wide_int var_min, var_max;
+ if (get_range_info (tmp_var, &var_min, &var_max) != VR_RANGE)
+ return false;
+
+ /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
+ is known to be [A + TMP_OFF, B + TMP_OFF], with all
+ operations done in ITYPE. The addition must overflow
+ at both ends of the range or at neither. */
+ bool overflow[2];
+ signop sgn = TYPE_SIGN (itype);
+ unsigned int prec = TYPE_PRECISION (itype);
+ wide_int woff = wi::to_wide (tmp_off, prec);
+ wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
+ wi::add (var_max, woff, sgn, &overflow[1]);
+ if (overflow[0] != overflow[1])
+ return false;
+
+ /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */
+ widest_int diff = (widest_int::from (op0_min, sgn)
+ - widest_int::from (var_min, sgn));
+ var0 = tmp_var;
+ *off = wide_int_to_tree (ssizetype, diff);
+ }
+ else
+ split_constant_offset (op0, &var0, off);
*var = fold_convert (type, var0);
return true;
}