+2018-02-08 Richard Sandiford <richard.sandiford@linaro.org>
+
+ PR tree-optimization/81635
+ * wide-int.h (wi::round_down_for_mask, wi::round_up_for_mask): Declare.
+ * wide-int.cc (wi::round_down_for_mask, wi::round_up_for_mask)
+ (test_round_for_mask): New functions.
+ (wide_int_cc_tests): Call test_round_for_mask.
+ * tree-vrp.h (intersect_range_with_nonzero_bits): Declare.
+ * tree-vrp.c (intersect_range_with_nonzero_bits): New function.
+ * tree-data-ref.c (split_constant_offset_1): Use it to refine the
+ range returned by get_range_info.
+
2018-02-08 Jan Hubicka <hubicka@ucw.cz>

 PR ipa/81360
+2018-02-08 Richard Sandiford <richard.sandiford@linaro.org>
+
+ PR tree-optimization/81635
+ * gcc.dg/vect/bb-slp-pr81635-3.c: New test.
+ * gcc.dg/vect/bb-slp-pr81635-4.c: Likewise.
+
2018-02-08 Andreas Krebbel <krebbel@linux.vnet.ibm.com>

 * gcc.target/s390/nobp-function-pointer-attr.c: New test.
--- /dev/null
+++ gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-3.c
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target lp64 } */
+
+void
+f1 (double *p, double *q, unsigned int n)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < n; i += 4)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+void
+f2 (double *p, double *q, unsigned int n)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < n; i += 2)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+void
+f3 (double *p, double *q, unsigned int n)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < n; i += 6)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+void
+f4 (double *p, double *q, unsigned int start, unsigned int n)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = start & -2; i < n; i += 2)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 4 "slp1" } } */
--- /dev/null
+++ gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-4.c
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+/* { dg-require-effective-target lp64 } */
+
+void
+f1 (double *p, double *q, unsigned int n)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < n; i += 1)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+void
+f2 (double *p, double *q, unsigned int n)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < n; i += 3)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+void
+f3 (double *p, double *q, unsigned int start, unsigned int n)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = start; i < n; i += 2)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp1" } } */
if (TREE_CODE (tmp_var) != SSA_NAME)
return false;
wide_int var_min, var_max;
- if (get_range_info (tmp_var, &var_min, &var_max) != VR_RANGE)
+ value_range_type vr_type = get_range_info (tmp_var, &var_min,
+ &var_max);
+ wide_int var_nonzero = get_nonzero_bits (tmp_var);
+ signop sgn = TYPE_SIGN (itype);
+ if (intersect_range_with_nonzero_bits (vr_type, &var_min,
+ &var_max, var_nonzero,
+ sgn) != VR_RANGE)
return false;
/* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
   is [VAR_MIN + TMP_OFF, VAR_MAX + TMP_OFF], with all
   operations done in ITYPE.  The addition must overflow
   at both ends of the range or at neither.  */
bool overflow[2];
- signop sgn = TYPE_SIGN (itype);
unsigned int prec = TYPE_PRECISION (itype);
wide_int woff = wi::to_wide (tmp_off, prec);
wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
&& operand_equal_p (val, type_min, 0)));
}
+/* VR_TYPE describes a range with minimum value *MIN and maximum
+ value *MAX. Restrict the range to the set of values that have
+ no bits set outside NONZERO_BITS. Update *MIN and *MAX and
+ return the new range type.
+
+ SGN gives the sign of the values described by the range. */
+
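+/* For example, intersecting the VR_RANGE [6, 24] with NONZERO_BITS 0xf1
+   gives the VR_RANGE [16, 17]: 6 rounds up to 16 and 24 rounds down to 17.
+   If no value in the range is compatible with NONZERO_BITS (say [2, 6]
+   with NONZERO_BITS 0xf0), the result is VR_UNDEFINED.  */
+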
+enum value_range_type
+intersect_range_with_nonzero_bits (enum value_range_type vr_type,
+ wide_int *min, wide_int *max,
+ const wide_int &nonzero_bits,
+ signop sgn)
+{
+ if (vr_type == VR_RANGE)
+ {
+ *max = wi::round_down_for_mask (*max, nonzero_bits);
+
+ /* Check that the range contains at least one valid value. */
+ if (wi::gt_p (*min, *max, sgn))
+ return VR_UNDEFINED;
+
+ *min = wi::round_up_for_mask (*min, nonzero_bits);
+ gcc_checking_assert (wi::le_p (*min, *max, sgn));
+ }
+ if (vr_type == VR_ANTI_RANGE)
+ {
+ *max = wi::round_up_for_mask (*max, nonzero_bits);
+
+ /* If the calculation wrapped, we now have a VR_RANGE whose
+ lower bound is *MAX and whose upper bound is *MIN. */
+ if (wi::gt_p (*min, *max, sgn))
+ {
+ std::swap (*min, *max);
+ *max = wi::round_down_for_mask (*max, nonzero_bits);
+ gcc_checking_assert (wi::le_p (*min, *max, sgn));
+ return VR_RANGE;
+ }
+
+ *min = wi::round_down_for_mask (*min, nonzero_bits);
+ gcc_checking_assert (wi::le_p (*min, *max, sgn));
+
+ /* Check whether we now have an empty set of values. */
+ if (*min - 1 == *max)
+ return VR_UNDEFINED;
+ }
+ return vr_type;
+}
/* Set value range VR to VR_UNDEFINED. */
tree op0_type);
extern bool vrp_operand_equal_p (const_tree, const_tree);
+extern enum value_range_type intersect_range_with_nonzero_bits
+ (enum value_range_type, wide_int *, wide_int *, const wide_int &, signop);
struct assert_info
{
return only_sign_bit_p (x, x.precision);
}
+/* Return VAL if VAL has no bits set outside MASK. Otherwise round VAL
+ down to the previous value that has no bits set outside MASK.
+ This rounding wraps for signed values if VAL is negative and
+ the top bit of MASK is clear.
+
+ For example, round_down_for_mask (6, 0xf1) would give 1 and
+ round_down_for_mask (24, 0xf1) would give 17. */
+
+wide_int
+wi::round_down_for_mask (const wide_int &val, const wide_int &mask)
+{
+ /* Get the bits in VAL that are outside the mask. */
+ wide_int extra_bits = wi::bit_and_not (val, mask);
+ if (extra_bits == 0)
+ return val;
+
+ /* Get a mask that includes the top bit in EXTRA_BITS and is all 1s
+ below that bit. */
+ unsigned int precision = val.get_precision ();
+ wide_int lower_mask = wi::mask (precision - wi::clz (extra_bits),
+ false, precision);
+
+ /* Clear the bits that aren't in MASK, but ensure that all bits
+ in MASK below the top cleared bit are set. */
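+ /* For example, for round_down_for_mask (24, 0xf1): EXTRA_BITS is 0x08,
+    LOWER_MASK is 0x0f, and the result is 0x10 | 0x01, i.e. 17.  */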
+ return (val & mask) | (mask & lower_mask);
+}
+
+/* Return VAL if VAL has no bits set outside MASK. Otherwise round VAL
+ up to the next value that has no bits set outside MASK. The rounding
+ wraps if there are no suitable values greater than VAL.
+
+ For example, round_up_for_mask (6, 0xf1) would give 16 and
+ round_up_for_mask (24, 0xf1) would give 32. */
+
+wide_int
+wi::round_up_for_mask (const wide_int &val, const wide_int &mask)
+{
+ /* Get the bits in VAL that are outside the mask. */
+ wide_int extra_bits = wi::bit_and_not (val, mask);
+ if (extra_bits == 0)
+ return val;
+
+ /* Get a mask that is all 1s above the top bit in EXTRA_BITS. */
+ unsigned int precision = val.get_precision ();
+ wide_int upper_mask = wi::mask (precision - wi::clz (extra_bits),
+ true, precision);
+
+ /* Get the bits of the mask that are above the top bit in EXTRA_BITS. */
+ upper_mask &= mask;
+
+ /* Conceptually we need to:
+
+ - clear bits of VAL outside UPPER_MASK
+ - add the lowest bit in UPPER_MASK to VAL (or add 0 if UPPER_MASK is 0)
+ - propagate the carry through the bits of VAL in UPPER_MASK
+
+ If (~VAL & UPPER_MASK) is nonzero, the carry eventually
+ reaches that bit and the process leaves all lower bits clear.
+ If (~VAL & UPPER_MASK) is zero then the result is also zero. */
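+
+ /* For example, for round_up_for_mask (24, 0xf1): EXTRA_BITS is 0x08,
+    UPPER_MASK is 0xf0, TMP is 0xe0, and (VAL | TMP) & -TMP is 0x20, i.e. 32.  */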
+ wide_int tmp = wi::bit_and_not (upper_mask, val);
+
+ return (val | tmp) & -tmp;
+}
+
/*
* Private utilities.
*/
}
}
+/* Test the round_{down,up}_for_mask functions. */
+
+static void
+test_round_for_mask ()
+{
+ unsigned int prec = 18;
+ ASSERT_EQ (17, wi::round_down_for_mask (wi::shwi (17, prec),
+ wi::shwi (0xf1, prec)));
+ ASSERT_EQ (17, wi::round_up_for_mask (wi::shwi (17, prec),
+ wi::shwi (0xf1, prec)));
+
+ ASSERT_EQ (1, wi::round_down_for_mask (wi::shwi (6, prec),
+ wi::shwi (0xf1, prec)));
+ ASSERT_EQ (16, wi::round_up_for_mask (wi::shwi (6, prec),
+ wi::shwi (0xf1, prec)));
+
+ ASSERT_EQ (17, wi::round_down_for_mask (wi::shwi (24, prec),
+ wi::shwi (0xf1, prec)));
+ ASSERT_EQ (32, wi::round_up_for_mask (wi::shwi (24, prec),
+ wi::shwi (0xf1, prec)));
+
+ ASSERT_EQ (0x011, wi::round_down_for_mask (wi::shwi (0x22, prec),
+ wi::shwi (0x111, prec)));
+ ASSERT_EQ (0x100, wi::round_up_for_mask (wi::shwi (0x22, prec),
+ wi::shwi (0x111, prec)));
+
+ ASSERT_EQ (100, wi::round_down_for_mask (wi::shwi (101, prec),
+ wi::shwi (0xfc, prec)));
+ ASSERT_EQ (104, wi::round_up_for_mask (wi::shwi (101, prec),
+ wi::shwi (0xfc, prec)));
+
+ ASSERT_EQ (0x2bc, wi::round_down_for_mask (wi::shwi (0x2c2, prec),
+ wi::shwi (0xabc, prec)));
+ ASSERT_EQ (0x800, wi::round_up_for_mask (wi::shwi (0x2c2, prec),
+ wi::shwi (0xabc, prec)));
+
+ ASSERT_EQ (0xabc, wi::round_down_for_mask (wi::shwi (0xabd, prec),
+ wi::shwi (0xabc, prec)));
+ ASSERT_EQ (0, wi::round_up_for_mask (wi::shwi (0xabd, prec),
+ wi::shwi (0xabc, prec)));
+
+ ASSERT_EQ (0xabc, wi::round_down_for_mask (wi::shwi (0x1000, prec),
+ wi::shwi (0xabc, prec)));
+ ASSERT_EQ (0, wi::round_up_for_mask (wi::shwi (0x1000, prec),
+ wi::shwi (0xabc, prec)));
+}
+
/* Run all of the selftests within this file, for all value types. */
void
run_all_wide_int_tests <offset_int> ();
run_all_wide_int_tests <widest_int> ();
test_overflow ();
+ test_round_for_mask ();
}
} // namespace selftest
wide_int set_bit_in_zero (unsigned int, unsigned int);
wide_int insert (const wide_int &x, const wide_int &y, unsigned int,
unsigned int);
+ wide_int round_down_for_mask (const wide_int &, const wide_int &);
+ wide_int round_up_for_mask (const wide_int &, const wide_int &);
template <typename T>
T mask (unsigned int, bool);