This file is consumed by genmatch which produces gimple-match.c
and generic-match.c from it.
- Copyright (C) 2014-2019 Free Software Foundation, Inc.
+ Copyright (C) 2014-2020 Free Software Foundation, Inc.
Contributed by Richard Biener <rguenther@suse.de>
and Prathamesh Kulkarni <bilbotheelffriend@gmail.com>
(define_operator_list COND_TERNARY
IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS)
-/* As opposed to convert?, this still creates a single pattern, so
- it is not a suitable replacement for convert? in all cases. */
+/* nop_convert? combines convert? and view_convert? in one pattern and in
+   addition conditionalizes them on tree_nop_conversion_p conversions.  */
(match (nop_convert @0)
(convert @0)
(if (tree_nop_conversion_p (type, TREE_TYPE (@0)))))
&& known_eq (TYPE_VECTOR_SUBPARTS (type),
TYPE_VECTOR_SUBPARTS (TREE_TYPE (@0)))
&& tree_nop_conversion_p (TREE_TYPE (type), TREE_TYPE (TREE_TYPE (@0))))))
-/* This one has to be last, or it shadows the others. */
-(match (nop_convert @0)
- @0)
/* Transform the likes of (char) ABS_EXPR <(int) x> into (char) ABSU_EXPR <x>.
   ABSU_EXPR returns the unsigned absolute value of the operand and the operand
   of the ABSU_EXPR will have the corresponding signed type.  */
(with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
(convert (absu:utype @0)))))
+#if GIMPLE
+/* Optimize (X + (X >> (prec - 1))) ^ (X >> (prec - 1)) into abs (X). */
+(simplify
+ (bit_xor:c (plus:c @0 (rshift@2 @0 INTEGER_CST@1)) @2)
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && !TYPE_UNSIGNED (TREE_TYPE (@0))
+ && wi::to_widest (@1) == element_precision (TREE_TYPE (@0)) - 1)
+ (abs @0)))
+#endif
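+
+/* For illustration only, a minimal C sketch of the idiom the pattern above
+   recognizes, assuming a 32-bit int (the function name is made up):
+
+     int abs_via_xor (int x)
+     {
+       int s = x >> 31;     // x >> (prec - 1): 0 for x >= 0, -1 otherwise
+       return (x + s) ^ s;  // folded to ABS_EXPR <x>
+     }  */
+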
/* Simplifications of operations with one constant operand and
simplifications to constants or single values. */
@0))
#endif
+/* ~(~X - Y) -> X + Y and ~(~X + Y) -> X - Y. */
+(simplify
+ (bit_not (minus (bit_not @0) @1))
+ (plus @0 @1))
+(simplify
+ (bit_not (plus:c (bit_not @0) @1))
+ (minus @0 @1))
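+
+/* A sketch of why the two simplifications above hold, using ~A == -A - 1
+   on two's complement values:
+     ~(~X - Y) == -((-X - 1) - Y) - 1 == X + Y
+     ~(~X + Y) == -((-X - 1) + Y) - 1 == X - Y  */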
+
/* x + (x & 1) -> (x + 1) & ~1 */
(simplify
(plus:c @0 (bit_and:s @0 integer_onep@1))
&& !TYPE_SATURATING (type))
(bit_ior @0 @1)))
+/* (x | y) - y -> (x & ~y) */
+(simplify
+ (minus (bit_ior:cs @0 @1) @1)
+ (bit_and @0 (bit_not @1)))
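+
+/* Sketch: x | y decomposes into the disjoint sum (x & ~y) + y, so
+   subtracting y leaves x & ~y.  */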
+
/* (x | y) - (x ^ y) -> x & y */
(simplify
(minus (bit_ior @0 @1) (bit_xor @0 @1))
(bit_xor (bit_ior:c (bit_not @0) @1) (bit_ior:c @0 (bit_not @1)))
(bit_xor @0 @1))
+/* ((x & y) - (x | y)) - 1 -> ~(x ^ y) */
+(simplify
+ (plus (nop_convert1? (minus@2 (nop_convert2? (bit_and:c @0 @1))
+ (nop_convert2? (bit_ior @0 @1))))
+ integer_all_onesp)
+ (if (!TYPE_OVERFLOW_SANITIZED (type) && !TYPE_OVERFLOW_TRAPS (type)
+ && !TYPE_SATURATING (type) && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@2))
+ && !TYPE_OVERFLOW_TRAPS (TREE_TYPE (@2))
+ && !TYPE_SATURATING (TREE_TYPE (@2)))
+ (bit_not (convert (bit_xor @0 @1)))))
+(simplify
+ (minus (nop_convert1? (plus@2 (nop_convert2? (bit_and:c @0 @1))
+ integer_all_onesp))
+ (nop_convert3? (bit_ior @0 @1)))
+ (if (!TYPE_OVERFLOW_SANITIZED (type) && !TYPE_OVERFLOW_TRAPS (type)
+ && !TYPE_SATURATING (type) && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@2))
+ && !TYPE_OVERFLOW_TRAPS (TREE_TYPE (@2))
+ && !TYPE_SATURATING (TREE_TYPE (@2)))
+ (bit_not (convert (bit_xor @0 @1)))))
+(simplify
+ (minus (nop_convert1? (bit_and @0 @1))
+ (nop_convert2? (plus@2 (nop_convert3? (bit_ior:c @0 @1))
+ integer_onep)))
+ (if (!TYPE_OVERFLOW_SANITIZED (type) && !TYPE_OVERFLOW_TRAPS (type)
+ && !TYPE_SATURATING (type) && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@2))
+ && !TYPE_OVERFLOW_TRAPS (TREE_TYPE (@2))
+ && !TYPE_SATURATING (TREE_TYPE (@2)))
+ (bit_not (convert (bit_xor @0 @1)))))
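+
+/* A sketch of the identity behind the three patterns above: from
+   (x & y) + (x | y) == x + y and x ^ y == (x | y) - (x & y) it follows
+   that ((x & y) - (x | y)) - 1 == -(x ^ y) - 1 == ~(x ^ y).  */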
+
/* ~x & ~y -> ~(x | y)
~x | ~y -> ~(x & y) */
(for op (bit_and bit_ior)
We combine the above two cases by using a conditional convert. */
(for bitop (bit_and bit_ior bit_xor)
(simplify
- (bitop (convert @0) (convert? @1))
+ (bitop (convert@2 @0) (convert?@3 @1))
(if (((TREE_CODE (@1) == INTEGER_CST
&& INTEGRAL_TYPE_P (TREE_TYPE (@0))
&& int_fits_type_p (@1, TREE_TYPE (@0)))
|| GET_MODE_CLASS (TYPE_MODE (type)) != MODE_INT
/* Or if the precision of TO is not the same as the precision
of its mode. */
- || !type_has_mode_precision_p (type)))
- (convert (bitop @0 (convert @1))))))
+ || !type_has_mode_precision_p (type)
+       /* In GIMPLE, getting rid of 2 conversions for one new one results
+	  in smaller IL.  */
+ || (GIMPLE
+ && TREE_CODE (@1) != INTEGER_CST
+ && tree_nop_conversion_p (type, TREE_TYPE (@0))
+ && single_use (@2)
+ && single_use (@3))))
+ (convert (bitop @0 (convert @1)))))
+ /* In GIMPLE, getting rid of 2 conversions for one new one results
+    in smaller IL.  */
+ (simplify
+ (convert (bitop:cs@2 (nop_convert:s @0) @1))
+ (if (GIMPLE
+ && TREE_CODE (@1) != INTEGER_CST
+ && tree_nop_conversion_p (type, TREE_TYPE (@2))
+ && types_match (type, @0))
+ (bitop @0 (convert @1)))))
(for bitop (bit_and bit_ior)
rbitop (bit_ior bit_and)
/* Convert - (~A) to A + 1. */
(simplify
- (negate (nop_convert (bit_not @0)))
+ (negate (nop_convert? (bit_not @0)))
(plus (view_convert @0) { build_each_one_cst (type); }))
/* Convert ~ (A - 1) or ~ (A + -1) to -A. */
/* Otherwise prefer ~(X ^ Y) to ~X ^ Y as more canonical. */
(simplify
- (bit_xor:c (nop_convert:s (bit_not:s @0)) @1)
+ (bit_xor:c (nop_convert?:s (bit_not:s @0)) @1)
(if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
(bit_not (bit_xor (view_convert @0) @1))))
(for cmp (gt lt ge le)
(simplify
(mult (convert (cmp @0 @1)) @2)
- (cond (cmp @0 @1) @2 { build_zero_cst (type); })))
+ (if (GIMPLE || !TREE_SIDE_EFFECTS (@2))
+ (cond (cmp @0 @1) @2 { build_zero_cst (type); }))))
/* For integral types with undefined overflow and C != 0 fold
x * C EQ/NE y * C into x EQ/NE y. */
/* For equality, this is also true with wrapping overflow. */
(for op (eq ne)
(simplify
- (op:c (nop_convert@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
+ (op:c (nop_convert?@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
(if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
&& (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
|| TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
&& tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
(op @0 { build_zero_cst (TREE_TYPE (@0)); })))
(simplify
- (op:c (nop_convert@3 (pointer_plus@2 (convert1? @0) @1)) (convert2? @0))
+ (op:c (nop_convert?@3 (pointer_plus@2 (convert1? @0) @1)) (convert2? @0))
(if (tree_nop_conversion_p (TREE_TYPE (@2), TREE_TYPE (@0))
&& tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@0))
&& (CONSTANT_CLASS_P (@1) || (single_use (@2) && single_use (@3))))
(if (ptr_difference_const (@0, @1, &diff))
{ build_int_cst_type (type, diff); }))))
+/* Canonicalize (T *)(ptr - ptr-cst) to &MEM[ptr + -ptr-cst]. */
+(simplify
+ (convert (pointer_diff @0 INTEGER_CST@1))
+ (if (POINTER_TYPE_P (type))
+ { build_fold_addr_expr_with_type
+ (build2 (MEM_REF, char_type_node, @0,
+ wide_int_to_tree (ptr_type_node, wi::neg (wi::to_wide (@1)))),
+ type); }))
+
/* If arg0 is derived from the address of an object or function, we may
be able to fold this expression using the object or function's
alignment. */
{ constant_boolean_node (false, type); })
)))))
+/* Convert (X == CST1) || (X OP2 CST2) to a known value
+ based on CST1 OP2 CST2. Similarly for (X != CST1). */
+
+(for code1 (eq ne)
+ (for code2 (eq ne lt gt le ge)
+ (simplify
+ (bit_ior:c (code1@3 @0 INTEGER_CST@1) (code2@4 @0 INTEGER_CST@2))
+ (with
+ {
+ int cmp = tree_int_cst_compare (@1, @2);
+ bool val;
+ switch (code2)
+ {
+ case EQ_EXPR: val = (cmp == 0); break;
+ case NE_EXPR: val = (cmp != 0); break;
+ case LT_EXPR: val = (cmp < 0); break;
+ case GT_EXPR: val = (cmp > 0); break;
+ case LE_EXPR: val = (cmp <= 0); break;
+ case GE_EXPR: val = (cmp >= 0); break;
+ default: gcc_unreachable ();
+ }
+ }
+ (switch
+ (if (code1 == EQ_EXPR && val) @4)
+ (if (code1 == NE_EXPR && val) { constant_boolean_node (true, type); })
+ (if (code1 == NE_EXPR && !val) @3))))))
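+
+/* For instance, a sketch of the three outcomes above:
+     (x == 3) || (x <= 5)  ->  x <= 5    code1 == EQ_EXPR, val
+     (x != 3) || (x <= 5)  ->  true      code1 == NE_EXPR, val
+     (x != 3) || (x > 5)   ->  x != 3    code1 == NE_EXPR, !val  */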
+
+/* Convert (X OP1 CST1) || (X OP2 CST2). */
+
+(for code1 (lt le gt ge)
+ (for code2 (lt le gt ge)
+ (simplify
+ (bit_ior (code1@3 @0 INTEGER_CST@1) (code2@4 @0 INTEGER_CST@2))
+ (with
+ {
+ int cmp = tree_int_cst_compare (@1, @2);
+ }
+ (switch
+      /* For ||, choose the less restrictive of two < or <= comparisons.  */
+ (if ((code1 == LT_EXPR || code1 == LE_EXPR)
+ && (code2 == LT_EXPR || code2 == LE_EXPR))
+ (if ((cmp < 0) || (cmp == 0 && code1 == LT_EXPR))
+ @4
+ @3))
+      /* Likewise choose the less restrictive of two > or >= comparisons.  */
+ (if ((code1 == GT_EXPR || code1 == GE_EXPR)
+ && (code2 == GT_EXPR || code2 == GE_EXPR))
+ (if ((cmp > 0) || (cmp == 0 && code1 == GT_EXPR))
+ @4
+ @3))
+ /* Check for singleton ranges. */
+ (if (cmp == 0
+ && ((code1 == LT_EXPR && code2 == GT_EXPR)
+ || (code1 == GT_EXPR && code2 == LT_EXPR)))
+ (ne @0 @2))
+ /* Check for disjoint ranges. */
+ (if (cmp >= 0
+ && (code1 == LT_EXPR || code1 == LE_EXPR)
+ && (code2 == GT_EXPR || code2 == GE_EXPR))
+ { constant_boolean_node (true, type); })
+ (if (cmp <= 0
+ && (code1 == GT_EXPR || code1 == GE_EXPR)
+ && (code2 == LT_EXPR || code2 == LE_EXPR))
+ { constant_boolean_node (true, type); })
+ )))))
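+
+/* For instance:
+     (x < 3) || (x < 5)  ->  x < 5     the less restrictive bound wins
+     (x < 3) || (x > 3)  ->  x != 3    singleton range
+     (x < 5) || (x > 3)  ->  true      the two ranges cover everything  */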
+
/* We can't reassociate at all for saturating types. */
(if (!TYPE_SATURATING (type))
|| !HONOR_SIGN_DEPENDENT_ROUNDING (type)))
(convert (negate @1))))
(simplify
- (negate (nop_convert (negate @1)))
+ (negate (nop_convert? (negate @1)))
(if (!TYPE_OVERFLOW_SANITIZED (type)
&& !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1)))
(view_convert @1)))
/* A - (A +- B) -> -+ B */
/* A +- (B -+ A) -> +- B */
(simplify
- (minus (plus:c @0 @1) @0)
- @1)
+ (minus (nop_convert1? (plus:c (nop_convert2? @0) @1)) @0)
+ (view_convert @1))
(simplify
- (minus (minus @0 @1) @0)
- (negate @1))
+ (minus (nop_convert1? (minus (nop_convert2? @0) @1)) @0)
+ (if (!ANY_INTEGRAL_TYPE_P (type)
+ || TYPE_OVERFLOW_WRAPS (type))
+ (negate (view_convert @1))
+ (view_convert (negate @1))))
(simplify
- (plus:c (minus @0 @1) @1)
- @0)
+ (plus:c (nop_convert1? (minus @0 (nop_convert2? @1))) @1)
+ (view_convert @0))
(simplify
- (minus @0 (plus:c @0 @1))
- (negate @1))
+ (minus @0 (nop_convert1? (plus:c (nop_convert2? @0) @1)))
+ (if (!ANY_INTEGRAL_TYPE_P (type)
+ || TYPE_OVERFLOW_WRAPS (type))
+ (negate (view_convert @1))
+ (view_convert (negate @1))))
(simplify
- (minus @0 (minus @0 @1))
- @1)
+ (minus @0 (nop_convert1? (minus (nop_convert2? @0) @1)))
+ (view_convert @1))
/* (A +- B) + (C - A) -> C +- B */
/* (A + B) - (A - C) -> B + C */
/* More cases are handled with comparisons. */
(for inner_op (plus minus)
neg_inner_op (minus plus)
(simplify
- (outer_op (nop_convert (inner_op @0 CONSTANT_CLASS_P@1))
+ (outer_op (nop_convert? (inner_op @0 CONSTANT_CLASS_P@1))
CONSTANT_CLASS_P@2)
/* If one of the types wraps, use that one. */
(if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
/* (CST1 - A) +- CST2 -> CST3 - A */
(for outer_op (plus minus)
(simplify
- (outer_op (minus CONSTANT_CLASS_P@1 @0) CONSTANT_CLASS_P@2)
- (with { tree cst = const_binop (outer_op, type, @1, @2); }
- (if (cst && !TREE_OVERFLOW (cst))
- (minus { cst; } @0)))))
-
- /* CST1 - (CST2 - A) -> CST3 + A */
+ (outer_op (nop_convert? (minus CONSTANT_CLASS_P@1 @0)) CONSTANT_CLASS_P@2)
+ /* If one of the types wraps, use that one. */
+ (if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
+ /* If all 3 captures are CONSTANT_CLASS_P, punt, as we might recurse
+ forever if something doesn't simplify into a constant. */
+ (if (!CONSTANT_CLASS_P (@0))
+ (minus (outer_op (view_convert @1) @2) (view_convert @0)))
+ (if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
+ (view_convert (minus (outer_op @1 (view_convert @2)) @0))
+ (if (types_match (type, @0))
+ (with { tree cst = const_binop (outer_op, type, @1, @2); }
+ (if (cst && !TREE_OVERFLOW (cst))
+ (minus { cst; } @0))))))))
+
+ /* CST1 - (CST2 - A) -> CST3 + A
+ Use view_convert because it is safe for vectors and equivalent for
+ scalars. */
(simplify
- (minus CONSTANT_CLASS_P@1 (minus CONSTANT_CLASS_P@2 @0))
- (with { tree cst = const_binop (MINUS_EXPR, type, @1, @2); }
- (if (cst && !TREE_OVERFLOW (cst))
- (plus { cst; } @0))))
+ (minus CONSTANT_CLASS_P@1 (nop_convert? (minus CONSTANT_CLASS_P@2 @0)))
+ /* If one of the types wraps, use that one. */
+ (if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
+ /* If all 3 captures are CONSTANT_CLASS_P, punt, as we might recurse
+ forever if something doesn't simplify into a constant. */
+ (if (!CONSTANT_CLASS_P (@0))
+ (plus (view_convert @0) (minus @1 (view_convert @2))))
+ (if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
+ (view_convert (plus @0 (minus (view_convert @1) @2)))
+ (if (types_match (type, @0))
+ (with { tree cst = const_binop (MINUS_EXPR, type, @1, @2); }
+ (if (cst && !TREE_OVERFLOW (cst))
+ (plus { cst; } @0)))))))
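+
+ /* E.g. 10 - (20 - A) becomes A + (10 - 20), i.e. A + -10, possibly with
+    view_converts when A's type only differs by a nop conversion.  */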
/* ((T)(A)) + CST -> (T)(A + CST) */
#if GIMPLE
max_ovf = wi::OVF_OVERFLOW;
tree inner_type = TREE_TYPE (@0);
- wide_int w1 = wide_int::from (wi::to_wide (@1), TYPE_PRECISION (inner_type),
- TYPE_SIGN (inner_type));
+ wide_int w1
+ = wide_int::from (wi::to_wide (@1), TYPE_PRECISION (inner_type),
+ TYPE_SIGN (inner_type));
wide_int wmin0, wmax0;
if (get_range_info (@0, &wmin0, &wmax0) == VR_RANGE)
)))
#endif
+/* ((T)(A + CST1)) + CST2 -> (T)(A) + (T)CST1 + CST2 */
+#if GIMPLE
+ (for op (plus minus)
+ (simplify
+ (plus (convert:s (op:s @0 INTEGER_CST@1)) INTEGER_CST@2)
+ (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
+ && TREE_CODE (type) == INTEGER_TYPE
+ && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
+ && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_WRAPS (type))
+ (plus (convert @0) (op @2 (convert @1))))))
+#endif
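+
+/* A C sketch of the pattern just above, assuming 32-bit int and 64-bit
+   unsigned long (signed int overflow is undefined, unsigned long wraps;
+   the function name is made up):
+
+     unsigned long f (int a)
+     {
+       return (unsigned long) (a + 3) + 5;  // folded to (unsigned long) a + 8
+     }  */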
+
/* ~A + A -> -1 */
(simplify
(plus:c (bit_not @0) @0)
(plusminus @0 (mult:c@3 @0 @2))
(if ((!ANY_INTEGRAL_TYPE_P (type)
|| TYPE_OVERFLOW_WRAPS (type)
+ /* For @0 + @0*@2 this transformation would introduce UB
+ (where there was none before) for @0 in [-1,0] and @2 max.
+ For @0 - @0*@2 this transformation would introduce UB
+ for @0 0 and @2 in [min,min+1] or @0 -1 and @2 min+1. */
|| (INTEGRAL_TYPE_P (type)
- && tree_expr_nonzero_p (@0)
- && expr_not_equal_to (@0, wi::minus_one (TYPE_PRECISION (type)))))
+ && ((tree_expr_nonzero_p (@0)
+ && expr_not_equal_to (@0,
+ wi::minus_one (TYPE_PRECISION (type))))
+ || (plusminus == PLUS_EXPR
+ ? expr_not_equal_to (@2,
+ wi::max_value (TYPE_PRECISION (type), SIGNED))
+ /* Let's ignore the @0 -1 and @2 min case. */
+ : (expr_not_equal_to (@2,
+ wi::min_value (TYPE_PRECISION (type), SIGNED))
+ && expr_not_equal_to (@2,
+ wi::min_value (TYPE_PRECISION (type), SIGNED)
+ + 1))))))
&& single_use (@3))
(mult (plusminus { build_one_cst (type); } @2) @0)))
(simplify
(plusminus (mult:c@3 @0 @2) @0)
(if ((!ANY_INTEGRAL_TYPE_P (type)
|| TYPE_OVERFLOW_WRAPS (type)
+ /* For @0*@2 + @0 this transformation would introduce UB
+ (where there was none before) for @0 in [-1,0] and @2 max.
+ For @0*@2 - @0 this transformation would introduce UB
+ for @0 0 and @2 min. */
|| (INTEGRAL_TYPE_P (type)
- && tree_expr_nonzero_p (@0)
- && expr_not_equal_to (@0, wi::minus_one (TYPE_PRECISION (type)))))
+ && ((tree_expr_nonzero_p (@0)
+ && (plusminus == MINUS_EXPR
+ || expr_not_equal_to (@0,
+ wi::minus_one (TYPE_PRECISION (type)))))
+ || expr_not_equal_to (@2,
+ (plusminus == PLUS_EXPR
+ ? wi::max_value (TYPE_PRECISION (type), SIGNED)
+ : wi::min_value (TYPE_PRECISION (type), SIGNED))))))
&& single_use (@3))
(mult (plusminus @2 { build_one_cst (type); }) @0))))))
+#if GIMPLE
+/* Canonicalize X + (X << C) into X * (1 + (1 << C)) and
+ (X << C1) + (X << C2) into X * ((1 << C1) + (1 << C2)). */
+(simplify
+ (plus:c @0 (lshift:s @0 INTEGER_CST@1))
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && tree_fits_uhwi_p (@1)
+ && tree_to_uhwi (@1) < element_precision (type))
+ (with { tree t = type;
+ if (!TYPE_OVERFLOW_WRAPS (t)) t = unsigned_type_for (t);
+ wide_int w = wi::set_bit_in_zero (tree_to_uhwi (@1),
+ element_precision (type));
+ w += 1;
+ tree cst = wide_int_to_tree (VECTOR_TYPE_P (t) ? TREE_TYPE (t)
+ : t, w);
+ cst = build_uniform_cst (t, cst); }
+ (convert (mult (convert:t @0) { cst; })))))
+(simplify
+ (plus (lshift:s @0 INTEGER_CST@1) (lshift:s @0 INTEGER_CST@2))
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && tree_fits_uhwi_p (@1)
+ && tree_to_uhwi (@1) < element_precision (type)
+ && tree_fits_uhwi_p (@2)
+ && tree_to_uhwi (@2) < element_precision (type))
+ (with { tree t = type;
+ if (!TYPE_OVERFLOW_WRAPS (t)) t = unsigned_type_for (t);
+ unsigned int prec = element_precision (type);
+ wide_int w = wi::set_bit_in_zero (tree_to_uhwi (@1), prec);
+ w += wi::set_bit_in_zero (tree_to_uhwi (@2), prec);
+ tree cst = wide_int_to_tree (VECTOR_TYPE_P (t) ? TREE_TYPE (t)
+ : t, w);
+ cst = build_uniform_cst (t, cst); }
+ (convert (mult (convert:t @0) { cst; })))))
+#endif
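+
+/* E.g. x + (x << 2) becomes x * 5 and (x << 1) + (x << 3) becomes x * 10,
+   with the multiplication carried out in the corresponding unsigned type
+   when the original type does not wrap on overflow.  */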
+
/* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax(). */
(for minmax (min max FMIN_ALL FMAX_ALL)
(cmp (minmax @0 INTEGER_CST@1) INTEGER_CST@2)
(comb (cmp @0 @2) (cmp @1 @2))))
+/* Undo fancy ways of writing max/min or other ?: expressions, like
+   a - ((a - b) & -(a < b)), which in this case folds to (a < b) ? b : a.
+   People normally use ?: and that is what we actually try to optimize.  */
+(for cmp (simple_comparison)
+ (simplify
+ (minus @0 (bit_and:c (minus @0 @1)
+ (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@4))
+ && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
+ && INTEGRAL_TYPE_P (TREE_TYPE (@5))
+ && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
+ || !TYPE_UNSIGNED (TREE_TYPE (@4)))
+ && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
+ (cond (cmp @2 @3) @1 @0)))
+ (simplify
+ (plus:c @0 (bit_and:c (minus @1 @0)
+ (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@4))
+ && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
+ && INTEGRAL_TYPE_P (TREE_TYPE (@5))
+ && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
+ || !TYPE_UNSIGNED (TREE_TYPE (@4)))
+ && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
+ (cond (cmp @2 @3) @1 @0)))
+ /* Similarly with ^ instead of - though in that case with :c. */
+ (simplify
+ (bit_xor:c @0 (bit_and:c (bit_xor:c @0 @1)
+ (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@4))
+ && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
+ && INTEGRAL_TYPE_P (TREE_TYPE (@5))
+ && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
+ || !TYPE_UNSIGNED (TREE_TYPE (@4)))
+ && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
+ (cond (cmp @2 @3) @1 @0))))
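+
+/* A C sketch of the first form above (the function name is made up):
+
+     unsigned branchless_max (unsigned a, unsigned b)
+     {
+       return a - ((a - b) & -(unsigned) (a < b));  // (a < b) ? b : a
+     }  */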
+
/* Simplifications of shift and rotates. */
(for rotate (lrotate rrotate)
/* Optimize (x >> c) << c into x & (-1<<c). */
(simplify
- (lshift (rshift @0 INTEGER_CST@1) @1)
+ (lshift (nop_convert? (rshift @0 INTEGER_CST@1)) @1)
(if (wi::ltu_p (wi::to_wide (@1), element_precision (type)))
- (bit_and @0 (lshift { build_minus_one_cst (type); } @1))))
+ /* It doesn't matter if the right shift is arithmetic or logical. */
+ (bit_and (view_convert @0) (lshift { build_minus_one_cst (type); } @1))))
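+
+/* E.g. (x >> 3) << 3 becomes x & ~7, clearing the three low bits.  */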
+
+(simplify
+ (lshift (convert (convert@2 (rshift @0 INTEGER_CST@1))) @1)
+ (if (wi::ltu_p (wi::to_wide (@1), element_precision (type))
+ /* Allow intermediate conversion to integral type with whatever sign, as
+ long as the low TYPE_PRECISION (type)
+ - TYPE_PRECISION (TREE_TYPE (@2)) bits are preserved. */
+ && INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@2))
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@0))
+ && (TYPE_PRECISION (TREE_TYPE (@2)) >= TYPE_PRECISION (type)
+ || wi::geu_p (wi::to_wide (@1),
+ TYPE_PRECISION (type)
+ - TYPE_PRECISION (TREE_TYPE (@2)))))
+ (bit_and (convert @0) (lshift { build_minus_one_cst (type); } @1))))
/* Optimize (x << c) >> c into x & ((unsigned)-1 >> c) for unsigned
types. */
(cmp { tem; } @1)))))
/* Fold comparisons against built-in math functions. */
- (if (flag_unsafe_math_optimizations
- && ! flag_errno_math)
+ (if (flag_unsafe_math_optimizations && ! flag_errno_math)
(for sq (SQRT)
(simplify
(cmp (sq @0) REAL_CST@1)
if x is negative or NaN. Due to -funsafe-math-optimizations,
the results for other x follow from natural arithmetic. */
(cmp @0 @1)))
- (if (cmp == GT_EXPR || cmp == GE_EXPR)
+ (if ((cmp == LT_EXPR
+ || cmp == LE_EXPR
+ || cmp == GT_EXPR
+ || cmp == GE_EXPR)
+ && !REAL_VALUE_ISNAN (TREE_REAL_CST (@1))
+ /* Give up for -frounding-math. */
+ && !HONOR_SIGN_DEPENDENT_ROUNDING (TREE_TYPE (@0)))
(with
{
- REAL_VALUE_TYPE c2;
+ REAL_VALUE_TYPE c2;
+ enum tree_code ncmp = cmp;
+ const real_format *fmt
+ = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (@0)));
real_arithmetic (&c2, MULT_EXPR,
&TREE_REAL_CST (@1), &TREE_REAL_CST (@1));
- real_convert (&c2, TYPE_MODE (TREE_TYPE (@0)), &c2);
- }
- (if (REAL_VALUE_ISINF (c2))
- /* sqrt(x) > y is x == +Inf, when y is very large. */
- (if (HONOR_INFINITIES (@0))
- (eq @0 { build_real (TREE_TYPE (@0), c2); })
- { constant_boolean_node (false, type); })
- /* sqrt(x) > c is the same as x > c*c. */
- (cmp @0 { build_real (TREE_TYPE (@0), c2); }))))
- (if (cmp == LT_EXPR || cmp == LE_EXPR)
- (with
- {
- REAL_VALUE_TYPE c2;
- real_arithmetic (&c2, MULT_EXPR,
- &TREE_REAL_CST (@1), &TREE_REAL_CST (@1));
- real_convert (&c2, TYPE_MODE (TREE_TYPE (@0)), &c2);
+ real_convert (&c2, fmt, &c2);
+	   /* See PR91734: if c2 is inexact and sqrt(c2) < c, change LT_EXPR
+	      into LE_EXPR or GE_EXPR into GT_EXPR; if sqrt(c2) > c, change
+	      LE_EXPR into LT_EXPR or GT_EXPR into GE_EXPR.  */
+ if (!REAL_VALUE_ISINF (c2))
+ {
+ tree c3 = fold_const_call (CFN_SQRT, TREE_TYPE (@0),
+ build_real (TREE_TYPE (@0), c2));
+ if (c3 == NULL_TREE || TREE_CODE (c3) != REAL_CST)
+ ncmp = ERROR_MARK;
+ else if ((cmp == LT_EXPR || cmp == GE_EXPR)
+ && real_less (&TREE_REAL_CST (c3), &TREE_REAL_CST (@1)))
+ ncmp = cmp == LT_EXPR ? LE_EXPR : GT_EXPR;
+ else if ((cmp == LE_EXPR || cmp == GT_EXPR)
+ && real_less (&TREE_REAL_CST (@1), &TREE_REAL_CST (c3)))
+ ncmp = cmp == LE_EXPR ? LT_EXPR : GE_EXPR;
+ else
+ {
+ /* With rounding to even, sqrt of up to 3 different values
+ gives the same normal result, so in some cases c2 needs
+ to be adjusted. */
+ REAL_VALUE_TYPE c2alt, tow;
+ if (cmp == LT_EXPR || cmp == GE_EXPR)
+ tow = dconst0;
+ else
+ real_inf (&tow);
+ real_nextafter (&c2alt, fmt, &c2, &tow);
+ real_convert (&c2alt, fmt, &c2alt);
+ if (REAL_VALUE_ISINF (c2alt))
+ ncmp = ERROR_MARK;
+ else
+ {
+ c3 = fold_const_call (CFN_SQRT, TREE_TYPE (@0),
+ build_real (TREE_TYPE (@0), c2alt));
+ if (c3 == NULL_TREE || TREE_CODE (c3) != REAL_CST)
+ ncmp = ERROR_MARK;
+ else if (real_equal (&TREE_REAL_CST (c3),
+ &TREE_REAL_CST (@1)))
+ c2 = c2alt;
+ }
+ }
+ }
}
- (if (REAL_VALUE_ISINF (c2))
- (switch
- /* sqrt(x) < y is always true, when y is a very large
- value and we don't care about NaNs or Infinities. */
- (if (! HONOR_NANS (@0) && ! HONOR_INFINITIES (@0))
- { constant_boolean_node (true, type); })
- /* sqrt(x) < y is x != +Inf when y is very large and we
- don't care about NaNs. */
- (if (! HONOR_NANS (@0))
- (ne @0 { build_real (TREE_TYPE (@0), c2); }))
- /* sqrt(x) < y is x >= 0 when y is very large and we
- don't care about Infinities. */
- (if (! HONOR_INFINITIES (@0))
- (ge @0 { build_real (TREE_TYPE (@0), dconst0); }))
- /* sqrt(x) < y is x >= 0 && x != +Inf, when y is large. */
- (if (GENERIC)
- (truth_andif
- (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
- (ne @0 { build_real (TREE_TYPE (@0), c2); }))))
- /* sqrt(x) < c is the same as x < c*c, if we ignore NaNs. */
- (if (! HONOR_NANS (@0))
- (cmp @0 { build_real (TREE_TYPE (@0), c2); })
- /* sqrt(x) < c is the same as x >= 0 && x < c*c. */
- (if (GENERIC)
- (truth_andif
- (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
- (cmp @0 { build_real (TREE_TYPE (@0), c2); })))))))))
+ (if (cmp == GT_EXPR || cmp == GE_EXPR)
+ (if (REAL_VALUE_ISINF (c2))
+ /* sqrt(x) > y is x == +Inf, when y is very large. */
+ (if (HONOR_INFINITIES (@0))
+ (eq @0 { build_real (TREE_TYPE (@0), c2); })
+ { constant_boolean_node (false, type); })
+ /* sqrt(x) > c is the same as x > c*c. */
+ (if (ncmp != ERROR_MARK)
+ (if (ncmp == GE_EXPR)
+ (ge @0 { build_real (TREE_TYPE (@0), c2); })
+ (gt @0 { build_real (TREE_TYPE (@0), c2); }))))
+ /* else if (cmp == LT_EXPR || cmp == LE_EXPR) */
+ (if (REAL_VALUE_ISINF (c2))
+ (switch
+ /* sqrt(x) < y is always true, when y is a very large
+ value and we don't care about NaNs or Infinities. */
+ (if (! HONOR_NANS (@0) && ! HONOR_INFINITIES (@0))
+ { constant_boolean_node (true, type); })
+ /* sqrt(x) < y is x != +Inf when y is very large and we
+ don't care about NaNs. */
+ (if (! HONOR_NANS (@0))
+ (ne @0 { build_real (TREE_TYPE (@0), c2); }))
+ /* sqrt(x) < y is x >= 0 when y is very large and we
+ don't care about Infinities. */
+ (if (! HONOR_INFINITIES (@0))
+ (ge @0 { build_real (TREE_TYPE (@0), dconst0); }))
+ /* sqrt(x) < y is x >= 0 && x != +Inf, when y is large. */
+ (if (GENERIC)
+ (truth_andif
+ (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
+ (ne @0 { build_real (TREE_TYPE (@0), c2); }))))
+ /* sqrt(x) < c is the same as x < c*c, if we ignore NaNs. */
+ (if (ncmp != ERROR_MARK && ! HONOR_NANS (@0))
+ (if (ncmp == LT_EXPR)
+ (lt @0 { build_real (TREE_TYPE (@0), c2); })
+ (le @0 { build_real (TREE_TYPE (@0), c2); }))
+ /* sqrt(x) < c is the same as x >= 0 && x < c*c. */
+ (if (ncmp != ERROR_MARK && GENERIC)
+ (if (ncmp == LT_EXPR)
+ (truth_andif
+ (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
+ (lt @0 { build_real (TREE_TYPE (@0), c2); }))
+ (truth_andif
+ (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
+ (le @0 { build_real (TREE_TYPE (@0), c2); })))))))))))
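+
+/* E.g. with -funsafe-math-optimizations, sqrt (x) > 2.0 becomes x > 4.0;
+   the ncmp adjustment above only matters when c*c is inexact in the
+   floating-point type, as in PR91734.  */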
/* Transform sqrt(x) cmp sqrt(y) -> x cmp y. */
(simplify
(cmp (sq @0) (sq @1))
{ constant_boolean_node (above ? false : true, type); }))))))))))))
(for cmp (eq ne)
- /* A local variable can never be pointed to by
- the default SSA name of an incoming parameter.
- SSA names are canonicalized to 2nd place. */
(simplify
+ /* SSA names are canonicalized to 2nd place. */
(cmp addr@0 SSA_NAME@1)
- (if (SSA_NAME_IS_DEFAULT_DEF (@1)
- && TREE_CODE (SSA_NAME_VAR (@1)) == PARM_DECL)
- (with { tree base = get_base_address (TREE_OPERAND (@0, 0)); }
- (if (TREE_CODE (base) == VAR_DECL
- && auto_var_in_fn_p (base, current_function_decl))
- (if (cmp == NE_EXPR)
- { constant_boolean_node (true, type); }
- { constant_boolean_node (false, type); }))))))
+ (with
+ { poly_int64 off; tree base; }
+ /* A local variable can never be pointed to by
+ the default SSA name of an incoming parameter. */
+ (if (SSA_NAME_IS_DEFAULT_DEF (@1)
+ && TREE_CODE (SSA_NAME_VAR (@1)) == PARM_DECL
+ && (base = get_base_address (TREE_OPERAND (@0, 0)))
+ && TREE_CODE (base) == VAR_DECL
+ && auto_var_in_fn_p (base, current_function_decl))
+ (if (cmp == NE_EXPR)
+ { constant_boolean_node (true, type); }
+ { constant_boolean_node (false, type); })
+ /* If the address is based on @1 decide using the offset. */
+ (if ((base = get_addr_base_and_unit_offset (TREE_OPERAND (@0, 0), &off))
+ && TREE_CODE (base) == MEM_REF
+ && TREE_OPERAND (base, 0) == @1)
+ (with { off += mem_ref_offset (base).force_shwi (); }
+ (if (known_ne (off, 0))
+ { constant_boolean_node (cmp == NE_EXPR, type); }
+ (if (known_eq (off, 0))
+ { constant_boolean_node (cmp == EQ_EXPR, type); }))))))))
/* Equality compare simplifications from fold_binary */
(for cmp (eq ne)
(simplify
(cmp (convert? addr@0) integer_zerop)
(if (tree_single_nonzero_warnv_p (@0, NULL))
- { constant_boolean_node (cmp == NE_EXPR, type); })))
+ { constant_boolean_node (cmp == NE_EXPR, type); }))
+
+ /* (X & C) op (Y & C) into ((X ^ Y) & C) op 0.  */
+ (simplify
+ (cmp (bit_and:cs @0 @2) (bit_and:cs @1 @2))
+ (cmp (bit_and (bit_xor @0 @1) @2) { build_zero_cst (TREE_TYPE (@2)); })))
+
+/* (X < 0) != (Y < 0) into (X ^ Y) < 0.
+ (X >= 0) != (Y >= 0) into (X ^ Y) < 0.
+ (X < 0) == (Y < 0) into (X ^ Y) >= 0.
+ (X >= 0) == (Y >= 0) into (X ^ Y) >= 0. */
+(for cmp (eq ne)
+ ncmp (ge lt)
+ (for sgncmp (ge lt)
+ (simplify
+ (cmp (sgncmp @0 integer_zerop@2) (sgncmp @1 integer_zerop))
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && !TYPE_UNSIGNED (TREE_TYPE (@0))
+ && types_match (@0, @1))
+ (ncmp (bit_xor @0 @1) @2)))))
+/* (X < 0) == (Y >= 0) into (X ^ Y) < 0.
+ (X < 0) != (Y >= 0) into (X ^ Y) >= 0. */
+(for cmp (eq ne)
+ ncmp (lt ge)
+ (simplify
+ (cmp:c (lt @0 integer_zerop@2) (ge @1 integer_zerop))
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && !TYPE_UNSIGNED (TREE_TYPE (@0))
+ && types_match (@0, @1))
+ (ncmp (bit_xor @0 @1) @2))))
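+
+/* Sketch: two signed values have the same sign iff their xor has a clear
+   sign bit, e.g. (x < 0) == (y < 0) becomes (x ^ y) >= 0.  */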
/* If we have (A & C) == C where C is a power of 2, convert this into
(A & C) != 0. Similarly for NE_EXPR. */
(cmp:c (minus@2 @0 @1) @0)
(if (single_use (@2)
&& ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
- && TYPE_UNSIGNED (TREE_TYPE (@0))
- && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
+ && TYPE_UNSIGNED (TREE_TYPE (@0)))
(cmp @1 @0))))
+/* Optimize A - B + -1 >= A into B >= A for unsigned comparisons. */
+(for cmp (ge lt)
+ (simplify
+ (cmp:c (plus (minus @0 @1) integer_minus_onep) @0)
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && TYPE_UNSIGNED (TREE_TYPE (@0)))
+ (cmp @1 @0))))
+
/* Testing for overflow is unnecessary if we already know the result. */
/* A - B > A */
(for cmp (gt le)
(with { tree t = TREE_TYPE (@0), cpx = build_complex_type (t); }
(out (imagpart (IFN_MUL_OVERFLOW:cpx @0 @1)) { build_zero_cst (t); })))))
+/* Similarly, for unsigned operands, simplify (((type) A * B) >> prec) != 0,
+   where type is at least twice as wide as the type of A and B, to
+   __builtin_mul_overflow (A, B, <unused>).  */
+(for cmp (eq ne)
+ (simplify
+ (cmp (rshift (mult:s (convert@3 @0) (convert @1)) INTEGER_CST@2)
+ integer_zerop)
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && INTEGRAL_TYPE_P (TREE_TYPE (@3))
+ && TYPE_UNSIGNED (TREE_TYPE (@0))
+ && (TYPE_PRECISION (TREE_TYPE (@3))
+ >= 2 * TYPE_PRECISION (TREE_TYPE (@0)))
+ && tree_fits_uhwi_p (@2)
+ && tree_to_uhwi (@2) == TYPE_PRECISION (TREE_TYPE (@0))
+ && types_match (@0, @1)
+ && type_has_mode_precision_p (TREE_TYPE (@0))
+ && (optab_handler (umulv4_optab, TYPE_MODE (TREE_TYPE (@0)))
+ != CODE_FOR_nothing))
+ (with { tree t = TREE_TYPE (@0), cpx = build_complex_type (t); }
+ (cmp (imagpart (IFN_MUL_OVERFLOW:cpx @0 @1)) { build_zero_cst (t); })))))
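+
+/* A C sketch, assuming 32-bit unsigned and 64-bit unsigned long long
+   (the function name is made up):
+
+     int mul_overflows (unsigned a, unsigned b)
+     {
+       // folded to a __builtin_mul_overflow-style IFN_MUL_OVERFLOW check
+       return ((unsigned long long) a * b) >> 32 != 0;
+     }  */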
+
/* Simplification of math builtins. These rules must all be optimizations
as well as IL simplifications. If there is a possibility that the new
form could be a pessimization, the rule should go in the canonicalization
(rdiv (SIN:s @0) (COS:s @0))
(TAN @0))
+ /* Simplify sinh(x) / cosh(x) -> tanh(x). */
+ (simplify
+ (rdiv (SINH:s @0) (COSH:s @0))
+ (TANH @0))
+
+ /* Simplify tanh (x) / sinh (x) -> 1.0 / cosh (x). */
+ (simplify
+ (rdiv (TANH:s @0) (SINH:s @0))
+ (rdiv {build_one_cst (type);} (COSH @0)))
+
/* Simplify cos(x) / sin(x) -> 1 / tan(x). */
(simplify
(rdiv (COS:s @0) (SIN:s @0))
(if (elt < CONSTRUCTOR_NELTS (ctor))
(view_convert { CONSTRUCTOR_ELT (ctor, elt)->value; })
{ build_zero_cst (type); })
- {
- vec<constructor_elt, va_gc> *vals;
- vec_alloc (vals, count);
- for (unsigned i = 0;
- i < count && elt + i < CONSTRUCTOR_NELTS (ctor); ++i)
- CONSTRUCTOR_APPEND_ELT (vals, NULL_TREE,
- CONSTRUCTOR_ELT (ctor, elt + i)->value);
- build_constructor (type, vals);
- })))
+ /* We don't want to emit new CTORs unless the old one goes away.
+ ??? Eventually allow this if the CTOR ends up constant or
+ uniform. */
+ (if (single_use (@0))
+ {
+ vec<constructor_elt, va_gc> *vals;
+ vec_alloc (vals, count);
+ for (unsigned i = 0;
+ i < count && elt + i < CONSTRUCTOR_NELTS (ctor); ++i)
+ CONSTRUCTOR_APPEND_ELT (vals, NULL_TREE,
+ CONSTRUCTOR_ELT (ctor, elt + i)->value);
+ build_constructor (type, vals);
+ }))))
/* The bitfield references a single constructor element. */
(if (k.is_constant (&const_k)
&& idx + n <= (idx / const_k + 1) * const_k)
(cmp (popcount @0) integer_zerop)
(rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
+#if GIMPLE
+/* 64- and 32-bit branchless implementations of popcount are detected:
+
+ int popcount64c (uint64_t x)
+ {
+ x -= (x >> 1) & 0x5555555555555555ULL;
+ x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
+ x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
+ return (x * 0x0101010101010101ULL) >> 56;
+ }
+
+ int popcount32c (uint32_t x)
+ {
+ x -= (x >> 1) & 0x55555555;
+ x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+ x = (x + (x >> 4)) & 0x0f0f0f0f;
+ return (x * 0x01010101) >> 24;
+ } */
+(simplify
+ (rshift
+ (mult
+ (bit_and
+ (plus:c
+ (rshift @8 INTEGER_CST@5)
+ (plus:c@8
+ (bit_and @6 INTEGER_CST@7)
+ (bit_and
+ (rshift
+ (minus@6 @0
+ (bit_and (rshift @0 INTEGER_CST@4) INTEGER_CST@11))
+ INTEGER_CST@10)
+ INTEGER_CST@9)))
+ INTEGER_CST@3)
+ INTEGER_CST@2)
+ INTEGER_CST@1)
+ /* Check constants and optab. */
+ (with { unsigned prec = TYPE_PRECISION (type);
+ int shift = (64 - prec) & 63;
+ unsigned HOST_WIDE_INT c1
+ = HOST_WIDE_INT_UC (0x0101010101010101) >> shift;
+ unsigned HOST_WIDE_INT c2
+ = HOST_WIDE_INT_UC (0x0F0F0F0F0F0F0F0F) >> shift;
+ unsigned HOST_WIDE_INT c3
+ = HOST_WIDE_INT_UC (0x3333333333333333) >> shift;
+ unsigned HOST_WIDE_INT c4
+ = HOST_WIDE_INT_UC (0x5555555555555555) >> shift;
+ }
+ (if (prec >= 16
+ && prec <= 64
+ && pow2p_hwi (prec)
+ && TYPE_UNSIGNED (type)
+ && integer_onep (@4)
+ && wi::to_widest (@10) == 2
+ && wi::to_widest (@5) == 4
+ && wi::to_widest (@1) == prec - 8
+ && tree_to_uhwi (@2) == c1
+ && tree_to_uhwi (@3) == c2
+ && tree_to_uhwi (@9) == c3
+ && tree_to_uhwi (@7) == c3
+ && tree_to_uhwi (@11) == c4
+ && direct_internal_fn_supported_p (IFN_POPCOUNT, type,
+ OPTIMIZE_FOR_BOTH))
+ (convert (IFN_POPCOUNT:type @0)))))
+
+/* On many targets __builtin_ffs needs to deal with a possibly zero
+   argument.  If we know the argument is always non-zero, __builtin_ctz + 1
+   should lead to better code.  */
+(simplify
+ (FFS tree_expr_nonzero_p@0)
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (@0),
+ OPTIMIZE_FOR_SPEED))
+ (plus (CTZ:type @0) { build_one_cst (type); })))
+#endif
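+
+/* E.g. when x is known non-zero, __builtin_ffs (x) becomes
+   __builtin_ctz (x) + 1 on targets that implement CTZ directly.  */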
+
+(for ffs (BUILT_IN_FFS BUILT_IN_FFSL BUILT_IN_FFSLL
+ BUILT_IN_FFSIMAX)
+ /* __builtin_ffs (X) == 0 -> X == 0.
+ __builtin_ffs (X) == 6 -> (X & 63) == 32. */
+ (for cmp (eq ne)
+ (simplify
+ (cmp (ffs@2 @0) INTEGER_CST@1)
+ (with { int prec = TYPE_PRECISION (TREE_TYPE (@0)); }
+ (switch
+ (if (integer_zerop (@1))
+ (cmp @0 { build_zero_cst (TREE_TYPE (@0)); }))
+ (if (tree_int_cst_sgn (@1) < 0 || wi::to_widest (@1) > prec)
+ { constant_boolean_node (cmp == NE_EXPR ? true : false, type); })
+ (if (single_use (@2))
+ (cmp (bit_and @0 { wide_int_to_tree (TREE_TYPE (@0),
+ wi::mask (tree_to_uhwi (@1),
+ false, prec)); })
+ { wide_int_to_tree (TREE_TYPE (@0),
+ wi::shifted_mask (tree_to_uhwi (@1) - 1, 1,
+ false, prec)); }))))))
+
+ /* __builtin_ffs (X) > 6 -> X != 0 && (X & 63) == 0. */
+ (for cmp (gt le)
+ cmp2 (ne eq)
+ cmp3 (eq ne)
+ bit_op (bit_and bit_ior)
+ (simplify
+ (cmp (ffs@2 @0) INTEGER_CST@1)
+ (with { int prec = TYPE_PRECISION (TREE_TYPE (@0)); }
+ (switch
+ (if (integer_zerop (@1))
+ (cmp2 @0 { build_zero_cst (TREE_TYPE (@0)); }))
+ (if (tree_int_cst_sgn (@1) < 0)
+ { constant_boolean_node (cmp == GT_EXPR ? true : false, type); })
+ (if (wi::to_widest (@1) >= prec)
+ { constant_boolean_node (cmp == GT_EXPR ? false : true, type); })
+ (if (wi::to_widest (@1) == prec - 1)
+ (cmp3 @0 { wide_int_to_tree (TREE_TYPE (@0),
+ wi::shifted_mask (prec - 1, 1,
+ false, prec)); }))
+ (if (single_use (@2))
+ (bit_op (cmp2 @0 { build_zero_cst (TREE_TYPE (@0)); })
+ (cmp3 (bit_and @0
+ { wide_int_to_tree (TREE_TYPE (@0),
+ wi::mask (tree_to_uhwi (@1),
+ false, prec)); })
+ { build_zero_cst (TREE_TYPE (@0)); }))))))))
+
/* Simplify:
a = a1 op a2
|| TREE_CODE (cop1) == VECTOR_CST
|| TREE_CODE (cop1) == CONSTRUCTOR))
{
- if (sel.series_p (1, 1, nelts + 1, 1))
+ bool insert_first_p = sel.series_p (1, 1, nelts + 1, 1);
+ if (insert_first_p)
{
/* After canonicalizing the first elt to come from the
first vector we only can insert the first elt from
if ((ins = fold_read_from_vector (cop0, sel[0])))
op0 = op1;
}
- else
+ /* The above can fail for two-element vectors which always
+ appear to insert the first element, so try inserting
+ into the second lane as well. For more than two
+ elements that's wasted time. */
+ if (!insert_first_p || (!ins && maybe_eq (nelts, 2u)))
{
unsigned int encoded_nelts = sel.encoding ().encoded_nelts ();
for (at = 0; at < encoded_nelts; ++at)
if (maybe_ne (sel[at], at))
break;
- if (at < encoded_nelts && sel.series_p (at + 1, 1, at + 1, 1))
+ if (at < encoded_nelts
+ && (known_eq (at + 1, nelts)
+ || sel.series_p (at + 1, 1, at + 1, 1)))
{
- if (known_lt (at, nelts))
+ if (known_lt (poly_uint64 (sel[at]), nelts))
ins = fold_read_from_vector (cop0, sel[at]);
else
ins = fold_read_from_vector (cop1, sel[at] - nelts);
}
(if (ins)
(bit_insert { op0; } { ins; }
- { bitsize_int (at * tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)))); })
+ { bitsize_int (at * vector_element_bits (type)); })
(if (changed)
(vec_perm { op0; } { op1; } { op2; }))))))))))
(simplify
(vec_perm vec_same_elem_p@0 @0 @1)
@0)
+
+/* Match count trailing zeroes for simplify_count_trailing_zeroes in fwprop.
+ The canonical form is array[((x & -x) * C) >> SHIFT] where C is a magic
+ constant which when multiplied by a power of 2 contains a unique value
+ in the top 5 or 6 bits. This is then indexed into a table which maps it
+ to the number of trailing zeroes. */
+(match (ctz_table_index @1 @2 @3)
+ (rshift (mult (bit_and:c (negate @1) @1) INTEGER_CST@2) INTEGER_CST@3))
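+
+/* A C sketch of the matched idiom, using the well-known 32-bit de Bruijn
+   constant 0x077CB531 from "Bit Twiddling Hacks" (table values as
+   published there; x must be non-zero):
+
+     static const unsigned char ctz_tab[32] = {
+       0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+       31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+     };
+
+     int ctz32 (unsigned x)
+     {
+       return ctz_tab[((x & -x) * 0x077CB531u) >> 27];
+     }  */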