From: Jakub Jelinek
Date: Tue, 12 Jan 2021 10:03:40 +0000 (+0100)
Subject: reassoc: Optimize in reassoc x < 0 && y < 0 to (x | y) < 0 etc. [PR95731]
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=13d47c37a2c043f3e5981e73e4c82158a39f41e8;p=gcc.git

reassoc: Optimize in reassoc x < 0 && y < 0 to (x | y) < 0 etc. [PR95731]

We already optimized x != 0 && y != 0 into (x | y) != 0, x != -1 && y != -1
into (x & y) != -1 and x < 32U && y < 32U into (x | y) < 32U; this patch adds
signed x < 0 && y < 0 into (x | y) < 0.  In that case, low/high are always
the same and just in_p indicates whether the comparison is >= 0 or < 0.  All
types in the same bucket (same precision) should be type compatible, but we
can have some >= 0 and some < 0 comparisons mixed, so the patch handles that
by using the right BIT_IOR_EXPR or BIT_AND_EXPR, doing one set of < 0 or
>= 0 first, then BIT_NOT_EXPR and then the other set.

I had to move optimize_range_tests_var_bound before this optimization
because that one deals with signed a >= 0 && a < b, and I limited it to the
last reassoc pass, as reassoc itself can't virtually undo this optimization
yet (and I'm not sure whether vrp would be able to).

2021-01-12  Jakub Jelinek

	PR tree-optimization/95731
	* tree-ssa-reassoc.c (optimize_range_tests_cmp_bitwise): Also
	optimize x < 0 && y < 0 && z < 0 into (x | y | z) < 0 for signed
	x, y, z.
	(optimize_range_tests): Call optimize_range_tests_cmp_bitwise
	only after optimize_range_tests_var_bound.

	* gcc.dg/tree-ssa/pr95731.c: New test.
	* gcc.c-torture/execute/pr95731.c: New test.
---

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr95731.c b/gcc/testsuite/gcc.c-torture/execute/pr95731.c
new file mode 100644
index 00000000000..4d9593cd359
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr95731.c
@@ -0,0 +1,40 @@
+/* PR tree-optimization/95731 */
+
+__attribute__((noipa)) int
+foo (int x, int y, int z, int w, long long u, long long v)
+{
+  return x >= 0 && y >= 0 && z < 0 && u < 0 && w >= 0 && v < 0;
+}
+
+__attribute__((noipa)) int
+bar (int x, int y, int z, int w, long long u, long long v)
+{
+  return u >= 0 && x >= 0 && y >= 0 && v < 0 && z >= 0 && w >= 0;
+}
+
+__attribute__((noipa)) int
+baz (int x, int y, int z, int w, long long u, long long v)
+{
+  return x >= 0 || u < 0 || y >= 0 || v < 0 || z >= 0 || w >= 0;
+}
+
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 64; i++)
+    {
+      int a = foo ((i & 1) ? -123 : 456, (i & 2) ? -123 : 456,
+		   (i & 4) ? -123 : 456, (i & 8) ? -123 : 456,
+		   (i & 16) ? -123 : 456, (i & 32) ? -123 : 456);
+      int b = bar ((i & 1) ? -123 : 456, (i & 2) ? -123 : 456,
+		   (i & 4) ? -123 : 456, (i & 8) ? -123 : 456,
+		   (i & 16) ? -123 : 456, (i & 32) ? -123 : 456);
+      int c = baz ((i & 1) ? -123 : 456, (i & 2) ? -123 : 456,
+		   (i & 4) ? -123 : 456, (i & 8) ? -123 : 456,
+		   (i & 16) ? -123 : 456, (i & 32) ? -123 : 456);
+      if (a != (i == 52) || b != (i == 32) || c != (i != 15))
+	__builtin_abort ();
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95731.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95731.c
new file mode 100644
index 00000000000..39889be6cbb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95731.c
@@ -0,0 +1,22 @@
+/* PR tree-optimization/95731 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " >= 0\| < 0" 6 "optimized" } } */
+
+int
+foo (int x, int y, int z, int w, long long u, long long v)
+{
+  return x >= 0 && y >= 0 && z < 0 && u < 0 && w >= 0 && v < 0;
+}
+
+int
+bar (int x, int y, int z, int w, long long u, long long v)
+{
+  return u >= 0 && x >= 0 && y >= 0 && v < 0 && z >= 0 && w >= 0;
+}
+
+int
+baz (int x, int y, int z, int w, long long u, long long v)
+{
+  return x >= 0 || u < 0 || y >= 0 || v < 0 || z >= 0 || w >= 0;
+}
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index b6a09e5d9aa..8e2a4896d14 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -3320,7 +3320,8 @@ optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
    and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
    Also, handle x < C && y < C && z < C where C is power of two as
-   (x | y | z) < C.  */
+   (x | y | z) < C.  And also handle signed x < 0 && y < 0 && z < 0
+   as (x | y | z) < 0.  */
 
 static bool
 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
@@ -3340,13 +3341,13 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
 
       if (ranges[i].exp == NULL_TREE
 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
-	  || !ranges[i].in_p
 	  || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
 	  || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
 	continue;
 
       if (ranges[i].low != NULL_TREE
 	  && ranges[i].high != NULL_TREE
+	  && ranges[i].in_p
 	  && tree_int_cst_equal (ranges[i].low, ranges[i].high))
 	{
 	  idx = !integer_zerop (ranges[i].low);
@@ -3354,7 +3355,8 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
 	    continue;
 	}
       else if (ranges[i].high != NULL_TREE
-	       && TREE_CODE (ranges[i].high) == INTEGER_CST)
+	       && TREE_CODE (ranges[i].high) == INTEGER_CST
+	       && ranges[i].in_p)
 	{
 	  wide_int w = wi::to_wide (ranges[i].high);
 	  int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
@@ -3370,10 +3372,20 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
 		    && integer_zerop (ranges[i].low))))
 	    continue;
 	}
+      else if (ranges[i].high == NULL_TREE
+	       && ranges[i].low != NULL_TREE
+	       /* Perform this optimization only in the last
+		  reassoc pass, as it interferes with the reassociation
+		  itself or could also with VRP etc. which might not
+		  be able to virtually undo the optimization.  */
+	       && !reassoc_insert_powi_p
+	       && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
+	       && integer_zerop (ranges[i].low))
+	idx = 3;
       else
 	continue;
 
-      b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 3 + idx;
+      b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
       if (buckets.length () <= b)
 	buckets.safe_grow_cleared (b + 1, true);
       if (chains.length () <= (unsigned) i)
@@ -3386,7 +3398,7 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
     if (i && chains[i - 1])
       {
 	int j, k = i;
-	if ((b % 3) == 2)
+	if ((b % 4) == 2)
 	  {
 	    /* When ranges[X - 1].high + 1 is a power of two,
 	       we need to process the same bucket up to
@@ -3439,6 +3451,19 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
 	  {
 	    tree type = TREE_TYPE (ranges[j - 1].exp);
 	    strict_overflow_p |= ranges[j - 1].strict_overflow_p;
+	    if ((b % 4) == 3)
+	      {
+		/* For the signed < 0 cases, the types should be
+		   really compatible (all signed with the same precision,
+		   instead put ranges that have different in_p from
+		   k first.  */
+		if (!useless_type_conversion_p (type1, type))
+		  continue;
+		if (ranges[j - 1].in_p != ranges[k - 1].in_p)
+		  candidates.safe_push (&ranges[j - 1]);
+		type2 = type1;
+		continue;
+	      }
 	    if (j == k
 		|| useless_type_conversion_p (type1, type))
 	      ;
@@ -3456,6 +3481,14 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
 	    tree type = TREE_TYPE (ranges[j - 1].exp);
 	    if (j == k)
 	      continue;
+	    if ((b % 4) == 3)
+	      {
+		if (!useless_type_conversion_p (type1, type))
+		  continue;
+		if (ranges[j - 1].in_p == ranges[k - 1].in_p)
+		  candidates.safe_push (&ranges[j - 1]);
+		continue;
+	      }
 	    if (useless_type_conversion_p (type1, type))
 	      ;
 	    else if (type2 == NULL_TREE
@@ -3471,6 +3504,7 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
 	FOR_EACH_VEC_ELT (candidates, id, r)
 	  {
 	    gimple *g;
+	    enum tree_code code;
 	    if (id == 0)
 	      {
 		op = r->exp;
@@ -3478,7 +3512,8 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
 	      }
 	    if (id == l)
 	      {
-		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
+		code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
+		g = gimple_build_assign (make_ssa_name (type1), code, op);
 		gimple_seq_add_stmt_without_update (&seq, g);
 		op = gimple_assign_lhs (g);
 	      }
@@ -3490,21 +3525,24 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
 		gimple_seq_add_stmt_without_update (&seq, g);
 		exp = gimple_assign_lhs (g);
 	      }
+	    if ((b % 4) == 3)
+	      code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
+	    else
+	      code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
 	    g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
-				     (b % 3) == 1
-				     ? BIT_AND_EXPR : BIT_IOR_EXPR, op, exp);
+				     code, op, exp);
 	    gimple_seq_add_stmt_without_update (&seq, g);
 	    op = gimple_assign_lhs (g);
 	  }
 	candidates.pop ();
 	if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
 			       candidates.length (), opcode, ops, op,
-			       seq, true, ranges[k - 1].low,
+			       seq, ranges[k - 1].in_p, ranges[k - 1].low,
 			       ranges[k - 1].high, strict_overflow_p))
 	  any_changes = true;
 	else
 	  gimple_seq_discard (seq);
-	if ((b % 3) == 2 && buckets[b] != i)
+	if ((b % 4) == 2 && buckets[b] != i)
 	  /* There is more work to do for this bucket.  */
 	  b--;
       }
@@ -3909,10 +3947,10 @@ optimize_range_tests (enum tree_code opcode,
   if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
     any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
 						     ops, ranges);
-  any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
-						   ops, ranges);
   any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
 						 ranges, first_bb);
+  any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
+						   ops, ranges);
 
   if (any_changes && opcode != ERROR_MARK)
     {
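
Editor's note, not part of the commit or of the GCC testsuite: the sketch below is a stand-alone check of the sign-bit identities the patch relies on.  For signed operands the sign bit of x | y is set iff the sign bit of x or of y is set, so x < 0 && y < 0 is equivalent to (x | y) < 0 and x >= 0 && y >= 0 to (x | y) >= 0.  A mixed chain such as x >= 0 && y >= 0 && z < 0 can still be folded by combining one set first, complementing the result (the BIT_NOT_EXPR step in the patch) and then combining it with the other set.  The helper names (chain, bitwise) and the value table are made up for this illustration, and the usual two's complement int is assumed.

/* Illustration only (not from the commit): exhaustively check the sign-bit
   identities exploited by optimize_range_tests_cmp_bitwise on a handful of
   interesting values.  Assumes two's complement int.  */

#include <assert.h>
#include <limits.h>

/* The chain of sign tests as a programmer would write it.  */
static int
chain (int x, int y, int z)
{
  return x >= 0 && y >= 0 && z < 0;
}

/* The bitwise form reassoc may now produce: x >= 0 && y >= 0 becomes
   (x | y) >= 0, and the remaining z < 0 test is mixed in by complementing
   that intermediate value and AND-ing, so only the sign bit is tested.  */
static int
bitwise (int x, int y, int z)
{
  return (~(x | y) & z) < 0;
}

int
main (void)
{
  static const int vals[]
    = { INT_MIN, INT_MIN + 1, -456, -1, 0, 1, 123, INT_MAX };
  const int n = sizeof vals / sizeof vals[0];

  for (int i = 0; i < n; i++)
    for (int j = 0; j < n; j++)
      for (int k = 0; k < n; k++)
	assert (chain (vals[i], vals[j], vals[k])
		== bitwise (vals[i], vals[j], vals[k]));
  return 0;
}

The scan in gcc.dg/tree-ssa/pr95731.c expects exactly six >= 0 or < 0 comparisons to survive in the optimized dump, which is consistent with the sign tests in each of the three functions collapsing to one combined comparison per precision bucket (one for the int operands, one for the long long ones).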