From bc071d3a951a98284a3f46043afd44c03c123376 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Wed, 15 Jan 2020 15:23:54 +0000 Subject: [PATCH] Fix ctz issues (PR93231) Further improve the ctz recognition: Avoid ICEing on negative shift counts or multiply constants. Check the type is a char type for the string constant case to avoid accidentally matching a wide STRING_CST. Add a tree_expr_nonzero_p check to allow the optimization even if CTZ_DEFINED_VALUE_AT_ZERO returns 0 or 1. Add extra test cases. Bootstrap OK on AArch64 and x64. gcc/ PR tree-optimization/93231 * tree-ssa-forwprop.c (optimize_count_trailing_zeroes): Check input_type is unsigned. Use tree_to_shwi for shift constant. Check CST_STRING element size is CHAR_TYPE_SIZE bits. (simplify_count_trailing_zeroes): Add test to handle known non-zero inputs more efficiently. testsuite/ PR tree-optimization/93231 * gcc.dg/pr90838.c: New test. * gcc.dg/pr93231.c: New test. * gcc.target/aarch64/pr90838.c: Use #define u 0. --- gcc/ChangeLog | 9 ++++ gcc/testsuite/ChangeLog | 6 +++ gcc/testsuite/gcc.dg/pr90838.c | 59 ++++++++++++++++++++++ gcc/testsuite/gcc.dg/pr93231.c | 35 +++++++++++++ gcc/testsuite/gcc.target/aarch64/pr90838.c | 2 +- gcc/tree-ssa-forwprop.c | 27 ++++++---- 6 files changed, 128 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr90838.c create mode 100644 gcc/testsuite/gcc.dg/pr93231.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 471620eafdc..76ef4d2305e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2020-01-15 Wilco Dijkstra + + PR tree-optimization/93231 + * tree-ssa-forwprop.c (optimize_count_trailing_zeroes): Check + input_type is unsigned. Use tree_to_shwi for shift constant. + Check CST_STRING element size is CHAR_TYPE_SIZE bits. + (simplify_count_trailing_zeroes): Add test to handle known non-zero + inputs more efficiently. + 2020-01-15 Uroš Bizjak * config/i386/i386.md (*movsf_internal): Do not require diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index da638d35809..769020d67ca 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2020-01-15 Wilco Dijkstra + + * gcc.dg/pr90838.c: New test. + * gcc.dg/pr93231.c: New test. + * gcc.target/aarch64/pr90838.c: Use #define u 0. + 2020-01-15 Richard Biener PR middle-end/93273 diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c new file mode 100644 index 00000000000..41c5dab9a5c --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr90838.c @@ -0,0 +1,59 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-forwprop2-details" } */ + +int ctz1 (unsigned x) +{ + static const char table[32] = "\x00\x01\x1c\x02\x1d\x0e\x18\x03\x1e\x16\x14" + "\x0f\x19\x11\x04\b\x1f\x1b\r\x17\x15\x13\x10\x07\x1a\f\x12\x06\v\x05\n\t"; + + return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; +} + +int ctz2 (unsigned x) +{ +#define u 0 + static short table[64] = + { + 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, + 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15, + 31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26, + 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u + }; + + x = (x & -x) * 0x0450FBAF; + return table[x >> 26]; +} + +int ctz3 (unsigned x) +{ + static int table[32] = + { + 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26, + 31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27 + }; + + if (x == 0) return 32; + x = (x & -x) * 0x04D7651F; + return table[x >> 27]; +} + +static const unsigned long long magic = 0x03f08c5392f756cdULL; + +static const char table[64] = { + 0, 1, 12, 2, 13, 22, 17, 3, + 14, 33, 23, 36, 18, 58, 28, 4, + 62, 15, 34, 26, 24, 48, 50, 37, + 19, 55, 59, 52, 29, 44, 39, 5, + 63, 11, 21, 16, 32, 35, 57, 27, + 61, 25, 47, 49, 54, 51, 43, 38, + 10, 20, 31, 56, 60, 46, 53, 42, + 9, 30, 45, 41, 8, 40, 7, 6, +}; + +int ctz4 (unsigned long x) +{ + unsigned long lsb = x & -x; + return table[(lsb * magic) >> 58]; +} + +/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/pr93231.c b/gcc/testsuite/gcc.dg/pr93231.c new file mode 100644 index 00000000000..cd0b3f320f7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr93231.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-forwprop2-details -Wno-shift-count-negative" } */ + +int ctz_ice1 (int x) +{ + static const char table[32] = + { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + + return table[((int)((x & -x) * -0x077CB531)) >> 27]; +} + +int ctz_ice2 (unsigned x) +{ + static const char table[32] = + { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + + return table[((unsigned)((x & -x) * 0x077CB531U)) >> -27]; +} + +// This should never match +int ctz_fail (unsigned x) +{ + static const unsigned short int table[32] = + u"\x0100\x021c\x0e1d\x0318\x161e\x0f14\x1119\x0804\x1b1f\x170d\x1315\x0710\x0c1a\x0612\x050b\x090a"; + + return table[((x & -x) * 0x077CB531) >> 27]; +} + +/* { dg-final { scan-tree-dump-not {= \.CTZ} "forwprop2" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr90838.c b/gcc/testsuite/gcc.target/aarch64/pr90838.c index bff3144c0d1..e1e19ac6a61 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr90838.c +++ b/gcc/testsuite/gcc.target/aarch64/pr90838.c @@ -14,7 +14,7 @@ int ctz1 (unsigned x) int ctz2 (unsigned x) { - const int u = 0; +#define u 0 static short table[64] = { 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index 56c470f6ecf..d63e87c8a5b 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -1864,9 +1864,9 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc, tree input_type = TREE_TYPE (x); unsigned input_bits = tree_to_shwi (TYPE_SIZE (input_type)); - /* Check the array is not wider than integer type and the input is a 32-bit - or 64-bit type. */ - if (TYPE_PRECISION (type) > 32) + /* Check the array element type is not wider than 32 bits and the input is + an unsigned 32-bit or 64-bit type. */ + if (TYPE_PRECISION (type) > 32 || !TYPE_UNSIGNED (input_type)) return false; if (input_bits != 32 && input_bits != 64) return false; @@ -1879,7 +1879,7 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc, if (!low || !integer_zerop (low)) return false; - unsigned shiftval = tree_to_uhwi (tshift); + unsigned shiftval = tree_to_shwi (tshift); /* Check the shift extracts the top 5..7 bits. */ if (shiftval < input_bits - 7 || shiftval > input_bits - 5) @@ -1894,7 +1894,8 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc, if (TREE_CODE (ctor) == CONSTRUCTOR) return check_ctz_array (ctor, val, zero_val, shiftval, input_bits); - if (TREE_CODE (ctor) == STRING_CST) + if (TREE_CODE (ctor) == STRING_CST + && TYPE_PRECISION (type) == CHAR_TYPE_SIZE) return check_ctz_string (ctor, val, zero_val, shiftval, input_bits); return false; @@ -1920,16 +1921,24 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi) res_ops[1], res_ops[2], zero_val)) { tree type = TREE_TYPE (res_ops[0]); - HOST_WIDE_INT ctzval = 0; + HOST_WIDE_INT ctz_val = 0; HOST_WIDE_INT type_size = tree_to_shwi (TYPE_SIZE (type)); bool zero_ok - = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), ctzval) == 2; + = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), ctz_val) == 2; + + /* If the input value can't be zero, don't special case ctz (0). */ + if (tree_expr_nonzero_p (res_ops[0])) + { + zero_ok = true; + zero_val = 0; + ctz_val = 0; + } /* Skip if there is no value defined at zero, or if we can't easily return the correct value for zero. */ if (!zero_ok) return false; - if (zero_val != ctzval && !(zero_val == 0 && ctzval == type_size)) + if (zero_val != ctz_val && !(zero_val == 0 && ctz_val == type_size)) return false; gimple_seq seq = NULL; @@ -1942,7 +1951,7 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi) tree prev_lhs = gimple_call_lhs (call); /* Emit ctz (x) & 31 if ctz (0) is 32 but we need to return 0. */ - if (zero_val == 0 && ctzval == type_size) + if (zero_val == 0 && ctz_val == type_size) { g = gimple_build_assign (make_ssa_name (integer_type_node), BIT_AND_EXPR, prev_lhs, -- 2.30.2