From 56f0b32476c0c261c8e08525f9e47fe87492447a Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 31 Mar 2020 11:02:47 +0200 Subject: [PATCH] forwprop: Pattern recognize more rotates [PR94344] The following patch adjusts simplify_rotate to recognize more rotates. Specifically, it also allows some same precision integral -> integral conversions, with the requirement that the RSHIFT_EXPR be performed in an unsigned type (i.e. as a logical right shift). This compensates for the combiner no longer being able to simplify those into rotates on some targets. 2020-03-31 Jakub Jelinek PR rtl-optimization/94344 * tree-ssa-forwprop.c (simplify_rotate): Handle also same precision conversions, either on both operands of |^+ or just one. Handle also extra same precision conversion on RSHIFT_EXPR first operand provided RSHIFT_EXPR is performed in unsigned type. * gcc.dg/pr94344.c: New test. --- gcc/ChangeLog | 8 +++++ gcc/testsuite/ChangeLog | 5 ++++ gcc/testsuite/gcc.dg/pr94344.c | 53 ++++++++++++++++++++++++++++++++++ gcc/tree-ssa-forwprop.c | 48 +++++++++++++++++++++++++++--- 4 files changed, 110 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr94344.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 101956a53c6..f7d1ccf72b6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2020-03-31 Jakub Jelinek + + PR rtl-optimization/94344 + * tree-ssa-forwprop.c (simplify_rotate): Handle also same precision + conversions, either on both operands of |^+ or just one. Handle + also extra same precision conversion on RSHIFT_EXPR first operand + provided RSHIFT_EXPR is performed in unsigned type. + 2020-03-30 David Malcolm * lra.c (finish_insn_code_data_once): Set the array elements diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c72aa9aae06..14faa3faa32 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2020-03-31 Jakub Jelinek + + PR rtl-optimization/94344 + * gcc.dg/pr94344.c: New test. 
+ 2020-03-30 David Malcolm * jit.dg/all-non-failing-tests.h: Add test-empty.c diff --git a/gcc/testsuite/gcc.dg/pr94344.c b/gcc/testsuite/gcc.dg/pr94344.c new file mode 100644 index 00000000000..361e16eab2d --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr94344.c @@ -0,0 +1,53 @@ +/* PR rtl-optimization/94344 */ +/* { dg-do compile { target { ilp32 || lp64 } } } */ +/* { dg-options "-O2 -fdump-tree-forwprop1" } */ +/* { dg-final { scan-tree-dump-times " r>> 27;" 4 "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times " r>> 59;" 4 "forwprop1" } } */ + +int +f1 (int x) +{ + return (x << 5) | (int)((unsigned int)x >> 27); +} + +unsigned int +f2 (int x) +{ + return (x << 5) | ((unsigned int)x >> 27); +} + +long long int +f3 (long long int x) +{ + return (x << 5) | (long long int)((unsigned long long int)x >> 59); +} + +unsigned long long int +f4 (long long int x) +{ + return (x << 5) | ((unsigned long long int)x >> 59); +} + +int +f5 (int x) +{ + return (int)((unsigned int)x >> 27) | (x << 5); +} + +unsigned int +f6 (int x) +{ + return ((unsigned int)x >> 27) | (x << 5); +} + +long long int +f7 (long long int x) +{ + return (long long int)((unsigned long long int)x >> 59) | (x << 5); +} + +unsigned long long int +f8 (long long int x) +{ + return ((unsigned long long int)x >> 59) | (x << 5); +} diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index 234c1f7dd7d..e7eaa18ccad 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -1562,14 +1562,14 @@ simplify_rotate (gimple_stmt_iterator *gsi) for (i = 0; i < 2; i++) defcodefor_name (arg[i], &def_code[i], &def_arg1[i], &def_arg2[i]); - /* Look through narrowing conversions. */ + /* Look through narrowing (or same precision) conversions. 
*/ if (CONVERT_EXPR_CODE_P (def_code[0]) && CONVERT_EXPR_CODE_P (def_code[1]) && INTEGRAL_TYPE_P (TREE_TYPE (def_arg1[0])) && INTEGRAL_TYPE_P (TREE_TYPE (def_arg1[1])) && TYPE_PRECISION (TREE_TYPE (def_arg1[0])) == TYPE_PRECISION (TREE_TYPE (def_arg1[1])) - && TYPE_PRECISION (TREE_TYPE (def_arg1[0])) > TYPE_PRECISION (rtype) + && TYPE_PRECISION (TREE_TYPE (def_arg1[0])) >= TYPE_PRECISION (rtype) && has_single_use (arg[0]) && has_single_use (arg[1])) { @@ -1579,6 +1579,21 @@ simplify_rotate (gimple_stmt_iterator *gsi) defcodefor_name (arg[i], &def_code[i], &def_arg1[i], &def_arg2[i]); } } + else + { + /* Handle signed rotate; the RSHIFT_EXPR has to be done + in unsigned type but LSHIFT_EXPR could be signed. */ + i = (def_code[0] == LSHIFT_EXPR || def_code[0] == RSHIFT_EXPR); + if (CONVERT_EXPR_CODE_P (def_code[i]) + && (def_code[1 - i] == LSHIFT_EXPR || def_code[1 - i] == RSHIFT_EXPR) + && INTEGRAL_TYPE_P (TREE_TYPE (def_arg1[i])) + && TYPE_PRECISION (rtype) == TYPE_PRECISION (TREE_TYPE (def_arg1[i])) + && has_single_use (arg[i])) + { + arg[i] = def_arg1[i]; + defcodefor_name (arg[i], &def_code[i], &def_arg1[i], &def_arg2[i]); + } + } /* One operand has to be LSHIFT_EXPR and one RSHIFT_EXPR. */ for (i = 0; i < 2; i++) @@ -1608,8 +1623,33 @@ simplify_rotate (gimple_stmt_iterator *gsi) if (!operand_equal_for_phi_arg_p (def_arg1[0], def_arg1[1]) || !types_compatible_p (TREE_TYPE (def_arg1[0]), TREE_TYPE (def_arg1[1]))) - return false; - if (!TYPE_UNSIGNED (TREE_TYPE (def_arg1[0]))) + { + if ((TYPE_PRECISION (TREE_TYPE (def_arg1[0])) + != TYPE_PRECISION (TREE_TYPE (def_arg1[1]))) + || (TYPE_UNSIGNED (TREE_TYPE (def_arg1[0])) + == TYPE_UNSIGNED (TREE_TYPE (def_arg1[1])))) + return false; + + /* Handle signed rotate; the RSHIFT_EXPR has to be done + in unsigned type but LSHIFT_EXPR could be signed. 
*/ + i = def_code[0] != RSHIFT_EXPR; + if (!TYPE_UNSIGNED (TREE_TYPE (def_arg1[i]))) + return false; + + tree tem; + enum tree_code code; + defcodefor_name (def_arg1[i], &code, &tem, NULL); + if (!CONVERT_EXPR_CODE_P (code) + || !INTEGRAL_TYPE_P (TREE_TYPE (tem)) + || TYPE_PRECISION (TREE_TYPE (tem)) != TYPE_PRECISION (rtype)) + return false; + def_arg1[i] = tem; + if (!operand_equal_for_phi_arg_p (def_arg1[0], def_arg1[1]) + || !types_compatible_p (TREE_TYPE (def_arg1[0]), + TREE_TYPE (def_arg1[1]))) + return false; + } + else if (!TYPE_UNSIGNED (TREE_TYPE (def_arg1[0]))) return false; /* CNT1 + CNT2 == B case above. */ -- 2.30.2