From a0e4174cd80f8a17026d6c13af2802dc8a0146ad Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 13 Oct 2017 09:28:46 +0200 Subject: [PATCH] re PR target/82498 (Missed optimization for x86 rotate instruction) PR target/82498 * fold-const.c (fold_binary_loc) <bit_rotate>: Code cleanups, instead of handling MINUS_EXPR twice (once for each argument), canonicalize operand order and handle just once, use rtype where possible. Handle (A << B) | (A >> (-B & (Z - 1))). * gcc.dg/tree-ssa/pr82498.c: New test. From-SVN: r253709 --- gcc/ChangeLog | 6 ++ gcc/fold-const.c | 84 ++++++++++++++----------- gcc/testsuite/ChangeLog | 3 + gcc/testsuite/gcc.dg/tree-ssa/pr82498.c | 53 ++++++++++++++++ 4 files changed, 108 insertions(+), 38 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr82498.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 58923e8835a..b8346e3827e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,11 @@ 2017-10-13 Jakub Jelinek + PR target/82498 + * fold-const.c (fold_binary_loc) <bit_rotate>: Code cleanups, + instead of handling MINUS_EXPR twice (once for each argument), + canonicalize operand order and handle just once, use rtype where + possible. Handle (A << B) | (A >> (-B & (Z - 1))). + PR target/82498 * config/i386/ia32intrin.h (__rold, __rord, __rolq, __rorq): Allow any values of __C while still being pattern recognizable as a simple diff --git a/gcc/fold-const.c b/gcc/fold-const.c index e22b02f1312..d1e61918e0b 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -9429,7 +9429,10 @@ fold_binary_loc (location_t loc, /* (A << C1) + (A >> C2) if A is unsigned and C1+C2 is the size of A is a rotate of A by C1 bits. */ /* (A << B) + (A >> (Z - B)) if A is unsigned and Z is the size of A - is a rotate of A by B bits. */ + is a rotate of A by B bits. + Similarly for (A << B) | (A >> (-B & C3)) where C3 is Z-1, + though in this case CODE must be | and not + or ^, otherwise + it doesn't return A when B is 0. 
*/ { enum tree_code code0, code1; tree rtype; @@ -9447,25 +9450,32 @@ fold_binary_loc (location_t loc, == GET_MODE_UNIT_PRECISION (TYPE_MODE (rtype)))) { tree tree01, tree11; + tree orig_tree01, orig_tree11; enum tree_code code01, code11; - tree01 = TREE_OPERAND (arg0, 1); - tree11 = TREE_OPERAND (arg1, 1); + tree01 = orig_tree01 = TREE_OPERAND (arg0, 1); + tree11 = orig_tree11 = TREE_OPERAND (arg1, 1); STRIP_NOPS (tree01); STRIP_NOPS (tree11); code01 = TREE_CODE (tree01); code11 = TREE_CODE (tree11); + if (code11 != MINUS_EXPR + && (code01 == MINUS_EXPR || code01 == BIT_AND_EXPR)) + { + std::swap (code0, code1); + std::swap (code01, code11); + std::swap (tree01, tree11); + std::swap (orig_tree01, orig_tree11); + } if (code01 == INTEGER_CST && code11 == INTEGER_CST && (wi::to_widest (tree01) + wi::to_widest (tree11) - == element_precision (TREE_TYPE (TREE_OPERAND (arg0, 0))))) + == element_precision (rtype))) { tem = build2_loc (loc, LROTATE_EXPR, - TREE_TYPE (TREE_OPERAND (arg0, 0)), - TREE_OPERAND (arg0, 0), + rtype, TREE_OPERAND (arg0, 0), code0 == LSHIFT_EXPR - ? TREE_OPERAND (arg0, 1) - : TREE_OPERAND (arg1, 1)); + ? orig_tree01 : orig_tree11); return fold_convert_loc (loc, type, tem); } else if (code11 == MINUS_EXPR) @@ -9477,39 +9487,37 @@ fold_binary_loc (location_t loc, STRIP_NOPS (tree111); if (TREE_CODE (tree110) == INTEGER_CST && 0 == compare_tree_int (tree110, - element_precision - (TREE_TYPE (TREE_OPERAND - (arg0, 0)))) + element_precision (rtype)) && operand_equal_p (tree01, tree111, 0)) - return - fold_convert_loc (loc, type, - build2 ((code0 == LSHIFT_EXPR - ? LROTATE_EXPR - : RROTATE_EXPR), - TREE_TYPE (TREE_OPERAND (arg0, 0)), - TREE_OPERAND (arg0, 0), - TREE_OPERAND (arg0, 1))); + { + tem = build2_loc (loc, (code0 == LSHIFT_EXPR + ? 
LROTATE_EXPR : RROTATE_EXPR), + rtype, TREE_OPERAND (arg0, 0), + orig_tree01); + return fold_convert_loc (loc, type, tem); + } } - else if (code01 == MINUS_EXPR) + else if (code == BIT_IOR_EXPR + && code11 == BIT_AND_EXPR + && pow2p_hwi (element_precision (rtype))) { - tree tree010, tree011; - tree010 = TREE_OPERAND (tree01, 0); - tree011 = TREE_OPERAND (tree01, 1); - STRIP_NOPS (tree010); - STRIP_NOPS (tree011); - if (TREE_CODE (tree010) == INTEGER_CST - && 0 == compare_tree_int (tree010, - element_precision - (TREE_TYPE (TREE_OPERAND - (arg0, 0)))) - && operand_equal_p (tree11, tree011, 0)) - return fold_convert_loc - (loc, type, - build2 ((code0 != LSHIFT_EXPR - ? LROTATE_EXPR - : RROTATE_EXPR), - TREE_TYPE (TREE_OPERAND (arg0, 0)), - TREE_OPERAND (arg0, 0), TREE_OPERAND (arg1, 1))); + tree tree110, tree111; + tree110 = TREE_OPERAND (tree11, 0); + tree111 = TREE_OPERAND (tree11, 1); + STRIP_NOPS (tree110); + STRIP_NOPS (tree111); + if (TREE_CODE (tree110) == NEGATE_EXPR + && TREE_CODE (tree111) == INTEGER_CST + && 0 == compare_tree_int (tree111, + element_precision (rtype) - 1) + && operand_equal_p (tree01, TREE_OPERAND (tree110, 0), 0)) + { + tem = build2_loc (loc, (code0 == LSHIFT_EXPR + ? LROTATE_EXPR : RROTATE_EXPR), + rtype, TREE_OPERAND (arg0, 0), + orig_tree01); + return fold_convert_loc (loc, type, tem); + } } } } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0a8246fd980..ff6d7438956 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,8 @@ 2017-10-13 Jakub Jelinek + PR target/82498 + * gcc.dg/tree-ssa/pr82498.c: New test. + PR target/82498 * gcc.dg/ubsan/pr82498.c: New test. 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr82498.c b/gcc/testsuite/gcc.dg/tree-ssa/pr82498.c new file mode 100644 index 00000000000..19a42f0a3c7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82498.c @@ -0,0 +1,53 @@ +/* PR target/82498 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-original" } */ +/* { dg-final { scan-tree-dump-times "x r<< y" 4 "original" { target int32 } } } */ +/* { dg-final { scan-tree-dump-times "x r>> y" 4 "original" { target int32 } } } */ + +unsigned +f1 (unsigned x, int y) +{ + return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y)); +} + +unsigned +f2 (unsigned x, int y) +{ + return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))); +} + +unsigned +f3 (unsigned x, int y) +{ + return (x >> y) | (x << (__CHAR_BIT__ * __SIZEOF_INT__ - y)); +} + +unsigned +f4 (unsigned x, int y) +{ + return (x >> y) | (x << (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))); +} + +unsigned +f5 (unsigned x, int y) +{ + return (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y)) | (x << y); +} + +unsigned +f6 (unsigned x, int y) +{ + return (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x << y); +} + +unsigned +f7 (unsigned x, int y) +{ + return (x << (__CHAR_BIT__ * __SIZEOF_INT__ - y)) | (x >> y); +} + +unsigned +f8 (unsigned x, int y) +{ + return (x << (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x >> y); +} -- 2.30.2