From 89b1427f8693699d64050cc4daf5626f6b96b96a Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Thu, 30 Nov 2017 11:29:58 +0100
Subject: [PATCH] re PR target/83210 (__builtin_mul_overflow() generates
 suboptimal code when exactly one argument is the constant 2)

        PR target/83210
        * internal-fn.c (expand_mul_overflow): Optimize unsigned
        multiplication by power of 2 constant into two shifts + comparison.

        * gcc.target/i386/pr83210.c: New test.

From-SVN: r255269
---
 gcc/ChangeLog                           |  8 +++-
 gcc/internal-fn.c                       | 43 ++++++++++++++++++++
 gcc/testsuite/ChangeLog                 |  5 +++
 gcc/testsuite/gcc.target/i386/pr83210.c | 53 +++++++++++++++++++++++++
 4 files changed, 108 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr83210.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 424c7e7dd1f..776508a1bdc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,7 +1,13 @@
+2017-11-30  Jakub Jelinek
+
+        PR target/83210
+        * internal-fn.c (expand_mul_overflow): Optimize unsigned
+        multiplication by power of 2 constant into two shifts + comparison.
+
 2017-11-30  Jan Hubicka
 
         PR target/81616
-        * x86-tnue-costs.h (generic_cost): Revise for modern CPUs
+        * config/i386/x86-tune-costs.h (generic_cost): Revise for modern CPUs.
 
 2017-11-30  Richard Biener
 
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 119fa1ab030..7ddc5246e3c 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -1462,6 +1462,49 @@ expand_mul_overflow (location_t loc, tree lhs, tree arg0, tree arg1,
       type = build_nonstandard_integer_type (GET_MODE_PRECISION (mode), uns);
       sign = uns ? UNSIGNED : SIGNED;
       icode = optab_handler (uns ? umulv4_optab : mulv4_optab, mode);
+      if (uns
+          && (integer_pow2p (arg0) || integer_pow2p (arg1))
+          && (optimize_insn_for_speed_p () || icode == CODE_FOR_nothing))
+        {
+          /* Optimize unsigned multiplication by power of 2 constant
+             using 2 shifts, one for result, one to extract the shifted
+             out bits to see if they are all zero.
+             Don't do this if optimizing for size and we have umulv4_optab,
+             in that case assume multiplication will be shorter.
+             This is heuristics based on the single target that provides
+             umulv4 right now (i?86/x86_64), if further targets add it, this
+             might need to be revisited.
+             Cases where both operands are constant should be folded already
+             during GIMPLE, and cases where one operand is constant but not
+             power of 2 are questionable, either the WIDEN_MULT_EXPR case
+             below can be done without multiplication, just by shifts and adds,
+             or we'd need to divide the result (and hope it actually doesn't
+             really divide nor multiply) and compare the result of the division
+             with the original operand.  */
+          rtx opn0 = op0;
+          rtx opn1 = op1;
+          tree argn0 = arg0;
+          tree argn1 = arg1;
+          if (integer_pow2p (arg0))
+            {
+              std::swap (opn0, opn1);
+              std::swap (argn0, argn1);
+            }
+          int cnt = tree_log2 (argn1);
+          if (cnt >= 0 && cnt < GET_MODE_PRECISION (mode))
+            {
+              rtx upper = const0_rtx;
+              res = expand_shift (LSHIFT_EXPR, mode, opn0, cnt, NULL_RTX, uns);
+              if (cnt != 0)
+                upper = expand_shift (RSHIFT_EXPR, mode, opn0,
+                                      GET_MODE_PRECISION (mode) - cnt,
+                                      NULL_RTX, uns);
+              do_compare_rtx_and_jump (upper, const0_rtx, EQ, true, mode,
+                                       NULL_RTX, NULL, done_label,
+                                       profile_probability::very_likely ());
+              goto do_error_label;
+            }
+        }
       if (icode != CODE_FOR_nothing)
         {
           struct expand_operand ops[4];
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index a7f3c6fde4f..bda1bf5560d 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2017-11-30  Jakub Jelinek
+
+        PR target/83210
+        * gcc.target/i386/pr83210.c: New test.
+
 2017-11-30  Jan Hubicka
 
         PR target/81616
diff --git a/gcc/testsuite/gcc.target/i386/pr83210.c b/gcc/testsuite/gcc.target/i386/pr83210.c
new file mode 100644
index 00000000000..cf985d29dae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr83210.c
@@ -0,0 +1,53 @@
+/* PR target/83210 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not {\mmul[lq]\M} } } */
+
+void bar (void);
+
+unsigned
+f1 (unsigned int x)
+{
+  unsigned res;
+  if (__builtin_mul_overflow (x, 2, &res))
+    bar ();
+  return res;
+}
+
+unsigned long
+f2 (unsigned long x)
+{
+  unsigned long res;
+  if (__builtin_mul_overflow (16, x, &res))
+    bar ();
+  return res;
+}
+
+unsigned long long
+f3 (unsigned long long x)
+{
+  unsigned long long res;
+  if (__builtin_mul_overflow (x, (1ULL << (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ - 1)), &res))
+    bar ();
+  return res;
+}
+
+#ifdef __SIZEOF_INT128__
+unsigned __int128
+f4 (unsigned __int128 x)
+{
+  unsigned __int128 res;
+  if (__builtin_mul_overflow (x, (((unsigned __int128) 1) << (__SIZEOF_INT128__ * __CHAR_BIT__ / 2)), &res))
+    bar ();
+  return res;
+}
+
+unsigned __int128
+f5 (unsigned __int128 x)
+{
+  unsigned __int128 res;
+  if (__builtin_mul_overflow (x, (((unsigned __int128) 1) << (__SIZEOF_INT128__ * __CHAR_BIT__ / 2 + 3)), &res))
+    bar ();
+  return res;
+}
+#endif
-- 
2.30.2
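
For readers of the PR, here is a minimal C sketch of the transformation the new expand_mul_overflow path performs for an unsigned __builtin_mul_overflow whose second operand is a constant power of two 2**K: one shift forms the product, a second shift extracts the bits shifted out of the mode, and overflow occurred iff any of them is nonzero.  The helper name mul_pow2_overflow, the fixed 32-bit width and the particular K are illustrative assumptions, not part of the patch.

#include <stdio.h>

/* Illustrative sketch only: roughly what the two-shift expansion computes,
   assuming a 32-bit unsigned int and a constant exponent 0 < K < 32.  */
#define K 4

static int
mul_pow2_overflow (unsigned int x, unsigned int *res)
{
  *res = x << K;                /* low bits of x * (1u << K) */
  return (x >> (32 - K)) != 0;  /* shifted-out high bits; nonzero means overflow */
}

int
main (void)
{
  unsigned int r;
  /* 0x12345678 * 16 overflows 32 bits: r = 0x23456780, returns 1.  */
  printf ("%d %#x\n", mul_pow2_overflow (0x12345678u, &r), r);
  /* 42 * 16 = 672 fits: returns 0.  */
  printf ("%d %u\n", mul_pow2_overflow (42u, &r), r);
  return 0;
}

When the constant is 1 (cnt == 0 in the patch), the second shift would be by the full mode precision, which is why the new code leaves upper at const0_rtx in that case and the comparison against zero always falls through to done_label.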