From b285bebe6ad1e9f6416f0eb6cb69edc44db7813c Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 30 Jan 2020 09:39:05 +0100 Subject: [PATCH] i386: Optimize popcnt followed by zero/sign extension [PR91824] Like any other instruction with 32-bit GPR destination operand in 64-bit mode, popcntl also clears the upper 32 bits of the register (and other bits too; it can return only 0 to 32 inclusive). During combine, the zero or sign extensions of it show up as paradoxical subreg of the popcount & 63, where 63 is the smallest power of two - 1 mask that can represent all the 0 to 32 inclusive values. 2020-01-30 Jakub Jelinek PR target/91824 * config/i386/i386.md (*popcountsi2_zext): New define_insn_and_split. (*popcountsi2_zext_falsedep): New define_insn. * gcc.target/i386/pr91824-1.c: New test. --- gcc/ChangeLog | 6 +++ gcc/config/i386/i386.md | 54 +++++++++++++++++++++++ gcc/testsuite/ChangeLog | 5 +++ gcc/testsuite/gcc.target/i386/pr91824-1.c | 54 +++++++++++++++++++++++ 4 files changed, 119 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr91824-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a36e73249a3..c86b9c2aa50 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2020-01-30 Jakub Jelinek + + PR target/91824 + * config/i386/i386.md (*popcountsi2_zext): New define_insn_and_split. + (*popcountsi2_zext_falsedep): New define_insn. + 2020-01-30 Dragan Mladjenovic * config.in: Regenerated. 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index f83b3702c8e..f5c8d552af8 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14563,6 +14563,60 @@ (set_attr "type" "bitmanip") (set_attr "mode" "")]) +(define_insn_and_split "*popcountsi2_zext" /* SImode popcount viewed through a DImode subreg and masked with 63, set into a DImode register; enabled only for TARGET_POPCNT && TARGET_64BIT. */ + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI + (subreg:DI + (popcount:SI + (match_operand:SI 1 "nonimmediate_operand" "rm")) 0) + (const_int 63))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT && TARGET_64BIT" +{ /* Both variants print operand 0 via %k0; the TARGET_MACHO variant only omits the {l} mnemonic suffix. */ +#if TARGET_MACHO + return "popcnt\t{%1, %k0|%k0, %1}"; +#else + return "popcnt{l}\t{%1, %k0|%k0, %1}"; +#endif +} /* Split: once epilogue_completed, when TARGET_AVOID_FALSE_DEP_FOR_BMI holds, the function is optimized for speed and operand 1 does not mention operand 0, the preparation statement calls ix86_expand_clear on operand 0 and the replacement pattern adds an UNSPEC_INSN_FALSE_DEP reference to operand 0. */ + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (match_dup 0) + (and:DI (subreg:DI (popcount:SI (match_dup 1)) 0) (const_int 63))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))])] + "ix86_expand_clear (operands[0]);" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) + +; False dependency happens when destination is only updated by tzcnt, +; lzcnt or popcnt. There is no false dependency when destination is +; also used in source. 
+(define_insn "*popcountsi2_zext_falsedep" /* Same set, insn condition and output template as "*popcountsi2_zext", plus operand 2, tied to operand 0 by its "0" constraint, wrapped in UNSPEC_INSN_FALSE_DEP. */ + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI + (subreg:DI + (popcount:SI + (match_operand:SI 1 "nonimmediate_operand" "rm")) 0) + (const_int 63))) + (unspec [(match_operand:DI 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT && TARGET_64BIT" +{ /* Operand 2 is never printed; only %1 and %k0 appear in the template. */ +#if TARGET_MACHO + return "popcnt\t{%1, %k0|%k0, %1}"; +#else + return "popcnt{l}\t{%1, %k0|%k0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) + (define_insn_and_split "*popcounthi2_1" [(set (match_operand:SI 0 "register_operand") (popcount:SI diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 30e804b2969..9b3660645ef 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2020-01-30 Jakub Jelinek + + PR target/91824 + * gcc.target/i386/pr91824-1.c: New test. + 2020-01-30 Bin Cheng * g++.dg/coroutines/co-await-syntax-09-convert.C: New test. 
diff --git a/gcc/testsuite/gcc.target/i386/pr91824-1.c b/gcc/testsuite/gcc.target/i386/pr91824-1.c new file mode 100644 index 00000000000..0bb24e7fedc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91824-1.c @@ -0,0 +1,54 @@ +/* PR target/91824 */ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mpopcnt" } */ +/* { dg-final { scan-assembler-not "cltq" } } */ /* Compile-only test: the generated assembly must not contain cltq. */ + +unsigned int foo (void); /* Declared only; supplies the popcount argument in f5-f8. */ + +unsigned long +f1 (unsigned int x) /* popcount result converted directly to unsigned long */ +{ + return __builtin_popcount (x); +} + +unsigned long +f2 (unsigned int x) /* popcount result cast to unsigned before widening */ +{ + return (unsigned) __builtin_popcount (x); +} + +unsigned long +f3 (unsigned int x) /* popcount result masked with 63ULL */ +{ + return __builtin_popcount (x) & 63ULL; +} + +unsigned long +f4 (unsigned int x) /* popcount result masked with 1023ULL */ +{ + return __builtin_popcount (x) & 1023ULL; +} + +unsigned long +f5 (void) /* f5-f8 repeat f1-f4 with foo () as the popcount argument */ +{ + return __builtin_popcount (foo ()); +} + +unsigned long +f6 (void) +{ + return (unsigned) __builtin_popcount (foo ()); +} + +unsigned long +f7 (void) +{ + return __builtin_popcount (foo ()) & 63ULL; +} + +unsigned long +f8 (void) +{ + return __builtin_popcount (foo ()) & 1023ULL; +} -- 2.30.2