From d37c81f476c17d292943189335d745c3fb817b7d Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 30 Jan 2020 09:41:00 +0100 Subject: [PATCH] i386: Optimize {,v}{,p}movmsk{b,ps,pd} followed by sign extension [PR91824] Some time ago, patterns were added to optimize move mask followed by zero extension from 32 bits to 64 bit. As the testcase shows, the intrinsics actually return int, not unsigned int, so it will happen quite often that one actually needs sign extension instead of zero extension. Except for vpmovmskb with 256-bit operand, sign vs. zero extension doesn't make a difference, as we know the bit 31 will not be set (the source will have 2 or 4 doubles, 4 or 8 floats or 16 or 32 chars). So, for the floating point patterns, this patch just uses a code iterator so that we handle both zero extend and sign extend, and for the byte one adds a separate pattern for the 128-bit operand. 2020-01-30 Jakub Jelinek PR target/91824 * config/i386/sse.md (*_movmsk_zext): Renamed to ... (*_movmsk_ext): ... this. Use any_extend code iterator instead of always zero_extend. (*_movmsk_zext_lt): Renamed to ... (*_movmsk_ext_lt): ... this. Use any_extend code iterator instead of always zero_extend. (*_movmsk_zext_shift): Renamed to ... (*_movmsk_ext_shift): ... this. Use any_extend code iterator instead of always zero_extend. (*sse2_pmovmskb_ext): New define_insn. (*sse2_pmovmskb_ext_lt): New define_insn_and_split. * gcc.target/i386/pr91824-2.c: New test. --- gcc/ChangeLog | 14 +++++ gcc/config/i386/sse.md | 55 ++++++++++++++--- gcc/testsuite/ChangeLog | 3 + gcc/testsuite/gcc.target/i386/pr91824-2.c | 73 +++++++++++++++++++++++ 4 files changed, 137 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr91824-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c86b9c2aa50..4beb63934bc 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,19 @@ 2020-01-30 Jakub Jelinek + PR target/91824 + * config/i386/sse.md + (*_movmsk_zext): Renamed to ... + (*_movmsk_ext): ... this. Use + any_extend code iterator instead of always zero_extend. + (*_movmsk_zext_lt): Renamed to ... + (*_movmsk_ext_lt): ... this. + Use any_extend code iterator instead of always zero_extend. + (*_movmsk_zext_shift): Renamed to ... + (*_movmsk_ext_shift): ... this. + Use any_extend code iterator instead of always zero_extend. + (*sse2_pmovmskb_ext): New define_insn. + (*sse2_pmovmskb_ext_lt): New define_insn_and_split. + PR target/91824 * config/i386/i386.md (*popcountsi2_zext): New define_insn_and_split. (*popcountsi2_zext_falsedep): New define_insn. diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index abbd879aab3..f5ff2e93021 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15815,9 +15815,9 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) -(define_insn "*_movmsk_zext" +(define_insn "*_movmsk_ext" [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI + (any_extend:DI (unspec:SI [(match_operand:VF_128_256 1 "register_operand" "x")] UNSPEC_MOVMSK)))] @@ -15844,9 +15844,9 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) -(define_insn_and_split "*_movmsk_zext_lt" +(define_insn_and_split "*_movmsk_ext_lt" [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI + (any_extend:DI (unspec:SI [(lt:VF_128_256 (match_operand: 1 "register_operand" "x") @@ -15856,7 +15856,7 @@ "#" "&& reload_completed" [(set (match_dup 0) - (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))] + (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))] "operands[1] = gen_lowpart (mode, operands[1]);" [(set_attr "type" "ssemov") (set_attr "prefix" "maybe_vex") @@ -15880,9 +15880,9 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) -(define_insn_and_split "*_movmsk_zext_shift" +(define_insn_and_split "*_movmsk_ext_shift" [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI + (any_extend:DI (unspec:SI [(subreg:VF_128_256 (ashiftrt: @@ -15893,7 +15893,7 @@ "#" "&& reload_completed" [(set (match_dup 0) - (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))] + (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))] "operands[1] = gen_lowpart (mode, operands[1]);" [(set_attr "type" "ssemov") (set_attr "prefix" "maybe_vex") @@ -15932,6 +15932,23 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "SI")]) +(define_insn "*sse2_pmovmskb_ext" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (unspec:SI + [(match_operand:V16QI 1 "register_operand" "x")] + UNSPEC_MOVMSK)))] + "TARGET_64BIT && TARGET_SSE2" + "%vpmovmskb\t{%1, %k0|%k0, %1}" + [(set_attr "type" "ssemov") + (set (attr "prefix_data16") + (if_then_else + (match_test "TARGET_AVX") + (const_string "*") + (const_string "1"))) + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SI")]) + (define_insn_and_split "*_pmovmskb_lt" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI @@ -15975,6 +15992,28 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "SI")]) +(define_insn_and_split "*sse2_pmovmskb_ext_lt" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (unspec:SI + [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x") + (match_operand:V16QI 2 "const0_operand" "C"))] + UNSPEC_MOVMSK)))] + "TARGET_64BIT && TARGET_SSE2" + "#" + "" + [(set (match_dup 0) + (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))] + "" + [(set_attr "type" "ssemov") + (set (attr "prefix_data16") + (if_then_else + (match_test "TARGET_AVX") + (const_string "*") + (const_string "1"))) + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SI")]) + (define_expand "sse2_maskmovdqu" [(set (match_operand:V16QI 0 "memory_operand") (unspec:V16QI [(match_operand:V16QI 1 "register_operand") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9b3660645ef..842e4198917 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,8 @@ 2020-01-30 Jakub Jelinek + PR target/91824 + * gcc.target/i386/pr91824-2.c: New test. + PR target/91824 * gcc.target/i386/pr91824-1.c: New test. diff --git a/gcc/testsuite/gcc.target/i386/pr91824-2.c b/gcc/testsuite/gcc.target/i386/pr91824-2.c new file mode 100644 index 00000000000..bdf1295a765 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91824-2.c @@ -0,0 +1,73 @@ +/* PR target/91824 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx2" } */ +/* { dg-final { scan-assembler-not "cltq" } } */ +/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */ + +#include + +unsigned long long +f1 (__m128i x) +{ + return _mm_movemask_epi8 (x); +} + +unsigned long long +f2 (__m128i x) +{ + return (unsigned) _mm_movemask_epi8 (x); +} + +unsigned long long +f3 (__m128 x) +{ + return _mm_movemask_ps (x); +} + +unsigned long long +f4 (__m128 x) +{ + return (unsigned) _mm_movemask_ps (x); +} + +unsigned long long +f5 (__m128d x) +{ + return _mm_movemask_pd (x); +} + +unsigned long long +f6 (__m128d x) +{ + return (unsigned) _mm_movemask_pd (x); +} + +unsigned long long +f7 (__m256 x) +{ + return _mm256_movemask_ps (x); +} + +unsigned long long +f8 (__m256 x) +{ + return (unsigned) _mm256_movemask_ps (x); +} + +unsigned long long +f9 (__m256d x) +{ + return _mm256_movemask_pd (x); +} + +unsigned long long +f10 (__m256d x) +{ + return (unsigned) _mm256_movemask_pd (x); +} + +unsigned long long +f11 (__m256i x) +{ + return (unsigned) _mm256_movemask_epi8 (x); +} -- 2.30.2