From 8f7941ca37001773a36add8119791725aeb823ba Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 30 Dec 2020 11:21:24 +0100 Subject: [PATCH] i386: Optimize pmovmskb on inverted vector to inversion of pmovmskb result [PR98461] The following patch adds combine splitters to optimize: - vpcmpeqd %ymm1, %ymm1, %ymm1 - vpandn %ymm1, %ymm0, %ymm0 vpmovmskb %ymm0, %eax + notl %eax etc. (for vectors with less than 32 elements with xorl instead of notl). 2020-12-30 Jakub Jelinek PR target/98461 * config/i386/sse.md (_pmovmskb): Add splitters for pmovmskb of NOT vector. * gcc.target/i386/sse2-pr98461.c: New test. * gcc.target/i386/avx2-pr98461.c: New test. --- gcc/config/i386/sse.md | 47 +++++++++++++++++ gcc/testsuite/gcc.target/i386/avx2-pr98461.c | 54 ++++++++++++++++++++ gcc/testsuite/gcc.target/i386/sse2-pr98461.c | 50 ++++++++++++++++++ 3 files changed, 151 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr98461.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-pr98461.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 141a99d254e..d84103807ff 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -16099,6 +16099,53 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "SI")]) +(define_split + [(set (match_operand:SI 0 "register_operand") + (unspec:SI + [(not:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand"))] + UNSPEC_MOVMSK))] + "TARGET_SSE2" + [(set (match_dup 2) + (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)) + (set (match_dup 0) (match_dup 3))] +{ + operands[2] = gen_reg_rtx (SImode); + if (GET_MODE_NUNITS (mode) == 32) + operands[3] = gen_rtx_NOT (SImode, operands[2]); + else + { + operands[3] + = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (mode)) - 1, + SImode); + operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]); + } +}) + +(define_split + [(set (match_operand:SI 0 "register_operand") + (unspec:SI + [(subreg:VI1_AVX2 (not (match_operand 1 "register_operand")) 0)] + UNSPEC_MOVMSK))] + "TARGET_SSE2 + && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_INT + && GET_MODE_SIZE (GET_MODE (operands[1])) == " + [(set (match_dup 2) + (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)) + (set (match_dup 0) (match_dup 3))] +{ + operands[2] = gen_reg_rtx (SImode); + operands[1] = gen_lowpart (mode, operands[1]); + if (GET_MODE_NUNITS (mode) == 32) + operands[3] = gen_rtx_NOT (SImode, operands[2]); + else + { + operands[3] + = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (mode)) - 1, + SImode); + operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]); + } +}) + (define_insn_and_split "*_pmovmskb_lt" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr98461.c b/gcc/testsuite/gcc.target/i386/avx2-pr98461.c new file mode 100644 index 00000000000..15f49b864da --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-pr98461.c @@ -0,0 +1,54 @@ +/* PR target/98461 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx2 -masm=att" } */ +/* { dg-final { scan-assembler-times "\tvpmovmskb\t" 6 } } */ +/* { dg-final { scan-assembler-times "\tnotl\t" 6 } } */ +/* { dg-final { scan-assembler-not "\tvpcmpeq" } } */ +/* { dg-final { scan-assembler-not "\tvpxor" } } */ +/* { dg-final { scan-assembler-not "\tvpandn" } } */ + +#include + +int +f1 (__m256i x) +{ + return ~_mm256_movemask_epi8 (x); +} + +int +f2 (__m256i x) +{ + return _mm256_movemask_epi8 (_mm256_andnot_si256 (x, _mm256_set1_epi8 (255))); +} + +int +f3 (__v32qi x) +{ + x ^= (__v32qi) { -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1 }; + return _mm256_movemask_epi8 ((__m256i) x); +} + +long +f4 (__m256i x) +{ + return (unsigned) ~_mm256_movemask_epi8 (x); +} + +long +f5 (__m256i x) +{ + return (unsigned) _mm256_movemask_epi8 (_mm256_andnot_si256 (x, _mm256_set1_epi8 (255))); +} + +long +f6 (__v32qi x) +{ + x ^= (__v32qi) { -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1 }; + return (unsigned) _mm256_movemask_epi8 ((__m256i) x); +} diff --git a/gcc/testsuite/gcc.target/i386/sse2-pr98461.c b/gcc/testsuite/gcc.target/i386/sse2-pr98461.c new file mode 100644 index 00000000000..9fae7557f52 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-pr98461.c @@ -0,0 +1,50 @@ +/* PR target/98461 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mno-sse3 -masm=att" } */ +/* { dg-final { scan-assembler-times "\tpmovmskb\t" 6 } } */ +/* { dg-final { scan-assembler-times "\txorl\t" 6 } } */ +/* { dg-final { scan-assembler-not "\tpcmpeq" } } */ +/* { dg-final { scan-assembler-not "\tpxor" } } */ +/* { dg-final { scan-assembler-not "\tpandn" } } */ + +#include + +int +f1 (__m128i x) +{ + return _mm_movemask_epi8 (x) ^ 65535; +} + +int +f2 (__m128i x) +{ + return _mm_movemask_epi8 (_mm_andnot_si128 (x, _mm_set1_epi8 (255))); +} + +int +f3 (__v16qi x) +{ + x ^= (__v16qi) { -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1 }; + return _mm_movemask_epi8 ((__m128i) x); +} + +long +f4 (__m128i x) +{ + return (unsigned) (_mm_movemask_epi8 (x) ^ 65535); +} + +long +f5 (__m128i x) +{ + return (unsigned) _mm_movemask_epi8 (_mm_andnot_si128 (x, _mm_set1_epi8 (255))); +} + +long +f6 (__v16qi x) +{ + x ^= (__v16qi) { -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1 }; + return (unsigned) _mm_movemask_epi8 ((__m128i) x); +} -- 2.30.2