From 0b1c4b83e44a89b2736671f968c07ca1aab634ae Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Thu, 29 Nov 2018 23:16:37 +0100
Subject: [PATCH] re PR target/88152 (optimize SSE & AVX char compares with
 subsequent movmskb)

	PR target/88152
	* config/i386/sse.md (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt,
	*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt,
	*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift,
	*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift,
	*<sse2_avx2>_pmovmskb_lt, *<sse2_avx2>_pmovmskb_zext_lt): New
	define_insn_and_split patterns.

	* g++.target/i386/pr88152.C: New test.

From-SVN: r266649
---
 gcc/ChangeLog                           |   8 ++
 gcc/config/i386/sse.md                  | 115 ++++++++++++++++++++++++
 gcc/testsuite/ChangeLog                 |   5 ++
 gcc/testsuite/g++.target/i386/pr88152.C |  44 +++++++++
 4 files changed, 172 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/i386/pr88152.C

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 93ee3fad3e7..7075f34f566 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,13 @@
 2018-11-29  Jakub Jelinek  <jakub@redhat.com>
 
+	PR target/88152
+	* config/i386/sse.md (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt,
+	*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt,
+	*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift,
+	*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift,
+	*<sse2_avx2>_pmovmskb_lt, *<sse2_avx2>_pmovmskb_zext_lt): New
+	define_insn_and_split patterns.
+
 	PR target/54700
 	* config/i386/sse.md (*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt,
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b4d355e40b2..e2cae71d44a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -14653,6 +14653,78 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI
+	  [(lt:VF_128_256
+	     (match_operand:<sseintvecmode> 1 "register_operand" "x")
+	     (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
+	  UNSPEC_MOVMSK))]
+  "TARGET_SSE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
+  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (unspec:SI
+	    [(lt:VF_128_256
+	       (match_operand:<sseintvecmode> 1 "register_operand" "x")
+	       (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
+	    UNSPEC_MOVMSK)))]
+  "TARGET_64BIT && TARGET_SSE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI
+	  [(subreg:VF_128_256
+	     (ashiftrt:<sseintvecmode>
+	       (match_operand:<sseintvecmode> 1 "register_operand" "x")
+	       (match_operand:QI 2 "const_int_operand" "n")) 0)]
+	  UNSPEC_MOVMSK))]
+  "TARGET_SSE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
+  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (unspec:SI
+	    [(subreg:VF_128_256
+	       (ashiftrt:<sseintvecmode>
+		 (match_operand:<sseintvecmode> 1 "register_operand" "x")
+		 (match_operand:QI 2 "const_int_operand" "n")) 0)]
+	    UNSPEC_MOVMSK)))]
+  "TARGET_64BIT && TARGET_SSE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "<sse2_avx2>_pmovmskb"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI
@@ -14686,6 +14758,49 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SI")])
 
+(define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI
+	  [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
+			(match_operand:VI1_AVX2 2 "const0_operand" "C"))]
+	  UNSPEC_MOVMSK))]
+  "TARGET_SSE2"
+  "#"
+  ""
+  [(set (match_dup 0)
+	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
+  ""
+  [(set_attr "type" "ssemov")
+   (set (attr "prefix_data16")
+     (if_then_else
+       (match_test "TARGET_AVX")
+       (const_string "*")
+       (const_string "1")))
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (unspec:SI
+	    [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
+			  (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
+	    UNSPEC_MOVMSK)))]
+  "TARGET_64BIT && TARGET_SSE2"
+  "#"
+  ""
+  [(set (match_dup 0)
+	(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+  ""
+  [(set_attr "type" "ssemov")
+   (set (attr "prefix_data16")
+     (if_then_else
+       (match_test "TARGET_AVX")
+       (const_string "*")
+       (const_string "1")))
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SI")])
+
 (define_expand "sse2_maskmovdqu"
   [(set (match_operand:V16QI 0 "memory_operand")
 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c69e7a3e184..aae3c61c126 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2018-11-29  Jakub Jelinek  <jakub@redhat.com>
+
+	PR target/88152
+	* g++.target/i386/pr88152.C: New test.
+
 2018-11-29  Vladimir Makarov  <vmakarov@redhat.com>
 
 	* gcc.target/i386/pr34256.c: Adjust the number of expected moves.
diff --git a/gcc/testsuite/g++.target/i386/pr88152.C b/gcc/testsuite/g++.target/i386/pr88152.C
new file mode 100644
index 00000000000..9f1659b7586
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr88152.C
@@ -0,0 +1,44 @@
+// PR target/88152
+// { dg-do compile }
+// { dg-options "-O2 -mavx2 -std=c++11" }
+// { dg-final { scan-assembler-times "vpmovmskb\[^\n\r]*xmm" 6 } }
+// { dg-final { scan-assembler-times "vpmovmskb\[^\n\r]*ymm" 6 } }
+// { dg-final { scan-assembler-times "vmovmskps\[^\n\r]*xmm" 4 } }
+// { dg-final { scan-assembler-times "vmovmskps\[^\n\r]*ymm" 4 } }
+// { dg-final { scan-assembler-times "vmovmskpd\[^\n\r]*xmm" 4 } }
+// { dg-final { scan-assembler-times "vmovmskpd\[^\n\r]*ymm" 4 } }
+// { dg-final { scan-assembler-not "vpcmpgt|vpcmpeq|vpsra" } }
+
+#include <x86intrin.h>
+
+template <typename T, int N>
+using V [[gnu::vector_size(N)]] = T;
+
+int f0 (V<unsigned char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i> (a > 0x7f)); }
+long int f1 (V<unsigned char, 16> a) { return (unsigned) _mm_movemask_epi8 (reinterpret_cast<__m128i> (a >= 0x80)); }
+long int f2 (V<signed char, 16> a) { return (unsigned) _mm_movemask_epi8 (reinterpret_cast<__m128i> (a < 0)); }
+int f3 (V<signed char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i> (a <= -1)); }
+int f4 (V<char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i> (a < 0)); }
+long int f5 (V<char, 16> a) { return (unsigned) _mm_movemask_epi8 (reinterpret_cast<__m128i> (a <= -1)); }
+int f6 (V<unsigned int, 16> a) { return _mm_movemask_ps (reinterpret_cast<__m128> (a > __INT_MAX__)); }
+int f7 (V<unsigned int, 16> a) { return _mm_movemask_ps (reinterpret_cast<__m128> (a >= 1U + __INT_MAX__)); }
+int f8 (V<int, 16> a) { return _mm_movemask_ps (reinterpret_cast<__m128> (a < 0)); }
+int f9 (V<int, 16> a) { return _mm_movemask_ps (reinterpret_cast<__m128> (a <= -1)); }
+int f10 (V<unsigned long long, 16> a) { return _mm_movemask_pd (reinterpret_cast<__m128d> (a > __LONG_LONG_MAX__)); }
+int f11 (V<unsigned long long, 16> a) { return _mm_movemask_pd (reinterpret_cast<__m128d> (a >= 1ULL + __LONG_LONG_MAX__)); }
+long int f12 (V<long long, 16> a) { return (unsigned) _mm_movemask_pd (reinterpret_cast<__m128d> (a < 0)); }
+int f13 (V<long long, 16> a) { return _mm_movemask_pd (reinterpret_cast<__m128d> (a <= -1)); }
+int f14 (V<unsigned char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a > 0x7f)); }
+int f15 (V<unsigned char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a >= 0x80)); }
+long int f16 (V<signed char, 32> a) { return (unsigned) _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a < 0)); }
+int f17 (V<signed char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a <= -1)); }
+int f18 (V<char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a < 0)); }
+int f19 (V<char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a <= -1)); }
+long int f20 (V<unsigned int, 32> a) { return (unsigned) _mm256_movemask_ps (reinterpret_cast<__m256> (a > __INT_MAX__)); }
+int f21 (V<unsigned int, 32> a) { return _mm256_movemask_ps (reinterpret_cast<__m256> (a >= 1U + __INT_MAX__)); }
+int f22 (V<int, 32> a) { return _mm256_movemask_ps (reinterpret_cast<__m256> (a < 0)); }
+int f23 (V<int, 32> a) { return _mm256_movemask_ps (reinterpret_cast<__m256> (a <= -1)); }
+int f24 (V<unsigned long long, 32> a) { return _mm256_movemask_pd (reinterpret_cast<__m256d> (a > __LONG_LONG_MAX__)); }
+int f25 (V<unsigned long long, 32> a) { return _mm256_movemask_pd (reinterpret_cast<__m256d> (a >= 1ULL + __LONG_LONG_MAX__)); }
+int f26 (V<long long, 32> a) { return _mm256_movemask_pd (reinterpret_cast<__m256d> (a < 0)); }
+long int f27 (V<long long, 32> a) { return (unsigned) _mm256_movemask_pd (reinterpret_cast<__m256d> (a <= -1)); }
-- 
2.30.2
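
Note (not part of the committed patch): the new splitters all exploit the fact that pmovmskb/movmskps/movmskpd read only the sign bit of each element, so a compare against zero (lt ... (const_int 0)) or an arithmetic right shift that merely broadcasts the sign bit is redundant when its sole consumer is the UNSPEC_MOVMSK, and the insn can be split down to the bare movmsk instruction. A minimal C++ sketch of the kind of source this helps follows; the function name is invented for illustration, and the exact pre-patch instruction sequence may vary with ISA level.

// Hypothetical example, not taken from the patch or its testsuite.
#include <emmintrin.h>

// Collect the per-byte sign bits of x into a 16-bit mask.
int sign_bits (__m128i x)
{
  // Per-byte "x < 0" mask: 0xff where the sign bit is set, 0x00 elsewhere.
  __m128i lt = _mm_cmplt_epi8 (x, _mm_setzero_si128 ());
  // pmovmskb already looks only at the sign bits, so with the new
  // *<sse2_avx2>_pmovmskb_lt pattern the compare above is split away and a
  // single pmovmskb/vpmovmskb remains instead of pxor + pcmpgtb + pmovmskb.
  return _mm_movemask_epi8 (lt);
}

The testcase exercises the same transformation through GNU vector extensions (for example a < 0 on a V<signed char, 16>), which lowers to the same lt:V16QI RTL that *<sse2_avx2>_pmovmskb_lt matches.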