i386: Optimize {,v}{,p}movmsk{b,ps,pd} followed by sign extension [PR91824]
author     Jakub Jelinek <jakub@redhat.com>
           Thu, 30 Jan 2020 08:41:00 +0000 (09:41 +0100)
committer  Jakub Jelinek <jakub@redhat.com>
           Thu, 30 Jan 2020 08:41:00 +0000 (09:41 +0100)
Some time ago, patterns were added to optimize a move mask followed by zero
extension from 32 bits to 64 bits.  As the testcase shows, the intrinsics
actually return int, not unsigned int, so quite often one actually needs
sign extension instead of zero extension.  Except for vpmovmskb with a
256-bit operand, sign vs. zero extension makes no difference, as we know
bit 31 will not be set (the source has 2 or 4 doubles, 4 or 8 floats, or
16 chars, so the mask uses at most the low 16 bits).
So, for the floating point patterns, this patch just uses a code iterator
so that both zero extend and sign extend are handled, and for the byte
variant it adds separate sign-extending patterns for the 128-bit operand.
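
As a quick illustration (a sketch along the lines of the new testcase,
with a made-up function name), this is the kind of code the sign-extending
patterns are meant to catch when compiled with -O2 on x86_64:

  #include <x86intrin.h>

  unsigned long long
  mask_to_u64 (__m128i x)
  {
    /* _mm_movemask_epi8 returns int, so the conversion to unsigned long
       long is a sign extension; since the mask fits in 16 bits, it can
       be folded into the pmovmskb pattern instead of emitting a cltq.  */
    return _mm_movemask_epi8 (x);
  }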

2020-01-30  Jakub Jelinek  <jakub@redhat.com>

PR target/91824
* config/i386/sse.md
(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext): Renamed to ...
(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext): ... this.  Use
any_extend code iterator instead of always zero_extend.
(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt): Renamed to ...
(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt): ... this.
Use any_extend code iterator instead of always zero_extend.
(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift): Renamed to ...
(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift): ... this.
Use any_extend code iterator instead of always zero_extend.
(*sse2_pmovmskb_ext): New define_insn.
(*sse2_pmovmskb_ext_lt): New define_insn_and_split.

* gcc.target/i386/pr91824-2.c: New test.

gcc/ChangeLog
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr91824-2.c [new file with mode: 0644]

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c86b9c2aa509736d68c3d1f11d962c5eb911544e..4beb63934bc9984ca09aefa6ab93972cbb666010 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,19 @@
 2020-01-30  Jakub Jelinek  <jakub@redhat.com>
 
+       PR target/91824
+       * config/i386/sse.md
+       (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext): Renamed to ...
+       (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext): ... this.  Use
+       any_extend code iterator instead of always zero_extend.
+       (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt): Renamed to ...
+       (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt): ... this.
+       Use any_extend code iterator instead of always zero_extend.
+       (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift): Renamed to ...
+       (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift): ... this.
+       Use any_extend code iterator instead of always zero_extend.
+       (*sse2_pmovmskb_ext): New define_insn.
+       (*sse2_pmovmskb_ext_lt): New define_insn_and_split.
+
        PR target/91824
        * config/i386/i386.md (*popcountsi2_zext): New define_insn_and_split.
        (*popcountsi2_zext_falsedep): New define_insn.
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index abbd879aab35c3512b25e56087cbb5b87a50b004..f5ff2e930213b771704ed7d3dc74dbc10dc52026 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
+(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
   [(set (match_operand:DI 0 "register_operand" "=r")
-       (zero_extend:DI
+       (any_extend:DI
          (unspec:SI
            [(match_operand:VF_128_256 1 "register_operand" "x")]
            UNSPEC_MOVMSK)))]
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt"
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
   [(set (match_operand:DI 0 "register_operand" "=r")
-       (zero_extend:DI
+       (any_extend:DI
          (unspec:SI
            [(lt:VF_128_256
               (match_operand:<sseintvecmode> 1 "register_operand" "x")
   "#"
   "&& reload_completed"
   [(set (match_dup 0)
-       (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+       (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
   "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "maybe_vex")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift"
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
   [(set (match_operand:DI 0 "register_operand" "=r")
-       (zero_extend:DI
+       (any_extend:DI
          (unspec:SI
            [(subreg:VF_128_256
               (ashiftrt:<sseintvecmode>
   "#"
   "&& reload_completed"
   [(set (match_dup 0)
-       (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+       (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
   "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "maybe_vex")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SI")])
 
+(define_insn "*sse2_pmovmskb_ext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (sign_extend:DI
+         (unspec:SI
+           [(match_operand:V16QI 1 "register_operand" "x")]
+           UNSPEC_MOVMSK)))]
+  "TARGET_64BIT && TARGET_SSE2"
+  "%vpmovmskb\t{%1, %k0|%k0, %1}"
+  [(set_attr "type" "ssemov")
+   (set (attr "prefix_data16")
+     (if_then_else
+       (match_test "TARGET_AVX")
+     (const_string "*")
+     (const_string "1")))
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SI")])
+
 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
   [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SI")])
 
+(define_insn_and_split "*sse2_pmovmskb_ext_lt"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (sign_extend:DI
+         (unspec:SI
+           [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
+                      (match_operand:V16QI 2 "const0_operand" "C"))]
+           UNSPEC_MOVMSK)))]
+  "TARGET_64BIT && TARGET_SSE2"
+  "#"
+  ""
+  [(set (match_dup 0)
+       (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+  ""
+  [(set_attr "type" "ssemov")
+   (set (attr "prefix_data16")
+     (if_then_else
+       (match_test "TARGET_AVX")
+     (const_string "*")
+     (const_string "1")))
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SI")])
+
 (define_expand "sse2_maskmovdqu"
   [(set (match_operand:V16QI 0 "memory_operand")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9b3660645efe4bec4425b3f7979757358f8f0bea..842e4198917db7468ce626e045f98dbaf5ab8e6d 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,8 @@
 2020-01-30  Jakub Jelinek  <jakub@redhat.com>
 
+       PR target/91824
+       * gcc.target/i386/pr91824-2.c: New test.
+
        PR target/91824
        * gcc.target/i386/pr91824-1.c: New test.
 
diff --git a/gcc/testsuite/gcc.target/i386/pr91824-2.c b/gcc/testsuite/gcc.target/i386/pr91824-2.c
new file mode 100644
index 0000000..bdf1295
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr91824-2.c
@@ -0,0 +1,73 @@
+/* PR target/91824 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+/* { dg-final { scan-assembler-not "cltq" } } */
+/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
+
+#include <x86intrin.h>
+
+unsigned long long
+f1 (__m128i x)
+{
+  return _mm_movemask_epi8 (x);
+}
+
+unsigned long long
+f2 (__m128i x)
+{
+  return (unsigned) _mm_movemask_epi8 (x);
+}
+
+unsigned long long
+f3 (__m128 x)
+{
+  return _mm_movemask_ps (x);
+}
+
+unsigned long long
+f4 (__m128 x)
+{
+  return (unsigned) _mm_movemask_ps (x);
+}
+
+unsigned long long
+f5 (__m128d x)
+{
+  return _mm_movemask_pd (x);
+}
+
+unsigned long long
+f6 (__m128d x)
+{
+  return (unsigned) _mm_movemask_pd (x);
+}
+
+unsigned long long
+f7 (__m256 x)
+{
+  return _mm256_movemask_ps (x);
+}
+
+unsigned long long
+f8 (__m256 x)
+{
+  return (unsigned) _mm256_movemask_ps (x);
+}
+
+unsigned long long
+f9 (__m256d x)
+{
+  return _mm256_movemask_pd (x);
+}
+
+unsigned long long
+f10 (__m256d x)
+{
+  return (unsigned) _mm256_movemask_pd (x);
+}
+
+unsigned long long
+f11 (__m256i x)
+{
+  return (unsigned) _mm256_movemask_epi8 (x);
+}