re PR target/82370 (AVX512 can use a memory operand for immediate-count vpsrlw, but gcc doesn't.)
author: Jakub Jelinek <jakub@redhat.com>
Tue, 24 Oct 2017 19:34:06 +0000 (21:34 +0200)
committer: Jakub Jelinek <jakub@gcc.gnu.org>
Tue, 24 Oct 2017 19:34:06 +0000 (21:34 +0200)
PR target/82370
* config/i386/sse.md (VIMAX_AVX2): Remove V4TImode.
(VIMAX_AVX2_AVX512BW, VIMAX_AVX512VL): New mode iterators.
(vec_shl_<mode>): Remove unused expander.
(avx512bw_<shift_insn><mode>3): New define_insn.
(<sse2_avx2>_ashl<mode>3, <sse2_avx2>_lshr<mode>3): Replaced by ...
(<sse2_avx2>_<shift_insn><mode>3): ... this.  New define_insn.

* gcc.target/i386/pr82370.c: New test.

From-SVN: r254058

gcc/ChangeLog
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr82370.c [new file with mode: 0644]

index a27df40cd624ef6fbf92598c9609ea22f41021bd..b2864cf177bbc3f6bc83764c5d6f820ce4a0e490 100644 (file)
@@ -1,3 +1,13 @@
+2017-10-24  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/82370
+       * config/i386/sse.md (VIMAX_AVX2): Remove V4TImode.
+       (VIMAX_AVX2_AVX512BW, VIMAX_AVX512VL): New mode iterators.
+       (vec_shl_<mode>): Remove unused expander.
+       (avx512bw_<shift_insn><mode>3): New define_insn.
+       (<sse2_avx2>_ashl<mode>3, <sse2_avx2>_lshr<mode>3): Replaced by ...
+       (<sse2_avx2>_<shift_insn><mode>3): ... this.  New define_insn.
+
 2017-10-24  Paolo Carlini  <paolo.carlini@oracle.com>
 
        PR c++/82466
index 35e4bc95c4a0b19666739710afad76110f4a2d01..4f9f2bd0a1cb1a8ec1343b398b30cc7d55805d16 100644 (file)
   [V16SF V16SI])
 
 ;; ??? We should probably use TImode instead.
-(define_mode_iterator VIMAX_AVX2
+(define_mode_iterator VIMAX_AVX2_AVX512BW
   [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
 
-;; ??? This should probably be dropped in favor of VIMAX_AVX2.
+;; Assumes TARGET_AVX512BW as the baseline.
+(define_mode_iterator VIMAX_AVX512VL
+  [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
+
+(define_mode_iterator VIMAX_AVX2
+  [(V2TI "TARGET_AVX2") V1TI])
+
+;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
 (define_mode_iterator SSESCALARMODE
   [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
 
    (set_attr "mode" "<sseinsnmode>")])
 
 
-(define_expand "vec_shl_<mode>"
+(define_expand "vec_shr_<mode>"
   [(set (match_dup 3)
-       (ashift:V1TI
+       (lshiftrt:V1TI
         (match_operand:VI_128 1 "register_operand")
         (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
    (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
   operands[4] = gen_lowpart (<MODE>mode, operands[3]);
 })
 
-(define_insn "<sse2_avx2>_ashl<mode>3"
-  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
-       (ashift:VIMAX_AVX2
-        (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
-        (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
-  "TARGET_SSE2"
+(define_insn "avx512bw_<shift_insn><mode>3"
+  [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
+       (any_lshift:VIMAX_AVX512VL
+        (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
+        (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+  "TARGET_AVX512BW"
 {
   operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
-
-  switch (which_alternative)
-    {
-    case 0:
-      return "pslldq\t{%2, %0|%0, %2}";
-    case 1:
-      return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
-    default:
-      gcc_unreachable ();
-    }
+  return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
 }
-  [(set_attr "isa" "noavx,avx")
-   (set_attr "type" "sseishft")
+  [(set_attr "type" "sseishft")
    (set_attr "length_immediate" "1")
-   (set_attr "prefix_data16" "1,*")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_expand "vec_shr_<mode>"
-  [(set (match_dup 3)
-       (lshiftrt:V1TI
-        (match_operand:VI_128 1 "register_operand")
-        (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
-   (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
-  "TARGET_SSE2"
-{
-  operands[1] = gen_lowpart (V1TImode, operands[1]);
-  operands[3] = gen_reg_rtx (V1TImode);
-  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
-})
-
-(define_insn "<sse2_avx2>_lshr<mode>3"
+(define_insn "<sse2_avx2>_<shift_insn><mode>3"
   [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
-       (lshiftrt:VIMAX_AVX2
+       (any_lshift:VIMAX_AVX2
         (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
         (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
   "TARGET_SSE2"
   switch (which_alternative)
     {
     case 0:
-      return "psrldq\t{%2, %0|%0, %2}";
+      return "p<vshift>dq\t{%2, %0|%0, %2}";
     case 1:
-      return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
+      return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
     default:
       gcc_unreachable ();
     }
index 7801c03d1d3f8a832de9dfec6648db8159b7a645..a33711847637ca7cf8fe9df8ac421dc204ef2fdb 100644 (file)
@@ -1,3 +1,8 @@
+2017-10-24  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/82370
+       * gcc.target/i386/pr82370.c: New test.
+
 2017-10-24  Paolo Carlini  <paolo.carlini@oracle.com>
 
        PR c++/82466
diff --git a/gcc/testsuite/gcc.target/i386/pr82370.c b/gcc/testsuite/gcc.target/i386/pr82370.c
new file mode 100644 (file)
index 0000000..cc4d9b6
--- /dev/null
@@ -0,0 +1,18 @@
+/* PR target/82370 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw -masm=att" } */
+/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %xmm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %xmm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %ymm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %ymm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %zmm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %zmm\[0-9]\+" 1 } } */
+
+#include <x86intrin.h>
+
+__m512i f1 (__m512i *x) { return _mm512_bslli_epi128 (*x, 5); }  /* 512-bit left shift of a loaded operand; presumably the vpslldq/%zmm directive.  */
+__m512i f2 (__m512i *x) { return _mm512_bsrli_epi128 (*x, 5); }  /* 512-bit right shift of a loaded operand; presumably the vpsrldq/%zmm directive.  */
+__m256i f3 (__m256i *x) { return _mm256_bslli_epi128 (*x, 5); }  /* 256-bit left shift of a loaded operand; presumably the vpslldq/%ymm directive.  */
+__m256i f4 (__m256i *x) { return _mm256_bsrli_epi128 (*x, 5); }  /* 256-bit right shift of a loaded operand; presumably the vpsrldq/%ymm directive.  */
+__m128i f5 (__m128i *x) { return _mm_bslli_si128 (*x, 5); }  /* 128-bit left shift of a loaded operand; presumably the vpslldq/%xmm directive.  */
+__m128i f6 (__m128i *x) { return _mm_bsrli_si128 (*x, 5); }  /* 128-bit right shift of a loaded operand; presumably the vpsrldq/%xmm directive.  */