re PR target/85480 (zero extension from xmm to zmm via _mm512_insert???x? not optimized)
author Jakub Jelinek <jakub@redhat.com>
Tue, 8 May 2018 12:02:38 +0000 (14:02 +0200)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Tue, 8 May 2018 12:02:38 +0000 (14:02 +0200)
PR target/85480
* config/i386/sse.md (ssequarterinsnmode): New mode attribute.
(*<extract_type>_vinsert<shuffletype><extract_suf>_0): New pattern.

* gcc.target/i386/avx512dq-pr85480-1.c: New test.
* gcc.target/i386/avx512dq-pr85480-2.c: New test.

From-SVN: r260039

gcc/ChangeLog
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx512dq-pr85480-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512dq-pr85480-2.c [new file with mode: 0644]

index 924a033b8cd7c89dc743f9706c5f3bc5d8106cbd..74c90d83fbb18749f069340c3900fb2f849434f8 100644 (file)
@@ -1,3 +1,9 @@
+2018-05-08  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/85480
+       * config/i386/sse.md (ssequarterinsnmode): New mode attribute.
+       (*<extract_type>_vinsert<shuffletype><extract_suf>_0): New pattern.
+
 2018-05-08  Richard Earnshaw  <rearnsha@arm.com>
 
        PR target/85658
index 858c29ec3e262ae8cd75a2784c6c4d8dc2329a4e..aab4261343e68bd16d7b351cdb10e764bc2ac429 100644 (file)
 (define_mode_attr ssequartermode
   [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
 
+;; Value for the "mode" insn attribute when only a quarter-width (128-bit)
+;; part of a 512-bit vector mode is moved.  Unlike ssequartermode above,
+;; the integer vector modes map to TI rather than to a vector mode.
+(define_mode_attr ssequarterinsnmode
+  [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")])
+
 (define_mode_attr ssedoublemodelower
   [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
    (V8HI "v8si")   (V16HI "v16si") (V32HI "v32si")
   DONE;
 })
 
+;; Insert the quarter-width operand 2 into the lowest quarter (index 0) of
+;; the 512-bit operand 1.  The vec_merge mask in operand 3 must select
+;; operand 1 for every element except the low four 4-byte units (0xFFF0)
+;; or the low two 8-byte units (0xFC); those come from operand 2.
+;; Alternative 0 is the general vinsert with immediate 0.  In alternatives
+;; 1 and 2 operand 1 is constant zero, so the result is operand 2
+;; zero-extended to 512 bits; a 128-bit move into the xmm view of the
+;; destination (%x0) is emitted instead (VEX/EVEX-encoded 128-bit moves
+;; zero the upper bits of the full register).
+(define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
+  [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
+       (vec_merge:AVX512_VEC
+         (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
+         (vec_duplicate:AVX512_VEC
+               (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
+         (match_operand:SI 3 "const_int_operand" "n,n,n")))]
+  "TARGET_AVX512F
+   && (INTVAL (operands[3])
+       == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
+{
+  /* Alternative 0: operand 1 is a register, emit the real insert.  */
+  if (which_alternative == 0)
+    return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
+  /* Alternatives 1 and 2: operand 1 is zero; choose the 128-bit move by
+     vector mode.  For the integer modes, alternative 2 (prefix "evex")
+     uses the vmovdqa64/vmovdqa32 spellings and alternative 1 (prefix
+     "vex") uses plain vmovdqa.
+     NOTE(review): every move emitted here is the aligned form, yet
+     operand 2 may be a memory operand ("xm"/"vm").  Confirm that a
+     misaligned memory operand cannot reach these alternatives.  */
+  switch (<MODE>mode)
+    {
+    case E_V8DFmode:
+      return "vmovapd\t{%2, %x0|%x0, %2}";
+    case E_V16SFmode:
+      return "vmovaps\t{%2, %x0|%x0, %2}";
+    case E_V8DImode:
+      return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
+                                   : "vmovdqa\t{%2, %x0|%x0, %2}";
+    case E_V16SImode:
+      return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
+                                   : "vmovdqa\t{%2, %x0|%x0, %2}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "sselog,ssemov,ssemov")
+   (set_attr "length_immediate" "1,0,0")
+   (set_attr "prefix" "evex,vex,evex")
+   (set_attr "mode" "<sseinsnmode>,<ssequarterinsnmode>,<ssequarterinsnmode>")])
+
 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
   [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
        (vec_merge:AVX512_VEC
index 2628d55dc237a7a470e2343f385d9a6d2adfcd97..6d92f7ee1c2443128689e3eae2fb64b8277a4296 100644 (file)
@@ -1,3 +1,9 @@
+2018-05-08  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/85480
+       * gcc.target/i386/avx512dq-pr85480-1.c: New test.
+       * gcc.target/i386/avx512dq-pr85480-2.c: New test.
+
 2018-05-08  Richard Sandiford  <richard.sandiford@linaro.org>
 
        * g++.dg/other/sve_const_pred_1.C: Rename to...
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-pr85480-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-pr85480-1.c
new file mode 100644 (file)
index 0000000..681477f
--- /dev/null
@@ -0,0 +1,26 @@
+/* PR target/85480: inserting an xmm value at index 0 of an all-zero zmm should be emitted as one 128-bit move per function.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512dq" } */
+/* { dg-final { scan-assembler-times "vmovaps\[^\n\r]*xmm0\[^\n\r]*xmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[^\n\r]*xmm0\[^\n\r]*xmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa\[^\n\r]*xmm0\[^\n\r]*xmm0" 1 } } */
+
+#include <x86intrin.h>
+
+__m512
+f1 (__m128 a)  /* Single-precision case; covered by the vmovaps scan.  */
+{
+  return _mm512_insertf32x4 (_mm512_set1_ps (0.0f), a, 0);
+}
+
+__m512d
+f2 (__m128d a)  /* Double-precision case; covered by the vmovapd scan.  */
+{
+  return _mm512_insertf64x2 (_mm512_set1_pd (0.0), a, 0);
+}
+
+__m512i
+f3 (__m128i a)  /* Integer case; covered by the vmovdqa scan.  */
+{
+  return _mm512_inserti32x4 (_mm512_set1_epi32 (0), a, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-pr85480-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-pr85480-2.c
new file mode 100644 (file)
index 0000000..670870f
--- /dev/null
@@ -0,0 +1,38 @@
+/* PR target/85480: same check as avx512dq-pr85480-1.c, but with source and result pinned to xmm16/xmm17 so the EVEX move spellings (e.g. vmovdqa32) are expected.  */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vmovaps\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+
+#include <x86intrin.h>
+
+__m512
+f1 (__m128 a)  /* Single-precision case; covered by the vmovaps scan.  */
+{
+  register __m128 b __asm ("xmm16");
+  asm ("" : "=v" (b) : "0" (a));  /* Empty asm: output b (pinned to xmm16) is tied to input a.  */
+  register __m512 c __asm ("xmm17") = _mm512_insertf32x4 (_mm512_set1_ps (0.0f), b, 0);
+  asm ("" : "+v" (c));  /* Empty asm reading and writing c, so c must be materialized in its pinned register.  */
+  return c;
+}
+
+__m512d
+f2 (__m128d a)  /* Double-precision case; covered by the vmovapd scan.  */
+{
+  register __m128d b __asm ("xmm16");
+  asm ("" : "=v" (b) : "0" (a));
+  register __m512d c __asm ("xmm17") = _mm512_insertf64x2 (_mm512_set1_pd (0.0), b, 0);
+  asm ("" : "+v" (c));
+  return c;
+}
+
+__m512i
+f3 (__m128i a)  /* Integer case; covered by the vmovdqa32 scan.  */
+{
+  register __m128i b __asm ("xmm16");
+  asm ("" : "=v" (b) : "0" (a));
+  register __m512i c __asm ("xmm17") = _mm512_inserti32x4 (_mm512_set1_epi32 (0), b, 0);
+  asm ("" : "+v" (c));
+  return c;
+}