Fix zero-masking for vcvtps2ph when dest operand is memory.
authorliuhongt <hongtao.liu@intel.com>
Fri, 29 May 2020 05:38:49 +0000 (13:38 +0800)
committerliuhongt <hongtao.liu@intel.com>
Thu, 4 Jun 2020 12:04:40 +0000 (20:04 +0800)
When dest is memory, zero-masking is not valid, only merging-masking is available,

2020-06-24  Hongtao Liu  <hongtao.liu@inte.com>

gcc/ChangeLog:
PR target/95254
* config/i386/sse.md (*vcvtps2ph_store<merge_mask_name>):
Refine from *vcvtps2ph_store<mask_name>.
(vcvtps2ph256<mask_name>): Refine constraint from vm to v.
(<mask_codefor>avx512f_vcvtps2ph512<mask_name>): Ditto.
(*vcvtps2ph256<merge_mask_name>): New define_insn.
(*avx512f_vcvtps2ph512<merge_mask_name>): Ditto.
* config/i386/subst.md (merge_mask): New define_subst.
(merge_mask_name): New define_subst_attr.
(merge_mask_operand3): Ditto.

gcc/testsuite/ChangeLog:
* gcc.target/i386/avx512f-vcvtps2ph-pr95254.c: New test.
* gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c: Ditto.

gcc/config/i386/sse.md
gcc/config/i386/subst.md
gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c [new file with mode: 0644]

index 87354451c58b9529fedee177a4b27eddd4fac539..7815d77bcbfdbe318fefd974e881c3b6db9a6171 100644 (file)
    (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "V4SF")])
 
-(define_insn "*vcvtps2ph_store<mask_name>"
+(define_insn "*vcvtps2ph_store<merge_mask_name>"
   [(set (match_operand:V4HI 0 "memory_operand" "=m")
        (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
                      (match_operand:SI 2 "const_0_to_255_operand" "N")]
                     UNSPEC_VCVTPS2PH))]
   "TARGET_F16C || TARGET_AVX512VL"
-  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+  "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "V4SF")])
 
 (define_insn "vcvtps2ph256<mask_name>"
-  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
        (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
                      (match_operand:SI 2 "const_0_to_255_operand" "N")]
                     UNSPEC_VCVTPS2PH))]
    (set_attr "btver2_decode" "vector")
    (set_attr "mode" "V8SF")])
 
+(define_insn "*vcvtps2ph256<merge_mask_name>"
+  [(set (match_operand:V8HI 0 "memory_operand" "=m")
+       (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
+                     (match_operand:SI 2 "const_0_to_255_operand" "N")]
+                    UNSPEC_VCVTPS2PH))]
+  "TARGET_F16C || TARGET_AVX512VL"
+  "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "maybe_evex")
+   (set_attr "btver2_decode" "vector")
+   (set_attr "mode" "V8SF")])
+
 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
-  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
+  [(set (match_operand:V16HI 0 "register_operand" "=v")
        (unspec:V16HI
          [(match_operand:V16SF 1 "register_operand" "v")
           (match_operand:SI 2 "const_0_to_255_operand" "N")]
    (set_attr "prefix" "evex")
    (set_attr "mode" "V16SF")])
 
+(define_insn "*avx512f_vcvtps2ph512<merge_mask_name>"
+  [(set (match_operand:V16HI 0 "memory_operand" "=m")
+       (unspec:V16HI
+         [(match_operand:V16SF 1 "register_operand" "v")
+          (match_operand:SI 2 "const_0_to_255_operand" "N")]
+         UNSPEC_VCVTPS2PH))]
+  "TARGET_AVX512F"
+  "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V16SF")])
+
 ;; For gather* insn patterns
 (define_mode_iterator VEC_GATHER_MODE
                      [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
index a5ca144c7f70ac148c3ef798c867775ea4cca9d3..58ea9dc83e27c74a2f2e84670b1c3be6bd78c6bf 100644 (file)
          (match_operand:SUBST_V 2 "nonimm_or_0_operand" "0C")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))])
 
+(define_subst_attr "merge_mask_name" "merge_mask" "" "_merge_mask")
+(define_subst_attr "merge_mask_operand3" "merge_mask" "" "%{%3%}")
+(define_subst "merge_mask"
+  [(set (match_operand:SUBST_V 0)
+        (match_operand:SUBST_V 1))]
+  "TARGET_AVX512F"
+  [(set (match_dup 0)
+        (vec_merge:SUBST_V
+         (match_dup 1)
+         (match_dup 0)
+         (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))])
+
 (define_subst_attr "mask_scalar_merge_name" "mask_scalar_merge" "" "_mask")
 (define_subst_attr "mask_scalar_merge_operand3" "mask_scalar_merge" "" "%{%3%}")
 (define_subst_attr "mask_scalar_merge_operand4" "mask_scalar_merge" "" "%{%4%}")
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c
new file mode 100644 (file)
index 0000000..9e0da94
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include<immintrin.h>
+extern __m256i res;
+void
+foo (__m512 a, __mmask16 m)
+{
+  res = _mm512_maskz_cvtps_ph (m, a, 10);
+}
+
+/* { dg-final { scan-assembler-not "vcvtps2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]\[^\n\]*res\[^\n\]*\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"} } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c
new file mode 100644 (file)
index 0000000..0c685ea
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mavx512f" } */
+
+#include<immintrin.h>
+extern __m128i res;
+void
+foo (__m256 a, __mmask8 m)
+{
+  res = _mm256_maskz_cvtps_ph (m, a, 10);
+}
+
+void
+foo1 (__m128 a, __mmask8 m)
+{
+  res = _mm_maskz_cvtps_ph (m, a, 10);
+}
+
+/* { dg-final { scan-assembler-not "vcvtps2ph\[ \\t\]+\[^\{\n\]*%\[xy\]mm\[0-9\]\[^\n\]*res\[^\n\]*\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"} } */