Add missing mask[z]_roundscale_[round]_s[d,s] intrinsics
authorHongtao Liu <liuhongt@gcc.gnu.org>
Tue, 15 Oct 2019 07:44:15 +0000 (07:44 +0000)
committerHongtao Liu <liuhongt@gcc.gnu.org>
Tue, 15 Oct 2019 07:44:15 +0000 (07:44 +0000)
gcc/
* config/i386/avx512fintrin.h (_mm_mask_roundscale_ss,
_mm_maskz_roundscale_ss, _mm_mask_roundscale_round_ss,
_mm_maskz_roundscale_round_ss, _mm_mask_roundscale_sd,
_mm_maskz_roundscale_sd, _mm_mask_roundscale_round_sd,
_mm_maskz_roundscale_round_sd): New intrinsics.
(_mm_roundscale_ss, _mm_roundscale_round_ss): Use
__builtin_ia32_rndscales?_mask_round builtins instead of
__builtin_ia32_rndscales?_round.
* config/i386/i386-builtin.def (__builtin_ia32_rndscaless_round,
__builtin_ia32_rndscalesd_round): Remove.
(__builtin_ia32_rndscaless_mask_round,
__builtin_ia32_rndscalesd_mask_round): New intrinsics.
* config/i386/sse.md
(avx512f_rndscale<mode><round_saeonly_name>): Renamed to ...
(avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>):
 ... this, adjust and add subst attributes to make it maskable.

gcc/testsuite/
* gcc.target/i386/avx512f-vrndscaless-1.c: Add scan-assembler-times
directives for newly expected instructions.
* gcc.target/i386/avx512f-vrndscalesd-1.c: Likewise.
* gcc.target/i386/avx512f-vrndscaless-2.c
(avx512f_test): Add tests for new intrinsics.
* gcc.target/i386/avx512f-vrndscalesd-2.c: Likewise.
* gcc.target/i386/avx-1.c (__builtin_ia32_rndscaless_round,
__builtin_ia32_rndscalesd_round): Remove.
(__builtin_ia32_rndscaless_mask_round,
__builtin_ia32_rndscalesd_mask_round): Define.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.

From-SVN: r276986

12 files changed:
gcc/ChangeLog
gcc/config/i386/avx512fintrin.h
gcc/config/i386/i386-builtin.def
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx-1.c
gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-1.c
gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-2.c
gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-1.c
gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-2.c
gcc/testsuite/gcc.target/i386/sse-13.c
gcc/testsuite/gcc.target/i386/sse-23.c

index 299bcb1c8a5d24210e7904d7cf0bd85c2c3e89b8..40726e9635421ccc06531dd3f0a20a25ce033ca4 100644 (file)
@@ -1,3 +1,23 @@
+2019-10-15  Hongyu Wang  <hongyu.wang@intel.com>
+
+       PR target/92035
+       * config/i386/avx512fintrin.h (_mm_mask_roundscale_ss,
+       _mm_maskz_roundscale_ss, _mm_mask_roundscale_round_ss,
+       _mm_maskz_roundscale_round_ss, _mm_mask_roundscale_sd,
+       _mm_maskz_roundscale_sd, _mm_mask_roundscale_round_sd,
+       _mm_maskz_roundscale_round_sd): New intrinsics.
+       (_mm_roundscale_ss, _mm_roundscale_round_ss): Use
+       __builtin_ia32_rndscales?_mask_round builtins instead of
+       __builtin_ia32_rndscales?_round.
+       * config/i386/i386-builtin.def (__builtin_ia32_rndscaless_round,
+       __builtin_ia32_rndscalesd_round): Remove.
+       (__builtin_ia32_rndscaless_mask_round,
+       __builtin_ia32_rndscalesd_mask_round): New intrinsics.
+       * config/i386/sse.md
+       (avx512f_rndscale<mode><round_saeonly_name>): Renamed to ...
+       (avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>):
+        ... this, adjust and add subst attributes to make it maskable.
+
 2019-10-15  Richard Biener  <rguenther@suse.de>
 
        PR middle-end/92046
index c2ca4e15acd2c86f7261144c142d7afdfda48fc2..1d08f01a841ec9bc789d819be92e3b616ca0f952 100644 (file)
@@ -9169,10 +9169,43 @@ _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
+_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm,
+                        const int __R)
+{
+  return (__m128)
+    __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
+                                         (__v4sf) __B, __imm,
+                                         (__v4sf)
+                                         _mm_setzero_ps (),
+                                         (__mmask8) -1,
+                                         __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_round_ss (__m128 __A, __mmask8 __B, __m128 __C,
+                             __m128 __D, const int __imm, const int __R)
 {
-  return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
-                                                  (__v4sf) __B, __imm, __R);
+  return (__m128)
+    __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
+                                         (__v4sf) __D, __imm,
+                                         (__v4sf) __A,
+                                         (__mmask8) __B,
+                                         __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_round_ss (__mmask8 __A, __m128 __B, __m128 __C,
+                              const int __imm, const int __R)
+{
+  return (__m128)
+    __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
+                                         (__v4sf) __C, __imm,
+                                         (__v4sf)
+                                         _mm_setzero_ps (),
+                                         (__mmask8) __A,
+                                         __R);
 }
 
 extern __inline __m128d
@@ -9180,8 +9213,40 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
                         const int __R)
 {
-  return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
-                                                   (__v2df) __B, __imm, __R);
+  return (__m128d)
+    __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
+                                         (__v2df) __B, __imm,
+                                         (__v2df)
+                                         _mm_setzero_pd (),
+                                         (__mmask8) -1,
+                                         __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_round_sd (__m128d __A, __mmask8 __B, __m128d __C,
+                             __m128d __D, const int __imm, const int __R)
+{
+  return (__m128d)
+    __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
+                                         (__v2df) __D, __imm,
+                                         (__v2df) __A,
+                                         (__mmask8) __B,
+                                         __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
+                              const int __imm, const int __R)
+{
+  return (__m128d)
+    __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
+                                         (__v2df) __C, __imm,
+                                         (__v2df)
+                                         _mm_setzero_pd (),
+                                         (__mmask8) __A,
+                                         __R);
 }
 
 #else
@@ -9211,12 +9276,54 @@ _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
                                             (int)(C),                  \
                                             (__v8df)_mm512_setzero_pd(),\
                                             (__mmask8)(A), R))
-#define _mm_roundscale_round_ss(A, B, C, R)                                    \
-  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),      \
-    (__v4sf)(__m128)(B), (int)(C), R))
-#define _mm_roundscale_round_sd(A, B, C, R)                                    \
-  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),    \
-    (__v2df)(__m128d)(B), (int)(C), R))
+#define _mm_roundscale_round_ss(A, B, I, R)                            \
+  ((__m128)                                                            \
+   __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A),                \
+                                        (__v4sf) (__m128) (B),         \
+                                        (int) (I),                     \
+                                        (__v4sf) _mm_setzero_ps (),    \
+                                        (__mmask8) (-1),               \
+                                        (int) (R)))
+#define _mm_mask_roundscale_round_ss(A, U, B, C, I, R)         \
+  ((__m128)                                                    \
+   __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B),        \
+                                        (__v4sf) (__m128) (C), \
+                                        (int) (I),             \
+                                        (__v4sf) (__m128) (A), \
+                                        (__mmask8) (U),        \
+                                        (int) (R)))
+#define _mm_maskz_roundscale_round_ss(U, A, B, I, R)                   \
+  ((__m128)                                                            \
+   __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A),                \
+                                        (__v4sf) (__m128) (B),         \
+                                        (int) (I),                     \
+                                        (__v4sf) _mm_setzero_ps (),    \
+                                        (__mmask8) (U),                \
+                                        (int) (R)))
+#define _mm_roundscale_round_sd(A, B, I, R)                            \
+  ((__m128d)                                                           \
+   __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A),       \
+                                        (__v2df) (__m128d) (B),        \
+                                        (int) (I),                     \
+                                        (__v2df) _mm_setzero_pd (),    \
+                                        (__mmask8) (-1),               \
+                                        (int) (R)))
+#define _mm_mask_roundscale_round_sd(A, U, B, C, I, R)                 \
+  ((__m128d)                                                           \
+   __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B),       \
+                                        (__v2df) (__m128d) (C),        \
+                                        (int) (I),                     \
+                                        (__v2df) (__m128d) (A),        \
+                                        (__mmask8) (U),                \
+                                        (int) (R)))
+#define _mm_maskz_roundscale_round_sd(U, A, B, I, R)                   \
+  ((__m128d)                                                           \
+   __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A),       \
+                                        (__v2df) (__m128d) (B),        \
+                                        (int) (I),                     \
+                                        (__v2df) _mm_setzero_pd (),    \
+                                        (__mmask8) (U),                \
+                                        (int) (R)))
 #endif
 
 extern __inline __m512
@@ -14812,18 +14919,81 @@ extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
 {
-  return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
-                                                  (__v4sf) __B, __imm,
-                                                  _MM_FROUND_CUR_DIRECTION);
+  return (__m128)
+    __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
+                                         (__v4sf) __B, __imm,
+                                         (__v4sf)
+                                         _mm_setzero_ps (),
+                                         (__mmask8) -1,
+                                         _MM_FROUND_CUR_DIRECTION);
+}
+
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_ss (__m128 __A, __mmask8 __B, __m128 __C, __m128 __D,
+                       const int __imm)
+{
+  return (__m128)
+    __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
+                                         (__v4sf) __D, __imm,
+                                         (__v4sf) __A,
+                                         (__mmask8) __B,
+                                         _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_ss (__mmask8 __A, __m128 __B, __m128 __C,
+                        const int __imm)
+{
+  return (__m128)
+    __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
+                                         (__v4sf) __C, __imm,
+                                         (__v4sf)
+                                         _mm_setzero_ps (),
+                                         (__mmask8) __A,
+                                         _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
 {
-  return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
-                                                   (__v2df) __B, __imm,
-                                                  _MM_FROUND_CUR_DIRECTION);
+  return (__m128d)
+    __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
+                                         (__v2df) __B, __imm,
+                                         (__v2df)
+                                         _mm_setzero_pd (),
+                                         (__mmask8) -1,
+                                         _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_sd (__m128d __A, __mmask8 __B, __m128d __C, __m128d __D,
+                       const int __imm)
+{
+  return (__m128d)
+    __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
+                                         (__v2df) __D, __imm,
+                                         (__v2df) __A,
+                                         (__mmask8) __B,
+                                         _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_sd (__mmask8 __A, __m128d __B, __m128d __C,
+                        const int __imm)
+{
+  return (__m128d)
+    __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
+                                         (__v2df) __C, __imm,
+                                         (__v2df)
+                                         _mm_setzero_pd (),
+                                         (__mmask8) __A,
+                                         _MM_FROUND_CUR_DIRECTION);
 }
 
 #else
@@ -14853,12 +15023,54 @@ _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
                                             (int)(C),                  \
                                             (__v8df)_mm512_setzero_pd(),\
                                             (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
-#define _mm_roundscale_ss(A, B, C)                                     \
-  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),      \
-  (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
-#define _mm_roundscale_sd(A, B, C)                                     \
-  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),    \
-    (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
+#define _mm_roundscale_ss(A, B, I)                                     \
+  ((__m128)                                                            \
+   __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A),                \
+                                        (__v4sf) (__m128) (B),         \
+                                        (int) (I),                     \
+                                        (__v4sf) _mm_setzero_ps (),    \
+                                        (__mmask8) (-1),               \
+                                        _MM_FROUND_CUR_DIRECTION))
+#define _mm_mask_roundscale_ss(A, U, B, C, I)                          \
+  ((__m128)                                                            \
+   __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B),                \
+                                        (__v4sf) (__m128) (C),         \
+                                        (int) (I),                     \
+                                        (__v4sf) (__m128) (A),         \
+                                        (__mmask8) (U),                \
+                                        _MM_FROUND_CUR_DIRECTION))
+#define _mm_maskz_roundscale_ss(U, A, B, I)                            \
+  ((__m128)                                                            \
+   __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A),                \
+                                        (__v4sf) (__m128) (B),         \
+                                        (int) (I),                     \
+                                        (__v4sf) _mm_setzero_ps (),    \
+                                        (__mmask8) (U),                \
+                                        _MM_FROUND_CUR_DIRECTION))
+#define _mm_roundscale_sd(A, B, I)                                     \
+  ((__m128d)                                                           \
+   __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A),       \
+                                        (__v2df) (__m128d) (B),        \
+                                        (int) (I),                     \
+                                        (__v2df) _mm_setzero_pd (),    \
+                                        (__mmask8) (-1),               \
+                                        _MM_FROUND_CUR_DIRECTION))
+#define _mm_mask_roundscale_sd(A, U, B, C, I)                          \
+  ((__m128d)                                                           \
+   __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B),       \
+                                        (__v2df) (__m128d) (C),        \
+                                        (int) (I),                     \
+                                        (__v2df) (__m128d) (A),        \
+                                        (__mmask8) (U),                \
+                                        _MM_FROUND_CUR_DIRECTION))
+#define _mm_maskz_roundscale_sd(U, A, B, I)                            \
+  ((__m128d)                                                           \
+   __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A),       \
+                                        (__v2df) (__m128d) (B),        \
+                                        (int) (I),                     \
+                                        (__v2df) _mm_setzero_pd (),    \
+                                        (__mmask8) (U),                \
+                                        _MM_FROUND_CUR_DIRECTION))
 #endif
 
 #ifdef __OPTIMIZE__
index 6ac820eb8977da970fae0a62d639bfb3b843d562..11028331cda6588b1d36a7b0f78064b0556c85b6 100644 (file)
@@ -2828,8 +2828,8 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia3
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmmulv4sf3_mask_round, "__builtin_ia32_mulss_mask_round", IX86_BUILTIN_MULSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev2df_mask_round, "__builtin_ia32_rndscalesd_mask_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev4sf_mask_round, "__builtin_ia32_rndscaless_mask_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmscalefv2df_mask_round, "__builtin_ia32_scalefsd_mask_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
index 07922a1bf971fc64751259fa4af2f7ad72692f86..f474eed1c4e1a0dad4efbe68e36c9c0f13f0baff 100644 (file)
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_rndscale<mode><round_saeonly_name>"
+(define_insn "avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
-           [(match_operand:VF_128 1 "register_operand" "v")
-            (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+           [(match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
             (match_operand:SI 3 "const_0_to_255_operand")]
            UNSPEC_ROUND)
-         (match_dup 1)
+         (match_operand:VF_128 1 "register_operand" "v")
          (const_int 1)))]
   "TARGET_AVX512F"
-  "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
+  "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
   [(set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
index 8ff78713652fdc78e4f87e06e353ea45a566d0cf..34854d29cf75f60dbc3bf575fcee4688db81d8fe 100644 (file)
@@ -1,3 +1,19 @@
+2019-10-15  Hongyu Wang  <hongyu.wang@intel.com>
+
+       PR target/92035
+       * gcc.target/i386/avx512f-vrndscaless-1.c: Add scan-assembler-times
+       directives for newly expected instructions.
+       * gcc.target/i386/avx512f-vrndscalesd-1.c: Likewise.
+       * gcc.target/i386/avx512f-vrndscaless-2.c
+       (avx512f_test): Add tests for new intrinsics.
+       * gcc.target/i386/avx512f-vrndscalesd-2.c: Likewise.
+       * gcc.target/i386/avx-1.c (__builtin_ia32_rndscaless_round,
+       __builtin_ia32_rndscalesd_round): Remove.
+       (__builtin_ia32_rndscaless_mask_round,
+       __builtin_ia32_rndscalesd_mask_round): Define.
+       * gcc.target/i386/sse-13.c: Ditto.
+       * gcc.target/i386/sse-23.c: Ditto.
+
 2019-10-15  Richard Biener  <rguenther@suse.de>
 
        PR middle-end/92046
index 741b3c4f8e3fb7524720ecb8b128445f5daa5baf..3600a7abe9156a39abb027c71789c2b1c45ef775 100644 (file)
 #define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E)
 #define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 8)
 #define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 8)
-#define __builtin_ia32_rndscalesd_round(A, B, C, D) __builtin_ia32_rndscalesd_round(A, B, 1, 4)
-#define __builtin_ia32_rndscaless_round(A, B, C, D) __builtin_ia32_rndscaless_round(A, B, 1, 4)
+#define __builtin_ia32_rndscalesd_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesd_mask_round(A, B, 1, D, E, 4)
+#define __builtin_ia32_rndscaless_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscaless_mask_round(A, B, 1, D, E, 4)
 #define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_scalefsd_mask_round(A, B, C, D, E) __builtin_ia32_scalefsd_mask_round(A, B, C, D, 8)
index 255b384d5658d5a889a3a015584851e4745262b2..f95d470960780a04b270ef4eec68da67fe7aeb59 100644 (file)
@@ -1,14 +1,24 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
 volatile __m128d x1, x2;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
   x1 = _mm_roundscale_sd (x1, x2, 0x42);
   x1 = _mm_roundscale_round_sd (x1, x2, 0x42, _MM_FROUND_NO_EXC);
+  x1 = _mm_mask_roundscale_sd (x1, m, x1, x2, 0x42);
+  x1 = _mm_mask_roundscale_round_sd (x1, m, x1, x2, 0x42, _MM_FROUND_NO_EXC);
+  x1 = _mm_maskz_roundscale_sd (m, x1, x2, 0x42);
+  x1 = _mm_maskz_roundscale_round_sd (m, x1, x2, 0x42, _MM_FROUND_NO_EXC);
 }
index b96aa462790e31107d2b34c48ab42bf67e61ea7a..83b940d96360a4a38b458c75e0f77808f18edffd 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <math.h>
 #include "avx512f-check.h"
+#include "avx512f-mask-type.h"
 
 static void
 compute_rndscalesd (double *s1, double *s2, double *r, int imm)
@@ -33,17 +34,54 @@ compute_rndscalesd (double *s1, double *s2, double *r, int imm)
 static void
 avx512f_test (void)
 {
-  int imm = _MM_FROUND_FLOOR | (7 << 4);
-  union128d s1, s2, res1;
+  int i, imm;
+  union128d s1, s2, res1, res2, res3, res4, res5, res6;
   double res_ref[SIZE];
+  
+  MASK_TYPE mask = MASK_VALUE;
+
+  imm = _MM_FROUND_FLOOR | (7 << 4);
 
   s1.x = _mm_set_pd (4.05084, -1.23162);
   s2.x = _mm_set_pd (-3.53222, 7.33527);
 
+  for(i = 0; i < SIZE; i++)
+    {
+      res2.a[i] = DEFAULT_VALUE;
+      res5.a[i] = DEFAULT_VALUE;
+    }
+
   res1.x = _mm_roundscale_sd (s1.x, s2.x, imm);
+  res2.x = _mm_mask_roundscale_sd (res2.x, mask, s1.x, s2.x, imm);
+  res3.x = _mm_maskz_roundscale_sd (mask, s1.x, s2.x, imm);
+  res4.x = _mm_roundscale_round_sd (s1.x, s2.x, imm, _MM_FROUND_NO_EXC);
+  res5.x = _mm_mask_roundscale_round_sd (res5.x, mask, s1.x, s2.x, imm, _MM_FROUND_NO_EXC);
+  res6.x = _mm_maskz_roundscale_round_sd (mask, s1.x, s2.x, imm, _MM_FROUND_NO_EXC);
 
   compute_rndscalesd (s1.a, s2.a, res_ref, imm);
 
   if (check_union128d (res1, res_ref))
     abort ();
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res2, res_ref))
+    abort ();
+  
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res3, res_ref))
+    abort ();
+
+  compute_rndscalesd (s1.a, s2.a, res_ref, imm);
+
+  if (check_union128d (res4, res_ref))
+    abort ();
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res5, res_ref))
+    abort ();
+  
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res6, res_ref))
+    abort ();
+
 }
index dbd6e21b76207f1439eb0423b2285f0245751ba3..19e3a973fa403efbeda2ff5ec0cf8c264958871a 100644 (file)
@@ -1,14 +1,24 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
 volatile __m128 x1, x2;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
   x1 = _mm_roundscale_ss (x1, x2, 0x42);
   x1 = _mm_roundscale_round_ss (x1, x2, 0x42, _MM_FROUND_NO_EXC);
+  x1 = _mm_mask_roundscale_ss (x1, m, x1, x2, 0x42);
+  x1 = _mm_mask_roundscale_round_ss (x1, m, x1, x2, 0x42, _MM_FROUND_NO_EXC);
+  x1 = _mm_maskz_roundscale_ss (m, x1, x2, 0x42);
+  x1 = _mm_maskz_roundscale_round_ss (m, x1, x2, 0x42, _MM_FROUND_NO_EXC);
 }
index 42dd645ab8797ab7b27b616e15714a037df699ee..6906880d3628bc9a9a7f9b76b90a86c6a6b05f8d 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <math.h>
 #include "avx512f-check.h"
+#include "avx512f-mask-type.h"
 
 static void
 compute_rndscaless (float *s1, float *s2, float *r, int imm)
@@ -35,17 +36,53 @@ compute_rndscaless (float *s1, float *s2, float *r, int imm)
 static void
 avx512f_test (void)
 {
-  int imm = _MM_FROUND_FLOOR | (7 << 4);
-  union128 s1, s2, res1;
+  int i, imm;
+  union128 s1, s2, res1, res2, res3, res4, res5, res6;
   float res_ref[SIZE];
+  
+  MASK_TYPE mask = MASK_VALUE;
 
+  imm = _MM_FROUND_FLOOR | (7 << 4);
+  
   s1.x = _mm_set_ps (4.05084, -1.23162, 2.00231, -6.22103);
   s2.x = _mm_set_ps (-4.19319, -3.53222, 7.33527, 5.57655);
+  for(i = 0; i < SIZE; i++)
+    {
+      res2.a[i] = DEFAULT_VALUE;
+      res5.a[i] = DEFAULT_VALUE;
+    }
 
   res1.x = _mm_roundscale_ss (s1.x, s2.x, imm);
+  res2.x = _mm_mask_roundscale_ss (res2.x, mask, s1.x, s2.x, imm);
+  res3.x = _mm_maskz_roundscale_ss (mask, s1.x, s2.x, imm);
+  res4.x = _mm_roundscale_round_ss (s1.x, s2.x, imm, _MM_FROUND_NO_EXC);
+  res5.x = _mm_mask_roundscale_round_ss (res5.x, mask, s1.x, s2.x, imm, _MM_FROUND_NO_EXC);
+  res6.x = _mm_maskz_roundscale_round_ss (mask, s1.x, s2.x, imm, _MM_FROUND_NO_EXC);
 
   compute_rndscaless (s1.a, s2.a, res_ref, imm);
 
   if (check_union128 (res1, res_ref))
     abort ();
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res2, res_ref))
+    abort ();
+  
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res3, res_ref))
+    abort ();
+
+  compute_rndscaless (s1.a, s2.a, res_ref, imm);
+
+  if (check_union128 (res4, res_ref))
+    abort ();
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res5, res_ref))
+    abort ();
+  
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res6, res_ref))
+    abort ();
 }
index 39b2d31578c67a1ee3be5e01fb7afce2f2cf150f..45c1c285c572ac0ecc1f668ce268a429ee537996 100644 (file)
 #define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E)
 #define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 8)
 #define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 8)
-#define __builtin_ia32_rndscalesd_round(A, B, C, D) __builtin_ia32_rndscalesd_round(A, B, 1, 4)
-#define __builtin_ia32_rndscaless_round(A, B, C, D) __builtin_ia32_rndscaless_round(A, B, 1, 4)
+#define __builtin_ia32_rndscalesd_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesd_mask_round(A, B, 1, D, E, 4)
+#define __builtin_ia32_rndscaless_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscaless_mask_round(A, B, 1, D, E, 4)
 #define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_scalefsd_mask_round(A, B, C, D, E) __builtin_ia32_scalefsd_mask_round(A, B, C, D, 8)
index 7ea665de747a788280f07acda10bd0513685742e..e98c7693ef73cbdcfce05bfdaeb0f7658b4f8528 100644 (file)
 #define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E)
 #define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 8)
 #define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 8)
-#define __builtin_ia32_rndscalesd_round(A, B, C, D) __builtin_ia32_rndscalesd_round(A, B, 1, 4)
-#define __builtin_ia32_rndscaless_round(A, B, C, D) __builtin_ia32_rndscaless_round(A, B, 1, 4)
+#define __builtin_ia32_rndscalesd_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesd_mask_round(A, B, 1, D, E, 4)
+#define __builtin_ia32_rndscaless_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscaless_mask_round(A, B, 1, D, E, 4)
 #define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_scalefsd_mask_round(A, B, C, D, E) __builtin_ia32_scalefsd_mask_round(A, B, C, D, 8)