Add AVX512 k-mask intrinsics.
authorAndrew Senkevich <andrew.senkevich@intel.com>
Fri, 20 Jan 2017 08:37:13 +0000 (08:37 +0000)
committerKirill Yukhin <kyukhin@gcc.gnu.org>
Fri, 20 Jan 2017 08:37:13 +0000 (08:37 +0000)
gcc/
* config/i386/avx512bwintrin.h: Add k-mask registers shift intrinsics.
* config/i386/avx512dqintrin.h: Ditto.
* config/i386/avx512fintrin.h: Ditto.
* config/i386/i386-builtin-types.def: Add new types.
* gcc/config/i386/i386.c: Handle new types.
* config/i386/i386-builtin.def (__builtin_ia32_kshiftliqi)
(__builtin_ia32_kshiftlihi, __builtin_ia32_kshiftlisi)
(__builtin_ia32_kshiftlidi, __builtin_ia32_kshiftriqi)
(__builtin_ia32_kshiftrihi, __builtin_ia32_kshiftrisi)
(__builtin_ia32_kshiftridi): New.
* config/i386/sse.md (k<code><mode>): Rename *k<code><mode>.

gcc/testsuite/
* gcc.target/i386/avx512bw-kshiftld-1.c: New test.
* gcc.target/i386/avx512bw-kshiftlq-1.c: Ditto.
* gcc.target/i386/avx512dq-kshiftlb-1.c: Ditto.
* gcc.target/i386/avx512f-kshiftlw-1.c: Ditto.
* gcc.target/i386/avx512bw-kshiftrd-1.c: Ditto.
* gcc.target/i386/avx512bw-kshiftrq-1.c: Ditto.
* gcc.target/i386/avx512dq-kshiftrb-1.c: Ditto.
* gcc.target/i386/avx512f-kshiftrw-1.c: Ditto.
* gcc.target/i386/avx512bw-kshiftld-2.c: Ditto.
* gcc.target/i386/avx512bw-kshiftlq-2.c: Ditto.
* gcc.target/i386/avx512bw-kshiftrd-2.c: Ditto.
* gcc.target/i386/avx512bw-kshiftrq-2.c: Ditto.
* gcc.target/i386/avx512dq-kshiftlb-2.c: Ditto.
* gcc.target/i386/avx512dq-kshiftrb-2.c: Ditto.
* gcc.target/i386/avx512f-kshiftlw-2.c: Ditto.
* gcc.target/i386/avx512f-kshiftrw-2.c: Ditto.
* gcc.target/i386/avx-1.c: Test new intrinsics.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.

From-SVN: r244685

28 files changed:
gcc/ChangeLog
gcc/config/i386/avx512bwintrin.h
gcc/config/i386/avx512dqintrin.h
gcc/config/i386/avx512fintrin.h
gcc/config/i386/i386-builtin-types.def
gcc/config/i386/i386-builtin.def
gcc/config/i386/i386.c
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx-1.c
gcc/testsuite/gcc.target/i386/avx512bw-kshiftld-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512bw-kshiftld-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512bw-kshiftlq-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512bw-kshiftlq-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512bw-kshiftrd-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512bw-kshiftrd-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512bw-kshiftrq-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512bw-kshiftrq-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512dq-kshiftlb-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512dq-kshiftlb-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512dq-kshiftrb-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512dq-kshiftrb-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-kshiftlw-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-kshiftlw-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-kshiftrw-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-kshiftrw-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse-13.c
gcc/testsuite/gcc.target/i386/sse-23.c

index 470608638d235b70ba6a36743fc151e622ad356e..1ecda9579122ac50a2c7ae5c52974fde8c497d22 100644 (file)
@@ -1,3 +1,17 @@
+2017-01-20  Andrew Senkevich  <andrew.senkevich@intel.com>
+
+       * config/i386/avx512bwintrin.h: Add k-mask registers shift intrinsics.
+       * config/i386/avx512dqintrin.h: Ditto.
+       * config/i386/avx512fintrin.h: Ditto.
+       * config/i386/i386-builtin-types.def: Add new types.
+       * gcc/config/i386/i386.c: Handle new types.
+       * config/i386/i386-builtin.def (__builtin_ia32_kshiftliqi)
+       (__builtin_ia32_kshiftlihi, __builtin_ia32_kshiftlisi)
+       (__builtin_ia32_kshiftlidi, __builtin_ia32_kshiftriqi)
+       (__builtin_ia32_kshiftrihi, __builtin_ia32_kshiftrisi)
+       (__builtin_ia32_kshiftridi): New.
+       * config/i386/sse.md (k<code><mode>): Rename *k<code><mode>.
+
 2017-01-19  Segher Boessenkool  <segher@kernel.crashing.org>
 
        PR target/78875
index 21bec7342f8a3115c3e551be54425c1b8824a163..e41428a2ec00408f717c455a4bdf692ad558cef3 100644 (file)
@@ -2569,6 +2569,38 @@ _mm512_cmple_epi16_mask (__m512i __X, __m512i __Y)
 }
 
 #ifdef __OPTIMIZE__
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftli_mask32 (__mmask32 __A, unsigned int __B)
+{
+  return (__mmask32) __builtin_ia32_kshiftlisi ((__mmask32) __A,
+                                               (__mmask8) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftli_mask64 (__mmask64 __A, unsigned int __B)
+{
+  return (__mmask64) __builtin_ia32_kshiftlidi ((__mmask64) __A,
+                                               (__mmask8) __B);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftri_mask32 (__mmask32 __A, unsigned int __B)
+{
+  return (__mmask32) __builtin_ia32_kshiftrisi ((__mmask32) __A,
+                                               (__mmask8) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftri_mask64 (__mmask64 __A, unsigned int __B)
+{
+  return (__mmask64) __builtin_ia32_kshiftridi ((__mmask64) __A,
+                                               (__mmask8) __B);
+}
+
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_alignr_epi8 (__m512i __A, __m512i __B, const int __N)
@@ -2972,6 +3004,18 @@ _mm512_bsrli_epi128 (__m512i __A, const int __N)
 }
 
 #else
+#define _kshiftli_mask32(X, Y)                                                 \
+  ((__mmask32) __builtin_ia32_kshiftlisi ((__mmask32)(X), (__mmask8)(Y)))
+
+#define _kshiftli_mask64(X, Y)                                                 \
+  ((__mmask64) __builtin_ia32_kshiftlidi ((__mmask64)(X), (__mmask8)(Y)))
+
+#define _kshiftri_mask32(X, Y)                                                 \
+  ((__mmask32) __builtin_ia32_kshiftrisi ((__mmask32)(X), (__mmask8)(Y)))
+
+#define _kshiftri_mask64(X, Y)                                                 \
+  ((__mmask64) __builtin_ia32_kshiftridi ((__mmask64)(X), (__mmask8)(Y)))
+
 #define _mm512_alignr_epi8(X, Y, N)                                                \
   ((__m512i) __builtin_ia32_palignr512 ((__v8di)(__m512i)(X),                      \
                                        (__v8di)(__m512i)(Y),                       \
index 1fc2f68bfef95b6187f69ac9ac2e87bc596aadcb..bcb4a321256b9a8b1d4249f2146232eebaa8fb5c 100644 (file)
@@ -997,6 +997,20 @@ _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A)
 }
 
 #ifdef __OPTIMIZE__
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftli_mask8 (__mmask8 __A, unsigned int __B)
+{
+  return (__mmask8) __builtin_ia32_kshiftliqi ((__mmask8) __A, (__mmask8) __B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftri_mask8 (__mmask8 __A, unsigned int __B)
+{
+  return (__mmask8) __builtin_ia32_kshiftriqi ((__mmask8) __A, (__mmask8) __B);
+}
+
 extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_range_pd (__m512d __A, __m512d __B, int __C)
@@ -2008,6 +2022,12 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 }
 
 #else
+#define _kshiftli_mask8(X, Y)                                          \
+  ((__mmask8) __builtin_ia32_kshiftliqi ((__mmask8)(X), (__mmask8)(Y)))
+
+#define _kshiftri_mask8(X, Y)                                          \
+  ((__mmask8) __builtin_ia32_kshiftriqi ((__mmask8)(X), (__mmask8)(Y)))
+
 #define _mm_range_sd(A, B, C)                                          \
   ((__m128d) __builtin_ia32_rangesd128_round ((__v2df)(__m128d)(A),    \
     (__v2df)(__m128d)(B), (int)(C),                                    \
index af6880ed478c2fb03849a6dc59e582bca572cd9d..810ac23e0b505bb38186bb7f194dc5ec60374d32 100644 (file)
@@ -8966,6 +8966,22 @@ _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
 #define _MM_CMPINT_GT      0x6
 
 #ifdef __OPTIMIZE__
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftli_mask16 (__mmask16 __A, unsigned int __B)
+{
+  return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
+                                               (__mmask8) __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftri_mask16 (__mmask16 __A, unsigned int __B)
+{
+  return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
+                                               (__mmask8) __B);
+}
+
 extern __inline __mmask8
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
@@ -9120,6 +9136,12 @@ _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
 }
 
 #else
+#define _kshiftli_mask16(X, Y)                                         \
+  ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X), (__mmask8)(Y)))
+
+#define _kshiftri_mask16(X, Y)                                         \
+  ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X), (__mmask8)(Y)))
+
 #define _mm512_cmp_epi64_mask(X, Y, P)                                 \
   ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),       \
                                           (__v8di)(__m512i)(Y), (int)(P),\
index f287ca01197a2ac33a96e24671553433dfc2e785..2922324fe124d134c994bb1534ec496918a09b81 100644 (file)
@@ -558,10 +558,9 @@ DEF_FUNCTION_TYPE (USI, UHI)
 DEF_FUNCTION_TYPE (UQI, USI)
 DEF_FUNCTION_TYPE (UHI, USI)
 
-DEF_FUNCTION_TYPE (UQI, UQI, INT)
-DEF_FUNCTION_TYPE (UHI, UHI, INT)
-DEF_FUNCTION_TYPE (USI, USI, INT)
-DEF_FUNCTION_TYPE (UDI, UDI, INT)
+DEF_FUNCTION_TYPE (UHI, UHI, UQI)
+DEF_FUNCTION_TYPE (USI, USI, UQI)
+DEF_FUNCTION_TYPE (UDI, UDI, UQI)
 DEF_FUNCTION_TYPE (UQI, UQI)
 DEF_FUNCTION_TYPE (UHI, UHI)
 DEF_FUNCTION_TYPE (USI, USI)
@@ -619,6 +618,8 @@ DEF_FUNCTION_TYPE (UQI, V4SF, V4SF, INT)
 DEF_FUNCTION_TYPE (UQI, V4SF, V4SF, INT, UQI)
 DEF_FUNCTION_TYPE (UQI, V4SF, V4SF, INT, UQI, INT)
 
+DEF_FUNCTION_TYPE_ALIAS (UQI_FTYPE_UQI_UQI, CONST)
+
 DEF_FUNCTION_TYPE (V16SI, UHI)
 DEF_FUNCTION_TYPE (V8DI, UQI)
 DEF_FUNCTION_TYPE (V16QI, UHI)
index c35133595c53fcc7fdbb66db16e171f5344409df..08ce2c98fcb6e6d7114798583a402d7e25cac1d0 100644 (file)
@@ -1440,6 +1440,14 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__bu
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
 
 /* Mask arithmetic operations */
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kashiftqi, "__builtin_ia32_kshiftliqi", IX86_BUILTIN_KSHIFTLI8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI_CONST)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kashifthi, "__builtin_ia32_kshiftlihi", IX86_BUILTIN_KSHIFTLI16, UNKNOWN, (int) UHI_FTYPE_UHI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kashiftsi, "__builtin_ia32_kshiftlisi", IX86_BUILTIN_KSHIFTLI32, UNKNOWN, (int) USI_FTYPE_USI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kashiftdi, "__builtin_ia32_kshiftlidi", IX86_BUILTIN_KSHIFTLI64, UNKNOWN, (int) UDI_FTYPE_UDI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_klshiftrtqi, "__builtin_ia32_kshiftriqi", IX86_BUILTIN_KSHIFTRI8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI_CONST)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_klshiftrthi, "__builtin_ia32_kshiftrihi", IX86_BUILTIN_KSHIFTRI16, UNKNOWN, (int) UHI_FTYPE_UHI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_klshiftrtsi, "__builtin_ia32_kshiftrisi", IX86_BUILTIN_KSHIFTRI32, UNKNOWN, (int) USI_FTYPE_USI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_klshiftrtdi, "__builtin_ia32_kshiftridi", IX86_BUILTIN_KSHIFTRI64, UNKNOWN, (int) UDI_FTYPE_UDI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kandqi, "__builtin_ia32_kandqi", IX86_BUILTIN_KAND8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kandhi, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kandsi, "__builtin_ia32_kandsi", IX86_BUILTIN_KAND32, UNKNOWN, (int) USI_FTYPE_USI_USI)
index eb4781d9239c6f825de1b81ee874bdc70505e95f..46d1c44d8fc0fa8e522b0889985a9bd02dcbff1b 100644 (file)
@@ -35073,10 +35073,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
     case V4DI_FTYPE_V8DI_INT:
     case QI_FTYPE_V4SF_INT:
     case QI_FTYPE_V2DF_INT:
-    case UQI_FTYPE_UQI_INT:
-    case UHI_FTYPE_UHI_INT:
-    case USI_FTYPE_USI_INT:
-    case UDI_FTYPE_UDI_INT:
+    case UQI_FTYPE_UQI_UQI_CONST:
+    case UHI_FTYPE_UHI_UQI:
+    case USI_FTYPE_USI_UQI:
+    case UDI_FTYPE_UDI_UQI:
       nargs = 2;
       nargs_constant = 1;
       break;
index f75499418127e6578255358b85b1d73addd903c4..bc504eb874a4de92388fd162e546a1a138272744 100644 (file)
 ;; Mask variant shift mnemonics
 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
 
-(define_insn "*k<code><mode>"
+(define_insn "k<code><mode>"
   [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
        (any_lshift:SWI1248_AVX512BWDQ
          (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
index 5e3ac310789fac1b5726041ce6e803b755093d0c..bbfd1338a1fae95378df05215242a6a030f2712a 100644 (file)
@@ -1,3 +1,25 @@
+2017-01-20  Andrew Senkevich  <andrew.senkevich@intel.com>
+
+       * gcc.target/i386/avx512bw-kshiftld-1.c: New test.
+       * gcc.target/i386/avx512bw-kshiftlq-1.c: Ditto.
+       * gcc.target/i386/avx512dq-kshiftlb-1.c: Ditto.
+       * gcc.target/i386/avx512f-kshiftlw-1.c: Ditto.
+       * gcc.target/i386/avx512bw-kshiftrd-1.c: Ditto.
+       * gcc.target/i386/avx512bw-kshiftrq-1.c: Ditto.
+       * gcc.target/i386/avx512dq-kshiftrb-1.c: Ditto.
+       * gcc.target/i386/avx512f-kshiftrw-1.c: Ditto.
+       * gcc.target/i386/avx512bw-kshiftld-2.c: Ditto.
+       * gcc.target/i386/avx512bw-kshiftlq-2.c: Ditto.
+       * gcc.target/i386/avx512bw-kshiftrd-2.c: Ditto.
+       * gcc.target/i386/avx512bw-kshiftrq-2.c: Ditto.
+       * gcc.target/i386/avx512dq-kshiftlb-2.c: Ditto.
+       * gcc.target/i386/avx512dq-kshiftrb-2.c: Ditto.
+       * gcc.target/i386/avx512f-kshiftlw-2.c: Ditto.
+       * gcc.target/i386/avx512f-kshiftrw-2.c: Ditto.
+       * gcc.target/i386/avx-1.c: Test new intrinsics.
+       * gcc.target/i386/sse-13.c: Ditto.
+       * gcc.target/i386/sse-23.c: Ditto.
+
 2017-01-20  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/71264
index 0418d07ad48615e4b7178bd1441c30240efefdc5..2a0df232278a3f0ec217d77e9d67a71c8a0ca80d 100644 (file)
 #define __builtin_ia32_xabort(I) __builtin_ia32_xabort(0)
 
 /* avx512fintrin.h */
+#define __builtin_ia32_kshiftlihi(A, B) __builtin_ia32_kshiftlihi(A, 8)
+#define __builtin_ia32_kshiftrihi(A, B) __builtin_ia32_kshiftrihi(A, 8)
 #define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 8)
 #define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
 
 /* avx512bwintrin.h */
+#define __builtin_ia32_kshiftlisi(A, B) __builtin_ia32_kshiftlisi(A, 8)
+#define __builtin_ia32_kshiftlidi(A, B) __builtin_ia32_kshiftlidi(A, 8)
+#define __builtin_ia32_kshiftrisi(A, B) __builtin_ia32_kshiftrisi(A, 8)
+#define __builtin_ia32_kshiftridi(A, B) __builtin_ia32_kshiftridi(A, 8)
 #define __builtin_ia32_ucmpw512_mask(A, B, E, D) __builtin_ia32_ucmpw512_mask(A, B, 1, D)
 #define __builtin_ia32_ucmpb512_mask(A, B, E, D) __builtin_ia32_ucmpb512_mask(A, B, 1, D)
 #define __builtin_ia32_psrlwi512_mask(A, E, C, D) __builtin_ia32_psrlwi512_mask(A, 1, C, D)
 #define __builtin_ia32_pslldq512(A, B) __builtin_ia32_pslldq512(A, 8)
 
 /* avx512dqintrin.h */
+#define __builtin_ia32_kshiftliqi(A, B) __builtin_ia32_kshiftliqi(A, 8)
+#define __builtin_ia32_kshiftriqi(A, B) __builtin_ia32_kshiftriqi(A, 8)
 #define __builtin_ia32_reducess(A, B, F) __builtin_ia32_reducess(A, B, 1)
 #define __builtin_ia32_reducesd(A, B, F) __builtin_ia32_reducesd(A, B, 1)
 #define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D)
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kshiftld-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftld-1.c
new file mode 100644 (file)
index 0000000..03714a6
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kshiftld\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2;
+  unsigned int i = 5;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+
+  k2 = _kshiftli_mask32 (k1, i);
+  x = _mm512_mask_add_epi16 (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kshiftld-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftld-2.c
new file mode 100644 (file)
index 0000000..7fdc01a
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512bw-check.h"
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1 = 1;
+  unsigned int i = 25;
+
+  volatile __mmask32 r = _kshiftli_mask32 (k1, i);
+  if (r != 1 << i)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kshiftlq-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftlq-1.c
new file mode 100644 (file)
index 0000000..70a4b67
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kshiftlq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2;
+  unsigned int i = 5;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+
+  k2 = _kshiftli_mask64 (k1, i);
+  x = _mm512_mask_add_epi8 (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kshiftlq-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftlq-2.c
new file mode 100644 (file)
index 0000000..4dabb4a
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512bw-check.h"
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1 = 1;
+  unsigned int i = 53;
+
+  volatile __mmask64 r = _kshiftli_mask64 (k1, i);
+  if (r != 1 << i)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kshiftrd-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftrd-1.c
new file mode 100644 (file)
index 0000000..b99a713
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kshiftrd\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2;
+  unsigned int i = 5;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+
+  k2 = _kshiftri_mask32 (k1, i);
+  x = _mm512_mask_add_epi16 (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kshiftrd-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftrd-2.c
new file mode 100644 (file)
index 0000000..ce3707f
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512bw-check.h"
+
+void
+avx512bw_test ()
+{
+  unsigned int i = 25;
+  __mmask32 k1 = 1 << i;
+
+  volatile __mmask32 r = _kshiftri_mask32 (k1, i);
+  if (r != 1)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kshiftrq-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftrq-1.c
new file mode 100644 (file)
index 0000000..b0051b5
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kshiftrq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2;
+  unsigned int i = 5;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+
+  k2 = _kshiftri_mask64 (k1, i);
+  x = _mm512_mask_add_epi8 (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kshiftrq-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftrq-2.c
new file mode 100644 (file)
index 0000000..655f926
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512bw-check.h"
+
+void
+avx512bw_test ()
+{
+  unsigned int i = 53;
+  __mmask64 k1 = 1 << i;
+
+  volatile __mmask64 r = _kshiftri_mask64 (k1, i);
+  if (r != 1)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kshiftlb-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-kshiftlb-1.c
new file mode 100644 (file)
index 0000000..2d72c0a
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kshiftlb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2;
+  unsigned int i = 5;
+  volatile __m512d x = _mm512_setzero_pd();
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+
+  k2 = _kshiftli_mask8 (k1, i);
+  x = _mm512_mask_add_pd (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kshiftlb-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-kshiftlb-2.c
new file mode 100644 (file)
index 0000000..bb0f10a
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512dq-check.h"
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1 = 1;
+  unsigned int i = 5;
+
+  volatile __mmask8 r = _kshiftli_mask8 (k1, i);
+  if (r != 1 << i)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kshiftrb-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-kshiftrb-1.c
new file mode 100644 (file)
index 0000000..c5ae199
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kshiftrb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2;
+  unsigned int i = 5;
+  volatile __m512d x = _mm512_setzero_pd();
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+
+  k2 = _kshiftri_mask8 (k1, i);
+  x = _mm512_mask_add_pd (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kshiftrb-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-kshiftrb-2.c
new file mode 100644 (file)
index 0000000..1b7c3bd
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512dq-check.h"
+
+void
+avx512dq_test ()
+{
+  unsigned int i = 5;
+  __mmask8 k1 = 1 << i;
+
+  volatile __mmask8 r = _kshiftri_mask8 (k1, i);
+  if (r != 1)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kshiftlw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-kshiftlw-1.c
new file mode 100644 (file)
index 0000000..3782d90
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kshiftlw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test ()
+{
+  __mmask16 k1, k2;
+  unsigned int i = 5;
+  volatile __m512 x = _mm512_setzero_ps();
+
+  __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );
+
+  k2 = _kshiftli_mask16 (k1, i);
+  x = _mm512_mask_add_ps (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kshiftlw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-kshiftlw-2.c
new file mode 100644 (file)
index 0000000..89d45fc
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void
+avx512f_test ()
+{
+  __mmask16 k1 = 1;
+  unsigned int i = 10;
+
+  volatile __mmask16 r = _kshiftli_mask16 (k1, i);
+  if (r != 1 << i)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kshiftrw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-kshiftrw-1.c
new file mode 100644 (file)
index 0000000..6d537ef
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kshiftrw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test ()
+{
+  __mmask16 k1, k2;
+  unsigned int i = 5;
+  volatile __m512 x = _mm512_setzero_ps();
+
+  __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );
+
+  k2 = _kshiftri_mask16 (k1, i);
+  x = _mm512_mask_add_ps (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kshiftrw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-kshiftrw-2.c
new file mode 100644 (file)
index 0000000..5a1483a
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void
+avx512f_test ()
+{
+  unsigned int i = 10;
+  __mmask16 k1 = 1 << i;
+
+  volatile __mmask16 r = _kshiftri_mask16 (k1, i);
+  if (r != 1)
+    abort ();
+}
index b23480a8ec88d72e3b88bede812cf9bfc120ebbc..ff0051bf479697c90b720aa24152305ae16a329e 100644 (file)
 #define __builtin_ia32_xabort(N) __builtin_ia32_xabort(1)
 
 /* avx512fintrin.h */
+#define __builtin_ia32_kshiftlihi(A, B) __builtin_ia32_kshiftlihi(A, 8)
+#define __builtin_ia32_kshiftrihi(A, B) __builtin_ia32_kshiftrihi(A, 8)
 #define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 8)
 #define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
 
 /* avx512bwintrin.h */
+#define __builtin_ia32_kshiftlisi(A, B) __builtin_ia32_kshiftlisi(A, 8)
+#define __builtin_ia32_kshiftlidi(A, B) __builtin_ia32_kshiftlidi(A, 8)
+#define __builtin_ia32_kshiftrisi(A, B) __builtin_ia32_kshiftrisi(A, 8)
+#define __builtin_ia32_kshiftridi(A, B) __builtin_ia32_kshiftridi(A, 8)
 #define __builtin_ia32_ucmpw512_mask(A, B, E, D) __builtin_ia32_ucmpw512_mask(A, B, 1, D)
 #define __builtin_ia32_ucmpb512_mask(A, B, E, D) __builtin_ia32_ucmpb512_mask(A, B, 1, D)
 #define __builtin_ia32_psrlwi512_mask(A, E, C, D) __builtin_ia32_psrlwi512_mask(A, 1, C, D)
 #define __builtin_ia32_pslldq512(A, B) __builtin_ia32_pslldq512(A, 8)
 
 /* avx512dqintrin.h */
+#define __builtin_ia32_kshiftliqi(A, B) __builtin_ia32_kshiftliqi(A, 8)
+#define __builtin_ia32_kshiftriqi(A, B) __builtin_ia32_kshiftriqi(A, 8)
 #define __builtin_ia32_reducess(A, B, F) __builtin_ia32_reducess(A, B, 1)
 #define __builtin_ia32_reducesd(A, B, F) __builtin_ia32_reducesd(A, B, 1)
 #define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D)
index bf1cba05f538a48904e904a0e1b8988fb4b7ab01..f4fcb002bb5b3283dfa3dfaf5c414eefa6786191 100644 (file)
 #define __builtin_ia32_xabort(M) __builtin_ia32_xabort(1)
 
 /* avx512fintrin.h */
+#define __builtin_ia32_kshiftlihi(A, B) __builtin_ia32_kshiftlihi(A, 8)
+#define __builtin_ia32_kshiftrihi(A, B) __builtin_ia32_kshiftrihi(A, 8)
 #define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 8)
 #define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
 
 /* avx512bwintrin.h */
+#define __builtin_ia32_kshiftlisi(A, B) __builtin_ia32_kshiftlisi(A, 8)
+#define __builtin_ia32_kshiftlidi(A, B) __builtin_ia32_kshiftlidi(A, 8)
+#define __builtin_ia32_kshiftrisi(A, B) __builtin_ia32_kshiftrisi(A, 8)
+#define __builtin_ia32_kshiftridi(A, B) __builtin_ia32_kshiftridi(A, 8)
 #define __builtin_ia32_ucmpw512_mask(A, B, E, D) __builtin_ia32_ucmpw512_mask(A, B, 1, D)
 #define __builtin_ia32_ucmpb512_mask(A, B, E, D) __builtin_ia32_ucmpb512_mask(A, B, 1, D)
 #define __builtin_ia32_psrlwi512_mask(A, E, C, D) __builtin_ia32_psrlwi512_mask(A, 1, C, D)
 #define __builtin_ia32_pslldq512(A, B) __builtin_ia32_pslldq512(A, 8)
 
 /* avx512dqintrin.h */
+#define __builtin_ia32_kshiftliqi(A, B) __builtin_ia32_kshiftliqi(A, 8)
+#define __builtin_ia32_kshiftriqi(A, B) __builtin_ia32_kshiftriqi(A, 8)
 #define __builtin_ia32_reducess(A, B, F) __builtin_ia32_reducess(A, B, 1)
 #define __builtin_ia32_reducesd(A, B, F) __builtin_ia32_reducesd(A, B, 1)
 #define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D)