+2017-01-20 Andrew Senkevich <andrew.senkevich@intel.com>
+
+ * config/i386/avx512bwintrin.h: Add k-mask registers shift intrinsics.
+ * config/i386/avx512dqintrin.h: Ditto.
+ * config/i386/avx512fintrin.h: Ditto.
+ * config/i386/i386-builtin-types.def: Add new types.
+ * config/i386/i386.c: Handle new types.
+ * config/i386/i386-builtin.def (__builtin_ia32_kshiftliqi)
+ (__builtin_ia32_kshiftlihi, __builtin_ia32_kshiftlisi)
+ (__builtin_ia32_kshiftlidi, __builtin_ia32_kshiftriqi)
+ (__builtin_ia32_kshiftrihi, __builtin_ia32_kshiftrisi)
+ (__builtin_ia32_kshiftridi): New.
+ * config/i386/sse.md (k<code><mode>): Rename *k<code><mode>.
+
2017-01-19 Segher Boessenkool <segher@kernel.crashing.org>
PR target/78875
}
#ifdef __OPTIMIZE__
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftli_mask32 (__mmask32 __A, unsigned int __B)
+{
+ return (__mmask32) __builtin_ia32_kshiftlisi ((__mmask32) __A,
+ (__mmask8) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftli_mask64 (__mmask64 __A, unsigned int __B)
+{
+ return (__mmask64) __builtin_ia32_kshiftlidi ((__mmask64) __A,
+ (__mmask8) __B);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftri_mask32 (__mmask32 __A, unsigned int __B)
+{
+ return (__mmask32) __builtin_ia32_kshiftrisi ((__mmask32) __A,
+ (__mmask8) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftri_mask64 (__mmask64 __A, unsigned int __B)
+{
+ return (__mmask64) __builtin_ia32_kshiftridi ((__mmask64) __A,
+ (__mmask8) __B);
+}
+
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_alignr_epi8 (__m512i __A, __m512i __B, const int __N)
}
#else
+#define _kshiftli_mask32(X, Y) \
+ ((__mmask32) __builtin_ia32_kshiftlisi ((__mmask32)(X), (__mmask8)(Y)))
+
+#define _kshiftli_mask64(X, Y) \
+ ((__mmask64) __builtin_ia32_kshiftlidi ((__mmask64)(X), (__mmask8)(Y)))
+
+#define _kshiftri_mask32(X, Y) \
+ ((__mmask32) __builtin_ia32_kshiftrisi ((__mmask32)(X), (__mmask8)(Y)))
+
+#define _kshiftri_mask64(X, Y) \
+ ((__mmask64) __builtin_ia32_kshiftridi ((__mmask64)(X), (__mmask8)(Y)))
+
#define _mm512_alignr_epi8(X, Y, N) \
((__m512i) __builtin_ia32_palignr512 ((__v8di)(__m512i)(X), \
(__v8di)(__m512i)(Y), \
}
#ifdef __OPTIMIZE__
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftli_mask8 (__mmask8 __A, unsigned int __B)
+{
+ return (__mmask8) __builtin_ia32_kshiftliqi ((__mmask8) __A, (__mmask8) __B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftri_mask8 (__mmask8 __A, unsigned int __B)
+{
+ return (__mmask8) __builtin_ia32_kshiftriqi ((__mmask8) __A, (__mmask8) __B);
+}
+
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_range_pd (__m512d __A, __m512d __B, int __C)
}
#else
+#define _kshiftli_mask8(X, Y) \
+ ((__mmask8) __builtin_ia32_kshiftliqi ((__mmask8)(X), (__mmask8)(Y)))
+
+#define _kshiftri_mask8(X, Y) \
+ ((__mmask8) __builtin_ia32_kshiftriqi ((__mmask8)(X), (__mmask8)(Y)))
+
#define _mm_range_sd(A, B, C) \
((__m128d) __builtin_ia32_rangesd128_round ((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), (int)(C), \
#define _MM_CMPINT_GT 0x6
#ifdef __OPTIMIZE__
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftli_mask16 (__mmask16 __A, unsigned int __B)
+{
+ return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
+ (__mmask8) __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftri_mask16 (__mmask16 __A, unsigned int __B)
+{
+ return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
+ (__mmask8) __B);
+}
+
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
}
#else
+#define _kshiftli_mask16(X, Y) \
+ ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X), (__mmask8)(Y)))
+
+#define _kshiftri_mask16(X, Y) \
+ ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X), (__mmask8)(Y)))
+
#define _mm512_cmp_epi64_mask(X, Y, P) \
((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
(__v8di)(__m512i)(Y), (int)(P),\
DEF_FUNCTION_TYPE (UQI, USI)
DEF_FUNCTION_TYPE (UHI, USI)
-DEF_FUNCTION_TYPE (UQI, UQI, INT)
-DEF_FUNCTION_TYPE (UHI, UHI, INT)
-DEF_FUNCTION_TYPE (USI, USI, INT)
-DEF_FUNCTION_TYPE (UDI, UDI, INT)
+DEF_FUNCTION_TYPE (UHI, UHI, UQI)
+DEF_FUNCTION_TYPE (USI, USI, UQI)
+DEF_FUNCTION_TYPE (UDI, UDI, UQI)
DEF_FUNCTION_TYPE (UQI, UQI)
DEF_FUNCTION_TYPE (UHI, UHI)
DEF_FUNCTION_TYPE (USI, USI)
DEF_FUNCTION_TYPE (UQI, V4SF, V4SF, INT, UQI)
DEF_FUNCTION_TYPE (UQI, V4SF, V4SF, INT, UQI, INT)
+DEF_FUNCTION_TYPE_ALIAS (UQI_FTYPE_UQI_UQI, CONST)
+
DEF_FUNCTION_TYPE (V16SI, UHI)
DEF_FUNCTION_TYPE (V8DI, UQI)
DEF_FUNCTION_TYPE (V16QI, UHI)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
/* Mask arithmetic operations */
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kashiftqi, "__builtin_ia32_kshiftliqi", IX86_BUILTIN_KSHIFTLI8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI_CONST)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kashifthi, "__builtin_ia32_kshiftlihi", IX86_BUILTIN_KSHIFTLI16, UNKNOWN, (int) UHI_FTYPE_UHI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kashiftsi, "__builtin_ia32_kshiftlisi", IX86_BUILTIN_KSHIFTLI32, UNKNOWN, (int) USI_FTYPE_USI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kashiftdi, "__builtin_ia32_kshiftlidi", IX86_BUILTIN_KSHIFTLI64, UNKNOWN, (int) UDI_FTYPE_UDI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_klshiftrtqi, "__builtin_ia32_kshiftriqi", IX86_BUILTIN_KSHIFTRI8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI_CONST)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_klshiftrthi, "__builtin_ia32_kshiftrihi", IX86_BUILTIN_KSHIFTRI16, UNKNOWN, (int) UHI_FTYPE_UHI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_klshiftrtsi, "__builtin_ia32_kshiftrisi", IX86_BUILTIN_KSHIFTRI32, UNKNOWN, (int) USI_FTYPE_USI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_klshiftrtdi, "__builtin_ia32_kshiftridi", IX86_BUILTIN_KSHIFTRI64, UNKNOWN, (int) UDI_FTYPE_UDI_UQI)
BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kandqi, "__builtin_ia32_kandqi", IX86_BUILTIN_KAND8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kandhi, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kandsi, "__builtin_ia32_kandsi", IX86_BUILTIN_KAND32, UNKNOWN, (int) USI_FTYPE_USI_USI)
case V4DI_FTYPE_V8DI_INT:
case QI_FTYPE_V4SF_INT:
case QI_FTYPE_V2DF_INT:
- case UQI_FTYPE_UQI_INT:
- case UHI_FTYPE_UHI_INT:
- case USI_FTYPE_USI_INT:
- case UDI_FTYPE_UDI_INT:
+ case UQI_FTYPE_UQI_UQI_CONST:
+ case UHI_FTYPE_UHI_UQI:
+ case USI_FTYPE_USI_UQI:
+ case UDI_FTYPE_UDI_UQI:
nargs = 2;
nargs_constant = 1;
break;
;; Mask variant shift mnemonics
(define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
-(define_insn "*k<code><mode>"
+(define_insn "k<code><mode>"
[(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
(any_lshift:SWI1248_AVX512BWDQ
(match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
+2017-01-20 Andrew Senkevich <andrew.senkevich@intel.com>
+
+ * gcc.target/i386/avx512bw-kshiftld-1.c: New test.
+ * gcc.target/i386/avx512bw-kshiftlq-1.c: Ditto.
+ * gcc.target/i386/avx512dq-kshiftlb-1.c: Ditto.
+ * gcc.target/i386/avx512f-kshiftlw-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kshiftrd-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kshiftrq-1.c: Ditto.
+ * gcc.target/i386/avx512dq-kshiftrb-1.c: Ditto.
+ * gcc.target/i386/avx512f-kshiftrw-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kshiftld-2.c: Ditto.
+ * gcc.target/i386/avx512bw-kshiftlq-2.c: Ditto.
+ * gcc.target/i386/avx512bw-kshiftrd-2.c: Ditto.
+ * gcc.target/i386/avx512bw-kshiftrq-2.c: Ditto.
+ * gcc.target/i386/avx512dq-kshiftlb-2.c: Ditto.
+ * gcc.target/i386/avx512dq-kshiftrb-2.c: Ditto.
+ * gcc.target/i386/avx512f-kshiftlw-2.c: Ditto.
+ * gcc.target/i386/avx512f-kshiftrw-2.c: Ditto.
+ * gcc.target/i386/avx-1.c: Test new intrinsics.
+ * gcc.target/i386/sse-13.c: Ditto.
+ * gcc.target/i386/sse-23.c: Ditto.
+
2017-01-20 Richard Biener <rguenther@suse.de>
PR tree-optimization/71264
#define __builtin_ia32_xabort(I) __builtin_ia32_xabort(0)
/* avx512fintrin.h */
+#define __builtin_ia32_kshiftlihi(A, B) __builtin_ia32_kshiftlihi(A, 8)
+#define __builtin_ia32_kshiftrihi(A, B) __builtin_ia32_kshiftrihi(A, 8)
#define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 8)
#define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 8)
#define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 8)
#define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
/* avx512bwintrin.h */
+#define __builtin_ia32_kshiftlisi(A, B) __builtin_ia32_kshiftlisi(A, 8)
+#define __builtin_ia32_kshiftlidi(A, B) __builtin_ia32_kshiftlidi(A, 8)
+#define __builtin_ia32_kshiftrisi(A, B) __builtin_ia32_kshiftrisi(A, 8)
+#define __builtin_ia32_kshiftridi(A, B) __builtin_ia32_kshiftridi(A, 8)
#define __builtin_ia32_ucmpw512_mask(A, B, E, D) __builtin_ia32_ucmpw512_mask(A, B, 1, D)
#define __builtin_ia32_ucmpb512_mask(A, B, E, D) __builtin_ia32_ucmpb512_mask(A, B, 1, D)
#define __builtin_ia32_psrlwi512_mask(A, E, C, D) __builtin_ia32_psrlwi512_mask(A, 1, C, D)
#define __builtin_ia32_pslldq512(A, B) __builtin_ia32_pslldq512(A, 8)
/* avx512dqintrin.h */
+#define __builtin_ia32_kshiftliqi(A, B) __builtin_ia32_kshiftliqi(A, 8)
+#define __builtin_ia32_kshiftriqi(A, B) __builtin_ia32_kshiftriqi(A, 8)
#define __builtin_ia32_reducess(A, B, F) __builtin_ia32_reducess(A, B, 1)
#define __builtin_ia32_reducesd(A, B, F) __builtin_ia32_reducesd(A, B, 1)
#define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D)
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kshiftld\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+ __mmask32 k1, k2;
+ unsigned int i = 5;
+ volatile __m512i x = _mm512_setzero_si512 ();
+
+ __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+
+ k2 = _kshiftli_mask32 (k1, i);
+ x = _mm512_mask_add_epi16 (x, k2, x, x);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512bw-check.h"
+
+void
+avx512bw_test ()
+{
+ __mmask32 k1 = 1;
+ unsigned int i = 25;
+
+ volatile __mmask32 r = _kshiftli_mask32 (k1, i);
+ if (r != 1 << i)
+ abort ();
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kshiftlq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+ __mmask64 k1, k2;
+ unsigned int i = 5;
+ volatile __m512i x = _mm512_setzero_si512 ();
+
+ __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+
+ k2 = _kshiftli_mask64 (k1, i);
+ x = _mm512_mask_add_epi8 (x, k2, x, x);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512bw-check.h"
+
+void
+avx512bw_test ()
+{
+ __mmask64 k1 = 1;
+ unsigned int i = 53;
+
+ volatile __mmask64 r = _kshiftli_mask64 (k1, i);
+ if (r != (__mmask64) 1 << i)
+ abort ();
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kshiftrd\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+ __mmask32 k1, k2;
+ unsigned int i = 5;
+ volatile __m512i x = _mm512_setzero_si512 ();
+
+ __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+
+ k2 = _kshiftri_mask32 (k1, i);
+ x = _mm512_mask_add_epi16 (x, k2, x, x);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512bw-check.h"
+
+void
+avx512bw_test ()
+{
+ unsigned int i = 25;
+ __mmask32 k1 = 1 << i;
+
+ volatile __mmask32 r = _kshiftri_mask32 (k1, i);
+ if (r != 1)
+ abort ();
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kshiftrq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+ __mmask64 k1, k2;
+ unsigned int i = 5;
+ volatile __m512i x = _mm512_setzero_si512 ();
+
+ __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+
+ k2 = _kshiftri_mask64 (k1, i);
+ x = _mm512_mask_add_epi8 (x, k2, x, x);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512bw-check.h"
+
+void
+avx512bw_test ()
+{
+ unsigned int i = 53;
+ __mmask64 k1 = (__mmask64) 1 << i;
+
+ volatile __mmask64 r = _kshiftri_mask64 (k1, i);
+ if (r != 1)
+ abort ();
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kshiftlb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+ __mmask8 k1, k2;
+ unsigned int i = 5;
+ volatile __m512d x = _mm512_setzero_pd();
+
+ __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+
+ k2 = _kshiftli_mask8 (k1, i);
+ x = _mm512_mask_add_pd (x, k2, x, x);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512dq-check.h"
+
+void
+avx512dq_test ()
+{
+ __mmask8 k1 = 1;
+ unsigned int i = 5;
+
+ volatile __mmask8 r = _kshiftli_mask8 (k1, i);
+ if (r != 1 << i)
+ abort ();
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kshiftrb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+ __mmask8 k1, k2;
+ unsigned int i = 5;
+ volatile __m512d x = _mm512_setzero_pd();
+
+ __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+
+ k2 = _kshiftri_mask8 (k1, i);
+ x = _mm512_mask_add_pd (x, k2, x, x);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512dq-check.h"
+
+void
+avx512dq_test ()
+{
+ unsigned int i = 5;
+ __mmask8 k1 = 1 << i;
+
+ volatile __mmask8 r = _kshiftri_mask8 (k1, i);
+ if (r != 1)
+ abort ();
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kshiftlw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test ()
+{
+ __mmask16 k1, k2;
+ unsigned int i = 5;
+ volatile __m512 x = _mm512_setzero_ps();
+
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );
+
+ k2 = _kshiftli_mask16 (k1, i);
+ x = _mm512_mask_add_ps (x, k2, x, x);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void
+avx512f_test ()
+{
+ __mmask16 k1 = 1;
+ unsigned int i = 10;
+
+ volatile __mmask16 r = _kshiftli_mask16 (k1, i);
+ if (r != 1 << i)
+ abort ();
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kshiftrw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test ()
+{
+ __mmask16 k1, k2;
+ unsigned int i = 5;
+ volatile __m512 x = _mm512_setzero_ps();
+
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );
+
+ k2 = _kshiftri_mask16 (k1, i);
+ x = _mm512_mask_add_ps (x, k2, x, x);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void
+avx512f_test ()
+{
+ unsigned int i = 10;
+ __mmask16 k1 = 1 << i;
+
+ volatile __mmask16 r = _kshiftri_mask16 (k1, i);
+ if (r != 1)
+ abort ();
+}
#define __builtin_ia32_xabort(N) __builtin_ia32_xabort(1)
/* avx512fintrin.h */
+#define __builtin_ia32_kshiftlihi(A, B) __builtin_ia32_kshiftlihi(A, 8)
+#define __builtin_ia32_kshiftrihi(A, B) __builtin_ia32_kshiftrihi(A, 8)
#define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 8)
#define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 8)
#define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 8)
#define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
/* avx512bwintrin.h */
+#define __builtin_ia32_kshiftlisi(A, B) __builtin_ia32_kshiftlisi(A, 8)
+#define __builtin_ia32_kshiftlidi(A, B) __builtin_ia32_kshiftlidi(A, 8)
+#define __builtin_ia32_kshiftrisi(A, B) __builtin_ia32_kshiftrisi(A, 8)
+#define __builtin_ia32_kshiftridi(A, B) __builtin_ia32_kshiftridi(A, 8)
#define __builtin_ia32_ucmpw512_mask(A, B, E, D) __builtin_ia32_ucmpw512_mask(A, B, 1, D)
#define __builtin_ia32_ucmpb512_mask(A, B, E, D) __builtin_ia32_ucmpb512_mask(A, B, 1, D)
#define __builtin_ia32_psrlwi512_mask(A, E, C, D) __builtin_ia32_psrlwi512_mask(A, 1, C, D)
#define __builtin_ia32_pslldq512(A, B) __builtin_ia32_pslldq512(A, 8)
/* avx512dqintrin.h */
+#define __builtin_ia32_kshiftliqi(A, B) __builtin_ia32_kshiftliqi(A, 8)
+#define __builtin_ia32_kshiftriqi(A, B) __builtin_ia32_kshiftriqi(A, 8)
#define __builtin_ia32_reducess(A, B, F) __builtin_ia32_reducess(A, B, 1)
#define __builtin_ia32_reducesd(A, B, F) __builtin_ia32_reducesd(A, B, 1)
#define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D)
#define __builtin_ia32_xabort(M) __builtin_ia32_xabort(1)
/* avx512fintrin.h */
+#define __builtin_ia32_kshiftlihi(A, B) __builtin_ia32_kshiftlihi(A, 8)
+#define __builtin_ia32_kshiftrihi(A, B) __builtin_ia32_kshiftrihi(A, 8)
#define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 8)
#define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 8)
#define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 8)
#define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
/* avx512bwintrin.h */
+#define __builtin_ia32_kshiftlisi(A, B) __builtin_ia32_kshiftlisi(A, 8)
+#define __builtin_ia32_kshiftlidi(A, B) __builtin_ia32_kshiftlidi(A, 8)
+#define __builtin_ia32_kshiftrisi(A, B) __builtin_ia32_kshiftrisi(A, 8)
+#define __builtin_ia32_kshiftridi(A, B) __builtin_ia32_kshiftridi(A, 8)
#define __builtin_ia32_ucmpw512_mask(A, B, E, D) __builtin_ia32_ucmpw512_mask(A, B, 1, D)
#define __builtin_ia32_ucmpb512_mask(A, B, E, D) __builtin_ia32_ucmpb512_mask(A, B, 1, D)
#define __builtin_ia32_psrlwi512_mask(A, E, C, D) __builtin_ia32_psrlwi512_mask(A, 1, C, D)
#define __builtin_ia32_pslldq512(A, B) __builtin_ia32_pslldq512(A, 8)
/* avx512dqintrin.h */
+#define __builtin_ia32_kshiftliqi(A, B) __builtin_ia32_kshiftliqi(A, 8)
+#define __builtin_ia32_kshiftriqi(A, B) __builtin_ia32_kshiftriqi(A, 8)
#define __builtin_ia32_reducess(A, B, F) __builtin_ia32_reducess(A, B, 1)
#define __builtin_ia32_reducesd(A, B, F) __builtin_ia32_reducesd(A, B, 1)
#define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D)