+2018-07-11 Jakub Jelinek <jakub@redhat.com>
+
+ * config/i386/avx512bitalgintrin.h (_mm512_mask_bitshuffle_epi64_mask):
+ Use __mmask64 type instead of __mmask8 for __M argument.
+ * config/i386/avx512fintrin.h (_mm512_mask_xor_epi64,
+ _mm512_maskz_xor_epi64): Use __mmask8 type instead of __mmask16 for
+ __U argument.
+ (_mm512_mask_cmpneq_epi64_mask): Use __mmask8 type instead of
+ __mmask16 for __M argument.
+ (_mm512_maskz_insertf32x4, _mm512_maskz_inserti32x4,
+ _mm512_mask_insertf32x4, _mm512_mask_inserti32x4): Cast last argument
+ to __mmask16 instead of __mmask8.
+ * config/i386/avx512vlintrin.h (_mm_mask_add_ps, _mm_maskz_add_ps,
+ _mm256_mask_add_ps, _mm256_maskz_add_ps, _mm_mask_sub_ps,
+ _mm_maskz_sub_ps, _mm256_mask_sub_ps, _mm256_maskz_sub_ps,
+ _mm256_maskz_cvtepi32_ps, _mm_maskz_cvtepi32_ps): Use __mmask8 type
+ instead of __mmask16 for __U argument.
+ * config/i386/avx512vlbwintrin.h (_mm_mask_cmp_epi8_mask): Use
+ __mmask16 instead of __mmask8 for __U argument.
+ (_mm256_mask_cmp_epi8_mask): Use __mmask32 instead of __mmask16 for
+ __U argument.
+ (_mm256_cmp_epi8_mask): Use __mmask32 return type instead of
+ __mmask16.
+ (_mm_mask_cmp_epu8_mask): Use __mmask16 instead of __mmask8 for __U
+ argument.
+ (_mm256_mask_cmp_epu8_mask): Use __mmask32 instead of __mmask16 for
+ __U argument.
+ (_mm256_cmp_epu8_mask): Use __mmask32 return type instead of
+ __mmask16.
+ (_mm_mask_cmp_epi16_mask): Cast last argument to __mmask8 instead
+ of __mmask16.
+ (_mm256_mask_cvtepi8_epi16): Use __mmask16 instead of __mmask32 for
+ __U argument.
+ (_mm_mask_cvtepi8_epi16): Use __mmask8 instead of __mmask32 for
+ __U argument.
+ (_mm256_mask_cvtepu8_epi16): Use __mmask16 instead of __mmask32 for
+ __U argument.
+ (_mm_mask_cvtepu8_epi16): Use __mmask8 instead of __mmask32 for
+ __U argument.
+ (_mm256_mask_cmpneq_epu8_mask, _mm256_mask_cmplt_epu8_mask,
+ _mm256_mask_cmpge_epu8_mask, _mm256_mask_cmple_epu8_mask): Change
+ return type as well as __M argument type and all casts from __mmask8
+ to __mmask32.
+ (_mm256_mask_cmpneq_epu16_mask, _mm256_mask_cmplt_epu16_mask,
+ _mm256_mask_cmpge_epu16_mask, _mm256_mask_cmple_epu16_mask): Change
+ return type as well as __M argument type and all casts from __mmask8
+ to __mmask16.
+ (_mm256_mask_cmpneq_epi8_mask, _mm256_mask_cmplt_epi8_mask,
+ _mm256_mask_cmpge_epi8_mask, _mm256_mask_cmple_epi8_mask): Change
+ return type as well as __M argument type and all casts from __mmask8
+ to __mmask32.
+ (_mm256_mask_cmpneq_epi16_mask, _mm256_mask_cmplt_epi16_mask,
+ _mm256_mask_cmpge_epi16_mask, _mm256_mask_cmple_epi16_mask): Change
+ return type as well as __M argument type and all casts from __mmask8
+ to __mmask16.
+ * config/i386/avx512vbmi2vlintrin.h (_mm_mask_shrdi_epi32,
+ _mm_mask_shldi_epi32): Cast last argument to __mmask8 instead of
+ __mmask16.
+
+2018-07-11 Grazvydas Ignotas <notasas@gmail.com>
+
+ * config/i386/avx512bwintrin.h (_mm512_mask_cmp_epi8_mask,
+ _mm512_mask_cmp_epu8_mask): Use __mmask64 type instead of __mmask32
+ for __U argument.
+
2018-07-11 Paul Koning <ni1d@arrl.net>

	* doc/md.texi (define_subst): Document how multiple occurrences of
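
The two new entries above fix mask-width mismatches in the AVX-512
intrinsic headers.  As a minimal sketch of the failure mode (not part
of the patch; a hypothetical test program assuming an AVX512BW target
and GCC's <immintrin.h>), the old __mmask32 parameter of
_mm512_mask_cmp_epi8_mask silently truncated a 64-lane mask:

/* Hypothetical demo; compile with -mavx512bw.  Before the fix the
   upper half of M was dropped and K came out 0; afterwards all 32
   upper byte lanes compare equal.  */
#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m512i a = _mm512_set1_epi8 (1);
  __mmask64 m = 0xffffffff00000000ULL;	/* only the upper 32 lanes */
  __mmask64 k = _mm512_mask_cmp_epi8_mask (m, a, a, _MM_CMPINT_EQ);
  printf ("%016llx\n", (unsigned long long) k);	/* ffffffff00000000 */
  return 0;
}
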
extern __inline __mmask64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_bitshuffle_epi64_mask (__mmask8 __M, __m512i __A, __m512i __B)
+_mm512_mask_bitshuffle_epi64_mask (__mmask64 __M, __m512i __A, __m512i __B)
{
return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v64qi) __A,
(__v64qi) __B,
extern __inline __mmask64
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epi8_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+_mm512_mask_cmp_epi8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
const int __P)
{
return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
extern __inline __mmask64
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epu8_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+_mm512_mask_cmp_epu8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
const int __P)
{
return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+_mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
(__v8di) __B,
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
+_mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
(__v8di) __B,
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
+_mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
{
return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
(__v8di) __Y, 4,
#define _mm512_maskz_insertf32x4(A, X, Y, C) \
((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
(__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
- (__mmask8)(A)))
+ (__mmask16)(A)))
#define _mm512_maskz_inserti32x4(A, X, Y, C) \
((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
(__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
- (__mmask8)(A)))
+ (__mmask16)(A)))
#define _mm512_mask_insertf32x4(A, B, X, Y, C) \
((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
(__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
- (__mmask8)(B)))
+ (__mmask16)(B)))
#define _mm512_mask_inserti32x4(A, B, X, Y, C) \
((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
(__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
- (__mmask8)(B)))
+ (__mmask16)(B)))
#endif
extern __inline __m512i
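
The _mm512_{mask,maskz}_insert{f,i}32x4 macros above operate on all 16
dword lanes of a zmm register, so the mask cast must keep 16 bits.  A
sketch of the user-visible difference (hypothetical function, assumes
-mavx512f at -O0 so the macro form is used):

/* Sketch only: with the old (__mmask8) cast, mask bits 8-15 were
   discarded, so the maskz form unconditionally zeroed lanes 8-15.  */
#include <immintrin.h>

__m512
demo_insert (__m512 a, __m128 b)
{
  return _mm512_maskz_insertf32x4 ((__mmask16) 0xa55a, a, b, 1);
}
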
(__v4si)(__m128i)(B),(int)(C))
#define _mm_mask_shrdi_epi32(A, B, C, D, E) \
((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(C), \
- (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask16)(B))
+ (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask8)(B)))
#define _mm_maskz_shrdi_epi32(A, B, C, D) \
((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B), \
(__v4si)(__m128i)(C),(int)(D), \
(__v4si)(__m128i)(B),(int)(C))
#define _mm_mask_shldi_epi32(A, B, C, D, E) \
((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(C), \
- (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask16)(B))
+ (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask8)(B)))
#define _mm_maskz_shldi_epi32(A, B, C, D) \
((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(B), \
(__v4si)(__m128i)(C),(int)(D), \
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epi8_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+_mm_mask_cmp_epi8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
const int __P)
{
return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epi8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
+_mm256_mask_cmp_epi8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
const int __P)
{
return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
(__mmask32) __U);
}
-extern __inline __mmask16
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_epi8_mask (__m256i __X, __m256i __Y, const int __P)
{
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epu8_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+_mm_mask_cmp_epu8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
const int __P)
{
return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epu8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
+_mm256_mask_cmp_epu8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
const int __P)
{
return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
(__mmask32) __U);
}
-extern __inline __mmask16
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_epu8_mask (__m256i __X, __m256i __Y, const int __P)
{
#define _mm_mask_cmp_epi16_mask(M, X, Y, P) \
((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X), \
(__v8hi)(__m128i)(Y), (int)(P),\
- (__mmask16)(M)))
+ (__mmask8)(M)))
#define _mm_mask_cmp_epi8_mask(M, X, Y, P) \
((__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi)(__m128i)(X), \
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask32 __U, __m128i __A)
+_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
{
return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
(__v16hi) __W,
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask32 __U, __m128i __A)
+_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
(__v8hi) __W,
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask32 __U, __m128i __A)
+_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
{
return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
(__v16hi) __W,
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask32 __U, __m128i __A)
+_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
(__v8hi) __W,
(__mmask8) __M);
}
-extern __inline __mmask8
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
- (__v32qi) __Y, 4,
- (__mmask8) __M);
+ return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 4,
+ (__mmask32) __M);
}
-extern __inline __mmask8
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
- (__v32qi) __Y, 1,
- (__mmask8) __M);
+ return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 1,
+ (__mmask32) __M);
}
-extern __inline __mmask8
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
- (__v32qi) __Y, 5,
- (__mmask8) __M);
+ return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 5,
+ (__mmask32) __M);
}
-extern __inline __mmask8
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
- (__v32qi) __Y, 2,
- (__mmask8) __M);
+ return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 2,
+ (__mmask32) __M);
}
-extern __inline __mmask8
+extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
- (__v16hi) __Y, 4,
- (__mmask8) __M);
+ return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 4,
+ (__mmask16) __M);
}
-extern __inline __mmask8
+extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
- (__v16hi) __Y, 1,
- (__mmask8) __M);
+ return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 1,
+ (__mmask16) __M);
}
-extern __inline __mmask8
+extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
- (__v16hi) __Y, 5,
- (__mmask8) __M);
+ return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 5,
+ (__mmask16) __M);
}
-extern __inline __mmask8
+extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
- (__v16hi) __Y, 2,
- (__mmask8) __M);
+ return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 2,
+ (__mmask16) __M);
}
-extern __inline __mmask8
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
- (__v32qi) __Y, 4,
- (__mmask8) __M);
+ return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 4,
+ (__mmask32) __M);
}
-extern __inline __mmask8
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
- (__v32qi) __Y, 1,
- (__mmask8) __M);
+ return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 1,
+ (__mmask32) __M);
}
-extern __inline __mmask8
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
- (__v32qi) __Y, 5,
- (__mmask8) __M);
+ return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 5,
+ (__mmask32) __M);
}
-extern __inline __mmask8
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
- (__v32qi) __Y, 2,
- (__mmask8) __M);
+ return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 2,
+ (__mmask32) __M);
}
-extern __inline __mmask8
+extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
- (__v16hi) __Y, 4,
- (__mmask8) __M);
+ return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 4,
+ (__mmask16) __M);
}
-extern __inline __mmask8
+extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
- (__v16hi) __Y, 1,
- (__mmask8) __M);
+ return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 1,
+ (__mmask16) __M);
}
-extern __inline __mmask8
+extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
- (__v16hi) __Y, 5,
- (__mmask8) __M);
+ return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 5,
+ (__mmask16) __M);
}
-extern __inline __mmask8
+extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
{
- return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
- (__v16hi) __Y, 2,
- (__mmask8) __M);
+ return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 2,
+ (__mmask16) __M);
}
#ifdef __DISABLE_AVX512VLBW__
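
A ymm register holds 32 bytes (or 16 words), so the 256-bit byte and
word compares above need __mmask32 and __mmask16 for both the incoming
mask and the result.  Sketch (hypothetical helper, assumes -mavx512vl
-mavx512bw):

/* Sketch only: under the old __mmask8 signature the compare was
   effectively clipped to the low 8 byte lanes.  */
#include <immintrin.h>

__mmask32
demo_cmpneq (__m256i x, __m256i y)
{
  return _mm256_mask_cmpneq_epi8_mask ((__mmask32) -1, x, y);
}
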
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
+_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
(__v4sf) __B,
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
+_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
(__v4sf) __B,
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
+_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
(__v8sf) __B,
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B)
+_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
(__v8sf) __B,
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
+_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
(__v4sf) __B,
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
+_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
(__v4sf) __B,
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
+_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
(__v8sf) __B,
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B)
+_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
(__v8sf) __B,
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A)
+_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
{
return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
(__v8sf)
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A)
+_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
{
return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
(__v4sf)
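
In the other direction, the 128/256-bit float operations above have at
most eight lanes, so __mmask8 is the accurate parameter type; the old
__mmask16 compiled, but advertised mask bits that could never select a
lane.  Sketch (hypothetical helper, assumes -mavx512vl):

/* Sketch only: just the low 8 mask bits are meaningful for an
   8-lane conversion.  */
#include <immintrin.h>

__m256
demo_cvt (__m256i a)
{
  return _mm256_maskz_cvtepi32_ps ((__mmask8) 0x5a, a);
}
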
+2018-07-11 Jakub Jelinek <jakub@redhat.com>
+
+ * gcc.target/i386/avx512bw-vpcmpb-2.c (CMP): Use SIZE macro instead
+ of hardcoding size. Cast (rel) to MASK_TYPE.
+ * gcc.target/i386/avx512bw-vpcmpub-2.c (CMP): Likewise.
+ * gcc.target/i386/avx512f-vinserti32x4-3.c: New test.
+ * gcc.target/i386/avx512f-vinsertf32x4-3.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpnequb-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpgeub-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpleb-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpgeb-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpltb-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpltub-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpleub-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpneqb-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpnequw-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpgeuw-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmplew-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpgew-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpltw-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpltuw-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpleuw-2.c: New test.
+ * gcc.target/i386/avx512vl-vpcmpneqw-2.c: New test.
+
+2018-07-11 Grazvydas Ignotas <notasas@gmail.com>
+
+ * gcc.target/i386/avx512bw-vpcmpb-2.c (SIZE): Define to
+ (AVX512F_LEN / 8) instead of (AVX512F_LEN / 16).
+ * gcc.target/i386/avx512bw-vpcmpub-2.c (SIZE): Likewise.
+
2018-07-10 Mark Wielaard <mark@klomp.org>

	PR debug/86459
#include "avx512f-helper.h"
#include <math.h>
-#define SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 8)
#include "avx512f-mask-type.h"
#if AVX512F_LEN == 512
#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
- for (i = 0; i < 64; i++) \
- { \
- dst_ref = ((rel) << i) | dst_ref; \
- } \
+ for (i = 0; i < SIZE; i++) \
+ dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref; \
source1.x = _mm512_loadu_si512 (s1); \
source2.x = _mm512_loadu_si512 (s2); \
dst1 = _mm512_cmp_epi8_mask (source1.x, source2.x, imm);\
#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
- for (i = 0; i < 32; i++) \
- { \
- dst_ref = ((rel) << i) | dst_ref; \
- } \
+ for (i = 0; i < SIZE; i++) \
+ dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref; \
source1.x = _mm256_loadu_si256 ((__m256i*)s1); \
source2.x = _mm256_loadu_si256 ((__m256i*)s2); \
dst1 = _mm256_cmp_epi8_mask (source1.x, source2.x, imm);\
#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
- for (i = 0; i < 16; i++) \
- { \
- dst_ref = ((rel) << i) | dst_ref; \
- } \
+ for (i = 0; i < SIZE; i++) \
+ dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref; \
source1.x = _mm_loadu_si128 ((__m128i*)s1); \
source2.x = _mm_loadu_si128 ((__m128i*)s2); \
dst1 = _mm_cmp_epi8_mask (source1.x, source2.x, imm);\
#include "avx512f-helper.h"
#include <math.h>
-#define SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 8)
#include "avx512f-mask-type.h"
#if AVX512F_LEN == 512
#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
- for (i = 0; i < 64; i++) \
- { \
- dst_ref = ((rel) << i) | dst_ref; \
- } \
+ for (i = 0; i < SIZE; i++) \
+ dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref; \
source1.x = _mm512_loadu_si512 (s1); \
source2.x = _mm512_loadu_si512 (s2); \
dst1 = _mm512_cmp_epu8_mask (source1.x, source2.x, imm);\
#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
- for (i = 0; i < 32; i++) \
- { \
- dst_ref = ((rel) << i) | dst_ref; \
- } \
+ for (i = 0; i < SIZE; i++) \
+ dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref; \
source1.x = _mm256_loadu_si256 ((__m256i*)s1); \
source2.x = _mm256_loadu_si256 ((__m256i*)s2); \
dst1 = _mm256_cmp_epu8_mask (source1.x, source2.x, imm);\
#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
- for (i = 0; i < 16; i++) \
- { \
- dst_ref = ((rel) << i) | dst_ref; \
- } \
+ for (i = 0; i < SIZE; i++) \
+ dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref; \
source1.x = _mm_loadu_si128 ((__m128i*)s1); \
source2.x = _mm_loadu_si128 ((__m128i*)s2); \
dst1 = _mm_cmp_epu8_mask (source1.x, source2.x, imm);\
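
The (MASK_TYPE) cast in the rewritten CMP loops matters because rel is
an int expression: shifting a 32-bit value by 32 or more positions is
undefined behavior, so with SIZE == 64 the old loop could never build
the upper half of dst_ref.  A standalone sketch of the distinction
(plain C, no intrinsics):

#include <stdio.h>

int
main (void)
{
  unsigned long long dst_ref = 0;
  int rel = 1, i = 40;
  /* (rel << i) would be undefined here; converting first keeps the
     shift within the range of the 64-bit type.  */
  dst_ref |= (unsigned long long) rel << i;
  printf ("%016llx\n", dst_ref);	/* 0000010000000000 */
  return 0;
}
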
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vinsertf32x4-3.c
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#define AVX512F
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void static
+CALC (UNION_TYPE (AVX512F_LEN,) s1, union128 s2, float *res_ref, int imm)
+{
+ memcpy (res_ref, s1.a, SIZE * sizeof (float));
+ memcpy (res_ref + imm * 4, s2.a, 16);
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) s1, res1, res2, res3;
+ union128 s2;
+ float res_ref[SIZE];
+ int j;
+
+ MASK_TYPE mask = (MASK_TYPE) 0xa55a;
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = j * j / 10.2;
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ for (j = 0; j < 4; j++)
+ s2.a[j] = j * j * j / 2.03;
+
+ res1.x = INTRINSIC (_insertf32x4) (s1.x, s2.x, 1);
+ res2.x = INTRINSIC (_mask_insertf32x4) (res2.x, mask, s1.x, s2.x, 1);
+ res3.x = INTRINSIC (_maskz_insertf32x4) (mask, s1.x, s2.x, 1);
+
+ CALC (s1, s2, res_ref, 1);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+}
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vinserti32x4-3.c
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#define AVX512F
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void static
+CALC (UNION_TYPE (AVX512F_LEN, i_d) s1, union128i_d s2, int *res_ref, int imm)
+{
+ memcpy (res_ref, s1.a, SIZE * sizeof (int));
+ memcpy (res_ref + imm * 4, s2.a, 16);
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, res1, res2, res3;
+ union128i_d s2;
+ int res_ref[SIZE];
+ int j;
+
+ MASK_TYPE mask = (MASK_TYPE) 0xa55a;
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = j * j;
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ for (j = 0; j < 4; j++)
+ s2.a[j] = j * j * j;
+
+ res1.x = INTRINSIC (_inserti32x4) (s1.x, s2.x, 1);
+ res2.x = INTRINSIC (_mask_inserti32x4) (res2.x, mask, s1.x, s2.x, 1);
+ res3.x = INTRINSIC (_maskz_inserti32x4) (mask, s1.x, s2.x, 1);
+
+ CALC (s1, s2, res_ref, 1);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeb-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeb-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeub-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeub-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeub-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeuw-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeuw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeuw-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgew-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgew-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgew-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleb-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleb-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleub-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleub-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleb-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleuw-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleuw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmplew-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmplew-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmplew-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmplew-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltb-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltb-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltub-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltub-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltub-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltuw-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltuw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltuw-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltw-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltw-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqb-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpneqb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpneqb-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequb-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpnequb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpnequb-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequw-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpnequw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpnequw-2.c"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqw-2.c
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpneqw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpneqw-2.c"