From: Jakub Jelinek Date: Thu, 9 Mar 2017 09:11:06 +0000 (+0100) Subject: re PR target/79932 (_mm512_packus_epi32 does not compile under -O0) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6b62f323022656906067769236c31c74b9e10f47;p=gcc.git re PR target/79932 (_mm512_packus_epi32 does not compile under -O0) PR target/79932 * config/i386/avx512vlintrin.h (_mm256_cmpge_epi32_mask, _mm256_cmpge_epi64_mask, _mm256_cmpge_epu32_mask, _mm256_cmpge_epu64_mask, _mm256_cmple_epi32_mask, _mm256_cmple_epi64_mask, _mm256_cmple_epu32_mask, _mm256_cmple_epu64_mask, _mm256_cmplt_epi32_mask, _mm256_cmplt_epi64_mask, _mm256_cmplt_epu32_mask, _mm256_cmplt_epu64_mask, _mm256_cmpneq_epi32_mask, _mm256_cmpneq_epi64_mask, _mm256_cmpneq_epu32_mask, _mm256_cmpneq_epu64_mask, _mm256_mask_cmpge_epi32_mask, _mm256_mask_cmpge_epi64_mask, _mm256_mask_cmpge_epu32_mask, _mm256_mask_cmpge_epu64_mask, _mm256_mask_cmple_epi32_mask, _mm256_mask_cmple_epi64_mask, _mm256_mask_cmple_epu32_mask, _mm256_mask_cmple_epu64_mask, _mm256_mask_cmplt_epi32_mask, _mm256_mask_cmplt_epi64_mask, _mm256_mask_cmplt_epu32_mask, _mm256_mask_cmplt_epu64_mask, _mm256_mask_cmpneq_epi32_mask, _mm256_mask_cmpneq_epi64_mask, _mm256_mask_cmpneq_epu32_mask, _mm256_mask_cmpneq_epu64_mask, _mm_cmpge_epi32_mask, _mm_cmpge_epi64_mask, _mm_cmpge_epu32_mask, _mm_cmpge_epu64_mask, _mm_cmple_epi32_mask, _mm_cmple_epi64_mask, _mm_cmple_epu32_mask, _mm_cmple_epu64_mask, _mm_cmplt_epi32_mask, _mm_cmplt_epi64_mask, _mm_cmplt_epu32_mask, _mm_cmplt_epu64_mask, _mm_cmpneq_epi32_mask, _mm_cmpneq_epi64_mask, _mm_cmpneq_epu32_mask, _mm_cmpneq_epu64_mask, _mm_mask_cmpge_epi32_mask, _mm_mask_cmpge_epi64_mask, _mm_mask_cmpge_epu32_mask, _mm_mask_cmpge_epu64_mask, _mm_mask_cmple_epi32_mask, _mm_mask_cmple_epi64_mask, _mm_mask_cmple_epu32_mask, _mm_mask_cmple_epu64_mask, _mm_mask_cmplt_epi32_mask, _mm_mask_cmplt_epi64_mask, _mm_mask_cmplt_epu32_mask, _mm_mask_cmplt_epu64_mask, _mm_mask_cmpneq_epi32_mask, _mm_mask_cmpneq_epi64_mask, 
_mm_mask_cmpneq_epu32_mask, _mm_mask_cmpneq_epu64_mask): Move definitions outside of __OPTIMIZE__ guarded section. * gcc.target/i386/pr79932-2.c: New test. From-SVN: r245990 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 33f340b49b8..3d85fe0e981 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,38 @@ 2017-03-09 Jakub Jelinek + PR target/79932 + * config/i386/avx512vlintrin.h (_mm256_cmpge_epi32_mask, + _mm256_cmpge_epi64_mask, _mm256_cmpge_epu32_mask, + _mm256_cmpge_epu64_mask, _mm256_cmple_epi32_mask, + _mm256_cmple_epi64_mask, _mm256_cmple_epu32_mask, + _mm256_cmple_epu64_mask, _mm256_cmplt_epi32_mask, + _mm256_cmplt_epi64_mask, _mm256_cmplt_epu32_mask, + _mm256_cmplt_epu64_mask, _mm256_cmpneq_epi32_mask, + _mm256_cmpneq_epi64_mask, _mm256_cmpneq_epu32_mask, + _mm256_cmpneq_epu64_mask, _mm256_mask_cmpge_epi32_mask, + _mm256_mask_cmpge_epi64_mask, _mm256_mask_cmpge_epu32_mask, + _mm256_mask_cmpge_epu64_mask, _mm256_mask_cmple_epi32_mask, + _mm256_mask_cmple_epi64_mask, _mm256_mask_cmple_epu32_mask, + _mm256_mask_cmple_epu64_mask, _mm256_mask_cmplt_epi32_mask, + _mm256_mask_cmplt_epi64_mask, _mm256_mask_cmplt_epu32_mask, + _mm256_mask_cmplt_epu64_mask, _mm256_mask_cmpneq_epi32_mask, + _mm256_mask_cmpneq_epi64_mask, _mm256_mask_cmpneq_epu32_mask, + _mm256_mask_cmpneq_epu64_mask, _mm_cmpge_epi32_mask, + _mm_cmpge_epi64_mask, _mm_cmpge_epu32_mask, _mm_cmpge_epu64_mask, + _mm_cmple_epi32_mask, _mm_cmple_epi64_mask, _mm_cmple_epu32_mask, + _mm_cmple_epu64_mask, _mm_cmplt_epi32_mask, _mm_cmplt_epi64_mask, + _mm_cmplt_epu32_mask, _mm_cmplt_epu64_mask, _mm_cmpneq_epi32_mask, + _mm_cmpneq_epi64_mask, _mm_cmpneq_epu32_mask, _mm_cmpneq_epu64_mask, + _mm_mask_cmpge_epi32_mask, _mm_mask_cmpge_epi64_mask, + _mm_mask_cmpge_epu32_mask, _mm_mask_cmpge_epu64_mask, + _mm_mask_cmple_epi32_mask, _mm_mask_cmple_epi64_mask, + _mm_mask_cmple_epu32_mask, _mm_mask_cmple_epu64_mask, + _mm_mask_cmplt_epi32_mask, _mm_mask_cmplt_epi64_mask, + _mm_mask_cmplt_epu32_mask, 
_mm_mask_cmplt_epu64_mask, + _mm_mask_cmpneq_epi32_mask, _mm_mask_cmpneq_epi64_mask, + _mm_mask_cmpneq_epu32_mask, _mm_mask_cmpneq_epu64_mask): Move + definitions outside of __OPTIMIZE__ guarded section. + PR target/79932 * config/i386/avx512bwintrin.h (_mm512_packs_epi32, _mm512_maskz_packs_epi32, _mm512_mask_packs_epi32, diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index 9750cd811aa..f62f641188e 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -9172,3192 +9172,3192 @@ _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X, __M); } -#ifdef __OPTIMIZE__ -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M, - __m256i __X, const int __I) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X, - __I, - (__v4di) __W, + return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, + (__v8si) __Y, 4, (__mmask8) __M); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X, - __I, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) __M); + return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, + (__v8si) __Y, 4, + (__mmask8) -1); } -extern __inline __m256d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A, - __m256d __B, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A, - (__v4df) __B, __imm, - (__v4df) __W, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, + (__v8si) __Y, 1, + (__mmask8) __M); } -extern __inline __m256d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y) { - return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A, - (__v4df) __B, __imm, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, + (__v8si) __Y, 1, + (__mmask8) -1); } -extern __inline __m128d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A, - __m128d __B, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A, - (__v2df) __B, __imm, - (__v2df) __W, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, + (__v8si) __Y, 5, + (__mmask8) __M); } -extern __inline __m128d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y) { - return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A, - (__v2df) __B, __imm, - (__v2df) - _mm_setzero_pd (), - (__mmask8) __U); + return (__mmask8) 
__builtin_ia32_ucmpd256_mask ((__v8si) __X, + (__v8si) __Y, 5, + (__mmask8) -1); } -extern __inline __m256 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A, - __m256 __B, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A, - (__v8sf) __B, __imm, - (__v8sf) __W, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, + (__v8si) __Y, 2, + (__mmask8) __M); } -extern __inline __m256 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y) { - return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A, - (__v8sf) __B, __imm, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, + (__v8si) __Y, 2, + (__mmask8) -1); } -extern __inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A, - (__v4sf) __B, __imm, - (__v4sf) __W, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, + (__v4di) __Y, 4, + (__mmask8) __M); } -extern __inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B, - const int __imm) +extern 
__inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y) { - return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A, - (__v4sf) __B, __imm, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, + (__v4di) __Y, 4, + (__mmask8) -1); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A, - (__v4si) __B, - __imm, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) -1); + return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, + (__v4di) __Y, 1, + (__mmask8) __M); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A, - __m128i __B, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A, - (__v4si) __B, - __imm, - (__v8si) __W, - (__mmask8) - __U); + return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, + (__v4di) __Y, 1, + (__mmask8) -1); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A, - (__v4si) __B, - __imm, - (__v8si) - 
_mm256_setzero_si256 (), - (__mmask8) - __U); + return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, + (__v4di) __Y, 5, + (__mmask8) __M); } -extern __inline __m256 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y) { - return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A, - (__v4sf) __B, - __imm, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) -1); + return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, + (__v4di) __Y, 5, + (__mmask8) -1); } -extern __inline __m256 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A, - __m128 __B, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A, - (__v4sf) __B, - __imm, - (__v8sf) __W, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, + (__v4di) __Y, 2, + (__mmask8) __M); } -extern __inline __m256 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y) { - return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A, - (__v4sf) __B, - __imm, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, + (__v4di) __Y, 2, + (__mmask8) -1); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_extracti32x4_epi32 
(__m256i __A, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A, - __imm, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) -1); + return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, + (__v8si) __Y, 4, + (__mmask8) __M); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A, - const int __imm) -{ - return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A, - __imm, - (__v4si) __W, - (__mmask8) - __U); +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y) +{ + return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, + (__v8si) __Y, 4, + (__mmask8) -1); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A, - __imm, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) - __U); + return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, + (__v8si) __Y, 1, + (__mmask8) __M); } -extern __inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_extractf32x4_ps (__m256 __A, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y) { - return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A, - __imm, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) 
-1); + return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, + (__v8si) __Y, 1, + (__mmask8) -1); } -extern __inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A, - __imm, - (__v4sf) __W, - (__mmask8) - __U); + return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, + (__v8si) __Y, 5, + (__mmask8) __M); } -extern __inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y) { - return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A, - __imm, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) - __U); + return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, + (__v8si) __Y, 5, + (__mmask8) -1); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A, - (__v4di) __B, - __imm, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) -1); + return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, + (__v8si) __Y, 2, + (__mmask8) __M); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A, - __m256i __B, const int 
__imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A, - (__v4di) __B, - __imm, - (__v4di) __W, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, + (__v8si) __Y, 2, + (__mmask8) -1); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A, - (__v4di) __B, - __imm, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, + (__v4di) __Y, 4, + (__mmask8) __M); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A, - (__v8si) __B, - __imm, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) -1); + return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, + (__v4di) __Y, 4, + (__mmask8) -1); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A, - __m256i __B, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A, - (__v8si) 
__B, - __imm, - (__v8si) __W, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, + (__v4di) __Y, 1, + (__mmask8) __M); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A, - (__v8si) __B, - __imm, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, + (__v4di) __Y, 1, + (__mmask8) -1); } -extern __inline __m256d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A, - (__v4df) __B, - __imm, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) -1); + return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, + (__v4di) __Y, 5, + (__mmask8) __M); } -extern __inline __m256d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A, - __m256d __B, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y) { - return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A, - (__v4df) __B, - __imm, - (__v4df) __W, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, + (__v4di) __Y, 5, + (__mmask8) -1); } -extern __inline __m256d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A, - (__v4df) __B, - __imm, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, + (__v4di) __Y, 2, + (__mmask8) __M); } -extern __inline __m256 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y) { - return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A, - (__v8sf) __B, - __imm, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) -1); + return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, + (__v4di) __Y, 2, + (__mmask8) -1); } -extern __inline __m256 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A, - __m256 __B, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A, - (__v8sf) __B, - __imm, - (__v8sf) __W, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, + (__v4si) __Y, 4, + (__mmask8) __M); } -extern __inline __m256 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y) { - return (__m256) 
__builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A, - (__v8sf) __B, - __imm, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, + (__v4si) __Y, 4, + (__mmask8) -1); } -extern __inline __m256d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4di) __C, - __imm, - (__mmask8) -1); + return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, + (__v4si) __Y, 1, + (__mmask8) __M); } -extern __inline __m256d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B, - __m256i __C, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y) { - return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4di) __C, - __imm, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, + (__v4si) __Y, 1, + (__mmask8) -1); } -extern __inline __m256d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B, - __m256i __C, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A, - (__v4df) __B, - (__v4di) __C, - __imm, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, + (__v4si) __Y, 5, + (__mmask8) __M); } -extern __inline __m256 
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y) { - return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A, - (__v8sf) __B, - (__v8si) __C, - __imm, - (__mmask8) -1); + return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, + (__v4si) __Y, 5, + (__mmask8) -1); } -extern __inline __m256 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B, - __m256i __C, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A, - (__v8sf) __B, - (__v8si) __C, - __imm, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, + (__v4si) __Y, 2, + (__mmask8) __M); } -extern __inline __m256 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B, - __m256i __C, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmple_epu32_mask (__m128i __X, __m128i __Y) { - return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A, - (__v8sf) __B, - (__v8si) __C, - __imm, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, + (__v4si) __Y, 2, + (__mmask8) -1); } -extern __inline __m128d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, 
__m128i __Y) { - return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A, - (__v2df) __B, - (__v2di) __C, - __imm, - (__mmask8) -1); + return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, + (__v2di) __Y, 4, + (__mmask8) __M); } -extern __inline __m128d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B, - __m128i __C, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y) { - return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A, - (__v2df) __B, - (__v2di) __C, - __imm, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, + (__v2di) __Y, 4, + (__mmask8) -1); } -extern __inline __m128d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B, - __m128i __C, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A, - (__v2df) __B, - (__v2di) __C, - __imm, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, + (__v2di) __Y, 1, + (__mmask8) __M); } -extern __inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y) { - return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4si) __C, - __imm, - (__mmask8) -1); + return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, + (__v2di) __Y, 1, + (__mmask8) -1); } -extern __inline __m128 -__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B, - __m128i __C, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4si) __C, - __imm, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, + (__v2di) __Y, 5, + (__mmask8) __M); } -extern __inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B, - __m128i __C, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y) { - return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A, - (__v4sf) __B, - (__v4si) __C, - __imm, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, + (__v2di) __Y, 5, + (__mmask8) -1); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm, - (__v8si) __W, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, + (__v2di) __Y, 2, + (__mmask8) __M); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmple_epu64_mask (__m128i __X, __m128i __Y) { - return (__m256i) 
__builtin_ia32_psrldi256_mask ((__v8si) __A, __imm, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, + (__v2di) __Y, 2, + (__mmask8) -1); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm, - (__v4si) __W, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, + (__v4si) __Y, 4, + (__mmask8) __M); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y) +{ + return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, + (__v4si) __Y, 4, + (__mmask8) -1); +} + +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y) +{ + return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, + (__v4si) __Y, 1, + (__mmask8) __M); +} + +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, + (__v4si) __Y, 1, + (__mmask8) -1); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A, - const int 
__imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm, - (__v4di) __W, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, + (__v4si) __Y, 5, + (__mmask8) __M); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y) { - return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, + (__v4si) __Y, 5, + (__mmask8) -1); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm, - (__v2di) __W, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, + (__v4si) __Y, 2, + (__mmask8) __M); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmple_epi32_mask (__m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, + (__v4si) __Y, 2, + 
(__mmask8) -1); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) __C, __imm, - (__mmask8) -1); + return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, + (__v2di) __Y, 4, + (__mmask8) __M); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U, - __m256i __B, __m256i __C, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y) { - return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) __C, __imm, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, + (__v2di) __Y, 4, + (__mmask8) -1); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A, - __m256i __B, __m256i __C, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A, - (__v4di) __B, - (__v4di) __C, - __imm, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, + (__v2di) __Y, 1, + (__mmask8) __M); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C, - const int __imm) +extern __inline __mmask8 + __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y) { - return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A, - (__v8si) __B, - (__v8si) __C, __imm, - (__mmask8) -1); + return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, + (__v2di) __Y, 1, + (__mmask8) -1); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U, - __m256i __B, __m256i __C, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A, - (__v8si) __B, - (__v8si) __C, __imm, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, + (__v2di) __Y, 5, + (__mmask8) __M); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A, - __m256i __B, __m256i __C, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y) { - return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A, - (__v8si) __B, - (__v8si) __C, - __imm, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, + (__v2di) __Y, 5, + (__mmask8) -1); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C, - const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) __C, __imm, - (__mmask8) -1); + return (__mmask8) 
__builtin_ia32_cmpq128_mask ((__v2di) __X, + (__v2di) __Y, 2, + (__mmask8) __M); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U, - __m128i __B, __m128i __C, const int __imm) +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmple_epi64_mask (__m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) __C, __imm, - (__mmask8) __U); + return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, + (__v2di) __Y, 2, + (__mmask8) -1); } -extern __inline __m128i +#ifdef __OPTIMIZE__ +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A, - __m128i __B, __m128i __C, const int __imm) +_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M, + __m256i __X, const int __I) { - return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A, - (__v2di) __B, - (__v2di) __C, - __imm, - (__mmask8) __U); + return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X, + __I, + (__v4di) __W, + (__mmask8) __M); } -extern __inline __m128i +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C, - const int __imm) +_mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I) { - return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A, - (__v4si) __B, - (__v4si) __C, __imm, - (__mmask8) -1); + return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X, + __I, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __M); } -extern __inline __m128i +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U, - __m128i __B, __m128i __C, const int __imm) +_mm256_mask_shuffle_pd 
(__m256d __W, __mmask8 __U, __m256d __A, + __m256d __B, const int __imm) { - return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A, - (__v4si) __B, - (__v4si) __C, __imm, - (__mmask8) __U); + return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A, + (__v4df) __B, __imm, + (__v4df) __W, + (__mmask8) __U); } -extern __inline __m128i +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A, - __m128i __B, __m128i __C, const int __imm) +_mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B, + const int __imm) { - return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A, - (__v4si) __B, - (__v4si) __C, - __imm, - (__mmask8) __U); + return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A, + (__v4df) __B, __imm, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); } -extern __inline __m256 +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_roundscale_ps (__m256 __A, const int __imm) +_mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, const int __imm) { - return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A, - __imm, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) -1); + return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A, + (__v2df) __B, __imm, + (__v2df) __W, + (__mmask8) __U); } -extern __inline __m256 +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A, - const int __imm) +_mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B, + const int __imm) { - return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A, - __imm, - (__v8sf) __W, - (__mmask8) __U); + return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A, + (__v2df) __B, __imm, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U); } extern __inline __m256 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm) +_mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A, + __m256 __B, const int __imm) { - return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A, - __imm, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) __U); + return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A, + (__v8sf) __B, __imm, + (__v8sf) __W, + (__mmask8) __U); } -extern __inline __m256d +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_roundscale_pd (__m256d __A, const int __imm) +_mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B, + const int __imm) { - return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, - __imm, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) -1); + return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A, + (__v8sf) __B, __imm, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); } -extern __inline __m256d +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A, - const int __imm) +_mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, + const int __imm) { - return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, - __imm, - (__v4df) __W, - (__mmask8) __U); + return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A, + (__v4sf) __B, __imm, + (__v4sf) __W, + (__mmask8) __U); } -extern __inline __m256d +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm) +_mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B, + const int __imm) { - return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, - __imm, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) __U); + return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) 
__A, + (__v4sf) __B, __imm, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U); } -extern __inline __m128 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_roundscale_ps (__m128 __A, const int __imm) +_mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm) { - return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, - __imm, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) -1); + return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A, + (__v4si) __B, + __imm, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1); } -extern __inline __m128 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A, - const int __imm) +_mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A, + __m128i __B, const int __imm) { - return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, - __imm, - (__v4sf) __W, - (__mmask8) __U); + return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A, + (__v4si) __B, + __imm, + (__v8si) __W, + (__mmask8) + __U); } -extern __inline __m128 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm) +_mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B, + const int __imm) { - return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, - __imm, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) __U); + return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A, + (__v4si) __B, + __imm, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) + __U); } -extern __inline __m128d +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_roundscale_pd (__m128d __A, const int __imm) +_mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm) { - return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, + 
return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A, + (__v4sf) __B, __imm, - (__v2df) - _mm_setzero_pd (), + (__v8sf) + _mm256_setzero_ps (), (__mmask8) -1); } -extern __inline __m128d +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A, - const int __imm) +_mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A, + __m128 __B, const int __imm) { - return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, + return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A, + (__v4sf) __B, __imm, - (__v2df) __W, + (__v8sf) __W, (__mmask8) __U); } -extern __inline __m128d +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm) +_mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B, + const int __imm) { - return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, + return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A, + (__v4sf) __B, __imm, - (__v2df) - _mm_setzero_pd (), + (__v8sf) + _mm256_setzero_ps (), (__mmask8) __U); } -extern __inline __m256 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) +_mm256_extracti32x4_epi32 (__m256i __A, const int __imm) { - return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A, - (__C << 2) | __B, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) -1); + return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A, + __imm, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); } -extern __inline __m256 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A, - _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) 
+_mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A, + const int __imm) { - return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A, - (__C << 2) | __B, - (__v8sf) __W, - (__mmask8) __U); + return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A, + __imm, + (__v4si) __W, + (__mmask8) + __U); } -extern __inline __m256 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A, - _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) +_mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A, + const int __imm) { - return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A, - (__C << 2) | __B, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) __U); + return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A, + __imm, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) + __U); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) +_mm256_extractf32x4_ps (__m256 __A, const int __imm) { - return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A, - (__C << 2) | __B, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) -1); + return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A, + __imm, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A, - _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) +_mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A, + const int __imm) { - return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A, - (__C << 2) | __B, - (__v4sf) __W, - (__mmask8) __U); + return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A, + __imm, + (__v4sf) __W, + (__mmask8) + __U); } extern __inline __m128 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_getmant_ps (__mmask8 __U, __m128 __A, - _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) +_mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A, + const int __imm) +{ + return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A, + __imm, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) + __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm) +{ + return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A, + (__v4di) __B, + __imm, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B, const int __imm) { - return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A, - (__C << 2) | __B, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) __U); + return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A, + (__v4di) __B, + __imm, + (__v4di) __W, + (__mmask8) __U); } -extern __inline __m256d +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) +_mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B, + const int __imm) { - return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A, - (__C << 2) | __B, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) -1); + return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A, + (__v4di) __B, + __imm, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); } -extern __inline __m256d +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A, - _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) 
+_mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm) { - return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A, - (__C << 2) | __B, - (__v4df) __W, - (__mmask8) __U); + return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A, + (__v8si) __B, + __imm, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1); } -extern __inline __m256d +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A, - _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) +_mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B, const int __imm) { - return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A, - (__C << 2) | __B, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) __U); + return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A, + (__v8si) __B, + __imm, + (__v8si) __W, + (__mmask8) __U); } -extern __inline __m128d +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) +_mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B, + const int __imm) { - return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A, - (__C << 2) | __B, - (__v2df) - _mm_setzero_pd (), - (__mmask8) -1); + return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A, + (__v8si) __B, + __imm, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); } -extern __inline __m128d +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A, - _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) +_mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm) { - return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A, - (__C << 2) | __B, - (__v2df) __W, - (__mmask8) __U); + return (__m256d) 
__builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A, + (__v4df) __B, + __imm, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) -1); } -extern __inline __m128d +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_getmant_pd (__mmask8 __U, __m128d __A, - _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) +_mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A, + __m256d __B, const int __imm) { - return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A, - (__C << 2) | __B, - (__v2df) - _mm_setzero_pd (), - (__mmask8) __U); + return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A, + (__v4df) __B, + __imm, + (__v4df) __W, + (__mmask8) __U); } -extern __inline __m256 +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask, - __m256i __index, void const *__addr, - int __scale) +_mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B, + const int __imm) { - return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old, - __addr, - (__v8si) __index, - __mask, __scale); + return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A, + (__v4df) __B, + __imm, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); } -extern __inline __m128 +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask, - __m128i __index, void const *__addr, - int __scale) +_mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm) { - return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old, - __addr, - (__v4si) __index, - __mask, __scale); + return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A, + (__v8sf) __B, + __imm, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) -1); } -extern __inline __m256d +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask, - __m128i __index, void const *__addr, - int __scale) +_mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A, + __m256 __B, const int __imm) { - return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old, - __addr, - (__v4si) __index, - __mask, __scale); + return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A, + (__v8sf) __B, + __imm, + (__v8sf) __W, + (__mmask8) __U); } -extern __inline __m128d +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask, - __m128i __index, void const *__addr, - int __scale) +_mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B, + const int __imm) { - return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old, - __addr, - (__v4si) __index, - __mask, __scale); + return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A, + (__v8sf) __B, + __imm, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); } -extern __inline __m128 +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask, - __m256i __index, void const *__addr, - int __scale) +_mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C, + const int __imm) { - return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old, - __addr, - (__v4di) __index, - __mask, __scale); + return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4di) __C, + __imm, + (__mmask8) -1); } -extern __inline __m128 +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask, - __m128i __index, void const *__addr, - int __scale) +_mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B, + __m256i __C, const int __imm) { - return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) 
__v1_old, - __addr, - (__v2di) __index, - __mask, __scale); + return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4di) __C, + __imm, + (__mmask8) __U); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask, - __m256i __index, void const *__addr, - int __scale) +_mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B, + __m256i __C, const int __imm) { - return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old, - __addr, - (__v4di) __index, - __mask, __scale); + return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A, + (__v4df) __B, + (__v4di) __C, + __imm, + (__mmask8) __U); } -extern __inline __m128d +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask, - __m128i __index, void const *__addr, - int __scale) +_mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C, + const int __imm) { - return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old, - __addr, - (__v2di) __index, - __mask, __scale); + return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8si) __C, + __imm, + (__mmask8) -1); } -extern __inline __m256i +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask, - __m256i __index, void const *__addr, - int __scale) +_mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B, + __m256i __C, const int __imm) { - return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old, - __addr, - (__v8si) __index, - __mask, __scale); + return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8si) __C, + __imm, + (__mmask8) __U); } -extern __inline __m128i +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) -_mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask, - __m128i __index, void const *__addr, - int __scale) +_mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B, + __m256i __C, const int __imm) { - return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old, - __addr, - (__v4si) __index, - __mask, __scale); + return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A, + (__v8sf) __B, + (__v8si) __C, + __imm, + (__mmask8) __U); } -extern __inline __m256i +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask, - __m128i __index, void const *__addr, - int __scale) +_mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C, + const int __imm) { - return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old, - __addr, - (__v4si) __index, - __mask, __scale); + return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2di) __C, + __imm, + (__mmask8) -1); } -extern __inline __m128i +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask, - __m128i __index, void const *__addr, - int __scale) +_mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B, + __m128i __C, const int __imm) { - return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old, - __addr, - (__v4si) __index, - __mask, __scale); + return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2di) __C, + __imm, + (__mmask8) __U); } -extern __inline __m128i +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask, - __m256i __index, void const *__addr, - int __scale) +_mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B, + __m128i __C, const int __imm) { - return (__m128i) 
__builtin_ia32_gather3div8si ((__v4si) __v1_old, - __addr, - (__v4di) __index, - __mask, __scale); + return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A, + (__v2df) __B, + (__v2di) __C, + __imm, + (__mmask8) __U); } -extern __inline __m128i +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask, - __m128i __index, void const *__addr, - int __scale) +_mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm) { - return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old, - __addr, - (__v2di) __index, - __mask, __scale); + return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4si) __C, + __imm, + (__mmask8) -1); } -extern __inline __m256i +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask, - __m256i __index, void const *__addr, - int __scale) +_mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B, + __m128i __C, const int __imm) { - return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old, - __addr, - (__v4di) __index, - __mask, __scale); + return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4si) __C, + __imm, + (__mmask8) __U); } -extern __inline __m128i +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask, - __m128i __index, void const *__addr, - int __scale) +_mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B, + __m128i __C, const int __imm) { - return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old, - __addr, - (__v2di) __index, - __mask, __scale); + return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A, + (__v4sf) __B, + (__v4si) __C, + __imm, + (__mmask8) __U); } -extern __inline void +extern __inline __m256i 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_i32scatter_ps (void *__addr, __m256i __index, - __m256 __v1, const int __scale) +_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A, + const int __imm) { - __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF, - (__v8si) __index, (__v8sf) __v1, - __scale); + return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm, + (__v8si) __W, + (__mmask8) __U); } -extern __inline void +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_i32scatter_ps (void *__addr, __mmask8 __mask, - __m256i __index, __m256 __v1, - const int __scale) +_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm) { - __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index, - (__v8sf) __v1, __scale); + return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); } -extern __inline void +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_i32scatter_ps (void *__addr, __m128i __index, __m128 __v1, - const int __scale) +_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, + const int __imm) { - __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF, - (__v4si) __index, (__v4sf) __v1, - __scale); + return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm, + (__v4si) __W, + (__mmask8) __U); } -extern __inline void +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_i32scatter_ps (void *__addr, __mmask8 __mask, - __m128i __index, __m128 __v1, - const int __scale) +_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm) { - __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index, - (__v4sf) __v1, __scale); + return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); } -extern 
__inline void +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_i32scatter_pd (void *__addr, __m128i __index, - __m256d __v1, const int __scale) +_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A, + const int __imm) { - __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF, - (__v4si) __index, (__v4df) __v1, - __scale); + return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm, + (__v4di) __W, + (__mmask8) __U); } -extern __inline void +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_i32scatter_pd (void *__addr, __mmask8 __mask, - __m128i __index, __m256d __v1, - const int __scale) +_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm) { - __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index, - (__v4df) __v1, __scale); + return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); } -extern __inline void +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_i32scatter_pd (void *__addr, __m128i __index, - __m128d __v1, const int __scale) +_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, + const int __imm) { - __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF, - (__v4si) __index, (__v2df) __v1, - __scale); + return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm, + (__v2di) __W, + (__mmask8) __U); } -extern __inline void +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_i32scatter_pd (void *__addr, __mmask8 __mask, - __m128i __index, __m128d __v1, - const int __scale) +_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm) { - __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index, - (__v2df) __v1, __scale); + return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm, + (__v2di) + 
_mm_setzero_si128 (), + (__mmask8) __U); } -extern __inline void +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_i64scatter_ps (void *__addr, __m256i __index, - __m128 __v1, const int __scale) +_mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C, + const int __imm) { - __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF, - (__v4di) __index, (__v4sf) __v1, - __scale); + return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) __C, __imm, + (__mmask8) -1); } -extern __inline void +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_i64scatter_ps (void *__addr, __mmask8 __mask, - __m256i __index, __m128 __v1, - const int __scale) +_mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U, + __m256i __B, __m256i __C, + const int __imm) { - __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index, - (__v4sf) __v1, __scale); + return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) __C, __imm, + (__mmask8) __U); } -extern __inline void +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_i64scatter_ps (void *__addr, __m128i __index, __m128 __v1, - const int __scale) +_mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A, + __m256i __B, __m256i __C, + const int __imm) { - __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF, - (__v2di) __index, (__v4sf) __v1, - __scale); + return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A, + (__v4di) __B, + (__v4di) __C, + __imm, + (__mmask8) __U); } -extern __inline void +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_i64scatter_ps (void *__addr, __mmask8 __mask, - __m128i __index, __m128 __v1, - const int __scale) +_mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C, + const int __imm) { - 
__builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index, - (__v4sf) __v1, __scale); + return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) __C, __imm, + (__mmask8) -1); } -extern __inline void +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_i64scatter_pd (void *__addr, __m256i __index, - __m256d __v1, const int __scale) +_mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U, + __m256i __B, __m256i __C, + const int __imm) { - __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF, - (__v4di) __index, (__v4df) __v1, - __scale); + return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) __C, __imm, + (__mmask8) __U); } -extern __inline void +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_i64scatter_pd (void *__addr, __mmask8 __mask, - __m256i __index, __m256d __v1, - const int __scale) +_mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A, + __m256i __B, __m256i __C, + const int __imm) { - __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index, - (__v4df) __v1, __scale); + return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A, + (__v8si) __B, + (__v8si) __C, + __imm, + (__mmask8) __U); } -extern __inline void +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_i64scatter_pd (void *__addr, __m128i __index, - __m128d __v1, const int __scale) +_mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C, + const int __imm) { - __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF, - (__v2di) __index, (__v2df) __v1, - __scale); + return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) __C, __imm, + (__mmask8) -1); } -extern __inline void +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_i64scatter_pd (void *__addr, 
__mmask8 __mask, - __m128i __index, __m128d __v1, - const int __scale) +_mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U, + __m128i __B, __m128i __C, const int __imm) { - __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index, - (__v2df) __v1, __scale); + return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) __C, __imm, + (__mmask8) __U); } -extern __inline void +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_i32scatter_epi32 (void *__addr, __m256i __index, - __m256i __v1, const int __scale) +_mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A, + __m128i __B, __m128i __C, const int __imm) { - __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF, - (__v8si) __index, (__v8si) __v1, - __scale); + return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A, + (__v2di) __B, + (__v2di) __C, + __imm, + (__mmask8) __U); } -extern __inline void +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask, - __m256i __index, __m256i __v1, - const int __scale) +_mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C, + const int __imm) { - __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index, - (__v8si) __v1, __scale); + return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) __C, __imm, + (__mmask8) -1); } -extern __inline void +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_i32scatter_epi32 (void *__addr, __m128i __index, - __m128i __v1, const int __scale) +_mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U, + __m128i __B, __m128i __C, const int __imm) { - __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF, - (__v4si) __index, (__v4si) __v1, - __scale); + return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) __C, __imm, + 
(__mmask8) __U); } -extern __inline void +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask, - __m128i __index, __m128i __v1, - const int __scale) +_mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A, + __m128i __B, __m128i __C, const int __imm) { - __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index, - (__v4si) __v1, __scale); + return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A, + (__v4si) __B, + (__v4si) __C, + __imm, + (__mmask8) __U); } -extern __inline void +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_i32scatter_epi64 (void *__addr, __m128i __index, - __m256i __v1, const int __scale) +_mm256_roundscale_ps (__m256 __A, const int __imm) { - __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF, - (__v4si) __index, (__v4di) __v1, - __scale); + return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A, + __imm, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) -1); } -extern __inline void +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask, - __m128i __index, __m256i __v1, - const int __scale) +_mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A, + const int __imm) { - __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index, - (__v4di) __v1, __scale); + return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A, + __imm, + (__v8sf) __W, + (__mmask8) __U); } -extern __inline void +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_i32scatter_epi64 (void *__addr, __m128i __index, - __m128i __v1, const int __scale) +_mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm) { - __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF, - (__v4si) __index, (__v2di) __v1, - __scale); + return (__m256) 
__builtin_ia32_rndscaleps_256_mask ((__v8sf) __A, + __imm, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); } -extern __inline void +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask, - __m128i __index, __m128i __v1, - const int __scale) +_mm256_roundscale_pd (__m256d __A, const int __imm) { - __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index, - (__v2di) __v1, __scale); + return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, + __imm, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) -1); } -extern __inline void +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_i64scatter_epi32 (void *__addr, __m256i __index, - __m128i __v1, const int __scale) +_mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A, + const int __imm) { - __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF, - (__v4di) __index, (__v4si) __v1, - __scale); + return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, + __imm, + (__v4df) __W, + (__mmask8) __U); } -extern __inline void +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask, - __m256i __index, __m128i __v1, - const int __scale) +_mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm) { - __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index, - (__v4si) __v1, __scale); + return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, + __imm, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); } -extern __inline void +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_i64scatter_epi32 (void *__addr, __m128i __index, - __m128i __v1, const int __scale) +_mm_roundscale_ps (__m128 __A, const int __imm) { - __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF, - (__v2di) 
__index, (__v4si) __v1, - __scale); + return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, + __imm, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1); } -extern __inline void +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask, - __m128i __index, __m128i __v1, - const int __scale) +_mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A, + const int __imm) { - __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index, - (__v4si) __v1, __scale); + return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, + __imm, + (__v4sf) __W, + (__mmask8) __U); } -extern __inline void +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_i64scatter_epi64 (void *__addr, __m256i __index, - __m256i __v1, const int __scale) +_mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm) { - __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF, - (__v4di) __index, (__v4di) __v1, - __scale); + return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, + __imm, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U); } -extern __inline void +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask, - __m256i __index, __m256i __v1, - const int __scale) +_mm_roundscale_pd (__m128d __A, const int __imm) { - __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index, - (__v4di) __v1, __scale); + return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, + __imm, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1); } -extern __inline void +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_i64scatter_epi64 (void *__addr, __m128i __index, - __m128i __v1, const int __scale) +_mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A, + const int __imm) { - 
__builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF, - (__v2di) __index, (__v2di) __v1, - __scale); + return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, + __imm, + (__v2df) __W, + (__mmask8) __U); } -extern __inline void +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask, - __m128i __index, __m128i __v1, - const int __scale) +_mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm) { - __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index, - (__v2di) __v1, __scale); + return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, + __imm, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U); } -extern __inline __m256i +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A, - _MM_PERM_ENUM __mask) +_mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) { - return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask, - (__v8si) __W, - (__mmask8) __U); + return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A, + (__C << 2) | __B, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) -1); } -extern __inline __m256i +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A, - _MM_PERM_ENUM __mask) +_mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) { - return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A, + (__C << 2) | __B, + (__v8sf) __W, + (__mmask8) __U); } -extern __inline __m128i +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A, - _MM_PERM_ENUM __mask) +_mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) { - return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask, - (__v4si) __W, - (__mmask8) __U); + return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A, + (__C << 2) | __B, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); } -extern __inline __m128i +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A, - _MM_PERM_ENUM __mask) +_mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) { - return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A, + (__C << 2) | __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1); } -extern __inline __m256i +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_rol_epi32 (__m256i __A, const int __B) +_mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) { - return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) -1); + return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A, + (__C << 2) | __B, + (__v4sf) __W, + (__mmask8) __U); } -extern __inline __m256i +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A, - const int __B) +_mm_maskz_getmant_ps (__mmask8 __U, __m128 __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) { - return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B, - (__v8si) __W, - (__mmask8) __U); + return (__m128) __builtin_ia32_getmantps128_mask 
((__v4sf) __A, + (__C << 2) | __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U); } -extern __inline __m256i +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B) +_mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) { - return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A, + (__C << 2) | __B, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) -1); } -extern __inline __m128i +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_rol_epi32 (__m128i __A, const int __B) +_mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) { - return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) -1); + return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A, + (__C << 2) | __B, + (__v4df) __W, + (__mmask8) __U); } -extern __inline __m128i +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A, - const int __B) +_mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) { - return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B, - (__v4si) __W, - (__mmask8) __U); + return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A, + (__C << 2) | __B, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); } -extern __inline __m128i +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B) +_mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) { - 
return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A, + (__C << 2) | __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1); } -extern __inline __m256i +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_ror_epi32 (__m256i __A, const int __B) +_mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) { - return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) -1); + return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A, + (__C << 2) | __B, + (__v2df) __W, + (__mmask8) __U); } -extern __inline __m256i +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A, - const int __B) +_mm_maskz_getmant_pd (__mmask8 __U, __m128d __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) { - return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B, - (__v8si) __W, - (__mmask8) __U); + return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A, + (__C << 2) | __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U); } -extern __inline __m256i +extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B) +_mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask, + __m256i __index, void const *__addr, + int __scale) { - return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old, + __addr, + (__v8si) __index, + __mask, __scale); } -extern __inline __m128i +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) -_mm_ror_epi32 (__m128i __A, const int __B) +_mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask, + __m128i __index, void const *__addr, + int __scale) { - return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) -1); + return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old, + __addr, + (__v4si) __index, + __mask, __scale); } -extern __inline __m128i +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A, - const int __B) +_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask, + __m128i __index, void const *__addr, + int __scale) { - return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B, - (__v4si) __W, - (__mmask8) __U); + return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old, + __addr, + (__v4si) __index, + __mask, __scale); } -extern __inline __m128i +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B) +_mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask, + __m128i __index, void const *__addr, + int __scale) { - return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old, + __addr, + (__v4si) __index, + __mask, __scale); } -extern __inline __m256i +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_rol_epi64 (__m256i __A, const int __B) -{ - return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) -1); +_mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask, + __m256i __index, void const *__addr, + int __scale) +{ + return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old, + __addr, + (__v4di) 
__index, + __mask, __scale); } -extern __inline __m256i +extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A, - const int __B) +_mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask, + __m128i __index, void const *__addr, + int __scale) { - return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B, - (__v4di) __W, - (__mmask8) __U); + return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old, + __addr, + (__v2di) __index, + __mask, __scale); } -extern __inline __m256i +extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B) +_mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask, + __m256i __index, void const *__addr, + int __scale) { - return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old, + __addr, + (__v4di) __index, + __mask, __scale); } -extern __inline __m128i +extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_rol_epi64 (__m128i __A, const int __B) +_mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask, + __m128i __index, void const *__addr, + int __scale) { - return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) -1); + return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old, + __addr, + (__v2di) __index, + __mask, __scale); } -extern __inline __m128i +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A, - const int __B) +_mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask, + __m256i __index, void const *__addr, + int __scale) { - return (__m128i) __builtin_ia32_prolq128_mask 
((__v2di) __A, __B, - (__v2di) __W, - (__mmask8) __U); + return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old, + __addr, + (__v8si) __index, + __mask, __scale); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B) +_mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask, + __m128i __index, void const *__addr, + int __scale) { - return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old, + __addr, + (__v4si) __index, + __mask, __scale); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_ror_epi64 (__m256i __A, const int __B) +_mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask, + __m128i __index, void const *__addr, + int __scale) { - return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) -1); + return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old, + __addr, + (__v4si) __index, + __mask, __scale); } -extern __inline __m256i +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A, - const int __B) +_mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask, + __m128i __index, void const *__addr, + int __scale) { - return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B, - (__v4di) __W, - (__mmask8) __U); + return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old, + __addr, + (__v4si) __index, + __mask, __scale); } -extern __inline __m256i +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B) +_mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask, + __m256i 
__index, void const *__addr, + int __scale) { - return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old, + __addr, + (__v4di) __index, + __mask, __scale); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ror_epi64 (__m128i __A, const int __B) +_mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask, + __m128i __index, void const *__addr, + int __scale) { - return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) -1); + return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old, + __addr, + (__v2di) __index, + __mask, __scale); } -extern __inline __m128i +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A, - const int __B) +_mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask, + __m256i __index, void const *__addr, + int __scale) { - return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B, - (__v2di) __W, - (__mmask8) __U); + return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old, + __addr, + (__v4di) __index, + __mask, __scale); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B) +_mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask, + __m128i __index, void const *__addr, + int __scale) { - return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old, + __addr, + (__v2di) __index, + __mask, __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_alignr_epi32 (__m128i __A, __m128i 
__B, const int __imm) +_mm256_i32scatter_ps (void *__addr, __m256i __index, + __m256 __v1, const int __scale) { - return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A, - (__v4si) __B, __imm, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) -1); + __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF, + (__v8si) __index, (__v8sf) __v1, + __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A, - __m128i __B, const int __imm) +_mm256_mask_i32scatter_ps (void *__addr, __mmask8 __mask, + __m256i __index, __m256 __v1, + const int __scale) { - return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A, - (__v4si) __B, __imm, - (__v4si) __W, - (__mmask8) __U); + __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index, + (__v8sf) __v1, __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B, - const int __imm) +_mm_i32scatter_ps (void *__addr, __m128i __index, __m128 __v1, + const int __scale) { - return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A, - (__v4si) __B, __imm, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF, + (__v4si) __index, (__v4sf) __v1, + __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm) +_mm_mask_i32scatter_ps (void *__addr, __mmask8 __mask, + __m128i __index, __m128 __v1, + const int __scale) { - return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A, - (__v2di) __B, __imm, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) -1); + __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index, + (__v4sf) __v1, __scale); } -extern __inline __m128i +extern __inline 
void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A, - __m128i __B, const int __imm) +_mm256_i32scatter_pd (void *__addr, __m128i __index, + __m256d __v1, const int __scale) { - return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A, - (__v2di) __B, __imm, - (__v2di) __W, - (__mmask8) __U); + __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF, + (__v4si) __index, (__v4df) __v1, + __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B, - const int __imm) +_mm256_mask_i32scatter_pd (void *__addr, __mmask8 __mask, + __m128i __index, __m256d __v1, + const int __scale) { - return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A, - (__v2di) __B, __imm, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) __U); + __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index, + (__v4df) __v1, __scale); } -extern __inline __m256i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm) +_mm_i32scatter_pd (void *__addr, __m128i __index, + __m128d __v1, const int __scale) { - return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A, - (__v8si) __B, __imm, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) -1); + __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF, + (__v4si) __index, (__v2df) __v1, + __scale); } -extern __inline __m256i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A, - __m256i __B, const int __imm) +_mm_mask_i32scatter_pd (void *__addr, __mmask8 __mask, + __m128i __index, __m128d __v1, + const int __scale) { - return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A, - (__v8si) __B, __imm, - (__v8si) __W, - (__mmask8) 
__U); + __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index, + (__v2df) __v1, __scale); } -extern __inline __m256i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B, - const int __imm) +_mm256_i64scatter_ps (void *__addr, __m256i __index, + __m128 __v1, const int __scale) { - return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A, - (__v8si) __B, __imm, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); + __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF, + (__v4di) __index, (__v4sf) __v1, + __scale); } -extern __inline __m256i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm) +_mm256_mask_i64scatter_ps (void *__addr, __mmask8 __mask, + __m256i __index, __m128 __v1, + const int __scale) { - return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A, - (__v4di) __B, __imm, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) -1); + __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index, + (__v4sf) __v1, __scale); } -extern __inline __m256i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A, - __m256i __B, const int __imm) +_mm_i64scatter_ps (void *__addr, __m128i __index, __m128 __v1, + const int __scale) { - return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A, - (__v4di) __B, __imm, - (__v4di) __W, - (__mmask8) __U); + __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF, + (__v2di) __index, (__v4sf) __v1, + __scale); } -extern __inline __m256i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B, - const int __imm) +_mm_mask_i64scatter_ps (void *__addr, __mmask8 __mask, + __m128i __index, __m128 __v1, + 
const int __scale) { - return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A, - (__v4di) __B, __imm, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) __U); + __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index, + (__v4sf) __v1, __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A, - const int __I) +_mm256_i64scatter_pd (void *__addr, __m256i __index, + __m256d __v1, const int __scale) { - return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I, - (__v8hi) __W, - (__mmask8) __U); + __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF, + (__v4di) __index, (__v4df) __v1, + __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I) +_mm256_mask_i64scatter_pd (void *__addr, __mmask8 __mask, + __m256i __index, __m256d __v1, + const int __scale) { - return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I, - (__v8hi) - _mm_setzero_si128 (), - (__mmask8) __U); + __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index, + (__v4df) __v1, __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A, - const int __I) +_mm_i64scatter_pd (void *__addr, __m128i __index, + __m128d __v1, const int __scale) { - return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I, - (__v8hi) __W, - (__mmask8) __U); + __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF, + (__v2di) __index, (__v2df) __v1, + __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I) +_mm_mask_i64scatter_pd (void *__addr, __mmask8 __mask, + 
__m128i __index, __m128d __v1, + const int __scale) { - return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I, - (__v8hi) - _mm_setzero_si128 (), - (__mmask8) __U); + __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index, + (__v2df) __v1, __scale); } -extern __inline __m256i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A, - const int __imm) +_mm256_i32scatter_epi32 (void *__addr, __m256i __index, + __m256i __v1, const int __scale) { - return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm, - (__v8si) __W, - (__mmask8) __U); + __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF, + (__v8si) __index, (__v8si) __v1, + __scale); } -extern __inline __m256i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm) +_mm256_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask, + __m256i __index, __m256i __v1, + const int __scale) { - return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); + __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index, + (__v8si) __v1, __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A, - const int __imm) +_mm_i32scatter_epi32 (void *__addr, __m128i __index, + __m128i __v1, const int __scale) { - return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm, - (__v4si) __W, - (__mmask8) __U); + __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF, + (__v4si) __index, (__v4si) __v1, + __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm) 
+_mm_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask, + __m128i __index, __m128i __v1, + const int __scale) { - return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index, + (__v4si) __v1, __scale); } -extern __inline __m256i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_srai_epi64 (__m256i __A, const int __imm) +_mm256_i32scatter_epi64 (void *__addr, __m128i __index, + __m256i __v1, const int __scale) { - return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) -1); + __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF, + (__v4si) __index, (__v4di) __v1, + __scale); } -extern __inline __m256i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A, - const int __imm) +_mm256_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask, + __m128i __index, __m256i __v1, + const int __scale) { - return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm, - (__v4di) __W, - (__mmask8) __U); + __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index, + (__v4di) __v1, __scale); } -extern __inline __m256i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm) +_mm_i32scatter_epi64 (void *__addr, __m128i __index, + __m128i __v1, const int __scale) { - return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) __U); + __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF, + (__v4si) __index, (__v2di) __v1, + __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_srai_epi64 (__m128i __A, 
const int __imm) +_mm_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask, + __m128i __index, __m128i __v1, + const int __scale) { - return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) -1); + __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index, + (__v2di) __v1, __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A, - const int __imm) +_mm256_i64scatter_epi32 (void *__addr, __m256i __index, + __m128i __v1, const int __scale) { - return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm, - (__v2di) __W, - (__mmask8) __U); + __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF, + (__v4di) __index, (__v4si) __v1, + __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm) +_mm256_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask, + __m256i __index, __m128i __v1, + const int __scale) { - return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) __U); + __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index, + (__v4si) __v1, __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B) +_mm_i64scatter_epi32 (void *__addr, __m128i __index, + __m128i __v1, const int __scale) { - return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B, - (__v4si) __W, - (__mmask8) __U); + __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF, + (__v2di) __index, (__v4si) __v1, + __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_slli_epi32 
(__mmask8 __U, __m128i __A, int __B) +_mm_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask, + __m128i __index, __m128i __v1, + const int __scale) { - return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index, + (__v4si) __v1, __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B) +_mm256_i64scatter_epi64 (void *__addr, __m256i __index, + __m256i __v1, const int __scale) { - return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B, - (__v2di) __W, - (__mmask8) __U); + __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF, + (__v4di) __index, (__v4di) __v1, + __scale); } -extern __inline __m128i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B) +_mm256_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask, + __m256i __index, __m256i __v1, + const int __scale) { - return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) __U); + __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index, + (__v4di) __v1, __scale); } -extern __inline __m256i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A, - int __B) +_mm_i64scatter_epi64 (void *__addr, __m128i __index, + __m128i __v1, const int __scale) { - return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B, - (__v8si) __W, - (__mmask8) __U); + __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF, + (__v2di) __index, (__v2di) __v1, + __scale); } -extern __inline __m256i +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B) +_mm_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask, + __m128i __index, __m128i __v1, + const int __scale) { - return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); + __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index, + (__v2di) __v1, __scale); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A, - int __B) +_mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A, + _MM_PERM_ENUM __mask) { - return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B, - (__v4di) __W, + return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask, + (__v8si) __W, (__mmask8) __U); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B) +_mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A, + _MM_PERM_ENUM __mask) { - return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B, - (__v4di) + return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask, + (__v8si) _mm256_setzero_si256 (), (__mmask8) __U); } -extern __inline __m256d +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X, - const int __imm) +_mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A, + _MM_PERM_ENUM __mask) { - return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm, - (__v4df) __W, + return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask, + (__v4si) __W, (__mmask8) __U); } -extern __inline __m256d +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm) +_mm_maskz_shuffle_epi32 
(__mmask8 __U, __m128i __A, + _MM_PERM_ENUM __mask) { - return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm, - (__v4df) - _mm256_setzero_pd (), + return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask, + (__v4si) + _mm_setzero_si128 (), (__mmask8) __U); } -extern __inline __m256d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X, - const int __C) -{ - return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C, - (__v4df) __W, - (__mmask8) __U); -} - -extern __inline __m256d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C) -{ - return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) __U); -} - -extern __inline __m128d +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X, - const int __C) +_mm256_rol_epi32 (__m256i __A, const int __B) { - return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C, - (__v2df) __W, - (__mmask8) __U); + return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1); } -extern __inline __m128d +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C) +_mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A, + const int __B) { - return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C, - (__v2df) - _mm_setzero_pd (), - (__mmask8) __U); + return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B, + (__v8si) __W, + (__mmask8) __U); } -extern __inline __m256 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_permute_ps (__m256 __W, 
__mmask8 __U, __m256 __X, - const int __C) +_mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B) { - return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C, - (__v8sf) __W, - (__mmask8) __U); + return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); } -extern __inline __m256 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C) +_mm_rol_epi32 (__m128i __A, const int __B) { - return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) __U); + return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); } -extern __inline __m128 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X, - const int __C) +_mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A, + const int __B) { - return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C, - (__v4sf) __W, + return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B, + (__v4si) __W, (__mmask8) __U); } -extern __inline __m128 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C) +_mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B) { - return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C, - (__v4sf) - _mm_setzero_ps (), + return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B, + (__v4si) + _mm_setzero_si128 (), (__mmask8) __U); } -extern __inline __m256d +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) +_mm256_ror_epi32 (__m256i __A, const int __B) { - return 
(__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A, - (__v4df) __W, - (__mmask8) __U); + return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1); } -extern __inline __m256 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) +_mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A, + const int __B) { - return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A, - (__v8sf) __W, - (__mmask8) __U); + return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B, + (__v8si) __W, + (__mmask8) __U); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) +_mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B) { - return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A, - (__v4di) __W, - (__mmask8) __U); + return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); } -extern __inline __m256i +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) +_mm_ror_epi32 (__m128i __A, const int __B) { - return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A, - (__v8si) __W, - (__mmask8) __U); + return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); } -extern __inline __m128d +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) +_mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A, + const int __B) { - return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A, - (__v2df) __W, - (__mmask8) __U); + return (__m128i) 
__builtin_ia32_prord128_mask ((__v4si) __A, __B, + (__v4si) __W, + (__mmask8) __U); } -extern __inline __m128 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) +_mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B) { - return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A, - (__v4sf) __W, - (__mmask8) __U); + return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); } -extern __inline __m128i +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) +_mm256_rol_epi64 (__m256i __A, const int __B) { - return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A, - (__v2di) __W, - (__mmask8) __U); + return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) -1); } -extern __inline __m128i +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) +_mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A, + const int __B) { - return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A, - (__v4si) __W, - (__mmask8) __U); + return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B, + (__v4di) __W, + (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P) -{ - return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, - (__v4di) __Y, __P, - (__mmask8) -1); +_mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B) +{ + return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); } -extern __inline __mmask8 +extern 
__inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P) +_mm_rol_epi64 (__m128i __A, const int __B) { - return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, - (__v8si) __Y, __P, + return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B, + (__v2di) + _mm_setzero_si128 (), (__mmask8) -1); } -extern __inline __mmask8 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P) +_mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A, + const int __B) { - return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, - (__v4di) __Y, __P, - (__mmask8) -1); + return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B, + (__v2di) __W, + (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P) +_mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B) { - return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, - (__v8si) __Y, __P, - (__mmask8) -1); + return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P) +_mm256_ror_epi64 (__m256i __A, const int __B) { - return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X, - (__v4df) __Y, __P, - (__mmask8) -1); + return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) -1); } -extern __inline __mmask8 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P) 
+_mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A, + const int __B) { - return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X, - (__v8sf) __Y, __P, - (__mmask8) -1); + return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B, + (__v4di) __W, + (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y, - const int __P) +_mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B) { - return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, - (__v4di) __Y, __P, + return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B, + (__v4di) + _mm256_setzero_si256 (), (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y, - const int __P) +_mm_ror_epi64 (__m128i __A, const int __B) { - return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, - (__v8si) __Y, __P, - (__mmask8) __U); + return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) -1); } -extern __inline __mmask8 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y, - const int __P) +_mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A, + const int __B) { - return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, - (__v4di) __Y, __P, - (__mmask8) __U); + return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B, + (__v2di) __W, + (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y, - const int __P) +_mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, 
const int __B) { - return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, - (__v8si) __Y, __P, - (__mmask8) __U); + return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y, - const int __P) +_mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm) { - return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X, - (__v4df) __Y, __P, - (__mmask8) __U); + return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A, + (__v4si) __B, __imm, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); } -extern __inline __mmask8 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y, - const int __P) +_mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B, const int __imm) { - return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X, - (__v8sf) __Y, __P, + return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A, + (__v4si) __B, __imm, + (__v4si) __W, (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P) +_mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B, + const int __imm) { - return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, - (__v2di) __Y, __P, - (__mmask8) -1); + return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A, + (__v4si) __B, __imm, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P) +_mm_alignr_epi64 (__m128i __A, __m128i __B, 
const int __imm) { - return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, - (__v4si) __Y, __P, - (__mmask8) -1); + return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A, + (__v2di) __B, __imm, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) -1); } -extern __inline __mmask8 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P) +_mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B, const int __imm) { - return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, - (__v2di) __Y, __P, - (__mmask8) -1); + return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A, + (__v2di) __B, __imm, + (__v2di) __W, + (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P) +_mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B, + const int __imm) { - return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, - (__v4si) __Y, __P, - (__mmask8) -1); + return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A, + (__v2di) __B, __imm, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P) +_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm) { - return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X, - (__v2df) __Y, __P, + return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A, + (__v8si) __B, __imm, + (__v8si) + _mm256_setzero_si256 (), (__mmask8) -1); } -extern __inline __mmask8 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P) +_mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A, + 
__m256i __B, const int __imm) { - return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X, - (__v4sf) __Y, __P, - (__mmask8) -1); + return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A, + (__v8si) __B, __imm, + (__v8si) __W, + (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y, - const int __P) +_mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B, + const int __imm) { - return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, - (__v2di) __Y, __P, - (__mmask8) __U); + return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A, + (__v8si) __B, __imm, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y, - const int __P) +_mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm) { - return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, - (__v4si) __Y, __P, - (__mmask8) __U); + return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A, + (__v4di) __B, __imm, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) -1); } -extern __inline __mmask8 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y, - const int __P) +_mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B, const int __imm) { - return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, - (__v2di) __Y, __P, + return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A, + (__v4di) __B, __imm, + (__v4di) __W, (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, 
__m128i __Y, - const int __P) +_mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B, + const int __imm) { - return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, - (__v4si) __Y, __P, + return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A, + (__v4di) __B, __imm, + (__v4di) + _mm256_setzero_si256 (), (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y, - const int __P) +_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A, + const int __I) { - return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X, - (__v2df) __Y, __P, + return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I, + (__v8hi) __W, (__mmask8) __U); } -extern __inline __mmask8 +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y, - const int __P) +_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I) { - return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X, - (__v4sf) __Y, __P, + return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I, + (__v8hi) + _mm_setzero_si128 (), (__mmask8) __U); } -extern __inline __m256d +extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_permutex_pd (__m256d __X, const int __M) +_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A, + const int __I) { - return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M, - (__v4df) - _mm256_undefined_pd (), - (__mmask8) -1); + return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I, + (__v8hi) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y) +extern __inline __m128i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I) { - return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, - (__v8si) __Y, 4, - (__mmask8) __M); + return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y) +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A, + const int __imm) { - return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, - (__v8si) __Y, 4, - (__mmask8) -1); + return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm, + (__v8si) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y) +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm) { - return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, - (__v8si) __Y, 1, - (__mmask8) __M); + return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y) +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A, + const int __imm) { - return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, - (__v8si) __Y, 1, - (__mmask8) -1); + return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm, + (__v4si) __W, + (__mmask8) __U); } -extern __inline __mmask8 - 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y) +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm) { - return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, - (__v8si) __Y, 5, - (__mmask8) __M); + return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y) +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_srai_epi64 (__m256i __A, const int __imm) { - return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, - (__v8si) __Y, 5, + return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm, + (__v4di) + _mm256_setzero_si256 (), (__mmask8) -1); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, - (__v8si) __Y, 2, - (__mmask8) __M); -} - -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y) +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A, + const int __imm) { - return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, - (__v8si) __Y, 2, - (__mmask8) -1); + return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm, + (__v4di) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i 
__X, __m256i __Y) +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm) { - return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, - (__v4di) __Y, 4, - (__mmask8) __M); + return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y) +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srai_epi64 (__m128i __A, const int __imm) { - return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, - (__v4di) __Y, 4, + return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm, + (__v2di) + _mm_setzero_si128 (), (__mmask8) -1); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, - (__v4di) __Y, 1, - (__mmask8) __M); -} - -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y) +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A, + const int __imm) { - return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, - (__v4di) __Y, 1, - (__mmask8) -1); + return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm, + (__v2di) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y) +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm) { - return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, - (__v4di) __Y, 5, - (__mmask8) __M); + return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y) +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B) { - return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, - (__v4di) __Y, 5, - (__mmask8) -1); + return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B, + (__v4si) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y) +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B) { - return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, - (__v4di) __Y, 2, - (__mmask8) __M); + return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y) +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B) { - return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, - (__v4di) __Y, 2, - (__mmask8) -1); + return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B, + (__v2di) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y) +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B) { - return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, - (__v8si) __Y, 4, - (__mmask8) __M); + return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y) +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A, + int __B) { - return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, - (__v8si) __Y, 4, - (__mmask8) -1); + return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B, + (__v8si) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, - (__v8si) __Y, 1, - (__mmask8) __M); +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B) +{ + return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y) +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A, + int __B) { - return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, - (__v8si) __Y, 1, - (__mmask8) -1); + return (__m256i) 
__builtin_ia32_psllqi256_mask ((__v4di) __A, __B, + (__v4di) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y) +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B) { - return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, - (__v8si) __Y, 5, - (__mmask8) __M); + return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y) +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X, + const int __imm) { - return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, - (__v8si) __Y, 5, - (__mmask8) -1); + return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm, + (__v4df) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y) +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm) { - return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, - (__v8si) __Y, 2, - (__mmask8) __M); + return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y) +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X, + const int __C) { - return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, - (__v8si) __Y, 2, - (__mmask8) -1); + return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C, + (__v4df) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y) +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C) { - return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, - (__v4di) __Y, 4, - (__mmask8) __M); + return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y) +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X, + const int __C) { - return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, - (__v4di) __Y, 4, - (__mmask8) -1); + return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C, + (__v2df) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y) +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C) { - return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, - (__v4di) __Y, 1, - (__mmask8) __M); + return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) -_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y) +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X, + const int __C) { - return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, - (__v4di) __Y, 1, - (__mmask8) -1); + return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C, + (__v8sf) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y) +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C) { - return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, - (__v4di) __Y, 5, - (__mmask8) __M); + return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y) +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X, + const int __C) { - return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, - (__v4di) __Y, 5, - (__mmask8) -1); + return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C, + (__v4sf) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y) +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C) { - return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, - (__v4di) __Y, 2, - (__mmask8) __M); + return (__m128) 
__builtin_ia32_vpermilps_mask ((__v4sf) __X, __C, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y) +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { - return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, - (__v4di) __Y, 2, - (__mmask8) -1); + return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A, + (__v4df) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y) +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { - return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, - (__v4si) __Y, 4, - (__mmask8) __M); + return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A, + (__v8sf) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y) +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { - return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, - (__v4si) __Y, 4, - (__mmask8) -1); + return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A, + (__v4di) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y) +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { - return (__mmask8) 
__builtin_ia32_ucmpd128_mask ((__v4si) __X, - (__v4si) __Y, 1, - (__mmask8) __M); + return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A, + (__v8si) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y) +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { - return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, - (__v4si) __Y, 1, - (__mmask8) -1); + return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A, + (__v2df) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y) +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { - return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, - (__v4si) __Y, 5, - (__mmask8) __M); + return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A, + (__v4sf) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y) +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { - return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, - (__v4si) __Y, 5, - (__mmask8) -1); + return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A, + (__v2di) __W, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y) +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { - return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, - (__v4si) __Y, 2, - (__mmask8) __M); + return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A, + (__v4si) __W, + (__mmask8) __U); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmple_epu32_mask (__m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P) { - return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, - (__v4si) __Y, 2, - (__mmask8) -1); + return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, + (__v4di) __Y, __P, + (__mmask8) -1); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P) { - return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, - (__v2di) __Y, 4, - (__mmask8) __M); + return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, + (__v8si) __Y, __P, + (__mmask8) -1); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P) { - return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, - (__v2di) __Y, 4, + return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, + (__v4di) __Y, __P, (__mmask8) -1); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmp_epu32_mask (__m256i __X, 
__m256i __Y, const int __P) { - return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, - (__v2di) __Y, 1, - (__mmask8) __M); + return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, + (__v8si) __Y, __P, + (__mmask8) -1); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P) { - return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, - (__v2di) __Y, 1, + return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X, + (__v4df) __Y, __P, (__mmask8) -1); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P) { - return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, - (__v2di) __Y, 5, - (__mmask8) __M); + return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X, + (__v8sf) __Y, __P, + (__mmask8) -1); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y, + const int __P) { - return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, - (__v2di) __Y, 5, - (__mmask8) -1); + return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, + (__v4di) __Y, __P, + (__mmask8) __U); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, 
__m256i __Y, + const int __P) { - return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, - (__v2di) __Y, 2, - (__mmask8) __M); + return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, + (__v8si) __Y, __P, + (__mmask8) __U); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmple_epu64_mask (__m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y, + const int __P) { - return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, - (__v2di) __Y, 2, - (__mmask8) -1); + return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, + (__v4di) __Y, __P, + (__mmask8) __U); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y, + const int __P) { - return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, - (__v4si) __Y, 4, - (__mmask8) __M); + return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, + (__v8si) __Y, __P, + (__mmask8) __U); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y, + const int __P) { - return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, - (__v4si) __Y, 4, - (__mmask8) -1); + return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X, + (__v4df) __Y, __P, + (__mmask8) __U); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y, + const int __P) { - return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, - (__v4si) __Y, 1, - (__mmask8) __M); + return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X, + (__v8sf) __Y, __P, + (__mmask8) __U); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P) { - return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, - (__v4si) __Y, 1, + return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, + (__v2di) __Y, __P, (__mmask8) -1); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P) { return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, - (__v4si) __Y, 5, - (__mmask8) __M); + (__v4si) __Y, __P, + (__mmask8) -1); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P) { - return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, - (__v4si) __Y, 5, - (__mmask8) -1); + return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, + (__v2di) __Y, __P, + (__mmask8) -1); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P) { - return 
(__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, - (__v4si) __Y, 2, - (__mmask8) __M); + return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, + (__v4si) __Y, __P, + (__mmask8) -1); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmple_epi32_mask (__m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P) { - return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, - (__v4si) __Y, 2, - (__mmask8) -1); + return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X, + (__v2df) __Y, __P, + (__mmask8) -1); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P) { - return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, - (__v2di) __Y, 4, - (__mmask8) __M); + return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X, + (__v4sf) __Y, __P, + (__mmask8) -1); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y, + const int __P) { return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, - (__v2di) __Y, 4, - (__mmask8) -1); + (__v2di) __Y, __P, + (__mmask8) __U); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y, + const int __P) { - return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, - 
(__v2di) __Y, 1, - (__mmask8) __M); + return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, + (__v4si) __Y, __P, + (__mmask8) __U); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y, + const int __P) { - return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, - (__v2di) __Y, 1, - (__mmask8) -1); + return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, + (__v2di) __Y, __P, + (__mmask8) __U); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y, + const int __P) { - return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, - (__v2di) __Y, 5, - (__mmask8) __M); + return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, + (__v4si) __Y, __P, + (__mmask8) __U); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y, + const int __P) { - return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, - (__v2di) __Y, 5, - (__mmask8) -1); + return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X, + (__v2df) __Y, __P, + (__mmask8) __U); } extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y) +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y, + const int __P) { - return 
(__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, - (__v2di) __Y, 2, - (__mmask8) __M); + return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X, + (__v4sf) __Y, __P, + (__mmask8) __U); } -extern __inline __mmask8 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmple_epi64_mask (__m128i __X, __m128i __Y) +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_permutex_pd (__m256d __X, const int __M) { - return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, - (__v2di) __Y, 2, - (__mmask8) -1); + return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M, + (__v4df) + _mm256_undefined_pd (), + (__mmask8) -1); } #else diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ebe5a4a342c..45839d25ddc 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,8 @@ 2017-03-09 Jakub Jelinek + PR target/79932 + * gcc.target/i386/pr79932-2.c: New test. + PR target/79932 * gcc.target/i386/pr79932-1.c: New test. 
diff --git a/gcc/testsuite/gcc.target/i386/pr79932-2.c b/gcc/testsuite/gcc.target/i386/pr79932-2.c new file mode 100644 index 00000000000..dc8178bdd12 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr79932-2.c @@ -0,0 +1,78 @@ +/* PR target/79932 */ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavx512vl" } */ + +#include <x86intrin.h> + +__m256i a, b; +__m128i c, d; +__mmask32 e, f[64]; + +void +foo (void) +{ + f[0] = _mm256_cmpge_epi32_mask (a, b); + f[1] = _mm256_cmpge_epi64_mask (a, b); + f[2] = _mm256_cmpge_epu32_mask (a, b); + f[3] = _mm256_cmpge_epu64_mask (a, b); + f[4] = _mm256_cmple_epi32_mask (a, b); + f[5] = _mm256_cmple_epi64_mask (a, b); + f[6] = _mm256_cmple_epu32_mask (a, b); + f[7] = _mm256_cmple_epu64_mask (a, b); + f[8] = _mm256_cmplt_epi32_mask (a, b); + f[9] = _mm256_cmplt_epi64_mask (a, b); + f[10] = _mm256_cmplt_epu32_mask (a, b); + f[11] = _mm256_cmplt_epu64_mask (a, b); + f[12] = _mm256_cmpneq_epi32_mask (a, b); + f[13] = _mm256_cmpneq_epi64_mask (a, b); + f[14] = _mm256_cmpneq_epu32_mask (a, b); + f[15] = _mm256_cmpneq_epu64_mask (a, b); + f[16] = _mm256_mask_cmpge_epi32_mask (e, a, b); + f[17] = _mm256_mask_cmpge_epi64_mask (e, a, b); + f[18] = _mm256_mask_cmpge_epu32_mask (e, a, b); + f[19] = _mm256_mask_cmpge_epu64_mask (e, a, b); + f[20] = _mm256_mask_cmple_epi32_mask (e, a, b); + f[21] = _mm256_mask_cmple_epi64_mask (e, a, b); + f[22] = _mm256_mask_cmple_epu32_mask (e, a, b); + f[23] = _mm256_mask_cmple_epu64_mask (e, a, b); + f[24] = _mm256_mask_cmplt_epi32_mask (e, a, b); + f[25] = _mm256_mask_cmplt_epi64_mask (e, a, b); + f[26] = _mm256_mask_cmplt_epu32_mask (e, a, b); + f[27] = _mm256_mask_cmplt_epu64_mask (e, a, b); + f[28] = _mm256_mask_cmpneq_epi32_mask (e, a, b); + f[29] = _mm256_mask_cmpneq_epi64_mask (e, a, b); + f[30] = _mm256_mask_cmpneq_epu32_mask (e, a, b); + f[31] = _mm256_mask_cmpneq_epu64_mask (e, a, b); + f[32] = _mm_cmpge_epi32_mask (c, d); + f[33] = _mm_cmpge_epi64_mask (c, d); + f[34] = _mm_cmpge_epu32_mask (c, d); + f[35]
= _mm_cmpge_epu64_mask (c, d); + f[36] = _mm_cmple_epi32_mask (c, d); + f[37] = _mm_cmple_epi64_mask (c, d); + f[38] = _mm_cmple_epu32_mask (c, d); + f[39] = _mm_cmple_epu64_mask (c, d); + f[40] = _mm_cmplt_epi32_mask (c, d); + f[41] = _mm_cmplt_epi64_mask (c, d); + f[42] = _mm_cmplt_epu32_mask (c, d); + f[43] = _mm_cmplt_epu64_mask (c, d); + f[44] = _mm_cmpneq_epi32_mask (c, d); + f[45] = _mm_cmpneq_epi64_mask (c, d); + f[46] = _mm_cmpneq_epu32_mask (c, d); + f[47] = _mm_cmpneq_epu64_mask (c, d); + f[48] = _mm_mask_cmpge_epi32_mask (e, c, d); + f[49] = _mm_mask_cmpge_epi64_mask (e, c, d); + f[50] = _mm_mask_cmpge_epu32_mask (e, c, d); + f[51] = _mm_mask_cmpge_epu64_mask (e, c, d); + f[52] = _mm_mask_cmple_epi32_mask (e, c, d); + f[53] = _mm_mask_cmple_epi64_mask (e, c, d); + f[54] = _mm_mask_cmple_epu32_mask (e, c, d); + f[55] = _mm_mask_cmple_epu64_mask (e, c, d); + f[56] = _mm_mask_cmplt_epi32_mask (e, c, d); + f[57] = _mm_mask_cmplt_epi64_mask (e, c, d); + f[58] = _mm_mask_cmplt_epu32_mask (e, c, d); + f[59] = _mm_mask_cmplt_epu64_mask (e, c, d); + f[60] = _mm_mask_cmpneq_epi32_mask (e, c, d); + f[61] = _mm_mask_cmpneq_epi64_mask (e, c, d); + f[62] = _mm_mask_cmpneq_epu32_mask (e, c, d); + f[63] = _mm_mask_cmpneq_epu64_mask (e, c, d); +}