+2016-11-22 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/78451
+ * config/i386/avx512vlintrin.h (_mm_setzero_di): Removed.
+ (_mm_maskz_mov_epi64): Use _mm_setzero_si128 instead of
+ _mm_setzero_di.
+ (_mm_maskz_load_epi64): Likewise.
+ (_mm_setzero_hi): Removed.
+ (_mm_maskz_loadu_epi64): Use _mm_setzero_si128 instead of
+ _mm_setzero_di.
+ (_mm_abs_epi64, _mm_maskz_abs_epi64, _mm_maskz_srl_epi64,
+ _mm_maskz_unpackhi_epi64, _mm_maskz_unpacklo_epi64,
+ _mm_maskz_compress_epi64, _mm_srav_epi64, _mm_maskz_srav_epi64,
+ _mm_maskz_sllv_epi64, _mm_maskz_srlv_epi64, _mm_rolv_epi64,
+ _mm_maskz_rolv_epi64, _mm_rorv_epi64, _mm_maskz_rorv_epi64,
+ _mm_min_epi64, _mm_max_epi64, _mm_max_epu64, _mm_min_epu64,
+ _mm_lzcnt_epi64, _mm_maskz_lzcnt_epi64, _mm_conflict_epi64,
+ _mm_maskz_conflict_epi64, _mm_sra_epi64, _mm_maskz_sra_epi64,
+ _mm_maskz_sll_epi64, _mm_rol_epi64, _mm_maskz_rol_epi64,
+ _mm_ror_epi64, _mm_maskz_ror_epi64, _mm_alignr_epi64,
+ _mm_maskz_alignr_epi64, _mm_srai_epi64, _mm_maskz_slli_epi64):
+ Likewise.
+ (_mm_cvtepi32_epi8, _mm256_cvtepi32_epi8, _mm_cvtsepi32_epi8,
+ _mm256_cvtsepi32_epi8, _mm_cvtusepi32_epi8, _mm256_cvtusepi32_epi8,
+ _mm_cvtepi32_epi16, _mm256_cvtepi32_epi16, _mm_cvtsepi32_epi16,
+ _mm256_cvtsepi32_epi16, _mm_cvtusepi32_epi16, _mm256_cvtusepi32_epi16,
+ _mm_cvtepi64_epi8, _mm256_cvtepi64_epi8, _mm_cvtsepi64_epi8,
+ _mm256_cvtsepi64_epi8, _mm_cvtusepi64_epi8, _mm256_cvtusepi64_epi8,
+ _mm_cvtepi64_epi16, _mm256_cvtepi64_epi16, _mm_cvtsepi64_epi16,
+ _mm256_cvtsepi64_epi16, _mm_cvtusepi64_epi16, _mm256_cvtusepi64_epi16,
+ _mm_cvtepi64_epi32, _mm256_cvtepi64_epi32, _mm_cvtsepi64_epi32,
+ _mm256_cvtsepi64_epi32, _mm_cvtusepi64_epi32, _mm256_cvtusepi64_epi32,
+ _mm_maskz_set1_epi32, _mm_maskz_set1_epi64): Formatting fixes.
+ (_mm_maskz_cvtps_ph, _mm256_maskz_cvtps_ph): Use _mm_setzero_si128
+ instead of _mm_setzero_hi.
+ (_mm256_permutex_pd, _mm256_maskz_permutex_epi64, _mm256_insertf32x4,
+ _mm256_maskz_insertf32x4, _mm256_inserti32x4, _mm256_maskz_inserti32x4,
+ _mm256_extractf32x4_ps, _mm256_maskz_extractf32x4_ps,
+ _mm256_shuffle_i32x4, _mm256_maskz_shuffle_i32x4, _mm256_shuffle_f64x2,
+ _mm256_maskz_shuffle_f64x2, _mm256_shuffle_f32x4,
+ _mm256_maskz_shuffle_f32x4, _mm256_maskz_shuffle_pd,
+ _mm_maskz_shuffle_pd, _mm256_maskz_shuffle_ps, _mm_maskz_shuffle_ps,
+ _mm256_maskz_srli_epi32, _mm_maskz_srli_epi32, _mm_maskz_srli_epi64,
+ _mm256_mask_slli_epi32, _mm256_maskz_slli_epi32, _mm256_mask_slli_epi64,
+ _mm256_maskz_slli_epi64, _mm256_roundscale_ps,
+ _mm256_maskz_roundscale_ps, _mm256_roundscale_pd,
+ _mm256_maskz_roundscale_pd, _mm_roundscale_ps, _mm_maskz_roundscale_ps,
+ _mm_roundscale_pd, _mm_maskz_roundscale_pd, _mm256_getmant_ps,
+ _mm256_maskz_getmant_ps, _mm_getmant_ps, _mm_maskz_getmant_ps,
+ _mm256_getmant_pd, _mm256_maskz_getmant_pd, _mm_getmant_pd,
+ _mm_maskz_getmant_pd, _mm256_maskz_shuffle_epi32,
+ _mm_maskz_shuffle_epi32, _mm256_rol_epi32, _mm256_maskz_rol_epi32,
+ _mm_rol_epi32, _mm_maskz_rol_epi32, _mm256_ror_epi32,
+ _mm256_maskz_ror_epi32, _mm_ror_epi32, _mm_maskz_ror_epi32,
+ _mm_maskz_alignr_epi32, _mm_maskz_alignr_epi64,
+ _mm256_maskz_srai_epi32, _mm_maskz_srai_epi32, _mm_srai_epi64,
+ _mm_maskz_srai_epi64, _mm256_maskz_permutex_pd,
+ _mm256_maskz_permute_pd, _mm256_maskz_permute_ps, _mm_maskz_permute_pd,
+ _mm_maskz_permute_ps, _mm256_permutexvar_ps): Formatting fixes.
+ (_mm_maskz_slli_epi64, _mm_rol_epi64, _mm_maskz_rol_epi64,
+ _mm_ror_epi64, _mm_maskz_ror_epi64): Use _mm_setzero_si128 instead of
+ _mm_setzero_di.
+ (_mm_maskz_cvtps_ph, _mm256_maskz_cvtps_ph): Use _mm_setzero_si128
+ instead of _mm_setzero_hi.
+ * config/i386/avx512dqintrin.h (_mm512_broadcast_f64x2,
+ _mm512_broadcast_i64x2, _mm512_broadcast_f32x2, _mm512_broadcast_i32x2,
+ _mm512_broadcast_f32x8, _mm512_broadcast_i32x8): Formatting fixes.
+ (_mm512_extracti64x2_epi64, _mm512_maskz_extracti64x2_epi64): Use
+ _mm_setzero_si128 instead of _mm_setzero_di.
+ (_mm512_cvtt_roundpd_epi64, _mm512_mask_cvtt_roundpd_epi64,
+ _mm512_maskz_cvtt_roundpd_epi64, _mm512_cvtt_roundpd_epu64,
+ _mm512_mask_cvtt_roundpd_epu64, _mm512_maskz_cvtt_roundpd_epu64,
+ _mm512_cvtt_roundps_epi64, _mm512_mask_cvtt_roundps_epi64,
+ _mm512_maskz_cvtt_roundps_epi64, _mm512_cvtt_roundps_epu64,
+ _mm512_mask_cvtt_roundps_epu64, _mm512_maskz_cvtt_roundps_epu64,
+ _mm512_cvt_roundpd_epi64, _mm512_mask_cvt_roundpd_epi64,
+ _mm512_maskz_cvt_roundpd_epi64, _mm512_cvt_roundpd_epu64,
+ _mm512_mask_cvt_roundpd_epu64, _mm512_maskz_cvt_roundpd_epu64,
+ _mm512_cvt_roundps_epi64, _mm512_mask_cvt_roundps_epi64,
+ _mm512_maskz_cvt_roundps_epi64, _mm512_cvt_roundps_epu64,
+ _mm512_mask_cvt_roundps_epu64, _mm512_maskz_cvt_roundps_epu64,
+ _mm512_cvt_roundepi64_ps, _mm512_mask_cvt_roundepi64_ps,
+ _mm512_maskz_cvt_roundepi64_ps, _mm512_cvt_roundepu64_ps,
+ _mm512_mask_cvt_roundepu64_ps, _mm512_maskz_cvt_roundepu64_ps,
+ _mm512_cvt_roundepi64_pd, _mm512_mask_cvt_roundepi64_pd,
+ _mm512_maskz_cvt_roundepi64_pd, _mm512_cvt_roundepu64_pd,
+ _mm512_mask_cvt_roundepu64_pd, _mm512_maskz_cvt_roundepu64_pd,
+ _mm512_reduce_pd, _mm512_maskz_reduce_pd, _mm512_reduce_ps,
+ _mm512_maskz_reduce_ps, _mm512_extractf32x8_ps,
+ _mm512_maskz_extractf32x8_ps, _mm512_extractf64x2_pd,
+ _mm512_maskz_extractf64x2_pd, _mm512_extracti32x8_epi32,
+ _mm512_maskz_extracti32x8_epi32, _mm512_range_pd,
+ _mm512_maskz_range_pd, _mm512_range_ps, _mm512_maskz_range_ps,
+ _mm512_range_round_pd, _mm512_maskz_range_round_pd,
+ _mm512_range_round_ps, _mm512_maskz_range_round_ps,
+ _mm512_maskz_insertf64x2, _mm512_insertf32x8,
+ _mm512_maskz_insertf32x8): Formatting fixes.
+ (_mm512_extracti64x2_epi64, _mm512_maskz_extracti64x2_epi64): Use
+ _mm_setzero_si128 instead of _mm_setzero_di.
+ * config/i386/avx512vldqintrin.h (_mm_cvttpd_epi64,
+ _mm_cvttpd_epu64, _mm_cvtpd_epi64, _mm_cvtpd_epu64,
+ _mm_cvttps_epi64, _mm_maskz_cvttps_epi64, _mm_cvttps_epu64,
+ _mm_maskz_cvttps_epu64, _mm_maskz_mullo_epi64, _mm_cvtps_epi64,
+ _mm_maskz_cvtps_epi64, _mm_cvtps_epu64, _mm_maskz_cvtps_epu64,
+ _mm256_extracti64x2_epi64, _mm256_maskz_extracti64x2_epi64): Use
+ _mm_setzero_si128 instead of _mm_setzero_di.
+ (_mm256_extracti64x2_epi64, _mm256_maskz_extracti64x2_epi64):
+ Likewise in macros.
+ * config/i386/avx512vlbwintrin.h (_mm_maskz_mov_epi8,
+ _mm_maskz_loadu_epi16, _mm_maskz_mov_epi16, _mm_maskz_loadu_epi8,
+ _mm_permutexvar_epi16, _mm_maskz_maddubs_epi16): Use
+ _mm_setzero_si128 instead of _mm_setzero_hi.
+ (_mm_maskz_min_epu16, _mm_maskz_max_epu8, _mm_maskz_max_epi8,
+ _mm_maskz_min_epu8, _mm_maskz_min_epi8, _mm_maskz_max_epi16,
+ _mm_maskz_max_epu16, _mm_maskz_min_epi16): Use _mm_setzero_si128
+ instead of _mm_setzero_di.
+ (_mm_dbsad_epu8, _mm_maskz_shufflehi_epi16,
+ _mm_maskz_shufflelo_epi16): Use _mm_setzero_si128 instead of
+ _mm_setzero_hi.
+ (_mm_maskz_shufflehi_epi16, _mm_maskz_shufflelo_epi16,
+ _mm_maskz_slli_epi16): Use _mm_setzero_si128 instead of
+ _mm_setzero_hi.
+ (_mm_maskz_alignr_epi8): Use _mm_setzero_si128 instead of
+ _mm_setzero_di.
+ (_mm_maskz_mulhi_epi16, _mm_maskz_mulhi_epu16, _mm_maskz_mulhrs_epi16,
+ _mm_maskz_mullo_epi16, _mm_srav_epi16, _mm_srlv_epi16,
+ _mm_sllv_epi16): Use _mm_setzero_si128 instead of _mm_setzero_hi.
+
2016-11-22 Carl Love <cel@us.ibm.com>
* config/rs6000/rs6000-c.c: Add built-in support for vector compare
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_f64x2 (__m128d __A)
{
- return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
- __A,
- _mm512_undefined_pd(),
- (__mmask8) -1);
+ return (__m512d)
+ __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
+ _mm512_undefined_pd (),
+ (__mmask8) -1);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_i64x2 (__m128i __A)
{
- return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
- __A,
- _mm512_undefined_epi32(),
- (__mmask8) -1);
+ return (__m512i)
+ __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
+ _mm512_undefined_epi32 (),
+ (__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_f32x2 (__m128 __A)
{
- return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
- (__v16sf)_mm512_undefined_ps(),
- (__mmask16) -1);
+ return (__m512)
+ __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
+ (__v16sf)_mm512_undefined_ps (),
+ (__mmask16) -1);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_i32x2 (__m128i __A)
{
- return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
- __A,
- (__v16si)_mm512_undefined_epi32(),
- (__mmask16) -1);
+ return (__m512i)
+ __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
+ (__v16si)
+ _mm512_undefined_epi32 (),
+ (__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_f32x8 (__m256 __A)
{
- return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
- _mm512_undefined_ps(),
- (__mmask16) -1);
+ return (__m512)
+ __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
+ _mm512_undefined_ps (),
+ (__mmask16) -1);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_i32x8 (__m256i __A)
{
- return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
- __A,
- (__v16si)_mm512_undefined_epi32(),
- (__mmask16) -1);
+ return (__m512i)
+ __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
+ (__v16si)
+ _mm512_undefined_epi32 (),
+ (__mmask16) -1);
}
extern __inline __m512i
return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
__imm,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
__imm,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8)
__U);
}
(__v4sf)(__m128)(B), (int)(C), (R)))
#define _mm512_cvtt_roundpd_epi64(A, B) \
- ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+ ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di) \
+ _mm512_setzero_si512 (), \
+ -1, (B)))
#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B) \
- ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)(W), (U), (B)))
+ ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)(W), (U), (B)))
#define _mm512_maskz_cvtt_roundpd_epi64(U, A, B) \
- ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+ ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
#define _mm512_cvtt_roundpd_epu64(A, B) \
- ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+ ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B) \
- ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)(W), (U), (B)))
+ ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)(W), (U), (B)))
#define _mm512_maskz_cvtt_roundpd_epu64(U, A, B) \
- ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+ ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
#define _mm512_cvtt_roundps_epi64(A, B) \
- ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+ ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
#define _mm512_mask_cvtt_roundps_epi64(W, U, A, B) \
- ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)(W), (U), (B)))
+ ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)(W), (U), (B)))
#define _mm512_maskz_cvtt_roundps_epi64(U, A, B) \
- ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+ ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
#define _mm512_cvtt_roundps_epu64(A, B) \
- ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+ ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
#define _mm512_mask_cvtt_roundps_epu64(W, U, A, B) \
- ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)(W), (U), (B)))
+ ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)(W), (U), (B)))
#define _mm512_maskz_cvtt_roundps_epu64(U, A, B) \
- ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+ ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
#define _mm512_cvt_roundpd_epi64(A, B) \
- ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+ ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
#define _mm512_mask_cvt_roundpd_epi64(W, U, A, B) \
- ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)(W), (U), (B)))
+ ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)(W), (U), (B)))
#define _mm512_maskz_cvt_roundpd_epi64(U, A, B) \
- ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+ ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
#define _mm512_cvt_roundpd_epu64(A, B) \
- ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+ ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
#define _mm512_mask_cvt_roundpd_epu64(W, U, A, B) \
- ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)(W), (U), (B)))
+ ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)(W), (U), (B)))
#define _mm512_maskz_cvt_roundpd_epu64(U, A, B) \
- ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+ ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
#define _mm512_cvt_roundps_epi64(A, B) \
- ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+ ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
#define _mm512_mask_cvt_roundps_epi64(W, U, A, B) \
- ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)(W), (U), (B)))
+ ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)(W), (U), (B)))
#define _mm512_maskz_cvt_roundps_epi64(U, A, B) \
- ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+ ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
#define _mm512_cvt_roundps_epu64(A, B) \
- ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+ ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
#define _mm512_mask_cvt_roundps_epu64(W, U, A, B) \
- ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)(W), (U), (B)))
+ ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)(W), (U), (B)))
#define _mm512_maskz_cvt_roundps_epu64(U, A, B) \
- ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+ ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
#define _mm512_cvt_roundepi64_ps(A, B) \
- ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), -1, (B)))
+ ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), -1, (B)))
#define _mm512_mask_cvt_roundepi64_ps(W, U, A, B) \
- ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (W), (U), (B)))
+ ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (W), (U), (B)))
#define _mm512_maskz_cvt_roundepi64_ps(U, A, B) \
- ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), (U), (B)))
+ ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), (U), (B)))
#define _mm512_cvt_roundepu64_ps(A, B) \
- ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), -1, (B)))
+ ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), -1, (B)))
#define _mm512_mask_cvt_roundepu64_ps(W, U, A, B) \
- ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (W), (U), (B)))
+ ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (W), (U), (B)))
#define _mm512_maskz_cvt_roundepu64_ps(U, A, B) \
- ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), (U), (B)))
+ ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), (U), (B)))
#define _mm512_cvt_roundepi64_pd(A, B) \
- ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), -1, (B)))
+ ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), -1, (B)))
#define _mm512_mask_cvt_roundepi64_pd(W, U, A, B) \
- ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (W), (U), (B)))
+ ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (W), (U), (B)))
#define _mm512_maskz_cvt_roundepi64_pd(U, A, B) \
- ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), (U), (B)))
+ ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), (U), (B)))
#define _mm512_cvt_roundepu64_pd(A, B) \
- ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), -1, (B)))
+ ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), -1, (B)))
#define _mm512_mask_cvt_roundepu64_pd(W, U, A, B) \
- ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (W), (U), (B)))
+ ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (W), (U), (B)))
#define _mm512_maskz_cvt_roundepu64_pd(U, A, B) \
- ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), (U), (B)))
+ ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), (U), (B)))
#define _mm512_reduce_pd(A, B) \
((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
- (int)(B), (__v8df)_mm512_setzero_pd(), (__mmask8)-1))
+ (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)-1))
#define _mm512_mask_reduce_pd(W, U, A, B) \
((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
#define _mm512_maskz_reduce_pd(U, A, B) \
((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
- (int)(B), (__v8df)_mm512_setzero_pd(), (__mmask8)(U)))
+ (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)(U)))
#define _mm512_reduce_ps(A, B) \
((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
- (int)(B), (__v16sf)_mm512_setzero_ps(), (__mmask16)-1))
+ (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)-1))
#define _mm512_mask_reduce_ps(W, U, A, B) \
((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
#define _mm512_maskz_reduce_ps(U, A, B) \
((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
- (int)(B), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U)))
+ (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)(U)))
#define _mm512_extractf32x8_ps(X, C) \
((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
- (int) (C), (__v8sf)(__m256) _mm256_setzero_ps(), (__mmask8)-1))
+ (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8)-1))
#define _mm512_mask_extractf32x8_ps(W, U, X, C) \
((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
#define _mm512_maskz_extractf32x8_ps(U, X, C) \
((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
- (int) (C), (__v8sf)(__m256) _mm256_setzero_ps(), (__mmask8) (U)))
+ (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) (U)))
#define _mm512_extractf64x2_pd(X, C) \
((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
- (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))
+ (int) (C), (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8)-1))
#define _mm512_mask_extractf64x2_pd(W, U, X, C) \
((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
#define _mm512_maskz_extractf64x2_pd(U, X, C) \
((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
- (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))
+ (int) (C), (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8) (U)))
#define _mm512_extracti32x8_epi32(X, C) \
((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
- (int) (C), (__v8si)(__m256i) _mm256_setzero_si256(), (__mmask8)-1))
+ (int) (C), (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8)-1))
#define _mm512_mask_extracti32x8_epi32(W, U, X, C) \
((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
#define _mm512_maskz_extracti32x8_epi32(U, X, C) \
((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
- (int) (C), (__v8si)(__m256i) _mm256_setzero_si256(), (__mmask8) (U)))
+ (int) (C), (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))
#define _mm512_extracti64x2_epi64(X, C) \
((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
- (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8)-1))
+ (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1))
#define _mm512_mask_extracti64x2_epi64(W, U, X, C) \
((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
#define _mm512_maskz_extracti64x2_epi64(U, X, C) \
((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
- (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8) (U)))
+ (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
#define _mm512_range_pd(A, B, C) \
((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(B), (int)(C), \
- (__v8df)_mm512_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+ (__v8df)_mm512_setzero_pd (), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_range_pd(W, U, A, B, C) \
((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
#define _mm512_maskz_range_pd(U, A, B, C) \
((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(B), (int)(C), \
- (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+ (__v8df)_mm512_setzero_pd (), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
#define _mm512_range_ps(A, B, C) \
((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
(__v16sf)(__m512)(B), (int)(C), \
- (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
+ (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_range_ps(W, U, A, B, C) \
((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
#define _mm512_maskz_range_ps(U, A, B, C) \
((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
(__v16sf)(__m512)(B), (int)(C), \
- (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+ (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
#define _mm512_range_round_pd(A, B, C, R) \
((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(B), (int)(C), \
- (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (R)))
+ (__v8df)_mm512_setzero_pd (), (__mmask8)-1, (R)))
#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(B), (int)(C), \
- (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (R)))
+ (__v8df)_mm512_setzero_pd (), (__mmask8)(U), (R)))
#define _mm512_range_round_ps(A, B, C, R) \
((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
(__v16sf)(__m512)(B), (int)(C), \
- (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (R)))
+ (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, (R)))
#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
(__v16sf)(__m512)(B), (int)(C), \
- (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (R)))
+ (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), (R)))
#define _mm512_insertf64x2(X, Y, C) \
((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
#define _mm512_maskz_insertf64x2(U, X, Y, C) \
((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
(__v2df)(__m128d) (Y), (int) (C), \
- (__v8df)(__m512d) _mm512_setzero_pd(), (__mmask8) (U)))
+ (__v8df)(__m512d) _mm512_setzero_pd (), (__mmask8) (U)))
#define _mm512_inserti64x2(X, Y, C) \
((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
#define _mm512_insertf32x8(X, Y, C) \
((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
(__v8sf)(__m256) (Y), (int) (C),\
- (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__v16sf)(__m512)_mm512_setzero_ps (),\
(__mmask16)-1))
#define _mm512_mask_insertf32x8(W, U, X, Y, C) \
#define _mm512_maskz_insertf32x8(U, X, Y, C) \
((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
(__v8sf)(__m256) (Y), (int) (C),\
- (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__v16sf)(__m512)_mm512_setzero_ps (),\
(__mmask16)(U)))
#define _mm512_inserti32x8(X, Y, C) \
{
return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
(__v16qi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask16) __U);
}
{
return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P,
(__v16qi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask16) __U);
}
return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
(__v8hi) __A,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
(__v16qi) __Y,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
(__v8hi) __B,
(__v8hi)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __M);
}
return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
(__v16qi) __B,
(__v16qi)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask16) __M);
}
return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
(__v16qi) __B,
(__v16qi)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask16) __M);
}
return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
(__v16qi) __B,
(__v16qi)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask16) __M);
}
return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
(__v16qi) __B,
(__v16qi)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask16) __M);
}
return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
(__v8hi) __B,
(__v8hi)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __M);
}
return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
(__v8hi) __B,
(__v8hi)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __M);
}
return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
(__v8hi) __B,
(__v8hi)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __M);
}
(__v16qi) __B,
__imm,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
#define _mm_maskz_shufflehi_epi16(U, A, B) \
((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A), (int)(B), \
- (__v8hi)(__m128i)_mm_setzero_hi(), \
+ (__v8hi)(__m128i)_mm_setzero_si128 (), \
(__mmask8)(U)))
#define _mm256_mask_shufflelo_epi16(W, U, A, B) \
#define _mm_maskz_shufflelo_epi16(U, A, B) \
((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A), (int)(B), \
- (__v8hi)(__m128i)_mm_setzero_hi(), \
+ (__v8hi)(__m128i)_mm_setzero_si128 (), \
(__mmask8)(U)))
#define _mm256_maskz_alignr_epi8(U, X, Y, N) \
#define _mm_maskz_alignr_epi8(U, X, Y, N) \
((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \
(__v2di)(__m128i)(Y), (int)(N * 8), \
- (__v2di)(__m128i)_mm_setzero_di(), \
+ (__v2di)(__m128i)_mm_setzero_si128 (), \
(__mmask16)(U)))
#define _mm_mask_slli_epi16(W, U, X, C) \
#define _mm_maskz_slli_epi16(U, X, C) \
((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), (int)(C),\
- (__v8hi)(__m128i)_mm_setzero_hi(),\
+ (__v8hi)(__m128i)_mm_setzero_si128 (),\
(__mmask8)(U)))
#define _mm256_dbsad_epu8(X, Y, C) \
return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
(__v8hi) __B,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
(__v8hi) __B,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
(__v8hi) __Y,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
(__v8hi) __B,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
(__v8hi) __B,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
(__v8hi) __B,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
(__v8hi) __B,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
__imm,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
__imm,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8)
__U);
}
#define _mm256_extracti64x2_epi64(X, C) \
((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
- (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8)-1))
+ (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1))
#define _mm256_mask_extracti64x2_epi64(W, U, X, C) \
((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
#define _mm256_maskz_extracti64x2_epi64(U, X, C) \
((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
- (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8) (U)))
+ (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
#define _mm256_reduce_pd(A, B) \
((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
#ifndef _AVX512VLINTRIN_H_INCLUDED
#define _AVX512VLINTRIN_H_INCLUDED
-/* Doesn't require avx512vl target and is used in avx512dqintrin.h. */
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setzero_di (void)
-{
- return __extension__ (__m128i)(__v2di){ 0LL, 0LL};
-}
-
#ifndef __AVX512VL__
#pragma GCC push_options
#pragma GCC target("avx512vl")
{
return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8)
__U);
}
(__mmask8) __U);
}
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setzero_hi (void)
-{
- return __extension__ (__m128i) (__v8hi)
- {
- 0, 0, 0, 0, 0, 0, 0, 0};
-}
-
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
_mm_cvtepi32_epi8 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
- (__v16qi)_mm_undefined_si128(),
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm256_cvtepi32_epi8 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
- (__v16qi)_mm_undefined_si128(),
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm_cvtsepi32_epi8 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
- (__v16qi)_mm_undefined_si128(),
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm256_cvtsepi32_epi8 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
- (__v16qi)_mm_undefined_si128(),
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm_cvtusepi32_epi8 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
- (__v16qi)_mm_undefined_si128(),
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm256_cvtusepi32_epi8 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
- (__v16qi)_mm_undefined_si128(),
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm_cvtepi32_epi16 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
- (__v8hi) _mm_setzero_si128 (),
+ (__v8hi)
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
_mm256_cvtepi32_epi16 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
- (__v8hi)_mm_setzero_si128 (),
+ (__v8hi)
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
_mm_cvtsepi32_epi16 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
- (__v8hi)_mm_setzero_si128 (),
+ (__v8hi)
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
_mm256_cvtsepi32_epi16 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
- (__v8hi)_mm_undefined_si128(),
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm_cvtusepi32_epi16 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
- (__v8hi)_mm_undefined_si128(),
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm256_cvtusepi32_epi16 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
- (__v8hi)_mm_undefined_si128(),
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm_cvtepi64_epi8 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
- (__v16qi)_mm_undefined_si128(),
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm256_cvtepi64_epi8 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
- (__v16qi)_mm_undefined_si128(),
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm_cvtsepi64_epi8 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
- (__v16qi)_mm_undefined_si128(),
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm256_cvtsepi64_epi8 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
- (__v16qi)_mm_undefined_si128(),
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm_cvtusepi64_epi8 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
- (__v16qi)_mm_undefined_si128(),
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm256_cvtusepi64_epi8 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
- (__v16qi)_mm_undefined_si128(),
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm_cvtepi64_epi16 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
- (__v8hi)_mm_undefined_si128(),
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm256_cvtepi64_epi16 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
- (__v8hi)_mm_undefined_si128(),
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm_cvtsepi64_epi16 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
- (__v8hi)_mm_undefined_si128(),
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm256_cvtsepi64_epi16 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
- (__v8hi)_mm_undefined_si128(),
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm_cvtusepi64_epi16 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
- (__v8hi)_mm_undefined_si128(),
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm256_cvtusepi64_epi16 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
- (__v8hi)_mm_undefined_si128(),
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm_cvtepi64_epi32 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
- (__v4si)_mm_undefined_si128(),
+ (__v4si)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm256_cvtepi64_epi32 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
- (__v4si)_mm_undefined_si128(),
+ (__v4si)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm_cvtsepi64_epi32 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
- (__v4si)_mm_undefined_si128(),
+ (__v4si)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm256_cvtsepi64_epi32 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
- (__v4si)_mm_undefined_si128(),
+ (__v4si)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm_cvtusepi64_epi32 (__m128i __A)
{
return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
- (__v4si)_mm_undefined_si128(),
+ (__v4si)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
_mm256_cvtusepi64_epi32 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
- (__v4si)_mm_undefined_si128(),
+ (__v4si)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_set1_epi32 (__mmask8 __M, int __A)
{
- return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A,
- (__v4si)
- _mm_setzero_si128 (),
- __M);
+ return (__m128i)
+ __builtin_ia32_pbroadcastd128_gpr_mask (__A,
+ (__v4si) _mm_setzero_si128 (),
+ __M);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
{
- return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
- (__v2di)
- _mm_setzero_si128 (),
- __M);
+ return (__m128i)
+ __builtin_ia32_pbroadcastq128_gpr_mask (__A,
+ (__v2di) _mm_setzero_si128 (),
+ __M);
}
extern __inline __m256
return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
(__v2di) __Y,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
(__v2di) __Y,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
(__v2di) __Y,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
(__v2di) __Y,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8)
__U);
}
return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
(__v2di) __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
(__v2di) __B, __imm,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
(__v2di) __B, __imm,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
(__v8hi)
- _mm_setzero_hi (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
{
return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) -1);
}
{
return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
(__v2di)
- _mm_setzero_di (),
+ _mm_setzero_si128 (),
(__mmask8) __U);
}
#else
#define _mm256_permutex_pd(X, M) \
((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M), \
- (__v4df)(__m256d)_mm256_undefined_pd(),\
+ (__v4df)(__m256d) \
+ _mm256_undefined_pd (), \
(__mmask8)-1))
#define _mm256_maskz_permutex_epi64(M, X, I) \
((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
(int)(I), \
(__v4di)(__m256i) \
- (_mm256_setzero_si256()),\
+ (_mm256_setzero_si256 ()),\
(__mmask8)(M)))
#define _mm256_mask_permutex_epi64(W, M, X, I) \
#define _mm256_insertf32x4(X, Y, C) \
((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
(__v4sf)(__m128) (Y), (int) (C), \
- (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__v8sf)(__m256)_mm256_setzero_ps (), \
(__mmask8)-1))
#define _mm256_mask_insertf32x4(W, U, X, Y, C) \
#define _mm256_maskz_insertf32x4(U, X, Y, C) \
((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
(__v4sf)(__m128) (Y), (int) (C), \
- (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__v8sf)(__m256)_mm256_setzero_ps (), \
(__mmask8)(U)))
#define _mm256_inserti32x4(X, Y, C) \
((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
(__v4si)(__m128i) (Y), (int) (C), \
- (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__v8si)(__m256i)_mm256_setzero_si256 (), \
(__mmask8)-1))
#define _mm256_mask_inserti32x4(W, U, X, Y, C) \
#define _mm256_maskz_inserti32x4(U, X, Y, C) \
((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
(__v4si)(__m128i) (Y), (int) (C), \
- (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__v8si)(__m256i)_mm256_setzero_si256 (), \
(__mmask8)(U)))
#define _mm256_extractf32x4_ps(X, C) \
((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
(int) (C), \
- (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__v4sf)(__m128)_mm_setzero_ps (), \
(__mmask8)-1))
#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
#define _mm256_maskz_extractf32x4_ps(U, X, C) \
((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
(int) (C), \
- (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__v4sf)(__m128)_mm_setzero_ps (), \
(__mmask8)(U)))
#define _mm256_extracti32x4_epi32(X, C) \
#define _mm256_shuffle_i32x4(X, Y, C) \
((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
(__v8si)(__m256i)(Y), (int)(C), \
- (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__v8si)(__m256i) \
+ _mm256_setzero_si256 (), \
(__mmask8)-1))
#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
(__v8si)(__m256i)(Y), (int)(C), \
- (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__v8si)(__m256i) \
+ _mm256_setzero_si256 (), \
(__mmask8)(U)))
#define _mm256_shuffle_f64x2(X, Y, C) \
((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
(__v4df)(__m256d)(Y), (int)(C), \
- (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__v4df)(__m256d)_mm256_setzero_pd (),\
(__mmask8)-1))
#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
#define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
(__v4df)(__m256d)(Y), (int)(C), \
- (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__v4df)(__m256d)_mm256_setzero_pd (),\
(__mmask8)(U)))
#define _mm256_shuffle_f32x4(X, Y, C) \
((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
(__v8sf)(__m256)(Y), (int)(C), \
- (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__v8sf)(__m256)_mm256_setzero_ps (), \
(__mmask8)-1))
#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
(__v8sf)(__m256)(Y), (int)(C), \
- (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__v8sf)(__m256)_mm256_setzero_ps (), \
(__mmask8)(U)))
#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
#define _mm256_maskz_shuffle_pd(U, A, B, C) \
((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
(__v4df)(__m256d)(B), (int)(C), \
- (__v4df)(__m256d)_mm256_setzero_pd(),\
+ (__v4df)(__m256d) \
+ _mm256_setzero_pd (), \
(__mmask8)(U)))
#define _mm_mask_shuffle_pd(W, U, A, B, C) \
#define _mm_maskz_shuffle_pd(U, A, B, C) \
((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), (int)(C), \
- (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__v2df)(__m128d)_mm_setzero_pd (), \
(__mmask8)(U)))
#define _mm256_mask_shuffle_ps(W, U, A, B, C) \
#define _mm256_maskz_shuffle_ps(U, A, B, C) \
((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
(__v8sf)(__m256)(B), (int)(C), \
- (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__v8sf)(__m256)_mm256_setzero_ps (),\
(__mmask8)(U)))
#define _mm_mask_shuffle_ps(W, U, A, B, C) \
#define _mm_maskz_shuffle_ps(U, A, B, C) \
((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), (int)(C), \
- (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__v4sf)(__m128)_mm_setzero_ps (), \
(__mmask8)(U)))
#define _mm256_fixupimm_pd(X, Y, Z, C) \
#define _mm256_maskz_srli_epi32(U, A, B) \
((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
- (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
+ (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
#define _mm_mask_srli_epi32(W, U, A, B) \
((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
#define _mm_maskz_srli_epi32(U, A, B) \
((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
- (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
+ (int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
#define _mm256_mask_srli_epi64(W, U, A, B) \
((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
#define _mm_maskz_srli_epi64(U, A, B) \
((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
- (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
+ (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
#define _mm256_mask_slli_epi32(W, U, X, C) \
((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
- (__v8si)(__m256i)(W),\
+ (__v8si)(__m256i)(W), \
(__mmask8)(U)))
#define _mm256_maskz_slli_epi32(U, X, C) \
((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
- (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__v8si)(__m256i)_mm256_setzero_si256 (), \
(__mmask8)(U)))
#define _mm256_mask_slli_epi64(W, U, X, C) \
((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
- (__v4di)(__m256i)(W),\
+ (__v4di)(__m256i)(W), \
(__mmask8)(U)))
#define _mm256_maskz_slli_epi64(U, X, C) \
((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
- (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__v4di)(__m256i)_mm256_setzero_si256 (), \
(__mmask8)(U)))
#define _mm_mask_slli_epi32(W, U, X, C) \
#define _mm_maskz_slli_epi64(U, X, C) \
((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
- (__v2di)(__m128i)_mm_setzero_di(),\
+ (__v2di)(__m128i)_mm_setzero_si128 (),\
(__mmask8)(U)))
#define _mm256_ternarylogic_epi64(A, B, C, I) \
#define _mm256_roundscale_ps(A, B) \
((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
- (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)-1))
+ (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))
#define _mm256_mask_roundscale_ps(W, U, A, B) \
((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
#define _mm256_maskz_roundscale_ps(U, A, B) \
((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
- (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)(U)))
+ (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))
#define _mm256_roundscale_pd(A, B) \
((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
- (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)-1))
+ (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1))
#define _mm256_mask_roundscale_pd(W, U, A, B) \
((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
#define _mm256_maskz_roundscale_pd(U, A, B) \
((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
- (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
+ (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
#define _mm_roundscale_ps(A, B) \
((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
- (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)-1))
+ (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1))
#define _mm_mask_roundscale_ps(W, U, A, B) \
((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
#define _mm_maskz_roundscale_ps(U, A, B) \
((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
- (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U)))
+ (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))
#define _mm_roundscale_pd(A, B) \
((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
- (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)-1))
+ (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)-1))
#define _mm_mask_roundscale_pd(W, U, A, B) \
((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
#define _mm_maskz_roundscale_pd(U, A, B) \
((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
- (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)(U)))
+ (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))
#define _mm256_getmant_ps(X, B, C) \
((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
(int)(((C)<<2) | (B)), \
- (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__v8sf)(__m256)_mm256_setzero_ps (), \
(__mmask8)-1))
#define _mm256_mask_getmant_ps(W, U, X, B, C) \
#define _mm256_maskz_getmant_ps(U, X, B, C) \
((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
(int)(((C)<<2) | (B)), \
- (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__v8sf)(__m256)_mm256_setzero_ps (), \
(__mmask8)(U)))
#define _mm_getmant_ps(X, B, C) \
((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
(int)(((C)<<2) | (B)), \
- (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__v4sf)(__m128)_mm_setzero_ps (), \
(__mmask8)-1))
#define _mm_mask_getmant_ps(W, U, X, B, C) \
#define _mm_maskz_getmant_ps(U, X, B, C) \
((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
(int)(((C)<<2) | (B)), \
- (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__v4sf)(__m128)_mm_setzero_ps (), \
(__mmask8)(U)))
#define _mm256_getmant_pd(X, B, C) \
((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
(int)(((C)<<2) | (B)), \
- (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__v4df)(__m256d)_mm256_setzero_pd (),\
(__mmask8)-1))
#define _mm256_mask_getmant_pd(W, U, X, B, C) \
#define _mm256_maskz_getmant_pd(U, X, B, C) \
((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
(int)(((C)<<2) | (B)), \
- (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__v4df)(__m256d)_mm256_setzero_pd (),\
(__mmask8)(U)))
#define _mm_getmant_pd(X, B, C) \
((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
(int)(((C)<<2) | (B)), \
- (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__v2df)(__m128d)_mm_setzero_pd (), \
(__mmask8)-1))
#define _mm_mask_getmant_pd(W, U, X, B, C) \
#define _mm_maskz_getmant_pd(U, X, B, C) \
((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
(int)(((C)<<2) | (B)), \
- (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__v2df)(__m128d)_mm_setzero_pd (), \
(__mmask8)(U)))
#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
#define _mm256_maskz_shuffle_epi32(U, X, C) \
((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
- (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__v8si)(__m256i) \
+ _mm256_setzero_si256 (), \
(__mmask8)(U)))
#define _mm_mask_shuffle_epi32(W, U, X, C) \
#define _mm_maskz_shuffle_epi32(U, X, C) \
((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
- (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__v4si)(__m128i)_mm_setzero_si128 (), \
(__mmask8)(U)))
#define _mm256_rol_epi64(A, B) \
#define _mm_rol_epi64(A, B) \
((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
- (__v2di)(__m128i)_mm_setzero_di(), \
+ (__v2di)(__m128i)_mm_setzero_si128 (),\
(__mmask8)-1))
#define _mm_mask_rol_epi64(W, U, A, B) \
#define _mm_maskz_rol_epi64(U, A, B) \
((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
- (__v2di)(__m128i)_mm_setzero_di(), \
+ (__v2di)(__m128i)_mm_setzero_si128 (),\
(__mmask8)(U)))
#define _mm256_ror_epi64(A, B) \
#define _mm_ror_epi64(A, B) \
((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
- (__v2di)(__m128i)_mm_setzero_di(), \
+ (__v2di)(__m128i)_mm_setzero_si128 (),\
(__mmask8)-1))
#define _mm_mask_ror_epi64(W, U, A, B) \
#define _mm_maskz_ror_epi64(U, A, B) \
((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
- (__v2di)(__m128i)_mm_setzero_di(), \
+ (__v2di)(__m128i)_mm_setzero_si128 (),\
(__mmask8)(U)))
#define _mm256_rol_epi32(A, B) \
((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
- (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__v8si)(__m256i)_mm256_setzero_si256 (),\
(__mmask8)-1))
#define _mm256_mask_rol_epi32(W, U, A, B) \
#define _mm256_maskz_rol_epi32(U, A, B) \
((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
- (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__v8si)(__m256i)_mm256_setzero_si256 (),\
(__mmask8)(U)))
#define _mm_rol_epi32(A, B) \
((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
- (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__v4si)(__m128i)_mm_setzero_si128 (),\
(__mmask8)-1))
#define _mm_mask_rol_epi32(W, U, A, B) \
#define _mm_maskz_rol_epi32(U, A, B) \
((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
- (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__v4si)(__m128i)_mm_setzero_si128 (),\
(__mmask8)(U)))
#define _mm256_ror_epi32(A, B) \
((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
- (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__v8si)(__m256i)_mm256_setzero_si256 (),\
(__mmask8)-1))
#define _mm256_mask_ror_epi32(W, U, A, B) \
#define _mm256_maskz_ror_epi32(U, A, B) \
((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
- (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__v8si)(__m256i) \
+ _mm256_setzero_si256 (), \
(__mmask8)(U)))
#define _mm_ror_epi32(A, B) \
((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
- (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__v4si)(__m128i)_mm_setzero_si128 (),\
(__mmask8)-1))
#define _mm_mask_ror_epi32(W, U, A, B) \
#define _mm_maskz_ror_epi32(U, A, B) \
((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
- (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__v4si)(__m128i)_mm_setzero_si128 (),\
(__mmask8)(U)))
#define _mm256_alignr_epi32(X, Y, C) \
#define _mm_maskz_alignr_epi32(U, X, Y, C) \
((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
- (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128(),\
+ (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128 (),\
(__mmask8)(U)))
#define _mm_alignr_epi64(X, Y, C) \
#define _mm_maskz_alignr_epi64(U, X, Y, C) \
((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
- (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128(),\
+ (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128 (),\
(__mmask8)(U)))
#define _mm_mask_cvtps_ph(W, U, A, I) \
#define _mm_maskz_cvtps_ph(U, A, I) \
((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) A, (int) (I), \
- (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
+ (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
#define _mm256_mask_cvtps_ph(W, U, A, I) \
((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) A, (int) (I), \
#define _mm256_maskz_cvtps_ph(U, A, I) \
((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) A, (int) (I), \
- (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
+ (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
#define _mm256_mask_srai_epi32(W, U, A, B) \
((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
#define _mm256_maskz_srai_epi32(U, A, B) \
((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
- (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
+ (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
#define _mm_mask_srai_epi32(W, U, A, B) \
((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
#define _mm_maskz_srai_epi32(U, A, B) \
((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
- (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
+ (int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
#define _mm256_srai_epi64(A, B) \
((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
#define _mm_srai_epi64(A, B) \
((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
- (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)-1))
+ (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)-1))
#define _mm_mask_srai_epi64(W, U, A, B) \
((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
#define _mm_maskz_srai_epi64(U, A, B) \
((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
- (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
+ (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
#define _mm256_mask_permutex_pd(W, U, A, B) \
((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
#define _mm256_maskz_permutex_pd(U, A, B) \
((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
- (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
+ (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
#define _mm256_mask_permute_pd(W, U, X, C) \
((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
#define _mm256_maskz_permute_pd(U, X, C) \
((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
- (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__v4df)(__m256d)_mm256_setzero_pd (),\
(__mmask8)(U)))
#define _mm256_mask_permute_ps(W, U, X, C) \
#define _mm256_maskz_permute_ps(U, X, C) \
((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
- (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__v8sf)(__m256)_mm256_setzero_ps (), \
(__mmask8)(U)))
#define _mm_mask_permute_pd(W, U, X, C) \
#define _mm_maskz_permute_pd(U, X, C) \
((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
- (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__v2df)(__m128d)_mm_setzero_pd (), \
(__mmask8)(U)))
#define _mm_mask_permute_ps(W, U, X, C) \
#define _mm_maskz_permute_ps(U, X, C) \
((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
- (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__v4sf)(__m128)_mm_setzero_ps (), \
(__mmask8)(U)))
#define _mm256_mask_blend_pd(__U, __A, __W) \
#endif
-#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
+#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps ((B), (A))
#ifdef __DISABLE_AVX512VL__
#undef __DISABLE_AVX512VL__