From a25a788762c63930b83858d03d5b30465f67aec7 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Tue, 22 Nov 2016 17:53:35 +0100
Subject: [PATCH] re PR target/78451 (FAIL: gcc.target/i386/sse-22a.c: error:
 inlining failed in call to always_inline '_mm512_setzero_ps')

	PR target/78451
	* config/i386/avx512vlintrin.h (_mm_setzero_di): Removed.
	(_mm_maskz_mov_epi64): Use _mm_setzero_si128 instead of
	_mm_setzero_di.
	(_mm_maskz_load_epi64): Likewise.
	(_mm_setzero_hi): Removed.
	(_mm_maskz_loadu_epi64): Use _mm_setzero_si128 instead of
	_mm_setzero_di.
	(_mm_abs_epi64, _mm_maskz_abs_epi64, _mm_maskz_srl_epi64,
	_mm_maskz_unpackhi_epi64, _mm_maskz_unpacklo_epi64,
	_mm_maskz_compress_epi64, _mm_srav_epi64, _mm_maskz_srav_epi64,
	_mm_maskz_sllv_epi64, _mm_maskz_srlv_epi64, _mm_rolv_epi64,
	_mm_maskz_rolv_epi64, _mm_rorv_epi64, _mm_maskz_rorv_epi64,
	_mm_min_epi64, _mm_max_epi64, _mm_max_epu64, _mm_min_epu64,
	_mm_lzcnt_epi64, _mm_maskz_lzcnt_epi64, _mm_conflict_epi64,
	_mm_maskz_conflict_epi64, _mm_sra_epi64, _mm_maskz_sra_epi64,
	_mm_maskz_sll_epi64, _mm_rol_epi64, _mm_maskz_rol_epi64,
	_mm_ror_epi64, _mm_maskz_ror_epi64, _mm_alignr_epi64,
	_mm_maskz_alignr_epi64, _mm_srai_epi64, _mm_maskz_slli_epi64):
	Likewise.
	(_mm_cvtepi32_epi8, _mm256_cvtepi32_epi8, _mm_cvtsepi32_epi8,
	_mm256_cvtsepi32_epi8, _mm_cvtusepi32_epi8, _mm256_cvtusepi32_epi8,
	_mm_cvtepi32_epi16, _mm256_cvtepi32_epi16, _mm_cvtsepi32_epi16,
	_mm256_cvtsepi32_epi16, _mm_cvtusepi32_epi16, _mm256_cvtusepi32_epi16,
	_mm_cvtepi64_epi8, _mm256_cvtepi64_epi8, _mm_cvtsepi64_epi8,
	_mm256_cvtsepi64_epi8, _mm_cvtusepi64_epi8, _mm256_cvtusepi64_epi8,
	_mm_cvtepi64_epi16, _mm256_cvtepi64_epi16, _mm_cvtsepi64_epi16,
	_mm256_cvtsepi64_epi16, _mm_cvtusepi64_epi16, _mm256_cvtusepi64_epi16,
	_mm_cvtepi64_epi32, _mm256_cvtepi64_epi32, _mm_cvtsepi64_epi32,
	_mm256_cvtsepi64_epi32, _mm_cvtusepi64_epi32, _mm256_cvtusepi64_epi32,
	_mm_maskz_set1_epi32, _mm_maskz_set1_epi64): Formatting fixes.
	(_mm_maskz_cvtps_ph, _mm256_maskz_cvtps_ph): Use _mm_setzero_si128
	instead of _mm_setzero_hi.
	(_mm256_permutex_pd, _mm256_maskz_permutex_epi64, _mm256_insertf32x4,
	_mm256_maskz_insertf32x4, _mm256_inserti32x4, _mm256_maskz_inserti32x4,
	_mm256_extractf32x4_ps, _mm256_maskz_extractf32x4_ps,
	_mm256_shuffle_i32x4, _mm256_maskz_shuffle_i32x4, _mm256_shuffle_f64x2,
	_mm256_maskz_shuffle_f64x2, _mm256_shuffle_f32x4,
	_mm256_maskz_shuffle_f32x4, _mm256_maskz_shuffle_pd,
	_mm_maskz_shuffle_pd, _mm256_maskz_shuffle_ps, _mm_maskz_shuffle_ps,
	_mm256_maskz_srli_epi32, _mm_maskz_srli_epi32, _mm_maskz_srli_epi64,
	_mm256_mask_slli_epi32, _mm256_maskz_slli_epi32, _mm256_mask_slli_epi64,
	_mm256_maskz_slli_epi64, _mm256_roundscale_ps,
	_mm256_maskz_roundscale_ps, _mm256_roundscale_pd,
	_mm256_maskz_roundscale_pd, _mm_roundscale_ps, _mm_maskz_roundscale_ps,
	_mm_roundscale_pd, _mm_maskz_roundscale_pd, _mm256_getmant_ps,
	_mm256_maskz_getmant_ps, _mm_getmant_ps, _mm_maskz_getmant_ps,
	_mm256_getmant_pd, _mm256_maskz_getmant_pd, _mm_getmant_pd,
	_mm_maskz_getmant_pd, _mm256_maskz_shuffle_epi32,
	_mm_maskz_shuffle_epi32, _mm256_rol_epi32, _mm256_maskz_rol_epi32,
	_mm_rol_epi32, _mm_maskz_rol_epi32, _mm256_ror_epi32,
	_mm256_maskz_ror_epi32, _mm_ror_epi32, _mm_maskz_ror_epi32,
	_mm_maskz_alignr_epi32, _mm_maskz_alignr_epi64,
	_mm256_maskz_srai_epi32, _mm_maskz_srai_epi32, _mm_srai_epi64,
	_mm_maskz_srai_epi64, _mm256_maskz_permutex_pd,
	_mm256_maskz_permute_pd, _mm256_maskz_permute_ps, _mm_maskz_permute_pd,
	_mm_maskz_permute_ps, _mm256_permutexvar_ps): Formatting fixes.
	(_mm_maskz_slli_epi64, _mm_rol_epi64, _mm_maskz_rol_epi64,
	_mm_ror_epi64, _mm_maskz_ror_epi64): Use _mm_setzero_si128 instead of
	_mm_setzero_di.
	(_mm_maskz_cvtps_ph, _mm256_maskz_cvtps_ph): Use _mm_setzero_si128
	instead of _mm_setzero_hi.
	* config/i386/avx512dqintrin.h (_mm512_broadcast_f64x2,
	_mm512_broadcast_i64x2, _mm512_broadcast_f32x2, _mm512_broadcast_i32x2,
	_mm512_broadcast_f32x8, _mm512_broadcast_i32x8): Formatting fixes.
	(_mm512_extracti64x2_epi64, _mm512_maskz_extracti64x2_epi64): Use
	_mm_setzero_si128 instead of _mm_setzero_di.
	(_mm512_cvtt_roundpd_epi64, _mm512_mask_cvtt_roundpd_epi64,
	_mm512_maskz_cvtt_roundpd_epi64, _mm512_cvtt_roundpd_epu64,
	_mm512_mask_cvtt_roundpd_epu64, _mm512_maskz_cvtt_roundpd_epu64,
	_mm512_cvtt_roundps_epi64, _mm512_mask_cvtt_roundps_epi64,
	_mm512_maskz_cvtt_roundps_epi64, _mm512_cvtt_roundps_epu64,
	_mm512_mask_cvtt_roundps_epu64, _mm512_maskz_cvtt_roundps_epu64,
	_mm512_cvt_roundpd_epi64, _mm512_mask_cvt_roundpd_epi64,
	_mm512_maskz_cvt_roundpd_epi64, _mm512_cvt_roundpd_epu64,
	_mm512_mask_cvt_roundpd_epu64, _mm512_maskz_cvt_roundpd_epu64,
	_mm512_cvt_roundps_epi64, _mm512_mask_cvt_roundps_epi64,
	_mm512_maskz_cvt_roundps_epi64, _mm512_cvt_roundps_epu64,
	_mm512_mask_cvt_roundps_epu64, _mm512_maskz_cvt_roundps_epu64,
	_mm512_cvt_roundepi64_ps, _mm512_mask_cvt_roundepi64_ps,
	_mm512_maskz_cvt_roundepi64_ps, _mm512_cvt_roundepu64_ps,
	_mm512_mask_cvt_roundepu64_ps, _mm512_maskz_cvt_roundepu64_ps,
	_mm512_cvt_roundepi64_pd, _mm512_mask_cvt_roundepi64_pd,
	_mm512_maskz_cvt_roundepi64_pd, _mm512_cvt_roundepu64_pd,
	_mm512_mask_cvt_roundepu64_pd, _mm512_maskz_cvt_roundepu64_pd,
	_mm512_reduce_pd, _mm512_maskz_reduce_pd, _mm512_reduce_ps,
	_mm512_maskz_reduce_ps, _mm512_extractf32x8_ps,
	_mm512_maskz_extractf32x8_ps, _mm512_extractf64x2_pd,
	_mm512_maskz_extractf64x2_pd, _mm512_extracti32x8_epi32,
	_mm512_maskz_extracti32x8_epi32, _mm512_range_pd,
	_mm512_maskz_range_pd, _mm512_range_ps, _mm512_maskz_range_ps,
	_mm512_range_round_pd, _mm512_maskz_range_round_pd,
	_mm512_range_round_ps, _mm512_maskz_range_round_ps,
	_mm512_maskz_insertf64x2, _mm512_insertf32x8,
	_mm512_maskz_insertf32x8): Formatting fixes.
	(_mm512_extracti64x2_epi64, _mm512_maskz_extracti64x2_epi64): Use
	_mm_setzero_si128 instead of _mm_setzero_di.
	* config/i386/avx512vldqintrin.h (_mm_cvttpd_epi64,
	_mm_cvttpd_epu64, _mm_cvtpd_epi64, _mm_cvtpd_epu64,
	_mm_cvttps_epi64, _mm_maskz_cvttps_epi64, _mm_cvttps_epu64,
	_mm_maskz_cvttps_epu64, _mm_maskz_mullo_epi64, _mm_cvtps_epi64,
	_mm_maskz_cvtps_epi64, _mm_cvtps_epu64, _mm_maskz_cvtps_epu64,
	_mm256_extracti64x2_epi64, _mm256_maskz_extracti64x2_epi64): Use
	_mm_setzero_si128 instead of _mm_setzero_di.
	(_mm256_extracti64x2_epi64, _mm256_maskz_extracti64x2_epi64):
	Likewise in macros.
	* config/i386/avx512vlbwintrin.h (_mm_maskz_mov_epi8,
	_mm_maskz_loadu_epi16, _mm_maskz_mov_epi16, _mm_maskz_loadu_epi8,
	_mm_permutexvar_epi16, _mm_maskz_maddubs_epi16): Use
	_mm_setzero_si128 instead of _mm_setzero_hi.
	(_mm_maskz_min_epu16, _mm_maskz_max_epu8, _mm_maskz_max_epi8,
	_mm_maskz_min_epu8, _mm_maskz_min_epi8, _mm_maskz_max_epi16,
	_mm_maskz_max_epu16, _mm_maskz_min_epi16): Use _mm_setzero_si128
	instead of _mm_setzero_di.
	(_mm_dbsad_epu8, _mm_maskz_shufflehi_epi16,
	_mm_maskz_shufflelo_epi16): Use _mm_setzero_si128 instead of
	_mm_setzero_hi.
	(_mm_maskz_shufflehi_epi16, _mm_maskz_shufflelo_epi16,
	_mm_maskz_slli_epi16): Use _mm_setzero_si128 instead of
	_mm_setzero_hi.
	(_mm_maskz_alignr_epi8): Use _mm_setzero_si128 instead of
	_mm_setzero_di.
	(_mm_maskz_mulhi_epi16, _mm_maskz_mulhi_epu16, _mm_maskz_mulhrs_epi16,
	_mm_maskz_mullo_epi16, _mm_srav_epi16, _mm_srlv_epi16,
	_mm_sllv_epi16): Use _mm_setzero_si128 instead of _mm_setzero_hi.

From-SVN: r242707
---
 gcc/ChangeLog                      | 128 +++++++++++
 gcc/config/i386/avx512dqintrin.h   | 172 +++++++-------
 gcc/config/i386/avx512vlbwintrin.h |  56 ++---
 gcc/config/i386/avx512vldqintrin.h |  34 +--
 gcc/config/i386/avx512vlintrin.h   | 345 +++++++++++++++--------------
 5 files changed, 444 insertions(+), 291 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a81c423f477..179a26dd36b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,131 @@
+2016-11-22  Jakub Jelinek
+
+	PR target/78451
+	* config/i386/avx512vlintrin.h (_mm_setzero_di): Removed.
+	(_mm_maskz_mov_epi64): Use _mm_setzero_si128 instead of
+	_mm_setzero_di.
+	(_mm_maskz_load_epi64): Likewise.
+	(_mm_setzero_hi): Removed.
+	(_mm_maskz_loadu_epi64): Use _mm_setzero_si128 instead of
+	_mm_setzero_di.
+	(_mm_abs_epi64, _mm_maskz_abs_epi64, _mm_maskz_srl_epi64,
+	_mm_maskz_unpackhi_epi64, _mm_maskz_unpacklo_epi64,
+	_mm_maskz_compress_epi64, _mm_srav_epi64, _mm_maskz_srav_epi64,
+	_mm_maskz_sllv_epi64, _mm_maskz_srlv_epi64, _mm_rolv_epi64,
+	_mm_maskz_rolv_epi64, _mm_rorv_epi64, _mm_maskz_rorv_epi64,
+	_mm_min_epi64, _mm_max_epi64, _mm_max_epu64, _mm_min_epu64,
+	_mm_lzcnt_epi64, _mm_maskz_lzcnt_epi64, _mm_conflict_epi64,
+	_mm_maskz_conflict_epi64, _mm_sra_epi64, _mm_maskz_sra_epi64,
+	_mm_maskz_sll_epi64, _mm_rol_epi64, _mm_maskz_rol_epi64,
+	_mm_ror_epi64, _mm_maskz_ror_epi64, _mm_alignr_epi64,
+	_mm_maskz_alignr_epi64, _mm_srai_epi64, _mm_maskz_slli_epi64):
+	Likewise.
+	(_mm_cvtepi32_epi8, _mm256_cvtepi32_epi8, _mm_cvtsepi32_epi8,
+	_mm256_cvtsepi32_epi8, _mm_cvtusepi32_epi8, _mm256_cvtusepi32_epi8,
+	_mm_cvtepi32_epi16, _mm256_cvtepi32_epi16, _mm_cvtsepi32_epi16,
+	_mm256_cvtsepi32_epi16, _mm_cvtusepi32_epi16, _mm256_cvtusepi32_epi16,
+	_mm_cvtepi64_epi8, _mm256_cvtepi64_epi8, _mm_cvtsepi64_epi8,
+	_mm256_cvtsepi64_epi8, _mm_cvtusepi64_epi8, _mm256_cvtusepi64_epi8,
+	_mm_cvtepi64_epi16, _mm256_cvtepi64_epi16, _mm_cvtsepi64_epi16,
+	_mm256_cvtsepi64_epi16, _mm_cvtusepi64_epi16, _mm256_cvtusepi64_epi16,
+	_mm_cvtepi64_epi32, _mm256_cvtepi64_epi32, _mm_cvtsepi64_epi32,
+	_mm256_cvtsepi64_epi32, _mm_cvtusepi64_epi32, _mm256_cvtusepi64_epi32,
+	_mm_maskz_set1_epi32, _mm_maskz_set1_epi64): Formatting fixes.
+	(_mm_maskz_cvtps_ph, _mm256_maskz_cvtps_ph): Use _mm_setzero_si128
+	instead of _mm_setzero_hi.
+	(_mm256_permutex_pd, _mm256_maskz_permutex_epi64, _mm256_insertf32x4,
+	_mm256_maskz_insertf32x4, _mm256_inserti32x4, _mm256_maskz_inserti32x4,
+	_mm256_extractf32x4_ps, _mm256_maskz_extractf32x4_ps,
+	_mm256_shuffle_i32x4, _mm256_maskz_shuffle_i32x4, _mm256_shuffle_f64x2,
+	_mm256_maskz_shuffle_f64x2, _mm256_shuffle_f32x4,
+	_mm256_maskz_shuffle_f32x4, _mm256_maskz_shuffle_pd,
+	_mm_maskz_shuffle_pd, _mm256_maskz_shuffle_ps, _mm_maskz_shuffle_ps,
+	_mm256_maskz_srli_epi32, _mm_maskz_srli_epi32, _mm_maskz_srli_epi64,
+	_mm256_mask_slli_epi32, _mm256_maskz_slli_epi32, _mm256_mask_slli_epi64,
+	_mm256_maskz_slli_epi64, _mm256_roundscale_ps,
+	_mm256_maskz_roundscale_ps, _mm256_roundscale_pd,
+	_mm256_maskz_roundscale_pd, _mm_roundscale_ps, _mm_maskz_roundscale_ps,
+	_mm_roundscale_pd, _mm_maskz_roundscale_pd, _mm256_getmant_ps,
+	_mm256_maskz_getmant_ps, _mm_getmant_ps, _mm_maskz_getmant_ps,
+	_mm256_getmant_pd, _mm256_maskz_getmant_pd, _mm_getmant_pd,
+	_mm_maskz_getmant_pd, _mm256_maskz_shuffle_epi32,
+	_mm_maskz_shuffle_epi32, _mm256_rol_epi32, _mm256_maskz_rol_epi32,
+	_mm_rol_epi32, _mm_maskz_rol_epi32, _mm256_ror_epi32,
+	_mm256_maskz_ror_epi32, _mm_ror_epi32, _mm_maskz_ror_epi32,
+	_mm_maskz_alignr_epi32, _mm_maskz_alignr_epi64,
+	_mm256_maskz_srai_epi32, _mm_maskz_srai_epi32, _mm_srai_epi64,
+	_mm_maskz_srai_epi64, _mm256_maskz_permutex_pd,
+	_mm256_maskz_permute_pd, _mm256_maskz_permute_ps, _mm_maskz_permute_pd,
+	_mm_maskz_permute_ps, _mm256_permutexvar_ps): Formatting fixes.
+	(_mm_maskz_slli_epi64, _mm_rol_epi64, _mm_maskz_rol_epi64,
+	_mm_ror_epi64, _mm_maskz_ror_epi64): Use _mm_setzero_si128 instead of
+	_mm_setzero_di.
+	(_mm_maskz_cvtps_ph, _mm256_maskz_cvtps_ph): Use _mm_setzero_si128
+	instead of _mm_setzero_hi.
+	* config/i386/avx512dqintrin.h (_mm512_broadcast_f64x2,
+	_mm512_broadcast_i64x2, _mm512_broadcast_f32x2, _mm512_broadcast_i32x2,
+	_mm512_broadcast_f32x8, _mm512_broadcast_i32x8): Formatting fixes.
+	(_mm512_extracti64x2_epi64, _mm512_maskz_extracti64x2_epi64): Use
+	_mm_setzero_si128 instead of _mm_setzero_di.
+	(_mm512_cvtt_roundpd_epi64, _mm512_mask_cvtt_roundpd_epi64,
+	_mm512_maskz_cvtt_roundpd_epi64, _mm512_cvtt_roundpd_epu64,
+	_mm512_mask_cvtt_roundpd_epu64, _mm512_maskz_cvtt_roundpd_epu64,
+	_mm512_cvtt_roundps_epi64, _mm512_mask_cvtt_roundps_epi64,
+	_mm512_maskz_cvtt_roundps_epi64, _mm512_cvtt_roundps_epu64,
+	_mm512_mask_cvtt_roundps_epu64, _mm512_maskz_cvtt_roundps_epu64,
+	_mm512_cvt_roundpd_epi64, _mm512_mask_cvt_roundpd_epi64,
+	_mm512_maskz_cvt_roundpd_epi64, _mm512_cvt_roundpd_epu64,
+	_mm512_mask_cvt_roundpd_epu64, _mm512_maskz_cvt_roundpd_epu64,
+	_mm512_cvt_roundps_epi64, _mm512_mask_cvt_roundps_epi64,
+	_mm512_maskz_cvt_roundps_epi64, _mm512_cvt_roundps_epu64,
+	_mm512_mask_cvt_roundps_epu64, _mm512_maskz_cvt_roundps_epu64,
+	_mm512_cvt_roundepi64_ps, _mm512_mask_cvt_roundepi64_ps,
+	_mm512_maskz_cvt_roundepi64_ps, _mm512_cvt_roundepu64_ps,
+	_mm512_mask_cvt_roundepu64_ps, _mm512_maskz_cvt_roundepu64_ps,
+	_mm512_cvt_roundepi64_pd, _mm512_mask_cvt_roundepi64_pd,
+	_mm512_maskz_cvt_roundepi64_pd, _mm512_cvt_roundepu64_pd,
+	_mm512_mask_cvt_roundepu64_pd, _mm512_maskz_cvt_roundepu64_pd,
+	_mm512_reduce_pd, _mm512_maskz_reduce_pd, _mm512_reduce_ps,
+	_mm512_maskz_reduce_ps, _mm512_extractf32x8_ps,
+	_mm512_maskz_extractf32x8_ps, _mm512_extractf64x2_pd,
+	_mm512_maskz_extractf64x2_pd, _mm512_extracti32x8_epi32,
+	_mm512_maskz_extracti32x8_epi32, _mm512_range_pd,
+	_mm512_maskz_range_pd, _mm512_range_ps, _mm512_maskz_range_ps,
+	_mm512_range_round_pd, _mm512_maskz_range_round_pd,
+	_mm512_range_round_ps, _mm512_maskz_range_round_ps,
+	_mm512_maskz_insertf64x2, _mm512_insertf32x8,
+	_mm512_maskz_insertf32x8): Formatting fixes.
+	(_mm512_extracti64x2_epi64, _mm512_maskz_extracti64x2_epi64): Use
+	_mm_setzero_si128 instead of _mm_setzero_di.
+	* config/i386/avx512vldqintrin.h (_mm_cvttpd_epi64,
+	_mm_cvttpd_epu64, _mm_cvtpd_epi64, _mm_cvtpd_epu64,
+	_mm_cvttps_epi64, _mm_maskz_cvttps_epi64, _mm_cvttps_epu64,
+	_mm_maskz_cvttps_epu64, _mm_maskz_mullo_epi64, _mm_cvtps_epi64,
+	_mm_maskz_cvtps_epi64, _mm_cvtps_epu64, _mm_maskz_cvtps_epu64,
+	_mm256_extracti64x2_epi64, _mm256_maskz_extracti64x2_epi64): Use
+	_mm_setzero_si128 instead of _mm_setzero_di.
+	(_mm256_extracti64x2_epi64, _mm256_maskz_extracti64x2_epi64):
+	Likewise in macros.
+	* config/i386/avx512vlbwintrin.h (_mm_maskz_mov_epi8,
+	_mm_maskz_loadu_epi16, _mm_maskz_mov_epi16, _mm_maskz_loadu_epi8,
+	_mm_permutexvar_epi16, _mm_maskz_maddubs_epi16): Use
+	_mm_setzero_si128 instead of _mm_setzero_hi.
+	(_mm_maskz_min_epu16, _mm_maskz_max_epu8, _mm_maskz_max_epi8,
+	_mm_maskz_min_epu8, _mm_maskz_min_epi8, _mm_maskz_max_epi16,
+	_mm_maskz_max_epu16, _mm_maskz_min_epi16): Use _mm_setzero_si128
+	instead of _mm_setzero_di.
+	(_mm_dbsad_epu8, _mm_maskz_shufflehi_epi16,
+	_mm_maskz_shufflelo_epi16): Use _mm_setzero_si128 instead of
+	_mm_setzero_hi.
+	(_mm_maskz_shufflehi_epi16, _mm_maskz_shufflelo_epi16,
+	_mm_maskz_slli_epi16): Use _mm_setzero_si128 instead of
+	_mm_setzero_hi.
+	(_mm_maskz_alignr_epi8): Use _mm_setzero_si128 instead of
+	_mm_setzero_di.
+	(_mm_maskz_mulhi_epi16, _mm_maskz_mulhi_epu16, _mm_maskz_mulhrs_epi16,
+	_mm_maskz_mullo_epi16, _mm_srav_epi16, _mm_srlv_epi16,
+	_mm_sllv_epi16): Use _mm_setzero_si128 instead of _mm_setzero_hi.
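
The substitution the entry describes is mechanical: each call to the
removed helpers _mm_setzero_di () and _mm_setzero_hi () becomes a call
to _mm_setzero_si128 (), with the (__v2di) / (__v8hi) casts already
present at the call sites left in place.  A minimal sketch of the
equivalence, not part of the patch and with made-up function names,
assuming only an SSE2-enabled translation unit:

/* Both functions return the same all-zero __m128i; the removed
   _mm_setzero_di spelled the zero through the __v2di element type,
   while the replacement reuses the plain SSE2 intrinsic.  */
#include <emmintrin.h>

static __inline __m128i
zero_like_removed_setzero_di (void)
{
  return __extension__ (__m128i) (__v2di) { 0LL, 0LL };
}

static __inline __m128i
zero_via_setzero_si128 (void)
{
  return _mm_setzero_si128 ();
}

In the intrinsics touched below, that zero vector is only the
pass-through operand of the masked builtins, i.e. the value stored to
destination lanes whose mask bit is clear, so the observable behaviour
of the _mm_maskz_* intrinsics does not change.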
+ 2016-11-22 Carl Love * config/rs6000/rs6000-c.c: Add built-in support for vector compare diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h index 1dbb6b04e41..4b954f924ed 100644 --- a/gcc/config/i386/avx512dqintrin.h +++ b/gcc/config/i386/avx512dqintrin.h @@ -38,10 +38,10 @@ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_broadcast_f64x2 (__m128d __A) { - return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) - __A, - _mm512_undefined_pd(), - (__mmask8) -1); + return (__m512d) + __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A, + _mm512_undefined_pd (), + (__mmask8) -1); } extern __inline __m512d @@ -69,10 +69,10 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_broadcast_i64x2 (__m128i __A) { - return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) - __A, - _mm512_undefined_epi32(), - (__mmask8) -1); + return (__m512i) + __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A, + _mm512_undefined_epi32 (), + (__mmask8) -1); } extern __inline __m512i @@ -100,9 +100,10 @@ extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_broadcast_f32x2 (__m128 __A) { - return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, - (__v16sf)_mm512_undefined_ps(), - (__mmask16) -1); + return (__m512) + __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, + (__v16sf)_mm512_undefined_ps (), + (__mmask16) -1); } extern __inline __m512 @@ -128,10 +129,11 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_broadcast_i32x2 (__m128i __A) { - return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) - __A, - (__v16si)_mm512_undefined_epi32(), - (__mmask16) -1); + return (__m512i) + __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, + (__v16si) + _mm512_undefined_epi32 (), + (__mmask16) -1); } extern __inline __m512i @@ -159,9 +161,10 @@ extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_broadcast_f32x8 (__m256 __A) { - return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, - _mm512_undefined_ps(), - (__mmask16) -1); + return (__m512) + __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, + _mm512_undefined_ps (), + (__mmask16) -1); } extern __inline __m512 @@ -187,10 +190,11 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_broadcast_i32x8 (__m256i __A) { - return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) - __A, - (__v16si)_mm512_undefined_epi32(), - (__mmask16) -1); + return (__m512i) + __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A, + (__v16si) + _mm512_undefined_epi32 (), + (__mmask16) -1); } extern __inline __m512i @@ -1632,7 +1636,7 @@ _mm512_extracti64x2_epi64 (__m512i __A, const int __imm) return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A, __imm, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -1656,7 +1660,7 @@ _mm512_maskz_extracti64x2_epi64 (__mmask8 __U, __m512i __A, return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A, __imm, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -1946,116 +1950,118 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) (__v4sf)(__m128)(B), (int)(C), (R))) #define _mm512_cvtt_roundpd_epi64(A, B) \ - ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, 
(B))) + ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di) \ + _mm512_setzero_si512 (), \ + -1, (B))) #define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B) \ - ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)(W), (U), (B))) + ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)(W), (U), (B))) #define _mm512_maskz_cvtt_roundpd_epi64(U, A, B) \ - ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) + ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) #define _mm512_cvtt_roundpd_epu64(A, B) \ - ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) + ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) #define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B) \ - ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)(W), (U), (B))) + ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)(W), (U), (B))) #define _mm512_maskz_cvtt_roundpd_epu64(U, A, B) \ - ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) + ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) #define _mm512_cvtt_roundps_epi64(A, B) \ - ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) + ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) #define _mm512_mask_cvtt_roundps_epi64(W, U, A, B) \ - ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)(W), (U), (B))) + ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)(W), (U), (B))) #define _mm512_maskz_cvtt_roundps_epi64(U, A, B) \ - ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) + ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) #define _mm512_cvtt_roundps_epu64(A, B) \ - ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) + ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) #define _mm512_mask_cvtt_roundps_epu64(W, U, A, B) \ - ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)(W), (U), (B))) + ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)(W), (U), (B))) #define _mm512_maskz_cvtt_roundps_epu64(U, A, B) \ - ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) + ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) #define _mm512_cvt_roundpd_epi64(A, B) \ - ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) + ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) #define _mm512_mask_cvt_roundpd_epi64(W, U, A, B) \ - ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)(W), (U), (B))) + ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)(W), (U), (B))) #define _mm512_maskz_cvt_roundpd_epi64(U, A, B) \ - ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) + ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) #define _mm512_cvt_roundpd_epu64(A, B) \ - ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) + ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) #define _mm512_mask_cvt_roundpd_epu64(W, U, A, B) \ - 
((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)(W), (U), (B))) + ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)(W), (U), (B))) #define _mm512_maskz_cvt_roundpd_epu64(U, A, B) \ - ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) + ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) #define _mm512_cvt_roundps_epi64(A, B) \ - ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) + ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) #define _mm512_mask_cvt_roundps_epi64(W, U, A, B) \ - ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)(W), (U), (B))) + ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)(W), (U), (B))) #define _mm512_maskz_cvt_roundps_epi64(U, A, B) \ - ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) + ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) #define _mm512_cvt_roundps_epu64(A, B) \ - ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) + ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) #define _mm512_mask_cvt_roundps_epu64(W, U, A, B) \ - ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)(W), (U), (B))) + ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)(W), (U), (B))) #define _mm512_maskz_cvt_roundps_epu64(U, A, B) \ - ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) + ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) #define _mm512_cvt_roundepi64_ps(A, B) \ - ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), -1, (B))) + ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), -1, (B))) #define _mm512_mask_cvt_roundepi64_ps(W, U, A, B) \ - ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (W), (U), (B))) + ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (W), (U), (B))) #define _mm512_maskz_cvt_roundepi64_ps(U, A, B) \ - ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), (U), (B))) + ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), (U), (B))) #define _mm512_cvt_roundepu64_ps(A, B) \ - ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), -1, (B))) + ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), -1, (B))) #define _mm512_mask_cvt_roundepu64_ps(W, U, A, B) \ - ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (W), (U), (B))) + ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (W), (U), (B))) #define _mm512_maskz_cvt_roundepu64_ps(U, A, B) \ - ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), (U), (B))) + ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), (U), (B))) #define _mm512_cvt_roundepi64_pd(A, B) \ - ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), -1, (B))) + ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), -1, (B))) #define _mm512_mask_cvt_roundepi64_pd(W, U, A, B) \ - ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (W), (U), (B))) + ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (W), (U), (B))) #define _mm512_maskz_cvt_roundepi64_pd(U, A, B) \ 
- ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), (U), (B))) + ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), (U), (B))) #define _mm512_cvt_roundepu64_pd(A, B) \ - ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), -1, (B))) + ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), -1, (B))) #define _mm512_mask_cvt_roundepu64_pd(W, U, A, B) \ - ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (W), (U), (B))) + ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (W), (U), (B))) #define _mm512_maskz_cvt_roundepu64_pd(U, A, B) \ - ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), (U), (B))) + ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), (U), (B))) #define _mm512_reduce_pd(A, B) \ ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \ - (int)(B), (__v8df)_mm512_setzero_pd(), (__mmask8)-1)) + (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)-1)) #define _mm512_mask_reduce_pd(W, U, A, B) \ ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \ @@ -2063,11 +2069,11 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #define _mm512_maskz_reduce_pd(U, A, B) \ ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \ - (int)(B), (__v8df)_mm512_setzero_pd(), (__mmask8)(U))) + (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)(U))) #define _mm512_reduce_ps(A, B) \ ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \ - (int)(B), (__v16sf)_mm512_setzero_ps(), (__mmask16)-1)) + (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)-1)) #define _mm512_mask_reduce_ps(W, U, A, B) \ ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \ @@ -2075,11 +2081,11 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #define _mm512_maskz_reduce_ps(U, A, B) \ ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \ - (int)(B), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U))) + (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)(U))) #define _mm512_extractf32x8_ps(X, C) \ ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \ - (int) (C), (__v8sf)(__m256) _mm256_setzero_ps(), (__mmask8)-1)) + (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8)-1)) #define _mm512_mask_extractf32x8_ps(W, U, X, C) \ ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \ @@ -2087,11 +2093,11 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #define _mm512_maskz_extractf32x8_ps(U, X, C) \ ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \ - (int) (C), (__v8sf)(__m256) _mm256_setzero_ps(), (__mmask8) (U))) + (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) (U))) #define _mm512_extractf64x2_pd(X, C) \ ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\ - (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1)) + (int) (C), (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8)-1)) #define _mm512_mask_extractf64x2_pd(W, U, X, C) \ ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\ @@ -2099,11 +2105,11 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #define _mm512_maskz_extractf64x2_pd(U, X, C) \ ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\ - (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U))) + (int) (C), (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8) (U))) #define 
_mm512_extracti32x8_epi32(X, C) \ ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \ - (int) (C), (__v8si)(__m256i) _mm256_setzero_si256(), (__mmask8)-1)) + (int) (C), (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8)-1)) #define _mm512_mask_extracti32x8_epi32(W, U, X, C) \ ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \ @@ -2111,11 +2117,11 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #define _mm512_maskz_extracti32x8_epi32(U, X, C) \ ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \ - (int) (C), (__v8si)(__m256i) _mm256_setzero_si256(), (__mmask8) (U))) + (int) (C), (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8) (U))) #define _mm512_extracti64x2_epi64(X, C) \ ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\ - (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8)-1)) + (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1)) #define _mm512_mask_extracti64x2_epi64(W, U, X, C) \ ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\ @@ -2123,12 +2129,12 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #define _mm512_maskz_extracti64x2_epi64(U, X, C) \ ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\ - (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8) (U))) + (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U))) #define _mm512_range_pd(A, B, C) \ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ - (__v8df)_mm512_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) + (__v8df)_mm512_setzero_pd (), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_range_pd(W, U, A, B, C) \ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \ @@ -2138,12 +2144,12 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #define _mm512_maskz_range_pd(U, A, B, C) \ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ - (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + (__v8df)_mm512_setzero_pd (), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) #define _mm512_range_ps(A, B, C) \ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ - (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION)) + (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_range_ps(W, U, A, B, C) \ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \ @@ -2153,12 +2159,12 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #define _mm512_maskz_range_ps(U, A, B, C) \ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ - (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) #define _mm512_range_round_pd(A, B, C, R) \ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ - (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (R))) + (__v8df)_mm512_setzero_pd (), (__mmask8)-1, (R))) #define _mm512_mask_range_round_pd(W, U, A, B, C, R) \ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \ @@ -2168,12 +2174,12 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #define _mm512_maskz_range_round_pd(U, A, B, C, R) \ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), 
\ - (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (R))) + (__v8df)_mm512_setzero_pd (), (__mmask8)(U), (R))) #define _mm512_range_round_ps(A, B, C, R) \ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ - (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (R))) + (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, (R))) #define _mm512_mask_range_round_ps(W, U, A, B, C, R) \ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \ @@ -2183,7 +2189,7 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #define _mm512_maskz_range_round_ps(U, A, B, C, R) \ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ - (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (R))) + (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), (R))) #define _mm512_insertf64x2(X, Y, C) \ ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\ @@ -2198,7 +2204,7 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #define _mm512_maskz_insertf64x2(U, X, Y, C) \ ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\ (__v2df)(__m128d) (Y), (int) (C), \ - (__v8df)(__m512d) _mm512_setzero_pd(), (__mmask8) (U))) + (__v8df)(__m512d) _mm512_setzero_pd (), (__mmask8) (U))) #define _mm512_inserti64x2(X, Y, C) \ ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\ @@ -2217,7 +2223,7 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #define _mm512_insertf32x8(X, Y, C) \ ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \ (__v8sf)(__m256) (Y), (int) (C),\ - (__v16sf)(__m512)_mm512_setzero_ps(),\ + (__v16sf)(__m512)_mm512_setzero_ps (),\ (__mmask16)-1)) #define _mm512_mask_insertf32x8(W, U, X, Y, C) \ @@ -2229,7 +2235,7 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #define _mm512_maskz_insertf32x8(U, X, Y, C) \ ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \ (__v8sf)(__m256) (Y), (int) (C),\ - (__v16sf)(__m512)_mm512_setzero_ps(),\ + (__v16sf)(__m512)_mm512_setzero_ps (),\ (__mmask16)(U))) #define _mm512_inserti32x8(X, Y, C) \ diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h index 02bbed0a8b9..eb384d65a2d 100644 --- a/gcc/config/i386/avx512vlbwintrin.h +++ b/gcc/config/i386/avx512vlbwintrin.h @@ -69,7 +69,7 @@ _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A, (__v16qi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask16) __U); } @@ -125,7 +125,7 @@ _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -164,7 +164,7 @@ _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -202,7 +202,7 @@ _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P, (__v16qi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask16) __U); } @@ -541,7 +541,7 @@ _mm_permutexvar_epi16 (__m128i __A, __m128i __B) return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B, (__v8hi) __A, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -707,7 +707,7 @@ _mm_maskz_maddubs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y) return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X, (__v16qi) 
__Y, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -908,7 +908,7 @@ _mm_maskz_min_epu16 (__mmask8 __M, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A, (__v8hi) __B, (__v8hi) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __M); } @@ -974,7 +974,7 @@ _mm_maskz_max_epu8 (__mmask16 __M, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A, (__v16qi) __B, (__v16qi) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask16) __M); } @@ -1018,7 +1018,7 @@ _mm_maskz_max_epi8 (__mmask16 __M, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A, (__v16qi) __B, (__v16qi) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask16) __M); } @@ -1062,7 +1062,7 @@ _mm_maskz_min_epu8 (__mmask16 __M, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A, (__v16qi) __B, (__v16qi) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask16) __M); } @@ -1106,7 +1106,7 @@ _mm_maskz_min_epi8 (__mmask16 __M, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A, (__v16qi) __B, (__v16qi) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask16) __M); } @@ -1150,7 +1150,7 @@ _mm_maskz_max_epi16 (__mmask8 __M, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A, (__v8hi) __B, (__v8hi) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __M); } @@ -1194,7 +1194,7 @@ _mm_maskz_max_epu16 (__mmask8 __M, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A, (__v8hi) __B, (__v8hi) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __M); } @@ -1216,7 +1216,7 @@ _mm_maskz_min_epi16 (__mmask8 __M, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A, (__v8hi) __B, (__v8hi) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __M); } @@ -1327,7 +1327,7 @@ _mm_dbsad_epu8 (__m128i __A, __m128i __B, const int __imm) (__v16qi) __B, __imm, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -1623,7 +1623,7 @@ _mm_maskz_shufflehi_epi16 (__mmask8 __U, __m128i __A, const int __imm) { return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -1666,7 +1666,7 @@ _mm_maskz_shufflelo_epi16 (__mmask8 __U, __m128i __A, const int __imm) { return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -1804,7 +1804,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B) #define _mm_maskz_shufflehi_epi16(U, A, B) \ ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A), (int)(B), \ - (__v8hi)(__m128i)_mm_setzero_hi(), \ + (__v8hi)(__m128i)_mm_setzero_si128 (), \ (__mmask8)(U))) #define _mm256_mask_shufflelo_epi16(W, U, A, B) \ @@ -1824,7 +1824,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B) #define _mm_maskz_shufflelo_epi16(U, A, B) \ ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A), (int)(B), \ - (__v8hi)(__m128i)_mm_setzero_hi(), \ + (__v8hi)(__m128i)_mm_setzero_si128 (), \ (__mmask8)(U))) #define _mm256_maskz_alignr_epi8(U, X, Y, N) \ @@ -1841,7 +1841,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B) #define _mm_maskz_alignr_epi8(U, X, Y, N) \ ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \ (__v2di)(__m128i)(Y), (int)(N * 8), \ - 
(__v2di)(__m128i)_mm_setzero_di(), \ + (__v2di)(__m128i)_mm_setzero_si128 (), \ (__mmask16)(U))) #define _mm_mask_slli_epi16(W, U, X, C) \ @@ -1851,7 +1851,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B) #define _mm_maskz_slli_epi16(U, X, C) \ ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), (int)(C),\ - (__v8hi)(__m128i)_mm_setzero_hi(),\ + (__v8hi)(__m128i)_mm_setzero_si128 (),\ (__mmask8)(U))) #define _mm256_dbsad_epu8(X, Y, C) \ @@ -2301,7 +2301,7 @@ _mm_maskz_mulhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A, (__v8hi) __B, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -2323,7 +2323,7 @@ _mm_maskz_mulhi_epu16 (__mmask8 __U, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A, (__v8hi) __B, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -2345,7 +2345,7 @@ _mm_maskz_mulhrs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y) return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X, (__v8hi) __Y, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -2389,7 +2389,7 @@ _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A, (__v8hi) __B, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -4067,7 +4067,7 @@ _mm_srav_epi16 (__m128i __A, __m128i __B) return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A, (__v8hi) __B, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -4133,7 +4133,7 @@ _mm_srlv_epi16 (__m128i __A, __m128i __B) return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A, (__v8hi) __B, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -4199,7 +4199,7 @@ _mm_sllv_epi16 (__m128i __A, __m128i __B) return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A, (__v8hi) __B, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) -1); } diff --git a/gcc/config/i386/avx512vldqintrin.h b/gcc/config/i386/avx512vldqintrin.h index 5ff0a526399..cd0b7143b46 100644 --- a/gcc/config/i386/avx512vldqintrin.h +++ b/gcc/config/i386/avx512vldqintrin.h @@ -69,7 +69,7 @@ _mm_cvttpd_epi64 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -127,7 +127,7 @@ _mm_cvttpd_epu64 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -185,7 +185,7 @@ _mm_cvtpd_epi64 (__m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -243,7 +243,7 @@ _mm_cvtpd_epu64 (__m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -301,7 +301,7 @@ _mm_cvttps_epi64 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -320,7 +320,7 @@ _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -359,7 +359,7 @@ _mm_cvttps_epu64 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) - _mm_setzero_di (), + 
_mm_setzero_si128 (), (__mmask8) -1); } @@ -378,7 +378,7 @@ _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -588,7 +588,7 @@ _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -714,7 +714,7 @@ _mm_cvtps_epi64 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -733,7 +733,7 @@ _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -772,7 +772,7 @@ _mm_cvtps_epu64 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -791,7 +791,7 @@ _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -1381,7 +1381,7 @@ _mm256_extracti64x2_epi64 (__m256i __A, const int __imm) return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A, __imm, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -1405,7 +1405,7 @@ _mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A, return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A, __imm, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -1856,7 +1856,7 @@ _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B, #define _mm256_extracti64x2_epi64(X, C) \ ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\ - (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8)-1)) + (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1)) #define _mm256_mask_extracti64x2_epi64(W, U, X, C) \ ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\ @@ -1864,7 +1864,7 @@ _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B, #define _mm256_maskz_extracti64x2_epi64(U, X, C) \ ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\ - (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8) (U))) + (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U))) #define _mm256_reduce_pd(A, B) \ ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \ diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index f339ed3e862..f83bfe25f19 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -28,14 +28,6 @@ #ifndef _AVX512VLINTRIN_H_INCLUDED #define _AVX512VLINTRIN_H_INCLUDED -/* Doesn't require avx512vl target and is used in avx512dqintrin.h. 
*/ -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_setzero_di (void) -{ - return __extension__ (__m128i)(__v2di){ 0LL, 0LL}; -} - #ifndef __AVX512VL__ #pragma GCC push_options #pragma GCC target("avx512vl") @@ -267,7 +259,7 @@ _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -308,7 +300,7 @@ _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -429,15 +421,6 @@ _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) (__mmask8) __U); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_setzero_hi (void) -{ - return __extension__ (__m128i) (__v8hi) - { - 0, 0, 0, 0, 0, 0, 0, 0}; -} - extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) @@ -768,7 +751,7 @@ _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -919,7 +902,7 @@ _mm_abs_epi64 (__m128i __A) { return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -938,7 +921,7 @@ _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -1465,7 +1448,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi32_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, - (__v16qi)_mm_undefined_si128(), + (__v16qi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -1499,7 +1483,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepi32_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, - (__v16qi)_mm_undefined_si128(), + (__v16qi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -1533,7 +1518,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsepi32_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, - (__v16qi)_mm_undefined_si128(), + (__v16qi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -1567,7 +1553,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtsepi32_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, - (__v16qi)_mm_undefined_si128(), + (__v16qi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -1601,7 +1588,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtusepi32_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, - (__v16qi)_mm_undefined_si128(), + (__v16qi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -1636,7 +1624,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtusepi32_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, - (__v16qi)_mm_undefined_si128(), + (__v16qi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -1671,7 +1660,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi32_epi16 (__m128i 
__A) { return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, - (__v8hi) _mm_setzero_si128 (), + (__v8hi) + _mm_setzero_si128 (), (__mmask8) -1); } @@ -1705,7 +1695,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepi32_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, - (__v8hi)_mm_setzero_si128 (), + (__v8hi) + _mm_setzero_si128 (), (__mmask8) -1); } @@ -1739,7 +1730,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsepi32_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, - (__v8hi)_mm_setzero_si128 (), + (__v8hi) + _mm_setzero_si128 (), (__mmask8) -1); } @@ -1774,7 +1766,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtsepi32_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, - (__v8hi)_mm_undefined_si128(), + (__v8hi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -1808,7 +1801,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtusepi32_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, - (__v8hi)_mm_undefined_si128(), + (__v8hi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -1842,7 +1836,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtusepi32_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, - (__v8hi)_mm_undefined_si128(), + (__v8hi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -1876,7 +1871,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi64_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, - (__v16qi)_mm_undefined_si128(), + (__v16qi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -1910,7 +1906,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepi64_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, - (__v16qi)_mm_undefined_si128(), + (__v16qi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -1944,7 +1941,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsepi64_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, - (__v16qi)_mm_undefined_si128(), + (__v16qi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -1978,7 +1976,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtsepi64_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, - (__v16qi)_mm_undefined_si128(), + (__v16qi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2012,7 +2011,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtusepi64_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, - (__v16qi)_mm_undefined_si128(), + (__v16qi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2047,7 +2047,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtusepi64_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, - (__v16qi)_mm_undefined_si128(), + (__v16qi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2082,7 +2083,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi64_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, - (__v8hi)_mm_undefined_si128(), + (__v8hi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2117,7 +2119,8 @@ __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) _mm256_cvtepi64_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, - (__v8hi)_mm_undefined_si128(), + (__v8hi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2151,7 +2154,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsepi64_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, - (__v8hi)_mm_undefined_si128(), + (__v8hi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2185,7 +2189,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtsepi64_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, - (__v8hi)_mm_undefined_si128(), + (__v8hi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2219,7 +2224,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtusepi64_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, - (__v8hi)_mm_undefined_si128(), + (__v8hi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2253,7 +2259,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtusepi64_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, - (__v8hi)_mm_undefined_si128(), + (__v8hi) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2287,7 +2294,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi64_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, - (__v4si)_mm_undefined_si128(), + (__v4si) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2321,7 +2329,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepi64_epi32 (__m256i __A) { return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, - (__v4si)_mm_undefined_si128(), + (__v4si) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2355,7 +2364,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsepi64_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, - (__v4si)_mm_undefined_si128(), + (__v4si) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2389,7 +2399,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtsepi64_epi32 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, - (__v4si)_mm_undefined_si128(), + (__v4si) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2424,7 +2435,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtusepi64_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, - (__v4si)_mm_undefined_si128(), + (__v4si) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2458,7 +2470,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtusepi64_epi32 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, - (__v4si)_mm_undefined_si128(), + (__v4si) + _mm_undefined_si128 (), (__mmask8) -1); } @@ -2612,10 +2625,10 @@ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_set1_epi32 (__mmask8 __M, int __A) { - return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, - (__v4si) - _mm_setzero_si128 (), - __M); + return (__m128i) + __builtin_ia32_pbroadcastd128_gpr_mask (__A, + (__v4si) _mm_setzero_si128 (), + __M); } extern __inline __m256i @@ -2686,10 +2699,10 @@ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_set1_epi64 
(__mmask8 __M, long long __A) { - return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, - (__v2di) - _mm_setzero_si128 (), - __M); + return (__m128i) + __builtin_ia32_pbroadcastq128_gpr_mask (__A, + (__v2di) _mm_setzero_si128 (), + __M); } extern __inline __m256 @@ -3815,7 +3828,7 @@ _mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -5217,7 +5230,7 @@ _mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -5305,7 +5318,7 @@ _mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -5894,7 +5907,7 @@ _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -6678,7 +6691,7 @@ _mm_srav_epi64 (__m128i __X, __m128i __Y) return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X, (__v2di) __Y, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -6700,7 +6713,7 @@ _mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y) return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X, (__v2di) __Y, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -6788,7 +6801,7 @@ _mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y) return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X, (__v2di) __Y, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -6920,7 +6933,7 @@ _mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y) return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X, (__v2di) __Y, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -7096,7 +7109,7 @@ _mm_rolv_epi64 (__m128i __A, __m128i __B) return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -7118,7 +7131,7 @@ _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -7162,7 +7175,7 @@ _mm_rorv_epi64 (__m128i __A, __m128i __B) return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -7184,7 +7197,7 @@ _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -7972,7 +7985,7 @@ _mm_min_epi64 (__m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -8015,7 +8028,7 @@ _mm_max_epi64 (__m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -8026,7 +8039,7 @@ _mm_max_epu64 (__m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, (__v2di) 
__B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -8047,7 +8060,7 @@ _mm_min_epu64 (__m128i __A, __m128i __B) return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -8345,7 +8358,7 @@ _mm_lzcnt_epi64 (__m128i __A) { return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -8364,7 +8377,7 @@ _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -8374,7 +8387,7 @@ _mm_conflict_epi64 (__m128i __A) { return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -8394,7 +8407,7 @@ _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -8730,7 +8743,7 @@ _mm_sra_epi64 (__m128i __A, __m128i __B) return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -8752,7 +8765,7 @@ _mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -8796,7 +8809,7 @@ _mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -10923,7 +10936,7 @@ _mm_rol_epi64 (__m128i __A, const int __B) { return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -10943,7 +10956,7 @@ _mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B) { return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -10983,7 +10996,7 @@ _mm_ror_epi64 (__m128i __A, const int __B) { return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -11003,7 +11016,7 @@ _mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B) { return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -11048,7 +11061,7 @@ _mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm) return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A, (__v2di) __B, __imm, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -11071,7 +11084,7 @@ _mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B, return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A, (__v2di) __B, __imm, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -11159,7 +11172,7 @@ _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I) { return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -11179,7 +11192,7 @@ _mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I) { return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I, (__v8hi) - _mm_setzero_hi (), + 
_mm_setzero_si128 (), (__mmask8) __U); } @@ -11259,7 +11272,7 @@ _mm_srai_epi64 (__m128i __A, const int __imm) { return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -11317,7 +11330,7 @@ _mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B) { return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -12350,14 +12363,15 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #else #define _mm256_permutex_pd(X, M) \ ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M), \ - (__v4df)(__m256d)_mm256_undefined_pd(),\ + (__v4df)(__m256d) \ + _mm256_undefined_pd (), \ (__mmask8)-1)) #define _mm256_maskz_permutex_epi64(M, X, I) \ ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \ (int)(I), \ (__v4di)(__m256i) \ - (_mm256_setzero_si256()),\ + (_mm256_setzero_si256 ()),\ (__mmask8)(M))) #define _mm256_mask_permutex_epi64(W, M, X, I) \ @@ -12369,7 +12383,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_insertf32x4(X, Y, C) \ ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \ (__v4sf)(__m128) (Y), (int) (C), \ - (__v8sf)(__m256)_mm256_setzero_ps(), \ + (__v8sf)(__m256)_mm256_setzero_ps (), \ (__mmask8)-1)) #define _mm256_mask_insertf32x4(W, U, X, Y, C) \ @@ -12381,13 +12395,13 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_insertf32x4(U, X, Y, C) \ ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \ (__v4sf)(__m128) (Y), (int) (C), \ - (__v8sf)(__m256)_mm256_setzero_ps(), \ + (__v8sf)(__m256)_mm256_setzero_ps (), \ (__mmask8)(U))) #define _mm256_inserti32x4(X, Y, C) \ ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\ (__v4si)(__m128i) (Y), (int) (C), \ - (__v8si)(__m256i)_mm256_setzero_si256(), \ + (__v8si)(__m256i)_mm256_setzero_si256 (), \ (__mmask8)-1)) #define _mm256_mask_inserti32x4(W, U, X, Y, C) \ @@ -12399,13 +12413,13 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_inserti32x4(U, X, Y, C) \ ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\ (__v4si)(__m128i) (Y), (int) (C), \ - (__v8si)(__m256i)_mm256_setzero_si256(), \ + (__v8si)(__m256i)_mm256_setzero_si256 (), \ (__mmask8)(U))) #define _mm256_extractf32x4_ps(X, C) \ ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \ (int) (C), \ - (__v4sf)(__m128)_mm_setzero_ps(), \ + (__v4sf)(__m128)_mm_setzero_ps (), \ (__mmask8)-1)) #define _mm256_mask_extractf32x4_ps(W, U, X, C) \ @@ -12417,7 +12431,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_extractf32x4_ps(U, X, C) \ ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \ (int) (C), \ - (__v4sf)(__m128)_mm_setzero_ps(), \ + (__v4sf)(__m128)_mm_setzero_ps (), \ (__mmask8)(U))) #define _mm256_extracti32x4_epi32(X, C) \ @@ -12453,7 +12467,8 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_shuffle_i32x4(X, Y, C) \ ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \ (__v8si)(__m256i)(Y), (int)(C), \ - (__v8si)(__m256i)_mm256_setzero_si256(), \ + (__v8si)(__m256i) \ + _mm256_setzero_si256 (), \ (__mmask8)-1)) #define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \ @@ -12465,13 +12480,14 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \ ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \ 
(__v8si)(__m256i)(Y), (int)(C), \ - (__v8si)(__m256i)_mm256_setzero_si256(), \ + (__v8si)(__m256i) \ + _mm256_setzero_si256 (), \ (__mmask8)(U))) #define _mm256_shuffle_f64x2(X, Y, C) \ ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \ (__v4df)(__m256d)(Y), (int)(C), \ - (__v4df)(__m256d)_mm256_setzero_pd(), \ + (__v4df)(__m256d)_mm256_setzero_pd (),\ (__mmask8)-1)) #define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \ @@ -12483,13 +12499,13 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \ ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \ (__v4df)(__m256d)(Y), (int)(C), \ - (__v4df)(__m256d)_mm256_setzero_pd(), \ + (__v4df)(__m256d)_mm256_setzero_pd (),\ (__mmask8)(U))) #define _mm256_shuffle_f32x4(X, Y, C) \ ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \ (__v8sf)(__m256)(Y), (int)(C), \ - (__v8sf)(__m256)_mm256_setzero_ps(), \ + (__v8sf)(__m256)_mm256_setzero_ps (), \ (__mmask8)-1)) #define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \ @@ -12501,7 +12517,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \ ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \ (__v8sf)(__m256)(Y), (int)(C), \ - (__v8sf)(__m256)_mm256_setzero_ps(), \ + (__v8sf)(__m256)_mm256_setzero_ps (), \ (__mmask8)(U))) #define _mm256_mask_shuffle_pd(W, U, A, B, C) \ @@ -12513,7 +12529,8 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_shuffle_pd(U, A, B, C) \ ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)(__m256d)_mm256_setzero_pd(),\ + (__v4df)(__m256d) \ + _mm256_setzero_pd (), \ (__mmask8)(U))) #define _mm_mask_shuffle_pd(W, U, A, B, C) \ @@ -12525,7 +12542,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_shuffle_pd(U, A, B, C) \ ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (int)(C), \ - (__v2df)(__m128d)_mm_setzero_pd(), \ + (__v2df)(__m128d)_mm_setzero_pd (), \ (__mmask8)(U))) #define _mm256_mask_shuffle_ps(W, U, A, B, C) \ @@ -12537,7 +12554,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_shuffle_ps(U, A, B, C) \ ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)(__m256)_mm256_setzero_ps(), \ + (__v8sf)(__m256)_mm256_setzero_ps (),\ (__mmask8)(U))) #define _mm_mask_shuffle_ps(W, U, A, B, C) \ @@ -12549,7 +12566,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_shuffle_ps(U, A, B, C) \ ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), \ - (__v4sf)(__m128)_mm_setzero_ps(), \ + (__v4sf)(__m128)_mm_setzero_ps (), \ (__mmask8)(U))) #define _mm256_fixupimm_pd(X, Y, Z, C) \ @@ -12632,7 +12649,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_srli_epi32(U, A, B) \ ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \ - (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U))) + (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U))) #define _mm_mask_srli_epi32(W, U, A, B) \ ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \ @@ -12640,7 +12657,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_srli_epi32(U, A, B) \ ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \ - (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U))) + (int)(B), (__v4si)_mm_setzero_si128 (), 
(__mmask8)(U))) #define _mm256_mask_srli_epi64(W, U, A, B) \ ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \ @@ -12656,26 +12673,26 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_srli_epi64(U, A, B) \ ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \ - (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U))) + (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U))) #define _mm256_mask_slli_epi32(W, U, X, C) \ ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\ - (__v8si)(__m256i)(W),\ + (__v8si)(__m256i)(W), \ (__mmask8)(U))) #define _mm256_maskz_slli_epi32(U, X, C) \ ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\ - (__v8si)(__m256i)_mm256_setzero_si256(),\ + (__v8si)(__m256i)_mm256_setzero_si256 (), \ (__mmask8)(U))) #define _mm256_mask_slli_epi64(W, U, X, C) \ ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\ - (__v4di)(__m256i)(W),\ + (__v4di)(__m256i)(W), \ (__mmask8)(U))) #define _mm256_maskz_slli_epi64(U, X, C) \ ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\ - (__v4di)(__m256i)_mm256_setzero_si256 (),\ + (__v4di)(__m256i)_mm256_setzero_si256 (), \ (__mmask8)(U))) #define _mm_mask_slli_epi32(W, U, X, C) \ @@ -12695,7 +12712,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_slli_epi64(U, X, C) \ ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\ - (__v2di)(__m128i)_mm_setzero_di(),\ + (__v2di)(__m128i)_mm_setzero_si128 (),\ (__mmask8)(U))) #define _mm256_ternarylogic_epi64(A, B, C, I) \ @@ -12748,7 +12765,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_roundscale_ps(A, B) \ ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \ - (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)-1)) + (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1)) #define _mm256_mask_roundscale_ps(W, U, A, B) \ ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \ @@ -12756,11 +12773,11 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_roundscale_ps(U, A, B) \ ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \ - (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)(U))) + (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U))) #define _mm256_roundscale_pd(A, B) \ ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \ - (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)-1)) + (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1)) #define _mm256_mask_roundscale_pd(W, U, A, B) \ ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \ @@ -12768,11 +12785,11 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_roundscale_pd(U, A, B) \ ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \ - (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U))) + (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U))) #define _mm_roundscale_ps(A, B) \ ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \ - (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)-1)) + (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1)) #define _mm_mask_roundscale_ps(W, U, A, B) \ ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \ @@ -12780,11 +12797,11 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_roundscale_ps(U, A, B) \ ((__m128) __builtin_ia32_rndscaleps_128_mask 
((__v4sf)(__m128)(A), \ - (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U))) + (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U))) #define _mm_roundscale_pd(A, B) \ ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \ - (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)-1)) + (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)-1)) #define _mm_mask_roundscale_pd(W, U, A, B) \ ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \ @@ -12792,12 +12809,12 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_roundscale_pd(U, A, B) \ ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \ - (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)(U))) + (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U))) #define _mm256_getmant_ps(X, B, C) \ ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \ (int)(((C)<<2) | (B)), \ - (__v8sf)(__m256)_mm256_setzero_ps(), \ + (__v8sf)(__m256)_mm256_setzero_ps (), \ (__mmask8)-1)) #define _mm256_mask_getmant_ps(W, U, X, B, C) \ @@ -12809,13 +12826,13 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_getmant_ps(U, X, B, C) \ ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \ (int)(((C)<<2) | (B)), \ - (__v8sf)(__m256)_mm256_setzero_ps(), \ + (__v8sf)(__m256)_mm256_setzero_ps (), \ (__mmask8)(U))) #define _mm_getmant_ps(X, B, C) \ ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \ (int)(((C)<<2) | (B)), \ - (__v4sf)(__m128)_mm_setzero_ps(), \ + (__v4sf)(__m128)_mm_setzero_ps (), \ (__mmask8)-1)) #define _mm_mask_getmant_ps(W, U, X, B, C) \ @@ -12827,13 +12844,13 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_getmant_ps(U, X, B, C) \ ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \ (int)(((C)<<2) | (B)), \ - (__v4sf)(__m128)_mm_setzero_ps(), \ + (__v4sf)(__m128)_mm_setzero_ps (), \ (__mmask8)(U))) #define _mm256_getmant_pd(X, B, C) \ ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \ (int)(((C)<<2) | (B)), \ - (__v4df)(__m256d)_mm256_setzero_pd(), \ + (__v4df)(__m256d)_mm256_setzero_pd (),\ (__mmask8)-1)) #define _mm256_mask_getmant_pd(W, U, X, B, C) \ @@ -12845,13 +12862,13 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_getmant_pd(U, X, B, C) \ ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \ (int)(((C)<<2) | (B)), \ - (__v4df)(__m256d)_mm256_setzero_pd(), \ + (__v4df)(__m256d)_mm256_setzero_pd (),\ (__mmask8)(U))) #define _mm_getmant_pd(X, B, C) \ ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \ (int)(((C)<<2) | (B)), \ - (__v2df)(__m128d)_mm_setzero_pd(), \ + (__v2df)(__m128d)_mm_setzero_pd (), \ (__mmask8)-1)) #define _mm_mask_getmant_pd(W, U, X, B, C) \ @@ -12863,7 +12880,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_getmant_pd(U, X, B, C) \ ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \ (int)(((C)<<2) | (B)), \ - (__v2df)(__m128d)_mm_setzero_pd(), \ + (__v2df)(__m128d)_mm_setzero_pd (), \ (__mmask8)(U))) #define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ @@ -13129,7 +13146,8 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_shuffle_epi32(U, X, C) \ ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \ - (__v8si)(__m256i)_mm256_setzero_si256(), \ + (__v8si)(__m256i) \ + _mm256_setzero_si256 (), \ (__mmask8)(U))) #define _mm_mask_shuffle_epi32(W, U, X, C) \ @@ 
-13139,7 +13157,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_shuffle_epi32(U, X, C) \ ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \ - (__v4si)(__m128i)_mm_setzero_si128 (), \ + (__v4si)(__m128i)_mm_setzero_si128 (), \ (__mmask8)(U))) #define _mm256_rol_epi64(A, B) \ @@ -13159,7 +13177,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_rol_epi64(A, B) \ ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \ - (__v2di)(__m128i)_mm_setzero_di(), \ + (__v2di)(__m128i)_mm_setzero_si128 (),\ (__mmask8)-1)) #define _mm_mask_rol_epi64(W, U, A, B) \ @@ -13169,7 +13187,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_rol_epi64(U, A, B) \ ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \ - (__v2di)(__m128i)_mm_setzero_di(), \ + (__v2di)(__m128i)_mm_setzero_si128 (),\ (__mmask8)(U))) #define _mm256_ror_epi64(A, B) \ @@ -13189,7 +13207,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_ror_epi64(A, B) \ ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \ - (__v2di)(__m128i)_mm_setzero_di(), \ + (__v2di)(__m128i)_mm_setzero_si128 (),\ (__mmask8)-1)) #define _mm_mask_ror_epi64(W, U, A, B) \ @@ -13199,12 +13217,12 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_ror_epi64(U, A, B) \ ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \ - (__v2di)(__m128i)_mm_setzero_di(), \ + (__v2di)(__m128i)_mm_setzero_si128 (),\ (__mmask8)(U))) #define _mm256_rol_epi32(A, B) \ ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \ - (__v8si)(__m256i)_mm256_setzero_si256(),\ + (__v8si)(__m256i)_mm256_setzero_si256 (),\ (__mmask8)-1)) #define _mm256_mask_rol_epi32(W, U, A, B) \ @@ -13214,12 +13232,12 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_rol_epi32(U, A, B) \ ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \ - (__v8si)(__m256i)_mm256_setzero_si256(),\ + (__v8si)(__m256i)_mm256_setzero_si256 (),\ (__mmask8)(U))) #define _mm_rol_epi32(A, B) \ ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \ - (__v4si)(__m128i)_mm_setzero_si128 (), \ + (__v4si)(__m128i)_mm_setzero_si128 (),\ (__mmask8)-1)) #define _mm_mask_rol_epi32(W, U, A, B) \ @@ -13229,12 +13247,12 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_rol_epi32(U, A, B) \ ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \ - (__v4si)(__m128i)_mm_setzero_si128 (), \ + (__v4si)(__m128i)_mm_setzero_si128 (),\ (__mmask8)(U))) #define _mm256_ror_epi32(A, B) \ ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \ - (__v8si)(__m256i)_mm256_setzero_si256(),\ + (__v8si)(__m256i)_mm256_setzero_si256 (),\ (__mmask8)-1)) #define _mm256_mask_ror_epi32(W, U, A, B) \ @@ -13244,12 +13262,13 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_ror_epi32(U, A, B) \ ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \ - (__v8si)(__m256i)_mm256_setzero_si256(),\ + (__v8si)(__m256i) \ + _mm256_setzero_si256 (), \ (__mmask8)(U))) #define _mm_ror_epi32(A, B) \ ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \ - (__v4si)(__m128i)_mm_setzero_si128 (), \ + (__v4si)(__m128i)_mm_setzero_si128 (),\ (__mmask8)-1)) #define _mm_mask_ror_epi32(W, U, A, B) \ @@ -13259,7 +13278,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_ror_epi32(U, A, B) \ 
((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \ - (__v4si)(__m128i)_mm_setzero_si128 (), \ + (__v4si)(__m128i)_mm_setzero_si128 (),\ (__mmask8)(U))) #define _mm256_alignr_epi32(X, Y, C) \ @@ -13298,7 +13317,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_alignr_epi32(U, X, Y, C) \ ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \ - (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128(),\ + (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128 (),\ (__mmask8)(U))) #define _mm_alignr_epi64(X, Y, C) \ @@ -13311,7 +13330,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_alignr_epi64(U, X, Y, C) \ ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \ - (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128(),\ + (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128 (),\ (__mmask8)(U))) #define _mm_mask_cvtps_ph(W, U, A, I) \ @@ -13320,7 +13339,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_cvtps_ph(U, A, I) \ ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) A, (int) (I), \ - (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U))) + (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U))) #define _mm256_mask_cvtps_ph(W, U, A, I) \ ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) A, (int) (I), \ @@ -13328,7 +13347,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_cvtps_ph(U, A, I) \ ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) A, (int) (I), \ - (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U))) + (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U))) #define _mm256_mask_srai_epi32(W, U, A, B) \ ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \ @@ -13336,7 +13355,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_srai_epi32(U, A, B) \ ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \ - (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U))) + (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U))) #define _mm_mask_srai_epi32(W, U, A, B) \ ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \ @@ -13344,7 +13363,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_srai_epi32(U, A, B) \ ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \ - (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U))) + (int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U))) #define _mm256_srai_epi64(A, B) \ ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \ @@ -13360,7 +13379,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_srai_epi64(A, B) \ ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \ - (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)-1)) + (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)-1)) #define _mm_mask_srai_epi64(W, U, A, B) \ ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \ @@ -13368,7 +13387,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_srai_epi64(U, A, B) \ ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \ - (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U))) + (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U))) #define _mm256_mask_permutex_pd(W, U, A, B) \ ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \ @@ -13376,7 +13395,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_permutex_pd(U, A, B) \ ((__m256d) 
__builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \ - (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U))) + (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U))) #define _mm256_mask_permute_pd(W, U, X, C) \ ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \ @@ -13385,7 +13404,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_permute_pd(U, X, C) \ ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \ - (__v4df)(__m256d)_mm256_setzero_pd(), \ + (__v4df)(__m256d)_mm256_setzero_pd (),\ (__mmask8)(U))) #define _mm256_mask_permute_ps(W, U, X, C) \ @@ -13394,7 +13413,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm256_maskz_permute_ps(U, X, C) \ ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \ - (__v8sf)(__m256)_mm256_setzero_ps(), \ + (__v8sf)(__m256)_mm256_setzero_ps (), \ (__mmask8)(U))) #define _mm_mask_permute_pd(W, U, X, C) \ @@ -13403,7 +13422,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_permute_pd(U, X, C) \ ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \ - (__v2df)(__m128d)_mm_setzero_pd(), \ + (__v2df)(__m128d)_mm_setzero_pd (), \ (__mmask8)(U))) #define _mm_mask_permute_ps(W, U, X, C) \ @@ -13412,7 +13431,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #define _mm_maskz_permute_ps(U, X, C) \ ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \ - (__v4sf)(__m128)_mm_setzero_ps(), \ + (__v4sf)(__m128)_mm_setzero_ps (), \ (__mmask8)(U))) #define _mm256_mask_blend_pd(__U, __A, __W) \ @@ -13577,7 +13596,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) #endif -#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A)) +#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps ((B), (A)) #ifdef __DISABLE_AVX512VL__ #undef __DISABLE_AVX512VL__ -- 2.30.2
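
For context, a brief usage sketch (illustrative assumptions: GCC with -mavx512vl and an AVX-512VL capable CPU; the file name and values are not from the patch): after this change the maskz intrinsics still zero the masked-off lanes, now via the standard _mm_setzero_si128 () default, so they remain usable from always_inline callers such as the sse-22a.c test.

/* pr78451-usage.c - illustrative only.
   Build: gcc -O2 -mavx512vl pr78451-usage.c
   Running requires an AVX-512VL capable CPU.  */
#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m128i v = _mm_set_epi64x (0x100, 0x80);  /* element 1 = 0x100, element 0 = 0x80 */
  __m128i cnt = _mm_set_epi64x (0, 4);       /* shift count = 4 */
  /* Mask 0x1 keeps element 0; element 1 comes from the
     _mm_setzero_si128 () fallback inside the intrinsic.  */
  __m128i r = _mm_maskz_srl_epi64 ((__mmask8) 0x1, v, cnt);
  long long out[2];
  _mm_storeu_si128 ((__m128i *) out, r);
  printf ("%lld %lld\n", out[0], out[1]);    /* prints: 8 0 */
  return 0;
}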