From 78cef09019cc9c80d1b39a49861f8827a2ee2e60 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 29 Apr 2020 17:30:22 +0200 Subject: [PATCH] x86: Fix -O0 intrinsic *gather*/*scatter* macros [PR94832] As reported in the PR, most intrinsic -O0 macro argument uses are properly wrapped in ()s or used in a context where having a complex expression passed as the argument doesn't pose a problem (e.g. when the macro argument use is in between commas, or between ( and comma, or between comma and ) etc.), but the gather/scatter macros in particular don't do this. If one passes e.g. x + y as an argument to such a macro, the corresponding inline function would cast the whole argument, but the macro does (int) ARG, which expands to (int) x + y rather than (int) (x + y). The following patch fixes those issues in *gather*/*scatter*; additionally, the AVX2 macros were passing an incorrect mask of e.g. (__v2df)_mm_set1_pd((double)(long long int) -1), which is IMHO equivalent to (__v2df){-1.0, -1.0}, when they really want to pass a __v2df vector with all bits set. I've used what the inline functions use for those cases. 2020-04-29 Jakub Jelinek PR target/94832 * config/i386/avx2intrin.h (_mm_mask_i32gather_pd, _mm256_mask_i32gather_pd, _mm_mask_i64gather_pd, _mm256_mask_i64gather_pd, _mm_mask_i32gather_ps, _mm256_mask_i32gather_ps, _mm_mask_i64gather_ps, _mm256_mask_i64gather_ps, _mm_i32gather_epi64, _mm_mask_i32gather_epi64, _mm256_i32gather_epi64, _mm256_mask_i32gather_epi64, _mm_i64gather_epi64, _mm_mask_i64gather_epi64, _mm256_i64gather_epi64, _mm256_mask_i64gather_epi64, _mm_i32gather_epi32, _mm_mask_i32gather_epi32, _mm256_i32gather_epi32, _mm256_mask_i32gather_epi32, _mm_i64gather_epi32, _mm_mask_i64gather_epi32, _mm256_i64gather_epi32, _mm256_mask_i64gather_epi32): Surround macro parameter uses with parens. (_mm_i32gather_pd, _mm256_i32gather_pd, _mm_i64gather_pd, _mm256_i64gather_pd, _mm_i32gather_ps, _mm256_i32gather_ps, _mm_i64gather_ps, _mm256_i64gather_ps): Likewise. 
Don't use as mask vector containing -1.0 or -1.0f elts, but instead vector with all bits set using _mm*_cmpeq_p? with zero operands. * config/i386/avx512fintrin.h (_mm512_i32gather_ps, _mm512_mask_i32gather_ps, _mm512_i32gather_pd, _mm512_mask_i32gather_pd, _mm512_i64gather_ps, _mm512_mask_i64gather_ps, _mm512_i64gather_pd, _mm512_mask_i64gather_pd, _mm512_i32gather_epi32, _mm512_mask_i32gather_epi32, _mm512_i32gather_epi64, _mm512_mask_i32gather_epi64, _mm512_i64gather_epi32, _mm512_mask_i64gather_epi32, _mm512_i64gather_epi64, _mm512_mask_i64gather_epi64, _mm512_i32scatter_ps, _mm512_mask_i32scatter_ps, _mm512_i32scatter_pd, _mm512_mask_i32scatter_pd, _mm512_i64scatter_ps, _mm512_mask_i64scatter_ps, _mm512_i64scatter_pd, _mm512_mask_i64scatter_pd, _mm512_i32scatter_epi32, _mm512_mask_i32scatter_epi32, _mm512_i32scatter_epi64, _mm512_mask_i32scatter_epi64, _mm512_i64scatter_epi32, _mm512_mask_i64scatter_epi32, _mm512_i64scatter_epi64, _mm512_mask_i64scatter_epi64): Surround macro parameter uses with parens. * config/i386/avx512pfintrin.h (_mm512_prefetch_i32gather_pd, _mm512_prefetch_i32gather_ps, _mm512_mask_prefetch_i32gather_pd, _mm512_mask_prefetch_i32gather_ps, _mm512_prefetch_i64gather_pd, _mm512_prefetch_i64gather_ps, _mm512_mask_prefetch_i64gather_pd, _mm512_mask_prefetch_i64gather_ps, _mm512_prefetch_i32scatter_pd, _mm512_prefetch_i32scatter_ps, _mm512_mask_prefetch_i32scatter_pd, _mm512_mask_prefetch_i32scatter_ps, _mm512_prefetch_i64scatter_pd, _mm512_prefetch_i64scatter_ps, _mm512_mask_prefetch_i64scatter_pd, _mm512_mask_prefetch_i64scatter_ps): Likewise. 
* config/i386/avx512vlintrin.h (_mm256_mmask_i32gather_ps, _mm_mmask_i32gather_ps, _mm256_mmask_i32gather_pd, _mm_mmask_i32gather_pd, _mm256_mmask_i64gather_ps, _mm_mmask_i64gather_ps, _mm256_mmask_i64gather_pd, _mm_mmask_i64gather_pd, _mm256_mmask_i32gather_epi32, _mm_mmask_i32gather_epi32, _mm256_mmask_i32gather_epi64, _mm_mmask_i32gather_epi64, _mm256_mmask_i64gather_epi32, _mm_mmask_i64gather_epi32, _mm256_mmask_i64gather_epi64, _mm_mmask_i64gather_epi64, _mm256_i32scatter_ps, _mm256_mask_i32scatter_ps, _mm_i32scatter_ps, _mm_mask_i32scatter_ps, _mm256_i32scatter_pd, _mm256_mask_i32scatter_pd, _mm_i32scatter_pd, _mm_mask_i32scatter_pd, _mm256_i64scatter_ps, _mm256_mask_i64scatter_ps, _mm_i64scatter_ps, _mm_mask_i64scatter_ps, _mm256_i64scatter_pd, _mm256_mask_i64scatter_pd, _mm_i64scatter_pd, _mm_mask_i64scatter_pd, _mm256_i32scatter_epi32, _mm256_mask_i32scatter_epi32, _mm_i32scatter_epi32, _mm_mask_i32scatter_epi32, _mm256_i32scatter_epi64, _mm256_mask_i32scatter_epi64, _mm_i32scatter_epi64, _mm_mask_i32scatter_epi64, _mm256_i64scatter_epi32, _mm256_mask_i64scatter_epi32, _mm_i64scatter_epi32, _mm_mask_i64scatter_epi32, _mm256_i64scatter_epi64, _mm256_mask_i64scatter_epi64, _mm_i64scatter_epi64, _mm_mask_i64scatter_epi64): Likewise. 
--- gcc/ChangeLog | 73 +++++++ gcc/config/i386/avx2intrin.h | 354 ++++++++++++++++--------------- gcc/config/i386/avx512fintrin.h | 230 ++++++++++---------- gcc/config/i386/avx512pfintrin.h | 69 +++--- gcc/config/i386/avx512vlintrin.h | 336 +++++++++++++++-------------- 5 files changed, 589 insertions(+), 473 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a2d4a1b82f4..16e05d1fa60 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,76 @@ +2020-04-29 Jakub Jelinek + + PR target/94832 + * config/i386/avx2intrin.h (_mm_mask_i32gather_pd, + _mm256_mask_i32gather_pd, _mm_mask_i64gather_pd, + _mm256_mask_i64gather_pd, _mm_mask_i32gather_ps, + _mm256_mask_i32gather_ps, _mm_mask_i64gather_ps, + _mm256_mask_i64gather_ps, _mm_i32gather_epi64, + _mm_mask_i32gather_epi64, _mm256_i32gather_epi64, + _mm256_mask_i32gather_epi64, _mm_i64gather_epi64, + _mm_mask_i64gather_epi64, _mm256_i64gather_epi64, + _mm256_mask_i64gather_epi64, _mm_i32gather_epi32, + _mm_mask_i32gather_epi32, _mm256_i32gather_epi32, + _mm256_mask_i32gather_epi32, _mm_i64gather_epi32, + _mm_mask_i64gather_epi32, _mm256_i64gather_epi32, + _mm256_mask_i64gather_epi32): Surround macro parameter uses with + parens. + (_mm_i32gather_pd, _mm256_i32gather_pd, _mm_i64gather_pd, + _mm256_i64gather_pd, _mm_i32gather_ps, _mm256_i32gather_ps, + _mm_i64gather_ps, _mm256_i64gather_ps): Likewise. Don't use + as mask vector containing -1.0 or -1.0f elts, but instead vector + with all bits set using _mm*_cmpeq_p? with zero operands. 
+ * config/i386/avx512fintrin.h (_mm512_i32gather_ps, + _mm512_mask_i32gather_ps, _mm512_i32gather_pd, + _mm512_mask_i32gather_pd, _mm512_i64gather_ps, + _mm512_mask_i64gather_ps, _mm512_i64gather_pd, + _mm512_mask_i64gather_pd, _mm512_i32gather_epi32, + _mm512_mask_i32gather_epi32, _mm512_i32gather_epi64, + _mm512_mask_i32gather_epi64, _mm512_i64gather_epi32, + _mm512_mask_i64gather_epi32, _mm512_i64gather_epi64, + _mm512_mask_i64gather_epi64, _mm512_i32scatter_ps, + _mm512_mask_i32scatter_ps, _mm512_i32scatter_pd, + _mm512_mask_i32scatter_pd, _mm512_i64scatter_ps, + _mm512_mask_i64scatter_ps, _mm512_i64scatter_pd, + _mm512_mask_i64scatter_pd, _mm512_i32scatter_epi32, + _mm512_mask_i32scatter_epi32, _mm512_i32scatter_epi64, + _mm512_mask_i32scatter_epi64, _mm512_i64scatter_epi32, + _mm512_mask_i64scatter_epi32, _mm512_i64scatter_epi64, + _mm512_mask_i64scatter_epi64): Surround macro parameter uses with + parens. + * config/i386/avx512pfintrin.h (_mm512_prefetch_i32gather_pd, + _mm512_prefetch_i32gather_ps, _mm512_mask_prefetch_i32gather_pd, + _mm512_mask_prefetch_i32gather_ps, _mm512_prefetch_i64gather_pd, + _mm512_prefetch_i64gather_ps, _mm512_mask_prefetch_i64gather_pd, + _mm512_mask_prefetch_i64gather_ps, _mm512_prefetch_i32scatter_pd, + _mm512_prefetch_i32scatter_ps, _mm512_mask_prefetch_i32scatter_pd, + _mm512_mask_prefetch_i32scatter_ps, _mm512_prefetch_i64scatter_pd, + _mm512_prefetch_i64scatter_ps, _mm512_mask_prefetch_i64scatter_pd, + _mm512_mask_prefetch_i64scatter_ps): Likewise. 
+ * config/i386/avx512vlintrin.h (_mm256_mmask_i32gather_ps, + _mm_mmask_i32gather_ps, _mm256_mmask_i32gather_pd, + _mm_mmask_i32gather_pd, _mm256_mmask_i64gather_ps, + _mm_mmask_i64gather_ps, _mm256_mmask_i64gather_pd, + _mm_mmask_i64gather_pd, _mm256_mmask_i32gather_epi32, + _mm_mmask_i32gather_epi32, _mm256_mmask_i32gather_epi64, + _mm_mmask_i32gather_epi64, _mm256_mmask_i64gather_epi32, + _mm_mmask_i64gather_epi32, _mm256_mmask_i64gather_epi64, + _mm_mmask_i64gather_epi64, _mm256_i32scatter_ps, + _mm256_mask_i32scatter_ps, _mm_i32scatter_ps, _mm_mask_i32scatter_ps, + _mm256_i32scatter_pd, _mm256_mask_i32scatter_pd, _mm_i32scatter_pd, + _mm_mask_i32scatter_pd, _mm256_i64scatter_ps, + _mm256_mask_i64scatter_ps, _mm_i64scatter_ps, _mm_mask_i64scatter_ps, + _mm256_i64scatter_pd, _mm256_mask_i64scatter_pd, _mm_i64scatter_pd, + _mm_mask_i64scatter_pd, _mm256_i32scatter_epi32, + _mm256_mask_i32scatter_epi32, _mm_i32scatter_epi32, + _mm_mask_i32scatter_epi32, _mm256_i32scatter_epi64, + _mm256_mask_i32scatter_epi64, _mm_i32scatter_epi64, + _mm_mask_i32scatter_epi64, _mm256_i64scatter_epi32, + _mm256_mask_i64scatter_epi32, _mm_i64scatter_epi32, + _mm_mask_i64scatter_epi32, _mm256_i64scatter_epi64, + _mm256_mask_i64scatter_epi64, _mm_i64scatter_epi64, + _mm_mask_i64scatter_epi64): Likewise. 
+ 2020-04-29 Jeff Law * config/h8300/h8300.md (H8/SX div patterns): All H8/SX specific diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h index 4ac21b99548..6bf1f8c4333 100644 --- a/gcc/config/i386/avx2intrin.h +++ b/gcc/config/i386/avx2intrin.h @@ -1670,234 +1670,246 @@ _mm256_mask_i64gather_epi32 (__m128i __src, int const *__base, #else /* __OPTIMIZE__ */ #define _mm_i32gather_pd(BASE, INDEX, SCALE) \ (__m128d) __builtin_ia32_gathersiv2df ((__v2df) _mm_setzero_pd (), \ - (double const *)BASE, \ - (__v4si)(__m128i)INDEX, \ - (__v2df)_mm_set1_pd( \ - (double)(long long int) -1), \ - (int)SCALE) - -#define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \ - (__m128d) __builtin_ia32_gathersiv2df ((__v2df)(__m128d)SRC, \ - (double const *)BASE, \ - (__v4si)(__m128i)INDEX, \ - (__v2df)(__m128d)MASK, \ - (int)SCALE) + (double const *) (BASE), \ + (__v4si)(__m128i) (INDEX), \ + (__v2df) \ + _mm_cmpeq_pd (_mm_setzero_pd (),\ + _mm_setzero_pd ()),\ + (int) (SCALE)) + +#define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \ + (__m128d) __builtin_ia32_gathersiv2df ((__v2df)(__m128d) (SRC), \ + (double const *) (BASE), \ + (__v4si)(__m128i) (INDEX), \ + (__v2df)(__m128d) (MASK), \ + (int) (SCALE)) #define _mm256_i32gather_pd(BASE, INDEX, SCALE) \ (__m256d) __builtin_ia32_gathersiv4df ((__v4df) _mm256_setzero_pd (), \ - (double const *)BASE, \ - (__v4si)(__m128i)INDEX, \ - (__v4df)_mm256_set1_pd( \ - (double)(long long int) -1), \ - (int)SCALE) - -#define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \ - (__m256d) __builtin_ia32_gathersiv4df ((__v4df)(__m256d)SRC, \ - (double const *)BASE, \ - (__v4si)(__m128i)INDEX, \ - (__v4df)(__m256d)MASK, \ - (int)SCALE) + (double const *) (BASE), \ + (__v4si)(__m128i) (INDEX), \ + (__v4df) \ + _mm256_cmp_pd (_mm256_setzero_pd (),\ + _mm256_setzero_pd (),\ + _CMP_EQ_OQ), \ + (int) (SCALE)) + +#define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \ + (__m256d) 
__builtin_ia32_gathersiv4df ((__v4df)(__m256d) (SRC), \ + (double const *) (BASE), \ + (__v4si)(__m128i) (INDEX), \ + (__v4df)(__m256d) (MASK), \ + (int) (SCALE)) #define _mm_i64gather_pd(BASE, INDEX, SCALE) \ (__m128d) __builtin_ia32_gatherdiv2df ((__v2df) _mm_setzero_pd (), \ - (double const *)BASE, \ - (__v2di)(__m128i)INDEX, \ - (__v2df)_mm_set1_pd( \ - (double)(long long int) -1), \ - (int)SCALE) - -#define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \ - (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d)SRC, \ - (double const *)BASE, \ - (__v2di)(__m128i)INDEX, \ - (__v2df)(__m128d)MASK, \ - (int)SCALE) + (double const *) (BASE), \ + (__v2di)(__m128i) (INDEX), \ + (__v2df) \ + _mm_cmpeq_pd (_mm_setzero_pd (),\ + _mm_setzero_pd ()),\ + (int) (SCALE)) + +#define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \ + (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d) (SRC), \ + (double const *) (BASE), \ + (__v2di)(__m128i) (INDEX), \ + (__v2df)(__m128d) (MASK), \ + (int) (SCALE)) #define _mm256_i64gather_pd(BASE, INDEX, SCALE) \ (__m256d) __builtin_ia32_gatherdiv4df ((__v4df) _mm256_setzero_pd (), \ - (double const *)BASE, \ - (__v4di)(__m256i)INDEX, \ - (__v4df)_mm256_set1_pd( \ - (double)(long long int) -1), \ - (int)SCALE) - -#define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \ - (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d)SRC, \ - (double const *)BASE, \ - (__v4di)(__m256i)INDEX, \ - (__v4df)(__m256d)MASK, \ - (int)SCALE) + (double const *) (BASE), \ + (__v4di)(__m256i) (INDEX), \ + (__v4df) \ + _mm256_cmp_pd (_mm256_setzero_pd (),\ + _mm256_setzero_pd (),\ + _CMP_EQ_OQ), \ + (int) (SCALE)) + +#define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \ + (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d) (SRC), \ + (double const *) (BASE), \ + (__v4di)(__m256i) (INDEX), \ + (__v4df)(__m256d) (MASK), \ + (int) (SCALE)) #define _mm_i32gather_ps(BASE, INDEX, SCALE) \ (__m128) 
__builtin_ia32_gathersiv4sf ((__v4sf) _mm_setzero_ps (), \ - (float const *)BASE, \ - (__v4si)(__m128i)INDEX, \ - _mm_set1_ps ((float)(int) -1), \ - (int)SCALE) - -#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \ - (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128)SRC, \ - (float const *)BASE, \ - (__v4si)(__m128i)INDEX, \ - (__v4sf)(__m128)MASK, \ - (int)SCALE) - -#define _mm256_i32gather_ps(BASE, INDEX, SCALE) \ - (__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (), \ - (float const *)BASE, \ - (__v8si)(__m256i)INDEX, \ - (__v8sf)_mm256_set1_ps ( \ - (float)(int) -1), \ - (int)SCALE) - -#define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \ - (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)SRC, \ - (float const *)BASE, \ - (__v8si)(__m256i)INDEX, \ - (__v8sf)(__m256)MASK, \ - (int)SCALE) + (float const *) (BASE), \ + (__v4si)(__m128i) (INDEX), \ + (__v4sf) \ + _mm_cmpeq_ps (_mm_setzero_ps (),\ + _mm_setzero_ps ()),\ + (int) (SCALE)) + +#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \ + (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128) (SRC), \ + (float const *) (BASE), \ + (__v4si)(__m128i) (INDEX), \ + (__v4sf)(__m128) (MASK), \ + (int) (SCALE)) + +#define _mm256_i32gather_ps(BASE, INDEX, SCALE) \ + (__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (), \ + (float const *) (BASE), \ + (__v8si)(__m256i) (INDEX), \ + (__v8sf) \ + _mm256_cmp_ps (_mm256_setzero_ps (),\ + _mm256_setzero_ps (),\ + _CMP_EQ_OQ), \ + (int) (SCALE)) + +#define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \ + (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256) (SRC), \ + (float const *) (BASE), \ + (__v8si)(__m256i) (INDEX), \ + (__v8sf)(__m256) (MASK), \ + (int) (SCALE)) #define _mm_i64gather_ps(BASE, INDEX, SCALE) \ (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf) _mm_setzero_pd (), \ - (float const *)BASE, \ - (__v2di)(__m128i)INDEX, \ - (__v4sf)_mm_set1_ps ( \ - (float)(int) -1), \ - 
(int)SCALE) - -#define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \ - (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)SRC, \ - (float const *)BASE, \ - (__v2di)(__m128i)INDEX, \ - (__v4sf)(__m128)MASK, \ - (int)SCALE) + (float const *) (BASE), \ + (__v2di)(__m128i) (INDEX), \ + (__v4sf) \ + _mm_cmpeq_ps (_mm_setzero_ps (),\ + _mm_setzero_ps ()),\ + (int) (SCALE)) + +#define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \ + (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128) (SRC), \ + (float const *) (BASE), \ + (__v2di)(__m128i) (INDEX), \ + (__v4sf)(__m128) (MASK), \ + (int) (SCALE)) #define _mm256_i64gather_ps(BASE, INDEX, SCALE) \ (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf) _mm_setzero_ps (), \ - (float const *)BASE, \ - (__v4di)(__m256i)INDEX, \ - (__v4sf)_mm_set1_ps( \ - (float)(int) -1), \ - (int)SCALE) - -#define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \ - (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128)SRC, \ - (float const *)BASE, \ - (__v4di)(__m256i)INDEX, \ - (__v4sf)(__m128)MASK, \ - (int)SCALE) + (float const *) (BASE), \ + (__v4di)(__m256i) (INDEX), \ + (__v4sf) \ + _mm_cmpeq_ps (_mm_setzero_ps (),\ + _mm_setzero_ps ()),\ + (int) (SCALE)) + +#define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \ + (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128) (SRC), \ + (float const *) (BASE), \ + (__v4di)(__m256i) (INDEX), \ + (__v4sf)(__m128) (MASK), \ + (int) (SCALE)) #define _mm_i32gather_epi64(BASE, INDEX, SCALE) \ (__m128i) __builtin_ia32_gathersiv2di ((__v2di) _mm_setzero_si128 (), \ - (long long const *)BASE, \ - (__v4si)(__m128i)INDEX, \ + (long long const *) (BASE), \ + (__v4si)(__m128i) (INDEX), \ (__v2di)_mm_set1_epi64x (-1), \ - (int)SCALE) + (int) (SCALE)) -#define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \ - (__m128i) __builtin_ia32_gathersiv2di ((__v2di)(__m128i)SRC, \ - (long long const *)BASE, \ - (__v4si)(__m128i)INDEX, \ - (__v2di)(__m128i)MASK, \ - 
(int)SCALE) +#define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \ + (__m128i) __builtin_ia32_gathersiv2di ((__v2di)(__m128i) (SRC), \ + (long long const *) (BASE), \ + (__v4si)(__m128i) (INDEX), \ + (__v2di)(__m128i) (MASK), \ + (int) (SCALE)) #define _mm256_i32gather_epi64(BASE, INDEX, SCALE) \ (__m256i) __builtin_ia32_gathersiv4di ((__v4di) _mm256_setzero_si256 (), \ - (long long const *)BASE, \ - (__v4si)(__m128i)INDEX, \ + (long long const *) (BASE), \ + (__v4si)(__m128i) (INDEX), \ (__v4di)_mm256_set1_epi64x (-1), \ - (int)SCALE) + (int) (SCALE)) -#define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \ - (__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i)SRC, \ - (long long const *)BASE, \ - (__v4si)(__m128i)INDEX, \ - (__v4di)(__m256i)MASK, \ - (int)SCALE) +#define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \ + (__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i) (SRC), \ + (long long const *) (BASE), \ + (__v4si)(__m128i) (INDEX), \ + (__v4di)(__m256i) (MASK), \ + (int) (SCALE)) #define _mm_i64gather_epi64(BASE, INDEX, SCALE) \ (__m128i) __builtin_ia32_gatherdiv2di ((__v2di) _mm_setzero_si128 (), \ - (long long const *)BASE, \ - (__v2di)(__m128i)INDEX, \ + (long long const *) (BASE), \ + (__v2di)(__m128i) (INDEX), \ (__v2di)_mm_set1_epi64x (-1), \ - (int)SCALE) + (int) (SCALE)) -#define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \ - (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i)SRC, \ - (long long const *)BASE, \ - (__v2di)(__m128i)INDEX, \ - (__v2di)(__m128i)MASK, \ - (int)SCALE) +#define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \ + (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i) (SRC), \ + (long long const *) (BASE), \ + (__v2di)(__m128i) (INDEX), \ + (__v2di)(__m128i) (MASK), \ + (int) (SCALE)) #define _mm256_i64gather_epi64(BASE, INDEX, SCALE) \ (__m256i) __builtin_ia32_gatherdiv4di ((__v4di) _mm256_setzero_si256 (), \ - (long long const *)BASE, \ - 
(__v4di)(__m256i)INDEX, \ + (long long const *) (BASE), \ + (__v4di)(__m256i) (INDEX), \ (__v4di)_mm256_set1_epi64x (-1), \ - (int)SCALE) + (int) (SCALE)) -#define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \ - (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i)SRC, \ - (long long const *)BASE, \ - (__v4di)(__m256i)INDEX, \ - (__v4di)(__m256i)MASK, \ - (int)SCALE) +#define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \ + (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i) (SRC), \ + (long long const *) (BASE), \ + (__v4di)(__m256i) (INDEX), \ + (__v4di)(__m256i) (MASK), \ + (int) (SCALE)) #define _mm_i32gather_epi32(BASE, INDEX, SCALE) \ (__m128i) __builtin_ia32_gathersiv4si ((__v4si) _mm_setzero_si128 (), \ - (int const *)BASE, \ - (__v4si)(__m128i)INDEX, \ + (int const *) (BASE), \ + (__v4si)(__m128i) (INDEX), \ (__v4si)_mm_set1_epi32 (-1), \ - (int)SCALE) + (int) (SCALE)) -#define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \ - (__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i)SRC, \ - (int const *)BASE, \ - (__v4si)(__m128i)INDEX, \ - (__v4si)(__m128i)MASK, \ - (int)SCALE) +#define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \ + (__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i) (SRC), \ + (int const *) (BASE), \ + (__v4si)(__m128i) (INDEX), \ + (__v4si)(__m128i) (MASK), \ + (int) (SCALE)) #define _mm256_i32gather_epi32(BASE, INDEX, SCALE) \ (__m256i) __builtin_ia32_gathersiv8si ((__v8si) _mm256_setzero_si256 (), \ - (int const *)BASE, \ - (__v8si)(__m256i)INDEX, \ + (int const *) (BASE), \ + (__v8si)(__m256i) (INDEX), \ (__v8si)_mm256_set1_epi32 (-1), \ - (int)SCALE) + (int) (SCALE)) -#define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \ - (__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i)SRC, \ - (int const *)BASE, \ - (__v8si)(__m256i)INDEX, \ - (__v8si)(__m256i)MASK, \ - (int)SCALE) +#define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \ + 
(__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i) (SRC), \ + (int const *) (BASE), \ + (__v8si)(__m256i) (INDEX), \ + (__v8si)(__m256i) (MASK), \ + (int) (SCALE)) #define _mm_i64gather_epi32(BASE, INDEX, SCALE) \ (__m128i) __builtin_ia32_gatherdiv4si ((__v4si) _mm_setzero_si128 (), \ - (int const *)BASE, \ - (__v2di)(__m128i)INDEX, \ + (int const *) (BASE), \ + (__v2di)(__m128i) (INDEX), \ (__v4si)_mm_set1_epi32 (-1), \ - (int)SCALE) + (int) (SCALE)) -#define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \ - (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i)SRC, \ - (int const *)BASE, \ - (__v2di)(__m128i)INDEX, \ - (__v4si)(__m128i)MASK, \ - (int)SCALE) +#define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \ + (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i) (SRC), \ + (int const *) (BASE), \ + (__v2di)(__m128i) (INDEX), \ + (__v4si)(__m128i) (MASK), \ + (int) (SCALE)) #define _mm256_i64gather_epi32(BASE, INDEX, SCALE) \ (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si) _mm_setzero_si128 (), \ - (int const *)BASE, \ - (__v4di)(__m256i)INDEX, \ + (int const *) (BASE), \ + (__v4di)(__m256i) (INDEX), \ (__v4si)_mm_set1_epi32(-1), \ - (int)SCALE) - -#define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \ - (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i)SRC, \ - (int const *)BASE, \ - (__v4di)(__m256i)INDEX, \ - (__v4si)(__m128i)MASK, \ - (int)SCALE) + (int) (SCALE)) + +#define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \ + (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i) (SRC), \ + (int const *) (BASE), \ + (__v4di)(__m256i) (INDEX), \ + (__v4si)(__m128i) (MASK), \ + (int) (SCALE)) #endif /* __OPTIMIZE__ */ #ifdef __DISABLE_AVX2__ diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index e979b7e8622..c86982ab9c8 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -10468,179 +10468,189 @@ 
_mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask, #else #define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \ (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\ - (void const *)ADDR, \ - (__v16si)(__m512i)INDEX, \ - (__mmask16)0xFFFF, (int)SCALE) + (void const *) (ADDR), \ + (__v16si)(__m512i) (INDEX), \ + (__mmask16)0xFFFF, \ + (int) (SCALE)) #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD, \ - (void const *)ADDR, \ - (__v16si)(__m512i)INDEX, \ - (__mmask16)MASK, (int)SCALE) + (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD), \ + (void const *) (ADDR), \ + (__v16si)(__m512i) (INDEX), \ + (__mmask16) (MASK), \ + (int) (SCALE)) #define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \ (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \ - (void const *)ADDR, \ - (__v8si)(__m256i)INDEX, \ - (__mmask8)0xFF, (int)SCALE) + (void const *) (ADDR), \ + (__v8si)(__m256i) (INDEX), \ + (__mmask8)0xFF, (int) (SCALE)) #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD, \ - (void const *)ADDR, \ - (__v8si)(__m256i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD), \ + (void const *) (ADDR), \ + (__v8si)(__m256i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \ (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \ - (void const *)ADDR, \ - (__v8di)(__m512i)INDEX, \ - (__mmask8)0xFF, (int)SCALE) + (void const *) (ADDR), \ + (__v8di)(__m512i) (INDEX), \ + (__mmask8)0xFF, (int) (SCALE)) #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD, \ - (void const *)ADDR, \ - (__v8di)(__m512i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m256) __builtin_ia32_gatherdiv16sf 
((__v8sf)(__m256) (V1OLD), \ + (void const *) (ADDR), \ + (__v8di)(__m512i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \ (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \ - (void const *)ADDR, \ - (__v8di)(__m512i)INDEX, \ - (__mmask8)0xFF, (int)SCALE) + (void const *) (ADDR), \ + (__v8di)(__m512i) (INDEX), \ + (__mmask8)0xFF, (int) (SCALE)) #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD, \ - (void const *)ADDR, \ - (__v8di)(__m512i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD), \ + (void const *) (ADDR), \ + (__v8di)(__m512i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \ - (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (), \ - (void const *)ADDR, \ - (__v16si)(__m512i)INDEX, \ - (__mmask16)0xFFFF, (int)SCALE) + (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (),\ + (void const *) (ADDR), \ + (__v16si)(__m512i) (INDEX), \ + (__mmask16)0xFFFF, \ + (int) (SCALE)) #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD, \ - (void const *)ADDR, \ - (__v16si)(__m512i)INDEX, \ - (__mmask16)MASK, (int)SCALE) + (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD), \ + (void const *) (ADDR), \ + (__v16si)(__m512i) (INDEX), \ + (__mmask16) (MASK), \ + (int) (SCALE)) #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \ - (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (), \ - (void const *)ADDR, \ - (__v8si)(__m256i)INDEX, \ - (__mmask8)0xFF, (int)SCALE) + (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (),\ + (void const *) (ADDR), \ + (__v8si)(__m256i) (INDEX), \ + (__mmask8)0xFF, (int) (SCALE)) #define 
_mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD, \ - (void const *)ADDR, \ - (__v8si)(__m256i)INDEX, \ - (__mmask8)MASK, (int)SCALE) - -#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \ - (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \ - (void const *)ADDR, \ - (__v8di)(__m512i)INDEX, \ - (__mmask8)0xFF, (int)SCALE) + (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD), \ + (void const *) (ADDR), \ + (__v8si)(__m256i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) + +#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \ + (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(),\ + (void const *) (ADDR), \ + (__v8di)(__m512i) (INDEX), \ + (__mmask8)0xFF, (int) (SCALE)) #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD, \ - (void const *)ADDR, \ - (__v8di)(__m512i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD), \ + (void const *) (ADDR), \ + (__v8di)(__m512i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \ - (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (), \ - (void const *)ADDR, \ - (__v8di)(__m512i)INDEX, \ - (__mmask8)0xFF, (int)SCALE) + (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (),\ + (void const *) (ADDR), \ + (__v8di)(__m512i) (INDEX), \ + (__mmask8)0xFF, (int) (SCALE)) #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD, \ - (void const *)ADDR, \ - (__v8di)(__m512i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD), \ + (void const *) (ADDR), \ + (__v8di)(__m512i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define 
_mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)0xFFFF, \ - (__v16si)(__m512i)INDEX, \ - (__v16sf)(__m512)V1, (int)SCALE) + __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16)0xFFFF, \ + (__v16si)(__m512i) (INDEX), \ + (__v16sf)(__m512) (V1), (int) (SCALE)) #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)MASK, \ - (__v16si)(__m512i)INDEX, \ - (__v16sf)(__m512)V1, (int)SCALE) + __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) (MASK), \ + (__v16si)(__m512i) (INDEX), \ + (__v16sf)(__m512) (V1), (int) (SCALE)) #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)0xFF, \ - (__v8si)(__m256i)INDEX, \ - (__v8df)(__m512d)V1, (int)SCALE) + __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8)0xFF, \ + (__v8si)(__m256i) (INDEX), \ + (__v8df)(__m512d) (V1), (int) (SCALE)) #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)MASK, \ - (__v8si)(__m256i)INDEX, \ - (__v8df)(__m512d)V1, (int)SCALE) + __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) (MASK), \ + (__v8si)(__m256i) (INDEX), \ + (__v8df)(__m512d) (V1), (int) (SCALE)) #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)0xFF, \ - (__v8di)(__m512i)INDEX, \ - (__v8sf)(__m256)V1, (int)SCALE) + __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8)0xFF, \ + (__v8di)(__m512i) (INDEX), \ + (__v8sf)(__m256) (V1), (int) (SCALE)) #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask16)MASK, \ - (__v8di)(__m512i)INDEX, \ - (__v8sf)(__m256)V1, (int)SCALE) + __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask16) (MASK), \ + (__v8di)(__m512i) (INDEX), \ + (__v8sf)(__m256) (V1), (int) (SCALE)) #define 
_mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)0xFF, \ - (__v8di)(__m512i)INDEX, \ - (__v8df)(__m512d)V1, (int)SCALE) + __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8)0xFF, \ + (__v8di)(__m512i) (INDEX), \ + (__v8df)(__m512d) (V1), (int) (SCALE)) #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)MASK, \ - (__v8di)(__m512i)INDEX, \ - (__v8df)(__m512d)V1, (int)SCALE) + __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) (MASK), \ + (__v8di)(__m512i) (INDEX), \ + (__v8df)(__m512d) (V1), (int) (SCALE)) #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)0xFFFF, \ - (__v16si)(__m512i)INDEX, \ - (__v16si)(__m512i)V1, (int)SCALE) + __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16)0xFFFF, \ + (__v16si)(__m512i) (INDEX), \ + (__v16si)(__m512i) (V1), (int) (SCALE)) #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)MASK, \ - (__v16si)(__m512i)INDEX, \ - (__v16si)(__m512i)V1, (int)SCALE) + __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) (MASK), \ + (__v16si)(__m512i) (INDEX), \ + (__v16si)(__m512i) (V1), (int) (SCALE)) #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)0xFF, \ - (__v8si)(__m256i)INDEX, \ - (__v8di)(__m512i)V1, (int)SCALE) + __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8)0xFF, \ + (__v8si)(__m256i) (INDEX), \ + (__v8di)(__m512i) (V1), (int) (SCALE)) #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)MASK, \ - (__v8si)(__m256i)INDEX, \ - (__v8di)(__m512i)V1, (int)SCALE) + __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) (MASK), \ + (__v8si)(__m256i) (INDEX), \ + (__v8di)(__m512i) (V1), (int) (SCALE)) #define 
_mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)0xFF, \ - (__v8di)(__m512i)INDEX, \ - (__v8si)(__m256i)V1, (int)SCALE) + __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8)0xFF, \ + (__v8di)(__m512i) (INDEX), \ + (__v8si)(__m256i) (V1), (int) (SCALE)) #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)MASK, \ - (__v8di)(__m512i)INDEX, \ - (__v8si)(__m256i)V1, (int)SCALE) + __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) (MASK), \ + (__v8di)(__m512i) (INDEX), \ + (__v8si)(__m256i) (V1), (int) (SCALE)) #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)0xFF, \ - (__v8di)(__m512i)INDEX, \ - (__v8di)(__m512i)V1, (int)SCALE) + __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8)0xFF, \ + (__v8di)(__m512i) (INDEX), \ + (__v8di)(__m512i) (V1), (int) (SCALE)) #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)MASK, \ - (__v8di)(__m512i)INDEX, \ - (__v8di)(__m512i)V1, (int)SCALE) + __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) (MASK), \ + (__v8di)(__m512i) (INDEX), \ + (__v8di)(__m512i) (V1), (int) (SCALE)) #endif extern __inline __m512d diff --git a/gcc/config/i386/avx512pfintrin.h b/gcc/config/i386/avx512pfintrin.h index 79841826371..6227039fea3 100644 --- a/gcc/config/i386/avx512pfintrin.h +++ b/gcc/config/i386/avx512pfintrin.h @@ -192,68 +192,73 @@ _mm512_mask_prefetch_i64scatter_ps (void *__addr, __mmask8 __mask, #else #define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT) \ - __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)INDEX, \ - (void const *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX), \ + (void const *) (ADDR), (int) (SCALE), \ + (int) (HINT)) #define 
_mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT) \ - __builtin_ia32_gatherpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX, \ - (void const *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_gatherpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i) (INDEX), \ + (void const *) (ADDR), (int) (SCALE), \ + (int) (HINT)) #define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \ - __builtin_ia32_gatherpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX, \ - (void const *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_gatherpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX), \ + (void const *) (ADDR), (int) (SCALE), \ + (int) (HINT)) #define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \ - __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX, \ - (void const *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_gatherpfdps ((__mmask16) (MASK), (__v16si)(__m512i) (INDEX),\ + (void const *) (ADDR), (int) (SCALE), \ + (int) (HINT)) #define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT) \ - __builtin_ia32_gatherpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \ - (void *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_gatherpfqpd ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), \ + (void *) (ADDR), (int) (SCALE), (int) (HINT)) #define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT) \ - __builtin_ia32_gatherpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \ - (void *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_gatherpfqps ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), \ + (void *) (ADDR), (int) (SCALE), (int) (HINT)) #define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \ - __builtin_ia32_gatherpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \ - (void *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_gatherpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \ + (void *) (ADDR), (int) (SCALE), (int) (HINT)) #define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \ - __builtin_ia32_gatherpfqps 
((__mmask8)MASK, (__v8di)(__m512i)INDEX, \ - (void *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_gatherpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \ + (void *) (ADDR), (int) (SCALE), (int) (HINT)) #define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT) \ - __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)INDEX, \ - (void *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX), \ + (void *) (ADDR), (int) (SCALE), (int) (HINT)) #define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT) \ - __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX, \ - (void *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i) (INDEX),\ + (void *) (ADDR), (int) (SCALE), (int) (HINT)) #define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \ - __builtin_ia32_scatterpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX, \ - (void *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_scatterpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX), \ + (void *) (ADDR), (int) (SCALE), (int) (HINT)) #define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \ - __builtin_ia32_scatterpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX, \ - (void *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_scatterpfdps ((__mmask16) (MASK), \ + (__v16si)(__m512i) (INDEX), \ + (void *) (ADDR), (int) (SCALE), (int) (HINT)) #define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT) \ - __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \ - (void *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), \ + (void *) (ADDR), (int) (SCALE), (int) (HINT)) #define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT) \ - __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \ - (void *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_scatterpfqps ((__mmask8)0xFF, 
(__v8di)(__m512i) (INDEX), \ + (void *) (ADDR), (int) (SCALE), (int) (HINT)) #define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \ - __builtin_ia32_scatterpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \ - (void *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_scatterpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \ + (void *) (ADDR), (int) (SCALE), (int) (HINT)) #define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \ - __builtin_ia32_scatterpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \ - (void *)ADDR, (int)SCALE, (int)HINT) + __builtin_ia32_scatterpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \ + (void *) (ADDR), (int) (SCALE), (int) (HINT)) #endif #ifdef __DISABLE_AVX512PF__ diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index 4ce53b14af0..7685bdfa391 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -13000,260 +13000,276 @@ _mm256_permutex_pd (__m256d __X, const int __M) (__mmask8)(U))) #define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256)V1OLD, \ - (void const *)ADDR, \ - (__v8si)(__m256i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256) (V1OLD), \ + (void const *) (ADDR), \ + (__v8si)(__m256i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128)V1OLD, \ - (void const *)ADDR, \ - (__v4si)(__m128i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128) (V1OLD), \ + (void const *) (ADDR), \ + (__v4si)(__m128i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d)V1OLD, \ - (void const *)ADDR, \ - (__v4si)(__m128i)INDEX, \ - 
(__mmask8)MASK, (int)SCALE) + (__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d) (V1OLD), \ + (void const *) (ADDR), \ + (__v4si)(__m128i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d)V1OLD, \ - (void const *)ADDR, \ - (__v4si)(__m128i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d) (V1OLD), \ + (void const *) (ADDR), \ + (__v4si)(__m128i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128)V1OLD, \ - (void const *)ADDR, \ - (__v4di)(__m256i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128) (V1OLD), \ + (void const *) (ADDR), \ + (__v4di)(__m256i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128)V1OLD, \ - (void const *)ADDR, \ - (__v2di)(__m128i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128) (V1OLD), \ + (void const *) (ADDR), \ + (__v2di)(__m128i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d)V1OLD, \ - (void const *)ADDR, \ - (__v4di)(__m256i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d) (V1OLD), \ + (void const *) (ADDR), \ + (__v4di)(__m256i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d)V1OLD, \ - (void const *)ADDR, \ - (__v2di)(__m128i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m128d) __builtin_ia32_gather3div2df 
((__v2df)(__m128d) (V1OLD), \ + (void const *) (ADDR), \ + (__v2di)(__m128i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i)V1OLD, \ - (void const *)ADDR, \ - (__v8si)(__m256i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i) (V1OLD), \ + (void const *) (ADDR), \ + (__v8si)(__m256i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i)V1OLD, \ - (void const *)ADDR, \ - (__v4si)(__m128i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i) (V1OLD), \ + (void const *) (ADDR), \ + (__v4si)(__m128i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i)V1OLD, \ - (void const *)ADDR, \ - (__v4si)(__m128i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i) (V1OLD), \ + (void const *) (ADDR), \ + (__v4si)(__m128i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i)V1OLD, \ - (void const *)ADDR, \ - (__v4si)(__m128i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i) (V1OLD), \ + (void const *) (ADDR), \ + (__v4si)(__m128i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i)V1OLD, \ - (void const *)ADDR, \ - (__v4di)(__m256i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i) (V1OLD), \ + (void const *) 
(ADDR), \ + (__v4di)(__m256i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i)V1OLD, \ - (void const *)ADDR, \ - (__v2di)(__m128i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i) (V1OLD), \ + (void const *) (ADDR), \ + (__v2di)(__m128i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i)V1OLD, \ - (void const *)ADDR, \ - (__v4di)(__m256i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i) (V1OLD), \ + (void const *) (ADDR), \ + (__v4di)(__m256i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \ - (__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i)V1OLD, \ - (void const *)ADDR, \ - (__v2di)(__m128i)INDEX, \ - (__mmask8)MASK, (int)SCALE) + (__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i) (V1OLD), \ + (void const *) (ADDR), \ + (__v2di)(__m128i) (INDEX), \ + (__mmask8) (MASK), \ + (int) (SCALE)) #define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv8sf ((void *)ADDR, (__mmask8)0xFF, \ - (__v8si)(__m256i)INDEX, \ - (__v8sf)(__m256)V1, (int)SCALE) + __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8)0xFF, \ + (__v8si)(__m256i) (INDEX), \ + (__v8sf)(__m256) (V1), (int) (SCALE)) #define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv8sf ((void *)ADDR, (__mmask8)MASK, \ - (__v8si)(__m256i)INDEX, \ - (__v8sf)(__m256)V1, (int)SCALE) + __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8) (MASK), \ + (__v8si)(__m256i) (INDEX), \ + (__v8sf)(__m256) (V1), (int) (SCALE)) #define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv4sf ((void *)ADDR, 
(__mmask8)0xFF, \ - (__v4si)(__m128i)INDEX, \ - (__v4sf)(__m128)V1, (int)SCALE) + __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8)0xFF, \ + (__v4si)(__m128i) (INDEX), \ + (__v4sf)(__m128) (V1), (int) (SCALE)) #define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv4sf ((void *)ADDR, (__mmask8)MASK, \ - (__v4si)(__m128i)INDEX, \ - (__v4sf)(__m128)V1, (int)SCALE) + __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8) (MASK), \ + (__v4si)(__m128i) (INDEX), \ + (__v4sf)(__m128) (V1), (int) (SCALE)) #define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv4df ((void *)ADDR, (__mmask8)0xFF, \ - (__v4si)(__m128i)INDEX, \ - (__v4df)(__m256d)V1, (int)SCALE) + __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8)0xFF, \ + (__v4si)(__m128i) (INDEX), \ + (__v4df)(__m256d) (V1), (int) (SCALE)) #define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv4df ((void *)ADDR, (__mmask8)MASK, \ - (__v4si)(__m128i)INDEX, \ - (__v4df)(__m256d)V1, (int)SCALE) + __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8) (MASK), \ + (__v4si)(__m128i) (INDEX), \ + (__v4df)(__m256d) (V1), (int) (SCALE)) #define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv2df ((void *)ADDR, (__mmask8)0xFF, \ - (__v4si)(__m128i)INDEX, \ - (__v2df)(__m128d)V1, (int)SCALE) + __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8)0xFF, \ + (__v4si)(__m128i) (INDEX), \ + (__v2df)(__m128d) (V1), (int) (SCALE)) #define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv2df ((void *)ADDR, (__mmask8)MASK, \ - (__v4si)(__m128i)INDEX, \ - (__v2df)(__m128d)V1, (int)SCALE) + __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8) (MASK), \ + (__v4si)(__m128i) (INDEX), \ + (__v2df)(__m128d) (V1), (int) (SCALE)) #define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv8sf ((void *)ADDR, (__mmask8)0xFF, \ - (__v4di)(__m256i)INDEX, 
\ - (__v4sf)(__m128)V1, (int)SCALE) + __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8)0xFF, \ + (__v4di)(__m256i) (INDEX), \ + (__v4sf)(__m128) (V1), (int) (SCALE)) #define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv8sf ((void *)ADDR, (__mmask8)MASK, \ - (__v4di)(__m256i)INDEX, \ - (__v4sf)(__m128)V1, (int)SCALE) + __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8) (MASK), \ + (__v4di)(__m256i) (INDEX), \ + (__v4sf)(__m128) (V1), (int) (SCALE)) #define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv4sf ((void *)ADDR, (__mmask8)0xFF, \ - (__v2di)(__m128i)INDEX, \ - (__v4sf)(__m128)V1, (int)SCALE) + __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8)0xFF, \ + (__v2di)(__m128i) (INDEX), \ + (__v4sf)(__m128) (V1), (int) (SCALE)) #define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv4sf ((void *)ADDR, (__mmask8)MASK, \ - (__v2di)(__m128i)INDEX, \ - (__v4sf)(__m128)V1, (int)SCALE) + __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8) (MASK), \ + (__v2di)(__m128i) (INDEX), \ + (__v4sf)(__m128) (V1), (int) (SCALE)) #define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv4df ((void *)ADDR, (__mmask8)0xFF, \ - (__v4di)(__m256i)INDEX, \ - (__v4df)(__m256d)V1, (int)SCALE) + __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8)0xFF, \ + (__v4di)(__m256i) (INDEX), \ + (__v4df)(__m256d) (V1), (int) (SCALE)) #define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv4df ((void *)ADDR, (__mmask8)MASK, \ - (__v4di)(__m256i)INDEX, \ - (__v4df)(__m256d)V1, (int)SCALE) + __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8) (MASK), \ + (__v4di)(__m256i) (INDEX), \ + (__v4df)(__m256d) (V1), (int) (SCALE)) #define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv2df ((void *)ADDR, (__mmask8)0xFF, \ - (__v2di)(__m128i)INDEX, \ - (__v2df)(__m128d)V1, (int)SCALE) + 
__builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8)0xFF, \ + (__v2di)(__m128i) (INDEX), \ + (__v2df)(__m128d) (V1), (int) (SCALE)) #define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv2df ((void *)ADDR, (__mmask8)MASK, \ - (__v2di)(__m128i)INDEX, \ - (__v2df)(__m128d)V1, (int)SCALE) + __builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8) (MASK), \ + (__v2di)(__m128i) (INDEX), \ + (__v2df)(__m128d) (V1), (int) (SCALE)) #define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv8si ((void *)ADDR, (__mmask8)0xFF, \ - (__v8si)(__m256i)INDEX, \ - (__v8si)(__m256i)V1, (int)SCALE) + __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8)0xFF, \ + (__v8si)(__m256i) (INDEX), \ + (__v8si)(__m256i) (V1), (int) (SCALE)) #define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv8si ((void *)ADDR, (__mmask8)MASK, \ - (__v8si)(__m256i)INDEX, \ - (__v8si)(__m256i)V1, (int)SCALE) + __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8) (MASK), \ + (__v8si)(__m256i) (INDEX), \ + (__v8si)(__m256i) (V1), (int) (SCALE)) #define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv4si ((void *)ADDR, (__mmask8)0xFF, \ - (__v4si)(__m128i)INDEX, \ - (__v4si)(__m128i)V1, (int)SCALE) + __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8)0xFF, \ + (__v4si)(__m128i) (INDEX), \ + (__v4si)(__m128i) (V1), (int) (SCALE)) #define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv4si ((void *)ADDR, (__mmask8)MASK, \ - (__v4si)(__m128i)INDEX, \ - (__v4si)(__m128i)V1, (int)SCALE) + __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8) (MASK), \ + (__v4si)(__m128i) (INDEX), \ + (__v4si)(__m128i) (V1), (int) (SCALE)) #define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv4di ((void *)ADDR, (__mmask8)0xFF, \ - (__v4si)(__m128i)INDEX, \ - (__v4di)(__m256i)V1, (int)SCALE) + 
__builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8)0xFF, \ + (__v4si)(__m128i) (INDEX), \ + (__v4di)(__m256i) (V1), (int) (SCALE)) #define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv4di ((void *)ADDR, (__mmask8)MASK, \ - (__v4si)(__m128i)INDEX, \ - (__v4di)(__m256i)V1, (int)SCALE) + __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8) (MASK), \ + (__v4si)(__m128i) (INDEX), \ + (__v4di)(__m256i) (V1), (int) (SCALE)) #define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv2di ((void *)ADDR, (__mmask8)0xFF, \ - (__v4si)(__m128i)INDEX, \ - (__v2di)(__m128i)V1, (int)SCALE) + __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8)0xFF, \ + (__v4si)(__m128i) (INDEX), \ + (__v2di)(__m128i) (V1), (int) (SCALE)) #define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scattersiv2di ((void *)ADDR, (__mmask8)MASK, \ - (__v4si)(__m128i)INDEX, \ - (__v2di)(__m128i)V1, (int)SCALE) + __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8) (MASK), \ + (__v4si)(__m128i) (INDEX), \ + (__v2di)(__m128i) (V1), (int) (SCALE)) #define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv8si ((void *)ADDR, (__mmask8)0xFF, \ - (__v4di)(__m256i)INDEX, \ - (__v4si)(__m128i)V1, (int)SCALE) + __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8)0xFF, \ + (__v4di)(__m256i) (INDEX), \ + (__v4si)(__m128i) (V1), (int) (SCALE)) #define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv8si ((void *)ADDR, (__mmask8)MASK, \ - (__v4di)(__m256i)INDEX, \ - (__v4si)(__m128i)V1, (int)SCALE) + __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8) (MASK), \ + (__v4di)(__m256i) (INDEX), \ + (__v4si)(__m128i) (V1), (int) (SCALE)) #define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv4si ((void *)ADDR, (__mmask8)0xFF, \ - (__v2di)(__m128i)INDEX, \ - (__v4si)(__m128i)V1, (int)SCALE) + 
__builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8)0xFF, \ + (__v2di)(__m128i) (INDEX), \ + (__v4si)(__m128i) (V1), (int) (SCALE)) #define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv4si ((void *)ADDR, (__mmask8)MASK, \ - (__v2di)(__m128i)INDEX, \ - (__v4si)(__m128i)V1, (int)SCALE) + __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8) (MASK), \ + (__v2di)(__m128i) (INDEX), \ + (__v4si)(__m128i) (V1), (int) (SCALE)) #define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv4di ((void *)ADDR, (__mmask8)0xFF, \ - (__v4di)(__m256i)INDEX, \ - (__v4di)(__m256i)V1, (int)SCALE) + __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8)0xFF, \ + (__v4di)(__m256i) (INDEX), \ + (__v4di)(__m256i) (V1), (int) (SCALE)) #define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv4di ((void *)ADDR, (__mmask8)MASK, \ - (__v4di)(__m256i)INDEX, \ - (__v4di)(__m256i)V1, (int)SCALE) + __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8) (MASK), \ + (__v4di)(__m256i) (INDEX), \ + (__v4di)(__m256i) (V1), (int) (SCALE)) #define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv2di ((void *)ADDR, (__mmask8)0xFF, \ - (__v2di)(__m128i)INDEX, \ - (__v2di)(__m128i)V1, (int)SCALE) + __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8)0xFF, \ + (__v2di)(__m128i) (INDEX), \ + (__v2di)(__m128i) (V1), (int) (SCALE)) #define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ - __builtin_ia32_scatterdiv2di ((void *)ADDR, (__mmask8)MASK, \ - (__v2di)(__m128i)INDEX, \ - (__v2di)(__m128i)V1, (int)SCALE) + __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8) (MASK), \ + (__v2di)(__m128i) (INDEX), \ + (__v2di)(__m128i) (V1), (int) (SCALE)) #define _mm256_mask_shuffle_epi32(W, U, X, C) \ ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \ -- 2.30.2