From f4ee3a9e2ab62e99346ee286be1d10701122c03b Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Wed, 10 May 2017 20:04:44 +0200
Subject: [PATCH]

* config/i386/avx512fintrin.h (_mm_mask_max_round_sd)
(_mm_maskz_max_round_sd, _mm_mask_max_round_ss)
(_mm_maskz_max_round_ss, _mm_mask_min_round_sd)
(_mm_maskz_min_round_sd, _mm_mask_min_round_ss)
(_mm_maskz_min_round_ss): New intrinsics.
* config/i386/i386-builtin-types.def (V2DF, V2DF, V2DF, V2DF, UQI, INT)
(V4SF, V4SF, V4SF, V4SF, UQI, INT): New function type aliases.
* config/i386/i386-builtin.def (__builtin_ia32_maxsd_mask_round)
(__builtin_ia32_maxss_mask_round, __builtin_ia32_minsd_mask_round)
(__builtin_ia32_minss_mask_round): New builtins.
* config/i386/i386.c (V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
(V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT): Handle new types.
* config/i386/sse.md (_vm3): Rename to ...
(_vm3): ... this.
(v\t{%2, %1, %0|%0, %1, %2}): Change to ...
(v\t{%2, %1, %0|%0, %1, %2}): ... this.

* config/i386/avx512fintrin.h (_mm_mask_mul_round_sd)
(_mm_maskz_mul_round_sd, _mm_mask_mul_round_ss)
(_mm_maskz_mul_round_ss, _mm_mask_div_round_sd)
(_mm_maskz_div_round_sd, _mm_mask_div_round_ss)
(_mm_maskz_div_round_ss, _mm_mask_mul_sd, _mm_maskz_mul_sd)
(_mm_mask_mul_ss, _mm_maskz_mul_ss, _mm_mask_div_sd)
(_mm_maskz_div_sd, _mm_mask_div_ss, _mm_maskz_div_ss): New intrinsics.
* config/i386/i386-builtin-types.def (V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
(V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT): New function type aliases.
* config/i386/i386-builtin.def (__builtin_ia32_divsd_mask_round)
(__builtin_ia32_divss_mask_round, __builtin_ia32_mulsd_mask_round)
(__builtin_ia32_mulss_mask_round): New builtins.
* config/i386/i386.c (V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
(V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT): Handle new types.
* config/i386/sse.md (_vm3): Rename to ...
(_vm3): ... this.
(v\t{%2, %1, %0|%0, %1, %2}): Change to ...
(v\t{%2, %1, %0|%0, %1, %2}): ... this.

* config/i386/avxintrin.h (_mm256_set_m128, _mm256_set_m128d)
(_mm256_set_m128i, _mm256_setr_m128, _mm256_setr_m128d)
(_mm256_setr_m128i): New intrinsics.

* config/i386/avx512fintrin.h (_mm_mask_rcp14_sd)
(_mm_maskz_rcp14_sd, _mm_mask_rcp14_ss)
(_mm_maskz_rcp14_ss): New intrinsics.
* config/i386/i386-builtin.def (__builtin_ia32_rcp14sd_mask)
(__builtin_ia32_rcp14ss_mask): New builtins.
* config/i386/sse.md (srcp14_mask): New pattern.

testsuite/ChangeLog:

* gcc.target/i386/avx512f-vmaxsd-1.c (_mm_mask_max_round_sd)
(_mm_maskz_max_round_sd): Test new intrinsics.
* gcc.target/i386/avx512f-vmaxsd-2.c: New.
* gcc.target/i386/avx512f-vmaxss-1.c (_mm_mask_max_round_ss)
(_mm_maskz_max_round_ss): Test new intrinsics.
* gcc.target/i386/avx512f-vmaxss-2.c: New.
* gcc.target/i386/avx512f-vminsd-1.c (_mm_mask_min_round_sd)
(_mm_maskz_min_round_sd): Test new intrinsics.
* gcc.target/i386/avx512f-vminsd-2.c: New.
* gcc.target/i386/avx512f-vminss-1.c (_mm_mask_min_round_ss)
(_mm_maskz_min_round_ss): Test new intrinsics.
* gcc.target/i386/avx512f-vminss-2.c: New.
* gcc.target/i386/avx-1.c (__builtin_ia32_maxsd_mask_round)
(__builtin_ia32_maxss_mask_round, __builtin_ia32_minsd_mask_round)
(__builtin_ia32_minss_mask_round): Test new builtins.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/sse-14.c (_mm_maskz_max_round_sd)
(_mm_maskz_max_round_ss, _mm_maskz_min_round_sd)
(_mm_maskz_min_round_ss, _mm_mask_max_round_sd)
(_mm_mask_max_round_ss, _mm_mask_min_round_sd)
(_mm_mask_min_round_ss): Test new intrinsics.
* gcc.target/i386/testround-1.c: Ditto.
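
The masking convention exercised by these tests is the usual AVX-512 scalar one: the low lane takes the masked result (merged with the old destination, or zeroed), while the high lane is copied from the first source operand.  The same convention applies to the masked mul/div scalar intrinsics listed below.  A minimal usage sketch, not part of the patch and with illustrative values only (compile with -mavx512f on a compiler that provides these intrinsics):

#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m128d a = _mm_set_pd (10.0, 2.0);   /* high = 10.0, low = 2.0 */
  __m128d b = _mm_set_pd (20.0, 5.0);   /* high = 20.0, low = 5.0 */
  __m128d w = _mm_set_pd (-1.0, -1.0);  /* merge source */
  double r[2];

  /* Mask bit 0 set: low = max (2.0, 5.0) = 5.0, high = 10.0 (from a).  */
  _mm_storeu_pd (r, _mm_mask_max_round_sd (w, 0x1, a, b, _MM_FROUND_NO_EXC));
  printf ("mask_max_sd:  low=%g high=%g\n", r[0], r[1]);

  /* Mask bit 0 clear: zero masking forces the low lane to 0.0.  */
  _mm_storeu_pd (r, _mm_maskz_min_round_sd (0x0, a, b, _MM_FROUND_NO_EXC));
  printf ("maskz_min_sd: low=%g high=%g\n", r[0], r[1]);

  return 0;
}
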
* gcc.target/i386/avx512f-vdivsd-1.c (_mm_mask_div_sd) (_mm_maskz_div_sd, _mm_mask_div_round_sd) (_mm_maskz_div_round_sd): Test new intrinsics. * gcc.target/i386/avx512f-vdivsd-2.c: New. * gcc.target/i386/avx512f-vdivss-1.c (_mm_mask_div_ss) (_mm_maskz_div_ss, _mm_mask_div_round_ss) (_mm_maskz_div_round_ss): Test new intrinsics. * gcc.target/i386/avx512f-vdivss-2.c: New. * gcc.target/i386/avx512f-vmulsd-1.c (_mm_mask_mul_sd) (_mm_maskz_mul_sd, _mm_mask_mul_round_sd) (_mm_maskz_mul_round_sd): Test new intrinsics. * gcc.target/i386/avx512f-vmulsd-2.c: New. * gcc.target/i386/avx512f-vmulss-1.c (_mm_mask_mul_ss) (_mm_maskz_mul_ss, _mm_mask_mul_round_ss) (_mm_maskz_mul_round_ss): Test new intrinsics. * gcc.target/i386/avx512f-vmulss-2.c: New. * gcc.target/i386/avx-1.c (__builtin_ia32_divsd_mask_round) (__builtin_ia32_divss_mask_round, __builtin_ia32_mulsd_mask_round) (__builtin_ia32_mulss_mask_round): Test new builtins. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/sse-14.c (_mm_maskz_div_round_sd) (_mm_maskz_div_round_ss, _mm_maskz_mul_round_sd) (_mm_maskz_mul_round_ss): Test new intrinsics. * gcc.target/i386/testround-1.c: Ditto. * gcc.target/i386/avx-vinsertf128-256-1: Test new intrinsics. * gcc.target/i386/avx-vinsertf128-256-2: Ditto. * gcc.target/i386/avx-vinsertf128-256-3: Ditto. * gcc.target/i386/avx512f-vrcp14sd-1.c: Test new intrinsics. * gcc.target/i386/avx512f-vrcp14sd-2.c: Ditto. * gcc.target/i386/avx512f-vrcp14ss-1.c: Ditto. * gcc.target/i386/avx512f-vrcp14ss-2.c: Ditto. From-SVN: r247851 --- gcc/ChangeLog | 61 +++ gcc/config/i386/avx512fintrin.h | 378 +++++++++++++++++- gcc/config/i386/avxintrin.h | 36 ++ gcc/config/i386/i386-builtin.def | 10 + gcc/config/i386/sse.md | 25 +- gcc/testsuite/ChangeLog | 67 ++++ gcc/testsuite/gcc.target/i386/avx-1.c | 8 + .../gcc.target/i386/avx-vinsertf128-256-1.c | 22 +- .../gcc.target/i386/avx-vinsertf128-256-2.c | 20 +- .../gcc.target/i386/avx-vinsertf128-256-3.c | 20 +- .../gcc.target/i386/avx512f-vdivsd-1.c | 10 +- .../gcc.target/i386/avx512f-vdivsd-2.c | 69 ++++ .../gcc.target/i386/avx512f-vdivss-1.c | 11 +- .../gcc.target/i386/avx512f-vdivss-2.c | 71 ++++ .../gcc.target/i386/avx512f-vmaxsd-1.c | 7 +- .../gcc.target/i386/avx512f-vmaxsd-2.c | 54 +++ .../gcc.target/i386/avx512f-vmaxss-1.c | 7 +- .../gcc.target/i386/avx512f-vmaxss-2.c | 58 +++ .../gcc.target/i386/avx512f-vminsd-1.c | 7 +- .../gcc.target/i386/avx512f-vminsd-2.c | 54 +++ .../gcc.target/i386/avx512f-vminss-1.c | 7 +- .../gcc.target/i386/avx512f-vminss-2.c | 58 +++ .../gcc.target/i386/avx512f-vmulsd-1.c | 11 +- .../gcc.target/i386/avx512f-vmulsd-2.c | 69 ++++ .../gcc.target/i386/avx512f-vmulss-1.c | 11 +- .../gcc.target/i386/avx512f-vmulss-2.c | 71 ++++ .../gcc.target/i386/avx512f-vrcp14sd-1.c | 7 +- .../gcc.target/i386/avx512f-vrcp14sd-2.c | 17 +- .../gcc.target/i386/avx512f-vrcp14ss-1.c | 7 +- .../gcc.target/i386/avx512f-vrcp14ss-2.c | 20 +- gcc/testsuite/gcc.target/i386/sse-13.c | 8 + gcc/testsuite/gcc.target/i386/sse-14.c | 16 + gcc/testsuite/gcc.target/i386/sse-23.c | 8 + gcc/testsuite/gcc.target/i386/testround-1.c | 40 ++ 34 files changed, 1317 insertions(+), 28 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vdivsd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vdivss-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vmaxss-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vminsd-2.c create mode 100644 
gcc/testsuite/gcc.target/i386/avx512f-vminss-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vmulsd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vmulss-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b6daae49b37..ce8351445bf 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,64 @@ +2017-05-10 Sebastian Peryt + + * config/i386/avx512fintrin.h (_mm_mask_max_round_sd) + (_mm_maskz_max_round_sd, _mm_mask_max_round_ss) + (_mm_maskz_max_round_ss, _mm_mask_min_round_sd) + (_mm_maskz_min_round_sd, _mm_mask_min_round_ss) + (_mm_maskz_min_round_ss): New intrinsics. + * config/i386/i386-builtin-types.def (V2DF, V2DF, V2DF, V2DF, UQI, INT) + (V4SF, V4SF, V4SF, V4SF, UQI, INT): New function type aliases. + * config/i386/i386-builtin.def (__builtin_ia32_maxsd_mask_round) + (__builtin_ia32_maxss_mask_round, __builtin_ia32_minsd_mask_round) + (__builtin_ia32_minss_mask_round): New builtins. + * config/i386/i386.c (V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) + (V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT): Handle new types. + * config/i386/sse.md (_vm3): + Rename to ... + (_vm3): ... this. + (v\t{%2, %1, %0|%0, %1, %2}): + Change to ... + (v\t{%2, %1, %0|%0, %1, %2}): + ... this. + +2017-05-10 Sebastian Peryt + + * config/i386/avx512fintrin.h (_mm_mask_mul_round_sd) + (_mm_maskz_mul_round_sd, _mm_mask_mul_round_ss) + (_mm_maskz_mul_round_ss, _mm_mask_div_round_sd) + (_mm_maskz_div_round_sd, _mm_mask_div_round_ss) + (_mm_maskz_div_round_ss, _mm_mask_mul_sd, _mm_maskz_mul_sd) + (_mm_mask_mul_ss, _mm_maskz_mul_ss, _mm_mask_div_sd) + (_mm_maskz_div_sd, _mm_mask_div_ss, _mm_maskz_div_ss): New intrinsics. + * config/i386/i386-builtin-types.def (V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) + (V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT): New function type aliases. + * config/i386/i386-builtin.def (__builtin_ia32_divsd_mask_round) + (__builtin_ia32_divss_mask_round, __builtin_ia32_mulsd_mask_round) + (__builtin_ia32_mulss_mask_round): New builtins. + * config/i386/i386.c (V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) + (V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT): Handle new types. + * config/i386/sse.md (_vm3): + Rename to ... + (_vm3): ... this. + (v\t{%2, %1, %0|%0, %1, %2}): + Change to ... + (v\t{%2, %1, %0|%0, %1, %2}): + ... this. + +2017-05-10 Julia Koval + + * config/i386/avxintrin.h (_mm256_set_m128, _mm256_set_m128d) + (_mm256_set_m128i, _mm256_setr_m128, _mm256_setr_m128d) + (_mm256_setr_m128i): New intrinsics. + +2017-05-10 Julia Koval + + * config/i386/avx512fintrin.h (_mm_mask_rcp14_sd) + (_mm_maskz_rcp14_sd, _mm_mask_rcp14_ss) + (_mm_maskz_rcp14_ss): New intrinsics. + * config/i386/i386-builtin.def (__builtin_ia32_rcp14sd_mask) + (__builtin_ia32_rcp14ss_mask): New builtins. + * config/i386/sse.md (srcp14_mask): New pattern. 
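
A usage sketch for the new masked rcp14 scalar intrinsics (not part of the patch): the low lane receives the masked approximate reciprocal of the low lane of the second source, and the high lane is copied from the first source.  The reciprocal is only accurate to about 2^-14 relative error, so the printed value is merely close to 0.25.

#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m128d a = _mm_set_pd (7.0, 99.0);  /* high lane passes through */
  __m128d b = _mm_set_pd (0.0, 4.0);   /* low lane: rcp14 (4.0) ~= 0.25 */
  __m128d w = _mm_set_pd (0.0, -1.0);  /* merge source for a clear mask bit */
  double r[2];

  _mm_storeu_pd (r, _mm_mask_rcp14_sd (w, 0x1, a, b));
  printf ("bit set:   low~=%g high=%g\n", r[0], r[1]);  /* ~0.25, 7 */

  _mm_storeu_pd (r, _mm_mask_rcp14_sd (w, 0x0, a, b));
  printf ("bit clear: low=%g high=%g\n", r[0], r[1]);   /* -1, 7 */

  return 0;
}
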
+ 2017-05-10 Peter Bergner PR tree-optimization/51513 diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 15573c57409..7ac7cb6e836 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -1729,6 +1729,26 @@ _mm_rcp14_sd (__m128d __A, __m128d __B) (__v2df) __A); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B, + (__v2df) __A, + (__v2df) __W, + (__mmask8) __U); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B, + (__v2df) __A, + (__v2df) _mm_setzero_ps (), + (__mmask8) __U); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_rcp14_ss (__m128 __A, __m128 __B) @@ -1737,6 +1757,26 @@ _mm_rcp14_ss (__m128 __A, __m128 __B) (__v4sf) __A); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B, + (__v4sf) __A, + (__v4sf) __W, + (__mmask8) __U); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B, + (__v4sf) __A, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U); +} + extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_rsqrt14_pd (__m512d __A) @@ -2520,6 +2560,29 @@ _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R) __R); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, const int __R) +{ + return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, __R); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R) @@ -2529,6 +2592,29 @@ _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R) __R); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B, const int __R) +{ + return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, __R); +} + extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_div_round_sd (__m128d __A, __m128d __B, const int __R) 
@@ -2538,6 +2624,29 @@ _mm_div_round_sd (__m128d __A, __m128d __B, const int __R) __R); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, const int __R) +{ + return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, __R); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_div_round_ss (__m128 __A, __m128 __B, const int __R) @@ -2547,6 +2656,29 @@ _mm_div_round_ss (__m128 __A, __m128 __B, const int __R) __R); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B, const int __R) +{ + return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, __R); +} + #else #define _mm512_mul_round_pd(A, B, C) \ (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) @@ -2587,14 +2719,39 @@ _mm_div_round_ss (__m128 __A, __m128 __B, const int __R) #define _mm_mul_round_sd(A, B, C) \ (__m128d)__builtin_ia32_mulsd_round(A, B, C) +#define _mm_mask_mul_round_sd(W, U, A, B, C) \ + (__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C) + +#define _mm_maskz_mul_round_sd(U, A, B, C) \ + (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) + #define _mm_mul_round_ss(A, B, C) \ (__m128)__builtin_ia32_mulss_round(A, B, C) +#define _mm_mask_mul_round_ss(W, U, A, B, C) \ + (__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C) + +#define _mm_maskz_mul_round_ss(U, A, B, C) \ + (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) + #define _mm_div_round_sd(A, B, C) \ (__m128d)__builtin_ia32_divsd_round(A, B, C) +#define _mm_mask_div_round_sd(W, U, A, B, C) \ + (__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C) + +#define _mm_maskz_div_round_sd(U, A, B, C) \ + (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) + #define _mm_div_round_ss(A, B, C) \ (__m128)__builtin_ia32_divss_round(A, B, C) + +#define _mm_mask_div_round_ss(W, U, A, B, C) \ + (__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C) + +#define _mm_maskz_div_round_ss(U, A, B, C) \ + (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) + #endif #ifdef __OPTIMIZE__ @@ -10657,6 +10814,29 @@ _mm_max_round_sd (__m128d __A, __m128d __B, const int __R) __R); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, const int __R) +{ + return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, __R); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_round_ss (__m128 __A, __m128 __B, const int __R) @@ -10666,6 +10846,29 @@ _mm_max_round_ss (__m128 __A, __m128 __B, const int __R) __R); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B, const int __R) +{ + return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, __R); +} + extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_round_sd (__m128d __A, __m128d __B, const int __R) @@ -10675,6 +10878,29 @@ _mm_min_round_sd (__m128d __A, __m128d __B, const int __R) __R); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, const int __R) +{ + return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, __R); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_round_ss (__m128 __A, __m128 __B, const int __R) @@ -10684,18 +10910,66 @@ _mm_min_round_ss (__m128 __A, __m128 __B, const int __R) __R); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B, const int __R) +{ + return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, __R); +} + #else #define _mm_max_round_sd(A, B, C) \ - (__m128d)__builtin_ia32_addsd_round(A, B, C) + (__m128d)__builtin_ia32_maxsd_round(A, B, C) + +#define _mm_mask_max_round_sd(W, U, A, B, C) \ + (__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C) + +#define _mm_maskz_max_round_sd(U, A, B, C) \ + (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) #define _mm_max_round_ss(A, B, C) \ - (__m128)__builtin_ia32_addss_round(A, B, C) + (__m128)__builtin_ia32_maxss_round(A, B, C) + +#define _mm_mask_max_round_ss(W, U, A, B, C) \ + (__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C) + +#define _mm_maskz_max_round_ss(U, A, B, 
C) \ + (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) #define _mm_min_round_sd(A, B, C) \ - (__m128d)__builtin_ia32_subsd_round(A, B, C) + (__m128d)__builtin_ia32_minsd_round(A, B, C) + +#define _mm_mask_min_round_sd(W, U, A, B, C) \ + (__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C) + +#define _mm_maskz_min_round_sd(U, A, B, C) \ + (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) #define _mm_min_round_ss(A, B, C) \ - (__m128)__builtin_ia32_subss_round(A, B, C) + (__m128)__builtin_ia32_minss_round(A, B, C) + +#define _mm_mask_min_round_ss(W, U, A, B, C) \ + (__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C) + +#define _mm_maskz_min_round_ss(U, A, B, C) \ + (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) + #endif extern __inline __m512d @@ -11197,6 +11471,54 @@ _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B) _MM_FROUND_CUR_DIRECTION); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B) +{ + return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B) +{ + return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_div_pd (__m512d __M, __m512d __V) @@ -11257,6 +11579,54 @@ _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B) _MM_FROUND_CUR_DIRECTION); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B) +{ + return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B) +{ + return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_div_ss 
(__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_max_pd (__m512d __A, __m512d __B) diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h index 20c5a078a9a..678368c7d78 100644 --- a/gcc/config/i386/avxintrin.h +++ b/gcc/config/i386/avxintrin.h @@ -1484,6 +1484,42 @@ _mm256_castsi128_si256 (__m128i __A) return (__m256i) __builtin_ia32_si256_si ((__v4si)__A); } +extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_set_m128 ( __m128 __H, __m128 __L) +{ + return _mm256_insertf128_ps (_mm256_castps128_ps256 (__L), __H, 1); +} + +extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_set_m128d (__m128d __H, __m128d __L) +{ + return _mm256_insertf128_pd (_mm256_castpd128_pd256 (__L), __H, 1); +} + +extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_set_m128i (__m128i __H, __m128i __L) +{ + return _mm256_insertf128_si256 (_mm256_castsi128_si256 (__L), __H, 1); +} + +extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_setr_m128 (__m128 __L, __m128 __H) +{ + return _mm256_set_m128 (__H, __L); +} + +extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_setr_m128d (__m128d __L, __m128d __H) +{ + return _mm256_set_m128d (__H, __L); +} + +extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_setr_m128i (__m128i __L, __m128i __H) +{ + return _mm256_set_m128i (__H, __L); +} + #ifdef __DISABLE_AVX__ #undef __DISABLE_AVX__ #pragma GCC pop_options diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 5556f595477..80ee7e10d9d 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -1371,7 +1371,9 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df_mask, "__builtin_ia32_rcp14sd_mask", IX86_BUILTIN_RCP14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf_mask, "__builtin_ia32_rcp14ss_mask", IX86_BUILTIN_RCP14SSMASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, 
UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) @@ -2420,7 +2422,9 @@ BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_rou BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_mask_round, "__builtin_ia32_divsd_mask_round", IX86_BUILTIN_DIVSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_mask_round, "__builtin_ia32_divss_mask_round", IX86_BUILTIN_DIVSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT) @@ -2440,15 +2444,21 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_mask_round, "__builtin_ia32_maxsd_mask_round", IX86_BUILTIN_MAXSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_mask_round, "__builtin_ia32_maxss_mask_round", IX86_BUILTIN_MAXSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_mask_round, "__builtin_ia32_minsd_mask_round", 
IX86_BUILTIN_MINSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_mask_round, "__builtin_ia32_minss_mask_round", IX86_BUILTIN_MINSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_mask_round, "__builtin_ia32_mulsd_mask_round", IX86_BUILTIN_MULSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_mask_round, "__builtin_ia32_mulss_mask_round", IX86_BUILTIN_MULSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 98104e4d781..a118fcea520 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1608,7 +1608,7 @@ (set_attr "btver2_decode" "direct,double") (set_attr "mode" "")]) -(define_insn "_vm3" +(define_insn "_vm3" [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 (multdiv:VF_128 @@ -1619,7 +1619,7 @@ "TARGET_SSE" "@ \t{%2, %0|%0, %2} - v\t{%2, %1, %0|%0, %1, %2}" + v\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sse") (set_attr "prefix" "") @@ -1721,6 +1721,23 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) +(define_insn "srcp14_mask" + [(set (match_operand:VF_128 0 "register_operand" "=v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (unspec:VF_128 + [(match_operand:VF_128 1 "nonimmediate_operand" "vm")] + UNSPEC_RCP14) + (match_operand:VF_128 3 "vector_move_operand" "0C") + (match_operand: 4 "register_operand" "Yk")) + (match_operand:VF_128 2 "register_operand" "v") + (const_int 1)))] + "TARGET_AVX512F" + "vrcp14\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %1}" + [(set_attr "type" "sse") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + (define_expand "sqrt2" [(set (match_operand:VF2 0 "register_operand") (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))] @@ -1910,7 +1927,7 @@ (set_attr "prefix" "") (set_attr "mode" "")]) -(define_insn "_vm3" +(define_insn "_vm3" [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 (smaxmin:VF_128 @@ -1921,7 +1938,7 @@ "TARGET_SSE" "@ \t{%2, 
%0|%0, %2} - v\t{%2, %1, %0|%0, %1, %2}" + v\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sse") (set_attr "btver2_sse_attr" "maxmin") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c552675cc31..57545d795f0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,70 @@ +2017-05-10 Sebastian Peryt + + * gcc.target/i386/avx512f-vmaxsd-1.c (_mm_mask_max_round_sd) + (_mm_maskz_max_round_sd): Test new intrinsics. + * gcc.target/i386/avx512f-vmaxsd-2.c: New. + * gcc.target/i386/avx512f-vmaxss-1.c (_mm_mask_max_round_ss) + (_mm_maskz_max_round_ss): Test new intrinsics. + * gcc.target/i386/avx512f-vmaxss-2.c: New. + * gcc.target/i386/avx512f-vminsd-1.c (_mm_mask_min_round_sd) + (_mm_maskz_min_round_sd): Test new intrinsics. + * gcc.target/i386/avx512f-vminsd-2.c: New. + * gcc.target/i386/avx512f-vminss-1.c (_mm_mask_min_round_ss) + (_mm_maskz_min_round_ss): Test new intrinsics. + * gcc.target/i386/avx512f-vminss-2.c: New. + * gcc.target/i386/avx-1.c (__builtin_ia32_maxsd_mask_round) + (__builtin_ia32_maxss_mask_round, __builtin_ia32_minsd_mask_round) + (__builtin_ia32_minss_mask_round): Test new builtins. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * gcc.target/i386/sse-14.c (_mm_maskz_max_round_sd) + (_mm_maskz_max_round_ss, _mm_maskz_min_round_sd) + (_mm_maskz_min_round_ss, _mm_mask_max_round_sd) + (_mm_mask_max_round_ss, _mm_mask_min_round_sd) + (_mm_mask_min_round_ss): Test new intrinsics. + * gcc.target/i386/testround-1.c: Ditto. + +2017-05-10 Sebastian Peryt + + * gcc.target/i386/avx512f-vdivsd-1.c (_mm_mask_div_sd) + (_mm_maskz_div_sd, _mm_mask_div_round_sd) + (_mm_maskz_div_round_sd): Test new intrinsics. + * gcc.target/i386/avx512f-vdivsd-2.c: New. + * gcc.target/i386/avx512f-vdivss-1.c (_mm_mask_div_ss) + (_mm_maskz_div_ss, _mm_mask_div_round_ss) + (_mm_maskz_div_round_ss): Test new intrinsics. + * gcc.target/i386/avx512f-vdivss-2.c: New. + * gcc.target/i386/avx512f-vmulsd-1.c (_mm_mask_mul_sd) + (_mm_maskz_mul_sd, _mm_mask_mul_round_sd) + (_mm_maskz_mul_round_sd): Test new intrinsics. + * gcc.target/i386/avx512f-vmulsd-2.c: New. + * gcc.target/i386/avx512f-vmulss-1.c (_mm_mask_mul_ss) + (_mm_maskz_mul_ss, _mm_mask_mul_round_ss) + (_mm_maskz_mul_round_ss): Test new intrinsics. + * gcc.target/i386/avx512f-vmulss-2.c: New. + * gcc.target/i386/avx-1.c (__builtin_ia32_divsd_mask_round) + (__builtin_ia32_divss_mask_round, __builtin_ia32_mulsd_mask_round) + (__builtin_ia32_mulss_mask_round): Test new builtins. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * gcc.target/i386/sse-14.c (_mm_maskz_div_round_sd) + (_mm_maskz_div_round_ss, _mm_maskz_mul_round_sd) + (_mm_maskz_mul_round_ss): Test new intrinsics. + * gcc.target/i386/testround-1.c: Ditto. + +2017-05-10 Julia Koval + + * gcc.target/i386/avx-vinsertf128-256-1: Test new intrinsics. + * gcc.target/i386/avx-vinsertf128-256-2: Ditto. + * gcc.target/i386/avx-vinsertf128-256-3: Ditto. + +2017-05-10 Julia Koval + + * gcc.target/i386/avx512f-vrcp14sd-1.c: Test new intrinsics. + * gcc.target/i386/avx512f-vrcp14sd-2.c: Ditto. + * gcc.target/i386/avx512f-vrcp14ss-1.c: Ditto. + * gcc.target/i386/avx512f-vrcp14ss-2.c: Ditto. 
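
The avx-vinsertf128-256-*.c changes above exercise the new _mm256_set_m128* and _mm256_setr_m128* helpers from avxintrin.h, which are thin wrappers around vinsertf128.  Their argument order is the only subtlety, so here is a short sketch (not part of the patch; compile with -mavx):

#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m128d lo = _mm_set_pd (2.0, 1.0);  /* elements 1, 0 */
  __m128d hi = _mm_set_pd (4.0, 3.0);  /* elements 3, 2 */
  double r[4];

  /* _mm256_set_m128d takes the high half first ...  */
  _mm256_storeu_pd (r, _mm256_set_m128d (hi, lo));
  printf ("set_m128d:  %g %g %g %g\n", r[0], r[1], r[2], r[3]);  /* 1 2 3 4 */

  /* ... while _mm256_setr_m128d takes the halves in memory order.  */
  _mm256_storeu_pd (r, _mm256_setr_m128d (lo, hi));
  printf ("setr_m128d: %g %g %g %g\n", r[0], r[1], r[2], r[3]);  /* 1 2 3 4 */

  return 0;
}
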
+ 2017-05-10 Peter Bergner PR tree-optimization/51513 diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index e5fe9331c29..8377555f815 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -207,7 +207,9 @@ #define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 8) #define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 8) #define __builtin_ia32_divsd_round(A, B, C) __builtin_ia32_divsd_round(A, B, 8) +#define __builtin_ia32_divsd_mask_round(A, B, C, D, E) __builtin_ia32_divsd_mask_round(A, B, C, D, 8) #define __builtin_ia32_divss_round(A, B, C) __builtin_ia32_divss_round(A, B, 8) +#define __builtin_ia32_divss_mask_round(A, B, C, D, E) __builtin_ia32_divss_mask_round(A, B, C, D, 8) #define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D) #define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D) #define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D) @@ -243,15 +245,21 @@ #define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 8) #define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 8) #define __builtin_ia32_maxsd_round(A, B, C) __builtin_ia32_maxsd_round(A, B, 4) +#define __builtin_ia32_maxsd_mask_round(A, B, C, D, E) __builtin_ia32_maxsd_mask_round(A, B, C, D, 8) #define __builtin_ia32_maxss_round(A, B, C) __builtin_ia32_maxss_round(A, B, 4) +#define __builtin_ia32_maxss_mask_round(A, B, C, D, E) __builtin_ia32_maxss_mask_round(A, B, C, D, 8) #define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 8) #define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 8) #define __builtin_ia32_minsd_round(A, B, C) __builtin_ia32_minsd_round(A, B, 4) +#define __builtin_ia32_minsd_mask_round(A, B, C, D, E) __builtin_ia32_minsd_mask_round(A, B, C, D, 4) #define __builtin_ia32_minss_round(A, B, C) __builtin_ia32_minss_round(A, B, 4) +#define __builtin_ia32_minss_mask_round(A, B, C, D, E) __builtin_ia32_minss_mask_round(A, B, C, D, 4) #define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 8) #define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 8) #define __builtin_ia32_mulsd_round(A, B, C) __builtin_ia32_mulsd_round(A, B, 8) +#define __builtin_ia32_mulsd_mask_round(A, B, C, D, E) __builtin_ia32_mulsd_mask_round(A, B, C, D, 8) #define __builtin_ia32_mulss_round(A, B, C) __builtin_ia32_mulss_round(A, B, 8) +#define __builtin_ia32_mulss_mask_round(A, B, C, D, E) __builtin_ia32_mulss_mask_round(A, B, C, D, 8) #define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D) #define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D) #define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D) diff --git a/gcc/testsuite/gcc.target/i386/avx-vinsertf128-256-1.c b/gcc/testsuite/gcc.target/i386/avx-vinsertf128-256-1.c index 2390e5c7e96..a255c47826d 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vinsertf128-256-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-vinsertf128-256-1.c @@ -16,8 +16,8 @@ void static avx_test (void) { int i; - union256d u, s1; - union128d s2; + union256d u, u2, u3, s1; + union128d s2, s3; double e [4]; s1.x = 
_mm256_set_pd (2134.3343,1234.635654,453.345635,54646.464356); @@ -29,7 +29,23 @@ avx_test (void) for (i=0; i < 2; i++) e[i + (OFFSET * 2)] = s2.a[i]; - + if (check_union256d (u, e)) abort (); + + s3.x = _mm_set_pd (435345.43535, 23235.316545); + u2.x = _mm256_set_m128d(s3.x, s2.x); + u3.x = _mm256_setr_m128d(s2.x, s3.x); + + for (i = 0; i < 2; i++) + e[i] = s2.a[i]; + + for (i = 0; i < 2; i++) + e[i + 2] = s3.a[i]; + + if (check_union256d (u2, e)) + abort (); + + if (check_union256d (u3, e)) + abort (); } diff --git a/gcc/testsuite/gcc.target/i386/avx-vinsertf128-256-2.c b/gcc/testsuite/gcc.target/i386/avx-vinsertf128-256-2.c index ce0b23bbf4e..edae309dd00 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vinsertf128-256-2.c +++ b/gcc/testsuite/gcc.target/i386/avx-vinsertf128-256-2.c @@ -16,8 +16,8 @@ void static avx_test (void) { int i; - union256 u, s1; - union128 s2; + union256 u, u2, u3, s1; + union128 s2, s3; float e [8]; s1.x = _mm256_set_ps (39.467, 45.789, 78.342, 67.892, 76.678, 12.963, 29.746, 24.753); @@ -32,4 +32,20 @@ avx_test (void) if (check_union256 (u, e)) abort (); + + s3.x = _mm_set_ps (435.435, 25.35, 324.76, 32.11); + u2.x = _mm256_set_m128(s3.x, s2.x); + u3.x = _mm256_setr_m128(s2.x, s3.x); + + for (i = 0; i < 4; i++) + e[i] = s2.a[i]; + + for (i = 0; i < 4; i++) + e[i + 4] = s3.a[i]; + + if (check_union256 (u2, e)) + abort (); + + if (check_union256 (u3, e)) + abort (); } diff --git a/gcc/testsuite/gcc.target/i386/avx-vinsertf128-256-3.c b/gcc/testsuite/gcc.target/i386/avx-vinsertf128-256-3.c index 89834d55408..33986929e8d 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vinsertf128-256-3.c +++ b/gcc/testsuite/gcc.target/i386/avx-vinsertf128-256-3.c @@ -16,8 +16,8 @@ void static avx_test (void) { int i; - union256i_d u, s1; - union128i_d s2; + union256i_d u, u2, u3, s1; + union128i_d s2, s3; int e [8]; s1.x = _mm256_set_epi32 (39467, 45789, 78342, 67892, 76678, 12963, 29746, 24753); @@ -32,4 +32,20 @@ avx_test (void) if (check_union256i_d (u, e)) abort (); + + s3.x = _mm_set_epi32 (43534, 23235, 6545, 11); + u2.x = _mm256_set_m128i(s3.x, s2.x); + u3.x = _mm256_setr_m128i(s2.x, s3.x); + + for (i = 0; i < 4; i++) + e[i] = s2.a[i]; + + for (i = 0; i < 4; i++) + e[i + 4] = s3.a[i]; + + if (check_union256i_d (u2, e)) + abort (); + + if (check_union256i_d (u3, e)) + abort (); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-1.c index aeac9d0dbef..59262550a54 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-1.c @@ -1,14 +1,22 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vdivsd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivsd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vdivsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivsd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivsd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -volatile __m128d x1, x2; +volatile __m128d x1, x2, x3; volatile __mmask8 m; void extern avx512f_test (void) { + x1 = _mm_mask_div_sd (x1, m, x2, x3); + x1 = _mm_maskz_div_sd (m, x1, x2); x1 = _mm_div_round_sd (x1, x2, 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + x1 = _mm_mask_div_round_sd (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + x1 = _mm_maskz_div_round_sd (m, x1, x2, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-2.c new file mode 100644 index 00000000000..5aa1617b213 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-2.c @@ -0,0 +1,69 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 64) +#include "avx512f-mask-type.h" + +static void +calc_div (double *r, double *s1, double *s2) +{ + r[0] = s1[0] / s2[0]; + r[1] = s1[1]; +} + +void +avx512f_test (void) +{ + int i, sign; + union128d res1, res2, res3, res4, res5, src1, src2; + MASK_TYPE mask = MASK_VALUE; + double res_ref[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = 1.5 + 34.67 * i * sign; + src2.a[i] = -22.17 * i * sign + 1.0; + sign = sign * -1; + } + for (i = 0; i < SIZE; i++) + { + res1.a[i] = DEFAULT_VALUE; + res4.a[i] = DEFAULT_VALUE; + } + + res1.x = _mm_mask_div_sd (res1.x, mask, src1.x, src2.x); + res2.x = _mm_maskz_div_sd (mask, src1.x, src2.x); + res3.x = _mm_div_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC); + res4.x = _mm_mask_div_round_sd (res4.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC); + res5.x = _mm_maskz_div_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC); + + calc_div (res_ref, src1.a, src2.a); + + MASK_MERGE (d) (res_ref, mask, 1); + if (check_union128d (res1, res_ref)) + abort (); + + MASK_ZERO (d) (res_ref, mask, 1); + if (check_union128d (res2, res_ref)) + abort (); + + calc_div (res_ref, src1.a, src2.a); + + if (check_union128d (res3, res_ref)) + abort(); + + MASK_MERGE (d) (res_ref, mask, 1); + if (check_union128d (res4, res_ref)) + abort (); + + MASK_ZERO (d) (res_ref, mask, 1); + if (check_union128d (res5, res_ref)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivss-1.c index 1e4c9cb0f39..7468ac1d01e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vdivss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivss-1.c @@ -1,13 +1,22 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vdivss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vdivss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -volatile __m128 x1, x2; +volatile __m128 x1, x2, x3; +volatile __mmask8 m; void extern avx512f_test (void) { + x1 = _mm_mask_div_ss (x1, m, x2, x3); + x1 = _mm_maskz_div_ss (m, x1, x2); x1 = _mm_div_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + x1 = _mm_mask_div_round_ss (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + x1 = _mm_maskz_div_round_ss (m, x1, x2, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivss-2.c 
b/gcc/testsuite/gcc.target/i386/avx512f-vdivss-2.c new file mode 100644 index 00000000000..577208430d5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivss-2.c @@ -0,0 +1,71 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 32) +#include "avx512f-mask-type.h" + +static void +calc_div (float *r, float *s1, float *s2) +{ + r[0] = s1[0] / s2[0]; + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +void +avx512f_test (void) +{ + int i, sign; + union128 res1, res2, res3, res4, res5, src1, src2; + MASK_TYPE mask = MASK_VALUE; + float res_ref[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = 1.5 + 34.67 * i * sign; + src2.a[i] = -22.17 * i * sign + 1.0; + sign = sign * -1; + } + for (i = 0; i < SIZE; i++) + { + res1.a[i] = DEFAULT_VALUE; + res4.a[i] = DEFAULT_VALUE; + } + + res1.x = _mm_mask_div_ss (res1.x, mask, src1.x, src2.x); + res2.x = _mm_maskz_div_ss (mask, src1.x, src2.x); + res3.x = _mm_div_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC); + res4.x = _mm_mask_div_round_ss (res4.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC); + res5.x = _mm_maskz_div_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC); + + calc_div (res_ref, src1.a, src2.a); + + MASK_MERGE () (res_ref, mask, 1); + if (check_union128 (res1, res_ref)) + abort (); + + MASK_ZERO () (res_ref, mask, 1); + if (check_union128 (res2, res_ref)) + abort (); + + calc_div (res_ref, src1.a, src2.a); + + if (check_union128 (res3, res_ref)) + abort(); + + MASK_MERGE () (res_ref, mask, 1); + if (check_union128 (res4, res_ref)) + abort (); + + MASK_ZERO () (res_ref, mask, 1); + if (check_union128 (res5, res_ref)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c index 20a0a3d944c..c1c8f8d9fc6 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c @@ -1,13 +1,18 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -volatile __m128d x1, x2; +volatile __m128d x1, x2, x3; +volatile __mmask8 m; void extern avx512f_test (void) { x1 = _mm_max_round_sd (x1, x2, _MM_FROUND_NO_EXC); + x1 = _mm_mask_max_round_sd (x1, m, x2, x3, _MM_FROUND_NO_EXC); + x1 = _mm_maskz_max_round_sd (m, x1, x2, _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-2.c new file mode 100644 index 00000000000..29db77723b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-2.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 64) +#include "avx512f-mask-type.h" + +static void +calc_max (double *r, double *s1, double *s2) +{ + r[0] = s1[0] > s2[0] ? 
s1[0] : s2[0]; + r[1] = s1[1]; +} + +void +avx512f_test (void) +{ + int i, sign; + union128d res1, res2, res3, src1, src2; + MASK_TYPE mask = MASK_VALUE; + double res_ref[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = 1.5 + 34.67 * i * sign; + src2.a[i] = -22.17 * i * sign + 1.0; + sign = sign * -1; + } + for (i = 0; i < SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + + res1.x = _mm_max_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC); + res2.x = _mm_mask_max_round_sd (res2.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC); + res3.x = _mm_maskz_max_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC); + + calc_max (res_ref, src1.a, src2.a); + + if (check_union128d (res1, res_ref)) + abort(); + + MASK_MERGE (d) (res_ref, mask, 1); + if (check_union128d (res2, res_ref)) + abort (); + + MASK_ZERO (d) (res_ref, mask, 1); + if (check_union128d (res3, res_ref)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c index 0c692aa596e..ecd87578252 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c @@ -1,13 +1,18 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -volatile __m128 x1, x2; +volatile __m128 x1, x2, x3; +volatile __mmask8 m; void extern avx512f_test (void) { x1 = _mm_max_round_ss (x1, x2, _MM_FROUND_NO_EXC); + x1 = _mm_mask_max_round_ss (x1, m, x2, x3, _MM_FROUND_NO_EXC); + x1 = _mm_maskz_max_round_ss (m, x1, x2, _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-2.c new file mode 100644 index 00000000000..1932a6e5c80 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-2.c @@ -0,0 +1,58 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 32) +#include "avx512f-mask-type.h" + +static void +calc_max (float *r, float *s1, float *s2) +{ + r[0] = s1[0] > s2[0] ? 
s1[0] : s2[0]; + int i; + for (i = 1; i < SIZE; i++) + { + r[i] = s1[i]; + } +} + +void +avx512f_test (void) +{ + int i, sign; + union128 res1, res2, res3, src1, src2; + MASK_TYPE mask = MASK_VALUE; + float res_ref[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = 1.5 + 34.67 * i * sign; + src2.a[i] = -22.17 * i * sign + 1.0; + sign = sign * -1; + } + for (i = 0; i < SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + + res1.x = _mm_max_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC); + res2.x = _mm_mask_max_round_ss (res2.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC); + res3.x = _mm_maskz_max_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC); + + calc_max (res_ref, src1.a, src2.a); + + if (check_union128 (res1, res_ref)) + abort(); + + MASK_MERGE () (res_ref, mask, 1); + if (check_union128 (res2, res_ref)) + abort (); + + MASK_ZERO () (res_ref, mask, 1); + if (check_union128 (res3, res_ref)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c index 29c094890b1..22ada8776ad 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c @@ -1,13 +1,18 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -volatile __m128d x1, x2; +volatile __m128d x1, x2, x3; +volatile __mmask8 m; void extern avx512f_test (void) { x1 = _mm_min_round_sd (x1, x2, _MM_FROUND_NO_EXC); + x1 = _mm_mask_min_round_sd (x1, m, x2, x3, _MM_FROUND_NO_EXC); + x1 = _mm_maskz_min_round_sd (m, x1, x2, _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-2.c new file mode 100644 index 00000000000..79a051fb9c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-2.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 64) +#include "avx512f-mask-type.h" + +static void +calc_min (double *r, double *s1, double *s2) +{ + r[0] = s1[0] < s2[0] ? 
+  r[1] = s1[1];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, src1, src2;
+  MASK_TYPE mask = MASK_VALUE;
+  double res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    res2.a[i] = DEFAULT_VALUE;
+
+  res1.x = _mm_min_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res2.x = _mm_mask_min_round_sd (res2.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res3.x = _mm_maskz_min_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_min (res_ref, src1.a, src2.a);
+
+  if (check_union128d (res1, res_ref))
+    abort();
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res2, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res3, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c
index 62a4d3888e1..5703a88d7da 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c
@@ -1,13 +1,18 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
 /* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
-volatile __m128 x1, x2;
+volatile __m128 x1, x2, x3;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
   x1 = _mm_min_round_ss (x1, x2, _MM_FROUND_NO_EXC);
+  x1 = _mm_mask_min_round_ss (x1, m, x2, x3, _MM_FROUND_NO_EXC);
+  x1 = _mm_maskz_min_round_ss (m, x1, x2, _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vminss-2.c
new file mode 100644
index 00000000000..ad1fcb937fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminss-2.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+static void
+calc_min (float *r, float *s1, float *s2)
+{
+  r[0] = s1[0] < s2[0] ? s1[0] : s2[0];
+  int i;
+  for (i = 1; i < SIZE; i++)
+    {
+      r[i] = s1[i];
+    }
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, src1, src2;
+  MASK_TYPE mask = MASK_VALUE;
+  float res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    res2.a[i] = DEFAULT_VALUE;
+
+  res1.x = _mm_min_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res2.x = _mm_mask_min_round_ss (res2.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res3.x = _mm_maskz_min_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_min (res_ref, src1.a, src2.a);
+
+  if (check_union128 (res1, res_ref))
+    abort();
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res3, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-1.c
index df7101f3655..33bdce1f148 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-1.c
@@ -1,13 +1,22 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmulsd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulsd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmulsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulsd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulsd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
-volatile __m128d x1, x2;
+volatile __m128d x1, x2, x3;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
+  x1 = _mm_mask_mul_sd (x1, m, x2, x3);
+  x1 = _mm_maskz_mul_sd (m, x1, x2);
   x1 = _mm_mul_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  x1 = _mm_mask_mul_round_sd (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  x1 = _mm_maskz_mul_round_sd (m, x1, x2, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-2.c
new file mode 100644
index 00000000000..9a139e642de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-2.c
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+static void
+calc_mul (double *r, double *s1, double *s2)
+{
+  r[0] = s1[0] * s2[0];
+  r[1] = s1[1];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, res4, res5, src1, src2;
+  MASK_TYPE mask = MASK_VALUE;
+  double res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    {
+      res1.a[i] = DEFAULT_VALUE;
+      res4.a[i] = DEFAULT_VALUE;
+    }
+
+  res1.x = _mm_mask_mul_sd (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_mul_sd (mask, src1.x, src2.x);
+  res3.x = _mm_mul_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_mask_mul_round_sd (res4.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res5.x = _mm_maskz_mul_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_mul (res_ref, src1.a, src2.a);
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res1, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res2, res_ref))
+    abort ();
+
+  calc_mul (res_ref, src1.a, src2.a);
+
+  if (check_union128d (res3, res_ref))
+    abort();
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res4, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res5, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulss-1.c
index 7dd33ec32e0..a869d967b09 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vmulss-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulss-1.c
@@ -1,13 +1,22 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmulss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmulss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
-volatile __m128 x1, x2;
+volatile __m128 x1, x2, x3;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
+  x1 = _mm_mask_mul_ss (x1, m, x2, x3);
+  x1 = _mm_maskz_mul_ss (m, x1, x2);
   x1 = _mm_mul_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  x1 = _mm_mask_mul_round_ss (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  x1 = _mm_maskz_mul_round_ss (m, x1, x2, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulss-2.c
new file mode 100644
index 00000000000..29dc3bf367d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulss-2.c
@@ -0,0 +1,71 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+static void
+calc_mul (float *r, float *s1, float *s2)
+{
+  r[0] = s1[0] * s2[0];
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, res4, res5, src1, src2;
+  MASK_TYPE mask = MASK_VALUE;
+  float res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    {
+      res1.a[i] = DEFAULT_VALUE;
+      res4.a[i] = DEFAULT_VALUE;
+    }
+
+  res1.x = _mm_mask_mul_ss (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_mul_ss (mask, src1.x, src2.x);
+  res3.x = _mm_mul_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_mask_mul_round_ss (res4.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res5.x = _mm_maskz_mul_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_mul (res_ref, src1.a, src2.a);
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res1, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  calc_mul (res_ref, src1.a, src2.a);
+
+  if (check_union128 (res3, res_ref))
+    abort();
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res4, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res5, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-1.c
index 4ee3814b0c5..7b322f2d75c 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-1.c
@@ -1,13 +1,18 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
 /* { dg-final { scan-assembler-times "vrcp14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
-volatile __m128d x1, x2;
+volatile __m128d x1, x2, x3;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
   x1 = _mm_rcp14_sd (x1, x2);
+  x1 = _mm_mask_rcp14_sd (x3, m, x1, x2);
+  x1 = _mm_maskz_rcp14_sd (m, x1, x2);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
index cb405c38b08..1059c87105b 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
@@ -3,6 +3,7 @@
 /* { dg-require-effective-target avx512f } */
 
 #include "avx512f-check.h"
+#include "avx512f-helper.h"
 
 static void
 compute_vrcp14sd (double *s1, double *s2, double *r)
@@ -14,7 +15,8 @@ compute_vrcp14sd (double *s1, double *s2, double *r)
 static void
 avx512f_test (void)
 {
-  union128d s1, s2, res1;
+  union128d s1, s2, res1, res2, res3;
+  __mmask8 m = 1;
   double res_ref[2];
 
   s1.x = _mm_set_pd (-3.0, 111.111);
@@ -26,4 +28,17 @@
 
   if (checkVd (res1.a, res_ref, 2))
     abort ();
+
+  res2.x = _mm_set_pd (-4.0, DEFAULT_VALUE);
+  res2.x = _mm_mask_rcp14_sd(res2.x, m, s1.x, s2.x);
+
+  MASK_MERGE (d) (res_ref, m, 1);
+  if (checkVd (res2.a, res_ref, 2))
+    abort();
+
+  res3.x = _mm_maskz_rcp14_sd(m, s1.x, s2.x);
+
+  MASK_ZERO (d) (res_ref, m, 1);
+  if (checkVd (res3.a, res_ref, 2))
+    abort();
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-1.c
index cec43866bc6..2edb4617e88 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-1.c
@@ -1,13 +1,18 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
 /* { dg-final { scan-assembler-times "vrcp14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
-volatile __m128 x1, x2;
+volatile __m128 x1, x2, x3;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
   x1 = _mm_rcp14_ss (x1, x2);
+  x1 = _mm_mask_rcp14_ss (x3, m, x1, x2);
+  x1 = _mm_maskz_rcp14_ss (m, x1, x2);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
index 40025896a28..6cf22967182 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
@@ -3,6 +3,7 @@
 /* { dg-require-effective-target avx512f } */
 
 #include "avx512f-check.h"
+#include "avx512f-helper.h"
 
 static void
 compute_vrcp14ss (float *s1, float *s2, float *r)
@@ -16,16 +17,31 @@ compute_vrcp14ss (float *s1, float *s2, float *r)
 static void
 avx512f_test (void)
 {
-  union128 s1, s2, res1;
+  union128 s1, s2, res1, res2, res3;
   float res_ref[4];
+  __mmask8 m = 0;
 
   s1.x = _mm_set_ps (-24.043, 68.346, -43.35, 546.46);
   s2.x = _mm_set_ps (222.222, 333.333, 444.444, -2.0);
-  res1.x = _mm_rcp14_ss (s1.x, s2.x);
+  res1.x = _mm_rcp14_ss (s1.x, s2.x);
 
   compute_vrcp14ss (s1.a, s2.a, res_ref);
 
   if (checkVf (res1.a, res_ref, 4))
     abort ();
+
+  res2.x = _mm_set_ps (5.0, 6.0, 7.0, DEFAULT_VALUE);
+  res2.x = _mm_mask_rcp14_ss(res2.x, m, s1.x, s2.x);
+
+  MASK_MERGE () (res_ref, m, 1);
+  if (checkVf (res2.a, res_ref, 4))
+    abort();
+
+  res3.x = _mm_maskz_rcp14_ss(m, s1.x, s2.x);
+
+  MASK_ZERO () (res_ref, m, 1);
+  if (checkVf (res3.a, res_ref, 4))
+    abort();
+
 }
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 319da22361f..79879d7cc16 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -224,7 +224,9 @@
 #define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_divsd_round(A, B, C) __builtin_ia32_divsd_round(A, B, 8)
+#define __builtin_ia32_divsd_mask_round(A, B, C, D, E) __builtin_ia32_divsd_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_divss_round(A, B, C) __builtin_ia32_divss_round(A, B, 8)
+#define __builtin_ia32_divss_mask_round(A, B, C, D, E) __builtin_ia32_divss_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D)
 #define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D)
 #define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D)
@@ -260,15 +262,21 @@
 #define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_maxsd_round(A, B, C) __builtin_ia32_maxsd_round(A, B, 4)
+#define __builtin_ia32_maxsd_mask_round(A, B, C, D, E) __builtin_ia32_maxsd_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_maxss_round(A, B, C) __builtin_ia32_maxss_round(A, B, 4)
+#define __builtin_ia32_maxss_mask_round(A, B, C, D, E) __builtin_ia32_maxss_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_minsd_round(A, B, C) __builtin_ia32_minsd_round(A, B, 4)
+#define __builtin_ia32_minsd_mask_round(A, B, C, D, E) __builtin_ia32_minsd_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_minss_round(A, B, C) __builtin_ia32_minss_round(A, B, 4)
+#define __builtin_ia32_minss_mask_round(A, B, C, D, E) __builtin_ia32_minss_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_mulsd_round(A, B, C) __builtin_ia32_mulsd_round(A, B, 8)
+#define __builtin_ia32_mulsd_mask_round(A, B, C, D, E) __builtin_ia32_mulsd_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_mulss_round(A, B, C) __builtin_ia32_mulss_round(A, B, 8)
+#define __builtin_ia32_mulss_mask_round(A, B, C, D, E) __builtin_ia32_mulss_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D)
 #define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D)
 #define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 4667998b0a4..547314aef07 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -383,17 +383,25 @@ test_3 (_mm_maskz_add_round_ss, __m128, __mmask8, __m128, __m128, 9)
 test_3 (_mm512_maskz_alignr_epi32, __m512i, __mmask16, __m512i, __m512i, 1)
 test_3 (_mm512_maskz_alignr_epi64, __m512i, __mmask8, __m512i, __m512i, 1)
 test_3 (_mm512_maskz_div_round_pd, __m512d, __mmask8, __m512d, __m512d, 9)
+test_3 (_mm_maskz_div_round_sd, __m128d, __mmask8, __m128d, __m128d, 9)
 test_3 (_mm512_maskz_div_round_ps, __m512, __mmask16, __m512, __m512, 9)
+test_3 (_mm_maskz_div_round_ss, __m128, __mmask8, __m128, __m128, 9)
 test_3 (_mm512_maskz_insertf32x4, __m512, __mmask16, __m512, __m128, 1)
 test_3 (_mm512_maskz_insertf64x4, __m512d, __mmask8, __m512d, __m256d, 1)
 test_3 (_mm512_maskz_inserti32x4, __m512i, __mmask16, __m512i, __m128i, 1)
 test_3 (_mm512_maskz_inserti64x4, __m512i, __mmask8, __m512i, __m256i, 1)
 test_3 (_mm512_maskz_max_round_pd, __m512d, __mmask8, __m512d, __m512d, 8)
+test_3 (_mm_maskz_max_round_sd, __m128d, __mmask8, __m128d, __m128d, 8)
 test_3 (_mm512_maskz_max_round_ps, __m512, __mmask16, __m512, __m512, 8)
+test_3 (_mm_maskz_max_round_ss, __m128, __mmask8, __m128, __m128, 8)
 test_3 (_mm512_maskz_min_round_pd, __m512d, __mmask8, __m512d, __m512d, 8)
+test_3 (_mm_maskz_min_round_sd, __m128d, __mmask8, __m128d, __m128d, 8)
 test_3 (_mm512_maskz_min_round_ps, __m512, __mmask16, __m512, __m512, 8)
+test_3 (_mm_maskz_min_round_ss, __m128, __mmask8, __m128, __m128, 8)
 test_3 (_mm512_maskz_mul_round_pd, __m512d, __mmask8, __m512d, __m512d, 9)
+test_3 (_mm_maskz_mul_round_sd, __m128d, __mmask8, __m128d, __m128d, 9)
 test_3 (_mm512_maskz_mul_round_ps, __m512, __mmask16, __m512, __m512, 9)
+test_3 (_mm_maskz_mul_round_ss, __m128, __mmask8, __m128, __m128, 9)
 test_3 (_mm512_maskz_scalef_round_pd, __m512d, __mmask8, __m512d, __m512d, 9)
 test_3 (_mm512_maskz_scalef_round_ps, __m512, __mmask16, __m512, __m512, 9)
 test_3 (_mm512_maskz_shuffle_f32x4, __m512, __mmask16, __m512, __m512, 1)
@@ -451,7 +459,9 @@ test_4 (_mm_mask_add_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
 test_4 (_mm512_mask_alignr_epi32, __m512i, __m512i, __mmask16, __m512i, __m512i, 1)
 test_4 (_mm512_mask_alignr_epi64, __m512i, __m512i, __mmask8, __m512i, __m512i, 1)
 test_4 (_mm512_mask_div_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 9)
+test_4 (_mm_mask_div_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
 test_4 (_mm512_mask_div_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
+test_4 (_mm_mask_div_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
 test_4 (_mm512_mask_fmadd_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 9)
 test_4 (_mm512_mask_fmadd_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
 test_4 (_mm512_mask_fmaddsub_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 9)
@@ -477,11 +487,17 @@ test_4 (_mm512_mask_insertf64x4, __m512d, __m512d, __mmask8, __m512d, __m256d, 1
 test_4 (_mm512_mask_inserti32x4, __m512i, __m512i, __mmask16, __m512i, __m128i, 1)
 test_4 (_mm512_mask_inserti64x4, __m512i, __m512i, __mmask8, __m512i, __m256i, 1)
 test_4 (_mm512_mask_max_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 8)
+test_4 (_mm_mask_max_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 8)
 test_4 (_mm512_mask_max_round_ps, __m512, __m512, __mmask16, __m512, __m512, 8)
+test_4 (_mm_mask_max_round_ss, __m128, __m128, __mmask8, __m128, __m128, 8)
 test_4 (_mm512_mask_min_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 8)
+test_4 (_mm_mask_min_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 8)
 test_4 (_mm512_mask_min_round_ps, __m512, __m512, __mmask16, __m512, __m512, 8)
+test_4 (_mm_mask_min_round_ss, __m128, __m128, __mmask8, __m128, __m128, 8)
 test_4 (_mm512_mask_mul_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 9)
+test_4 (_mm_mask_mul_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
 test_4 (_mm512_mask_mul_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
+test_4 (_mm_mask_mul_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
 test_4 (_mm512_mask_scalef_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 9)
 test_4 (_mm512_mask_scalef_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
 test_4 (_mm512_mask_shuffle_f32x4, __m512, __m512, __mmask16, __m512, __m512, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 92daf96f369..96c663b64d3 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -225,7 +225,9 @@
 #define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_divsd_round(A, B, C) __builtin_ia32_divsd_round(A, B, 8)
+#define __builtin_ia32_divsd_mask_round(A, B, C, D, E) __builtin_ia32_divsd_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_divss_round(A, B, C) __builtin_ia32_divss_round(A, B, 8)
+#define __builtin_ia32_divss_mask_round(A, B, C, D, E) __builtin_ia32_divss_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D)
 #define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D)
 #define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D)
@@ -261,15 +263,21 @@
 #define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_maxsd_round(A, B, C) __builtin_ia32_maxsd_round(A, B, 4)
+#define __builtin_ia32_maxsd_mask_round(A, B, C, D, E) __builtin_ia32_maxsd_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_maxss_round(A, B, C) __builtin_ia32_maxss_round(A, B, 4)
+#define __builtin_ia32_maxss_mask_round(A, B, C, D, E) __builtin_ia32_maxss_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_minsd_round(A, B, C) __builtin_ia32_minsd_round(A, B, 4)
+#define __builtin_ia32_minsd_mask_round(A, B, C, D, E) __builtin_ia32_minsd_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_minss_round(A, B, C) __builtin_ia32_minss_round(A, B, 4)
+#define __builtin_ia32_minss_mask_round(A, B, C, D, E) __builtin_ia32_minss_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_mulsd_round(A, B, C) __builtin_ia32_mulsd_round(A, B, 8)
+#define __builtin_ia32_mulsd_mask_round(A, B, C, D, E) __builtin_ia32_mulsd_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_mulss_round(A, B, C) __builtin_ia32_mulss_round(A, B, 8)
+#define __builtin_ia32_mulss_mask_round(A, B, C, D, E) __builtin_ia32_mulss_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D)
 #define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D)
 #define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D)
diff --git a/gcc/testsuite/gcc.target/i386/testround-1.c b/gcc/testsuite/gcc.target/i386/testround-1.c
index 8cc019b32a4..2c1338164f5 100644
--- a/gcc/testsuite/gcc.target/i386/testround-1.c
+++ b/gcc/testsuite/gcc.target/i386/testround-1.c
@@ -67,9 +67,17 @@ test_round (void)
   m512 = _mm512_mask_div_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_maskz_div_round_ps (mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
   m128d = _mm_mul_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_mul_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_mul_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
   m128 = _mm_mul_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_mul_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_mul_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
   m128d = _mm_div_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_div_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_div_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
   m128 = _mm_div_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_div_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_div_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
 
   m512d = _mm512_scalef_round_pd(m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_mask_scalef_round_pd(m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
@@ -179,12 +187,24 @@ test_round (void)
   m512 = _mm512_max_round_ps (m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_mask_max_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_maskz_max_round_ps (mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_max_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_max_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_max_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_max_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_max_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_max_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_min_round_pd (m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_mask_min_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_maskz_min_round_pd (mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_min_round_ps (m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_mask_min_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_maskz_min_round_ps (mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_min_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_min_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_min_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_min_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_min_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_min_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
 
   m256i = _mm512_cvtt_roundpd_epi32 (m512d, 7); /* { dg-error "incorrect rounding operand" } */
   m256i = _mm512_mask_cvtt_roundpd_epi32 (m256i, mmask8, m512d, 7); /* { dg-error "incorrect rounding operand" } */
@@ -318,9 +338,17 @@ test_round_sae (void)
   m512 = _mm512_mask_div_round_ps (m512, mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_maskz_div_round_ps (mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand" } */
   m128d = _mm_mul_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_mul_round_sd (m128d, mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_mul_round_sd (mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
   m128 = _mm_mul_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_mul_round_ss (m128, mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_mul_round_ss (mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
   m128d = _mm_div_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_div_round_sd (m128d, mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_div_round_sd (mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
   m128 = _mm_div_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_div_round_ss (m128, mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_div_round_ss (mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
 
   m512d = _mm512_scalef_round_pd(m512d, m512d, 5); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_mask_scalef_round_pd(m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand" } */
@@ -434,12 +462,24 @@ test_sae_only (void)
   m512 = _mm512_max_round_ps (m512, m512, 3); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_mask_max_round_ps (m512, mmask16, m512, m512, 3); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_maskz_max_round_ps (mmask16, m512, m512, 3); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_max_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_max_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_max_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_max_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_max_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_max_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_min_round_pd (m512d, m512d, 3); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_mask_min_round_pd (m512d, mmask8, m512d, m512d, 3); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_maskz_min_round_pd (mmask8, m512d, m512d, 3); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_min_round_ps (m512, m512, 3); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_mask_min_round_ps (m512, mmask16, m512, m512, 3); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_maskz_min_round_ps (mmask16, m512, m512, 3); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_min_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_min_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_min_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_min_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_min_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_min_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
 
   m256i = _mm512_cvtt_roundpd_epi32 (m512d, 3); /* { dg-error "incorrect rounding operand" } */
   m256i = _mm512_mask_cvtt_roundpd_epi32 (m256i, mmask8, m512d, 3); /* { dg-error "incorrect rounding operand" } */
-- 
2.30.2