From 1853f5c77f124d60d60fcc0859b742d22cd4d5c0 Mon Sep 17 00:00:00 2001 From: Sebastian Peryt Date: Tue, 2 May 2017 15:07:12 +0200 Subject: [PATCH] * config/i386/avx512fintrin.h (_mm_mask_add_round_sd) (_mm_maskz_add_round_sd, _mm_mask_add_round_ss) (_mm_maskz_add_round_ss, _mm_mask_sub_round_sd) (_mm_maskz_sub_round_sd, _mm_mask_sub_round_ss) (_mm_maskz_sub_round_ss, _mm_mask_add_sd) (_mm_maskz_add_sd, _mm_mask_add_ss, _mm_maskz_add_ss) (_mm_mask_sub_sd, _mm_maskz_sub_sd, _mm_mask_sub_ss) (_mm_maskz_sub_ss): New intrinsics. * config/i386/i386-builtin-types.def (V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) (V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT): New function type aliases. * config/i386/i386-builtin.def (__builtin_ia32_addsd_mask_round) (__builtin_ia32_addss_mask_round, __builtin_ia32_subsd_mask_round) (__builtin_ia32_subss_mask_round): New builtins. * config/i386/i386.c (V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) (V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT): Handle new types. * config/i386/sse.md (_vm3): Renamed to ... (_vm3): ... this. (v\t{%2, %1, %0|%0, %1, %2}): Changed to ... (v\t{%2, %1, %0|%0, %1, %2}): ... this. testsuite/ChangeLog: * gcc.target/i386/avx512f-vaddsd-1.c (_mm_mask_add_sd) (_mm_maskz_add_sd, _mm_mask_add_round_sd) (_mm_maskz_add_round_sd): Test new intrinsics. * gcc.target/i386/avx512f-vaddsd-2.c: New. * gcc.target/i386/avx512f-vaddss-1.c (_mm_mask_add_ss) (_mm_maskz_add_ss, _mm_mask_add_round_ss) (_mm_maskz_add_round_ss): Test new intrinsics. * gcc.target/i386/avx512f-vaddss-2.c: New. * gcc.target/i386/avx512f-vsubsd-1.c (_mm_mask_sub_sd) (_mm_maskz_sub_sd, _mm_mask_sub_round_sd) (_mm_maskz_sub_round_sd): Test new intrinsics. * gcc.target/i386/avx512f-vsubsd-2.c: New. * gcc.target/i386/avx512f-vsubss-1.c (_mm_mask_sub_ss) (_mm_maskz_sub_ss, _mm_mask_sub_round_ss) (_mm_maskz_sub_round_ss): Test new intrinsics. * gcc.target/i386/avx512f-vsubss-2.c: New. 
* gcc.target/i386/avx-1.c (__builtin_ia32_addsd_mask_round) (__builtin_ia32_addss_mask_round, __builtin_ia32_subsd_mask_round) (__builtin_ia32_subss_mask_round): Test new builtins. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/sse-14.c (_mm_maskz_add_round_sd) (_mm_maskz_add_round_ss, _mm_maskz_sub_round_sd) (_mm_maskz_sub_round_ss, _mm_mask_add_round_sd) (_mm_mask_add_round_ss, _mm_mask_sub_round_sd) (_mm_mask_sub_round_ss): Test new intrinsics. * gcc.target/i386/testround-1.c: Ditto. From-SVN: r247498 --- gcc/ChangeLog | 25 +++ gcc/config/i386/avx512fintrin.h | 209 ++++++++++++++++++ gcc/config/i386/i386-builtin-types.def | 2 + gcc/config/i386/i386-builtin.def | 4 + gcc/config/i386/i386.c | 2 + gcc/config/i386/sse.md | 4 +- gcc/testsuite/ChangeLog | 30 +++ gcc/testsuite/gcc.target/i386/avx-1.c | 4 + .../gcc.target/i386/avx512f-vaddsd-1.c | 11 +- .../gcc.target/i386/avx512f-vaddsd-2.c | 70 ++++++ .../gcc.target/i386/avx512f-vaddss-1.c | 11 +- .../gcc.target/i386/avx512f-vaddss-2.c | 70 ++++++ .../gcc.target/i386/avx512f-vsubsd-1.c | 11 +- .../gcc.target/i386/avx512f-vsubsd-2.c | 70 ++++++ .../gcc.target/i386/avx512f-vsubss-1.c | 11 +- .../gcc.target/i386/avx512f-vsubss-2.c | 70 ++++++ gcc/testsuite/gcc.target/i386/sse-13.c | 4 + gcc/testsuite/gcc.target/i386/sse-14.c | 8 + gcc/testsuite/gcc.target/i386/sse-23.c | 4 + gcc/testsuite/gcc.target/i386/testround-1.c | 16 ++ 20 files changed, 630 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vaddsd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vaddss-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vsubsd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vsubss-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 22c7c61e1f5..9ad9ec00fc2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2017-05-02 Sebastian Peryt + + * config/i386/avx512fintrin.h (_mm_mask_add_round_sd) + 
(_mm_maskz_add_round_sd, _mm_mask_add_round_ss) + (mm_maskz_add_round_ss, _mm_mask_sub_round_sd) + (mm_maskz_sub_round_sd, _mm_mask_sub_round_ss) + (mm_maskz_sub_round_ss, _mm_mask_add_sd) + (mm_maskz_add_sd, _mm_mask_add_ss, _mm_maskz_add_ss) + (mm_mask_sub_sd, _mm_maskz_sub_sd, _mm_mask_sub_ss) + (mm_maskz_sub_ss): New intrinsics. + * config/i386/i386-builtin-types.def (V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) + (V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT): New function type aliases. + * config/i386/i386-builtin.def (__builtin_ia32_addsd_mask_round) + (__builtin_ia32_addss_mask_round, __builtin_ia32_subsd_mask_round) + (__builtin_ia32_subss_mask_round): New builtins. + * config/i386/i386.c (V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) + (V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT): Handle new types. + * config/i386/sse.md (_vm3): + Renamed to ... + (_vm3): ... this. + (v\t{%2, %1, %0|%0, %1, %2}): + Changed to ... + (v\t{%2, %1, %0|%0, %1, %2}): + ... this. + 2017-05-02 Martin Jambor PR tree-optimization/78687 diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index afc80d62d62..15573c57409 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -1415,6 +1415,29 @@ _mm_add_round_sd (__m128d __A, __m128d __B, const int __R) __R); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, const int __R) +{ + return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, __R); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) _mm_add_round_ss (__m128 __A, __m128 __B, const int __R) @@ -1424,6 +1447,29 @@ _mm_add_round_ss (__m128 __A, __m128 __B, const int __R) __R); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B, const int __R) +{ + return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, __R); +} + extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R) @@ -1433,6 +1479,29 @@ _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R) __R); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, const int __R) +{ + return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, __R); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R) @@ -1442,18 +1511,66 @@ _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R) __R); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B, const int __R) +{ + return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, __R); +} + #else #define _mm_add_round_sd(A, B, C) \ (__m128d)__builtin_ia32_addsd_round(A, B, C) +#define _mm_mask_add_round_sd(W, U, A, B, C) \ + (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C) + +#define _mm_maskz_add_round_sd(U, A, B, C) \ + (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) + #define _mm_add_round_ss(A, B, C) \ (__m128)__builtin_ia32_addss_round(A, B, C) +#define _mm_mask_add_round_ss(W, U, A, B, C) \ + (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C) + +#define _mm_maskz_add_round_ss(U, A, B, C) \ + (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) + #define _mm_sub_round_sd(A, B, C) \ (__m128d)__builtin_ia32_subsd_round(A, B, C) +#define _mm_mask_sub_round_sd(W, U, A, B, C) \ + (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C) + +#define _mm_maskz_sub_round_sd(U, A, B, C) \ + (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) + #define _mm_sub_round_ss(A, B, C) \ (__m128)__builtin_ia32_subss_round(A, B, C) + +#define _mm_mask_sub_round_ss(W, U, A, B, C) \ + (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C) + +#define _mm_maskz_sub_round_ss(U, A, B, C) \ + (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) + #endif #ifdef __OPTIMIZE__ @@ -10868,6 +10985,52 @@ _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B) _MM_FROUND_CUR_DIRECTION); } +extern __inline __m128d 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_sub_pd (__m512d __A, __m512d __B) @@ -10928,6 +11091,52 @@ _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B) _MM_FROUND_CUR_DIRECTION); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) 
__builtin_ia32_subsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mul_pd (__m512d __A, __m512d __B) diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 1334633c65f..b3620edd106 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -1004,6 +1004,8 @@ DEF_FUNCTION_TYPE (V8DF, V8DI, V8DF, QI, INT) DEF_FUNCTION_TYPE (V8SF, V8DI, V8SF, QI, INT) DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF, HI, INT) DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DF, UQI, INT) +DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, UQI, INT) +DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, UQI, INT) DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, QI, INT) DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI, INT) DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, QI, INT) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 43b5b620e47..5556f595477 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2387,7 +2387,9 @@ BDESC_FIRST (round_args, ROUND_ARGS, OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) 
V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_mask_round, "__builtin_ia32_addsd_mask_round", IX86_BUILTIN_ADDSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_mask_round, "__builtin_ia32_addss_mask_round", IX86_BUILTIN_ADDSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT) @@ -2462,7 +2464,9 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) 
V2DF_FTYPE_V2DF_V2DF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_mask_round, "__builtin_ia32_subsd_mask_round", IX86_BUILTIN_SUBSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_mask_round, "__builtin_ia32_subss_mask_round", IX86_BUILTIN_SUBSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7547539ea81..9c429de7f7e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -36183,6 +36183,8 @@ ix86_expand_round_builtin (const struct builtin_description *d, case INT_FTYPE_V2DF_V2DF_INT_INT: return ix86_expand_sse_comi_round (d, exp, target); case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT: + case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT: + case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT: case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT: case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT: case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT: diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index beb155d07da..98104e4d781 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1568,7 +1568,7 @@ (set_attr "prefix" "") (set_attr "mode" "")]) -(define_insn "_vm3" +(define_insn "_vm3" [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 (plusminus:VF_128 @@ -1579,7 
+1579,7 @@ "TARGET_SSE" "@ \t{%2, %0|%0, %2} - v\t{%2, %1, %0|%0, %1, %2}" + v\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") (set_attr "prefix" "") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 81ecd07d6de..d47aa91110f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,33 @@ +2017-05-02 Sebastian Peryt + + * gcc.target/i386/avx512f-vaddsd-1.c (_mm_mask_add_sd) + (_mm_maskz_add_sd, _mm_mask_add_round_sd) + (_mm_maskz_add_round_sd): Test new intrinsics. + * gcc.target/i386/avx512f-vaddsd-2.c: New. + * gcc.target/i386/avx512f-vaddss-1.c (_mm_mask_add_ss) + (_mm_maskz_add_ss, _mm_mask_add_round_ss) + (_mm_maskz_add_round_ss): Test new intrinsics. + * gcc.target/i386/avx512f-vaddss-2.c: New. + * gcc.target/i386/avx512f-vsubsd-1.c (_mm_mask_sub_sd) + (_mm_maskz_sub_sd, _mm_mask_sub_round_sd) + (_mm_maskz_sub_round_sd): Test new intrinsics. + * gcc.target/i386/avx512f-vsubsd-2.c: New. + * gcc.target/i386/avx512f-vsubss-1.c (_mm_mask_sub_ss) + (_mm_maskz_sub_ss, _mm_mask_sub_round_ss) + (_mm_maskz_sub_round_ss): Test new intrinsics. + * gcc.target/i386/avx512f-vsubss-2.c: New. + * gcc.target/i386/avx-1.c (__builtin_ia32_addsd_mask_round) + (__builtin_ia32_addss_mask_round, __builtin_ia32_subsd_mask_round) + (__builtin_ia32_subss_mask_round): Test new builtins. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * gcc.target/i386/sse-14.c (_mm_maskz_add_round_sd) + (_mm_maskz_add_round_ss, _mm_maskz_sub_round_sd) + (_mm_maskz_sub_round_ss, _mm_mask_add_round_sd) + (_mm_mask_add_round_ss, _mm_mask_sub_round_sd) + (_mm_mask_sub_round_ss): Test new intrinsics. + * gcc.target/i386/testround-1.c: Ditto. 
+ 2017-05-02 Martin Jambor PR tree-optimization/78687 diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index 2a0df232278..e5fe9331c29 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -173,7 +173,9 @@ #define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 8) #define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 8) #define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 8) +#define __builtin_ia32_addsd_mask_round(A, B, C, D, E) __builtin_ia32_addsd_mask_round(A, B, C, D, 8) #define __builtin_ia32_addss_round(A, B, C) __builtin_ia32_addss_round(A, B, 8) +#define __builtin_ia32_addss_mask_round(A, B, C, D, E) __builtin_ia32_addss_mask_round(A, B, C, D, 8) #define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E) #define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E) #define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D) @@ -296,7 +298,9 @@ #define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 8) #define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 8) #define __builtin_ia32_subsd_round(A, B, C) __builtin_ia32_subsd_round(A, B, 8) +#define __builtin_ia32_subsd_mask_round(A, B, C, D, E) __builtin_ia32_subsd_mask_round(A, B, C, D, 8) #define __builtin_ia32_subss_round(A, B, C) __builtin_ia32_subss_round(A, B, 8) +#define __builtin_ia32_subss_mask_round(A, B, C, D, E) __builtin_ia32_subss_mask_round(A, B, C, D, 8) #define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D) #define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D) #define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 8) diff --git 
a/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-1.c index aac4e37e575..a0ec08231e8 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-1.c @@ -1,13 +1,22 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vaddsd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddsd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vaddsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddsd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddsd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -volatile __m128d x1, x2; +volatile __m128d x1, x2, x3; +volatile __mmask8 m; void extern avx512f_test (void) { + x1 = _mm_mask_add_sd (x1, m, x2, x3); + x1 = _mm_maskz_add_sd (m, x1, x2); x1 = _mm_add_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + x1 = _mm_mask_add_round_sd (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + x1 = _mm_maskz_add_round_sd (m, x1, x2, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-2.c new file mode 100644 index 00000000000..8969d636aaf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-2.c @@ -0,0 +1,70 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 64) +#include "avx512f-mask-type.h" + +static void +calc_add (double *r, double *s1, double *s2) +{ + int i; + for (i = 0; i < SIZE; i++) + { + 
r[i] = s1[i] + s2[i]; + } +} + +void +avx512f_test (void) +{ + int i, sign; + union128d res1, res2, res3, res4, res5, src1, src2; + MASK_TYPE mask = MASK_VALUE; + double res_ref[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = 1.5 + 34.67 * i * sign; + src2.a[i] = -22.17 * i * sign + 1.0; + sign = sign * -1; + } + for (i = 0; i < SIZE; i++) + { + res1.a[i] = DEFAULT_VALUE; + res4.a[i] = DEFAULT_VALUE; + } + + res1.x = _mm_mask_add_sd (res1.x, mask, src1.x, src2.x); + res2.x = _mm_maskz_add_sd (mask, src1.x, src2.x); + res3.x = _mm_add_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC); + res4.x = _mm_mask_add_round_sd (res4.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC); + res5.x = _mm_maskz_add_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC); + + calc_add (res_ref, src1.a, src2.a); + + MASK_MERGE (d) (res_ref, mask, SIZE); + if (check_union128d (res1, res_ref)) + abort (); + + MASK_ZERO (d) (res_ref, mask, SIZE); + if (check_union128d (res2, res_ref)) + abort (); + + if (check_union128d (res3, res_ref)) + abort(); + + MASK_MERGE (d) (res_ref, mask, SIZE); + if (check_union128d (res4, res_ref)) + abort (); + + MASK_ZERO (d) (res_ref, mask, SIZE); + if (check_union128d (res5, res_ref)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddss-1.c index 42a8e8436c0..fa5f44ec18b 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vaddss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddss-1.c @@ -1,13 +1,22 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vaddss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddss\[ 
\\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -volatile __m128 x1, x2; +volatile __m128 x1, x2, x3; +volatile __mmask8 m; void extern avx512f_test (void) { + x1 = _mm_mask_add_ss (x1, m, x2, x3); + x1 = _mm_maskz_add_ss (m, x1, x2); x1 = _mm_add_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + x1 = _mm_mask_add_round_ss (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + x1 = _mm_maskz_add_round_ss (m, x1, x2, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddss-2.c new file mode 100644 index 00000000000..49c49c35530 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddss-2.c @@ -0,0 +1,70 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 32) +#include "avx512f-mask-type.h" + +static void +calc_add (float *r, float *s1, float *s2) +{ + int i; + for (i = 0; i < SIZE; i++) + { + r[i] = s1[i] + s2[i]; + } +} + +void +avx512f_test (void) +{ + int i, sign; + union128 res1, res2, res3, res4, res5, src1, src2; + MASK_TYPE mask = MASK_VALUE; + float res_ref[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = 1.5 + 34.67 * i * sign; + src2.a[i] = -22.17 * i * sign + 1.0; + sign = sign * -1; + } + for (i = 0; i < SIZE; i++) + { + res1.a[i] = DEFAULT_VALUE; + res4.a[i] = DEFAULT_VALUE; + } + + res1.x = _mm_mask_add_ss (res1.x, mask, src1.x, src2.x); + res2.x = _mm_maskz_add_ss (mask, src1.x, src2.x); + res3.x = _mm_add_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC); + res4.x = _mm_mask_add_round_ss (res4.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC); + res5.x = _mm_maskz_add_round_ss (mask, src1.x, 
src2.x, _MM_FROUND_NO_EXC); + + calc_add (res_ref, src1.a, src2.a); + + MASK_MERGE () (res_ref, mask, SIZE); + if (check_union128 (res1, res_ref)) + abort (); + + MASK_ZERO () (res_ref, mask, SIZE); + if (check_union128 (res2, res_ref)) + abort (); + + if (check_union128 (res3, res_ref)) + abort(); + + MASK_MERGE () (res_ref, mask, SIZE); + if (check_union128 (res4, res_ref)) + abort (); + + MASK_ZERO () (res_ref, mask, SIZE); + if (check_union128 (res5, res_ref)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-1.c index 0103aa30d4a..692350c14ce 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-1.c @@ -1,13 +1,22 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vsubsd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubsd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vsubsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubsd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubsd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -volatile __m128d x1, x2; +volatile __m128d x1, x2, x3; +volatile __mmask8 m; void extern avx512f_test (void) { + x1 = _mm_mask_sub_sd (x1, m, x2, x3); + x1 = _mm_maskz_sub_sd (m, x1, x2); x1 = _mm_sub_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + x1 = _mm_mask_sub_round_sd (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + x1 = _mm_maskz_sub_round_sd (m, x1, x2, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-2.c 
b/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-2.c new file mode 100644 index 00000000000..50585e5501d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-2.c @@ -0,0 +1,70 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 64) +#include "avx512f-mask-type.h" + +static void +calc_sub (double *r, double *s1, double *s2) +{ + int i; + for (i = 0; i < SIZE; i++) + { + r[i] = s1[i] - s2[i]; + } +} + +void +avx512f_test (void) +{ + int i, sign; + union128d res1, res2, res3, res4, res5, src1, src2; + MASK_TYPE mask = MASK_VALUE; + double res_ref[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = 1.5 + 34.67 * i * sign; + src2.a[i] = -22.17 * i * sign + 1.0; + sign = sign * -1; + } + for (i = 0; i < SIZE; i++) + { + res1.a[i] = DEFAULT_VALUE; + res4.a[i] = DEFAULT_VALUE; + } + + res1.x = _mm_mask_sub_sd (res1.x, mask, src1.x, src2.x); + res2.x = _mm_maskz_sub_sd (mask, src1.x, src2.x); + res3.x = _mm_sub_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC); + res4.x = _mm_mask_sub_round_sd (res4.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC); + res5.x = _mm_maskz_sub_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC); + + calc_sub (res_ref, src1.a, src2.a); + + MASK_MERGE (d) (res_ref, mask, SIZE); + if (check_union128d (res1, res_ref)) + abort (); + + MASK_ZERO (d) (res_ref, mask, SIZE); + if (check_union128d (res2, res_ref)) + abort (); + + if (check_union128d (res3, res_ref)) + abort(); + + MASK_MERGE (d) (res_ref, mask, SIZE); + if (check_union128d (res4, res_ref)) + abort (); + + MASK_ZERO (d) (res_ref, mask, SIZE); + if (check_union128d (res5, res_ref)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubss-1.c index 8f546984fe1..c144aac623a 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vsubss-1.c +++ 
b/gcc/testsuite/gcc.target/i386/avx512f-vsubss-1.c
@@ -1,13 +1,22 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vsubss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vsubss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vsubss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vsubss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vsubss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
-volatile __m128 x1, x2;
+volatile __m128 x1, x2, x3;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
+  x1 = _mm_mask_sub_ss (x1, m, x2, x3);
+  x1 = _mm_maskz_sub_ss (m, x1, x2);
   x1 = _mm_sub_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  x1 = _mm_mask_sub_round_ss (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  x1 = _mm_maskz_sub_round_ss (m, x1, x2, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubss-2.c
new file mode 100644
index 00000000000..60b3e95e503
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubss-2.c
@@ -0,0 +1,78 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+/* Reference result for a scalar single-precision subtract: only element 0
+   is computed; the remaining elements are copied from the first source,
+   which is how the Intel intrinsics documentation specifies the _ss forms.  */
+static void
+calc_sub (float *r, float *s1, float *s2)
+{
+  int i;
+
+  r[0] = s1[0] - s2[0];
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, res4, res5, src1, src2;
+  MASK_TYPE mask = MASK_VALUE;
+  float res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    {
+      res1.a[i] = DEFAULT_VALUE;
+      res4.a[i] = DEFAULT_VALUE;
+    }
+
+  res1.x = _mm_mask_sub_ss (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_sub_ss (mask, src1.x, src2.x);
+  res3.x = _mm_sub_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_mask_sub_round_ss (res4.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res5.x = _mm_maskz_sub_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_sub (res_ref, src1.a, src2.a);
+
+  /* The scalar forms apply the write mask to element 0 only, so only that
+     element of the reference is handed to the masking helpers.  */
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res1, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  /* res3 is unmasked; res_ref was passed to the masking helpers above and
+     may no longer hold the plain result, so rebuild it first.  */
+  calc_sub (res_ref, src1.a, src2.a);
+  if (check_union128 (res3, res_ref))
+    abort ();
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res4, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res5, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index ef207e8dcf1..319da22361f 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -190,7 +190,9 @@
 #define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 8)
+#define __builtin_ia32_addsd_mask_round(A, B, C, D, E) __builtin_ia32_addsd_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_addss_round(A, B, C) __builtin_ia32_addss_round(A, B, 8)
+#define __builtin_ia32_addss_mask_round(A, B, C, D, E) __builtin_ia32_addss_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E)
 #define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E)
 #define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D)
@@ -313,7 +315,9 @@
 #define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_subsd_round(A, B, C) __builtin_ia32_subsd_round(A, B, 8)
+#define __builtin_ia32_subsd_mask_round(A, B, C, D, E) __builtin_ia32_subsd_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_subss_round(A, B, C) __builtin_ia32_subss_round(A, B, 8)
+#define __builtin_ia32_subss_mask_round(A, B, C, D, E) __builtin_ia32_subss_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D)
 #define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D)
 #define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 8)
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index fe628e57179..4667998b0a4 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -377,7 +377,9 @@ test_3 (_mm512_mask_srai_epi64, __m512i, __m512i, __mmask8, __m512i, 1)
 test_3 (_mm512_mask_srli_epi32, __m512i, __m512i, __mmask16, __m512i, 1)
 test_3 (_mm512_mask_srli_epi64, __m512i, __m512i, __mmask8, __m512i, 1)
 test_3 (_mm512_maskz_add_round_pd, __m512d, __mmask8, __m512d, __m512d, 9)
+test_3 (_mm_maskz_add_round_sd, __m128d, __mmask8, __m128d, __m128d, 9)
 test_3 (_mm512_maskz_add_round_ps, __m512, __mmask16, __m512, __m512, 9)
+test_3 (_mm_maskz_add_round_ss, __m128, __mmask8, __m128, __m128, 9)
 test_3 (_mm512_maskz_alignr_epi32, __m512i, __mmask16, __m512i, __m512i, 1)
 test_3 (_mm512_maskz_alignr_epi64, __m512i, __mmask8, __m512i, __m512i, 1)
 test_3 (_mm512_maskz_div_round_pd, __m512d, __mmask8, __m512d, __m512d, 9)
@@ -401,7 +403,9 @@ test_3 (_mm512_maskz_shuffle_i64x2, __m512i, __mmask8, __m512i, __m512i, 1)
 test_3 (_mm512_maskz_shuffle_pd, __m512d, __mmask8, __m512d, __m512d, 1)
 test_3 (_mm512_maskz_shuffle_ps, __m512, __mmask16, __m512, __m512, 1)
 test_3 (_mm512_maskz_sub_round_pd, __m512d, __mmask8, __m512d, __m512d, 9)
+test_3 (_mm_maskz_sub_round_sd, __m128d, __mmask8, __m128d, __m128d, 9)
 test_3 (_mm512_maskz_sub_round_ps, __m512, __mmask16, __m512, __m512, 9)
+test_3 (_mm_maskz_sub_round_ss, __m128, __mmask8, __m128, __m128, 9)
 test_3 (_mm512_ternarylogic_epi32, __m512i, __m512i, __m512i, __m512i, 1)
 test_3 (_mm512_ternarylogic_epi64, __m512i, __m512i, __m512i, __m512i, 1)
 test_3 (_mm_fmadd_round_sd, __m128d, __m128d, __m128d, __m128d, 9)
@@ -441,7 +445,9 @@ test_4 (_mm512_mask3_fnmadd_round_ps, __m512, __m512, __m512, __m512, __mmask16,
 test_4 (_mm512_mask3_fnmsub_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 9)
 test_4 (_mm512_mask3_fnmsub_round_ps, __m512, __m512, __m512, __m512, __mmask16, 9)
 test_4 (_mm512_mask_add_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 9)
+test_4 (_mm_mask_add_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
 test_4 (_mm512_mask_add_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
+test_4 (_mm_mask_add_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
 test_4 (_mm512_mask_alignr_epi32, __m512i, __m512i, __mmask16, __m512i, __m512i, 1)
 test_4 (_mm512_mask_alignr_epi64, __m512i, __m512i, __mmask8, __m512i, __m512i, 1)
 test_4 (_mm512_mask_div_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 9)
@@ -485,7 +491,9 @@ test_4 (_mm512_mask_shuffle_i64x2, __m512i, __m512i, __mmask8, __m512i, __m512i,
 test_4 (_mm512_mask_shuffle_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
 test_4 (_mm512_mask_shuffle_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
 test_4 (_mm512_mask_sub_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 9)
+test_4 (_mm_mask_sub_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
 test_4 (_mm512_mask_sub_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
+test_4 (_mm_mask_sub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
 test_4 (_mm512_mask_ternarylogic_epi32, __m512i, __m512i, __mmask16, __m512i, __m512i, 1)
 test_4 (_mm512_mask_ternarylogic_epi64, __m512i, __m512i, __mmask8, __m512i, __m512i, 1)
 test_4 (_mm512_maskz_fmadd_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 9)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 45c31250e96..92daf96f369 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -191,7 +191,9 @@
 #define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 8)
+#define __builtin_ia32_addsd_mask_round(A, B, C, D, E) __builtin_ia32_addsd_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_addss_round(A, B, C) __builtin_ia32_addss_round(A, B, 8)
+#define __builtin_ia32_addss_mask_round(A, B, C, D, E) __builtin_ia32_addss_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E)
 #define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E)
 #define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D)
@@ -314,7 +316,9 @@
 #define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_subsd_round(A, B, C) __builtin_ia32_subsd_round(A, B, 8)
+#define __builtin_ia32_subsd_mask_round(A, B, C, D, E) __builtin_ia32_subsd_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_subss_round(A, B, C) __builtin_ia32_subss_round(A, B, 8)
+#define __builtin_ia32_subss_mask_round(A, B, C, D, E) __builtin_ia32_subss_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D)
 #define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D)
 #define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 8)
diff --git a/gcc/testsuite/gcc.target/i386/testround-1.c b/gcc/testsuite/gcc.target/i386/testround-1.c
index 20c039ab0ba..8cc019b32a4 100644
--- a/gcc/testsuite/gcc.target/i386/testround-1.c
+++ b/gcc/testsuite/gcc.target/i386/testround-1.c
@@ -20,9 +20,17 @@ void
 test_round (void)
 {
   m128d = _mm_add_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_add_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_add_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
   m128 = _mm_add_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_add_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_add_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
   m128d = _mm_sub_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_sub_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_sub_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
   m128 = _mm_sub_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_sub_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_sub_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
 
   m512d = _mm512_sqrt_round_pd (m512d, 7); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_mask_sqrt_round_pd (m512d, mmask8, m512d, 7); /* { dg-error "incorrect rounding operand" } */
@@ -263,9 +271,17 @@ void
 test_round_sae (void)
 {
   m128d = _mm_add_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_add_round_sd (m128d, mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_add_round_sd (mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
   m128 = _mm_add_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_add_round_ss (m128, mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_add_round_ss (mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
   m128d = _mm_sub_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_sub_round_sd (m128d, mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_sub_round_sd (mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
   m128 = _mm_sub_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_sub_round_ss (m128, mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_sub_round_ss (mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
 
   m512d = _mm512_sqrt_round_pd (m512d, 5); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_mask_sqrt_round_pd (m512d, mmask8, m512d, 5); /* { dg-error "incorrect rounding operand" } */
-- 
2.30.2