From 5c4ade6d44914428c8f71713f15c5697d27ed3c8 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 22 Mar 2019 11:08:40 +0100 Subject: [PATCH] re PR target/89784 (Missing AVX512 intrinsics) PR target/89784 * config/i386/i386.c (enum ix86_builtins): Remove IX86_BUILTIN_VFMSUBSD3_MASK3 and IX86_BUILTIN_VFMSUBSS3_MASK3. * config/i386/i386-builtin.def (__builtin_ia32_vfmaddsd3_mask, __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz, __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask, __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz, __builtin_ia32_vfmsubss3_mask3): New builtins. * config/i386/sse.md (avx512f_vmfmadd__mask, avx512f_vmfmadd__mask3, avx512f_vmfmadd__maskz_1, *avx512f_vmfmsub__mask, avx512f_vmfmsub__mask3, *avx512f_vmfmasub__maskz_1, *avx512f_vmfnmadd__mask, *avx512f_vmfnmadd__mask3, *avx512f_vmfnmadd__maskz_1, *avx512f_vmfnmsub__mask, *avx512f_vmfnmsub__mask3, *avx512f_vmfnmasub__maskz_1): New define_insns. (avx512f_vmfmadd__maskz): New define_expand. 
* config/i386/avx512fintrin.h (_mm_mask_fmadd_sd, _mm_mask_fmadd_ss, _mm_mask3_fmadd_sd, _mm_mask3_fmadd_ss, _mm_maskz_fmadd_sd, _mm_maskz_fmadd_ss, _mm_mask_fmsub_sd, _mm_mask_fmsub_ss, _mm_mask3_fmsub_sd, _mm_mask3_fmsub_ss, _mm_maskz_fmsub_sd, _mm_maskz_fmsub_ss, _mm_mask_fnmadd_sd, _mm_mask_fnmadd_ss, _mm_mask3_fnmadd_sd, _mm_mask3_fnmadd_ss, _mm_maskz_fnmadd_sd, _mm_maskz_fnmadd_ss, _mm_mask_fnmsub_sd, _mm_mask_fnmsub_ss, _mm_mask3_fnmsub_sd, _mm_mask3_fnmsub_ss, _mm_maskz_fnmsub_sd, _mm_maskz_fnmsub_ss, _mm_mask_fmadd_round_sd, _mm_mask_fmadd_round_ss, _mm_mask3_fmadd_round_sd, _mm_mask3_fmadd_round_ss, _mm_maskz_fmadd_round_sd, _mm_maskz_fmadd_round_ss, _mm_mask_fmsub_round_sd, _mm_mask_fmsub_round_ss, _mm_mask3_fmsub_round_sd, _mm_mask3_fmsub_round_ss, _mm_maskz_fmsub_round_sd, _mm_maskz_fmsub_round_ss, _mm_mask_fnmadd_round_sd, _mm_mask_fnmadd_round_ss, _mm_mask3_fnmadd_round_sd, _mm_mask3_fnmadd_round_ss, _mm_maskz_fnmadd_round_sd, _mm_maskz_fnmadd_round_ss, _mm_mask_fnmsub_round_sd, _mm_mask_fnmsub_round_ss, _mm_mask3_fnmsub_round_sd, _mm_mask3_fnmsub_round_ss, _mm_maskz_fnmsub_round_sd, _mm_maskz_fnmsub_round_ss): New intrinsics. * gcc.target/i386/sse-13.c (__builtin_ia32_vfmaddsd3_mask, __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz, __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask, __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz, __builtin_ia32_vfmsubss3_mask3): Define. * gcc.target/i386/sse-23.c (__builtin_ia32_vfmaddsd3_mask, __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz, __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask, __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz, __builtin_ia32_vfmsubss3_mask3): Define. 
* gcc.target/i386/avx-1.c (__builtin_ia32_vfmaddsd3_mask, __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz, __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask, __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz, __builtin_ia32_vfmsubss3_mask3): Define. * gcc.target/i386/sse-14.c: Add tests for _mm_mask{,3,z}_f{,n}m{add,sub}_round_s{s,d} builtins. * gcc.target/i386/sse-22.c: Likewise. 2019-03-22 Hongtao Liu * gcc.target/i386/avx512f-vfmaddXXXsd-1.c (avx512f_test): Add tests for _mm_mask{,3,z}_*. * gcc.target/i386/avx512f-vfmaddXXXss-1.c (avx512f_test): Likewise. * gcc.target/i386/avx512f-vfmsubXXXsd-1.c (avx512f_test): Likewise. * gcc.target/i386/avx512f-vfmsubXXXss-1.c (avx512f_test): Likewise. * gcc.target/i386/avx512f-vfnmaddXXXsd-1.c (avx512f_test): Likewise. * gcc.target/i386/avx512f-vfnmaddXXXss-1.c (avx512f_test): Likewise. * gcc.target/i386/avx512f-vfnmsubXXXsd-1.c (avx512f_test): Likewise. * gcc.target/i386/avx512f-vfnmsubXXXss-1.c (avx512f_test): Likewise. * gcc.target/i386/avx512f-vfmaddXXXsd-2.c: New test. * gcc.target/i386/avx512f-vfmaddXXXss-2.c: New test. * gcc.target/i386/avx512f-vfmsubXXXsd-2.c: New test. * gcc.target/i386/avx512f-vfmsubXXXss-2.c: New test. * gcc.target/i386/avx512f-vfnmaddXXXsd-2.c: New test. * gcc.target/i386/avx512f-vfnmaddXXXss-2.c: New test. * gcc.target/i386/avx512f-vfnmsubXXXsd-2.c: New test. * gcc.target/i386/avx512f-vfnmsubXXXss-2.c: New test. 
From-SVN: r269868 --- gcc/ChangeLog | 44 ++ gcc/config/i386/avx512fintrin.h | 602 ++++++++++++++++++ gcc/config/i386/i386-builtin.def | 8 + gcc/config/i386/i386.c | 2 - gcc/config/i386/sse.md | 246 +++++++ gcc/testsuite/ChangeLog | 43 ++ gcc/testsuite/gcc.target/i386/avx-1.c | 8 + .../gcc.target/i386/avx512f-vfmaddXXXsd-1.c | 13 + .../gcc.target/i386/avx512f-vfmaddXXXsd-2.c | 94 +++ .../gcc.target/i386/avx512f-vfmaddXXXss-1.c | 13 + .../gcc.target/i386/avx512f-vfmaddXXXss-2.c | 94 +++ .../gcc.target/i386/avx512f-vfmsubXXXsd-1.c | 13 + .../gcc.target/i386/avx512f-vfmsubXXXsd-2.c | 94 +++ .../gcc.target/i386/avx512f-vfmsubXXXss-1.c | 13 + .../gcc.target/i386/avx512f-vfmsubXXXss-2.c | 94 +++ .../gcc.target/i386/avx512f-vfnmaddXXXsd-1.c | 13 + .../gcc.target/i386/avx512f-vfnmaddXXXsd-2.c | 94 +++ .../gcc.target/i386/avx512f-vfnmaddXXXss-1.c | 13 + .../gcc.target/i386/avx512f-vfnmaddXXXss-2.c | 94 +++ .../gcc.target/i386/avx512f-vfnmsubXXXsd-1.c | 13 + .../gcc.target/i386/avx512f-vfnmsubXXXsd-2.c | 94 +++ .../gcc.target/i386/avx512f-vfnmsubXXXss-1.c | 13 + .../gcc.target/i386/avx512f-vfnmsubXXXss-2.c | 94 +++ gcc/testsuite/gcc.target/i386/sse-13.c | 8 + gcc/testsuite/gcc.target/i386/sse-14.c | 24 + gcc/testsuite/gcc.target/i386/sse-22.c | 24 + gcc/testsuite/gcc.target/i386/sse-23.c | 8 + 27 files changed, 1871 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 
10aa8e5cfd6..6601be53197 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,47 @@ +2019-03-22 Jakub Jelinek + + PR target/89784 + * config/i386/i386.c (enum ix86_builtins): Remove + IX86_BUILTIN_VFMSUBSD3_MASK3 and IX86_BUILTIN_VFMSUBSS3_MASK3. + * config/i386/i386-builtin.def (__builtin_ia32_vfmaddsd3_mask, + __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz, + __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask, + __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz, + __builtin_ia32_vfmsubss3_mask3): New builtins. + * config/i386/sse.md (avx512f_vmfmadd__mask, + avx512f_vmfmadd__mask3, + avx512f_vmfmadd__maskz_1, + *avx512f_vmfmsub__mask, + avx512f_vmfmsub__mask3, + *avx512f_vmfmasub__maskz_1, + *avx512f_vmfnmadd__mask, + *avx512f_vmfnmadd__mask3, + *avx512f_vmfnmadd__maskz_1, + *avx512f_vmfnmsub__mask, + *avx512f_vmfnmsub__mask3, + *avx512f_vmfnmasub__maskz_1): New define_insns. + (avx512f_vmfmadd__maskz): New define_expand. + * config/i386/avx512fintrin.h (_mm_mask_fmadd_sd, _mm_mask_fmadd_ss, + _mm_mask3_fmadd_sd, _mm_mask3_fmadd_ss, _mm_maskz_fmadd_sd, + _mm_maskz_fmadd_ss, _mm_mask_fmsub_sd, _mm_mask_fmsub_ss, + _mm_mask3_fmsub_sd, _mm_mask3_fmsub_ss, _mm_maskz_fmsub_sd, + _mm_maskz_fmsub_ss, _mm_mask_fnmadd_sd, _mm_mask_fnmadd_ss, + _mm_mask3_fnmadd_sd, _mm_mask3_fnmadd_ss, _mm_maskz_fnmadd_sd, + _mm_maskz_fnmadd_ss, _mm_mask_fnmsub_sd, _mm_mask_fnmsub_ss, + _mm_mask3_fnmsub_sd, _mm_mask3_fnmsub_ss, _mm_maskz_fnmsub_sd, + _mm_maskz_fnmsub_ss, _mm_mask_fmadd_round_sd, _mm_mask_fmadd_round_ss, + _mm_mask3_fmadd_round_sd, _mm_mask3_fmadd_round_ss, + _mm_maskz_fmadd_round_sd, _mm_maskz_fmadd_round_ss, + _mm_mask_fmsub_round_sd, _mm_mask_fmsub_round_ss, + _mm_mask3_fmsub_round_sd, _mm_mask3_fmsub_round_ss, + _mm_maskz_fmsub_round_sd, _mm_maskz_fmsub_round_ss, + _mm_mask_fnmadd_round_sd, _mm_mask_fnmadd_round_ss, + _mm_mask3_fnmadd_round_sd, _mm_mask3_fnmadd_round_ss, + _mm_maskz_fnmadd_round_sd, _mm_maskz_fnmadd_round_ss, 
+ _mm_mask_fnmsub_round_sd, _mm_mask_fnmsub_round_ss, + _mm_mask3_fnmsub_round_sd, _mm_mask3_fnmsub_round_ss, + _mm_maskz_fnmsub_round_sd, _mm_maskz_fnmsub_round_ss): New intrinsics. + 2019-03-21 Martin Sebor PR tree-optimization/89350 diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 3a4e0ad3c32..afcba5dd042 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -11562,6 +11562,608 @@ _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R) #endif +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, + (__v2df) __A, + (__v2df) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, + (__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, + (__v2df) __A, + (__v2df) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, + (__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __W, 
__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, + (__v2df) __A, + (__v2df) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, + (__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, + (__v2df) __A, + -(__v2df) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, + (__v4sf) __A, + -(__v4sf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, + (__v2df) __A, + (__v2df) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, + (__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, + 
(__v2df) __A, + -(__v2df) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, + (__v4sf) __A, + -(__v4sf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, + -(__v2df) __A, + (__v2df) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, + -(__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, + -(__v2df) __A, + (__v2df) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, + -(__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, + -(__v2df) __A, + (__v2df) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + 
+extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, + -(__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, + -(__v2df) __A, + -(__v2df) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, + -(__v4sf) __A, + -(__v4sf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, + -(__v2df) __A, + (__v2df) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, + -(__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, + -(__v2df) __A, + -(__v2df) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, + -(__v4sf) __A, + -(__v4sf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, + (__v2df) __A, + (__v2df) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, + (__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U, + const int __R) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, + (__v2df) __A, + (__v2df) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U, + const int __R) +{ + return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, + (__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, + (__v2df) __A, + (__v2df) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __W, 
__m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, + (__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, + (__v2df) __A, + -(__v2df) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, + (__v4sf) __A, + -(__v4sf) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U, + const int __R) +{ + return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, + (__v2df) __A, + (__v2df) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U, + const int __R) +{ + return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, + (__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, + (__v2df) __A, + -(__v2df) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_vfmaddss3_maskz 
((__v4sf) __W, + (__v4sf) __A, + -(__v4sf) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, + -(__v2df) __A, + (__v2df) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, + -(__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U, + const int __R) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, + -(__v2df) __A, + (__v2df) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U, + const int __R) +{ + return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, + -(__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, + -(__v2df) __A, + (__v2df) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, + -(__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, __R); +} + +extern 
__inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, + -(__v2df) __A, + -(__v2df) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, + -(__v4sf) __A, + -(__v4sf) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U, + const int __R) +{ + return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, + -(__v2df) __A, + (__v2df) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U, + const int __R) +{ + return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, + -(__v4sf) __A, + (__v4sf) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, + -(__v2df) __A, + -(__v2df) __B, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, + -(__v4sf) __A, + -(__v4sf) __B, + (__mmask8) __U, __R); +} +#else +#define _mm_mask_fmadd_round_sd(A, U, B, C, R) \ + (__m128d) 
__builtin_ia32_vfmaddsd3_mask (A, B, C, U, R) + +#define _mm_mask_fmadd_round_ss(A, U, B, C, R) \ + (__m128) __builtin_ia32_vfmaddss3_mask (A, B, C, U, R) + +#define _mm_mask3_fmadd_round_sd(A, B, C, U, R) \ + (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R) + +#define _mm_mask3_fmadd_round_ss(A, B, C, U, R) \ + (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R) + +#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \ + (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R) + +#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \ + (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R) + +#define _mm_mask_fmsub_round_sd(A, U, B, C, R) \ + (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R) + +#define _mm_mask_fmsub_round_ss(A, U, B, C, R) \ + (__m128) __builtin_ia32_vfmaddss3_mask (A, B, -(C), U, R) + +#define _mm_mask3_fmsub_round_sd(A, B, C, U, R) \ + (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, B, C, U, R) + +#define _mm_mask3_fmsub_round_ss(A, B, C, U, R) \ + (__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R) + +#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \ + (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R) + +#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \ + (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R) + +#define _mm_mask_fnmadd_round_sd(A, U, B, C, R) \ + (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R) + +#define _mm_mask_fnmadd_round_ss(A, U, B, C, R) \ + (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), C, U, R) + +#define _mm_mask3_fnmadd_round_sd(A, B, C, U, R) \ + (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R) + +#define _mm_mask3_fnmadd_round_ss(A, B, C, U, R) \ + (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R) + +#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \ + (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R) + +#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \ + (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R) + +#define 
_mm_mask_fnmsub_round_sd(A, U, B, C, R) \ + (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R) + +#define _mm_mask_fnmsub_round_ss(A, U, B, C, R) \ + (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), -(C), U, R) + +#define _mm_mask3_fnmsub_round_sd(A, B, C, U, R) \ + (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, -(B), C, U, R) + +#define _mm_mask3_fnmsub_round_ss(A, B, C, U, R) \ + (__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R) + +#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \ + (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R) + +#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \ + (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R) +#endif + #ifdef __OPTIMIZE__ extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index eeef341726a..6580890edc0 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2827,6 +2827,14 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v2df_mask_round, "__builtin_ia32_vfmaddsd3_mask", IX86_BUILTIN_VFMADDSD3_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v2df_mask3_round, "__builtin_ia32_vfmaddsd3_mask3", IX86_BUILTIN_VFMADDSD3_MASK3, 
UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v2df_maskz_round, "__builtin_ia32_vfmaddsd3_maskz", IX86_BUILTIN_VFMADDSD3_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmsub_v2df_mask3_round, "__builtin_ia32_vfmsubsd3_mask3", IX86_BUILTIN_VFMSUBSD3_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask_round, "__builtin_ia32_vfmaddss3_mask", IX86_BUILTIN_VFMADDSS3_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask3_round, "__builtin_ia32_vfmaddss3_mask3", IX86_BUILTIN_VFMADDSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_maskz_round, "__builtin_ia32_vfmaddss3_maskz", IX86_BUILTIN_VFMADDSS3_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmsub_v4sf_mask3_round, "__builtin_ia32_vfmsubss3_mask3", IX86_BUILTIN_VFMSUBSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 184c5c18718..b5f20f1597e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -30524,8 +30524,6 @@ enum 
ix86_builtins IX86_BUILTIN_GATHERSIV8SI, IX86_BUILTIN_GATHERDIV4SI, IX86_BUILTIN_GATHERDIV8SI, - IX86_BUILTIN_VFMSUBSD3_MASK3, - IX86_BUILTIN_VFMSUBSS3_MASK3, IX86_BUILTIN_GATHER3SIV8SF, IX86_BUILTIN_GATHER3SIV4SF, IX86_BUILTIN_GATHER3SIV4DF, diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 86bf8661808..dc588ec1ccb 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4643,6 +4643,252 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) +(define_insn "avx512f_vmfmadd__mask" + [(set (match_operand:VF_128 0 "register_operand" "=v,v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "register_operand" "0,0") + (match_operand:VF_128 2 "" ",v") + (match_operand:VF_128 3 "" "v,")) + (match_dup 1) + (match_operand:QI 4 "register_operand" "Yk,Yk")) + (match_dup 1) + (const_int 1)))] + "TARGET_AVX512F" + "@ + vfmadd132\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfmadd213\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + +(define_insn "avx512f_vmfmadd__mask3" + [(set (match_operand:VF_128 0 "register_operand" "=v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "register_operand" "v") + (match_operand:VF_128 2 "" "") + (match_operand:VF_128 3 "register_operand" "0")) + (match_dup 3) + (match_operand:QI 4 "register_operand" "Yk")) + (match_dup 3) + (const_int 1)))] + "TARGET_AVX512F" + "vfmadd231\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + +(define_expand "avx512f_vmfmadd__maskz" + [(match_operand:VF_128 0 "register_operand") + (match_operand:VF_128 1 "") + (match_operand:VF_128 2 "") + (match_operand:VF_128 3 "") + (match_operand:QI 4 "register_operand")] + "TARGET_AVX512F" +{ + emit_insn (gen_avx512f_vmfmadd__maskz_1 ( + operands[0], operands[1], operands[2], operands[3], + CONST0_RTX (mode), operands[4])); + DONE; +}) + +(define_insn "avx512f_vmfmadd__maskz_1" + [(set 
(match_operand:VF_128 0 "register_operand" "=v,v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "register_operand" "0,0") + (match_operand:VF_128 2 "" ",v") + (match_operand:VF_128 3 "" "v,")) + (match_operand:VF_128 4 "const0_operand" "C,C") + (match_operand:QI 5 "register_operand" "Yk,Yk")) + (match_dup 1) + (const_int 1)))] + "TARGET_AVX512F" + "@ + vfmadd132\t{%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %3, %2} + vfmadd213\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + +(define_insn "*avx512f_vmfmsub__mask" + [(set (match_operand:VF_128 0 "register_operand" "=v,v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "register_operand" "0,0") + (match_operand:VF_128 2 "" ",v") + (neg:VF_128 + (match_operand:VF_128 3 "" "v,"))) + (match_dup 1) + (match_operand:QI 4 "register_operand" "Yk,Yk")) + (match_dup 1) + (const_int 1)))] + "TARGET_AVX512F" + "@ + vfmsub132\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfmsub213\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + +(define_insn "avx512f_vmfmsub__mask3" + [(set (match_operand:VF_128 0 "register_operand" "=v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "register_operand" "v") + (match_operand:VF_128 2 "" "") + (neg:VF_128 + (match_operand:VF_128 3 "register_operand" "0"))) + (match_dup 3) + (match_operand:QI 4 "register_operand" "Yk")) + (match_dup 3) + (const_int 1)))] + "TARGET_AVX512F" + "vfmsub231\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + +(define_insn "*avx512f_vmfmsub__maskz_1" + [(set (match_operand:VF_128 0 "register_operand" "=v,v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "register_operand" "0,0") + (match_operand:VF_128 2 "" ",v") + (neg:VF_128 + (match_operand:VF_128 3 "" "v,"))) + (match_operand:VF_128 4 
"const0_operand" "C,C") + (match_operand:QI 5 "register_operand" "Yk,Yk")) + (match_dup 1) + (const_int 1)))] + "TARGET_AVX512F" + "@ + vfmsub132\t{%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %3, %2} + vfmsub213\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + +(define_insn "*avx512f_vmfnmadd__mask" + [(set (match_operand:VF_128 0 "register_operand" "=v,v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (fma:VF_128 + (neg:VF_128 + (match_operand:VF_128 2 "" ",v")) + (match_operand:VF_128 1 "register_operand" "0,0") + (match_operand:VF_128 3 "" "v,")) + (match_dup 1) + (match_operand:QI 4 "register_operand" "Yk,Yk")) + (match_dup 1) + (const_int 1)))] + "TARGET_AVX512F" + "@ + vfnmadd132\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfnmadd213\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + +(define_insn "*avx512f_vmfnmadd__mask3" + [(set (match_operand:VF_128 0 "register_operand" "=v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (fma:VF_128 + (neg:VF_128 + (match_operand:VF_128 2 "" "")) + (match_operand:VF_128 1 "register_operand" "v") + (match_operand:VF_128 3 "register_operand" "0")) + (match_dup 3) + (match_operand:QI 4 "register_operand" "Yk")) + (match_dup 3) + (const_int 1)))] + "TARGET_AVX512F" + "vfnmadd231\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + +(define_insn "*avx512f_vmfnmadd__maskz_1" + [(set (match_operand:VF_128 0 "register_operand" "=v,v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (fma:VF_128 + (neg:VF_128 + (match_operand:VF_128 2 "" ",v")) + (match_operand:VF_128 1 "register_operand" "0,0") + (match_operand:VF_128 3 "" "v,")) + (match_operand:VF_128 4 "const0_operand" "C,C") + (match_operand:QI 5 "register_operand" "Yk,Yk")) + (match_dup 1) + (const_int 1)))] + "TARGET_AVX512F" + "@ + vfnmadd132\t{%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %3, %2} + vfnmadd213\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}" + 
[(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + +(define_insn "*avx512f_vmfnmsub__mask" + [(set (match_operand:VF_128 0 "register_operand" "=v,v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (fma:VF_128 + (neg:VF_128 + (match_operand:VF_128 2 "" ",v")) + (match_operand:VF_128 1 "register_operand" "0,0") + (neg:VF_128 + (match_operand:VF_128 3 "" "v,"))) + (match_dup 1) + (match_operand:QI 4 "register_operand" "Yk,Yk")) + (match_dup 1) + (const_int 1)))] + "TARGET_AVX512F" + "@ + vfnmsub132\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfnmsub213\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + +(define_insn "avx512f_vmfnmsub__mask3" + [(set (match_operand:VF_128 0 "register_operand" "=v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (fma:VF_128 + (neg:VF_128 + (match_operand:VF_128 2 "" "")) + (match_operand:VF_128 1 "register_operand" "v") + (neg:VF_128 + (match_operand:VF_128 3 "register_operand" "0"))) + (match_dup 3) + (match_operand:QI 4 "register_operand" "Yk")) + (match_dup 3) + (const_int 1)))] + "TARGET_AVX512F" + "vfnmsub231\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + +(define_insn "*avx512f_vmfnmsub__maskz_1" + [(set (match_operand:VF_128 0 "register_operand" "=v,v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (fma:VF_128 + (neg:VF_128 + (match_operand:VF_128 2 "" ",v")) + (match_operand:VF_128 1 "register_operand" "0,0") + (neg:VF_128 + (match_operand:VF_128 3 "" "v,"))) + (match_operand:VF_128 4 "const0_operand" "C,C") + (match_operand:QI 5 "register_operand" "Yk,Yk")) + (match_dup 1) + (const_int 1)))] + "TARGET_AVX512F" + "@ + vfnmsub132\t{%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %3, %2} + vfnmsub213\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + ;; FMA4 floating point scalar intrinsics. These write the ;; entire destination register, with the high-order elements zeroed. 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 076f79537f5..ac41542e226 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,46 @@ +2019-03-22 Hongtao Liu + + PR target/89784 + * gcc.target/i386/avx512f-vfmaddXXXsd-1.c (avx512f_test): Add tests + for _mm_mask{,3,z}_*. + * gcc.target/i386/avx512f-vfmaddXXXss-1.c (avx512f_test): Likewise. + * gcc.target/i386/avx512f-vfmsubXXXsd-1.c (avx512f_test): Likewise. + * gcc.target/i386/avx512f-vfmsubXXXss-1.c (avx512f_test): Likewise. + * gcc.target/i386/avx512f-vfnmaddXXXsd-1.c (avx512f_test): Likewise. + * gcc.target/i386/avx512f-vfnmaddXXXss-1.c (avx512f_test): Likewise. + * gcc.target/i386/avx512f-vfnmsubXXXsd-1.c (avx512f_test): Likewise. + * gcc.target/i386/avx512f-vfnmsubXXXss-1.c (avx512f_test): Likewise. + * gcc.target/i386/avx512f-vfmaddXXXsd-2.c: New test. + * gcc.target/i386/avx512f-vfmaddXXXss-2.c: New test. + * gcc.target/i386/avx512f-vfmsubXXXsd-2.c: New test. + * gcc.target/i386/avx512f-vfmsubXXXss-2.c: New test. + * gcc.target/i386/avx512f-vfnmaddXXXsd-2.c: New test. + * gcc.target/i386/avx512f-vfnmaddXXXss-2.c: New test. + * gcc.target/i386/avx512f-vfnmsubXXXsd-2.c: New test. + * gcc.target/i386/avx512f-vfnmsubXXXss-2.c: New test. + +2019-03-22 Jakub Jelinek + + PR target/89784 + * gcc.target/i386/sse-13.c (__builtin_ia32_vfmaddsd3_mask, + __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz, + __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask, + __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz, + __builtin_ia32_vfmsubss3_mask3): Define. + * gcc.target/i386/sse-23.c (__builtin_ia32_vfmaddsd3_mask, + __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz, + __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask, + __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz, + __builtin_ia32_vfmsubss3_mask3): Define. 
+ * gcc.target/i386/avx-1.c (__builtin_ia32_vfmaddsd3_mask, + __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz, + __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask, + __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz, + __builtin_ia32_vfmsubss3_mask3): Define. + * gcc.target/i386/sse-14.c: Add tests for + _mm_mask{,3,z}_f{,n}m{add,sub}_round_s{s,d} builtins. + * gcc.target/i386/sse-22.c: Likewise. + 2019-03-21 Martin Sebor PR tree-optimization/89350 diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index f67bc5f5044..e09f5a17984 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -373,6 +373,14 @@ #define __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, 8) #define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D) #define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D) +#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 8) /* avx512erintrin.h */ #define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask(A, B, C, 8) diff --git 
a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c index d88286fc50d..d6f9015327b 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c @@ -1,13 +1,26 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vfmadd231sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd231sd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> volatile __m128d a, b, c; +volatile __mmask8 m; void extern avx512f_test (void) { + a = _mm_mask_fmadd_sd (a, m, b, c); + c = _mm_mask3_fmadd_sd (a, b, c, m); + a = _mm_maskz_fmadd_sd (m, a, b, c); a = _mm_fmadd_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + a = _mm_mask_fmadd_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + c = _mm_mask3_fmadd_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); + a = _mm_maskz_fmadd_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-2.c new file mode 100644 index 00000000000..6e01bd0619b --- 
/dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-2.c @@ -0,0 +1,94 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 64) +#include "avx512f-mask-type.h" + +static void +calc_add (double *s1, double *s2, double *s3, double* r) +{ + r[0] = s1[0] * s2[0] + s3[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +static void +calc_add_3 (double *s1, double *s2, double *s3, double* r) +{ + r[0] = s2[0] * s3[0] + s1[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +void +avx512f_test (void) +{ + int i, sign; + union128d res1, res2, res3, res4, res5, res6, res7, src1, src2, src3; + MASK_TYPE mask = MASK_VALUE; + double res_ref1[SIZE], res_ref2[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = DEFAULT_VALUE; + src2.a[i] = 56.78 * (i + 1) * sign; + src3.a[i] = 90.12 * (i + 2) * sign; + sign = sign * -1; + } + for (i = 0; i < SIZE; i++) + { + res1.a[i] = DEFAULT_VALUE; + res2.a[i] = DEFAULT_VALUE; + res5.a[i] = DEFAULT_VALUE; + res6.a[i] = DEFAULT_VALUE; + } + + calc_add (src1.a, src2.a, src3.a, res_ref1); + calc_add_3(src1.a, src2.a, src3.a, res_ref2); + + res1.x = _mm_mask_fmadd_sd (src1.x, mask, src2.x, src3.x); + res2.x = _mm_mask3_fmadd_sd (src2.x, src3.x, src1.x, mask); + res3.x = _mm_maskz_fmadd_sd (mask, src1.x, src2.x, src3.x); + res4.x = _mm_fmadd_round_sd (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + res5.x = _mm_mask_fmadd_round_sd (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC); + res6.x = _mm_mask3_fmadd_round_sd (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC); + res7.x = _mm_maskz_fmadd_round_sd (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + + if (check_union128d (res4, res_ref1)) + abort(); + + MASK_ZERO (d) (res_ref1, mask, 1); + if (check_union128d (res3, res_ref1)) + abort (); + + MASK_ZERO (d) (res_ref1, mask, 1); + if (check_union128d 
(res7, res_ref1)) + abort (); + + MASK_MERGE (d) (res_ref2, mask, 1); + if (check_union128d (res2, res_ref2)) + abort (); + + MASK_MERGE (d) (res_ref2, mask, 1); + if (check_union128d (res6, res_ref2)) + abort (); + + MASK_MERGE (d) (res_ref1, mask, 1); + if (check_union128d (res1, res_ref1)) + abort (); + + MASK_MERGE (d) (res_ref1, mask, 1); + if (check_union128d (res5, res_ref1)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c index e1aa388638f..e50b5b586c5 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c @@ -1,13 +1,26 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vfmadd231ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd231ss\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include volatile __m128 a, b, c; +volatile __mmask8 m; void extern avx512f_test (void) { + a = _mm_mask_fmadd_ss (a, m, b, c); + c = _mm_mask3_fmadd_ss (a, b, c, m); + a = _mm_maskz_fmadd_ss (m, a, b, c); a = _mm_fmadd_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + a = 
_mm_mask_fmadd_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + c = _mm_mask3_fmadd_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); + a = _mm_maskz_fmadd_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-2.c new file mode 100644 index 00000000000..4fb7a08fa7a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-2.c @@ -0,0 +1,94 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 32) +#include "avx512f-mask-type.h" + +static void +calc_add (float *s1, float *s2, float *s3, float* r) +{ + r[0] = s1[0] * s2[0] + s3[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +static void +calc_add_3 (float *s1, float *s2, float *s3, float* r) +{ + r[0] = s2[0] * s3[0] + s1[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +void +avx512f_test (void) +{ + int i, sign; + union128 res1, res2, res3, res4, res5, res6, res7, src1, src2, src3; + MASK_TYPE mask = MASK_VALUE; + float res_ref1[SIZE], res_ref2[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = DEFAULT_VALUE; + src2.a[i] = 56.78 * (i + 1) * sign; + src3.a[i] = 90.12 * (i + 2) * sign; + sign = sign * -1; + } + for (i = 0; i < SIZE; i++) + { + res1.a[i] = DEFAULT_VALUE; + res2.a[i] = DEFAULT_VALUE; + res5.a[i] = DEFAULT_VALUE; + res6.a[i] = DEFAULT_VALUE; + } + + calc_add (src1.a, src2.a, src3.a, res_ref1); + calc_add_3(src1.a, src2.a, src3.a, res_ref2); + + res1.x = _mm_mask_fmadd_ss (src1.x, mask, src2.x, src3.x); + res2.x = _mm_mask3_fmadd_ss (src2.x, src3.x, src1.x, mask); + res3.x = _mm_maskz_fmadd_ss (mask, src1.x, src2.x, src3.x); + res4.x = _mm_fmadd_round_ss (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + res5.x = _mm_mask_fmadd_round_ss (src1.x, 
mask, src2.x, src3.x, _MM_FROUND_NO_EXC); + res6.x = _mm_mask3_fmadd_round_ss (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC); + res7.x = _mm_maskz_fmadd_round_ss (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + + if (check_union128 (res4, res_ref1)) + abort(); + + MASK_ZERO () (res_ref1, mask, 1); + if (check_union128 (res3, res_ref1)) + abort (); + + MASK_ZERO () (res_ref1, mask, 1); + if (check_union128 (res7, res_ref1)) + abort (); + + MASK_MERGE () (res_ref2, mask, 1); + if (check_union128 (res2, res_ref2)) + abort (); + + MASK_MERGE () (res_ref2, mask, 1); + if (check_union128 (res6, res_ref2)) + abort (); + + MASK_MERGE () (res_ref1, mask, 1); + if (check_union128 (res1, res_ref1)) + abort (); + + MASK_MERGE () (res_ref1, mask, 1); + if (check_union128 (res5, res_ref1)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c index bb3ec6018ef..15affb880ef 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c @@ -1,13 +1,26 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vfmsub231sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub231sd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times 
"vfmsub...sd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include volatile __m128d a, b, c; +volatile __mmask8 m; void extern avx512f_test (void) { + a = _mm_mask_fmsub_sd (a, m, b, c); + c = _mm_mask3_fmsub_sd (a, b, c, m); + a = _mm_maskz_fmsub_sd (m, a, b, c); a = _mm_fmsub_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + a = _mm_mask_fmsub_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + c = _mm_mask3_fmsub_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); + a = _mm_maskz_fmsub_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-2.c new file mode 100644 index 00000000000..701db187bbd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-2.c @@ -0,0 +1,94 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 64) +#include "avx512f-mask-type.h" + +static void +calc_add (double *s1, double *s2, double *s3, double* r) +{ + r[0] = s1[0] * s2[0] - s3[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +static void +calc_add_3 (double *s1, double *s2, double *s3, double* r) +{ + r[0] = s2[0] * s3[0] - s1[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +void +avx512f_test (void) +{ + int i, sign; + union128d res1, res2, res3, res4, res5, res6, res7, src1, src2, src3; + MASK_TYPE mask = MASK_VALUE; + double res_ref1[SIZE], res_ref2[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = DEFAULT_VALUE; + src2.a[i] = 56.78 * (i + 1) * sign; + src3.a[i] = 90.12 * (i + 2) * sign; + sign = sign * -1; + } + for (i = 0; i < SIZE; i++) + { + res1.a[i] = DEFAULT_VALUE; + res2.a[i] = DEFAULT_VALUE; + res5.a[i] = DEFAULT_VALUE; + res6.a[i] = 
DEFAULT_VALUE; + } + + calc_add (src1.a, src2.a, src3.a, res_ref1); + calc_add_3(src1.a, src2.a, src3.a, res_ref2); + + res1.x = _mm_mask_fmsub_sd (src1.x, mask, src2.x, src3.x); + res2.x = _mm_mask3_fmsub_sd (src2.x, src3.x, src1.x, mask); + res3.x = _mm_maskz_fmsub_sd (mask, src1.x, src2.x, src3.x); + res4.x = _mm_fmsub_round_sd (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + res5.x = _mm_mask_fmsub_round_sd (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC); + res6.x = _mm_mask3_fmsub_round_sd (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC); + res7.x = _mm_maskz_fmsub_round_sd (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + + if (check_union128d (res4, res_ref1)) + abort(); + + MASK_ZERO (d) (res_ref1, mask, 1); + if (check_union128d (res3, res_ref1)) + abort (); + + MASK_ZERO (d) (res_ref1, mask, 1); + if (check_union128d (res7, res_ref1)) + abort (); + + MASK_MERGE (d) (res_ref2, mask, 1); + if (check_union128d (res2, res_ref2)) + abort (); + + MASK_MERGE (d) (res_ref2, mask, 1); + if (check_union128d (res6, res_ref2)) + abort (); + + MASK_MERGE (d) (res_ref1, mask, 1); + if (check_union128d (res1, res_ref1)) + abort (); + + MASK_MERGE (d) (res_ref1, mask, 1); + if (check_union128d (res5, res_ref1)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c index 7e521fc9deb..34e444e3e81 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c @@ -1,13 +1,26 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vfmsub231ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { 
scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub231ss\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include volatile __m128 a, b, c; +volatile __mmask8 m; void extern avx512f_test (void) { + a = _mm_mask_fmsub_ss (a, m, b, c); + c = _mm_mask3_fmsub_ss (a, b, c, m); + a = _mm_maskz_fmsub_ss (m, a, b, c); a = _mm_fmsub_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + a = _mm_mask_fmsub_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + c = _mm_mask3_fmsub_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); + a = _mm_maskz_fmsub_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c new file mode 100644 index 00000000000..6beaed87898 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c @@ -0,0 +1,94 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 32) +#include "avx512f-mask-type.h" + +static void +calc_add (float *s1, float *s2, float *s3, float* r) +{ + r[0] = s1[0] * s2[0] - s3[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +static void +calc_add_3 (float *s1, float *s2, float *s3, float* r) +{ + r[0] = s2[0] * s3[0] - s1[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +void +avx512f_test (void) +{ + int i, sign; + union128 res1, res2, res3, res4, res5, res6, 
res7, src1, src2, src3; + MASK_TYPE mask = MASK_VALUE; + float res_ref1[SIZE], res_ref2[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = DEFAULT_VALUE; + src2.a[i] = 56.78 * (i + 1) * sign; + src3.a[i] = 90.12 * (i + 2) * sign; + sign = sign * -1; + } + for (i = 0; i < SIZE; i++) + { + res1.a[i] = DEFAULT_VALUE; + res2.a[i] = DEFAULT_VALUE; + res5.a[i] = DEFAULT_VALUE; + res6.a[i] = DEFAULT_VALUE; + } + + calc_add (src1.a, src2.a, src3.a, res_ref1); + calc_add_3(src1.a, src2.a, src3.a, res_ref2); + + res1.x = _mm_mask_fmsub_ss (src1.x, mask, src2.x, src3.x); + res2.x = _mm_mask3_fmsub_ss (src2.x, src3.x, src1.x, mask); + res3.x = _mm_maskz_fmsub_ss (mask, src1.x, src2.x, src3.x); + res4.x = _mm_fmsub_round_ss (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + res5.x = _mm_mask_fmsub_round_ss (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC); + res6.x = _mm_mask3_fmsub_round_ss (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC); + res7.x = _mm_maskz_fmsub_round_ss (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + + if (check_union128 (res4, res_ref1)) + abort(); + + MASK_ZERO () (res_ref1, mask, 1); + if (check_union128 (res3, res_ref1)) + abort (); + + MASK_ZERO () (res_ref1, mask, 1); + if (check_union128 (res7, res_ref1)) + abort (); + + MASK_MERGE () (res_ref2, mask, 1); + if (check_union128 (res2, res_ref2)) + abort (); + + MASK_MERGE () (res_ref2, mask, 1); + if (check_union128 (res6, res_ref2)) + abort (); + + MASK_MERGE () (res_ref1, mask, 1); + if (check_union128 (res1, res_ref1)) + abort (); + + MASK_MERGE () (res_ref1, mask, 1); + if (check_union128 (res5, res_ref1)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c index e23101f2e29..09b923e3951 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c @@ -1,13 +1,26 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } 
*/ +/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vfnmadd231sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd231sd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include volatile __m128d a, b, c; +volatile __mmask8 m; void extern avx512f_test (void) { + a = _mm_mask_fnmadd_sd (a, m, b, c); + c = _mm_mask3_fnmadd_sd (a, b, c, m); + a = _mm_maskz_fnmadd_sd (m, a, b, c); a = _mm_fnmadd_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + a = _mm_mask_fnmadd_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + c = _mm_mask3_fnmadd_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); + a = _mm_maskz_fnmadd_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-2.c new file mode 100644 index 00000000000..4715f2dc892 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-2.c @@ -0,0 +1,94 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 64) +#include "avx512f-mask-type.h" + +static void 
+calc_add (double *s1, double *s2, double *s3, double* r) +{ + r[0] = -s1[0] * s2[0] + s3[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +static void +calc_add_3 (double *s1, double *s2, double *s3, double* r) +{ + r[0] = -s2[0] * s3[0] + s1[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +void +avx512f_test (void) +{ + int i, sign; + union128d res1, res2, res3, res4, res5, res6, res7, src1, src2, src3; + MASK_TYPE mask = MASK_VALUE; + double res_ref1[SIZE], res_ref2[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = DEFAULT_VALUE; + src2.a[i] = 56.78 * (i + 1) * sign; + src3.a[i] = 90.12 * (i + 2) * sign; + sign = sign * -1; + } + for (i = 0; i < SIZE; i++) + { + res1.a[i] = DEFAULT_VALUE; + res2.a[i] = DEFAULT_VALUE; + res5.a[i] = DEFAULT_VALUE; + res6.a[i] = DEFAULT_VALUE; + } + + calc_add (src1.a, src2.a, src3.a, res_ref1); + calc_add_3(src1.a, src2.a, src3.a, res_ref2); + + res1.x = _mm_mask_fnmadd_sd (src1.x, mask, src2.x, src3.x); + res2.x = _mm_mask3_fnmadd_sd (src2.x, src3.x, src1.x, mask); + res3.x = _mm_maskz_fnmadd_sd (mask, src1.x, src2.x, src3.x); + res4.x = _mm_fnmadd_round_sd (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + res5.x = _mm_mask_fnmadd_round_sd (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC); + res6.x = _mm_mask3_fnmadd_round_sd (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC); + res7.x = _mm_maskz_fnmadd_round_sd (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + + if (check_union128d (res4, res_ref1)) + abort(); + + MASK_ZERO (d) (res_ref1, mask, 1); + if (check_union128d (res3, res_ref1)) + abort (); + + MASK_ZERO (d) (res_ref1, mask, 1); + if (check_union128d (res7, res_ref1)) + abort (); + + MASK_MERGE (d) (res_ref2, mask, 1); + if (check_union128d (res2, res_ref2)) + abort (); + + MASK_MERGE (d) (res_ref2, mask, 1); + if (check_union128d (res6, res_ref2)) + abort (); + + MASK_MERGE (d) (res_ref1, mask, 1); + if (check_union128d (res1, res_ref1)) + abort (); + + MASK_MERGE (d) 
(res_ref1, mask, 1); + if (check_union128d (res5, res_ref1)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c index c46944c5b9e..85e8d00fa66 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c @@ -1,13 +1,26 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vfnmadd231ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd231ss\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include volatile __m128 a, b, c; +volatile __mmask8 m; void extern avx512f_test (void) { + a = _mm_mask_fnmadd_ss (a, m, b, c); + c = _mm_mask3_fnmadd_ss (a, b, c, m); + a = _mm_maskz_fnmadd_ss (m, a, b, c); a = _mm_fnmadd_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + a = _mm_mask_fnmadd_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + c = _mm_mask3_fnmadd_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); + a = _mm_maskz_fnmadd_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c 
b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c new file mode 100644 index 00000000000..e94b7d4aed4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c @@ -0,0 +1,94 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 32) +#include "avx512f-mask-type.h" + +static void +calc_add (float *s1, float *s2, float *s3, float* r) +{ + r[0] = -s1[0] * s2[0] + s3[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +static void +calc_add_3 (float *s1, float *s2, float *s3, float* r) +{ + r[0] = -s2[0] * s3[0] + s1[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +void +avx512f_test (void) +{ + int i, sign; + union128 res1, res2, res3, res4, res5, res6, res7, src1, src2, src3; + MASK_TYPE mask = MASK_VALUE; + float res_ref1[SIZE], res_ref2[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = DEFAULT_VALUE; + src2.a[i] = 56.78 * (i + 1) * sign; + src3.a[i] = 90.12 * (i + 2) * sign; + sign = sign * -1; + } + for (i = 0; i < SIZE; i++) + { + res1.a[i] = DEFAULT_VALUE; + res2.a[i] = DEFAULT_VALUE; + res5.a[i] = DEFAULT_VALUE; + res6.a[i] = DEFAULT_VALUE; + } + + calc_add (src1.a, src2.a, src3.a, res_ref1); + calc_add_3(src1.a, src2.a, src3.a, res_ref2); + + res1.x = _mm_mask_fnmadd_ss (src1.x, mask, src2.x, src3.x); + res2.x = _mm_mask3_fnmadd_ss (src2.x, src3.x, src1.x, mask); + res3.x = _mm_maskz_fnmadd_ss (mask, src1.x, src2.x, src3.x); + res4.x = _mm_fnmadd_round_ss (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + res5.x = _mm_mask_fnmadd_round_ss (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC); + res6.x = _mm_mask3_fnmadd_round_ss (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC); + res7.x = _mm_maskz_fnmadd_round_ss (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + + if (check_union128 (res4, res_ref1)) + abort(); + + MASK_ZERO () (res_ref1, mask, 1); + if 
(check_union128 (res3, res_ref1)) + abort (); + + MASK_ZERO () (res_ref1, mask, 1); + if (check_union128 (res7, res_ref1)) + abort (); + + MASK_MERGE () (res_ref2, mask, 1); + if (check_union128 (res2, res_ref2)) + abort (); + + MASK_MERGE () (res_ref2, mask, 1); + if (check_union128 (res6, res_ref2)) + abort (); + + MASK_MERGE () (res_ref1, mask, 1); + if (check_union128 (res1, res_ref1)) + abort (); + + MASK_MERGE () (res_ref1, mask, 1); + if (check_union128 (res5, res_ref1)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c index 33b68e17372..4553e0d3772 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c @@ -1,13 +1,26 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vfnmsub231sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub231sd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> volatile __m128d a, b, c; +volatile __mmask8 m; void extern avx512f_test (void) { + a = _mm_mask_fnmsub_sd (a, m, b, c); + c = _mm_mask3_fnmsub_sd (a, b, c, m); + a = _mm_maskz_fnmsub_sd (m, a, b,
c); a = _mm_fnmsub_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + a = _mm_mask_fnmsub_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + c = _mm_mask3_fnmsub_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); + a = _mm_maskz_fnmsub_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-2.c new file mode 100644 index 00000000000..ecb4bd585bc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-2.c @@ -0,0 +1,94 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 64) +#include "avx512f-mask-type.h" + +static void +calc_add (double *s1, double *s2, double *s3, double* r) +{ + r[0] = -s1[0] * s2[0] - s3[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +static void +calc_add_3 (double *s1, double *s2, double *s3, double* r) +{ + r[0] = -s2[0] * s3[0] - s1[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +void +avx512f_test (void) +{ + int i, sign; + union128d res1, res2, res3, res4, res5, res6, res7, src1, src2, src3; + MASK_TYPE mask = MASK_VALUE; + double res_ref1[SIZE], res_ref2[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = DEFAULT_VALUE; + src2.a[i] = 56.78 * (i + 1) * sign; + src3.a[i] = 90.12 * (i + 2) * sign; + sign = sign * -1; + } + for (i = 0; i < SIZE; i++) + { + res1.a[i] = DEFAULT_VALUE; + res2.a[i] = DEFAULT_VALUE; + res5.a[i] = DEFAULT_VALUE; + res6.a[i] = DEFAULT_VALUE; + } + + calc_add (src1.a, src2.a, src3.a, res_ref1); + calc_add_3(src1.a, src2.a, src3.a, res_ref2); + + res1.x = _mm_mask_fnmsub_sd (src1.x, mask, src2.x, src3.x); + res2.x = _mm_mask3_fnmsub_sd (src2.x, src3.x, src1.x, mask); + res3.x = _mm_maskz_fnmsub_sd (mask, src1.x, src2.x, src3.x); + 
res4.x = _mm_fnmsub_round_sd (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + res5.x = _mm_mask_fnmsub_round_sd (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC); + res6.x = _mm_mask3_fnmsub_round_sd (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC); + res7.x = _mm_maskz_fnmsub_round_sd (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + + if (check_union128d (res4, res_ref1)) + abort(); + + MASK_ZERO (d) (res_ref1, mask, 1); + if (check_union128d (res3, res_ref1)) + abort (); + + MASK_ZERO (d) (res_ref1, mask, 1); + if (check_union128d (res7, res_ref1)) + abort (); + + MASK_MERGE (d) (res_ref2, mask, 1); + if (check_union128d (res2, res_ref2)) + abort (); + + MASK_MERGE (d) (res_ref2, mask, 1); + if (check_union128d (res6, res_ref2)) + abort (); + + MASK_MERGE (d) (res_ref1, mask, 1); + if (check_union128d (res1, res_ref1)) + abort (); + + MASK_MERGE (d) (res_ref1, mask, 1); + if (check_union128d (res5, res_ref1)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c index cd7b52007bc..f86ae2d4417 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c @@ -1,13 +1,26 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vfnmsub231ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times
"vfnmsub231ss\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> volatile __m128 a, b, c; +volatile __mmask8 m; void extern avx512f_test (void) { + a = _mm_mask_fnmsub_ss (a, m, b, c); + c = _mm_mask3_fnmsub_ss (a, b, c, m); + a = _mm_maskz_fnmsub_ss (m, a, b, c); a = _mm_fnmsub_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + a = _mm_mask_fnmsub_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + c = _mm_mask3_fnmsub_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); + a = _mm_maskz_fnmsub_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-2.c new file mode 100644 index 00000000000..d092988e453 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-2.c @@ -0,0 +1,94 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#include "avx512f-check.h" + +#include "avx512f-helper.h" + +#define SIZE (128 / 32) +#include "avx512f-mask-type.h" + +static void +calc_add (float *s1, float *s2, float *s3, float* r) +{ + r[0] = -s1[0] * s2[0] - s3[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +static void +calc_add_3 (float *s1, float *s2, float *s3, float* r) +{ + r[0] = -s2[0] * s3[0] - s1[0]; + + int i; + for (i = 1; i < SIZE; i++) + r[i] = s1[i]; +} + +void +avx512f_test (void) +{ + int i, sign; + union128 res1, res2, res3, res4, res5, res6, res7, src1, src2, src3; + MASK_TYPE mask = MASK_VALUE; + float res_ref1[SIZE], res_ref2[SIZE]; + + sign = -1; + for (i = 0; i < SIZE; i++) + { + src1.a[i] = DEFAULT_VALUE; + src2.a[i] = 56.78 * (i + 1) * sign; + src3.a[i] = 90.12 * (i + 2) * sign; + sign = sign * -1; + }
for (i = 0; i < SIZE; i++) + { + res1.a[i] = DEFAULT_VALUE; + res2.a[i] = DEFAULT_VALUE; + res5.a[i] = DEFAULT_VALUE; + res6.a[i] = DEFAULT_VALUE; + } + + calc_add (src1.a, src2.a, src3.a, res_ref1); + calc_add_3(src1.a, src2.a, src3.a, res_ref2); + + res1.x = _mm_mask_fnmsub_ss (src1.x, mask, src2.x, src3.x); + res2.x = _mm_mask3_fnmsub_ss (src2.x, src3.x, src1.x, mask); + res3.x = _mm_maskz_fnmsub_ss (mask, src1.x, src2.x, src3.x); + res4.x = _mm_fnmsub_round_ss (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + res5.x = _mm_mask_fnmsub_round_ss (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC); + res6.x = _mm_mask3_fnmsub_round_ss (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC); + res7.x = _mm_maskz_fnmsub_round_ss (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); + + if (check_union128 (res4, res_ref1)) + abort(); + + MASK_ZERO () (res_ref1, mask, 1); + if (check_union128 (res3, res_ref1)) + abort (); + + MASK_ZERO () (res_ref1, mask, 1); + if (check_union128 (res7, res_ref1)) + abort (); + + MASK_MERGE () (res_ref2, mask, 1); + if (check_union128 (res2, res_ref2)) + abort (); + + MASK_MERGE () (res_ref2, mask, 1); + if (check_union128 (res6, res_ref2)) + abort (); + + MASK_MERGE () (res_ref1, mask, 1); + if (check_union128 (res1, res_ref1)) + abort (); + + MASK_MERGE () (res_ref1, mask, 1); + if (check_union128 (res5, res_ref1)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 64da3cd1992..e868f6d293f 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -390,6 +390,14 @@ #define __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, 8) #define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D) #define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D) +#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) 
__builtin_ia32_vfmaddsd3_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 8) /* avx512erintrin.h */ #define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask(A, B, C, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 0f663bec702..748339f7d47 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -534,6 +534,30 @@ test_4 (_mm512_maskz_fnmsub_round_pd, __m512d, __mmask8, __m512d, __m512d, __m51 test_4 (_mm512_maskz_fnmsub_round_ps, __m512, __mmask16, __m512, __m512, __m512, 9) test_4 (_mm512_maskz_ternarylogic_epi32, __m512i, __mmask16, __m512i, __m512i, __m512i, 1) test_4 (_mm512_maskz_ternarylogic_epi64, __m512i, __mmask8, __m512i, __m512i, __m512i, 1) +test_4 (_mm_mask_fmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9) +test_4 (_mm_mask_fmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9) +test_4 (_mm_mask3_fmadd_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9) +test_4 (_mm_mask3_fmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9) +test_4 (_mm_maskz_fmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9) +test_4 (_mm_maskz_fmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9) +test_4 (_mm_mask_fmsub_round_sd, __m128d, __m128d, __mmask8, 
__m128d, __m128d, 9) +test_4 (_mm_mask_fmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9) +test_4 (_mm_mask3_fmsub_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9) +test_4 (_mm_mask3_fmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9) +test_4 (_mm_maskz_fmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9) +test_4 (_mm_maskz_fmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9) +test_4 (_mm_mask_fnmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9) +test_4 (_mm_mask_fnmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9) +test_4 (_mm_mask3_fnmadd_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9) +test_4 (_mm_mask3_fnmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9) +test_4 (_mm_maskz_fnmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9) +test_4 (_mm_maskz_fnmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9) +test_4 (_mm_mask_fnmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9) +test_4 (_mm_mask_fnmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9) +test_4 (_mm_mask3_fnmsub_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9) +test_4 (_mm_mask3_fnmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9) +test_4 (_mm_maskz_fnmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9) +test_4 (_mm_maskz_fnmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9) test_4v (_mm512_mask_i32scatter_epi32, void *, __mmask16, __m512i, __m512i, 1) test_4v (_mm512_mask_i32scatter_epi64, void *, __mmask8, __m256i, __m512i, 1) test_4v (_mm512_mask_i32scatter_pd, void *, __mmask8, __m256i, __m512d, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 99af58a995d..0c62f2049c2 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -633,6 +633,30 @@ test_4 (_mm512_maskz_fnmsub_round_pd, __m512d, __mmask8, __m512d, __m512d, __m51 test_4 
(_mm512_maskz_fnmsub_round_ps, __m512, __mmask16, __m512, __m512, __m512, 9) test_4 (_mm512_maskz_ternarylogic_epi32, __m512i, __mmask16, __m512i, __m512i, __m512i, 1) test_4 (_mm512_maskz_ternarylogic_epi64, __m512i, __mmask8, __m512i, __m512i, __m512i, 1) +test_4 (_mm_mask_fmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9) +test_4 (_mm_mask_fmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9) +test_4 (_mm_mask3_fmadd_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9) +test_4 (_mm_mask3_fmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9) +test_4 (_mm_maskz_fmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9) +test_4 (_mm_maskz_fmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9) +test_4 (_mm_mask_fmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9) +test_4 (_mm_mask_fmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9) +test_4 (_mm_mask3_fmsub_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9) +test_4 (_mm_mask3_fmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9) +test_4 (_mm_maskz_fmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9) +test_4 (_mm_maskz_fmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9) +test_4 (_mm_mask_fnmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9) +test_4 (_mm_mask_fnmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9) +test_4 (_mm_mask3_fnmadd_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9) +test_4 (_mm_mask3_fnmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9) +test_4 (_mm_maskz_fnmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9) +test_4 (_mm_maskz_fnmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9) +test_4 (_mm_mask_fnmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9) +test_4 (_mm_mask_fnmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9) +test_4 (_mm_mask3_fnmsub_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9) +test_4 
(_mm_mask3_fnmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9) +test_4 (_mm_maskz_fnmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9) +test_4 (_mm_maskz_fnmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9) test_4v (_mm512_mask_i32scatter_epi32, void *, __mmask16, __m512i, __m512i, 1) test_4v (_mm512_mask_i32scatter_epi64, void *, __mmask8, __m256i, __m512i, 1) test_4v (_mm512_mask_i32scatter_pd, void *, __mmask8, __m256i, __m512d, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index f9d372c47e2..78a3c0aa26e 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -389,6 +389,14 @@ #define __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, 8) #define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D) #define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D) +#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 8) /* avx512pfintrin.h */ #define __builtin_ia32_gatherpfdps(A, B, C, D, E) __builtin_ia32_gatherpfdps(A, B, C, 1, _MM_HINT_T0) -- 2.30.2