re PR target/89784 (Missing AVX512 intrinsics)
authorJakub Jelinek <jakub@gcc.gnu.org>
Fri, 22 Mar 2019 10:08:40 +0000 (11:08 +0100)
committerJakub Jelinek <jakub@gcc.gnu.org>
Fri, 22 Mar 2019 10:08:40 +0000 (11:08 +0100)
PR target/89784
* config/i386/i386.c (enum ix86_builtins): Remove
IX86_BUILTIN_VFMSUBSD3_MASK3 and IX86_BUILTIN_VFMSUBSS3_MASK3.
* config/i386/i386-builtin.def (__builtin_ia32_vfmaddsd3_mask,
__builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz,
__builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask,
__builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz,
__builtin_ia32_vfmsubss3_mask3): New builtins.
* config/i386/sse.md (avx512f_vmfmadd_<mode>_mask<round_name>,
avx512f_vmfmadd_<mode>_mask3<round_name>,
avx512f_vmfmadd_<mode>_maskz_1<round_name>,
*avx512f_vmfmsub_<mode>_mask<round_name>,
avx512f_vmfmsub_<mode>_mask3<round_name>,
*avx512f_vmfmasub_<mode>_maskz_1<round_name>,
*avx512f_vmfnmadd_<mode>_mask<round_name>,
*avx512f_vmfnmadd_<mode>_mask3<round_name>,
*avx512f_vmfnmadd_<mode>_maskz_1<round_name>,
*avx512f_vmfnmsub_<mode>_mask<round_name>,
*avx512f_vmfnmsub_<mode>_mask3<round_name>,
*avx512f_vmfnmasub_<mode>_maskz_1<round_name>): New define_insns.
(avx512f_vmfmadd_<mode>_maskz<round_expand_name>): New define_expand.
* config/i386/avx512fintrin.h (_mm_mask_fmadd_sd, _mm_mask_fmadd_ss,
_mm_mask3_fmadd_sd, _mm_mask3_fmadd_ss, _mm_maskz_fmadd_sd,
_mm_maskz_fmadd_ss, _mm_mask_fmsub_sd, _mm_mask_fmsub_ss,
_mm_mask3_fmsub_sd, _mm_mask3_fmsub_ss, _mm_maskz_fmsub_sd,
_mm_maskz_fmsub_ss, _mm_mask_fnmadd_sd, _mm_mask_fnmadd_ss,
_mm_mask3_fnmadd_sd, _mm_mask3_fnmadd_ss, _mm_maskz_fnmadd_sd,
_mm_maskz_fnmadd_ss, _mm_mask_fnmsub_sd, _mm_mask_fnmsub_ss,
_mm_mask3_fnmsub_sd, _mm_mask3_fnmsub_ss, _mm_maskz_fnmsub_sd,
_mm_maskz_fnmsub_ss, _mm_mask_fmadd_round_sd, _mm_mask_fmadd_round_ss,
_mm_mask3_fmadd_round_sd, _mm_mask3_fmadd_round_ss,
_mm_maskz_fmadd_round_sd, _mm_maskz_fmadd_round_ss,
_mm_mask_fmsub_round_sd, _mm_mask_fmsub_round_ss,
_mm_mask3_fmsub_round_sd, _mm_mask3_fmsub_round_ss,
_mm_maskz_fmsub_round_sd, _mm_maskz_fmsub_round_ss,
_mm_mask_fnmadd_round_sd, _mm_mask_fnmadd_round_ss,
_mm_mask3_fnmadd_round_sd, _mm_mask3_fnmadd_round_ss,
_mm_maskz_fnmadd_round_sd, _mm_maskz_fnmadd_round_ss,
_mm_mask_fnmsub_round_sd, _mm_mask_fnmsub_round_ss,
_mm_mask3_fnmsub_round_sd, _mm_mask3_fnmsub_round_ss,
_mm_maskz_fnmsub_round_sd, _mm_maskz_fnmsub_round_ss): New intrinsics.

* gcc.target/i386/sse-13.c (__builtin_ia32_vfmaddsd3_mask,
__builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz,
__builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask,
__builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz,
__builtin_ia32_vfmsubss3_mask3): Define.
* gcc.target/i386/sse-23.c (__builtin_ia32_vfmaddsd3_mask,
__builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz,
__builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask,
__builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz,
__builtin_ia32_vfmsubss3_mask3): Define.
* gcc.target/i386/avx-1.c (__builtin_ia32_vfmaddsd3_mask,
__builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz,
__builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask,
__builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz,
__builtin_ia32_vfmsubss3_mask3): Define.
* gcc.target/i386/sse-14.c: Add tests for
_mm_mask{,3,z}_f{,n}m{add,sub}_round_s{s,d} builtins.
* gcc.target/i386/sse-22.c: Likewise.

2019-03-22  Hongtao Liu  <hongtao.liu@intel.com>

* gcc.target/i386/avx512f-vfmaddXXXsd-1.c (avx512f_test): Add tests
for _mm_mask{,3,z}_*.
* gcc.target/i386/avx512f-vfmaddXXXss-1.c (avx512f_test): Likewise.
* gcc.target/i386/avx512f-vfmsubXXXsd-1.c (avx512f_test): Likewise.
* gcc.target/i386/avx512f-vfmsubXXXss-1.c (avx512f_test): Likewise.
* gcc.target/i386/avx512f-vfnmaddXXXsd-1.c (avx512f_test): Likewise.
* gcc.target/i386/avx512f-vfnmaddXXXss-1.c (avx512f_test): Likewise.
* gcc.target/i386/avx512f-vfnmsubXXXsd-1.c (avx512f_test): Likewise.
* gcc.target/i386/avx512f-vfnmsubXXXss-1.c (avx512f_test): Likewise.
* gcc.target/i386/avx512f-vfmaddXXXsd-2.c: New test.
* gcc.target/i386/avx512f-vfmaddXXXss-2.c: New test.
* gcc.target/i386/avx512f-vfmsubXXXsd-2.c: New test.
* gcc.target/i386/avx512f-vfmsubXXXss-2.c: New test.
* gcc.target/i386/avx512f-vfnmaddXXXsd-2.c: New test.
* gcc.target/i386/avx512f-vfnmaddXXXss-2.c: New test.
* gcc.target/i386/avx512f-vfnmsubXXXsd-2.c: New test.
* gcc.target/i386/avx512f-vfnmsubXXXss-2.c: New test.

From-SVN: r269868

27 files changed:
gcc/ChangeLog
gcc/config/i386/avx512fintrin.h
gcc/config/i386/i386-builtin.def
gcc/config/i386/i386.c
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx-1.c
gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c
gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c
gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c
gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c
gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c
gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c
gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c
gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c
gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse-13.c
gcc/testsuite/gcc.target/i386/sse-14.c
gcc/testsuite/gcc.target/i386/sse-22.c
gcc/testsuite/gcc.target/i386/sse-23.c

index 10aa8e5cfd6705b1ff734bd93a6c17aff96cf2ca..6601be5319713cf62c2957318fcfffca25b188b2 100644 (file)
@@ -1,3 +1,47 @@
+2019-03-22  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/89784
+       * config/i386/i386.c (enum ix86_builtins): Remove
+       IX86_BUILTIN_VFMSUBSD3_MASK3 and IX86_BUILTIN_VFMSUBSS3_MASK3.
+       * config/i386/i386-builtin.def (__builtin_ia32_vfmaddsd3_mask,
+       __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz,
+       __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask,
+       __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz,
+       __builtin_ia32_vfmsubss3_mask3): New builtins.
+       * config/i386/sse.md (avx512f_vmfmadd_<mode>_mask<round_name>,
+       avx512f_vmfmadd_<mode>_mask3<round_name>,
+       avx512f_vmfmadd_<mode>_maskz_1<round_name>,
+       *avx512f_vmfmsub_<mode>_mask<round_name>,
+       avx512f_vmfmsub_<mode>_mask3<round_name>,
+       *avx512f_vmfmasub_<mode>_maskz_1<round_name>,
+       *avx512f_vmfnmadd_<mode>_mask<round_name>,
+       *avx512f_vmfnmadd_<mode>_mask3<round_name>,
+       *avx512f_vmfnmadd_<mode>_maskz_1<round_name>,
+       *avx512f_vmfnmsub_<mode>_mask<round_name>,
+       *avx512f_vmfnmsub_<mode>_mask3<round_name>,
+       *avx512f_vmfnmasub_<mode>_maskz_1<round_name>): New define_insns.
+       (avx512f_vmfmadd_<mode>_maskz<round_expand_name>): New define_expand.
+       * config/i386/avx512fintrin.h (_mm_mask_fmadd_sd, _mm_mask_fmadd_ss,
+       _mm_mask3_fmadd_sd, _mm_mask3_fmadd_ss, _mm_maskz_fmadd_sd,
+       _mm_maskz_fmadd_ss, _mm_mask_fmsub_sd, _mm_mask_fmsub_ss,
+       _mm_mask3_fmsub_sd, _mm_mask3_fmsub_ss, _mm_maskz_fmsub_sd,
+       _mm_maskz_fmsub_ss, _mm_mask_fnmadd_sd, _mm_mask_fnmadd_ss,
+       _mm_mask3_fnmadd_sd, _mm_mask3_fnmadd_ss, _mm_maskz_fnmadd_sd,
+       _mm_maskz_fnmadd_ss, _mm_mask_fnmsub_sd, _mm_mask_fnmsub_ss,
+       _mm_mask3_fnmsub_sd, _mm_mask3_fnmsub_ss, _mm_maskz_fnmsub_sd,
+       _mm_maskz_fnmsub_ss, _mm_mask_fmadd_round_sd, _mm_mask_fmadd_round_ss,
+       _mm_mask3_fmadd_round_sd, _mm_mask3_fmadd_round_ss,
+       _mm_maskz_fmadd_round_sd, _mm_maskz_fmadd_round_ss,
+       _mm_mask_fmsub_round_sd, _mm_mask_fmsub_round_ss,
+       _mm_mask3_fmsub_round_sd, _mm_mask3_fmsub_round_ss,
+       _mm_maskz_fmsub_round_sd, _mm_maskz_fmsub_round_ss,
+       _mm_mask_fnmadd_round_sd, _mm_mask_fnmadd_round_ss,
+       _mm_mask3_fnmadd_round_sd, _mm_mask3_fnmadd_round_ss,
+       _mm_maskz_fnmadd_round_sd, _mm_maskz_fnmadd_round_ss,
+       _mm_mask_fnmsub_round_sd, _mm_mask_fnmsub_round_ss,
+       _mm_mask3_fnmsub_round_sd, _mm_mask3_fnmsub_round_ss,
+       _mm_maskz_fnmsub_round_sd, _mm_maskz_fnmsub_round_ss): New intrinsics.
+
 2019-03-21  Martin Sebor  <msebor@redhat.com>
 
        PR tree-optimization/89350
index 3a4e0ad3c32d12821cd725e4c00645cd22c2f198..afcba5dd0420cd0c0bb342c83b4078d16e957f20 100644 (file)
@@ -11562,6 +11562,608 @@ _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
     (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
 #endif
 
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+                                                 (__v2df) __A,
+                                                 (__v2df) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+                                                (__v4sf) __A,
+                                                (__v4sf) __B,
+                                                (__mmask8) __U,
+                                                _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
+                                                  (__v2df) __A,
+                                                  (__v2df) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
+                                                 (__v4sf) __A,
+                                                 (__v4sf) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+                                                  (__v2df) __A,
+                                                  (__v2df) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+                                                 (__v4sf) __A,
+                                                 (__v4sf) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+                                                 (__v2df) __A,
+                                                 -(__v2df) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+                                                (__v4sf) __A,
+                                                -(__v4sf) __B,
+                                                (__mmask8) __U,
+                                                _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
+                                                  (__v2df) __A,
+                                                  (__v2df) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
+                                                 (__v4sf) __A,
+                                                 (__v4sf) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+                                                  (__v2df) __A,
+                                                  -(__v2df) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+                                                 (__v4sf) __A,
+                                                 -(__v4sf) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+                                                 -(__v2df) __A,
+                                                 (__v2df) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+                                                -(__v4sf) __A,
+                                                (__v4sf) __B,
+                                                (__mmask8) __U,
+                                                _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
+                                                  -(__v2df) __A,
+                                                  (__v2df) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
+                                                 -(__v4sf) __A,
+                                                 (__v4sf) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+                                                  -(__v2df) __A,
+                                                  (__v2df) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+                                                 -(__v4sf) __A,
+                                                 (__v4sf) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+                                                 -(__v2df) __A,
+                                                 -(__v2df) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+                                                -(__v4sf) __A,
+                                                -(__v4sf) __B,
+                                                (__mmask8) __U,
+                                                _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
+                                                  -(__v2df) __A,
+                                                  (__v2df) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
+                                                 -(__v4sf) __A,
+                                                 (__v4sf) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+                                                  -(__v2df) __A,
+                                                  -(__v2df) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+                                                 -(__v4sf) __A,
+                                                 -(__v4sf) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+                        const int __R)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+                                                 (__v2df) __A,
+                                                 (__v2df) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+                        const int __R)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+                                                (__v4sf) __A,
+                                                (__v4sf) __B,
+                                                (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
+                         const int __R)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
+                                                  (__v2df) __A,
+                                                  (__v2df) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
+                         const int __R)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
+                                                 (__v4sf) __A,
+                                                 (__v4sf) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
+                         const int __R)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+                                                  (__v2df) __A,
+                                                  (__v2df) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
+                         const int __R)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+                                                 (__v4sf) __A,
+                                                 (__v4sf) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+                        const int __R)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+                                                 (__v2df) __A,
+                                                 -(__v2df) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+                        const int __R)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+                                                (__v4sf) __A,
+                                                -(__v4sf) __B,
+                                                (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
+                         const int __R)
+{
+  return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
+                                                  (__v2df) __A,
+                                                  (__v2df) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
+                         const int __R)
+{
+  return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
+                                                 (__v4sf) __A,
+                                                 (__v4sf) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
+                         const int __R)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+                                                  (__v2df) __A,
+                                                  -(__v2df) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
+                         const int __R)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+                                                 (__v4sf) __A,
+                                                 -(__v4sf) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+                        const int __R)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+                                                 -(__v2df) __A,
+                                                 (__v2df) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+                        const int __R)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+                                                -(__v4sf) __A,
+                                                (__v4sf) __B,
+                                                (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
+                         const int __R)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
+                                                  -(__v2df) __A,
+                                                  (__v2df) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
+                         const int __R)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
+                                                 -(__v4sf) __A,
+                                                 (__v4sf) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
+                         const int __R)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+                                                  -(__v2df) __A,
+                                                  (__v2df) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
+                         const int __R)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+                                                 -(__v4sf) __A,
+                                                 (__v4sf) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+                        const int __R)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+                                                 -(__v2df) __A,
+                                                 -(__v2df) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+                        const int __R)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+                                                -(__v4sf) __A,
+                                                -(__v4sf) __B,
+                                                (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
+                         const int __R)
+{
+  return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
+                                                  -(__v2df) __A,
+                                                  (__v2df) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
+                         const int __R)
+{
+  return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
+                                                 -(__v4sf) __A,
+                                                 (__v4sf) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
+                         const int __R)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+                                                  -(__v2df) __A,
+                                                  -(__v2df) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
+                         const int __R)
+{
+  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+                                                 -(__v4sf) __A,
+                                                 -(__v4sf) __B,
+                                                 (__mmask8) __U, __R);
+}
+#else
+#define _mm_mask_fmadd_round_sd(A, U, B, C, R)            \
+    (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, C, U, R)
+
+#define _mm_mask_fmadd_round_ss(A, U, B, C, R)            \
+    (__m128) __builtin_ia32_vfmaddss3_mask (A, B, C, U, R)
+
+#define _mm_mask3_fmadd_round_sd(A, B, C, U, R)            \
+    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R)
+
+#define _mm_mask3_fmadd_round_ss(A, B, C, U, R)            \
+    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R)
+
+#define _mm_maskz_fmadd_round_sd(U, A, B, C, R)            \
+    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R)
+
+#define _mm_maskz_fmadd_round_ss(U, A, B, C, R)            \
+    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R)
+
+#define _mm_mask_fmsub_round_sd(A, U, B, C, R)            \
+    (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R)
+
+#define _mm_mask_fmsub_round_ss(A, U, B, C, R)            \
+    (__m128) __builtin_ia32_vfmaddss3_mask (A, B, -(C), U, R)
+
+#define _mm_mask3_fmsub_round_sd(A, B, C, U, R)            \
+    (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, B, C, U, R)
+
+#define _mm_mask3_fmsub_round_ss(A, B, C, U, R)            \
+    (__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R)
+
+#define _mm_maskz_fmsub_round_sd(U, A, B, C, R)            \
+    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, -(C), U, R)
+
+#define _mm_maskz_fmsub_round_ss(U, A, B, C, R)            \
+    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, -(C), U, R)
+
+#define _mm_mask_fnmadd_round_sd(A, U, B, C, R)            \
+    (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R)
+
+#define _mm_mask_fnmadd_round_ss(A, U, B, C, R)            \
+    (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), C, U, R)
+
+#define _mm_mask3_fnmadd_round_sd(A, B, C, U, R)            \
+    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R)
+
+#define _mm_mask3_fnmadd_round_ss(A, B, C, U, R)            \
+    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R)
+
+#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R)            \
+    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R)
+
+#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R)            \
+    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R)
+
+#define _mm_mask_fnmsub_round_sd(A, U, B, C, R)            \
+    (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R)
+
+#define _mm_mask_fnmsub_round_ss(A, U, B, C, R)            \
+    (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), -(C), U, R)
+
+#define _mm_mask3_fnmsub_round_sd(A, B, C, U, R)            \
+    (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, -(B), C, U, R)
+
+#define _mm_mask3_fnmsub_round_ss(A, B, C, U, R)            \
+    (__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R)
+
+#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R)            \
+    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), -(C), U, R)
+
+#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R)            \
+    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), -(C), U, R)
+#endif
+
 #ifdef __OPTIMIZE__
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
index eeef341726ac574dc418e3577f71ab92a73cb92e..6580890edc08468d60ac633208f17b100b94c1d0 100644 (file)
@@ -2827,6 +2827,14 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v2df_mask_round, "__builtin_ia32_vfmaddsd3_mask", IX86_BUILTIN_VFMADDSD3_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v2df_mask3_round, "__builtin_ia32_vfmaddsd3_mask3", IX86_BUILTIN_VFMADDSD3_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v2df_maskz_round, "__builtin_ia32_vfmaddsd3_maskz", IX86_BUILTIN_VFMADDSD3_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmsub_v2df_mask3_round, "__builtin_ia32_vfmsubsd3_mask3", IX86_BUILTIN_VFMSUBSD3_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask_round, "__builtin_ia32_vfmaddss3_mask", IX86_BUILTIN_VFMADDSS3_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask3_round, "__builtin_ia32_vfmaddss3_mask3", IX86_BUILTIN_VFMADDSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_maskz_round, "__builtin_ia32_vfmaddss3_maskz", IX86_BUILTIN_VFMADDSS3_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmsub_v4sf_mask3_round, "__builtin_ia32_vfmsubss3_mask3", IX86_BUILTIN_VFMSUBSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
index 184c5c18718bd21e92948c8da1734e4f31833ac3..b5f20f1597ed56068571c02174af5fda8868bf95 100644 (file)
@@ -30524,8 +30524,6 @@ enum ix86_builtins
   IX86_BUILTIN_GATHERSIV8SI,
   IX86_BUILTIN_GATHERDIV4SI,
   IX86_BUILTIN_GATHERDIV8SI,
-  IX86_BUILTIN_VFMSUBSD3_MASK3,
-  IX86_BUILTIN_VFMSUBSS3_MASK3,
   IX86_BUILTIN_GATHER3SIV8SF,
   IX86_BUILTIN_GATHER3SIV4SF,
   IX86_BUILTIN_GATHER3SIV4DF,
index 86bf86618086ca4d9ede80887e15facc70bf855f..dc588ec1ccbdba624d3af1b12fe589a7125ba2c5 100644 (file)
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "avx512f_vmfmadd_<mode>_mask<round_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+       (vec_merge:VF_128
+         (vec_merge:VF_128
+           (fma:VF_128
+             (match_operand:VF_128 1 "register_operand" "0,0")
+             (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
+             (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
+           (match_dup 1)
+           (match_operand:QI 4 "register_operand" "Yk,Yk"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
+   vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+       (vec_merge:VF_128
+         (vec_merge:VF_128
+           (fma:VF_128
+             (match_operand:VF_128 1 "register_operand" "v")
+             (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")
+             (match_operand:VF_128 3 "register_operand" "0"))
+           (match_dup 3)
+           (match_operand:QI 4 "register_operand" "Yk"))
+         (match_dup 3)
+         (const_int 1)))]
+  "TARGET_AVX512F"
+  "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>"
+  [(match_operand:VF_128 0 "register_operand")
+   (match_operand:VF_128 1 "<round_expand_nimm_predicate>")
+   (match_operand:VF_128 2 "<round_expand_nimm_predicate>")
+   (match_operand:VF_128 3 "<round_expand_nimm_predicate>")
+   (match_operand:QI 4 "register_operand")]
+  "TARGET_AVX512F"
+{
+  emit_insn (gen_avx512f_vmfmadd_<mode>_maskz_1<round_expand_name> (
+    operands[0], operands[1], operands[2], operands[3],
+    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
+  DONE;
+})
+
+(define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+       (vec_merge:VF_128
+         (vec_merge:VF_128
+           (fma:VF_128
+             (match_operand:VF_128 1 "register_operand" "0,0")
+             (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
+             (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
+           (match_operand:VF_128 4 "const0_operand" "C,C")
+           (match_operand:QI 5 "register_operand" "Yk,Yk"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
+   vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+       (vec_merge:VF_128
+         (vec_merge:VF_128
+           (fma:VF_128
+             (match_operand:VF_128 1 "register_operand" "0,0")
+             (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
+             (neg:VF_128
+               (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
+           (match_dup 1)
+           (match_operand:QI 4 "register_operand" "Yk,Yk"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
+   vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+       (vec_merge:VF_128
+         (vec_merge:VF_128
+           (fma:VF_128
+             (match_operand:VF_128 1 "register_operand" "v")
+             (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")
+             (neg:VF_128
+               (match_operand:VF_128 3 "register_operand" "0")))
+           (match_dup 3)
+           (match_operand:QI 4 "register_operand" "Yk"))
+         (match_dup 3)
+         (const_int 1)))]
+  "TARGET_AVX512F"
+  "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+       (vec_merge:VF_128
+         (vec_merge:VF_128
+           (fma:VF_128
+             (match_operand:VF_128 1 "register_operand" "0,0")
+             (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
+             (neg:VF_128
+               (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
+           (match_operand:VF_128 4 "const0_operand" "C,C")
+           (match_operand:QI 5 "register_operand" "Yk,Yk"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
+   vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx512f_vmfnmadd_<mode>_mask<round_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+       (vec_merge:VF_128
+         (vec_merge:VF_128
+           (fma:VF_128
+             (neg:VF_128
+               (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
+             (match_operand:VF_128 1 "register_operand" "0,0")
+             (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
+           (match_dup 1)
+           (match_operand:QI 4 "register_operand" "Yk,Yk"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
+   vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx512f_vmfnmadd_<mode>_mask3<round_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+       (vec_merge:VF_128
+         (vec_merge:VF_128
+           (fma:VF_128
+             (neg:VF_128
+               (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>"))
+             (match_operand:VF_128 1 "register_operand" "v")
+             (match_operand:VF_128 3 "register_operand" "0"))
+           (match_dup 3)
+           (match_operand:QI 4 "register_operand" "Yk"))
+         (match_dup 3)
+         (const_int 1)))]
+  "TARGET_AVX512F"
+  "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+       (vec_merge:VF_128
+         (vec_merge:VF_128
+           (fma:VF_128
+             (neg:VF_128
+               (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
+             (match_operand:VF_128 1 "register_operand" "0,0")
+             (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
+           (match_operand:VF_128 4 "const0_operand" "C,C")
+           (match_operand:QI 5 "register_operand" "Yk,Yk"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
+   vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+       (vec_merge:VF_128
+         (vec_merge:VF_128
+           (fma:VF_128
+             (neg:VF_128
+               (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
+             (match_operand:VF_128 1 "register_operand" "0,0")
+             (neg:VF_128
+               (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
+           (match_dup 1)
+           (match_operand:QI 4 "register_operand" "Yk,Yk"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
+   vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_vmfnmsub_<mode>_mask3<round_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+       (vec_merge:VF_128
+         (vec_merge:VF_128
+           (fma:VF_128
+             (neg:VF_128
+               (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>"))
+             (match_operand:VF_128 1 "register_operand" "v")
+             (neg:VF_128
+               (match_operand:VF_128 3 "register_operand" "0")))
+           (match_dup 3)
+           (match_operand:QI 4 "register_operand" "Yk"))
+         (match_dup 3)
+         (const_int 1)))]
+  "TARGET_AVX512F"
+  "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+       (vec_merge:VF_128
+         (vec_merge:VF_128
+           (fma:VF_128
+             (neg:VF_128
+               (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
+             (match_operand:VF_128 1 "register_operand" "0,0")
+             (neg:VF_128
+               (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
+           (match_operand:VF_128 4 "const0_operand" "C,C")
+           (match_operand:QI 5 "register_operand" "Yk,Yk"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
+   vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
 ;; FMA4 floating point scalar intrinsics.  These write the
 ;; entire destination register, with the high-order elements zeroed.
 
index 076f79537f5a01eefe2c6f7f195ae06375626630..ac41542e2265e5c73d8b2c9cc82dcbfbcf8763dc 100644 (file)
@@ -1,3 +1,46 @@
+2019-03-22  Hongtao Liu  <hongtao.liu@intel.com>
+
+       PR target/89784
+       * gcc.target/i386/avx512f-vfmaddXXXsd-1.c (avx512f_test): Add tests
+       for _mm_mask{,3,z}_*.
+       * gcc.target/i386/avx512f-vfmaddXXXss-1.c (avx512f_test): Likewise.
+       * gcc.target/i386/avx512f-vfmsubXXXsd-1.c (avx512f_test): Likewise.
+       * gcc.target/i386/avx512f-vfmsubXXXss-1.c (avx512f_test): Likewise.
+       * gcc.target/i386/avx512f-vfnmaddXXXsd-1.c (avx512f_test): Likewise.
+       * gcc.target/i386/avx512f-vfnmaddXXXss-1.c (avx512f_test): Likewise.
+       * gcc.target/i386/avx512f-vfnmsubXXXsd-1.c (avx512f_test): Likewise.
+       * gcc.target/i386/avx512f-vfnmsubXXXss-1.c (avx512f_test): Likewise.
+       * gcc.target/i386/avx512f-vfmaddXXXsd-2.c: New test.
+       * gcc.target/i386/avx512f-vfmaddXXXss-2.c: New test.
+       * gcc.target/i386/avx512f-vfmsubXXXsd-2.c: New test.
+       * gcc.target/i386/avx512f-vfmsubXXXss-2.c: New test.
+       * gcc.target/i386/avx512f-vfnmaddXXXsd-2.c: New test.
+       * gcc.target/i386/avx512f-vfnmaddXXXss-2.c: New test.
+       * gcc.target/i386/avx512f-vfnmsubXXXsd-2.c: New test.
+       * gcc.target/i386/avx512f-vfnmsubXXXss-2.c: New test.
+
+2019-03-22  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/89784
+       * gcc.target/i386/sse-13.c (__builtin_ia32_vfmaddsd3_mask,
+       __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz,
+       __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask,
+       __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz,
+       __builtin_ia32_vfmsubss3_mask3): Define.
+       * gcc.target/i386/sse-23.c (__builtin_ia32_vfmaddsd3_mask,
+       __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz,
+       __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask,
+       __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz,
+       __builtin_ia32_vfmsubss3_mask3): Define.
+       * gcc.target/i386/avx-1.c (__builtin_ia32_vfmaddsd3_mask,
+       __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz,
+       __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask,
+       __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz,
+       __builtin_ia32_vfmsubss3_mask3): Define.
+       * gcc.target/i386/sse-14.c: Add tests for
+       _mm_mask{,3,z}_f{,n}m{add,sub}_round_s{s,d} builtins.
+       * gcc.target/i386/sse-22.c: Likewise.
+
 2019-03-21  Martin Sebor  <msebor@redhat.com>
 
        PR tree-optimization/89350
index f67bc5f504425e76a0dfa07650476fb5856dcf2d..e09f5a17984073e8d9358072bd5694b8d54661cb 100644 (file)
 #define __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, 8)
 #define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D)
 #define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D)
+#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 8)
 
 /* avx512erintrin.h */
 #define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask(A, B, C, 8)
index d88286fc50d72174443034d524eae8452f71bc87..d6f9015327b01142ca04d297d8caa02d3ae26c0b 100644 (file)
@@ -1,13 +1,26 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  2 } } */
+/* { dg-final { scan-assembler-times "vfmadd231sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
 /* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd231sd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
 volatile __m128d a, b, c;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
+  a = _mm_mask_fmadd_sd (a, m, b, c);
+  c = _mm_mask3_fmadd_sd (a, b, c, m);
+  a = _mm_maskz_fmadd_sd (m, a, b, c);
   a = _mm_fmadd_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  a = _mm_mask_fmadd_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  c = _mm_mask3_fmadd_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  a = _mm_maskz_fmadd_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-2.c
new file mode 100644 (file)
index 0000000..6e01bd0
--- /dev/null
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+static void
+calc_add (double *s1, double *s2, double *s3, double* r)
+{
+  r[0] = s1[0] * s2[0] + s3[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+static void
+calc_add_3 (double *s1, double *s2, double *s3, double* r)
+{
+  r[0] = s2[0] * s3[0] + s1[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, res4, res5, res6, res7, src1, src2, src3;
+  MASK_TYPE mask = MASK_VALUE;
+  double res_ref1[SIZE], res_ref2[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = DEFAULT_VALUE;
+      src2.a[i] = 56.78 * (i + 1) * sign;
+      src3.a[i] = 90.12 * (i + 2) * sign;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    {
+      res1.a[i] = DEFAULT_VALUE;
+      res2.a[i] = DEFAULT_VALUE;
+      res5.a[i] = DEFAULT_VALUE;
+      res6.a[i] = DEFAULT_VALUE;
+    }
+
+  calc_add (src1.a, src2.a, src3.a, res_ref1);
+  calc_add_3(src1.a, src2.a, src3.a, res_ref2);
+
+  res1.x = _mm_mask_fmadd_sd (src1.x, mask, src2.x, src3.x);
+  res2.x = _mm_mask3_fmadd_sd (src2.x, src3.x, src1.x, mask);
+  res3.x = _mm_maskz_fmadd_sd (mask, src1.x, src2.x, src3.x);
+  res4.x = _mm_fmadd_round_sd (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res5.x = _mm_mask_fmadd_round_sd (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res6.x = _mm_mask3_fmadd_round_sd (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC);
+  res7.x = _mm_maskz_fmadd_round_sd (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+
+  if (check_union128d (res4, res_ref1))
+    abort();
+
+  MASK_ZERO (d) (res_ref1, mask, 1);
+  if (check_union128d (res3, res_ref1))
+    abort ();
+
+  MASK_ZERO (d) (res_ref1, mask, 1);
+  if (check_union128d (res7, res_ref1))
+    abort ();
+
+  MASK_MERGE (d) (res_ref2, mask, 1);
+  if (check_union128d (res2, res_ref2))
+    abort ();
+
+  MASK_MERGE (d) (res_ref2, mask, 1);
+  if (check_union128d (res6, res_ref2))
+    abort ();
+
+  MASK_MERGE (d) (res_ref1, mask, 1);
+  if (check_union128d (res1, res_ref1))
+    abort ();
+  
+  MASK_MERGE (d) (res_ref1, mask, 1);
+  if (check_union128d (res5, res_ref1))
+    abort ();
+}
+
index e1aa388638fcfdd0e4ae23d09a8a9ddb7ee07d78..e50b5b586c5a8b4d4f7a76de754ef6624c946372 100644 (file)
@@ -1,13 +1,26 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  2 } } */
+/* { dg-final { scan-assembler-times "vfmadd231ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
 /* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd231ss\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
 volatile __m128 a, b, c;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
+  a = _mm_mask_fmadd_ss (a, m, b, c);
+  c = _mm_mask3_fmadd_ss (a, b, c, m);
+  a = _mm_maskz_fmadd_ss (m, a, b, c);
   a = _mm_fmadd_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  a = _mm_mask_fmadd_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  c = _mm_mask3_fmadd_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  a = _mm_maskz_fmadd_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-2.c
new file mode 100644 (file)
index 0000000..4fb7a08
--- /dev/null
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+static void
+calc_add (float *s1, float *s2, float *s3, float* r)
+{
+  r[0] = s1[0] * s2[0] + s3[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+static void
+calc_add_3 (float *s1, float *s2, float *s3, float* r)
+{
+  r[0] = s2[0] * s3[0] + s1[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, res4, res5, res6, res7, src1, src2, src3;
+  MASK_TYPE mask = MASK_VALUE;
+  float res_ref1[SIZE], res_ref2[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = DEFAULT_VALUE;
+      src2.a[i] = 56.78 * (i + 1) * sign;
+      src3.a[i] = 90.12 * (i + 2) * sign;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    {
+      res1.a[i] = DEFAULT_VALUE;
+      res2.a[i] = DEFAULT_VALUE;
+      res5.a[i] = DEFAULT_VALUE;
+      res6.a[i] = DEFAULT_VALUE;
+    }
+
+  calc_add (src1.a, src2.a, src3.a, res_ref1);
+  calc_add_3(src1.a, src2.a, src3.a, res_ref2);
+
+  res1.x = _mm_mask_fmadd_ss (src1.x, mask, src2.x, src3.x);
+  res2.x = _mm_mask3_fmadd_ss (src2.x, src3.x, src1.x, mask);
+  res3.x = _mm_maskz_fmadd_ss (mask, src1.x, src2.x, src3.x);
+  res4.x = _mm_fmadd_round_ss (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res5.x = _mm_mask_fmadd_round_ss (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res6.x = _mm_mask3_fmadd_round_ss (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC);
+  res7.x = _mm_maskz_fmadd_round_ss (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+
+  if (check_union128 (res4, res_ref1))
+    abort();
+
+  MASK_ZERO () (res_ref1, mask, 1);
+  if (check_union128 (res3, res_ref1))
+    abort ();
+
+  MASK_ZERO () (res_ref1, mask, 1);
+  if (check_union128 (res7, res_ref1))
+    abort ();
+
+  MASK_MERGE () (res_ref2, mask, 1);
+  if (check_union128 (res2, res_ref2))
+    abort ();
+
+  MASK_MERGE () (res_ref2, mask, 1);
+  if (check_union128 (res6, res_ref2))
+    abort ();
+
+  MASK_MERGE () (res_ref1, mask, 1);
+  if (check_union128 (res1, res_ref1))
+    abort ();
+  
+  MASK_MERGE () (res_ref1, mask, 1);
+  if (check_union128 (res5, res_ref1))
+    abort ();
+}
+
index bb3ec6018ef9da5ec01bf403df22401733d03304..15affb880ef45341af58e8edf2fefb152d91aea5 100644 (file)
@@ -1,13 +1,26 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  2 } } */
+/* { dg-final { scan-assembler-times "vfmsub231sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
 /* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub231sd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
 volatile __m128d a, b, c;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
+  a = _mm_mask_fmsub_sd (a, m, b, c);
+  c = _mm_mask3_fmsub_sd (a, b, c, m);
+  a = _mm_maskz_fmsub_sd (m, a, b, c);
   a = _mm_fmsub_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  a = _mm_mask_fmsub_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  c = _mm_mask3_fmsub_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  a = _mm_maskz_fmsub_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-2.c
new file mode 100644 (file)
index 0000000..701db18
--- /dev/null
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+static void
+calc_add (double *s1, double *s2, double *s3, double* r)
+{
+  r[0] = s1[0] * s2[0] - s3[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+static void
+calc_add_3 (double *s1, double *s2, double *s3, double* r)
+{
+  r[0] = s2[0] * s3[0] - s1[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, res4, res5, res6, res7, src1, src2, src3;
+  MASK_TYPE mask = MASK_VALUE;
+  double res_ref1[SIZE], res_ref2[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = DEFAULT_VALUE;
+      src2.a[i] = 56.78 * (i + 1) * sign;
+      src3.a[i] = 90.12 * (i + 2) * sign;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    {
+      res1.a[i] = DEFAULT_VALUE;
+      res2.a[i] = DEFAULT_VALUE;
+      res5.a[i] = DEFAULT_VALUE;
+      res6.a[i] = DEFAULT_VALUE;
+    }
+
+  calc_add (src1.a, src2.a, src3.a, res_ref1);
+  calc_add_3(src1.a, src2.a, src3.a, res_ref2);
+
+  res1.x = _mm_mask_fmsub_sd (src1.x, mask, src2.x, src3.x);
+  res2.x = _mm_mask3_fmsub_sd (src2.x, src3.x, src1.x, mask);
+  res3.x = _mm_maskz_fmsub_sd (mask, src1.x, src2.x, src3.x);
+  res4.x = _mm_fmsub_round_sd (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res5.x = _mm_mask_fmsub_round_sd (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res6.x = _mm_mask3_fmsub_round_sd (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC);
+  res7.x = _mm_maskz_fmsub_round_sd (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+
+  if (check_union128d (res4, res_ref1))
+    abort();
+
+  MASK_ZERO (d) (res_ref1, mask, 1);
+  if (check_union128d (res3, res_ref1))
+    abort ();
+
+  MASK_ZERO (d) (res_ref1, mask, 1);
+  if (check_union128d (res7, res_ref1))
+    abort ();
+
+  MASK_MERGE (d) (res_ref2, mask, 1);
+  if (check_union128d (res2, res_ref2))
+    abort ();
+
+  MASK_MERGE (d) (res_ref2, mask, 1);
+  if (check_union128d (res6, res_ref2))
+    abort ();
+
+  MASK_MERGE (d) (res_ref1, mask, 1);
+  if (check_union128d (res1, res_ref1))
+    abort ();
+  
+  MASK_MERGE (d) (res_ref1, mask, 1);
+  if (check_union128d (res5, res_ref1))
+    abort ();
+}
+
index 7e521fc9deb38a044514e40328cbf8ed00b46bfd..34e444e3e81a4c1c04a6b8d8e7445f34a03186f7 100644 (file)
@@ -1,13 +1,26 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  2 } } */
+/* { dg-final { scan-assembler-times "vfmsub231ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
 /* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub231ss\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
 volatile __m128 a, b, c;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
+  a = _mm_mask_fmsub_ss (a, m, b, c);
+  c = _mm_mask3_fmsub_ss (a, b, c, m);
+  a = _mm_maskz_fmsub_ss (m, a, b, c);
   a = _mm_fmsub_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  a = _mm_mask_fmsub_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  c = _mm_mask3_fmsub_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  a = _mm_maskz_fmsub_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c
new file mode 100644 (file)
index 0000000..6beaed8
--- /dev/null
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+static void
+calc_add (float *s1, float *s2, float *s3, float* r)
+{
+  r[0] = s1[0] * s2[0] - s3[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+static void
+calc_add_3 (float *s1, float *s2, float *s3, float* r)
+{
+  r[0] = s2[0] * s3[0] - s1[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, res4, res5, res6, res7, src1, src2, src3;
+  MASK_TYPE mask = MASK_VALUE;
+  float res_ref1[SIZE], res_ref2[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = DEFAULT_VALUE;
+      src2.a[i] = 56.78 * (i + 1) * sign;
+      src3.a[i] = 90.12 * (i + 2) * sign;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    {
+      res1.a[i] = DEFAULT_VALUE;
+      res2.a[i] = DEFAULT_VALUE;
+      res5.a[i] = DEFAULT_VALUE;
+      res6.a[i] = DEFAULT_VALUE;
+    }
+
+  calc_add (src1.a, src2.a, src3.a, res_ref1);
+  calc_add_3(src1.a, src2.a, src3.a, res_ref2);
+
+  res1.x = _mm_mask_fmsub_ss (src1.x, mask, src2.x, src3.x);
+  res2.x = _mm_mask3_fmsub_ss (src2.x, src3.x, src1.x, mask);
+  res3.x = _mm_maskz_fmsub_ss (mask, src1.x, src2.x, src3.x);
+  res4.x = _mm_fmsub_round_ss (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res5.x = _mm_mask_fmsub_round_ss (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res6.x = _mm_mask3_fmsub_round_ss (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC);
+  res7.x = _mm_maskz_fmsub_round_ss (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+
+  if (check_union128 (res4, res_ref1))
+    abort();
+
+  MASK_ZERO () (res_ref1, mask, 1);
+  if (check_union128 (res3, res_ref1))
+    abort ();
+
+  MASK_ZERO () (res_ref1, mask, 1);
+  if (check_union128 (res7, res_ref1))
+    abort ();
+
+  MASK_MERGE () (res_ref2, mask, 1);
+  if (check_union128 (res2, res_ref2))
+    abort ();
+
+  MASK_MERGE () (res_ref2, mask, 1);
+  if (check_union128 (res6, res_ref2))
+    abort ();
+
+  MASK_MERGE () (res_ref1, mask, 1);
+  if (check_union128 (res1, res_ref1))
+    abort ();
+  
+  MASK_MERGE () (res_ref1, mask, 1);
+  if (check_union128 (res5, res_ref1))
+    abort ();
+}
+
index e23101f2e29d671b8e9a9bce2aa20dd71a1b0969..09b923e39517e8fd556b77fdc9918e345042dd0c 100644 (file)
@@ -1,13 +1,26 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
 /* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231sd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
 volatile __m128d a, b, c;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
+  a = _mm_mask_fnmadd_sd (a, m, b, c);
+  c = _mm_mask3_fnmadd_sd (a, b, c, m);
+  a = _mm_maskz_fnmadd_sd (m, a, b, c);
   a = _mm_fnmadd_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  a = _mm_mask_fnmadd_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  c = _mm_mask3_fnmadd_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  a = _mm_maskz_fnmadd_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-2.c
new file mode 100644 (file)
index 0000000..4715f2d
--- /dev/null
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+static void
+calc_add (double *s1, double *s2, double *s3, double* r)
+{
+  r[0] = -s1[0] * s2[0] + s3[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+static void
+calc_add_3 (double *s1, double *s2, double *s3, double* r)
+{
+  r[0] = -s2[0] * s3[0] + s1[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, res4, res5, res6, res7, src1, src2, src3;
+  MASK_TYPE mask = MASK_VALUE;
+  double res_ref1[SIZE], res_ref2[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = DEFAULT_VALUE;
+      src2.a[i] = 56.78 * (i + 1) * sign;
+      src3.a[i] = 90.12 * (i + 2) * sign;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    {
+      res1.a[i] = DEFAULT_VALUE;
+      res2.a[i] = DEFAULT_VALUE;
+      res5.a[i] = DEFAULT_VALUE;
+      res6.a[i] = DEFAULT_VALUE;
+    }
+
+  calc_add (src1.a, src2.a, src3.a, res_ref1);
+  calc_add_3(src1.a, src2.a, src3.a, res_ref2);
+
+  res1.x = _mm_mask_fnmadd_sd (src1.x, mask, src2.x, src3.x);
+  res2.x = _mm_mask3_fnmadd_sd (src2.x, src3.x, src1.x, mask);
+  res3.x = _mm_maskz_fnmadd_sd (mask, src1.x, src2.x, src3.x);
+  res4.x = _mm_fnmadd_round_sd (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res5.x = _mm_mask_fnmadd_round_sd (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res6.x = _mm_mask3_fnmadd_round_sd (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC);
+  res7.x = _mm_maskz_fnmadd_round_sd (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+
+  if (check_union128d (res4, res_ref1))
+    abort();
+
+  MASK_ZERO (d) (res_ref1, mask, 1);
+  if (check_union128d (res3, res_ref1))
+    abort ();
+
+  MASK_ZERO (d) (res_ref1, mask, 1);
+  if (check_union128d (res7, res_ref1))
+    abort ();
+
+  MASK_MERGE (d) (res_ref2, mask, 1);
+  if (check_union128d (res2, res_ref2))
+    abort ();
+
+  MASK_MERGE (d) (res_ref2, mask, 1);
+  if (check_union128d (res6, res_ref2))
+    abort ();
+
+  MASK_MERGE (d) (res_ref1, mask, 1);
+  if (check_union128d (res1, res_ref1))
+    abort ();
+  
+  MASK_MERGE (d) (res_ref1, mask, 1);
+  if (check_union128d (res5, res_ref1))
+    abort ();
+}
+
index c46944c5b9e027a2ac23b65c04b01890687e3464..85e8d00fa666c408af8ecd94df1e9f6de5795abb 100644 (file)
@@ -1,13 +1,26 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
 /* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231ss\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
 volatile __m128 a, b, c;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
+  a = _mm_mask_fnmadd_ss (a, m, b, c);
+  c = _mm_mask3_fnmadd_ss (a, b, c, m);
+  a = _mm_maskz_fnmadd_ss (m, a, b, c);
   a = _mm_fnmadd_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  a = _mm_mask_fnmadd_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  c = _mm_mask3_fnmadd_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  a = _mm_maskz_fnmadd_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c
new file mode 100644 (file)
index 0000000..e94b7d4
--- /dev/null
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+static void
+calc_add (float *s1, float *s2, float *s3, float* r)
+{
+  r[0] = -s1[0] * s2[0] + s3[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+static void
+calc_add_3 (float *s1, float *s2, float *s3, float* r)
+{
+  r[0] = -s2[0] * s3[0] + s1[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, res4, res5, res6, res7, src1, src2, src3;
+  MASK_TYPE mask = MASK_VALUE;
+  float res_ref1[SIZE], res_ref2[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = DEFAULT_VALUE;
+      src2.a[i] = 56.78 * (i + 1) * sign;
+      src3.a[i] = 90.12 * (i + 2) * sign;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    {
+      res1.a[i] = DEFAULT_VALUE;
+      res2.a[i] = DEFAULT_VALUE;
+      res5.a[i] = DEFAULT_VALUE;
+      res6.a[i] = DEFAULT_VALUE;
+    }
+
+  calc_add (src1.a, src2.a, src3.a, res_ref1);
+  calc_add_3(src1.a, src2.a, src3.a, res_ref2);
+
+  res1.x = _mm_mask_fnmadd_ss (src1.x, mask, src2.x, src3.x);
+  res2.x = _mm_mask3_fnmadd_ss (src2.x, src3.x, src1.x, mask);
+  res3.x = _mm_maskz_fnmadd_ss (mask, src1.x, src2.x, src3.x);
+  res4.x = _mm_fnmadd_round_ss (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res5.x = _mm_mask_fnmadd_round_ss (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res6.x = _mm_mask3_fnmadd_round_ss (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC);
+  res7.x = _mm_maskz_fnmadd_round_ss (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+
+  if (check_union128 (res4, res_ref1))
+    abort();
+
+  MASK_ZERO () (res_ref1, mask, 1);
+  if (check_union128 (res3, res_ref1))
+    abort ();
+
+  MASK_ZERO () (res_ref1, mask, 1);
+  if (check_union128 (res7, res_ref1))
+    abort ();
+
+  MASK_MERGE () (res_ref2, mask, 1);
+  if (check_union128 (res2, res_ref2))
+    abort ();
+
+  MASK_MERGE () (res_ref2, mask, 1);
+  if (check_union128 (res6, res_ref2))
+    abort ();
+
+  MASK_MERGE () (res_ref1, mask, 1);
+  if (check_union128 (res1, res_ref1))
+    abort ();
+  
+  MASK_MERGE () (res_ref1, mask, 1);
+  if (check_union128 (res5, res_ref1))
+    abort ();
+}
+
index 33b68e1737258cc1be30fcba7e42d901ad6034c5..4553e0d37725776b495284a4aba802ca0d488d54 100644 (file)
@@ -1,13 +1,26 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
 /* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231sd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
 volatile __m128d a, b, c;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
+  a = _mm_mask_fnmsub_sd (a, m, b, c);
+  c = _mm_mask3_fnmsub_sd (a, b, c, m);
+  a = _mm_maskz_fnmsub_sd (m, a, b, c);
   a = _mm_fnmsub_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  a = _mm_mask_fnmsub_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  c = _mm_mask3_fnmsub_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  a = _mm_maskz_fnmsub_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-2.c
new file mode 100644 (file)
index 0000000..ecb4bd5
--- /dev/null
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+static void
+calc_add (double *s1, double *s2, double *s3, double* r)
+{
+  r[0] = -s1[0] * s2[0] - s3[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+static void
+calc_add_3 (double *s1, double *s2, double *s3, double* r)
+{
+  r[0] = -s2[0] * s3[0] - s1[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, res4, res5, res6, res7, src1, src2, src3;
+  MASK_TYPE mask = MASK_VALUE;
+  double res_ref1[SIZE], res_ref2[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = DEFAULT_VALUE;
+      src2.a[i] = 56.78 * (i + 1) * sign;
+      src3.a[i] = 90.12 * (i + 2) * sign;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    {
+      res1.a[i] = DEFAULT_VALUE;
+      res2.a[i] = DEFAULT_VALUE;
+      res5.a[i] = DEFAULT_VALUE;
+      res6.a[i] = DEFAULT_VALUE;
+    }
+
+  calc_add (src1.a, src2.a, src3.a, res_ref1);
+  calc_add_3(src1.a, src2.a, src3.a, res_ref2);
+
+  res1.x = _mm_mask_fnmsub_sd (src1.x, mask, src2.x, src3.x);
+  res2.x = _mm_mask3_fnmsub_sd (src2.x, src3.x, src1.x, mask);
+  res3.x = _mm_maskz_fnmsub_sd (mask, src1.x, src2.x, src3.x);
+  res4.x = _mm_fnmsub_round_sd (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res5.x = _mm_mask_fnmsub_round_sd (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res6.x = _mm_mask3_fnmsub_round_sd (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC);
+  res7.x = _mm_maskz_fnmsub_round_sd (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+
+  if (check_union128d (res4, res_ref1))
+    abort();
+
+  MASK_ZERO (d) (res_ref1, mask, 1);
+  if (check_union128d (res3, res_ref1))
+    abort ();
+
+  MASK_ZERO (d) (res_ref1, mask, 1);
+  if (check_union128d (res7, res_ref1))
+    abort ();
+
+  MASK_MERGE (d) (res_ref2, mask, 1);
+  if (check_union128d (res2, res_ref2))
+    abort ();
+
+  MASK_MERGE (d) (res_ref2, mask, 1);
+  if (check_union128d (res6, res_ref2))
+    abort ();
+
+  MASK_MERGE (d) (res_ref1, mask, 1);
+  if (check_union128d (res1, res_ref1))
+    abort ();
+  
+  MASK_MERGE (d) (res_ref1, mask, 1);
+  if (check_union128d (res5, res_ref1))
+    abort ();
+}
+
index cd7b52007bc903195b2dd8760ff859f2dab505cc..f86ae2d44173b90a88317a59a3d44fd68f0248db 100644 (file)
@@ -1,13 +1,26 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
 /* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231ss\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
 volatile __m128 a, b, c;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
+  a = _mm_mask_fnmsub_ss (a, m, b, c);
+  c = _mm_mask3_fnmsub_ss (a, b, c, m);
+  a = _mm_maskz_fnmsub_ss (m, a, b, c);
   a = _mm_fnmsub_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  a = _mm_mask_fnmsub_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  c = _mm_mask3_fnmsub_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  a = _mm_maskz_fnmsub_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-2.c
new file mode 100644 (file)
index 0000000..d092988
--- /dev/null
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+static void
+calc_add (float *s1, float *s2, float *s3, float* r)
+{
+  r[0] = -s1[0] * s2[0] - s3[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+static void
+calc_add_3 (float *s1, float *s2, float *s3, float* r)
+{
+  r[0] = -s2[0] * s3[0] - s1[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, res4, res5, res6, res7, src1, src2, src3;
+  MASK_TYPE mask = MASK_VALUE;
+  float res_ref1[SIZE], res_ref2[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = DEFAULT_VALUE;
+      src2.a[i] = 56.78 * (i + 1) * sign;
+      src3.a[i] = 90.12 * (i + 2) * sign;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    {
+      res1.a[i] = DEFAULT_VALUE;
+      res2.a[i] = DEFAULT_VALUE;
+      res5.a[i] = DEFAULT_VALUE;
+      res6.a[i] = DEFAULT_VALUE;
+    }
+
+  calc_add (src1.a, src2.a, src3.a, res_ref1);
+  calc_add_3(src1.a, src2.a, src3.a, res_ref2);
+
+  res1.x = _mm_mask_fnmsub_ss (src1.x, mask, src2.x, src3.x);
+  res2.x = _mm_mask3_fnmsub_ss (src2.x, src3.x, src1.x, mask);
+  res3.x = _mm_maskz_fnmsub_ss (mask, src1.x, src2.x, src3.x);
+  res4.x = _mm_fnmsub_round_ss (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res5.x = _mm_mask_fnmsub_round_ss (src1.x, mask, src2.x, src3.x, _MM_FROUND_NO_EXC);
+  res6.x = _mm_mask3_fnmsub_round_ss (src2.x, src3.x, src1.x, mask, _MM_FROUND_NO_EXC);
+  res7.x = _mm_maskz_fnmsub_round_ss (mask, src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC);
+
+  if (check_union128 (res4, res_ref1))
+    abort();
+
+  MASK_ZERO () (res_ref1, mask, 1);
+  if (check_union128 (res3, res_ref1))
+    abort ();
+
+  MASK_ZERO () (res_ref1, mask, 1);
+  if (check_union128 (res7, res_ref1))
+    abort ();
+
+  MASK_MERGE () (res_ref2, mask, 1);
+  if (check_union128 (res2, res_ref2))
+    abort ();
+
+  MASK_MERGE () (res_ref2, mask, 1);
+  if (check_union128 (res6, res_ref2))
+    abort ();
+
+  MASK_MERGE () (res_ref1, mask, 1);
+  if (check_union128 (res1, res_ref1))
+    abort ();
+  
+  MASK_MERGE () (res_ref1, mask, 1);
+  if (check_union128 (res5, res_ref1))
+    abort ();
+}
+
index 64da3cd19928cae42d159157ff30f49ab693a50a..e868f6d293ff925b5d99bdce79f3d17aae07b190 100644 (file)
 #define __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, 8)
 #define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D)
 #define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D)
+#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 8)
 
 /* avx512erintrin.h */
 #define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask(A, B, C, 8)
index 0f663bec702f1409a849484e70fea10d6db13d35..748339f7d47c2403df005ac4d95b03e7a0d4858a 100644 (file)
@@ -534,6 +534,30 @@ test_4 (_mm512_maskz_fnmsub_round_pd, __m512d, __mmask8, __m512d, __m512d, __m51
 test_4 (_mm512_maskz_fnmsub_round_ps, __m512, __mmask16, __m512, __m512, __m512, 9)
 test_4 (_mm512_maskz_ternarylogic_epi32, __m512i, __mmask16, __m512i, __m512i, __m512i, 1)
 test_4 (_mm512_maskz_ternarylogic_epi64, __m512i, __mmask8, __m512i, __m512i, __m512i, 1)
+test_4 (_mm_mask_fmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
+test_4 (_mm_mask_fmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
+test_4 (_mm_mask3_fmadd_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9)
+test_4 (_mm_mask3_fmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9)
+test_4 (_mm_maskz_fmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9)
+test_4 (_mm_maskz_fmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9)
+test_4 (_mm_mask_fmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
+test_4 (_mm_mask_fmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
+test_4 (_mm_mask3_fmsub_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9)
+test_4 (_mm_mask3_fmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9)
+test_4 (_mm_maskz_fmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9)
+test_4 (_mm_maskz_fmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9)
+test_4 (_mm_mask_fnmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
+test_4 (_mm_mask_fnmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
+test_4 (_mm_mask3_fnmadd_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9)
+test_4 (_mm_mask3_fnmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9)
+test_4 (_mm_maskz_fnmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9)
+test_4 (_mm_maskz_fnmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9)
+test_4 (_mm_mask_fnmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
+test_4 (_mm_mask_fnmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
+test_4 (_mm_mask3_fnmsub_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9)
+test_4 (_mm_mask3_fnmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9)
+test_4 (_mm_maskz_fnmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9)
+test_4 (_mm_maskz_fnmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9)
 test_4v (_mm512_mask_i32scatter_epi32, void *, __mmask16, __m512i, __m512i, 1)
 test_4v (_mm512_mask_i32scatter_epi64, void *, __mmask8, __m256i, __m512i, 1)
 test_4v (_mm512_mask_i32scatter_pd, void *, __mmask8, __m256i, __m512d, 1)
index 99af58a995d39d0a1bc4013f256d740e5ee4267a..0c62f2049c289db2839dc0bc525d7e33dea37130 100644 (file)
@@ -633,6 +633,30 @@ test_4 (_mm512_maskz_fnmsub_round_pd, __m512d, __mmask8, __m512d, __m512d, __m51
 test_4 (_mm512_maskz_fnmsub_round_ps, __m512, __mmask16, __m512, __m512, __m512, 9)
 test_4 (_mm512_maskz_ternarylogic_epi32, __m512i, __mmask16, __m512i, __m512i, __m512i, 1)
 test_4 (_mm512_maskz_ternarylogic_epi64, __m512i, __mmask8, __m512i, __m512i, __m512i, 1)
+test_4 (_mm_mask_fmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
+test_4 (_mm_mask_fmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
+test_4 (_mm_mask3_fmadd_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9)
+test_4 (_mm_mask3_fmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9)
+test_4 (_mm_maskz_fmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9)
+test_4 (_mm_maskz_fmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9)
+test_4 (_mm_mask_fmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
+test_4 (_mm_mask_fmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
+test_4 (_mm_mask3_fmsub_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9)
+test_4 (_mm_mask3_fmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9)
+test_4 (_mm_maskz_fmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9)
+test_4 (_mm_maskz_fmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9)
+test_4 (_mm_mask_fnmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
+test_4 (_mm_mask_fnmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
+test_4 (_mm_mask3_fnmadd_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9)
+test_4 (_mm_mask3_fnmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9)
+test_4 (_mm_maskz_fnmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9)
+test_4 (_mm_maskz_fnmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9)
+test_4 (_mm_mask_fnmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
+test_4 (_mm_mask_fnmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
+test_4 (_mm_mask3_fnmsub_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 9)
+test_4 (_mm_mask3_fnmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, 9)
+test_4 (_mm_maskz_fnmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 9)
+test_4 (_mm_maskz_fnmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, 9)
 test_4v (_mm512_mask_i32scatter_epi32, void *, __mmask16, __m512i, __m512i, 1)
 test_4v (_mm512_mask_i32scatter_epi64, void *, __mmask8, __m256i, __m512i, 1)
 test_4v (_mm512_mask_i32scatter_pd, void *, __mmask8, __m256i, __m512d, 1)
index f9d372c47e2580acddfe71bca2278fc37a306f08..78a3c0aa26e6e4c7b6f4f4607c6b4ccc031aefd7 100644 (file)
 #define __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, 8)
 #define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D)
 #define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D)
+#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 8)
 
 /* avx512pfintrin.h */
 #define __builtin_ia32_gatherpfdps(A, B, C, D, E) __builtin_ia32_gatherpfdps(A, B, C, 1, _MM_HINT_T0)