Enable AVX512 memory broadcast for FMSUB
authorH.J. Lu <hongjiu.lu@intel.com>
Sun, 21 Oct 2018 20:24:50 +0000 (20:24 +0000)
committerH.J. Lu <hjl@gcc.gnu.org>
Sun, 21 Oct 2018 20:24:50 +0000 (13:24 -0700)
Many AVX512 vector operations can broadcast from a scalar memory source.
This patch enables memory broadcast for FMSUB operations.  In order to
support AVX512 memory broadcast for FMSUB, FMSUB builtin functions are
also added, instead of passing the negated value to FMA builtin functions.

gcc/

PR target/72782
* config/i386/avx512fintrin.h (_mm512_fmsub_round_pd): Use
__builtin_ia32_vfmsubpd512_mask.
(_mm512_mask_fmsub_round_pd): Likewise.
(_mm512_fmsub_pd): Likewise.
(_mm512_mask_fmsub_pd): Likewise.
(_mm512_maskz_fmsub_round_pd): Use
__builtin_ia32_vfmsubpd512_maskz.
(_mm512_maskz_fmsub_pd): Likewise.
(_mm512_fmsub_round_ps): Use __builtin_ia32_vfmsubps512_mask.
(_mm512_mask_fmsub_round_ps): Likewise.
(_mm512_fmsub_ps): Likewise.
(_mm512_mask_fmsub_ps): Likewise.
(_mm512_maskz_fmsub_round_ps): Use
__builtin_ia32_vfmsubps512_maskz.
(_mm512_maskz_fmsub_ps): Likewise.
* config/i386/avx512vlintrin.h (_mm256_mask_fmsub_pd): Use
__builtin_ia32_vfmsubpd256_mask.
(_mm256_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd256_maskz.
(_mm_mask_fmsub_pd): Use __builtin_ia32_vfmaddpd128_mask
(_mm_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd128_maskz.
(_mm256_mask_fmsub_ps): Use __builtin_ia32_vfmsubps256_mask.
(_mm256_mask_fmsub_ps): Use __builtin_ia32_vfmsubps256_mask.
(_mm256_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps256_maskz.
(_mm_mask_fmsub_ps): Use __builtin_ia32_vfmsubps128_mask.
(_mm_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps128_maskz.
* config/i386/fmaintrin.h (_mm_fmsub_pd): Use
__builtin_ia32_vfmsubpd.
(_mm256_fmsub_pd): Use __builtin_ia32_vfmsubpd256.
(_mm_fmsub_ps): Use __builtin_ia32_vfmsubps.
(_mm256_fmsub_ps): Use __builtin_ia32_vfmsubps256.
(_mm_fmsub_sd): Use __builtin_ia32_vfmsubsd3.
(_mm_fmsub_ss): Use __builtin_ia32_vfmsubss3.
* config/i386/i386-builtin.def: Add
__builtin_ia32_vfmsubpd256_mask,
__builtin_ia32_vfmsubpd256_maskz,
__builtin_ia32_vfmsubpd128_mask,
__builtin_ia32_vfmsubpd128_maskz,
__builtin_ia32_vfmsubps256_mask,
__builtin_ia32_vfmsubps256_maskz,
__builtin_ia32_vfmsubps128_mask,
__builtin_ia32_vfmsubps128_maskz,
__builtin_ia32_vfmsubpd512_mask,
__builtin_ia32_vfmsubpd512_maskz,
__builtin_ia32_vfmsubps512_mask,
__builtin_ia32_vfmsubps512_maskz, __builtin_ia32_vfmsubss3,
__builtin_ia32_vfmsubsd3, __builtin_ia32_vfmsubps,
__builtin_ia32_vfmsubpd, __builtin_ia32_vfmsubps256 and.
__builtin_ia32_vfmsubpd256.
* config/i386/sse.md (fma4i_fmsub_<mode>): New.
(<avx512>_fmsub_<mode>_maskz<round_expand_name>): Likewise.
(*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1):
Likewise.
(*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2):
Likewise.
(*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3):
Likewise.
(fmai_vmfmsub_<mode><round_name>): Likewise.

gcc/testsuite/

PR target/72782
* gcc.target/i386/avx512f-fmsub-df-zmm-1.c: New test.
* gcc.target/i386/avx512f-fmsub-sf-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-fmsub-sf-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-fmsub-sf-zmm-3.c: Likewise.
* gcc.target/i386/avx512f-fmsub-sf-zmm-4.c: Likewise.
* gcc.target/i386/avx512f-fmsub-sf-zmm-5.c: Likewise.
* gcc.target/i386/avx512f-fmsub-sf-zmm-6.c: Likewise.
* gcc.target/i386/avx512f-fmsub-sf-zmm-7.c: Likewise.
* gcc.target/i386/avx512f-fmsub-sf-zmm-8.c: Likewise.
* gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c: Likewise.
* gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c: Likewise.

From-SVN: r265356

18 files changed:
gcc/ChangeLog
gcc/config/i386/avx512fintrin.h
gcc/config/i386/avx512vlintrin.h
gcc/config/i386/fmaintrin.h
gcc/config/i386/i386-builtin.def
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx512f-fmsub-df-zmm-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-7.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-8.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c [new file with mode: 0644]

index 505c949c0136e20c01fe4663b45e1115d13464ca..11d05f7e9606d001389e641463b11dd50d494f05 100644 (file)
@@ -1,3 +1,64 @@
+2018-10-21  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/72782
+       * config/i386/avx512fintrin.h (_mm512_fmsub_round_pd): Use
+       __builtin_ia32_vfmsubpd512_mask.
+       (_mm512_mask_fmsub_round_pd): Likewise.
+       (_mm512_fmsub_pd): Likewise.
+       (_mm512_mask_fmsub_pd): Likewise.
+       (_mm512_maskz_fmsub_round_pd): Use
+       __builtin_ia32_vfmsubpd512_maskz.
+       (_mm512_maskz_fmsub_pd): Likewise.
+       (_mm512_fmsub_round_ps): Use __builtin_ia32_vfmsubps512_mask.
+       (_mm512_mask_fmsub_round_ps): Likewise.
+       (_mm512_fmsub_ps): Likewise.
+       (_mm512_mask_fmsub_ps): Likewise.
+       (_mm512_maskz_fmsub_round_ps): Use
+       __builtin_ia32_vfmsubps512_maskz.
+       (_mm512_maskz_fmsub_ps): Likewise.
+       * config/i386/avx512vlintrin.h (_mm256_mask_fmsub_pd): Use
+       __builtin_ia32_vfmsubpd256_mask.
+       (_mm256_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd256_maskz.
+       (_mm_mask_fmsub_pd): Use __builtin_ia32_vfmaddpd128_mask
+       (_mm_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd128_maskz.
+       (_mm256_mask_fmsub_ps): Use __builtin_ia32_vfmsubps256_mask.
+       (_mm256_mask_fmsub_ps): Use __builtin_ia32_vfmsubps256_mask.
+       (_mm256_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps256_maskz.
+       (_mm_mask_fmsub_ps): Use __builtin_ia32_vfmsubps128_mask.
+       (_mm_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps128_maskz.
+       * config/i386/fmaintrin.h (_mm_fmsub_pd): Use
+       __builtin_ia32_vfmsubpd.
+       (_mm256_fmsub_pd): Use __builtin_ia32_vfmsubpd256.
+       (_mm_fmsub_ps): Use __builtin_ia32_vfmsubps.
+       (_mm256_fmsub_ps): Use __builtin_ia32_vfmsubps256.
+       (_mm_fmsub_sd): Use __builtin_ia32_vfmsubsd3.
+       (_mm_fmsub_ss): Use __builtin_ia32_vfmsubss3.
+       * config/i386/i386-builtin.def: Add
+       __builtin_ia32_vfmsubpd256_mask,
+       __builtin_ia32_vfmsubpd256_maskz,
+       __builtin_ia32_vfmsubpd128_mask,
+       __builtin_ia32_vfmsubpd128_maskz,
+       __builtin_ia32_vfmsubps256_mask,
+       __builtin_ia32_vfmsubps256_maskz,
+       __builtin_ia32_vfmsubps128_mask,
+       __builtin_ia32_vfmsubps128_maskz,
+       __builtin_ia32_vfmsubpd512_mask,
+       __builtin_ia32_vfmsubpd512_maskz,
+       __builtin_ia32_vfmsubps512_mask,
+       __builtin_ia32_vfmsubps512_maskz, __builtin_ia32_vfmsubss3,
+       __builtin_ia32_vfmsubsd3, __builtin_ia32_vfmsubps,
+       __builtin_ia32_vfmsubpd, __builtin_ia32_vfmsubps256 and.
+       __builtin_ia32_vfmsubpd256.
+       * config/i386/sse.md (fma4i_fmsub_<mode>): New.
+       (<avx512>_fmsub_<mode>_maskz<round_expand_name>): Likewise.
+       (*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1):
+       Likewise.
+       (*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2):
+       Likewise.
+       (*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3):
+       Likewise.
+       (fmai_vmfmsub_<mode><round_name>): Likewise.
+
 2018-10-21  H.J. Lu  <hongjiu.lu@intel.com>
 
        * config/i386/sse.md (*<plusminus_insn><mode>3<mask_name>_bcst_1):
index 8473cd0d26c0b13d412d52d6b0f86d31437c26f9..c0c8fa1efd040a6052da12c9d21e576d9ce2c5a1 100644 (file)
@@ -3355,9 +3355,9 @@ extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+  return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
-                                                   -(__v8df) __C,
+                                                   (__v8df) __C,
                                                    (__mmask8) -1, __R);
 }
 
@@ -3366,9 +3366,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
                            __m512d __C, const int __R)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+  return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
-                                                   -(__v8df) __C,
+                                                   (__v8df) __C,
                                                    (__mmask8) __U, __R);
 }
 
@@ -3388,9 +3388,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
                             __m512d __C, const int __R)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+  return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
                                                     (__v8df) __B,
-                                                    -(__v8df) __C,
+                                                    (__v8df) __C,
                                                     (__mmask8) __U, __R);
 }
 
@@ -3398,9 +3398,9 @@ extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+  return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
-                                                  -(__v16sf) __C,
+                                                  (__v16sf) __C,
                                                   (__mmask16) -1, __R);
 }
 
@@ -3409,9 +3409,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
                            __m512 __C, const int __R)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+  return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
-                                                  -(__v16sf) __C,
+                                                  (__v16sf) __C,
                                                   (__mmask16) __U, __R);
 }
 
@@ -3431,9 +3431,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
                             __m512 __C, const int __R)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+  return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
                                                    (__v16sf) __B,
-                                                   -(__v16sf) __C,
+                                                   (__v16sf) __C,
                                                    (__mmask16) __U, __R);
 }
 
@@ -3806,28 +3806,28 @@ _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
     (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
 
 #define _mm512_fmsub_round_pd(A, B, C, R)            \
-    (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R)
+    (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, -1, R)
 
 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R)    \
-    (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R)
+    (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, U, R)
 
 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R)   \
     (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
 
 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R)   \
-    (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R)
+    (__m512d)__builtin_ia32_vfmsubpd512_maskz(A, B, C, U, R)
 
 #define _mm512_fmsub_round_ps(A, B, C, R)            \
-    (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R)
+    (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, -1, R)
 
 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R)    \
-    (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R)
+    (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, U, R)
 
 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R)   \
     (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
 
 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R)   \
-    (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R)
+    (__m512)__builtin_ia32_vfmsubps512_maskz(A, B, C, U, R)
 
 #define _mm512_fmaddsub_round_pd(A, B, C, R)            \
     (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
@@ -12416,9 +12416,9 @@ extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+  return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
-                                                   -(__v8df) __C,
+                                                   (__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
 }
@@ -12427,9 +12427,9 @@ extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+  return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
-                                                   -(__v8df) __C,
+                                                   (__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
 }
@@ -12449,9 +12449,9 @@ extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+  return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
                                                     (__v8df) __B,
-                                                    -(__v8df) __C,
+                                                    (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
 }
@@ -12460,9 +12460,9 @@ extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+  return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
-                                                  -(__v16sf) __C,
+                                                  (__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
 }
@@ -12471,9 +12471,9 @@ extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+  return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
-                                                  -(__v16sf) __C,
+                                                  (__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
 }
@@ -12493,9 +12493,9 @@ extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+  return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
                                                    (__v16sf) __B,
-                                                   -(__v16sf) __C,
+                                                   (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
 }
index a4fb0b0ac0029158b86eb7e3f32a1d92c26cb1d1..fcc35c30076f302e80af4c38f0a11c061725a3f2 100644 (file)
@@ -4117,9 +4117,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
                      __m256d __C)
 {
-  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
+  return (__m256d) __builtin_ia32_vfmsubpd256_mask ((__v4df) __A,
                                                    (__v4df) __B,
-                                                   -(__v4df) __C,
+                                                   (__v4df) __C,
                                                    (__mmask8) __U);
 }
 
@@ -4139,9 +4139,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
                       __m256d __C)
 {
-  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
+  return (__m256d) __builtin_ia32_vfmsubpd256_maskz ((__v4df) __A,
                                                     (__v4df) __B,
-                                                    -(__v4df) __C,
+                                                    (__v4df) __C,
                                                     (__mmask8) __U);
 }
 
@@ -4149,9 +4149,9 @@ extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
+  return (__m128d) __builtin_ia32_vfmsubpd128_mask ((__v2df) __A,
                                                    (__v2df) __B,
-                                                   -(__v2df) __C,
+                                                   (__v2df) __C,
                                                    (__mmask8) __U);
 }
 
@@ -4171,9 +4171,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
                    __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
+  return (__m128d) __builtin_ia32_vfmsubpd128_maskz ((__v2df) __A,
                                                     (__v2df) __B,
-                                                    -(__v2df) __C,
+                                                    (__v2df) __C,
                                                     (__mmask8) __U);
 }
 
@@ -4181,9 +4181,9 @@ extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
 {
-  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
+  return (__m256) __builtin_ia32_vfmsubps256_mask ((__v8sf) __A,
                                                   (__v8sf) __B,
-                                                  -(__v8sf) __C,
+                                                  (__v8sf) __C,
                                                   (__mmask8) __U);
 }
 
@@ -4203,9 +4203,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
                       __m256 __C)
 {
-  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
+  return (__m256) __builtin_ia32_vfmsubps256_maskz ((__v8sf) __A,
                                                    (__v8sf) __B,
-                                                   -(__v8sf) __C,
+                                                   (__v8sf) __C,
                                                    (__mmask8) __U);
 }
 
@@ -4213,9 +4213,9 @@ extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
+  return (__m128) __builtin_ia32_vfmsubps128_mask ((__v4sf) __A,
                                                   (__v4sf) __B,
-                                                  -(__v4sf) __C,
+                                                  (__v4sf) __C,
                                                   (__mmask8) __U);
 }
 
@@ -4233,9 +4233,9 @@ extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
+  return (__m128) __builtin_ia32_vfmsubps128_maskz ((__v4sf) __A,
                                                    (__v4sf) __B,
-                                                   -(__v4sf) __C,
+                                                   (__v4sf) __C,
                                                    (__mmask8) __U);
 }
 
index 660d3453590cb6f44747ba29f897c28ce7e2d1ea..2eddd896579905585e5d896392da4d34cccf7133 100644 (file)
@@ -86,48 +86,48 @@ extern __inline __m128d
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
-                                           -(__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmsubpd ((__v2df)__A, (__v2df)__B,
+                                           (__v2df)__C);
 }
 
 extern __inline __m256d
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B,
-                                              -(__v4df)__C);
+  return (__m256d)__builtin_ia32_vfmsubpd256 ((__v4df)__A, (__v4df)__B,
+                                              (__v4df)__C);
 }
 
 extern __inline __m128
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B,
-                                          -(__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmsubps ((__v4sf)__A, (__v4sf)__B,
+                                          (__v4sf)__C);
 }
 
 extern __inline __m256
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B,
-                                             -(__v8sf)__C);
+  return (__m256)__builtin_ia32_vfmsubps256 ((__v8sf)__A, (__v8sf)__B,
+                                             (__v8sf)__C);
 }
 
 extern __inline __m128d
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B,
-                                            -(__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmsubsd3 ((__v2df)__A, (__v2df)__B,
+                                            (__v2df)__C);
 }
 
 extern __inline __m128
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B,
-                                           -(__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmsubss3 ((__v4sf)__A, (__v4sf)__B,
+                                           (__v4sf)__C);
 }
 
 extern __inline __m128d
index dc4c70c7ea33a31f927f7e4b004f3f6c42a4325e..f5b5e56a01cf8d9517649cd7c7c377166529eb24 100644 (file)
@@ -1903,10 +1903,18 @@ BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask, "__builtin_ia32_vfmsubpd256_mask", IX86_BUILTIN_VFMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_maskz, "__builtin_ia32_vfmsubpd256_maskz", IX86_BUILTIN_VFMSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask, "__builtin_ia32_vfmsubpd128_mask", IX86_BUILTIN_VFMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_maskz, "__builtin_ia32_vfmsubpd128_maskz", IX86_BUILTIN_VFMSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask, "__builtin_ia32_vfmsubps256_mask", IX86_BUILTIN_VFMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_maskz, "__builtin_ia32_vfmsubps256_maskz", IX86_BUILTIN_VFMSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask, "__builtin_ia32_vfmsubps128_mask", IX86_BUILTIN_VFMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_maskz, "__builtin_ia32_vfmsubps128_maskz", IX86_BUILTIN_VFMSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
@@ -2768,8 +2776,12 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask_round, "__builtin_ia32_vfmsubpd512_mask", IX86_BUILTIN_VFMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_maskz_round, "__builtin_ia32_vfmsubpd512_maskz", IX86_BUILTIN_VFMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask_round, "__builtin_ia32_vfmsubps512_mask", IX86_BUILTIN_VFMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
@@ -2855,11 +2867,17 @@ BDESC_FIRST (multi_arg, MULTI_ARG,
 BDESC (OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF)
 BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF)
 BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
+BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v4sf, "__builtin_ia32_vfmsubss3", IX86_BUILTIN_VFMSUBSS3, UNKNOWN, (int)MULTI_ARG_3_SF)
+BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v2df, "__builtin_ia32_vfmsubsd3", IX86_BUILTIN_VFMSUBSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
 
 BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF)
 BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF)
 BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2)
 BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v4sf, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v2df, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v8sf, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v4df, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2)
 
 BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF)
 BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF)
index 520afc5627294ee9213b3cf434e95f2ebb1eb8c6..0fdaaed04aa50d2bafcd98e73e6879d01811d816 100644 (file)
          (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
          (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
 
+(define_expand "fma4i_fmsub_<mode>"
+  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
+       (fma:FMAMODE_AVX512
+         (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
+         (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
+         (neg:FMAMODE_AVX512
+           (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
+
 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
   [(match_operand:VF_AVX512VL 0 "register_operand")
    (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
+(define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
+  [(match_operand:VF_AVX512VL 0 "register_operand")
+   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
+   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
+   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
+   (match_operand:<avx512fmaskmode> 4 "register_operand")]
+  "TARGET_AVX512F && <round_mode512bit_condition>"
+{
+  emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
+    operands[0], operands[1], operands[2], operands[3],
+    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
+  DONE;
+})
+
 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
   [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
        (fma:VF_SF_AVX512VL
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1"
+  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+       (fma:VF_AVX512
+         (match_operand:VF_AVX512 1 "register_operand" "0,v")
+         (match_operand:VF_AVX512 2 "register_operand" "v,0")
+         (neg:VF_AVX512
+           (vec_duplicate:VF_AVX512
+             (match_operand:<ssescalarmode> 3 "memory_operand" "m,m")))))]
+  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+  "vfmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2"
+  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+       (fma:VF_AVX512
+         (vec_duplicate:VF_AVX512
+           (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
+         (match_operand:VF_AVX512 2 "register_operand" "0,v")
+         (neg:VF_AVX512
+           (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
+  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+  "@
+   vfmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
+   vfmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3"
+  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+       (fma:VF_AVX512
+         (match_operand:VF_AVX512 1 "register_operand" "0,v")
+         (vec_duplicate:VF_AVX512
+           (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
+         (neg:VF_AVX512
+           (match_operand:VF_AVX512 3 "nonimmediate_operand" "v,0"))))]
+  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+  "@
+   vfmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
+   vfmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
   [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VF_AVX512VL
          (const_int 1)))]
   "TARGET_FMA")
 
+(define_expand "fmai_vmfmsub_<mode><round_name>"
+  [(set (match_operand:VF_128 0 "register_operand")
+       (vec_merge:VF_128
+         (fma:VF_128
+           (match_operand:VF_128 1 "<round_nimm_predicate>")
+           (match_operand:VF_128 2 "<round_nimm_predicate>")
+           (neg:VF_128
+             (match_operand:VF_128 3 "<round_nimm_predicate>")))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_FMA")
+
 (define_insn "*fmai_fmadd_<mode>"
   [(set (match_operand:VF_128 0 "register_operand" "=v,v")
         (vec_merge:VF_128
index c3ea50c8e3ac0541f8d4bd7f59932592a8a7d23d..a1d22409d1b55a23c68422418ca540fee4724ba6 100644 (file)
@@ -1,3 +1,18 @@
+2018-10-21  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/72782
+       * gcc.target/i386/avx512f-fmsub-df-zmm-1.c: New test.
+       * gcc.target/i386/avx512f-fmsub-sf-zmm-1.c: Likewise.
+       * gcc.target/i386/avx512f-fmsub-sf-zmm-2.c: Likewise.
+       * gcc.target/i386/avx512f-fmsub-sf-zmm-3.c: Likewise.
+       * gcc.target/i386/avx512f-fmsub-sf-zmm-4.c: Likewise.
+       * gcc.target/i386/avx512f-fmsub-sf-zmm-5.c: Likewise.
+       * gcc.target/i386/avx512f-fmsub-sf-zmm-6.c: Likewise.
+       * gcc.target/i386/avx512f-fmsub-sf-zmm-7.c: Likewise.
+       * gcc.target/i386/avx512f-fmsub-sf-zmm-8.c: Likewise.
+       * gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c: Likewise.
+       * gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c: Likewise.
+
 2018-10-21  Paul Thomas  <pault@gcc.gnu.org>
 
        PR fortran/71880
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-df-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-df-zmm-1.c
new file mode 100644 (file)
index 0000000..840888a
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...pd\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastsd\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512d
+#define vec 512
+#define op fmsub
+#define suffix pd
+#define SCALAR double
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-1.c
new file mode 100644 (file)
index 0000000..0cb675b
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-2.c
new file mode 100644 (file)
index 0000000..10212d4
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-2.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-3.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-3.c
new file mode 100644 (file)
index 0000000..feb3407
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-3.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-4.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-4.c
new file mode 100644 (file)
index 0000000..4305fff
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-4.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-5.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-5.c
new file mode 100644 (file)
index 0000000..d57251f
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-5.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-6.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-6.c
new file mode 100644 (file)
index 0000000..b26a9ee
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-6.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-7.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-7.c
new file mode 100644 (file)
index 0000000..cc705af
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[^\n\]*%zmm\[0-9\]+" 1 } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-7.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-8.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-8.c
new file mode 100644 (file)
index 0000000..2b929fa
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+%zmm\[0-9\]+, %zmm\[0-9\]+, %zmm0" 1 } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-8.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c
new file mode 100644 (file)
index 0000000..70efbcc
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mfma -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %xmm\[0-9\]+, %xmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%xmm\[0-9\]+" } } */
+
+#define type __m128
+#define vec
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c
new file mode 100644 (file)
index 0000000..a7c1b37
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mfma -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %ymm\[0-9\]+, %ymm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%ymm\[0-9\]+" } } */
+
+#define type __m256
+#define vec 256
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"