i386: Enable AVX512 memory broadcast for FMA
authorH.J. Lu <hongjiu.lu@intel.com>
Thu, 18 Oct 2018 20:38:41 +0000 (20:38 +0000)
committerH.J. Lu <hjl@gcc.gnu.org>
Thu, 18 Oct 2018 20:38:41 +0000 (13:38 -0700)
Many AVX512 vector operations can broadcast from a scalar memory source.
This patch enables memory broadcast for FMA operations.

gcc/

PR target/72782
* config/i386/sse.md (VF_AVX512): New.
(avx512bcst): Likewise.
(*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1):
Likewise.
(*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2):
Likewise.
(*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3):
Likewise.

gcc/testsuite/

PR target/72782
* gcc.target/i386/avx512-fma-1.h: New file.
* gcc.target/i386/avx512-fma-2.h: Likewise.
* gcc.target/i386/avx512-fma-3.h: Likewise.
* gcc.target/i386/avx512-fma-4.h: Likewise.
* gcc.target/i386/avx512-fma-5.h: Likewise.
* gcc.target/i386/avx512-fma-6.h: Likewise.
* gcc.target/i386/avx512-fma-7.h: Likewise.
* gcc.target/i386/avx512-fma-8.h: Likewise.
* gcc.target/i386/avx512f-fmadd-df-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-fmadd-sf-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-fmadd-sf-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-fmadd-sf-zmm-3.c: Likewise.
* gcc.target/i386/avx512f-fmadd-sf-zmm-4.c: Likewise.
* gcc.target/i386/avx512f-fmadd-sf-zmm-5.c: Likewise.
* gcc.target/i386/avx512f-fmadd-sf-zmm-6.c: Likewise.
* gcc.target/i386/avx512f-fmadd-sf-zmm-7.c: Likewise.
* gcc.target/i386/avx512f-fmadd-sf-zmm-8.c: Likewise.
* gcc.target/i386/avx512vl-fmadd-sf-xmm-1.c: Likewise.
* gcc.target/i386/avx512vl-fmadd-sf-ymm-1.c: Likewise.

From-SVN: r265288

22 files changed:
gcc/ChangeLog
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx512-fma-1.h [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512-fma-2.h [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512-fma-3.h [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512-fma-4.h [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512-fma-5.h [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512-fma-6.h [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512-fma-7.h [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512-fma-8.h [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmadd-df-zmm-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-7.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-8.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-xmm-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-ymm-1.c [new file with mode: 0644]

index 0854db68602b7c20dfde38a6dfef044b86f39dd5..e36ecd20e665d4ef9d1076c41fe12b9dda3529e3 100644 (file)
@@ -1,3 +1,15 @@
+2018-10-18  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/72782
+       * config/i386/sse.md (VF_AVX512): New.
+       (avx512bcst): Likewise.
+       (*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1):
+       Likewise.
+       (*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2):
+       Likewise.
+       (*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3):
+       Likewise.
+
 2018-10-18  Jonathan Wakely  <jwakely@redhat.com>
 
        * doc/invoke.texi (-dumpversion): Improve grammar.
index ff9f81535a9a2dd8ee8ccf56d89dbb0f6a1a053b..71684d63423d74d0bcda5ae9c70b48bbd1be8700 100644 (file)
    (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
 
+(define_mode_iterator VF_AVX512
+  [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
+   (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
+   V16SF V8DF])
+
+(define_mode_attr avx512bcst
+  [(V4SF "%{1to4%}") (V2DF "%{1to2%}")
+   (V8SF "%{1to8%}") (V4DF "%{1to4%}")
+   (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
+
 ;; Mapping from float mode to required SSE level
 (define_mode_attr sse
   [(SF "sse") (DF "sse2")
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1"
+  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+       (fma:VF_AVX512
+         (match_operand:VF_AVX512 1 "nonimmediate_operand" "0,v")
+         (match_operand:VF_AVX512 2 "nonimmediate_operand" "v,0")
+         (vec_duplicate:VF_AVX512
+           (match_operand:<ssescalarmode> 3 "memory_operand" "m,m"))))]
+  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+  "vfmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2"
+  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+       (fma:VF_AVX512
+         (vec_duplicate:VF_AVX512
+           (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
+         (match_operand:VF_AVX512 2 "nonimmediate_operand" "0,v")
+         (match_operand:VF_AVX512 3 "nonimmediate_operand" "v,0")))]
+  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+  "@
+   vfmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
+   vfmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3"
+  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+       (fma:VF_AVX512
+         (match_operand:VF_AVX512 1 "nonimmediate_operand" "0,v")
+         (vec_duplicate:VF_AVX512
+           (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
+         (match_operand:VF_AVX512 3 "nonimmediate_operand" "v,0")))]
+  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+  "@
+   vfmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
+   vfmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
   [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VF_AVX512VL
index c2a3bd1971e14a6251255070ea03b1c3840e4bd1..742316dd054edd5a0116dc3d3b812200ebc54cc7 100644 (file)
@@ -1,3 +1,26 @@
+2018-10-18  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/72782
+       * gcc.target/i386/avx512-fma-1.h: New file.
+       * gcc.target/i386/avx512-fma-2.h: Likewise.
+       * gcc.target/i386/avx512-fma-3.h: Likewise.
+       * gcc.target/i386/avx512-fma-4.h: Likewise.
+       * gcc.target/i386/avx512-fma-5.h: Likewise.
+       * gcc.target/i386/avx512-fma-6.h: Likewise.
+       * gcc.target/i386/avx512-fma-7.h: Likewise.
+       * gcc.target/i386/avx512-fma-8.h: Likewise.
+       * gcc.target/i386/avx512f-fmadd-df-zmm-1.c: Likewise.
+       * gcc.target/i386/avx512f-fmadd-sf-zmm-1.c: Likewise.
+       * gcc.target/i386/avx512f-fmadd-sf-zmm-2.c: Likewise.
+       * gcc.target/i386/avx512f-fmadd-sf-zmm-3.c: Likewise.
+       * gcc.target/i386/avx512f-fmadd-sf-zmm-4.c: Likewise.
+       * gcc.target/i386/avx512f-fmadd-sf-zmm-5.c: Likewise.
+       * gcc.target/i386/avx512f-fmadd-sf-zmm-6.c: Likewise.
+       * gcc.target/i386/avx512f-fmadd-sf-zmm-7.c: Likewise.
+       * gcc.target/i386/avx512f-fmadd-sf-zmm-8.c: Likewise.
+       * gcc.target/i386/avx512vl-fmadd-sf-xmm-1.c: Likewise.
+       * gcc.target/i386/avx512vl-fmadd-sf-ymm-1.c: Likewise.
+
 2018-10-18  Tobias Burnus  <burnus@net-b.de>
 
        PR fortran/87625
diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-1.h b/gcc/testsuite/gcc.target/i386/avx512-fma-1.h
new file mode 100644 (file)
index 0000000..a8dc0b5
--- /dev/null
@@ -0,0 +1,12 @@
+#include <immintrin.h>
+
+#define PASTER2(x,y)           x##y
+#define PASTER3(x,y,z)         _mm##x##_##y##_##z
+#define OP(vec, op, suffix)    PASTER3 (vec, op, suffix)
+#define DUP(vec, suffix, val)  PASTER3 (vec, set1, suffix) (val)
+
+type
+foo (type x, type y, SCALAR *f)
+{
+  return OP (vec, op, suffix) (x, y, DUP (vec, suffix, *f));
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-2.h b/gcc/testsuite/gcc.target/i386/avx512-fma-2.h
new file mode 100644 (file)
index 0000000..a3a0e9c
--- /dev/null
@@ -0,0 +1,13 @@
+#include <immintrin.h>
+
+#define PASTER2(x,y)           x##y
+#define PASTER3(x,y,z)         _mm##x##_##y##_##z
+#define TYPE(vec)              PASTER2 (__m, vec)
+#define OP(vec, op, suffix)    PASTER3 (vec, op, suffix)
+#define DUP(vec, suffix, val)  PASTER3 (vec, set1, suffix) (val)
+
+type
+foo (type x, type y, SCALAR *f)
+{
+  return OP (vec, op, suffix) (y, x, DUP (vec, suffix, *f));
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-3.h b/gcc/testsuite/gcc.target/i386/avx512-fma-3.h
new file mode 100644 (file)
index 0000000..8c38d17
--- /dev/null
@@ -0,0 +1,13 @@
+#include <immintrin.h>
+
+#define PASTER2(x,y)           x##y
+#define PASTER3(x,y,z)         _mm##x##_##y##_##z
+#define TYPE(vec)              PASTER2 (__m, vec)
+#define OP(vec, op, suffix)    PASTER3 (vec, op, suffix)
+#define DUP(vec, suffix, val)  PASTER3 (vec, set1, suffix) (val)
+
+type
+foo (type x, type y, SCALAR *f)
+{
+  return OP (vec, op, suffix) (x, DUP (vec, suffix, *f), y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-4.h b/gcc/testsuite/gcc.target/i386/avx512-fma-4.h
new file mode 100644 (file)
index 0000000..fec01c2
--- /dev/null
@@ -0,0 +1,13 @@
+#include <immintrin.h>
+
+#define PASTER2(x,y)           x##y
+#define PASTER3(x,y,z)         _mm##x##_##y##_##z
+#define TYPE(vec)              PASTER2 (__m, vec)
+#define OP(vec, op, suffix)    PASTER3 (vec, op, suffix)
+#define DUP(vec, suffix, val)  PASTER3 (vec, set1, suffix) (val)
+
+type
+foo (type x, type y, SCALAR *f)
+{
+  return OP (vec, op, suffix) (y, DUP (vec, suffix, *f), x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-5.h b/gcc/testsuite/gcc.target/i386/avx512-fma-5.h
new file mode 100644 (file)
index 0000000..386960a
--- /dev/null
@@ -0,0 +1,13 @@
+#include <immintrin.h>
+
+#define PASTER2(x,y)           x##y
+#define PASTER3(x,y,z)         _mm##x##_##y##_##z
+#define TYPE(vec)              PASTER2 (__m, vec)
+#define OP(vec, op, suffix)    PASTER3 (vec, op, suffix)
+#define DUP(vec, suffix, val)  PASTER3 (vec, set1, suffix) (val)
+
+type
+foo (type x, type y, SCALAR *f)
+{
+  return OP (vec, op, suffix) (DUP (vec, suffix, *f), x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-6.h b/gcc/testsuite/gcc.target/i386/avx512-fma-6.h
new file mode 100644 (file)
index 0000000..de20901
--- /dev/null
@@ -0,0 +1,13 @@
+#include <immintrin.h>
+
+#define PASTER2(x,y)           x##y
+#define PASTER3(x,y,z)         _mm##x##_##y##_##z
+#define TYPE(vec)              PASTER2 (__m, vec)
+#define OP(vec, op, suffix)    PASTER3 (vec, op, suffix)
+#define DUP(vec, suffix, val)  PASTER3 (vec, set1, suffix) (val)
+
+type
+foo (type x, type y, SCALAR *f)
+{
+  return OP (vec, op, suffix) (DUP (vec, suffix, *f), y, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-7.h b/gcc/testsuite/gcc.target/i386/avx512-fma-7.h
new file mode 100644 (file)
index 0000000..4898660
--- /dev/null
@@ -0,0 +1,16 @@
+#include <immintrin.h>
+
+#define PASTER2(x,y)           x##y
+#define PASTER3(x,y,z)         _mm##x##_##y##_##z
+#define TYPE(vec)              PASTER2 (__m, vec)
+#define OP(vec, op, suffix)    PASTER3 (vec, op, suffix)
+#define DUP(vec, suffix, val)  PASTER3 (vec, set1, suffix) (val)
+
+extern SCALAR bar (void);
+
+type
+foo (type x, type y)
+{
+  SCALAR f = bar ();
+  return OP (vec, op, suffix) (x, y, DUP (vec, suffix, f));
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-8.h b/gcc/testsuite/gcc.target/i386/avx512-fma-8.h
new file mode 100644 (file)
index 0000000..e5bd28e
--- /dev/null
@@ -0,0 +1,13 @@
+#include <immintrin.h>
+
+#define PASTER2(x,y)           x##y
+#define PASTER3(x,y,z)         _mm##x##_##y##_##z
+#define TYPE(vec)              PASTER2 (__m, vec)
+#define OP(vec, op, suffix)    PASTER3 (vec, op, suffix)
+#define DUP(vec, suffix, val)  PASTER3 (vec, set1, suffix) (val)
+
+type
+foo (type x, type y, SCALAR f)
+{
+  return OP (vec, op, suffix) (y, x, DUP (vec, suffix, f));
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-df-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-df-zmm-1.c
new file mode 100644 (file)
index 0000000..6d6e522
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...pd\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastsd\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512d
+#define vec 512
+#define op fmadd
+#define suffix pd
+#define SCALAR double
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-1.c
new file mode 100644 (file)
index 0000000..9833310
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmadd
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-2.c
new file mode 100644 (file)
index 0000000..34e5620
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmadd
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-2.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-3.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-3.c
new file mode 100644 (file)
index 0000000..8345625
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmadd
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-3.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-4.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-4.c
new file mode 100644 (file)
index 0000000..c028c8e
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmadd
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-4.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-5.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-5.c
new file mode 100644 (file)
index 0000000..3eac8e8
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmadd
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-5.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-6.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-6.c
new file mode 100644 (file)
index 0000000..990cdc6
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmadd
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-6.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-7.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-7.c
new file mode 100644 (file)
index 0000000..8c11720
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[^\n\]*%zmm\[0-9\]+" 1 } } */
+
+#define type __m512
+#define vec 512
+#define op fmadd
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-7.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-8.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-8.c
new file mode 100644 (file)
index 0000000..8a9da75
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+%zmm\[0-9\]+, %zmm\[0-9\]+, %zmm0" 1 } } */
+
+#define type __m512
+#define vec 512
+#define op fmadd
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-8.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-xmm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-xmm-1.c
new file mode 100644 (file)
index 0000000..e03a935
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mfma -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %xmm\[0-9\]+, %xmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%xmm\[0-9\]+" } } */
+
+#define type __m128
+#define vec
+#define op fmadd
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-ymm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-ymm-1.c
new file mode 100644 (file)
index 0000000..225766d
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mfma -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %ymm\[0-9\]+, %ymm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%ymm\[0-9\]+" } } */
+
+#define type __m256
+#define vec 256
+#define op fmadd
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"