i386: Add X86_TUNE_EMIT_VZEROUPPER
authorH.J. Lu <hongjiu.lu@intel.com>
Wed, 15 Nov 2017 19:30:58 +0000 (19:30 +0000)
committerH.J. Lu <hjl@gcc.gnu.org>
Wed, 15 Nov 2017 19:30:58 +0000 (11:30 -0800)
Add X86_TUNE_EMIT_VZEROUPPER to indicate if vzeroupper instruction should
be inserted before a transfer of control flow out of the function.  It is
turned on by default unless we are tuning for KNL.  Users can always use
-mzeroupper or -mno-zeroupper to override X86_TUNE_EMIT_VZEROUPPER.

gcc/

PR target/82990
* config/i386/i386.c (pass_insert_vzeroupper::gate): Remove
TARGET_AVX512ER check.
(ix86_option_override_internal): Set MASK_VZEROUPPER if
neither -mzeroupper nor -mno-zeroupper is used and
TARGET_EMIT_VZEROUPPER is set.
* config/i386/i386.h (TARGET_EMIT_VZEROUPPER): New.
* config/i386/x86-tune.def: Add X86_TUNE_EMIT_VZEROUPPER.

gcc/testsuite/

PR target/82990
* gcc.target/i386/pr82942-2.c: Add -mtune=knl.
* gcc.target/i386/pr82990-1.c: New test.
* gcc.target/i386/pr82990-2.c: Likewise.
* gcc.target/i386/pr82990-3.c: Likewise.
* gcc.target/i386/pr82990-4.c: Likewise.
* gcc.target/i386/pr82990-5.c: Likewise.
* gcc.target/i386/pr82990-6.c: Likewise.
* gcc.target/i386/pr82990-7.c: Likewise.

From-SVN: r254783

13 files changed:
gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/i386.h
gcc/config/i386/x86-tune.def
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr82942-2.c
gcc/testsuite/gcc.target/i386/pr82990-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr82990-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr82990-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr82990-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr82990-5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr82990-6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr82990-7.c [new file with mode: 0644]

index 7b271b57d284006a865518b96eebc0f06eddad65..eb4954e78c9c25a840a15bef6566daf2b3cf145b 100644 (file)
@@ -1,3 +1,14 @@
+2017-11-15  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/82990
+       * config/i386/i386.c (pass_insert_vzeroupper::gate): Remove
+       TARGET_AVX512ER check.
+       (ix86_option_override_internal): Set MASK_VZEROUPPER if
+       neither -mzeroupper nor -mno-zeroupper is used and
+       TARGET_EMIT_VZEROUPPER is set.
+       * config/i386/i386.h (TARGET_EMIT_VZEROUPPER): New.
+       * config/i386/x86-tune.def: Add X86_TUNE_EMIT_VZEROUPPER.
+
 2017-11-15  Will Schmidt  <will_schmidt@vnet.ibm.com>
 
        * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
index c5e84a09954583b04ab8438cb71039627e8e89ce..c6ca071275563f2ff654f0ebd7bd15ace568e611 100644 (file)
@@ -2497,7 +2497,7 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *)
     {
-      return TARGET_AVX && !TARGET_AVX512ER
+      return TARGET_AVX
             && TARGET_VZEROUPPER && flag_expensive_optimizations
             && !optimize_size;
     }
@@ -4666,7 +4666,8 @@ ix86_option_override_internal (bool main_args_p,
   if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
     sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");
 
-  if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
+  if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
+      && TARGET_EMIT_VZEROUPPER)
     opts->x_target_flags |= MASK_VZEROUPPER;
   if (!(opts_set->x_target_flags & MASK_STV))
     opts->x_target_flags |= MASK_STV;
index e3e55da42329fa8f384ef55f4b2f802f5a5eb0af..a45e2df578374c65d3d9eccef98aead24cc72960 100644 (file)
@@ -517,6 +517,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
        ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
 #define TARGET_ONE_IF_CONV_INSN \
        ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
+#define TARGET_EMIT_VZEROUPPER \
+       ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
index 99282c883410a9085fdbc6a75b44c18e7655128b..19fd2b52b30427458db65b668e79ef5181c52e21 100644 (file)
@@ -543,3 +543,7 @@ DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", ~0U)
    arithmetic to 32bit via PROMOTE_MODE macro.  This code generation scheme
    is usually used for RISC targets.  */
 DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", 0U)
+
+/* X86_TUNE_EMIT_VZEROUPPER: This enables vzeroupper instruction insertion
+   before a transfer of control flow out of the function.  */
+DEF_TUNE (X86_TUNE_EMIT_VZEROUPPER, "emit_vzeroupper", ~m_KNL)
index 3d18653a4d5ee37cadd17f0678f1c5835d893dc3..06d87b981698ca607acfa9cf6787fbee0f4c7e49 100644 (file)
@@ -1,3 +1,15 @@
+2017-11-15  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/82990
+       * gcc.target/i386/pr82942-2.c: Add -mtune=knl.
+       * gcc.target/i386/pr82990-1.c: New test.
+       * gcc.target/i386/pr82990-2.c: Likewise.
+       * gcc.target/i386/pr82990-3.c: Likewise.
+       * gcc.target/i386/pr82990-4.c: Likewise.
+       * gcc.target/i386/pr82990-5.c: Likewise.
+       * gcc.target/i386/pr82990-6.c: Likewise.
+       * gcc.target/i386/pr82990-7.c: Likewise.
+
 2017-11-15  Will Schmidt  <will_schmidt@vnet.ibm.com>
 
        * gcc.target/powerpc/builtins-3-p9.c: Add -O1, update
index cb0e337113e37abab429697343c6efbc3d3b8ddd..ddb4e689659ba6f6892f212d03fee5def8c14474 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-mavx512f -mavx512er -O2" } */
+/* { dg-options "-mavx512f -mavx512er -mtune=knl -O2" } */
 
 #include "pr82941-1.c"
 
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c b/gcc/testsuite/gcc.target/i386/pr82990-1.c
new file mode 100644 (file)
index 0000000..ff1d6d4
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=knl -mvzeroupper" } */
+
+#include <immintrin.h>
+
+extern __m512d y, z;
+
+void
+pr82941 ()
+{
+  z = y;
+}
+
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-2.c b/gcc/testsuite/gcc.target/i386/pr82990-2.c
new file mode 100644 (file)
index 0000000..0d3cb23
--- /dev/null
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-vzeroupper" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c b/gcc/testsuite/gcc.target/i386/pr82990-3.c
new file mode 100644 (file)
index 0000000..201fa98
--- /dev/null
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mavx512er -mvzeroupper -O2" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-4.c b/gcc/testsuite/gcc.target/i386/pr82990-4.c
new file mode 100644 (file)
index 0000000..09f161c
--- /dev/null
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mno-avx512er -mno-vzeroupper -O2" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-5.c b/gcc/testsuite/gcc.target/i386/pr82990-5.c
new file mode 100644 (file)
index 0000000..9932bdc
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+#include <immintrin.h>
+
+extern __m512d y, z;
+
+void
+pr82941 ()
+{
+  z = y;
+}
+
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-6.c b/gcc/testsuite/gcc.target/i386/pr82990-6.c
new file mode 100644 (file)
index 0000000..063a61c
--- /dev/null
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512 -mtune=knl" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-7.c b/gcc/testsuite/gcc.target/i386/pr82990-7.c
new file mode 100644 (file)
index 0000000..dedde8b
--- /dev/null
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512 -mtune=generic -mtune-ctrl=^emit_vzeroupper" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */