i386: Use lock prefixed insn instead of MFENCE [PR95750]
author Uros Bizjak <ubizjak@gmail.com>
Mon, 20 Jul 2020 18:34:46 +0000 (20:34 +0200)
committer Uros Bizjak <ubizjak@gmail.com>
Mon, 20 Jul 2020 18:37:10 +0000 (20:37 +0200)
Currently, __atomic_thread_fence(seq_cst) on x86 and x86-64 generates an
mfence instruction. A dummy atomic instruction (a lock-prefixed instruction
or xchg with a memory operand) would provide the same sequential consistency
guarantees while being more efficient on most current CPUs. The mfence
instruction additionally orders non-temporal stores, but that is not relevant
for atomic operations, and such stores are not ordered by seq_cst atomic
operations anyway.

2020-07-20  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:
PR target/95750
* config/i386/i386.h (TARGET_AVOID_MFENCE):
Rename from TARGET_USE_XCHG_FOR_ATOMIC_STORE.
* config/i386/sync.md (mfence_sse2): Disable for TARGET_AVOID_MFENCE.
(mfence_nosse): Enable also for TARGET_AVOID_MFENCE. Emit the
stack memory reference in word_mode.
(mem_thread_fence): Do not generate mfence_sse2 pattern when
TARGET_AVOID_MFENCE is true.
(atomic_store<mode>): Update for rename.
* config/i386/x86-tune.def (X86_TUNE_AVOID_MFENCE):
Rename from X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE.

gcc/testsuite/ChangeLog:
PR target/95750
* gcc.target/i386/pr95750.c: New test.

gcc/config/i386/i386.h
gcc/config/i386/sync.md
gcc/config/i386/x86-tune.def
gcc/testsuite/gcc.target/i386/pr95750.c [new file with mode: 0644]

index f4a8f1391fae7ce60e6d47b8c15899d23eae98ea..114967e49a37b7693a6b4e8709a63b8b36eaf1a6 100644 (file)
@@ -598,8 +598,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
        ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
 #define TARGET_ONE_IF_CONV_INSN \
        ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
-#define TARGET_USE_XCHG_FOR_ATOMIC_STORE \
-       ix86_tune_features[X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE]
+#define TARGET_AVOID_MFENCE ix86_tune_features[X86_TUNE_AVOID_MFENCE]
 #define TARGET_EMIT_VZEROUPPER \
        ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
 #define TARGET_EXPAND_ABS \
index e22109039c10459cc38ca657a9015d9537c687bb..c6827037abf350e62580121734f9fddb98fd4327 100644 (file)
@@ -89,7 +89,8 @@
 (define_insn "mfence_sse2"
   [(set (match_operand:BLK 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
-  "TARGET_64BIT || TARGET_SSE2"
+  "(TARGET_64BIT || TARGET_SSE2)
+   && !TARGET_AVOID_MFENCE"
   "mfence"
   [(set_attr "type" "sse")
    (set_attr "length_address" "0")
   [(set (match_operand:BLK 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
    (clobber (reg:CC FLAGS_REG))]
-  "!(TARGET_64BIT || TARGET_SSE2)"
-  "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
+  "!(TARGET_64BIT || TARGET_SSE2)
+   || TARGET_AVOID_MFENCE"
+{
+  rtx mem = gen_rtx_MEM (word_mode, stack_pointer_rtx);
+
+  output_asm_insn ("lock{%;} or%z0\t{$0, %0|%0, 0}", &mem);
+  return "";
+}
   [(set_attr "memory" "unknown")])
 
 (define_expand "mem_thread_fence"
       rtx (*mfence_insn)(rtx);
       rtx mem;
 
-      if (TARGET_64BIT || TARGET_SSE2)
+      if ((TARGET_64BIT || TARGET_SSE2)
+         && !TARGET_AVOID_MFENCE)
        mfence_insn = gen_mfence_sse2;
       else
        mfence_insn = gen_mfence_nosse;
     {
       operands[1] = force_reg (<MODE>mode, operands[1]);
 
-      /* For seq-cst stores, use XCHG when we lack MFENCE
-        or when target prefers XCHG.  */
+      /* For seq-cst stores, use XCHG when we lack or avoid MFENCE.  */
       if (is_mm_seq_cst (model)
          && (!(TARGET_64BIT || TARGET_SSE2)
-             || TARGET_USE_XCHG_FOR_ATOMIC_STORE))
+             || TARGET_AVOID_MFENCE))
        {
          emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
                                                operands[0], operands[1],
index 1776aba2d1715dc119c2f2c035baf22ef77ebfaa..6eff825689704cb2120d5b5ab320a118012d6096 100644 (file)
@@ -313,8 +313,8 @@ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
          m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT
          | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)
 
-/* X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE: Use xchg instead of mov+mfence.  */
-DEF_TUNE (X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE, "use_xchg_for_atomic_store",
+/* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence.  */
+DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence",
         m_CORE_ALL | m_BDVER | m_ZNVER | m_GENERIC)
 
 /* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by
diff --git a/gcc/testsuite/gcc.target/i386/pr95750.c b/gcc/testsuite/gcc.target/i386/pr95750.c
new file mode 100644 (file)
index 0000000..c47108f
--- /dev/null
@@ -0,0 +1,19 @@
+/* PR target/95750 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=core2" } */
+
+void
+foo (void)
+{
+  __atomic_thread_fence (__ATOMIC_SEQ_CST);
+}
+
+int x;
+
+void
+bar (void)
+{
+  __atomic_store_n (&x, -1, __ATOMIC_SEQ_CST);
+}
+
+/* { dg-final { scan-assembler-not "mfence" } } */