pa.md (UNSPEC_MEMORY_BARRIER): New unspec enum.

author John David Anglin <danglin@gcc.gnu.org>

Sat, 11 Aug 2018 21:37:55 +0000 (21:37 +0000)

committer John David Anglin <danglin@gcc.gnu.org>

Sat, 11 Aug 2018 21:37:55 +0000 (21:37 +0000)
author John David Anglin <danglin@gcc.gnu.org>
Sat, 11 Aug 2018 21:37:55 +0000 (21:37 +0000)
committer John David Anglin <danglin@gcc.gnu.org>
Sat, 11 Aug 2018 21:37:55 +0000 (21:37 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 0da6f84cf938cf39b54b53e20fc5fa2b4189ebf9..c669cadfec53ccd40bf668d6e4fedd5092d65f52 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2018-08-11  John David Anglin  <danglin@gcc.gnu.org>
+
+       * config/pa/pa.md (UNSPEC_MEMORY_BARRIER): New unspec enum.
+       Update comment for atomic instructions.
+       (atomic_storeqi, atomic_storehi, atomic_storesi, atomic_storesf,
+       atomic_loaddf, atomic_loaddf_1, atomic_storedf, atomic_storedf_1):
+       Remove.
+       (atomic_loaddi): Revise fence expansion to only emit fence prior to
+       load for __ATOMIC_SEQ_CST model.
+       (atomic_loaddi_1): Remove float register target.
+       (atomic_storedi): Handle CONST_INT values.
+       (atomic_storedi_1): Remove float register source.  Add special case
+       for zero value.
+       (memory_barrier): New expander and insn.
+
  2018-08-11  Jakub Jelinek  <jakub@redhat.com>
  
         PR tree-optimization/86835
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md

index e748f053651c4615d9f9099282c4aa3dcfa96cbe..0094c58f5e4f097ca4f24ed424c11866e0a131fb 100644 (file)
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -84,6 +84,7 @@
     UNSPEC_TLSGD_PIC
     UNSPEC_TLSLDM_PIC
     UNSPEC_TLSIE_PIC
+   UNSPEC_MEMORY_BARRIER
    ])
  
  ;; UNSPEC_VOLATILE:
@@ -9955,84 +9956,8 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
  ;; doubleword loads and stores are not guaranteed to be atomic
  ;; when referencing the I/O address space.
  
-;; The kernel cmpxchg operation on linux is not atomic with respect to
-;; memory stores on SMP machines, so we must do stores using a cmpxchg
-;; operation.
-
  ;; These patterns are at the bottom so the non atomic versions are preferred.
  
-;; Implement atomic QImode store using exchange.
-
-(define_expand "atomic_storeqi"
-  [(match_operand:QI 0 "memory_operand")                ;; memory
-   (match_operand:QI 1 "register_operand")              ;; val out
-   (match_operand:SI 2 "const_int_operand")]            ;; model
-  ""
-{
-  if (TARGET_SYNC_LIBCALL)
-    {
-      rtx mem = operands[0];
-      rtx val = operands[1];
-      if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
-       DONE;
-    }
-  FAIL;
-})
-
-;; Implement atomic HImode stores using exchange.
-
-(define_expand "atomic_storehi"
-  [(match_operand:HI 0 "memory_operand")                ;; memory
-   (match_operand:HI 1 "register_operand")              ;; val out
-   (match_operand:SI 2 "const_int_operand")]            ;; model
-  ""
-{
-  if (TARGET_SYNC_LIBCALL)
-    {
-      rtx mem = operands[0];
-      rtx val = operands[1];
-      if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
-       DONE;
-    }
-  FAIL;
-})
-
-;; Implement atomic SImode store using exchange.
-
-(define_expand "atomic_storesi"
-  [(match_operand:SI 0 "memory_operand")                ;; memory
-   (match_operand:SI 1 "register_operand")              ;; val out
-   (match_operand:SI 2 "const_int_operand")]            ;; model
-  ""
-{
-  if (TARGET_SYNC_LIBCALL)
-    {
-      rtx mem = operands[0];
-      rtx val = operands[1];
-      if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
-       DONE;
-    }
-  FAIL;
-})
-
-;; Implement atomic SFmode store using exchange.
-
-(define_expand "atomic_storesf"
-  [(match_operand:SF 0 "memory_operand")                ;; memory
-   (match_operand:SF 1 "register_operand")              ;; val out
-   (match_operand:SI 2 "const_int_operand")]            ;; model
-  ""
-{
-  if (TARGET_SYNC_LIBCALL)
-    {
-      rtx mem = operands[0];
-      rtx val = operands[1];
-      if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
-       DONE;
-    }
-  FAIL;
-})
-
  ;; Implement atomic DImode load using 64-bit floating point load.
  
  (define_expand "atomic_loaddi"
@@ -10048,47 +9973,39 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
  
    model = memmodel_from_int (INTVAL (operands[2]));
    operands[1] = force_reg (SImode, XEXP (operands[1], 0));
-  expand_mem_thread_fence (model);
-  emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1]));
    if (is_mm_seq_cst (model))
      expand_mem_thread_fence (model);
+  emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1]));
+  expand_mem_thread_fence (model);
    DONE;
  })
  
  (define_insn "atomic_loaddi_1"
-  [(set (match_operand:DI 0 "register_operand" "=f,r")
-        (mem:DI (match_operand:SI 1 "register_operand" "r,r")))
-   (clobber (match_scratch:DI 2 "=X,f"))]
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (mem:DI (match_operand:SI 1 "register_operand" "r")))
+   (clobber (match_scratch:DI 2 "=f"))]
    "!TARGET_64BIT && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT"
-  "@
-   {fldds|fldd} 0(%1),%0
-   {fldds|fldd} 0(%1),%2\n\t{fstds|fstd} %2,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0\n\t{ldws|ldw} -12(%%sp),%R0"
-  [(set_attr "type" "move,move")
-   (set_attr "length" "4,16")])
+  "{fldds|fldd} 0(%1),%2\n\t{fstds|fstd} %2,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0\n\t{ldws|ldw} -12(%%sp),%R0"
+  [(set_attr "type" "move")
+   (set_attr "length" "16")])
  
  ;; Implement atomic DImode store.
  
  (define_expand "atomic_storedi"
    [(match_operand:DI 0 "memory_operand")                ;; memory
-   (match_operand:DI 1 "register_operand")              ;; val out
+   (match_operand:DI 1 "reg_or_cint_move_operand")      ;; val out
     (match_operand:SI 2 "const_int_operand")]            ;; model
    ""
  {
    enum memmodel model;
  
-  if (TARGET_SYNC_LIBCALL)
-    {
-      rtx mem = operands[0];
-      rtx val = operands[1];
-      if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
-       DONE;
-    }
-
    if (TARGET_64BIT || TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
      FAIL;
  
    model = memmodel_from_int (INTVAL (operands[2]));
    operands[0] = force_reg (SImode, XEXP (operands[0], 0));
+  if (operands[1] != CONST0_RTX (DImode))
+    operands[1] = force_reg (DImode, operands[1]);
    expand_mem_thread_fence (model);
    emit_insn (gen_atomic_storedi_1 (operands[0], operands[1]));
    if (is_mm_seq_cst (model))
@@ -10098,87 +10015,33 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
  
  (define_insn "atomic_storedi_1"
    [(set (mem:DI (match_operand:SI 0 "register_operand" "r,r"))
-        (match_operand:DI 1 "register_operand" "f,r"))
+        (match_operand:DI 1 "reg_or_0_operand" "M,r"))
     (clobber (match_scratch:DI 2 "=X,f"))]
-  "!TARGET_64BIT && !TARGET_DISABLE_FPREGS
-   && !TARGET_SOFT_FLOAT && !TARGET_SYNC_LIBCALL"
+  "!TARGET_64BIT && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT"
    "@
-   {fstds|fstd} %1,0(%0)
+   {fstds|fstd} %%fr0,0(%0)
     {stws|stw} %1,-16(%%sp)\n\t{stws|stw} %R1,-12(%%sp)\n\t{fldds|fldd} -16(%%sp),%2\n\t{fstds|fstd} %2,0(%0)"
    [(set_attr "type" "move,move")
     (set_attr "length" "4,16")])
  
-;; Implement atomic DFmode load using 64-bit floating point load.
+;; PA 2.0 hardware supports out-of-order execution of loads and stores, so
+;; we need a memory barrier to enforce program order for memory references.
+;; Since we want PA 1.x code to be PA 2.0 compatible, we also need the
+;; barrier when generating PA 1.x code.
  
-(define_expand "atomic_loaddf"
-  [(match_operand:DF 0 "register_operand")              ;; val out
-   (match_operand:DF 1 "memory_operand")                ;; memory
-   (match_operand:SI 2 "const_int_operand")]            ;; model
+(define_expand "memory_barrier"
+  [(set (match_dup 0)
+        (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
    ""
  {
-  enum memmodel model;
-
-  if (TARGET_64BIT || TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
-    FAIL;
-
-  model = memmodel_from_int (INTVAL (operands[2]));
-  operands[1] = force_reg (SImode, XEXP (operands[1], 0));
-  expand_mem_thread_fence (model);
-  emit_insn (gen_atomic_loaddf_1 (operands[0], operands[1]));
-  if (is_mm_seq_cst (model))
-    expand_mem_thread_fence (model);
-  DONE;
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[0]) = 1;
  })
  
-(define_insn "atomic_loaddf_1"
-  [(set (match_operand:DF 0 "register_operand" "=f,r")
-        (mem:DF (match_operand:SI 1 "register_operand" "r,r")))
-   (clobber (match_scratch:DF 2 "=X,f"))]
-  "!TARGET_64BIT && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT"
-  "@
-   {fldds|fldd} 0(%1),%0
-   {fldds|fldd} 0(%1),%2\n\t{fstds|fstd} %2,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0\n\t{ldws|ldw} -12(%%sp),%R0"
-  [(set_attr "type" "move,move")
-   (set_attr "length" "4,16")])
-
-;; Implement atomic DFmode store using 64-bit floating point store.
-
-(define_expand "atomic_storedf"
-  [(match_operand:DF 0 "memory_operand")                ;; memory
-   (match_operand:DF 1 "register_operand")              ;; val out
-   (match_operand:SI 2 "const_int_operand")]            ;; model
+(define_insn "*memory_barrier"
+  [(set (match_operand:BLK 0 "" "")
+        (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
    ""
-{
-  enum memmodel model;
-
-  if (TARGET_SYNC_LIBCALL)
-    {
-      rtx mem = operands[0];
-      rtx val = operands[1];
-      if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
-       DONE;
-    }
-
-  if (TARGET_64BIT || TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
-    FAIL;
-
-  model = memmodel_from_int (INTVAL (operands[2]));
-  operands[0] = force_reg (SImode, XEXP (operands[0], 0));
-  expand_mem_thread_fence (model);
-  emit_insn (gen_atomic_storedf_1 (operands[0], operands[1]));
-  if (is_mm_seq_cst (model))
-    expand_mem_thread_fence (model);
-  DONE;
-})
-
-(define_insn "atomic_storedf_1"
-  [(set (mem:DF (match_operand:SI 0 "register_operand" "r,r"))
-        (match_operand:DF 1 "register_operand" "f,r"))
-   (clobber (match_scratch:DF 2 "=X,f"))]
-  "!TARGET_64BIT && !TARGET_DISABLE_FPREGS
-   && !TARGET_SOFT_FLOAT && !TARGET_SYNC_LIBCALL"
-  "@
-   {fstds|fstd} %1,0(%0)
-   {stws|stw} %1,-16(%%sp)\n\t{stws|stw} %R1,-12(%%sp)\n\t{fldds|fldd} -16(%%sp),%2\n\t{fstds|fstd} %2,0(%0)"
-  [(set_attr "type" "move,move")
-   (set_attr "length" "4,16")])
+  "sync"
+  [(set_attr "type" "binary")
+   (set_attr "length" "4")])
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog

index bfdda6e22830032d5b693031f434fdbd52994ee0..084131abd0442d90c7221fa38616818b035367b0 100644 (file)
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,15 @@
+2018-08-11  John David Anglin  <danglin@gcc.gnu.org>
+
+       * config/pa/linux-atomic.c: Update comment.
+       (FETCH_AND_OP_2, OP_AND_FETCH_2, FETCH_AND_OP_WORD, OP_AND_FETCH_WORD,
+       COMPARE_AND_SWAP_2, __sync_val_compare_and_swap_4,
+       SYNC_LOCK_TEST_AND_SET_2, __sync_lock_test_and_set_4): Use
+       __ATOMIC_RELAXED for atomic loads.
+       (SYNC_LOCK_RELEASE_1): New define.  Use __sync_synchronize() and
+       unordered store to release lock.
+       (__sync_lock_release_8): Likewise.
+       (SYNC_LOCK_RELEASE_2): Remove define.
+        
  2018-08-02  Nicolas Pitre <nico@fluxnic.net>
  
         PR libgcc/86512
diff --git a/libgcc/config/pa/linux-atomic.c b/libgcc/config/pa/linux-atomic.c

index 79c89e19900e4a751a51119b407e6c9704094adc..ddd0b1e9708172fd14ba10c736e4b74dcd45a5d5 100644 (file)
--- a/libgcc/config/pa/linux-atomic.c
+++ b/libgcc/config/pa/linux-atomic.c
@@ -28,14 +28,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  #define EBUSY   16
  #define ENOSYS 251 
  
-/* All PA-RISC implementations supported by linux have strongly
-   ordered loads and stores.  Only cache flushes and purges can be
-   delayed.  The data cache implementations are all globally
-   coherent.  Thus, there is no need to synchonize memory accesses.
-
-   GCC automatically issues a asm memory barrier when it encounters
-   a __sync_synchronize builtin.  Thus, we do not need to define this
-   builtin.
+/* PA-RISC 2.0 supports out-of-order execution for loads and stores.
+   Thus, we need to synchronize memory accesses.  For more info, see:
+   "Advanced Performance Features of the 64-bit PA-8000" by Doug Hunt.
  
     We implement byte, short and int versions of each atomic operation
     using the kernel helper defined below.  There is no support for
@@ -119,7 +114,7 @@ __kernel_cmpxchg2 (void *mem, const void *oldval, const void *newval,
      long failure;                                                      \
                                                                         \
      do {                                                               \
-      tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);                   \
+      tmp = __atomic_load_n (ptr, __ATOMIC_RELAXED);                   \
        newval = PFX_OP (tmp INF_OP val);                                        \
        failure = __kernel_cmpxchg2 (ptr, &tmp, &newval, INDEX);         \
      } while (failure != 0);                                            \
@@ -156,7 +151,7 @@ FETCH_AND_OP_2 (nand, ~, &, signed char, 1, 0)
      long failure;                                                      \
                                                                         \
      do {                                                               \
-      tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);                   \
+      tmp = __atomic_load_n (ptr, __ATOMIC_RELAXED);                   \
        newval = PFX_OP (tmp INF_OP val);                                        \
        failure = __kernel_cmpxchg2 (ptr, &tmp, &newval, INDEX);         \
      } while (failure != 0);                                            \
@@ -193,7 +188,7 @@ OP_AND_FETCH_2 (nand, ~, &, signed char, 1, 0)
      long failure;                                                      \
                                                                         \
      do {                                                               \
-      tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);                   \
+      tmp = __atomic_load_n (ptr, __ATOMIC_RELAXED);                   \
        failure = __kernel_cmpxchg (ptr, tmp, PFX_OP (tmp INF_OP val));  \
      } while (failure != 0);                                            \
                                                                         \
@@ -215,7 +210,7 @@ FETCH_AND_OP_WORD (nand, ~, &)
      long failure;                                                      \
                                                                         \
      do {                                                               \
-      tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);                   \
+      tmp = __atomic_load_n (ptr, __ATOMIC_RELAXED);                   \
        failure = __kernel_cmpxchg (ptr, tmp, PFX_OP (tmp INF_OP val));  \
      } while (failure != 0);                                            \
                                                                         \
@@ -241,7 +236,7 @@ typedef unsigned char bool;
                                                                         \
      while (1)                                                          \
        {                                                                        \
-       actual_oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);        \
+       actual_oldval = __atomic_load_n (ptr, __ATOMIC_RELAXED);        \
                                                                         \
         if (__builtin_expect (oldval != actual_oldval, 0))              \
           return actual_oldval;                                         \
@@ -273,7 +268,7 @@ __sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval)
      
    while (1)
      {
-      actual_oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);
+      actual_oldval = __atomic_load_n (ptr, __ATOMIC_RELAXED);
  
        if (__builtin_expect (oldval != actual_oldval, 0))
         return actual_oldval;
@@ -300,7 +295,7 @@ TYPE HIDDEN                                                         \
      long failure;                                                      \
                                                                         \
      do {                                                               \
-      oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);                        \
+      oldval = __atomic_load_n (ptr, __ATOMIC_RELAXED);                        \
        failure = __kernel_cmpxchg2 (ptr, &oldval, &val, INDEX);         \
      } while (failure != 0);                                            \
                                                                         \
@@ -318,38 +313,31 @@ __sync_lock_test_and_set_4 (int *ptr, int val)
    int oldval;
  
    do {
-    oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);
+    oldval = __atomic_load_n (ptr, __ATOMIC_RELAXED);
      failure = __kernel_cmpxchg (ptr, oldval, val);
    } while (failure != 0);
  
    return oldval;
  }
  
-#define SYNC_LOCK_RELEASE_2(TYPE, WIDTH, INDEX)                        \
+void HIDDEN
+__sync_lock_release_8 (long long *ptr)
+{
+  /* All accesses must be complete before we release the lock.  */
+  __sync_synchronize ();
+  *(double *)ptr = 0;
+}
+
+#define SYNC_LOCK_RELEASE_1(TYPE, WIDTH)                       \
    void HIDDEN                                                  \
    __sync_lock_release_##WIDTH (TYPE *ptr)                      \
    {                                                            \
-    TYPE oldval, zero = 0;                                     \
-    long failure;                                              \
-                                                               \
-    do {                                                       \
-      oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);                \
-      failure = __kernel_cmpxchg2 (ptr, &oldval, &zero, INDEX);        \
-    } while (failure != 0);                                    \
+    /* All accesses must be complete before we release         \
+       the lock.  */                                           \
+    __sync_synchronize ();                                     \
+    *ptr = 0;                                                  \
    }
  
-SYNC_LOCK_RELEASE_2 (long long, 8, 3)
-SYNC_LOCK_RELEASE_2 (short, 2, 1)
-SYNC_LOCK_RELEASE_2 (signed char, 1, 0)
-
-void HIDDEN
-__sync_lock_release_4 (int *ptr)
-{
-  long failure;
-  int oldval;
-
-  do {
-    oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);
-    failure = __kernel_cmpxchg (ptr, oldval, 0);
-  } while (failure != 0);
-}
+SYNC_LOCK_RELEASE_1 (int, 4)
+SYNC_LOCK_RELEASE_1 (short, 2)
+SYNC_LOCK_RELEASE_1 (signed char, 1)
author	John David Anglin <danglin@gcc.gnu.org>
	Sat, 11 Aug 2018 21:37:55 +0000 (21:37 +0000)
committer	John David Anglin <danglin@gcc.gnu.org>
	Sat, 11 Aug 2018 21:37:55 +0000 (21:37 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/pa/pa.md		patch \| blob \| history
libgcc/ChangeLog		patch \| blob \| history
libgcc/config/pa/linux-atomic.c		patch \| blob \| history