re PR target/71245 (std::atomic<double> load/store bounces the data to the stack...
author Uros Bizjak <ubizjak@gmail.com>
Sun, 29 May 2016 20:50:32 +0000 (22:50 +0200)
committer Uros Bizjak <uros@gcc.gnu.org>
Sun, 29 May 2016 20:50:32 +0000 (22:50 +0200)
PR target/71245
* config/i386/sync.md (define_peephole2 atomic_storedi_fpu):
New peepholes to remove unneeded fild/fistp pairs.
(define_peephole2 atomic_loaddi_fpu): Ditto.

testsuite/ChangeLog:

PR target/71245
* gcc.target/i386/pr71245-1.c: New test.
* gcc.target/i386/pr71245-2.c: Ditto.

From-SVN: r236863

gcc/ChangeLog
gcc/config/i386/sync.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr71245-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr71245-2.c [new file with mode: 0644]

index 41b863b60f36cad2b8a1550cc215b29bbabd258e..cde2b8df151d47473f30572226047478e4ed2e68 100644 (file)
@@ -1,3 +1,10 @@
+2016-05-29  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/71245
+       * config/i386/sync.md (define_peephole2 atomic_storedi_fpu):
+       New peepholes to remove unneeded fild/fistp pairs.
+       (define_peephole2 atomic_loaddi_fpu): Ditto.
+
 2016-05-27  Jan Hubicka  <hubicka@ucw.cz>
 
        * predict.c (maybe_hot_frequency_p): Avoid division.
index 8322676a7b05e8856b621cb7e09ff9bf42c5332c..9acf5ca8a873669ee46aedc13bf82f5109bd82a3 100644 (file)
   DONE;
 })
 
+;; Atomic DImode load performed through the x87 unit (UNSPEC_FILD_ATOMIC
+;; from the atomic location, UNSPEC_FIST_ATOMIC into a temporary), followed
+;; by a DFmode load of that same temporary into an x87 register: load the
+;; DFmode value directly from the atomic location instead.
+;; Matched only for !TARGET_64BIT, when the intermediate register
+;; (operand 0) is dead after the second insn and the DFmode load reads
+;; exactly the memory the fist wrote.
+;; The temporary is still written from the loaded register:
+;; peep2_reg_dead_p can only prove registers dead, not memory, so a later
+;; read of the temporary must keep seeing the loaded value.
+(define_peephole2
+  [(set (match_operand:DF 0 "fp_register_operand")
+       (unspec:DF [(match_operand:DI 1 "memory_operand")]
+                  UNSPEC_FILD_ATOMIC))
+   (set (match_operand:DI 2 "memory_operand")
+       (unspec:DI [(match_dup 0)]
+                  UNSPEC_FIST_ATOMIC))
+   (set (match_operand:DF 3 "fp_register_operand")
+       (match_operand:DF 4 "memory_operand"))]
+  "!TARGET_64BIT
+   && peep2_reg_dead_p (2, operands[0])
+   && rtx_equal_p (operands[4], adjust_address_nv (operands[2], DFmode, 0))"
+  [(set (match_dup 3) (match_dup 5))
+   (set (match_dup 4) (match_dup 3))]
+  ;; Operand 5 is the atomic location re-viewed as a DFmode memory reference.
+  "operands[5] = gen_lowpart (DFmode, operands[1]);")
+
+;; Atomic DImode load performed through an SSE register (DImode load from
+;; the atomic location, DImode store into a temporary), followed by a
+;; DFmode load of that same temporary into an x87 register: load the DFmode
+;; value directly from the atomic location instead.
+;; Matched only for !TARGET_64BIT, when the SSE register (operand 0) is
+;; dead after the second insn and the DFmode load reads exactly the memory
+;; the DImode store wrote.
+;; The temporary is still written from the loaded register:
+;; peep2_reg_dead_p can only prove registers dead, not memory, so a later
+;; read of the temporary must keep seeing the loaded value.
+(define_peephole2
+  [(set (match_operand:DI 0 "sse_reg_operand")
+       (match_operand:DI 1 "memory_operand"))
+   (set (match_operand:DI 2 "memory_operand")
+       (match_dup 0))
+   (set (match_operand:DF 3 "fp_register_operand")
+       (match_operand:DF 4 "memory_operand"))]
+  "!TARGET_64BIT
+   && peep2_reg_dead_p (2, operands[0])
+   && rtx_equal_p (operands[4], adjust_address_nv (operands[2], DFmode, 0))"
+  [(set (match_dup 3) (match_dup 5))
+   (set (match_dup 4) (match_dup 3))]
+  ;; Operand 5 is the atomic location re-viewed as a DFmode memory reference.
+  "operands[5] = gen_lowpart (DFmode, operands[1]);")
+
 (define_expand "atomic_store<mode>"
   [(set (match_operand:ATOMIC 0 "memory_operand")
        (unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand")
   DONE;
 })
 
+;; DFmode store of an x87 register into a temporary, followed by an atomic
+;; DImode store performed through the x87 unit (UNSPEC_FILD_ATOMIC from
+;; that temporary, UNSPEC_FIST_ATOMIC into the atomic location): store the
+;; DFmode register directly into the atomic location instead.
+;; Matched only for !TARGET_64BIT, when the intermediate register
+;; (operand 2) is dead after the third insn and the fild reads exactly the
+;; memory the DFmode store wrote.
+;; The original store into the temporary is kept: peep2_reg_dead_p can
+;; only prove registers dead, not memory, so a later read of the temporary
+;; must keep seeing the stored value.
+(define_peephole2
+  [(set (match_operand:DF 0 "memory_operand")
+       (match_operand:DF 1 "fp_register_operand"))
+   (set (match_operand:DF 2 "fp_register_operand")
+       (unspec:DF [(match_operand:DI 3 "memory_operand")]
+                  UNSPEC_FILD_ATOMIC))
+   (set (match_operand:DI 4 "memory_operand")
+       (unspec:DI [(match_dup 2)]
+                  UNSPEC_FIST_ATOMIC))]
+  "!TARGET_64BIT
+   && peep2_reg_dead_p (3, operands[2])
+   && rtx_equal_p (operands[0], adjust_address_nv (operands[3], DFmode, 0))"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 5) (match_dup 1))]
+  ;; Operand 5 is the atomic location re-viewed as a DFmode memory reference.
+  "operands[5] = gen_lowpart (DFmode, operands[4]);")
+
+;; DFmode store of an x87 register into a temporary, followed by an atomic
+;; DImode store performed through an SSE register (DImode load from that
+;; temporary, DImode store into the atomic location): store the DFmode
+;; register directly into the atomic location instead.
+;; Matched only for !TARGET_64BIT, when the SSE register (operand 2) is
+;; dead after the third insn and the DImode load reads exactly the memory
+;; the DFmode store wrote.
+;; The original store into the temporary is kept: peep2_reg_dead_p can
+;; only prove registers dead, not memory, so a later read of the temporary
+;; must keep seeing the stored value.
+(define_peephole2
+  [(set (match_operand:DF 0 "memory_operand")
+       (match_operand:DF 1 "fp_register_operand"))
+   (set (match_operand:DI 2 "sse_reg_operand")
+       (match_operand:DI 3 "memory_operand"))
+   (set (match_operand:DI 4 "memory_operand")
+       (match_dup 2))]
+  "!TARGET_64BIT
+   && peep2_reg_dead_p (3, operands[2])
+   && rtx_equal_p (operands[0], adjust_address_nv (operands[3], DFmode, 0))"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 5) (match_dup 1))]
+  ;; Operand 5 is the atomic location re-viewed as a DFmode memory reference.
+  "operands[5] = gen_lowpart (DFmode, operands[4]);")
+
 ;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
 ;; operations.  But the fix_trunc patterns want way more setup than we want
 ;; to provide.  Note that the scratch is DFmode instead of XFmode in order
index 7857e7fdce8c56327e0944edc8bfb8c740801b34..2313b8656c4b1de3f63f6a5c8c283a8c67babb0a 100644 (file)
@@ -1,3 +1,9 @@
+2016-05-29  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/71245
+       * gcc.target/i386/pr71245-1.c: New test.
+       * gcc.target/i386/pr71245-2.c: Ditto.
+
 2016-05-29  Paolo Carlini  <paolo.carlini@oracle.com>
 
        PR c++/71105
diff --git a/gcc/testsuite/gcc.target/i386/pr71245-1.c b/gcc/testsuite/gcc.target/i386/pr71245-1.c
new file mode 100644 (file)
index 0000000..be0b760
--- /dev/null
@@ -0,0 +1,22 @@
+/* PR target/71245: with SSE disabled, a 64-bit atomic load/store around an x87 add must not emit fild/fistp.  */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -march=pentium -mno-sse -mfpmath=387" } */
+
+typedef union  /* The same storage viewed as a 64-bit integer or as a double.  */
+{
+  unsigned long long ll;
+  double d;
+} u_t;
+
+u_t d = { .d = 5.0 };  /* Global object accessed atomically below.  */
+
+void foo_d (void)
+{
+  u_t tmp;
+  
+  tmp.ll = __atomic_load_n (&d.ll, __ATOMIC_SEQ_CST);  /* Atomic load of the integer view.  */
+  tmp.d += 1.0;  /* Floating-point update through the double view of the loaded bits.  */
+  __atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST);  /* Atomic store of the integer view.  */
+}
+
+/* { dg-final { scan-assembler-not "(fistp|fild)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr71245-2.c b/gcc/testsuite/gcc.target/i386/pr71245-2.c
new file mode 100644 (file)
index 0000000..65c1398
--- /dev/null
@@ -0,0 +1,22 @@
+/* PR target/71245: with SSE enabled but SSE2 disabled, a 64-bit atomic load/store around an x87 add must not emit movlps.  */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -march=pentium -msse -mno-sse2 -mfpmath=387" } */
+
+typedef union  /* The same storage viewed as a 64-bit integer or as a double.  */
+{
+  unsigned long long ll;
+  double d;
+} u_t;
+
+u_t d = { .d = 5.0 };  /* Global object accessed atomically below.  */
+
+void foo_d (void)
+{
+  u_t tmp;
+  
+  tmp.ll = __atomic_load_n (&d.ll, __ATOMIC_SEQ_CST);  /* Atomic load of the integer view.  */
+  tmp.d += 1.0;  /* Floating-point update through the double view of the loaded bits.  */
+  __atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST);  /* Atomic store of the integer view.  */
+}
+
+/* { dg-final { scan-assembler-not "movlps" } } */