re PR rtl-optimization/89865 (FAIL: gcc.target/i386/pr49095.c scan-assembler-times...
author Jakub Jelinek <jakub@redhat.com>
Mon, 8 Apr 2019 12:35:22 +0000 (14:35 +0200)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Mon, 8 Apr 2019 12:35:22 +0000 (14:35 +0200)
PR rtl-optimization/89865
* config/i386/i386.md
(SWI12 peephole for mem {+,-,&,|,^}= x; mem != 0): Fix up operand
numbers not to clash with the additional operands[4].
(peepholes for mem {+,-,&,|,^}= x; mem != 0): New peephole2s
with extra register copy in the middle.

* gcc.target/i386/pr49095.c: Adjust number of expected RMW spots
on ia32.

From-SVN: r270205

gcc/ChangeLog
gcc/config/i386/i386.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr49095.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b991f60048188b88b89da4891c0ba70baf3a89ce..5ee20a9bbaa5d5a0e5190968005ed39c263be712 100644 (file)
@@ -1,3 +1,12 @@
+2019-04-08  Jakub Jelinek  <jakub@redhat.com>
+
+       PR rtl-optimization/89865
+       * config/i386/i386.md
+       (SWI12 peephole for mem {+,-,&,|,^}= x; mem != 0): Fix up operand
+       numbers not to clash with the additional operands[4].
+       (peepholes for mem {+,-,&,|,^}= x; mem != 0): New peephole2s
+       with extra register copy in the middle.
+
 2019-04-08  Martin Liska  <mliska@suse.cz>
 
        PR gcov-profile/89961
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 0fe778fda928e1f426a639761d6c3519e9b36d67..b797e406a9202b670d958562f35ef0d6d291fb69 100644 (file)
                         (GET_CODE (operands[3]) == PLUS
                          || GET_CODE (operands[3]) == MINUS)
                         ? CCGOCmode : CCNOmode)"
-  [(parallel [(set (match_dup 4) (match_dup 6))
-             (set (match_dup 1) (match_dup 5))])]
+  [(parallel [(set (match_dup 5) (match_dup 7))
+             (set (match_dup 1) (match_dup 6))])]
 {
-  operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));
-  operands[5]
+  operands[5] = SET_DEST (PATTERN (peep2_next_insn (3)));
+  operands[6]
     = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
                      copy_rtx (operands[1]),
                      gen_lowpart (<MODE>mode, operands[2]));
+  operands[7]
+    = gen_rtx_COMPARE (GET_MODE (operands[5]),
+                      copy_rtx (operands[6]),
+                      const0_rtx);
+})
+
+;; peephole2 comes before regcprop, so deal also with a case that
+;; would be cleaned up by regcprop.
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+       (match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (match_dup 0)
+                  (match_operator:SWI 3 "plusminuslogic_operator"
+                    [(match_dup 0)
+                     (match_operand:SWI 2 "<nonmemory_operand>")]))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_operand:SWI 4 "register_operand") (match_dup 0))
+   (set (match_dup 1) (match_dup 4))
+   (set (reg FLAGS_REG) (compare (match_dup 4) (const_int 0)))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && peep2_reg_dead_p (5, operands[4])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && !reg_overlap_mentioned_p (operands[4], operands[1])
+   && (<MODE>mode != QImode
+       || immediate_operand (operands[2], QImode)
+       || any_QIreg_operand (operands[2], QImode))
+   && ix86_match_ccmode (peep2_next_insn (4),
+                        (GET_CODE (operands[3]) == PLUS
+                         || GET_CODE (operands[3]) == MINUS)
+                        ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 5) (match_dup 7))
+             (set (match_dup 1) (match_dup 6))])]
+{
+  operands[5] = SET_DEST (PATTERN (peep2_next_insn (4)));
   operands[6]
-    = gen_rtx_COMPARE (GET_MODE (operands[4]),
-                      copy_rtx (operands[5]),
+    = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
+                     copy_rtx (operands[1]),
+                     operands[2]);
+  operands[7]
+    = gen_rtx_COMPARE (GET_MODE (operands[5]),
+                      copy_rtx (operands[6]),
+                      const0_rtx);
+})
+
+(define_peephole2
+  [(set (match_operand:SWI12 0 "register_operand")
+       (match_operand:SWI12 1 "memory_operand"))
+   (parallel [(set (match_operand:SI 4 "register_operand")
+                  (match_operator:SI 3 "plusminuslogic_operator"
+                    [(match_dup 4)
+                     (match_operand:SI 2 "nonmemory_operand")]))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_operand:SWI12 5 "register_operand") (match_dup 0))
+   (set (match_dup 1) (match_dup 5))
+   (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && REGNO (operands[0]) == REGNO (operands[4])
+   && peep2_reg_dead_p (3, operands[0])
+   && peep2_reg_dead_p (5, operands[5])
+   && (<MODE>mode != QImode
+       || immediate_operand (operands[2], SImode)
+       || any_QIreg_operand (operands[2], SImode))
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && !reg_overlap_mentioned_p (operands[5], operands[1])
+   && ix86_match_ccmode (peep2_next_insn (4),
+                        (GET_CODE (operands[3]) == PLUS
+                         || GET_CODE (operands[3]) == MINUS)
+                        ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 6) (match_dup 8))
+             (set (match_dup 1) (match_dup 7))])]
+{
+  operands[6] = SET_DEST (PATTERN (peep2_next_insn (4)));
+  operands[7]
+    = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
+                     copy_rtx (operands[1]),
+                     gen_lowpart (<MODE>mode, operands[2]));
+  operands[8]
+    = gen_rtx_COMPARE (GET_MODE (operands[6]),
+                      copy_rtx (operands[7]),
+                      const0_rtx);
+})
+
+;; Likewise for cmpelim optimized pattern.
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+       (match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (reg FLAGS_REG)
+                  (compare (match_operator:SWI 3 "plusminuslogic_operator"
+                             [(match_dup 0)
+                              (match_operand:SWI 2 "<nonmemory_operand>")])
+                           (const_int 0)))
+             (set (match_dup 0) (match_dup 3))])
+   (set (match_operand:SWI 4 "register_operand") (match_dup 0))
+   (set (match_dup 1) (match_dup 4))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && peep2_reg_dead_p (4, operands[4])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && !reg_overlap_mentioned_p (operands[4], operands[1])
+   && ix86_match_ccmode (peep2_next_insn (1),
+                        (GET_CODE (operands[3]) == PLUS
+                         || GET_CODE (operands[3]) == MINUS)
+                        ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 5) (match_dup 7))
+             (set (match_dup 1) (match_dup 6))])]
+{
+  operands[5] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
+  operands[6]
+    = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
+                     copy_rtx (operands[1]), operands[2]);
+  operands[7]
+    = gen_rtx_COMPARE (GET_MODE (operands[5]), copy_rtx (operands[6]),
                       const0_rtx);
 })
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c8f538b26e6b20d4ece54031c9409e4a053adb01..087c58b2e4f78858bcf5dab58d10bacb7e05db54 100644 (file)
@@ -1,3 +1,9 @@
+2019-04-08  Jakub Jelinek  <jakub@redhat.com>
+
+       PR rtl-optimization/89865
+       * gcc.target/i386/pr49095.c: Adjust number of expected RMW spots
+       on ia32.
+
 2019-04-01  Bin Cheng  <bin.cheng@linux.alibaba.com>
 
        PR tree-optimization/89725
diff --git a/gcc/testsuite/gcc.target/i386/pr49095.c b/gcc/testsuite/gcc.target/i386/pr49095.c
index 177e4ceaa62049006bccbb3ef977a3a86c2cf6e2..3ae14e3d4e0ef9345ba61818a7f26f3cd233c99b 100644 (file)
@@ -73,5 +73,5 @@ G (long)
 /* { dg-final { scan-assembler-not "test\[lq\]" } } */
 /* The {f,h}{char,short,int,long}xor functions aren't optimized into
    a RMW instruction, so need load, modify and store.  FIXME eventually.  */
-/* { dg-final { scan-assembler-times "\\(%eax\\), %" 12 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "\\(%eax\\), %" 8 { target { ia32 } } } } */
 /* { dg-final { scan-assembler-times "\\(%\[re\]di\\), %" 8 { target { ! ia32 } } } } */