ix86: Add peephole2 for *add<mode>3_cc_overflow_1 followed by matching memory store...
author: Jakub Jelinek <jakub@redhat.com>
Fri, 8 May 2020 08:03:56 +0000 (10:03 +0200)
committer: Jakub Jelinek <jakub@redhat.com>
Fri, 8 May 2020 08:03:56 +0000 (10:03 +0200)
The following peephole2 changes:
- addl (%rdi), %esi
+ xorl %eax, %eax
+ addl %esi, (%rdi)
  setc %al
- movl %esi, (%rdi)
- movzbl %al, %eax
  ret
on the testcase.  *add<mode>3_cc_overflow_1, being an add{l,q} insn, is
commutative, so if TARGET_READ_MODIFY_WRITE we can replace
addl (%rdi), %esi; movl %esi, (%rdi)
with
addl %esi, (%rdi)
if %esi is dead after those two insns.

2020-05-08  Jakub Jelinek  <jakub@redhat.com>

PR target/94857
* config/i386/i386.md (peephole2 after *add<mode>3_cc_overflow_1): New
define_peephole2.

* gcc.target/i386/pr94857.c: New test.

gcc/ChangeLog
gcc/config/i386/i386.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr94857.c [new file with mode: 0644]

index 2e5a05134abe7ed4d27b23983280960574a46997..9a380a5505c7795beb9c7b65405e55abe71ba8e5 100644 (file)
@@ -1,5 +1,9 @@
 2020-05-08  Jakub Jelinek  <jakub@redhat.com>
 
+       PR target/94857
+       * config/i386/i386.md (peephole2 after *add<mode>3_cc_overflow_1): New
+       define_peephole2.
+
        PR middle-end/94724
        * tree.c (get_narrower): Reuse the op temporary instead of
        shadowing it.
index 5fe851e0312849aea5ac1aa01e6c1dc9a1e5666e..8bfc9cb0b7180c07d4202f7ff23c8b500fe2bded 100644 (file)
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
+;; Combine a carry-setting add of a memory operand into a register,
+;; immediately followed by a store of that register back to the same
+;; memory operand, into a single add whose destination is the memory
+;; operand, e.g.
+;;   addl (%rdi), %esi; movl %esi, (%rdi)   ->   addl %esi, (%rdi)
+;; The replacement swaps the two plus operands (the addition is
+;; commutative) and compares the sum against the memory operand instead
+;; of the register.
+;; The condition requires TARGET_READ_MODIFY_WRITE or optimizing the insn
+;; for size, that the register is dead after the second matched insn
+;; (the replacement no longer sets it), and that the register is not
+;; mentioned in the memory operand.
+(define_peephole2
+  [(parallel [(set (reg:CCC FLAGS_REG)
+                  (compare:CCC
+                    (plus:SWI (match_operand:SWI 0 "general_reg_operand")
+                              (match_operand:SWI 1 "memory_operand"))
+                    (match_dup 0)))
+             (set (match_dup 0) (plus:SWI (match_dup 0) (match_dup 1)))])
+   (set (match_dup 1) (match_dup 0))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (2, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])"
+  [(parallel [(set (reg:CCC FLAGS_REG)
+                  (compare:CCC
+                    (plus:SWI (match_dup 1) (match_dup 0))
+                    (match_dup 1)))
+             (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])
+
 (define_insn "*addsi3_zext_cc_overflow_1"
   [(set (reg:CCC FLAGS_REG)
        (compare:CCC
index 174198fdbe49c3acfa3040374bc61b83ea1d1497..db0a837829f1146aa4ac20915427d3888abb0f5e 100644 (file)
@@ -1,5 +1,8 @@
 2020-05-08  Jakub Jelinek  <jakub@redhat.com>
 
+       PR target/94857
+       * gcc.target/i386/pr94857.c: New test.
+
        PR tree-optimization/94783
        * gcc.dg/tree-ssa/pr94783.c: New test.
 
diff --git a/gcc/testsuite/gcc.target/i386/pr94857.c b/gcc/testsuite/gcc.target/i386/pr94857.c
new file mode 100644 (file)
index 0000000..f84ee22
--- /dev/null
@@ -0,0 +1,13 @@
+/* PR target/94857 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=skylake -masm=att" } */
+/* { dg-additional-options "-mregparm=2" { target ia32 } } */
+/* { dg-final { scan-assembler "\taddl\t%\[a-z0-9]\*, \\\(" } } */
+
+int
+foo (unsigned *p, unsigned x)
+{
+  unsigned u = *p;  /* Value of *p before the addition.  */
+  *p += x;  /* Add x to the value stored at *p.  */
+  return u > *p;  /* Nonzero iff the unsigned addition wrapped around.  */
+}