[arm] Allow the summation result of signed add-with-overflow to be discarded.
authorRichard Earnshaw <rearnsha@arm.com>
Fri, 18 Oct 2019 19:04:30 +0000 (19:04 +0000)
committerRichard Earnshaw <rearnsha@gcc.gnu.org>
Fri, 18 Oct 2019 19:04:30 +0000 (19:04 +0000)
This patch matches the signed add-with-overflow patterns when the
summation itself is dropped.  In this case we can use CMN (or CMP with
some immediates).  There are a small number of constants in thumb2
where this can result in less dense code (as we lack 16-bit CMN with
immediate patterns).  To handle this we use peepholes to try these
alternatives when either a scratch is available (0 <= i <= 7) or the
original register is dead (0 <= i <= 255).  We don't use a scratch in
the pattern itself because, if those conditions are not satisfied, the
32-bit form is preferable to forcing a reload.

* config/arm/arm.md (addsi3_compareV_reg_nosum): New insn.
(addsi3_compareV_imm_nosum): New insn.  Also add peephole2 patterns
to transform this back into the summation version when that leads
to smaller code.

From-SVN: r277185

gcc/ChangeLog
gcc/config/arm/arm.md

index 4c82758a060b937779da4ddf9f4f4a8d57fc97d0..4a5a139f7ffb6f9a26afb2e26cc3fa68c20b4646 100644 (file)
@@ -1,3 +1,10 @@
+2019-10-18  Richard Earnshaw  <rearnsha@arm.com>
+
+       * config/arm/arm.md (addsi3_compareV_reg_nosum): New insn.
+       (addsi3_compareV_imm_nosum): New insn.  Also add peephole2 patterns
+       to transform this back into the summation version when that leads
+       to smaller code.
+
 2019-10-18  Richard Earnshaw  <rearnsha@arm.com>
 
        * config/arm/arm.md (addv<mode>4): Delete.
index b5214c79c3580f1db8457962e261bc9320ef8305..be002f7738258ad8f7c9abd6db02b1e4443af676 100644 (file)
    (set_attr "type" "alus_sreg")]
 )
 
+;; Signed add-with-overflow check on two registers where the sum itself is
+;; not needed: the pattern only sets the CC_V flags register and has no
+;; SImode destination.  The flags compare the DImode sum of the two
+;; sign-extended inputs with the sign-extended SImode sum, and a single
+;; CMN is emitted.
+;; The "%" modifier on operand 0 marks operands 0 and 1 as commutative.
+;; Alternative 0 ("l" registers, arch t2) is the 2-byte encoding;
+;; alternative 1 (any "r" register) is the 4-byte encoding.
+(define_insn "*addsi3_compareV_reg_nosum"
+  [(set (reg:CC_V CC_REGNUM)
+       (compare:CC_V
+         (plus:DI
+           (sign_extend:DI (match_operand:SI 0 "register_operand" "%l,r"))
+           (sign_extend:DI (match_operand:SI 1 "register_operand" "l,r")))
+         (sign_extend:DI (plus:SI (match_dup 0) (match_dup 1)))))]
+  "TARGET_32BIT"
+  "cmn%?\\t%0, %1"
+  [(set_attr "conds" "set")
+   (set_attr "arch" "t2,*")
+   (set_attr "length" "2,4")
+   (set_attr "type" "alus_sreg")]
+)
+
 (define_insn "addsi3_compareV_imm"
   [(set (reg:CC_V CC_REGNUM)
        (compare:CC_V
    (set_attr "type" "alus_imm")]
 )
 
+;; Register-plus-immediate form of the signed add-with-overflow check where
+;; the sum is discarded: only the CC_V flags register is set.
+;; The insn condition additionally requires operand 1 to be unchanged by
+;; ARM_SIGN_EXTEND.
+;; Alternatives, in constraint order:
+;;   Pw - CMP against the negated constant (%n1); arch t2, 2 bytes.
+;;   I  - CMN against the constant as given; 4 bytes.
+;;   L  - CMP against the negated constant (%n1); 4 bytes.
+;; The exact constant ranges accepted by Pw, I and L are defined outside
+;; this hunk.
+(define_insn "addsi3_compareV_imm_nosum"
+  [(set (reg:CC_V CC_REGNUM)
+       (compare:CC_V
+         (plus:DI
+           (sign_extend:DI
+            (match_operand:SI 0 "register_operand" "l,r,r"))
+           (match_operand 1 "arm_addimm_operand" "Pw,I,L"))
+         (sign_extend:DI (plus:SI (match_dup 0) (match_dup 1)))))]
+  "TARGET_32BIT
+   && INTVAL (operands[1]) == ARM_SIGN_EXTEND (INTVAL (operands[1]))"
+  "@
+   cmp%?\\t%0, #%n1
+   cmn%?\\t%0, %1
+   cmp%?\\t%0, #%n1"
+  [(set_attr "conds" "set")
+   (set_attr "arch" "t2,*,*")
+   (set_attr "length" "2,4,4")
+   (set_attr "type" "alus_imm")]
+)
+
+;; We can handle more constants efficiently if we can clobber either a scratch
+;; or the other source operand.  We deliberately leave this late as in
+;; high register pressure situations it's not worth forcing any reloads.
+;; Scratch variant (Thumb-2 only): when a low scratch register (operand 2)
+;; is available and the constant satisfies constraint Pd (0 <= i <= 7
+;; according to the accompanying commit message), rewrite the flags-only
+;; compare into a parallel that also stores the SImode sum in the scratch.
+;; NOTE(review): the replacement wraps operand 1 (a const_int) in
+;; sign_extend:DI, whereas the matched pattern uses it bare -- confirm the
+;; generated RTL is still recognized by the summation insn.
+(define_peephole2
+  [(match_scratch:SI 2 "l")
+   (set (reg:CC_V CC_REGNUM)
+       (compare:CC_V
+         (plus:DI
+           (sign_extend:DI
+            (match_operand:SI 0 "low_register_operand"))
+           (match_operand 1 "const_int_operand"))
+         (sign_extend:DI (plus:SI (match_dup 0) (match_dup 1)))))]
+  "TARGET_THUMB2
+   && satisfies_constraint_Pd (operands[1])"
+  [(parallel[
+    (set (reg:CC_V CC_REGNUM)
+        (compare:CC_V
+         (plus:DI (sign_extend:DI (match_dup 0))
+                  (sign_extend:DI (match_dup 1)))
+         (sign_extend:DI (plus:SI (match_dup 0) (match_dup 1)))))
+    (set (match_dup 2) (plus:SI (match_dup 0) (match_dup 1)))])]
+)
+
+;; Dead-source variant (Thumb-2 only): when the low source register
+;; (operand 0) is dead after, or set by, the matched insn and the constant
+;; satisfies constraint Py (0 <= i <= 255 according to the accompanying
+;; commit message), rewrite the flags-only compare into a parallel that
+;; also writes the SImode sum back into operand 0 itself.
+;; NOTE(review): the replacement wraps operand 1 (a const_int) in
+;; sign_extend:DI, whereas the matched pattern uses it bare -- confirm the
+;; generated RTL is still recognized by the summation insn.
+(define_peephole2
+  [(set (reg:CC_V CC_REGNUM)
+       (compare:CC_V
+         (plus:DI
+           (sign_extend:DI
+            (match_operand:SI 0 "low_register_operand"))
+           (match_operand 1 "const_int_operand"))
+         (sign_extend:DI (plus:SI (match_dup 0) (match_dup 1)))))]
+  "TARGET_THUMB2
+   && dead_or_set_p (peep2_next_insn (0), operands[0])
+   && satisfies_constraint_Py (operands[1])"
+  [(parallel[
+    (set (reg:CC_V CC_REGNUM)
+        (compare:CC_V
+         (plus:DI (sign_extend:DI (match_dup 0))
+                  (sign_extend:DI (match_dup 1)))
+         (sign_extend:DI (plus:SI (match_dup 0) (match_dup 1)))))
+    (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))])]
+)
+
 (define_insn "addsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
        (compare:CC_NOOV