[AArch64][v2] Improve comparison with complex immediates followed by branch/cset
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>
Tue, 24 Nov 2015 13:08:56 +0000 (13:08 +0000)
committerKyrylo Tkachov <ktkachov@gcc.gnu.org>
Tue, 24 Nov 2015 13:08:56 +0000 (13:08 +0000)
* config/aarch64/aarch64.md (*condjump): Rename to...
(condjump): ... This.
(*compare_condjump<mode>): New define_insn_and_split.
(*compare_cstore<mode>_insn): Likewise.
(*cstore<mode>_insn): Rename to...
(aarch64_cstore<mode>): ... This.
* config/aarch64/iterators.md (CMP): Handle ne code.
* config/aarch64/predicates.md (aarch64_imm24): New predicate.

* gcc.target/aarch64/cmpimm_branch_1.c: New test.
* gcc.target/aarch64/cmpimm_cset_1.c: Likewise.

From-SVN: r230805

gcc/ChangeLog
gcc/config/aarch64/aarch64.md
gcc/config/aarch64/iterators.md
gcc/config/aarch64/predicates.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/cmpimm_branch_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/cmpimm_cset_1.c [new file with mode: 0644]

index 02590a2eaea31bf6cb5c29b64fb9c795abe9171c..ada1e3094c861f214a833f266d76e0a0a81107f3 100644 (file)
@@ -1,3 +1,14 @@
+2015-11-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * config/aarch64/aarch64.md (*condjump): Rename to...
+       (condjump): ... This.
+       (*compare_condjump<mode>): New define_insn_and_split.
+       (*compare_cstore<mode>_insn): Likewise.
+       (*cstore<mode>_insn): Rename to...
+       (aarch64_cstore<mode>): ... This.
+       * config/aarch64/iterators.md (CMP): Handle ne code.
+       * config/aarch64/predicates.md (aarch64_imm24): New predicate.
+
 2015-11-24  Mikhail Maltsev  <maltsevm@gmail.com>
 
        PR target/68497
index d46f837df7f085f03f4f575ec1871781547896ed..28a7f8c6fc8f63e194cfa3ede3cea39e1abefe25 100644 (file)
   }
 )
 
-(define_insn "*condjump"
+(define_insn "condjump"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
                            [(match_operand 1 "cc_register" "") (const_int 0)])
                           (label_ref (match_operand 2 "" ""))
                      (const_int 1)))]
 )
 
+;; For a 24-bit immediate CST we can optimize the compare for equality
+;; and branch sequence from:
+;;     mov     x0, #imm1
+;;     movk    x0, #imm2, lsl 16 /* x0 contains CST.  */
+;;     cmp     x1, x0
+;;     b<ne,eq> .Label
+;; into the shorter:
+;;     sub     x0, x1, #(CST & 0xfff000)
+;;     subs    x0, x0, #(CST & 0x000fff)
+;;     b<ne,eq> .Label
+(define_insn_and_split "*compare_condjump<mode>"
+  [(set (pc) (if_then_else (EQL
+                             (match_operand:GPI 0 "register_operand" "r")
+                             (match_operand:GPI 1 "aarch64_imm24" "n"))
+                          (label_ref:P (match_operand 2 "" ""))
+                          (pc)))]
+  "!aarch64_move_imm (INTVAL (operands[1]), <MODE>mode)
+   && !aarch64_plus_operand (operands[1], <MODE>mode)
+   && !reload_completed"
+  "#"
+  "&& true"
+  [(const_int 0)]
+  {
+    /* The aarch64_imm24 predicate limits operands[1] to 24 bits, so it
+       decomposes exactly into a low 12-bit part and a part occupying
+       bits 12-23.  These are the two immediates of the sub/subs pair
+       shown in the comment above.  */
+    HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff;
+    HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000;
+    /* A fresh pseudo holds the intermediate result.
+       NOTE(review): presumably this is why the insn condition requires
+       !reload_completed -- confirm.  */
+    rtx tmp = gen_reg_rtx (<MODE>mode);
+    /* Subtract by adding the negated parts.  After both additions tmp
+       holds operands[0] - CST, which is zero exactly when the two
+       operands are equal.  The second addition goes through the
+       compare0 pattern (presumably the flag-setting form, i.e. the
+       subs in the comment above).  */
+    emit_insn (gen_add<mode>3 (tmp, operands[0], GEN_INT (-hi_imm)));
+    emit_insn (gen_add<mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm)));
+    /* Re-express the original comparison code as a test of the flags
+       register against zero and branch to operands[2] via condjump.  */
+    rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+    rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <MODE>mode, cc_reg, const0_rtx);
+    emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[2]));
+    DONE;
+  }
+)
+
 (define_expand "casesi"
   [(match_operand:SI 0 "register_operand" "")  ; Index
    (match_operand:SI 1 "const_int_operand" "") ; Lower bound
   "
 )
 
-(define_insn "*cstore<mode>_insn"
+(define_insn "aarch64_cstore<mode>"
   [(set (match_operand:ALLI 0 "register_operand" "=r")
        (match_operator:ALLI 1 "aarch64_comparison_operator"
         [(match_operand 2 "cc_register" "") (const_int 0)]))]
   [(set_attr "type" "csel")]
 )
 
+;; For a 24-bit immediate CST we can optimize the compare for equality
+;; and cset sequence from:
+;;     mov     x0, #imm1
+;;     movk    x0, #imm2, lsl 16 /* x0 contains CST.  */
+;;     cmp     x1, x0
+;;     cset    x2, <ne,eq>
+;; into the shorter:
+;;     sub     x0, x1, #(CST & 0xfff000)
+;;     subs    x0, x0, #(CST & 0x000fff)
+;;     cset x2, <ne, eq>.
+(define_insn_and_split "*compare_cstore<mode>_insn"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (EQL:GPI (match_operand:GPI 1 "register_operand" "r")
+                 (match_operand:GPI 2 "aarch64_imm24" "n")))]
+  "!aarch64_move_imm (INTVAL (operands[2]), <MODE>mode)
+   && !aarch64_plus_operand (operands[2], <MODE>mode)
+   && !reload_completed"
+  "#"
+  "&& true"
+  [(const_int 0)]
+  {
+    /* The aarch64_imm24 predicate limits operands[2] to 24 bits, so it
+       decomposes exactly into a low 12-bit part and a part occupying
+       bits 12-23.  These are the two immediates of the sub/subs pair
+       shown in the comment above.  */
+    HOST_WIDE_INT lo_imm = UINTVAL (operands[2]) & 0xfff;
+    HOST_WIDE_INT hi_imm = UINTVAL (operands[2]) & 0xfff000;
+    /* A fresh pseudo holds the intermediate result.
+       NOTE(review): presumably this is why the insn condition requires
+       !reload_completed -- confirm.  */
+    rtx tmp = gen_reg_rtx (<MODE>mode);
+    /* Subtract by adding the negated parts.  After both additions tmp
+       holds operands[1] - CST, which is zero exactly when the two
+       operands are equal.  The second addition goes through the
+       compare0 pattern (presumably the flag-setting form, i.e. the
+       subs in the comment above).  */
+    emit_insn (gen_add<mode>3 (tmp, operands[1], GEN_INT (-hi_imm)));
+    emit_insn (gen_add<mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm)));
+    /* Re-express the original comparison code as a test of the flags
+       register against zero and store its result into operands[0]
+       via the aarch64_cstore<mode> pattern.  */
+    rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+    rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <MODE>mode, cc_reg, const0_rtx);
+    emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp_rtx, cc_reg));
+    DONE;
+  }
+  [(set_attr "type" "csel")]
+)
+
 ;; zero_extend version of the above
 (define_insn "*cstoresi_insn_uxtw"
   [(set (match_operand:DI 0 "register_operand" "=r")
index c2eb7dec99d06b103c86ec11c590ac746706a8ab..422bc879f7f6c495349bf0a40a471e7dce5780e6 100644 (file)
                           (ltu "1") (leu "1") (geu "2") (gtu "2")])
 
 (define_code_attr CMP [(lt "LT") (le "LE") (eq "EQ") (ge "GE") (gt "GT")
-                          (ltu "LTU") (leu "LEU") (geu "GEU") (gtu "GTU")])
+                       (ltu "LTU") (leu "LEU") (ne "NE") (geu "GEU")
+                       (gtu "GTU")])
 
 (define_code_attr fix_trunc_optab [(fix "fix_trunc")
                                   (unsigned_fix "fixuns_trunc")])
index e7f76e048543d27b325f915ced028b7a493b6789..c0c3ff5dc20ea9e4d7cf3ade1ec9d6c869f55399 100644 (file)
   (and (match_code "const_int")
        (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) <= 4")))
 
+;; An immediate that fits into 24 bits.
+(define_predicate "aarch64_imm24"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (UINTVAL (op), 0, 0xffffff)")))
+
 (define_predicate "aarch64_pwr_imm3"
   (and (match_code "const_int")
        (match_test "INTVAL (op) != 0
index 3ec74df4db362decbfd0fd9c20d81c20747c2201..47f7ea55c21426d3343959c2776e4c37f9c31c39 100644 (file)
@@ -1,3 +1,8 @@
+2015-11-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * gcc.target/aarch64/cmpimm_branch_1.c: New test.
+       * gcc.target/aarch64/cmpimm_cset_1.c: Likewise.
+
 2015-11-24  Mikhail Maltsev  <maltsevm@gmail.com>
 
        PR target/68497
diff --git a/gcc/testsuite/gcc.target/aarch64/cmpimm_branch_1.c b/gcc/testsuite/gcc.target/aarch64/cmpimm_branch_1.c
new file mode 100644 (file)
index 0000000..7ad736b
--- /dev/null
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O2" } */
+
+/* Test that we emit a sub+subs sequence rather than mov+movk+cmp.  */
+
+void g (void);
+void
+foo (int x)
+{
+  if (x != 0x123456)
+    g ();
+}
+
+void
+fool (long long x)
+{
+  if (x != 0x123456)
+    g ();
+}
+
+/* { dg-final { scan-assembler-not "cmp\tw\[0-9\]*.*" } } */
+/* { dg-final { scan-assembler-not "cmp\tx\[0-9\]*.*" } } */
+/* { dg-final { scan-assembler-times "sub\tw\[0-9\]+.*" 1 } } */
+/* { dg-final { scan-assembler-times "sub\tx\[0-9\]+.*" 1 } } */
+/* { dg-final { scan-assembler-times "subs\tw\[0-9\]+.*" 1 } } */
+/* { dg-final { scan-assembler-times "subs\tx\[0-9\]+.*" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/cmpimm_cset_1.c b/gcc/testsuite/gcc.target/aarch64/cmpimm_cset_1.c
new file mode 100644 (file)
index 0000000..f6fd69f
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O2" } */
+
+/* Test that we emit a sub+subs sequence rather than mov+movk+cmp.  */
+
+int
+foo (int x)
+{
+  return x == 0x123456;
+}
+
+long long
+fool (long long x)
+{
+  return x == 0x123456;
+}
+
+/* { dg-final { scan-assembler-not "cmp\tw\[0-9\]*.*" } } */
+/* { dg-final { scan-assembler-not "cmp\tx\[0-9\]*.*" } } */
+/* { dg-final { scan-assembler-times "sub\tw\[0-9\]+.*" 1 } } */
+/* { dg-final { scan-assembler-times "sub\tx\[0-9\]+.*" 1 } } */
+/* { dg-final { scan-assembler-times "subs\tw\[0-9\]+.*" 1 } } */
+/* { dg-final { scan-assembler-times "subs\tx\[0-9\]+.*" 1 } } */