From 0406dccda5c9adfaf65e132fda5b8c0fbc5ce1d5 Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Fri, 18 Oct 2019 19:01:49 +0000 Subject: [PATCH] [arm] Perform early splitting of adddi3. This patch causes the expansion of adddi3 to split the operation immediately for Arm and Thumb-2. This is desirable as it frees up the register allocator to pick whatever combination of registers suits best and reduces the number of auxiliary patterns that we need in the back-end. Three of the testcases that we disabled earlier are already fixed by this patch. Finally, we add a new pattern to match the canonicalization of add-with-carry when using an immediate of zero. gcc: * config/arm/arm-protos.h (arm_decompose_di_binop): New prototype. * config/arm/arm.c (arm_decompose_di_binop): New function. * config/arm/arm.md (adddi3): Also accept any const_int for op2. If not generating Thumb-1 code, decompose the operation into 32-bit pieces. * add0si_carryin_<optab>: New pattern. testsuite: * gcc.target/arm/pr53447-1.c: Remove XFAIL. * gcc.target/arm/pr53447-3.c: Remove XFAIL. * gcc.target/arm/pr53447-4.c: Remove XFAIL. From-SVN: r277165 --- gcc/ChangeLog | 9 +++ gcc/config/arm/arm-protos.h | 1 + gcc/config/arm/arm.c | 15 +++++ gcc/config/arm/arm.md | 73 ++++++++++++++++++------ gcc/testsuite/ChangeLog | 6 ++ gcc/testsuite/gcc.target/arm/pr53447-1.c | 2 +- gcc/testsuite/gcc.target/arm/pr53447-3.c | 2 +- gcc/testsuite/gcc.target/arm/pr53447-4.c | 2 +- 8 files changed, 91 insertions(+), 19 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 05ae1eaa088..a4acdccc770 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2019-10-18 Richard Earnshaw + + * config/arm/arm-protos.h (arm_decompose_di_binop): New prototype. + * config/arm/arm.c (arm_decompose_di_binop): New function. + * config/arm/arm.md (adddi3): Also accept any const_int for op2. + If not generating Thumb-1 code, decompose the operation into 32-bit + pieces. + * add0si_carryin_<optab>: New pattern. 
+ 2019-10-18 Richard Earnshaw * arm.md (adddi3): Only accept register operands. diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index f995974f9bb..c685bcbf99c 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -57,6 +57,7 @@ extern rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high); extern bool arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode, bool high); extern void arm_emit_speculation_barrier_function (void); +extern void arm_decompose_di_binop (rtx, rtx, rtx *, rtx *, rtx *, rtx *); #ifdef RTX_CODE extern void arm_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode, diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index e8ec426a0f8..f26945dbcf0 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -14933,6 +14933,21 @@ gen_cpymem_ldrd_strd (rtx *operands) return true; } +/* Decompose operands for a 64-bit binary operation in OP1 and OP2 + into its component 32-bit subregs. OP2 may be an immediate + constant and we want to simplify it in that case. */ +void +arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1, + rtx *lo_op2, rtx *hi_op2) +{ + *lo_op1 = gen_lowpart (SImode, op1); + *hi_op1 = gen_highpart (SImode, op1); + *lo_op2 = simplify_gen_subreg (SImode, op2, DImode, + subreg_lowpart_offset (SImode, DImode)); + *hi_op2 = simplify_gen_subreg (SImode, op2, DImode, + subreg_highpart_offset (SImode, DImode)); +} + /* Select a dominance comparison mode if possible for a test of the general form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms. 
COND_OR == DOM_CC_X_AND_Y => (X && Y) diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 241ba97c4ba..5ba42a13430 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -437,25 +437,53 @@ [(parallel [(set (match_operand:DI 0 "s_register_operand") (plus:DI (match_operand:DI 1 "s_register_operand") - (match_operand:DI 2 "s_register_operand"))) + (match_operand:DI 2 "reg_or_int_operand"))) (clobber (reg:CC CC_REGNUM))])] "TARGET_EITHER" " - if (TARGET_THUMB1 && !REG_P (operands[2])) - operands[2] = force_reg (DImode, operands[2]); - " -) + if (TARGET_THUMB1) + { + if (!REG_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); + } + else + { + rtx lo_result, hi_result, lo_dest, hi_dest; + rtx lo_op1, hi_op1, lo_op2, hi_op2; + arm_decompose_di_binop (operands[1], operands[2], &lo_op1, &hi_op1, + &lo_op2, &hi_op2); + lo_result = lo_dest = gen_lowpart (SImode, operands[0]); + hi_result = hi_dest = gen_highpart (SImode, operands[0]); + + if (lo_op2 == const0_rtx) + { + lo_dest = lo_op1; + if (!arm_add_operand (hi_op2, SImode)) + hi_op2 = force_reg (SImode, hi_op2); + /* Assume hi_op2 won't also be zero. 
*/ + emit_insn (gen_addsi3 (hi_dest, hi_op1, hi_op2)); + } + else + { + if (!arm_add_operand (lo_op2, SImode)) + lo_op2 = force_reg (SImode, lo_op2); + if (!arm_not_operand (hi_op2, SImode)) + hi_op2 = force_reg (SImode, hi_op2); + + emit_insn (gen_addsi3_compareC (lo_dest, lo_op1, lo_op2)); + if (hi_op2 == const0_rtx) + emit_insn (gen_add0si3_carryin_ltu (hi_dest, hi_op1)); + else + emit_insn (gen_addsi3_carryin_ltu (hi_dest, hi_op1, hi_op2)); + } -(define_insn "*arm_adddi3" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r") - (plus:DI (match_operand:DI 1 "s_register_operand" " %0,0,r") - (match_operand:DI 2 "s_register_operand" " r,0,r"))) - (clobber (reg:CC CC_REGNUM))] - "TARGET_32BIT" - "adds\\t%Q0, %Q1, %Q2;adc\\t%R0, %R1, %R2" - [(set_attr "conds" "clob") - (set_attr "length" "8") - (set_attr "type" "multiple")] + if (lo_result != lo_dest) + emit_move_insn (lo_result, lo_dest); + if (hi_result != hi_dest) + emit_move_insn (gen_highpart (SImode, operands[0]), hi_dest); + DONE; + } + " ) (define_expand "addv4" @@ -830,7 +858,7 @@ (set_attr "type" "alus_imm,alus_sreg,alus_imm,alus_imm,alus_sreg")] ) -(define_insn "*addsi3_carryin_" +(define_insn "addsi3_carryin_" [(set (match_operand:SI 0 "s_register_operand" "=l,r,r") (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%l,r,r") (match_operand:SI 2 "arm_not_operand" "0,rI,K")) @@ -848,6 +876,19 @@ (set_attr "type" "adc_reg,adc_reg,adc_imm")] ) +;; Canonicalization of the above when the immediate is zero. 
+(define_insn "add0si3_carryin_" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (LTUGEU:SI (reg: CC_REGNUM) (const_int 0)) + (match_operand:SI 1 "arm_not_operand" "r")))] + "TARGET_32BIT" + "adc%?\\t%0, %1, #0" + [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set_attr "length" "4") + (set_attr "type" "adc_imm")] +) + (define_insn "*addsi3_carryin_alt2_" [(set (match_operand:SI 0 "s_register_operand" "=l,r,r") (plus:SI (plus:SI (LTUGEU:SI (reg: CC_REGNUM) (const_int 0)) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0c5b6e4180d..ae9d216e1e3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2019-10-18 Richard Earnshaw + + * gcc.target/arm/pr53447-1.c: Remove XFAIL. + * gcc.target/arm/pr53447-3.c: Remove XFAIL. + * gcc.target/arm/pr53447-4.c: Remove XFAIL. + 2019-10-18 Richard Earnshaw * gcc.target/arm/negdi-3.c: Add XFAILS. diff --git a/gcc/testsuite/gcc.target/arm/pr53447-1.c b/gcc/testsuite/gcc.target/arm/pr53447-1.c index 0fd98b791fe..dc094180c85 100644 --- a/gcc/testsuite/gcc.target/arm/pr53447-1.c +++ b/gcc/testsuite/gcc.target/arm/pr53447-1.c @@ -1,6 +1,6 @@ /* { dg-options "-O2" } */ /* { dg-require-effective-target arm32 } */ -/* { dg-final { scan-assembler-not "mov" { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-not "mov" } } */ void t0p(long long * p) { diff --git a/gcc/testsuite/gcc.target/arm/pr53447-3.c b/gcc/testsuite/gcc.target/arm/pr53447-3.c index 79d3691ee14..8e48f119b74 100644 --- a/gcc/testsuite/gcc.target/arm/pr53447-3.c +++ b/gcc/testsuite/gcc.target/arm/pr53447-3.c @@ -1,6 +1,6 @@ /* { dg-options "-O2" } */ /* { dg-require-effective-target arm32 } */ -/* { dg-final { scan-assembler-not "mov" { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-not "mov" } } */ void t0p(long long * p) diff --git a/gcc/testsuite/gcc.target/arm/pr53447-4.c b/gcc/testsuite/gcc.target/arm/pr53447-4.c index bfa20df7ccd..22acb97270e 100644 --- 
a/gcc/testsuite/gcc.target/arm/pr53447-4.c +++ b/gcc/testsuite/gcc.target/arm/pr53447-4.c @@ -1,6 +1,6 @@ /* { dg-options "-O2" } */ /* { dg-require-effective-target arm32 } */ -/* { dg-final { scan-assembler-not "mov" { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-not "mov" } } */ void t0p(long long * p) -- 2.30.2