From 55994b971b02a3808f3776ce66e890ecc1c7b759 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Wed, 28 Jun 2017 14:13:02 +0000 Subject: [PATCH] Improve Cortex-A53 shift bypass The aarch_forward_to_shift_is_not_shifted_reg bypass always returns true on AArch64 shifted instructions. This causes the bypass to activate in too many cases, resulting in slower execution on Cortex-A53 as reported in PR79665. This patch uses the arm_no_early_alu_shift_dep condition instead which improves the example in PR79665 by ~7%. Given it is no longer used, remove aarch_forward_to_shift_is_not_shifted_reg. Also remove an unnecessary REG_P check. gcc/ PR target/79665 * config/arm/aarch-common.c (arm_no_early_alu_shift_dep): Remove redundant if. (aarch_forward_to_shift_is_not_shifted_reg): Remove. * config/arm/aarch-common-protos.h (aarch_forward_to_shift_is_not_shifted_reg): Remove. * config/arm/cortex-a53.md: Use arm_no_early_alu_shift_dep in bypass. From-SVN: r249740 --- gcc/ChangeLog | 10 +++++++ gcc/config/arm/aarch-common-protos.h | 1 - gcc/config/arm/aarch-common.c | 39 +--------------------------- gcc/config/arm/cortex-a53.md | 2 +- 4 files changed, 12 insertions(+), 40 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e2d7261795b..0ed9bfc8580 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2017-06-28 Wilco Dijkstra + + PR target/79665 + * config/arm/aarch-common.c (arm_no_early_alu_shift_dep): + Remove redundant if. + (aarch_forward_to_shift_is_not_shifted_reg): Remove. + * config/arm/aarch-common-protos.h + (aarch_forward_to_shift_is_not_shifted_reg): Remove. + * config/arm/cortex-a53.md: Use arm_no_early_alu_shift_dep in bypass. 
+ 2017-06-28 Michael Meissner PR ipa/81238 diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index 35d2d96c9fd..a51121193ce 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -25,7 +25,6 @@ extern int aarch_accumulator_forwarding (rtx_insn *, rtx_insn *); extern int aarch_crypto_can_dual_issue (rtx_insn *, rtx_insn *); -extern int aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *, rtx_insn *); extern bool aarch_rev16_p (rtx); extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode); extern bool aarch_rev16_shright_mask_imm_p (rtx, machine_mode); diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c index 6a04711335d..979f86e5bed 100644 --- a/gcc/config/arm/aarch-common.c +++ b/gcc/config/arm/aarch-common.c @@ -272,12 +272,7 @@ arm_no_early_alu_shift_dep (rtx producer, rtx consumer) return 0; if ((early_op = arm_find_shift_sub_rtx (op))) - { - if (REG_P (early_op)) - early_op = op; - - return !reg_overlap_mentioned_p (value, early_op); - } + return !reg_overlap_mentioned_p (value, early_op); return 0; } @@ -508,38 +503,6 @@ aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer) return (REGNO (dest) == REGNO (accumulator)); } -/* Return nonzero if the CONSUMER instruction is some sort of - arithmetic or logic + shift operation, and the register we are - writing in PRODUCER is not used in a register shift by register - operation. */ - -int -aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *producer, - rtx_insn *consumer) -{ - rtx value, op; - rtx early_op; - - if (!arm_get_set_operands (producer, consumer, &value, &op)) - return 0; - - if ((early_op = arm_find_shift_sub_rtx (op))) - { - if (REG_P (early_op)) - early_op = op; - - /* Any other canonicalisation of a shift is a shift-by-constant - so we don't care. 
*/ - if (GET_CODE (early_op) == ASHIFT) - return (!REG_P (XEXP (early_op, 0)) - || !REG_P (XEXP (early_op, 1))); - else - return 1; - } - - return 0; -} - /* Return non-zero if the consumer (a multiply-accumulate instruction) has an accumulator dependency on the result of the producer (a multiplication instruction) and no other dependency on that result. */ diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md index 3c613375756..6734e37be49 100644 --- a/gcc/config/arm/cortex-a53.md +++ b/gcc/config/arm/cortex-a53.md @@ -211,7 +211,7 @@ (define_bypass 1 "cortex_a53_alu*" "cortex_a53_alu_shift*" - "aarch_forward_to_shift_is_not_shifted_reg") + "arm_no_early_alu_shift_dep") (define_bypass 2 "cortex_a53_alu*" "cortex_a53_alu_*,cortex_a53_shift*") -- 2.30.2