From d5a0a47beef90da73f9f105c30ed380dce36e26b Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Tue, 13 Jan 2009 14:09:50 +0000 Subject: [PATCH] arm.c (struct processors): Pass for speed down into cost helper functions. * arm.c (struct processors): Pass for speed down into cost helper functions. (const_ok_for_op): Handle COMPARE and inequality nodes. (arm_rtx_costs_1): Rewrite. (arm_size_rtx_costs): Update prototype. (arm_rtx_costs): Pass speed down to helper functions. (arm_slowmul_rtx_costs): Rework cost calculations. (arm_fastmul_rtx_costs, arm_xscale_rtx_costs): Likewise. (arm_9e_rtx_costs): Likewise. From-SVN: r143338 --- gcc/ChangeLog | 12 + gcc/config/arm/arm.c | 827 ++++++++++++++++++++++++++++++------------- 2 files changed, 595 insertions(+), 244 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 84354df369b..7bb4789fd36 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2009-01-13 Richard Earnshaw + + * arm.c (struct processors): Pass for speed down into cost helper + functions. + (const_ok_for_op): Handle COMPARE and inequality nodes. + (arm_rtx_costs_1): Rewrite. + (arm_size_rtx_costs): Update prototype. + (arm_rtx_costs): Pass speed down to helper functions. + (arm_slowmul_rtx_costs): Rework cost calculations. + (arm_fastmul_rtx_costs, arm_xscale_rtx_costs): Likewise. + (arm_9e_rtx_costs): Likewise. + 2009-01-13 Uros Bizjak * config/alpha/alpha.c (alpha_legitimate_address_p): Explicit diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index dce0a523bd4..8c2aa5e19a0 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -126,12 +126,12 @@ static bool arm_function_ok_for_sibcall (tree, tree); static void arm_internal_label (FILE *, const char *, unsigned long); static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); -static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code); -static bool arm_size_rtx_costs (rtx, int, int, int *); -static bool arm_slowmul_rtx_costs (rtx, int, int, int *); -static bool arm_fastmul_rtx_costs (rtx, int, int, int *); -static bool arm_xscale_rtx_costs (rtx, int, int, int *); -static bool arm_9e_rtx_costs (rtx, int, int, int *); +static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool); +static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *); +static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); static bool arm_rtx_costs (rtx, int, int, int *, bool); static int arm_address_cost (rtx, bool); static bool arm_memory_load_p (rtx); @@ -619,7 +619,7 @@ struct processors enum processor_type core; const char *arch; const unsigned long flags; - bool (* rtx_costs) (rtx, int, int, int *); + bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool); }; /* Not all of these give usefully different compilation alternatives, @@ -1883,6 +1883,24 @@ const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code) switch (code) { case PLUS: + case COMPARE: + case EQ: + case NE: + case GT: + case LE: + case LT: + case GE: + case GEU: + case LTU: + case GTU: + case LEU: + case UNORDERED: + case ORDERED: + case UNEQ: + case UNGE: + case UNLT: + case UNGT: + case UNLE: return const_ok_for_arm (ARM_SIGN_EXTEND (-i)); case MINUS: /* Should only occur with (MINUS I reg) => rsb */ @@ -4882,130 +4900,227 @@ 
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) } } - -/* Worker routine for arm_rtx_costs. */ -/* ??? This needs updating for thumb2. */ -static inline int -arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer) +static inline bool +arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) { enum machine_mode mode = GET_MODE (x); enum rtx_code subcode; + rtx operand; + enum rtx_code code = GET_CODE (x); int extra_cost; + *total = 0; switch (code) { case MEM: /* Memory costs quite a lot for the first word, but subsequent words load at the equivalent of a single insn each. */ - return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) - + (GET_CODE (x) == SYMBOL_REF - && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0)); + *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode)); + return true; case DIV: case MOD: case UDIV: case UMOD: - return optimize_size ? COSTS_N_INSNS (2) : 100; + if (TARGET_HARD_FLOAT && mode == SFmode) + *total = COSTS_N_INSNS (2); + else if (TARGET_HARD_FLOAT && mode == DFmode) + *total = COSTS_N_INSNS (4); + else + *total = COSTS_N_INSNS (20); + return false; case ROTATE: - if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG) - return 4; + if (GET_CODE (XEXP (x, 1)) == REG) + *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */ + else if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total = rtx_cost (XEXP (x, 1), code, speed); + /* Fall through */ case ROTATERT: if (mode != SImode) - return 8; + { + *total += COSTS_N_INSNS (4); + return true; + } + /* Fall through */ case ASHIFT: case LSHIFTRT: case ASHIFTRT: + *total += rtx_cost (XEXP (x, 0), code, speed); if (mode == DImode) - return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8) - + ((GET_CODE (XEXP (x, 0)) == REG - || (GET_CODE (XEXP (x, 0)) == SUBREG - && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG)) - ? 0 : 8)); + { + *total += COSTS_N_INSNS (3); + return true; + } - extra_cost = 1; + *total += COSTS_N_INSNS (1); /* Increase the cost of complex shifts because they aren't any faster, and reduce dual issue opportunities. */ if (arm_tune_cortex_a9 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT) - extra_cost++; - - return (extra_cost + ((GET_CODE (XEXP (x, 0)) == REG - || (GET_CODE (XEXP (x, 0)) == SUBREG - && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG)) - ? 0 : 4) - + ((GET_CODE (XEXP (x, 1)) == REG - || (GET_CODE (XEXP (x, 1)) == SUBREG - && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG) - || (GET_CODE (XEXP (x, 1)) == CONST_INT)) - ? 0 : 4)); + ++*total; + + return true; case MINUS: - if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2) + if (TARGET_THUMB2) { - extra_cost = rtx_cost (XEXP (x, 1), code, true); - if (!REG_OR_SUBREG_REG (XEXP (x, 0))) - extra_cost += 4 * ARM_NUM_REGS (mode); - return extra_cost; + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (20); + } + else + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + /* Thumb2 does not have RSB, so all arguments must be + registers (subtracting a constant is canonicalized as + addition of the negated constant). */ + return false; } if (mode == DImode) - return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8) - + ((REG_OR_SUBREG_REG (XEXP (x, 0)) - || (GET_CODE (XEXP (x, 0)) == CONST_INT - && const_ok_for_arm (INTVAL (XEXP (x, 0))))) - ? 
0 : 8)); + { + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + if (GET_CODE (XEXP (x, 0)) == CONST_INT + && const_ok_for_arm (INTVAL (XEXP (x, 0)))) + { + *total += rtx_cost (XEXP (x, 1), code, speed); + return true; + } + + if (GET_CODE (XEXP (x, 1)) == CONST_INT + && const_ok_for_arm (INTVAL (XEXP (x, 1)))) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; + } + + return false; + } if (GET_MODE_CLASS (mode) == MODE_FLOAT) - return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1)) - || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE - && arm_const_double_rtx (XEXP (x, 1)))) - ? 0 : 8) - + ((REG_OR_SUBREG_REG (XEXP (x, 0)) - || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE - && arm_const_double_rtx (XEXP (x, 0)))) - ? 0 : 8)); - - if (((GET_CODE (XEXP (x, 0)) == CONST_INT - && const_ok_for_arm (INTVAL (XEXP (x, 0))) - && REG_OR_SUBREG_REG (XEXP (x, 1)))) - || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT - || subcode == ASHIFTRT || subcode == LSHIFTRT - || subcode == ROTATE || subcode == ROTATERT - || (subcode == MULT - && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT - && ((INTVAL (XEXP (XEXP (x, 1), 1)) & - (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0))) - && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0)) - && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1)) - || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT) - && REG_OR_SUBREG_REG (XEXP (x, 0)))) - return 1; + { + if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + { + *total = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE + && arm_const_double_rtx (XEXP (x, 0))) + { + *total += rtx_cost (XEXP (x, 1), code, speed); + return true; + } + + if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE + && arm_const_double_rtx (XEXP (x, 1))) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; + } + + return false; + } + *total = COSTS_N_INSNS (20); + return false; + } + + *total = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 0)) == CONST_INT + && const_ok_for_arm (INTVAL (XEXP (x, 0)))) + { + *total += rtx_cost (XEXP (x, 1), code, speed); + return true; + } + + subcode = GET_CODE (XEXP (x, 1)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed); + return true; + } + + if (subcode == MULT + && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT + && ((INTVAL (XEXP (XEXP (x, 1), 1)) & + (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed); + return true; + } + + if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed); + if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG + && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM) + *total += COSTS_N_INSNS (1); + + return true; + } + /* Fall through */ case PLUS: - if (arm_arch6 && mode == SImode + if (code == PLUS && arm_arch6 && mode == SImode && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) - return 1 + (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM ? 10 : 0) - + (GET_CODE (XEXP (x, 1)) == MEM ? 10 : 0); + { + *total = COSTS_N_INSNS (1); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)), + speed); + *total += rtx_cost (XEXP (x, 1), code, speed); + return true; + } - if (GET_CODE (XEXP (x, 0)) == MULT) + /* MLA: All arguments must be registers. 
We filter out + multiplication by a power of two, so that we fall down into + the code below. */ + if (GET_CODE (XEXP (x, 0)) == MULT + && ! (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && ((INTVAL (XEXP (XEXP (x, 0), 1)) & + (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))) { - extra_cost = rtx_cost (XEXP (x, 0), code, true); - if (!REG_OR_SUBREG_REG (XEXP (x, 1))) - extra_cost += 4 * ARM_NUM_REGS (mode); - return extra_cost; + /* The cost comes from the cost of the multiply. */ + return false; } if (GET_MODE_CLASS (mode) == MODE_FLOAT) - return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8) - + ((REG_OR_SUBREG_REG (XEXP (x, 1)) - || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE - && arm_const_double_rtx (XEXP (x, 1)))) - ? 0 : 8)); + { + if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + { + *total = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE + && arm_const_double_rtx (XEXP (x, 1))) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; + } + + return false; + } + + *total = COSTS_N_INSNS (20); + return false; + } + + if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed); + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG + && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM) + *total += COSTS_N_INSNS (1); + return true; + } /* Fall through */ + case AND: case XOR: case IOR: extra_cost = 0; @@ -5019,38 +5134,56 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer) && GET_CODE (XEXP (x, 1)) != CONST_INT) || (REG_OR_SUBREG_REG (XEXP (x, 0)) && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0))))) - extra_cost = 4; + *total = 4; if (mode == DImode) - return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8) - + ((REG_OR_SUBREG_REG (XEXP (x, 1)) - || (GET_CODE (XEXP (x, 1)) == CONST_INT - && const_ok_for_op (INTVAL (XEXP (x, 1)), code))) - ? 0 : 8)); - - if (REG_OR_SUBREG_REG (XEXP (x, 0))) - return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost) - + ((REG_OR_SUBREG_REG (XEXP (x, 1)) - || (GET_CODE (XEXP (x, 1)) == CONST_INT - && const_ok_for_op (INTVAL (XEXP (x, 1)), code))) - ? 0 : 4)); - - else if (REG_OR_SUBREG_REG (XEXP (x, 1))) - return (1 + extra_cost - + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT - || subcode == LSHIFTRT || subcode == ASHIFTRT - || subcode == ROTATE || subcode == ROTATERT - || (subcode == MULT - && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT - && ((INTVAL (XEXP (XEXP (x, 0), 1)) & - (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))) - && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0))) - && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)) - && !arm_tune_cortex_a9) - || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)) - ? 
0 : 4)); + { + *total += COSTS_N_INSNS (2); + if (GET_CODE (XEXP (x, 1)) == CONST_INT + && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; + } - return 8; + return false; + } + + *total += COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 1)) == CONST_INT + && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; + } + subcode = GET_CODE (XEXP (x, 0)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT) + { + *total += rtx_cost (XEXP (x, 1), code, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); + return true; + } + + if (subcode == MULT + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && ((INTVAL (XEXP (XEXP (x, 0), 1)) & + (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)) + { + *total += rtx_cost (XEXP (x, 1), code, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); + return true; + } + + if (subcode == UMIN || subcode == UMAX + || subcode == SMIN || subcode == SMAX) + { + *total = COSTS_N_INSNS (3); + return true; + } + + return false; case MULT: /* This should have been handled by the CPU specific routines. */ @@ -5064,108 +5197,290 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer) == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))) && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND)) - return 8; - return 99; + { + *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed); + return true; + } + *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */ + return false; case NEG: if (GET_MODE_CLASS (mode) == MODE_FLOAT) - return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6); + { + if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + { + *total = COSTS_N_INSNS (1); + return false; + } + *total = COSTS_N_INSNS (2); + return false; + } + /* Fall through */ case NOT: - if (mode == DImode) - return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4); + *total = COSTS_N_INSNS (ARM_NUM_REGS(mode)); + if (mode == SImode && code == NOT) + { + subcode = GET_CODE (XEXP (x, 0)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT + || (subcode == MULT + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && ((INTVAL (XEXP (XEXP (x, 0), 1)) & + (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))) + { + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); + /* Register shifts cost an extra cycle. */ + if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT) + *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1), + subcode, speed); + return true; + } + } - return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 
0 : 4); + return false; case IF_THEN_ELSE: if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) - return 14; - return 2; + { + *total = COSTS_N_INSNS (4); + return true; + } + + operand = XEXP (x, 0); + + if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE) + && GET_CODE (XEXP (operand, 0)) == REG + && REGNO (XEXP (operand, 0)) == CC_REGNUM)) + *total += COSTS_N_INSNS (1); + *total += (rtx_cost (XEXP (x, 1), code, speed) + + rtx_cost (XEXP (x, 2), code, speed)); + return true; + + case NE: + if (mode == SImode && XEXP (x, 1) == const0_rtx) + { + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed); + return true; + } + goto scc_insn; + + case GE: + if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM) + && mode == SImode && XEXP (x, 1) == const0_rtx) + { + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed); + return true; + } + goto scc_insn; + + case LT: + if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM) + && mode == SImode && XEXP (x, 1) == const0_rtx) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed); + return true; + } + goto scc_insn; + + case EQ: + case GT: + case LE: + case GEU: + case LTU: + case GTU: + case LEU: + case UNORDERED: + case ORDERED: + case UNEQ: + case UNGE: + case UNLT: + case UNGT: + case UNLE: + scc_insn: + /* SCC insns. Where the comparison has already been performed, they + cost 2 instructions. Otherwise they need an additional comparison + before them. */ + *total = COSTS_N_INSNS (2); + if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM) + { + return true; + } + /* Fall through */ case COMPARE: - return 1; + if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM) + { + *total = 0; + return true; + } + + *total += COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 1)) == CONST_INT + && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; + } + + subcode = GET_CODE (XEXP (x, 0)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT) + { + *total += rtx_cost (XEXP (x, 1), code, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); + return true; + } + + if (subcode == MULT + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && ((INTVAL (XEXP (XEXP (x, 0), 1)) & + (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)) + { + *total += rtx_cost (XEXP (x, 1), code, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); + return true; + } + + return false; + + case UMIN: + case UMAX: + case SMIN: + case SMAX: + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed); + if (GET_CODE (XEXP (x, 1)) != CONST_INT + || !const_ok_for_arm (INTVAL (XEXP (x, 1)))) + *total += rtx_cost (XEXP (x, 1), code, speed); + return true; case ABS: - return 4 + (mode == DImode ? 4 : 0); + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + { + *total = COSTS_N_INSNS (1); + return false; + } + *total = COSTS_N_INSNS (20); + return false; + } + *total = COSTS_N_INSNS (1); + if (mode == DImode) + *total += COSTS_N_INSNS (3); + return false; case SIGN_EXTEND: - if (arm_arch_thumb2 && mode == SImode) - return 1 + (GET_CODE (XEXP (x, 0)) == MEM ?
10 : 0); + if (GET_MODE_CLASS (mode) == MODE_INT) + { + *total = 0; + if (mode == DImode) + *total += COSTS_N_INSNS (1); + + if (GET_MODE (XEXP (x, 0)) != SImode) + { + if (arm_arch6) + { + if (GET_CODE (XEXP (x, 0)) != MEM) + *total += COSTS_N_INSNS (1); + } + else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM) + *total += COSTS_N_INSNS (2); + } + + return false; + } - if (GET_MODE (XEXP (x, 0)) == QImode) - return (4 + (mode == DImode ? 4 : 0) - + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); /* Fall through */ case ZERO_EXTEND: - if (arm_arch6 && mode == SImode) - return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0); - - switch (GET_MODE (XEXP (x, 0))) + *total = 0; + if (GET_MODE_CLASS (mode) == MODE_INT) { - case QImode: - return (1 + (mode == DImode ? 4 : 0) - + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); + if (mode == DImode) + *total += COSTS_N_INSNS (1); - case HImode: - return (4 + (mode == DImode ? 4 : 0) - + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); + if (GET_MODE (XEXP (x, 0)) != SImode) + { + if (arm_arch6) + { + if (GET_CODE (XEXP (x, 0)) != MEM) + *total += COSTS_N_INSNS (1); + } + else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM) + *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ? + 1 : 2); + } - case SImode: - return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); + return false; + } + switch (GET_MODE (XEXP (x, 0))) + { case V8QImode: case V4HImode: case V2SImode: case V4QImode: case V2HImode: - return 1; + *total = COSTS_N_INSNS (1); + return false; default: gcc_unreachable (); } gcc_unreachable (); + case ZERO_EXTRACT: + case SIGN_EXTRACT: + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed); + return true; + case CONST_INT: - if (const_ok_for_arm (INTVAL (x))) - return outer == SET ? 2 : -1; - else if (outer == AND - && const_ok_for_arm (~INTVAL (x))) - return -1; - else if ((outer == COMPARE - || outer == PLUS || outer == MINUS) - && const_ok_for_arm (-INTVAL (x))) - return -1; + if (const_ok_for_arm (INTVAL (x)) + || const_ok_for_arm (~INTVAL (x))) + *total = COSTS_N_INSNS (1); else - return 5; + *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX, + INTVAL (x), NULL_RTX, + NULL_RTX, 0, 0)); + return true; case CONST: case LABEL_REF: case SYMBOL_REF: - return 6; + *total = COSTS_N_INSNS (3); + return true; case HIGH: + *total = COSTS_N_INSNS (1); + return true; + case LO_SUM: - return (outer == SET) ? 1 : -1; + *total = COSTS_N_INSNS (1); + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; case CONST_DOUBLE: - if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x)) - return outer == SET ? 2 : -1; - else if ((outer == COMPARE || outer == PLUS) - && neg_const_double_rtx_ok_for_fpa (x)) - return -1; - return 7; + if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (4); + return true; default: - return 99; + *total = COSTS_N_INSNS (4); + return false; } } /* RTX costs when optimizing for size. */ static bool -arm_size_rtx_costs (rtx x, int code, int outer_code, int *total) +arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total) { enum machine_mode mode = GET_MODE (x); - if (TARGET_THUMB1) { /* XXX TBD. For now, use the standard costs. */ @@ -5395,19 +5710,22 @@ arm_size_rtx_costs (rtx x, int code, int outer_code, int *total) /* RTX costs when optimizing for size. 
*/ static bool -arm_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed) +arm_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed) { if (!speed) return arm_size_rtx_costs (x, code, outer_code, total); else - return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total); + return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total, + speed); } /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not supported on any "slowmul" cores, so it can be ignored. */ static bool -arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total) +arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) { enum machine_mode mode = GET_MODE (x); @@ -5423,8 +5741,8 @@ arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total) if (GET_MODE_CLASS (mode) == MODE_FLOAT || mode == DImode) { - *total = 30; - return true; + *total = COSTS_N_INSNS (20); + return false; } if (GET_CODE (XEXP (x, 1)) == CONST_INT) @@ -5440,20 +5758,19 @@ for (j = 0; i && j < 32; j += booth_unit_size) { i >>= booth_unit_size; - cost += 2; + cost++; } - *total = cost; + *total = COSTS_N_INSNS (cost); + *total += rtx_cost (XEXP (x, 0), code, speed); return true; } - *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4) - + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4); - return true; + *total = COSTS_N_INSNS (20); + return false; default: - *total = arm_rtx_costs_1 (x, code, outer_code); - return true; + return arm_rtx_costs_1 (x, outer_code, total, speed); } } @@ -5461,7 +5778,8 @@ /* RTX cost for cores with a fast multiply unit (M variants). */ static bool -arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total) +arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) { enum machine_mode mode = GET_MODE (x); @@ -5482,16 +5800,15 @@ && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) { - *total = 8; - return true; + *total = COSTS_N_INSNS (2); + return false; } - if (GET_MODE_CLASS (mode) == MODE_FLOAT - || mode == DImode) + if (mode == DImode) { - *total = 30; - return true; + *total = COSTS_N_INSNS (5); + return false; } if (GET_CODE (XEXP (x, 1)) == CONST_INT) @@ -5507,20 +5824,34 @@ for (j = 0; i && j < 32; j += booth_unit_size) { i >>= booth_unit_size; - cost += 2; + cost++; } - *total = cost; - return true; + *total = COSTS_N_INSNS (cost); + return false; } - *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4) - + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4); - return true; + if (mode == SImode) + { + *total = COSTS_N_INSNS (4); + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + { + *total = COSTS_N_INSNS (1); + return false; + } + } + + /* Requires a lib call */ + *total = COSTS_N_INSNS (20); + return false; default: - *total = arm_rtx_costs_1 (x, code, outer_code); - return true; + return arm_rtx_costs_1 (x, outer_code, total, speed); } } @@ -5529,7 +5860,7 @@ so it can be ignored.
*/ static bool -arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total) +arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed) { enum machine_mode mode = GET_MODE (x); @@ -5541,6 +5872,15 @@ arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total) switch (code) { + case COMPARE: + if (GET_CODE (XEXP (x, 0)) != MULT) + return arm_rtx_costs_1 (x, outer_code, total, speed); + + /* A COMPARE of a MULT is slow on XScale; the muls instruction + will stall until the multiplication is complete. */ + *total = COSTS_N_INSNS (3); + return false; + case MULT: /* There is no point basing this on the tuning, since it is always the fast variant if it exists at all. */ @@ -5549,60 +5889,58 @@ arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total) && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) { - *total = 8; - return true; + *total = COSTS_N_INSNS (2); + return false; } - if (GET_MODE_CLASS (mode) == MODE_FLOAT - || mode == DImode) + if (mode == DImode) { - *total = 30; - return true; + *total = COSTS_N_INSNS (5); + return false; } if (GET_CODE (XEXP (x, 1)) == CONST_INT) { - unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1)) - & (unsigned HOST_WIDE_INT) 0xffffffff); - int cost, const_ok = const_ok_for_arm (i); + /* If operand 1 is a constant we can more accurately + calculate the cost of the multiply. The multiplier can + retire 15 bits on the first cycle and a further 12 on the + second. We do, of course, have to load the constant into + a register first. */ + unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1)); + /* There's a general overhead of one cycle. */ + int cost = 1; unsigned HOST_WIDE_INT masked_const; - /* The cost will be related to two insns. - First a load of the constant (MOV or LDR), then a multiply. */ - cost = 2; - if (! const_ok) - cost += 1; /* LDR is probably more expensive because - of longer result latency. */ + if (i & 0x80000000) + i = ~i; + + i &= (unsigned HOST_WIDE_INT) 0xffffffff; + masked_const = i & 0xffff8000; - if (masked_const != 0 && masked_const != 0xffff8000) + if (masked_const != 0) { + cost++; masked_const = i & 0xf8000000; - if (masked_const == 0 || masked_const == 0xf8000000) - cost += 1; - else - cost += 2; + if (masked_const != 0) + cost++; } - *total = cost; - return true; + *total = COSTS_N_INSNS (cost); + return false; } - *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4) - + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4); - return true; + if (mode == SImode) + { + *total = COSTS_N_INSNS (3); + return false; + } - case COMPARE: - /* A COMPARE of a MULT is slow on XScale; the muls instruction - will stall until the multiplication is complete. */ - if (GET_CODE (XEXP (x, 0)) == MULT) - *total = 4 + rtx_cost (XEXP (x, 0), code, true); - else - *total = arm_rtx_costs_1 (x, code, outer_code); - return true; + /* Requires a lib call */ + *total = COSTS_N_INSNS (20); + return false; default: - *total = arm_rtx_costs_1 (x, code, outer_code); - return true; + return arm_rtx_costs_1 (x, outer_code, total, speed); } } @@ -5610,11 +5948,10 @@ arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total) /* RTX costs for 9e (and later) cores. 
*/ static bool -arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total) +arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) { enum machine_mode mode = GET_MODE (x); - int nonreg_cost; - int cost; if (TARGET_THUMB1) { @@ -5640,35 +5977,37 @@ arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total) && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) { - *total = 3; - return true; + *total = COSTS_N_INSNS (2); + return false; } - if (GET_MODE_CLASS (mode) == MODE_FLOAT) - { - *total = 30; - return true; - } if (mode == DImode) { - cost = 7; - nonreg_cost = 8; + *total = COSTS_N_INSNS (5); + return false; } - else + + if (mode == SImode) { - cost = 2; - nonreg_cost = 4; + *total = COSTS_N_INSNS (2); + return false; } + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + { + *total = COSTS_N_INSNS (1); + return false; + } + } - *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost) - + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost); - return true; + *total = COSTS_N_INSNS (20); + return false; default: - *total = arm_rtx_costs_1 (x, code, outer_code); - return true; + return arm_rtx_costs_1 (x, outer_code, total, speed); } } /* All address computations that can be done are free, but rtx cost returns -- 2.30.2
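A note on the constant-multiply cost loops above: arm_slowmul_rtx_costs and
arm_fastmul_rtx_costs estimate a multiply by a CONST_INT by consuming the
constant in booth_unit_size-bit chunks, one cycle per chunk, until no set
bits remain (the patch changes "cost += 2" to "cost++" so the running count
is in cycles before COSTS_N_INSNS is applied).  The standalone sketch below
mirrors that loop outside GCC; the function name is hypothetical, and the
base cost the real code adds for loading the constant into a register is
omitted.  booth_unit_size is 2 for the "slowmul" tuning (8 for the
fast-multiply variants).

#include <stdio.h>

/* Hypothetical standalone sketch (not GCC code): cycle estimate for a
   constant multiply on a "slowmul" core, mirroring the loop in
   arm_slowmul_rtx_costs.  Each iteration retires booth_unit_size bits
   of the multiplier, so the count tracks the position of the highest
   set bit.  */
static int
slowmul_const_mult_cycles (unsigned int multiplier)
{
  const int booth_unit_size = 2;	/* 8 on fast-multiply cores.  */
  unsigned int i = multiplier;
  int j, cost = 0;

  for (j = 0; i && j < 32; j += booth_unit_size)
    {
      i >>= booth_unit_size;
      cost++;
    }
  return cost;
}

int
main (void)
{
  /* Small constants terminate early; a constant with the top bit set
     pays for the full sixteen iterations.  */
  printf ("x * 10         -> %d cycles\n", slowmul_const_mult_cycles (10));
  printf ("x * 65536      -> %d cycles\n", slowmul_const_mult_cycles (65536));
  printf ("x * 0xffffffff -> %d cycles\n", slowmul_const_mult_cycles (~0u));
  return 0;
}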
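The XScale variant replaces that loop with a closed form, matching the new
comment in arm_xscale_rtx_costs: the multiplier can retire 15 bits on the
first cycle and a further 12 on the second, on top of one cycle of general
overhead, and a negative constant is complemented first so both signs are
costed symmetrically.  A sketch under the same caveats (hypothetical helper
name, standalone, not GCC code):

#include <stdio.h>

/* Hypothetical standalone sketch of the constant-multiply cost
   computed by arm_xscale_rtx_costs: one cycle of overhead, plus one
   if the constant needs more than the first 15 bits, plus one more
   if it needs more than the next 12.  */
static int
xscale_const_mult_cycles (int multiplier)
{
  unsigned int i = (unsigned int) multiplier;
  int cost = 1;			/* General overhead.  */

  if (i & 0x80000000u)
    i = ~i;			/* Negative constants are complemented.  */

  if (i & 0xffff8000u)		/* Set bits beyond the first 15.  */
    {
      cost++;
      if (i & 0xf8000000u)	/* Set bits beyond the next 12.  */
	cost++;
    }
  return cost;
}

int
main (void)
{
  printf ("x * 100    -> %d cycles\n", xscale_const_mult_cycles (100));
  printf ("x * 100000 -> %d cycles\n", xscale_const_mult_cycles (100000));
  printf ("x * -1     -> %d cycles\n", xscale_const_mult_cycles (-1));
  return 0;
}

This also shows why the XScale MULT case no longer consults
const_ok_for_arm as the old code did: the new estimate is driven by how
many significant bits the (possibly complemented) constant has, not by
whether the constant is itself a valid immediate.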